diff options
Diffstat (limited to 'thirdparty')
320 files changed, 20113 insertions, 16195 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md index 25d2e1cfe3..f8055ac3e7 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -5,10 +5,22 @@ respective folder names. Use two empty lines to separate categories for readability. +## amd-fsr + +Upstream: https://github.com/GPUOpen-Effects/FidelityFX-FSR +Version: 1.0.2 (a21ffb8f6c13233ba336352bdff293894c706575, 2021) +License: MIT + +Files extracted from upstream source: + +- `ffx_a.h` and `ffx_fsr1.h` from `ffx-fsr` +- `license.txt` + + ## basis_universal - Upstream: https://github.com/BinomialLLC/basis_universal -- Version: git (ba1c3e40f1d434ebaf9a167b44e9b11d2bf0f765, 2021) +- Version: git (646a9f826131cb0b9e14b5e4740874808315f83a, 2021) - License: Apache 2.0 Files extracted from upstream source: @@ -176,20 +188,20 @@ Files extracted from upstream source: ## graphite - Upstream: https://github.com/silnrsi/graphite -- Version: 1.3.14 (92f59dcc52f73ce747f1cdc831579ed2546884aa, 2020) +- Version: 1.3.14 (80c52493ef42e6fe605a69dcddd2a691cd8a1380, 2021) - License: MPL-2.0 Files extracted from upstream source: - the `include` folder -- the `src` folder -- `COPYING`, `ChangeLog` +- the `src` folder (minus `CMakeLists.txt` and `files.mk`) +- `COPYING` ## harfbuzz - Upstream: https://github.com/harfbuzz/harfbuzz -- Version: 3.1.1 (cd5c6cd0419ac5e4de975d6c476fb760bf06d2ce, 2021) +- Version: 3.1.2 (8aed5c21a31eece6a9f3cd775fda8facb6c28b9b, 2021) - License: MIT Files extracted from upstream source: @@ -235,14 +247,14 @@ Files extracted from upstream source: ## libogg - Upstream: https://www.xiph.org/ogg -- Version: git (c8fca6b4a02d695b1ceea39b330d4406001c03ed, 2019) +- Version: 1.3.5 (e1774cd77f471443541596e09078e78fdc342e4f, 2021) - License: BSD-3-Clause Files extracted from upstream source: - `src/*.{c,h}` -- `include/ogg/*.h` in ogg/ -- COPYING +- `include/ogg/*.h` in `ogg/` (run `configure` to generate `config_types.h`) +- `COPYING` ## libpng @@ -279,26 +291,26 @@ on top of the 1.1.1 source (not 
included in any stable release yet). ## libvorbis - Upstream: https://www.xiph.org/vorbis -- Version: 1.3.6 (2018) +- Version: 1.3.7 (0657aee69dec8508a0011f47f3b69d7538e9d262, 2020) - License: BSD-3-Clause Files extracted from upstream source: -- `src/*` except from: `lookups.pl`, `Makefile.*` -- `include/vorbis/*.h` as vorbis/ -- COPYING +- `lib/*` except from: `lookups.pl`, `Makefile.*` +- `include/vorbis/*.h` as `vorbis/` +- `COPYING` ## libwebp - Upstream: https://chromium.googlesource.com/webm/libwebp/ -- Version: 1.1.0 (d7844e9762b61c9638c263657bd49e1690184832, 2020) +- Version: 1.2.1 (9ce5843dbabcfd3f7c39ec7ceba9cbeb213cbfdf, 2021) - License: BSD-3-Clause Files extracted from upstream source: -- `src/*` except from: .am, .rc and .in files -- AUTHORS, COPYING, PATENTS +- `src/*` except from: `.am`, `.rc` and `.in` files +- `AUTHORS`, `COPYING`, `PATENTS` Important: The files `utils/bit_reader_utils.{c,h}` have Godot-made changes to ensure they build for Javascript/HTML5. Those @@ -333,7 +345,7 @@ File extracted from upstream release tarball: ## meshoptimizer - Upstream: https://github.com/zeux/meshoptimizer -- Version: git (f5d83e879c48f8664783a69b4f50711d27549b66, 2021) +- Version: git (f4c356d79fadb99cbf432f7e199d823581b0e19e, 2021) - License: MIT Files extracted from upstream repository: @@ -347,25 +359,35 @@ instead of a combination of distance and attribute errors. Patches for both chan found in the `patches` directory. 
+## minimp3 + +- Upstream: https://github.com/lieff/minimp3 +- Version: git (afb604c06bc8beb145fecd42c0ceb5bda8795144, 2021) +- License: CC0 1.0 + +Files extracted from upstream repository: + +- `minimp3.h` +- `minimp3_ex.h` +- `LICENSE` + + ## miniupnpc - Upstream: https://github.com/miniupnp/miniupnp -- Version: 2.2.2 (81029a860baf1f727903e5b85307903b3f40cbc8, 2021) +- Version: 2.2.3 (2df8120326ed4246e049a7a6de707539604cd514, 2021) - License: BSD-3-Clause Files extracted from upstream source: -- All `*.c` and `*.h` files from `miniupnpc` to `thirdparty/miniupnpc/miniupnpc` +- Copy `miniupnpc/src` and `miniupnpc/include` to `thirdparty/miniupnpc` - Remove the following test or sample files: - `listdevices.c minihttptestserver.c miniupnpcmodule.c upnpc.c upnperrors.* test* wingenminiupnpcstrings.c` + `listdevices.c minihttptestserver.c miniupnpcmodule.c upnpc.c upnperrors.* test*` - `LICENSE` -The only modified file is `miniupnpcstrings.h`, which was created for Godot -(it is usually autogenerated by cmake). Bump the version number for miniupnpc in that -file when upgrading. - -Note: The following upstream patch has been applied, remove this notice on next update. -https://github.com/miniupnp/miniupnp/commit/3a08dd4b89af2e9effa22a136bac86f2f306fd79 +The only modified file is `src/miniupnpcstrings.h`, which was created for Godot +(it is usually autogenerated by cmake). Bump the version number for miniupnpc in +that file when upgrading. ## minizip @@ -506,7 +528,7 @@ Patch files are provided in `oidn/patches/`. ## pcre2 - Upstream: http://www.pcre.org -- Version: 10.36 (r1288, 2020) +- Version: 10.39 (35fee4193b852cb504892352bd0155de10809889, 2021) - License: BSD-3-Clause Files extracted from upstream source: @@ -599,18 +621,21 @@ comments and a patch is provided in the squish/ folder. 
## tinyexr - Upstream: https://github.com/syoyo/tinyexr -- Version: 1.0.0 (e4b7840d9448b7d57a88384ce26143004f3c0c71, 2020) +- Version: 1.0.1 (67010eae802211202d0797f4df2b809f4ba7442c, 2021) - License: BSD-3-Clause Files extracted from upstream source: - `tinyexr.{cc,h}` +The `tinyexr.cc` file was modified to include `zlib.h` which we provide, +instead of `miniz.h` as an external dependency. + ## vhacd - Upstream: https://github.com/kmammou/v-hacd -- Version: git (b07958e18e01d504e3af80eeaeb9f033226533d7, 2019) +- Version: git (1a49edf29c69039df15286181f2f27e17ceb9aef, 2020) - License: BSD-3-Clause Files extracted from upstream source: @@ -670,25 +695,29 @@ Patches in the `patches` directory should be re-applied after updates. ## wslay - Upstream: https://github.com/tatsuhiro-t/wslay -- Version: 1.1.1 (c9a84aa6df8512584c77c8cd15be9536b89c35aa, 2020) +- Version: 1.1.1+git (45d22583b488f79d5a4e598cc7675c191c5ab53f, 2021) - License: MIT File extracted from upstream release tarball: -- All `*.c` and `*.h` in `lib/` and `lib/includes/` -- `wslay.h` has a small Godot addition to fix MSVC build. - See `thirdparty/wslay/msvcfix.diff` +- Run `cmake .` to generate `config.h` and `wslayver.h`. + Contents might need tweaking for Godot, review diff. +- All `*.c` and `*.h` files from `lib/` +- All `*.h` in `lib/includes/wslay/` as `wslay/` +- `wslay/wslay.h` has a small Godot addition to fix MSVC build. 
+ See `patches/msvcfix.diff` +- `COPYING` ## xatlas - Upstream: https://github.com/jpcy/xatlas -- Version: git (5571fc7ef0d06832947c0a935ccdcf083f7a9264, 2020) +- Version: git (ec707faeac3b95e6b416076a9509718cce105b6a, 2021) - License: MIT Files extracted from upstream source: -- `xatlas.{cpp,h}` +- `source/xatlas/xatlas.{cpp,h}` - `LICENSE` @@ -706,11 +735,11 @@ Files extracted from upstream source: ## zstd - Upstream: https://github.com/facebook/zstd -- Version: 1.4.8 (97a3da1df009d4dc67251de0c4b1c9d7fe286fc1, 2020) +- Version: 1.5.0 (a488ba114ec17ea1054b9057c26a046fc122b3b6, 2021) - License: BSD-3-Clause Files extracted from upstream source: -- lib/{common/,compress/,decompress/,zstd.h} -- LICENSE +- `lib/{common/,compress/,decompress/,zstd.h,zstd_errors.h}` +- `LICENSE` diff --git a/thirdparty/amd-fsr/ffx_a.h b/thirdparty/amd-fsr/ffx_a.h new file mode 100644 index 0000000000..d04bff55cb --- /dev/null +++ b/thirdparty/amd-fsr/ffx_a.h @@ -0,0 +1,2656 @@ +//============================================================================================================================== +// +// [A] SHADER PORTABILITY 1.20210629 +// +//============================================================================================================================== +// FidelityFX Super Resolution Sample +// +// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +//------------------------------------------------------------------------------------------------------------------------------ +// MIT LICENSE +// =========== +// Copyright (c) 2014 Michal Drobot (for concepts used in "FLOAT APPROXIMATIONS"). +// ----------- +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// ----------- +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +// Software. +// ----------- +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+//------------------------------------------------------------------------------------------------------------------------------ +// ABOUT +// ===== +// Common central point for high-level shading language and C portability for various shader headers. +//------------------------------------------------------------------------------------------------------------------------------ +// DEFINES +// ======= +// A_CPU ..... Include the CPU related code. +// A_GPU ..... Include the GPU related code. +// A_GLSL .... Using GLSL. +// A_HLSL .... Using HLSL. +// A_HLSL_6_2 Using HLSL 6.2 with new 'uint16_t' and related types (requires '-enable-16bit-types'). +// A_NO_16_BIT_CAST Don't use instructions that are not available in SPIR-V (needed for running A_HLSL_6_2 on Vulkan) +// A_GCC ..... Using a GCC compatible compiler (else assume MSVC compatible compiler by default). +// ======= +// A_BYTE .... Support 8-bit integer. +// A_HALF .... Support 16-bit integer and floating point. +// A_LONG .... Support 64-bit integer. +// A_DUBL .... Support 64-bit floating point. +// ======= +// A_WAVE .... Support wave-wide operations. +//------------------------------------------------------------------------------------------------------------------------------ +// To get #include "ffx_a.h" working in GLSL use '#extension GL_GOOGLE_include_directive:require'. +//------------------------------------------------------------------------------------------------------------------------------ +// SIMPLIFIED TYPE SYSTEM +// ====================== +// - All ints will be unsigned with exception of when signed is required. 
+// - Type naming simplified and shortened "A<type><#components>", +// - H = 16-bit float (half) +// - F = 32-bit float (float) +// - D = 64-bit float (double) +// - P = 1-bit integer (predicate, not using bool because 'B' is used for byte) +// - B = 8-bit integer (byte) +// - W = 16-bit integer (word) +// - U = 32-bit integer (unsigned) +// - L = 64-bit integer (long) +// - Using "AS<type><#components>" for signed when required. +//------------------------------------------------------------------------------------------------------------------------------ +// TODO +// ==== +// - Make sure 'ALerp*(a,b,m)' does 'b*m+(-a*m+a)' (2 ops). +//------------------------------------------------------------------------------------------------------------------------------ +// CHANGE LOG +// ========== +// 20200914 - Expanded wave ops and prx code. +// 20200713 - Added [ZOL] section, fixed serious bugs in sRGB and Rec.709 color conversion code, etc. +//============================================================================================================================== +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// COMMON +//============================================================================================================================== +#define A_2PI 6.28318530718 +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//
//
//                                                            CPU
//
//
//==============================================================================================================================
// The definitions below are compiled only when the includer defines A_CPU.
// NOTE(review): the matching #endif lies beyond this excerpt - confirm when editing the end of the section.
#ifdef A_CPU
 // Supporting user defined overrides.
 // A_RESTRICT is the pointer qualifier used by the vector argument macros further down; '__restrict' unless predefined.
 #ifndef A_RESTRICT
  #define A_RESTRICT __restrict
 #endif
//------------------------------------------------------------------------------------------------------------------------------
 // A_STATIC is the prefix written in front of the function definitions that follow; 'static' unless predefined.
 #ifndef A_STATIC
  #define A_STATIC static
 #endif
//------------------------------------------------------------------------------------------------------------------------------
 // Same types across CPU and GPU.
 // Predicate uses 32-bit integer (C friendly bool).
 // Naming follows the "A<type><#components>" scheme from the file header; "AS..." marks the signed variants.
 typedef uint32_t AP1;  // P: predicate
 typedef float AF1;     // F: 32-bit float
 typedef double AD1;    // D: 64-bit float
 typedef uint8_t AB1;   // B: 8-bit integer
 typedef uint16_t AW1;  // W: 16-bit integer
 typedef uint32_t AU1;  // U: 32-bit integer
 typedef uint64_t AL1;  // L: 64-bit integer
 typedef int8_t ASB1;   // signed 8-bit
 typedef int16_t ASW1;  // signed 16-bit
 typedef int32_t ASU1;  // signed 32-bit
 typedef int64_t ASL1;  // signed 64-bit
//------------------------------------------------------------------------------------------------------------------------------
 // Value casts to the unsigned/floating scalar types (plain C casts, so the value is converted, not reinterpreted).
 #define AD1_(a) ((AD1)(a))
 #define AF1_(a) ((AF1)(a))
 #define AL1_(a) ((AL1)(a))
 #define AU1_(a) ((AU1)(a))
//------------------------------------------------------------------------------------------------------------------------------
 // Value casts to the signed integer types.
 #define ASL1_(a) ((ASL1)(a))
 #define ASU1_(a) ((ASU1)(a))
//------------------------------------------------------------------------------------------------------------------------------
 // Returns the bit pattern of float 'a' as a 32-bit unsigned integer: the float member of a local union is written and the
 // unsigned member is read back.
 A_STATIC AU1 AU1_AF1(AF1 a){union{AF1 f;AU1 u;}bits;bits.f=a;return bits.u;}
//------------------------------------------------------------------------------------------------------------------------------
 // Integer truth values (the predicate type AP1 above is an integer, not a bool).
 #define A_TRUE 1
 #define A_FALSE 0
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//
//                                                      CPU/GPU PORTING
//
//------------------------------------------------------------------------------------------------------------------------------
// Get CPU and GPU to share all setup code, without duplicate code paths.
// This uses a lower-case prefix for special vector constructs.
//  - In C restrict pointers are used.
//  - In the shading language, in/inout/out arguments are used.
// This depends on the ability to access a vector value in both languages via array syntax (aka color[2]).
//==============================================================================================================================
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//                                     VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY
//==============================================================================================================================
 // Return types: on the CPU every vector return is an A_RESTRICT pointer to the scalar element type, so the 2/3/4-component
 // variants of one element type expand to the same text.
 #define retAD2 AD1 *A_RESTRICT
 #define retAD3 AD1 *A_RESTRICT
 #define retAD4 AD1 *A_RESTRICT
 #define retAF2 AF1 *A_RESTRICT
 #define retAF3 AF1 *A_RESTRICT
 #define retAF4 AF1 *A_RESTRICT
 #define retAL2 AL1 *A_RESTRICT
 #define retAL3 AL1 *A_RESTRICT
 #define retAL4 AL1 *A_RESTRICT
 #define retAU2 AU1 *A_RESTRICT
 #define retAU3 AU1 *A_RESTRICT
 #define retAU4 AU1 *A_RESTRICT
//------------------------------------------------------------------------------------------------------------------------------
 // Input arguments: same pointer expansion as the return types (no const qualifier is added).
 #define inAD2 AD1 *A_RESTRICT
 #define inAD3 AD1 *A_RESTRICT
 #define inAD4 AD1 *A_RESTRICT
 #define inAF2 AF1 *A_RESTRICT
 #define inAF3 AF1 *A_RESTRICT
 #define inAF4 AF1 *A_RESTRICT
 #define inAL2 AL1 *A_RESTRICT
 #define inAL3 AL1 *A_RESTRICT
 #define inAL4 AL1 *A_RESTRICT
 #define inAU2 AU1 *A_RESTRICT
 #define inAU3 AU1 *A_RESTRICT
 #define inAU4 AU1 *A_RESTRICT
//------------------------------------------------------------------------------------------------------------------------------
 // In/out arguments: same pointer expansion.
 #define inoutAD2 AD1 *A_RESTRICT
 #define inoutAD3 AD1 *A_RESTRICT
 #define inoutAD4 AD1 *A_RESTRICT
 #define inoutAF2 AF1 *A_RESTRICT
 #define inoutAF3 AF1 *A_RESTRICT
 #define inoutAF4 AF1 *A_RESTRICT
 #define inoutAL2 AL1 *A_RESTRICT
 #define inoutAL3 AL1 *A_RESTRICT
 #define inoutAL4 AL1 *A_RESTRICT
 #define inoutAU2 AU1 *A_RESTRICT
 #define inoutAU3 AU1 *A_RESTRICT
 #define inoutAU4 AU1 *A_RESTRICT
//------------------------------------------------------------------------------------------------------------------------------
 // Output arguments: same pointer expansion.
 #define outAD2 AD1 *A_RESTRICT
 #define outAD3 AD1 *A_RESTRICT
 #define outAD4 AD1 *A_RESTRICT
 #define outAF2 AF1 *A_RESTRICT
 #define outAF3 AF1 *A_RESTRICT
 #define outAF4 AF1 *A_RESTRICT
 #define outAL2 AL1 *A_RESTRICT
 #define outAL3 AL1 *A_RESTRICT
 #define outAL4 AL1 *A_RESTRICT
 #define outAU2 AU1 *A_RESTRICT
 #define outAU3 AU1 *A_RESTRICT
 #define outAU4 AU1 *A_RESTRICT
//------------------------------------------------------------------------------------------------------------------------------
 // Variable declarations: a vector named 'x' is declared as a fixed-size array of the scalar element type.
 #define varAD2(x) AD1 x[2]
 #define varAD3(x) AD1 x[3]
 #define varAD4(x) AD1 x[4]
 #define varAF2(x) AF1 x[2]
 #define varAF3(x) AF1 x[3]
 #define varAF4(x) AF1 x[4]
 #define varAL2(x) AL1 x[2]
 #define varAL3(x) AL1 x[3]
 #define varAL4(x) AL1 x[4]
 #define varAU2(x) AU1 x[2]
 #define varAU3(x) AU1 x[3]
 #define varAU4(x) AU1 x[4]
//------------------------------------------------------------------------------------------------------------------------------
 // Initializers: expand to a brace-enclosed list of the components.
 // NOTE(review): presumably paired with the var macros, e.g. 'varAF2(v)=initAF2(x,y);' - confirm against callers.
 #define initAD2(x,y) {x,y}
 #define initAD3(x,y,z) {x,y,z}
 #define initAD4(x,y,z,w) {x,y,z,w}
 #define initAF2(x,y) {x,y}
 #define initAF3(x,y,z) {x,y,z}
 #define initAF4(x,y,z,w) {x,y,z,w}
 #define initAL2(x,y) {x,y}
 #define initAL3(x,y,z) {x,y,z}
 #define initAL4(x,y,z,w) {x,y,z,w}
 #define initAU2(x,y) {x,y}
 #define initAU3(x,y,z) {x,y,z}
 #define initAU4(x,y,z,w) {x,y,z,w}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//                                                     SCALAR RETURN OPS
//------------------------------------------------------------------------------------------------------------------------------
// TODO
// ====
//  - Replace transcendentals with manual versions.
+//============================================================================================================================== + #ifdef A_GCC + A_STATIC AD1 AAbsD1(AD1 a){return __builtin_fabs(a);} + A_STATIC AF1 AAbsF1(AF1 a){return __builtin_fabsf(a);} + A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(__builtin_abs(ASU1_(a)));} + A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(__builtin_llabs(ASL1_(a)));} + #else + A_STATIC AD1 AAbsD1(AD1 a){return fabs(a);} + A_STATIC AF1 AAbsF1(AF1 a){return fabsf(a);} + A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(abs(ASU1_(a)));} + A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(labs((long)ASL1_(a)));} + #endif +//------------------------------------------------------------------------------------------------------------------------------ + #ifdef A_GCC + A_STATIC AD1 ACosD1(AD1 a){return __builtin_cos(a);} + A_STATIC AF1 ACosF1(AF1 a){return __builtin_cosf(a);} + #else + A_STATIC AD1 ACosD1(AD1 a){return cos(a);} + A_STATIC AF1 ACosF1(AF1 a){return cosf(a);} + #endif +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC AD1 ADotD2(inAD2 a,inAD2 b){return a[0]*b[0]+a[1]*b[1];} + A_STATIC AD1 ADotD3(inAD3 a,inAD3 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2];} + A_STATIC AD1 ADotD4(inAD4 a,inAD4 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3];} + A_STATIC AF1 ADotF2(inAF2 a,inAF2 b){return a[0]*b[0]+a[1]*b[1];} + A_STATIC AF1 ADotF3(inAF3 a,inAF3 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2];} + A_STATIC AF1 ADotF4(inAF4 a,inAF4 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3];} +//------------------------------------------------------------------------------------------------------------------------------ + #ifdef A_GCC + A_STATIC AD1 AExp2D1(AD1 a){return __builtin_exp2(a);} + A_STATIC AF1 AExp2F1(AF1 a){return __builtin_exp2f(a);} + #else + A_STATIC AD1 AExp2D1(AD1 a){return exp2(a);} + A_STATIC AF1 AExp2F1(AF1 a){return exp2f(a);} + #endif 
+//------------------------------------------------------------------------------------------------------------------------------ + #ifdef A_GCC + A_STATIC AD1 AFloorD1(AD1 a){return __builtin_floor(a);} + A_STATIC AF1 AFloorF1(AF1 a){return __builtin_floorf(a);} + #else + A_STATIC AD1 AFloorD1(AD1 a){return floor(a);} + A_STATIC AF1 AFloorF1(AF1 a){return floorf(a);} + #endif +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC AD1 ALerpD1(AD1 a,AD1 b,AD1 c){return b*c+(-a*c+a);} + A_STATIC AF1 ALerpF1(AF1 a,AF1 b,AF1 c){return b*c+(-a*c+a);} +//------------------------------------------------------------------------------------------------------------------------------ + #ifdef A_GCC + A_STATIC AD1 ALog2D1(AD1 a){return __builtin_log2(a);} + A_STATIC AF1 ALog2F1(AF1 a){return __builtin_log2f(a);} + #else + A_STATIC AD1 ALog2D1(AD1 a){return log2(a);} + A_STATIC AF1 ALog2F1(AF1 a){return log2f(a);} + #endif +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC AD1 AMaxD1(AD1 a,AD1 b){return a>b?a:b;} + A_STATIC AF1 AMaxF1(AF1 a,AF1 b){return a>b?a:b;} + A_STATIC AL1 AMaxL1(AL1 a,AL1 b){return a>b?a:b;} + A_STATIC AU1 AMaxU1(AU1 a,AU1 b){return a>b?a:b;} +//------------------------------------------------------------------------------------------------------------------------------ + // These follow the convention that A integer types don't have signage, until they are operated on. 
+ A_STATIC AL1 AMaxSL1(AL1 a,AL1 b){return (ASL1_(a)>ASL1_(b))?a:b;} + A_STATIC AU1 AMaxSU1(AU1 a,AU1 b){return (ASU1_(a)>ASU1_(b))?a:b;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC AD1 AMinD1(AD1 a,AD1 b){return a<b?a:b;} + A_STATIC AF1 AMinF1(AF1 a,AF1 b){return a<b?a:b;} + A_STATIC AL1 AMinL1(AL1 a,AL1 b){return a<b?a:b;} + A_STATIC AU1 AMinU1(AU1 a,AU1 b){return a<b?a:b;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC AL1 AMinSL1(AL1 a,AL1 b){return (ASL1_(a)<ASL1_(b))?a:b;} + A_STATIC AU1 AMinSU1(AU1 a,AU1 b){return (ASU1_(a)<ASU1_(b))?a:b;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC AD1 ARcpD1(AD1 a){return 1.0/a;} + A_STATIC AF1 ARcpF1(AF1 a){return 1.0f/a;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC AL1 AShrSL1(AL1 a,AL1 b){return AL1_(ASL1_(a)>>ASL1_(b));} + A_STATIC AU1 AShrSU1(AU1 a,AU1 b){return AU1_(ASU1_(a)>>ASU1_(b));} +//------------------------------------------------------------------------------------------------------------------------------ + #ifdef A_GCC + A_STATIC AD1 ASinD1(AD1 a){return __builtin_sin(a);} + A_STATIC AF1 ASinF1(AF1 a){return __builtin_sinf(a);} + #else + A_STATIC AD1 ASinD1(AD1 a){return sin(a);} + A_STATIC AF1 ASinF1(AF1 a){return sinf(a);} + #endif +//------------------------------------------------------------------------------------------------------------------------------ + #ifdef A_GCC + A_STATIC AD1 ASqrtD1(AD1 a){return __builtin_sqrt(a);} + A_STATIC AF1 ASqrtF1(AF1 a){return __builtin_sqrtf(a);} + #else + A_STATIC AD1 ASqrtD1(AD1 a){return sqrt(a);} + A_STATIC AF1 ASqrtF1(AF1 a){return sqrtf(a);} + #endif 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// SCALAR RETURN OPS - DEPENDENT +//============================================================================================================================== + A_STATIC AD1 AClampD1(AD1 x,AD1 n,AD1 m){return AMaxD1(n,AMinD1(x,m));} + A_STATIC AF1 AClampF1(AF1 x,AF1 n,AF1 m){return AMaxF1(n,AMinF1(x,m));} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC AD1 AFractD1(AD1 a){return a-AFloorD1(a);} + A_STATIC AF1 AFractF1(AF1 a){return a-AFloorF1(a);} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC AD1 APowD1(AD1 a,AD1 b){return AExp2D1(b*ALog2D1(a));} + A_STATIC AF1 APowF1(AF1 a,AF1 b){return AExp2F1(b*ALog2F1(a));} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC AD1 ARsqD1(AD1 a){return ARcpD1(ASqrtD1(a));} + A_STATIC AF1 ARsqF1(AF1 a){return ARcpF1(ASqrtF1(a));} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC AD1 ASatD1(AD1 a){return AMinD1(1.0,AMaxD1(0.0,a));} + A_STATIC AF1 ASatF1(AF1 a){return AMinF1(1.0f,AMaxF1(0.0f,a));} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// VECTOR OPS +//------------------------------------------------------------------------------------------------------------------------------ +// These are added as needed for production or prototyping, so not necessarily a complete set. +// They follow a convention of taking in a destination and also returning the destination value to increase utility. +//============================================================================================================================== + A_STATIC retAD2 opAAbsD2(outAD2 d,inAD2 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);return d;} + A_STATIC retAD3 opAAbsD3(outAD3 d,inAD3 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);d[2]=AAbsD1(a[2]);return d;} + A_STATIC retAD4 opAAbsD4(outAD4 d,inAD4 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);d[2]=AAbsD1(a[2]);d[3]=AAbsD1(a[3]);return d;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC retAF2 opAAbsF2(outAF2 d,inAF2 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);return d;} + A_STATIC retAF3 opAAbsF3(outAF3 d,inAF3 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);d[2]=AAbsF1(a[2]);return d;} + A_STATIC retAF4 opAAbsF4(outAF4 d,inAF4 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);d[2]=AAbsF1(a[2]);d[3]=AAbsF1(a[3]);return d;} +//============================================================================================================================== + A_STATIC retAD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];return d;} + A_STATIC retAD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];return 
d;} + A_STATIC retAD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];d[3]=a[3]+b[3];return d;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC retAF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];return d;} + A_STATIC retAF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];return d;} + A_STATIC retAF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];d[3]=a[3]+b[3];return d;} +//============================================================================================================================== + A_STATIC retAD2 opAAddOneD2(outAD2 d,inAD2 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;return d;} + A_STATIC retAD3 opAAddOneD3(outAD3 d,inAD3 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;return d;} + A_STATIC retAD4 opAAddOneD4(outAD4 d,inAD4 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;d[3]=a[3]+b;return d;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC retAF2 opAAddOneF2(outAF2 d,inAF2 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;return d;} + A_STATIC retAF3 opAAddOneF3(outAF3 d,inAF3 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;return d;} + A_STATIC retAF4 opAAddOneF4(outAF4 d,inAF4 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;d[3]=a[3]+b;return d;} +//============================================================================================================================== + A_STATIC retAD2 opACpyD2(outAD2 d,inAD2 a){d[0]=a[0];d[1]=a[1];return d;} + A_STATIC retAD3 opACpyD3(outAD3 d,inAD3 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];return d;} + A_STATIC retAD4 opACpyD4(outAD4 d,inAD4 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];d[3]=a[3];return d;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC retAF2 
opACpyF2(outAF2 d,inAF2 a){d[0]=a[0];d[1]=a[1];return d;} + A_STATIC retAF3 opACpyF3(outAF3 d,inAF3 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];return d;} + A_STATIC retAF4 opACpyF4(outAF4 d,inAF4 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];d[3]=a[3];return d;} +//============================================================================================================================== + A_STATIC retAD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);return d;} + A_STATIC retAD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);d[2]=ALerpD1(a[2],b[2],c[2]);return d;} + A_STATIC retAD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);d[2]=ALerpD1(a[2],b[2],c[2]);d[3]=ALerpD1(a[3],b[3],c[3]);return d;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC retAF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);return d;} + A_STATIC retAF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);d[2]=ALerpF1(a[2],b[2],c[2]);return d;} + A_STATIC retAF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);d[2]=ALerpF1(a[2],b[2],c[2]);d[3]=ALerpF1(a[3],b[3],c[3]);return d;} +//============================================================================================================================== + A_STATIC retAD2 opALerpOneD2(outAD2 d,inAD2 a,inAD2 b,AD1 c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);return d;} + A_STATIC retAD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);d[2]=ALerpD1(a[2],b[2],c);return d;} + A_STATIC retAD4 opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 
c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);d[2]=ALerpD1(a[2],b[2],c);d[3]=ALerpD1(a[3],b[3],c);return d;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC retAF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);return d;} + A_STATIC retAF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);d[2]=ALerpF1(a[2],b[2],c);return d;} + A_STATIC retAF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);d[2]=ALerpF1(a[2],b[2],c);d[3]=ALerpF1(a[3],b[3],c);return d;} +//============================================================================================================================== + A_STATIC retAD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);return d;} + A_STATIC retAD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);d[2]=AMaxD1(a[2],b[2]);return d;} + A_STATIC retAD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);d[2]=AMaxD1(a[2],b[2]);d[3]=AMaxD1(a[3],b[3]);return d;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC retAF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);return d;} + A_STATIC retAF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);d[2]=AMaxF1(a[2],b[2]);return d;} + A_STATIC retAF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);d[2]=AMaxF1(a[2],b[2]);d[3]=AMaxF1(a[3],b[3]);return d;} +//============================================================================================================================== + A_STATIC retAD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);return 
d;} + A_STATIC retAD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);d[2]=AMinD1(a[2],b[2]);return d;} + A_STATIC retAD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);d[2]=AMinD1(a[2],b[2]);d[3]=AMinD1(a[3],b[3]);return d;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC retAF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);return d;} + A_STATIC retAF3 opAMinF3(outAF3 d,inAF3 a,inAF3 b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);d[2]=AMinF1(a[2],b[2]);return d;} + A_STATIC retAF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);d[2]=AMinF1(a[2],b[2]);d[3]=AMinF1(a[3],b[3]);return d;} +//============================================================================================================================== + A_STATIC retAD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];return d;} + A_STATIC retAD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];return d;} + A_STATIC retAD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];d[3]=a[3]*b[3];return d;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC retAF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];return d;} + A_STATIC retAF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];return d;} + A_STATIC retAF4 opAMulF4(outAF4 d,inAF4 a,inAF4 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];d[3]=a[3]*b[3];return d;} +//============================================================================================================================== + A_STATIC retAD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;return d;} + A_STATIC retAD3 opAMulOneD3(outAD3 d,inAD3 
a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;return d;} + A_STATIC retAD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;d[3]=a[3]*b;return d;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC retAF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){d[0]=a[0]*b;d[1]=a[1]*b;return d;} + A_STATIC retAF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;return d;} + A_STATIC retAF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;d[3]=a[3]*b;return d;} +//============================================================================================================================== + A_STATIC retAD2 opANegD2(outAD2 d,inAD2 a){d[0]=-a[0];d[1]=-a[1];return d;} + A_STATIC retAD3 opANegD3(outAD3 d,inAD3 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];return d;} + A_STATIC retAD4 opANegD4(outAD4 d,inAD4 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];d[3]=-a[3];return d;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC retAF2 opANegF2(outAF2 d,inAF2 a){d[0]=-a[0];d[1]=-a[1];return d;} + A_STATIC retAF3 opANegF3(outAF3 d,inAF3 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];return d;} + A_STATIC retAF4 opANegF4(outAF4 d,inAF4 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];d[3]=-a[3];return d;} +//============================================================================================================================== + A_STATIC retAD2 opARcpD2(outAD2 d,inAD2 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);return d;} + A_STATIC retAD3 opARcpD3(outAD3 d,inAD3 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);d[2]=ARcpD1(a[2]);return d;} + A_STATIC retAD4 opARcpD4(outAD4 d,inAD4 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);d[2]=ARcpD1(a[2]);d[3]=ARcpD1(a[3]);return d;} +//------------------------------------------------------------------------------------------------------------------------------ + A_STATIC 
retAF2 opARcpF2(outAF2 d,inAF2 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);return d;} + A_STATIC retAF3 opARcpF3(outAF3 d,inAF3 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);d[2]=ARcpF1(a[2]);return d;} + A_STATIC retAF4 opARcpF4(outAF4 d,inAF4 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);d[2]=ARcpF1(a[2]);d[3]=ARcpF1(a[3]);return d;} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// HALF FLOAT PACKING +//============================================================================================================================== + // Convert float to half (in lower 16-bits of output). + // Same fast technique as documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf + // Supports denormals. 
+ // Conversion rules are to make computations possibly "safer" on the GPU, + // -INF & -NaN -> -65504 + // +INF & +NaN -> +65504 + A_STATIC AU1 AU1_AH1_AF1(AF1 f){ + static AW1 base[512]={ + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080,0x0100, + 0x0200,0x0400,0x0800,0x0c00,0x1000,0x1400,0x1800,0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00, + 0x4000,0x4400,0x4800,0x4c00,0x5000,0x5400,0x5800,0x5c00,0x6000,0x6400,0x6800,0x6c00,0x7000,0x7400,0x7800,0x7bff, + 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, + 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, + 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, + 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, + 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, + 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, + 
0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, + 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, + 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, + 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, + 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, + 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, + 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, + 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8001,0x8002,0x8004,0x8008,0x8010,0x8020,0x8040,0x8080,0x8100, + 0x8200,0x8400,0x8800,0x8c00,0x9000,0x9400,0x9800,0x9c00,0xa000,0xa400,0xa800,0xac00,0xb000,0xb400,0xb800,0xbc00, + 0xc000,0xc400,0xc800,0xcc00,0xd000,0xd400,0xd800,0xdc00,0xe000,0xe400,0xe800,0xec00,0xf000,0xf400,0xf800,0xfbff, + 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, + 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, + 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, + 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, + 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, + 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, + 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff}; + static AB1 shift[512]={ + 
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f, + 0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d, + 0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f, + 0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d, + 
0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18}; + union{AF1 f;AU1 u;}bits;bits.f=f;AU1 u=bits.u;AU1 i=u>>23;return (AU1)(base[i])+((u&0x7fffff)>>shift[i]);} +//------------------------------------------------------------------------------------------------------------------------------ + // Used to output packed constant. + A_STATIC AU1 AU1_AH2_AF2(inAF2 a){return AU1_AH1_AF1(a[0])+(AU1_AH1_AF1(a[1])<<16);} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ 
//==============================================================================================================================
//
//
//                                                            GLSL
//
//
//------------------------------------------------------------------------------------------------------------------------------
// This section is compiled only when both A_GLSL and A_GPU are defined (up to the matching #endif of the #if below).
//==============================================================================================================================
#if defined(A_GLSL) && defined(A_GPU)
 // Extension requirements. Defining A_SKIP_EXT omits every '#extension' line below.
 #ifndef A_SKIP_EXT
  // Required when A_HALF is defined.
  #ifdef A_HALF
   #extension GL_EXT_shader_16bit_storage:require
   #extension GL_EXT_shader_explicit_arithmetic_types:require
  #endif
//------------------------------------------------------------------------------------------------------------------------------
  // Required when A_LONG is defined.
  #ifdef A_LONG
   #extension GL_ARB_gpu_shader_int64:require
   #extension GL_NV_shader_atomic_int64:require
  #endif
//------------------------------------------------------------------------------------------------------------------------------
  // Required when A_WAVE is defined.
  #ifdef A_WAVE
   #extension GL_KHR_shader_subgroup_arithmetic:require
   #extension GL_KHR_shader_subgroup_ballot:require
   #extension GL_KHR_shader_subgroup_quad:require
   #extension GL_KHR_shader_subgroup_shuffle:require
  #endif
 #endif
//==============================================================================================================================
 // Type aliases; the trailing digit is the component count.
 // AP* map to bool/bvecN.
 #define AP1 bool
 #define AP2 bvec2
 #define AP3 bvec3
 #define AP4 bvec4
//------------------------------------------------------------------------------------------------------------------------------
 // AF* map to float/vecN.
 #define AF1 float
 #define AF2 vec2
 #define AF3 vec3
 #define AF4 vec4
//------------------------------------------------------------------------------------------------------------------------------
 // AU* map to uint/uvecN.
 #define AU1 uint
 #define AU2 uvec2
 #define AU3 uvec3
 #define AU4 uvec4
//------------------------------------------------------------------------------------------------------------------------------
 // ASU* map to int/ivecN.
 #define ASU1 int
 #define ASU2 ivec2
 #define ASU3 ivec3
 #define ASU4 ivec4
//==============================================================================================================================
 // Bit-pattern reinterpretation: AF*_AU* goes from uint to float via uintBitsToFloat.
 #define AF1_AU1(x) uintBitsToFloat(AU1(x))
 #define AF2_AU2(x) uintBitsToFloat(AU2(x))
 #define AF3_AU3(x) uintBitsToFloat(AU3(x))
 #define AF4_AU4(x) uintBitsToFloat(AU4(x))
//------------------------------------------------------------------------------------------------------------------------------
 // Bit-pattern reinterpretation: AU*_AF* goes from float to uint via floatBitsToUint.
 #define AU1_AF1(x) floatBitsToUint(AF1(x))
 #define AU2_AF2(x) floatBitsToUint(AF2(x))
 #define AU3_AF3(x) floatBitsToUint(AF3(x))
 #define AU4_AF4(x) floatBitsToUint(AF4(x))
//------------------------------------------------------------------------------------------------------------------------------
 // Pack a single float as a half by packing the pair (a,0.0) with packHalf2x16.
 // The macro converts its argument to AF1 before calling the helper.
 AU1 AU1_AH1_AF1_x(AF1 a){return packHalf2x16(AF2(a,0.0));}
 #define AU1_AH1_AF1(a) AU1_AH1_AF1_x(AF1(a))
//------------------------------------------------------------------------------------------------------------------------------
 // Direct aliases for the GLSL pack built-ins.
 #define AU1_AH2_AF2 packHalf2x16
 #define AU1_AW2Unorm_AF2 packUnorm2x16
 #define AU1_AB4Unorm_AF4 packUnorm4x8
//------------------------------------------------------------------------------------------------------------------------------
 // Direct aliases for the GLSL unpack built-ins.
 #define AF2_AH2_AU1 unpackHalf2x16
 #define AF2_AW2Unorm_AU1 unpackUnorm2x16
 #define AF4_AB4Unorm_AU1 unpackUnorm4x8
//==============================================================================================================================
 // Broadcast constructors: A*N_x(a) builds an N-component value with every component equal to 'a'.
 // The A*N_(a) macros convert the argument to the scalar type first and then call the matching _x function.
 AF1 AF1_x(AF1 a){return AF1(a);}
 AF2 AF2_x(AF1 a){return AF2(a,a);}
 AF3 AF3_x(AF1 a){return AF3(a,a,a);}
 AF4 AF4_x(AF1 a){return AF4(a,a,a,a);}
 #define AF1_(a) AF1_x(AF1(a))
 #define AF2_(a) AF2_x(AF1(a))
 #define AF3_(a) AF3_x(AF1(a))
 #define AF4_(a) AF4_x(AF1(a))
//------------------------------------------------------------------------------------------------------------------------------
 AU1 AU1_x(AU1 a){return AU1(a);}
 AU2 AU2_x(AU1 a){return AU2(a,a);}
 AU3 AU3_x(AU1 a){return AU3(a,a,a);}
 AU4 AU4_x(AU1 a){return AU4(a,a,a,a);}
 #define AU1_(a) AU1_x(AU1(a))
 #define AU2_(a) AU2_x(AU1(a))
 #define AU3_(a) AU3_x(AU1(a))
 #define AU4_(a) AU4_x(AU1(a))
//==============================================================================================================================
 // abs() applied after converting the unsigned input to the signed type; the result is converted back to unsigned.
 AU1 AAbsSU1(AU1 a){return AU1(abs(ASU1(a)));}
 AU2 AAbsSU2(AU2 a){return AU2(abs(ASU2(a)));}
 AU3 AAbsSU3(AU3 a){return AU3(abs(ASU3(a)));}
 AU4 AAbsSU4(AU4 a){return AU4(abs(ASU4(a)));}
//------------------------------------------------------------------------------------------------------------------------------
 // Bitfield extract: forwards to bitfieldExtract with 'off' and 'bits' converted to int.
 AU1 ABfe(AU1 src,AU1 off,AU1 bits){return bitfieldExtract(src,ASU1(off),ASU1(bits));}
 // Bitfield insert by mask: takes the bits of 'ins' where 'mask' is set and the bits of 'src' elsewhere.
 AU1 ABfi(AU1 src,AU1 ins,AU1 mask){return (ins&mask)|(src&(~mask));}
 // Proxy for V_BFI_B32 where the 'mask' is set as 'bits', 'mask=(1<<bits)-1', and 'bits' needs to be an immediate.
 AU1 ABfiM(AU1 src,AU1 ins,AU1 bits){return bitfieldInsert(src,ins,0,ASU1(bits));}
//------------------------------------------------------------------------------------------------------------------------------
 // V_MED3_F32.
 // Forwards to clamp(x,n,m).
 AF1 AClampF1(AF1 x,AF1 n,AF1 m){return clamp(x,n,m);}
 AF2 AClampF2(AF2 x,AF2 n,AF2 m){return clamp(x,n,m);}
 AF3 AClampF3(AF3 x,AF3 n,AF3 m){return clamp(x,n,m);}
 AF4 AClampF4(AF4 x,AF4 n,AF4 m){return clamp(x,n,m);}
//------------------------------------------------------------------------------------------------------------------------------
 // V_FRACT_F32 (note DX frac() is different).
 // Forwards to fract(x).
 AF1 AFractF1(AF1 x){return fract(x);}
 AF2 AFractF2(AF2 x){return fract(x);}
 AF3 AFractF3(AF3 x){return fract(x);}
 AF4 AFractF4(AF4 x){return fract(x);}
//------------------------------------------------------------------------------------------------------------------------------
 // Forwards to mix(x,y,a).
 AF1 ALerpF1(AF1 x,AF1 y,AF1 a){return mix(x,y,a);}
 AF2 ALerpF2(AF2 x,AF2 y,AF2 a){return mix(x,y,a);}
 AF3 ALerpF3(AF3 x,AF3 y,AF3 a){return mix(x,y,a);}
 AF4 ALerpF4(AF4 x,AF4 y,AF4 a){return mix(x,y,a);}
//------------------------------------------------------------------------------------------------------------------------------
 // V_MAX3_F32.
 // Three-input maximum written as nested max().
 AF1 AMax3F1(AF1 x,AF1 y,AF1 z){return max(x,max(y,z));}
 AF2 AMax3F2(AF2 x,AF2 y,AF2 z){return max(x,max(y,z));}
 AF3 AMax3F3(AF3 x,AF3 y,AF3 z){return max(x,max(y,z));}
 AF4 AMax3F4(AF4 x,AF4 y,AF4 z){return max(x,max(y,z));}
//------------------------------------------------------------------------------------------------------------------------------
 // Three-input maximum computed on the inputs converted to signed; the result is converted back to unsigned.
 AU1 AMax3SU1(AU1 x,AU1 y,AU1 z){return AU1(max(ASU1(x),max(ASU1(y),ASU1(z))));}
 AU2 AMax3SU2(AU2 x,AU2 y,AU2 z){return AU2(max(ASU2(x),max(ASU2(y),ASU2(z))));}
 AU3 AMax3SU3(AU3 x,AU3 y,AU3 z){return AU3(max(ASU3(x),max(ASU3(y),ASU3(z))));}
 AU4 AMax3SU4(AU4 x,AU4 y,AU4 z){return AU4(max(ASU4(x),max(ASU4(y),ASU4(z))));}
//------------------------------------------------------------------------------------------------------------------------------
 // Three-input unsigned maximum.
 AU1 AMax3U1(AU1 x,AU1 y,AU1 z){return max(x,max(y,z));}
 AU2 AMax3U2(AU2 x,AU2 y,AU2 z){return max(x,max(y,z));}
 AU3 AMax3U3(AU3 x,AU3 y,AU3 z){return max(x,max(y,z));}
 AU4 AMax3U4(AU4 x,AU4 y,AU4 z){return max(x,max(y,z));}
//------------------------------------------------------------------------------------------------------------------------------
 // Two-input maximum computed on the inputs converted to signed; the result is converted back to unsigned.
 AU1 AMaxSU1(AU1 a,AU1 b){return AU1(max(ASU1(a),ASU1(b)));}
 AU2 AMaxSU2(AU2 a,AU2 b){return AU2(max(ASU2(a),ASU2(b)));}
 AU3 AMaxSU3(AU3 a,AU3 b){return AU3(max(ASU3(a),ASU3(b)));}
 AU4 AMaxSU4(AU4 a,AU4 b){return AU4(max(ASU4(a),ASU4(b)));}
//------------------------------------------------------------------------------------------------------------------------------
 // Clamp has an easier pattern match for med3 when some ordering is known.
 // V_MED3_F32.
 // Median of three written as max(min(x,y),min(max(x,y),z)).
 AF1 AMed3F1(AF1 x,AF1 y,AF1 z){return max(min(x,y),min(max(x,y),z));}
 AF2 AMed3F2(AF2 x,AF2 y,AF2 z){return max(min(x,y),min(max(x,y),z));}
 AF3 AMed3F3(AF3 x,AF3 y,AF3 z){return max(min(x,y),min(max(x,y),z));}
 AF4 AMed3F4(AF4 x,AF4 y,AF4 z){return max(min(x,y),min(max(x,y),z));}
//------------------------------------------------------------------------------------------------------------------------------
 // V_MIN3_F32.
 // Three-input minimum written as nested min().
 AF1 AMin3F1(AF1 x,AF1 y,AF1 z){return min(x,min(y,z));}
 AF2 AMin3F2(AF2 x,AF2 y,AF2 z){return min(x,min(y,z));}
 AF3 AMin3F3(AF3 x,AF3 y,AF3 z){return min(x,min(y,z));}
 AF4 AMin3F4(AF4 x,AF4 y,AF4 z){return min(x,min(y,z));}
//------------------------------------------------------------------------------------------------------------------------------
 // Three-input minimum computed on the inputs converted to signed; the result is converted back to unsigned.
 AU1 AMin3SU1(AU1 x,AU1 y,AU1 z){return AU1(min(ASU1(x),min(ASU1(y),ASU1(z))));}
 AU2 AMin3SU2(AU2 x,AU2 y,AU2 z){return AU2(min(ASU2(x),min(ASU2(y),ASU2(z))));}
 AU3 AMin3SU3(AU3 x,AU3 y,AU3 z){return AU3(min(ASU3(x),min(ASU3(y),ASU3(z))));}
 AU4 AMin3SU4(AU4 x,AU4 y,AU4 z){return AU4(min(ASU4(x),min(ASU4(y),ASU4(z))));}
//------------------------------------------------------------------------------------------------------------------------------
 // Three-input unsigned minimum.
 AU1 AMin3U1(AU1 x,AU1 y,AU1 z){return min(x,min(y,z));}
 AU2 AMin3U2(AU2 x,AU2 y,AU2 z){return min(x,min(y,z));}
 AU3 AMin3U3(AU3 x,AU3 y,AU3 z){return min(x,min(y,z));}
 AU4 AMin3U4(AU4 x,AU4 y,AU4 z){return min(x,min(y,z));}
//------------------------------------------------------------------------------------------------------------------------------
 // Two-input minimum computed on the inputs converted to signed; the result is converted back to unsigned.
 AU1 AMinSU1(AU1 a,AU1 b){return AU1(min(ASU1(a),ASU1(b)));}
 AU2 AMinSU2(AU2 a,AU2 b){return AU2(min(ASU2(a),ASU2(b)));}
 AU3 AMinSU3(AU3 a,AU3 b){return AU3(min(ASU3(a),ASU3(b)));}
 AU4 AMinSU4(AU4 a,AU4 b){return AU4(min(ASU4(a),ASU4(b)));}
//------------------------------------------------------------------------------------------------------------------------------
 // Normalized trig. Valid input domain is {-256 to +256}. No GLSL compiler intrinsic exists to map to this currently.
 // V_COS_F32.
 // Implemented as cos(x*A_2PI); A_2PI is defined outside this section.
 AF1 ANCosF1(AF1 x){return cos(x*AF1_(A_2PI));}
 AF2 ANCosF2(AF2 x){return cos(x*AF2_(A_2PI));}
 AF3 ANCosF3(AF3 x){return cos(x*AF3_(A_2PI));}
 AF4 ANCosF4(AF4 x){return cos(x*AF4_(A_2PI));}
//------------------------------------------------------------------------------------------------------------------------------
 // Normalized trig. Valid input domain is {-256 to +256}. No GLSL compiler intrinsic exists to map to this currently.
 // V_SIN_F32.
 // Implemented as sin(x*A_2PI); A_2PI is defined outside this section.
 AF1 ANSinF1(AF1 x){return sin(x*AF1_(A_2PI));}
 AF2 ANSinF2(AF2 x){return sin(x*AF2_(A_2PI));}
 AF3 ANSinF3(AF3 x){return sin(x*AF3_(A_2PI));}
 AF4 ANSinF4(AF4 x){return sin(x*AF4_(A_2PI));}
//------------------------------------------------------------------------------------------------------------------------------
 // Reciprocal, written as 1.0/x.
 AF1 ARcpF1(AF1 x){return AF1_(1.0)/x;}
 AF2 ARcpF2(AF2 x){return AF2_(1.0)/x;}
 AF3 ARcpF3(AF3 x){return AF3_(1.0)/x;}
 AF4 ARcpF4(AF4 x){return AF4_(1.0)/x;}
//------------------------------------------------------------------------------------------------------------------------------
 // Reciprocal square root, written as 1.0/sqrt(x).
 AF1 ARsqF1(AF1 x){return AF1_(1.0)/sqrt(x);}
 AF2 ARsqF2(AF2 x){return AF2_(1.0)/sqrt(x);}
 AF3 ARsqF3(AF3 x){return AF3_(1.0)/sqrt(x);}
 AF4 ARsqF4(AF4 x){return AF4_(1.0)/sqrt(x);}
//------------------------------------------------------------------------------------------------------------------------------
 // Saturate, written as clamp(x,0.0,1.0).
 AF1 ASatF1(AF1 x){return clamp(x,AF1_(0.0),AF1_(1.0));}
 AF2 ASatF2(AF2 x){return clamp(x,AF2_(0.0),AF2_(1.0));}
 AF3 ASatF3(AF3 x){return clamp(x,AF3_(0.0),AF3_(1.0));}
 AF4 ASatF4(AF4 x){return clamp(x,AF4_(0.0),AF4_(1.0));}
//------------------------------------------------------------------------------------------------------------------------------
 // Right shift performed on the operands converted to signed; the result is converted back to unsigned.
 AU1 AShrSU1(AU1 a,AU1 b){return AU1(ASU1(a)>>ASU1(b));}
 AU2 AShrSU2(AU2 a,AU2 b){return AU2(ASU2(a)>>ASU2(b));}
 AU3 AShrSU3(AU3 a,AU3 b){return AU3(ASU3(a)>>ASU3(b));}
 AU4 AShrSU4(AU4 a,AU4 b){return AU4(ASU4(a)>>ASU4(b));}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//                                                          GLSL BYTE
//------------------------------------------------------------------------------------------------------------------------------
// 8-bit types and broadcast constructors, available only when A_BYTE is defined.
//==============================================================================================================================
 #ifdef A_BYTE
  // AB* map to uint8_t/u8vecN.
  #define AB1 uint8_t
  #define AB2 u8vec2
  #define AB3 u8vec3
  #define AB4 u8vec4
//------------------------------------------------------------------------------------------------------------------------------
  // ASB* map to int8_t/i8vecN.
  #define ASB1 int8_t
  #define ASB2 i8vec2
  #define ASB3 i8vec3
  #define ASB4 i8vec4
//------------------------------------------------------------------------------------------------------------------------------
  // Broadcast constructors: every component of the result equals 'a'.
  AB1 AB1_x(AB1 a){return AB1(a);}
  AB2 AB2_x(AB1 a){return AB2(a,a);}
  AB3 AB3_x(AB1 a){return AB3(a,a,a);}
  AB4 AB4_x(AB1 a){return AB4(a,a,a,a);}
  #define AB1_(a) AB1_x(AB1(a))
  #define AB2_(a) AB2_x(AB1(a))
  #define AB3_(a) AB3_x(AB1(a))
  #define AB4_(a) AB4_x(AB1(a))
 #endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//                                                          GLSL HALF
//==============================================================================================================================
 // 16-bit types and helpers. Only compiled when the including shader defines A_HALF.
 #ifdef A_HALF
  // AH* = 16-bit float scalar/vectors.
  #define AH1 float16_t
  #define AH2 f16vec2
  #define AH3 f16vec3
  #define AH4 f16vec4
//------------------------------------------------------------------------------------------------------------------------------
  // AW* = unsigned 16-bit integer scalar/vectors.
  #define AW1 uint16_t
  #define AW2 u16vec2
  #define AW3 u16vec3
  #define AW4 u16vec4
//------------------------------------------------------------------------------------------------------------------------------
  // ASW* = signed 16-bit integer scalar/vectors (used to reinterpret AW* values for signed operations).
  #define ASW1 int16_t
  #define ASW2 i16vec2
  #define ASW3 i16vec3
  #define ASW4 i16vec4
//==============================================================================================================================
  // Unpack 32-bit words into pairs of 16-bit values.
  #define AH2_AU1(x) unpackFloat2x16(AU1(x))
  AH4 AH4_AU2_x(AU2 x){return AH4(unpackFloat2x16(x.x),unpackFloat2x16(x.y));}
  #define AH4_AU2(x) AH4_AU2_x(AU2(x))
  #define AW2_AU1(x) unpackUint2x16(AU1(x))
  #define AW4_AU2(x) unpackUint4x16(pack64(AU2(x)))
//------------------------------------------------------------------------------------------------------------------------------
  // Pack pairs of 16-bit values into 32-bit words.
  #define AU1_AH2(x) packFloat2x16(AH2(x))
  AU2 AU2_AH4_x(AH4 x){return AU2(packFloat2x16(x.xy),packFloat2x16(x.zw));}
  #define AU2_AH4(x) AU2_AH4_x(AH4(x))
  #define AU1_AW2(x) packUint2x16(AW2(x))
  #define AU2_AW4(x) unpack32(packUint4x16(AW4(x)))
//==============================================================================================================================
  // Bit-pattern reinterpretation: half -> unsigned 16-bit.
  #define AW1_AH1(x) halfBitsToUint16(AH1(x))
  #define AW2_AH2(x) halfBitsToUint16(AH2(x))
  #define AW3_AH3(x) halfBitsToUint16(AH3(x))
  #define AW4_AH4(x) halfBitsToUint16(AH4(x))
//------------------------------------------------------------------------------------------------------------------------------
  // Bit-pattern reinterpretation: unsigned 16-bit -> half.
  #define AH1_AW1(x) uint16BitsToHalf(AW1(x))
  #define AH2_AW2(x) uint16BitsToHalf(AW2(x))
  #define AH3_AW3(x) uint16BitsToHalf(AW3(x))
  #define AH4_AW4(x) uint16BitsToHalf(AW4(x))
//==============================================================================================================================
  // Broadcast constructors: AHn_(a) converts 'a' to AH1 and replicates it into every component.
  AH1 AH1_x(AH1 a){return AH1(a);}
  AH2 AH2_x(AH1 a){return AH2(a,a);}
  AH3 AH3_x(AH1 a){return AH3(a,a,a);}
  AH4 AH4_x(AH1 a){return AH4(a,a,a,a);}
  #define AH1_(a) AH1_x(AH1(a))
  #define AH2_(a) AH2_x(AH1(a))
  #define AH3_(a) AH3_x(AH1(a))
  #define AH4_(a) AH4_x(AH1(a))
//------------------------------------------------------------------------------------------------------------------------------
  // Broadcast constructors: AWn_(a) converts 'a' to AW1 and replicates it into every component.
  AW1 AW1_x(AW1 a){return AW1(a);}
  AW2 AW2_x(AW1 a){return AW2(a,a);}
  AW3 AW3_x(AW1 a){return AW3(a,a,a);}
  AW4 AW4_x(AW1 a){return AW4(a,a,a,a);}
  #define AW1_(a) AW1_x(AW1(a))
  #define AW2_(a) AW2_x(AW1(a))
  #define AW3_(a) AW3_x(AW1(a))
  #define AW4_(a) AW4_x(AW1(a))
//==============================================================================================================================
  // Absolute value of the operand interpreted as signed 16-bit; result is returned in the unsigned type.
  AW1 AAbsSW1(AW1 a){return AW1(abs(ASW1(a)));}
  AW2 AAbsSW2(AW2 a){return AW2(abs(ASW2(a)));}
  AW3 AAbsSW3(AW3 a){return AW3(abs(ASW3(a)));}
  AW4 AAbsSW4(AW4 a){return AW4(abs(ASW4(a)));}
//------------------------------------------------------------------------------------------------------------------------------
  // Clamp x to the range {n to m}.
  AH1 AClampH1(AH1 x,AH1 n,AH1 m){return clamp(x,n,m);}
  AH2 AClampH2(AH2 x,AH2 n,AH2 m){return clamp(x,n,m);}
  AH3 AClampH3(AH3 x,AH3 n,AH3 m){return clamp(x,n,m);}
  AH4 AClampH4(AH4 x,AH4 n,AH4 m){return clamp(x,n,m);}
//------------------------------------------------------------------------------------------------------------------------------
  // Fractional part.
  AH1 AFractH1(AH1 x){return fract(x);}
  AH2 AFractH2(AH2 x){return fract(x);}
  AH3 AFractH3(AH3 x){return fract(x);}
  AH4 AFractH4(AH4 x){return fract(x);}
//------------------------------------------------------------------------------------------------------------------------------
  // Linear interpolation from x to y by a.
  AH1 ALerpH1(AH1 x,AH1 y,AH1 a){return mix(x,y,a);}
  AH2 ALerpH2(AH2 x,AH2 y,AH2 a){return mix(x,y,a);}
  AH3 ALerpH3(AH3 x,AH3 y,AH3 a){return mix(x,y,a);}
  AH4 ALerpH4(AH4 x,AH4 y,AH4 a){return mix(x,y,a);}
//------------------------------------------------------------------------------------------------------------------------------
  // No packed version of max3.
  AH1 AMax3H1(AH1 x,AH1 y,AH1 z){return max(x,max(y,z));}
  AH2 AMax3H2(AH2 x,AH2 y,AH2 z){return max(x,max(y,z));}
  AH3 AMax3H3(AH3 x,AH3 y,AH3 z){return max(x,max(y,z));}
  AH4 AMax3H4(AH4 x,AH4 y,AH4 z){return max(x,max(y,z));}
//------------------------------------------------------------------------------------------------------------------------------
  // Signed 16-bit maximum.
  // Operands are reinterpreted through ASW* (same width). Converting through the wider ASU* types would preserve the
  // unsigned value, so inputs with the sign bit set would compare as large positives instead of negatives.
  AW1 AMaxSW1(AW1 a,AW1 b){return AW1(max(ASW1(a),ASW1(b)));}
  AW2 AMaxSW2(AW2 a,AW2 b){return AW2(max(ASW2(a),ASW2(b)));}
  AW3 AMaxSW3(AW3 a,AW3 b){return AW3(max(ASW3(a),ASW3(b)));}
  AW4 AMaxSW4(AW4 a,AW4 b){return AW4(max(ASW4(a),ASW4(b)));}
//------------------------------------------------------------------------------------------------------------------------------
  // No packed version of min3.
  AH1 AMin3H1(AH1 x,AH1 y,AH1 z){return min(x,min(y,z));}
  AH2 AMin3H2(AH2 x,AH2 y,AH2 z){return min(x,min(y,z));}
  AH3 AMin3H3(AH3 x,AH3 y,AH3 z){return min(x,min(y,z));}
  AH4 AMin3H4(AH4 x,AH4 y,AH4 z){return min(x,min(y,z));}
//------------------------------------------------------------------------------------------------------------------------------
  // Signed 16-bit minimum (reinterpreted through ASW* for the same reason as AMaxSW*).
  AW1 AMinSW1(AW1 a,AW1 b){return AW1(min(ASW1(a),ASW1(b)));}
  AW2 AMinSW2(AW2 a,AW2 b){return AW2(min(ASW2(a),ASW2(b)));}
  AW3 AMinSW3(AW3 a,AW3 b){return AW3(min(ASW3(a),ASW3(b)));}
  AW4 AMinSW4(AW4 a,AW4 b){return AW4(min(ASW4(a),ASW4(b)));}
//------------------------------------------------------------------------------------------------------------------------------
  // Reciprocal.
  AH1 ARcpH1(AH1 x){return AH1_(1.0)/x;}
  AH2 ARcpH2(AH2 x){return AH2_(1.0)/x;}
  AH3 ARcpH3(AH3 x){return AH3_(1.0)/x;}
  AH4 ARcpH4(AH4 x){return AH4_(1.0)/x;}
//------------------------------------------------------------------------------------------------------------------------------
  // Reciprocal square root.
  AH1 ARsqH1(AH1 x){return AH1_(1.0)/sqrt(x);}
  AH2 ARsqH2(AH2 x){return AH2_(1.0)/sqrt(x);}
  AH3 ARsqH3(AH3 x){return AH3_(1.0)/sqrt(x);}
  AH4 ARsqH4(AH4 x){return AH4_(1.0)/sqrt(x);}
//------------------------------------------------------------------------------------------------------------------------------
  // Saturate: clamp to {0 to 1}.
  AH1 ASatH1(AH1 x){return clamp(x,AH1_(0.0),AH1_(1.0));}
  AH2 ASatH2(AH2 x){return clamp(x,AH2_(0.0),AH2_(1.0));}
  AH3 ASatH3(AH3 x){return clamp(x,AH3_(0.0),AH3_(1.0));}
  AH4 ASatH4(AH4 x){return clamp(x,AH4_(0.0),AH4_(1.0));}
//------------------------------------------------------------------------------------------------------------------------------
  // Right shift performed on the signed 16-bit reinterpretation of both operands.
  AW1 AShrSW1(AW1 a,AW1 b){return AW1(ASW1(a)>>ASW1(b));}
  AW2 AShrSW2(AW2 a,AW2 b){return AW2(ASW2(a)>>ASW2(b));}
  AW3 AShrSW3(AW3 a,AW3 b){return AW3(ASW3(a)>>ASW3(b));}
  AW4 AShrSW4(AW4 a,AW4 b){return AW4(ASW4(a)>>ASW4(b));}
 #endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//                                                         GLSL DOUBLE
//==============================================================================================================================
 // Double-precision types and helpers. Only compiled when the including shader defines A_DUBL.
 #ifdef A_DUBL
  // AD* = double scalar/vectors.
  #define AD1 double
  #define AD2 dvec2
  #define AD3 dvec3
  #define AD4 dvec4
//------------------------------------------------------------------------------------------------------------------------------
  // Broadcast constructors: ADn_(a) converts 'a' to AD1 and replicates it into every component.
  AD1 AD1_x(AD1 a){return AD1(a);}
  AD2 AD2_x(AD1 a){return AD2(a,a);}
  AD3 AD3_x(AD1 a){return AD3(a,a,a);}
  AD4 AD4_x(AD1 a){return AD4(a,a,a,a);}
  #define AD1_(a) AD1_x(AD1(a))
  #define AD2_(a) AD2_x(AD1(a))
  #define AD3_(a) AD3_x(AD1(a))
  #define AD4_(a) AD4_x(AD1(a))
//==============================================================================================================================
  // Fractional part.
  AD1 AFractD1(AD1 x){return fract(x);}
  AD2 AFractD2(AD2 x){return fract(x);}
  AD3 AFractD3(AD3 x){return fract(x);}
  AD4 AFractD4(AD4 x){return fract(x);}
//------------------------------------------------------------------------------------------------------------------------------
  // Linear interpolation from x to y by a.
  AD1 ALerpD1(AD1 x,AD1 y,AD1 a){return mix(x,y,a);}
  AD2 ALerpD2(AD2 x,AD2 y,AD2 a){return mix(x,y,a);}
  AD3 ALerpD3(AD3 x,AD3 y,AD3 a){return mix(x,y,a);}
  AD4 ALerpD4(AD4 x,AD4 y,AD4 a){return mix(x,y,a);}
//------------------------------------------------------------------------------------------------------------------------------
  // Reciprocal.
  AD1 ARcpD1(AD1 x){return AD1_(1.0)/x;}
  AD2 ARcpD2(AD2 x){return AD2_(1.0)/x;}
  AD3 ARcpD3(AD3 x){return AD3_(1.0)/x;}
  AD4 ARcpD4(AD4 x){return AD4_(1.0)/x;}
//------------------------------------------------------------------------------------------------------------------------------
  // Reciprocal square root.
  AD1 ARsqD1(AD1 x){return AD1_(1.0)/sqrt(x);}
  AD2 ARsqD2(AD2 x){return AD2_(1.0)/sqrt(x);}
  AD3 ARsqD3(AD3 x){return AD3_(1.0)/sqrt(x);}
  AD4 ARsqD4(AD4 x){return AD4_(1.0)/sqrt(x);}
//------------------------------------------------------------------------------------------------------------------------------
  // Saturate: clamp to {0 to 1}.
  AD1 ASatD1(AD1 x){return clamp(x,AD1_(0.0),AD1_(1.0));}
  AD2 ASatD2(AD2 x){return clamp(x,AD2_(0.0),AD2_(1.0));}
  AD3 ASatD3(AD3 x){return clamp(x,AD3_(0.0),AD3_(1.0));}
  AD4 ASatD4(AD4 x){return clamp(x,AD4_(0.0),AD4_(1.0));}
 #endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//                                                         GLSL LONG
//==============================================================================================================================
 // 64-bit integer types and helpers. Only compiled when the including shader defines A_LONG.
 #ifdef A_LONG
  // AL* = unsigned 64-bit integer scalar/vectors.
  #define AL1 uint64_t
  #define AL2 u64vec2
  #define AL3 u64vec3
  #define AL4 u64vec4
//------------------------------------------------------------------------------------------------------------------------------
  // ASL* = signed 64-bit integer scalar/vectors (used to reinterpret AL* values for signed operations).
  #define ASL1 int64_t
  #define ASL2 i64vec2
  #define ASL3 i64vec3
  #define ASL4 i64vec4
//------------------------------------------------------------------------------------------------------------------------------
  // Pack/unpack between one 64-bit value and a pair of 32-bit words.
  #define AL1_AU2(x) packUint2x32(AU2(x))
  #define AU2_AL1(x) unpackUint2x32(AL1(x))
//------------------------------------------------------------------------------------------------------------------------------
  // Broadcast constructors: ALn_(a) converts 'a' to AL1 and replicates it into every component.
  AL1 AL1_x(AL1 a){return AL1(a);}
  AL2 AL2_x(AL1 a){return AL2(a,a);}
  AL3 AL3_x(AL1 a){return AL3(a,a,a);}
  AL4 AL4_x(AL1 a){return AL4(a,a,a,a);}
  #define AL1_(a) AL1_x(AL1(a))
  #define AL2_(a) AL2_x(AL1(a))
  #define AL3_(a) AL3_x(AL1(a))
  #define AL4_(a) AL4_x(AL1(a))
//==============================================================================================================================
  // Absolute value of the operand interpreted as signed 64-bit; result is returned in the unsigned type.
  AL1 AAbsSL1(AL1 a){return AL1(abs(ASL1(a)));}
  AL2 AAbsSL2(AL2 a){return AL2(abs(ASL2(a)));}
  AL3 AAbsSL3(AL3 a){return AL3(abs(ASL3(a)));}
  AL4 AAbsSL4(AL4 a){return AL4(abs(ASL4(a)));}
//------------------------------------------------------------------------------------------------------------------------------
  // Signed 64-bit maximum.
  // Operands are reinterpreted through ASL* (same width). The ASU* types used for the 32-bit helpers are narrower
  // and would discard the upper half of each operand before the comparison.
  AL1 AMaxSL1(AL1 a,AL1 b){return AL1(max(ASL1(a),ASL1(b)));}
  AL2 AMaxSL2(AL2 a,AL2 b){return AL2(max(ASL2(a),ASL2(b)));}
  AL3 AMaxSL3(AL3 a,AL3 b){return AL3(max(ASL3(a),ASL3(b)));}
  AL4 AMaxSL4(AL4 a,AL4 b){return AL4(max(ASL4(a),ASL4(b)));}
//------------------------------------------------------------------------------------------------------------------------------
  // Signed 64-bit minimum (reinterpreted through ASL* for the same reason as AMaxSL*).
  AL1 AMinSL1(AL1 a,AL1 b){return AL1(min(ASL1(a),ASL1(b)));}
  AL2 AMinSL2(AL2 a,AL2 b){return AL2(min(ASL2(a),ASL2(b)));}
  AL3 AMinSL3(AL3 a,AL3 b){return AL3(min(ASL3(a),ASL3(b)));}
  AL4 AMinSL4(AL4 a,AL4 b){return AL4(min(ASL4(a),ASL4(b)));}
 #endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//                                                       WAVE OPERATIONS
//==============================================================================================================================
 // Cross-lane XOR shuffles. Only compiled when the including shader defines A_WAVE.
 #ifdef A_WAVE
  // Where 'x' must be a compile time literal.
  // 32-bit float variants.
  AF1 AWaveXorF1(AF1 v, AU1 x) { return subgroupShuffleXor(v, x); }
  AF2 AWaveXorF2(AF2 v, AU1 x) { return subgroupShuffleXor(v, x); }
  AF3 AWaveXorF3(AF3 v, AU1 x) { return subgroupShuffleXor(v, x); }
  AF4 AWaveXorF4(AF4 v, AU1 x) { return subgroupShuffleXor(v, x); }
  // 32-bit unsigned variants.
  AU1 AWaveXorU1(AU1 v, AU1 x) { return subgroupShuffleXor(v, x); }
  AU2 AWaveXorU2(AU2 v, AU1 x) { return subgroupShuffleXor(v, x); }
  AU3 AWaveXorU3(AU3 v, AU1 x) { return subgroupShuffleXor(v, x); }
  AU4 AWaveXorU4(AU4 v, AU1 x) { return subgroupShuffleXor(v, x); }
//------------------------------------------------------------------------------------------------------------------------------
  // 16-bit variants: pack into 32-bit words, shuffle the packed words, then unpack again.
  #ifdef A_HALF
   AH2 AWaveXorH2(AH2 v, AU1 x) {
    AU1 packed = AU1_AH2(v);
    return AH2_AU1(subgroupShuffleXor(packed, x));
   }
   AH4 AWaveXorH4(AH4 v, AU1 x) {
    AU2 packed = AU2_AH4(v);
    return AH4_AU2(subgroupShuffleXor(packed, x));
   }
   AW2 AWaveXorW2(AW2 v, AU1 x) {
    AU1 packed = AU1_AW2(v);
    return AW2_AU1(subgroupShuffleXor(packed, x));
   }
   AW4 AWaveXorW4(AW4 v, AU1 x) {
    AU2 packed = AU2_AW4(v);
    return AW4_AU2(subgroupShuffleXor(packed, x));
   }
  #endif
 #endif
//==============================================================================================================================
// Closes a conditional region opened before this section (its opening directive is not visible here).
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//
//
//                                                            HLSL
//
//
//==============================================================================================================================
#if defined(A_HLSL) && defined(A_GPU)
 // Base type aliases. A_HLSL_6_2 selects the explicitly sized type names, otherwise the classic names are used.
 #ifdef A_HLSL_6_2
  // AP* = boolean scalar/vectors.
  #define AP1 bool
  #define AP2 bool2
  #define AP3 bool3
  #define AP4 bool4
//------------------------------------------------------------------------------------------------------------------------------
  // AF* = 32-bit float scalar/vectors.
  #define AF1 float32_t
  #define AF2 float32_t2
  #define AF3 float32_t3
  #define AF4 float32_t4
//------------------------------------------------------------------------------------------------------------------------------
  // AU* = unsigned 32-bit integer scalar/vectors.
  #define AU1 uint32_t
  #define AU2 uint32_t2
  #define AU3 uint32_t3
  #define AU4 uint32_t4
//------------------------------------------------------------------------------------------------------------------------------
  // ASU* = signed 32-bit integer scalar/vectors.
  #define ASU1 int32_t
  #define ASU2 int32_t2
  #define ASU3 int32_t3
  #define ASU4 int32_t4
 #else
  // AP* = boolean scalar/vectors.
  #define AP1 bool
  #define AP2 bool2
  #define AP3 bool3
  #define AP4 bool4
//------------------------------------------------------------------------------------------------------------------------------
  // AF* = float scalar/vectors.
  #define AF1 float
  #define AF2 float2
  #define AF3 float3
  #define AF4 float4
//------------------------------------------------------------------------------------------------------------------------------
  // AU* = unsigned integer scalar/vectors.
  #define AU1 uint
  #define AU2 uint2
  #define AU3 uint3
  #define AU4 uint4
//------------------------------------------------------------------------------------------------------------------------------
  // ASU* = signed integer scalar/vectors.
  #define ASU1 int
  #define ASU2 int2
  #define ASU3 int3
  #define ASU4 int4
 #endif
//==============================================================================================================================
 // Bit-pattern reinterpretation: unsigned -> float.
 #define AF1_AU1(x) asfloat(AU1(x))
 #define AF2_AU2(x) asfloat(AU2(x))
 #define AF3_AU3(x) asfloat(AU3(x))
 #define AF4_AU4(x) asfloat(AU4(x))
//------------------------------------------------------------------------------------------------------------------------------
 // Bit-pattern reinterpretation: float -> unsigned.
 #define AU1_AF1(x) asuint(AF1(x))
 #define AU2_AF2(x) asuint(AF2(x))
 #define AU3_AF3(x) asuint(AF3(x))
 #define AU4_AF4(x) asuint(AF4(x))
//------------------------------------------------------------------------------------------------------------------------------
 // Convert one float to half-precision bits, returned in a 32-bit word.
 AU1 AU1_AH1_AF1_x(AF1 a) { return f32tof16(a); }
 #define AU1_AH1_AF1(a) AU1_AH1_AF1_x(AF1(a))
//------------------------------------------------------------------------------------------------------------------------------
 // Convert two floats to half-precision bits: a.x lands in the low 16 bits, a.y in the high 16 bits.
 AU1 AU1_AH2_AF2_x(AF2 a) {
  AU1 lowHalf = f32tof16(a.x);
  AU1 highHalf = f32tof16(a.y);
  return lowHalf | (highHalf << 16);
 }
 #define AU1_AH2_AF2(a) AU1_AH2_AF2_x(AF2(a))
 // NOTE(review): D3DCOLORtoUBYTE4 is documented as returning a 4-component integer vector, not a packed AU1 - confirm
 // that users of this macro expect that.
 #define AU1_AB4Unorm_AF4(x) D3DCOLORtoUBYTE4(AF4(x))
//------------------------------------------------------------------------------------------------------------------------------
 // Expand two packed halfs (low 16 bits -> .x, high 16 bits -> .y) to floats.
 AF2 AF2_AH2_AU1_x(AU1 x) {
  return AF2(f16tof32(x & 0xFFFF), f16tof32(x >> 16));
 }
 #define AF2_AH2_AU1(x) AF2_AH2_AU1_x(AU1(x))
//==============================================================================================================================
 // Broadcast constructors: AFn_(a) converts 'a' to AF1 and replicates it into every component.
 AF1 AF1_x(AF1 a) { return AF1(a); }
 AF2 AF2_x(AF1 a) { return AF2(a, a); }
 AF3 AF3_x(AF1 a) { return AF3(a, a, a); }
 AF4 AF4_x(AF1 a) { return AF4(a, a, a, a); }
 #define AF1_(a) AF1_x(AF1(a))
 #define AF2_(a) AF2_x(AF1(a))
 #define AF3_(a) AF3_x(AF1(a))
 #define AF4_(a) AF4_x(AF1(a))
//------------------------------------------------------------------------------------------------------------------------------
 // Broadcast constructors: AUn_(a) converts 'a' to AU1 and replicates it into every component.
 AU1 AU1_x(AU1 a) { return AU1(a); }
 AU2 AU2_x(AU1 a) { return AU2(a, a); }
 AU3 AU3_x(AU1 a) { return AU3(a, a, a); }
 AU4 AU4_x(AU1 a) { return AU4(a, a, a, a); }
 #define AU1_(a) AU1_x(AU1(a))
 #define AU2_(a) AU2_x(AU1(a))
 #define AU3_(a) AU3_x(AU1(a))
 #define AU4_(a) AU4_x(AU1(a))
//==============================================================================================================================
 // Absolute value of the operand interpreted as signed; result is returned in the unsigned type.
 AU1 AAbsSU1(AU1 a) { return AU1(abs(ASU1(a))); }
 AU2 AAbsSU2(AU2 a) { return AU2(abs(ASU2(a))); }
 AU3 AAbsSU3(AU3 a) { return AU3(abs(ASU3(a))); }
 AU4 AAbsSU4(AU4 a) { return AU4(abs(ASU4(a))); }
//------------------------------------------------------------------------------------------------------------------------------
 // Bit-field extract: 'bits' bits of 'src' starting at bit 'off'.
 AU1 ABfe(AU1 src, AU1 off, AU1 bits) {
  AU1 lowBits = (1u << bits) - 1;
  return (src >> off) & lowBits;
 }
 // Bit-field insert with an explicit mask: bits set in 'mask' come from 'ins', the rest from 'src'.
 AU1 ABfi(AU1 src, AU1 ins, AU1 mask) {
  return (ins & mask) | (src & (~mask));
 }
 // Bit-field insert of the low 'bits' bits of 'ins' into 'src'.
 AU1 ABfiM(AU1 src, AU1 ins, AU1 bits) {
  AU1 lowBits = (1u << bits) - 1;
  return (ins & lowBits) | (src & (~lowBits));
 }
//------------------------------------------------------------------------------------------------------------------------------
 // Clamp x to {n to m}, written as max(n, min(x, m)).
 AF1 AClampF1(AF1 x, AF1 n, AF1 m) { return max(n, min(x, m)); }
 AF2 AClampF2(AF2 x, AF2 n, AF2 m) { return max(n, min(x, m)); }
 AF3 AClampF3(AF3 x, AF3 n, AF3 m) { return max(n, min(x, m)); }
 AF4 AClampF4(AF4 x, AF4 n, AF4 m) { return max(n, min(x, m)); }
//------------------------------------------------------------------------------------------------------------------------------
 // Fractional part computed as x - floor(x).
 AF1 AFractF1(AF1 x) { return x - floor(x); }
 AF2 AFractF2(AF2 x) { return x - floor(x); }
 AF3 AFractF3(AF3 x) { return x - floor(x); }
 AF4 AFractF4(AF4 x) { return x - floor(x); }
//------------------------------------------------------------------------------------------------------------------------------
 // Linear interpolation from x to y by a.
 AF1 ALerpF1(AF1 x, AF1 y, AF1 a) { return lerp(x, y, a); }
 AF2 ALerpF2(AF2 x, AF2 y, AF2 a) { return lerp(x, y, a); }
 AF3 ALerpF3(AF3 x, AF3 y, AF3 a) { return lerp(x, y, a); }
 AF4 ALerpF4(AF4 x, AF4 y, AF4 a) { return lerp(x, y, a); }
//------------------------------------------------------------------------------------------------------------------------------
 // Maximum of three floats.
 AF1 AMax3F1(AF1 x, AF1 y, AF1 z) { return max(x, max(y, z)); }
 AF2 AMax3F2(AF2 x, AF2 y, AF2 z) { return max(x, max(y, z)); }
 AF3 AMax3F3(AF3 x, AF3 y, AF3 z) { return max(x, max(y, z)); }
 AF4 AMax3F4(AF4 x, AF4 y, AF4 z) { return max(x, max(y, z)); }
//------------------------------------------------------------------------------------------------------------------------------
 // Maximum of three values compared as signed integers.
 AU1 AMax3SU1(AU1 x, AU1 y, AU1 z) { return AU1(max(ASU1(x), max(ASU1(y), ASU1(z)))); }
 AU2 AMax3SU2(AU2 x, AU2 y, AU2 z) { return AU2(max(ASU2(x), max(ASU2(y), ASU2(z)))); }
 AU3 AMax3SU3(AU3 x, AU3 y, AU3 z) { return AU3(max(ASU3(x), max(ASU3(y), ASU3(z)))); }
 AU4 AMax3SU4(AU4 x, AU4 y, AU4 z) { return AU4(max(ASU4(x), max(ASU4(y), ASU4(z)))); }
//------------------------------------------------------------------------------------------------------------------------------
 // Maximum of three unsigned integers.
 AU1 AMax3U1(AU1 x, AU1 y, AU1 z) { return max(x, max(y, z)); }
 AU2 AMax3U2(AU2 x, AU2 y, AU2 z) { return max(x, max(y, z)); }
 AU3 AMax3U3(AU3 x, AU3 y, AU3 z) { return max(x, max(y, z)); }
 AU4 AMax3U4(AU4 x, AU4 y, AU4 z) { return max(x, max(y, z)); }
//------------------------------------------------------------------------------------------------------------------------------
 // Maximum of two values compared as signed integers.
 AU1 AMaxSU1(AU1 a, AU1 b) { return AU1(max(ASU1(a), ASU1(b))); }
 AU2 AMaxSU2(AU2 a, AU2 b) { return AU2(max(ASU2(a), ASU2(b))); }
 AU3 AMaxSU3(AU3 a, AU3 b) { return AU3(max(ASU3(a), ASU3(b))); }
 AU4 AMaxSU4(AU4 a, AU4 b) { return AU4(max(ASU4(a), ASU4(b))); }
//------------------------------------------------------------------------------------------------------------------------------
 // Median of three floats.
 AF1 AMed3F1(AF1 x, AF1 y, AF1 z) { return max(min(x, y), min(max(x, y), z)); }
 AF2 AMed3F2(AF2 x, AF2 y, AF2 z) { return max(min(x, y), min(max(x, y), z)); }
 AF3 AMed3F3(AF3 x, AF3 y, AF3 z) { return max(min(x, y), min(max(x, y), z)); }
 AF4 AMed3F4(AF4 x, AF4 y, AF4 z) { return max(min(x, y), min(max(x, y), z)); }
//------------------------------------------------------------------------------------------------------------------------------
 // Minimum of three floats.
 AF1 AMin3F1(AF1 x, AF1 y, AF1 z) { return min(x, min(y, z)); }
 AF2 AMin3F2(AF2 x, AF2 y, AF2 z) { return min(x, min(y, z)); }
 AF3 AMin3F3(AF3 x, AF3 y, AF3 z) { return min(x, min(y, z)); }
 AF4 AMin3F4(AF4 x, AF4 y, AF4 z) { return min(x, min(y, z)); }
//------------------------------------------------------------------------------------------------------------------------------
 // Minimum of three values compared as signed integers.
 AU1 AMin3SU1(AU1 x, AU1 y, AU1 z) { return AU1(min(ASU1(x), min(ASU1(y), ASU1(z)))); }
 AU2 AMin3SU2(AU2 x, AU2 y, AU2 z) { return AU2(min(ASU2(x), min(ASU2(y), ASU2(z)))); }
 AU3 AMin3SU3(AU3 x, AU3 y, AU3 z) { return AU3(min(ASU3(x), min(ASU3(y), ASU3(z)))); }
 AU4 AMin3SU4(AU4 x, AU4 y, AU4 z) { return AU4(min(ASU4(x), min(ASU4(y), ASU4(z)))); }
//------------------------------------------------------------------------------------------------------------------------------
 // Minimum of three unsigned integers.
 AU1 AMin3U1(AU1 x, AU1 y, AU1 z) { return min(x, min(y, z)); }
 AU2 AMin3U2(AU2 x, AU2 y, AU2 z) { return min(x, min(y, z)); }
 AU3 AMin3U3(AU3 x, AU3 y, AU3 z) { return min(x, min(y, z)); }
 AU4 AMin3U4(AU4 x, AU4 y, AU4 z) { return min(x, min(y, z)); }
//------------------------------------------------------------------------------------------------------------------------------
 // Minimum of two values compared as signed integers.
 AU1 AMinSU1(AU1 a, AU1 b) { return AU1(min(ASU1(a), ASU1(b))); }
 AU2 AMinSU2(AU2 a, AU2 b) { return AU2(min(ASU2(a), ASU2(b))); }
 AU3 AMinSU3(AU3 a, AU3 b) { return AU3(min(ASU3(a), ASU3(b))); }
 AU4 AMinSU4(AU4 a, AU4 b) { return AU4(min(ASU4(a), ASU4(b))); }
//------------------------------------------------------------------------------------------------------------------------------
 // Normalized cosine: the input is scaled by A_2PI before calling cos().
 AF1 ANCosF1(AF1 x) { return cos(x * AF1_(A_2PI)); }
 AF2 ANCosF2(AF2 x) { return cos(x * AF2_(A_2PI)); }
 AF3 ANCosF3(AF3 x) { return cos(x * AF3_(A_2PI)); }
 AF4 ANCosF4(AF4 x) { return cos(x * AF4_(A_2PI)); }
//------------------------------------------------------------------------------------------------------------------------------
 // Normalized sine: the input is scaled by A_2PI before calling sin().
 AF1 ANSinF1(AF1 x) { return sin(x * AF1_(A_2PI)); }
 AF2 ANSinF2(AF2 x) { return sin(x * AF2_(A_2PI)); }
 AF3 ANSinF3(AF3 x) { return sin(x * AF3_(A_2PI)); }
 AF4 ANSinF4(AF4 x) { return sin(x * AF4_(A_2PI)); }
//------------------------------------------------------------------------------------------------------------------------------
 // Reciprocal via the rcp() intrinsic.
 AF1 ARcpF1(AF1 x) { return rcp(x); }
 AF2 ARcpF2(AF2 x) { return rcp(x); }
 AF3 ARcpF3(AF3 x) { return rcp(x); }
 AF4 ARcpF4(AF4 x) { return rcp(x); }
//------------------------------------------------------------------------------------------------------------------------------
 // Reciprocal square root via the rsqrt() intrinsic.
 AF1 ARsqF1(AF1 x) { return rsqrt(x); }
 AF2 ARsqF2(AF2 x) { return rsqrt(x); }
 AF3 ARsqF3(AF3 x) { return rsqrt(x); }
 AF4 ARsqF4(AF4 x) { return rsqrt(x); }
//------------------------------------------------------------------------------------------------------------------------------
 // Saturate via the saturate() intrinsic.
 AF1 ASatF1(AF1 x) { return saturate(x); }
 AF2 ASatF2(AF2 x) { return saturate(x); }
 AF3 ASatF3(AF3 x) { return saturate(x); }
 AF4 ASatF4(AF4 x) { return saturate(x); }
//------------------------------------------------------------------------------------------------------------------------------
 // Right shift performed on the signed reinterpretation of both operands.
 AU1 AShrSU1(AU1 a, AU1 b) { return AU1(ASU1(a) >> ASU1(b)); }
 AU2 AShrSU2(AU2 a, AU2 b) { return AU2(ASU2(a) >> ASU2(b)); }
 AU3 AShrSU3(AU3 a, AU3 b) { return AU3(ASU3(a) >> ASU3(b)); }
 AU4 AShrSU4(AU4 a, AU4 b) { return AU4(ASU4(a) >> ASU4(b)); }
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//                                                          HLSL BYTE
//==============================================================================================================================
 // Intentionally empty: nothing is defined for A_BYTE in the HLSL path.
 #ifdef A_BYTE
 #endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//                                                          HLSL HALF
//==============================================================================================================================
 // 16-bit types and helpers. Only compiled when the including shader defines A_HALF.
 #ifdef A_HALF
  // A_HLSL_6_2 selects the explicitly sized 16-bit types, otherwise the min16* types are used.
  #ifdef A_HLSL_6_2
   // AH* = 16-bit float scalar/vectors.
   #define AH1 float16_t
   #define AH2 float16_t2
   #define AH3 float16_t3
   #define AH4 float16_t4
//------------------------------------------------------------------------------------------------------------------------------
   // AW* = unsigned 16-bit integer scalar/vectors.
   #define AW1 uint16_t
   #define AW2 uint16_t2
   #define AW3 uint16_t3
   #define AW4 uint16_t4
//------------------------------------------------------------------------------------------------------------------------------
   // ASW* = signed 16-bit integer scalar/vectors.
   #define ASW1 int16_t
   #define ASW2 int16_t2
   #define ASW3 int16_t3
   #define ASW4 int16_t4
  #else
   // AH* = minimum-16-bit float scalar/vectors.
   #define AH1 min16float
   #define AH2 min16float2
   #define AH3 min16float3
   #define AH4 min16float4
//------------------------------------------------------------------------------------------------------------------------------
   // AW* = minimum-16-bit unsigned integer scalar/vectors.
   #define AW1 min16uint
   #define AW2 min16uint2
   #define AW3 min16uint3
   #define AW4 min16uint4
//------------------------------------------------------------------------------------------------------------------------------
   // ASW* = minimum-16-bit signed integer scalar/vectors.
   #define ASW1 min16int
   #define ASW2 min16int2
   #define ASW3 min16int3
   #define ASW4 min16int4
  #endif
//==============================================================================================================================
  // Need to use manual unpack to get optimal execution (don't use packed types in buffers directly).
  // Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/
  // The expressions below are kept exactly in that form; only the layout differs.
  AH2 AH2_AU1_x(AU1 x) {
   AF2 t = f16tof32(AU2(x & 0xFFFF, x >> 16));
   return AH2(t);
  }
  AH4 AH4_AU2_x(AU2 x) {
   return AH4(AH2_AU1_x(x.x), AH2_AU1_x(x.y));
  }
  AW2 AW2_AU1_x(AU1 x) {
   AU2 t = AU2(x & 0xFFFF, x >> 16);
   return AW2(t);
  }
  AW4 AW4_AU2_x(AU2 x) {
   return AW4(AW2_AU1_x(x.x), AW2_AU1_x(x.y));
  }
  #define AH2_AU1(x) AH2_AU1_x(AU1(x))
  #define AH4_AU2(x) AH4_AU2_x(AU2(x))
  #define AW2_AU1(x) AW2_AU1_x(AU1(x))
  #define AW4_AU2(x) AW4_AU2_x(AU2(x))
//------------------------------------------------------------------------------------------------------------------------------
  // Pack pairs of 16-bit values into 32-bit words: first component in the low 16 bits, second in the high 16 bits.
  AU1 AU1_AH2_x(AH2 x) {
   return f32tof16(x.x) + (f32tof16(x.y) << 16);
  }
  AU2 AU2_AH4_x(AH4 x) {
   return AU2(AU1_AH2_x(x.xy), AU1_AH2_x(x.zw));
  }
  AU1 AU1_AW2_x(AW2 x) {
   return AU1(x.x) + (AU1(x.y) << 16);
  }
  AU2 AU2_AW4_x(AW4 x) {
   return AU2(AU1_AW2_x(x.xy), AU1_AW2_x(x.zw));
  }
  #define AU1_AH2(x) AU1_AH2_x(AH2(x))
  #define AU2_AH4(x) AU2_AH4_x(AH4(x))
  #define AU1_AW2(x) AU1_AW2_x(AW2(x))
  #define AU2_AW4(x) AU2_AW4_x(AW4(x))
//==============================================================================================================================
  // Half -> unsigned 16-bit. Uses asuint16() when available, otherwise goes per component through f32tof16().
  #if defined(A_HLSL_6_2) && !defined(A_NO_16_BIT_CAST)
   #define AW1_AH1(x) asuint16(x)
   #define AW2_AH2(x) asuint16(x)
   #define AW3_AH3(x) asuint16(x)
   #define AW4_AH4(x) asuint16(x)
  #else
   #define AW1_AH1(a) AW1(f32tof16(AF1(a)))
   #define AW2_AH2(a) AW2(AW1_AH1((a).x),AW1_AH1((a).y))
   #define AW3_AH3(a) AW3(AW1_AH1((a).x),AW1_AH1((a).y),AW1_AH1((a).z))
   #define AW4_AH4(a) AW4(AW1_AH1((a).x),AW1_AH1((a).y),AW1_AH1((a).z),AW1_AH1((a).w))
  #endif
//------------------------------------------------------------------------------------------------------------------------------
  // Unsigned 16-bit -> half. Uses asfloat16() when available, otherwise goes per component through f16tof32().
  #if defined(A_HLSL_6_2) && !defined(A_NO_16_BIT_CAST)
   #define AH1_AW1(x) asfloat16(x)
   #define AH2_AW2(x) asfloat16(x)
   #define AH3_AW3(x) asfloat16(x)
   #define AH4_AW4(x) asfloat16(x)
  #else
   #define AH1_AW1(a) AH1(f16tof32(AU1(a)))
   #define AH2_AW2(a) AH2(AH1_AW1((a).x),AH1_AW1((a).y))
   #define AH3_AW3(a) AH3(AH1_AW1((a).x),AH1_AW1((a).y),AH1_AW1((a).z))
   #define AH4_AW4(a) AH4(AH1_AW1((a).x),AH1_AW1((a).y),AH1_AW1((a).z),AH1_AW1((a).w))
  #endif
//==============================================================================================================================
  // Broadcast constructors: AHn_(a) converts 'a' to AH1 and replicates it into every component.
  AH1 AH1_x(AH1 a) { return AH1(a); }
  AH2 AH2_x(AH1 a) { return AH2(a, a); }
  AH3 AH3_x(AH1 a) { return AH3(a, a, a); }
  AH4 AH4_x(AH1 a) { return AH4(a, a, a, a); }
  #define AH1_(a) AH1_x(AH1(a))
  #define AH2_(a) AH2_x(AH1(a))
  #define AH3_(a) AH3_x(AH1(a))
  #define AH4_(a) AH4_x(AH1(a))
//------------------------------------------------------------------------------------------------------------------------------
  // Broadcast constructors: AWn_(a) converts 'a' to AW1 and replicates it into every component.
  AW1 AW1_x(AW1 a) { return AW1(a); }
  AW2 AW2_x(AW1 a) { return AW2(a, a); }
  AW3 AW3_x(AW1 a) { return AW3(a, a, a); }
  AW4 AW4_x(AW1 a) { return AW4(a, a, a, a); }
  #define AW1_(a) AW1_x(AW1(a))
  #define AW2_(a) AW2_x(AW1(a))
  #define AW3_(a) AW3_x(AW1(a))
  #define AW4_(a) AW4_x(AW1(a))
//==============================================================================================================================
  // Absolute value of the operand interpreted as signed 16-bit; result is returned in the unsigned type.
  AW1 AAbsSW1(AW1 a) { return AW1(abs(ASW1(a))); }
  AW2 AAbsSW2(AW2 a) { return AW2(abs(ASW2(a))); }
  AW3 AAbsSW3(AW3 a) { return AW3(abs(ASW3(a))); }
  AW4 AAbsSW4(AW4 a) { return AW4(abs(ASW4(a))); }
+//------------------------------------------------------------------------------------------------------------------------------
+  // Clamp 'x' to the range {n to m}, computed as max(n,min(x,m)).
+  AH1 AClampH1(AH1 x,AH1 n,AH1 m){return max(n,min(x,m));}
+  AH2 AClampH2(AH2 x,AH2 n,AH2 m){return max(n,min(x,m));}
+  AH3 AClampH3(AH3 x,AH3 n,AH3 m){return max(n,min(x,m));}
+  AH4 AClampH4(AH4 x,AH4 n,AH4 m){return max(n,min(x,m));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // V_FRACT_F16 (note DX frac() is different).
+  // Implemented as x-floor(x).
+  AH1 AFractH1(AH1 x){return x-floor(x);}
+  AH2 AFractH2(AH2 x){return x-floor(x);}
+  AH3 AFractH3(AH3 x){return x-floor(x);}
+  AH4 AFractH4(AH4 x){return x-floor(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Thin wrappers over the lerp() intrinsic.
+  AH1 ALerpH1(AH1 x,AH1 y,AH1 a){return lerp(x,y,a);}
+  AH2 ALerpH2(AH2 x,AH2 y,AH2 a){return lerp(x,y,a);}
+  AH3 ALerpH3(AH3 x,AH3 y,AH3 a){return lerp(x,y,a);}
+  AH4 ALerpH4(AH4 x,AH4 y,AH4 a){return lerp(x,y,a);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Component-wise maximum of three values.
+  AH1 AMax3H1(AH1 x,AH1 y,AH1 z){return max(x,max(y,z));}
+  AH2 AMax3H2(AH2 x,AH2 y,AH2 z){return max(x,max(y,z));}
+  AH3 AMax3H3(AH3 x,AH3 y,AH3 z){return max(x,max(y,z));}
+  AH4 AMax3H4(AH4 x,AH4 y,AH4 z){return max(x,max(y,z));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Signed maximum of 16-bit words that are stored in the unsigned AW* types.
+  // The operands are reinterpreted through the 16-bit signed ASW* types (same as AAbsSW*() and AShrSW*()).
+  // Going through ASU* (presumably the 32-bit signed types) would not sign-extend a 16-bit word, so negative
+  // inputs would compare as large positive values.
+  AW1 AMaxSW1(AW1 a,AW1 b){return AW1(max(ASW1(a),ASW1(b)));}
+  AW2 AMaxSW2(AW2 a,AW2 b){return AW2(max(ASW2(a),ASW2(b)));}
+  AW3 AMaxSW3(AW3 a,AW3 b){return AW3(max(ASW3(a),ASW3(b)));}
+  AW4 AMaxSW4(AW4 a,AW4 b){return AW4(max(ASW4(a),ASW4(b)));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Component-wise minimum of three values.
+  AH1 AMin3H1(AH1 x,AH1 y,AH1 z){return min(x,min(y,z));}
+  AH2 AMin3H2(AH2 x,AH2 y,AH2 z){return min(x,min(y,z));}
+  AH3 AMin3H3(AH3 x,AH3 y,AH3 z){return min(x,min(y,z));}
+  AH4 AMin3H4(AH4 x,AH4 y,AH4 z){return min(x,min(y,z));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Signed minimum of 16-bit words that are stored in the unsigned AW* types (see AMaxSW*() for the ASW* note).
+  AW1 AMinSW1(AW1 a,AW1 b){return AW1(min(ASW1(a),ASW1(b)));}
+  AW2 AMinSW2(AW2 a,AW2 b){return AW2(min(ASW2(a),ASW2(b)));}
+  AW3 AMinSW3(AW3 a,AW3 b){return AW3(min(ASW3(a),ASW3(b)));}
+  AW4 AMinSW4(AW4 a,AW4 b){return AW4(min(ASW4(a),ASW4(b)));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Thin wrappers over the rcp() intrinsic.
+  AH1 ARcpH1(AH1 x){return rcp(x);}
+  AH2 ARcpH2(AH2 x){return rcp(x);}
+  AH3 ARcpH3(AH3 x){return rcp(x);}
+  AH4 ARcpH4(AH4 x){return rcp(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Thin wrappers over the rsqrt() intrinsic.
+  AH1 ARsqH1(AH1 x){return rsqrt(x);}
+  AH2 ARsqH2(AH2 x){return rsqrt(x);}
+  AH3 ARsqH3(AH3 x){return rsqrt(x);}
+  AH4 ARsqH4(AH4 x){return rsqrt(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Thin wrappers over the saturate() intrinsic.
+  AH1 ASatH1(AH1 x){return saturate(x);}
+  AH2 ASatH2(AH2 x){return saturate(x);}
+  AH3 ASatH3(AH3 x){return saturate(x);}
+  AH4 ASatH4(AH4 x){return saturate(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Right shift performed on the 16-bit signed reinterpretation (ASW*) of the words, result returned as AW*.
+  AW1 AShrSW1(AW1 a,AW1 b){return AW1(ASW1(a)>>ASW1(b));}
+  AW2 AShrSW2(AW2 a,AW2 b){return AW2(ASW2(a)>>ASW2(b));}
+  AW3 AShrSW3(AW3 a,AW3 b){return AW3(ASW3(a)>>ASW3(b));}
+  AW4 AShrSW4(AW4 a,AW4 b){return AW4(ASW4(a)>>ASW4(b));}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// HLSL DOUBLE +//============================================================================================================================== + #ifdef A_DUBL + #ifdef A_HLSL_6_2 + #define AD1 float64_t + #define AD2 float64_t2 + #define AD3 float64_t3 + #define AD4 float64_t4 + #else + #define AD1 double + #define AD2 double2 + #define AD3 double3 + #define AD4 double4 + #endif +//------------------------------------------------------------------------------------------------------------------------------ + AD1 AD1_x(AD1 a){return AD1(a);} + AD2 AD2_x(AD1 a){return AD2(a,a);} + AD3 AD3_x(AD1 a){return AD3(a,a,a);} + AD4 AD4_x(AD1 a){return AD4(a,a,a,a);} + #define AD1_(a) AD1_x(AD1(a)) + #define AD2_(a) AD2_x(AD1(a)) + #define AD3_(a) AD3_x(AD1(a)) + #define AD4_(a) AD4_x(AD1(a)) +//============================================================================================================================== + AD1 AFractD1(AD1 a){return a-floor(a);} + AD2 AFractD2(AD2 a){return a-floor(a);} + AD3 AFractD3(AD3 a){return a-floor(a);} + AD4 AFractD4(AD4 a){return a-floor(a);} +//------------------------------------------------------------------------------------------------------------------------------ + AD1 ALerpD1(AD1 x,AD1 y,AD1 a){return lerp(x,y,a);} + AD2 ALerpD2(AD2 x,AD2 y,AD2 a){return lerp(x,y,a);} + AD3 ALerpD3(AD3 x,AD3 y,AD3 a){return lerp(x,y,a);} + AD4 ALerpD4(AD4 x,AD4 y,AD4 a){return lerp(x,y,a);} +//------------------------------------------------------------------------------------------------------------------------------ + AD1 ARcpD1(AD1 x){return rcp(x);} + 
AD2 ARcpD2(AD2 x){return rcp(x);} + AD3 ARcpD3(AD3 x){return rcp(x);} + AD4 ARcpD4(AD4 x){return rcp(x);} +//------------------------------------------------------------------------------------------------------------------------------ + AD1 ARsqD1(AD1 x){return rsqrt(x);} + AD2 ARsqD2(AD2 x){return rsqrt(x);} + AD3 ARsqD3(AD3 x){return rsqrt(x);} + AD4 ARsqD4(AD4 x){return rsqrt(x);} +//------------------------------------------------------------------------------------------------------------------------------ + AD1 ASatD1(AD1 x){return saturate(x);} + AD2 ASatD2(AD2 x){return saturate(x);} + AD3 ASatD3(AD3 x){return saturate(x);} + AD4 ASatD4(AD4 x){return saturate(x);} + #endif +//============================================================================================================================== +// HLSL WAVE +//============================================================================================================================== + #ifdef A_WAVE + // Where 'x' must be a compile time literal. 
+  // Each function returns 'v' as read from the lane whose index is (WaveGetLaneIndex()^x).
+  AF1 AWaveXorF1(AF1 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  AF2 AWaveXorF2(AF2 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  AF3 AWaveXorF3(AF3 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  AF4 AWaveXorF4(AF4 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  AU1 AWaveXorU1(AU1 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  AU2 AWaveXorU2(AU2 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  AU3 AWaveXorU3(AU3 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  AU4 AWaveXorU4(AU4 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  // Legacy overloads: the vector variants used to be named AWaveXorU1 as well.
+  // Kept so existing callers keep compiling, prefer AWaveXorU{2,3,4}() in new code.
+  AU2 AWaveXorU1(AU2 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  AU3 AWaveXorU1(AU3 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  AU4 AWaveXorU1(AU4 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+//------------------------------------------------------------------------------------------------------------------------------
+  #ifdef A_HALF
+   // 16-bit vectors are exchanged in packed 32-bit form: a pair goes through one AU1, a quad through one AU2.
+   AH2 AWaveXorH2(AH2 v,AU1 x){return AH2_AU1(WaveReadLaneAt(AU1_AH2(v),WaveGetLaneIndex()^x));}
+   AH4 AWaveXorH4(AH4 v,AU1 x){return AH4_AU2(WaveReadLaneAt(AU2_AH4(v),WaveGetLaneIndex()^x));}
+   AW2 AWaveXorW2(AW2 v,AU1 x){return AW2_AU1(WaveReadLaneAt(AU1_AW2(v),WaveGetLaneIndex()^x));}
+   // Four words pack into an AU2, so use AU2_AW4()/AW4_AU2() (there is no AU1_AW4()/AW4_AU1()).
+   AW4 AWaveXorW4(AW4 v,AU1 x){return AW4_AU2(WaveReadLaneAt(AU2_AW4(v),WaveGetLaneIndex()^x));}
+  #endif
+ #endif
+//==============================================================================================================================
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// +// GPU COMMON +// +// +//============================================================================================================================== +#ifdef A_GPU + // Negative and positive infinity. + #define A_INFP_F AF1_AU1(0x7f800000u) + #define A_INFN_F AF1_AU1(0xff800000u) +//------------------------------------------------------------------------------------------------------------------------------ + // Copy sign from 's' to positive 'd'. + AF1 ACpySgnF1(AF1 d,AF1 s){return AF1_AU1(AU1_AF1(d)|(AU1_AF1(s)&AU1_(0x80000000u)));} + AF2 ACpySgnF2(AF2 d,AF2 s){return AF2_AU2(AU2_AF2(d)|(AU2_AF2(s)&AU2_(0x80000000u)));} + AF3 ACpySgnF3(AF3 d,AF3 s){return AF3_AU3(AU3_AF3(d)|(AU3_AF3(s)&AU3_(0x80000000u)));} + AF4 ACpySgnF4(AF4 d,AF4 s){return AF4_AU4(AU4_AF4(d)|(AU4_AF4(s)&AU4_(0x80000000u)));} +//------------------------------------------------------------------------------------------------------------------------------ + // Single operation to return (useful to create a mask to use in lerp for branch free logic), + // m=NaN := 0 + // m>=0 := 0 + // m<0 := 1 + // Uses the following useful floating point logic, + // saturate(+a*(-INF)==-INF) := 0 + // saturate( 0*(-INF)== NaN) := 0 + // saturate(-a*(-INF)==+INF) := 1 + AF1 ASignedF1(AF1 m){return ASatF1(m*AF1_(A_INFN_F));} + AF2 ASignedF2(AF2 m){return ASatF2(m*AF2_(A_INFN_F));} + AF3 ASignedF3(AF3 m){return ASatF3(m*AF3_(A_INFN_F));} + AF4 ASignedF4(AF4 m){return ASatF4(m*AF4_(A_INFN_F));} +//------------------------------------------------------------------------------------------------------------------------------ 
+ AF1 AGtZeroF1(AF1 m){return ASatF1(m*AF1_(A_INFP_F));} + AF2 AGtZeroF2(AF2 m){return ASatF2(m*AF2_(A_INFP_F));} + AF3 AGtZeroF3(AF3 m){return ASatF3(m*AF3_(A_INFP_F));} + AF4 AGtZeroF4(AF4 m){return ASatF4(m*AF4_(A_INFP_F));} +//============================================================================================================================== + #ifdef A_HALF + #ifdef A_HLSL_6_2 + #define A_INFP_H AH1_AW1((uint16_t)0x7c00u) + #define A_INFN_H AH1_AW1((uint16_t)0xfc00u) + #else + #define A_INFP_H AH1_AW1(0x7c00u) + #define A_INFN_H AH1_AW1(0xfc00u) + #endif + +//------------------------------------------------------------------------------------------------------------------------------ + AH1 ACpySgnH1(AH1 d,AH1 s){return AH1_AW1(AW1_AH1(d)|(AW1_AH1(s)&AW1_(0x8000u)));} + AH2 ACpySgnH2(AH2 d,AH2 s){return AH2_AW2(AW2_AH2(d)|(AW2_AH2(s)&AW2_(0x8000u)));} + AH3 ACpySgnH3(AH3 d,AH3 s){return AH3_AW3(AW3_AH3(d)|(AW3_AH3(s)&AW3_(0x8000u)));} + AH4 ACpySgnH4(AH4 d,AH4 s){return AH4_AW4(AW4_AH4(d)|(AW4_AH4(s)&AW4_(0x8000u)));} +//------------------------------------------------------------------------------------------------------------------------------ + AH1 ASignedH1(AH1 m){return ASatH1(m*AH1_(A_INFN_H));} + AH2 ASignedH2(AH2 m){return ASatH2(m*AH2_(A_INFN_H));} + AH3 ASignedH3(AH3 m){return ASatH3(m*AH3_(A_INFN_H));} + AH4 ASignedH4(AH4 m){return ASatH4(m*AH4_(A_INFN_H));} +//------------------------------------------------------------------------------------------------------------------------------ + AH1 AGtZeroH1(AH1 m){return ASatH1(m*AH1_(A_INFP_H));} + AH2 AGtZeroH2(AH2 m){return ASatH2(m*AH2_(A_INFP_H));} + AH3 AGtZeroH3(AH3 m){return ASatH3(m*AH3_(A_INFP_H));} + AH4 AGtZeroH4(AH4 m){return ASatH4(m*AH4_(A_INFP_H));} + #endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// [FIS] FLOAT INTEGER SORTABLE +//------------------------------------------------------------------------------------------------------------------------------ +// Float to integer sortable. +// - If sign bit=0, flip the sign bit (positives). +// - If sign bit=1, flip all bits (negatives). +// Integer sortable to float. +// - If sign bit=1, flip the sign bit (positives). +// - If sign bit=0, flip all bits (negatives). +// Has nice side effects. +// - Larger integers are more positive values. +// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). +// Burns 3 ops for conversion {shift,or,xor}. +//============================================================================================================================== + AU1 AFisToU1(AU1 x){return x^(( AShrSU1(x,AU1_(31)))|AU1_(0x80000000));} + AU1 AFisFromU1(AU1 x){return x^((~AShrSU1(x,AU1_(31)))|AU1_(0x80000000));} +//------------------------------------------------------------------------------------------------------------------------------ + // Just adjust high 16-bit value (useful when upper part of 32-bit word is a 16-bit float value). 
+ AU1 AFisToHiU1(AU1 x){return x^(( AShrSU1(x,AU1_(15)))|AU1_(0x80000000));} + AU1 AFisFromHiU1(AU1 x){return x^((~AShrSU1(x,AU1_(15)))|AU1_(0x80000000));} +//------------------------------------------------------------------------------------------------------------------------------ + #ifdef A_HALF + AW1 AFisToW1(AW1 x){return x^(( AShrSW1(x,AW1_(15)))|AW1_(0x8000));} + AW1 AFisFromW1(AW1 x){return x^((~AShrSW1(x,AW1_(15)))|AW1_(0x8000));} +//------------------------------------------------------------------------------------------------------------------------------ + AW2 AFisToW2(AW2 x){return x^(( AShrSW2(x,AW2_(15)))|AW2_(0x8000));} + AW2 AFisFromW2(AW2 x){return x^((~AShrSW2(x,AW2_(15)))|AW2_(0x8000));} + #endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// [PERM] V_PERM_B32 +//------------------------------------------------------------------------------------------------------------------------------ +// Support for V_PERM_B32 started in the 3rd generation of GCN. +//------------------------------------------------------------------------------------------------------------------------------ +// yyyyxxxx - The 'i' input. +// 76543210 +// ======== +// HGFEDCBA - Naming on permutation. +//------------------------------------------------------------------------------------------------------------------------------ +// TODO +// ==== +// - Make sure compiler optimizes this. 
+//============================================================================================================================== + #ifdef A_HALF + AU1 APerm0E0A(AU2 i){return((i.x )&0xffu)|((i.y<<16)&0xff0000u);} + AU1 APerm0F0B(AU2 i){return((i.x>> 8)&0xffu)|((i.y<< 8)&0xff0000u);} + AU1 APerm0G0C(AU2 i){return((i.x>>16)&0xffu)|((i.y )&0xff0000u);} + AU1 APerm0H0D(AU2 i){return((i.x>>24)&0xffu)|((i.y>> 8)&0xff0000u);} +//------------------------------------------------------------------------------------------------------------------------------ + AU1 APermHGFA(AU2 i){return((i.x )&0x000000ffu)|(i.y&0xffffff00u);} + AU1 APermHGFC(AU2 i){return((i.x>>16)&0x000000ffu)|(i.y&0xffffff00u);} + AU1 APermHGAE(AU2 i){return((i.x<< 8)&0x0000ff00u)|(i.y&0xffff00ffu);} + AU1 APermHGCE(AU2 i){return((i.x>> 8)&0x0000ff00u)|(i.y&0xffff00ffu);} + AU1 APermHAFE(AU2 i){return((i.x<<16)&0x00ff0000u)|(i.y&0xff00ffffu);} + AU1 APermHCFE(AU2 i){return((i.x )&0x00ff0000u)|(i.y&0xff00ffffu);} + AU1 APermAGFE(AU2 i){return((i.x<<24)&0xff000000u)|(i.y&0x00ffffffu);} + AU1 APermCGFE(AU2 i){return((i.x<< 8)&0xff000000u)|(i.y&0x00ffffffu);} +//------------------------------------------------------------------------------------------------------------------------------ + AU1 APermGCEA(AU2 i){return((i.x)&0x00ff00ffu)|((i.y<<8)&0xff00ff00u);} + AU1 APermGECA(AU2 i){return(((i.x)&0xffu)|((i.x>>8)&0xff00u)|((i.y<<16)&0xff0000u)|((i.y<<8)&0xff000000u));} + #endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// [BUC] BYTE UNSIGNED 
CONVERSION +//------------------------------------------------------------------------------------------------------------------------------ +// Designed to use the optimal conversion, enables the scaling to possibly be factored into other computation. +// Works on a range of {0 to A_BUC_<32,16>}, for <32-bit, and 16-bit> respectively. +//------------------------------------------------------------------------------------------------------------------------------ +// OPCODE NOTES +// ============ +// GCN does not do UNORM or SNORM for bytes in opcodes. +// - V_CVT_F32_UBYTE{0,1,2,3} - Unsigned byte to float. +// - V_CVT_PKACC_U8_F32 - Float to unsigned byte (does bit-field insert into 32-bit integer). +// V_PERM_B32 does byte packing with ability to zero fill bytes as well. +// - Can pull out byte values from two sources, and zero fill upper 8-bits of packed hi and lo. +//------------------------------------------------------------------------------------------------------------------------------ +// BYTE : FLOAT - ABuc{0,1,2,3}{To,From}U1() - Designed for V_CVT_F32_UBYTE* and V_CVT_PKACCUM_U8_F32 ops. +// ==== ===== +// 0 : 0 +// 1 : 1 +// ... +// 255 : 255 +// : 256 (just outside the encoding range) +//------------------------------------------------------------------------------------------------------------------------------ +// BYTE : FLOAT - ABuc{0,1,2,3}{To,From}U2() - Designed for 16-bit denormal tricks and V_PERM_B32. +// ==== ===== +// 0 : 0 +// 1 : 1/512 +// 2 : 1/256 +// ... 
+// 64 : 1/8 +// 128 : 1/4 +// 255 : 255/512 +// : 1/2 (just outside the encoding range) +//------------------------------------------------------------------------------------------------------------------------------ +// OPTIMAL IMPLEMENTATIONS ON AMD ARCHITECTURES +// ============================================ +// r=ABuc0FromU1(i) +// V_CVT_F32_UBYTE0 r,i +// -------------------------------------------- +// r=ABuc0ToU1(d,i) +// V_CVT_PKACCUM_U8_F32 r,i,0,d +// -------------------------------------------- +// d=ABuc0FromU2(i) +// Where 'k0' is an SGPR with 0x0E0A +// Where 'k1' is an SGPR with {32768.0} packed into the lower 16-bits +// V_PERM_B32 d,i.x,i.y,k0 +// V_PK_FMA_F16 d,d,k1.x,0 +// -------------------------------------------- +// r=ABuc0ToU2(d,i) +// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +// Where 'k1' is an SGPR with 0x???? +// Where 'k2' is an SGPR with 0x???? +// V_PK_FMA_F16 i,i,k0.x,0 +// V_PERM_B32 r.x,i,i,k1 +// V_PERM_B32 r.y,i,i,k2 +//============================================================================================================================== + // Peak range for 32-bit and 16-bit operations. + #define A_BUC_32 (255.0) + #define A_BUC_16 (255.0/512.0) +//============================================================================================================================== + #if 1 + // Designed to be one V_CVT_PKACCUM_U8_F32. + // The extra min is required to pattern match to V_CVT_PKACCUM_U8_F32. 
+ AU1 ABuc0ToU1(AU1 d,AF1 i){return (d&0xffffff00u)|((min(AU1(i),255u) )&(0x000000ffu));} + AU1 ABuc1ToU1(AU1 d,AF1 i){return (d&0xffff00ffu)|((min(AU1(i),255u)<< 8)&(0x0000ff00u));} + AU1 ABuc2ToU1(AU1 d,AF1 i){return (d&0xff00ffffu)|((min(AU1(i),255u)<<16)&(0x00ff0000u));} + AU1 ABuc3ToU1(AU1 d,AF1 i){return (d&0x00ffffffu)|((min(AU1(i),255u)<<24)&(0xff000000u));} +//------------------------------------------------------------------------------------------------------------------------------ + // Designed to be one V_CVT_F32_UBYTE*. + AF1 ABuc0FromU1(AU1 i){return AF1((i )&255u);} + AF1 ABuc1FromU1(AU1 i){return AF1((i>> 8)&255u);} + AF1 ABuc2FromU1(AU1 i){return AF1((i>>16)&255u);} + AF1 ABuc3FromU1(AU1 i){return AF1((i>>24)&255u);} + #endif +//============================================================================================================================== + #ifdef A_HALF + // Takes {x0,x1} and {y0,y1} and builds {{x0,y0},{x1,y1}}. + AW2 ABuc01ToW2(AH2 x,AH2 y){x*=AH2_(1.0/32768.0);y*=AH2_(1.0/32768.0); + return AW2_AU1(APermGCEA(AU2(AU1_AW2(AW2_AH2(x)),AU1_AW2(AW2_AH2(y)))));} +//------------------------------------------------------------------------------------------------------------------------------ + // Designed for 3 ops to do SOA to AOS and conversion. 
+ AU2 ABuc0ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0))); + return AU2(APermHGFA(AU2(d.x,b)),APermHGFC(AU2(d.y,b)));} + AU2 ABuc1ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0))); + return AU2(APermHGAE(AU2(d.x,b)),APermHGCE(AU2(d.y,b)));} + AU2 ABuc2ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0))); + return AU2(APermHAFE(AU2(d.x,b)),APermHCFE(AU2(d.y,b)));} + AU2 ABuc3ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0))); + return AU2(APermAGFE(AU2(d.x,b)),APermCGFE(AU2(d.y,b)));} +//------------------------------------------------------------------------------------------------------------------------------ + // Designed for 2 ops to do both AOS to SOA, and conversion. + AH2 ABuc0FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0E0A(i)))*AH2_(32768.0);} + AH2 ABuc1FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0F0B(i)))*AH2_(32768.0);} + AH2 ABuc2FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0G0C(i)))*AH2_(32768.0);} + AH2 ABuc3FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0H0D(i)))*AH2_(32768.0);} + #endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// [BSC] BYTE SIGNED CONVERSION +//------------------------------------------------------------------------------------------------------------------------------ +// Similar to [BUC]. +// Works on a range of {-/+ A_BSC_<32,16>}, for <32-bit, and 16-bit> respectively. 
+//------------------------------------------------------------------------------------------------------------------------------ +// ENCODING (without zero-based encoding) +// ======== +// 0 = unused (can be used to mean something else) +// 1 = lowest value +// 128 = exact zero center (zero based encoding) +// 255 = highest value +//------------------------------------------------------------------------------------------------------------------------------ +// Zero-based [Zb] flips the MSB bit of the byte (making 128 "exact zero" actually zero). +// This is useful if there is a desire for cleared values to decode as zero. +//------------------------------------------------------------------------------------------------------------------------------ +// BYTE : FLOAT - ABsc{0,1,2,3}{To,From}U2() - Designed for 16-bit denormal tricks and V_PERM_B32. +// ==== ===== +// 0 : -128/512 (unused) +// 1 : -127/512 +// 2 : -126/512 +// ... +// 128 : 0 +// ... +// 255 : 127/512 +// : 1/4 (just outside the encoding range) +//============================================================================================================================== + // Peak range for 32-bit and 16-bit operations.
+ #define A_BSC_32 (127.0) + #define A_BSC_16 (127.0/512.0) +//============================================================================================================================== + #if 1 + AU1 ABsc0ToU1(AU1 d,AF1 i){return (d&0xffffff00u)|((min(AU1(i+128.0),255u) )&(0x000000ffu));} + AU1 ABsc1ToU1(AU1 d,AF1 i){return (d&0xffff00ffu)|((min(AU1(i+128.0),255u)<< 8)&(0x0000ff00u));} + AU1 ABsc2ToU1(AU1 d,AF1 i){return (d&0xff00ffffu)|((min(AU1(i+128.0),255u)<<16)&(0x00ff0000u));} + AU1 ABsc3ToU1(AU1 d,AF1 i){return (d&0x00ffffffu)|((min(AU1(i+128.0),255u)<<24)&(0xff000000u));} +//------------------------------------------------------------------------------------------------------------------------------ + AU1 ABsc0ToZbU1(AU1 d,AF1 i){return ((d&0xffffff00u)|((min(AU1(trunc(i)+128.0),255u) )&(0x000000ffu)))^0x00000080u;} + AU1 ABsc1ToZbU1(AU1 d,AF1 i){return ((d&0xffff00ffu)|((min(AU1(trunc(i)+128.0),255u)<< 8)&(0x0000ff00u)))^0x00008000u;} + AU1 ABsc2ToZbU1(AU1 d,AF1 i){return ((d&0xff00ffffu)|((min(AU1(trunc(i)+128.0),255u)<<16)&(0x00ff0000u)))^0x00800000u;} + AU1 ABsc3ToZbU1(AU1 d,AF1 i){return ((d&0x00ffffffu)|((min(AU1(trunc(i)+128.0),255u)<<24)&(0xff000000u)))^0x80000000u;} +//------------------------------------------------------------------------------------------------------------------------------ + AF1 ABsc0FromU1(AU1 i){return AF1((i )&255u)-128.0;} + AF1 ABsc1FromU1(AU1 i){return AF1((i>> 8)&255u)-128.0;} + AF1 ABsc2FromU1(AU1 i){return AF1((i>>16)&255u)-128.0;} + AF1 ABsc3FromU1(AU1 i){return AF1((i>>24)&255u)-128.0;} +//------------------------------------------------------------------------------------------------------------------------------ + AF1 ABsc0FromZbU1(AU1 i){return AF1(((i )&255u)^0x80u)-128.0;} + AF1 ABsc1FromZbU1(AU1 i){return AF1(((i>> 8)&255u)^0x80u)-128.0;} + AF1 ABsc2FromZbU1(AU1 i){return AF1(((i>>16)&255u)^0x80u)-128.0;} + AF1 ABsc3FromZbU1(AU1 i){return AF1(((i>>24)&255u)^0x80u)-128.0;} + #endif 
+//============================================================================================================================== + #ifdef A_HALF + // Takes {x0,x1} and {y0,y1} and builds {{x0,y0},{x1,y1}}. + AW2 ABsc01ToW2(AH2 x,AH2 y){x=x*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);y=y*AH2_(1.0/32768.0)+AH2_(0.25/32768.0); + return AW2_AU1(APermGCEA(AU2(AU1_AW2(AW2_AH2(x)),AU1_AW2(AW2_AH2(y)))));} +//------------------------------------------------------------------------------------------------------------------------------ + AU2 ABsc0ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0))); + return AU2(APermHGFA(AU2(d.x,b)),APermHGFC(AU2(d.y,b)));} + AU2 ABsc1ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0))); + return AU2(APermHGAE(AU2(d.x,b)),APermHGCE(AU2(d.y,b)));} + AU2 ABsc2ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0))); + return AU2(APermHAFE(AU2(d.x,b)),APermHCFE(AU2(d.y,b)));} + AU2 ABsc3ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0))); + return AU2(APermAGFE(AU2(d.x,b)),APermCGFE(AU2(d.y,b)));} +//------------------------------------------------------------------------------------------------------------------------------ + AU2 ABsc0ToZbU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)))^0x00800080u; + return AU2(APermHGFA(AU2(d.x,b)),APermHGFC(AU2(d.y,b)));} + AU2 ABsc1ToZbU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)))^0x00800080u; + return AU2(APermHGAE(AU2(d.x,b)),APermHGCE(AU2(d.y,b)));} + AU2 ABsc2ToZbU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)))^0x00800080u; + return AU2(APermHAFE(AU2(d.x,b)),APermHCFE(AU2(d.y,b)));} + AU2 ABsc3ToZbU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)))^0x00800080u; + return AU2(APermAGFE(AU2(d.x,b)),APermCGFE(AU2(d.y,b)));} 
+//------------------------------------------------------------------------------------------------------------------------------ + AH2 ABsc0FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0E0A(i)))*AH2_(32768.0)-AH2_(0.25);} + AH2 ABsc1FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0F0B(i)))*AH2_(32768.0)-AH2_(0.25);} + AH2 ABsc2FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0G0C(i)))*AH2_(32768.0)-AH2_(0.25);} + AH2 ABsc3FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0H0D(i)))*AH2_(32768.0)-AH2_(0.25);} +//------------------------------------------------------------------------------------------------------------------------------ + AH2 ABsc0FromZbU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0E0A(i)^0x00800080u))*AH2_(32768.0)-AH2_(0.25);} + AH2 ABsc1FromZbU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0F0B(i)^0x00800080u))*AH2_(32768.0)-AH2_(0.25);} + AH2 ABsc2FromZbU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0G0C(i)^0x00800080u))*AH2_(32768.0)-AH2_(0.25);} + AH2 ABsc3FromZbU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0H0D(i)^0x00800080u))*AH2_(32768.0)-AH2_(0.25);} + #endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// HALF APPROXIMATIONS +//------------------------------------------------------------------------------------------------------------------------------ +// These support only positive inputs. +// Did not see value yet in specialization for range. +// Using quick testing, ended up mostly getting the same "best" approximation for various ranges. 
+// With hardware that can co-execute transcendentals, the value in approximations could be less than expected.
+// However from a latency perspective, if execution of a transcendental is 4 clk, with no packed support, -> 8 clk total.
+// And co-execution would require a compiler interleaving a lot of independent work for packed usage.
+//------------------------------------------------------------------------------------------------------------------------------
+// The one Newton Raphson iteration form of rsq() was skipped (requires 6 ops total).
+// Same with sqrt(), as this could be x*rsq() (7 ops).
+//==============================================================================================================================
+ #ifdef A_HALF
+  // Square root estimate: shift the half's bit pattern right by one and add a bias constant.
+  // Minimize squared error across full positive range, 2 ops.
+  // The 0x1de2 based approximation maps {0 to 1} input to < 1 output.
+  AH1 APrxLoSqrtH1(AH1 a){return AH1_AW1((AW1_AH1(a)>>AW1_(1))+AW1_(0x1de2));}
+  AH2 APrxLoSqrtH2(AH2 a){return AH2_AW2((AW2_AH2(a)>>AW2_(1))+AW2_(0x1de2));}
+  AH3 APrxLoSqrtH3(AH3 a){return AH3_AW3((AW3_AH3(a)>>AW3_(1))+AW3_(0x1de2));}
+  AH4 APrxLoSqrtH4(AH4 a){return AH4_AW4((AW4_AH4(a)>>AW4_(1))+AW4_(0x1de2));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Reciprocal estimate: subtract the half's bit pattern from a constant.
+  // Lower precision estimation, 1 op.
+  // Minimize squared error across {smallest normal to 16384.0}.
+  AH1 APrxLoRcpH1(AH1 a){return AH1_AW1(AW1_(0x7784)-AW1_AH1(a));}
+  AH2 APrxLoRcpH2(AH2 a){return AH2_AW2(AW2_(0x7784)-AW2_AH2(a));}
+  AH3 APrxLoRcpH3(AH3 a){return AH3_AW3(AW3_(0x7784)-AW3_AH3(a));}
+  AH4 APrxLoRcpH4(AH4 a){return AH4_AW4(AW4_(0x7784)-AW4_AH4(a));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Medium precision estimation, one Newton Raphson iteration, 3 ops.
+  // 'b' is the bit-trick seed (note the constant differs from the low precision form), refined as b*(2-b*a).
+  AH1 APrxMedRcpH1(AH1 a){AH1 b=AH1_AW1(AW1_(0x778d)-AW1_AH1(a));return b*(-b*a+AH1_(2.0));}
+  AH2 APrxMedRcpH2(AH2 a){AH2 b=AH2_AW2(AW2_(0x778d)-AW2_AH2(a));return b*(-b*a+AH2_(2.0));}
+  AH3 APrxMedRcpH3(AH3 a){AH3 b=AH3_AW3(AW3_(0x778d)-AW3_AH3(a));return b*(-b*a+AH3_(2.0));}
+  AH4 APrxMedRcpH4(AH4 a){AH4 b=AH4_AW4(AW4_(0x778d)-AW4_AH4(a));return b*(-b*a+AH4_(2.0));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Reciprocal square root estimate: subtract the half's bit pattern, shifted right by one, from a constant.
+  // Minimize squared error across {smallest normal to 16384.0}, 2 ops.
+  AH1 APrxLoRsqH1(AH1 a){return AH1_AW1(AW1_(0x59a3)-(AW1_AH1(a)>>AW1_(1)));}
+  AH2 APrxLoRsqH2(AH2 a){return AH2_AW2(AW2_(0x59a3)-(AW2_AH2(a)>>AW2_(1)));}
+  AH3 APrxLoRsqH3(AH3 a){return AH3_AW3(AW3_(0x59a3)-(AW3_AH3(a)>>AW3_(1)));}
+  AH4 APrxLoRsqH4(AH4 a){return AH4_AW4(AW4_(0x59a3)-(AW4_AH4(a)>>AW4_(1)));}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// FLOAT APPROXIMATIONS
+//------------------------------------------------------------------------------------------------------------------------------
+// Michal Drobot has an excellent presentation on these: "Low Level Optimizations For GCN",
+// - Idea dates back to SGI, then to Quake 3, etc.
//  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
//    - sqrt(x)=rsqrt(x)*x
//    - rcp(x)=rsqrt(x)*rsqrt(x) for positive x
//  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
//------------------------------------------------------------------------------------------------------------------------------
// These below are from perhaps less complete searching for optimal.
// Used FP16 normal range for testing with +4096 32-bit step size for sampling error.
// So these match up well with the half approximations.
//------------------------------------------------------------------------------------------------------------------------------
// Per width: LoSqrt = (bits>>1)+0x1fbc4639, LoRcp = 0x7ef07ebb-bits, LoRsq = 0x5f347d74-(bits>>1),
// MedRcp = seed (0x7ef19fff-bits) refined by one Newton Raphson step est*(2-est*a).
//==============================================================================================================================
 AF1 APrxLoSqrtF1(AF1 a){AU1 u=(AU1_AF1(a)>>AU1_(1))+AU1_(0x1fbc4639);return AF1_AU1(u);}
 AF1 APrxLoRcpF1(AF1 a){AU1 u=AU1_(0x7ef07ebb)-AU1_AF1(a);return AF1_AU1(u);}
 AF1 APrxMedRcpF1(AF1 a){AF1 est=AF1_AU1(AU1_(0x7ef19fff)-AU1_AF1(a));AF1 fix=-est*a+AF1_(2.0);return est*fix;}
 AF1 APrxLoRsqF1(AF1 a){AU1 halved=AU1_AF1(a)>>AU1_(1);return AF1_AU1(AU1_(0x5f347d74)-halved);}
//------------------------------------------------------------------------------------------------------------------------------
 AF2 APrxLoSqrtF2(AF2 a){AU2 u=(AU2_AF2(a)>>AU2_(1))+AU2_(0x1fbc4639);return AF2_AU2(u);}
 AF2 APrxLoRcpF2(AF2 a){AU2 u=AU2_(0x7ef07ebb)-AU2_AF2(a);return AF2_AU2(u);}
 AF2 APrxMedRcpF2(AF2 a){AF2 est=AF2_AU2(AU2_(0x7ef19fff)-AU2_AF2(a));AF2 fix=-est*a+AF2_(2.0);return est*fix;}
 AF2 APrxLoRsqF2(AF2 a){AU2 halved=AU2_AF2(a)>>AU2_(1);return AF2_AU2(AU2_(0x5f347d74)-halved);}
//------------------------------------------------------------------------------------------------------------------------------
 AF3 APrxLoSqrtF3(AF3 a){AU3 u=(AU3_AF3(a)>>AU3_(1))+AU3_(0x1fbc4639);return AF3_AU3(u);}
 AF3 APrxLoRcpF3(AF3 a){AU3 u=AU3_(0x7ef07ebb)-AU3_AF3(a);return AF3_AU3(u);}
 AF3 APrxMedRcpF3(AF3 a){AF3 est=AF3_AU3(AU3_(0x7ef19fff)-AU3_AF3(a));AF3 fix=-est*a+AF3_(2.0);return est*fix;}
 AF3 APrxLoRsqF3(AF3 a){AU3 halved=AU3_AF3(a)>>AU3_(1);return AF3_AU3(AU3_(0x5f347d74)-halved);}
//------------------------------------------------------------------------------------------------------------------------------
 AF4 APrxLoSqrtF4(AF4 a){AU4 u=(AU4_AF4(a)>>AU4_(1))+AU4_(0x1fbc4639);return AF4_AU4(u);}
 AF4 APrxLoRcpF4(AF4 a){AU4 u=AU4_(0x7ef07ebb)-AU4_AF4(a);return AF4_AU4(u);}
 AF4 APrxMedRcpF4(AF4 a){AF4 est=AF4_AU4(AU4_(0x7ef19fff)-AU4_AF4(a));AF4 fix=-est*a+AF4_(2.0);return est*fix;}
 AF4 APrxLoRsqF4(AF4 a){AU4 halved=AU4_AF4(a)>>AU4_(1);return AF4_AU4(AU4_(0x5f347d74)-halved);}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//                                                    PQ APPROXIMATIONS
//------------------------------------------------------------------------------------------------------------------------------
// PQ is very close to x^(1/8). The functions below use the fast float approximation method to do
// PQ<~>Gamma2 (4th power and fast 4th root) and PQ<~>Linear (8th power and fast 8th root). Maximum error is ~0.2%.
//==============================================================================================================================
// Helpers: Quart(a) is a^4 and Oct(a) is a^8, both built by repeated squaring.
 AF1 Quart(AF1 a){AF1 p2=a*a;return p2*p2;}
 AF1 Oct(AF1 a){AF1 p2=a*a;AF1 p4=p2*p2;return p4*p4;}
 AF2 Quart(AF2 a){AF2 p2=a*a;return p2*p2;}
 AF2 Oct(AF2 a){AF2 p2=a*a;AF2 p4=p2*p2;return p4*p4;}
 AF3 Quart(AF3 a){AF3 p2=a*a;return p2*p2;}
 AF3 Oct(AF3 a){AF3 p2=a*a;AF3 p4=p2*p2;return p4*p4;}
 AF4 Quart(AF4 a){AF4 p2=a*a;return p2*p2;}
 AF4 Oct(AF4 a){AF4 p2=a*a;AF4 p4=p2*p2;return p4*p4;}
//------------------------------------------------------------------------------------------------------------------------------
// Lo   ... bit-trick root only ((bits>>2)+0x2F9A4E46 for 4th root, (bits>>3)+0x378D8723 for 8th root).
// Med  ... the Lo result refined by one Newton Raphson step: b-b*(b^n-a)/(n*b^n).
// High ... nested sqrt().
 AF1 APrxPQToGamma2(AF1 a){return Quart(a);}
 AF1 APrxPQToLinear(AF1 a){return Oct(a);}
 AF1 APrxLoGamma2ToPQ(AF1 a){AU1 u=(AU1_AF1(a)>>AU1_(2))+AU1_(0x2F9A4E46);return AF1_AU1(u);}
 AF1 APrxMedGamma2ToPQ(AF1 a){AF1 b=APrxLoGamma2ToPQ(a);AF1 b4=Quart(b);AF1 step=b*(b4-a)/(AF1_(4.0)*b4);return b-step;}
 AF1 APrxHighGamma2ToPQ(AF1 a){AF1 r=sqrt(a);return sqrt(r);}
 AF1 APrxLoLinearToPQ(AF1 a){AU1 u=(AU1_AF1(a)>>AU1_(3))+AU1_(0x378D8723);return AF1_AU1(u);}
 AF1 APrxMedLinearToPQ(AF1 a){AF1 b=APrxLoLinearToPQ(a);AF1 b8=Oct(b);AF1 step=b*(b8-a)/(AF1_(8.0)*b8);return b-step;}
 AF1 APrxHighLinearToPQ(AF1 a){AF1 r=sqrt(a);r=sqrt(r);return sqrt(r);}
//------------------------------------------------------------------------------------------------------------------------------
 AF2 APrxPQToGamma2(AF2 a){return Quart(a);}
 AF2 APrxPQToLinear(AF2 a){return Oct(a);}
 AF2 APrxLoGamma2ToPQ(AF2 a){AU2 u=(AU2_AF2(a)>>AU2_(2))+AU2_(0x2F9A4E46);return AF2_AU2(u);}
 AF2 APrxMedGamma2ToPQ(AF2 a){AF2 b=APrxLoGamma2ToPQ(a);AF2 b4=Quart(b);AF2 step=b*(b4-a)/(AF1_(4.0)*b4);return b-step;}
 AF2 APrxHighGamma2ToPQ(AF2 a){AF2 r=sqrt(a);return sqrt(r);}
 AF2 APrxLoLinearToPQ(AF2 a){AU2 u=(AU2_AF2(a)>>AU2_(3))+AU2_(0x378D8723);return AF2_AU2(u);}
 AF2 APrxMedLinearToPQ(AF2 a){AF2 b=APrxLoLinearToPQ(a);AF2 b8=Oct(b);AF2 step=b*(b8-a)/(AF1_(8.0)*b8);return b-step;}
 AF2 APrxHighLinearToPQ(AF2 a){AF2 r=sqrt(a);r=sqrt(r);return sqrt(r);}
//------------------------------------------------------------------------------------------------------------------------------
 AF3 APrxPQToGamma2(AF3 a){return Quart(a);}
 AF3 APrxPQToLinear(AF3 a){return Oct(a);}
 AF3 APrxLoGamma2ToPQ(AF3 a){AU3 u=(AU3_AF3(a)>>AU3_(2))+AU3_(0x2F9A4E46);return AF3_AU3(u);}
 AF3 APrxMedGamma2ToPQ(AF3 a){AF3 b=APrxLoGamma2ToPQ(a);AF3 b4=Quart(b);AF3 step=b*(b4-a)/(AF1_(4.0)*b4);return b-step;}
 AF3 APrxHighGamma2ToPQ(AF3 a){AF3 r=sqrt(a);return sqrt(r);}
 AF3 APrxLoLinearToPQ(AF3 a){AU3 u=(AU3_AF3(a)>>AU3_(3))+AU3_(0x378D8723);return AF3_AU3(u);}
 AF3 APrxMedLinearToPQ(AF3 a){AF3 b=APrxLoLinearToPQ(a);AF3 b8=Oct(b);AF3 step=b*(b8-a)/(AF1_(8.0)*b8);return b-step;}
 AF3 APrxHighLinearToPQ(AF3 a){AF3 r=sqrt(a);r=sqrt(r);return sqrt(r);}
//------------------------------------------------------------------------------------------------------------------------------
 AF4 APrxPQToGamma2(AF4 a){return Quart(a);}
 AF4 APrxPQToLinear(AF4 a){return Oct(a);}
 AF4 APrxLoGamma2ToPQ(AF4 a){AU4 u=(AU4_AF4(a)>>AU4_(2))+AU4_(0x2F9A4E46);return AF4_AU4(u);}
 AF4 APrxMedGamma2ToPQ(AF4 a){AF4 b=APrxLoGamma2ToPQ(a);AF4 b4=Quart(b);AF4 step=b*(b4-a)/(AF1_(4.0)*b4);return b-step;}
 AF4 APrxHighGamma2ToPQ(AF4 a){AF4 r=sqrt(a);return sqrt(r);}
 AF4 APrxLoLinearToPQ(AF4 a){AU4 u=(AU4_AF4(a)>>AU4_(3))+AU4_(0x378D8723);return AF4_AU4(u);}
 AF4 APrxMedLinearToPQ(AF4 a){AF4 b=APrxLoLinearToPQ(a);AF4 b8=Oct(b);AF4 step=b*(b8-a)/(AF1_(8.0)*b8);return b-step;}
 AF4 APrxHighLinearToPQ(AF4 a){AF4 r=sqrt(a);r=sqrt(r);return sqrt(r);}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//                                                     PARABOLIC SIN & COS
//------------------------------------------------------------------------------------------------------------------------------
// Approximate answers to transcendental questions.
//------------------------------------------------------------------------------------------------------------------------------
//==============================================================================================================================
 #if 1
  // Valid input range is {-1 to 1} representing {0 to 2 pi}.
  // Output range is {-1/4 to 1/4} representing {-1 to 1}.
  // Sin is the parabola x*|x|-x; Cos remaps x with fract(x*0.5+0.75)*2-1 and then reuses Sin.
  AF1 APSinF1(AF1 x){AF1 m=abs(x);return x*m-x;} // MAD.
  AF2 APSinF2(AF2 x){AF2 m=abs(x);return x*m-x;}
  AF1 APCosF1(AF1 x){AF1 t=AFractF1(x*AF1_(0.5)+AF1_(0.75));AF1 s=t*AF1_(2.0)-AF1_(1.0);return APSinF1(s);} // 3x MAD, FRACT
  AF2 APCosF2(AF2 x){AF2 t=AFractF2(x*AF2_(0.5)+AF2_(0.75));AF2 s=t*AF2_(2.0)-AF2_(1.0);return APSinF2(s);}
  // Returns {sin,cos} packed as one AF2 by evaluating the parabola on (x, remapped x).
  AF2 APSinCosF1(AF1 x){AF1 t=AFractF1(x*AF1_(0.5)+AF1_(0.75));AF1 s=t*AF1_(2.0)-AF1_(1.0);return APSinF2(AF2(x,s));}
 #endif
//------------------------------------------------------------------------------------------------------------------------------
 #ifdef A_HALF
  // For a packed {sin,cos} pair,
  //  - Native takes 16 clocks and 4 issue slots (no packed transcendentals).
  //  - Parabolic takes 8 clocks and 8 issue slots (only fract is non-packed).
  AH1 APSinH1(AH1 x){AH1 m=abs(x);return x*m-x;}
  AH2 APSinH2(AH2 x){AH2 m=abs(x);return x*m-x;} // AND,FMA
  AH1 APCosH1(AH1 x){AH1 t=AFractH1(x*AH1_(0.5)+AH1_(0.75));AH1 s=t*AH1_(2.0)-AH1_(1.0);return APSinH1(s);}
  AH2 APCosH2(AH2 x){AH2 t=AFractH2(x*AH2_(0.5)+AH2_(0.75));AH2 s=t*AH2_(2.0)-AH2_(1.0);return APSinH2(s);} // 3x FMA, 2xFRACT, AND
  AH2 APSinCosH1(AH1 x){AH1 t=AFractH1(x*AH1_(0.5)+AH1_(0.75));AH1 s=t*AH1_(2.0)-AH1_(1.0);return APSinH2(AH2(x,s));}
 #endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//                                                     [ZOL] ZERO ONE LOGIC
//------------------------------------------------------------------------------------------------------------------------------
// Conditional free logic designed for easy 16-bit packing, and backwards porting to 32-bit.
//------------------------------------------------------------------------------------------------------------------------------
// 0 := false
// 1 := true
//------------------------------------------------------------------------------------------------------------------------------
// AndNot(x,y)   -> !(x&y) .... One op.
// AndOr(x,y,z)  -> (x&y)|z ... One op.
// GtZero(x)     -> x>0.0 ..... One op.
// Sel(x,y,z)    -> x?y:z ..... Two ops, has no precision loss.
// Signed(x)     -> x<0.0 ..... One op.
// ZeroPass(x,y) -> x?0:y ..... Two ops, 'y' is a pass through safe for aliasing as integer.
//------------------------------------------------------------------------------------------------------------------------------
// OPTIMIZATION NOTES
// ==================
// - On Vega to use 2 constants in a packed op, pass in as one AW2 or one AH2 'k.xy' and use as 'k.xx' and 'k.yy'.
//   For example 'a.xy*k.xx+k.yy'.
//==============================================================================================================================
 #if 1
  // Unsigned integer forms: AND is min(), NOT flips bit 0, OR is max().
  AU1 AZolAndU1(AU1 x,AU1 y){AU1 r=min(x,y);return r;}
  AU2 AZolAndU2(AU2 x,AU2 y){AU2 r=min(x,y);return r;}
  AU3 AZolAndU3(AU3 x,AU3 y){AU3 r=min(x,y);return r;}
  AU4 AZolAndU4(AU4 x,AU4 y){AU4 r=min(x,y);return r;}
//------------------------------------------------------------------------------------------------------------------------------
  AU1 AZolNotU1(AU1 x){AU1 bit=AU1_(1);return x^bit;}
  AU2 AZolNotU2(AU2 x){AU2 bit=AU2_(1);return x^bit;}
  AU3 AZolNotU3(AU3 x){AU3 bit=AU3_(1);return x^bit;}
  AU4 AZolNotU4(AU4 x){AU4 bit=AU4_(1);return x^bit;}
//------------------------------------------------------------------------------------------------------------------------------
  AU1 AZolOrU1(AU1 x,AU1 y){AU1 r=max(x,y);return r;}
  AU2 AZolOrU2(AU2 x,AU2 y){AU2 r=max(x,y);return r;}
  AU3 AZolOrU3(AU3 x,AU3 y){AU3 r=max(x,y);return r;}
  AU4 AZolOrU4(AU4 x,AU4 y){AU4 r=max(x,y);return r;}
//==============================================================================================================================
  // Float zero/one to unsigned zero/one, by value conversion.
  AU1 AZolF1ToU1(AF1 x){AU1 r=AU1(x);return r;}
  AU2 AZolF2ToU2(AF2 x){AU2 r=AU2(x);return r;}
  AU3 AZolF3ToU3(AF3 x){AU3 r=AU3(x);return r;}
  AU4 AZolF4ToU4(AF4 x){AU4 r=AU4(x);return r;}
//------------------------------------------------------------------------------------------------------------------------------
  // 2 ops, denormals don't work in 32-bit on PC (and if they are enabled, OMOD is disabled).
+ AU1 AZolNotF1ToU1(AF1 x){return AU1(AF1_(1.0)-x);} + AU2 AZolNotF2ToU2(AF2 x){return AU2(AF2_(1.0)-x);} + AU3 AZolNotF3ToU3(AF3 x){return AU3(AF3_(1.0)-x);} + AU4 AZolNotF4ToU4(AF4 x){return AU4(AF4_(1.0)-x);} +//------------------------------------------------------------------------------------------------------------------------------ + AF1 AZolU1ToF1(AU1 x){return AF1(x);} + AF2 AZolU2ToF2(AU2 x){return AF2(x);} + AF3 AZolU3ToF3(AU3 x){return AF3(x);} + AF4 AZolU4ToF4(AU4 x){return AF4(x);} +//============================================================================================================================== + AF1 AZolAndF1(AF1 x,AF1 y){return min(x,y);} + AF2 AZolAndF2(AF2 x,AF2 y){return min(x,y);} + AF3 AZolAndF3(AF3 x,AF3 y){return min(x,y);} + AF4 AZolAndF4(AF4 x,AF4 y){return min(x,y);} +//------------------------------------------------------------------------------------------------------------------------------ + AF1 ASolAndNotF1(AF1 x,AF1 y){return (-x)*y+AF1_(1.0);} + AF2 ASolAndNotF2(AF2 x,AF2 y){return (-x)*y+AF2_(1.0);} + AF3 ASolAndNotF3(AF3 x,AF3 y){return (-x)*y+AF3_(1.0);} + AF4 ASolAndNotF4(AF4 x,AF4 y){return (-x)*y+AF4_(1.0);} +//------------------------------------------------------------------------------------------------------------------------------ + AF1 AZolAndOrF1(AF1 x,AF1 y,AF1 z){return ASatF1(x*y+z);} + AF2 AZolAndOrF2(AF2 x,AF2 y,AF2 z){return ASatF2(x*y+z);} + AF3 AZolAndOrF3(AF3 x,AF3 y,AF3 z){return ASatF3(x*y+z);} + AF4 AZolAndOrF4(AF4 x,AF4 y,AF4 z){return ASatF4(x*y+z);} +//------------------------------------------------------------------------------------------------------------------------------ + AF1 AZolGtZeroF1(AF1 x){return ASatF1(x*AF1_(A_INFP_F));} + AF2 AZolGtZeroF2(AF2 x){return ASatF2(x*AF2_(A_INFP_F));} + AF3 AZolGtZeroF3(AF3 x){return ASatF3(x*AF3_(A_INFP_F));} + AF4 AZolGtZeroF4(AF4 x){return ASatF4(x*AF4_(A_INFP_F));} 
//------------------------------------------------------------------------------------------------------------------------------
  // NOT of a zero/one float is 1-x.
  AF1 AZolNotF1(AF1 x){AF1 one=AF1_(1.0);return one-x;}
  AF2 AZolNotF2(AF2 x){AF2 one=AF2_(1.0);return one-x;}
  AF3 AZolNotF3(AF3 x){AF3 one=AF3_(1.0);return one-x;}
  AF4 AZolNotF4(AF4 x){AF4 one=AF4_(1.0);return one-x;}
//------------------------------------------------------------------------------------------------------------------------------
  // OR of zero/one floats is max().
  AF1 AZolOrF1(AF1 x,AF1 y){AF1 r=max(x,y);return r;}
  AF2 AZolOrF2(AF2 x,AF2 y){AF2 r=max(x,y);return r;}
  AF3 AZolOrF3(AF3 x,AF3 y){AF3 r=max(x,y);return r;}
  AF4 AZolOrF4(AF4 x,AF4 y){AF4 r=max(x,y);return r;}
//------------------------------------------------------------------------------------------------------------------------------
  // Sel(x,y,z) -> x?y:z. 'keep' is z when x is 0 and z-z when x is 1, then x*y is added on top.
  AF1 AZolSelF1(AF1 x,AF1 y,AF1 z){AF1 keep=(-x)*z+z;return x*y+keep;}
  AF2 AZolSelF2(AF2 x,AF2 y,AF2 z){AF2 keep=(-x)*z+z;return x*y+keep;}
  AF3 AZolSelF3(AF3 x,AF3 y,AF3 z){AF3 keep=(-x)*z+z;return x*y+keep;}
  AF4 AZolSelF4(AF4 x,AF4 y,AF4 z){AF4 keep=(-x)*z+z;return x*y+keep;}
//------------------------------------------------------------------------------------------------------------------------------
  // Signed(x) -> x<0.0, computed as saturate(x*A_INFN_F).
  AF1 AZolSignedF1(AF1 x){AF1 scaled=x*AF1_(A_INFN_F);return ASatF1(scaled);}
  AF2 AZolSignedF2(AF2 x){AF2 scaled=x*AF2_(A_INFN_F);return ASatF2(scaled);}
  AF3 AZolSignedF3(AF3 x){AF3 scaled=x*AF3_(A_INFN_F);return ASatF3(scaled);}
  AF4 AZolSignedF4(AF4 x){AF4 scaled=x*AF4_(A_INFN_F);return ASatF4(scaled);}
//------------------------------------------------------------------------------------------------------------------------------
  // ZeroPass(x,y) -> x?0:y, done on the integer bit patterns so 'y' passes through untouched.
  // NOTE(review): for the vector forms the meaning of '!=' and '?:' on vectors depends on the shading language - confirm.
  AF1 AZolZeroPassF1(AF1 x,AF1 y){AU1 xb=AU1_AF1(x);AU1 yb=AU1_AF1(y);return AF1_AU1((xb!=AU1_(0))?AU1_(0):yb);}
  AF2 AZolZeroPassF2(AF2 x,AF2 y){AU2 xb=AU2_AF2(x);AU2 yb=AU2_AF2(y);return AF2_AU2((xb!=AU2_(0))?AU2_(0):yb);}
  AF3 AZolZeroPassF3(AF3 x,AF3 y){AU3 xb=AU3_AF3(x);AU3 yb=AU3_AF3(y);return AF3_AU3((xb!=AU3_(0))?AU3_(0):yb);}
  AF4 AZolZeroPassF4(AF4 x,AF4 y){AU4 xb=AU4_AF4(x);AU4 yb=AU4_AF4(y);return AF4_AU4((xb!=AU4_(0))?AU4_(0):yb);}
 #endif
//==============================================================================================================================
 #ifdef A_HALF
  // 16-bit unsigned forms: AND is min(), NOT flips bit 0, OR is max().
  AW1 AZolAndW1(AW1 x,AW1 y){AW1 r=min(x,y);return r;}
  AW2 AZolAndW2(AW2 x,AW2 y){AW2 r=min(x,y);return r;}
  AW3 AZolAndW3(AW3 x,AW3 y){AW3 r=min(x,y);return r;}
  AW4 AZolAndW4(AW4 x,AW4 y){AW4 r=min(x,y);return r;}
//------------------------------------------------------------------------------------------------------------------------------
  AW1 AZolNotW1(AW1 x){AW1 bit=AW1_(1);return x^bit;}
  AW2 AZolNotW2(AW2 x){AW2 bit=AW2_(1);return x^bit;}
  AW3 AZolNotW3(AW3 x){AW3 bit=AW3_(1);return x^bit;}
  AW4 AZolNotW4(AW4 x){AW4 bit=AW4_(1);return x^bit;}
//------------------------------------------------------------------------------------------------------------------------------
  AW1 AZolOrW1(AW1 x,AW1 y){AW1 r=max(x,y);return r;}
  AW2 AZolOrW2(AW2 x,AW2 y){AW2 r=max(x,y);return r;}
  AW3 AZolOrW3(AW3 x,AW3 y){AW3 r=max(x,y);return r;}
  AW4 AZolOrW4(AW4 x,AW4 y){AW4 r=max(x,y);return r;}
//==============================================================================================================================
  // Uses denormal trick: multiply by the half whose bit pattern is integer 1, then reinterpret the product as bits.
  AW1 AZolH1ToW1(AH1 x){AH1 lsb=AH1_AW1(AW1_(1));return AW1_AH1(x*lsb);}
  AW2 AZolH2ToW2(AH2 x){AH2 lsb=AH2_AW2(AW2_(1));return AW2_AH2(x*lsb);}
  AW3 AZolH3ToW3(AH3 x){AH3 lsb=AH3_AW3(AW3_(1));return AW3_AH3(x*lsb);}
  AW4 AZolH4ToW4(AH4 x){AH4 lsb=AH4_AW4(AW4_(1));return AW4_AH4(x*lsb);}
//------------------------------------------------------------------------------------------------------------------------------
  // AMD arch lacks a packed conversion opcode.
+ AH1 AZolW1ToH1(AW1 x){return AH1_AW1(x*AW1_AH1(AH1_(1.0)));} + AH2 AZolW2ToH2(AW2 x){return AH2_AW2(x*AW2_AH2(AH2_(1.0)));} + AH3 AZolW1ToH3(AW3 x){return AH3_AW3(x*AW3_AH3(AH3_(1.0)));} + AH4 AZolW2ToH4(AW4 x){return AH4_AW4(x*AW4_AH4(AH4_(1.0)));} +//============================================================================================================================== + AH1 AZolAndH1(AH1 x,AH1 y){return min(x,y);} + AH2 AZolAndH2(AH2 x,AH2 y){return min(x,y);} + AH3 AZolAndH3(AH3 x,AH3 y){return min(x,y);} + AH4 AZolAndH4(AH4 x,AH4 y){return min(x,y);} +//------------------------------------------------------------------------------------------------------------------------------ + AH1 ASolAndNotH1(AH1 x,AH1 y){return (-x)*y+AH1_(1.0);} + AH2 ASolAndNotH2(AH2 x,AH2 y){return (-x)*y+AH2_(1.0);} + AH3 ASolAndNotH3(AH3 x,AH3 y){return (-x)*y+AH3_(1.0);} + AH4 ASolAndNotH4(AH4 x,AH4 y){return (-x)*y+AH4_(1.0);} +//------------------------------------------------------------------------------------------------------------------------------ + AH1 AZolAndOrH1(AH1 x,AH1 y,AH1 z){return ASatH1(x*y+z);} + AH2 AZolAndOrH2(AH2 x,AH2 y,AH2 z){return ASatH2(x*y+z);} + AH3 AZolAndOrH3(AH3 x,AH3 y,AH3 z){return ASatH3(x*y+z);} + AH4 AZolAndOrH4(AH4 x,AH4 y,AH4 z){return ASatH4(x*y+z);} +//------------------------------------------------------------------------------------------------------------------------------ + AH1 AZolGtZeroH1(AH1 x){return ASatH1(x*AH1_(A_INFP_H));} + AH2 AZolGtZeroH2(AH2 x){return ASatH2(x*AH2_(A_INFP_H));} + AH3 AZolGtZeroH3(AH3 x){return ASatH3(x*AH3_(A_INFP_H));} + AH4 AZolGtZeroH4(AH4 x){return ASatH4(x*AH4_(A_INFP_H));} +//------------------------------------------------------------------------------------------------------------------------------ + AH1 AZolNotH1(AH1 x){return AH1_(1.0)-x;} + AH2 AZolNotH2(AH2 x){return AH2_(1.0)-x;} + AH3 AZolNotH3(AH3 x){return AH3_(1.0)-x;} + AH4 AZolNotH4(AH4 x){return AH4_(1.0)-x;} 
//------------------------------------------------------------------------------------------------------------------------------
  // OR of zero/one halfs is max().
  AH1 AZolOrH1(AH1 x,AH1 y){AH1 r=max(x,y);return r;}
  AH2 AZolOrH2(AH2 x,AH2 y){AH2 r=max(x,y);return r;}
  AH3 AZolOrH3(AH3 x,AH3 y){AH3 r=max(x,y);return r;}
  AH4 AZolOrH4(AH4 x,AH4 y){AH4 r=max(x,y);return r;}
//------------------------------------------------------------------------------------------------------------------------------
  // Sel(x,y,z) -> x?y:z. 'keep' is z when x is 0 and z-z when x is 1, then x*y is added on top.
  AH1 AZolSelH1(AH1 x,AH1 y,AH1 z){AH1 keep=(-x)*z+z;return x*y+keep;}
  AH2 AZolSelH2(AH2 x,AH2 y,AH2 z){AH2 keep=(-x)*z+z;return x*y+keep;}
  AH3 AZolSelH3(AH3 x,AH3 y,AH3 z){AH3 keep=(-x)*z+z;return x*y+keep;}
  AH4 AZolSelH4(AH4 x,AH4 y,AH4 z){AH4 keep=(-x)*z+z;return x*y+keep;}
//------------------------------------------------------------------------------------------------------------------------------
  // Signed(x) -> x<0.0, computed as saturate(x*A_INFN_H).
  AH1 AZolSignedH1(AH1 x){AH1 scaled=x*AH1_(A_INFN_H);return ASatH1(scaled);}
  AH2 AZolSignedH2(AH2 x){AH2 scaled=x*AH2_(A_INFN_H);return ASatH2(scaled);}
  AH3 AZolSignedH3(AH3 x){AH3 scaled=x*AH3_(A_INFN_H);return ASatH3(scaled);}
  AH4 AZolSignedH4(AH4 x){AH4 scaled=x*AH4_(A_INFN_H);return ASatH4(scaled);}
 #endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//                                                      COLOR CONVERSIONS
//------------------------------------------------------------------------------------------------------------------------------
// These are all linear to/from some other space (where 'linear' has been shortened out of the function name).
// So 'ToGamma' is 'LinearToGamma', and 'FromGamma' is 'LinearFromGamma'.
// These are branch free implementations.
+// The AToSrgbF1() function is useful for stores for compute shaders for GPUs without hardware linear->sRGB store conversion. +//------------------------------------------------------------------------------------------------------------------------------ +// TRANSFER FUNCTIONS +// ================== +// 709 ..... Rec709 used for some HDTVs +// Gamma ... Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native +// Pq ...... PQ native for HDR10 +// Srgb .... The sRGB output, typical of PC displays, useful for 10-bit output, or storing to 8-bit UNORM without SRGB type +// Two ..... Gamma 2.0, fastest conversion (useful for intermediate pass approximations) +// Three ... Gamma 3.0, less fast, but good for HDR. +//------------------------------------------------------------------------------------------------------------------------------ +// KEEPING TO SPEC +// =============== +// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +// Also there is a slight step in the transition regions. +// Precision of the coefficients in the spec being the likely cause. +// Main usage case of the sRGB code is to do the linear->sRGB conversion in a compute shader before store. +// This is to work around lack of hardware (typically only ROP does the conversion for free). +// To "correct" the linear segment, would be to introduce error, because hardware decode of sRGB->linear is fixed (and free). +// So this header keeps with the spec. +// For linear->sRGB transforms, the linear segment in some respects reduces error, because rounding in that region is linear. +// Rounding in the curved region in hardware (and fast software code) introduces error due to rounding in non-linear. 
//------------------------------------------------------------------------------------------------------------------------------
// FOR PQ
// ======
// Both input and output is {0.0-1.0}, and where output 1.0 represents 10000.0 cd/m^2.
// All constants are only specified to FP32 precision.
// External PQ source reference,
//  - https://github.com/ampas/aces-dev/blob/master/transforms/ctl/utilities/ACESlib.Utilities_Color.a1.0.1.ctl
//------------------------------------------------------------------------------------------------------------------------------
// PACKED VERSIONS
// ===============
// These are the A*H2() functions.
// There are no PQ functions as FP16 seemed to not have enough precision for the conversion.
// The remaining functions are "good enough" for 8-bit, and maybe 10-bit if not concerned about a few 1-bit errors.
// Precision is lowest in the 709 conversion, higher in sRGB, higher still in Two and Gamma (when using 2.2 at least).
//------------------------------------------------------------------------------------------------------------------------------
// NOTES
// =====
// Could be faster for PQ conversions to be in ALU or a texture lookup depending on usage case.
//==============================================================================================================================
 #if 1
  // Linear to Rec.709. 'lin' is the linear segment (c*4.5), 'crv' the power segment (1.099*c^0.45-0.099).
  // The result is clamp(0.018*4.5, lin, crv), i.e. the breakpoint value clamped between the two segments.
  AF1 ATo709F1(AF1 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099);
   AF1 lin=c*j.y;AF1 crv=pow(c,j.z)*k.x+k.y;return clamp(j.x,lin,crv);}
  AF2 ATo709F2(AF2 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099);
   AF2 lin=c*j.yy;AF2 crv=pow(c,j.zz)*k.xx+k.yy;return clamp(j.xx,lin,crv);}
  AF3 ATo709F3(AF3 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099);
   AF3 lin=c*j.yyy;AF3 crv=pow(c,j.zzz)*k.xxx+k.yyy;return clamp(j.xxx,lin,crv);}
//------------------------------------------------------------------------------------------------------------------------------
  // Note 'rcpX' is '1/x', where the 'x' is what would be used in AFromGamma().
+ AF1 AToGammaF1(AF1 c,AF1 rcpX){return pow(c,AF1_(rcpX));} + AF2 AToGammaF2(AF2 c,AF1 rcpX){return pow(c,AF2_(rcpX));} + AF3 AToGammaF3(AF3 c,AF1 rcpX){return pow(c,AF3_(rcpX));} +//------------------------------------------------------------------------------------------------------------------------------ + AF1 AToPqF1(AF1 x){AF1 p=pow(x,AF1_(0.159302)); + return pow((AF1_(0.835938)+AF1_(18.8516)*p)/(AF1_(1.0)+AF1_(18.6875)*p),AF1_(78.8438));} + AF2 AToPqF1(AF2 x){AF2 p=pow(x,AF2_(0.159302)); + return pow((AF2_(0.835938)+AF2_(18.8516)*p)/(AF2_(1.0)+AF2_(18.6875)*p),AF2_(78.8438));} + AF3 AToPqF1(AF3 x){AF3 p=pow(x,AF3_(0.159302)); + return pow((AF3_(0.835938)+AF3_(18.8516)*p)/(AF3_(1.0)+AF3_(18.6875)*p),AF3_(78.8438));} +//------------------------------------------------------------------------------------------------------------------------------ + AF1 AToSrgbF1(AF1 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055); + return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );} + AF2 AToSrgbF2(AF2 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055); + return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );} + AF3 AToSrgbF3(AF3 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055); + return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);} +//------------------------------------------------------------------------------------------------------------------------------ + AF1 AToTwoF1(AF1 c){return sqrt(c);} + AF2 AToTwoF2(AF2 c){return sqrt(c);} + AF3 AToTwoF3(AF3 c){return sqrt(c);} +//------------------------------------------------------------------------------------------------------------------------------ + AF1 AToThreeF1(AF1 c){return pow(c,AF1_(1.0/3.0));} + AF2 AToThreeF2(AF2 c){return pow(c,AF2_(1.0/3.0));} + AF3 AToThreeF3(AF3 c){return pow(c,AF3_(1.0/3.0));} + #endif +//============================================================================================================================== + #if 1 + // 
Unfortunately median won't work here. + AF1 AFrom709F1(AF1 c){AF3 j=AF3(0.081/4.5,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099); + return AZolSelF1(AZolSignedF1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));} + AF2 AFrom709F2(AF2 c){AF3 j=AF3(0.081/4.5,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099); + return AZolSelF2(AZolSignedF2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} + AF3 AFrom709F3(AF3 c){AF3 j=AF3(0.081/4.5,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099); + return AZolSelF3(AZolSignedF3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} +//------------------------------------------------------------------------------------------------------------------------------ + AF1 AFromGammaF1(AF1 c,AF1 x){return pow(c,AF1_(x));} + AF2 AFromGammaF2(AF2 c,AF1 x){return pow(c,AF2_(x));} + AF3 AFromGammaF3(AF3 c,AF1 x){return pow(c,AF3_(x));} +//------------------------------------------------------------------------------------------------------------------------------ + AF1 AFromPqF1(AF1 x){AF1 p=pow(x,AF1_(0.0126833)); + return pow(ASatF1(p-AF1_(0.835938))/(AF1_(18.8516)-AF1_(18.6875)*p),AF1_(6.27739));} + AF2 AFromPqF1(AF2 x){AF2 p=pow(x,AF2_(0.0126833)); + return pow(ASatF2(p-AF2_(0.835938))/(AF2_(18.8516)-AF2_(18.6875)*p),AF2_(6.27739));} + AF3 AFromPqF1(AF3 x){AF3 p=pow(x,AF3_(0.0126833)); + return pow(ASatF3(p-AF3_(0.835938))/(AF3_(18.8516)-AF3_(18.6875)*p),AF3_(6.27739));} +//------------------------------------------------------------------------------------------------------------------------------ + // Unfortunately median won't work here. 
+ AF1 AFromSrgbF1(AF1 c){AF3 j=AF3(0.04045/12.92,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055); + return AZolSelF1(AZolSignedF1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));} + AF2 AFromSrgbF2(AF2 c){AF3 j=AF3(0.04045/12.92,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055); + return AZolSelF2(AZolSignedF2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} + AF3 AFromSrgbF3(AF3 c){AF3 j=AF3(0.04045/12.92,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055); + return AZolSelF3(AZolSignedF3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} +//------------------------------------------------------------------------------------------------------------------------------ + AF1 AFromTwoF1(AF1 c){return c*c;} + AF2 AFromTwoF2(AF2 c){return c*c;} + AF3 AFromTwoF3(AF3 c){return c*c;} +//------------------------------------------------------------------------------------------------------------------------------ + AF1 AFromThreeF1(AF1 c){return c*c*c;} + AF2 AFromThreeF2(AF2 c){return c*c*c;} + AF3 AFromThreeF3(AF3 c){return c*c*c;} + #endif +//============================================================================================================================== + #ifdef A_HALF + AH1 ATo709H1(AH1 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099); + return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );} + AH2 ATo709H2(AH2 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099); + return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );} + AH3 ATo709H3(AH3 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099); + return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);} +//------------------------------------------------------------------------------------------------------------------------------ + AH1 AToGammaH1(AH1 c,AH1 rcpX){return pow(c,AH1_(rcpX));} + AH2 AToGammaH2(AH2 c,AH1 rcpX){return pow(c,AH2_(rcpX));} + AH3 AToGammaH3(AH3 c,AH1 rcpX){return pow(c,AH3_(rcpX));} 
+//------------------------------------------------------------------------------------------------------------------------------
+ // sRGB encode, half precision. clamp(x,lo,hi) is called with the constant cutoff 0.0031308*12.92 as x, the
+ // linear segment c*12.92 as lo and the power segment 1.055*pow(c,1/2.4)-0.055 as hi.
+ AH1 AToSrgbH1(AH1 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055);
+  return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );}
+ AH2 AToSrgbH2(AH2 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055);
+  return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );}
+ AH3 AToSrgbH3(AH3 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055);
+  return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Gamma 2.0 encode: square root of each component.
+ AH1 AToTwoH1(AH1 c){return sqrt(c);}
+ AH2 AToTwoH2(AH2 c){return sqrt(c);}
+ AH3 AToTwoH3(AH3 c){return sqrt(c);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Gamma 3.0 encode: cube root of each component.
+ AH1 AToThreeH1(AH1 c){return pow(c,AH1_(1.0/3.0));}
+ AH2 AToThreeH2(AH2 c){return pow(c,AH2_(1.0/3.0));}
+ AH3 AToThreeH3(AH3 c){return pow(c,AH3_(1.0/3.0));}
+ // Legacy spellings: the half overloads originally carried the F suffix. Kept so existing callers still compile.
+ AH1 AToThreeF1(AH1 c){return AToThreeH1(c);}
+ AH2 AToThreeF2(AH2 c){return AToThreeH2(c);}
+ AH3 AToThreeF3(AH3 c){return AToThreeH3(c);}
+ #endif
+//==============================================================================================================================
+ #ifdef A_HALF
+ // 709 decode, half precision: AZolSel* chooses between the linear segment c/4.5 and the power segment
+ // pow((c+0.099)/1.099,1/0.45), driven by AZolSigned*(c-j.x). Both helpers are defined elsewhere in this header.
+ // NOTE(review): j.x=0.081/4.5 is compared directly against the encoded input c. The encoded-domain cutoff
+ // would presumably be 0.081 - confirm against the helpers before changing.
+ AH1 AFrom709H1(AH1 c){AH3 j=AH3(0.081/4.5,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099);
+  return AZolSelH1(AZolSignedH1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));}
+ AH2 AFrom709H2(AH2 c){AH3 j=AH3(0.081/4.5,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099);
+  return AZolSelH2(AZolSignedH2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));}
+ AH3 AFrom709H3(AH3 c){AH3 j=AH3(0.081/4.5,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099);
+  return AZolSelH3(AZolSignedH3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Pure power-curve decode: raises every component of 'c' to the scalar exponent 'x'.
+ AH1 AFromGammaH1(AH1 c,AH1 x){return pow(c,AH1_(x));}
+ AH2 AFromGammaH2(AH2 c,AH1 x){return pow(c,AH2_(x));}
+ AH3 AFromGammaH3(AH3 c,AH1 x){return pow(c,AH3_(x));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // sRGB decode, half precision: linear segment c/12.92 or power segment pow((c+0.055)/1.055,2.4).
+ // NOTE(review): same cutoff question as AFrom709H* (j.x=0.04045/12.92 is compared against the encoded input).
+ AH1 AFromSrgbH1(AH1 c){AH3 j=AH3(0.04045/12.92,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055);
+  return AZolSelH1(AZolSignedH1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));}
+ AH2 AFromSrgbH2(AH2 c){AH3 j=AH3(0.04045/12.92,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055);
+  return AZolSelH2(AZolSignedH2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));}
+ AH3 AFromSrgbH3(AH3 c){AH3 j=AH3(0.04045/12.92,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055);
+  return AZolSelH3(AZolSignedH3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));}
+ // Legacy spellings: the original names were mistyped as "AHromSrgbF*". Kept so existing callers still compile.
+ AH1 AHromSrgbF1(AH1 c){return AFromSrgbH1(c);}
+ AH2 AHromSrgbF2(AH2 c){return AFromSrgbH2(c);}
+ AH3 AHromSrgbF3(AH3 c){return AFromSrgbH3(c);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Gamma 2.0 decode: squares each component.
+ AH1 AFromTwoH1(AH1 c){return c*c;}
+ AH2 AFromTwoH2(AH2 c){return c*c;}
+ AH3 AFromTwoH3(AH3 c){return c*c;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Gamma 3.0 decode: cubes each component.
+ AH1 AFromThreeH1(AH1 c){return c*c*c;}
+ AH2 AFromThreeH2(AH2 c){return c*c*c;}
+ AH3 AFromThreeH3(AH3 c){return c*c*c;}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// CS REMAP
+//==============================================================================================================================
+ // Simple remap 64x1 to 8x8 with rotated 2x2 pixel quads in quad linear.
+ // 543210
+ // ======
+ // ..xxx.
+ // yy...y
+ // Returns (x,y) for lane index 'a'. Per the bit map above, x is taken from bits 1..3 of 'a', and y is built
+ // from bits 4..5 with bit 0 as its lowest bit. ABfe/ABfiM are defined elsewhere in this header; presumably
+ // ABfe(src,off,bits) extracts a bit field and ABfiM(src,ins,bits) overwrites the low 'bits' of src - TODO confirm.
+ AU2 ARmp8x8(AU1 a){return AU2(ABfe(a,1u,3u),ABfiM(ABfe(a,3u,3u),a,1u));}
+//==============================================================================================================================
+ // More complex remap 64x1 to 8x8 which is necessary for 2D wave reductions.
+ // 543210
+ // ======
+ // .xx..x
+ // y..yy.
+ // Details,
+ // LANE TO 8x8 MAPPING
+ // ===================
+ // 00 01 08 09 10 11 18 19
+ // 02 03 0a 0b 12 13 1a 1b
+ // 04 05 0c 0d 14 15 1c 1d
+ // 06 07 0e 0f 16 17 1e 1f
+ // 20 21 28 29 30 31 38 39
+ // 22 23 2a 2b 32 33 3a 3b
+ // 24 25 2c 2d 34 35 3c 3d
+ // 26 27 2e 2f 36 37 3e 3f
+ // Returns (x,y) for lane index 'a' using the bit map above: x from bits 3..4 plus bit 0, y from bit 5 plus bits 1..2.
+ AU2 ARmpRed8x8(AU1 a){return AU2(ABfiM(ABfe(a,2u,3u),a,1u),ABfiM(ABfe(a,3u,3u),ABfe(a,1u,2u),2u));}
+//==============================================================================================================================
+ #ifdef A_HALF
+ // Same two remaps as above, with the result constructed as AW2 instead of AU2.
+ AW2 ARmp8x8H(AU1 a){return AW2(ABfe(a,1u,3u),ABfiM(ABfe(a,3u,3u),a,1u));}
+ AW2 ARmpRed8x8H(AU1 a){return AW2(ABfiM(ABfe(a,2u,3u),a,1u),ABfiM(ABfe(a,3u,3u),ABfe(a,1u,2u),2u));}
+ #endif
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+// REFERENCE
+//
+//------------------------------------------------------------------------------------------------------------------------------ +// IEEE FLOAT RULES +// ================ +// - saturate(NaN)=0, saturate(-INF)=0, saturate(+INF)=1 +// - {+/-}0 * {+/-}INF = NaN +// - -INF + (+INF) = NaN +// - {+/-}0 / {+/-}0 = NaN +// - {+/-}INF / {+/-}INF = NaN +// - a<(-0) := sqrt(a) = NaN (a=-0.0 won't NaN) +// - 0 == -0 +// - 4/0 = +INF +// - 4/-0 = -INF +// - 4+INF = +INF +// - 4-INF = -INF +// - 4*(+INF) = +INF +// - 4*(-INF) = -INF +// - -4*(+INF) = -INF +// - sqrt(+INF) = +INF +//------------------------------------------------------------------------------------------------------------------------------ +// FP16 ENCODING +// ============= +// fedcba9876543210 +// ---------------- +// ......mmmmmmmmmm 10-bit mantissa (encodes 11-bit 0.5 to 1.0 except for denormals) +// .eeeee.......... 5-bit exponent +// .00000.......... denormals +// .00001.......... -14 exponent +// .11110.......... 15 exponent +// .111110000000000 infinity +// .11111nnnnnnnnnn NaN with n!=0 +// s............... sign +//------------------------------------------------------------------------------------------------------------------------------ +// FP16/INT16 ALIASING DENORMAL +// ============================ +// 11-bit unsigned integers alias with half float denormal/normal values, +// 1 = 2^(-24) = 1/16777216 ....................... first denormal value +// 2 = 2^(-23) +// ... +// 1023 = 2^(-14)*(1-2^(-10)) = 2^(-14)*(1-1/1024) ... last denormal value +// 1024 = 2^(-14) = 1/16384 .......................... first normal value that still maps to integers +// 2047 .............................................. last normal value that still maps to integers +// Scaling limits, +// 2^15 = 32768 ...................................... 
largest power of 2 scaling +// Largest pow2 conversion mapping is at *32768, +// 1 : 2^(-9) = 1/512 +// 2 : 1/256 +// 4 : 1/128 +// 8 : 1/64 +// 16 : 1/32 +// 32 : 1/16 +// 64 : 1/8 +// 128 : 1/4 +// 256 : 1/2 +// 512 : 1 +// 1024 : 2 +// 2047 : a little less than 4 +//============================================================================================================================== +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// +// GPU/CPU PORTABILITY +// +// +//------------------------------------------------------------------------------------------------------------------------------ +// This is the GPU implementation. +// See the CPU implementation for docs. 
+//==============================================================================================================================
+#ifdef A_GPU
+ // On the GPU path the boolean constants map to the shading language's own literals and A_STATIC expands to nothing.
+ #define A_TRUE true
+ #define A_FALSE false
+ #define A_STATIC
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY
+//==============================================================================================================================
+ // ret<T>: vector return type. On the GPU this is just the vector type itself.
+ #define retAD2 AD2
+ #define retAD3 AD3
+ #define retAD4 AD4
+ #define retAF2 AF2
+ #define retAF3 AF3
+ #define retAF4 AF4
+ #define retAL2 AL2
+ #define retAL3 AL3
+ #define retAL4 AL4
+ #define retAU2 AU2
+ #define retAU3 AU3
+ #define retAU4 AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ // in<T>: read-only vector parameter, spelled with the 'in' qualifier.
+ #define inAD2 in AD2
+ #define inAD3 in AD3
+ #define inAD4 in AD4
+ #define inAF2 in AF2
+ #define inAF3 in AF3
+ #define inAF4 in AF4
+ #define inAL2 in AL2
+ #define inAL3 in AL3
+ #define inAL4 in AL4
+ #define inAU2 in AU2
+ #define inAU3 in AU3
+ #define inAU4 in AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ // inout<T>: read-write vector parameter, spelled with the 'inout' qualifier.
+ #define inoutAD2 inout AD2
+ #define inoutAD3 inout AD3
+ #define inoutAD4 inout AD4
+ #define inoutAF2 inout AF2
+ #define inoutAF3 inout AF3
+ #define inoutAF4 inout AF4
+ #define inoutAL2 inout AL2
+ #define inoutAL3 inout AL3
+ #define inoutAL4 inout AL4
+ #define inoutAU2 inout AU2
+ #define inoutAU3 inout AU3
+ #define inoutAU4 inout AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ // out<T>: write-only vector parameter, spelled with the 'out' qualifier.
+ #define outAD2 out AD2
+ #define outAD3 out AD3
+ #define outAD4 out AD4
+ #define outAF2 out AF2
+ #define outAF3 out AF3
+ #define outAF4 out AF4
+ #define outAL2 out AL2
+ #define outAL3 out AL3
+ #define outAL4 out AL4
+ #define outAU2 out AU2
+ #define outAU3 out AU3
+ #define outAU4 out AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ // var<T>(x): declares a local vector named x.
+ #define varAD2(x) AD2 x
+ #define varAD3(x) AD3 x
+ #define varAD4(x) AD4 x
+ #define varAF2(x) AF2 x
+ #define varAF3(x) AF3 x
+ #define varAF4(x) AF4 x
+ #define varAL2(x) AL2 x
+ #define varAL3(x) AL3 x
+ #define varAL4(x) AL4 x
+ #define varAU2(x) AU2 x
+ #define varAU3(x) AU3 x
+ #define varAU4(x) AU4 x
+//------------------------------------------------------------------------------------------------------------------------------
+ // init<T>(...): vector initializer. On the GPU this is the type's constructor call.
+ #define initAD2(x,y) AD2(x,y)
+ #define initAD3(x,y,z) AD3(x,y,z)
+ #define initAD4(x,y,z,w) AD4(x,y,z,w)
+ #define initAF2(x,y) AF2(x,y)
+ #define initAF3(x,y,z) AF3(x,y,z)
+ #define initAF4(x,y,z,w) AF4(x,y,z,w)
+ #define initAL2(x,y) AL2(x,y)
+ #define initAL3(x,y,z) AL3(x,y,z)
+ #define initAL4(x,y,z,w) AL4(x,y,z,w)
+ #define initAU2(x,y) AU2(x,y)
+ #define initAU3(x,y,z) AU3(x,y,z)
+ #define initAU4(x,y,z,w) AU4(x,y,z,w)
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// SCALAR RETURN OPS
+//==============================================================================================================================
+ // Each macro forwards to the built-in of the same meaning. Most of them first convert the argument(s) to the
+ // type named by the suffix (D1 -> AD1, F1 -> AF1, D2 -> AD2, ...).
+ #define AAbsD1(a) abs(AD1(a))
+ #define AAbsF1(a) abs(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ACosD1(a) cos(AD1(a))
+ #define ACosF1(a) cos(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ADotD2(a,b) dot(AD2(a),AD2(b))
+ #define ADotD3(a,b) dot(AD3(a),AD3(b))
+ #define ADotD4(a,b) dot(AD4(a),AD4(b))
+ #define ADotF2(a,b) dot(AF2(a),AF2(b))
+ #define ADotF3(a,b) dot(AF3(a),AF3(b))
+ #define ADotF4(a,b) dot(AF4(a),AF4(b))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AExp2D1(a) exp2(AD1(a))
+ #define AExp2F1(a) exp2(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AFloorD1(a) floor(AD1(a))
+ #define AFloorF1(a) floor(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ALog2D1(a) log2(AD1(a))
+ #define ALog2F1(a) log2(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Unlike the macros above, the max/min wrappers pass their arguments through without a conversion.
+ #define AMaxD1(a,b) max(a,b)
+ #define AMaxF1(a,b) max(a,b)
+ #define AMaxL1(a,b) max(a,b)
+ #define AMaxU1(a,b) max(a,b)
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AMinD1(a,b) min(a,b)
+ #define AMinF1(a,b) min(a,b)
+ #define AMinL1(a,b) min(a,b)
+ #define AMinU1(a,b) min(a,b)
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASinD1(a) sin(AD1(a))
+ #define ASinF1(a) sin(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASqrtD1(a) sqrt(AD1(a))
+ #define ASqrtF1(a) sqrt(AF1(a))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// SCALAR RETURN OPS - DEPENDENT
+//==============================================================================================================================
+ // NOTE(review): APowD1 converts the base to AD1 but the exponent to AF1, so pow() receives mixed operand types.
+ // Every other D1 macro here converts to AD1 only; presumably AD1(b) was intended - confirm against the CPU version.
+ #define APowD1(a,b) pow(AD1(a),AF1(b))
+ #define APowF1(a,b) pow(AF1(a),AF1(b))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// VECTOR OPS
+//------------------------------------------------------------------------------------------------------------------------------
+// These are added as needed for production or prototyping, so not necessarily a complete set.
+// They follow a convention of taking in a destination and also returning the destination value to increase utility.
+//==============================================================================================================================
+ #ifdef A_DUBL
+ // Double-precision variants, compiled only when A_DUBL is defined.
+ // Every function writes its result to 'd' and returns that same value.
+ // d = |a| per component.
+ AD2 opAAbsD2(outAD2 d,inAD2 a){d=abs(a);return d;}
+ AD3 opAAbsD3(outAD3 d,inAD3 a){d=abs(a);return d;}
+ AD4 opAAbsD4(outAD4 d,inAD4 a){d=abs(a);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = a + b per component.
+ AD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){d=a+b;return d;}
+ AD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){d=a+b;return d;}
+ AD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){d=a+b;return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = a + b, where the scalar b is first widened with AD2_/AD3_/AD4_ (defined elsewhere in this header).
+ AD2 opAAddOneD2(outAD2 d,inAD2 a,AD1 b){d=a+AD2_(b);return d;}
+ AD3 opAAddOneD3(outAD3 d,inAD3 a,AD1 b){d=a+AD3_(b);return d;}
+ AD4 opAAddOneD4(outAD4 d,inAD4 a,AD1 b){d=a+AD4_(b);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = a (copy).
+ AD2 opACpyD2(outAD2 d,inAD2 a){d=a;return d;}
+ AD3 opACpyD3(outAD3 d,inAD3 a){d=a;return d;}
+ AD4 opACpyD4(outAD4 d,inAD4 a){d=a;return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = ALerpD*(a,b,c), where ALerpD* is defined elsewhere in this header.
+ AD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){d=ALerpD2(a,b,c);return d;}
+ AD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 c){d=ALerpD3(a,b,c);return d;}
+ AD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){d=ALerpD4(a,b,c);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Same as opALerpD*, with the scalar c widened to a vector first.
+ AD2 opALerpOneD2(outAD2 d,inAD2 a,inAD2 b,AD1 c){d=ALerpD2(a,b,AD2_(c));return d;}
+ AD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){d=ALerpD3(a,b,AD3_(c));return d;}
+ AD4 opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 c){d=ALerpD4(a,b,AD4_(c));return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = max(a,b) per component.
+ AD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){d=max(a,b);return d;}
+ AD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 b){d=max(a,b);return d;}
+ AD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){d=max(a,b);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = min(a,b) per component.
+ AD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){d=min(a,b);return d;}
+ AD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){d=min(a,b);return d;}
+ AD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){d=min(a,b);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = a * b per component.
+ AD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){d=a*b;return d;}
+ AD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){d=a*b;return d;}
+ AD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){d=a*b;return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = a * b, with the scalar b widened to a vector first.
+ AD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){d=a*AD2_(b);return d;}
+ AD3 opAMulOneD3(outAD3 d,inAD3 a,AD1 b){d=a*AD3_(b);return d;}
+ AD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){d=a*AD4_(b);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = -a.
+ AD2 opANegD2(outAD2 d,inAD2 a){d=-a;return d;}
+ AD3 opANegD3(outAD3 d,inAD3 a){d=-a;return d;}
+ AD4 opANegD4(outAD4 d,inAD4 a){d=-a;return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = ARcpD*(a), where ARcpD* is defined elsewhere in this header (presumably the reciprocal - TODO confirm).
+ AD2 opARcpD2(outAD2 d,inAD2 a){d=ARcpD2(a);return d;}
+ AD3 opARcpD3(outAD3 d,inAD3 a){d=ARcpD3(a);return d;}
+ AD4 opARcpD4(outAD4 d,inAD4 a){d=ARcpD4(a);return d;}
+ #endif
+//==============================================================================================================================
+ // 32-bit float variants. Every function writes its result to 'd' and returns that same value.
+ // d = |a| per component.
+ AF2 opAAbsF2(outAF2 d,inAF2 a){d=abs(a);return d;}
+ AF3 opAAbsF3(outAF3 d,inAF3 a){d=abs(a);return d;}
+ AF4 opAAbsF4(outAF4 d,inAF4 a){d=abs(a);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = a + b per component.
+ AF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){d=a+b;return d;}
+ AF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){d=a+b;return d;}
+ AF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){d=a+b;return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = a + b, where the scalar b is first widened with AF2_/AF3_/AF4_ (defined elsewhere in this header).
+ AF2 opAAddOneF2(outAF2 d,inAF2 a,AF1 b){d=a+AF2_(b);return d;}
+ AF3 opAAddOneF3(outAF3 d,inAF3 a,AF1 b){d=a+AF3_(b);return d;}
+ AF4 opAAddOneF4(outAF4 d,inAF4 a,AF1 b){d=a+AF4_(b);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = a (copy).
+ AF2 opACpyF2(outAF2 d,inAF2 a){d=a;return d;}
+ AF3 opACpyF3(outAF3 d,inAF3 a){d=a;return d;}
+ AF4 opACpyF4(outAF4 d,inAF4 a){d=a;return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = ALerpF*(a,b,c), where ALerpF* is defined elsewhere in this header.
+ AF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){d=ALerpF2(a,b,c);return d;}
+ AF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){d=ALerpF3(a,b,c);return d;}
+ AF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){d=ALerpF4(a,b,c);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Same as opALerpF*, with the scalar c widened to a vector first.
+ AF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){d=ALerpF2(a,b,AF2_(c));return d;}
+ AF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){d=ALerpF3(a,b,AF3_(c));return d;}
+ AF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 c){d=ALerpF4(a,b,AF4_(c));return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = max(a,b) per component.
+ AF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){d=max(a,b);return d;}
+ AF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){d=max(a,b);return d;}
+ AF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 b){d=max(a,b);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = min(a,b) per component.
+ AF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){d=min(a,b);return d;}
+ AF3 opAMinF3(outAF3 d,inAF3 a,inAF3 b){d=min(a,b);return d;}
+ AF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){d=min(a,b);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = a * b per component.
+ AF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){d=a*b;return d;}
+ AF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){d=a*b;return d;}
+ AF4 opAMulF4(outAF4 d,inAF4 a,inAF4 b){d=a*b;return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = a * b, with the scalar b widened to a vector first.
+ AF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){d=a*AF2_(b);return d;}
+ AF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){d=a*AF3_(b);return d;}
+ AF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 b){d=a*AF4_(b);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = -a.
+ AF2 opANegF2(outAF2 d,inAF2 a){d=-a;return d;}
+ AF3 opANegF3(outAF3 d,inAF3 a){d=-a;return d;}
+ AF4 opANegF4(outAF4 d,inAF4 a){d=-a;return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // d = ARcpF*(a), where ARcpF* is defined elsewhere in this header (presumably the reciprocal - TODO confirm).
+ AF2 opARcpF2(outAF2 d,inAF2 a){d=ARcpF2(a);return d;}
+ AF3 opARcpF3(outAF3 d,inAF3 a){d=ARcpF3(a);return d;}
+ AF4 opARcpF4(outAF4 d,inAF4 a){d=ARcpF4(a);return d;}
+#endif
diff --git a/thirdparty/amd-fsr/ffx_fsr1.h b/thirdparty/amd-fsr/ffx_fsr1.h
new file mode 100644
index 0000000000..4e0b3d5485
--- /dev/null
+++
b/thirdparty/amd-fsr/ffx_fsr1.h @@ -0,0 +1,1199 @@ +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// +// AMD FidelityFX SUPER RESOLUTION [FSR 1] ::: SPATIAL SCALING & EXTRAS - v1.20210629 +// +// +//------------------------------------------------------------------------------------------------------------------------------ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//------------------------------------------------------------------------------------------------------------------------------ +// FidelityFX Super Resolution Sample +// +// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +//------------------------------------------------------------------------------------------------------------------------------ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//------------------------------------------------------------------------------------------------------------------------------ +// ABOUT +// ===== +// FSR is a collection of algorithms relating to generating a higher resolution image. +// This specific header focuses on single-image non-temporal image scaling, and related tools. +// +// The core functions are EASU and RCAS: +// [EASU] Edge Adaptive Spatial Upsampling ....... 1x to 4x area range spatial scaling, clamped adaptive elliptical filter. +// [RCAS] Robust Contrast Adaptive Sharpening .... A non-scaling variation on CAS. +// RCAS needs to be applied after EASU as a separate pass. +// +// Optional utility functions are: +// [LFGA] Linear Film Grain Applicator ........... Tool to apply film grain after scaling. +// [SRTM] Simple Reversible Tone-Mapper .......... Linear HDR {0 to FP16_MAX} to {0 to 1} and back. +// [TEPD] Temporal Energy Preserving Dither ...... Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion. +// See each individual sub-section for inline documentation. 
+//------------------------------------------------------------------------------------------------------------------------------ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//------------------------------------------------------------------------------------------------------------------------------ +// FUNCTION PERMUTATIONS +// ===================== +// *F() ..... Single item computation with 32-bit. +// *H() ..... Single item computation with 16-bit, with packing (aka two 16-bit ops in parallel) when possible. +// *Hx2() ... Processing two items in parallel with 16-bit, easier packing. +// Not all interfaces in this file have a *Hx2() form. +//============================================================================================================================== +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [EASU] EDGE ADAPTIVE SPATIAL UPSAMPLING +// +//------------------------------------------------------------------------------------------------------------------------------ +// EASU provides a high 
quality spatial-only scaling at relatively low cost. +// Meaning EASU is appropriate for laptops and other low-end GPUs. +// Quality from 1x to 4x area scaling is good. +//------------------------------------------------------------------------------------------------------------------------------ +// The scaler uses a modified fast approximation to the standard lanczos(size=2) kernel. +// EASU runs in a single pass, so it applies a directionally and anisotropically adaptive radial lanczos. +// This is also kept as simple as possible to have minimum runtime. +//------------------------------------------------------------------------------------------------------------------------------ +// The lanczos filter has negative lobes, so by itself it will introduce ringing. +// To remove all ringing, the algorithm uses the nearest 2x2 input texels as a neighborhood, +// and limits output to the minimum and maximum of that neighborhood. +//------------------------------------------------------------------------------------------------------------------------------ +// Input image requirements: +// +// Color needs to be encoded as 3 channels [red, green, blue] (e.g. XYZ not supported) +// Each channel needs to be in the range [0, 1] +// Any color primaries are supported +// Display / tonemapping curve needs to be as if presenting to sRGB display or similar (e.g. Gamma 2.0) +// There should be no banding in the input +// There should be no high amplitude noise in the input +// There should be no noise in the input that is not at input pixel granularity +// For performance purposes, use 32bpp formats +//------------------------------------------------------------------------------------------------------------------------------ +// Best to apply EASU at the end of the frame after tonemapping +// but before film grain or composite of the UI.
+//------------------------------------------------------------------------------------------------------------------------------ +// Example of including this header for D3D HLSL : +// +// #define A_GPU 1 +// #define A_HLSL 1 +// #define A_HALF 1 +// #include "ffx_a.h" +// #define FSR_EASU_H 1 +// #define FSR_RCAS_H 1 +// //declare input callbacks +// #include "ffx_fsr1.h" +// +// Example of including this header for Vulkan GLSL : +// +// #define A_GPU 1 +// #define A_GLSL 1 +// #define A_HALF 1 +// #include "ffx_a.h" +// #define FSR_EASU_H 1 +// #define FSR_RCAS_H 1 +// //declare input callbacks +// #include "ffx_fsr1.h" +// +// Example of including this header for Vulkan HLSL : +// +// #define A_GPU 1 +// #define A_HLSL 1 +// #define A_HLSL_6_2 1 +// #define A_NO_16_BIT_CAST 1 +// #define A_HALF 1 +// #include "ffx_a.h" +// #define FSR_EASU_H 1 +// #define FSR_RCAS_H 1 +// //declare input callbacks +// #include "ffx_fsr1.h" +// +// Example of declaring the required input callbacks for GLSL : +// The callbacks need to gather4 for each color channel using the specified texture coordinate 'p'. +// EASU uses gather4 to reduce position computation logic and for free Arrays of Structures to Structures of Arrays conversion. +// +// AH4 FsrEasuRH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,0));} +// AH4 FsrEasuGH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,1));} +// AH4 FsrEasuBH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,2));} +// ... +// The FsrEasuCon function needs to be called from the CPU or GPU to set up constants. +// The difference in viewport and input image size is there to support Dynamic Resolution Scaling. +// To use FsrEasuCon() on the CPU, define A_CPU before including ffx_a and ffx_fsr1. +// Including a GPU example here, the 'con0' through 'con3' values would be stored out to a constant buffer. 
+//     AU4 con0,con1,con2,con3;
+//     FsrEasuCon(con0,con1,con2,con3,
+//       1920.0,1080.0,  // Viewport size (top left aligned) in the input image which is to be scaled.
+//       3840.0,2160.0,  // The size of the input image.
+//       2560.0,1440.0); // The output resolution.
+//==============================================================================================================================
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// CONSTANT SETUP
+//==============================================================================================================================
+// Call to setup required constant values (works on CPU or GPU).
+// Fills the four constant vectors that FsrEasuF()/FsrEasuH() take as 'con0'..'con3'.
+// Every float goes through AU1_AF1() (presumably a float->uint bit cast; the filters read the values back with AF2_AU2()).
+//  con0.xy = inputViewport/outputSize      con0.zw = 0.5*inputViewport/outputSize-0.5
+//  con1.xy = 1/inputSize                   con1.zw = ( 1,-1)/inputSize   gather (0), relative to 'F'
+//  con2.xy = (-1, 2)/inputSize  gather (1) con2.zw = ( 1, 2)/inputSize   gather (2), both relative to (0)
+//  con3.xy = ( 0, 4)/inputSize  gather (3), relative to (0)              con3.zw = 0 (not read by the filters in this file)
+A_STATIC void FsrEasuCon(
+outAU4 con0,
+outAU4 con1,
+outAU4 con2,
+outAU4 con3,
+// This is the rendered image resolution being upscaled
+AF1 inputViewportInPixelsX,
+AF1 inputViewportInPixelsY,
+// This is the resolution of the resource containing the input image (useful for dynamic resolution)
+AF1 inputSizeInPixelsX,
+AF1 inputSizeInPixelsY,
+// This is the display resolution which the input image gets upscaled to
+AF1 outputSizeInPixelsX,
+AF1 outputSizeInPixelsY){
+ // Output integer position to a pixel position in viewport.
+ con0[0]=AU1_AF1(inputViewportInPixelsX*ARcpF1(outputSizeInPixelsX));
+ con0[1]=AU1_AF1(inputViewportInPixelsY*ARcpF1(outputSizeInPixelsY));
+ con0[2]=AU1_AF1(AF1_(0.5)*inputViewportInPixelsX*ARcpF1(outputSizeInPixelsX)-AF1_(0.5));
+ con0[3]=AU1_AF1(AF1_(0.5)*inputViewportInPixelsY*ARcpF1(outputSizeInPixelsY)-AF1_(0.5));
+ // Viewport pixel position to normalized image space.
+ // This is used to get upper-left of 'F' tap.
+ con1[0]=AU1_AF1(ARcpF1(inputSizeInPixelsX));
+ con1[1]=AU1_AF1(ARcpF1(inputSizeInPixelsY));
+ // Centers of gather4, first offset from upper-left of 'F'.
+ //      +---+---+
+ //      |   |   |
+ //      +--(0)--+
+ //      | b | c |
+ //  +---F---+---+---+
+ //  | e | f | g | h |
+ //  +--(1)--+--(2)--+
+ //  | i | j | k | l |
+ //  +---+---+---+---+
+ //      | n | o |
+ //      +--(3)--+
+ //      |   |   |
+ //      +---+---+
+ con1[2]=AU1_AF1(AF1_( 1.0)*ARcpF1(inputSizeInPixelsX));
+ con1[3]=AU1_AF1(AF1_(-1.0)*ARcpF1(inputSizeInPixelsY));
+ // These are from (0) instead of 'F'.
+ con2[0]=AU1_AF1(AF1_(-1.0)*ARcpF1(inputSizeInPixelsX));
+ con2[1]=AU1_AF1(AF1_( 2.0)*ARcpF1(inputSizeInPixelsY));
+ con2[2]=AU1_AF1(AF1_( 1.0)*ARcpF1(inputSizeInPixelsX));
+ con2[3]=AU1_AF1(AF1_( 2.0)*ARcpF1(inputSizeInPixelsY));
+ con3[0]=AU1_AF1(AF1_( 0.0)*ARcpF1(inputSizeInPixelsX));
+ con3[1]=AU1_AF1(AF1_( 4.0)*ARcpF1(inputSizeInPixelsY));
+ con3[2]=con3[3]=0;}
+
+// Variant of FsrEasuCon() for an input image that sits at an offset inside its resource.
+// Calls FsrEasuCon() and then rewrites con0[2] and con0[3] with inputOffsetInPixelsX/Y added.
+A_STATIC void FsrEasuConOffset(
+    outAU4 con0,
+    outAU4 con1,
+    outAU4 con2,
+    outAU4 con3,
+    // This is the rendered image resolution being upscaled
+    AF1 inputViewportInPixelsX,
+    AF1 inputViewportInPixelsY,
+    // This is the resolution of the resource containing the input image (useful for dynamic resolution)
+    AF1 inputSizeInPixelsX,
+    AF1 inputSizeInPixelsY,
+    // This is the display resolution which the input image gets upscaled to
+    AF1 outputSizeInPixelsX,
+    AF1 outputSizeInPixelsY,
+    // This is the input image offset into the resource containing it (useful for dynamic resolution)
+    AF1 inputOffsetInPixelsX,
+    AF1 inputOffsetInPixelsY) {
+    FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY);
+    con0[2] = AU1_AF1(AF1_(0.5) * inputViewportInPixelsX * ARcpF1(outputSizeInPixelsX) - AF1_(0.5) + inputOffsetInPixelsX);
+    con0[3] = AU1_AF1(AF1_(0.5) * inputViewportInPixelsY * ARcpF1(outputSizeInPixelsY) - AF1_(0.5) + inputOffsetInPixelsY);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// NON-PACKED 32-BIT VERSION
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(FSR_EASU_F)
+ // Input callback prototypes, need to be implemented by calling shader
+ AF4 FsrEasuRF(AF2 p);
+ AF4 FsrEasuGF(AF2 p);
+ AF4 FsrEasuBF(AF2 p);
+//------------------------------------------------------------------------------------------------------------------------------
+ // Filtering for a given tap for the scaler.
+ void FsrEasuTapF(
+ inout AF3 aC, // Accumulated color, with negative lobe.
+ inout AF1 aW, // Accumulated weight.
+ AF2 off, // Pixel offset from resolve position to tap.
+ AF2 dir, // Gradient direction.
+ AF2 len, // Length.
+ AF1 lob, // Negative lobe strength.
+ AF1 clp, // Clipping point.
+ AF3 c){ // Tap color.
+  // Rotate offset by direction.
+  AF2 v;
+  v.x=(off.x*( dir.x))+(off.y*dir.y);
+  v.y=(off.x*(-dir.y))+(off.y*dir.x);
+  // Anisotropy.
+  v*=len;
+  // Compute distance^2.
+  AF1 d2=v.x*v.x+v.y*v.y;
+  // Limit to the window as at corner, 2 taps can easily be outside.
+  d2=min(d2,clp);
+  // Approximation of lanczos2 without sin() or rcp(), or sqrt() to get x.
+  //  (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2
+  //  |_______________________________________|   |_______________|
+  //                   base                             window
+  // The general form of the 'base' is,
+  //  (a*(b*x^2-1)^2-(a-1))
+  // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe.
+  // In the code below 'lob' takes the place of the fixed 1/4 in the 'window' term.
+  AF1 wB=AF1_(2.0/5.0)*d2+AF1_(-1.0);
+  AF1 wA=lob*d2+AF1_(-1.0);
+  wB*=wB;
+  wA*=wA;
+  wB=AF1_(25.0/16.0)*wB+AF1_(-(25.0/16.0-1.0));
+  AF1 w=wB*wA;
+  // Do weighted average.
+  aC+=c*w;aW+=w;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Accumulate direction and length.
+ // 'pp' is the fractional position used for the bilinear weight, 'biS'..'biV' pick which of the four weights applies,
+ // and 'lA'..'lE' are the five values of the '+' pattern drawn below (the caller passes luma).
+ void FsrEasuSetF(
+ inout AF2 dir,
+ inout AF1 len,
+ AF2 pp,
+ AP1 biS,AP1 biT,AP1 biU,AP1 biV,
+ AF1 lA,AF1 lB,AF1 lC,AF1 lD,AF1 lE){
+  // Compute bilinear weight, branches factor out as predicates are compiler time immediates.
+  //  s t
+  //  u v
+  AF1 w = AF1_(0.0);
+  if(biS)w=(AF1_(1.0)-pp.x)*(AF1_(1.0)-pp.y);
+  if(biT)w= pp.x *(AF1_(1.0)-pp.y);
+  if(biU)w=(AF1_(1.0)-pp.x)* pp.y ;
+  if(biV)w= pp.x * pp.y ;
+  // Direction is the '+' diff.
+  //    a
+  //  b c d
+  //    e
+  // Then takes magnitude from abs average of both sides of 'c'.
+  // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms.
+  AF1 dc=lD-lC;
+  AF1 cb=lC-lB;
+  AF1 lenX=max(abs(dc),abs(cb));
+  lenX=APrxLoRcpF1(lenX);
+  AF1 dirX=lD-lB;
+  dir.x+=dirX*w;
+  lenX=ASatF1(abs(dirX)*lenX);
+  lenX*=lenX;
+  len+=lenX*w;
+  // Repeat for the y axis.
+  AF1 ec=lE-lC;
+  AF1 ca=lC-lA;
+  AF1 lenY=max(abs(ec),abs(ca));
+  lenY=APrxLoRcpF1(lenY);
+  AF1 dirY=lE-lA;
+  dir.y+=dirY*w;
+  lenY=ASatF1(abs(dirY)*lenY);
+  lenY*=lenY;
+  len+=lenY*w;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // 32-bit EASU filter: computes the color for output pixel 'ip' and writes it to 'pix'.
+ void FsrEasuF(
+ out AF3 pix,
+ AU2 ip, // Integer pixel position in output.
+ AU4 con0, // Constants generated by FsrEasuCon().
+ AU4 con1,
+ AU4 con2,
+ AU4 con3){
+//------------------------------------------------------------------------------------------------------------------------------
+  // Get position of 'f'.
+  AF2 pp=AF2(ip)*AF2_AU2(con0.xy)+AF2_AU2(con0.zw);
+  AF2 fp=floor(pp);
+  pp-=fp;
+//------------------------------------------------------------------------------------------------------------------------------
+  // 12-tap kernel.
+  //    b c
+  //  e f g h
+  //  i j k l
+  //    n o
+  // Gather 4 ordering.
+  //  a b
+  //  r g
+  // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions,
+  //    a b    <- unused (z)
+  //    r g
+  //  a b a b
+  //  r g r g
+  //    a b
+  //    r g    <- unused (z)
+  // Allowing dead-code removal to remove the 'z's.
+  AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw);
+  // These are from p0 to avoid pulling two constants on pre-Navi hardware.
+  AF2 p1=p0+AF2_AU2(con2.xy);
+  AF2 p2=p0+AF2_AU2(con2.zw);
+  AF2 p3=p0+AF2_AU2(con3.xy);
+  AF4 bczzR=FsrEasuRF(p0);
+  AF4 bczzG=FsrEasuGF(p0);
+  AF4 bczzB=FsrEasuBF(p0);
+  AF4 ijfeR=FsrEasuRF(p1);
+  AF4 ijfeG=FsrEasuGF(p1);
+  AF4 ijfeB=FsrEasuBF(p1);
+  AF4 klhgR=FsrEasuRF(p2);
+  AF4 klhgG=FsrEasuGF(p2);
+  AF4 klhgB=FsrEasuBF(p2);
+  AF4 zzonR=FsrEasuRF(p3);
+  AF4 zzonG=FsrEasuGF(p3);
+  AF4 zzonB=FsrEasuBF(p3);
+//------------------------------------------------------------------------------------------------------------------------------
+  // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD).
+  AF4 bczzL=bczzB*AF4_(0.5)+(bczzR*AF4_(0.5)+bczzG);
+  AF4 ijfeL=ijfeB*AF4_(0.5)+(ijfeR*AF4_(0.5)+ijfeG);
+  AF4 klhgL=klhgB*AF4_(0.5)+(klhgR*AF4_(0.5)+klhgG);
+  AF4 zzonL=zzonB*AF4_(0.5)+(zzonR*AF4_(0.5)+zzonG);
+  // Rename.
+  AF1 bL=bczzL.x;
+  AF1 cL=bczzL.y;
+  AF1 iL=ijfeL.x;
+  AF1 jL=ijfeL.y;
+  AF1 fL=ijfeL.z;
+  AF1 eL=ijfeL.w;
+  AF1 kL=klhgL.x;
+  AF1 lL=klhgL.y;
+  AF1 hL=klhgL.z;
+  AF1 gL=klhgL.w;
+  AF1 oL=zzonL.z;
+  AF1 nL=zzonL.w;
+  // Accumulate for bilinear interpolation.
+  // One call per inner tap: the '+' patterns are centered on f, g, j and k respectively.
+  AF2 dir=AF2_(0.0);
+  AF1 len=AF1_(0.0);
+  FsrEasuSetF(dir,len,pp,true, false,false,false,bL,eL,fL,gL,jL);
+  FsrEasuSetF(dir,len,pp,false,true ,false,false,cL,fL,gL,hL,kL);
+  FsrEasuSetF(dir,len,pp,false,false,true ,false,fL,iL,jL,kL,nL);
+  FsrEasuSetF(dir,len,pp,false,false,false,true ,gL,jL,kL,lL,oL);
+//------------------------------------------------------------------------------------------------------------------------------
+  // Normalize with approximation, and cleanup close to zero.
+  AF2 dir2=dir*dir;
+  AF1 dirR=dir2.x+dir2.y;
+  AP1 zro=dirR<AF1_(1.0/32768.0);
+  dirR=APrxLoRsqF1(dirR);
+  dirR=zro?AF1_(1.0):dirR;
+  dir.x=zro?AF1_(1.0):dir.x;
+  dir*=AF2_(dirR);
+  // Transform from {0 to 2} to {0 to 1} range, and shape with square.
+  len=len*AF1_(0.5);
+  len*=len;
+  // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}.
+  AF1 stretch=(dir.x*dir.x+dir.y*dir.y)*APrxLoRcpF1(max(abs(dir.x),abs(dir.y)));
+  // Anisotropic length after rotation,
+  //  x := 1.0 lerp to 'stretch' on edges
+  //  y := 1.0 lerp to 2x on edges
+  AF2 len2=AF2(AF1_(1.0)+(stretch-AF1_(1.0))*len,AF1_(1.0)+AF1_(-0.5)*len);
+  // Based on the amount of 'edge',
+  // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}.
+  AF1 lob=AF1_(0.5)+AF1_((1.0/4.0-0.04)-0.5)*len;
+  // Set distance^2 clipping point to the end of the adjustable window.
+  AF1 clp=APrxLoRcpF1(lob);
+//------------------------------------------------------------------------------------------------------------------------------
+  // Accumulation mixed with min/max of 4 nearest.
+  //    b c
+  //  e f g h
+  //  i j k l
+  //    n o
+  // min4/max4 are taken over the taps f, g, j and k.
+  AF3 min4=min(AMin3F3(AF3(ijfeR.z,ijfeG.z,ijfeB.z),AF3(klhgR.w,klhgG.w,klhgB.w),AF3(ijfeR.y,ijfeG.y,ijfeB.y)),
+               AF3(klhgR.x,klhgG.x,klhgB.x));
+  AF3 max4=max(AMax3F3(AF3(ijfeR.z,ijfeG.z,ijfeB.z),AF3(klhgR.w,klhgG.w,klhgB.w),AF3(ijfeR.y,ijfeG.y,ijfeB.y)),
+               AF3(klhgR.x,klhgG.x,klhgB.x));
+  // Accumulation.
+  AF3 aC=AF3_(0.0);
+  AF1 aW=AF1_(0.0);
+  FsrEasuTapF(aC,aW,AF2( 0.0,-1.0)-pp,dir,len2,lob,clp,AF3(bczzR.x,bczzG.x,bczzB.x)); // b
+  FsrEasuTapF(aC,aW,AF2( 1.0,-1.0)-pp,dir,len2,lob,clp,AF3(bczzR.y,bczzG.y,bczzB.y)); // c
+  FsrEasuTapF(aC,aW,AF2(-1.0, 1.0)-pp,dir,len2,lob,clp,AF3(ijfeR.x,ijfeG.x,ijfeB.x)); // i
+  FsrEasuTapF(aC,aW,AF2( 0.0, 1.0)-pp,dir,len2,lob,clp,AF3(ijfeR.y,ijfeG.y,ijfeB.y)); // j
+  FsrEasuTapF(aC,aW,AF2( 0.0, 0.0)-pp,dir,len2,lob,clp,AF3(ijfeR.z,ijfeG.z,ijfeB.z)); // f
+  FsrEasuTapF(aC,aW,AF2(-1.0, 0.0)-pp,dir,len2,lob,clp,AF3(ijfeR.w,ijfeG.w,ijfeB.w)); // e
+  FsrEasuTapF(aC,aW,AF2( 1.0, 1.0)-pp,dir,len2,lob,clp,AF3(klhgR.x,klhgG.x,klhgB.x)); // k
+  FsrEasuTapF(aC,aW,AF2( 2.0, 1.0)-pp,dir,len2,lob,clp,AF3(klhgR.y,klhgG.y,klhgB.y)); // l
+  FsrEasuTapF(aC,aW,AF2( 2.0, 0.0)-pp,dir,len2,lob,clp,AF3(klhgR.z,klhgG.z,klhgB.z)); // h
+  FsrEasuTapF(aC,aW,AF2( 1.0, 0.0)-pp,dir,len2,lob,clp,AF3(klhgR.w,klhgG.w,klhgB.w)); // g
+  FsrEasuTapF(aC,aW,AF2( 1.0, 2.0)-pp,dir,len2,lob,clp,AF3(zzonR.z,zzonG.z,zzonB.z)); // o
+  FsrEasuTapF(aC,aW,AF2( 0.0, 2.0)-pp,dir,len2,lob,clp,AF3(zzonR.w,zzonG.w,zzonB.w)); // n
+//------------------------------------------------------------------------------------------------------------------------------
+  // Normalize and dering.
+  pix=min(max4,max(min4,aC*AF3_(ARcpF1(aW))));}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_EASU_H)
+// Input callback prototypes, need to be implemented by calling shader
+ AH4 FsrEasuRH(AF2 p);
+ AH4 FsrEasuGH(AF2 p);
+ AH4 FsrEasuBH(AF2 p);
+//------------------------------------------------------------------------------------------------------------------------------
+ // This runs 2 taps in parallel.
+ // Same weight computation as FsrEasuTapF(), with the two taps held in the two lanes of each AH2 argument;
+ // color is accumulated per channel (aCR/aCG/aCB) and the two lanes are summed by the caller.
+ void FsrEasuTapH(
+ inout AH2 aCR,inout AH2 aCG,inout AH2 aCB,
+ inout AH2 aW,
+ AH2 offX,AH2 offY,
+ AH2 dir,
+ AH2 len,
+ AH1 lob,
+ AH1 clp,
+ AH2 cR,AH2 cG,AH2 cB){
+  AH2 vX,vY;
+  vX=offX* dir.xx +offY*dir.yy;
+  vY=offX*(-dir.yy)+offY*dir.xx;
+  vX*=len.x;vY*=len.y;
+  AH2 d2=vX*vX+vY*vY;
+  d2=min(d2,AH2_(clp));
+  AH2 wB=AH2_(2.0/5.0)*d2+AH2_(-1.0);
+  AH2 wA=AH2_(lob)*d2+AH2_(-1.0);
+  wB*=wB;
+  wA*=wA;
+  wB=AH2_(25.0/16.0)*wB+AH2_(-(25.0/16.0-1.0));
+  AH2 w=wB*wA;
+  aCR+=cR*w;aCG+=cG*w;aCB+=cB*w;aW+=w;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // This runs 2 taps in parallel.
+ // Packed counterpart of FsrEasuSetF(): each AH2 argument carries the values for two '+' patterns,
+ // and direction/length are accumulated per lane (the caller sums the two lanes afterwards).
+ void FsrEasuSetH(
+ inout AH2 dirPX,inout AH2 dirPY,
+ inout AH2 lenP,
+ AH2 pp,
+ AP1 biST,AP1 biUV,
+ AH2 lA,AH2 lB,AH2 lC,AH2 lD,AH2 lE){
+  // Bilinear weights for a pair: biST gives {1-pp.x,pp.x}*(1-pp.y), biUV gives {1-pp.x,pp.x}*pp.y.
+  AH2 w = AH2_(0.0);
+  if(biST)w=(AH2(1.0,0.0)+AH2(-pp.x,pp.x))*AH2_(AH1_(1.0)-pp.y);
+  if(biUV)w=(AH2(1.0,0.0)+AH2(-pp.x,pp.x))*AH2_( pp.y);
+  // ABS is not free in the packed FP16 path.
+  AH2 dc=lD-lC;
+  AH2 cb=lC-lB;
+  AH2 lenX=max(abs(dc),abs(cb));
+  // Uses ARcpH2() here, where the 32-bit path uses APrxLoRcpF1().
+  lenX=ARcpH2(lenX);
+  AH2 dirX=lD-lB;
+  dirPX+=dirX*w;
+  lenX=ASatH2(abs(dirX)*lenX);
+  lenX*=lenX;
+  lenP+=lenX*w;
+  // Same again for the y axis.
+  AH2 ec=lE-lC;
+  AH2 ca=lC-lA;
+  AH2 lenY=max(abs(ec),abs(ca));
+  lenY=ARcpH2(lenY);
+  AH2 dirY=lE-lA;
+  dirPY+=dirY*w;
+  lenY=ASatH2(abs(dirY)*lenY);
+  lenY*=lenY;
+  lenP+=lenY*w;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Packed 16-bit EASU filter: same steps as FsrEasuF(), with taps processed two at a time.
+ // 'ip' is the integer output pixel position and 'con0'..'con3' are the constants written by FsrEasuCon().
+ void FsrEasuH(
+ out AH3 pix,
+ AU2 ip,
+ AU4 con0,
+ AU4 con1,
+ AU4 con2,
+ AU4 con3){
+//------------------------------------------------------------------------------------------------------------------------------
+  // Position math stays in 32-bit; only the fractional part is converted to 16-bit ('ppp').
+  AF2 pp=AF2(ip)*AF2_AU2(con0.xy)+AF2_AU2(con0.zw);
+  AF2 fp=floor(pp);
+  pp-=fp;
+  AH2 ppp=AH2(pp);
+//------------------------------------------------------------------------------------------------------------------------------
+  AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw);
+  AF2 p1=p0+AF2_AU2(con2.xy);
+  AF2 p2=p0+AF2_AU2(con2.zw);
+  AF2 p3=p0+AF2_AU2(con3.xy);
+  AH4 bczzR=FsrEasuRH(p0);
+  AH4 bczzG=FsrEasuGH(p0);
+  AH4 bczzB=FsrEasuBH(p0);
+  AH4 ijfeR=FsrEasuRH(p1);
+  AH4 ijfeG=FsrEasuGH(p1);
+  AH4 ijfeB=FsrEasuBH(p1);
+  AH4 klhgR=FsrEasuRH(p2);
+  AH4 klhgG=FsrEasuGH(p2);
+  AH4 klhgB=FsrEasuBH(p2);
+  AH4 zzonR=FsrEasuRH(p3);
+  AH4 zzonG=FsrEasuGH(p3);
+  AH4 zzonB=FsrEasuBH(p3);
+//------------------------------------------------------------------------------------------------------------------------------
+  // Luma times 2, as in the 32-bit version.
+  AH4 bczzL=bczzB*AH4_(0.5)+(bczzR*AH4_(0.5)+bczzG);
+  AH4 ijfeL=ijfeB*AH4_(0.5)+(ijfeR*AH4_(0.5)+ijfeG);
+  AH4 klhgL=klhgB*AH4_(0.5)+(klhgR*AH4_(0.5)+klhgG);
+  AH4 zzonL=zzonB*AH4_(0.5)+(zzonR*AH4_(0.5)+zzonG);
+  AH1 bL=bczzL.x;
+  AH1 cL=bczzL.y;
+  AH1 iL=ijfeL.x;
+  AH1 jL=ijfeL.y;
+  AH1 fL=ijfeL.z;
+  AH1 eL=ijfeL.w;
+  AH1 kL=klhgL.x;
+  AH1 lL=klhgL.y;
+  AH1 hL=klhgL.z;
+  AH1 gL=klhgL.w;
+  AH1 oL=zzonL.z;
+  AH1 nL=zzonL.w;
+  // This part is different, accumulating 2 taps in parallel.
+  // First call handles the patterns centered on {f,g}, second call the ones centered on {j,k}.
+  AH2 dirPX=AH2_(0.0);
+  AH2 dirPY=AH2_(0.0);
+  AH2 lenP=AH2_(0.0);
+  FsrEasuSetH(dirPX,dirPY,lenP,ppp,true, false,AH2(bL,cL),AH2(eL,fL),AH2(fL,gL),AH2(gL,hL),AH2(jL,kL));
+  FsrEasuSetH(dirPX,dirPY,lenP,ppp,false,true ,AH2(fL,gL),AH2(iL,jL),AH2(jL,kL),AH2(kL,lL),AH2(nL,oL));
+  AH2 dir=AH2(dirPX.r+dirPX.g,dirPY.r+dirPY.g);
+  AH1 len=lenP.r+lenP.g;
+//------------------------------------------------------------------------------------------------------------------------------
+  AH2 dir2=dir*dir;
+  AH1 dirR=dir2.x+dir2.y;
+  AP1 zro=dirR<AH1_(1.0/32768.0);
+  dirR=APrxLoRsqH1(dirR);
+  dirR=zro?AH1_(1.0):dirR;
+  dir.x=zro?AH1_(1.0):dir.x;
+  dir*=AH2_(dirR);
+  len=len*AH1_(0.5);
+  len*=len;
+  AH1 stretch=(dir.x*dir.x+dir.y*dir.y)*APrxLoRcpH1(max(abs(dir.x),abs(dir.y)));
+  AH2 len2=AH2(AH1_(1.0)+(stretch-AH1_(1.0))*len,AH1_(1.0)+AH1_(-0.5)*len);
+  AH1 lob=AH1_(0.5)+AH1_((1.0/4.0-0.04)-0.5)*len;
+  AH1 clp=APrxLoRcpH1(lob);
+//------------------------------------------------------------------------------------------------------------------------------
+  // FP16 is different, using packed trick to do min and max in same operation.
+  // Each 'both*' ends up with .x = -min and .y = max over the taps f, g, j and k.
+  AH2 bothR=max(max(AH2(-ijfeR.z,ijfeR.z),AH2(-klhgR.w,klhgR.w)),max(AH2(-ijfeR.y,ijfeR.y),AH2(-klhgR.x,klhgR.x)));
+  AH2 bothG=max(max(AH2(-ijfeG.z,ijfeG.z),AH2(-klhgG.w,klhgG.w)),max(AH2(-ijfeG.y,ijfeG.y),AH2(-klhgG.x,klhgG.x)));
+  AH2 bothB=max(max(AH2(-ijfeB.z,ijfeB.z),AH2(-klhgB.w,klhgB.w)),max(AH2(-ijfeB.y,ijfeB.y),AH2(-klhgB.x,klhgB.x)));
+  // This part is different for FP16, working pairs of taps at a time.
+  AH2 pR=AH2_(0.0);
+  AH2 pG=AH2_(0.0);
+  AH2 pB=AH2_(0.0);
+  AH2 pW=AH2_(0.0);
+  FsrEasuTapH(pR,pG,pB,pW,AH2( 0.0, 1.0)-ppp.xx,AH2(-1.0,-1.0)-ppp.yy,dir,len2,lob,clp,bczzR.xy,bczzG.xy,bczzB.xy); // b c
+  FsrEasuTapH(pR,pG,pB,pW,AH2(-1.0, 0.0)-ppp.xx,AH2( 1.0, 1.0)-ppp.yy,dir,len2,lob,clp,ijfeR.xy,ijfeG.xy,ijfeB.xy); // i j
+  FsrEasuTapH(pR,pG,pB,pW,AH2( 0.0,-1.0)-ppp.xx,AH2( 0.0, 0.0)-ppp.yy,dir,len2,lob,clp,ijfeR.zw,ijfeG.zw,ijfeB.zw); // f e
+  FsrEasuTapH(pR,pG,pB,pW,AH2( 1.0, 2.0)-ppp.xx,AH2( 1.0, 1.0)-ppp.yy,dir,len2,lob,clp,klhgR.xy,klhgG.xy,klhgB.xy); // k l
+  FsrEasuTapH(pR,pG,pB,pW,AH2( 2.0, 1.0)-ppp.xx,AH2( 0.0, 0.0)-ppp.yy,dir,len2,lob,clp,klhgR.zw,klhgG.zw,klhgB.zw); // h g
+  FsrEasuTapH(pR,pG,pB,pW,AH2( 1.0, 0.0)-ppp.xx,AH2( 2.0, 2.0)-ppp.yy,dir,len2,lob,clp,zzonR.zw,zzonG.zw,zzonB.zw); // o n
+  // Sum the two lanes to get the totals over all 12 taps.
+  AH3 aC=AH3(pR.x+pR.y,pG.x+pG.y,pB.x+pB.y);
+  AH1 aW=pW.x+pW.y;
+//------------------------------------------------------------------------------------------------------------------------------
+  // Slightly different for FP16 version due to combined min and max.
+  pix=min(AH3(bothR.y,bothG.y,bothB.y),max(-AH3(bothR.x,bothG.x,bothB.x),aC*AH3_(ARcpH1(aW))));}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+// FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// CAS uses a simplified mechanism to convert local contrast into a variable amount of sharpness.
+// RCAS uses a more exact mechanism, solving for the maximum local sharpness possible before clipping.
+// RCAS also has a built in process to limit sharpening of what it detects as possible noise.
+// RCAS sharper does not support scaling, as it should be applied after EASU scaling.
+// Pass EASU output straight into RCAS, no color conversions necessary.
+//------------------------------------------------------------------------------------------------------------------------------
+// RCAS is based on the following logic.
+// RCAS uses a 5 tap filter in a cross pattern (same as CAS),
+//    w                n
+//  w 1 w  for taps  w m e
+//    w                s
+// Where 'w' is the negative lobe weight.
+//  output = (w*(n+e+w+s)+m)/(4*w+1)
+// RCAS solves for 'w' by seeing where the signal might clip out of the {0 to 1} input range,
+//  0 == (w*(n+e+w+s)+m)/(4*w+1) -> w = -m/(n+e+w+s)
+//  1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1)
+// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount.
+// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues.
+// So RCAS uses 4x the maximum and 4x the minimum (depending on equation) in place of the individual taps.
+// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation.
+// This stabilizes RCAS.
+// RCAS does a simple highpass which is normalized against the local contrast then shaped,
+//       0.25
+//  0.25  -1  0.25
+//       0.25
+// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges.
+//
+// GLSL example for the required callbacks :
+//
+//     AH4 FsrRcasLoadH(ASW2 p){return AH4(imageLoad(imgSrc,ASU2(p)));}
+//     void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b)
+//     {
+//       //do any simple input color conversions here or leave empty if none needed
+//     }
+//
+// FsrRcasCon needs to be called from the CPU or GPU to set up constants.
+// Including a GPU example here, the 'con' value would be stored out to a constant buffer.
+//
+//     AU4 con;
+//     FsrRcasCon(con,
+//       0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
+// ---------------
+// RCAS sharpening supports a CAS-like pass-through alpha via,
+//  #define FSR_RCAS_PASSTHROUGH_ALPHA 1
+// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise.
+// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define,
+//  #define FSR_RCAS_DENOISE 1
+//==============================================================================================================================
+// This is set at the limit of providing unnatural results for sharpening.
+// Used negated (-FSR_RCAS_LIMIT) as the lower clamp of the sharpening lobe in the filters below.
+#define FSR_RCAS_LIMIT (0.25-(1.0/16.0))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// CONSTANT SETUP
+//==============================================================================================================================
+// Call to setup required constant values (works on CPU or GPU).
+//  con[0] = AExp2F1(-sharpness) stored through AU1_AF1(); FsrRcasF() reads it with AF1_AU1(con.x).
+//  con[1] = the same value twice, stored through AU1_AH2_AF2(); FsrRcasH() reads it with AH2_AU1(con.y).x.
+//  con[2] = con[3] = 0.
+A_STATIC void FsrRcasCon(
+outAU4 con,
+// The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
+AF1 sharpness){
+ // Transform from stops to linear value.
+ sharpness=AExp2F1(-sharpness);
+ varAF2(hSharp)=initAF2(sharpness,sharpness);
+ con[0]=AU1_AF1(sharpness);
+ con[1]=AU1_AH2_AF2(hSharp);
+ con[2]=0;
+ con[3]=0;}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// NON-PACKED 32-BIT VERSION
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(FSR_RCAS_F)
+ // Input callback prototypes that need to be implemented by calling shader
+ AF4 FsrRcasLoadF(ASU2 p);
+ void FsrRcasInputF(inout AF1 r,inout AF1 g,inout AF1 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ // 32-bit RCAS filter: sharpens the pixel at 'ip' from its 5-tap cross and writes the result to pixR/pixG/pixB.
+ // 'pixA' exists only when FSR_RCAS_PASSTHROUGH_ALPHA is defined and receives the center tap's alpha unchanged.
+ void FsrRcasF(
+ out AF1 pixR, // Output values, non-vector so port between FsrRcasF() and FsrRcasH() is easy.
+ out AF1 pixG,
+ out AF1 pixB,
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ out AF1 pixA,
+ #endif
+ AU2 ip, // Integer pixel position in output.
+ AU4 con){ // Constant generated by FsrRcasCon().
+  // Algorithm uses minimal 3x3 pixel neighborhood.
+  //    b
+  //  d e f
+  //    h
+  ASU2 sp=ASU2(ip);
+  AF3 b=FsrRcasLoadF(sp+ASU2( 0,-1)).rgb;
+  AF3 d=FsrRcasLoadF(sp+ASU2(-1, 0)).rgb;
+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+  AF4 ee=FsrRcasLoadF(sp);
+  AF3 e=ee.rgb;pixA=ee.a;
+  #else
+  AF3 e=FsrRcasLoadF(sp).rgb;
+  #endif
+  AF3 f=FsrRcasLoadF(sp+ASU2( 1, 0)).rgb;
+  AF3 h=FsrRcasLoadF(sp+ASU2( 0, 1)).rgb;
+  // Rename (32-bit) or regroup (16-bit).
+  AF1 bR=b.r;
+  AF1 bG=b.g;
+  AF1 bB=b.b;
+  AF1 dR=d.r;
+  AF1 dG=d.g;
+  AF1 dB=d.b;
+  AF1 eR=e.r;
+  AF1 eG=e.g;
+  AF1 eB=e.b;
+  AF1 fR=f.r;
+  AF1 fG=f.g;
+  AF1 fB=f.b;
+  AF1 hR=h.r;
+  AF1 hG=h.g;
+  AF1 hB=h.b;
+  // Run optional input transform.
+  FsrRcasInputF(bR,bG,bB);
+  FsrRcasInputF(dR,dG,dB);
+  FsrRcasInputF(eR,eG,eB);
+  FsrRcasInputF(fR,fG,fB);
+  FsrRcasInputF(hR,hG,hB);
+  // Luma times 2.
+  AF1 bL=bB*AF1_(0.5)+(bR*AF1_(0.5)+bG);
+  AF1 dL=dB*AF1_(0.5)+(dR*AF1_(0.5)+dG);
+  AF1 eL=eB*AF1_(0.5)+(eR*AF1_(0.5)+eG);
+  AF1 fL=fB*AF1_(0.5)+(fR*AF1_(0.5)+fG);
+  AF1 hL=hB*AF1_(0.5)+(hR*AF1_(0.5)+hG);
+  // Noise detection.
+  // 'nz' is computed unconditionally but only applied to 'lobe' when FSR_RCAS_DENOISE is defined.
+  AF1 nz=AF1_(0.25)*bL+AF1_(0.25)*dL+AF1_(0.25)*fL+AF1_(0.25)*hL-eL;
+  nz=ASatF1(abs(nz)*APrxMedRcpF1(AMax3F1(AMax3F1(bL,dL,eL),fL,hL)-AMin3F1(AMin3F1(bL,dL,eL),fL,hL)));
+  nz=AF1_(-0.5)*nz+AF1_(1.0);
+  // Min and max of ring.
+  AF1 mn4R=min(AMin3F1(bR,dR,fR),hR);
+  AF1 mn4G=min(AMin3F1(bG,dG,fG),hG);
+  AF1 mn4B=min(AMin3F1(bB,dB,fB),hB);
+  AF1 mx4R=max(AMax3F1(bR,dR,fR),hR);
+  AF1 mx4G=max(AMax3F1(bG,dG,fG),hG);
+  AF1 mx4B=max(AMax3F1(bB,dB,fB),hB);
+  // Immediate constants for peak range.
+  AF2 peakC=AF2(1.0,-1.0*4.0);
+  // Limiters, these need to be high precision RCPs.
+  // NOTE(review): the divisors 4*mx4 and 4*mn4-4 are zero when the ring is all 0.0 or all 1.0;
+  // the result then depends on what ARcpF1() returns for 0 - confirm against ffx_a.h.
+  AF1 hitMinR=min(mn4R,eR)*ARcpF1(AF1_(4.0)*mx4R);
+  AF1 hitMinG=min(mn4G,eG)*ARcpF1(AF1_(4.0)*mx4G);
+  AF1 hitMinB=min(mn4B,eB)*ARcpF1(AF1_(4.0)*mx4B);
+  AF1 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpF1(AF1_(4.0)*mn4R+peakC.y);
+  AF1 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpF1(AF1_(4.0)*mn4G+peakC.y);
+  AF1 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpF1(AF1_(4.0)*mn4B+peakC.y);
+  AF1 lobeR=max(-hitMinR,hitMaxR);
+  AF1 lobeG=max(-hitMinG,hitMaxG);
+  AF1 lobeB=max(-hitMinB,hitMaxB);
+  // Clamp the lobe to {-FSR_RCAS_LIMIT to 0}, then scale by the sharpness constant from FsrRcasCon().
+  AF1 lobe=max(AF1_(-FSR_RCAS_LIMIT),min(AMax3F1(lobeR,lobeG,lobeB),AF1_(0.0)))*AF1_AU1(con.x);
+  // Apply noise removal.
+  #ifdef FSR_RCAS_DENOISE
+  lobe*=nz;
+  #endif
+  // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+  AF1 rcpL=APrxMedRcpF1(AF1_(4.0)*lobe+AF1_(1.0));
+  pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
+  pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+  pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;
+  return;}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// NON-PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_H)
+ // Input callback prototypes that need to be implemented by calling shader
+ AH4 FsrRcasLoadH(ASW2 p);
+ void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ // 16-bit RCAS filter: same steps as FsrRcasF() using the half-precision types and the con.y constant.
+ // 'pixA' exists only when FSR_RCAS_PASSTHROUGH_ALPHA is defined and receives the center tap's alpha unchanged.
+ void FsrRcasH(
+ out AH1 pixR, // Output values, non-vector so port between FsrRcasF() and FsrRcasH() is easy.
+ out AH1 pixG,
+ out AH1 pixB,
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ out AH1 pixA,
+ #endif
+ AU2 ip, // Integer pixel position in output.
+ AU4 con){ // Constant generated by FsrRcasCon().
+  // Sharpening algorithm uses minimal 3x3 pixel neighborhood.
+  //    b
+  //  d e f
+  //    h
+  ASW2 sp=ASW2(ip);
+  AH3 b=FsrRcasLoadH(sp+ASW2( 0,-1)).rgb;
+  AH3 d=FsrRcasLoadH(sp+ASW2(-1, 0)).rgb;
+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+  AH4 ee=FsrRcasLoadH(sp);
+  AH3 e=ee.rgb;pixA=ee.a;
+  #else
+  AH3 e=FsrRcasLoadH(sp).rgb;
+  #endif
+  AH3 f=FsrRcasLoadH(sp+ASW2( 1, 0)).rgb;
+  AH3 h=FsrRcasLoadH(sp+ASW2( 0, 1)).rgb;
+  // Rename (32-bit) or regroup (16-bit).
+  AH1 bR=b.r;
+  AH1 bG=b.g;
+  AH1 bB=b.b;
+  AH1 dR=d.r;
+  AH1 dG=d.g;
+  AH1 dB=d.b;
+  AH1 eR=e.r;
+  AH1 eG=e.g;
+  AH1 eB=e.b;
+  AH1 fR=f.r;
+  AH1 fG=f.g;
+  AH1 fB=f.b;
+  AH1 hR=h.r;
+  AH1 hG=h.g;
+  AH1 hB=h.b;
+  // Run optional input transform.
+  FsrRcasInputH(bR,bG,bB);
+  FsrRcasInputH(dR,dG,dB);
+  FsrRcasInputH(eR,eG,eB);
+  FsrRcasInputH(fR,fG,fB);
+  FsrRcasInputH(hR,hG,hB);
+  // Luma times 2.
+  AH1 bL=bB*AH1_(0.5)+(bR*AH1_(0.5)+bG);
+  AH1 dL=dB*AH1_(0.5)+(dR*AH1_(0.5)+dG);
+  AH1 eL=eB*AH1_(0.5)+(eR*AH1_(0.5)+eG);
+  AH1 fL=fB*AH1_(0.5)+(fR*AH1_(0.5)+fG);
+  AH1 hL=hB*AH1_(0.5)+(hR*AH1_(0.5)+hG);
+  // Noise detection.
+  // 'nz' is computed unconditionally but only applied to 'lobe' when FSR_RCAS_DENOISE is defined.
+  AH1 nz=AH1_(0.25)*bL+AH1_(0.25)*dL+AH1_(0.25)*fL+AH1_(0.25)*hL-eL;
+  nz=ASatH1(abs(nz)*APrxMedRcpH1(AMax3H1(AMax3H1(bL,dL,eL),fL,hL)-AMin3H1(AMin3H1(bL,dL,eL),fL,hL)));
+  nz=AH1_(-0.5)*nz+AH1_(1.0);
+  // Min and max of ring.
+  AH1 mn4R=min(AMin3H1(bR,dR,fR),hR);
+  AH1 mn4G=min(AMin3H1(bG,dG,fG),hG);
+  AH1 mn4B=min(AMin3H1(bB,dB,fB),hB);
+  AH1 mx4R=max(AMax3H1(bR,dR,fR),hR);
+  AH1 mx4G=max(AMax3H1(bG,dG,fG),hG);
+  AH1 mx4B=max(AMax3H1(bB,dB,fB),hB);
+  // Immediate constants for peak range.
+  AH2 peakC=AH2(1.0,-1.0*4.0);
+  // Limiters, these need to be high precision RCPs.
+  // NOTE(review): the divisors 4*mx4 and 4*mn4-4 are zero when the ring is all 0.0 or all 1.0;
+  // the result then depends on what ARcpH1() returns for 0 - confirm against ffx_a.h.
+  AH1 hitMinR=min(mn4R,eR)*ARcpH1(AH1_(4.0)*mx4R);
+  AH1 hitMinG=min(mn4G,eG)*ARcpH1(AH1_(4.0)*mx4G);
+  AH1 hitMinB=min(mn4B,eB)*ARcpH1(AH1_(4.0)*mx4B);
+  AH1 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpH1(AH1_(4.0)*mn4R+peakC.y);
+  AH1 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpH1(AH1_(4.0)*mn4G+peakC.y);
+  AH1 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpH1(AH1_(4.0)*mn4B+peakC.y);
+  AH1 lobeR=max(-hitMinR,hitMaxR);
+  AH1 lobeG=max(-hitMinG,hitMaxG);
+  AH1 lobeB=max(-hitMinB,hitMaxB);
+  // Clamp the lobe to {-FSR_RCAS_LIMIT to 0}, then scale by the packed-half sharpness constant from FsrRcasCon().
+  AH1 lobe=max(AH1_(-FSR_RCAS_LIMIT),min(AMax3H1(lobeR,lobeG,lobeB),AH1_(0.0)))*AH2_AU1(con.y).x;
+  // Apply noise removal.
+  #ifdef FSR_RCAS_DENOISE
+  lobe*=nz;
+  #endif
+  // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+  AH1 rcpL=APrxMedRcpH1(AH1_(4.0)*lobe+AH1_(1.0));
+  pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
+  pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+  pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_HX2)
+ // Input callback prototypes that need to be implemented by the calling shader
+ AH4 FsrRcasLoadHx2(ASW2 p);
+ void FsrRcasInputHx2(inout AH2 r,inout AH2 g,inout AH2 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ // Can be
used to convert from packed Structures of Arrays to Arrays of Structures for store. + void FsrRcasDepackHx2(out AH4 pix0,out AH4 pix1,AH2 pixR,AH2 pixG,AH2 pixB){ + #ifdef A_HLSL + // Invoke a slower path for DX only, since it won't allow uninitialized values. + pix0.a=pix1.a=0.0; + #endif + pix0.rgb=AH3(pixR.x,pixG.x,pixB.x); + pix1.rgb=AH3(pixR.y,pixG.y,pixB.y);} +//------------------------------------------------------------------------------------------------------------------------------ + void FsrRcasHx2( + // Output values are for 2 8x8 tiles in a 16x8 region. + // pix<R,G,B>.x = left 8x8 tile + // pix<R,G,B>.y = right 8x8 tile + // This enables later processing to easily be packed as well. + out AH2 pixR, + out AH2 pixG, + out AH2 pixB, + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + out AH2 pixA, + #endif + AU2 ip, // Integer pixel position in output. + AU4 con){ // Constant generated by RcasSetup(). + // No scaling algorithm uses minimal 3x3 pixel neighborhood. + ASW2 sp0=ASW2(ip); + AH3 b0=FsrRcasLoadHx2(sp0+ASW2( 0,-1)).rgb; + AH3 d0=FsrRcasLoadHx2(sp0+ASW2(-1, 0)).rgb; + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + AH4 ee0=FsrRcasLoadHx2(sp0); + AH3 e0=ee0.rgb;pixA.r=ee0.a; + #else + AH3 e0=FsrRcasLoadHx2(sp0).rgb; + #endif + AH3 f0=FsrRcasLoadHx2(sp0+ASW2( 1, 0)).rgb; + AH3 h0=FsrRcasLoadHx2(sp0+ASW2( 0, 1)).rgb; + ASW2 sp1=sp0+ASW2(8,0); + AH3 b1=FsrRcasLoadHx2(sp1+ASW2( 0,-1)).rgb; + AH3 d1=FsrRcasLoadHx2(sp1+ASW2(-1, 0)).rgb; + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + AH4 ee1=FsrRcasLoadHx2(sp1); + AH3 e1=ee1.rgb;pixA.g=ee1.a; + #else + AH3 e1=FsrRcasLoadHx2(sp1).rgb; + #endif + AH3 f1=FsrRcasLoadHx2(sp1+ASW2( 1, 0)).rgb; + AH3 h1=FsrRcasLoadHx2(sp1+ASW2( 0, 1)).rgb; + // Arrays of Structures to Structures of Arrays conversion. 
+ AH2 bR=AH2(b0.r,b1.r); + AH2 bG=AH2(b0.g,b1.g); + AH2 bB=AH2(b0.b,b1.b); + AH2 dR=AH2(d0.r,d1.r); + AH2 dG=AH2(d0.g,d1.g); + AH2 dB=AH2(d0.b,d1.b); + AH2 eR=AH2(e0.r,e1.r); + AH2 eG=AH2(e0.g,e1.g); + AH2 eB=AH2(e0.b,e1.b); + AH2 fR=AH2(f0.r,f1.r); + AH2 fG=AH2(f0.g,f1.g); + AH2 fB=AH2(f0.b,f1.b); + AH2 hR=AH2(h0.r,h1.r); + AH2 hG=AH2(h0.g,h1.g); + AH2 hB=AH2(h0.b,h1.b); + // Run optional input transform. + FsrRcasInputHx2(bR,bG,bB); + FsrRcasInputHx2(dR,dG,dB); + FsrRcasInputHx2(eR,eG,eB); + FsrRcasInputHx2(fR,fG,fB); + FsrRcasInputHx2(hR,hG,hB); + // Luma times 2. + AH2 bL=bB*AH2_(0.5)+(bR*AH2_(0.5)+bG); + AH2 dL=dB*AH2_(0.5)+(dR*AH2_(0.5)+dG); + AH2 eL=eB*AH2_(0.5)+(eR*AH2_(0.5)+eG); + AH2 fL=fB*AH2_(0.5)+(fR*AH2_(0.5)+fG); + AH2 hL=hB*AH2_(0.5)+(hR*AH2_(0.5)+hG); + // Noise detection. + AH2 nz=AH2_(0.25)*bL+AH2_(0.25)*dL+AH2_(0.25)*fL+AH2_(0.25)*hL-eL; + nz=ASatH2(abs(nz)*APrxMedRcpH2(AMax3H2(AMax3H2(bL,dL,eL),fL,hL)-AMin3H2(AMin3H2(bL,dL,eL),fL,hL))); + nz=AH2_(-0.5)*nz+AH2_(1.0); + // Min and max of ring. + AH2 mn4R=min(AMin3H2(bR,dR,fR),hR); + AH2 mn4G=min(AMin3H2(bG,dG,fG),hG); + AH2 mn4B=min(AMin3H2(bB,dB,fB),hB); + AH2 mx4R=max(AMax3H2(bR,dR,fR),hR); + AH2 mx4G=max(AMax3H2(bG,dG,fG),hG); + AH2 mx4B=max(AMax3H2(bB,dB,fB),hB); + // Immediate constants for peak range. + AH2 peakC=AH2(1.0,-1.0*4.0); + // Limiters, these need to be high precision RCPs. + AH2 hitMinR=min(mn4R,eR)*ARcpH2(AH2_(4.0)*mx4R); + AH2 hitMinG=min(mn4G,eG)*ARcpH2(AH2_(4.0)*mx4G); + AH2 hitMinB=min(mn4B,eB)*ARcpH2(AH2_(4.0)*mx4B); + AH2 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpH2(AH2_(4.0)*mn4R+peakC.y); + AH2 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpH2(AH2_(4.0)*mn4G+peakC.y); + AH2 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpH2(AH2_(4.0)*mn4B+peakC.y); + AH2 lobeR=max(-hitMinR,hitMaxR); + AH2 lobeG=max(-hitMinG,hitMaxG); + AH2 lobeB=max(-hitMinB,hitMaxB); + AH2 lobe=max(AH2_(-FSR_RCAS_LIMIT),min(AMax3H2(lobeR,lobeG,lobeB),AH2_(0.0)))*AH2_(AH2_AU1(con.y).x); + // Apply noise removal. 
+ #ifdef FSR_RCAS_DENOISE + lobe*=nz; + #endif + // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. + AH2 rcpL=APrxMedRcpH2(AH2_(4.0)*lobe+AH2_(1.0)); + pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; + pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; + pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR +// +//------------------------------------------------------------------------------------------------------------------------------ +// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts. +// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel. +// The 'Lfga*()' functions provide a convenient way to introduce grain. +// These functions limit grain based on distance to signal limits. +// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality. +// Grain application should be done in a linear colorspace. +// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased). 
+//------------------------------------------------------------------------------------------------------------------------------ +// Usage, +// FsrLfga*( +// color, // In/out linear colorspace color {0 to 1} ranged. +// grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain. +// amount); // Amount of grain {0 to 1} ranged. +//------------------------------------------------------------------------------------------------------------------------------ +// Example if grain texture is monochrome: 'FsrLfgaF(color,AF3_(grain),amount)' +//============================================================================================================================== +#if defined(A_GPU) + // Maximum grain is the minimum distance to the signal limit. + void FsrLfgaF(inout AF3 c,AF3 t,AF1 a){c+=(t*AF3_(a))*min(AF3_(1.0)-c,c);} +#endif +//============================================================================================================================== +#if defined(A_GPU)&&defined(A_HALF) + // Half precision version (slower). + void FsrLfgaH(inout AH3 c,AH3 t,AH1 a){c+=(t*AH3_(a))*min(AH3_(1.0)-c,c);} +//------------------------------------------------------------------------------------------------------------------------------ + // Packed half precision version (faster). 
+ void FsrLfgaHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 tR,AH2 tG,AH2 tB,AH1 a){ + cR+=(tR*AH2_(a))*min(AH2_(1.0)-cR,cR);cG+=(tG*AH2_(a))*min(AH2_(1.0)-cG,cG);cB+=(tB*AH2_(a))*min(AH2_(1.0)-cB,cB);} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER +// +//------------------------------------------------------------------------------------------------------------------------------ +// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear. +// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering. +//------------------------------------------------------------------------------------------------------------------------------ +// Reversible tonemapper usage, +// FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}. +// FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}. 
+//============================================================================================================================== +#if defined(A_GPU) + void FsrSrtmF(inout AF3 c){c*=AF3_(ARcpF1(AMax3F1(c.r,c.g,c.b)+AF1_(1.0)));} + // The extra max solves the c=1.0 case (which is a /0). + void FsrSrtmInvF(inout AF3 c){c*=AF3_(ARcpF1(max(AF1_(1.0/32768.0),AF1_(1.0)-AMax3F1(c.r,c.g,c.b))));} +#endif +//============================================================================================================================== +#if defined(A_GPU)&&defined(A_HALF) + void FsrSrtmH(inout AH3 c){c*=AH3_(ARcpH1(AMax3H1(c.r,c.g,c.b)+AH1_(1.0)));} + void FsrSrtmInvH(inout AH3 c){c*=AH3_(ARcpH1(max(AH1_(1.0/32768.0),AH1_(1.0)-AMax3H1(c.r,c.g,c.b))));} +//------------------------------------------------------------------------------------------------------------------------------ + void FsrSrtmHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){ + AH2 rcp=ARcpH2(AMax3H2(cR,cG,cB)+AH2_(1.0));cR*=rcp;cG*=rcp;cB*=rcp;} + void FsrSrtmInvHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){ + AH2 rcp=ARcpH2(max(AH2_(1.0/32768.0),AH2_(1.0)-AMax3H2(cR,cG,cB)));cR*=rcp;cG*=rcp;cB*=rcp;} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [TEPD] TEMPORAL ENERGY 
PRESERVING DITHER +// +//------------------------------------------------------------------------------------------------------------------------------ +// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion. +// Gamma 2.0 is used so that the conversion back to linear is just to square the color. +// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively. +// Given good non-biased temporal blue noise as dither input, +// the output dither will temporally conserve energy. +// This is done by choosing the linear nearest step point instead of perceptual nearest. +// See code below for details. +//------------------------------------------------------------------------------------------------------------------------------ +// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION +// =============================================== +// - Output is 'uint(floor(saturate(n)*255.0+0.5))'. +// - Thus rounding is to nearest. +// - NaN gets converted to zero. +// - INF is clamped to {0.0 to 1.0}. +//============================================================================================================================== +#if defined(A_GPU) + // Hand tuned integer position to dither value, with more values than simple checkerboard. + // Only 32-bit has enough precision for this computation. + // Output is {0 to <1}. + AF1 FsrTepdDitF(AU2 p,AU1 f){ + AF1 x=AF1_(p.x+f); + AF1 y=AF1_(p.y); + // The 1.61803 golden ratio. + AF1 a=AF1_((1.0+sqrt(5.0))/2.0); + // Number designed to provide a good visual pattern. + AF1 b=AF1_(1.0/3.69); + x=x*a+(y*b); + return AFractF1(x);} +//------------------------------------------------------------------------------------------------------------------------------ + // This version is 8-bit gamma 2.0. + // The 'c' input is {0 to 1}. + // Output is {0 to 1} ready for image store. 
+ void FsrTepdC8F(inout AF3 c,AF1 dit){ + AF3 n=sqrt(c); + n=floor(n*AF3_(255.0))*AF3_(1.0/255.0); + AF3 a=n*n; + AF3 b=n+AF3_(1.0/255.0);b=b*b; + // Ratio of 'a' to 'b' required to produce 'c'. + // APrxLoRcpF1() won't work here (at least for very high dynamic ranges). + // APrxMedRcpF1() is an IADD,FMA,MUL. + AF3 r=(c-b)*APrxMedRcpF3(a-b); + // Use the ratio as a cutoff to choose 'a' or 'b'. + // AGtZeroF1() is a MUL. + c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/255.0));} +//------------------------------------------------------------------------------------------------------------------------------ + // This version is 10-bit gamma 2.0. + // The 'c' input is {0 to 1}. + // Output is {0 to 1} ready for image store. + void FsrTepdC10F(inout AF3 c,AF1 dit){ + AF3 n=sqrt(c); + n=floor(n*AF3_(1023.0))*AF3_(1.0/1023.0); + AF3 a=n*n; + AF3 b=n+AF3_(1.0/1023.0);b=b*b; + AF3 r=(c-b)*APrxMedRcpF3(a-b); + c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/1023.0));} +#endif +//============================================================================================================================== +#if defined(A_GPU)&&defined(A_HALF) + AH1 FsrTepdDitH(AU2 p,AU1 f){ + AF1 x=AF1_(p.x+f); + AF1 y=AF1_(p.y); + AF1 a=AF1_((1.0+sqrt(5.0))/2.0); + AF1 b=AF1_(1.0/3.69); + x=x*a+(y*b); + return AH1(AFractF1(x));} +//------------------------------------------------------------------------------------------------------------------------------ + void FsrTepdC8H(inout AH3 c,AH1 dit){ + AH3 n=sqrt(c); + n=floor(n*AH3_(255.0))*AH3_(1.0/255.0); + AH3 a=n*n; + AH3 b=n+AH3_(1.0/255.0);b=b*b; + AH3 r=(c-b)*APrxMedRcpH3(a-b); + c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/255.0));} +//------------------------------------------------------------------------------------------------------------------------------ + void FsrTepdC10H(inout AH3 c,AH1 dit){ + AH3 n=sqrt(c); + n=floor(n*AH3_(1023.0))*AH3_(1.0/1023.0); + AH3 a=n*n; + AH3 b=n+AH3_(1.0/1023.0);b=b*b; + AH3 r=(c-b)*APrxMedRcpH3(a-b); + 
c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/1023.0));} +//============================================================================================================================== + // This computes dither for positions 'p' and 'p+{8,0}'. + AH2 FsrTepdDitHx2(AU2 p,AU1 f){ + AF2 x; + x.x=AF1_(p.x+f); + x.y=x.x+AF1_(8.0); + AF1 y=AF1_(p.y); + AF1 a=AF1_((1.0+sqrt(5.0))/2.0); + AF1 b=AF1_(1.0/3.69); + x=x*AF2_(a)+AF2_(y*b); + return AH2(AFractF2(x));} +//------------------------------------------------------------------------------------------------------------------------------ + void FsrTepdC8Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){ + AH2 nR=sqrt(cR); + AH2 nG=sqrt(cG); + AH2 nB=sqrt(cB); + nR=floor(nR*AH2_(255.0))*AH2_(1.0/255.0); + nG=floor(nG*AH2_(255.0))*AH2_(1.0/255.0); + nB=floor(nB*AH2_(255.0))*AH2_(1.0/255.0); + AH2 aR=nR*nR; + AH2 aG=nG*nG; + AH2 aB=nB*nB; + AH2 bR=nR+AH2_(1.0/255.0);bR=bR*bR; + AH2 bG=nG+AH2_(1.0/255.0);bG=bG*bG; + AH2 bB=nB+AH2_(1.0/255.0);bB=bB*bB; + AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR); + AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG); + AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB); + cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/255.0)); + cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/255.0)); + cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/255.0));} +//------------------------------------------------------------------------------------------------------------------------------ + void FsrTepdC10Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){ + AH2 nR=sqrt(cR); + AH2 nG=sqrt(cG); + AH2 nB=sqrt(cB); + nR=floor(nR*AH2_(1023.0))*AH2_(1.0/1023.0); + nG=floor(nG*AH2_(1023.0))*AH2_(1.0/1023.0); + nB=floor(nB*AH2_(1023.0))*AH2_(1.0/1023.0); + AH2 aR=nR*nR; + AH2 aG=nG*nG; + AH2 aB=nB*nB; + AH2 bR=nR+AH2_(1.0/1023.0);bR=bR*bR; + AH2 bG=nG+AH2_(1.0/1023.0);bG=bG*bG; + AH2 bB=nB+AH2_(1.0/1023.0);bB=bB*bB; + AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR); + AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG); + AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB); + 
cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/1023.0)); + cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/1023.0)); + cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/1023.0));} +#endif diff --git a/thirdparty/amd-fsr/license.txt b/thirdparty/amd-fsr/license.txt new file mode 100644 index 0000000000..324cba594d --- /dev/null +++ b/thirdparty/amd-fsr/license.txt @@ -0,0 +1,19 @@ +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/thirdparty/basis_universal/encoder/apg_bmp.c b/thirdparty/basis_universal/encoder/apg_bmp.c index ef3d015e40..d342b20fc8 100644 --- a/thirdparty/basis_universal/encoder/apg_bmp.c +++ b/thirdparty/basis_universal/encoder/apg_bmp.c @@ -247,7 +247,7 @@ unsigned char* apg_bmp_read( const char* filename, int* w, int* h, unsigned int* } // allocate memory for the output pixels block. 
cast to size_t in case width and height are both the max of 65536 and n_dst_chans > 1 - unsigned char* dst_img_ptr = malloc( (size_t)width * (size_t)height * (size_t)n_dst_chans ); + unsigned char* dst_img_ptr = (unsigned char*)malloc( (size_t)width * (size_t)height * (size_t)n_dst_chans ); if ( !dst_img_ptr ) { free( record.data ); return NULL; @@ -480,7 +480,7 @@ unsigned int apg_bmp_write( const char* filename, unsigned char* pixels_ptr, int dib_hdr.bitmask_b = 0x0000FF00; } - uint8_t* dst_pixels_ptr = malloc( dst_pixels_padded_sz ); + uint8_t* dst_pixels_ptr = (uint8_t*)malloc( dst_pixels_padded_sz ); if ( !dst_pixels_ptr ) { return 0; } { size_t dst_byte_idx = 0; diff --git a/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp b/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp index 06aa7eb8b1..22fdfa603f 100644 --- a/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp +++ b/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp @@ -174,9 +174,8 @@ static void astc_init() } // range } -static inline uint32_t astc_interpolate(uint32_t l, uint32_t h, uint32_t w) +static inline uint32_t astc_interpolate_linear(uint32_t l, uint32_t h, uint32_t w) { - // This is for linear values, not sRGB. 
l = (l << 8) | l; h = (h << 8) | h; uint32_t k = (l * (64 - w) + h * w + 32) >> 6; @@ -230,7 +229,7 @@ void bc7enc_compress_block_init() { uint32_t high = (h << 4) | h; - const int k = astc_interpolate(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]); + const int k = astc_interpolate_linear(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); if (err < best.m_error) @@ -259,7 +258,7 @@ void bc7enc_compress_block_init() { uint32_t high = (h << 4) | h; - const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]); + const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); if (err < best.m_error) @@ -288,7 +287,7 @@ void bc7enc_compress_block_init() { uint32_t high = g_astc_sorted_order_unquant[7][h].m_unquant; - const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]); + const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); if (err < best.m_error) @@ -317,7 +316,7 @@ void bc7enc_compress_block_init() { uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant; - const int k = astc_interpolate(low, high, g_astc_weights4[BC7ENC_ASTC_RANGE13_4BIT_OPTIMAL_INDEX]); + const int k = astc_interpolate_linear(low, high, g_astc_weights4[BC7ENC_ASTC_RANGE13_4BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); if (err < best.m_error) @@ -346,7 +345,7 @@ void bc7enc_compress_block_init() { uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant; - const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]); + const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); if (err < best.m_error) @@ -375,7 +374,7 @@ void bc7enc_compress_block_init() { 
uint32_t high = g_astc_sorted_order_unquant[11][h].m_unquant; - const int k = astc_interpolate(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]); + const int k = astc_interpolate_linear(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); if (err < best.m_error) @@ -650,7 +649,7 @@ static uint64_t pack_astc_4bit_3bit_to_one_color(const color_cell_compressor_par uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i]; uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i]; - p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]); + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]); } p.m_c[3] = 255; @@ -689,7 +688,7 @@ static uint64_t pack_astc_4bit_2bit_to_one_color_rgba(const color_cell_compresso uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i]; uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i]; - p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]); + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]); } uint64_t total_err = 0; @@ -728,7 +727,7 @@ static uint64_t pack_astc_range7_2bit_to_one_color(const color_cell_compressor_p uint32_t low = g_astc_sorted_order_unquant[7][pResults->m_low_endpoint.m_c[i]].m_unquant; uint32_t high = g_astc_sorted_order_unquant[7][pResults->m_high_endpoint.m_c[i]].m_unquant; - p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]); + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]); } p.m_c[3] = 255; @@ -768,7 +767,7 @@ static uint64_t pack_astc_range13_2bit_to_one_color(const color_cell_compressor_ uint32_t 
low = g_astc_sorted_order_unquant[13][pResults->m_low_endpoint.m_c[i]].m_unquant; uint32_t high = g_astc_sorted_order_unquant[13][pResults->m_high_endpoint.m_c[i]].m_unquant; - p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]); + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]); } uint64_t total_err = 0; @@ -807,7 +806,7 @@ static uint64_t pack_astc_range11_5bit_to_one_color(const color_cell_compressor_ uint32_t low = g_astc_sorted_order_unquant[11][pResults->m_low_endpoint.m_c[i]].m_unquant; uint32_t high = g_astc_sorted_order_unquant[11][pResults->m_high_endpoint.m_c[i]].m_unquant; - p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]); + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]); } uint64_t total_err = 0; @@ -863,7 +862,7 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 for (uint32_t i = 1; i < (N - 1); i++) { for (uint32_t j = 0; j < nc; j++) - weightedColors[i].m_c[j] = (uint8_t)(astc_interpolate(actualMinColor.m_c[j], actualMaxColor.m_c[j], pParams->m_pSelector_weights[i])); + weightedColors[i].m_c[j] = (uint8_t)(astc_interpolate_linear(actualMinColor.m_c[j], actualMaxColor.m_c[j], pParams->m_pSelector_weights[i])); } } else @@ -1300,7 +1299,7 @@ void check_best_overall_error(const color_cell_compressor_params *pParams, color for (uint32_t i = 1; i < pParams->m_num_selector_weights - 1; i++) for (uint32_t c = 0; c < 4; c++) - colors[i].m_c[c] = (uint8_t)astc_interpolate(colors[0].m_c[c], colors[n - 1].m_c[c], pParams->m_pSelector_weights[i]); + colors[i].m_c[c] = (uint8_t)astc_interpolate_linear(colors[0].m_c[c], colors[n - 1].m_c[c], pParams->m_pSelector_weights[i]); uint64_t total_err = 0; for (uint32_t p = 0; p < pParams->m_num_pixels; p++) @@ -1815,10 +1814,10 @@ uint64_t 
color_cell_compression_est_astc( weightedColors[num_weights - 1] = highColor; for (uint32_t i = 1; i < (num_weights - 1); i++) { - weightedColors[i].m_c[0] = (uint8_t)astc_interpolate(lowColor.m_c[0], highColor.m_c[0], pWeight_table[i]); - weightedColors[i].m_c[1] = (uint8_t)astc_interpolate(lowColor.m_c[1], highColor.m_c[1], pWeight_table[i]); - weightedColors[i].m_c[2] = (uint8_t)astc_interpolate(lowColor.m_c[2], highColor.m_c[2], pWeight_table[i]); - weightedColors[i].m_c[3] = (num_comps == 4) ? (uint8_t)astc_interpolate(lowColor.m_c[3], highColor.m_c[3], pWeight_table[i]) : 255; + weightedColors[i].m_c[0] = (uint8_t)astc_interpolate_linear(lowColor.m_c[0], highColor.m_c[0], pWeight_table[i]); + weightedColors[i].m_c[1] = (uint8_t)astc_interpolate_linear(lowColor.m_c[1], highColor.m_c[1], pWeight_table[i]); + weightedColors[i].m_c[2] = (uint8_t)astc_interpolate_linear(lowColor.m_c[2], highColor.m_c[2], pWeight_table[i]); + weightedColors[i].m_c[3] = (num_comps == 4) ? (uint8_t)astc_interpolate_linear(lowColor.m_c[3], highColor.m_c[3], pWeight_table[i]) : 255; } // Compute dots and thresholds diff --git a/thirdparty/basis_universal/encoder/basisu_bc7enc.h b/thirdparty/basis_universal/encoder/basisu_bc7enc.h index 23469912e2..8d8b7888ca 100644 --- a/thirdparty/basis_universal/encoder/basisu_bc7enc.h +++ b/thirdparty/basis_universal/encoder/basisu_bc7enc.h @@ -12,6 +12,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
+#pragma once #include "basisu_enc.h" #include "../transcoder/basisu_transcoder_uastc.h" diff --git a/thirdparty/basis_universal/encoder/basisu_comp.cpp b/thirdparty/basis_universal/encoder/basisu_comp.cpp index dc4ae11539..10f96cec4a 100644 --- a/thirdparty/basis_universal/encoder/basisu_comp.cpp +++ b/thirdparty/basis_universal/encoder/basisu_comp.cpp @@ -467,7 +467,10 @@ namespace basisu return false; } - printf("Read source image \"%s\", %ux%u\n", pSource_filename, file_image.get_width(), file_image.get_height()); + if (m_params.m_status_output) + { + printf("Read source image \"%s\", %ux%u\n", pSource_filename, file_image.get_width(), file_image.get_height()); + } // Optionally load another image and put a grayscale version of it into the alpha channel. if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size())) @@ -1427,7 +1430,10 @@ namespace basisu return false; } - printf("Wrote output .basis/.ktx2 file \"%s\"\n", output_filename.c_str()); + if (m_params.m_status_output) + { + printf("Wrote output .basis/.ktx2 file \"%s\"\n", output_filename.c_str()); + } } size_t comp_size = 0; diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp index f02fb62c11..daaf65badc 100644 --- a/thirdparty/basis_universal/encoder/basisu_enc.cpp +++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp @@ -195,7 +195,7 @@ namespace basisu { QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER*>(pTicks)); } -#elif defined(__APPLE__) +#elif defined(__APPLE__) || defined(__OpenBSD__) #include <sys/time.h> inline void query_counter(timer_ticks* pTicks) { @@ -1779,8 +1779,6 @@ namespace basisu return nullptr; } - const uint32_t bytes_per_line = hdr.m_width * tga_bytes_per_pixel; - const uint8_t *pSrc = pBuf + sizeof(tga_header); uint32_t bytes_remaining = buf_size - sizeof(tga_header); diff --git a/thirdparty/basis_universal/encoder/basisu_enc.h 
b/thirdparty/basis_universal/encoder/basisu_enc.h index 05c95cbc3b..0ce011452d 100644 --- a/thirdparty/basis_universal/encoder/basisu_enc.h +++ b/thirdparty/basis_universal/encoder/basisu_enc.h @@ -1634,6 +1634,14 @@ namespace basisu if ((!l_weight) || (!r_weight)) { + l_children.resize(0); + new_l_child.set(0.0f); + l_ttsum = 0.0f; + l_weight = 0; + r_children.resize(0); + new_r_child.set(0.0f); + r_ttsum = 0.0f; + r_weight = 0; TrainingVectorType firstVec; for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) { @@ -1660,7 +1668,7 @@ namespace basisu } } - if (!l_weight) + if ((!l_weight) || (!r_weight)) return false; } diff --git a/thirdparty/basis_universal/encoder/basisu_resampler.cpp b/thirdparty/basis_universal/encoder/basisu_resampler.cpp index e193ce83ff..f4cedf0031 100644 --- a/thirdparty/basis_universal/encoder/basisu_resampler.cpp +++ b/thirdparty/basis_universal/encoder/basisu_resampler.cpp @@ -15,14 +15,6 @@ #include "basisu_resampler.h" #include "basisu_resampler_filters.h" -#ifndef max -#define max(a, b) (((a) > (b)) ? (a) : (b)) -#endif - -#ifndef min -#define min(a, b) (((a) < (b)) ? 
(a) : (b)) -#endif - #define RESAMPLER_DEBUG 0 namespace basisu diff --git a/thirdparty/basis_universal/encoder/cppspmd_sse.h b/thirdparty/basis_universal/encoder/cppspmd_sse.h index b39cb82a5f..9a97eeb695 100644 --- a/thirdparty/basis_universal/encoder/cppspmd_sse.h +++ b/thirdparty/basis_universal/encoder/cppspmd_sse.h @@ -1327,33 +1327,15 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE float reduce_add(vfloat v) { __m128 k3210 = _mm_castsi128_ps(blendv_mask_epi32(_mm_setzero_si128(), _mm_castps_si128(v.m_value), m_exec.m_mask)); - -//#if CPPSPMD_SSE2 -#if 1 - // See https://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-sse-vector-sum-or-other-reduction/35270026#35270026 - __m128 shuf = _mm_shuffle_ps(k3210, k3210, _MM_SHUFFLE(2, 3, 0, 1)); - __m128 sums = _mm_add_ps(k3210, shuf); - shuf = _mm_movehl_ps(shuf, sums); - sums = _mm_add_ss(sums, shuf); - return _mm_cvtss_f32(sums); -#else - // This is pretty slow. - __m128 a = _mm_hadd_ps(k3210, k3210); - __m128 b = _mm_hadd_ps(a, a); - return extractf_ps_x(b); -#endif + __m128 temp = _mm_add_ps(_mm_shuffle_ps(k3210, k3210, _MM_SHUFFLE(0, 1, 2, 3)), k3210); + return _mm_cvtss_f32(_mm_add_ss(_mm_movehl_ps(temp, temp), temp)); } - + CPPSPMD_FORCE_INLINE int reduce_add(vint v) { __m128i k3210 = blendv_mask_epi32(_mm_setzero_si128(), v.m_value, m_exec.m_mask); - - // See https://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-sse-vector-sum-or-other-reduction/35270026#35270026 - __m128i shuf = _mm_shuffle_epi32(k3210, _MM_SHUFFLE(2, 3, 0, 1)); - __m128i sums = _mm_add_epi32(k3210, shuf); - shuf = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(shuf), _mm_castsi128_ps(sums))); - sums = _mm_add_epi32(sums, shuf); - return extract_x(sums); + __m128i temp = _mm_add_epi32(_mm_shuffle_epi32(k3210, _MM_SHUFFLE(0, 1, 2, 3)), k3210); + return extract_x(_mm_add_epi32(_mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(temp), _mm_castsi128_ps(temp))), temp)); } #include "cppspmd_math_declares.h" @@ 
-1686,6 +1668,12 @@ CPPSPMD_FORCE_INLINE vint uniform_shift_right_epi16(const vint& a, const vint& b CPPSPMD_FORCE_INLINE vint undefined_vint() { return vint{ _mm_undefined_si128() }; } CPPSPMD_FORCE_INLINE vfloat undefined_vfloat() { return vfloat{ _mm_undefined_ps() }; } +CPPSPMD_FORCE_INLINE vint vint_lane_set(int v0, int v1, int v2, int v3) { return vint{ _mm_set_epi32(v3, v2, v1, v0) }; } +CPPSPMD_FORCE_INLINE vfloat vfloat_lane_set(float v0, float v1, float v2, float v3) { return vfloat{ _mm_set_ps(v3, v2, v1, v0) }; } + +CPPSPMD_FORCE_INLINE vint vint_lane_set_r(int v3, int v2, int v1, int v0) { return vint{ _mm_set_epi32(v3, v2, v1, v0) }; } +CPPSPMD_FORCE_INLINE vfloat vfloat_lane_set_r(float v3, float v2, float v1, float v0) { return vfloat{ _mm_set_ps(v3, v2, v1, v0) }; } + // control is an 8-bit immediate value containing 4 2-bit indices which shuffles the int32's in each 128-bit lane. #define VINT_LANE_SHUFFLE_EPI32(a, control) vint(_mm_shuffle_epi32((a).m_value, control)) diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp index 29eb3c0d55..0b3733385d 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp @@ -10778,8 +10778,6 @@ namespace basist return false; } - const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; - if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n"); @@ -17336,7 +17334,6 @@ namespace basist bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data) { - const uint8_t* pComp_data = m_levels[level_index].m_byte_offset + m_pData; const uint64_t comp_size = m_levels[level_index].m_byte_length; const uint64_t uncomp_size = 
m_levels[level_index].m_uncompressed_byte_length; @@ -17361,6 +17358,7 @@ namespace basist if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) { #if BASISD_SUPPORT_KTX2_ZSTD + const uint8_t* pComp_data = m_levels[level_index].m_byte_offset + m_pData; size_t actualUncompSize = ZSTD_decompress(uncomp_data.data(), (size_t)uncomp_size, pComp_data, (size_t)comp_size); if (ZSTD_isError(actualUncompSize)) { diff --git a/thirdparty/certs/ca-certificates.crt b/thirdparty/certs/ca-certificates.crt index 7d5ea3bef1..7f89e81d01 100644 --- a/thirdparty/certs/ca-certificates.crt +++ b/thirdparty/certs/ca-certificates.crt @@ -1,7 +1,7 @@ ## ## Bundle of CA Root Certificates ## -## Certificate data from Mozilla as of: Mon Jul 5 21:36:52 2021 GMT +## Certificate data from Mozilla as of: Mon Nov 1 15:39:58 2021 GMT ## ## This is a bundle of X.509 certificates of public Certificate Authorities ## (CA). These were automatically extracted from Mozilla's root certificates @@ -14,7 +14,7 @@ ## Just configure this file as the SSLCACertificateFile. ## ## Conversion done with mk-ca-bundle.pl version 1.28. 
-## SHA256: c8f6733d1ff4e6a4769c182971a1234f95ae079247a9c439a13423fe8ba5c24f +## SHA256: bb36818a81feaa4cca61101e6d6276cd09e972efcb08112dfed846918ca41d7f ## @@ -381,26 +381,6 @@ mNEVX58Svnw2Yzi9RKR/5CYrCsSXaQ3pjOLAEFe4yHYSkVXySGnYvCoCWw9E1CAx2/S6cCZdkGCe vEsXCS+0yx5DaMkHJ8HSXPfqIbloEpw8nL+e/IBcm2PN7EeqJSdnoDfzAIJ9VNep+OkuE6N36B9K -----END CERTIFICATE----- -DST Root CA X3 -============== ------BEGIN CERTIFICATE----- -MIIDSjCCAjKgAwIBAgIQRK+wgNajJ7qJMDmGLvhAazANBgkqhkiG9w0BAQUFADA/MSQwIgYDVQQK -ExtEaWdpdGFsIFNpZ25hdHVyZSBUcnVzdCBDby4xFzAVBgNVBAMTDkRTVCBSb290IENBIFgzMB4X -DTAwMDkzMDIxMTIxOVoXDTIxMDkzMDE0MDExNVowPzEkMCIGA1UEChMbRGlnaXRhbCBTaWduYXR1 -cmUgVHJ1c3QgQ28uMRcwFQYDVQQDEw5EU1QgUm9vdCBDQSBYMzCCASIwDQYJKoZIhvcNAQEBBQAD -ggEPADCCAQoCggEBAN+v6ZdQCINXtMxiZfaQguzH0yxrMMpb7NnDfcdAwRgUi+DoM3ZJKuM/IUmT -rE4Orz5Iy2Xu/NMhD2XSKtkyj4zl93ewEnu1lcCJo6m67XMuegwGMoOifooUMM0RoOEqOLl5CjH9 -UL2AZd+3UWODyOKIYepLYYHsUmu5ouJLGiifSKOeDNoJjj4XLh7dIN9bxiqKqy69cK3FCxolkHRy -xXtqqzTWMIn/5WgTe1QLyNau7Fqckh49ZLOMxt+/yUFw7BZy1SbsOFU5Q9D8/RhcQPGX69Wam40d -utolucbY38EVAjqr2m7xPi71XAicPNaDaeQQmxkqtilX4+U9m5/wAl0CAwEAAaNCMEAwDwYDVR0T -AQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFMSnsaR7LHH62+FLkHX/xBVghYkQ -MA0GCSqGSIb3DQEBBQUAA4IBAQCjGiybFwBcqR7uKGY3Or+Dxz9LwwmglSBd49lZRNI+DT69ikug -dB/OEIKcdBodfpga3csTS7MgROSR6cz8faXbauX+5v3gTt23ADq1cEmv8uXrAvHRAosZy5Q6XkjE -GB5YGV8eAlrwDPGxrancWYaLbumR9YbK+rlmM6pZW87ipxZzR8srzJmwN0jP41ZL9c8PDHIyh8bw -RLtTcm1D9SZImlJnt1ir/md2cXjbDaJWFBM5JDGFoqgCWjBH4d1QB7wCCZAA62RjYJsWvIjJEubS -fZGL+T0yjWW06XyxV3bqxbYoOb8VZRzI9neWagqNdwvYkQsEjgfbKbYK7p2CNTUQ ------END CERTIFICATE----- - SwissSign Gold CA - G2 ====================== -----BEGIN CERTIFICATE----- @@ -2713,7 +2693,8 @@ CBeQyh+KTOgNG3qxrdWBCUfvO6wIBHxcmbHtRwfSAjEAnbpV/KlK6O3t5nYBQnvI+GDZjVGLVTv7 jHvrZQnD+JbNR6iC8hZVdyR+EhCVBCyj -----END CERTIFICATE----- -# emSign Root CA - C1 +emSign Root CA - C1 +=================== -----BEGIN CERTIFICATE----- 
MIIDczCCAlugAwIBAgILAK7PALrEzzL4Q7IwDQYJKoZIhvcNAQELBQAwVjELMAkGA1UEBhMCVVMx EzARBgNVBAsTCmVtU2lnbiBQS0kxFDASBgNVBAoTC2VNdWRocmEgSW5jMRwwGgYDVQQDExNlbVNp @@ -2733,7 +2714,8 @@ wC68AivTxEDkigcxHpvOJpkT+xHqmiIMERnHXhuBUDDIlhJu58tBf5E7oke3VIAb3ADMmpDqw8NQ BmIMMMAVSKeoWXzhriKi4gp6D/piq1JM4fHfyr6DDUI= -----END CERTIFICATE----- -# emSign ECC Root CA - C3 +emSign ECC Root CA - C3 +======================= -----BEGIN CERTIFICATE----- MIICKzCCAbGgAwIBAgIKe3G2gla4EnycqDAKBggqhkjOPQQDAzBaMQswCQYDVQQGEwJVUzETMBEG A1UECxMKZW1TaWduIFBLSTEUMBIGA1UEChMLZU11ZGhyYSBJbmMxIDAeBgNVBAMTF2VtU2lnbiBF @@ -2747,7 +2729,8 @@ MGUCMQC02C8Cif22TGK6Q04ThHK1rt0c3ta13FaPWEBaLd4gTCKDypOofu4SQMfWh0/434UCMBwU ZOR8loMRnLDRWmFLpg9J0wD8ofzkpf9/rdcw0Md3f76BB1UwUCAU9Vc4CqgxUQ== -----END CERTIFICATE----- -# Hongkong Post Root CA 3 +Hongkong Post Root CA 3 +======================= -----BEGIN CERTIFICATE----- MIIFzzCCA7egAwIBAgIUCBZfikyl7ADJk0DfxMauI7gcWqQwDQYJKoZIhvcNAQELBQAwbzELMAkG A1UEBhMCSEsxEjAQBgNVBAgTCUhvbmcgS29uZzESMBAGA1UEBxMJSG9uZyBLb25nMRYwFAYDVQQK @@ -2778,7 +2761,8 @@ hcErulWuBurQB7Lcq9CClnXO0lD+mefPL5/ndtFhKvshuzHQqp9HpLIiyhY6UFfEW0NnxWViA0kB dBb9HxEGmpv0 -----END CERTIFICATE----- -# Entrust Root Certification Authority - G4 +Entrust Root Certification Authority - G4 +========================================= -----BEGIN CERTIFICATE----- MIIGSzCCBDOgAwIBAgIRANm1Q3+vqTkPAAAAAFVlrVgwDQYJKoZIhvcNAQELBQAwgb4xCzAJBgNV BAYTAlVTMRYwFAYDVQQKEw1FbnRydXN0LCBJbmMuMSgwJgYDVQQLEx9TZWUgd3d3LmVudHJ1c3Qu @@ -2811,7 +2795,8 @@ JOgc47OlIQ6SwJAfzyBfyjs4x7dtOvPmRLgOMWuIjnDrnBdSqEGULoe256YSxXXfW8AKbnuk5F6G kcpG2om3PVODLAgfi49T3f+sHw== -----END CERTIFICATE----- -# Microsoft ECC Root Certificate Authority 2017 +Microsoft ECC Root Certificate Authority 2017 +============================================= -----BEGIN CERTIFICATE----- MIICWTCCAd+gAwIBAgIQZvI9r4fei7FK6gxXMQHC7DAKBggqhkjOPQQDAzBlMQswCQYDVQQGEwJV UzEeMBwGA1UEChMVTWljcm9zb2Z0IENvcnBvcmF0aW9uMTYwNAYDVQQDEy1NaWNyb3NvZnQgRUND @@ -2826,7 +2811,8 @@ 
Xu5gKcs68tvWMoQZP3zVL8KxzJOuULsJMsbG7X7JNpQS5GiFBqIb0C8CMQCZ6Ra0DvpWSNSkMBaR eNtUjGUBiudQZsIxtzm6uBoiB078a1QWIP8rtedMDE2mT3M= -----END CERTIFICATE----- -# Microsoft RSA Root Certificate Authority 2017 +Microsoft RSA Root Certificate Authority 2017 +============================================= -----BEGIN CERTIFICATE----- MIIFqDCCA5CgAwIBAgIQHtOXCV/YtLNHcB6qvn9FszANBgkqhkiG9w0BAQwFADBlMQswCQYDVQQG EwJVUzEeMBwGA1UEChMVTWljcm9zb2Z0IENvcnBvcmF0aW9uMTYwNAYDVQQDEy1NaWNyb3NvZnQg @@ -2856,7 +2842,8 @@ c0QWbej09+CVgI+WXTik9KveCjCHk9hNAHFiRSdLOkKEW39lt2c0Ui2cFmuqqNh7o0JMcccMyj6D 5KbvtwEwXlGjefVwaaZBRA+GsCyRxj3qrg+E -----END CERTIFICATE----- -# e-Szigno Root CA 2017 +e-Szigno Root CA 2017 +===================== -----BEGIN CERTIFICATE----- MIICQDCCAeWgAwIBAgIMAVRI7yH9l1kN9QQKMAoGCCqGSM49BAMCMHExCzAJBgNVBAYTAkhVMREw DwYDVQQHDAhCdWRhcGVzdDEWMBQGA1UECgwNTWljcm9zZWMgTHRkLjEXMBUGA1UEYQwOVkFUSFUt @@ -2871,7 +2858,8 @@ tVfd14pVCzbhhkT61NlojbjcI4qKDdQvfepz7L9NbKgCIQDLpbQS+ue16M9+k/zzNY9vTlp8tLxO svxyqltZ+efcMQ== -----END CERTIFICATE----- -# certSIGN Root CA G2 +certSIGN Root CA G2 +=================== -----BEGIN CERTIFICATE----- MIIFRzCCAy+gAwIBAgIJEQA0tk7GNi02MA0GCSqGSIb3DQEBCwUAMEExCzAJBgNVBAYTAlJPMRQw EgYDVQQKEwtDRVJUU0lHTiBTQTEcMBoGA1UECxMTY2VydFNJR04gUk9PVCBDQSBHMjAeFw0xNzAy @@ -2899,7 +2887,8 @@ NMn5X7azKFGnpyuqSfqNZSlO42sTp5SjLVFteAxEy9/eCG/Oo2Sr05WE1LlSVHJ7liXMvGnjSG4N 0MedJ5qq+BOS3R7fY581qRY27Iy4g/Q9iY/NtBde17MXQRBdJ3NghVdJIgc= -----END CERTIFICATE----- -# Trustwave Global Certification Authority +Trustwave Global Certification Authority +======================================== -----BEGIN CERTIFICATE----- MIIF2jCCA8KgAwIBAgIMBfcOhtpJ80Y1LrqyMA0GCSqGSIb3DQEBCwUAMIGIMQswCQYDVQQGEwJV UzERMA8GA1UECAwISWxsaW5vaXMxEDAOBgNVBAcMB0NoaWNhZ28xITAfBgNVBAoMGFRydXN0d2F2 @@ -2930,7 +2919,8 @@ Yj6RS8fZMXZC+fc8Y+wmjHMMfRod6qh8h6jCJ3zhM0EPz8/8AKAigJ5Kp28AsEFFtyLKaEjFQqKu 29FpHOTKyeC2nOnOcXHebD8WpHk= -----END CERTIFICATE----- -# Trustwave Global ECC P256 Certification Authority +Trustwave 
Global ECC P256 Certification Authority +================================================= -----BEGIN CERTIFICATE----- MIICYDCCAgegAwIBAgIMDWpfCD8oXD5Rld9dMAoGCCqGSM49BAMCMIGRMQswCQYDVQQGEwJVUzER MA8GA1UECBMISWxsaW5vaXMxEDAOBgNVBAcTB0NoaWNhZ28xITAfBgNVBAoTGFRydXN0d2F2ZSBI @@ -2945,7 +2935,8 @@ P62jQzBBMA8GA1UdEwEB/wQFMAMBAf8wDwYDVR0PAQH/BAUDAwcGADAdBgNVHQ4EFgQUo0EGrJBt RM4q3wghDDcCIC0mA6AFvWvR9lz4ZcyGbbOcNEhjhAnFjXca4syc4XR7 -----END CERTIFICATE----- -# Trustwave Global ECC P384 Certification Authority +Trustwave Global ECC P384 Certification Authority +================================================= -----BEGIN CERTIFICATE----- MIICnTCCAiSgAwIBAgIMCL2Fl2yZJ6SAaEc7MAoGCCqGSM49BAMDMIGRMQswCQYDVQQGEwJVUzER MA8GA1UECBMISWxsaW5vaXMxEDAOBgNVBAcTB0NoaWNhZ28xITAfBgNVBAoTGFRydXN0d2F2ZSBI @@ -2961,7 +2952,8 @@ ADBkAjA3AZKXRRJ+oPM+rRk6ct30UJMDEr5E0k9BpIycnR+j9sKS50gU/k6bpZFXrsY3crsCMGcl CrEMXu6pY5Jv5ZAL/mYiykf9ijH3g/56vxC+GCsej/YpHpRZ744hN8tRmKVuSw== -----END CERTIFICATE----- -# NAVER Global Root Certification Authority +NAVER Global Root Certification Authority +========================================= -----BEGIN CERTIFICATE----- MIIFojCCA4qgAwIBAgIUAZQwHqIL3fXFMyqxQ0Rx+NZQTQ0wDQYJKoZIhvcNAQEMBQAwaTELMAkG A1UEBhMCS1IxJjAkBgNVBAoMHU5BVkVSIEJVU0lORVNTIFBMQVRGT1JNIENvcnAuMTIwMAYDVQQD @@ -2991,7 +2983,8 @@ I/hGoiLtk/bdmuYqh7GYVPEi92tF4+KOdh2ajcQGjTa3FPOdVGm3jjzVpG2Tgbet9r1ke8LJaDmg kpzNNIaRkPpkUZ3+/uul9XXeifdy -----END CERTIFICATE----- -# AC RAIZ FNMT-RCM SERVIDORES SEGUROS +AC RAIZ FNMT-RCM SERVIDORES SEGUROS +=================================== -----BEGIN CERTIFICATE----- MIICbjCCAfOgAwIBAgIQYvYybOXE42hcG2LdnC6dlTAKBggqhkjOPQQDAzB4MQswCQYDVQQGEwJF UzERMA8GA1UECgwIRk5NVC1SQ00xDjAMBgNVBAsMBUNlcmVzMRgwFgYDVQRhDA9WQVRFUy1RMjgy @@ -3006,7 +2999,8 @@ SM49BAMDA2kAMGYCMQCuSuMrQMN0EfKVrRYj3k4MGuZdpSRea0R7/DjiT8ucRRcRTBQnJlU5dUoD zBOQn5ICMQD6SmxgiHPz7riYYqnOK8LZiqZwMR2vsJRM60/G49HzYqc8/5MuB1xJAWdpEgJyv+c= -----END CERTIFICATE----- -# GlobalSign Root R46 +GlobalSign Root R46 
+=================== -----BEGIN CERTIFICATE----- MIIFWjCCA0KgAwIBAgISEdK7udcjGJ5AXwqdLdDfJWfRMA0GCSqGSIb3DQEBDAUAMEYxCzAJBgNV BAYTAkJFMRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRwwGgYDVQQDExNHbG9iYWxTaWduIFJv @@ -3035,7 +3029,8 @@ DEJ4Y9HiD2971KE9dJeFt0g5QdYg/NA6s/rob8SKunE3vouXsXgxT7PntgMTzlSdriVZzH81Xwj3 QEUxeCp6 -----END CERTIFICATE----- -# GlobalSign Root E46 +GlobalSign Root E46 +=================== -----BEGIN CERTIFICATE----- MIICCzCCAZGgAwIBAgISEdK7ujNu1LzmJGjFDYQdmOhDMAoGCCqGSM49BAMDMEYxCzAJBgNVBAYT AkJFMRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRwwGgYDVQQDExNHbG9iYWxTaWduIFJvb3Qg @@ -3049,7 +3044,8 @@ vLtoURMMA/cVi4RguYv/Uo7njLwcAjA8+RHUjE7AwWHCFUyqqx0LMV87HOIAl0Qx5v5zli/altP+ CAezNIm8BZ/3Hobui3A= -----END CERTIFICATE----- -# GLOBALTRUST 2020 +GLOBALTRUST 2020 +================ -----BEGIN CERTIFICATE----- MIIFgjCCA2qgAwIBAgILWku9WvtPilv6ZeUwDQYJKoZIhvcNAQELBQAwTTELMAkGA1UEBhMCQVQx IzAhBgNVBAoTGmUtY29tbWVyY2UgbW9uaXRvcmluZyBHbWJIMRkwFwYDVQQDExBHTE9CQUxUUlVT @@ -3078,7 +3074,8 @@ YqYK6miyeUcGbvJXqBUzxvd4Sj1Ce2t+/vdG6tHrju+IaFvowdlxfv1k7/9nR4hYJS8+hge9+6jl gqispdNpQ80xiEmEU5LAsTkbOYMBMMTyqfrQA71yN2BWHzZ8vTmR9W0Nv3vXkg== -----END CERTIFICATE----- -# ANF Secure Server Root CA +ANF Secure Server Root CA +========================= -----BEGIN CERTIFICATE----- MIIF7zCCA9egAwIBAgIIDdPjvGz5a7EwDQYJKoZIhvcNAQELBQAwgYQxEjAQBgNVBAUTCUc2MzI4 NzUxMDELMAkGA1UEBhMCRVMxJzAlBgNVBAoTHkFORiBBdXRvcmlkYWQgZGUgQ2VydGlmaWNhY2lv @@ -3109,7 +3106,8 @@ g77FGr8H6lnco4g175x2MjxNBiLOFeXdntiP2t7SxDnlF4HPOEfrf4htWRvfn0IUrn7PqLBmZdo3 r5+qPeoott7VMVgWglvquxl1AnMaykgaIZOQCo6ThKd9OyMYkomgjaw= -----END CERTIFICATE----- -# Certum EC-384 CA +Certum EC-384 CA +================ -----BEGIN CERTIFICATE----- MIICZTCCAeugAwIBAgIQeI8nXIESUiClBNAt3bpz9DAKBggqhkjOPQQDAzB0MQswCQYDVQQGEwJQ TDEhMB8GA1UEChMYQXNzZWNvIERhdGEgU3lzdGVtcyBTLkEuMScwJQYDVQQLEx5DZXJ0dW0gQ2Vy @@ -3124,7 +3122,8 @@ ADBlAjADVS2m5hjEfO/JUG7BJw+ch69u1RsIGL2SKcHvlJF40jocVYli5RsJHrpka/F2tNQCMQC0 
QoSZ/6vnnvuRlydd3LBbMHHOXjgaatkl5+r3YZJW+OraNsKHZZYuciUvf9/DE8k= -----END CERTIFICATE----- -# Certum Trusted Root CA +Certum Trusted Root CA +====================== -----BEGIN CERTIFICATE----- MIIFwDCCA6igAwIBAgIQHr9ZULjJgDdMBvfrVU+17TANBgkqhkiG9w0BAQ0FADB6MQswCQYDVQQG EwJQTDEhMB8GA1UEChMYQXNzZWNvIERhdGEgU3lzdGVtcyBTLkEuMScwJQYDVQQLEx5DZXJ0dW0g @@ -3153,3 +3152,81 @@ WWRrJ8/vJ8HjJLWG965+Mk2weWjROeiQWMODvA8s1pfrzgzhIMfatz7DP78v3DSk+yshzWePS/Tj OPQD8rv7gmsHINFSH5pkAnuYZttcTVoP0ISVoDwUQwbKytu4QTbaakRnh6+v40URFWkIsr4WOZck bxJF0WddCajJFdr60qZfE2Efv4WstK2tBZQIgx51F9NxO5NQI1mg7TyRVJ12AMXDuDjb -----END CERTIFICATE----- + +TunTrust Root CA +================ +-----BEGIN CERTIFICATE----- +MIIFszCCA5ugAwIBAgIUEwLV4kBMkkaGFmddtLu7sms+/BMwDQYJKoZIhvcNAQELBQAwYTELMAkG +A1UEBhMCVE4xNzA1BgNVBAoMLkFnZW5jZSBOYXRpb25hbGUgZGUgQ2VydGlmaWNhdGlvbiBFbGVj +dHJvbmlxdWUxGTAXBgNVBAMMEFR1blRydXN0IFJvb3QgQ0EwHhcNMTkwNDI2MDg1NzU2WhcNNDQw +NDI2MDg1NzU2WjBhMQswCQYDVQQGEwJUTjE3MDUGA1UECgwuQWdlbmNlIE5hdGlvbmFsZSBkZSBD +ZXJ0aWZpY2F0aW9uIEVsZWN0cm9uaXF1ZTEZMBcGA1UEAwwQVHVuVHJ1c3QgUm9vdCBDQTCCAiIw +DQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAMPN0/y9BFPdDCA61YguBUtB9YOCfvdZn56eY+hz +2vYGqU8ftPkLHzmMmiDQfgbU7DTZhrx1W4eI8NLZ1KMKsmwb60ksPqxd2JQDoOw05TDENX37Jk0b +bjBU2PWARZw5rZzJJQRNmpA+TkBuimvNKWfGzC3gdOgFVwpIUPp6Q9p+7FuaDmJ2/uqdHYVy7BG7 +NegfJ7/Boce7SBbdVtfMTqDhuazb1YMZGoXRlJfXyqNlC/M4+QKu3fZnz8k/9YosRxqZbwUN/dAd +gjH8KcwAWJeRTIAAHDOFli/LQcKLEITDCSSJH7UP2dl3RxiSlGBcx5kDPP73lad9UKGAwqmDrViW +VSHbhlnUr8a83YFuB9tgYv7sEG7aaAH0gxupPqJbI9dkxt/con3YS7qC0lH4Zr8GRuR5KiY2eY8f +Tpkdso8MDhz/yV3A/ZAQprE38806JG60hZC/gLkMjNWb1sjxVj8agIl6qeIbMlEsPvLfe/ZdeikZ +juXIvTZxi11Mwh0/rViizz1wTaZQmCXcI/m4WEEIcb9PuISgjwBUFfyRbVinljvrS5YnzWuioYas +DXxU5mZMZl+QviGaAkYt5IPCgLnPSz7ofzwB7I9ezX/SKEIBlYrilz0QIX32nRzFNKHsLA4KUiwS +VXAkPcvCFDVDXSdOvsC9qnyW5/yeYa1E0wCXAgMBAAGjYzBhMB0GA1UdDgQWBBQGmpsfU33x9aTI +04Y+oXNZtPdEITAPBgNVHRMBAf8EBTADAQH/MB8GA1UdIwQYMBaAFAaamx9TffH1pMjThj6hc1m0 
+90QhMA4GA1UdDwEB/wQEAwIBBjANBgkqhkiG9w0BAQsFAAOCAgEAqgVutt0Vyb+zxiD2BkewhpMl +0425yAA/l/VSJ4hxyXT968pk21vvHl26v9Hr7lxpuhbI87mP0zYuQEkHDVneixCwSQXi/5E/S7fd +Ao74gShczNxtr18UnH1YeA32gAm56Q6XKRm4t+v4FstVEuTGfbvE7Pi1HE4+Z7/FXxttbUcoqgRY +YdZ2vyJ/0Adqp2RT8JeNnYA/u8EH22Wv5psymsNUk8QcCMNE+3tjEUPRahphanltkE8pjkcFwRJp +adbGNjHh/PqAulxPxOu3Mqz4dWEX1xAZufHSCe96Qp1bWgvUxpVOKs7/B9dPfhgGiPEZtdmYu65x +xBzndFlY7wyJz4sfdZMaBBSSSFCp61cpABbjNhzI+L/wM9VBD8TMPN3pM0MBkRArHtG5Xc0yGYuP +jCB31yLEQtyEFpslbei0VXF/sHyz03FJuc9SpAQ/3D2gu68zngowYI7bnV2UqL1g52KAdoGDDIzM +MEZJ4gzSqK/rYXHv5yJiqfdcZGyfFoxnNidF9Ql7v/YQCvGwjVRDjAS6oz/v4jXH+XTgbzRB0L9z +ZVcg+ZtnemZoJE6AZb0QmQZZ8mWvuMZHu/2QeItBcy6vVR/cO5JyboTT0GFMDcx2V+IthSIVNg3r +AZ3r2OvEhJn7wAzMMujjd9qDRIueVSjAi1jTkD5OGwDxFa2DK5o= +-----END CERTIFICATE----- + +HARICA TLS RSA Root CA 2021 +=========================== +-----BEGIN CERTIFICATE----- +MIIFpDCCA4ygAwIBAgIQOcqTHO9D88aOk8f0ZIk4fjANBgkqhkiG9w0BAQsFADBsMQswCQYDVQQG +EwJHUjE3MDUGA1UECgwuSGVsbGVuaWMgQWNhZGVtaWMgYW5kIFJlc2VhcmNoIEluc3RpdHV0aW9u +cyBDQTEkMCIGA1UEAwwbSEFSSUNBIFRMUyBSU0EgUm9vdCBDQSAyMDIxMB4XDTIxMDIxOTEwNTUz +OFoXDTQ1MDIxMzEwNTUzN1owbDELMAkGA1UEBhMCR1IxNzA1BgNVBAoMLkhlbGxlbmljIEFjYWRl +bWljIGFuZCBSZXNlYXJjaCBJbnN0aXR1dGlvbnMgQ0ExJDAiBgNVBAMMG0hBUklDQSBUTFMgUlNB +IFJvb3QgQ0EgMjAyMTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAIvC569lmwVnlskN +JLnQDmT8zuIkGCyEf3dRywQRNrhe7Wlxp57kJQmXZ8FHws+RFjZiPTgE4VGC/6zStGndLuwRo0Xu +a2s7TL+MjaQenRG56Tj5eg4MmOIjHdFOY9TnuEFE+2uva9of08WRiFukiZLRgeaMOVig1mlDqa2Y +Ulhu2wr7a89o+uOkXjpFc5gH6l8Cct4MpbOfrqkdtx2z/IpZ525yZa31MJQjB/OCFks1mJxTuy/K +5FrZx40d/JiZ+yykgmvwKh+OC19xXFyuQnspiYHLA6OZyoieC0AJQTPb5lh6/a6ZcMBaD9YThnEv +dmn8kN3bLW7R8pv1GmuebxWMevBLKKAiOIAkbDakO/IwkfN4E8/BPzWr8R0RI7VDIp4BkrcYAuUR +0YLbFQDMYTfBKnya4dC6s1BG7oKsnTH4+yPiAwBIcKMJJnkVU2DzOFytOOqBAGMUuTNe3QvboEUH +GjMJ+E20pwKmafTCWQWIZYVWrkvL4N48fS0ayOn7H6NhStYqE613TBoYm5EPWNgGVMWX+Ko/IIqm +haZ39qb8HOLubpQzKoNQhArlT4b4UEV4AIHrW2jjJo3Me1xR9BQsQL4aYB16cmEdH2MtiKrOokWQ 
+CPxrvrNQKlr9qEgYRtaQQJKQCoReaDH46+0N0x3GfZkYVVYnZS6NRcUk7M7jAgMBAAGjQjBAMA8G +A1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFApII6ZgpJIKM+qTW8VX6iVNvRLuMA4GA1UdDwEB/wQE +AwIBhjANBgkqhkiG9w0BAQsFAAOCAgEAPpBIqm5iFSVmewzVjIuJndftTgfvnNAUX15QvWiWkKQU +EapobQk1OUAJ2vQJLDSle1mESSmXdMgHHkdt8s4cUCbjnj1AUz/3f5Z2EMVGpdAgS1D0NTsY9FVq +QRtHBmg8uwkIYtlfVUKqrFOFrJVWNlar5AWMxajaH6NpvVMPxP/cyuN+8kyIhkdGGvMA9YCRotxD +QpSbIPDRzbLrLFPCU3hKTwSUQZqPJzLB5UkZv/HywouoCjkxKLR9YjYsTewfM7Z+d21+UPCfDtcR +j88YxeMn/ibvBZ3PzzfF0HvaO7AWhAw6k9a+F9sPPg4ZeAnHqQJyIkv3N3a6dcSFA1pj1bF1BcK5 +vZStjBWZp5N99sXzqnTPBIWUmAD04vnKJGW/4GKvyMX6ssmeVkjaef2WdhW+o45WxLM0/L5H9MG0 +qPzVMIho7suuyWPEdr6sOBjhXlzPrjoiUevRi7PzKzMHVIf6tLITe7pTBGIBnfHAT+7hOtSLIBD6 +Alfm78ELt5BGnBkpjNxvoEppaZS3JGWg/6w/zgH7IS79aPib8qXPMThcFarmlwDB31qlpzmq6YR/ +PFGoOtmUW4y/Twhx5duoXNTSpv4Ao8YWxw/ogM4cKGR0GQjTQuPOAF1/sdwTsOEFy9EgqoZ0njnn +kf3/W9b3raYvAwtt41dU63ZTGI0RmLo= +-----END CERTIFICATE----- + +HARICA TLS ECC Root CA 2021 +=========================== +-----BEGIN CERTIFICATE----- +MIICVDCCAdugAwIBAgIQZ3SdjXfYO2rbIvT/WeK/zjAKBggqhkjOPQQDAzBsMQswCQYDVQQGEwJH +UjE3MDUGA1UECgwuSGVsbGVuaWMgQWNhZGVtaWMgYW5kIFJlc2VhcmNoIEluc3RpdHV0aW9ucyBD +QTEkMCIGA1UEAwwbSEFSSUNBIFRMUyBFQ0MgUm9vdCBDQSAyMDIxMB4XDTIxMDIxOTExMDExMFoX +DTQ1MDIxMzExMDEwOVowbDELMAkGA1UEBhMCR1IxNzA1BgNVBAoMLkhlbGxlbmljIEFjYWRlbWlj +IGFuZCBSZXNlYXJjaCBJbnN0aXR1dGlvbnMgQ0ExJDAiBgNVBAMMG0hBUklDQSBUTFMgRUNDIFJv +b3QgQ0EgMjAyMTB2MBAGByqGSM49AgEGBSuBBAAiA2IABDgI/rGgltJ6rK9JOtDA4MM7KKrxcm1l +AEeIhPyaJmuqS7psBAqIXhfyVYf8MLA04jRYVxqEU+kw2anylnTDUR9YSTHMmE5gEYd103KUkE+b +ECUqqHgtvpBBWJAVcqeht6NCMEAwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUyRtTgRL+BNUW +0aq8mm+3oJUZbsowDgYDVR0PAQH/BAQDAgGGMAoGCCqGSM49BAMDA2cAMGQCMBHervjcToiwqfAi +rcJRQO9gcS3ujwLEXQNwSaSS6sUUiHCm0w2wqsosQJz76YJumgIwK0eaB8bRwoF8yguWGEEbo/Qw +CZ61IygNnxS2PFOiTAZpffpskcYqSUXm7LcT4Tps +-----END CERTIFICATE----- diff --git a/thirdparty/embree/common/sys/platform.h b/thirdparty/embree/common/sys/platform.h index 697e07bb86..3e386c4944 100644 --- 
a/thirdparty/embree/common/sys/platform.h +++ b/thirdparty/embree/common/sys/platform.h @@ -183,7 +183,7 @@ // #define THROW_RUNTIME_ERROR(str) // throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); #define THROW_RUNTIME_ERROR(str) \ - printf(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)), abort(); + printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort(); // -- GODOT end -- #else // -- GODOT start -- diff --git a/thirdparty/embree/kernels/common/rtcore.h b/thirdparty/embree/kernels/common/rtcore.h index 373e49a689..f8aad7c7cb 100644 --- a/thirdparty/embree/kernels/common/rtcore.h +++ b/thirdparty/embree/kernels/common/rtcore.h @@ -126,7 +126,7 @@ namespace embree // #define throw_RTCError(error,str) \ // throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); #define throw_RTCError(error,str) \ - printf(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)), abort(); + printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort(); // -- GODOT end -- #else // -- GODOT begin -- diff --git a/thirdparty/embree/patches/godot-changes-noexcept.patch b/thirdparty/embree/patches/godot-changes-noexcept.patch index c587a0e2be..598a7f2ddc 100644 --- a/thirdparty/embree/patches/godot-changes-noexcept.patch +++ b/thirdparty/embree/patches/godot-changes-noexcept.patch @@ -259,7 +259,7 @@ index 8a6d9fa0a9..697e07bb86 100644 + // throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); #define THROW_RUNTIME_ERROR(str) \ - throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); -+ printf(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)), abort(); ++ printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort(); + // -- GODOT end -- #else + // -- GODOT start -- 
@@ -583,7 +583,7 @@ index 4e4b24e9c2..373e49a689 100644 + // throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); #define throw_RTCError(error,str) \ - throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); -+ printf(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)), abort(); ++ printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort(); + // -- GODOT end -- #else + // -- GODOT begin -- diff --git a/thirdparty/graphite/ChangeLog b/thirdparty/graphite/ChangeLog deleted file mode 100644 index e36110e1c1..0000000000 --- a/thirdparty/graphite/ChangeLog +++ /dev/null @@ -1,238 +0,0 @@ -1.3.14 - . Bug fixes - . Allow features to be hidden (for aliases) - . Move to python3 - . Rename doc files from .txt to .asc - -1.3.13 - . Resolve minor spacing issue in rtl non-overlap kerning - . python3 for graphite.py - . Better fuzzing - . Better building on windows - -1.3.12 - . Graphite no longer does dumb rendering for fonts with no smarts - . Segment caching code removed. Anything attempting to use the segment cache gets given a regular face instead - . Add libfuzzer support - . Builds now require C++11 - . Improvements to Windows 64 bit builds - . Support different versions of python including 32 bit and python 3 - . Various minor bug fixes - -1.3.11 - . Fixes due to security review - . Minor collision avoidance fixes - . Fix LZ4 decompressor against high compression - -1.3.10 - . Address floating point build parameters to give consistent positioning results across platforms - . Various bug fixes - -1.3.9 - . Add Collision COLL_ISSPACE to allow for visible spaces in collision avoidance - . Add segment and pass direction information to tracing output - . Bug fix rule length testing in 32-bit - . Increase slanted margin distances for collision avoidance - . Change kerning algorithm to simple outline expansion. 
Seems to make no visible difference. - . Add trace2svg to test tools - -1.3.8 - . Various bug fixes arising from fuzzing - . Fix regression that stopped piglatin from working - . Make collision avoidance kerning give more regular results - . Minor modification to clustering algorithm to handle variable width chars - -1.3.7 - . Bug fixes - . Start to deprecate SegCache. This will be going away in a later release. - -1.3.6 - . Bug fixes - -1.3.5 - . Bug fixes - . Security bug fix - . Fix ARM misalignment problem - . Track latest cmake - -1.3.4 - . Transition from Mercurial to Git - . Bug fixes - . Fix Collision Kerning ignoring some diacritics - . Handle pass bits 16-31 to speed up fonts with > 16 passes - . Various minor fuzz bug fixes - . Make Coverity happy - . Add GR_FALLTHROUGH macro for clang c++11 - -1.3.3 - . Slight speed up in Collision Avoidance - . Remove dead bidi code - . Bug fixes - . Between pass bidi reorderings and at the end - . Decompressor fuzz bugs - . Other fuzz bugs - -1.3.2 - . Remove full bidi. All segments are assumed to be single directioned. - . Bug fixes: - . Decompressor corner cases - . Various fuzz bugs - -1.3.1 - . Deprecation warning: Full bidi support is about to be deprecated. Make contact - if this impacts you. - . Change compression block format slightly to conform to LZ4 - . Bug fixes: - . Handle mono direction text with diacritics consistently. Fonts - now see the direction they expect consistently and bidi now - gives expected results. - . Fixed lots of fuzz bugs - . Coverity cleanups - . Build now works for clang and/or asan and/or afl etc. - -1.3.0 - . Add collision avoidance - . Shift Collider - . Kern Collider - . Octabox outlines and subboxes - . Add compressed Silf and Glat table support - . Bug fixes: - . Stop loops forming in the child, sibling tree - . Handle bidi mirroring correctly if no bidi occurring - -1.2.4 - . Face failure now has error code reporting via debug logging - . 
can now call gr_start_logging(NULL, fname) - . gr2fonttest --alltrace added - . Format 14 table support - . Not done. To be handled entirely in the compiler - . Bidi support for Unicode 6.3 Isolating direction controls - . Fonts no longer require a glyf/loca table. In such cases the bounding box is always 0. - . Clang ASAN build support added for testing. - . Handle out of memory sanely. - . Documentation improvements - . Bug fixes: - . Enforce fonts having to store glyph attributes by monotonically increasing attribute number - . zeropadding was not getting called on feature tags - . automatic associations for unassociated characters - . use direct engine on Mac - . various extreme case reading 1 past the end errors fixed - . remove tabs from sources so that it becomes readable again - -1.2.3 - . Bug fixes only: - . fix byte swapping when testing cmap subtable lengths - . work around armel compilation problems with conditional operators - . fix pseudoglyph support for advance and bbox - -1.2.2 - . Add support for passKeySlot (makes Charis 2x faster) up to 32 passes - . Add telemetry output to json if enabled in build GRAPHITE2_TELEMETRY - . Shrink font memory footprint particularly in the fsm - . Add -S to comparerenderer - . Bug fixes: - . Fix shift.x being reversed for rtl text - . Fix faulty fallback justification - . Fix bad cmap handling - . Support compiling on old Solaris where bidi attributes clash with register names - . Follow the crowd in using Windows.h - -1.2.1 - . Bug fixes: - . Allow glyph reattachment - . Allow signed glyph attributes - . Various build problems with MacOS, old gcc versions, etc. - . Various overrun read errors fixed - -1.2.0 - . API Changes: - . Added Windows friendly gr_start_logging and gr_stop_logging, now per face - . Added gr_make_face_with_ops, gr_make_face_with_seg_cache_and_ops - . Added gr_make_font_with_ops - . Added gr_face_is_char_supported - . Added gr_face_info to give info to apps about face capabilities - . 
Deprecated gr_make_face, gr_make_face_with_seg_cache, gr_make_font_with_advance_fn - . Deprecated graphite_start_logging and graphite_stop_logging - . These functions are stubbed now and do nothing, but do compile and link. - . Bump API version to 3 - . Add C# wrapper to contrib - . Handle justification information in a font and do something useful with it - . Builds clang clean (has done for a while) - . Bug fixes - . Windows build and bug fixes - . Add extra information to json debug output - . Added windows build documentation - . Added freetype sample code and test - -1.1.3 - . Default build has GRAPHITE2_COMPARE_RENDERER to OFF to reduce dependencies - . Builds on Mac with clang - . Debug output improvements - . Tidy up perl wrappers - . Fuzz tester improvements - . Various bug fixes for bad font handling - -1.1.2 - . Support feature ids < 4 chars when space padded for inclusion in FF 14. - . More fuzztesting and removal of causes of valgrind bad reads and sigabrts - . Remove contrib/android into its own repo (http://hg.palaso.org/grandroid) - . Update comparerenderer to latest harfbuzzng api - -1.1.1 - . Missing Log.h included - . perl wrappers updated - -1.1.0 - . Refactored debug output to use json - . Renamed VM_MACHINE_TYPE to GRAPHITE2_VM_TYPE - . Renamed DISABLE_SEGCACHE to GRAPHITE2_NSEGCACE - . Renamed DISBALE_FILE_FACE to GRAPHITE2_NFILEFACE - . Renamed ENABLE_COMPARE_RENDERER to GRAPHTIE2_COMPARE_RENDERER - . Renamed DOXYGEN_CONFIG to GRAPHITE2_DOXYGEN_CONFIG - . Renamed GR2_CUSTOM_HEADER to GRAPHITE2_CUSTOM_HEADER - . Renamed GR2_EXPORTING to GRAPHITE2_EXPORTING - . Added GRAPHITE2_STATIC for static only builds - . Added GRAPHITE2_NTRACING to compile out tracing code - . Documented GRAPHITE2_{EXPORTING,STATIC,NTRACING} in hacking.txt - . Bump libtool version to 2.1.0 - . dumb font rendering works - . slot user attributes are now signed rather than unsigned - . add support for long class maps - . Rename perl library to avoid nameclash on Windows - . 
Various robustness fixes - . Moved internal .h files into src/inc - . Parallelise fuzztest - . General build improvements, particularly on Windows - -1.0.3 - . Fix UTF16 surrogate support - . script and lang tags may be space padded or null padded - . Remove need for WORDS_BIGENDIAN, do it all automatically - . Remove all #include <new>. Use CLASS_NEW_DELETE instead. - . Fix comparerenderer to work with current hbng - . Add valgrind to fuzztest to ensure good memory use at all times - . Fix new fuzztest exposed bugs. - . Fix bugs exposed by Mozilla security review - . Add continuous integration build on Windows support - -1.0.2 - . Fix Windows build - . Comparerenderer uses hbng enforcing ot rendering - . Add Bidi .hasChar support and refactor mirroring code - . Make cmake default Release rather than debug - . Don't compile in a boat load of TtfUtil that isn't used, saving 15% of binary - . Chase the FSF around its latest office moves - . WORDS_BIGENDIAN is set at the top so tests now pass on ppc, etc. - . More words in the manual - -1.0.1 - . Release is the default build in cmake now. - . Refactor cmake build to not rebuild things so much. - . Include a missing file - . Remove -nostdlibs, making gcc happy everywhere - . Update comparerenderer to latest hbng interface - . Add changelog - -1.0.0 - . First major release of perfect code! 
- diff --git a/thirdparty/graphite/src/Pass.cpp b/thirdparty/graphite/src/Pass.cpp index db31c22d46..47ae2064f7 100644 --- a/thirdparty/graphite/src/Pass.cpp +++ b/thirdparty/graphite/src/Pass.cpp @@ -1056,12 +1056,17 @@ float Pass::resolveKern(Segment *seg, Slot *slotFix, GR_MAYBE_UNUSED Slot *start ymin = min(by + bbb.bl.y, ymin); for (nbor = slotFix->next(); nbor; nbor = nbor->next()) { - if (nbor->isChildOf(base)) - continue; if (!gc.check(nbor->gid())) return 0.; const Rect &bb = seg->theGlyphBBoxTemporary(nbor->gid()); SlotCollision *cNbor = seg->collisionInfo(nbor); + const float nby = nbor->origin().y + cNbor->shift().y; + if (nbor->isChildOf(base)) + { + ymax = max(nby + bb.tr.y, ymax); + ymin = min(nby + bb.bl.y, ymin); + continue; + } if ((bb.bl.y == 0.f && bb.tr.y == 0.f) || (cNbor->flags() & SlotCollision::COLL_ISSPACE)) { if (m_kernColls == InWord) diff --git a/thirdparty/harfbuzz/src/hb-array.hh b/thirdparty/harfbuzz/src/hb-array.hh index dd61509b2e..0beffb078f 100644 --- a/thirdparty/harfbuzz/src/hb-array.hh +++ b/thirdparty/harfbuzz/src/hb-array.hh @@ -51,13 +51,19 @@ struct hb_array_t : hb_iter_with_fallback_t<hb_array_t<Type>, Type&> * Constructors. 
*/ hb_array_t () = default; - hb_array_t (Type *array_, unsigned int length_) : arrayZ (array_), length (length_) {} + hb_array_t (const hb_array_t&) = default; + ~hb_array_t () = default; + hb_array_t& operator= (const hb_array_t&) = default; + hb_array_t& operator= (hb_array_t&&) = default; + + constexpr hb_array_t (std::nullptr_t) : hb_array_t () {} + constexpr hb_array_t (Type *array_, unsigned int length_) : arrayZ (array_), length (length_) {} template <unsigned int length_> - hb_array_t (Type (&array_)[length_]) : hb_array_t (array_, length_) {} + constexpr hb_array_t (Type (&array_)[length_]) : hb_array_t (array_, length_) {} template <typename U, hb_enable_if (hb_is_cr_convertible(U, Type))> - hb_array_t (const hb_array_t<U> &o) : + constexpr hb_array_t (const hb_array_t<U> &o) : hb_iter_with_fallback_t<hb_array_t, Type&> (), arrayZ (o.arrayZ), length (o.length), backwards_length (o.backwards_length) {} template <typename U, @@ -303,13 +309,19 @@ struct hb_sorted_array_t : static constexpr bool is_sorted_iterator = true; hb_sorted_array_t () = default; - hb_sorted_array_t (Type *array_, unsigned int length_) : hb_array_t<Type> (array_, length_) {} + hb_sorted_array_t (const hb_sorted_array_t&) = default; + ~hb_sorted_array_t () = default; + hb_sorted_array_t& operator= (const hb_sorted_array_t&) = default; + hb_sorted_array_t& operator= (hb_sorted_array_t&&) = default; + + constexpr hb_sorted_array_t (std::nullptr_t) : hb_sorted_array_t () {} + constexpr hb_sorted_array_t (Type *array_, unsigned int length_) : hb_array_t<Type> (array_, length_) {} template <unsigned int length_> - hb_sorted_array_t (Type (&array_)[length_]) : hb_array_t<Type> (array_) {} + constexpr hb_sorted_array_t (Type (&array_)[length_]) : hb_array_t<Type> (array_) {} template <typename U, hb_enable_if (hb_is_cr_convertible(U, Type))> - hb_sorted_array_t (const hb_array_t<U> &o) : + constexpr hb_sorted_array_t (const hb_array_t<U> &o) : hb_iter_t<hb_sorted_array_t, Type&> (), 
hb_array_t<Type> (o) {} template <typename U, diff --git a/thirdparty/harfbuzz/src/hb-map.hh b/thirdparty/harfbuzz/src/hb-map.hh index bb4a0eb5d1..793dcf22ca 100644 --- a/thirdparty/harfbuzz/src/hb-map.hh +++ b/thirdparty/harfbuzz/src/hb-map.hh @@ -35,8 +35,10 @@ */ template <typename K, typename V, - K kINVALID = hb_is_pointer (K) ? 0 : std::is_signed<K>::value ? hb_int_min (K) : (K) -1, - V vINVALID = hb_is_pointer (V) ? 0 : std::is_signed<V>::value ? hb_int_min (V) : (V) -1> + typename k_invalid_t = K, + typename v_invalid_t = V, + k_invalid_t kINVALID = hb_is_pointer (K) ? 0 : std::is_signed<K>::value ? hb_int_min (K) : (K) -1, + v_invalid_t vINVALID = hb_is_pointer (V) ? 0 : std::is_signed<V>::value ? hb_int_min (V) : (V) -1> struct hb_hashmap_t { static constexpr K INVALID_KEY = kINVALID; @@ -62,8 +64,10 @@ struct hb_hashmap_t hb_copy (o, *this); } - static_assert (std::is_integral<K>::value || hb_is_pointer (K), ""); - static_assert (std::is_integral<V>::value || hb_is_pointer (V), ""); + static_assert (std::is_trivially_copyable<K>::value, ""); + static_assert (std::is_trivially_copyable<V>::value, ""); + static_assert (std::is_trivially_destructible<K>::value, ""); + static_assert (std::is_trivially_destructible<V>::value, ""); struct item_t { @@ -348,19 +352,23 @@ struct hb_hashmap_t struct hb_map_t : hb_hashmap_t<hb_codepoint_t, hb_codepoint_t, + hb_codepoint_t, + hb_codepoint_t, HB_MAP_VALUE_INVALID, HB_MAP_VALUE_INVALID> { using hashmap = hb_hashmap_t<hb_codepoint_t, hb_codepoint_t, + hb_codepoint_t, + hb_codepoint_t, HB_MAP_VALUE_INVALID, HB_MAP_VALUE_INVALID>; hb_map_t () = default; ~hb_map_t () = default; - hb_map_t (hb_map_t& o) = default; - hb_map_t& operator= (const hb_map_t& other) = default; - hb_map_t& operator= (hb_map_t&& other) = default; + hb_map_t (hb_map_t&) = default; + hb_map_t& operator= (const hb_map_t&) = default; + hb_map_t& operator= (hb_map_t&&) = default; hb_map_t (std::initializer_list<hb_pair_t<hb_codepoint_t, hb_codepoint_t>> 
lst) : hashmap (lst) {} template <typename Iterable, hb_requires (hb_is_iterable (Iterable))> diff --git a/thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh index 03476faba7..a3c55fa8f4 100644 --- a/thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh @@ -38,8 +38,8 @@ */ #define HB_OT_TAG_COLR HB_TAG('C','O','L','R') -#ifndef COLRV1_MAX_NESTING_LEVEL -#define COLRV1_MAX_NESTING_LEVEL 100 +#ifndef HB_COLRV1_MAX_NESTING_LEVEL +#define HB_COLRV1_MAX_NESTING_LEVEL 100 #endif #ifndef COLRV1_ENABLE_SUBSETTING @@ -102,7 +102,7 @@ struct hb_colrv1_closure_context_t : hb_set_t *glyphs_, hb_set_t *layer_indices_, hb_set_t *palette_indices_, - unsigned nesting_level_left_ = COLRV1_MAX_NESTING_LEVEL) : + unsigned nesting_level_left_ = HB_COLRV1_MAX_NESTING_LEVEL) : base (base_), glyphs (glyphs_), layer_indices (layer_indices_), @@ -985,7 +985,7 @@ struct ClipList for (const hb_codepoint_t _ : gids.iter ()) { if (_ == start_gid) continue; - + offset = gid_offset_map.get (_); if (_ == prev_gid + 1 && offset == prev_offset) { @@ -1027,7 +1027,7 @@ struct ClipList const hb_set_t& glyphset = *c->plan->_glyphset; const hb_map_t &glyph_map = *c->plan->glyph_map; - + hb_map_t new_gid_offset_map; hb_set_t new_gids; for (const ClipRecord& record : clips.iter ()) @@ -1062,6 +1062,18 @@ struct ClipList struct Paint { + + template <typename ...Ts> + bool sanitize (hb_sanitize_context_t *c, Ts&&... ds) const + { + TRACE_SANITIZE (this); + + if (unlikely (!c->check_start_recursion (HB_COLRV1_MAX_NESTING_LEVEL))) + return_trace (c->no_dispatch_return_value ()); + + return_trace (c->end_recursion (this->dispatch (c, std::forward<Ts> (ds)...))); + } + template <typename context_t, typename ...Ts> typename context_t::return_t dispatch (context_t *c, Ts&&... 
ds) const { diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-common.hh b/thirdparty/harfbuzz/src/hb-ot-layout-common.hh index 5d98278bed..882c3ae96f 100644 --- a/thirdparty/harfbuzz/src/hb-ot-layout-common.hh +++ b/thirdparty/harfbuzz/src/hb-ot-layout-common.hh @@ -98,7 +98,7 @@ static void ClassDef_remap_and_serialize (hb_serialize_context_t *c, struct hb_prune_langsys_context_t { hb_prune_langsys_context_t (const void *table_, - hb_hashmap_t<unsigned, hb_set_t *, (unsigned)-1, nullptr> *script_langsys_map_, + hb_hashmap_t<unsigned, hb_set_t *> *script_langsys_map_, const hb_map_t *duplicate_feature_map_, hb_set_t *new_collected_feature_indexes_) :table (table_), @@ -137,7 +137,7 @@ struct hb_prune_langsys_context_t public: const void *table; - hb_hashmap_t<unsigned, hb_set_t *, (unsigned)-1, nullptr> *script_langsys_map; + hb_hashmap_t<unsigned, hb_set_t *> *script_langsys_map; const hb_map_t *duplicate_feature_map; hb_set_t *new_feature_indexes; @@ -179,14 +179,14 @@ struct hb_subset_layout_context_t : hb_subset_context_t *subset_context; const hb_tag_t table_tag; const hb_map_t *lookup_index_map; - const hb_hashmap_t<unsigned, hb_set_t *, (unsigned)-1, nullptr> *script_langsys_map; + const hb_hashmap_t<unsigned, hb_set_t *> *script_langsys_map; const hb_map_t *feature_index_map; unsigned cur_script_index; hb_subset_layout_context_t (hb_subset_context_t *c_, hb_tag_t tag_, hb_map_t *lookup_map_, - hb_hashmap_t<unsigned, hb_set_t *, (unsigned)-1, nullptr> *script_langsys_map_, + hb_hashmap_t<unsigned, hb_set_t *> *script_langsys_map_, hb_map_t *feature_index_map_) : subset_context (c_), table_tag (tag_), @@ -1357,7 +1357,7 @@ struct Lookup if (unlikely (!get_subtables<TSubTable> ().sanitize (c, this, get_type ()))) return_trace (false); - if (unlikely (get_type () == TSubTable::Extension && !c->get_edit_count ())) + if (unlikely (get_type () == TSubTable::Extension && subtables && !c->get_edit_count ())) { /* The spec says all subtables of an Extension lookup 
should * have the same type, which shall not be the Extension type diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh b/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh index c0ed2bcc03..6bc06b50ed 100644 --- a/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh +++ b/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh @@ -163,7 +163,7 @@ struct hb_closure_context_t : hb_set_t *glyphs_, hb_set_t *cur_intersected_glyphs_, hb_map_t *done_lookups_glyph_count_, - hb_hashmap_t<unsigned, hb_set_t *, (unsigned)-1, nullptr> *done_lookups_glyph_set_, + hb_hashmap_t<unsigned, hb_set_t *> *done_lookups_glyph_set_, unsigned int nesting_level_left_ = HB_MAX_NESTING_LEVEL) : face (face_), glyphs (glyphs_), @@ -192,7 +192,7 @@ struct hb_closure_context_t : private: hb_map_t *done_lookups_glyph_count; - hb_hashmap_t<unsigned, hb_set_t *, (unsigned)-1, nullptr> *done_lookups_glyph_set; + hb_hashmap_t<unsigned, hb_set_t *> *done_lookups_glyph_set; unsigned int lookup_count; }; @@ -1642,9 +1642,8 @@ struct Rule const hb_map_t *klass_map = nullptr) const { TRACE_SUBSET (this); - - const hb_array_t<const HBUINT16> input = inputZ.as_array ((inputCount ? inputCount - 1 : 0)); - if (!input.length) return_trace (false); + if (unlikely (!inputCount)) return_trace (false); + const hb_array_t<const HBUINT16> input = inputZ.as_array (inputCount - 1); const hb_map_t *mapping = klass_map == nullptr ? 
c->plan->glyph_map : klass_map; if (!hb_all (input, mapping)) return_trace (false); @@ -3631,7 +3630,7 @@ struct GSUBGPOS } void prune_langsys (const hb_map_t *duplicate_feature_map, - hb_hashmap_t<unsigned, hb_set_t *, (unsigned)-1, nullptr> *script_langsys_map, + hb_hashmap_t<unsigned, hb_set_t *> *script_langsys_map, hb_set_t *new_feature_indexes /* OUT */) const { hb_prune_langsys_context_t c (this, script_langsys_map, duplicate_feature_map, new_feature_indexes); @@ -3689,7 +3688,7 @@ struct GSUBGPOS hb_map_t *duplicate_feature_map /* OUT */) const { if (feature_indices->is_empty ()) return; - hb_hashmap_t<hb_tag_t, hb_set_t *, (unsigned)-1, nullptr> unique_features; + hb_hashmap_t<hb_tag_t, hb_set_t *> unique_features; //find out duplicate features after subset for (unsigned i : feature_indices->iter ()) { @@ -3784,8 +3783,12 @@ struct GSUBGPOS // http://lists.freedesktop.org/archives/harfbuzz/2012-November/002660.html continue; - if (f.featureParams.is_null () - && !f.intersects_lookup_indexes (lookup_indices) + + if (!f.featureParams.is_null () && + tag == HB_TAG ('s', 'i', 'z', 'e')) + continue; + + if (!f.intersects_lookup_indexes (lookup_indices) #ifndef HB_NO_VAR && !alternate_feature_indices.has (i) #endif diff --git a/thirdparty/harfbuzz/src/hb-ot-layout.cc b/thirdparty/harfbuzz/src/hb-ot-layout.cc index fbdedd0e20..4e1d23eba5 100644 --- a/thirdparty/harfbuzz/src/hb-ot-layout.cc +++ b/thirdparty/harfbuzz/src/hb-ot-layout.cc @@ -1493,7 +1493,7 @@ hb_ot_layout_lookup_substitute_closure (hb_face_t *face, { hb_set_t cur_intersected_glyphs; hb_map_t done_lookups_glyph_count; - hb_hashmap_t<unsigned, hb_set_t *, (unsigned)-1, nullptr> done_lookups_glyph_set; + hb_hashmap_t<unsigned, hb_set_t *> done_lookups_glyph_set; OT::hb_closure_context_t c (face, glyphs, &cur_intersected_glyphs, &done_lookups_glyph_count, &done_lookups_glyph_set); const OT::SubstLookup& l = face->table.GSUB->table->get_lookup (lookup_index); @@ -1522,7 +1522,7 @@ 
hb_ot_layout_lookups_substitute_closure (hb_face_t *face, { hb_set_t cur_intersected_glyphs; hb_map_t done_lookups_glyph_count; - hb_hashmap_t<unsigned, hb_set_t *, (unsigned)-1, nullptr> done_lookups_glyph_set; + hb_hashmap_t<unsigned, hb_set_t *> done_lookups_glyph_set; OT::hb_closure_context_t c (face, glyphs, &cur_intersected_glyphs, &done_lookups_glyph_count, &done_lookups_glyph_set); const OT::GSUB& gsub = *face->table.GSUB->table; diff --git a/thirdparty/harfbuzz/src/hb-ot-post-table-v2subset.hh b/thirdparty/harfbuzz/src/hb-ot-post-table-v2subset.hh index 94450eb53a..504de2de74 100644 --- a/thirdparty/harfbuzz/src/hb-ot-post-table-v2subset.hh +++ b/thirdparty/harfbuzz/src/hb-ot-post-table-v2subset.hh @@ -79,6 +79,7 @@ HB_INTERNAL bool postV2Tail::subset (hb_subset_context_t *c) const post::accelerator_t _post; _post.init (c->plan->source); + hb_hashmap_t<hb_bytes_t, unsigned, std::nullptr_t, unsigned, nullptr, (unsigned)-1> glyph_name_to_new_index; for (hb_codepoint_t new_gid = 0; new_gid < num_glyphs; new_gid++) { hb_codepoint_t old_gid = reverse_glyph_map.get (new_gid); @@ -90,22 +91,28 @@ HB_INTERNAL bool postV2Tail::subset (hb_subset_context_t *c) const else { hb_bytes_t s = _post.find_glyph_name (old_gid); - int standard_glyph_index = -1; - for (unsigned i = 0; i < format1_names_length; i++) + new_index = glyph_name_to_new_index.get (s); + if (new_index == (unsigned)-1) { - if (s == format1_names (i)) + int standard_glyph_index = -1; + for (unsigned i = 0; i < format1_names_length; i++) { - standard_glyph_index = i; - break; + if (s == format1_names (i)) + { + standard_glyph_index = i; + break; + } } + + if (standard_glyph_index == -1) + { + new_index = 258 + i; + i++; + } + else + { new_index = standard_glyph_index; } + glyph_name_to_new_index.set (s, new_index); } - if (standard_glyph_index == -1) - { - new_index = 258 + i; - i++; - } - else - { new_index = standard_glyph_index; } old_new_index_map.set (old_index, new_index); } 
old_gid_new_index_map.set (old_gid, new_index); diff --git a/thirdparty/harfbuzz/src/hb-sanitize.hh b/thirdparty/harfbuzz/src/hb-sanitize.hh index 2e536c7a81..65c2772201 100644 --- a/thirdparty/harfbuzz/src/hb-sanitize.hh +++ b/thirdparty/harfbuzz/src/hb-sanitize.hh @@ -123,6 +123,7 @@ struct hb_sanitize_context_t : hb_sanitize_context_t () : start (nullptr), end (nullptr), max_ops (0), max_subtables (0), + recursion_depth (0), writable (false), edit_count (0), blob (nullptr), num_glyphs (65536), @@ -205,6 +206,7 @@ struct hb_sanitize_context_t : (unsigned) HB_SANITIZE_MAX_OPS_MAX); this->edit_count = 0; this->debug_depth = 0; + this->recursion_depth = 0; DEBUG_MSG_LEVEL (SANITIZE, start, 0, +1, "start [%p..%p] (%lu bytes)", @@ -278,6 +280,18 @@ struct hb_sanitize_context_t : return this->check_range (base, a, b, hb_static_size (T)); } + bool check_start_recursion (int max_depth) + { + if (unlikely (recursion_depth >= max_depth)) return false; + return ++recursion_depth; + } + + bool end_recursion (bool result) + { + recursion_depth--; + return result; + } + template <typename Type> bool check_struct (const Type *obj) const { return likely (this->check_range (obj, obj->min_size)); } @@ -389,6 +403,7 @@ struct hb_sanitize_context_t : const char *start, *end; mutable int max_ops, max_subtables; private: + int recursion_depth; bool writable; unsigned int edit_count; hb_blob_t *blob; diff --git a/thirdparty/harfbuzz/src/hb-serialize.hh b/thirdparty/harfbuzz/src/hb-serialize.hh index 57689916f6..d22ae06087 100644 --- a/thirdparty/harfbuzz/src/hb-serialize.hh +++ b/thirdparty/harfbuzz/src/hb-serialize.hh @@ -652,7 +652,9 @@ struct hb_serialize_context_t hb_vector_t<object_t *> packed; /* Map view of packed objects. 
*/ - hb_hashmap_t<const object_t *, objidx_t, nullptr, 0> packed_map; + hb_hashmap_t<const object_t *, objidx_t, + const object_t *, objidx_t, + nullptr, 0> packed_map; }; #endif /* HB_SERIALIZE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-set.hh b/thirdparty/harfbuzz/src/hb-set.hh index 8841427189..af02e9e12b 100644 --- a/thirdparty/harfbuzz/src/hb-set.hh +++ b/thirdparty/harfbuzz/src/hb-set.hh @@ -157,9 +157,9 @@ struct hb_set_t : hb_sparseset_t<hb_bit_set_invertible_t> { hb_set_t () = default; ~hb_set_t () = default; - hb_set_t (hb_set_t& o) = default; - hb_set_t& operator= (const hb_set_t& other) = default; - hb_set_t& operator= (hb_set_t&& other) = default; + hb_set_t (hb_set_t&) = default; + hb_set_t& operator= (const hb_set_t&) = default; + hb_set_t& operator= (hb_set_t&&) = default; hb_set_t (std::initializer_list<hb_codepoint_t> lst) : hb_sparseset_t<hb_bit_set_invertible_t> (lst) {} template <typename Iterable, hb_requires (hb_is_iterable (Iterable))> diff --git a/thirdparty/harfbuzz/src/hb-subset-plan.cc b/thirdparty/harfbuzz/src/hb-subset-plan.cc index 1e195ff660..53f8664d92 100644 --- a/thirdparty/harfbuzz/src/hb-subset-plan.cc +++ b/thirdparty/harfbuzz/src/hb-subset-plan.cc @@ -41,7 +41,7 @@ #include "hb-ot-math-table.hh" -typedef hb_hashmap_t<unsigned, hb_set_t *, (unsigned)-1, nullptr> script_langsys_map; +typedef hb_hashmap_t<unsigned, hb_set_t *> script_langsys_map; #ifndef HB_NO_SUBSET_CFF static inline void _add_cff_seac_components (const OT::cff1::accelerator_t &cff, @@ -100,11 +100,23 @@ static void _collect_layout_indices (hb_face_t *face, if (!features.alloc (table.get_feature_count () + 1)) return; + hb_set_t visited_features; + bool retain_all_features = true; for (unsigned i = 0; i < table.get_feature_count (); i++) { hb_tag_t tag = table.get_feature_tag (i); - if (tag && layout_features_to_retain->has (tag)) - features.push (tag); + if (!tag) continue; + if (!layout_features_to_retain->has (tag)) + { + retain_all_features = false; + 
continue; + } + + if (visited_features.has (tag)) + continue; + + features.push (tag); + visited_features.add (tag); } if (!features) @@ -113,7 +125,7 @@ static void _collect_layout_indices (hb_face_t *face, // The collect function needs a null element to signal end of the array. features.push (0); - if (features.get_size () == table.get_feature_count () + 1) + if (retain_all_features) { // Looking for all features, trigger the faster collection method. layout_collect_func (face, diff --git a/thirdparty/harfbuzz/src/hb-subset-plan.hh b/thirdparty/harfbuzz/src/hb-subset-plan.hh index c30feeb42f..c0232480bf 100644 --- a/thirdparty/harfbuzz/src/hb-subset-plan.hh +++ b/thirdparty/harfbuzz/src/hb-subset-plan.hh @@ -84,8 +84,8 @@ struct hb_subset_plan_t hb_map_t *gpos_lookups; //active langsys we'd like to retain - hb_hashmap_t<unsigned, hb_set_t *, (unsigned)-1, nullptr> *gsub_langsys; - hb_hashmap_t<unsigned, hb_set_t *, (unsigned)-1, nullptr> *gpos_langsys; + hb_hashmap_t<unsigned, hb_set_t *> *gsub_langsys; + hb_hashmap_t<unsigned, hb_set_t *> *gpos_langsys; //active features after removing redundant langsys and prune_features hb_map_t *gsub_features; diff --git a/thirdparty/harfbuzz/src/hb-version.h b/thirdparty/harfbuzz/src/hb-version.h index 1a4f0bf62a..c9fefa1df6 100644 --- a/thirdparty/harfbuzz/src/hb-version.h +++ b/thirdparty/harfbuzz/src/hb-version.h @@ -53,14 +53,14 @@ HB_BEGIN_DECLS * * The micro component of the library version available at compile-time. */ -#define HB_VERSION_MICRO 1 +#define HB_VERSION_MICRO 2 /** * HB_VERSION_STRING: * * A string literal containing the library version available at compile-time. 
*/ -#define HB_VERSION_STRING "3.1.1" +#define HB_VERSION_STRING "3.1.2" /** * HB_VERSION_ATLEAST: diff --git a/thirdparty/libogg/framing.c b/thirdparty/libogg/framing.c index 83601199ad..724d116d7f 100644 --- a/thirdparty/libogg/framing.c +++ b/thirdparty/libogg/framing.c @@ -597,9 +597,14 @@ char *ogg_sync_buffer(ogg_sync_state *oy, long size){ if(size>oy->storage-oy->fill){ /* We need to extend the internal buffer */ - long newsize=size+oy->fill+4096; /* an extra page to be nice */ + long newsize; void *ret; + if(size>INT_MAX-4096-oy->fill){ + ogg_sync_clear(oy); + return NULL; + } + newsize=size+oy->fill+4096; /* an extra page to be nice */ if(oy->data) ret=_ogg_realloc(oy->data,newsize); else @@ -1564,7 +1569,7 @@ void test_pack(const int *pl, const int **headers, int byteskip, byteskipcount=byteskip; } - ogg_sync_wrote(&oy,next-buf); + ogg_sync_wrote(&oy,(long)(next-buf)); while(1){ int ret=ogg_sync_pageout(&oy,&og_de); diff --git a/thirdparty/libogg/ogg/config_types.h b/thirdparty/libogg/ogg/config_types.h index 4dc8393fdc..1a87df6423 100644 --- a/thirdparty/libogg/ogg/config_types.h +++ b/thirdparty/libogg/ogg/config_types.h @@ -1,7 +1,20 @@ #ifndef __CONFIG_TYPES_H__ #define __CONFIG_TYPES_H__ -#include <stdint.h> +/* these are filled in by configure or cmake*/ +#define INCLUDE_INTTYPES_H 1 +#define INCLUDE_STDINT_H 1 +#define INCLUDE_SYS_TYPES_H 1 + +#if INCLUDE_INTTYPES_H +# include <inttypes.h> +#endif +#if INCLUDE_STDINT_H +# include <stdint.h> +#endif +#if INCLUDE_SYS_TYPES_H +# include <sys/types.h> +#endif typedef int16_t ogg_int16_t; typedef uint16_t ogg_uint16_t; diff --git a/thirdparty/libvorbis/COPYING b/thirdparty/libvorbis/COPYING index 153b926a15..fb456a87bd 100644 --- a/thirdparty/libvorbis/COPYING +++ b/thirdparty/libvorbis/COPYING @@ -1,4 +1,4 @@ -Copyright (c) 2002-2018 Xiph.org Foundation +Copyright (c) 2002-2020 Xiph.org Foundation Redistribution and use in source and binary forms, with or without modification, are permitted provided 
that the following conditions diff --git a/thirdparty/libvorbis/analysis.c b/thirdparty/libvorbis/analysis.c index 0e11a167be..14919737eb 100644 --- a/thirdparty/libvorbis/analysis.c +++ b/thirdparty/libvorbis/analysis.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/backends.h b/thirdparty/libvorbis/backends.h index 22809d46d5..670b0b902e 100644 --- a/thirdparty/libvorbis/backends.h +++ b/thirdparty/libvorbis/backends.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/barkmel.c b/thirdparty/libvorbis/barkmel.c index 4b19935f30..f833c30211 100644 --- a/thirdparty/libvorbis/barkmel.c +++ b/thirdparty/libvorbis/barkmel.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/bitrate.c b/thirdparty/libvorbis/bitrate.c index 96055140f7..132553cbee 100644 --- a/thirdparty/libvorbis/bitrate.c +++ b/thirdparty/libvorbis/bitrate.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/bitrate.h b/thirdparty/libvorbis/bitrate.h index 655a68cc09..48fa150596 100644 --- a/thirdparty/libvorbis/bitrate.h +++ b/thirdparty/libvorbis/bitrate.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/block.c b/thirdparty/libvorbis/block.c index db245b3e69..6a50da0843 100644 --- a/thirdparty/libvorbis/block.c +++ b/thirdparty/libvorbis/block.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2015 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/books/coupled/res_books_51.h b/thirdparty/libvorbis/books/coupled/res_books_51.h index 47df4b221b..eb569c6f04 100644 --- a/thirdparty/libvorbis/books/coupled/res_books_51.h +++ b/thirdparty/libvorbis/books/coupled/res_books_51.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2010 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** * diff --git a/thirdparty/libvorbis/books/coupled/res_books_stereo.h b/thirdparty/libvorbis/books/coupled/res_books_stereo.h index 61d934046d..7b53cb972b 100644 --- a/thirdparty/libvorbis/books/coupled/res_books_stereo.h +++ b/thirdparty/libvorbis/books/coupled/res_books_stereo.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/books/floor/floor_books.h b/thirdparty/libvorbis/books/floor/floor_books.h index 67d5f31a3b..d26664f766 100644 --- a/thirdparty/libvorbis/books/floor/floor_books.h +++ b/thirdparty/libvorbis/books/floor/floor_books.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/books/uncoupled/res_books_uncoupled.h b/thirdparty/libvorbis/books/uncoupled/res_books_uncoupled.h index 3d658ec470..107e22f9e3 100644 --- a/thirdparty/libvorbis/books/uncoupled/res_books_uncoupled.h +++ b/thirdparty/libvorbis/books/uncoupled/res_books_uncoupled.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/codebook.c b/thirdparty/libvorbis/codebook.c index 78672e222d..7a0c206783 100644 --- a/thirdparty/libvorbis/codebook.c +++ b/thirdparty/libvorbis/codebook.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2015 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/codebook.h b/thirdparty/libvorbis/codebook.h index 08440c6962..7d4e2aae4f 100644 --- a/thirdparty/libvorbis/codebook.h +++ b/thirdparty/libvorbis/codebook.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2015 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/codec_internal.h b/thirdparty/libvorbis/codec_internal.h index e522be18da..2ecf5e5c73 100644 --- a/thirdparty/libvorbis/codec_internal.h +++ b/thirdparty/libvorbis/codec_internal.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/envelope.c b/thirdparty/libvorbis/envelope.c index da75237542..22d39aa6e0 100644 --- a/thirdparty/libvorbis/envelope.c +++ b/thirdparty/libvorbis/envelope.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/envelope.h b/thirdparty/libvorbis/envelope.h index f466efde8a..2ef60a82ca 100644 --- a/thirdparty/libvorbis/envelope.h +++ b/thirdparty/libvorbis/envelope.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/floor0.c b/thirdparty/libvorbis/floor0.c index 443c0e5a96..f4a6d4d559 100644 --- a/thirdparty/libvorbis/floor0.c +++ b/thirdparty/libvorbis/floor0.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2015 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/floor1.c b/thirdparty/libvorbis/floor1.c index 673e954c53..c4fe3ea7e7 100644 --- a/thirdparty/libvorbis/floor1.c +++ b/thirdparty/libvorbis/floor1.c @@ -6,7 +6,7 @@ * IN 'COPYING'. 
PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2015 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/highlevel.h b/thirdparty/libvorbis/highlevel.h index 337b75bfa4..7690e3ebfb 100644 --- a/thirdparty/libvorbis/highlevel.h +++ b/thirdparty/libvorbis/highlevel.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/info.c b/thirdparty/libvorbis/info.c index 3fbb7c757a..f2e39e387e 100644 --- a/thirdparty/libvorbis/info.c +++ b/thirdparty/libvorbis/info.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2015 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** @@ -19,7 +19,6 @@ #include <stdlib.h> #include <string.h> -#include <ctype.h> #include <ogg/ogg.h> #include "vorbis/codec.h" #include "codec_internal.h" @@ -30,8 +29,8 @@ #include "misc.h" #include "os.h" -#define GENERAL_VENDOR_STRING "Xiph.Org libVorbis 1.3.6" -#define ENCODE_VENDOR_STRING "Xiph.Org libVorbis I 20180316 (Now 100% fewer shells)" +#define GENERAL_VENDOR_STRING "Xiph.Org libVorbis 1.3.7" +#define ENCODE_VENDOR_STRING "Xiph.Org libVorbis I 20200704 (Reducing Environment)" /* helpers */ static void _v_writestring(oggpack_buffer *o,const char *s, int bytes){ @@ -47,6 +46,10 @@ static void _v_readstring(oggpack_buffer *o,char *buf,int bytes){ } } +static int _v_toupper(int c) { + return (c >= 'a' && c <= 'z') ? (c & ~('a' - 'A')) : c; +} + void vorbis_comment_init(vorbis_comment *vc){ memset(vc,0,sizeof(*vc)); } @@ -78,7 +81,7 @@ void vorbis_comment_add_tag(vorbis_comment *vc, const char *tag, const char *con static int tagcompare(const char *s1, const char *s2, int n){ int c=0; while(c < n){ - if(toupper(s1[c]) != toupper(s2[c])) + if(_v_toupper(s1[c]) != _v_toupper(s2[c])) return !0; c++; } @@ -203,6 +206,7 @@ void vorbis_info_clear(vorbis_info *vi){ static int _vorbis_unpack_info(vorbis_info *vi,oggpack_buffer *opb){ codec_setup_info *ci=vi->codec_setup; + int bs; if(!ci)return(OV_EFAULT); vi->version=oggpack_read(opb,32); @@ -215,8 +219,12 @@ static int _vorbis_unpack_info(vorbis_info *vi,oggpack_buffer *opb){ vi->bitrate_nominal=(ogg_int32_t)oggpack_read(opb,32); vi->bitrate_lower=(ogg_int32_t)oggpack_read(opb,32); - ci->blocksizes[0]=1<<oggpack_read(opb,4); - ci->blocksizes[1]=1<<oggpack_read(opb,4); + bs = oggpack_read(opb,4); + if(bs<0)goto err_out; + ci->blocksizes[0]=1<<bs; + bs = oggpack_read(opb,4); + 
if(bs<0)goto err_out; + ci->blocksizes[1]=1<<bs; if(vi->rate<1)goto err_out; if(vi->channels<1)goto err_out; diff --git a/thirdparty/libvorbis/lookup.c b/thirdparty/libvorbis/lookup.c index 1cc1f88ee9..7cd01a44d3 100644 --- a/thirdparty/libvorbis/lookup.c +++ b/thirdparty/libvorbis/lookup.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/lookup.h b/thirdparty/libvorbis/lookup.h index 4bc0f3a206..ec05014f44 100644 --- a/thirdparty/libvorbis/lookup.h +++ b/thirdparty/libvorbis/lookup.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/lookup_data.h b/thirdparty/libvorbis/lookup_data.h index 5de3cfdc7e..7935715a70 100644 --- a/thirdparty/libvorbis/lookup_data.h +++ b/thirdparty/libvorbis/lookup_data.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/lpc.c b/thirdparty/libvorbis/lpc.c index 798f4cf076..877da47f8e 100644 --- a/thirdparty/libvorbis/lpc.c +++ b/thirdparty/libvorbis/lpc.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/lpc.h b/thirdparty/libvorbis/lpc.h index 9cc79451b6..4f59e6d32d 100644 --- a/thirdparty/libvorbis/lpc.h +++ b/thirdparty/libvorbis/lpc.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/lsp.c b/thirdparty/libvorbis/lsp.c index 8588054515..8afa305f50 100644 --- a/thirdparty/libvorbis/lsp.c +++ b/thirdparty/libvorbis/lsp.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** @@ -15,9 +15,10 @@ The LSP generation code is taken (with minimal modification and a few bugfixes) from "On the Computation of the LSP Frequencies" by Joseph Rothweiler (see http://www.rothweiler.us for contact info). + The paper is available at: - http://www.myown1.com/joe/lsf + https://web.archive.org/web/20110810174000/http://home.myfairpoint.net/vzenxj75/myown1/joe/lsf/index.html ********************************************************************/ diff --git a/thirdparty/libvorbis/lsp.h b/thirdparty/libvorbis/lsp.h index 8a8d10e978..68b38daf16 100644 --- a/thirdparty/libvorbis/lsp.h +++ b/thirdparty/libvorbis/lsp.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/mapping0.c b/thirdparty/libvorbis/mapping0.c index ccb4493d4c..efa0fbcd93 100644 --- a/thirdparty/libvorbis/mapping0.c +++ b/thirdparty/libvorbis/mapping0.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2010 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/masking.h b/thirdparty/libvorbis/masking.h index 955e18c719..7a196a37eb 100644 --- a/thirdparty/libvorbis/masking.h +++ b/thirdparty/libvorbis/masking.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/mdct.c b/thirdparty/libvorbis/mdct.c index f3f1ed805b..2a0ff8d01b 100644 --- a/thirdparty/libvorbis/mdct.c +++ b/thirdparty/libvorbis/mdct.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/mdct.h b/thirdparty/libvorbis/mdct.h index 3b8c9ba4a2..ceaea617a3 100644 --- a/thirdparty/libvorbis/mdct.h +++ b/thirdparty/libvorbis/mdct.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/misc.h b/thirdparty/libvorbis/misc.h index 13788445a3..eac5160e88 100644 --- a/thirdparty/libvorbis/misc.h +++ b/thirdparty/libvorbis/misc.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2015 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/floor_all.h b/thirdparty/libvorbis/modes/floor_all.h index 20928aac87..2e3d4a5012 100644 --- a/thirdparty/libvorbis/modes/floor_all.h +++ b/thirdparty/libvorbis/modes/floor_all.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/psych_11.h b/thirdparty/libvorbis/modes/psych_11.h index cc5eea2402..9d8ed357ee 100644 --- a/thirdparty/libvorbis/modes/psych_11.h +++ b/thirdparty/libvorbis/modes/psych_11.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/psych_16.h b/thirdparty/libvorbis/modes/psych_16.h index 477cb4d90f..49cbf7c4b2 100644 --- a/thirdparty/libvorbis/modes/psych_16.h +++ b/thirdparty/libvorbis/modes/psych_16.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/psych_44.h b/thirdparty/libvorbis/modes/psych_44.h index 6c9eaa4e5f..d15509b71d 100644 --- a/thirdparty/libvorbis/modes/psych_44.h +++ b/thirdparty/libvorbis/modes/psych_44.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/psych_8.h b/thirdparty/libvorbis/modes/psych_8.h index 277db8436c..a19817f760 100644 --- a/thirdparty/libvorbis/modes/psych_8.h +++ b/thirdparty/libvorbis/modes/psych_8.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/residue_16.h b/thirdparty/libvorbis/modes/residue_16.h index 3e05471cec..15e161c862 100644 --- a/thirdparty/libvorbis/modes/residue_16.h +++ b/thirdparty/libvorbis/modes/residue_16.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/residue_44.h b/thirdparty/libvorbis/modes/residue_44.h index e89bc0e486..3f982695a7 100644 --- a/thirdparty/libvorbis/modes/residue_44.h +++ b/thirdparty/libvorbis/modes/residue_44.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/residue_44p51.h b/thirdparty/libvorbis/modes/residue_44p51.h index 7f33e250e2..8ac5f65e62 100644 --- a/thirdparty/libvorbis/modes/residue_44p51.h +++ b/thirdparty/libvorbis/modes/residue_44p51.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2010 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/residue_44u.h b/thirdparty/libvorbis/modes/residue_44u.h index e55ac12548..2f3595e49f 100644 --- a/thirdparty/libvorbis/modes/residue_44u.h +++ b/thirdparty/libvorbis/modes/residue_44u.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/residue_8.h b/thirdparty/libvorbis/modes/residue_8.h index ae123a276a..b836f79c84 100644 --- a/thirdparty/libvorbis/modes/residue_8.h +++ b/thirdparty/libvorbis/modes/residue_8.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/setup_11.h b/thirdparty/libvorbis/modes/setup_11.h index 0cbcaafcb2..5ade5dd169 100644 --- a/thirdparty/libvorbis/modes/setup_11.h +++ b/thirdparty/libvorbis/modes/setup_11.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/setup_16.h b/thirdparty/libvorbis/modes/setup_16.h index d59ad70d2e..8b2daafa3f 100644 --- a/thirdparty/libvorbis/modes/setup_16.h +++ b/thirdparty/libvorbis/modes/setup_16.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/setup_22.h b/thirdparty/libvorbis/modes/setup_22.h index bc38af9630..eef5a4e7da 100644 --- a/thirdparty/libvorbis/modes/setup_22.h +++ b/thirdparty/libvorbis/modes/setup_22.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/setup_32.h b/thirdparty/libvorbis/modes/setup_32.h index f66a0bcd00..f87cb767d0 100644 --- a/thirdparty/libvorbis/modes/setup_32.h +++ b/thirdparty/libvorbis/modes/setup_32.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/setup_44.h b/thirdparty/libvorbis/modes/setup_44.h index a189b5fb95..12d592808e 100644 --- a/thirdparty/libvorbis/modes/setup_44.h +++ b/thirdparty/libvorbis/modes/setup_44.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/setup_44p51.h b/thirdparty/libvorbis/modes/setup_44p51.h index 3bde7b340c..4d49173ffb 100644 --- a/thirdparty/libvorbis/modes/setup_44p51.h +++ b/thirdparty/libvorbis/modes/setup_44p51.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2010 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/setup_44u.h b/thirdparty/libvorbis/modes/setup_44u.h index 7ae3af6b2a..2dd8bf701f 100644 --- a/thirdparty/libvorbis/modes/setup_44u.h +++ b/thirdparty/libvorbis/modes/setup_44u.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/setup_8.h b/thirdparty/libvorbis/modes/setup_8.h index 7502556879..16b02e01b7 100644 --- a/thirdparty/libvorbis/modes/setup_8.h +++ b/thirdparty/libvorbis/modes/setup_8.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/modes/setup_X.h b/thirdparty/libvorbis/modes/setup_X.h index 2229a5ef2f..27807c10b4 100644 --- a/thirdparty/libvorbis/modes/setup_X.h +++ b/thirdparty/libvorbis/modes/setup_X.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/os.h b/thirdparty/libvorbis/os.h index 416a401dd1..9ded7358d4 100644 --- a/thirdparty/libvorbis/os.h +++ b/thirdparty/libvorbis/os.h @@ -8,7 +8,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2015 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** @@ -60,7 +60,7 @@ void *_alloca(size_t size); # define FAST_HYPOT hypot #endif -#endif +#endif /* _V_IFDEFJAIL_H_ */ #ifdef HAVE_ALLOCA_H # include <alloca.h> @@ -80,7 +80,7 @@ void *_alloca(size_t size); /* Special i386 GCC implementation */ -#if defined(__i386__) && defined(__GNUC__) && !defined(__BEOS__) +#if defined(__i386__) && defined(__GNUC__) && !defined(__BEOS__) && !defined(__SSE2_MATH__) # define VORBIS_FPU_CONTROL /* both GCC and MSVC are kinda stupid about rounding/casting to int. Because of encapsulation constraints (GCC can't see inside the asm @@ -119,8 +119,7 @@ static inline int vorbis_ftoi(double f){ /* yes, double! Otherwise, /* MSVC inline assembly. 32 bit only; inline ASM isn't implemented in the * 64 bit compiler and doesn't work on arm. */ -#if defined(_MSC_VER) && !defined(_WIN64) && \ - !defined(_WIN32_WCE) && !defined(_M_ARM) +#if defined(_MSC_VER) && defined(_M_IX86) && !defined(_WIN32_WCE) # define VORBIS_FPU_CONTROL typedef ogg_int16_t vorbis_fpu_control; @@ -147,7 +146,7 @@ static __inline void vorbis_fpu_restore(vorbis_fpu_control fpu){ /* Optimized code path for x86_64 builds. Uses SSE2 intrinsics. This can be done safely because all x86_64 CPUs supports SSE2. 
*/ -#if (defined(_MSC_VER) && defined(_WIN64)) || (defined(__GNUC__) && defined (__x86_64__)) +#if (defined(_MSC_VER) && defined(_M_X64)) || (defined(__GNUC__) && defined (__SSE2_MATH__)) # define VORBIS_FPU_CONTROL typedef ogg_int16_t vorbis_fpu_control; @@ -174,7 +173,7 @@ static __inline void vorbis_fpu_restore(vorbis_fpu_control fpu){ typedef int vorbis_fpu_control; -static int vorbis_ftoi(double f){ +STIN int vorbis_ftoi(double f){ /* Note: MSVC and GCC (at least on some systems) round towards zero, thus, the floor() call is required to ensure correct roudning of negative numbers */ diff --git a/thirdparty/libvorbis/psy.c b/thirdparty/libvorbis/psy.c index 422c6f1e41..036b094aa7 100644 --- a/thirdparty/libvorbis/psy.c +++ b/thirdparty/libvorbis/psy.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2010 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** @@ -599,11 +599,12 @@ static void bark_noise_hybridmp(int n,const long *b, XY[i] = tXY; } - for (i = 0, x = 0.f;; i++, x += 1.f) { + for (i = 0, x = 0.f; i < n; i++, x += 1.f) { lo = b[i] >> 16; - if( lo>=0 ) break; hi = b[i] & 0xffff; + if( lo>=0 || -lo>=n ) break; + if( hi>=n ) break; tN = N[hi] + N[-lo]; tX = X[hi] - X[-lo]; @@ -615,17 +616,17 @@ static void bark_noise_hybridmp(int n,const long *b, B = tN * tXY - tX * tY; D = tN * tXX - tX * tX; R = (A + x * B) / D; - if (R < 0.f) - R = 0.f; + if (R < 0.f) R = 0.f; noise[i] = R - offset; } - for ( ;; i++, x += 1.f) { + for ( ; i < n; i++, x += 1.f) { lo = b[i] >> 16; hi = b[i] & 0xffff; - if(hi>=n)break; + if( lo<0 || lo>=n ) break; + if( hi>=n ) break; tN = N[hi] - N[lo]; tX = X[hi] - X[lo]; @@ -641,6 +642,7 @@ static void bark_noise_hybridmp(int n,const long *b, noise[i] = R - offset; } + for ( ; i < n; i++, x += 1.f) { R = (A + x * B) / D; @@ 
-651,10 +653,11 @@ static void bark_noise_hybridmp(int n,const long *b, if (fixed <= 0) return; - for (i = 0, x = 0.f;; i++, x += 1.f) { + for (i = 0, x = 0.f; i < n; i++, x += 1.f) { hi = i + fixed / 2; lo = hi - fixed; - if(lo>=0)break; + if ( hi>=n ) break; + if ( lo>=0 ) break; tN = N[hi] + N[-lo]; tX = X[hi] - X[-lo]; @@ -670,11 +673,12 @@ static void bark_noise_hybridmp(int n,const long *b, if (R - offset < noise[i]) noise[i] = R - offset; } - for ( ;; i++, x += 1.f) { + for ( ; i < n; i++, x += 1.f) { hi = i + fixed / 2; lo = hi - fixed; - if(hi>=n)break; + if ( hi>=n ) break; + if ( lo<0 ) break; tN = N[hi] - N[lo]; tX = X[hi] - X[lo]; diff --git a/thirdparty/libvorbis/psy.h b/thirdparty/libvorbis/psy.h index ab2534db3a..d9a04e8b74 100644 --- a/thirdparty/libvorbis/psy.h +++ b/thirdparty/libvorbis/psy.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/psytune.c b/thirdparty/libvorbis/psytune.c index 6952136c6b..67223e5118 100644 --- a/thirdparty/libvorbis/psytune.c +++ b/thirdparty/libvorbis/psytune.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/registry.c b/thirdparty/libvorbis/registry.c index 74f7ef0396..db0f67b2e2 100644 --- a/thirdparty/libvorbis/registry.c +++ b/thirdparty/libvorbis/registry.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/registry.h b/thirdparty/libvorbis/registry.h index 599d959942..b823aa6091 100644 --- a/thirdparty/libvorbis/registry.h +++ b/thirdparty/libvorbis/registry.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/res0.c b/thirdparty/libvorbis/res0.c index 6d623d730f..c931aded38 100644 --- a/thirdparty/libvorbis/res0.c +++ b/thirdparty/libvorbis/res0.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2010 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** @@ -30,9 +30,6 @@ #include "misc.h" #include "os.h" -//#define TRAIN_RES 1 -//#define TRAIN_RESAUX 1 - #if defined(TRAIN_RES) || defined (TRAIN_RESAUX) #include <stdio.h> #endif diff --git a/thirdparty/libvorbis/scales.h b/thirdparty/libvorbis/scales.h index 18bc4e7518..3c2ae48d9e 100644 --- a/thirdparty/libvorbis/scales.h +++ b/thirdparty/libvorbis/scales.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/sharedbook.c b/thirdparty/libvorbis/sharedbook.c index 4545d4f459..62a9a00afb 100644 --- a/thirdparty/libvorbis/sharedbook.c +++ b/thirdparty/libvorbis/sharedbook.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2015 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** @@ -50,7 +50,7 @@ long _float32_pack(float val){ sign=0x80000000; val= -val; } - exp= floor(log(val)/log(2.f)+.001); //+epsilon + exp= floor(log(val)/log(2.f)+.001); /* +epsilon */ mant=rint(ldexp(val,(VQ_FMAN-1)-exp)); exp=(exp+VQ_FEXP_BIAS)<<VQ_FMAN; @@ -62,7 +62,15 @@ float _float32_unpack(long val){ int sign=val&0x80000000; long exp =(val&0x7fe00000L)>>VQ_FMAN; if(sign)mant= -mant; - return(ldexp(mant,exp-(VQ_FMAN-1)-VQ_FEXP_BIAS)); + exp=exp-(VQ_FMAN-1)-VQ_FEXP_BIAS; + /* clamp excessive exponent values */ + if (exp>63){ + exp=63; + } + if (exp<-63){ + exp=-63; + } + return(ldexp(mant,exp)); } /* given a list of word lengths, generate a list of codewords. 
Works @@ -294,7 +302,7 @@ int vorbis_book_init_encode(codebook *c,const static_codebook *s){ c->used_entries=s->entries; c->dim=s->dim; c->codelist=_make_words(s->lengthlist,s->entries,0); - //c->valuelist=_book_unquantize(s,s->entries,NULL); + /* c->valuelist=_book_unquantize(s,s->entries,NULL); */ c->quantvals=_book_maptype1_quantvals(s); c->minval=(int)rint(_float32_unpack(s->q_min)); c->delta=(int)rint(_float32_unpack(s->q_delta)); @@ -573,6 +581,7 @@ void run_test(static_codebook *b,float *comp){ exit(1); } } + free(out); } int main(){ diff --git a/thirdparty/libvorbis/smallft.c b/thirdparty/libvorbis/smallft.c index 6d528af423..4ffabab4bb 100644 --- a/thirdparty/libvorbis/smallft.c +++ b/thirdparty/libvorbis/smallft.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/smallft.h b/thirdparty/libvorbis/smallft.h index 9e867c67d2..02fe8f9cd4 100644 --- a/thirdparty/libvorbis/smallft.h +++ b/thirdparty/libvorbis/smallft.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/synthesis.c b/thirdparty/libvorbis/synthesis.c index 5f6092c3d3..3e2d681270 100644 --- a/thirdparty/libvorbis/synthesis.c +++ b/thirdparty/libvorbis/synthesis.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2015 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/vorbis/codec.h b/thirdparty/libvorbis/vorbis/codec.h index 42aa29138e..f8a912bc26 100644 --- a/thirdparty/libvorbis/vorbis/codec.h +++ b/thirdparty/libvorbis/vorbis/codec.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * ******************************************************************** diff --git a/thirdparty/libvorbis/vorbis/vorbisenc.h b/thirdparty/libvorbis/vorbis/vorbisenc.h index 55f3b4a667..085b15e669 100644 --- a/thirdparty/libvorbis/vorbis/vorbisenc.h +++ b/thirdparty/libvorbis/vorbis/vorbisenc.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/vorbis/vorbisfile.h b/thirdparty/libvorbis/vorbis/vorbisfile.h index 56626119bb..3d65393f52 100644 --- a/thirdparty/libvorbis/vorbis/vorbisfile.h +++ b/thirdparty/libvorbis/vorbis/vorbisfile.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/vorbisenc.c b/thirdparty/libvorbis/vorbisenc.c index 4a4607cb41..cf3806a6e1 100644 --- a/thirdparty/libvorbis/vorbisenc.c +++ b/thirdparty/libvorbis/vorbisenc.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2015 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** @@ -684,6 +684,7 @@ int vorbis_encode_setup_init(vorbis_info *vi){ highlevel_encode_setup *hi=&ci->hi; if(ci==NULL)return(OV_EINVAL); + if(vi->channels<1||vi->channels>255)return(OV_EINVAL); if(!hi->impulse_block_p)i0=1; /* too low/high an ATH floater is nonsensical, but doesn't break anything */ @@ -1210,7 +1211,7 @@ int vorbis_encode_ctl(vorbis_info *vi,int number,void *arg){ hi->req, hi->managed, &new_base); - if(!hi->setup)return OV_EIMPL; + if(!new_template)return OV_EIMPL; hi->setup=new_template; hi->base_setting=new_base; vorbis_encode_setup_setting(vi,vi->channels,vi->rate); diff --git a/thirdparty/libvorbis/vorbisfile.c b/thirdparty/libvorbis/vorbisfile.c index b570c3c5f6..9219c2f2dc 100644 --- a/thirdparty/libvorbis/vorbisfile.c +++ b/thirdparty/libvorbis/vorbisfile.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2015 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** @@ -264,6 +264,10 @@ static ogg_int64_t _get_prev_page_serial(OggVorbis_File *vf, ogg_int64_t begin, } } } + /*We started from the beginning of the stream and found nothing. + This should be impossible unless the contents of the stream changed out + from under us after we read from it.*/ + if(!begin&&vf->offset<0)return OV_EBADLINK; } /* we're not interested in the page... just the serialno and granpos. */ @@ -1230,7 +1234,6 @@ double ov_time_total(OggVorbis_File *vf,int i){ int ov_raw_seek(OggVorbis_File *vf,ogg_int64_t pos){ ogg_stream_state work_os; - int ret; if(vf->ready_state<OPENED)return(OV_EINVAL); if(!vf->seekable) @@ -1253,8 +1256,12 @@ int ov_raw_seek(OggVorbis_File *vf,ogg_int64_t pos){ vf->current_serialno); /* must set serialno */ vorbis_synthesis_restart(&vf->vd); - ret=_seek_helper(vf,pos); - if(ret)goto seek_error; + if(_seek_helper(vf,pos)) { + /* dump the machine so we're in a known state */ + vf->pcm_offset=-1; + _decode_clear(vf); + return OV_EBADLINK; + } /* we need to make sure the pcm_offset is set, but we don't want to advance the raw cursor past good packets just to get to the first @@ -1388,13 +1395,6 @@ int ov_raw_seek(OggVorbis_File *vf,ogg_int64_t pos){ vf->bittrack=0.f; vf->samptrack=0.f; return(0); - - seek_error: - /* dump the machine so we're in a known state */ - vf->pcm_offset=-1; - ogg_stream_clear(&work_os); - _decode_clear(vf); - return OV_EBADLINK; } /* Page granularity seek (faster than sample granularity because we @@ -1964,6 +1964,7 @@ long ov_read_filter(OggVorbis_File *vf,char *buffer,int length, long samples; if(vf->ready_state<OPENED)return(OV_EINVAL); + if(word<=0)return(OV_EINVAL); while(1){ if(vf->ready_state==INITSET){ @@ -1989,6 +1990,8 @@ long ov_read_filter(OggVorbis_File 
*vf,char *buffer,int length, long channels=ov_info(vf,-1)->channels; long bytespersample=word * channels; vorbis_fpu_control fpu; + + if(channels<1||channels>255)return(OV_EINVAL); if(samples>length/bytespersample)samples=length/bytespersample; if(samples <= 0) diff --git a/thirdparty/libvorbis/window.c b/thirdparty/libvorbis/window.c index b3b7ce0163..2151b278d1 100644 --- a/thirdparty/libvorbis/window.c +++ b/thirdparty/libvorbis/window.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libvorbis/window.h b/thirdparty/libvorbis/window.h index 6ac260749e..33d83f85f9 100644 --- a/thirdparty/libvorbis/window.h +++ b/thirdparty/libvorbis/window.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation https://xiph.org/ * * * ******************************************************************** diff --git a/thirdparty/libwebp/AUTHORS b/thirdparty/libwebp/AUTHORS index 0d70b7fb2a..30abde0326 100644 --- a/thirdparty/libwebp/AUTHORS +++ b/thirdparty/libwebp/AUTHORS @@ -1,9 +1,15 @@ Contributors: +- Aidan O'Loan (aidanol at gmail dot com) - Alan Browning (browning at google dot com) - Charles Munger (clm at google dot com) +- Cheng Yi (cyi at google dot com) - Christian Duvivier (cduvivier at google dot com) +- Christopher Degawa (ccom at randomderp dot com) +- Clement Courbet (courbet at google dot com) - Djordje Pesut (djordje dot pesut at imgtec dot com) - Hui Su (huisu at google dot com) +- Ilya Kurdyukov (jpegqs at gmail dot com) +- Ingvar Stepanyan (rreverser at google dot com) - James Zern (jzern at google dot com) - Jan Engelhardt (jengelh at medozas dot de) - Jehan (jehan at girinstud dot io) @@ -20,6 +26,7 @@ Contributors: - Mislav Bradac (mislavm at google dot com) - Nico Weber (thakis at chromium dot org) - Noel Chromium (noel at chromium dot org) +- Oliver Wolff (oliver dot wolff at qt dot io) - Owen Rodley (orodley at google dot com) - Parag Salasakar (img dot mips1 at gmail dot com) - Pascal Massimino (pascal dot massimino at gmail dot com) @@ -38,5 +45,7 @@ Contributors: - Vikas Arora (vikasa at google dot com) - Vincent Rabaud (vrabaud at google dot com) - Vlad Tsyrklevich (vtsyrklevich at chromium dot org) +- Wan-Teh Chang (wtc at google dot com) - Yang Zhang (yang dot zhang at arm dot com) - Yannis Guyon (yguyon at google dot com) +- Zhi An Ng (zhin at chromium dot org) diff --git a/thirdparty/libwebp/src/dec/alpha_dec.c b/thirdparty/libwebp/src/dec/alpha_dec.c index bce735bfc2..0b93a30b32 100644 --- a/thirdparty/libwebp/src/dec/alpha_dec.c +++ b/thirdparty/libwebp/src/dec/alpha_dec.c @@ -183,7 +183,7 @@ 
const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec, assert(dec != NULL && io != NULL); if (row < 0 || num_rows <= 0 || row + num_rows > height) { - return NULL; // sanity check. + return NULL; } if (!dec->is_alpha_decoded_) { diff --git a/thirdparty/libwebp/src/dec/buffer_dec.c b/thirdparty/libwebp/src/dec/buffer_dec.c index 3cd94eb4d9..4786cf0ddb 100644 --- a/thirdparty/libwebp/src/dec/buffer_dec.c +++ b/thirdparty/libwebp/src/dec/buffer_dec.c @@ -102,7 +102,7 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { int stride; uint64_t size; - if ((uint64_t)w * kModeBpp[mode] >= (1ull << 32)) { + if ((uint64_t)w * kModeBpp[mode] >= (1ull << 31)) { return VP8_STATUS_INVALID_PARAM; } stride = w * kModeBpp[mode]; @@ -117,7 +117,6 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { } total_size = size + 2 * uv_size + a_size; - // Security/sanity checks output = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*output)); if (output == NULL) { return VP8_STATUS_OUT_OF_MEMORY; @@ -156,11 +155,11 @@ VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) { } if (WebPIsRGBMode(buffer->colorspace)) { WebPRGBABuffer* const buf = &buffer->u.RGBA; - buf->rgba += (buffer->height - 1) * buf->stride; + buf->rgba += (int64_t)(buffer->height - 1) * buf->stride; buf->stride = -buf->stride; } else { WebPYUVABuffer* const buf = &buffer->u.YUVA; - const int H = buffer->height; + const int64_t H = buffer->height; buf->y += (H - 1) * buf->y_stride; buf->y_stride = -buf->y_stride; buf->u += ((H - 1) >> 1) * buf->u_stride; @@ -188,8 +187,7 @@ VP8StatusCode WebPAllocateDecBuffer(int width, int height, const int ch = options->crop_height; const int x = options->crop_left & ~1; const int y = options->crop_top & ~1; - if (x < 0 || y < 0 || cw <= 0 || ch <= 0 || - x + cw > width || y + ch > height) { + if (!WebPCheckCropDimensions(width, height, x, y, cw, ch)) { return VP8_STATUS_INVALID_PARAM; // out of frame boundary. 
} width = cw; diff --git a/thirdparty/libwebp/src/dec/frame_dec.c b/thirdparty/libwebp/src/dec/frame_dec.c index 04609a8e56..91ca1f8609 100644 --- a/thirdparty/libwebp/src/dec/frame_dec.c +++ b/thirdparty/libwebp/src/dec/frame_dec.c @@ -705,7 +705,7 @@ static int AllocateMemory(VP8Decoder* const dec) { + cache_size + alpha_size + WEBP_ALIGN_CST; uint8_t* mem; - if (needed != (size_t)needed) return 0; // check for overflow + if (!CheckSizeOverflow(needed)) return 0; // check for overflow if (needed > dec->mem_size_) { WebPSafeFree(dec->mem_); dec->mem_size_ = 0; diff --git a/thirdparty/libwebp/src/dec/io_dec.c b/thirdparty/libwebp/src/dec/io_dec.c index e603f19c98..5ef6298886 100644 --- a/thirdparty/libwebp/src/dec/io_dec.c +++ b/thirdparty/libwebp/src/dec/io_dec.c @@ -25,21 +25,16 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) { WebPDecBuffer* output = p->output; const WebPYUVABuffer* const buf = &output->u.YUVA; - uint8_t* const y_dst = buf->y + io->mb_y * buf->y_stride; - uint8_t* const u_dst = buf->u + (io->mb_y >> 1) * buf->u_stride; - uint8_t* const v_dst = buf->v + (io->mb_y >> 1) * buf->v_stride; + uint8_t* const y_dst = buf->y + (size_t)io->mb_y * buf->y_stride; + uint8_t* const u_dst = buf->u + (size_t)(io->mb_y >> 1) * buf->u_stride; + uint8_t* const v_dst = buf->v + (size_t)(io->mb_y >> 1) * buf->v_stride; const int mb_w = io->mb_w; const int mb_h = io->mb_h; const int uv_w = (mb_w + 1) / 2; const int uv_h = (mb_h + 1) / 2; - int j; - for (j = 0; j < mb_h; ++j) { - memcpy(y_dst + j * buf->y_stride, io->y + j * io->y_stride, mb_w); - } - for (j = 0; j < uv_h; ++j) { - memcpy(u_dst + j * buf->u_stride, io->u + j * io->uv_stride, uv_w); - memcpy(v_dst + j * buf->v_stride, io->v + j * io->uv_stride, uv_w); - } + WebPCopyPlane(io->y, io->y_stride, y_dst, buf->y_stride, mb_w, mb_h); + WebPCopyPlane(io->u, io->uv_stride, u_dst, buf->u_stride, uv_w, uv_h); + WebPCopyPlane(io->v, io->uv_stride, v_dst, buf->v_stride, uv_w, uv_h); return 
io->mb_h; } @@ -47,7 +42,7 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) { static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) { WebPDecBuffer* const output = p->output; WebPRGBABuffer* const buf = &output->u.RGBA; - uint8_t* const dst = buf->rgba + io->mb_y * buf->stride; + uint8_t* const dst = buf->rgba + (size_t)io->mb_y * buf->stride; WebPSamplerProcessPlane(io->y, io->y_stride, io->u, io->v, io->uv_stride, dst, buf->stride, io->mb_w, io->mb_h, @@ -62,7 +57,7 @@ static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) { static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) { int num_lines_out = io->mb_h; // a priori guess const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + io->mb_y * buf->stride; + uint8_t* dst = buf->rgba + (size_t)io->mb_y * buf->stride; WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace]; const uint8_t* cur_y = io->y; const uint8_t* cur_u = io->u; @@ -133,7 +128,7 @@ static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p, const WebPYUVABuffer* const buf = &p->output->u.YUVA; const int mb_w = io->mb_w; const int mb_h = io->mb_h; - uint8_t* dst = buf->a + io->mb_y * buf->a_stride; + uint8_t* dst = buf->a + (size_t)io->mb_y * buf->a_stride; int j; (void)expected_num_lines_out; assert(expected_num_lines_out == mb_h); @@ -186,7 +181,7 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p, (colorspace == MODE_ARGB || colorspace == MODE_Argb); const WebPRGBABuffer* const buf = &p->output->u.RGBA; int num_rows; - const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows); + const size_t start_y = GetAlphaSourceRow(io, &alpha, &num_rows); uint8_t* const base_rgba = buf->rgba + start_y * buf->stride; uint8_t* const dst = base_rgba + (alpha_first ? 
0 : 3); const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w, @@ -210,7 +205,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p, const WEBP_CSP_MODE colorspace = p->output->colorspace; const WebPRGBABuffer* const buf = &p->output->u.RGBA; int num_rows; - const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows); + const size_t start_y = GetAlphaSourceRow(io, &alpha, &num_rows); uint8_t* const base_rgba = buf->rgba + start_y * buf->stride; #if (WEBP_SWAP_16BIT_CSP == 1) uint8_t* alpha_dst = base_rgba; @@ -276,9 +271,9 @@ static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) { static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p, int expected_num_lines_out) { const WebPYUVABuffer* const buf = &p->output->u.YUVA; - uint8_t* const dst_a = buf->a + p->last_y * buf->a_stride; + uint8_t* const dst_a = buf->a + (size_t)p->last_y * buf->a_stride; if (io->a != NULL) { - uint8_t* const dst_y = buf->y + p->last_y * buf->y_stride; + uint8_t* const dst_y = buf->y + (size_t)p->last_y * buf->y_stride; const int num_lines_out = Rescale(io->a, io->width, io->mb_h, p->scaler_a); assert(expected_num_lines_out == num_lines_out); if (num_lines_out > 0) { // unmultiply the Y @@ -303,46 +298,57 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) { const int uv_out_height = (out_height + 1) >> 1; const int uv_in_width = (io->mb_w + 1) >> 1; const int uv_in_height = (io->mb_h + 1) >> 1; - const size_t work_size = 2 * out_width; // scratch memory for luma rescaler + // scratch memory for luma rescaler + const size_t work_size = 2 * (size_t)out_width; const size_t uv_work_size = 2 * uv_out_width; // and for each u/v ones - size_t tmp_size, rescaler_size; + uint64_t total_size; + size_t rescaler_size; rescaler_t* work; WebPRescaler* scalers; const int num_rescalers = has_alpha ? 
4 : 3; - tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work); + total_size = ((uint64_t)work_size + 2 * uv_work_size) * sizeof(*work); if (has_alpha) { - tmp_size += work_size * sizeof(*work); + total_size += (uint64_t)work_size * sizeof(*work); } rescaler_size = num_rescalers * sizeof(*p->scaler_y) + WEBP_ALIGN_CST; + total_size += rescaler_size; + if (!CheckSizeOverflow(total_size)) { + return 0; + } - p->memory = WebPSafeMalloc(1ULL, tmp_size + rescaler_size); + p->memory = WebPSafeMalloc(1ULL, (size_t)total_size); if (p->memory == NULL) { return 0; // memory error } work = (rescaler_t*)p->memory; - scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + tmp_size); + scalers = (WebPRescaler*)WEBP_ALIGN( + (const uint8_t*)work + total_size - rescaler_size); p->scaler_y = &scalers[0]; p->scaler_u = &scalers[1]; p->scaler_v = &scalers[2]; p->scaler_a = has_alpha ? &scalers[3] : NULL; - WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, - buf->y, out_width, out_height, buf->y_stride, 1, - work); - WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, - buf->u, uv_out_width, uv_out_height, buf->u_stride, 1, - work + work_size); - WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, - buf->v, uv_out_width, uv_out_height, buf->v_stride, 1, - work + work_size + uv_work_size); + if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, + buf->y, out_width, out_height, buf->y_stride, 1, + work) || + !WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, + buf->u, uv_out_width, uv_out_height, buf->u_stride, 1, + work + work_size) || + !WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, + buf->v, uv_out_width, uv_out_height, buf->v_stride, 1, + work + work_size + uv_work_size)) { + return 0; + } p->emit = EmitRescaledYUV; if (has_alpha) { - WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, - buf->a, out_width, out_height, buf->a_stride, 1, - work + work_size + 2 * uv_work_size); + if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, + buf->a, 
out_width, out_height, buf->a_stride, 1, + work + work_size + 2 * uv_work_size)) { + return 0; + } p->emit_alpha = EmitRescaledAlphaYUV; WebPInitAlphaProcessing(); } @@ -356,7 +362,7 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) { const WebPYUV444Converter convert = WebPYUV444Converters[p->output->colorspace]; const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + y_pos * buf->stride; + uint8_t* dst = buf->rgba + (size_t)y_pos * buf->stride; int num_lines_out = 0; // For RGB rescaling, because of the YUV420, current scan position // U/V can be +1/-1 line from the Y one. Hence the double test. @@ -383,15 +389,15 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) { while (j < mb_h) { const int y_lines_in = WebPRescalerImport(p->scaler_y, mb_h - j, - io->y + j * io->y_stride, io->y_stride); + io->y + (size_t)j * io->y_stride, io->y_stride); j += y_lines_in; if (WebPRescaleNeededLines(p->scaler_u, uv_mb_h - uv_j)) { - const int u_lines_in = - WebPRescalerImport(p->scaler_u, uv_mb_h - uv_j, - io->u + uv_j * io->uv_stride, io->uv_stride); - const int v_lines_in = - WebPRescalerImport(p->scaler_v, uv_mb_h - uv_j, - io->v + uv_j * io->uv_stride, io->uv_stride); + const int u_lines_in = WebPRescalerImport( + p->scaler_u, uv_mb_h - uv_j, io->u + (size_t)uv_j * io->uv_stride, + io->uv_stride); + const int v_lines_in = WebPRescalerImport( + p->scaler_v, uv_mb_h - uv_j, io->v + (size_t)uv_j * io->uv_stride, + io->uv_stride); (void)v_lines_in; // remove a gcc warning assert(u_lines_in == v_lines_in); uv_j += u_lines_in; @@ -403,7 +409,7 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) { static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) { const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride; + uint8_t* const base_rgba = buf->rgba + (size_t)y_pos * buf->stride; const WEBP_CSP_MODE colorspace = 
p->output->colorspace; const int alpha_first = (colorspace == MODE_ARGB || colorspace == MODE_Argb); @@ -431,7 +437,7 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) { static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos, int max_lines_out) { const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride; + uint8_t* const base_rgba = buf->rgba + (size_t)y_pos * buf->stride; #if (WEBP_SWAP_16BIT_CSP == 1) uint8_t* alpha_dst = base_rgba; #else @@ -470,7 +476,7 @@ static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p, int lines_left = expected_num_out_lines; const int y_end = p->last_y + lines_left; while (lines_left > 0) { - const int row_offset = scaler->src_y - io->mb_y; + const int64_t row_offset = (int64_t)scaler->src_y - io->mb_y; WebPRescalerImport(scaler, io->mb_h + io->mb_y - scaler->src_y, io->a + row_offset * io->width, io->width); lines_left -= p->emit_alpha_row(p, y_end - lines_left, lines_left); @@ -485,51 +491,58 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { const int out_height = io->scaled_height; const int uv_in_width = (io->mb_w + 1) >> 1; const int uv_in_height = (io->mb_h + 1) >> 1; - const size_t work_size = 2 * out_width; // scratch memory for one rescaler + // scratch memory for one rescaler + const size_t work_size = 2 * (size_t)out_width; rescaler_t* work; // rescalers work area uint8_t* tmp; // tmp storage for scaled YUV444 samples before RGB conversion - size_t tmp_size1, tmp_size2, total_size, rescaler_size; + uint64_t tmp_size1, tmp_size2, total_size; + size_t rescaler_size; WebPRescaler* scalers; const int num_rescalers = has_alpha ? 
4 : 3; - tmp_size1 = 3 * work_size; - tmp_size2 = 3 * out_width; - if (has_alpha) { - tmp_size1 += work_size; - tmp_size2 += out_width; - } + tmp_size1 = (uint64_t)num_rescalers * work_size; + tmp_size2 = (uint64_t)num_rescalers * out_width; total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp); rescaler_size = num_rescalers * sizeof(*p->scaler_y) + WEBP_ALIGN_CST; + total_size += rescaler_size; + if (!CheckSizeOverflow(total_size)) { + return 0; + } - p->memory = WebPSafeMalloc(1ULL, total_size + rescaler_size); + p->memory = WebPSafeMalloc(1ULL, (size_t)total_size); if (p->memory == NULL) { return 0; // memory error } work = (rescaler_t*)p->memory; tmp = (uint8_t*)(work + tmp_size1); - scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + total_size); + scalers = (WebPRescaler*)WEBP_ALIGN( + (const uint8_t*)work + total_size - rescaler_size); p->scaler_y = &scalers[0]; p->scaler_u = &scalers[1]; p->scaler_v = &scalers[2]; p->scaler_a = has_alpha ? &scalers[3] : NULL; - WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, - tmp + 0 * out_width, out_width, out_height, 0, 1, - work + 0 * work_size); - WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, - tmp + 1 * out_width, out_width, out_height, 0, 1, - work + 1 * work_size); - WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, - tmp + 2 * out_width, out_width, out_height, 0, 1, - work + 2 * work_size); + if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, + tmp + 0 * out_width, out_width, out_height, 0, 1, + work + 0 * work_size) || + !WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, + tmp + 1 * out_width, out_width, out_height, 0, 1, + work + 1 * work_size) || + !WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, + tmp + 2 * out_width, out_width, out_height, 0, 1, + work + 2 * work_size)) { + return 0; + } p->emit = EmitRescaledRGB; WebPInitYUV444Converters(); if (has_alpha) { - WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, - tmp + 3 * out_width, out_width, 
out_height, 0, 1, - work + 3 * work_size); + if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, + tmp + 3 * out_width, out_width, out_height, 0, 1, + work + 3 * work_size)) { + return 0; + } p->emit_alpha = EmitRescaledAlphaRGB; if (p->output->colorspace == MODE_RGBA_4444 || p->output->colorspace == MODE_rgbA_4444) { diff --git a/thirdparty/libwebp/src/dec/vp8_dec.c b/thirdparty/libwebp/src/dec/vp8_dec.c index 57efb69041..5f405e4c2a 100644 --- a/thirdparty/libwebp/src/dec/vp8_dec.c +++ b/thirdparty/libwebp/src/dec/vp8_dec.c @@ -335,7 +335,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { io->scaled_width = io->width; io->scaled_height = io->height; - io->mb_w = io->width; // sanity check + io->mb_w = io->width; // for soundness io->mb_h = io->height; // ditto VP8ResetProba(&dec->proba_); @@ -494,13 +494,11 @@ static int GetCoeffsAlt(VP8BitReader* const br, return 16; } -static WEBP_TSAN_IGNORE_FUNCTION void InitGetCoeffs(void) { - if (GetCoeffs == NULL) { - if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) { - GetCoeffs = GetCoeffsAlt; - } else { - GetCoeffs = GetCoeffsFast; - } +WEBP_DSP_INIT_FUNC(InitGetCoeffs) { + if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) { + GetCoeffs = GetCoeffsAlt; + } else { + GetCoeffs = GetCoeffsFast; } } diff --git a/thirdparty/libwebp/src/dec/vp8i_dec.h b/thirdparty/libwebp/src/dec/vp8i_dec.h index 600a684410..20526a87c4 100644 --- a/thirdparty/libwebp/src/dec/vp8i_dec.h +++ b/thirdparty/libwebp/src/dec/vp8i_dec.h @@ -31,8 +31,8 @@ extern "C" { // version numbers #define DEC_MAJ_VERSION 1 -#define DEC_MIN_VERSION 1 -#define DEC_REV_VERSION 0 +#define DEC_MIN_VERSION 2 +#define DEC_REV_VERSION 1 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). 
// Constraints are: We need to store one 16x16 block of luma samples (y), diff --git a/thirdparty/libwebp/src/dec/vp8l_dec.c b/thirdparty/libwebp/src/dec/vp8l_dec.c index 93615d4ed2..73c3b54fff 100644 --- a/thirdparty/libwebp/src/dec/vp8l_dec.c +++ b/thirdparty/libwebp/src/dec/vp8l_dec.c @@ -559,8 +559,11 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { memory += work_size * sizeof(*work); scaled_data = (uint32_t*)memory; - WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data, - out_width, out_height, 0, num_channels, work); + if (!WebPRescalerInit(dec->rescaler, in_width, in_height, + (uint8_t*)scaled_data, out_width, out_height, + 0, num_channels, work)) { + return 0; + } return 1; } #endif // WEBP_REDUCE_SIZE @@ -574,13 +577,14 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace, int rgba_stride, uint8_t* const rgba) { uint32_t* const src = (uint32_t*)rescaler->dst; + uint8_t* dst = rgba; const int dst_width = rescaler->dst_width; int num_lines_out = 0; while (WebPRescalerHasPendingOutput(rescaler)) { - uint8_t* const dst = rgba + num_lines_out * rgba_stride; WebPRescalerExportRow(rescaler); WebPMultARGBRow(src, dst_width, 1); VP8LConvertFromBGRA(src, dst_width, colorspace, dst); + dst += rgba_stride; ++num_lines_out; } return num_lines_out; @@ -594,8 +598,8 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec, int num_lines_in = 0; int num_lines_out = 0; while (num_lines_in < mb_h) { - uint8_t* const row_in = in + num_lines_in * in_stride; - uint8_t* const row_out = out + num_lines_out * out_stride; + uint8_t* const row_in = in + (uint64_t)num_lines_in * in_stride; + uint8_t* const row_out = out + (uint64_t)num_lines_out * out_stride; const int lines_left = mb_h - num_lines_in; const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left); int lines_imported; @@ -796,7 +800,8 @@ 
static void ProcessRows(VP8LDecoder* const dec, int row) { const WebPDecBuffer* const output = dec->output_; if (WebPIsRGBMode(output->colorspace)) { // convert to RGBA const WebPRGBABuffer* const buf = &output->u.RGBA; - uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride; + uint8_t* const rgba = + buf->rgba + (int64_t)dec->last_out_row_ * buf->stride; const int num_rows_out = #if !defined(WEBP_REDUCE_SIZE) io->use_scaling ? @@ -947,7 +952,6 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) { break; default: goto Copy; - break; } CopySmallPattern8b(src, dst, length, pattern); return; @@ -1515,7 +1519,7 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) { assert(dec->width_ <= final_width); dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint32_t)); if (dec->pixels_ == NULL) { - dec->argb_cache_ = NULL; // for sanity check + dec->argb_cache_ = NULL; // for soundness dec->status_ = VP8_STATUS_OUT_OF_MEMORY; return 0; } @@ -1525,7 +1529,7 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) { static int AllocateInternalBuffers8b(VP8LDecoder* const dec) { const uint64_t total_num_pixels = (uint64_t)dec->width_ * dec->height_; - dec->argb_cache_ = NULL; // for sanity check + dec->argb_cache_ = NULL; // for soundness dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint8_t)); if (dec->pixels_ == NULL) { dec->status_ = VP8_STATUS_OUT_OF_MEMORY; @@ -1667,7 +1671,6 @@ int VP8LDecodeImage(VP8LDecoder* const dec) { VP8Io* io = NULL; WebPDecParams* params = NULL; - // Sanity checks. 
if (dec == NULL) return 0; assert(dec->hdr_.huffman_tables_ != NULL); diff --git a/thirdparty/libwebp/src/dec/webp_dec.c b/thirdparty/libwebp/src/dec/webp_dec.c index 42d098874d..77a54c55d2 100644 --- a/thirdparty/libwebp/src/dec/webp_dec.c +++ b/thirdparty/libwebp/src/dec/webp_dec.c @@ -785,6 +785,13 @@ VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size, //------------------------------------------------------------------------------ // Cropping and rescaling. +int WebPCheckCropDimensions(int image_width, int image_height, + int x, int y, int w, int h) { + return !(x < 0 || y < 0 || w <= 0 || h <= 0 || + x >= image_width || w > image_width || w > image_width - x || + y >= image_height || h > image_height || h > image_height - y); +} + int WebPIoInitFromOptions(const WebPDecoderOptions* const options, VP8Io* const io, WEBP_CSP_MODE src_colorspace) { const int W = io->width; @@ -792,7 +799,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, int x = 0, y = 0, w = W, h = H; // Cropping - io->use_cropping = (options != NULL) && (options->use_cropping > 0); + io->use_cropping = (options != NULL) && options->use_cropping; if (io->use_cropping) { w = options->crop_width; h = options->crop_height; @@ -802,7 +809,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, x &= ~1; y &= ~1; } - if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) { + if (!WebPCheckCropDimensions(W, H, x, y, w, h)) { return 0; // out of frame boundary error } } @@ -814,7 +821,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, io->mb_h = h; // Scaling - io->use_scaling = (options != NULL) && (options->use_scaling > 0); + io->use_scaling = (options != NULL) && options->use_scaling; if (io->use_scaling) { int scaled_width = options->scaled_width; int scaled_height = options->scaled_height; @@ -835,8 +842,8 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, if (io->use_scaling) { // disable filter 
(only for large downscaling ratio). - io->bypass_filtering = (io->scaled_width < W * 3 / 4) && - (io->scaled_height < H * 3 / 4); + io->bypass_filtering |= (io->scaled_width < W * 3 / 4) && + (io->scaled_height < H * 3 / 4); io->fancy_upsampling = 0; } return 1; diff --git a/thirdparty/libwebp/src/dec/webpi_dec.h b/thirdparty/libwebp/src/dec/webpi_dec.h index 24baff5d27..3b97388c71 100644 --- a/thirdparty/libwebp/src/dec/webpi_dec.h +++ b/thirdparty/libwebp/src/dec/webpi_dec.h @@ -77,6 +77,10 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers); //------------------------------------------------------------------------------ // Misc utils +// Returns true if crop dimensions are within image bounds. +int WebPCheckCropDimensions(int image_width, int image_height, + int x, int y, int w, int h); + // Initializes VP8Io with custom setup, io and teardown functions. The default // hooks will use the supplied 'params' as io->opaque handle. void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io); diff --git a/thirdparty/libwebp/src/demux/anim_decode.c b/thirdparty/libwebp/src/demux/anim_decode.c index 05dd707371..2bf4dcffe0 100644 --- a/thirdparty/libwebp/src/demux/anim_decode.c +++ b/thirdparty/libwebp/src/demux/anim_decode.c @@ -87,11 +87,19 @@ WebPAnimDecoder* WebPAnimDecoderNewInternal( int abi_version) { WebPAnimDecoderOptions options; WebPAnimDecoder* dec = NULL; + WebPBitstreamFeatures features; if (webp_data == NULL || WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_DEMUX_ABI_VERSION)) { return NULL; } + // Validate the bitstream before doing expensive allocations. The demuxer may + // be more tolerant than the decoder. + if (WebPGetFeatures(webp_data->bytes, webp_data->size, &features) != + VP8_STATUS_OK) { + return NULL; + } + // Note: calloc() so that the pointer members are initialized to NULL. 
dec = (WebPAnimDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec)); if (dec == NULL) goto Error; @@ -145,7 +153,7 @@ static int ZeroFillCanvas(uint8_t* buf, uint32_t canvas_width, uint32_t canvas_height) { const uint64_t size = (uint64_t)canvas_width * canvas_height * NUM_CHANNELS * sizeof(*buf); - if (size != (size_t)size) return 0; + if (!CheckSizeOverflow(size)) return 0; memset(buf, 0, (size_t)size); return 1; } @@ -166,7 +174,7 @@ static void ZeroFillFrameRect(uint8_t* buf, int buf_stride, int x_offset, static int CopyCanvas(const uint8_t* src, uint8_t* dst, uint32_t width, uint32_t height) { const uint64_t size = (uint64_t)width * height * NUM_CHANNELS; - if (size != (size_t)size) return 0; + if (!CheckSizeOverflow(size)) return 0; assert(src != NULL && dst != NULL); memcpy(dst, src, (size_t)size); return 1; @@ -346,12 +354,15 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec, { const uint8_t* in = iter.fragment.bytes; const size_t in_size = iter.fragment.size; - const size_t out_offset = - (iter.y_offset * width + iter.x_offset) * NUM_CHANNELS; + const uint32_t stride = width * NUM_CHANNELS; // at most 25 + 2 bits + const uint64_t out_offset = (uint64_t)iter.y_offset * stride + + (uint64_t)iter.x_offset * NUM_CHANNELS; // 53b + const uint64_t size = (uint64_t)iter.height * stride; // at most 25 + 27b WebPDecoderConfig* const config = &dec->config_; WebPRGBABuffer* const buf = &config->output.u.RGBA; - buf->stride = NUM_CHANNELS * width; - buf->size = buf->stride * iter.height; + if ((size_t)size != size) goto Error; + buf->stride = (int)stride; + buf->size = (size_t)size; buf->rgba = dec->curr_frame_ + out_offset; if (WebPDecode(in, in_size, config) != VP8_STATUS_OK) { diff --git a/thirdparty/libwebp/src/demux/demux.c b/thirdparty/libwebp/src/demux/demux.c index 1b3cc2e0a8..547a7725de 100644 --- a/thirdparty/libwebp/src/demux/demux.c +++ b/thirdparty/libwebp/src/demux/demux.c @@ -24,8 +24,8 @@ #include "src/webp/format_constants.h" #define DMUX_MAJ_VERSION 1 
-#define DMUX_MIN_VERSION 1 -#define DMUX_REV_VERSION 0 +#define DMUX_MIN_VERSION 2 +#define DMUX_REV_VERSION 1 typedef struct { size_t start_; // start location of the data @@ -221,12 +221,16 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size, const size_t chunk_start_offset = mem->start_; const uint32_t fourcc = ReadLE32(mem); const uint32_t payload_size = ReadLE32(mem); - const uint32_t payload_size_padded = payload_size + (payload_size & 1); - const size_t payload_available = (payload_size_padded > MemDataSize(mem)) - ? MemDataSize(mem) : payload_size_padded; - const size_t chunk_size = CHUNK_HEADER_SIZE + payload_available; + uint32_t payload_size_padded; + size_t payload_available; + size_t chunk_size; if (payload_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR; + + payload_size_padded = payload_size + (payload_size & 1); + payload_available = (payload_size_padded > MemDataSize(mem)) + ? MemDataSize(mem) : payload_size_padded; + chunk_size = CHUNK_HEADER_SIZE + payload_available; if (SizeIsInvalid(mem, payload_size_padded)) return PARSE_ERROR; if (payload_size_padded > MemDataSize(mem)) status = PARSE_NEED_MORE_DATA; @@ -312,6 +316,7 @@ static ParseStatus ParseAnimationFrame( int bits; MemBuffer* const mem = &dmux->mem_; Frame* frame; + size_t start_offset; ParseStatus status = NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame); if (status != PARSE_OK) return status; @@ -332,7 +337,11 @@ static ParseStatus ParseAnimationFrame( // Store a frame only if the animation flag is set there is some data for // this frame is available. 
+ start_offset = mem->start_; status = StoreFrame(dmux->num_frames_ + 1, anmf_payload_size, mem, frame); + if (status != PARSE_ERROR && mem->start_ - start_offset > anmf_payload_size) { + status = PARSE_ERROR; + } if (status != PARSE_ERROR && is_animation && frame->frame_num_ > 0) { added_frame = AddFrame(dmux, frame); if (added_frame) { @@ -446,9 +455,11 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) { const size_t chunk_start_offset = mem->start_; const uint32_t fourcc = ReadLE32(mem); const uint32_t chunk_size = ReadLE32(mem); - const uint32_t chunk_size_padded = chunk_size + (chunk_size & 1); + uint32_t chunk_size_padded; if (chunk_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR; + + chunk_size_padded = chunk_size + (chunk_size & 1); if (SizeIsInvalid(mem, chunk_size_padded)) return PARSE_ERROR; switch (fourcc) { diff --git a/thirdparty/libwebp/src/dsp/alpha_processing.c b/thirdparty/libwebp/src/dsp/alpha_processing.c index 819d1391f2..1892929a43 100644 --- a/thirdparty/libwebp/src/dsp/alpha_processing.c +++ b/thirdparty/libwebp/src/dsp/alpha_processing.c @@ -157,7 +157,8 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) { } } -void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha, +void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr, + const uint8_t* WEBP_RESTRICT const alpha, int width, int inverse) { int x; for (x = 0; x < width; ++x) { @@ -178,7 +179,8 @@ void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha, #undef MFIX void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse); -void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha, +void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr, + const uint8_t* WEBP_RESTRICT const alpha, int width, int inverse); //------------------------------------------------------------------------------ @@ -193,8 +195,8 @@ void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows, } } -void WebPMultRows(uint8_t* ptr, int stride, - 
const uint8_t* alpha, int alpha_stride, +void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride, + const uint8_t* WEBP_RESTRICT alpha, int alpha_stride, int width, int num_rows, int inverse) { int n; for (n = 0; n < num_rows; ++n) { @@ -290,9 +292,9 @@ static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444, } #if !WEBP_NEON_OMIT_C_CODE -static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride, +static int DispatchAlpha_C(const uint8_t* WEBP_RESTRICT alpha, int alpha_stride, int width, int height, - uint8_t* dst, int dst_stride) { + uint8_t* WEBP_RESTRICT dst, int dst_stride) { uint32_t alpha_mask = 0xff; int i, j; @@ -309,9 +311,10 @@ static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride, return (alpha_mask != 0xff); } -static void DispatchAlphaToGreen_C(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint32_t* dst, int dst_stride) { +static void DispatchAlphaToGreen_C(const uint8_t* WEBP_RESTRICT alpha, + int alpha_stride, int width, int height, + uint32_t* WEBP_RESTRICT dst, + int dst_stride) { int i, j; for (j = 0; j < height; ++j) { for (i = 0; i < width; ++i) { @@ -322,9 +325,9 @@ static void DispatchAlphaToGreen_C(const uint8_t* alpha, int alpha_stride, } } -static int ExtractAlpha_C(const uint8_t* argb, int argb_stride, +static int ExtractAlpha_C(const uint8_t* WEBP_RESTRICT argb, int argb_stride, int width, int height, - uint8_t* alpha, int alpha_stride) { + uint8_t* WEBP_RESTRICT alpha, int alpha_stride) { uint8_t alpha_mask = 0xff; int i, j; @@ -340,7 +343,8 @@ static int ExtractAlpha_C(const uint8_t* argb, int argb_stride, return (alpha_mask == 0xff); } -static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) { +static void ExtractGreen_C(const uint32_t* WEBP_RESTRICT argb, + uint8_t* WEBP_RESTRICT alpha, int size) { int i; for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8; } @@ -359,6 +363,11 @@ static int HasAlpha32b_C(const uint8_t* src, int length) { return 0; } +static void 
AlphaReplace_C(uint32_t* src, int length, uint32_t color) { + int x; + for (x = 0; x < length; ++x) if ((src[x] >> 24) == 0) src[x] = color; +} + //------------------------------------------------------------------------------ // Simple channel manipulations. @@ -367,8 +376,11 @@ static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) { } #ifdef WORDS_BIGENDIAN -static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g, - const uint8_t* b, int len, uint32_t* out) { +static void PackARGB_C(const uint8_t* WEBP_RESTRICT a, + const uint8_t* WEBP_RESTRICT r, + const uint8_t* WEBP_RESTRICT g, + const uint8_t* WEBP_RESTRICT b, + int len, uint32_t* WEBP_RESTRICT out) { int i; for (i = 0; i < len; ++i) { out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]); @@ -376,8 +388,10 @@ static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g, } #endif -static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b, - int len, int step, uint32_t* out) { +static void PackRGB_C(const uint8_t* WEBP_RESTRICT r, + const uint8_t* WEBP_RESTRICT g, + const uint8_t* WEBP_RESTRICT b, + int len, int step, uint32_t* WEBP_RESTRICT out) { int i, offset = 0; for (i = 0; i < len; ++i) { out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]); @@ -387,19 +401,26 @@ static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b, void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int); void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int); -int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int); -void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int); -int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int); -void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size); +int (*WebPDispatchAlpha)(const uint8_t* WEBP_RESTRICT, int, int, int, + uint8_t* WEBP_RESTRICT, int); +void (*WebPDispatchAlphaToGreen)(const uint8_t* WEBP_RESTRICT, int, int, 
int, + uint32_t* WEBP_RESTRICT, int); +int (*WebPExtractAlpha)(const uint8_t* WEBP_RESTRICT, int, int, int, + uint8_t* WEBP_RESTRICT, int); +void (*WebPExtractGreen)(const uint32_t* WEBP_RESTRICT argb, + uint8_t* WEBP_RESTRICT alpha, int size); #ifdef WORDS_BIGENDIAN void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g, const uint8_t* b, int, uint32_t*); #endif -void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, - int len, int step, uint32_t* out); +void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r, + const uint8_t* WEBP_RESTRICT g, + const uint8_t* WEBP_RESTRICT b, + int len, int step, uint32_t* WEBP_RESTRICT out); int (*WebPHasAlpha8b)(const uint8_t* src, int length); int (*WebPHasAlpha32b)(const uint8_t* src, int length); +void (*WebPAlphaReplace)(uint32_t* src, int length, uint32_t color); //------------------------------------------------------------------------------ // Init function @@ -428,13 +449,14 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) { WebPHasAlpha8b = HasAlpha8b_C; WebPHasAlpha32b = HasAlpha32b_C; + WebPAlphaReplace = AlphaReplace_C; // If defined, use CPUInfo() to overwrite some pointers with faster versions. 
if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPInitAlphaProcessingSSE2(); -#if defined(WEBP_USE_SSE41) +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { WebPInitAlphaProcessingSSE41(); } @@ -448,7 +470,7 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { WebPInitAlphaProcessingNEON(); @@ -469,4 +491,5 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) { assert(WebPPackRGB != NULL); assert(WebPHasAlpha8b != NULL); assert(WebPHasAlpha32b != NULL); + assert(WebPAlphaReplace != NULL); } diff --git a/thirdparty/libwebp/src/dsp/alpha_processing_neon.c b/thirdparty/libwebp/src/dsp/alpha_processing_neon.c index 9d55421704..9e0ace9421 100644 --- a/thirdparty/libwebp/src/dsp/alpha_processing_neon.c +++ b/thirdparty/libwebp/src/dsp/alpha_processing_neon.c @@ -80,9 +80,9 @@ static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first, //------------------------------------------------------------------------------ -static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint8_t* dst, int dst_stride) { +static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha, + int alpha_stride, int width, int height, + uint8_t* WEBP_RESTRICT dst, int dst_stride) { uint32_t alpha_mask = 0xffffffffu; uint8x8_t mask8 = vdup_n_u8(0xff); uint32_t tmp[2]; @@ -112,9 +112,10 @@ static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride, return (alpha_mask != 0xffffffffu); } -static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint32_t* dst, int dst_stride) { +static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha, + int alpha_stride, int width, int height, + uint32_t* WEBP_RESTRICT dst, + int dst_stride) { int i, j; uint8x8x4_t greens; // leave A/R/B 
channels zero'd. greens.val[0] = vdup_n_u8(0); @@ -131,9 +132,9 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride, } } -static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride, +static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride, int width, int height, - uint8_t* alpha, int alpha_stride) { + uint8_t* WEBP_RESTRICT alpha, int alpha_stride) { uint32_t alpha_mask = 0xffffffffu; uint8x8_t mask8 = vdup_n_u8(0xff); uint32_t tmp[2]; @@ -161,8 +162,8 @@ static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride, return (alpha_mask == 0xffffffffu); } -static void ExtractGreen_NEON(const uint32_t* argb, - uint8_t* alpha, int size) { +static void ExtractGreen_NEON(const uint32_t* WEBP_RESTRICT argb, + uint8_t* WEBP_RESTRICT alpha, int size) { int i; for (i = 0; i + 16 <= size; i += 16) { const uint8x16x4_t rgbX = vld4q_u8((const uint8_t*)(argb + i)); diff --git a/thirdparty/libwebp/src/dsp/alpha_processing_sse2.c b/thirdparty/libwebp/src/dsp/alpha_processing_sse2.c index 2871c56d84..a5f8c9f7c7 100644 --- a/thirdparty/libwebp/src/dsp/alpha_processing_sse2.c +++ b/thirdparty/libwebp/src/dsp/alpha_processing_sse2.c @@ -18,9 +18,9 @@ //------------------------------------------------------------------------------ -static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint8_t* dst, int dst_stride) { +static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha, + int alpha_stride, int width, int height, + uint8_t* WEBP_RESTRICT dst, int dst_stride) { // alpha_and stores an 'and' operation of all the alpha[] values. The final // value is not 0xff if any of the alpha[] is not equal to 0xff. 
uint32_t alpha_and = 0xff; @@ -72,9 +72,10 @@ static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride, return (alpha_and != 0xff); } -static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint32_t* dst, int dst_stride) { +static void DispatchAlphaToGreen_SSE2(const uint8_t* WEBP_RESTRICT alpha, + int alpha_stride, int width, int height, + uint32_t* WEBP_RESTRICT dst, + int dst_stride) { int i, j; const __m128i zero = _mm_setzero_si128(); const int limit = width & ~15; @@ -98,9 +99,9 @@ static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride, } } -static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride, +static int ExtractAlpha_SSE2(const uint8_t* WEBP_RESTRICT argb, int argb_stride, int width, int height, - uint8_t* alpha, int alpha_stride) { + uint8_t* WEBP_RESTRICT alpha, int alpha_stride) { // alpha_and stores an 'and' operation of all the alpha[] values. The final // value is not 0xff if any of the alpha[] is not equal to 0xff. 
uint32_t alpha_and = 0xff; @@ -265,6 +266,27 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) { return 0; } +static void AlphaReplace_SSE2(uint32_t* src, int length, uint32_t color) { + const __m128i m_color = _mm_set1_epi32(color); + const __m128i zero = _mm_setzero_si128(); + int i = 0; + for (; i + 8 <= length; i += 8) { + const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0)); + const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 4)); + const __m128i b0 = _mm_srai_epi32(a0, 24); + const __m128i b1 = _mm_srai_epi32(a1, 24); + const __m128i c0 = _mm_cmpeq_epi32(b0, zero); + const __m128i c1 = _mm_cmpeq_epi32(b1, zero); + const __m128i d0 = _mm_and_si128(c0, m_color); + const __m128i d1 = _mm_and_si128(c1, m_color); + const __m128i e0 = _mm_andnot_si128(c0, a0); + const __m128i e1 = _mm_andnot_si128(c1, a1); + _mm_storeu_si128((__m128i*)(src + i + 0), _mm_or_si128(d0, e0)); + _mm_storeu_si128((__m128i*)(src + i + 4), _mm_or_si128(d1, e1)); + } + for (; i < length; ++i) if ((src[i] >> 24) == 0) src[i] = color; +} + // ----------------------------------------------------------------------------- // Apply alpha value to rows @@ -296,7 +318,8 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) { if (width > 0) WebPMultARGBRow_C(ptr + x, width, inverse); } -static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha, +static void MultRow_SSE2(uint8_t* WEBP_RESTRICT const ptr, + const uint8_t* WEBP_RESTRICT const alpha, int width, int inverse) { int x = 0; if (!inverse) { @@ -334,6 +357,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) { WebPHasAlpha8b = HasAlpha8b_SSE2; WebPHasAlpha32b = HasAlpha32b_SSE2; + WebPAlphaReplace = AlphaReplace_SSE2; } #else // !WEBP_USE_SSE2 diff --git a/thirdparty/libwebp/src/dsp/alpha_processing_sse41.c b/thirdparty/libwebp/src/dsp/alpha_processing_sse41.c index 56040f9c88..cdf877ce49 100644 --- a/thirdparty/libwebp/src/dsp/alpha_processing_sse41.c 
+++ b/thirdparty/libwebp/src/dsp/alpha_processing_sse41.c @@ -19,9 +19,9 @@ //------------------------------------------------------------------------------ -static int ExtractAlpha_SSE41(const uint8_t* argb, int argb_stride, - int width, int height, - uint8_t* alpha, int alpha_stride) { +static int ExtractAlpha_SSE41(const uint8_t* WEBP_RESTRICT argb, + int argb_stride, int width, int height, + uint8_t* WEBP_RESTRICT alpha, int alpha_stride) { // alpha_and stores an 'and' operation of all the alpha[] values. The final // value is not 0xff if any of the alpha[] is not equal to 0xff. uint32_t alpha_and = 0xff; diff --git a/thirdparty/libwebp/src/dsp/cost.c b/thirdparty/libwebp/src/dsp/cost.c index cc681cdd4b..460ec4f2a7 100644 --- a/thirdparty/libwebp/src/dsp/cost.c +++ b/thirdparty/libwebp/src/dsp/cost.c @@ -395,12 +395,12 @@ WEBP_DSP_INIT_FUNC(VP8EncDspCostInit) { VP8EncDspCostInitMIPSdspR2(); } #endif -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8EncDspCostInitSSE2(); } #endif -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (VP8GetCPUInfo(kNEON)) { VP8EncDspCostInitNEON(); } diff --git a/thirdparty/libwebp/src/dsp/cpu.c b/thirdparty/libwebp/src/dsp/cpu.c index 0fa5b6a5ce..3145e190a4 100644 --- a/thirdparty/libwebp/src/dsp/cpu.c +++ b/thirdparty/libwebp/src/dsp/cpu.c @@ -55,12 +55,18 @@ static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) : "a"(info_type), "c"(0)); } -#elif (defined(_M_X64) || defined(_M_IX86)) && \ - defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1 +#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + +#if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1 #include <intrin.h> #define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0 -#elif defined(WEBP_MSC_SSE2) +#define WEBP_HAVE_MSC_CPUID +#elif _MSC_VER > 1310 +#include <intrin.h> 
#define GetCPUInfo __cpuid +#define WEBP_HAVE_MSC_CPUID +#endif + #endif // NaCl has no support for xgetbv or the raw opcode. @@ -94,7 +100,7 @@ static WEBP_INLINE uint64_t xgetbv(void) { #define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. #endif -#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2) +#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_HAVE_MSC_CPUID) // helper function for run-time detection of slow SSSE3 platforms static int CheckSlowModel(int info) { @@ -179,9 +185,34 @@ static int AndroidCPUInfo(CPUFeature feature) { return 0; } VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo; -#elif defined(WEBP_USE_NEON) -// define a dummy function to enable turning off NEON at runtime by setting -// VP8DecGetCPUInfo = NULL +#elif defined(EMSCRIPTEN) // also needs to be before generic NEON test +// Use compile flags as an indicator of SIMD support instead of a runtime check. +static int wasmCPUInfo(CPUFeature feature) { + switch (feature) { +#ifdef WEBP_HAVE_SSE2 + case kSSE2: + return 1; +#endif +#ifdef WEBP_HAVE_SSE41 + case kSSE3: + case kSlowSSSE3: + case kSSE4_1: + return 1; +#endif +#ifdef WEBP_HAVE_NEON + case kNEON: + return 1; +#endif + default: + break; + } + return 0; +} +VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo; +#elif defined(WEBP_HAVE_NEON) +// In most cases this function doesn't check for NEON support (it's assumed by +// the configuration), but enables turning off NEON at runtime, for testing +// purposes, by setting VP8DecGetCPUInfo = NULL. static int armCPUInfo(CPUFeature feature) { if (feature != kNEON) return 0; #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD) diff --git a/thirdparty/libwebp/src/dsp/dec.c b/thirdparty/libwebp/src/dsp/dec.c index 1119842dd3..537c701282 100644 --- a/thirdparty/libwebp/src/dsp/dec.c +++ b/thirdparty/libwebp/src/dsp/dec.c @@ -807,10 +807,10 @@ WEBP_DSP_INIT_FUNC(VP8DspInit) { // If defined, use CPUInfo() to overwrite some pointers with faster versions. 
if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8DspInitSSE2(); -#if defined(WEBP_USE_SSE41) +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { VP8DspInitSSE41(); } @@ -834,7 +834,7 @@ WEBP_DSP_INIT_FUNC(VP8DspInit) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { VP8DspInitNEON(); diff --git a/thirdparty/libwebp/src/dsp/dec_neon.c b/thirdparty/libwebp/src/dsp/dec_neon.c index 239ec4167e..fa851707e2 100644 --- a/thirdparty/libwebp/src/dsp/dec_neon.c +++ b/thirdparty/libwebp/src/dsp/dec_neon.c @@ -1283,12 +1283,12 @@ static void DC4_NEON(uint8_t* dst) { // DC const uint8x8_t A = vld1_u8(dst - BPS); // top row const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top const uint16x4_t p1 = vpadd_u16(p0, p0); - const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + 0 * BPS - 1)); - const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + 1 * BPS - 1)); - const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + 2 * BPS - 1)); - const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + 3 * BPS - 1)); - const uint16x8_t s0 = vaddq_u16(L0, L1); - const uint16x8_t s1 = vaddq_u16(L2, L3); + const uint8x8_t L0 = vld1_u8(dst + 0 * BPS - 1); + const uint8x8_t L1 = vld1_u8(dst + 1 * BPS - 1); + const uint8x8_t L2 = vld1_u8(dst + 2 * BPS - 1); + const uint8x8_t L3 = vld1_u8(dst + 3 * BPS - 1); + const uint16x8_t s0 = vaddl_u8(L0, L1); + const uint16x8_t s1 = vaddl_u8(L2, L3); const uint16x8_t s01 = vaddq_u16(s0, s1); const uint16x8_t sum = vaddq_u16(s01, vcombine_u16(p1, p1)); const uint8x8_t dc0 = vrshrn_n_u16(sum, 3); // (sum + 4) >> 3 @@ -1429,8 +1429,7 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) { if (do_top) { const uint8x8_t A = vld1_u8(dst - BPS); // top row #if defined(__aarch64__) - const uint16x8_t B = vmovl_u8(A); - const uint16_t p2 = vaddvq_u16(B); + const uint16_t p2 = vaddlv_u8(A); sum_top = 
vdupq_n_u16(p2); #else const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top @@ -1441,18 +1440,18 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) { } if (do_left) { - const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + 0 * BPS - 1)); - const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + 1 * BPS - 1)); - const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + 2 * BPS - 1)); - const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + 3 * BPS - 1)); - const uint16x8_t L4 = vmovl_u8(vld1_u8(dst + 4 * BPS - 1)); - const uint16x8_t L5 = vmovl_u8(vld1_u8(dst + 5 * BPS - 1)); - const uint16x8_t L6 = vmovl_u8(vld1_u8(dst + 6 * BPS - 1)); - const uint16x8_t L7 = vmovl_u8(vld1_u8(dst + 7 * BPS - 1)); - const uint16x8_t s0 = vaddq_u16(L0, L1); - const uint16x8_t s1 = vaddq_u16(L2, L3); - const uint16x8_t s2 = vaddq_u16(L4, L5); - const uint16x8_t s3 = vaddq_u16(L6, L7); + const uint8x8_t L0 = vld1_u8(dst + 0 * BPS - 1); + const uint8x8_t L1 = vld1_u8(dst + 1 * BPS - 1); + const uint8x8_t L2 = vld1_u8(dst + 2 * BPS - 1); + const uint8x8_t L3 = vld1_u8(dst + 3 * BPS - 1); + const uint8x8_t L4 = vld1_u8(dst + 4 * BPS - 1); + const uint8x8_t L5 = vld1_u8(dst + 5 * BPS - 1); + const uint8x8_t L6 = vld1_u8(dst + 6 * BPS - 1); + const uint8x8_t L7 = vld1_u8(dst + 7 * BPS - 1); + const uint16x8_t s0 = vaddl_u8(L0, L1); + const uint16x8_t s1 = vaddl_u8(L2, L3); + const uint16x8_t s2 = vaddl_u8(L4, L5); + const uint16x8_t s3 = vaddl_u8(L6, L7); const uint16x8_t s01 = vaddq_u16(s0, s1); const uint16x8_t s23 = vaddq_u16(s2, s3); sum_left = vaddq_u16(s01, s23); @@ -1512,29 +1511,34 @@ static WEBP_INLINE void DC16_NEON(uint8_t* dst, int do_top, int do_left) { if (do_top) { const uint8x16_t A = vld1q_u8(dst - BPS); // top row +#if defined(__aarch64__) + const uint16_t p3 = vaddlvq_u8(A); + sum_top = vdupq_n_u16(p3); +#else const uint16x8_t p0 = vpaddlq_u8(A); // cascading summation of the top const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); const uint16x4_t p2 = 
vpadd_u16(p1, p1); const uint16x4_t p3 = vpadd_u16(p2, p2); sum_top = vcombine_u16(p3, p3); +#endif } if (do_left) { int i; sum_left = vdupq_n_u16(0); for (i = 0; i < 16; i += 8) { - const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + (i + 0) * BPS - 1)); - const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + (i + 1) * BPS - 1)); - const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + (i + 2) * BPS - 1)); - const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + (i + 3) * BPS - 1)); - const uint16x8_t L4 = vmovl_u8(vld1_u8(dst + (i + 4) * BPS - 1)); - const uint16x8_t L5 = vmovl_u8(vld1_u8(dst + (i + 5) * BPS - 1)); - const uint16x8_t L6 = vmovl_u8(vld1_u8(dst + (i + 6) * BPS - 1)); - const uint16x8_t L7 = vmovl_u8(vld1_u8(dst + (i + 7) * BPS - 1)); - const uint16x8_t s0 = vaddq_u16(L0, L1); - const uint16x8_t s1 = vaddq_u16(L2, L3); - const uint16x8_t s2 = vaddq_u16(L4, L5); - const uint16x8_t s3 = vaddq_u16(L6, L7); + const uint8x8_t L0 = vld1_u8(dst + (i + 0) * BPS - 1); + const uint8x8_t L1 = vld1_u8(dst + (i + 1) * BPS - 1); + const uint8x8_t L2 = vld1_u8(dst + (i + 2) * BPS - 1); + const uint8x8_t L3 = vld1_u8(dst + (i + 3) * BPS - 1); + const uint8x8_t L4 = vld1_u8(dst + (i + 4) * BPS - 1); + const uint8x8_t L5 = vld1_u8(dst + (i + 5) * BPS - 1); + const uint8x8_t L6 = vld1_u8(dst + (i + 6) * BPS - 1); + const uint8x8_t L7 = vld1_u8(dst + (i + 7) * BPS - 1); + const uint16x8_t s0 = vaddl_u8(L0, L1); + const uint16x8_t s1 = vaddl_u8(L2, L3); + const uint16x8_t s2 = vaddl_u8(L4, L5); + const uint16x8_t s3 = vaddl_u8(L6, L7); const uint16x8_t s01 = vaddq_u16(s0, s1); const uint16x8_t s23 = vaddq_u16(s2, s3); const uint16x8_t sum = vaddq_u16(s01, s23); diff --git a/thirdparty/libwebp/src/dsp/dsp.h b/thirdparty/libwebp/src/dsp/dsp.h index a784de334a..513e159bb3 100644 --- a/thirdparty/libwebp/src/dsp/dsp.h +++ b/thirdparty/libwebp/src/dsp/dsp.h @@ -27,6 +27,23 @@ extern "C" { #define BPS 32 // this is the common stride for enc/dec 
//------------------------------------------------------------------------------ +// WEBP_RESTRICT + +// Declares a pointer with the restrict type qualifier if available. +// This allows code to hint to the compiler that only this pointer references a +// particular object or memory region within the scope of the block in which it +// is declared. This may allow for improved optimizations due to the lack of +// pointer aliasing. See also: +// https://en.cppreference.com/w/c/language/restrict +#if defined(__GNUC__) +#define WEBP_RESTRICT __restrict__ +#elif defined(_MSC_VER) +#define WEBP_RESTRICT __restrict +#else +#define WEBP_RESTRICT +#endif + +//------------------------------------------------------------------------------ // CPU detection #if defined(__GNUC__) @@ -51,9 +68,7 @@ extern "C" { # define __has_builtin(x) 0 #endif -// for now, none of the optimizations below are available in emscripten -#if !defined(EMSCRIPTEN) - +#if !defined(HAVE_CONFIG_H) #if defined(_MSC_VER) && _MSC_VER > 1310 && \ (defined(_M_X64) || defined(_M_IX86)) #define WEBP_MSC_SSE2 // Visual C++ SSE2 targets @@ -63,23 +78,37 @@ extern "C" { (defined(_M_X64) || defined(_M_IX86)) #define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets #endif +#endif // WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp // files without intrinsics, allowing the corresponding Init() to be called. // Files containing intrinsics will need to be built targeting the instruction // set so should succeed on one of the earlier tests. 
-#if defined(__SSE2__) || defined(WEBP_MSC_SSE2) || defined(WEBP_HAVE_SSE2) +#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \ + (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2)) #define WEBP_USE_SSE2 #endif -#if defined(__SSE4_1__) || defined(WEBP_MSC_SSE41) || defined(WEBP_HAVE_SSE41) +#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2) +#define WEBP_HAVE_SSE2 +#endif + +#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \ + (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41)) #define WEBP_USE_SSE41 #endif +#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41) +#define WEBP_HAVE_SSE41 +#endif + +#undef WEBP_MSC_SSE41 +#undef WEBP_MSC_SSE2 + // The intrinsics currently cause compiler errors with arm-nacl-gcc and the // inline assembly would need to be modified for use with Native Client. -#if (defined(__ARM_NEON__) || \ - defined(__aarch64__) || defined(WEBP_HAVE_NEON)) && \ +#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \ + (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \ !defined(__native_client__) #define WEBP_USE_NEON #endif @@ -95,6 +124,10 @@ extern "C" { #define WEBP_USE_INTRINSICS #endif +#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON) +#define WEBP_HAVE_NEON +#endif + #if defined(__mips__) && !defined(__mips64) && \ defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) #define WEBP_USE_MIPS32 @@ -110,13 +143,11 @@ extern "C" { #define WEBP_USE_MSA #endif -#endif /* EMSCRIPTEN */ - #ifndef WEBP_DSP_OMIT_C_CODE #define WEBP_DSP_OMIT_C_CODE 1 #endif -#if (defined(__aarch64__) || defined(__ARM_NEON__)) && WEBP_DSP_OMIT_C_CODE +#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE #define WEBP_NEON_OMIT_C_CODE 1 #else #define WEBP_NEON_OMIT_C_CODE 0 @@ -193,6 +224,12 @@ extern "C" { #endif #endif +// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'. +// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning. 
+#if !defined(WEBP_OFFSET_PTR) +#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off))) +#endif + // Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility) #if !defined(WEBP_SWAP_16BIT_CSP) #define WEBP_SWAP_16BIT_CSP 0 @@ -572,26 +609,29 @@ extern void (*WebPApplyAlphaMultiply4444)( // Dispatch the values from alpha[] plane to the ARGB destination 'dst'. // Returns true if alpha[] plane has non-trivial values different from 0xff. -extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint8_t* dst, int dst_stride); +extern int (*WebPDispatchAlpha)(const uint8_t* WEBP_RESTRICT alpha, + int alpha_stride, int width, int height, + uint8_t* WEBP_RESTRICT dst, int dst_stride); // Transfer packed 8b alpha[] values to green channel in dst[], zero'ing the // A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units. -extern void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint32_t* dst, int dst_stride); +extern void (*WebPDispatchAlphaToGreen)(const uint8_t* WEBP_RESTRICT alpha, + int alpha_stride, int width, int height, + uint32_t* WEBP_RESTRICT dst, + int dst_stride); // Extract the alpha values from 32b values in argb[] and pack them into alpha[] // (this is the opposite of WebPDispatchAlpha). // Returns true if there's only trivial 0xff alpha values. -extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride, - int width, int height, - uint8_t* alpha, int alpha_stride); +extern int (*WebPExtractAlpha)(const uint8_t* WEBP_RESTRICT argb, + int argb_stride, int width, int height, + uint8_t* WEBP_RESTRICT alpha, + int alpha_stride); // Extract the green values from 32b values in argb[] and pack them into alpha[] // (this is the opposite of WebPDispatchAlphaToGreen). 
-extern void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size); +extern void (*WebPExtractGreen)(const uint32_t* WEBP_RESTRICT argb, + uint8_t* WEBP_RESTRICT alpha, int size); // Pre-Multiply operation transforms x into x * A / 255 (where x=Y,R,G or B). // Un-Multiply operation transforms x into x * 255 / A. @@ -604,34 +644,42 @@ void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows, int inverse); // Same for a row of single values, with side alpha values. -extern void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha, +extern void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr, + const uint8_t* WEBP_RESTRICT const alpha, int width, int inverse); // Same a WebPMultRow(), but for several 'num_rows' rows. -void WebPMultRows(uint8_t* ptr, int stride, - const uint8_t* alpha, int alpha_stride, +void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride, + const uint8_t* WEBP_RESTRICT alpha, int alpha_stride, int width, int num_rows, int inverse); // Plain-C versions, used as fallback by some implementations. -void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha, +void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr, + const uint8_t* WEBP_RESTRICT const alpha, int width, int inverse); void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse); #ifdef WORDS_BIGENDIAN // ARGB packing function: a/r/g/b input is rgba or bgra order. -extern void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, - const uint8_t* g, const uint8_t* b, int len, - uint32_t* out); +extern void (*WebPPackARGB)(const uint8_t* WEBP_RESTRICT a, + const uint8_t* WEBP_RESTRICT r, + const uint8_t* WEBP_RESTRICT g, + const uint8_t* WEBP_RESTRICT b, + int len, uint32_t* WEBP_RESTRICT out); #endif // RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order. 
-extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, - int len, int step, uint32_t* out); +extern void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r, + const uint8_t* WEBP_RESTRICT g, + const uint8_t* WEBP_RESTRICT b, + int len, int step, uint32_t* WEBP_RESTRICT out); // This function returns true if src[i] contains a value different from 0xff. extern int (*WebPHasAlpha8b)(const uint8_t* src, int length); // This function returns true if src[4*i] contains a value different from 0xff. extern int (*WebPHasAlpha32b)(const uint8_t* src, int length); +// replaces transparent values in src[] by 'color'. +extern void (*WebPAlphaReplace)(uint32_t* src, int length, uint32_t color); // To be called first before using the above. void WebPInitAlphaProcessing(void); diff --git a/thirdparty/libwebp/src/dsp/enc.c b/thirdparty/libwebp/src/dsp/enc.c index 2fddbc4c52..ea47a3fd95 100644 --- a/thirdparty/libwebp/src/dsp/enc.c +++ b/thirdparty/libwebp/src/dsp/enc.c @@ -773,10 +773,10 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) { // If defined, use CPUInfo() to overwrite some pointers with faster versions. 
if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8EncDspInitSSE2(); -#if defined(WEBP_USE_SSE41) +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { VP8EncDspInitSSE41(); } @@ -800,7 +800,7 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { VP8EncDspInitNEON(); diff --git a/thirdparty/libwebp/src/dsp/filters.c b/thirdparty/libwebp/src/dsp/filters.c index 9e910d99c9..4506567ba3 100644 --- a/thirdparty/libwebp/src/dsp/filters.c +++ b/thirdparty/libwebp/src/dsp/filters.c @@ -254,7 +254,7 @@ WEBP_DSP_INIT_FUNC(VP8FiltersInit) { #endif if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8FiltersInitSSE2(); } @@ -271,7 +271,7 @@ WEBP_DSP_INIT_FUNC(VP8FiltersInit) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { VP8FiltersInitNEON(); diff --git a/thirdparty/libwebp/src/dsp/filters_sse2.c b/thirdparty/libwebp/src/dsp/filters_sse2.c index 4b3f2d020f..5c33ec15e2 100644 --- a/thirdparty/libwebp/src/dsp/filters_sse2.c +++ b/thirdparty/libwebp/src/dsp/filters_sse2.c @@ -320,7 +320,12 @@ extern void VP8FiltersInitSSE2(void); WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) { WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_SSE2; +#if defined(CHROMIUM) + // TODO(crbug.com/654974) + (void)VerticalUnfilter_SSE2; +#else WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_SSE2; +#endif WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_SSE2; WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_SSE2; diff --git a/thirdparty/libwebp/src/dsp/lossless.c b/thirdparty/libwebp/src/dsp/lossless.c index aad5f43ec9..d8bbb02b35 100644 --- a/thirdparty/libwebp/src/dsp/lossless.c +++ 
b/thirdparty/libwebp/src/dsp/lossless.c @@ -107,62 +107,62 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { //------------------------------------------------------------------------------ // Predictors -static uint32_t Predictor0_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top) { (void)top; (void)left; return ARGB_BLACK; } -static uint32_t Predictor1_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top) { (void)top; return left; } -static uint32_t Predictor2_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top) { (void)left; return top[0]; } -static uint32_t Predictor3_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top) { (void)left; return top[1]; } -static uint32_t Predictor4_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top) { (void)left; return top[-1]; } -static uint32_t Predictor5_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average3(left, top[0], top[1]); return pred; } -static uint32_t Predictor6_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average2(left, top[-1]); return pred; } -static uint32_t Predictor7_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average2(left, top[0]); return pred; } -static uint32_t Predictor8_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average2(top[-1], top[0]); (void)left; return pred; } -static uint32_t Predictor9_C(uint32_t 
left, const uint32_t* const top) { +uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average2(top[0], top[1]); (void)left; return pred; } -static uint32_t Predictor10_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average4(left, top[-1], top[0], top[1]); return pred; } -static uint32_t Predictor11_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = Select(top[0], left, top[-1]); return pred; } -static uint32_t Predictor12_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]); return pred; } -static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]); return pred; } @@ -182,18 +182,18 @@ static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper, out[i] = left = VP8LAddPixels(in[i], left); } } -GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C) -GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C) -GENERATE_PREDICTOR_ADD(Predictor4_C, PredictorAdd4_C) -GENERATE_PREDICTOR_ADD(Predictor5_C, PredictorAdd5_C) -GENERATE_PREDICTOR_ADD(Predictor6_C, PredictorAdd6_C) -GENERATE_PREDICTOR_ADD(Predictor7_C, PredictorAdd7_C) -GENERATE_PREDICTOR_ADD(Predictor8_C, PredictorAdd8_C) -GENERATE_PREDICTOR_ADD(Predictor9_C, PredictorAdd9_C) -GENERATE_PREDICTOR_ADD(Predictor10_C, PredictorAdd10_C) -GENERATE_PREDICTOR_ADD(Predictor11_C, PredictorAdd11_C) -GENERATE_PREDICTOR_ADD(Predictor12_C, PredictorAdd12_C) -GENERATE_PREDICTOR_ADD(Predictor13_C, PredictorAdd13_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor2_C, PredictorAdd2_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor3_C, 
PredictorAdd3_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor4_C, PredictorAdd4_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor5_C, PredictorAdd5_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor6_C, PredictorAdd6_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor7_C, PredictorAdd7_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor8_C, PredictorAdd8_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor9_C, PredictorAdd9_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor10_C, PredictorAdd10_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor11_C, PredictorAdd11_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor12_C, PredictorAdd12_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor13_C, PredictorAdd13_C) //------------------------------------------------------------------------------ @@ -562,7 +562,6 @@ VP8LPredictorFunc VP8LPredictors[16]; // exposed plain-C implementations VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16]; -VP8LPredictorFunc VP8LPredictors_C[16]; VP8LTransformColorInverseFunc VP8LTransformColorInverse; @@ -576,6 +575,7 @@ VP8LMapARGBFunc VP8LMapColor32b; VP8LMapAlphaFunc VP8LMapColor8b; extern void VP8LDspInitSSE2(void); +extern void VP8LDspInitSSE41(void); extern void VP8LDspInitNEON(void); extern void VP8LDspInitMIPSdspR2(void); extern void VP8LDspInitMSA(void); @@ -600,8 +600,7 @@ extern void VP8LDspInitMSA(void); } while (0); WEBP_DSP_INIT_FUNC(VP8LDspInit) { - COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors) - COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors_C) + COPY_PREDICTOR_ARRAY(VP8LPredictor, VP8LPredictors) COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd) COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C) @@ -623,9 +622,14 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) { // If defined, use CPUInfo() to overwrite some pointers with faster versions. 
if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8LDspInitSSE2(); +#if defined(WEBP_HAVE_SSE41) + if (VP8GetCPUInfo(kSSE4_1)) { + VP8LDspInitSSE41(); + } +#endif } #endif #if defined(WEBP_USE_MIPS_DSP_R2) @@ -640,7 +644,7 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { VP8LDspInitNEON(); diff --git a/thirdparty/libwebp/src/dsp/lossless.h b/thirdparty/libwebp/src/dsp/lossless.h index f709cc86b2..ebd316d1ed 100644 --- a/thirdparty/libwebp/src/dsp/lossless.h +++ b/thirdparty/libwebp/src/dsp/lossless.h @@ -30,7 +30,22 @@ extern "C" { typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top); extern VP8LPredictorFunc VP8LPredictors[16]; -extern VP8LPredictorFunc VP8LPredictors_C[16]; + +uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top); + // These Add/Sub function expects upper[-1] and out[-1] to be readable. 
typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in, const uint32_t* upper, int num_pixels, diff --git a/thirdparty/libwebp/src/dsp/lossless_common.h b/thirdparty/libwebp/src/dsp/lossless_common.h index 9c2ebe6809..96a106f9ee 100644 --- a/thirdparty/libwebp/src/dsp/lossless_common.h +++ b/thirdparty/libwebp/src/dsp/lossless_common.h @@ -184,19 +184,6 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \ } \ } -// It subtracts the prediction from the input pixel and stores the residual -// in the output pixel. -#define GENERATE_PREDICTOR_SUB(PREDICTOR, PREDICTOR_SUB) \ -static void PREDICTOR_SUB(const uint32_t* in, const uint32_t* upper, \ - int num_pixels, uint32_t* out) { \ - int x; \ - assert(upper != NULL); \ - for (x = 0; x < num_pixels; ++x) { \ - const uint32_t pred = (PREDICTOR)(in[x - 1], upper + x); \ - out[x] = VP8LSubPixels(in[x], pred); \ - } \ -} - #ifdef __cplusplus } // extern "C" #endif diff --git a/thirdparty/libwebp/src/dsp/lossless_enc.c b/thirdparty/libwebp/src/dsp/lossless_enc.c index 9c36055afc..c3e8537ade 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc.c @@ -329,6 +329,15 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = { static float FastSLog2Slow_C(uint32_t v) { assert(v >= LOG_LOOKUP_IDX_MAX); if (v < APPROX_LOG_WITH_CORRECTION_MAX) { +#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ) + // use clz if available + const int log_cnt = BitsLog2Floor(v) - 7; + const uint32_t y = 1 << log_cnt; + int correction = 0; + const float v_f = (float)v; + const uint32_t orig_v = v; + v >>= log_cnt; +#else int log_cnt = 0; uint32_t y = 1; int correction = 0; @@ -339,6 +348,7 @@ static float FastSLog2Slow_C(uint32_t v) { v = v >> 1; y = y << 1; } while (v >= LOG_LOOKUP_IDX_MAX); +#endif // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256 // Xf = floor(Xf) * (1 + (v % y) / v) // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v) @@ -355,6 +365,14 @@ static float 
FastSLog2Slow_C(uint32_t v) { static float FastLog2Slow_C(uint32_t v) { assert(v >= LOG_LOOKUP_IDX_MAX); if (v < APPROX_LOG_WITH_CORRECTION_MAX) { +#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ) + // use clz if available + const int log_cnt = BitsLog2Floor(v) - 7; + const uint32_t y = 1 << log_cnt; + const uint32_t orig_v = v; + double log_2; + v >>= log_cnt; +#else int log_cnt = 0; uint32_t y = 1; const uint32_t orig_v = v; @@ -364,6 +382,7 @@ static float FastLog2Slow_C(uint32_t v) { v = v >> 1; y = y << 1; } while (v >= LOG_LOOKUP_IDX_MAX); +#endif log_2 = kLog2Table[v] + log_cnt; if (orig_v >= APPROX_LOG_MAX) { // Since the division is still expensive, add this correction factor only @@ -702,140 +721,6 @@ void VP8LHistogramAdd(const VP8LHistogram* const a, //------------------------------------------------------------------------------ // Image transforms. -static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { - return (((a0 ^ a1) & 0xfefefefeu) >> 1) + (a0 & a1); -} - -static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { - return Average2(Average2(a0, a2), a1); -} - -static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, - uint32_t a2, uint32_t a3) { - return Average2(Average2(a0, a1), Average2(a2, a3)); -} - -static WEBP_INLINE uint32_t Clip255(uint32_t a) { - if (a < 256) { - return a; - } - // return 0, when a is a negative integer. - // return 255, when a is positive. 
- return ~a >> 24; -} - -static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) { - return Clip255(a + b - c); -} - -static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, - uint32_t c2) { - const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24); - const int r = AddSubtractComponentFull((c0 >> 16) & 0xff, - (c1 >> 16) & 0xff, - (c2 >> 16) & 0xff); - const int g = AddSubtractComponentFull((c0 >> 8) & 0xff, - (c1 >> 8) & 0xff, - (c2 >> 8) & 0xff); - const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff); - return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; -} - -static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) { - return Clip255(a + (a - b) / 2); -} - -static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, - uint32_t c2) { - const uint32_t ave = Average2(c0, c1); - const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24); - const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff); - const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff); - const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff); - return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; -} - -// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined. 
-#if defined(__arm__) && \ - (LOCAL_GCC_VERSION == 0x409 || LOCAL_GCC_VERSION == 0x408) -# define LOCAL_INLINE __attribute__ ((noinline)) -#else -# define LOCAL_INLINE WEBP_INLINE -#endif - -static LOCAL_INLINE int Sub3(int a, int b, int c) { - const int pb = b - c; - const int pa = a - c; - return abs(pb) - abs(pa); -} - -#undef LOCAL_INLINE - -static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { - const int pa_minus_pb = - Sub3((a >> 24) , (b >> 24) , (c >> 24) ) + - Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) + - Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) + - Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff); - return (pa_minus_pb <= 0) ? a : b; -} - -//------------------------------------------------------------------------------ -// Predictors - -static uint32_t Predictor2(uint32_t left, const uint32_t* const top) { - (void)left; - return top[0]; -} -static uint32_t Predictor3(uint32_t left, const uint32_t* const top) { - (void)left; - return top[1]; -} -static uint32_t Predictor4(uint32_t left, const uint32_t* const top) { - (void)left; - return top[-1]; -} -static uint32_t Predictor5(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average3(left, top[0], top[1]); - return pred; -} -static uint32_t Predictor6(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(left, top[-1]); - return pred; -} -static uint32_t Predictor7(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(left, top[0]); - return pred; -} -static uint32_t Predictor8(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(top[-1], top[0]); - (void)left; - return pred; -} -static uint32_t Predictor9(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(top[0], top[1]); - (void)left; - return pred; -} -static uint32_t Predictor10(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average4(left, top[-1], top[0], top[1]); - 
return pred; -} -static uint32_t Predictor11(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Select(top[0], left, top[-1]); - return pred; -} -static uint32_t Predictor12(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]); - return pred; -} -static uint32_t Predictor13(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]); - return pred; -} - -//------------------------------------------------------------------------------ - static void PredictorSub0_C(const uint32_t* in, const uint32_t* upper, int num_pixels, uint32_t* out) { int i; @@ -850,18 +735,33 @@ static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper, (void)upper; } -GENERATE_PREDICTOR_SUB(Predictor2, PredictorSub2_C) -GENERATE_PREDICTOR_SUB(Predictor3, PredictorSub3_C) -GENERATE_PREDICTOR_SUB(Predictor4, PredictorSub4_C) -GENERATE_PREDICTOR_SUB(Predictor5, PredictorSub5_C) -GENERATE_PREDICTOR_SUB(Predictor6, PredictorSub6_C) -GENERATE_PREDICTOR_SUB(Predictor7, PredictorSub7_C) -GENERATE_PREDICTOR_SUB(Predictor8, PredictorSub8_C) -GENERATE_PREDICTOR_SUB(Predictor9, PredictorSub9_C) -GENERATE_PREDICTOR_SUB(Predictor10, PredictorSub10_C) -GENERATE_PREDICTOR_SUB(Predictor11, PredictorSub11_C) -GENERATE_PREDICTOR_SUB(Predictor12, PredictorSub12_C) -GENERATE_PREDICTOR_SUB(Predictor13, PredictorSub13_C) +// It subtracts the prediction from the input pixel and stores the residual +// in the output pixel. 
+#define GENERATE_PREDICTOR_SUB(PREDICTOR_I) \ +static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \ + const uint32_t* upper, \ + int num_pixels, uint32_t* out) { \ + int x; \ + assert(upper != NULL); \ + for (x = 0; x < num_pixels; ++x) { \ + const uint32_t pred = \ + VP8LPredictor##PREDICTOR_I##_C(in[x - 1], upper + x); \ + out[x] = VP8LSubPixels(in[x], pred); \ + } \ +} + +GENERATE_PREDICTOR_SUB(2) +GENERATE_PREDICTOR_SUB(3) +GENERATE_PREDICTOR_SUB(4) +GENERATE_PREDICTOR_SUB(5) +GENERATE_PREDICTOR_SUB(6) +GENERATE_PREDICTOR_SUB(7) +GENERATE_PREDICTOR_SUB(8) +GENERATE_PREDICTOR_SUB(9) +GENERATE_PREDICTOR_SUB(10) +GENERATE_PREDICTOR_SUB(11) +GENERATE_PREDICTOR_SUB(12) +GENERATE_PREDICTOR_SUB(13) //------------------------------------------------------------------------------ @@ -962,10 +862,10 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) { // If defined, use CPUInfo() to overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8LEncDspInitSSE2(); -#if defined(WEBP_USE_SSE41) +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { VP8LEncDspInitSSE41(); } @@ -989,7 +889,7 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { VP8LEncDspInitNEON(); diff --git a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c index e676f6fdc9..b2f83b871c 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c @@ -232,76 +232,55 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) { //------------------------------------------------------------------------------ // Entropy -// Checks whether the X or Y contribution is worth computing and adding. -// Used in loop unrolling. 
-#define ANALYZE_X_OR_Y(x_or_y, j) \ - do { \ - if ((x_or_y)[i + (j)] != 0) retval -= VP8LFastSLog2((x_or_y)[i + (j)]); \ - } while (0) - -// Checks whether the X + Y contribution is worth computing and adding. -// Used in loop unrolling. -#define ANALYZE_XY(j) \ - do { \ - if (tmp[j] != 0) { \ - retval -= VP8LFastSLog2(tmp[j]); \ - ANALYZE_X_OR_Y(X, j); \ - } \ - } while (0) +// TODO(https://crbug.com/webp/499): this function produces different results +// from the C code due to use of double/float resulting in output differences +// when compared to -noasm. +#if !(defined(WEBP_HAVE_SLOW_CLZ_CTZ) || defined(__i386__) || defined(_M_IX86)) static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) { int i; double retval = 0.; - int sumX, sumXY; - int32_t tmp[4]; - __m128i zero = _mm_setzero_si128(); - // Sums up X + Y, 4 ints at a time (and will merge it at the end for sumXY). - __m128i sumXY_128 = zero; - __m128i sumX_128 = zero; - - for (i = 0; i < 256; i += 4) { - const __m128i x = _mm_loadu_si128((const __m128i*)(X + i)); - const __m128i y = _mm_loadu_si128((const __m128i*)(Y + i)); - - // Check if any X is non-zero: this actually provides a speedup as X is - // usually sparse. - if (_mm_movemask_epi8(_mm_cmpeq_epi32(x, zero)) != 0xFFFF) { - const __m128i xy_128 = _mm_add_epi32(x, y); - sumXY_128 = _mm_add_epi32(sumXY_128, xy_128); - - sumX_128 = _mm_add_epi32(sumX_128, x); - - // Analyze the different X + Y. - _mm_storeu_si128((__m128i*)tmp, xy_128); - - ANALYZE_XY(0); - ANALYZE_XY(1); - ANALYZE_XY(2); - ANALYZE_XY(3); - } else { - // X is fully 0, so only deal with Y. 
- sumXY_128 = _mm_add_epi32(sumXY_128, y); - - ANALYZE_X_OR_Y(Y, 0); - ANALYZE_X_OR_Y(Y, 1); - ANALYZE_X_OR_Y(Y, 2); - ANALYZE_X_OR_Y(Y, 3); + int sumX = 0, sumXY = 0; + const __m128i zero = _mm_setzero_si128(); + + for (i = 0; i < 256; i += 16) { + const __m128i x0 = _mm_loadu_si128((const __m128i*)(X + i + 0)); + const __m128i y0 = _mm_loadu_si128((const __m128i*)(Y + i + 0)); + const __m128i x1 = _mm_loadu_si128((const __m128i*)(X + i + 4)); + const __m128i y1 = _mm_loadu_si128((const __m128i*)(Y + i + 4)); + const __m128i x2 = _mm_loadu_si128((const __m128i*)(X + i + 8)); + const __m128i y2 = _mm_loadu_si128((const __m128i*)(Y + i + 8)); + const __m128i x3 = _mm_loadu_si128((const __m128i*)(X + i + 12)); + const __m128i y3 = _mm_loadu_si128((const __m128i*)(Y + i + 12)); + const __m128i x4 = _mm_packs_epi16(_mm_packs_epi32(x0, x1), + _mm_packs_epi32(x2, x3)); + const __m128i y4 = _mm_packs_epi16(_mm_packs_epi32(y0, y1), + _mm_packs_epi32(y2, y3)); + const int32_t mx = _mm_movemask_epi8(_mm_cmpgt_epi8(x4, zero)); + int32_t my = _mm_movemask_epi8(_mm_cmpgt_epi8(y4, zero)) | mx; + while (my) { + const int32_t j = BitsCtz(my); + int xy; + if ((mx >> j) & 1) { + const int x = X[i + j]; + sumXY += x; + retval -= VP8LFastSLog2(x); + } + xy = X[i + j] + Y[i + j]; + sumX += xy; + retval -= VP8LFastSLog2(xy); + my &= my - 1; } } - - // Sum up sumX_128 to get sumX. - _mm_storeu_si128((__m128i*)tmp, sumX_128); - sumX = tmp[3] + tmp[2] + tmp[1] + tmp[0]; - - // Sum up sumXY_128 to get sumXY. 
- _mm_storeu_si128((__m128i*)tmp, sumXY_128); - sumXY = tmp[3] + tmp[2] + tmp[1] + tmp[0]; - retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); return (float)retval; } -#undef ANALYZE_X_OR_Y -#undef ANALYZE_XY + +#else + +#define DONT_USE_COMBINED_SHANNON_ENTROPY_SSE2_FUNC // won't be faster + +#endif //------------------------------------------------------------------------------ @@ -460,20 +439,22 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper, (void)upper; } -#define GENERATE_PREDICTOR_1(X, IN) \ -static void PredictorSub##X##_SSE2(const uint32_t* in, const uint32_t* upper, \ - int num_pixels, uint32_t* out) { \ - int i; \ - for (i = 0; i + 4 <= num_pixels; i += 4) { \ - const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \ - const __m128i pred = _mm_loadu_si128((const __m128i*)&(IN)); \ - const __m128i res = _mm_sub_epi8(src, pred); \ - _mm_storeu_si128((__m128i*)&out[i], res); \ - } \ - if (i != num_pixels) { \ - VP8LPredictorsSub_C[(X)](in + i, upper + i, num_pixels - i, out + i); \ - } \ -} +#define GENERATE_PREDICTOR_1(X, IN) \ + static void PredictorSub##X##_SSE2(const uint32_t* const in, \ + const uint32_t* const upper, \ + int num_pixels, uint32_t* const out) { \ + int i; \ + for (i = 0; i + 4 <= num_pixels; i += 4) { \ + const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \ + const __m128i pred = _mm_loadu_si128((const __m128i*)&(IN)); \ + const __m128i res = _mm_sub_epi8(src, pred); \ + _mm_storeu_si128((__m128i*)&out[i], res); \ + } \ + if (i != num_pixels) { \ + VP8LPredictorsSub_C[(X)](in + i, WEBP_OFFSET_PTR(upper, i), \ + num_pixels - i, out + i); \ + } \ + } GENERATE_PREDICTOR_1(1, in[i - 1]) // Predictor1: L GENERATE_PREDICTOR_1(2, upper[i]) // Predictor2: T @@ -657,7 +638,9 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) { VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE2; VP8LAddVector = AddVector_SSE2; VP8LAddVectorEq = AddVectorEq_SSE2; +#if 
!defined(DONT_USE_COMBINED_SHANNON_ENTROPY_SSE2_FUNC) VP8LCombinedShannonEntropy = CombinedShannonEntropy_SSE2; +#endif VP8LVectorMismatch = VectorMismatch_SSE2; VP8LBundleColorMap = BundleColorMap_SSE2; diff --git a/thirdparty/libwebp/src/dsp/lossless_enc_sse41.c b/thirdparty/libwebp/src/dsp/lossless_enc_sse41.c index 719d8ed25e..ad358a6f25 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc_sse41.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse41.c @@ -44,46 +44,47 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data, //------------------------------------------------------------------------------ // Color Transform -#define SPAN 8 +#define MK_CST_16(HI, LO) \ + _mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff))) + static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride, int tile_width, int tile_height, int green_to_blue, int red_to_blue, int histo[]) { - const __m128i mults_r = _mm_set1_epi16(CST_5b(red_to_blue)); - const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_blue)); - const __m128i mask_g = _mm_set1_epi16((short)0xff00); // green mask - const __m128i mask_gb = _mm_set1_epi32(0xffff); // green/blue mask - const __m128i mask_b = _mm_set1_epi16(0x00ff); // blue mask - const __m128i shuffler_lo = _mm_setr_epi8(-1, 2, -1, 6, -1, 10, -1, 14, -1, - -1, -1, -1, -1, -1, -1, -1); - const __m128i shuffler_hi = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, - 2, -1, 6, -1, 10, -1, 14); - int y; - for (y = 0; y < tile_height; ++y) { - const uint32_t* const src = argb + y * stride; - int i, x; - for (x = 0; x + SPAN <= tile_width; x += SPAN) { - uint16_t values[SPAN]; - const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]); - const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]); - const __m128i r0 = _mm_shuffle_epi8(in0, shuffler_lo); - const __m128i r1 = _mm_shuffle_epi8(in1, shuffler_hi); - const __m128i r = _mm_or_si128(r0, r1); // r 0 - const __m128i gb0 = _mm_and_si128(in0, mask_gb); - 
const __m128i gb1 = _mm_and_si128(in1, mask_gb); - const __m128i gb = _mm_packus_epi32(gb0, gb1); // g b - const __m128i g = _mm_and_si128(gb, mask_g); // g 0 - const __m128i A = _mm_mulhi_epi16(r, mults_r); // x dbr - const __m128i B = _mm_mulhi_epi16(g, mults_g); // x dbg - const __m128i C = _mm_sub_epi8(gb, B); // x b' - const __m128i D = _mm_sub_epi8(C, A); // x b'' - const __m128i E = _mm_and_si128(D, mask_b); // 0 b'' - _mm_storeu_si128((__m128i*)values, E); - for (i = 0; i < SPAN; ++i) ++histo[values[i]]; + const __m128i mult = + MK_CST_16(CST_5b(red_to_blue) + 256,CST_5b(green_to_blue)); + const __m128i perm = + _mm_setr_epi8(-1, 1, -1, 2, -1, 5, -1, 6, -1, 9, -1, 10, -1, 13, -1, 14); + if (tile_width >= 4) { + int y; + for (y = 0; y < tile_height; ++y) { + const uint32_t* const src = argb + y * stride; + const __m128i A1 = _mm_loadu_si128((const __m128i*)src); + const __m128i B1 = _mm_shuffle_epi8(A1, perm); + const __m128i C1 = _mm_mulhi_epi16(B1, mult); + const __m128i D1 = _mm_sub_epi16(A1, C1); + __m128i E = _mm_add_epi16(_mm_srli_epi32(D1, 16), D1); + int x; + for (x = 4; x + 4 <= tile_width; x += 4) { + const __m128i A2 = _mm_loadu_si128((const __m128i*)(src + x)); + __m128i B2, C2, D2; + ++histo[_mm_extract_epi8(E, 0)]; + B2 = _mm_shuffle_epi8(A2, perm); + ++histo[_mm_extract_epi8(E, 4)]; + C2 = _mm_mulhi_epi16(B2, mult); + ++histo[_mm_extract_epi8(E, 8)]; + D2 = _mm_sub_epi16(A2, C2); + ++histo[_mm_extract_epi8(E, 12)]; + E = _mm_add_epi16(_mm_srli_epi32(D2, 16), D2); + } + ++histo[_mm_extract_epi8(E, 0)]; + ++histo[_mm_extract_epi8(E, 4)]; + ++histo[_mm_extract_epi8(E, 8)]; + ++histo[_mm_extract_epi8(E, 12)]; } } { - const int left_over = tile_width & (SPAN - 1); + const int left_over = tile_width & 3; if (left_over > 0) { VP8LCollectColorBlueTransforms_C(argb + tile_width - left_over, stride, left_over, tile_height, @@ -95,33 +96,37 @@ static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride, static void 
CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride, int tile_width, int tile_height, int green_to_red, int histo[]) { - const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_red)); - const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask - const __m128i mask = _mm_set1_epi16(0xff); - - int y; - for (y = 0; y < tile_height; ++y) { - const uint32_t* const src = argb + y * stride; - int i, x; - for (x = 0; x + SPAN <= tile_width; x += SPAN) { - uint16_t values[SPAN]; - const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]); - const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]); - const __m128i g0 = _mm_and_si128(in0, mask_g); // 0 0 | g 0 - const __m128i g1 = _mm_and_si128(in1, mask_g); - const __m128i g = _mm_packus_epi32(g0, g1); // g 0 - const __m128i A0 = _mm_srli_epi32(in0, 16); // 0 0 | x r - const __m128i A1 = _mm_srli_epi32(in1, 16); - const __m128i A = _mm_packus_epi32(A0, A1); // x r - const __m128i B = _mm_mulhi_epi16(g, mults_g); // x dr - const __m128i C = _mm_sub_epi8(A, B); // x r' - const __m128i D = _mm_and_si128(C, mask); // 0 r' - _mm_storeu_si128((__m128i*)values, D); - for (i = 0; i < SPAN; ++i) ++histo[values[i]]; + + const __m128i mult = MK_CST_16(0, CST_5b(green_to_red)); + const __m128i mask_g = _mm_set1_epi32(0x0000ff00); + if (tile_width >= 4) { + int y; + for (y = 0; y < tile_height; ++y) { + const uint32_t* const src = argb + y * stride; + const __m128i A1 = _mm_loadu_si128((const __m128i*)src); + const __m128i B1 = _mm_and_si128(A1, mask_g); + const __m128i C1 = _mm_madd_epi16(B1, mult); + __m128i D = _mm_sub_epi16(A1, C1); + int x; + for (x = 4; x + 4 <= tile_width; x += 4) { + const __m128i A2 = _mm_loadu_si128((const __m128i*)(src + x)); + __m128i B2, C2; + ++histo[_mm_extract_epi8(D, 2)]; + B2 = _mm_and_si128(A2, mask_g); + ++histo[_mm_extract_epi8(D, 6)]; + C2 = _mm_madd_epi16(B2, mult); + ++histo[_mm_extract_epi8(D, 10)]; + ++histo[_mm_extract_epi8(D, 14)]; + D = _mm_sub_epi16(A2, C2); + } + 
++histo[_mm_extract_epi8(D, 2)]; + ++histo[_mm_extract_epi8(D, 6)]; + ++histo[_mm_extract_epi8(D, 10)]; + ++histo[_mm_extract_epi8(D, 14)]; } } { - const int left_over = tile_width & (SPAN - 1); + const int left_over = tile_width & 3; if (left_over > 0) { VP8LCollectColorRedTransforms_C(argb + tile_width - left_over, stride, left_over, tile_height, green_to_red, @@ -130,6 +135,8 @@ static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride, } } +#undef MK_CST_16 + //------------------------------------------------------------------------------ // Entry point diff --git a/thirdparty/libwebp/src/dsp/lossless_sse2.c b/thirdparty/libwebp/src/dsp/lossless_sse2.c index aef0cee1b3..3a0eb440db 100644 --- a/thirdparty/libwebp/src/dsp/lossless_sse2.c +++ b/thirdparty/libwebp/src/dsp/lossless_sse2.c @@ -18,7 +18,6 @@ #include "src/dsp/common_sse2.h" #include "src/dsp/lossless.h" #include "src/dsp/lossless_common.h" -#include <assert.h> #include <emmintrin.h> //------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/src/dsp/lossless_sse41.c b/thirdparty/libwebp/src/dsp/lossless_sse41.c new file mode 100644 index 0000000000..b0d6daa7fe --- /dev/null +++ b/thirdparty/libwebp/src/dsp/lossless_sse41.c @@ -0,0 +1,132 @@ +// Copyright 2021 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
+// ----------------------------------------------------------------------------- +// +// SSE41 variant of methods for lossless decoder + +#include "src/dsp/dsp.h" + +#if defined(WEBP_USE_SSE41) + +#include "src/dsp/common_sse41.h" +#include "src/dsp/lossless.h" +#include "src/dsp/lossless_common.h" + +//------------------------------------------------------------------------------ +// Color-space conversion functions + +static void TransformColorInverse_SSE41(const VP8LMultipliers* const m, + const uint32_t* const src, + int num_pixels, uint32_t* dst) { +// sign-extended multiplying constants, pre-shifted by 5. +#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend + const __m128i mults_rb = _mm_set1_epi32((uint32_t)CST(green_to_red_) << 16 | + (CST(green_to_blue_) & 0xffff)); + const __m128i mults_b2 = _mm_set1_epi32(CST(red_to_blue_)); +#undef CST + const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); + const __m128i perm1 = _mm_setr_epi8(-1, 1, -1, 1, -1, 5, -1, 5, + -1, 9, -1, 9, -1, 13, -1, 13); + const __m128i perm2 = _mm_setr_epi8(-1, 2, -1, -1, -1, 6, -1, -1, + -1, 10, -1, -1, -1, 14, -1, -1); + int i; + for (i = 0; i + 4 <= num_pixels; i += 4) { + const __m128i A = _mm_loadu_si128((const __m128i*)(src + i)); + const __m128i B = _mm_shuffle_epi8(A, perm1); // argb -> g0g0 + const __m128i C = _mm_mulhi_epi16(B, mults_rb); + const __m128i D = _mm_add_epi8(A, C); + const __m128i E = _mm_shuffle_epi8(D, perm2); + const __m128i F = _mm_mulhi_epi16(E, mults_b2); + const __m128i G = _mm_add_epi8(D, F); + const __m128i out = _mm_blendv_epi8(G, A, mask_ag); + _mm_storeu_si128((__m128i*)&dst[i], out); + } + // Fall-back to C-version for left-overs. 
+ if (i != num_pixels) { + VP8LTransformColorInverse_C(m, src + i, num_pixels - i, dst + i); + } +} + +//------------------------------------------------------------------------------ + +#define ARGB_TO_RGB_SSE41 do { \ + while (num_pixels >= 16) { \ + const __m128i in0 = _mm_loadu_si128(in + 0); \ + const __m128i in1 = _mm_loadu_si128(in + 1); \ + const __m128i in2 = _mm_loadu_si128(in + 2); \ + const __m128i in3 = _mm_loadu_si128(in + 3); \ + const __m128i a0 = _mm_shuffle_epi8(in0, perm0); \ + const __m128i a1 = _mm_shuffle_epi8(in1, perm1); \ + const __m128i a2 = _mm_shuffle_epi8(in2, perm2); \ + const __m128i a3 = _mm_shuffle_epi8(in3, perm3); \ + const __m128i b0 = _mm_blend_epi16(a0, a1, 0xc0); \ + const __m128i b1 = _mm_blend_epi16(a1, a2, 0xf0); \ + const __m128i b2 = _mm_blend_epi16(a2, a3, 0xfc); \ + _mm_storeu_si128(out + 0, b0); \ + _mm_storeu_si128(out + 1, b1); \ + _mm_storeu_si128(out + 2, b2); \ + in += 4; \ + out += 3; \ + num_pixels -= 16; \ + } \ +} while (0) + +static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels, + uint8_t* dst) { + const __m128i* in = (const __m128i*)src; + __m128i* out = (__m128i*)dst; + const __m128i perm0 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, + 8, 14, 13, 12, -1, -1, -1, -1); + const __m128i perm1 = _mm_shuffle_epi32(perm0, 0x39); + const __m128i perm2 = _mm_shuffle_epi32(perm0, 0x4e); + const __m128i perm3 = _mm_shuffle_epi32(perm0, 0x93); + + ARGB_TO_RGB_SSE41; + + // left-overs + if (num_pixels > 0) { + VP8LConvertBGRAToRGB_C((const uint32_t*)in, num_pixels, (uint8_t*)out); + } +} + +static void ConvertBGRAToBGR_SSE41(const uint32_t* src, + int num_pixels, uint8_t* dst) { + const __m128i* in = (const __m128i*)src; + __m128i* out = (__m128i*)dst; + const __m128i perm0 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, + 12, 13, 14, -1, -1, -1, -1); + const __m128i perm1 = _mm_shuffle_epi32(perm0, 0x39); + const __m128i perm2 = _mm_shuffle_epi32(perm0, 0x4e); + const __m128i perm3 = _mm_shuffle_epi32(perm0, 
0x93); + + ARGB_TO_RGB_SSE41; + + // left-overs + if (num_pixels > 0) { + VP8LConvertBGRAToBGR_C((const uint32_t*)in, num_pixels, (uint8_t*)out); + } +} + +#undef ARGB_TO_RGB_SSE41 + +//------------------------------------------------------------------------------ +// Entry point + +extern void VP8LDspInitSSE41(void); + +WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE41(void) { + VP8LTransformColorInverse = TransformColorInverse_SSE41; + VP8LConvertBGRAToRGB = ConvertBGRAToRGB_SSE41; + VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE41; +} + +#else // !WEBP_USE_SSE41 + +WEBP_DSP_INIT_STUB(VP8LDspInitSSE41) + +#endif // WEBP_USE_SSE41 diff --git a/thirdparty/libwebp/src/dsp/rescaler.c b/thirdparty/libwebp/src/dsp/rescaler.c index c5a01e82df..14620ce4f1 100644 --- a/thirdparty/libwebp/src/dsp/rescaler.c +++ b/thirdparty/libwebp/src/dsp/rescaler.c @@ -38,8 +38,9 @@ void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk, int x_out = channel; // simple bilinear interpolation int accum = wrk->x_add; - int left = src[x_in]; - int right = (wrk->src_width > 1) ? src[x_in + x_stride] : left; + rescaler_t left = (rescaler_t)src[x_in]; + rescaler_t right = + (wrk->src_width > 1) ? 
(rescaler_t)src[x_in + x_stride] : left; x_in += x_stride; while (1) { wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum; @@ -50,7 +51,7 @@ void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk, left = right; x_in += x_stride; assert(x_in < wrk->src_width * x_stride); - right = src[x_in]; + right = (rescaler_t)src[x_in]; accum += wrk->x_add; } } @@ -213,7 +214,7 @@ WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) { WebPRescalerImportRowShrink = WebPRescalerImportRowShrink_C; if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPRescalerDspInitSSE2(); } @@ -235,7 +236,7 @@ WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { WebPRescalerDspInitNEON(); diff --git a/thirdparty/libwebp/src/dsp/ssim.c b/thirdparty/libwebp/src/dsp/ssim.c index 989ce8254c..f85c2e6e5b 100644 --- a/thirdparty/libwebp/src/dsp/ssim.c +++ b/thirdparty/libwebp/src/dsp/ssim.c @@ -150,7 +150,7 @@ WEBP_DSP_INIT_FUNC(VP8SSIMDspInit) { #endif if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8SSIMDspInitSSE2(); } diff --git a/thirdparty/libwebp/src/dsp/upsampling.c b/thirdparty/libwebp/src/dsp/upsampling.c index 9b60da5bbb..87f771f3eb 100644 --- a/thirdparty/libwebp/src/dsp/upsampling.c +++ b/thirdparty/libwebp/src/dsp/upsampling.c @@ -233,12 +233,12 @@ WEBP_DSP_INIT_FUNC(WebPInitYUV444Converters) { WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444_C; if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPInitYUV444ConvertersSSE2(); } #endif -#if defined(WEBP_USE_SSE41) +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { WebPInitYUV444ConvertersSSE41(); } @@ -278,12 +278,12 @@ WEBP_DSP_INIT_FUNC(WebPInitUpsamplers) { // If defined, use CPUInfo() to 
overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPInitUpsamplersSSE2(); } #endif -#if defined(WEBP_USE_SSE41) +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { WebPInitUpsamplersSSE41(); } @@ -300,7 +300,7 @@ WEBP_DSP_INIT_FUNC(WebPInitUpsamplers) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { WebPInitUpsamplersNEON(); diff --git a/thirdparty/libwebp/src/dsp/yuv.c b/thirdparty/libwebp/src/dsp/yuv.c index 14e67fc28e..48466f8b11 100644 --- a/thirdparty/libwebp/src/dsp/yuv.c +++ b/thirdparty/libwebp/src/dsp/yuv.c @@ -90,16 +90,16 @@ WEBP_DSP_INIT_FUNC(WebPInitSamplers) { // If defined, use CPUInfo() to overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPInitSamplersSSE2(); } -#endif // WEBP_USE_SSE2 -#if defined(WEBP_USE_SSE41) +#endif // WEBP_HAVE_SSE2 +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { WebPInitSamplersSSE41(); } -#endif // WEBP_USE_SSE41 +#endif // WEBP_HAVE_SSE41 #if defined(WEBP_USE_MIPS32) if (VP8GetCPUInfo(kMIPS32)) { WebPInitSamplersMIPS32(); @@ -276,26 +276,26 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { #endif if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPInitConvertARGBToYUVSSE2(); WebPInitSharpYUVSSE2(); } -#endif // WEBP_USE_SSE2 -#if defined(WEBP_USE_SSE41) +#endif // WEBP_HAVE_SSE2 +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { WebPInitConvertARGBToYUVSSE41(); } -#endif // WEBP_USE_SSE41 +#endif // WEBP_HAVE_SSE41 } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { WebPInitConvertARGBToYUVNEON(); WebPInitSharpYUVNEON(); } 
-#endif // WEBP_USE_NEON +#endif // WEBP_HAVE_NEON assert(WebPConvertARGBToY != NULL); assert(WebPConvertARGBToUV != NULL); diff --git a/thirdparty/libwebp/src/enc/alpha_enc.c b/thirdparty/libwebp/src/enc/alpha_enc.c index dce9ca957d..0b54f3e6ec 100644 --- a/thirdparty/libwebp/src/enc/alpha_enc.c +++ b/thirdparty/libwebp/src/enc/alpha_enc.c @@ -303,7 +303,7 @@ static int EncodeAlpha(VP8Encoder* const enc, int ok = 1; const int reduce_levels = (quality < 100); - // quick sanity checks + // quick correctness checks assert((uint64_t)data_size == (uint64_t)width * height); // as per spec assert(enc != NULL && pic != NULL && pic->a != NULL); assert(output != NULL && output_size != NULL); @@ -361,7 +361,7 @@ static int EncodeAlpha(VP8Encoder* const enc, //------------------------------------------------------------------------------ // Main calls -static int CompressAlphaJob(void* arg1, void* dummy) { +static int CompressAlphaJob(void* arg1, void* unused) { VP8Encoder* const enc = (VP8Encoder*)arg1; const WebPConfig* config = enc->config_; uint8_t* alpha_data = NULL; @@ -375,13 +375,13 @@ static int CompressAlphaJob(void* arg1, void* dummy) { filter, effort_level, &alpha_data, &alpha_size)) { return 0; } - if (alpha_size != (uint32_t)alpha_size) { // Sanity check. + if (alpha_size != (uint32_t)alpha_size) { // Soundness check. 
WebPSafeFree(alpha_data); return 0; } enc->alpha_data_size_ = (uint32_t)alpha_size; enc->alpha_data_ = alpha_data; - (void)dummy; + (void)unused; return 1; } diff --git a/thirdparty/libwebp/src/enc/analysis_enc.c b/thirdparty/libwebp/src/enc/analysis_enc.c index 687757ae03..ebb784261c 100644 --- a/thirdparty/libwebp/src/enc/analysis_enc.c +++ b/thirdparty/libwebp/src/enc/analysis_enc.c @@ -126,16 +126,6 @@ static void InitHistogram(VP8Histogram* const histo) { histo->last_non_zero = 1; } -static void MergeHistograms(const VP8Histogram* const in, - VP8Histogram* const out) { - if (in->max_value > out->max_value) { - out->max_value = in->max_value; - } - if (in->last_non_zero > out->last_non_zero) { - out->last_non_zero = in->last_non_zero; - } -} - //------------------------------------------------------------------------------ // Simplified k-Means, to assign Nb segments based on alpha-histogram @@ -285,49 +275,6 @@ static int FastMBAnalyze(VP8EncIterator* const it) { return 0; } -static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it, - int best_alpha) { - uint8_t modes[16]; - const int max_mode = MAX_INTRA4_MODE; - int i4_alpha; - VP8Histogram total_histo; - int cur_histo = 0; - InitHistogram(&total_histo); - - VP8IteratorStartI4(it); - do { - int mode; - int best_mode_alpha = DEFAULT_ALPHA; - VP8Histogram histos[2]; - const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; - - VP8MakeIntra4Preds(it); - for (mode = 0; mode < max_mode; ++mode) { - int alpha; - - InitHistogram(&histos[cur_histo]); - VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode], - 0, 1, &histos[cur_histo]); - alpha = GetAlpha(&histos[cur_histo]); - if (IS_BETTER_ALPHA(alpha, best_mode_alpha)) { - best_mode_alpha = alpha; - modes[it->i4_] = mode; - cur_histo ^= 1; // keep track of best histo so far. 
- } - } - // accumulate best histogram - MergeHistograms(&histos[cur_histo ^ 1], &total_histo); - // Note: we reuse the original samples for predictors - } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF_ENC)); - - i4_alpha = GetAlpha(&total_histo); - if (IS_BETTER_ALPHA(i4_alpha, best_alpha)) { - VP8SetIntra4Mode(it, modes); - best_alpha = i4_alpha; - } - return best_alpha; -} - static int MBAnalyzeBestUVMode(VP8EncIterator* const it) { int best_alpha = DEFAULT_ALPHA; int smallest_alpha = 0; @@ -371,13 +318,6 @@ static void MBAnalyze(VP8EncIterator* const it, best_alpha = FastMBAnalyze(it); } else { best_alpha = MBAnalyzeBestIntra16Mode(it); - if (enc->method_ >= 5) { - // We go and make a fast decision for intra4/intra16. - // It's usually not a good and definitive pick, but helps seeding the - // stats about level bit-cost. - // TODO(skal): improve criterion. - best_alpha = MBAnalyzeBestIntra4Mode(it, best_alpha); - } } best_uv_alpha = MBAnalyzeBestUVMode(it); diff --git a/thirdparty/libwebp/src/enc/backward_references_enc.c b/thirdparty/libwebp/src/enc/backward_references_enc.c index d445b40fc5..519b36a091 100644 --- a/thirdparty/libwebp/src/enc/backward_references_enc.c +++ b/thirdparty/libwebp/src/enc/backward_references_enc.c @@ -11,13 +11,14 @@ // #include <assert.h> +#include <float.h> #include <math.h> -#include "src/enc/backward_references_enc.h" -#include "src/enc/histogram_enc.h" +#include "src/dsp/dsp.h" #include "src/dsp/lossless.h" #include "src/dsp/lossless_common.h" -#include "src/dsp/dsp.h" +#include "src/enc/backward_references_enc.h" +#include "src/enc/histogram_enc.h" #include "src/utils/color_cache_utils.h" #include "src/utils/utils.h" @@ -103,6 +104,20 @@ void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) { } } +// Swaps the content of two VP8LBackwardRefs. 
+static void BackwardRefsSwap(VP8LBackwardRefs* const refs1, + VP8LBackwardRefs* const refs2) { + const int point_to_refs1 = + (refs1->tail_ != NULL && refs1->tail_ == &refs1->refs_); + const int point_to_refs2 = + (refs2->tail_ != NULL && refs2->tail_ == &refs2->refs_); + const VP8LBackwardRefs tmp = *refs1; + *refs1 = *refs2; + *refs2 = tmp; + if (point_to_refs2) refs1->tail_ = &refs1->refs_; + if (point_to_refs1) refs2->tail_ = &refs2->refs_; +} + void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) { assert(refs != NULL); memset(refs, 0, sizeof(*refs)); @@ -154,6 +169,22 @@ static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) { return b; } +// Return 1 on success, 0 on error. +static int BackwardRefsClone(const VP8LBackwardRefs* const from, + VP8LBackwardRefs* const to) { + const PixOrCopyBlock* block_from = from->refs_; + VP8LClearBackwardRefs(to); + while (block_from != NULL) { + PixOrCopyBlock* const block_to = BackwardRefsNewBlock(to); + if (block_to == NULL) return 0; + memcpy(block_to->start_, block_from->start_, + block_from->size_ * sizeof(PixOrCopy)); + block_to->size_ = block_from->size_; + block_from = block_from->next_; + } + return 1; +} + extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs, const PixOrCopy v); void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs, @@ -753,12 +784,18 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality, } } } else { + int code, extra_bits, extra_bits_value; // We should compute the contribution of the (distance,length) // histograms but those are the same independently from the cache size. // As those constant contributions are in the end added to the other - // histogram contributions, we can safely ignore them. + // histogram contributions, we can ignore them, except for the length + // prefix that is part of the literal_ histogram. 
int len = PixOrCopyLength(v); uint32_t argb_prev = *argb ^ 0xffffffffu; + VP8LPrefixEncode(len, &code, &extra_bits, &extra_bits_value); + for (i = 0; i <= cache_bits_max; ++i) { + ++histos[i]->literal_[NUM_LITERAL_CODES + code]; + } // Update the color caches. do { if (*argb != argb_prev) { @@ -842,16 +879,21 @@ extern int VP8LBackwardReferencesTraceBackwards( int xsize, int ysize, const uint32_t* const argb, int cache_bits, const VP8LHashChain* const hash_chain, const VP8LBackwardRefs* const refs_src, VP8LBackwardRefs* const refs_dst); -static VP8LBackwardRefs* GetBackwardReferences( - int width, int height, const uint32_t* const argb, int quality, - int lz77_types_to_try, int* const cache_bits, - const VP8LHashChain* const hash_chain, VP8LBackwardRefs* best, - VP8LBackwardRefs* worst) { - const int cache_bits_initial = *cache_bits; - double bit_cost_best = -1; +static int GetBackwardReferences(int width, int height, + const uint32_t* const argb, int quality, + int lz77_types_to_try, int cache_bits_max, + int do_no_cache, + const VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs, + int* const cache_bits_best) { VP8LHistogram* histo = NULL; - int lz77_type, lz77_type_best = 0; + int i, lz77_type; + // Index 0 is for a color cache, index 1 for no cache (if needed). + int lz77_types_best[2] = {0, 0}; + double bit_costs_best[2] = {DBL_MAX, DBL_MAX}; VP8LHashChain hash_chain_box; + VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 
2 : 1]; + int status = 0; memset(&hash_chain_box, 0, sizeof(hash_chain_box)); histo = VP8LAllocateHistogram(MAX_COLOR_CACHE_BITS); @@ -860,86 +902,129 @@ static VP8LBackwardRefs* GetBackwardReferences( for (lz77_type = 1; lz77_types_to_try; lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) { int res = 0; - double bit_cost; - int cache_bits_tmp = cache_bits_initial; + double bit_cost = 0.; if ((lz77_types_to_try & lz77_type) == 0) continue; switch (lz77_type) { case kLZ77RLE: - res = BackwardReferencesRle(width, height, argb, 0, worst); + res = BackwardReferencesRle(width, height, argb, 0, refs_tmp); break; case kLZ77Standard: // Compute LZ77 with no cache (0 bits), as the ideal LZ77 with a color // cache is not that different in practice. - res = BackwardReferencesLz77(width, height, argb, 0, hash_chain, worst); + res = BackwardReferencesLz77(width, height, argb, 0, hash_chain, + refs_tmp); break; case kLZ77Box: if (!VP8LHashChainInit(&hash_chain_box, width * height)) goto Error; res = BackwardReferencesLz77Box(width, height, argb, 0, hash_chain, - &hash_chain_box, worst); + &hash_chain_box, refs_tmp); break; default: assert(0); } if (!res) goto Error; - // Next, try with a color cache and update the references. - if (!CalculateBestCacheSize(argb, quality, worst, &cache_bits_tmp)) { - goto Error; - } - if (cache_bits_tmp > 0) { - if (!BackwardRefsWithLocalCache(argb, cache_bits_tmp, worst)) { - goto Error; + // Start with the no color cache case. + for (i = 1; i >= 0; --i) { + int cache_bits = (i == 1) ? 0 : cache_bits_max; + + if (i == 1 && !do_no_cache) continue; + + if (i == 0) { + // Try with a color cache. + if (!CalculateBestCacheSize(argb, quality, refs_tmp, &cache_bits)) { + goto Error; + } + if (cache_bits > 0) { + if (!BackwardRefsWithLocalCache(argb, cache_bits, refs_tmp)) { + goto Error; + } + } } - } - // Keep the best backward references. 
- VP8LHistogramCreate(histo, worst, cache_bits_tmp); - bit_cost = VP8LHistogramEstimateBits(histo); - if (lz77_type_best == 0 || bit_cost < bit_cost_best) { - VP8LBackwardRefs* const tmp = worst; - worst = best; - best = tmp; - bit_cost_best = bit_cost; - *cache_bits = cache_bits_tmp; - lz77_type_best = lz77_type; + if (i == 0 && do_no_cache && cache_bits == 0) { + // No need to re-compute bit_cost as it was computed at i == 1. + } else { + VP8LHistogramCreate(histo, refs_tmp, cache_bits); + bit_cost = VP8LHistogramEstimateBits(histo); + } + + if (bit_cost < bit_costs_best[i]) { + if (i == 1) { + // Do not swap as the full cache analysis would have the wrong + // VP8LBackwardRefs to start with. + if (!BackwardRefsClone(refs_tmp, &refs[1])) goto Error; + } else { + BackwardRefsSwap(refs_tmp, &refs[0]); + } + bit_costs_best[i] = bit_cost; + lz77_types_best[i] = lz77_type; + if (i == 0) *cache_bits_best = cache_bits; + } } } - assert(lz77_type_best > 0); + assert(lz77_types_best[0] > 0); + assert(!do_no_cache || lz77_types_best[1] > 0); // Improve on simple LZ77 but only for high quality (TraceBackwards is // costly). - if ((lz77_type_best == kLZ77Standard || lz77_type_best == kLZ77Box) && - quality >= 25) { - const VP8LHashChain* const hash_chain_tmp = - (lz77_type_best == kLZ77Standard) ? hash_chain : &hash_chain_box; - if (VP8LBackwardReferencesTraceBackwards(width, height, argb, *cache_bits, - hash_chain_tmp, best, worst)) { - double bit_cost_trace; - VP8LHistogramCreate(histo, worst, *cache_bits); - bit_cost_trace = VP8LHistogramEstimateBits(histo); - if (bit_cost_trace < bit_cost_best) best = worst; + for (i = 1; i >= 0; --i) { + if (i == 1 && !do_no_cache) continue; + if ((lz77_types_best[i] == kLZ77Standard || + lz77_types_best[i] == kLZ77Box) && + quality >= 25) { + const VP8LHashChain* const hash_chain_tmp = + (lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box; + const int cache_bits = (i == 1) ? 
0 : *cache_bits_best; + if (VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits, + hash_chain_tmp, &refs[i], + refs_tmp)) { + double bit_cost_trace; + VP8LHistogramCreate(histo, refs_tmp, cache_bits); + bit_cost_trace = VP8LHistogramEstimateBits(histo); + if (bit_cost_trace < bit_costs_best[i]) { + BackwardRefsSwap(refs_tmp, &refs[i]); + } + } } - } - BackwardReferences2DLocality(width, best); + BackwardReferences2DLocality(width, &refs[i]); + + if (i == 1 && lz77_types_best[0] == lz77_types_best[1] && + *cache_bits_best == 0) { + // If the best cache size is 0 and we have the same best LZ77, just copy + // the data over and stop here. + if (!BackwardRefsClone(&refs[1], &refs[0])) goto Error; + break; + } + } + status = 1; Error: VP8LHashChainClear(&hash_chain_box); VP8LFreeHistogram(histo); - return best; + return status; } -VP8LBackwardRefs* VP8LGetBackwardReferences( +WebPEncodingError VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int low_effort, int lz77_types_to_try, int* const cache_bits, - const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_tmp1, - VP8LBackwardRefs* const refs_tmp2) { + int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, + int* const cache_bits_best) { if (low_effort) { - return GetBackwardReferencesLowEffort(width, height, argb, cache_bits, - hash_chain, refs_tmp1); + VP8LBackwardRefs* refs_best; + *cache_bits_best = cache_bits_max; + refs_best = GetBackwardReferencesLowEffort( + width, height, argb, cache_bits_best, hash_chain, refs); + if (refs_best == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; + // Set it in first position. 
+ BackwardRefsSwap(refs_best, &refs[0]); } else { - return GetBackwardReferences(width, height, argb, quality, - lz77_types_to_try, cache_bits, hash_chain, - refs_tmp1, refs_tmp2); + if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try, + cache_bits_max, do_no_cache, hash_chain, refs, + cache_bits_best)) { + return VP8_ENC_ERROR_OUT_OF_MEMORY; + } } + return VP8_ENC_OK; } diff --git a/thirdparty/libwebp/src/enc/backward_references_enc.h b/thirdparty/libwebp/src/enc/backward_references_enc.h index 103ddfdcb7..4c0267b41e 100644 --- a/thirdparty/libwebp/src/enc/backward_references_enc.h +++ b/thirdparty/libwebp/src/enc/backward_references_enc.h @@ -16,6 +16,7 @@ #include <assert.h> #include <stdlib.h> #include "src/webp/types.h" +#include "src/webp/encode.h" #include "src/webp/format_constants.h" #ifdef __cplusplus @@ -218,14 +219,19 @@ enum VP8LLZ77Type { // Evaluates best possible backward references for specified quality. // The input cache_bits to 'VP8LGetBackwardReferences' sets the maximum cache // bits to use (passing 0 implies disabling the local color cache). -// The optimal cache bits is evaluated and set for the *cache_bits parameter. -// The return value is the pointer to the best of the two backward refs viz, -// refs[0] or refs[1]. -VP8LBackwardRefs* VP8LGetBackwardReferences( +// The optimal cache bits is evaluated and set for the *cache_bits_best +// parameter with the matching refs_best. +// If do_no_cache == 0, refs is an array of 2 values and the best +// VP8LBackwardRefs is put in the first element. +// If do_no_cache != 0, refs is an array of 3 values and the best +// VP8LBackwardRefs is put in the first element, the best value with no-cache in +// the second element. +// In both cases, the last element is used as temporary internally. 
+WebPEncodingError VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int low_effort, int lz77_types_to_try, int* const cache_bits, - const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_tmp1, - VP8LBackwardRefs* const refs_tmp2); + int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, + int* const cache_bits_best); #ifdef __cplusplus } diff --git a/thirdparty/libwebp/src/enc/config_enc.c b/thirdparty/libwebp/src/enc/config_enc.c index 9d4828978e..3518b41403 100644 --- a/thirdparty/libwebp/src/enc/config_enc.c +++ b/thirdparty/libwebp/src/enc/config_enc.c @@ -39,6 +39,8 @@ int WebPConfigInitInternal(WebPConfig* config, config->partitions = 0; config->segments = 4; config->pass = 1; + config->qmin = 0; + config->qmax = 100; config->show_compressed = 0; config->preprocessing = 0; config->autofilter = 0; @@ -106,6 +108,9 @@ int WebPValidateConfig(const WebPConfig* config) { if (config->filter_type < 0 || config->filter_type > 1) return 0; if (config->autofilter < 0 || config->autofilter > 1) return 0; if (config->pass < 1 || config->pass > 10) return 0; + if (config->qmin < 0 || config->qmax > 100 || config->qmin > config->qmax) { + return 0; + } if (config->show_compressed < 0 || config->show_compressed > 1) return 0; if (config->preprocessing < 0 || config->preprocessing > 7) return 0; if (config->partitions < 0 || config->partitions > 3) return 0; diff --git a/thirdparty/libwebp/src/enc/frame_enc.c b/thirdparty/libwebp/src/enc/frame_enc.c index 1aec376e44..af538d83ba 100644 --- a/thirdparty/libwebp/src/enc/frame_enc.c +++ b/thirdparty/libwebp/src/enc/frame_enc.c @@ -31,10 +31,15 @@ // we allow 2k of extra head-room in PARTITION0 limit. #define PARTITION0_SIZE_LIMIT ((VP8_MAX_PARTITION0_SIZE - 2048ULL) << 11) +static float Clamp(float v, float min, float max) { + return (v < min) ? min : (v > max) ? 
max : v; +} + typedef struct { // struct for organizing convergence in either size or PSNR int is_first; float dq; float q, last_q; + float qmin, qmax; double value, last_value; // PSNR or size double target; int do_size_search; @@ -47,7 +52,9 @@ static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) { s->is_first = 1; s->dq = 10.f; - s->q = s->last_q = enc->config_->quality; + s->qmin = 1.f * enc->config_->qmin; + s->qmax = 1.f * enc->config_->qmax; + s->q = s->last_q = Clamp(enc->config_->quality, s->qmin, s->qmax); s->target = do_size_search ? (double)target_size : (target_PSNR > 0.) ? target_PSNR : 40.; // default, just in case @@ -56,10 +63,6 @@ static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) { return do_size_search; } -static float Clamp(float v, float min, float max) { - return (v < min) ? min : (v > max) ? max : v; -} - static float ComputeNextQ(PassStats* const s) { float dq; if (s->is_first) { @@ -75,7 +78,7 @@ static float ComputeNextQ(PassStats* const s) { s->dq = Clamp(dq, -30.f, 30.f); s->last_q = s->q; s->last_value = s->value; - s->q = Clamp(s->q + s->dq, 0.f, 100.f); + s->q = Clamp(s->q + s->dq, s->qmin, s->qmax); return s->q; } @@ -848,9 +851,10 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { } #if (DEBUG_SEARCH > 0) - printf("#%2d metric:%.1lf -> %.1lf last_q=%.2lf q=%.2lf dq=%.2lf\n", + printf("#%2d metric:%.1lf -> %.1lf last_q=%.2lf q=%.2lf dq=%.2lf " + " range:[%.1f, %.1f]\n", num_pass_left, stats.last_value, stats.value, - stats.last_q, stats.q, stats.dq); + stats.last_q, stats.q, stats.dq, stats.qmin, stats.qmax); #endif if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) { ++num_pass_left; diff --git a/thirdparty/libwebp/src/enc/histogram_enc.c b/thirdparty/libwebp/src/enc/histogram_enc.c index a4e6bf3a98..38a0cebcab 100644 --- a/thirdparty/libwebp/src/enc/histogram_enc.c +++ b/thirdparty/libwebp/src/enc/histogram_enc.c @@ -208,6 +208,7 @@ void 
VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, } else if (PixOrCopyIsCacheIdx(v)) { const int literal_ix = NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v); + assert(histo->palette_code_bits_ != 0); ++histo->literal_[literal_ix]; } else { int code, extra_bits; @@ -1170,13 +1171,15 @@ static void RemoveEmptyHistograms(VP8LHistogramSet* const image_histo) { int VP8LGetHistoImageSymbols(int xsize, int ysize, const VP8LBackwardRefs* const refs, int quality, int low_effort, - int histo_bits, int cache_bits, + int histogram_bits, int cache_bits, VP8LHistogramSet* const image_histo, VP8LHistogram* const tmp_histo, uint16_t* const histogram_symbols) { int ok = 0; - const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1; - const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1; + const int histo_xsize = + histogram_bits ? VP8LSubSampleSize(xsize, histogram_bits) : 1; + const int histo_ysize = + histogram_bits ? VP8LSubSampleSize(ysize, histogram_bits) : 1; const int image_histo_raw_size = histo_xsize * histo_ysize; VP8LHistogramSet* const orig_histo = VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits); @@ -1192,7 +1195,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, if (orig_histo == NULL || map_tmp == NULL) goto Error; // Construct the histograms from backward references. - HistogramBuild(xsize, histo_bits, refs, orig_histo); + HistogramBuild(xsize, histogram_bits, refs, orig_histo); // Copies the histograms and computes its bit_cost. 
// histogram_symbols is optimized HistogramCopyAndAnalyze(orig_histo, image_histo, &num_used, diff --git a/thirdparty/libwebp/src/enc/histogram_enc.h b/thirdparty/libwebp/src/enc/histogram_enc.h index 54c2d21783..c3428b5d55 100644 --- a/thirdparty/libwebp/src/enc/histogram_enc.h +++ b/thirdparty/libwebp/src/enc/histogram_enc.h @@ -64,8 +64,8 @@ void VP8LHistogramCreate(VP8LHistogram* const p, const VP8LBackwardRefs* const refs, int palette_code_bits); -// Return the size of the histogram for a given palette_code_bits. -int VP8LGetHistogramSize(int palette_code_bits); +// Return the size of the histogram for a given cache_bits. +int VP8LGetHistogramSize(int cache_bits); // Set the palette_code_bits and reset the stats. // If init_arrays is true, the arrays are also filled with 0's. @@ -110,7 +110,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, const VP8LBackwardRefs* const refs, int quality, int low_effort, int histogram_bits, int cache_bits, - VP8LHistogramSet* const image_in, + VP8LHistogramSet* const image_histo, VP8LHistogram* const tmp_histo, uint16_t* const histogram_symbols); diff --git a/thirdparty/libwebp/src/enc/picture_csp_enc.c b/thirdparty/libwebp/src/enc/picture_csp_enc.c index 718e014ed2..35eede9635 100644 --- a/thirdparty/libwebp/src/enc/picture_csp_enc.c +++ b/thirdparty/libwebp/src/enc/picture_csp_enc.c @@ -61,16 +61,14 @@ static int CheckNonOpaque(const uint8_t* alpha, int width, int height, // Checking for the presence of non-opaque alpha. 
int WebPPictureHasTransparency(const WebPPicture* picture) { if (picture == NULL) return 0; - if (!picture->use_argb) { - return CheckNonOpaque(picture->a, picture->width, picture->height, - 1, picture->a_stride); - } else { + if (picture->use_argb) { const int alpha_offset = ALPHA_OFFSET; return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset, picture->width, picture->height, 4, picture->argb_stride * sizeof(*picture->argb)); } - return 0; + return CheckNonOpaque(picture->a, picture->width, picture->height, + 1, picture->a_stride); } //------------------------------------------------------------------------------ @@ -90,8 +88,9 @@ int WebPPictureHasTransparency(const WebPPicture* picture) { static int kLinearToGammaTab[kGammaTabSize + 1]; static uint16_t kGammaToLinearTab[256]; static volatile int kGammaTablesOk = 0; +static void InitGammaTables(void); -static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) { +WEBP_DSP_INIT_FUNC(InitGammaTables) { if (!kGammaTablesOk) { int v; const double scale = (double)(1 << kGammaTabFix) / kGammaScale; @@ -181,8 +180,9 @@ static uint32_t kLinearToGammaTabS[kGammaTabSize + 2]; #define GAMMA_TO_LINEAR_BITS 14 static uint32_t kGammaToLinearTabS[MAX_Y_T + 1]; // size scales with Y_FIX static volatile int kGammaTablesSOk = 0; +static void InitGammaTablesS(void); -static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesS(void) { +WEBP_DSP_INIT_FUNC(InitGammaTablesS) { assert(2 * GAMMA_TO_LINEAR_BITS < 32); // we use uint32_t intermediate values if (!kGammaTablesSOk) { int v; diff --git a/thirdparty/libwebp/src/enc/picture_rescale_enc.c b/thirdparty/libwebp/src/enc/picture_rescale_enc.c index 58a6ae7b9d..a75f5d9c06 100644 --- a/thirdparty/libwebp/src/enc/picture_rescale_enc.c +++ b/thirdparty/libwebp/src/enc/picture_rescale_enc.c @@ -164,22 +164,25 @@ int WebPPictureCrop(WebPPicture* pic, //------------------------------------------------------------------------------ // Simple picture rescaler -static void 
RescalePlane(const uint8_t* src, - int src_width, int src_height, int src_stride, - uint8_t* dst, - int dst_width, int dst_height, int dst_stride, - rescaler_t* const work, - int num_channels) { +static int RescalePlane(const uint8_t* src, + int src_width, int src_height, int src_stride, + uint8_t* dst, + int dst_width, int dst_height, int dst_stride, + rescaler_t* const work, + int num_channels) { WebPRescaler rescaler; int y = 0; - WebPRescalerInit(&rescaler, src_width, src_height, - dst, dst_width, dst_height, dst_stride, - num_channels, work); + if (!WebPRescalerInit(&rescaler, src_width, src_height, + dst, dst_width, dst_height, dst_stride, + num_channels, work)) { + return 0; + } while (y < src_height) { y += WebPRescalerImport(&rescaler, src_height - y, src + y * src_stride, src_stride); WebPRescalerExport(&rescaler); } + return 1; } static void AlphaMultiplyARGB(WebPPicture* const pic, int inverse) { @@ -222,25 +225,28 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) { // If present, we need to rescale alpha first (for AlphaMultiplyY). if (pic->a != NULL) { WebPInitAlphaProcessing(); - RescalePlane(pic->a, prev_width, prev_height, pic->a_stride, - tmp.a, width, height, tmp.a_stride, work, 1); + if (!RescalePlane(pic->a, prev_width, prev_height, pic->a_stride, + tmp.a, width, height, tmp.a_stride, work, 1)) { + return 0; + } } // We take transparency into account on the luma plane only. That's not // totally exact blending, but still is a good approximation. 
AlphaMultiplyY(pic, 0); - RescalePlane(pic->y, prev_width, prev_height, pic->y_stride, - tmp.y, width, height, tmp.y_stride, work, 1); + if (!RescalePlane(pic->y, prev_width, prev_height, pic->y_stride, + tmp.y, width, height, tmp.y_stride, work, 1) || + !RescalePlane(pic->u, + HALVE(prev_width), HALVE(prev_height), pic->uv_stride, + tmp.u, + HALVE(width), HALVE(height), tmp.uv_stride, work, 1) || + !RescalePlane(pic->v, + HALVE(prev_width), HALVE(prev_height), pic->uv_stride, + tmp.v, + HALVE(width), HALVE(height), tmp.uv_stride, work, 1)) { + return 0; + } AlphaMultiplyY(&tmp, 1); - - RescalePlane(pic->u, - HALVE(prev_width), HALVE(prev_height), pic->uv_stride, - tmp.u, - HALVE(width), HALVE(height), tmp.uv_stride, work, 1); - RescalePlane(pic->v, - HALVE(prev_width), HALVE(prev_height), pic->uv_stride, - tmp.v, - HALVE(width), HALVE(height), tmp.uv_stride, work, 1); } else { work = (rescaler_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work)); if (work == NULL) { @@ -252,11 +258,12 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) { // the premultiplication afterward (while preserving the alpha channel). 
WebPInitAlphaProcessing(); AlphaMultiplyARGB(pic, 0); - RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height, - pic->argb_stride * 4, - (uint8_t*)tmp.argb, width, height, - tmp.argb_stride * 4, - work, 4); + if (!RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height, + pic->argb_stride * 4, + (uint8_t*)tmp.argb, width, height, + tmp.argb_stride * 4, work, 4)) { + return 0; + } AlphaMultiplyARGB(&tmp, 1); } WebPPictureFree(pic); diff --git a/thirdparty/libwebp/src/enc/picture_tools_enc.c b/thirdparty/libwebp/src/enc/picture_tools_enc.c index d0e8a495da..38cb01534a 100644 --- a/thirdparty/libwebp/src/enc/picture_tools_enc.c +++ b/thirdparty/libwebp/src/enc/picture_tools_enc.c @@ -83,6 +83,19 @@ static int SmoothenBlock(const uint8_t* a_ptr, int a_stride, uint8_t* y_ptr, return (count == 0); } +void WebPReplaceTransparentPixels(WebPPicture* const pic, uint32_t color) { + if (pic != NULL && pic->use_argb) { + int y = pic->height; + uint32_t* argb = pic->argb; + color &= 0xffffffu; // force alpha=0 + WebPInitAlphaProcessing(); + while (y-- > 0) { + WebPAlphaReplace(argb, pic->width, color); + argb += pic->argb_stride; + } + } +} + void WebPCleanupTransparentArea(WebPPicture* pic) { int x, y, w, h; if (pic == NULL) return; @@ -165,24 +178,6 @@ void WebPCleanupTransparentArea(WebPPicture* pic) { #undef SIZE #undef SIZE2 -void WebPCleanupTransparentAreaLossless(WebPPicture* const pic) { - int x, y, w, h; - uint32_t* argb; - assert(pic != NULL && pic->use_argb); - w = pic->width; - h = pic->height; - argb = pic->argb; - - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) { - if ((argb[x] & 0xff000000) == 0) { - argb[x] = 0x00000000; - } - } - argb += pic->argb_stride; - } -} - //------------------------------------------------------------------------------ // Blend color and remove transparency info diff --git a/thirdparty/libwebp/src/enc/syntax_enc.c b/thirdparty/libwebp/src/enc/syntax_enc.c index a9e5a6cf0f..e18cf650ca 100644 --- 
a/thirdparty/libwebp/src/enc/syntax_enc.c +++ b/thirdparty/libwebp/src/enc/syntax_enc.c @@ -349,7 +349,7 @@ int VP8EncWrite(VP8Encoder* const enc) { (enc->alpha_data_size_ & 1); riff_size += CHUNK_HEADER_SIZE + padded_alpha_size; } - // Sanity check. + // RIFF size should fit in 32-bits. if (riff_size > 0xfffffffeU) { return WebPEncodingSetError(pic, VP8_ENC_ERROR_FILE_TOO_BIG); } diff --git a/thirdparty/libwebp/src/enc/vp8i_enc.h b/thirdparty/libwebp/src/enc/vp8i_enc.h index fedcaeea27..67e9509367 100644 --- a/thirdparty/libwebp/src/enc/vp8i_enc.h +++ b/thirdparty/libwebp/src/enc/vp8i_enc.h @@ -31,8 +31,8 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 1 -#define ENC_MIN_VERSION 1 -#define ENC_REV_VERSION 0 +#define ENC_MIN_VERSION 2 +#define ENC_REV_VERSION 1 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost @@ -286,8 +286,7 @@ int VP8IteratorNext(VP8EncIterator* const it); // save the yuv_out_ boundary values to top_/left_ arrays for next iterations. void VP8IteratorSaveBoundary(VP8EncIterator* const it); // Report progression based on macroblock rows. Return 0 for user-abort request. -int VP8IteratorProgress(const VP8EncIterator* const it, - int final_delta_percent); +int VP8IteratorProgress(const VP8EncIterator* const it, int delta); // Intra4x4 iterations void VP8IteratorStartI4(VP8EncIterator* const it); // returns true if not done. @@ -505,9 +504,9 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height); // Returns false in case of error (invalid param, out-of-memory). int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height); -// Clean-up the RGB samples under fully transparent area, to help lossless -// compressibility (no guarantee, though). Assumes that pic->use_argb is true. 
-void WebPCleanupTransparentAreaLossless(WebPPicture* const pic); +// Replace samples that are fully transparent by 'color' to help compressibility +// (no guarantee, though). Assumes pic->use_argb is true. +void WebPReplaceTransparentPixels(WebPPicture* const pic, uint32_t color); //------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/src/enc/vp8l_enc.c b/thirdparty/libwebp/src/enc/vp8l_enc.c index 2efd403f77..e330e716f1 100644 --- a/thirdparty/libwebp/src/enc/vp8l_enc.c +++ b/thirdparty/libwebp/src/enc/vp8l_enc.c @@ -65,25 +65,22 @@ static WEBP_INLINE void SwapColor(uint32_t* const col1, uint32_t* const col2) { *col2 = tmp; } -static void GreedyMinimizeDeltas(uint32_t palette[], int num_colors) { - // Find greedily always the closest color of the predicted color to minimize - // deltas in the palette. This reduces storage needs since the - // palette is stored with delta encoding. - uint32_t predict = 0x00000000; - int i, k; - for (i = 0; i < num_colors; ++i) { - int best_ix = i; - uint32_t best_score = ~0U; - for (k = i; k < num_colors; ++k) { - const uint32_t cur_score = PaletteColorDistance(palette[k], predict); - if (best_score > cur_score) { - best_score = cur_score; - best_ix = k; - } +static WEBP_INLINE int SearchColorNoIdx(const uint32_t sorted[], uint32_t color, + int num_colors) { + int low = 0, hi = num_colors; + if (sorted[low] == color) return low; // loop invariant: sorted[low] != color + while (1) { + const int mid = (low + hi) >> 1; + if (sorted[mid] == color) { + return mid; + } else if (sorted[mid] < color) { + low = mid; + } else { + hi = mid; } - SwapColor(&palette[best_ix], &palette[i]); - predict = palette[i]; } + assert(0); + return 0; } // The palette has been sorted by alpha. This function checks if the other @@ -92,7 +89,8 @@ static void GreedyMinimizeDeltas(uint32_t palette[], int num_colors) { // no benefit to re-organize them greedily. 
A monotonic development // would be spotted in green-only situations (like lossy alpha) or gray-scale // images. -static int PaletteHasNonMonotonousDeltas(uint32_t palette[], int num_colors) { +static int PaletteHasNonMonotonousDeltas(const uint32_t* const palette, + int num_colors) { uint32_t predict = 0x000000; int i; uint8_t sign_found = 0x00; @@ -115,28 +113,215 @@ static int PaletteHasNonMonotonousDeltas(uint32_t palette[], int num_colors) { return (sign_found & (sign_found << 1)) != 0; // two consequent signs. } +static void PaletteSortMinimizeDeltas(const uint32_t* const palette_sorted, + int num_colors, uint32_t* const palette) { + uint32_t predict = 0x00000000; + int i, k; + memcpy(palette, palette_sorted, num_colors * sizeof(*palette)); + if (!PaletteHasNonMonotonousDeltas(palette_sorted, num_colors)) return; + // Find greedily always the closest color of the predicted color to minimize + // deltas in the palette. This reduces storage needs since the + // palette is stored with delta encoding. + for (i = 0; i < num_colors; ++i) { + int best_ix = i; + uint32_t best_score = ~0U; + for (k = i; k < num_colors; ++k) { + const uint32_t cur_score = PaletteColorDistance(palette[k], predict); + if (best_score > cur_score) { + best_score = cur_score; + best_ix = k; + } + } + SwapColor(&palette[best_ix], &palette[i]); + predict = palette[i]; + } +} + +// Sort palette in increasing order and prepare an inverse mapping array. 
+static void PrepareMapToPalette(const uint32_t palette[], uint32_t num_colors, + uint32_t sorted[], uint32_t idx_map[]) { + uint32_t i; + memcpy(sorted, palette, num_colors * sizeof(*sorted)); + qsort(sorted, num_colors, sizeof(*sorted), PaletteCompareColorsForQsort); + for (i = 0; i < num_colors; ++i) { + idx_map[SearchColorNoIdx(sorted, palette[i], num_colors)] = i; + } +} + // ----------------------------------------------------------------------------- -// Palette +// Modified Zeng method from "A Survey on Palette Reordering +// Methods for Improving the Compression of Color-Indexed Images" by Armando J. +// Pinho and Antonio J. R. Neves. + +// Finds the biggest cooccurrence in the matrix. +static void CoOccurrenceFindMax(const uint32_t* const cooccurrence, + uint32_t num_colors, uint8_t* const c1, + uint8_t* const c2) { + // Find the index that is most frequently located adjacent to other + // (different) indexes. + uint32_t best_sum = 0u; + uint32_t i, j, best_cooccurrence; + *c1 = 0u; + for (i = 0; i < num_colors; ++i) { + uint32_t sum = 0; + for (j = 0; j < num_colors; ++j) sum += cooccurrence[i * num_colors + j]; + if (sum > best_sum) { + best_sum = sum; + *c1 = i; + } + } + // Find the index that is most frequently found adjacent to *c1. + *c2 = 0u; + best_cooccurrence = 0u; + for (i = 0; i < num_colors; ++i) { + if (cooccurrence[*c1 * num_colors + i] > best_cooccurrence) { + best_cooccurrence = cooccurrence[*c1 * num_colors + i]; + *c2 = i; + } + } + assert(*c1 != *c2); +} -// If number of colors in the image is less than or equal to MAX_PALETTE_SIZE, -// creates a palette and returns true, else returns false. 
-static int AnalyzeAndCreatePalette(const WebPPicture* const pic, - int low_effort, - uint32_t palette[MAX_PALETTE_SIZE], - int* const palette_size) { - const int num_colors = WebPGetColorPalette(pic, palette); - if (num_colors > MAX_PALETTE_SIZE) { - *palette_size = 0; - return 0; +// Builds the cooccurrence matrix +static WebPEncodingError CoOccurrenceBuild(const WebPPicture* const pic, + const uint32_t* const palette, + uint32_t num_colors, + uint32_t* cooccurrence) { + uint32_t *lines, *line_top, *line_current, *line_tmp; + int x, y; + const uint32_t* src = pic->argb; + uint32_t prev_pix = ~src[0]; + uint32_t prev_idx = 0u; + uint32_t idx_map[MAX_PALETTE_SIZE] = {0}; + uint32_t palette_sorted[MAX_PALETTE_SIZE]; + lines = (uint32_t*)WebPSafeMalloc(2 * pic->width, sizeof(*lines)); + if (lines == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; + line_top = &lines[0]; + line_current = &lines[pic->width]; + PrepareMapToPalette(palette, num_colors, palette_sorted, idx_map); + for (y = 0; y < pic->height; ++y) { + for (x = 0; x < pic->width; ++x) { + const uint32_t pix = src[x]; + if (pix != prev_pix) { + prev_idx = idx_map[SearchColorNoIdx(palette_sorted, pix, num_colors)]; + prev_pix = pix; + } + line_current[x] = prev_idx; + // 4-connectivity is what works best as mentioned in "On the relation + // between Memon's and the modified Zeng's palette reordering methods". 
+ if (x > 0 && prev_idx != line_current[x - 1]) { + const uint32_t left_idx = line_current[x - 1]; + ++cooccurrence[prev_idx * num_colors + left_idx]; + ++cooccurrence[left_idx * num_colors + prev_idx]; + } + if (y > 0 && prev_idx != line_top[x]) { + const uint32_t top_idx = line_top[x]; + ++cooccurrence[prev_idx * num_colors + top_idx]; + ++cooccurrence[top_idx * num_colors + prev_idx]; + } + } + line_tmp = line_top; + line_top = line_current; + line_current = line_tmp; + src += pic->argb_stride; + } + WebPSafeFree(lines); + return VP8_ENC_OK; +} + +struct Sum { + uint8_t index; + uint32_t sum; +}; + +// Implements the modified Zeng method from "A Survey on Palette Reordering +// Methods for Improving the Compression of Color-Indexed Images" by Armando J. +// Pinho and Antonio J. R. Neves. +static WebPEncodingError PaletteSortModifiedZeng( + const WebPPicture* const pic, const uint32_t* const palette_sorted, + uint32_t num_colors, uint32_t* const palette) { + uint32_t i, j, ind; + uint8_t remapping[MAX_PALETTE_SIZE]; + uint32_t* cooccurrence; + struct Sum sums[MAX_PALETTE_SIZE]; + uint32_t first, last; + uint32_t num_sums; + // TODO(vrabaud) check whether one color images should use palette or not. + if (num_colors <= 1) return VP8_ENC_OK; + // Build the co-occurrence matrix. + cooccurrence = + (uint32_t*)WebPSafeCalloc(num_colors * num_colors, sizeof(*cooccurrence)); + if (cooccurrence == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; + if (CoOccurrenceBuild(pic, palette_sorted, num_colors, cooccurrence) != + VP8_ENC_OK) { + WebPSafeFree(cooccurrence); + return VP8_ENC_ERROR_OUT_OF_MEMORY; + } + + // Initialize the mapping list with the two best indices. + CoOccurrenceFindMax(cooccurrence, num_colors, &remapping[0], &remapping[1]); + + // We need to append and prepend to the list of remapping. To this end, we + // actually define the next start/end of the list as indices in a vector (with + // a wrap around when the end is reached). 
+ first = 0; + last = 1; + num_sums = num_colors - 2; // -2 because we know the first two values + if (num_sums > 0) { + // Initialize the sums with the first two remappings and find the best one + struct Sum* best_sum = &sums[0]; + best_sum->index = 0u; + best_sum->sum = 0u; + for (i = 0, j = 0; i < num_colors; ++i) { + if (i == remapping[0] || i == remapping[1]) continue; + sums[j].index = i; + sums[j].sum = cooccurrence[i * num_colors + remapping[0]] + + cooccurrence[i * num_colors + remapping[1]]; + if (sums[j].sum > best_sum->sum) best_sum = &sums[j]; + ++j; + } + + while (num_sums > 0) { + const uint8_t best_index = best_sum->index; + // Compute delta to know if we need to prepend or append the best index. + int32_t delta = 0; + const int32_t n = num_colors - num_sums; + for (ind = first, j = 0; (ind + j) % num_colors != last + 1; ++j) { + const uint16_t l_j = remapping[(ind + j) % num_colors]; + delta += (n - 1 - 2 * (int32_t)j) * + (int32_t)cooccurrence[best_index * num_colors + l_j]; + } + if (delta > 0) { + first = (first == 0) ? num_colors - 1 : first - 1; + remapping[first] = best_index; + } else { + ++last; + remapping[last] = best_index; + } + // Remove best_sum from sums. + *best_sum = sums[num_sums - 1]; + --num_sums; + // Update all the sums and find the best one. + best_sum = &sums[0]; + for (i = 0; i < num_sums; ++i) { + sums[i].sum += cooccurrence[best_index * num_colors + sums[i].index]; + if (sums[i].sum > best_sum->sum) best_sum = &sums[i]; + } + } } - *palette_size = num_colors; - qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort); - if (!low_effort && PaletteHasNonMonotonousDeltas(palette, num_colors)) { - GreedyMinimizeDeltas(palette, num_colors); + assert((last + 1) % num_colors == first); + WebPSafeFree(cooccurrence); + + // Re-map the palette. 
+ for (i = 0; i < num_colors; ++i) { + palette[i] = palette_sorted[remapping[(first + i) % num_colors]]; } - return 1; + return VP8_ENC_OK; } +// ----------------------------------------------------------------------------- +// Palette + // These five modes are evaluated and their respective entropy is computed. typedef enum { kDirect = 0, @@ -144,10 +329,18 @@ typedef enum { kSubGreen = 2, kSpatialSubGreen = 3, kPalette = 4, - kNumEntropyIx = 5 + kPaletteAndSpatial = 5, + kNumEntropyIx = 6 } EntropyIx; typedef enum { + kSortedDefault = 0, + kMinimizeDelta = 1, + kModifiedZeng = 2, + kUnusedPalette = 3, +} PaletteSorting; + +typedef enum { kHistoAlpha = 0, kHistoAlphaPred, kHistoGreen, @@ -354,14 +547,21 @@ static int GetTransformBits(int method, int histo_bits) { } // Set of parameters to be used in each iteration of the cruncher. -#define CRUNCH_CONFIGS_LZ77_MAX 2 +#define CRUNCH_SUBCONFIGS_MAX 2 +typedef struct { + int lz77_; + int do_no_cache_; +} CrunchSubConfig; typedef struct { int entropy_idx_; - int lz77s_types_to_try_[CRUNCH_CONFIGS_LZ77_MAX]; - int lz77s_types_to_try_size_; + PaletteSorting palette_sorting_type_; + CrunchSubConfig sub_configs_[CRUNCH_SUBCONFIGS_MAX]; + int sub_configs_size_; } CrunchConfig; -#define CRUNCH_CONFIGS_MAX kNumEntropyIx +// +2 because we add a palette sorting configuration for kPalette and +// kPaletteAndSpatial. +#define CRUNCH_CONFIGS_MAX (kNumEntropyIx + 2) static int EncoderAnalyze(VP8LEncoder* const enc, CrunchConfig crunch_configs[CRUNCH_CONFIGS_MAX], @@ -376,11 +576,20 @@ static int EncoderAnalyze(VP8LEncoder* const enc, int i; int use_palette; int n_lz77s; + // If set to 0, analyze the cache with the computed cache value. If 1, also + // analyze with no-cache. + int do_no_cache = 0; assert(pic != NULL && pic->argb != NULL); - use_palette = - AnalyzeAndCreatePalette(pic, low_effort, - enc->palette_, &enc->palette_size_); + // Check whether a palette is possible. 
+ enc->palette_size_ = WebPGetColorPalette(pic, enc->palette_sorted_); + use_palette = (enc->palette_size_ <= MAX_PALETTE_SIZE); + if (!use_palette) { + enc->palette_size_ = 0; + } else { + qsort(enc->palette_sorted_, enc->palette_size_, + sizeof(*enc->palette_sorted_), PaletteCompareColorsForQsort); + } // Empirical bit sizes. enc->histo_bits_ = GetHistoBits(method, use_palette, @@ -390,6 +599,8 @@ static int EncoderAnalyze(VP8LEncoder* const enc, if (low_effort) { // AnalyzeEntropy is somewhat slow. crunch_configs[0].entropy_idx_ = use_palette ? kPalette : kSpatialSubGreen; + crunch_configs[0].palette_sorting_type_ = + use_palette ? kSortedDefault : kUnusedPalette; n_lz77s = 1; *crunch_configs_size = 1; } else { @@ -402,29 +613,59 @@ static int EncoderAnalyze(VP8LEncoder* const enc, return 0; } if (method == 6 && config->quality == 100) { + do_no_cache = 1; // Go brute force on all transforms. *crunch_configs_size = 0; for (i = 0; i < kNumEntropyIx; ++i) { - if (i != kPalette || use_palette) { + // We can only apply kPalette or kPaletteAndSpatial if we can indeed use + // a palette. + if ((i != kPalette && i != kPaletteAndSpatial) || use_palette) { assert(*crunch_configs_size < CRUNCH_CONFIGS_MAX); - crunch_configs[(*crunch_configs_size)++].entropy_idx_ = i; + crunch_configs[(*crunch_configs_size)].entropy_idx_ = i; + if (use_palette && (i == kPalette || i == kPaletteAndSpatial)) { + crunch_configs[(*crunch_configs_size)].palette_sorting_type_ = + kMinimizeDelta; + ++*crunch_configs_size; + // Also add modified Zeng's method. + crunch_configs[(*crunch_configs_size)].entropy_idx_ = i; + crunch_configs[(*crunch_configs_size)].palette_sorting_type_ = + kModifiedZeng; + } else { + crunch_configs[(*crunch_configs_size)].palette_sorting_type_ = + kUnusedPalette; + } + ++*crunch_configs_size; } } } else { // Only choose the guessed best transform. 
*crunch_configs_size = 1; crunch_configs[0].entropy_idx_ = min_entropy_ix; + crunch_configs[0].palette_sorting_type_ = + use_palette ? kMinimizeDelta : kUnusedPalette; + if (config->quality >= 75 && method == 5) { + // Test with and without color cache. + do_no_cache = 1; + // If we have a palette, also check in combination with spatial. + if (min_entropy_ix == kPalette) { + *crunch_configs_size = 2; + crunch_configs[1].entropy_idx_ = kPaletteAndSpatial; + crunch_configs[1].palette_sorting_type_ = kMinimizeDelta; + } + } } } // Fill in the different LZ77s. - assert(n_lz77s <= CRUNCH_CONFIGS_LZ77_MAX); + assert(n_lz77s <= CRUNCH_SUBCONFIGS_MAX); for (i = 0; i < *crunch_configs_size; ++i) { int j; for (j = 0; j < n_lz77s; ++j) { - crunch_configs[i].lz77s_types_to_try_[j] = + assert(j < CRUNCH_SUBCONFIGS_MAX); + crunch_configs[i].sub_configs_[j].lz77_ = (j == 0) ? kLZ77Standard | kLZ77RLE : kLZ77Box; + crunch_configs[i].sub_configs_[j].do_no_cache_ = do_no_cache; } - crunch_configs[i].lz77s_types_to_try_size_ = n_lz77s; + crunch_configs[i].sub_configs_size_ = n_lz77s; } return 1; } @@ -440,7 +681,7 @@ static int EncoderInit(VP8LEncoder* const enc) { int i; if (!VP8LHashChainInit(&enc->hash_chain_, pix_cnt)) return 0; - for (i = 0; i < 3; ++i) VP8LBackwardRefsInit(&enc->refs_[i], refs_block_size); + for (i = 0; i < 4; ++i) VP8LBackwardRefsInit(&enc->refs_[i], refs_block_size); return 1; } @@ -769,13 +1010,10 @@ static WebPEncodingError StoreImageToBitMask( } // Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31 -static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, - const uint32_t* const argb, - VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const refs_tmp1, - VP8LBackwardRefs* const refs_tmp2, - int width, int height, - int quality, int low_effort) { +static WebPEncodingError EncodeImageNoHuffman( + VP8LBitWriter* const bw, const uint32_t* const argb, + VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_array, + 
int width, int height, int quality, int low_effort) { int i; int max_tokens = 0; WebPEncodingError err = VP8_ENC_OK; @@ -798,13 +1036,11 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } - refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, - kLZ77Standard | kLZ77RLE, &cache_bits, - hash_chain, refs_tmp1, refs_tmp2); - if (refs == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } + err = VP8LGetBackwardReferences( + width, height, argb, quality, /*low_effort=*/0, kLZ77Standard | kLZ77RLE, + cache_bits, /*do_no_cache=*/0, hash_chain, refs_array, &cache_bits); + if (err != VP8_ENC_OK) goto Error; + refs = &refs_array[0]; histogram_image = VP8LAllocateHistogramSet(1, cache_bits); if (histogram_image == NULL) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; @@ -860,11 +1096,11 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, static WebPEncodingError EncodeImageInternal( VP8LBitWriter* const bw, const uint32_t* const argb, - VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[3], int width, + VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[4], int width, int height, int quality, int low_effort, int use_cache, const CrunchConfig* const config, int* cache_bits, int histogram_bits, size_t init_byte_position, int* const hdr_size, int* const data_size) { - WebPEncodingError err = VP8_ENC_OK; + WebPEncodingError err = VP8_ENC_ERROR_OUT_OF_MEMORY; const uint32_t histogram_image_xysize = VP8LSubSampleSize(width, histogram_bits) * VP8LSubSampleSize(height, histogram_bits); @@ -876,103 +1112,103 @@ static WebPEncodingError EncodeImageInternal( 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); HuffmanTreeToken* tokens = NULL; HuffmanTreeCode* huffman_codes = NULL; - VP8LBackwardRefs* refs_best; - VP8LBackwardRefs* refs_tmp; uint16_t* const histogram_symbols = (uint16_t*)WebPSafeMalloc(histogram_image_xysize, sizeof(*histogram_symbols)); - int 
lz77s_idx; + int sub_configs_idx; + int cache_bits_init, write_histogram_image; VP8LBitWriter bw_init = *bw, bw_best; int hdr_size_tmp; + VP8LHashChain hash_chain_histogram; // histogram image hash chain + size_t bw_size_best = ~(size_t)0; assert(histogram_bits >= MIN_HUFFMAN_BITS); assert(histogram_bits <= MAX_HUFFMAN_BITS); assert(hdr_size != NULL); assert(data_size != NULL); - if (histogram_symbols == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + // Make sure we can allocate the different objects. + memset(&hash_chain_histogram, 0, sizeof(hash_chain_histogram)); + if (huff_tree == NULL || histogram_symbols == NULL || + !VP8LHashChainInit(&hash_chain_histogram, histogram_image_xysize) || + !VP8LHashChainFill(hash_chain, quality, argb, width, height, + low_effort)) { goto Error; } - if (use_cache) { // If the value is different from zero, it has been set during the // palette analysis. - if (*cache_bits == 0) *cache_bits = MAX_COLOR_CACHE_BITS; + cache_bits_init = (*cache_bits == 0) ? MAX_COLOR_CACHE_BITS : *cache_bits; } else { - *cache_bits = 0; + cache_bits_init = 0; } - // 'best_refs' is the reference to the best backward refs and points to one - // of refs_array[0] or refs_array[1]. - // Calculate backward references from ARGB image. - if (huff_tree == NULL || - !VP8LHashChainFill(hash_chain, quality, argb, width, height, - low_effort) || - !VP8LBitWriterInit(&bw_best, 0) || - (config->lz77s_types_to_try_size_ > 1 && + // If several iterations will happen, clone into bw_best. 
+ if (!VP8LBitWriterInit(&bw_best, 0) || + ((config->sub_configs_size_ > 1 || + config->sub_configs_[0].do_no_cache_) && !VP8LBitWriterClone(bw, &bw_best))) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } - for (lz77s_idx = 0; lz77s_idx < config->lz77s_types_to_try_size_; - ++lz77s_idx) { - refs_best = VP8LGetBackwardReferences( - width, height, argb, quality, low_effort, - config->lz77s_types_to_try_[lz77s_idx], cache_bits, hash_chain, - &refs_array[0], &refs_array[1]); - if (refs_best == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - // Keep the best references aside and use the other element from the first - // two as a temporary for later usage. - refs_tmp = &refs_array[refs_best == &refs_array[0] ? 1 : 0]; - - histogram_image = - VP8LAllocateHistogramSet(histogram_image_xysize, *cache_bits); - tmp_histo = VP8LAllocateHistogram(*cache_bits); - if (histogram_image == NULL || tmp_histo == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - - // Build histogram image and symbols from backward references. - if (!VP8LGetHistoImageSymbols(width, height, refs_best, quality, low_effort, - histogram_bits, *cache_bits, histogram_image, - tmp_histo, histogram_symbols)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - // Create Huffman bit lengths and codes for each histogram image. - histogram_image_size = histogram_image->size; - bit_array_size = 5 * histogram_image_size; - huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size, - sizeof(*huffman_codes)); - // Note: some histogram_image entries may point to tmp_histos[], so the - // latter need to outlive the following call to GetHuffBitLengthsAndCodes(). - if (huffman_codes == NULL || - !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - // Free combined histograms. - VP8LFreeHistogramSet(histogram_image); - histogram_image = NULL; - - // Free scratch histograms. 
- VP8LFreeHistogram(tmp_histo); - tmp_histo = NULL; + for (sub_configs_idx = 0; sub_configs_idx < config->sub_configs_size_; + ++sub_configs_idx) { + const CrunchSubConfig* const sub_config = + &config->sub_configs_[sub_configs_idx]; + int cache_bits_best, i_cache; + err = VP8LGetBackwardReferences(width, height, argb, quality, low_effort, + sub_config->lz77_, cache_bits_init, + sub_config->do_no_cache_, hash_chain, + &refs_array[0], &cache_bits_best); + if (err != VP8_ENC_OK) goto Error; - // Color Cache parameters. - if (*cache_bits > 0) { - VP8LPutBits(bw, 1, 1); - VP8LPutBits(bw, *cache_bits, 4); - } else { - VP8LPutBits(bw, 0, 1); - } + for (i_cache = 0; i_cache < (sub_config->do_no_cache_ ? 2 : 1); ++i_cache) { + const int cache_bits_tmp = (i_cache == 0) ? cache_bits_best : 0; + // Speed-up: no need to study the no-cache case if it was already studied + // in i_cache == 0. + if (i_cache == 1 && cache_bits_best == 0) break; + + // Reset the bit writer for this iteration. + VP8LBitWriterReset(&bw_init, bw); + + // Build histogram image and symbols from backward references. + histogram_image = + VP8LAllocateHistogramSet(histogram_image_xysize, cache_bits_tmp); + tmp_histo = VP8LAllocateHistogram(cache_bits_tmp); + if (histogram_image == NULL || tmp_histo == NULL || + !VP8LGetHistoImageSymbols(width, height, &refs_array[i_cache], + quality, low_effort, histogram_bits, + cache_bits_tmp, histogram_image, tmp_histo, + histogram_symbols)) { + goto Error; + } + // Create Huffman bit lengths and codes for each histogram image. + histogram_image_size = histogram_image->size; + bit_array_size = 5 * histogram_image_size; + huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size, + sizeof(*huffman_codes)); + // Note: some histogram_image entries may point to tmp_histos[], so the + // latter need to outlive the following call to + // GetHuffBitLengthsAndCodes(). 
+ if (huffman_codes == NULL || + !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { + goto Error; + } + // Free combined histograms. + VP8LFreeHistogramSet(histogram_image); + histogram_image = NULL; + + // Free scratch histograms. + VP8LFreeHistogram(tmp_histo); + tmp_histo = NULL; + + // Color Cache parameters. + if (cache_bits_tmp > 0) { + VP8LPutBits(bw, 1, 1); + VP8LPutBits(bw, cache_bits_tmp, 4); + } else { + VP8LPutBits(bw, 0, 1); + } - // Huffman image + meta huffman. - { - const int write_histogram_image = (histogram_image_size > 1); + // Huffman image + meta huffman. + write_histogram_image = (histogram_image_size > 1); VP8LPutBits(bw, write_histogram_image, 1); if (write_histogram_image) { uint32_t* const histogram_argb = @@ -980,10 +1216,7 @@ static WebPEncodingError EncodeImageInternal( sizeof(*histogram_argb)); int max_index = 0; uint32_t i; - if (histogram_argb == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } + if (histogram_argb == NULL) goto Error; for (i = 0; i < histogram_image_xysize; ++i) { const int symbol_index = histogram_symbols[i] & 0xffff; histogram_argb[i] = (symbol_index << 8); @@ -995,65 +1228,64 @@ static WebPEncodingError EncodeImageInternal( VP8LPutBits(bw, histogram_bits - 2, 3); err = EncodeImageNoHuffman( - bw, histogram_argb, hash_chain, refs_tmp, &refs_array[2], + bw, histogram_argb, &hash_chain_histogram, &refs_array[2], VP8LSubSampleSize(width, histogram_bits), VP8LSubSampleSize(height, histogram_bits), quality, low_effort); WebPSafeFree(histogram_argb); if (err != VP8_ENC_OK) goto Error; } - } - // Store Huffman codes. - { - int i; - int max_tokens = 0; - // Find maximum number of symbols for the huffman tree-set. - for (i = 0; i < 5 * histogram_image_size; ++i) { - HuffmanTreeCode* const codes = &huffman_codes[i]; - if (max_tokens < codes->num_symbols) { - max_tokens = codes->num_symbols; + // Store Huffman codes. 
+ { + int i; + int max_tokens = 0; + // Find maximum number of symbols for the huffman tree-set. + for (i = 0; i < 5 * histogram_image_size; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + if (max_tokens < codes->num_symbols) { + max_tokens = codes->num_symbols; + } + } + tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); + if (tokens == NULL) goto Error; + for (i = 0; i < 5 * histogram_image_size; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + StoreHuffmanCode(bw, huff_tree, tokens, codes); + ClearHuffmanTreeIfOnlyOneSymbol(codes); } } - tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); - if (tokens == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; + // Store actual literals. + hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position); + err = StoreImageToBitMask(bw, width, histogram_bits, &refs_array[i_cache], + histogram_symbols, huffman_codes); + if (err != VP8_ENC_OK) goto Error; + // Keep track of the smallest image so far. + if (VP8LBitWriterNumBytes(bw) < bw_size_best) { + bw_size_best = VP8LBitWriterNumBytes(bw); + *cache_bits = cache_bits_tmp; + *hdr_size = hdr_size_tmp; + *data_size = + (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size); + VP8LBitWriterSwap(bw, &bw_best); } - for (i = 0; i < 5 * histogram_image_size; ++i) { - HuffmanTreeCode* const codes = &huffman_codes[i]; - StoreHuffmanCode(bw, huff_tree, tokens, codes); - ClearHuffmanTreeIfOnlyOneSymbol(codes); + WebPSafeFree(tokens); + tokens = NULL; + if (huffman_codes != NULL) { + WebPSafeFree(huffman_codes->codes); + WebPSafeFree(huffman_codes); + huffman_codes = NULL; } } - // Store actual literals. - hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position); - err = StoreImageToBitMask(bw, width, histogram_bits, refs_best, - histogram_symbols, huffman_codes); - // Keep track of the smallest image so far. 
- if (lz77s_idx == 0 || - VP8LBitWriterNumBytes(bw) < VP8LBitWriterNumBytes(&bw_best)) { - *hdr_size = hdr_size_tmp; - *data_size = - (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size); - VP8LBitWriterSwap(bw, &bw_best); - } - // Reset the bit writer for the following iteration if any. - if (config->lz77s_types_to_try_size_ > 1) VP8LBitWriterReset(&bw_init, bw); - WebPSafeFree(tokens); - tokens = NULL; - if (huffman_codes != NULL) { - WebPSafeFree(huffman_codes->codes); - WebPSafeFree(huffman_codes); - huffman_codes = NULL; - } } VP8LBitWriterSwap(bw, &bw_best); + err = VP8_ENC_OK; Error: WebPSafeFree(tokens); WebPSafeFree(huff_tree); VP8LFreeHistogramSet(histogram_image); VP8LFreeHistogram(tmp_histo); + VP8LHashChainClear(&hash_chain_histogram); if (huffman_codes != NULL) { WebPSafeFree(huffman_codes->codes); WebPSafeFree(huffman_codes); @@ -1095,8 +1327,7 @@ static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, VP8LPutBits(bw, pred_bits - 2, 3); return EncodeImageNoHuffman( bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_, - (VP8LBackwardRefs*)&enc->refs_[0], // cast const away - (VP8LBackwardRefs*)&enc->refs_[1], transform_width, transform_height, + (VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height, quality, low_effort); } @@ -1116,8 +1347,7 @@ static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, VP8LPutBits(bw, ccolor_transform_bits - 2, 3); return EncodeImageNoHuffman( bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_, - (VP8LBackwardRefs*)&enc->refs_[0], // cast const away - (VP8LBackwardRefs*)&enc->refs_[1], transform_width, transform_height, + (VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height, quality, low_effort); } @@ -1272,22 +1502,6 @@ static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { // ----------------------------------------------------------------------------- -static WEBP_INLINE int SearchColorNoIdx(const 
uint32_t sorted[], uint32_t color, - int hi) { - int low = 0; - if (sorted[low] == color) return low; // loop invariant: sorted[low] != color - while (1) { - const int mid = (low + hi) >> 1; - if (sorted[mid] == color) { - return mid; - } else if (sorted[mid] < color) { - low = mid; - } else { - hi = mid; - } - } -} - #define APPLY_PALETTE_GREEDY_MAX 4 static WEBP_INLINE uint32_t SearchColorGreedy(const uint32_t palette[], @@ -1322,17 +1536,6 @@ static WEBP_INLINE uint32_t ApplyPaletteHash2(uint32_t color) { (32 - PALETTE_INV_SIZE_BITS); } -// Sort palette in increasing order and prepare an inverse mapping array. -static void PrepareMapToPalette(const uint32_t palette[], int num_colors, - uint32_t sorted[], uint32_t idx_map[]) { - int i; - memcpy(sorted, palette, num_colors * sizeof(*sorted)); - qsort(sorted, num_colors, sizeof(*sorted), PaletteCompareColorsForQsort); - for (i = 0; i < num_colors; ++i) { - idx_map[SearchColorNoIdx(sorted, palette[i], num_colors)] = i; - } -} - // Use 1 pixel cache for ARGB pixels. 
#define APPLY_PALETTE_FOR(COLOR_INDEX) do { \ uint32_t prev_pix = palette[0]; \ @@ -1464,8 +1667,8 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort, } tmp_palette[0] = palette[0]; return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_, - &enc->refs_[0], &enc->refs_[1], palette_size, 1, - 20 /* quality */, low_effort); + &enc->refs_[0], palette_size, 1, /*quality=*/20, + low_effort); } // ----------------------------------------------------------------------------- @@ -1491,7 +1694,7 @@ static void VP8LEncoderDelete(VP8LEncoder* enc) { if (enc != NULL) { int i; VP8LHashChainClear(&enc->hash_chain_); - for (i = 0; i < 3; ++i) VP8LBackwardRefsClear(&enc->refs_[i]); + for (i = 0; i < 4; ++i) VP8LBackwardRefsClear(&enc->refs_[i]); ClearTransformBuffer(enc); WebPSafeFree(enc); } @@ -1541,7 +1744,7 @@ static int EncodeStreamHook(void* input, void* data2) { int data_size = 0; int use_delta_palette = 0; int idx; - size_t best_size = 0; + size_t best_size = ~(size_t)0; VP8LBitWriter bw_init = *bw, bw_best; (void)data2; @@ -1553,12 +1756,15 @@ static int EncodeStreamHook(void* input, void* data2) { for (idx = 0; idx < num_crunch_configs; ++idx) { const int entropy_idx = crunch_configs[idx].entropy_idx_; - enc->use_palette_ = (entropy_idx == kPalette); + enc->use_palette_ = + (entropy_idx == kPalette) || (entropy_idx == kPaletteAndSpatial); enc->use_subtract_green_ = (entropy_idx == kSubGreen) || (entropy_idx == kSpatialSubGreen); - enc->use_predict_ = - (entropy_idx == kSpatial) || (entropy_idx == kSpatialSubGreen); - if (low_effort) { + enc->use_predict_ = (entropy_idx == kSpatial) || + (entropy_idx == kSpatialSubGreen) || + (entropy_idx == kPaletteAndSpatial); + // When using a palette, R/B==0, hence no need to test for cross-color. + if (low_effort || enc->use_palette_) { enc->use_cross_color_ = 0; } else { enc->use_cross_color_ = red_and_blue_always_zero ? 
0 : enc->use_predict_; @@ -1590,6 +1796,19 @@ static int EncodeStreamHook(void* input, void* data2) { // Encode palette if (enc->use_palette_) { + if (crunch_configs[idx].palette_sorting_type_ == kSortedDefault) { + // Nothing to do, we have already sorted the palette. + memcpy(enc->palette_, enc->palette_sorted_, + enc->palette_size_ * sizeof(*enc->palette_)); + } else if (crunch_configs[idx].palette_sorting_type_ == kMinimizeDelta) { + PaletteSortMinimizeDeltas(enc->palette_sorted_, enc->palette_size_, + enc->palette_); + } else { + assert(crunch_configs[idx].palette_sorting_type_ == kModifiedZeng); + err = PaletteSortModifiedZeng(enc->pic_, enc->palette_sorted_, + enc->palette_size_, enc->palette_); + if (err != VP8_ENC_OK) goto Error; + } err = EncodePalette(bw, low_effort, enc); if (err != VP8_ENC_OK) goto Error; err = MapImageFromPalette(enc, use_delta_palette); @@ -1640,7 +1859,7 @@ static int EncodeStreamHook(void* input, void* data2) { if (err != VP8_ENC_OK) goto Error; // If we are better than what we already have. - if (idx == 0 || VP8LBitWriterNumBytes(bw) < best_size) { + if (VP8LBitWriterNumBytes(bw) < best_size) { best_size = VP8LBitWriterNumBytes(bw); // Store the BitWriter. VP8LBitWriterSwap(bw, &bw_best); @@ -1754,6 +1973,8 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, enc_side->palette_size_ = enc_main->palette_size_; memcpy(enc_side->palette_, enc_main->palette_, sizeof(enc_main->palette_)); + memcpy(enc_side->palette_sorted_, enc_main->palette_sorted_, + sizeof(enc_main->palette_sorted_)); param->enc_ = enc_side; } // Create the workers. 
@@ -1816,7 +2037,7 @@ Error: } #undef CRUNCH_CONFIGS_MAX -#undef CRUNCH_CONFIGS_LZ77_MAX +#undef CRUNCH_SUBCONFIGS_MAX int VP8LEncodeImage(const WebPConfig* const config, const WebPPicture* const picture) { diff --git a/thirdparty/libwebp/src/enc/vp8li_enc.h b/thirdparty/libwebp/src/enc/vp8li_enc.h index d2d0fc509c..00de48946c 100644 --- a/thirdparty/libwebp/src/enc/vp8li_enc.h +++ b/thirdparty/libwebp/src/enc/vp8li_enc.h @@ -69,9 +69,11 @@ typedef struct { int use_palette_; int palette_size_; uint32_t palette_[MAX_PALETTE_SIZE]; + // Sorted version of palette_ for cache purposes. + uint32_t palette_sorted_[MAX_PALETTE_SIZE]; // Some 'scratch' (potentially large) objects. - struct VP8LBackwardRefs refs_[3]; // Backward Refs array for temporaries. + struct VP8LBackwardRefs refs_[4]; // Backward Refs array for temporaries. VP8LHashChain hash_chain_; // HashChain data for constructing // backward references. } VP8LEncoder; diff --git a/thirdparty/libwebp/src/enc/webp_enc.c b/thirdparty/libwebp/src/enc/webp_enc.c index 9f4b10c26c..ce2db2e94b 100644 --- a/thirdparty/libwebp/src/enc/webp_enc.c +++ b/thirdparty/libwebp/src/enc/webp_enc.c @@ -400,7 +400,7 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { } if (!config->exact) { - WebPCleanupTransparentAreaLossless(pic); + WebPReplaceTransparentPixels(pic, 0x000000); } ok = VP8LEncodeImage(config, pic); // Sets pic->error in case of problem. diff --git a/thirdparty/libwebp/src/mux/anim_encode.c b/thirdparty/libwebp/src/mux/anim_encode.c index 7be99068f6..7078d9ae6b 100644 --- a/thirdparty/libwebp/src/mux/anim_encode.c +++ b/thirdparty/libwebp/src/mux/anim_encode.c @@ -248,9 +248,6 @@ WebPAnimEncoder* WebPAnimEncoderNewInternal( enc = (WebPAnimEncoder*)WebPSafeCalloc(1, sizeof(*enc)); if (enc == NULL) return NULL; - // sanity inits, so we can call WebPAnimEncoderDelete(): - enc->encoded_frames_ = NULL; - enc->mux_ = NULL; MarkNoError(enc); // Dimensions and options. 
@@ -421,7 +418,7 @@ static void MinimizeChangeRectangle(const WebPPicture* const src, const int max_allowed_diff_lossy = QualityToMaxDiff(quality); const int max_allowed_diff = is_lossless ? 0 : max_allowed_diff_lossy; - // Sanity checks. + // Assumption/correctness checks. assert(src->width == dst->width && src->height == dst->height); assert(rect->x_offset_ + rect->width_ <= dst->width); assert(rect->y_offset_ + rect->height_ <= dst->height); @@ -949,7 +946,8 @@ static int IncreasePreviousDuration(WebPAnimEncoder* const enc, int duration) { int new_duration; assert(enc->count_ >= 1); - assert(prev_enc_frame->sub_frame_.duration == + assert(!prev_enc_frame->is_key_frame_ || + prev_enc_frame->sub_frame_.duration == prev_enc_frame->key_frame_.duration); assert(prev_enc_frame->sub_frame_.duration == (prev_enc_frame->sub_frame_.duration & (MAX_DURATION - 1))); @@ -966,7 +964,7 @@ static int IncreasePreviousDuration(WebPAnimEncoder* const enc, int duration) { 0x10, 0x88, 0x88, 0x08 }; const WebPData lossless_1x1 = { - lossless_1x1_bytes, sizeof(lossless_1x1_bytes) + lossless_1x1_bytes, sizeof(lossless_1x1_bytes) }; const uint8_t lossy_1x1_bytes[] = { 0x52, 0x49, 0x46, 0x46, 0x40, 0x00, 0x00, 0x00, 0x57, 0x45, 0x42, 0x50, @@ -1358,6 +1356,12 @@ int WebPAnimEncoderAdd(WebPAnimEncoder* enc, WebPPicture* frame, int timestamp, if (!IncreasePreviousDuration(enc, (int)prev_frame_duration)) { return 0; } + // IncreasePreviousDuration() may add a frame to avoid exceeding + // MAX_DURATION which could cause CacheFrame() to over read encoded_frames_ + // before the next flush. 
+ if (enc->count_ == enc->size_ && !FlushFrames(enc)) { + return 0; + } } else { enc->first_timestamp_ = timestamp; } diff --git a/thirdparty/libwebp/src/mux/muxedit.c b/thirdparty/libwebp/src/mux/muxedit.c index ccf14b2a0c..02c3edecd7 100644 --- a/thirdparty/libwebp/src/mux/muxedit.c +++ b/thirdparty/libwebp/src/mux/muxedit.c @@ -235,7 +235,6 @@ WebPMuxError WebPMuxSetImage(WebPMux* mux, const WebPData* bitstream, WebPMuxImage wpi; WebPMuxError err; - // Sanity checks. if (mux == NULL || bitstream == NULL || bitstream->bytes == NULL || bitstream->size > MAX_CHUNK_PAYLOAD) { return WEBP_MUX_INVALID_ARGUMENT; @@ -267,7 +266,6 @@ WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPMuxFrameInfo* info, WebPMuxImage wpi; WebPMuxError err; - // Sanity checks. if (mux == NULL || info == NULL) return WEBP_MUX_INVALID_ARGUMENT; if (info->id != WEBP_CHUNK_ANMF) return WEBP_MUX_INVALID_ARGUMENT; diff --git a/thirdparty/libwebp/src/mux/muxi.h b/thirdparty/libwebp/src/mux/muxi.h index ad3e1bdb97..330da66754 100644 --- a/thirdparty/libwebp/src/mux/muxi.h +++ b/thirdparty/libwebp/src/mux/muxi.h @@ -28,8 +28,8 @@ extern "C" { // Defines and constants. #define MUX_MAJ_VERSION 1 -#define MUX_MIN_VERSION 1 -#define MUX_REV_VERSION 0 +#define MUX_MIN_VERSION 2 +#define MUX_REV_VERSION 1 // Chunk object. typedef struct WebPChunk WebPChunk; diff --git a/thirdparty/libwebp/src/mux/muxread.c b/thirdparty/libwebp/src/mux/muxread.c index ae3b876bc5..80050396e1 100644 --- a/thirdparty/libwebp/src/mux/muxread.c +++ b/thirdparty/libwebp/src/mux/muxread.c @@ -56,7 +56,7 @@ static WebPMuxError ChunkVerifyAndAssign(WebPChunk* chunk, uint32_t chunk_size; WebPData chunk_data; - // Sanity checks. + // Correctness checks. 
if (data_size < CHUNK_HEADER_SIZE) return WEBP_MUX_NOT_ENOUGH_DATA; chunk_size = GetLE32(data + TAG_SIZE); if (chunk_size > MAX_CHUNK_PAYLOAD) return WEBP_MUX_BAD_DATA; @@ -155,7 +155,6 @@ static int MuxImageParse(const WebPChunk* const chunk, int copy_data, break; default: goto Fail; - break; } subchunk_size = ChunkDiskSize(&subchunk); bytes += subchunk_size; @@ -187,7 +186,6 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data, WebPChunk** chunk_list_ends[WEBP_CHUNK_NIL + 1] = { NULL }; ChunkInit(&chunk); - // Sanity checks. if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_MUX_ABI_VERSION)) { return NULL; // version mismatch } @@ -264,7 +262,6 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data, if (!MuxImageParse(&chunk, copy_data, wpi)) goto Err; ChunkRelease(&chunk); goto PushImage; - break; default: // A non-image chunk. if (wpi->is_partial_) goto Err; // Encountered a non-image chunk before // getting all chunks of an image. @@ -483,7 +480,6 @@ WebPMuxError WebPMuxGetFrame( WebPMuxError err; WebPMuxImage* wpi; - // Sanity checks. if (mux == NULL || frame == NULL) { return WEBP_MUX_INVALID_ARGUMENT; } diff --git a/thirdparty/libwebp/src/utils/bit_reader_inl_utils.h b/thirdparty/libwebp/src/utils/bit_reader_inl_utils.h index 46b3880706..404b9a6d8c 100644 --- a/thirdparty/libwebp/src/utils/bit_reader_inl_utils.h +++ b/thirdparty/libwebp/src/utils/bit_reader_inl_utils.h @@ -55,7 +55,7 @@ void VP8LoadFinalBytes(VP8BitReader* const br); // makes sure br->value_ has at least BITS bits worth of data static WEBP_UBSAN_IGNORE_UNDEF WEBP_INLINE -void VP8LoadNewBytes(VP8BitReader* const br) { +void VP8LoadNewBytes(VP8BitReader* WEBP_RESTRICT const br) { assert(br != NULL && br->buf_ != NULL); // Read 'BITS' bits at a time if possible. if (br->buf_ < br->buf_max_) { @@ -104,7 +104,7 @@ void VP8LoadNewBytes(VP8BitReader* const br) { } // Read a bit with proba 'prob'. Speed-critical function! 
-static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, +static WEBP_INLINE int VP8GetBit(VP8BitReader* WEBP_RESTRICT const br, int prob, const char label[]) { // Don't move this declaration! It makes a big speed difference to store // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't @@ -137,7 +137,8 @@ static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, // simplified version of VP8GetBit() for prob=0x80 (note shift is always 1 here) static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE -int VP8GetSigned(VP8BitReader* const br, int v, const char label[]) { +int VP8GetSigned(VP8BitReader* WEBP_RESTRICT const br, int v, + const char label[]) { if (br->bits_ < 0) { VP8LoadNewBytes(br); } @@ -155,7 +156,7 @@ int VP8GetSigned(VP8BitReader* const br, int v, const char label[]) { } } -static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br, +static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* WEBP_RESTRICT const br, int prob, const char label[]) { // Don't move this declaration! 
It makes a big speed difference to store // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't diff --git a/thirdparty/libwebp/src/utils/bit_reader_utils.c b/thirdparty/libwebp/src/utils/bit_reader_utils.c index 60271c0ae0..857cd60988 100644 --- a/thirdparty/libwebp/src/utils/bit_reader_utils.c +++ b/thirdparty/libwebp/src/utils/bit_reader_utils.c @@ -41,14 +41,7 @@ void VP8InitBitReader(VP8BitReader* const br, br->bits_ = -8; // to load the very first 8bits br->eof_ = 0; VP8BitReaderSetBuffer(br, start, size); -// -- GODOT -- begin -#ifdef JAVASCRIPT_ENABLED // html5 required aligned reads - while(((uintptr_t)br->buf_ & 1) != 0 && !br->eof_) - VP8LoadFinalBytes(br); -#else VP8LoadNewBytes(br); -#endif -// -- GODOT -- end } void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset) { diff --git a/thirdparty/libwebp/src/utils/bit_reader_utils.h b/thirdparty/libwebp/src/utils/bit_reader_utils.h index 199dacf224..e64156e318 100644 --- a/thirdparty/libwebp/src/utils/bit_reader_utils.h +++ b/thirdparty/libwebp/src/utils/bit_reader_utils.h @@ -58,12 +58,6 @@ extern "C" { // BITS can be any multiple of 8 from 8 to 56 (inclusive). // Pick values that fit natural register size. 
-// -- GODOT -- start -#ifdef JAVASCRIPT_ENABLED -#define BITS 16 -#else -// -- GODOT -- end - #if defined(__i386__) || defined(_M_IX86) // x86 32bit #define BITS 24 #elif defined(__x86_64__) || defined(_M_X64) // x86 64bit @@ -78,10 +72,6 @@ extern "C" { #define BITS 24 #endif -// -- GODOT -- start -#endif -// -- GODOT -- end - //------------------------------------------------------------------------------ // Derived types and constants: // bit_t = natural register type for storing 'value_' (which is BITS+8 bits) diff --git a/thirdparty/libwebp/src/utils/bit_writer_utils.c b/thirdparty/libwebp/src/utils/bit_writer_utils.c index bef0e31ca5..2f408508f1 100644 --- a/thirdparty/libwebp/src/utils/bit_writer_utils.c +++ b/thirdparty/libwebp/src/utils/bit_writer_utils.c @@ -278,7 +278,7 @@ void VP8LPutBitsFlushBits(VP8LBitWriter* const bw) { // If needed, make some room by flushing some bits out. if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) { const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE; - if (extra_size != (size_t)extra_size || + if (!CheckSizeOverflow(extra_size) || !VP8LBitWriterResize(bw, (size_t)extra_size)) { bw->cur_ = bw->buf_; bw->error_ = 1; @@ -314,7 +314,7 @@ void VP8LPutBitsInternal(VP8LBitWriter* const bw, uint32_t bits, int n_bits) { while (used >= VP8L_WRITER_BITS) { if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) { const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE; - if (extra_size != (size_t)extra_size || + if (!CheckSizeOverflow(extra_size) || !VP8LBitWriterResize(bw, (size_t)extra_size)) { bw->cur_ = bw->buf_; bw->error_ = 1; diff --git a/thirdparty/libwebp/src/utils/color_cache_utils.c b/thirdparty/libwebp/src/utils/color_cache_utils.c index b09f538e8b..7b5222b6e5 100644 --- a/thirdparty/libwebp/src/utils/color_cache_utils.c +++ b/thirdparty/libwebp/src/utils/color_cache_utils.c @@ -20,22 +20,22 @@ //------------------------------------------------------------------------------ // VP8LColorCache. 
-int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) { +int VP8LColorCacheInit(VP8LColorCache* const color_cache, int hash_bits) { const int hash_size = 1 << hash_bits; - assert(cc != NULL); + assert(color_cache != NULL); assert(hash_bits > 0); - cc->colors_ = (uint32_t*)WebPSafeCalloc((uint64_t)hash_size, - sizeof(*cc->colors_)); - if (cc->colors_ == NULL) return 0; - cc->hash_shift_ = 32 - hash_bits; - cc->hash_bits_ = hash_bits; + color_cache->colors_ = (uint32_t*)WebPSafeCalloc( + (uint64_t)hash_size, sizeof(*color_cache->colors_)); + if (color_cache->colors_ == NULL) return 0; + color_cache->hash_shift_ = 32 - hash_bits; + color_cache->hash_bits_ = hash_bits; return 1; } -void VP8LColorCacheClear(VP8LColorCache* const cc) { - if (cc != NULL) { - WebPSafeFree(cc->colors_); - cc->colors_ = NULL; +void VP8LColorCacheClear(VP8LColorCache* const color_cache) { + if (color_cache != NULL) { + WebPSafeFree(color_cache->colors_); + color_cache->colors_ = NULL; } } diff --git a/thirdparty/libwebp/src/utils/huffman_encode_utils.c b/thirdparty/libwebp/src/utils/huffman_encode_utils.c index 6f3b1bbe02..fd7a47d8f7 100644 --- a/thirdparty/libwebp/src/utils/huffman_encode_utils.c +++ b/thirdparty/libwebp/src/utils/huffman_encode_utils.c @@ -404,8 +404,7 @@ static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) { // Main entry point void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit, - uint8_t* const buf_rle, - HuffmanTree* const huff_tree, + uint8_t* const buf_rle, HuffmanTree* const huff_tree, HuffmanTreeCode* const huff_code) { const int num_symbols = huff_code->num_symbols; memset(buf_rle, 0, num_symbols * sizeof(*buf_rle)); diff --git a/thirdparty/libwebp/src/utils/huffman_encode_utils.h b/thirdparty/libwebp/src/utils/huffman_encode_utils.h index 3e6763ce49..3f7f1d8074 100644 --- a/thirdparty/libwebp/src/utils/huffman_encode_utils.h +++ b/thirdparty/libwebp/src/utils/huffman_encode_utils.h @@ -51,7 +51,7 @@ int 
VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree, // huffman code tree. void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit, uint8_t* const buf_rle, HuffmanTree* const huff_tree, - HuffmanTreeCode* const tree); + HuffmanTreeCode* const huff_code); #ifdef __cplusplus } diff --git a/thirdparty/libwebp/src/utils/rescaler_utils.c b/thirdparty/libwebp/src/utils/rescaler_utils.c index 4bcae24af5..a0581a14b1 100644 --- a/thirdparty/libwebp/src/utils/rescaler_utils.c +++ b/thirdparty/libwebp/src/utils/rescaler_utils.c @@ -12,66 +12,74 @@ // Author: Skal (pascal.massimino@gmail.com) #include <assert.h> +#include <limits.h> #include <stdlib.h> #include <string.h> #include "src/dsp/dsp.h" #include "src/utils/rescaler_utils.h" +#include "src/utils/utils.h" //------------------------------------------------------------------------------ -void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height, - uint8_t* const dst, - int dst_width, int dst_height, int dst_stride, - int num_channels, rescaler_t* const work) { +int WebPRescalerInit(WebPRescaler* const rescaler, + int src_width, int src_height, + uint8_t* const dst, + int dst_width, int dst_height, int dst_stride, + int num_channels, rescaler_t* const work) { const int x_add = src_width, x_sub = dst_width; const int y_add = src_height, y_sub = dst_height; - wrk->x_expand = (src_width < dst_width); - wrk->y_expand = (src_height < dst_height); - wrk->src_width = src_width; - wrk->src_height = src_height; - wrk->dst_width = dst_width; - wrk->dst_height = dst_height; - wrk->src_y = 0; - wrk->dst_y = 0; - wrk->dst = dst; - wrk->dst_stride = dst_stride; - wrk->num_channels = num_channels; + const uint64_t total_size = 2ull * dst_width * num_channels * sizeof(*work); + if (!CheckSizeOverflow(total_size)) return 0; + + rescaler->x_expand = (src_width < dst_width); + rescaler->y_expand = (src_height < dst_height); + rescaler->src_width = src_width; + rescaler->src_height = 
src_height; + rescaler->dst_width = dst_width; + rescaler->dst_height = dst_height; + rescaler->src_y = 0; + rescaler->dst_y = 0; + rescaler->dst = dst; + rescaler->dst_stride = dst_stride; + rescaler->num_channels = num_channels; // for 'x_expand', we use bilinear interpolation - wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add; - wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub; - if (!wrk->x_expand) { // fx_scale is not used otherwise - wrk->fx_scale = WEBP_RESCALER_FRAC(1, wrk->x_sub); + rescaler->x_add = rescaler->x_expand ? (x_sub - 1) : x_add; + rescaler->x_sub = rescaler->x_expand ? (x_add - 1) : x_sub; + if (!rescaler->x_expand) { // fx_scale is not used otherwise + rescaler->fx_scale = WEBP_RESCALER_FRAC(1, rescaler->x_sub); } // vertical scaling parameters - wrk->y_add = wrk->y_expand ? y_add - 1 : y_add; - wrk->y_sub = wrk->y_expand ? y_sub - 1 : y_sub; - wrk->y_accum = wrk->y_expand ? wrk->y_sub : wrk->y_add; - if (!wrk->y_expand) { + rescaler->y_add = rescaler->y_expand ? y_add - 1 : y_add; + rescaler->y_sub = rescaler->y_expand ? y_sub - 1 : y_sub; + rescaler->y_accum = rescaler->y_expand ? rescaler->y_sub : rescaler->y_add; + if (!rescaler->y_expand) { // This is WEBP_RESCALER_FRAC(dst_height, x_add * y_add) without the cast. - // Its value is <= WEBP_RESCALER_ONE, because dst_height <= wrk->y_add, and - // wrk->x_add >= 1; - const uint64_t ratio = - (uint64_t)dst_height * WEBP_RESCALER_ONE / (wrk->x_add * wrk->y_add); + // Its value is <= WEBP_RESCALER_ONE, because dst_height <= rescaler->y_add + // and rescaler->x_add >= 1; + const uint64_t num = (uint64_t)dst_height * WEBP_RESCALER_ONE; + const uint64_t den = (uint64_t)rescaler->x_add * rescaler->y_add; + const uint64_t ratio = num / den; if (ratio != (uint32_t)ratio) { // When ratio == WEBP_RESCALER_ONE, we can't represent the ratio with the // current fixed-point precision. This happens when src_height == - // wrk->y_add (which == src_height), and wrk->x_add == 1. 
+ // rescaler->y_add (which == src_height), and rescaler->x_add == 1. // => We special-case fxy_scale = 0, in WebPRescalerExportRow(). - wrk->fxy_scale = 0; + rescaler->fxy_scale = 0; } else { - wrk->fxy_scale = (uint32_t)ratio; + rescaler->fxy_scale = (uint32_t)ratio; } - wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->y_sub); + rescaler->fy_scale = WEBP_RESCALER_FRAC(1, rescaler->y_sub); } else { - wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->x_add); - // wrk->fxy_scale is unused here. + rescaler->fy_scale = WEBP_RESCALER_FRAC(1, rescaler->x_add); + // rescaler->fxy_scale is unused here. } - wrk->irow = work; - wrk->frow = work + num_channels * dst_width; - memset(work, 0, 2 * dst_width * num_channels * sizeof(*work)); + rescaler->irow = work; + rescaler->frow = work + num_channels * dst_width; + memset(work, 0, (size_t)total_size); WebPRescalerDspInit(); + return 1; } int WebPRescalerGetScaledDimensions(int src_width, int src_height, @@ -82,6 +90,7 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height, { int width = *scaled_width; int height = *scaled_height; + const int max_size = INT_MAX / 2; // if width is unspecified, scale original proportionally to height ratio. if (width == 0 && src_height > 0) { @@ -94,7 +103,7 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height, (int)(((uint64_t)src_height * width + src_width - 1) / src_width); } // Check if the overall dimensions still make sense. 
- if (width <= 0 || height <= 0) { + if (width <= 0 || height <= 0 || width > max_size || height > max_size) { return 0; } @@ -107,31 +116,34 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height, //------------------------------------------------------------------------------ // all-in-one calls -int WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) { - const int num_lines = (wrk->y_accum + wrk->y_sub - 1) / wrk->y_sub; +int WebPRescaleNeededLines(const WebPRescaler* const rescaler, + int max_num_lines) { + const int num_lines = + (rescaler->y_accum + rescaler->y_sub - 1) / rescaler->y_sub; return (num_lines > max_num_lines) ? max_num_lines : num_lines; } -int WebPRescalerImport(WebPRescaler* const wrk, int num_lines, +int WebPRescalerImport(WebPRescaler* const rescaler, int num_lines, const uint8_t* src, int src_stride) { int total_imported = 0; - while (total_imported < num_lines && !WebPRescalerHasPendingOutput(wrk)) { - if (wrk->y_expand) { - rescaler_t* const tmp = wrk->irow; - wrk->irow = wrk->frow; - wrk->frow = tmp; + while (total_imported < num_lines && + !WebPRescalerHasPendingOutput(rescaler)) { + if (rescaler->y_expand) { + rescaler_t* const tmp = rescaler->irow; + rescaler->irow = rescaler->frow; + rescaler->frow = tmp; } - WebPRescalerImportRow(wrk, src); - if (!wrk->y_expand) { // Accumulate the contribution of the new row. + WebPRescalerImportRow(rescaler, src); + if (!rescaler->y_expand) { // Accumulate the contribution of the new row. 
int x; - for (x = 0; x < wrk->num_channels * wrk->dst_width; ++x) { - wrk->irow[x] += wrk->frow[x]; + for (x = 0; x < rescaler->num_channels * rescaler->dst_width; ++x) { + rescaler->irow[x] += rescaler->frow[x]; } } - ++wrk->src_y; + ++rescaler->src_y; src += src_stride; ++total_imported; - wrk->y_accum -= wrk->y_sub; + rescaler->y_accum -= rescaler->y_sub; } return total_imported; } diff --git a/thirdparty/libwebp/src/utils/rescaler_utils.h b/thirdparty/libwebp/src/utils/rescaler_utils.h index ca41e42c4a..ef201ef86c 100644 --- a/thirdparty/libwebp/src/utils/rescaler_utils.h +++ b/thirdparty/libwebp/src/utils/rescaler_utils.h @@ -47,12 +47,13 @@ struct WebPRescaler { }; // Initialize a rescaler given scratch area 'work' and dimensions of src & dst. -void WebPRescalerInit(WebPRescaler* const rescaler, - int src_width, int src_height, - uint8_t* const dst, - int dst_width, int dst_height, int dst_stride, - int num_channels, - rescaler_t* const work); +// Returns false in case of error. +int WebPRescalerInit(WebPRescaler* const rescaler, + int src_width, int src_height, + uint8_t* const dst, + int dst_width, int dst_height, int dst_stride, + int num_channels, + rescaler_t* const work); // If either 'scaled_width' or 'scaled_height' (but not both) is 0 the value // will be calculated preserving the aspect ratio, otherwise the values are diff --git a/thirdparty/libwebp/src/utils/utils.c b/thirdparty/libwebp/src/utils/utils.c index 764f752b82..9e464c16ce 100644 --- a/thirdparty/libwebp/src/utils/utils.c +++ b/thirdparty/libwebp/src/utils/utils.c @@ -101,6 +101,9 @@ static void Increment(int* const v) { #if defined(MALLOC_LIMIT) { const char* const malloc_limit_str = getenv("MALLOC_LIMIT"); +#if MALLOC_LIMIT > 1 + mem_limit = (size_t)MALLOC_LIMIT; +#endif if (malloc_limit_str != NULL) { mem_limit = atoi(malloc_limit_str); } @@ -169,16 +172,16 @@ static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) { const uint64_t total_size = nmemb * size; if (nmemb == 0) 
return 1; if ((uint64_t)size > WEBP_MAX_ALLOCABLE_MEMORY / nmemb) return 0; - if (total_size != (size_t)total_size) return 0; + if (!CheckSizeOverflow(total_size)) return 0; #if defined(PRINT_MEM_INFO) && defined(MALLOC_FAIL_AT) if (countdown_to_fail > 0 && --countdown_to_fail == 0) { return 0; // fake fail! } #endif -#if defined(MALLOC_LIMIT) +#if defined(PRINT_MEM_INFO) && defined(MALLOC_LIMIT) if (mem_limit > 0) { const uint64_t new_total_mem = (uint64_t)total_mem + total_size; - if (new_total_mem != (size_t)new_total_mem || + if (!CheckSizeOverflow(new_total_mem) || new_total_mem > mem_limit) { return 0; // fake fail! } @@ -231,7 +234,7 @@ void WebPFree(void* ptr) { void WebPCopyPlane(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height) { assert(src != NULL && dst != NULL); - assert(src_stride >= width && dst_stride >= width); + assert(abs(src_stride) >= width && abs(dst_stride) >= width); while (height-- > 0) { memcpy(dst, src, width); src += src_stride; diff --git a/thirdparty/libwebp/src/utils/utils.h b/thirdparty/libwebp/src/utils/utils.h index 2a3ec92678..ef04f108fe 100644 --- a/thirdparty/libwebp/src/utils/utils.h +++ b/thirdparty/libwebp/src/utils/utils.h @@ -42,6 +42,10 @@ extern "C" { #endif #endif // WEBP_MAX_ALLOCABLE_MEMORY +static WEBP_INLINE int CheckSizeOverflow(uint64_t size) { + return size == (size_t)size; +} + // size-checking safe malloc/calloc: verify that the requested size is not too // large, or return NULL. You don't need to call these for constructs like // malloc(sizeof(foo)), but only if there's picture-dependent size involved @@ -107,24 +111,33 @@ static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) { PutLE16(data + 2, (int)(val >> 16)); } -// Returns (int)floor(log2(n)). n must be > 0. // use GNU builtins where available. #if defined(__GNUC__) && \ ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) +// Returns (int)floor(log2(n)). n must be > 0. 
static WEBP_INLINE int BitsLog2Floor(uint32_t n) { return 31 ^ __builtin_clz(n); } +// counts the number of trailing zeros +static WEBP_INLINE int BitsCtz(uint32_t n) { return __builtin_ctz(n); } #elif defined(_MSC_VER) && _MSC_VER > 1310 && \ (defined(_M_X64) || defined(_M_IX86)) #include <intrin.h> #pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward) static WEBP_INLINE int BitsLog2Floor(uint32_t n) { - unsigned long first_set_bit; + unsigned long first_set_bit; // NOLINT (runtime/int) _BitScanReverse(&first_set_bit, n); return first_set_bit; } -#else // default: use the C-version. +static WEBP_INLINE int BitsCtz(uint32_t n) { + unsigned long first_set_bit; // NOLINT (runtime/int) + _BitScanForward(&first_set_bit, n); + return first_set_bit; +} +#else // default: use the (slow) C-version. +#define WEBP_HAVE_SLOW_CLZ_CTZ // signal that the Clz/Ctz functions are slow // Returns 31 ^ clz(n) = log2(n). This is the default C-implementation, either // based on table or not. Can be used as fallback if clz() is not available.
#define WEBP_NEED_LOG_TABLE_8BIT @@ -139,6 +152,15 @@ static WEBP_INLINE int WebPLog2FloorC(uint32_t n) { } static WEBP_INLINE int BitsLog2Floor(uint32_t n) { return WebPLog2FloorC(n); } + +static WEBP_INLINE int BitsCtz(uint32_t n) { + int i; + for (i = 0; i < 32; ++i, n >>= 1) { + if (n & 1) return i; + } + return 32; +} + #endif //------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/src/webp/decode.h b/thirdparty/libwebp/src/webp/decode.h index 80dd0ef0cc..44fcd64a84 100644 --- a/thirdparty/libwebp/src/webp/decode.h +++ b/thirdparty/libwebp/src/webp/decode.h @@ -453,7 +453,7 @@ struct WebPDecoderOptions { int scaled_width, scaled_height; // final resolution int use_threads; // if true, use multi-threaded decoding int dithering_strength; // dithering strength (0=Off, 100=full) - int flip; // flip output vertically + int flip; // if true, flip output vertically int alpha_dithering_strength; // alpha dithering strength in [0..100] uint32_t pad[5]; // padding for later use diff --git a/thirdparty/libwebp/src/webp/encode.h b/thirdparty/libwebp/src/webp/encode.h index 655166e7d4..b4c599df87 100644 --- a/thirdparty/libwebp/src/webp/encode.h +++ b/thirdparty/libwebp/src/webp/encode.h @@ -148,7 +148,8 @@ struct WebPConfig { int use_delta_palette; // reserved for future lossless feature int use_sharp_yuv; // if needed, use sharp (and slow) RGB->YUV conversion - uint32_t pad[2]; // padding for later use + int qmin; // minimum permissible quality factor + int qmax; // maximum permissible quality factor }; // Enumerate some predefined settings for WebPConfig, depending on the type @@ -291,6 +292,11 @@ typedef enum WebPEncodingError { #define WEBP_MAX_DIMENSION 16383 // Main exchange structure (input samples, output bytes, statistics) +// +// Once WebPPictureInit() has been called, it's ok to make all the INPUT fields +// (use_argb, y/u/v, argb, ...) 
point to user-owned data, even if +// WebPPictureAlloc() has been called. Depending on the value use_argb, +// it's guaranteed that either *argb or *y/*u/*v content will be kept untouched. struct WebPPicture { // INPUT ////////////// diff --git a/thirdparty/meshoptimizer/clusterizer.cpp b/thirdparty/meshoptimizer/clusterizer.cpp index f8aad7b49c..b1f7b359c1 100644 --- a/thirdparty/meshoptimizer/clusterizer.cpp +++ b/thirdparty/meshoptimizer/clusterizer.cpp @@ -368,8 +368,7 @@ static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const } // split axis is one where the variance is largest - unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1 - : 2; + unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1 : 2; float split = mean[axis]; size_t middle = kdtreePartition(indices, count, points, stride, axis, split); diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h index e44b99ce52..a420eb1098 100644 --- a/thirdparty/meshoptimizer/meshoptimizer.h +++ b/thirdparty/meshoptimizer/meshoptimizer.h @@ -278,9 +278,30 @@ MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t verte * meshopt_decodeFilterExp decodes exponential encoding of floating-point data with 8-bit exponent and 24-bit integer mantissa as 2^E*M. * Each 32-bit component is decoded in isolation; stride must be divisible by 4. 
 */ -MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size); -MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size); -MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t count, size_t stride); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride); + +/** + * Vertex buffer filter encoders + * These functions can be used to encode data in a format that meshopt_decodeFilter can decode + * + * meshopt_encodeFilterOct encodes unit vectors with K-bit (K <= 16) signed X/Y as an output. + * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is. + * Input data must contain 4 floats for every vector (count*4 total). + * + * meshopt_encodeFilterQuat encodes unit quaternions with K-bit (4 <= K <= 16) component encoding. + * Each component is stored as a 16-bit integer; stride must be equal to 8. + * Input data must contain 4 floats for every quaternion (count*4 total). + * + * meshopt_encodeFilterExp encodes arbitrary (finite) floating-point data with 8-bit exponent and K-bit integer mantissa (1 <= K <= 24). + * Exponent is shared between all components of a given vector as defined by stride; stride must be divisible by 4. + * Input data must contain stride/4 floats for every vector (count*stride/4 total). + * When individual (scalar) encoding is desired, simply pass stride=4 and adjust count accordingly.
+ */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterQuat(void* destination, size_t count, size_t stride, int bits, const float* data); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_t count, size_t stride, int bits, const float* data); /** * Experimental: Mesh simplifier @@ -305,7 +326,7 @@ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* d /** * Experimental: Mesh simplifier (sloppy) - * Reduces the number of triangles in the mesh, sacrificing mesh apperance for simplification performance + * Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance * The algorithm doesn't preserve mesh topology but can stop short of the target goal based on target error. * Returns the number of indices after simplification, with destination containing new index data * The resulting index buffer references vertices from the original vertex buffer. diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp index cf5db4e119..ccc99edb1a 100644 --- a/thirdparty/meshoptimizer/simplifier.cpp +++ b/thirdparty/meshoptimizer/simplifier.cpp @@ -358,7 +358,7 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned #if TRACE printf("locked: many open edges %d, disconnected seam %d, many seam edges %d, many wedges %d\n", - int(stats[0]), int(stats[1]), int(stats[2]), int(stats[3])); + int(stats[0]), int(stats[1]), int(stats[2]), int(stats[3])); #endif } @@ -1114,8 +1114,8 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* float error_goal_perfect = edge_collapse_goal < collapse_count ? 
collapses[collapse_order[edge_collapse_goal]].error : 0.f; printf("removed %d triangles, error %e (goal %e); evaluated %d/%d collapses (done %d, skipped %d, invalid %d)\n", - int(triangle_collapses), sqrtf(result_error), sqrtf(error_goal_perfect), - int(stats[0]), int(collapse_count), int(edge_collapses), int(stats[1]), int(stats[2])); + int(triangle_collapses), sqrtf(result_error), sqrtf(error_goal_perfect), + int(stats[0]), int(collapse_count), int(edge_collapses), int(stats[1]), int(stats[2])); #endif return edge_collapses; @@ -1473,7 +1473,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned kinds[vertex_kind[i]] += remap[i] == i; printf("kinds: manifold %d, border %d, seam %d, complex %d, locked %d\n", - int(kinds[Kind_Manifold]), int(kinds[Kind_Border]), int(kinds[Kind_Seam]), int(kinds[Kind_Complex]), int(kinds[Kind_Locked])); + int(kinds[Kind_Manifold]), int(kinds[Kind_Border]), int(kinds[Kind_Seam]), int(kinds[Kind_Complex]), int(kinds[Kind_Locked])); #endif Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count); @@ -1649,9 +1649,9 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind #if TRACE printf("pass %d (%s): grid size %d, triangles %d, %s\n", - pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary", - grid_size, int(triangles), - (triangles <= target_index_count / 3) ? "under" : "over"); + pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary", + grid_size, int(triangles), + (triangles <= target_index_count / 3) ? "under" : "over"); #endif float tip = interpolate(float(target_index_count / 3), float(min_grid), float(min_triangles), float(grid_size), float(triangles), float(max_grid), float(max_triangles)); @@ -1778,9 +1778,9 @@ size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_pos #if TRACE printf("pass %d (%s): grid size %d, vertices %d, %s\n", - pass, (pass == 0) ? 
"guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary", - grid_size, int(vertices), - (vertices <= target_vertex_count) ? "under" : "over"); + pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary", + grid_size, int(vertices), + (vertices <= target_vertex_count) ? "under" : "over"); #endif float tip = interpolate(float(target_vertex_count), float(min_grid), float(min_vertices), float(grid_size), float(vertices), float(max_grid), float(max_vertices)); diff --git a/thirdparty/meshoptimizer/vertexcodec.cpp b/thirdparty/meshoptimizer/vertexcodec.cpp index 5f3ec204ab..7925ea862c 100644 --- a/thirdparty/meshoptimizer/vertexcodec.cpp +++ b/thirdparty/meshoptimizer/vertexcodec.cpp @@ -77,6 +77,8 @@ #endif #ifdef SIMD_WASM +#undef __DEPRECATED +#pragma clang diagnostic ignored "-Wdeprecated-declarations" #include <wasm_simd128.h> #endif @@ -1028,7 +1030,7 @@ static unsigned int getCpuFeatures() return cpuinfo[2]; } -unsigned int cpuid = getCpuFeatures(); +static unsigned int cpuid = getCpuFeatures(); #endif } // namespace meshopt diff --git a/thirdparty/meshoptimizer/vertexfilter.cpp b/thirdparty/meshoptimizer/vertexfilter.cpp index 39946f46ed..606a280aa9 100644 --- a/thirdparty/meshoptimizer/vertexfilter.cpp +++ b/thirdparty/meshoptimizer/vertexfilter.cpp @@ -52,6 +52,7 @@ #endif #ifdef SIMD_WASM +#undef __DEPRECATED #include <wasm_simd128.h> #endif @@ -160,7 +161,8 @@ static void decodeFilterExp(unsigned int* data, size_t count) #endif #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) -template <typename T> static void dispatchSimd(void (*process)(T*, size_t), T* data, size_t count, size_t stride) +template <typename T> +static void dispatchSimd(void (*process)(T*, size_t), T* data, size_t count, size_t stride) { assert(stride <= 4); @@ -791,52 +793,170 @@ static void decodeFilterExpSimd(unsigned int* data, size_t count) } // namespace meshopt -void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t 
vertex_size) +void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride) { using namespace meshopt; - assert(vertex_size == 4 || vertex_size == 8); + assert(stride == 4 || stride == 8); #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) - if (vertex_size == 4) - dispatchSimd(decodeFilterOctSimd, static_cast<signed char*>(buffer), vertex_count, 4); + if (stride == 4) + dispatchSimd(decodeFilterOctSimd, static_cast<signed char*>(buffer), count, 4); else - dispatchSimd(decodeFilterOctSimd, static_cast<short*>(buffer), vertex_count, 4); + dispatchSimd(decodeFilterOctSimd, static_cast<short*>(buffer), count, 4); #else - if (vertex_size == 4) - decodeFilterOct(static_cast<signed char*>(buffer), vertex_count); + if (stride == 4) + decodeFilterOct(static_cast<signed char*>(buffer), count); else - decodeFilterOct(static_cast<short*>(buffer), vertex_count); + decodeFilterOct(static_cast<short*>(buffer), count); #endif } -void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size) +void meshopt_decodeFilterQuat(void* buffer, size_t count, size_t stride) { using namespace meshopt; - assert(vertex_size == 8); - (void)vertex_size; + assert(stride == 8); + (void)stride; #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) - dispatchSimd(decodeFilterQuatSimd, static_cast<short*>(buffer), vertex_count, 4); + dispatchSimd(decodeFilterQuatSimd, static_cast<short*>(buffer), count, 4); #else - decodeFilterQuat(static_cast<short*>(buffer), vertex_count); + decodeFilterQuat(static_cast<short*>(buffer), count); #endif } -void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size) +void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride) { using namespace meshopt; - assert(vertex_size % 4 == 0); + assert(stride > 0 && stride % 4 == 0); #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) - dispatchSimd(decodeFilterExpSimd, static_cast<unsigned int*>(buffer), vertex_count * 
(vertex_size / 4), 1); + dispatchSimd(decodeFilterExpSimd, static_cast<unsigned int*>(buffer), count * (stride / 4), 1); #else - decodeFilterExp(static_cast<unsigned int*>(buffer), vertex_count * (vertex_size / 4)); + decodeFilterExp(static_cast<unsigned int*>(buffer), count * (stride / 4)); #endif } +void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data) +{ + assert(stride == 4 || stride == 8); + assert(bits >= 1 && bits <= 16); + + signed char* d8 = static_cast<signed char*>(destination); + short* d16 = static_cast<short*>(destination); + + int bytebits = int(stride * 2); + + for (size_t i = 0; i < count; ++i) + { + const float* n = &data[i * 4]; + + // octahedral encoding of a unit vector + float nx = n[0], ny = n[1], nz = n[2], nw = n[3]; + float nl = fabsf(nx) + fabsf(ny) + fabsf(nz); + float ns = nl == 0.f ? 0.f : 1.f / nl; + + nx *= ns; + ny *= ns; + + float u = (nz >= 0.f) ? nx : (1 - fabsf(ny)) * (nx >= 0.f ? 1.f : -1.f); + float v = (nz >= 0.f) ? ny : (1 - fabsf(nx)) * (ny >= 0.f ? 
1.f : -1.f); + + int fu = meshopt_quantizeSnorm(u, bits); + int fv = meshopt_quantizeSnorm(v, bits); + int fo = meshopt_quantizeSnorm(1.f, bits); + int fw = meshopt_quantizeSnorm(nw, bytebits); + + if (stride == 4) + { + d8[i * 4 + 0] = (signed char)(fu); + d8[i * 4 + 1] = (signed char)(fv); + d8[i * 4 + 2] = (signed char)(fo); + d8[i * 4 + 3] = (signed char)(fw); + } + else + { + d16[i * 4 + 0] = short(fu); + d16[i * 4 + 1] = short(fv); + d16[i * 4 + 2] = short(fo); + d16[i * 4 + 3] = short(fw); + } + } +} + +void meshopt_encodeFilterQuat(void* destination_, size_t count, size_t stride, int bits, const float* data) +{ + assert(stride == 8); + assert(bits >= 4 && bits <= 16); + (void)stride; + + short* destination = static_cast<short*>(destination_); + + const float scaler = sqrtf(2.f); + + for (size_t i = 0; i < count; ++i) + { + const float* q = &data[i * 4]; + short* d = &destination[i * 4]; + + // establish maximum quaternion component + int qc = 0; + qc = fabsf(q[1]) > fabsf(q[qc]) ? 1 : qc; + qc = fabsf(q[2]) > fabsf(q[qc]) ? 2 : qc; + qc = fabsf(q[3]) > fabsf(q[qc]) ? 3 : qc; + + // we use double-cover properties to discard the sign + float sign = q[qc] < 0.f ? 
-1.f : 1.f; + + // note: we always encode a cyclical swizzle to be able to recover the order via rotation + d[0] = short(meshopt_quantizeSnorm(q[(qc + 1) & 3] * scaler * sign, bits)); + d[1] = short(meshopt_quantizeSnorm(q[(qc + 2) & 3] * scaler * sign, bits)); + d[2] = short(meshopt_quantizeSnorm(q[(qc + 3) & 3] * scaler * sign, bits)); + d[3] = short((meshopt_quantizeSnorm(1.f, bits) & ~3) | qc); + } +} + +void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, int bits, const float* data) +{ + assert(stride > 0 && stride % 4 == 0); + assert(bits >= 1 && bits <= 24); + + unsigned int* destination = static_cast<unsigned int*>(destination_); + size_t stride_float = stride / sizeof(float); + + for (size_t i = 0; i < count; ++i) + { + const float* v = &data[i * stride_float]; + unsigned int* d = &destination[i * stride_float]; + + // use maximum exponent to encode values; this guarantees that mantissa is [-1, 1] + int exp = -100; + + for (size_t j = 0; j < stride_float; ++j) + { + int e; + frexp(v[j], &e); + + exp = (exp < e) ? e : exp; + } + + // note that we additionally scale the mantissa to make it a K-bit signed integer (K-1 bits for magnitude) + exp -= (bits - 1); + + // compute renormalized rounded mantissa for each component + int mmask = (1 << 24) - 1; + + for (size_t j = 0; j < stride_float; ++j) + { + int m = int(ldexp(v[j], -exp) + (v[j] >= 0 ?
0.5f : -0.5f)); + + d[j] = (m & mmask) | (unsigned(exp) << 24); + } + } +} + #undef SIMD_SSE #undef SIMD_NEON #undef SIMD_WASM diff --git a/thirdparty/minimp3/minimp3.h b/thirdparty/minimp3/minimp3.h index 796cbc1f8e..3220ae1a85 100644 --- a/thirdparty/minimp3/minimp3.h +++ b/thirdparty/minimp3/minimp3.h @@ -881,12 +881,22 @@ static void L3_midside_stereo(float *left, int n) int i = 0; float *right = left + 576; #if HAVE_SIMD - if (have_simd()) for (; i < n - 3; i += 4) + if (have_simd()) { - f4 vl = VLD(left + i); - f4 vr = VLD(right + i); - VSTORE(left + i, VADD(vl, vr)); - VSTORE(right + i, VSUB(vl, vr)); + for (; i < n - 3; i += 4) + { + f4 vl = VLD(left + i); + f4 vr = VLD(right + i); + VSTORE(left + i, VADD(vl, vr)); + VSTORE(right + i, VSUB(vl, vr)); + } +#ifdef __GNUC__ + /* Workaround for spurious -Waggressive-loop-optimizations warning from gcc. + * For more info see: https://github.com/lieff/minimp3/issues/88 + */ + if (__builtin_constant_p(n % 4 == 0) && n % 4 == 0) + return; +#endif } #endif /* HAVE_SIMD */ for (; i < n; i++) @@ -1353,7 +1363,7 @@ static void mp3d_DCT_II(float *grbuf, int n) } else #endif /* HAVE_SIMD */ #ifdef MINIMP3_ONLY_SIMD - {} + {} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */ #else /* MINIMP3_ONLY_SIMD */ for (; k < n; k++) { @@ -1583,7 +1593,7 @@ static void mp3d_synth(float *xl, mp3d_sample_t *dstl, int nch, float *lins) } else #endif /* HAVE_SIMD */ #ifdef MINIMP3_ONLY_SIMD - {} + {} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */ #else /* MINIMP3_ONLY_SIMD */ for (i = 14; i >= 0; i--) { diff --git a/thirdparty/minimp3/minimp3_ex.h b/thirdparty/minimp3/minimp3_ex.h index e29dd15b2e..2871705df3 100644 --- a/thirdparty/minimp3/minimp3_ex.h +++ b/thirdparty/minimp3/minimp3_ex.h @@ -6,6 +6,7 @@ This software is distributed without any warranty. See <http://creativecommons.org/publicdomain/zero/1.0/>. 
*/ +#include <stddef.h> #include "minimp3.h" /* flags for mp3dec_ex_open_* functions */ @@ -128,8 +129,10 @@ int mp3dec_ex_open_w(mp3dec_ex_t *dec, const wchar_t *file_name, int flags); #endif #endif /*MINIMP3_EXT_H*/ -#ifdef MINIMP3_IMPLEMENTATION +#if defined(MINIMP3_IMPLEMENTATION) && !defined(_MINIMP3_EX_IMPLEMENTATION_GUARD) +#define _MINIMP3_EX_IMPLEMENTATION_GUARD #include <limits.h> +#include "minimp3.h" static void mp3dec_skip_id3v1(const uint8_t *buf, size_t *pbuf_size) { @@ -1391,4 +1394,4 @@ void mp3dec_ex_close(mp3dec_ex_t *dec) } #endif -#endif /*MINIMP3_IMPLEMENTATION*/ +#endif /* MINIMP3_IMPLEMENTATION && !_MINIMP3_EX_IMPLEMENTATION_GUARD */ diff --git a/thirdparty/miniupnpc/LICENSE b/thirdparty/miniupnpc/LICENSE index 6ddd381baa..fe9118c07e 100644 --- a/thirdparty/miniupnpc/LICENSE +++ b/thirdparty/miniupnpc/LICENSE @@ -1,4 +1,4 @@ -MiniUPnPc +MiniUPnP Project Copyright (c) 2005-2020, Thomas BERNARD All rights reserved. @@ -24,4 +24,3 @@ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- diff --git a/thirdparty/miniupnpc/miniupnpc/igd_desc_parse.h b/thirdparty/miniupnpc/include/igd_desc_parse.h index 0de546b697..0de546b697 100644 --- a/thirdparty/miniupnpc/miniupnpc/igd_desc_parse.h +++ b/thirdparty/miniupnpc/include/igd_desc_parse.h diff --git a/thirdparty/miniupnpc/miniupnpc/miniupnpc.h b/thirdparty/miniupnpc/include/miniupnpc.h index 3aef8ea443..a10bd950a8 100644 --- a/thirdparty/miniupnpc/miniupnpc/miniupnpc.h +++ b/thirdparty/miniupnpc/include/miniupnpc.h @@ -1,4 +1,4 @@ -/* $Id: miniupnpc.h,v 1.58 2021/03/02 23:49:52 nanard Exp $ */ +/* $Id: miniupnpc.h,v 1.59 2021/09/28 21:39:17 nanard Exp $ */ /* vim: tabstop=4 shiftwidth=4 noexpandtab * Project: miniupnp * http://miniupnp.free.fr/ @@ -20,7 +20,7 @@ #define UPNPDISCOVER_MEMORY_ERROR (-102) /* versions : */ -#define MINIUPNPC_VERSION "2.2.2" +#define MINIUPNPC_VERSION "2.2.3" #define MINIUPNPC_API_VERSION 17 /* Source port: diff --git a/thirdparty/miniupnpc/miniupnpc/miniupnpc_declspec.h b/thirdparty/miniupnpc/include/miniupnpc_declspec.h index 40adb922ec..40adb922ec 100644 --- a/thirdparty/miniupnpc/miniupnpc/miniupnpc_declspec.h +++ b/thirdparty/miniupnpc/include/miniupnpc_declspec.h diff --git a/thirdparty/miniupnpc/miniupnpc/miniupnpctypes.h b/thirdparty/miniupnpc/include/miniupnpctypes.h index 307ce39699..26ed4f0059 100644 --- a/thirdparty/miniupnpc/miniupnpc/miniupnpctypes.h +++ b/thirdparty/miniupnpc/include/miniupnpctypes.h @@ -1,13 +1,15 @@ /* $Id: miniupnpctypes.h,v 1.1 2011/02/15 11:10:40 nanard Exp $ */ /* Miniupnp project : http://miniupnp.free.fr/ or http://miniupnp.tuxfamily.org * Author : Thomas Bernard - * Copyright (c) 2011 Thomas Bernard + * Copyright (c) 2021 Thomas Bernard * This software is subject to the conditions detailed in the * LICENCE file provided within this distribution */ #ifndef MINIUPNPCTYPES_H_INCLUDED #define MINIUPNPCTYPES_H_INCLUDED -#if (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) +/* Use unsigned long long when available : + * strtoull 
is C99 */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define UNSIGNED_INTEGER unsigned long long #define STRTOUI strtoull #else diff --git a/thirdparty/miniupnpc/miniupnpc/miniwget.h b/thirdparty/miniupnpc/include/miniwget.h index f5572c2544..f5572c2544 100644 --- a/thirdparty/miniupnpc/miniupnpc/miniwget.h +++ b/thirdparty/miniupnpc/include/miniwget.h diff --git a/thirdparty/miniupnpc/miniupnpc/portlistingparse.h b/thirdparty/miniupnpc/include/portlistingparse.h index e3957a3f4c..e3957a3f4c 100644 --- a/thirdparty/miniupnpc/miniupnpc/portlistingparse.h +++ b/thirdparty/miniupnpc/include/portlistingparse.h diff --git a/thirdparty/miniupnpc/miniupnpc/upnpcommands.h b/thirdparty/miniupnpc/include/upnpcommands.h index 1b6d447732..1b6d447732 100644 --- a/thirdparty/miniupnpc/miniupnpc/upnpcommands.h +++ b/thirdparty/miniupnpc/include/upnpcommands.h diff --git a/thirdparty/miniupnpc/miniupnpc/upnpdev.h b/thirdparty/miniupnpc/include/upnpdev.h index 9b2cb431ba..171d495be3 100644 --- a/thirdparty/miniupnpc/miniupnpc/upnpdev.h +++ b/thirdparty/miniupnpc/include/upnpdev.h @@ -1,8 +1,8 @@ -/* $Id: upnpdev.h,v 1.3 2020/05/29 15:57:42 nanard Exp $ */ +/* $Id: upnpdev.h,v 1.4 2021/08/21 09:45:01 nanard Exp $ */ /* Project : miniupnp * Web : http://miniupnp.free.fr/ or https://miniupnp.tuxfamily.org/ * Author : Thomas BERNARD - * copyright (c) 2005-2020 Thomas Bernard + * copyright (c) 2005-2021 Thomas Bernard * This software is subjet to the conditions detailed in the * provided LICENSE file. 
*/ #ifndef UPNPDEV_H_INCLUDED @@ -20,7 +20,7 @@ struct UPNPDev { char * st; char * usn; unsigned int scope_id; -#if defined(__STDC_VERSION) && __STDC_VERSION__ >= 199901L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 flexible array member */ char buffer[]; #elif defined(__GNUC__) diff --git a/thirdparty/miniupnpc/miniupnpc/upnpreplyparse.h b/thirdparty/miniupnpc/include/upnpreplyparse.h index 6badd15b26..6badd15b26 100644 --- a/thirdparty/miniupnpc/miniupnpc/upnpreplyparse.h +++ b/thirdparty/miniupnpc/include/upnpreplyparse.h diff --git a/thirdparty/miniupnpc/miniupnpc/addr_is_reserved.c b/thirdparty/miniupnpc/src/addr_is_reserved.c index 7e586d7da2..18c6424201 100644 --- a/thirdparty/miniupnpc/miniupnpc/addr_is_reserved.c +++ b/thirdparty/miniupnpc/src/addr_is_reserved.c @@ -56,7 +56,7 @@ int addr_is_reserved(const char * addr_str) uint32_t addr_n, address; size_t i; -#if defined(_WIN32) && (!defined(_WIN32_WINNT_VISTA) || (_WIN32_WINNT < _WIN32_WINNT_VISTA)) +#if defined(_WIN32) && _WIN32_WINNT < 0x0600 // _WIN32_WINNT_VISTA addr_n = inet_addr(addr_str); if (addr_n == INADDR_NONE) return 1; diff --git a/thirdparty/miniupnpc/miniupnpc/addr_is_reserved.h b/thirdparty/miniupnpc/src/addr_is_reserved.h index f8b5d66a09..f8b5d66a09 100644 --- a/thirdparty/miniupnpc/miniupnpc/addr_is_reserved.h +++ b/thirdparty/miniupnpc/src/addr_is_reserved.h diff --git a/thirdparty/miniupnpc/miniupnpc/codelength.h b/thirdparty/miniupnpc/src/codelength.h index ea0b005ffe..ea0b005ffe 100644 --- a/thirdparty/miniupnpc/miniupnpc/codelength.h +++ b/thirdparty/miniupnpc/src/codelength.h diff --git a/thirdparty/miniupnpc/miniupnpc/connecthostport.c b/thirdparty/miniupnpc/src/connecthostport.c index 79f832b8db..79f832b8db 100644 --- a/thirdparty/miniupnpc/miniupnpc/connecthostport.c +++ b/thirdparty/miniupnpc/src/connecthostport.c diff --git a/thirdparty/miniupnpc/miniupnpc/connecthostport.h b/thirdparty/miniupnpc/src/connecthostport.h index 701816b5b6..701816b5b6 
100644 --- a/thirdparty/miniupnpc/miniupnpc/connecthostport.h +++ b/thirdparty/miniupnpc/src/connecthostport.h diff --git a/thirdparty/miniupnpc/miniupnpc/igd_desc_parse.c b/thirdparty/miniupnpc/src/igd_desc_parse.c index d2999ad011..d2999ad011 100644 --- a/thirdparty/miniupnpc/miniupnpc/igd_desc_parse.c +++ b/thirdparty/miniupnpc/src/igd_desc_parse.c diff --git a/thirdparty/miniupnpc/miniupnpc/minisoap.c b/thirdparty/miniupnpc/src/minisoap.c index 78606672d5..78606672d5 100644 --- a/thirdparty/miniupnpc/miniupnpc/minisoap.c +++ b/thirdparty/miniupnpc/src/minisoap.c diff --git a/thirdparty/miniupnpc/miniupnpc/minisoap.h b/thirdparty/miniupnpc/src/minisoap.h index d6a45d03ba..d6a45d03ba 100644 --- a/thirdparty/miniupnpc/miniupnpc/minisoap.h +++ b/thirdparty/miniupnpc/src/minisoap.h diff --git a/thirdparty/miniupnpc/miniupnpc/minissdpc.c b/thirdparty/miniupnpc/src/minissdpc.c index 5d3a0fd049..edebb1600a 100644 --- a/thirdparty/miniupnpc/miniupnpc/minissdpc.c +++ b/thirdparty/miniupnpc/src/minissdpc.c @@ -1,4 +1,4 @@ -/* $Id: minissdpc.c,v 1.47 2021/03/02 23:38:30 nanard Exp $ */ +/* $Id: minissdpc.c,v 1.49 2021/05/13 11:00:36 nanard Exp $ */ /* vim: tabstop=4 shiftwidth=4 noexpandtab * Project : miniupnp * Web : http://miniupnp.free.fr/ or https://miniupnp.tuxfamily.org/ @@ -460,7 +460,7 @@ parseMSEARCHReply(const char * reply, int size, static int upnp_gettimeofday(struct timeval * tv) { #if defined(_WIN32) -#if defined(_WIN32_WINNT_VISTA) && (_WIN32_WINNT >= _WIN32_WINNT_VISTA) +#if _WIN32_WINNT >= 0x0600 // _WIN32_WINNT_VISTA ULONGLONG ts = GetTickCount64(); #else DWORD ts = GetTickCount(); @@ -469,14 +469,29 @@ static int upnp_gettimeofday(struct timeval * tv) tv->tv_usec = (ts % 1000) * 1000; return 0; /* success */ #elif defined(CLOCK_MONOTONIC_FAST) || defined(CLOCK_MONOTONIC) - struct timespec ts; - int ret_code = clock_gettime(UPNP_CLOCKID, &ts); - if (ret_code == 0) +#if defined(__APPLE__) +#if defined(__clang__) + if (__builtin_available(macOS 10.12, iOS 
10.0, tvOS 10.0, watchOS 3.0, *)) { +#else /* !defined(__clang__) */ + if (clock_gettime != NULL) { +#endif /* defined(__clang__) */ +#endif /* defined(__APPLE__) */ + struct timespec ts; + int ret_code = clock_gettime(UPNP_CLOCKID, &ts); + if (ret_code == 0) + { + tv->tv_sec = ts.tv_sec; + tv->tv_usec = ts.tv_nsec / 1000; + } + return ret_code; +#if defined(__APPLE__) + } + else { - tv->tv_sec = ts.tv_sec; - tv->tv_usec = ts.tv_nsec / 1000; + /* fall-back for earlier Apple platforms */ + return gettimeofday(tv, NULL); } - return ret_code; +#endif /* defined(__APPLE__) */ #else return gettimeofday(tv, NULL); #endif @@ -705,7 +720,7 @@ ssdpDiscoverDevices(const char * const deviceTypes[], } } - if(multicastif) + if(multicastif && multicastif[0] != '\0') { if(ipv6) { #if !defined(_WIN32) @@ -732,7 +747,7 @@ ssdpDiscoverDevices(const char * const deviceTypes[], } else { struct in_addr mc_if; #if defined(_WIN32) -#if defined(_WIN32_WINNT_VISTA) && (_WIN32_WINNT >= _WIN32_WINNT_VISTA) +#if _WIN32_WINNT >= 0x0600 // _WIN32_WINNT_VISTA InetPtonA(AF_INET, multicastif, &mc_if); #else mc_if.s_addr = inet_addr(multicastif); /* old Windows SDK do not support InetPtoA() */ diff --git a/thirdparty/miniupnpc/miniupnpc/minissdpc.h b/thirdparty/miniupnpc/src/minissdpc.h index c99f929b9e..c99f929b9e 100644 --- a/thirdparty/miniupnpc/miniupnpc/minissdpc.h +++ b/thirdparty/miniupnpc/src/minissdpc.h diff --git a/thirdparty/miniupnpc/miniupnpc/miniupnpc.c b/thirdparty/miniupnpc/src/miniupnpc.c index 696af93237..696af93237 100644 --- a/thirdparty/miniupnpc/miniupnpc/miniupnpc.c +++ b/thirdparty/miniupnpc/src/miniupnpc.c diff --git a/thirdparty/miniupnpc/miniupnpc/miniupnpc_socketdef.h b/thirdparty/miniupnpc/src/miniupnpc_socketdef.h index 5986e58c76..5986e58c76 100644 --- a/thirdparty/miniupnpc/miniupnpc/miniupnpc_socketdef.h +++ b/thirdparty/miniupnpc/src/miniupnpc_socketdef.h diff --git a/thirdparty/miniupnpc/miniupnpc/miniupnpcstrings.h b/thirdparty/miniupnpc/src/miniupnpcstrings.h 
index 7b3d04074a..eefbc8dbdd 100644 --- a/thirdparty/miniupnpc/miniupnpc/miniupnpcstrings.h +++ b/thirdparty/miniupnpc/src/miniupnpcstrings.h @@ -4,7 +4,7 @@ #include "core/version.h" #define OS_STRING VERSION_NAME "/1.0" -#define MINIUPNPC_VERSION_STRING "2.2.2" +#define MINIUPNPC_VERSION_STRING "2.2.3" #if 0 /* according to "UPnP Device Architecture 1.0" */ diff --git a/thirdparty/miniupnpc/miniupnpc/miniwget.c b/thirdparty/miniupnpc/src/miniwget.c index d5b7970632..d5b7970632 100644 --- a/thirdparty/miniupnpc/miniupnpc/miniwget.c +++ b/thirdparty/miniupnpc/src/miniwget.c diff --git a/thirdparty/miniupnpc/miniupnpc/miniwget_private.h b/thirdparty/miniupnpc/src/miniwget_private.h index e4eaac8085..e4eaac8085 100644 --- a/thirdparty/miniupnpc/miniupnpc/miniwget_private.h +++ b/thirdparty/miniupnpc/src/miniwget_private.h diff --git a/thirdparty/miniupnpc/miniupnpc/minixml.c b/thirdparty/miniupnpc/src/minixml.c index ed2d3c759c..ed2d3c759c 100644 --- a/thirdparty/miniupnpc/miniupnpc/minixml.c +++ b/thirdparty/miniupnpc/src/minixml.c diff --git a/thirdparty/miniupnpc/miniupnpc/minixml.h b/thirdparty/miniupnpc/src/minixml.h index 2e60397388..2e60397388 100644 --- a/thirdparty/miniupnpc/miniupnpc/minixml.h +++ b/thirdparty/miniupnpc/src/minixml.h diff --git a/thirdparty/miniupnpc/miniupnpc/minixmlvalid.c b/thirdparty/miniupnpc/src/minixmlvalid.c index dad1488122..dad1488122 100644 --- a/thirdparty/miniupnpc/miniupnpc/minixmlvalid.c +++ b/thirdparty/miniupnpc/src/minixmlvalid.c diff --git a/thirdparty/miniupnpc/miniupnpc/portlistingparse.c b/thirdparty/miniupnpc/src/portlistingparse.c index 162cf8b7ec..162cf8b7ec 100644 --- a/thirdparty/miniupnpc/miniupnpc/portlistingparse.c +++ b/thirdparty/miniupnpc/src/portlistingparse.c diff --git a/thirdparty/miniupnpc/miniupnpc/receivedata.c b/thirdparty/miniupnpc/src/receivedata.c index 7f187f6e56..7f187f6e56 100644 --- a/thirdparty/miniupnpc/miniupnpc/receivedata.c +++ b/thirdparty/miniupnpc/src/receivedata.c diff --git 
a/thirdparty/miniupnpc/miniupnpc/receivedata.h b/thirdparty/miniupnpc/src/receivedata.h index c9fdc561f8..c9fdc561f8 100644 --- a/thirdparty/miniupnpc/miniupnpc/receivedata.h +++ b/thirdparty/miniupnpc/src/receivedata.h diff --git a/thirdparty/miniupnpc/miniupnpc/upnpcommands.c b/thirdparty/miniupnpc/src/upnpcommands.c index 1e1ee6786f..1e1ee6786f 100644 --- a/thirdparty/miniupnpc/miniupnpc/upnpcommands.c +++ b/thirdparty/miniupnpc/src/upnpcommands.c diff --git a/thirdparty/miniupnpc/miniupnpc/upnpdev.c b/thirdparty/miniupnpc/src/upnpdev.c index d89a9934c3..d89a9934c3 100644 --- a/thirdparty/miniupnpc/miniupnpc/upnpdev.c +++ b/thirdparty/miniupnpc/src/upnpdev.c diff --git a/thirdparty/miniupnpc/miniupnpc/upnpreplyparse.c b/thirdparty/miniupnpc/src/upnpreplyparse.c index 4d06f0585d..4d06f0585d 100644 --- a/thirdparty/miniupnpc/miniupnpc/upnpreplyparse.c +++ b/thirdparty/miniupnpc/src/upnpreplyparse.c diff --git a/thirdparty/miniupnpc/miniupnpc/win32_snprintf.h b/thirdparty/miniupnpc/src/win32_snprintf.h index 1fc284ecff..1fc284ecff 100644 --- a/thirdparty/miniupnpc/miniupnpc/win32_snprintf.h +++ b/thirdparty/miniupnpc/src/win32_snprintf.h diff --git a/thirdparty/pcre2/AUTHORS b/thirdparty/pcre2/AUTHORS index f001cb770e..bec8a1e5ad 100644 --- a/thirdparty/pcre2/AUTHORS +++ b/thirdparty/pcre2/AUTHORS @@ -5,10 +5,10 @@ Written by: Philip Hazel Email local part: Philip.Hazel Email domain: gmail.com -University of Cambridge Computing Service, +Retired from University of Cambridge Computing Service, Cambridge, England. -Copyright (c) 1997-2020 University of Cambridge +Copyright (c) 1997-2021 University of Cambridge All rights reserved @@ -19,7 +19,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2010-2020 Zoltan Herczeg +Copyright(c) 2010-2021 Zoltan Herczeg All rights reserved. 
@@ -30,7 +30,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2009-2020 Zoltan Herczeg +Copyright(c) 2009-2021 Zoltan Herczeg All rights reserved. #### diff --git a/thirdparty/pcre2/LICENCE b/thirdparty/pcre2/LICENCE index 155d073127..b1ec61be44 100644 --- a/thirdparty/pcre2/LICENCE +++ b/thirdparty/pcre2/LICENCE @@ -23,10 +23,10 @@ Written by: Philip Hazel Email local part: Philip.Hazel Email domain: gmail.com -University of Cambridge Computing Service, +Retired from University of Cambridge Computing Service, Cambridge, England. -Copyright (c) 1997-2020 University of Cambridge +Copyright (c) 1997-2021 University of Cambridge All rights reserved. @@ -37,7 +37,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Email domain: freemail.hu -Copyright(c) 2010-2020 Zoltan Herczeg +Copyright(c) 2010-2021 Zoltan Herczeg All rights reserved. @@ -48,7 +48,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Email domain: freemail.hu -Copyright(c) 2009-2020 Zoltan Herczeg +Copyright(c) 2009-2021 Zoltan Herczeg All rights reserved. diff --git a/thirdparty/pcre2/src/config.h b/thirdparty/pcre2/src/config.h index 10f4104790..a13593715e 100644 --- a/thirdparty/pcre2/src/config.h +++ b/thirdparty/pcre2/src/config.h @@ -85,8 +85,8 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the `memmove' function. */ /* #undef HAVE_MEMMOVE */ -/* Define to 1 if you have the <memory.h> header file. */ -/* #undef HAVE_MEMORY_H */ +/* Define to 1 if you have the <minix/config.h> header file. */ +/* #undef HAVE_MINIX_CONFIG_H */ /* Define to 1 if you have the `mkostemp' function. */ /* #undef HAVE_MKOSTEMP */ @@ -103,12 +103,18 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the <readline/readline.h> header file. */ /* #undef HAVE_READLINE_READLINE_H */ +/* Define to 1 if you have the `realpath' function. 
*/ +/* #undef HAVE_REALPATH */ + /* Define to 1 if you have the `secure_getenv' function. */ /* #undef HAVE_SECURE_GETENV */ /* Define to 1 if you have the <stdint.h> header file. */ /* #undef HAVE_STDINT_H */ +/* Define to 1 if you have the <stdio.h> header file. */ +/* #undef HAVE_STDIO_H */ + /* Define to 1 if you have the <stdlib.h> header file. */ /* #undef HAVE_STDLIB_H */ @@ -136,6 +142,9 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if the compiler supports simple visibility declarations. */ /* #undef HAVE_VISIBILITY */ +/* Define to 1 if you have the <wchar.h> header file. */ +/* #undef HAVE_WCHAR_H */ + /* Define to 1 if you have the <windows.h> header file. */ /* #undef HAVE_WINDOWS_H */ @@ -224,7 +233,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_NAME "PCRE2" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE2 10.36" +#define PACKAGE_STRING "PCRE2 10.39" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "pcre2" @@ -233,7 +242,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "10.36" +#define PACKAGE_VERSION "10.39" /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested parentheses (of any kind) in a pattern. This limits the amount of system @@ -286,7 +295,9 @@ sure both macros are undefined; an emulation function will then be used. */ unless SUPPORT_JIT is also defined. */ /* #undef SLJIT_PROT_EXECUTABLE_ALLOCATOR */ -/* Define to 1 if you have the ANSI C header files. */ +/* Define to 1 if all of the C90 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. 
*/ /* #undef STDC_HEADERS */ /* Define to any value to enable support for Just-In-Time compiling. */ @@ -340,35 +351,91 @@ sure both macros are undefined; an emulation function will then be used. */ #ifndef _ALL_SOURCE # define _ALL_SOURCE 1 #endif +/* Enable general extensions on macOS. */ +#ifndef _DARWIN_C_SOURCE +# define _DARWIN_C_SOURCE 1 +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif /* Enable GNU extensions on systems that have them. */ #ifndef _GNU_SOURCE # define _GNU_SOURCE 1 #endif -/* Enable threading extensions on Solaris. */ +/* Enable X/Open compliant socket functions that do not require linking + with -lxnet on HP-UX 11.11. */ +#ifndef _HPUX_ALT_XOPEN_SOCKET_API +# define _HPUX_ALT_XOPEN_SOCKET_API 1 +#endif +/* Identify the host operating system as Minix. + This macro does not affect the system headers' behavior. + A future release of Autoconf may stop defining this macro. */ +#ifndef _MINIX +/* # undef _MINIX */ +#endif +/* Enable general extensions on NetBSD. + Enable NetBSD compatibility extensions on Minix. */ +#ifndef _NETBSD_SOURCE +# define _NETBSD_SOURCE 1 +#endif +/* Enable OpenBSD compatibility extensions on NetBSD. + Oddly enough, this does nothing on OpenBSD. */ +#ifndef _OPENBSD_SOURCE +# define _OPENBSD_SOURCE 1 +#endif +/* Define to 1 if needed for POSIX-compatible behavior. */ +#ifndef _POSIX_SOURCE +/* # undef _POSIX_SOURCE */ +#endif +/* Define to 2 if needed for POSIX-compatible behavior. */ +#ifndef _POSIX_1_SOURCE +/* # undef _POSIX_1_SOURCE */ +#endif +/* Enable POSIX-compatible threading on Solaris. */ #ifndef _POSIX_PTHREAD_SEMANTICS # define _POSIX_PTHREAD_SEMANTICS 1 #endif +/* Enable extensions specified by ISO/IEC TS 18661-5:2014. */ +#ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__ +# define __STDC_WANT_IEC_60559_ATTRIBS_EXT__ 1 +#endif +/* Enable extensions specified by ISO/IEC TS 18661-1:2014. 
*/ +#ifndef __STDC_WANT_IEC_60559_BFP_EXT__ +# define __STDC_WANT_IEC_60559_BFP_EXT__ 1 +#endif +/* Enable extensions specified by ISO/IEC TS 18661-2:2015. */ +#ifndef __STDC_WANT_IEC_60559_DFP_EXT__ +# define __STDC_WANT_IEC_60559_DFP_EXT__ 1 +#endif +/* Enable extensions specified by ISO/IEC TS 18661-4:2015. */ +#ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__ +# define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1 +#endif +/* Enable extensions specified by ISO/IEC TS 18661-3:2015. */ +#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__ +# define __STDC_WANT_IEC_60559_TYPES_EXT__ 1 +#endif +/* Enable extensions specified by ISO/IEC TR 24731-2:2010. */ +#ifndef __STDC_WANT_LIB_EXT2__ +# define __STDC_WANT_LIB_EXT2__ 1 +#endif +/* Enable extensions specified by ISO/IEC 24747:2009. */ +#ifndef __STDC_WANT_MATH_SPEC_FUNCS__ +# define __STDC_WANT_MATH_SPEC_FUNCS__ 1 +#endif /* Enable extensions on HP NonStop. */ #ifndef _TANDEM_SOURCE # define _TANDEM_SOURCE 1 #endif -/* Enable general extensions on Solaris. */ -#ifndef __EXTENSIONS__ -# define __EXTENSIONS__ 1 +/* Enable X/Open extensions. Define to 500 only if necessary + to make mbstate_t available. */ +#ifndef _XOPEN_SOURCE +/* # undef _XOPEN_SOURCE */ #endif /* Version number of package */ -#define VERSION "10.36" - -/* Define to 1 if on MINIX. */ -/* #undef _MINIX */ - -/* Define to 2 if the system does not provide POSIX.1 features except with - this defined. */ -/* #undef _POSIX_1_SOURCE */ - -/* Define to 1 if you need to in order for `stat' and other things to work. */ -/* #undef _POSIX_SOURCE */ +#define VERSION "10.39" /* Define to empty if `const' does not conform to ANSI C. */ /* #undef const */ diff --git a/thirdparty/pcre2/src/pcre2.h b/thirdparty/pcre2/src/pcre2.h index f204ec8180..90a97d9cb7 100644 --- a/thirdparty/pcre2/src/pcre2.h +++ b/thirdparty/pcre2/src/pcre2.h @@ -5,7 +5,7 @@ /* This is the public header file for the PCRE library, second API, to be #included by applications that call PCRE2 functions. 
- Copyright (c) 2016-2020 University of Cambridge + Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE. /* The current PCRE version information. */ #define PCRE2_MAJOR 10 -#define PCRE2_MINOR 36 +#define PCRE2_MINOR 39 #define PCRE2_PRERELEASE -#define PCRE2_DATE 2020-12-04 +#define PCRE2_DATE 2021-10-29 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate @@ -84,8 +84,8 @@ set, we ensure here that it has no effect. */ /* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do not have stdint.h, which is why we use inttypes.h, which according to the C -standard is a superset of stdint.h. If none of these headers are available, -the relevant values must be provided by some other means. */ +standard is a superset of stdint.h. If inttypes.h is not available the build +will break and the relevant values must be provided by some other means. */ #include <limits.h> #include <stdlib.h> @@ -152,6 +152,7 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */ #define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */ #define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */ +#define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */ /* These are for pcre2_jit_compile(). */ @@ -311,6 +312,7 @@ pcre2_pattern_convert(). */ #define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196 #define PCRE2_ERROR_TOO_MANY_CAPTURES 197 #define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198 +#define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199 /* "Expected" matching error codes: no match and partial match. 
*/ diff --git a/thirdparty/pcre2/src/pcre2_auto_possess.c b/thirdparty/pcre2/src/pcre2_auto_possess.c index c64cf856d1..e5e0895682 100644 --- a/thirdparty/pcre2/src/pcre2_auto_possess.c +++ b/thirdparty/pcre2/src/pcre2_auto_possess.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -490,6 +490,7 @@ switch(c) list[2] = (uint32_t)(end - code); return end; } + return NULL; /* Opcode not accepted */ } @@ -1186,12 +1187,16 @@ for (;;) c = *repeat_opcode; if (c >= OP_CRSTAR && c <= OP_CRMINRANGE) { - /* end must not be NULL. */ - end = get_chr_property_list(code, utf, ucp, cb->fcc, list); + /* The return from get_chr_property_list() will never be NULL when + *code (aka c) is one of the three class opcodes. However, gcc with + -fanalyzer notes that a NULL return is possible, and grumbles. Hence we + put in a check. */ + end = get_chr_property_list(code, utf, ucp, cb->fcc, list); list[1] = (c & 1) == 0; - if (compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit)) + if (end != NULL && + compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit)) { switch (c) { diff --git a/thirdparty/pcre2/src/pcre2_compile.c b/thirdparty/pcre2/src/pcre2_compile.c index e811f12f02..383159be76 100644 --- a/thirdparty/pcre2/src/pcre2_compile.c +++ b/thirdparty/pcre2/src/pcre2_compile.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -137,7 +137,7 @@ static BOOL static int check_lookbehinds(uint32_t *, uint32_t **, parsed_recurse_check *, - compile_block *); + compile_block *, int *); /************************************************* @@ -782,12 +782,15 @@ are allowed. */ #define PUBLIC_COMPILE_EXTRA_OPTIONS \ (PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \ PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES|PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL| \ - PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX) + PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX| \ + PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) /* Compile time error code numbers. They are given names so that they can more easily be tracked. When a new number is added, the tables called eint1 and eint2 in pcre2posix.c may need to be updated, and a new error text must be -added to compile_error_texts in pcre2_error.c. */ +added to compile_error_texts in pcre2_error.c. Also, the error codes in +pcre2.h.in must be updated - their values are exactly 100 greater than these +values. */ enum { ERR0 = COMPILE_ERROR_BASE, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10, @@ -799,7 +802,7 @@ enum { ERR0 = COMPILE_ERROR_BASE, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90, - ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98 }; + ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99 }; /* This is a table of start-of-pattern options such as (*UTF) and settings such as (*LIMIT_MATCH=nnnn) and (*CRLF). 
For completeness and backward @@ -1398,32 +1401,47 @@ static BOOL read_repeat_counts(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *minp, uint32_t *maxp, int *errorcodeptr) { -PCRE2_SPTR p = *ptrptr; +PCRE2_SPTR p; BOOL yield = FALSE; +BOOL had_comma = FALSE; int32_t min = 0; int32_t max = REPEAT_UNLIMITED; /* This value is larger than MAX_REPEAT_COUNT */ -/* NB read_number() initializes the error code to zero. The only error is for a -number that is too big. */ +/* Check the syntax */ + +*errorcodeptr = 0; +for (p = *ptrptr;; p++) + { + uint32_t c; + if (p >= ptrend) return FALSE; + c = *p; + if (IS_DIGIT(c)) continue; + if (c == CHAR_RIGHT_CURLY_BRACKET) break; + if (c == CHAR_COMMA) + { + if (had_comma) return FALSE; + had_comma = TRUE; + } + else return FALSE; + } + +/* The only error from read_number() is for a number that is too big. */ +p = *ptrptr; if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &min, errorcodeptr)) goto EXIT; -if (p >= ptrend) goto EXIT; - if (*p == CHAR_RIGHT_CURLY_BRACKET) { p++; max = min; } - else { - if (*p++ != CHAR_COMMA || p >= ptrend) goto EXIT; - if (*p != CHAR_RIGHT_CURLY_BRACKET) + if (*(++p) != CHAR_RIGHT_CURLY_BRACKET) { if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &max, - errorcodeptr) || p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET) + errorcodeptr)) goto EXIT; if (max < min) { @@ -1438,11 +1456,10 @@ yield = TRUE; if (minp != NULL) *minp = (uint32_t)min; if (maxp != NULL) *maxp = (uint32_t)max; -/* Update the pattern pointer on success, or after an error, but not when -the result is "not a repeat quantifier". */ +/* Update the pattern pointer */ EXIT: -if (yield || *errorcodeptr != 0) *ptrptr = p; +*ptrptr = p; return yield; } @@ -1776,19 +1793,23 @@ else { oldptr = ptr; ptr--; /* Back to the digit */ - if (!read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, ERR61, &s, - errorcodeptr)) - break; - /* \1 to \9 are always back references. 
\8x and \9x are too; \1x to \7x + /* As we know we are at a digit, the only possible error from + read_number() is a number that is too large to be a group number. In this + case we fall through handle this as not a group reference. If we have + read a small enough number, check for a back reference. + + \1 to \9 are always back references. \8x and \9x are too; \1x to \7x are octal escapes if there are not that many previous captures. */ - if (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount) + if (read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, 0, &s, errorcodeptr) && + (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount)) { if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61; else escape = -s; /* Indicates a back reference */ break; } + ptr = oldptr; /* Put the pointer back and fall through */ } @@ -7781,6 +7802,16 @@ for (;; pptr++) } #endif + /* \K is forbidden in lookarounds since 10.38 because that's what Perl has + done. However, there's an option, in case anyone was relying on it. */ + + if (cb->assert_depth > 0 && meta_arg == ESC_K && + (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) == 0) + { + *errorcodeptr = ERR99; + return 0; + } + /* For the rest (including \X when Unicode is supported - if not it's faulted at parse time), the OP value is the escape value when PCRE2_UCP is not set; if it is set, these escapes do not show up here because they are @@ -9130,7 +9161,7 @@ for (;; pptr++) case META_LOOKAHEAD: case META_LOOKAHEADNOT: case META_LOOKAHEAD_NA: - *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb); + *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb, lcptr); if (*errcodeptr != 0) return -1; /* Ignore any qualifiers that follow a lookahead assertion. 
*/ @@ -9470,16 +9501,16 @@ Arguments retptr if not NULL, return the ket pointer here recurses chain of recurse_check to catch mutual recursion cb points to the compile block + lcptr points to loop counter Returns: 0 on success, or an errorcode (cb->erroroffset will be set) */ static int check_lookbehinds(uint32_t *pptr, uint32_t **retptr, - parsed_recurse_check *recurses, compile_block *cb) + parsed_recurse_check *recurses, compile_block *cb, int *lcptr) { int errorcode = 0; -int loopcount = 0; int nestlevel = 0; cb->erroroffset = PCRE2_UNSET; @@ -9605,7 +9636,7 @@ for (; *pptr != META_END; pptr++) case META_LOOKBEHIND: case META_LOOKBEHINDNOT: case META_LOOKBEHIND_NA: - if (!set_lookbehind_lengths(&pptr, &errorcode, &loopcount, recurses, cb)) + if (!set_lookbehind_lengths(&pptr, &errorcode, lcptr, recurses, cb)) return errorcode; break; } @@ -10060,7 +10091,8 @@ lengths. */ if (has_lookbehind) { - errorcode = check_lookbehinds(cb.parsed_pattern, NULL, NULL, &cb); + int loopcount = 0; + errorcode = check_lookbehinds(cb.parsed_pattern, NULL, NULL, &cb, &loopcount); if (errorcode != 0) goto HAD_CB_ERROR; } diff --git a/thirdparty/pcre2/src/pcre2_dfa_match.c b/thirdparty/pcre2/src/pcre2_dfa_match.c index 625695b7cb..060dc7669a 100644 --- a/thirdparty/pcre2/src/pcre2_dfa_match.c +++ b/thirdparty/pcre2/src/pcre2_dfa_match.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -3256,8 +3256,8 @@ BOOL has_first_cu = FALSE; BOOL has_req_cu = FALSE; #if PCRE2_CODE_UNIT_WIDTH == 8 -BOOL memchr_not_found_first_cu = FALSE; -BOOL memchr_not_found_first_cu2 = FALSE; +PCRE2_SPTR memchr_found_first_cu = NULL; +PCRE2_SPTR memchr_found_first_cu2 = NULL; #endif PCRE2_UCHAR first_cu = 0; @@ -3648,13 +3648,7 @@ for (;;) } } - /* Not anchored. Advance to a unique first code unit if there is one. In - 8-bit mode, the use of memchr() gives a big speed up, even though we have - to call it twice in caseless mode, in order to find the earliest occurrence - of the character in either of its cases. If a call to memchr() that - searches the rest of the subject fails to find one case, remember that in - order not to keep on repeating the search. This can make a huge difference - when the strings are very long and only one case is present. */ + /* Not anchored. Advance to a unique first code unit if there is one. */ else { @@ -3662,43 +3656,68 @@ for (;;) { if (first_cu != first_cu2) /* Caseless */ { + /* In 16-bit and 32_bit modes we have to do our own search, so can + look for both cases at once. */ + #if PCRE2_CODE_UNIT_WIDTH != 8 PCRE2_UCHAR smc; while (start_match < end_subject && (smc = UCHAR21TEST(start_match)) != first_cu && - smc != first_cu2) + smc != first_cu2) start_match++; +#else + /* In 8-bit mode, the use of memchr() gives a big speed up, even + though we have to call it twice in order to find the earliest + occurrence of the code unit in either of its cases. Caching is used + to remember the positions of previously found code units. 
This can + make a huge difference when the strings are very long and only one + case is actually present. */ -#else /* 8-bit code units */ PCRE2_SPTR pp1 = NULL; PCRE2_SPTR pp2 = NULL; - PCRE2_SIZE cu2size = end_subject - start_match; + PCRE2_SIZE searchlength = end_subject - start_match; - if (!memchr_not_found_first_cu) + /* If we haven't got a previously found position for first_cu, or if + the current starting position is later, we need to do a search. If + the code unit is not found, set it to the end. */ + + if (memchr_found_first_cu == NULL || + start_match > memchr_found_first_cu) { - pp1 = memchr(start_match, first_cu, end_subject - start_match); - if (pp1 == NULL) memchr_not_found_first_cu = TRUE; - else cu2size = pp1 - start_match; + pp1 = memchr(start_match, first_cu, searchlength); + memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1; } - /* If pp1 is not NULL, we have arranged to search only as far as pp1, - to see if the other case is earlier, so we can set "not found" only - when both searches have returned NULL. */ + /* If the start is before a previously found position, use the + previous position, or NULL if a previous search failed. */ + + else pp1 = (memchr_found_first_cu == end_subject)? NULL : + memchr_found_first_cu; - if (!memchr_not_found_first_cu2) + /* Do the same thing for the other case. */ + + if (memchr_found_first_cu2 == NULL || + start_match > memchr_found_first_cu2) { - pp2 = memchr(start_match, first_cu2, cu2size); - memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL); + pp2 = memchr(start_match, first_cu2, searchlength); + memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2; } + else pp2 = (memchr_found_first_cu2 == end_subject)? NULL : + memchr_found_first_cu2; + + /* Set the start to the end of the subject if neither case was found. + Otherwise, use the earlier found point. */ + if (pp1 == NULL) start_match = (pp2 == NULL)? end_subject : pp2; else start_match = (pp2 == NULL || pp1 < pp2)? 
pp1 : pp2; -#endif + +#endif /* 8-bit handling */ } - /* The caseful case */ + /* The caseful case is much simpler. */ else { diff --git a/thirdparty/pcre2/src/pcre2_error.c b/thirdparty/pcre2/src/pcre2_error.c index c61648cb7f..3dee63d0db 100644 --- a/thirdparty/pcre2/src/pcre2_error.c +++ b/thirdparty/pcre2/src/pcre2_error.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2019 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -186,6 +186,7 @@ static const unsigned char compile_error_texts[] = "script runs require Unicode support, which this version of PCRE2 does not have\0" "too many capturing groups (maximum 65535)\0" "atomic assertion expected after (?( or (?(?C)\0" + "\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0" ; /* Match-time and UTF error texts are in the same format. 
*/ diff --git a/thirdparty/pcre2/src/pcre2_jit_compile.c b/thirdparty/pcre2/src/pcre2_jit_compile.c index 1977d28aa5..db2ce65598 100644 --- a/thirdparty/pcre2/src/pcre2_jit_compile.c +++ b/thirdparty/pcre2/src/pcre2_jit_compile.c @@ -1226,7 +1226,7 @@ while (cc < ccend) return TRUE; } -#define EARLY_FAIL_ENHANCE_MAX (1 + 1) +#define EARLY_FAIL_ENHANCE_MAX (1 + 3) /* start: @@ -1236,23 +1236,28 @@ start: return: current number of iterators enhanced with fast fail */ -static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth, int start) +static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, + sljit_s32 depth, int start, BOOL fast_forward_allowed) { +PCRE2_SPTR begin = cc; PCRE2_SPTR next_alt; PCRE2_SPTR end; PCRE2_SPTR accelerated_start; +BOOL prev_fast_forward_allowed; int result = 0; int count; -BOOL fast_forward_allowed = TRUE; SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA); SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0); SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX); +next_alt = cc + GET(cc, 1); +if (*next_alt == OP_ALT) + fast_forward_allowed = FALSE; + do { count = start; - next_alt = cc + GET(cc, 1); cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? 
IMM2_SIZE : 0); while (TRUE) @@ -1475,31 +1480,20 @@ do case OP_CBRA: end = cc + GET(cc, 1); - if (*end == OP_KET && PRIVATE_DATA(end) == 0) - { - if (*cc == OP_CBRA) - { - if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) - break; - cc += IMM2_SIZE; - } - - cc += 1 + LINK_SIZE; - continue; - } - + prev_fast_forward_allowed = fast_forward_allowed; fast_forward_allowed = FALSE; if (depth >= 4) break; end = bracketend(cc) - (1 + LINK_SIZE); - if (*end != OP_KET || PRIVATE_DATA(end) != 0) + if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)) break; - if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) - break; + count = detect_early_fail(common, cc, private_data_start, depth + 1, count, prev_fast_forward_allowed); + + if (PRIVATE_DATA(cc) != 0) + common->private_data_ptrs[begin - common->start] = 1; - count = detect_early_fail(common, cc, private_data_start, depth + 1, count); if (count < EARLY_FAIL_ENHANCE_MAX) { cc = end + (1 + LINK_SIZE); @@ -1521,7 +1515,7 @@ do { count++; - if (fast_forward_allowed && *next_alt == OP_KET) + if (fast_forward_allowed) { common->fast_forward_bc_ptr = accelerated_start; common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip; @@ -1555,6 +1549,8 @@ do return EARLY_FAIL_ENHANCE_MAX; } + /* Cannot be part of a repeat. */ + common->private_data_ptrs[begin - common->start] = 1; count++; if (count < EARLY_FAIL_ENHANCE_MAX) @@ -1569,8 +1565,8 @@ do else if (result < count) result = count; - fast_forward_allowed = FALSE; cc = next_alt; + next_alt = cc + GET(cc, 1); } while (*cc == OP_ALT); @@ -1620,11 +1616,12 @@ sljit_sw length = end - begin; sljit_s32 min, max, i; /* Detect fixed iterations first. */ -if (end[-(1 + LINK_SIZE)] != OP_KET) +if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0) return FALSE; -/* Already detected repeat. 
*/ -if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0) +/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/ + * Skip the check of the second part. */ +if (PRIVATE_DATA(end - LINK_SIZE) == 0) return TRUE; next = end; @@ -1763,6 +1760,7 @@ while (cc < ccend) if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE) break; + /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */ if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)) { if (detect_repeat(common, cc)) @@ -1813,6 +1811,7 @@ while (cc < ccend) case OP_COND: /* Might be a hidden SCOND. */ + common->private_data_ptrs[cc - common->start] = 0; alternative = cc + GET(cc, 1); if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) { @@ -4203,9 +4202,6 @@ TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer, and it is destroyed. Does not modify STR_PTR for invalid character sequences. */ DEFINE_COMPILER; -SLJIT_UNUSED_ARG(backtracks); -SLJIT_UNUSED_ARG(must_be_valid); - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 struct sljit_jump *jump; #endif @@ -4279,6 +4275,10 @@ if (common->invalid_utf && !must_be_valid) } #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ #endif /* SUPPORT_UNICODE */ + +SLJIT_UNUSED_ARG(backtracks); +SLJIT_UNUSED_ARG(must_be_valid); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } @@ -8141,7 +8141,7 @@ switch(type) } else OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); if (!common->endonly) compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks); @@ -8161,7 +8161,7 @@ switch(type) } else OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); 
- add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); check_partial(common, FALSE); jump[0] = JUMP(SLJIT_JUMP); JUMPHERE(jump[1]); @@ -8201,14 +8201,14 @@ switch(type) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); } else { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); } return cc; @@ -8227,7 +8227,7 @@ switch(type) jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); } - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); jump[0] = JUMP(SLJIT_JUMP); JUMPHERE(jump[1]); @@ -9581,11 +9581,11 @@ free_stack(common, callout_arg_size); /* Check return value. 
*/ OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); -add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32)); +add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER)); if (common->abort_label == NULL) - add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */); + add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */); else - JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->abort_label); + JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label); return cc + callout_length; } @@ -11232,7 +11232,7 @@ early_fail_type = (early_fail_ptr & 0x7); early_fail_ptr >>= 3; /* During recursion, these optimizations are disabled. */ -if (common->early_fail_start_ptr == 0) +if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL) { early_fail_ptr = 0; early_fail_type = type_skip; @@ -13661,9 +13661,11 @@ if (!common->private_data_ptrs) memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32)); private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); -set_private_data_ptrs(common, &private_data_size, ccend); + if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back) - detect_early_fail(common, common->start, &private_data_size, 0, 0); + detect_early_fail(common, common->start, &private_data_size, 0, 0, TRUE); + +set_private_data_ptrs(common, &private_data_size, ccend); SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr); @@ -14130,6 +14132,10 @@ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_jit_compile(pcre2_code *code, uint32_t options) { pcre2_real_code *re = (pcre2_real_code *)code; +#ifdef SUPPORT_JIT +executable_functions *functions; +static int executable_allocator_is_working = 0; +#endif if (code == NULL) return PCRE2_ERROR_NULL; @@ -14164,8 +14170,7 @@ actions are needed: */ #ifdef 
SUPPORT_JIT -executable_functions *functions = (executable_functions *)re->executable_jit; -static int executable_allocator_is_working = 0; +functions = (executable_functions *)re->executable_jit; #endif if ((options & PCRE2_JIT_INVALID_UTF) != 0) diff --git a/thirdparty/pcre2/src/pcre2_jit_simd_inc.h b/thirdparty/pcre2/src/pcre2_jit_simd_inc.h index 5673d338c0..aa029cce38 100644 --- a/thirdparty/pcre2/src/pcre2_jit_simd_inc.h +++ b/thirdparty/pcre2/src/pcre2_jit_simd_inc.h @@ -39,7 +39,29 @@ POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) +#if !(defined SUPPORT_VALGRIND) + +#if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)) + +typedef enum { + vector_compare_match1, + vector_compare_match1i, + vector_compare_match2, +} vector_compare_type; + +static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) +{ +#if PCRE2_CODE_UNIT_WIDTH == 8 +return 15; +#elif PCRE2_CODE_UNIT_WIDTH == 16 +return 7; +#elif PCRE2_CODE_UNIT_WIDTH == 32 +return 3; +#else +#error "Unsupported unit width" +#endif +} #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg) @@ -56,6 +78,10 @@ return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00); } #endif +#endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X */ + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + static sljit_s32 character_to_int32(PCRE2_UCHAR chr) { sljit_u32 value = chr; @@ -97,13 +123,7 @@ instruction[4] = (sljit_u8)offset; sljit_emit_op_custom(compiler, instruction, 5); } -typedef enum { - sse2_compare_match1, - sse2_compare_match1i, - sse2_compare_match2, -} sse2_compare_type; - -static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, sse2_compare_type compare_type, +static void 
fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type, int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) { sljit_u8 instruction[4]; @@ -112,11 +132,11 @@ instruction[1] = 0x0f; SLJIT_ASSERT(step >= 0 && step <= 3); -if (compare_type != sse2_compare_match2) +if (compare_type != vector_compare_match2) { if (step == 0) { - if (compare_type == sse2_compare_match1i) + if (compare_type == vector_compare_match1i) { /* POR xmm1, xmm2/m128 */ /* instruction[0] = 0x66; */ @@ -185,14 +205,14 @@ switch (step) static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) { DEFINE_COMPILER; +sljit_u8 instruction[8]; struct sljit_label *start; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 struct sljit_label *restart; #endif struct sljit_jump *quit; struct sljit_jump *partial_quit[2]; -sse2_compare_type compare_type = sse2_compare_match1; -sljit_u8 instruction[8]; +vector_compare_type compare_type = vector_compare_match1; sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); sljit_s32 data_ind = 0; @@ -207,12 +227,12 @@ SLJIT_UNUSED_ARG(offset); if (char1 != char2) { bit = char1 ^ char2; - compare_type = sse2_compare_match1i; + compare_type = vector_compare_match1i; if (!is_powerof2(bit)) { bit = 0; - compare_type = sse2_compare_match2; + compare_type = vector_compare_match2; } } @@ -349,11 +369,11 @@ if (common->utf && offset > 0) static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) { DEFINE_COMPILER; +sljit_u8 instruction[8]; struct sljit_label *start; struct sljit_jump *quit; jump_list *not_found = NULL; -sse2_compare_type compare_type = sse2_compare_match1; -sljit_u8 instruction[8]; +vector_compare_type compare_type = vector_compare_match1; sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); sljit_s32 
str_ptr_reg_ind = sljit_get_register_index(STR_PTR); sljit_s32 data_ind = 0; @@ -366,12 +386,12 @@ int i; if (char1 != char2) { bit = char1 ^ char2; - compare_type = sse2_compare_match1i; + compare_type = vector_compare_match1i; if (!is_powerof2(bit)) { bit = 0; - compare_type = sse2_compare_match2; + compare_type = vector_compare_match2; } } @@ -476,27 +496,15 @@ return not_found; #ifndef _WIN64 -static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) -{ -#if PCRE2_CODE_UNIT_WIDTH == 8 -return 15; -#elif PCRE2_CODE_UNIT_WIDTH == 16 -return 7; -#elif PCRE2_CODE_UNIT_WIDTH == 32 -return 3; -#else -#error "Unsupported unit width" -#endif -} - #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) { DEFINE_COMPILER; -sse2_compare_type compare1_type = sse2_compare_match1; -sse2_compare_type compare2_type = sse2_compare_match1; +sljit_u8 instruction[8]; +vector_compare_type compare1_type = vector_compare_match1; +vector_compare_type compare2_type = vector_compare_match1; sljit_u32 bit1 = 0; sljit_u32 bit2 = 0; sljit_u32 diff = IN_UCHARS(offs1 - offs2); @@ -516,7 +524,6 @@ struct sljit_label *start; struct sljit_label *restart; #endif struct sljit_jump *jump[2]; -sljit_u8 instruction[8]; int i; SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); @@ -549,13 +556,13 @@ else bit1 = char1a ^ char1b; if (is_powerof2(bit1)) { - compare1_type = sse2_compare_match1i; + compare1_type = vector_compare_match1i; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1)); } else { - compare1_type = sse2_compare_match2; + compare1_type = vector_compare_match2; bit1 = 0; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 
character_to_int32(char1b)); @@ -578,13 +585,13 @@ else bit2 = char2a ^ char2b; if (is_powerof2(bit2)) { - compare2_type = sse2_compare_match1i; + compare2_type = vector_compare_match1i; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2)); } else { - compare2_type = sse2_compare_match2; + compare2_type = vector_compare_match2; bit2 = 0; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b)); @@ -731,9 +738,6 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); -if (common->match_end_ptr != 0) - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) { @@ -760,7 +764,7 @@ if (common->match_end_ptr != 0) #undef SSE2_COMPARE_TYPE_INDEX -#endif /* SLJIT_CONFIG_X86 && !SUPPORT_VALGRIND */ +#endif /* SLJIT_CONFIG_X86 */ #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__)) @@ -1121,3 +1125,734 @@ JUMPHERE(partial_quit); } #endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */ + +#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define VECTOR_ELEMENT_SIZE 0 +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#define VECTOR_ELEMENT_SIZE 1 +#elif PCRE2_CODE_UNIT_WIDTH == 32 +#define VECTOR_ELEMENT_SIZE 2 +#else +#error "Unsupported unit width" +#endif + +static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg, + sljit_s32 base_reg, sljit_s32 index_reg) +{ +sljit_u16 instruction[3]; + +instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg); +instruction[1] = (sljit_u16)(base_reg << 12); +instruction[2] = (sljit_u16)((0x8 << 8) | (vlbb ? 
0x07 : 0x06)); + +sljit_emit_op_custom(compiler, instruction, 6); +} + +#if PCRE2_CODE_UNIT_WIDTH == 32 + +static void replicate_imm_vector(struct sljit_compiler *compiler, int step, sljit_s32 dst_vreg, + PCRE2_UCHAR chr, sljit_s32 tmp_general_reg) +{ +sljit_u16 instruction[3]; + +SLJIT_ASSERT(step >= 0 && step <= 1); + +if (chr < 0x7fff) + { + if (step == 1) + return; + + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4)); + instruction[1] = (sljit_u16)chr; + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } + +if (step == 0) + { + OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr); + + /* VLVG */ + instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(tmp_general_reg)); + instruction[1] = 0; + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } + +/* VREP */ +instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg); +instruction[1] = 0; +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d); +sljit_emit_op_custom(compiler, instruction, 6); +} + +#endif + +static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type, + int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) +{ +sljit_u16 instruction[3]; + +SLJIT_ASSERT(step >= 0 && step <= 2); + +if (step == 1) + { + /* VCEQ */ + instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); + instruction[1] = (sljit_u16)(cmp1_ind << 12); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } + +if (compare_type != vector_compare_match2) + { + if (step == 0 && compare_type == vector_compare_match1i) + { + /* VO */ + instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); + 
instruction[1] = (sljit_u16)(cmp2_ind << 12); + instruction[2] = (sljit_u16)((0xe << 8) | 0x6a); + sljit_emit_op_custom(compiler, instruction, 6); + } + return; + } + +switch (step) + { + case 0: + /* VCEQ */ + instruction[0] = (sljit_u16)(0xe700 | (tmp_ind << 4) | dst_ind); + instruction[1] = (sljit_u16)(cmp2_ind << 12); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8); + sljit_emit_op_custom(compiler, instruction, 6); + return; + + case 2: + /* VO */ + instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); + instruction[1] = (sljit_u16)(tmp_ind << 12); + instruction[2] = (sljit_u16)((0xe << 8) | 0x6a); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } +} + +#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1 + +static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) +{ +DEFINE_COMPILER; +sljit_u16 instruction[3]; +struct sljit_label *start; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *restart; +#endif +struct sljit_jump *quit; +struct sljit_jump *partial_quit[2]; +vector_compare_type compare_type = vector_compare_match1; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); +sljit_s32 data_ind = 0; +sljit_s32 tmp_ind = 1; +sljit_s32 cmp1_ind = 2; +sljit_s32 cmp2_ind = 3; +sljit_s32 zero_ind = 4; +sljit_u32 bit = 0; +int i; + +SLJIT_UNUSED_ARG(offset); + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = vector_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = vector_compare_match2; + } + } + +partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit[0]); + +/* First part (unaligned start) */ + +OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16); + +#if PCRE2_CODE_UNIT_WIDTH != 32 + +/* VREPI */ +instruction[0] = 
(sljit_u16)(0xe700 | (cmp1_ind << 4)); +instruction[1] = (sljit_u16)(char1 | bit); +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +if (char1 != char2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4)); + instruction[1] = (sljit_u16)(bit != 0 ? bit : char2); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +#else /* PCRE2_CODE_UNIT_WIDTH == 32 */ + +for (int i = 0; i < 2; i++) + { + replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1); + + if (char1 != char2) + replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1); + } + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +if (compare_type == vector_compare_match2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); + instruction[1] = 0; + instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); + sljit_emit_op_custom(compiler, instruction, 6); + } + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +restart = LABEL(); +#endif + +load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = 
(sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16); + +/* Second part (aligned) */ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); + +partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit[1]); + +load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW); +JUMPTO(SLJIT_OVERFLOW, start); + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); 
+sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + +JUMPHERE(quit); + +if (common->mode != PCRE2_JIT_COMPLETE) + { + JUMPHERE(partial_quit[0]); + JUMPHERE(partial_quit[1]); + OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); + } +else + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset > 0) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); + + quit = jump_if_utf_char_start(compiler, TMP1); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16); + JUMPTO(SLJIT_JUMP, restart); + + JUMPHERE(quit); + } +#endif +} + +#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1 + +static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) +{ +DEFINE_COMPILER; +sljit_u16 instruction[3]; +struct sljit_label *start; +struct sljit_jump *quit; +jump_list *not_found = NULL; +vector_compare_type compare_type = vector_compare_match1; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); +sljit_s32 tmp3_reg_ind = sljit_get_register_index(TMP3); +sljit_s32 data_ind = 0; +sljit_s32 tmp_ind = 1; +sljit_s32 cmp1_ind = 2; +sljit_s32 cmp2_ind = 3; +sljit_s32 zero_ind = 4; +sljit_u32 bit = 0; +int i; + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = vector_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = vector_compare_match2; + } + } + +add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + +/* First part (unaligned start) */ + +OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, 16); + +#if 
PCRE2_CODE_UNIT_WIDTH != 32 + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4)); +instruction[1] = (sljit_u16)(char1 | bit); +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +if (char1 != char2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4)); + instruction[1] = (sljit_u16)(bit != 0 ? bit : char2); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +#else /* PCRE2_CODE_UNIT_WIDTH == 32 */ + +for (int i = 0; i < 2; i++) + { + replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3); + + if (char1 != char2) + replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3); + } + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +if (compare_type == vector_compare_match2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); + instruction[1] = 0; + instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); + sljit_emit_op_custom(compiler, instruction, 6); + } + +load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); + 
instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0); +quit = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0); + +OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 16); + +/* Second part (aligned) */ +start = LABEL(); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 16); + +add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + +load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW); +JUMPTO(SLJIT_OVERFLOW, start); + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0); + +JUMPHERE(quit); +add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, 
STR_END, 0)); + +return not_found; +} + +#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1 + +static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, + PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) +{ +DEFINE_COMPILER; +sljit_u16 instruction[3]; +struct sljit_label *start; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *restart; +#endif +struct sljit_jump *quit; +struct sljit_jump *jump[2]; +vector_compare_type compare1_type = vector_compare_match1; +vector_compare_type compare2_type = vector_compare_match1; +sljit_u32 bit1 = 0; +sljit_u32 bit2 = 0; +sljit_s32 diff = IN_UCHARS(offs2 - offs1); +sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); +sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); +sljit_s32 data1_ind = 0; +sljit_s32 data2_ind = 1; +sljit_s32 tmp1_ind = 2; +sljit_s32 tmp2_ind = 3; +sljit_s32 cmp1a_ind = 4; +sljit_s32 cmp1b_ind = 5; +sljit_s32 cmp2a_ind = 6; +sljit_s32 cmp2b_ind = 7; +sljit_s32 zero_ind = 8; +int i; + +SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); +SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset())); +SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0); + +if (char1a != char1b) + { + bit1 = char1a ^ char1b; + compare1_type = vector_compare_match1i; + + if (!is_powerof2(bit1)) + { + bit1 = 0; + compare1_type = vector_compare_match2; + } + } + +if (char2a != char2b) + { + bit2 = char2a ^ char2b; + compare2_type = vector_compare_match1i; + + if (!is_powerof2(bit2)) + { + bit2 = 0; + compare2_type = vector_compare_match2; + } + } + +/* Initialize. 
*/ +if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); + + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); + CMOV(SLJIT_LESS, STR_END, TMP1, 0); + } + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); +OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15); + +#if PCRE2_CODE_UNIT_WIDTH != 32 + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4)); +instruction[1] = (sljit_u16)(char1a | bit1); +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +if (char1a != char1b) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4)); + instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4)); +instruction[1] = (sljit_u16)(char2a | bit2); +/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ +sljit_emit_op_custom(compiler, instruction, 6); + +if (char2a != char2b) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4)); + instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +#else /* PCRE2_CODE_UNIT_WIDTH == 32 */ + +for (int i = 0; i < 2; i++) + { + replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1); + + if (char1a != char1b) + replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? 
bit1 : char1b, TMP1); + + replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1); + + if (char2a != char2b) + replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1); + } + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); +instruction[1] = 0; +instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +restart = LABEL(); +#endif + +jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0); +load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0); +jump[1] = JUMP(SLJIT_JUMP); +JUMPHERE(jump[0]); +load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0); +JUMPHERE(jump[1]); + +load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16); + +for (i = 0; i < 3; i++) + { + fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); + fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); + } + +/* VN */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)(data2_ind << 12); +instruction[2] = (sljit_u16)((0xe << 8) | 0x68); +sljit_emit_op_custom(compiler, instruction, 6); + +/* VFENE */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); +instruction[2] = (sljit_u16)((0xe << 8) | 0x81); +sljit_emit_op_custom(compiler, instruction, 6); + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); + 
+OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff); + +/* Main loop. */ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0); +load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind); + +for (i = 0; i < 3; i++) + { + fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); + fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); + } + +/* VN */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)(data2_ind << 12); +instruction[2] = (sljit_u16)((0xe << 8) | 0x68); +sljit_emit_op_custom(compiler, instruction, 6); + +/* VFENE */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); +instruction[2] = (sljit_u16)((0xe << 8) | 0x81); +sljit_emit_op_custom(compiler, instruction, 6); + +sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW); +JUMPTO(SLJIT_OVERFLOW, start); + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +JUMPHERE(quit); + +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); + + quit = jump_if_utf_char_start(compiler, TMP1); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, 
&common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + /* TMP1 contains diff. */ + OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15); + OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); + JUMPTO(SLJIT_JUMP, restart); + + JUMPHERE(quit); + } +#endif + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); + +if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +} + +#endif /* SLJIT_CONFIG_S390X */ + +#endif /* !SUPPORT_VALGRIND */ diff --git a/thirdparty/pcre2/src/pcre2_match.c b/thirdparty/pcre2/src/pcre2_match.c index e3f78c2ca3..f28cdbb47a 100644 --- a/thirdparty/pcre2/src/pcre2_match.c +++ b/thirdparty/pcre2/src/pcre2_match.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2015-2020 University of Cambridge + New API code Copyright (c) 2015-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -818,10 +818,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* N is now the frame of the recursion; the previous frame is at the OP_RECURSE position. Go back there, copying the current subject position - and mark, and move on past the OP_RECURSE. */ + and mark, and the start_match position (\K might have changed it), and + then move on past the OP_RECURSE. 
*/ P->eptr = Feptr; P->mark = Fmark; + P->start_match = Fstart_match; F = P; Fecode += 1 + LINK_SIZE; continue; @@ -6115,8 +6117,8 @@ BOOL has_req_cu = FALSE; BOOL startline; #if PCRE2_CODE_UNIT_WIDTH == 8 -BOOL memchr_not_found_first_cu; -BOOL memchr_not_found_first_cu2; +PCRE2_SPTR memchr_found_first_cu; +PCRE2_SPTR memchr_found_first_cu2; #endif PCRE2_UCHAR first_cu = 0; @@ -6710,8 +6712,8 @@ start_partial = match_partial = NULL; mb->hitend = FALSE; #if PCRE2_CODE_UNIT_WIDTH == 8 -memchr_not_found_first_cu = FALSE; -memchr_not_found_first_cu2 = FALSE; +memchr_found_first_cu = NULL; +memchr_found_first_cu2 = NULL; #endif for(;;) @@ -6780,13 +6782,7 @@ for(;;) } } - /* Not anchored. Advance to a unique first code unit if there is one. In - 8-bit mode, the use of memchr() gives a big speed up, even though we have - to call it twice in caseless mode, in order to find the earliest occurrence - of the character in either of its cases. If a call to memchr() that - searches the rest of the subject fails to find one case, remember that in - order not to keep on repeating the search. This can make a huge difference - when the strings are very long and only one case is present. */ + /* Not anchored. Advance to a unique first code unit if there is one. */ else { @@ -6794,43 +6790,68 @@ for(;;) { if (first_cu != first_cu2) /* Caseless */ { + /* In 16-bit and 32_bit modes we have to do our own search, so can + look for both cases at once. */ + #if PCRE2_CODE_UNIT_WIDTH != 8 PCRE2_UCHAR smc; while (start_match < end_subject && (smc = UCHAR21TEST(start_match)) != first_cu && - smc != first_cu2) + smc != first_cu2) start_match++; +#else + /* In 8-bit mode, the use of memchr() gives a big speed up, even + though we have to call it twice in order to find the earliest + occurrence of the code unit in either of its cases. Caching is used + to remember the positions of previously found code units. 
This can + make a huge difference when the strings are very long and only one + case is actually present. */ -#else /* 8-bit code units */ PCRE2_SPTR pp1 = NULL; PCRE2_SPTR pp2 = NULL; - PCRE2_SIZE cu2size = end_subject - start_match; + PCRE2_SIZE searchlength = end_subject - start_match; - if (!memchr_not_found_first_cu) + /* If we haven't got a previously found position for first_cu, or if + the current starting position is later, we need to do a search. If + the code unit is not found, set it to the end. */ + + if (memchr_found_first_cu == NULL || + start_match > memchr_found_first_cu) { - pp1 = memchr(start_match, first_cu, end_subject - start_match); - if (pp1 == NULL) memchr_not_found_first_cu = TRUE; - else cu2size = pp1 - start_match; + pp1 = memchr(start_match, first_cu, searchlength); + memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1; } - /* If pp1 is not NULL, we have arranged to search only as far as pp1, - to see if the other case is earlier, so we can set "not found" only - when both searches have returned NULL. */ + /* If the start is before a previously found position, use the + previous position, or NULL if a previous search failed. */ + + else pp1 = (memchr_found_first_cu == end_subject)? NULL : + memchr_found_first_cu; - if (!memchr_not_found_first_cu2) + /* Do the same thing for the other case. */ + + if (memchr_found_first_cu2 == NULL || + start_match > memchr_found_first_cu2) { - pp2 = memchr(start_match, first_cu2, cu2size); - memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL); + pp2 = memchr(start_match, first_cu2, searchlength); + memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2; } + else pp2 = (memchr_found_first_cu2 == end_subject)? NULL : + memchr_found_first_cu2; + + /* Set the start to the end of the subject if neither case was found. + Otherwise, use the earlier found point. */ + if (pp1 == NULL) start_match = (pp2 == NULL)? end_subject : pp2; else start_match = (pp2 == NULL || pp1 < pp2)? 
pp1 : pp2; -#endif + +#endif /* 8-bit handling */ } - /* The caseful case */ + /* The caseful case is much simpler. */ else { diff --git a/thirdparty/pcre2/src/pcre2_tables.c b/thirdparty/pcre2/src/pcre2_tables.c index b10de45efb..c164e976e0 100644 --- a/thirdparty/pcre2/src/pcre2_tables.c +++ b/thirdparty/pcre2/src/pcre2_tables.c @@ -273,6 +273,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Cs0 STR_C STR_s "\0" #define STRING_Cuneiform0 STR_C STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0" #define STRING_Cypriot0 STR_C STR_y STR_p STR_r STR_i STR_o STR_t "\0" +#define STRING_Cypro_Minoan0 STR_C STR_y STR_p STR_r STR_o STR_UNDERSCORE STR_M STR_i STR_n STR_o STR_a STR_n "\0" #define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0" #define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0" #define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0" @@ -371,6 +372,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Old_Sogdian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0" #define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0" #define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0" +#define STRING_Old_Uyghur0 STR_O STR_l STR_d STR_UNDERSCORE STR_U STR_y STR_g STR_h STR_u STR_r "\0" #define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0" #define STRING_Osage0 STR_O STR_s STR_a STR_g STR_e "\0" #define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0" @@ -415,6 +417,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. 
*/ #define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0" #define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0" #define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0" +#define STRING_Tangsa0 STR_T STR_a STR_n STR_g STR_s STR_a "\0" #define STRING_Tangut0 STR_T STR_a STR_n STR_g STR_u STR_t "\0" #define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0" #define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0" @@ -422,9 +425,11 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0" #define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0" #define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0" +#define STRING_Toto0 STR_T STR_o STR_t STR_o "\0" #define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0" #define STRING_Unknown0 STR_U STR_n STR_k STR_n STR_o STR_w STR_n "\0" #define STRING_Vai0 STR_V STR_a STR_i "\0" +#define STRING_Vithkuqi0 STR_V STR_i STR_t STR_h STR_k STR_u STR_q STR_i "\0" #define STRING_Wancho0 STR_W STR_a STR_n STR_c STR_h STR_o "\0" #define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0" #define STRING_Xan0 STR_X STR_a STR_n "\0" @@ -476,6 +481,7 @@ const char PRIV(utt_names)[] = STRING_Cs0 STRING_Cuneiform0 STRING_Cypriot0 + STRING_Cypro_Minoan0 STRING_Cyrillic0 STRING_Deseret0 STRING_Devanagari0 @@ -574,6 +580,7 @@ const char PRIV(utt_names)[] = STRING_Old_Sogdian0 STRING_Old_South_Arabian0 STRING_Old_Turkic0 + STRING_Old_Uyghur0 STRING_Oriya0 STRING_Osage0 STRING_Osmanya0 @@ -618,6 +625,7 @@ const char PRIV(utt_names)[] = STRING_Tai_Viet0 STRING_Takri0 STRING_Tamil0 + STRING_Tangsa0 STRING_Tangut0 STRING_Telugu0 STRING_Thaana0 @@ -625,9 +633,11 @@ const char PRIV(utt_names)[] = STRING_Tibetan0 STRING_Tifinagh0 STRING_Tirhuta0 + STRING_Toto0 STRING_Ugaritic0 STRING_Unknown0 STRING_Vai0 + STRING_Vithkuqi0 
STRING_Wancho0 STRING_Warang_Citi0 STRING_Xan0 @@ -679,172 +689,177 @@ const ucp_type_table PRIV(utt)[] = { { 255, PT_PC, ucp_Cs }, { 258, PT_SC, ucp_Cuneiform }, { 268, PT_SC, ucp_Cypriot }, - { 276, PT_SC, ucp_Cyrillic }, - { 285, PT_SC, ucp_Deseret }, - { 293, PT_SC, ucp_Devanagari }, - { 304, PT_SC, ucp_Dives_Akuru }, - { 316, PT_SC, ucp_Dogra }, - { 322, PT_SC, ucp_Duployan }, - { 331, PT_SC, ucp_Egyptian_Hieroglyphs }, - { 352, PT_SC, ucp_Elbasan }, - { 360, PT_SC, ucp_Elymaic }, - { 368, PT_SC, ucp_Ethiopic }, - { 377, PT_SC, ucp_Georgian }, - { 386, PT_SC, ucp_Glagolitic }, - { 397, PT_SC, ucp_Gothic }, - { 404, PT_SC, ucp_Grantha }, - { 412, PT_SC, ucp_Greek }, - { 418, PT_SC, ucp_Gujarati }, - { 427, PT_SC, ucp_Gunjala_Gondi }, - { 441, PT_SC, ucp_Gurmukhi }, - { 450, PT_SC, ucp_Han }, - { 454, PT_SC, ucp_Hangul }, - { 461, PT_SC, ucp_Hanifi_Rohingya }, - { 477, PT_SC, ucp_Hanunoo }, - { 485, PT_SC, ucp_Hatran }, - { 492, PT_SC, ucp_Hebrew }, - { 499, PT_SC, ucp_Hiragana }, - { 508, PT_SC, ucp_Imperial_Aramaic }, - { 525, PT_SC, ucp_Inherited }, - { 535, PT_SC, ucp_Inscriptional_Pahlavi }, - { 557, PT_SC, ucp_Inscriptional_Parthian }, - { 580, PT_SC, ucp_Javanese }, - { 589, PT_SC, ucp_Kaithi }, - { 596, PT_SC, ucp_Kannada }, - { 604, PT_SC, ucp_Katakana }, - { 613, PT_SC, ucp_Kayah_Li }, - { 622, PT_SC, ucp_Kharoshthi }, - { 633, PT_SC, ucp_Khitan_Small_Script }, - { 653, PT_SC, ucp_Khmer }, - { 659, PT_SC, ucp_Khojki }, - { 666, PT_SC, ucp_Khudawadi }, - { 676, PT_GC, ucp_L }, - { 678, PT_LAMP, 0 }, - { 681, PT_SC, ucp_Lao }, - { 685, PT_SC, ucp_Latin }, - { 691, PT_SC, ucp_Lepcha }, - { 698, PT_SC, ucp_Limbu }, - { 704, PT_SC, ucp_Linear_A }, - { 713, PT_SC, ucp_Linear_B }, - { 722, PT_SC, ucp_Lisu }, - { 727, PT_PC, ucp_Ll }, - { 730, PT_PC, ucp_Lm }, - { 733, PT_PC, ucp_Lo }, - { 736, PT_PC, ucp_Lt }, - { 739, PT_PC, ucp_Lu }, - { 742, PT_SC, ucp_Lycian }, - { 749, PT_SC, ucp_Lydian }, - { 756, PT_GC, ucp_M }, - { 758, PT_SC, ucp_Mahajani }, - { 767, 
PT_SC, ucp_Makasar }, - { 775, PT_SC, ucp_Malayalam }, - { 785, PT_SC, ucp_Mandaic }, - { 793, PT_SC, ucp_Manichaean }, - { 804, PT_SC, ucp_Marchen }, - { 812, PT_SC, ucp_Masaram_Gondi }, - { 826, PT_PC, ucp_Mc }, - { 829, PT_PC, ucp_Me }, - { 832, PT_SC, ucp_Medefaidrin }, - { 844, PT_SC, ucp_Meetei_Mayek }, - { 857, PT_SC, ucp_Mende_Kikakui }, - { 871, PT_SC, ucp_Meroitic_Cursive }, - { 888, PT_SC, ucp_Meroitic_Hieroglyphs }, - { 909, PT_SC, ucp_Miao }, - { 914, PT_PC, ucp_Mn }, - { 917, PT_SC, ucp_Modi }, - { 922, PT_SC, ucp_Mongolian }, - { 932, PT_SC, ucp_Mro }, - { 936, PT_SC, ucp_Multani }, - { 944, PT_SC, ucp_Myanmar }, - { 952, PT_GC, ucp_N }, - { 954, PT_SC, ucp_Nabataean }, - { 964, PT_SC, ucp_Nandinagari }, - { 976, PT_PC, ucp_Nd }, - { 979, PT_SC, ucp_New_Tai_Lue }, - { 991, PT_SC, ucp_Newa }, - { 996, PT_SC, ucp_Nko }, - { 1000, PT_PC, ucp_Nl }, - { 1003, PT_PC, ucp_No }, - { 1006, PT_SC, ucp_Nushu }, - { 1012, PT_SC, ucp_Nyiakeng_Puachue_Hmong }, - { 1035, PT_SC, ucp_Ogham }, - { 1041, PT_SC, ucp_Ol_Chiki }, - { 1050, PT_SC, ucp_Old_Hungarian }, - { 1064, PT_SC, ucp_Old_Italic }, - { 1075, PT_SC, ucp_Old_North_Arabian }, - { 1093, PT_SC, ucp_Old_Permic }, - { 1104, PT_SC, ucp_Old_Persian }, - { 1116, PT_SC, ucp_Old_Sogdian }, - { 1128, PT_SC, ucp_Old_South_Arabian }, - { 1146, PT_SC, ucp_Old_Turkic }, - { 1157, PT_SC, ucp_Oriya }, - { 1163, PT_SC, ucp_Osage }, - { 1169, PT_SC, ucp_Osmanya }, - { 1177, PT_GC, ucp_P }, - { 1179, PT_SC, ucp_Pahawh_Hmong }, - { 1192, PT_SC, ucp_Palmyrene }, - { 1202, PT_SC, ucp_Pau_Cin_Hau }, - { 1214, PT_PC, ucp_Pc }, - { 1217, PT_PC, ucp_Pd }, - { 1220, PT_PC, ucp_Pe }, - { 1223, PT_PC, ucp_Pf }, - { 1226, PT_SC, ucp_Phags_Pa }, - { 1235, PT_SC, ucp_Phoenician }, - { 1246, PT_PC, ucp_Pi }, - { 1249, PT_PC, ucp_Po }, - { 1252, PT_PC, ucp_Ps }, - { 1255, PT_SC, ucp_Psalter_Pahlavi }, - { 1271, PT_SC, ucp_Rejang }, - { 1278, PT_SC, ucp_Runic }, - { 1284, PT_GC, ucp_S }, - { 1286, PT_SC, ucp_Samaritan }, - { 1296, PT_SC, 
ucp_Saurashtra }, - { 1307, PT_PC, ucp_Sc }, - { 1310, PT_SC, ucp_Sharada }, - { 1318, PT_SC, ucp_Shavian }, - { 1326, PT_SC, ucp_Siddham }, - { 1334, PT_SC, ucp_SignWriting }, - { 1346, PT_SC, ucp_Sinhala }, - { 1354, PT_PC, ucp_Sk }, - { 1357, PT_PC, ucp_Sm }, - { 1360, PT_PC, ucp_So }, - { 1363, PT_SC, ucp_Sogdian }, - { 1371, PT_SC, ucp_Sora_Sompeng }, - { 1384, PT_SC, ucp_Soyombo }, - { 1392, PT_SC, ucp_Sundanese }, - { 1402, PT_SC, ucp_Syloti_Nagri }, - { 1415, PT_SC, ucp_Syriac }, - { 1422, PT_SC, ucp_Tagalog }, - { 1430, PT_SC, ucp_Tagbanwa }, - { 1439, PT_SC, ucp_Tai_Le }, - { 1446, PT_SC, ucp_Tai_Tham }, - { 1455, PT_SC, ucp_Tai_Viet }, - { 1464, PT_SC, ucp_Takri }, - { 1470, PT_SC, ucp_Tamil }, - { 1476, PT_SC, ucp_Tangut }, - { 1483, PT_SC, ucp_Telugu }, - { 1490, PT_SC, ucp_Thaana }, - { 1497, PT_SC, ucp_Thai }, - { 1502, PT_SC, ucp_Tibetan }, - { 1510, PT_SC, ucp_Tifinagh }, - { 1519, PT_SC, ucp_Tirhuta }, - { 1527, PT_SC, ucp_Ugaritic }, - { 1536, PT_SC, ucp_Unknown }, - { 1544, PT_SC, ucp_Vai }, - { 1548, PT_SC, ucp_Wancho }, - { 1555, PT_SC, ucp_Warang_Citi }, - { 1567, PT_ALNUM, 0 }, - { 1571, PT_PXSPACE, 0 }, - { 1575, PT_SPACE, 0 }, - { 1579, PT_UCNC, 0 }, - { 1583, PT_WORD, 0 }, - { 1587, PT_SC, ucp_Yezidi }, - { 1594, PT_SC, ucp_Yi }, - { 1597, PT_GC, ucp_Z }, - { 1599, PT_SC, ucp_Zanabazar_Square }, - { 1616, PT_PC, ucp_Zl }, - { 1619, PT_PC, ucp_Zp }, - { 1622, PT_PC, ucp_Zs } + { 276, PT_SC, ucp_Cypro_Minoan }, + { 289, PT_SC, ucp_Cyrillic }, + { 298, PT_SC, ucp_Deseret }, + { 306, PT_SC, ucp_Devanagari }, + { 317, PT_SC, ucp_Dives_Akuru }, + { 329, PT_SC, ucp_Dogra }, + { 335, PT_SC, ucp_Duployan }, + { 344, PT_SC, ucp_Egyptian_Hieroglyphs }, + { 365, PT_SC, ucp_Elbasan }, + { 373, PT_SC, ucp_Elymaic }, + { 381, PT_SC, ucp_Ethiopic }, + { 390, PT_SC, ucp_Georgian }, + { 399, PT_SC, ucp_Glagolitic }, + { 410, PT_SC, ucp_Gothic }, + { 417, PT_SC, ucp_Grantha }, + { 425, PT_SC, ucp_Greek }, + { 431, PT_SC, ucp_Gujarati }, + { 440, PT_SC, 
ucp_Gunjala_Gondi }, + { 454, PT_SC, ucp_Gurmukhi }, + { 463, PT_SC, ucp_Han }, + { 467, PT_SC, ucp_Hangul }, + { 474, PT_SC, ucp_Hanifi_Rohingya }, + { 490, PT_SC, ucp_Hanunoo }, + { 498, PT_SC, ucp_Hatran }, + { 505, PT_SC, ucp_Hebrew }, + { 512, PT_SC, ucp_Hiragana }, + { 521, PT_SC, ucp_Imperial_Aramaic }, + { 538, PT_SC, ucp_Inherited }, + { 548, PT_SC, ucp_Inscriptional_Pahlavi }, + { 570, PT_SC, ucp_Inscriptional_Parthian }, + { 593, PT_SC, ucp_Javanese }, + { 602, PT_SC, ucp_Kaithi }, + { 609, PT_SC, ucp_Kannada }, + { 617, PT_SC, ucp_Katakana }, + { 626, PT_SC, ucp_Kayah_Li }, + { 635, PT_SC, ucp_Kharoshthi }, + { 646, PT_SC, ucp_Khitan_Small_Script }, + { 666, PT_SC, ucp_Khmer }, + { 672, PT_SC, ucp_Khojki }, + { 679, PT_SC, ucp_Khudawadi }, + { 689, PT_GC, ucp_L }, + { 691, PT_LAMP, 0 }, + { 694, PT_SC, ucp_Lao }, + { 698, PT_SC, ucp_Latin }, + { 704, PT_SC, ucp_Lepcha }, + { 711, PT_SC, ucp_Limbu }, + { 717, PT_SC, ucp_Linear_A }, + { 726, PT_SC, ucp_Linear_B }, + { 735, PT_SC, ucp_Lisu }, + { 740, PT_PC, ucp_Ll }, + { 743, PT_PC, ucp_Lm }, + { 746, PT_PC, ucp_Lo }, + { 749, PT_PC, ucp_Lt }, + { 752, PT_PC, ucp_Lu }, + { 755, PT_SC, ucp_Lycian }, + { 762, PT_SC, ucp_Lydian }, + { 769, PT_GC, ucp_M }, + { 771, PT_SC, ucp_Mahajani }, + { 780, PT_SC, ucp_Makasar }, + { 788, PT_SC, ucp_Malayalam }, + { 798, PT_SC, ucp_Mandaic }, + { 806, PT_SC, ucp_Manichaean }, + { 817, PT_SC, ucp_Marchen }, + { 825, PT_SC, ucp_Masaram_Gondi }, + { 839, PT_PC, ucp_Mc }, + { 842, PT_PC, ucp_Me }, + { 845, PT_SC, ucp_Medefaidrin }, + { 857, PT_SC, ucp_Meetei_Mayek }, + { 870, PT_SC, ucp_Mende_Kikakui }, + { 884, PT_SC, ucp_Meroitic_Cursive }, + { 901, PT_SC, ucp_Meroitic_Hieroglyphs }, + { 922, PT_SC, ucp_Miao }, + { 927, PT_PC, ucp_Mn }, + { 930, PT_SC, ucp_Modi }, + { 935, PT_SC, ucp_Mongolian }, + { 945, PT_SC, ucp_Mro }, + { 949, PT_SC, ucp_Multani }, + { 957, PT_SC, ucp_Myanmar }, + { 965, PT_GC, ucp_N }, + { 967, PT_SC, ucp_Nabataean }, + { 977, PT_SC, ucp_Nandinagari 
}, + { 989, PT_PC, ucp_Nd }, + { 992, PT_SC, ucp_New_Tai_Lue }, + { 1004, PT_SC, ucp_Newa }, + { 1009, PT_SC, ucp_Nko }, + { 1013, PT_PC, ucp_Nl }, + { 1016, PT_PC, ucp_No }, + { 1019, PT_SC, ucp_Nushu }, + { 1025, PT_SC, ucp_Nyiakeng_Puachue_Hmong }, + { 1048, PT_SC, ucp_Ogham }, + { 1054, PT_SC, ucp_Ol_Chiki }, + { 1063, PT_SC, ucp_Old_Hungarian }, + { 1077, PT_SC, ucp_Old_Italic }, + { 1088, PT_SC, ucp_Old_North_Arabian }, + { 1106, PT_SC, ucp_Old_Permic }, + { 1117, PT_SC, ucp_Old_Persian }, + { 1129, PT_SC, ucp_Old_Sogdian }, + { 1141, PT_SC, ucp_Old_South_Arabian }, + { 1159, PT_SC, ucp_Old_Turkic }, + { 1170, PT_SC, ucp_Old_Uyghur }, + { 1181, PT_SC, ucp_Oriya }, + { 1187, PT_SC, ucp_Osage }, + { 1193, PT_SC, ucp_Osmanya }, + { 1201, PT_GC, ucp_P }, + { 1203, PT_SC, ucp_Pahawh_Hmong }, + { 1216, PT_SC, ucp_Palmyrene }, + { 1226, PT_SC, ucp_Pau_Cin_Hau }, + { 1238, PT_PC, ucp_Pc }, + { 1241, PT_PC, ucp_Pd }, + { 1244, PT_PC, ucp_Pe }, + { 1247, PT_PC, ucp_Pf }, + { 1250, PT_SC, ucp_Phags_Pa }, + { 1259, PT_SC, ucp_Phoenician }, + { 1270, PT_PC, ucp_Pi }, + { 1273, PT_PC, ucp_Po }, + { 1276, PT_PC, ucp_Ps }, + { 1279, PT_SC, ucp_Psalter_Pahlavi }, + { 1295, PT_SC, ucp_Rejang }, + { 1302, PT_SC, ucp_Runic }, + { 1308, PT_GC, ucp_S }, + { 1310, PT_SC, ucp_Samaritan }, + { 1320, PT_SC, ucp_Saurashtra }, + { 1331, PT_PC, ucp_Sc }, + { 1334, PT_SC, ucp_Sharada }, + { 1342, PT_SC, ucp_Shavian }, + { 1350, PT_SC, ucp_Siddham }, + { 1358, PT_SC, ucp_SignWriting }, + { 1370, PT_SC, ucp_Sinhala }, + { 1378, PT_PC, ucp_Sk }, + { 1381, PT_PC, ucp_Sm }, + { 1384, PT_PC, ucp_So }, + { 1387, PT_SC, ucp_Sogdian }, + { 1395, PT_SC, ucp_Sora_Sompeng }, + { 1408, PT_SC, ucp_Soyombo }, + { 1416, PT_SC, ucp_Sundanese }, + { 1426, PT_SC, ucp_Syloti_Nagri }, + { 1439, PT_SC, ucp_Syriac }, + { 1446, PT_SC, ucp_Tagalog }, + { 1454, PT_SC, ucp_Tagbanwa }, + { 1463, PT_SC, ucp_Tai_Le }, + { 1470, PT_SC, ucp_Tai_Tham }, + { 1479, PT_SC, ucp_Tai_Viet }, + { 1488, PT_SC, ucp_Takri }, + { 
1494, PT_SC, ucp_Tamil }, + { 1500, PT_SC, ucp_Tangsa }, + { 1507, PT_SC, ucp_Tangut }, + { 1514, PT_SC, ucp_Telugu }, + { 1521, PT_SC, ucp_Thaana }, + { 1528, PT_SC, ucp_Thai }, + { 1533, PT_SC, ucp_Tibetan }, + { 1541, PT_SC, ucp_Tifinagh }, + { 1550, PT_SC, ucp_Tirhuta }, + { 1558, PT_SC, ucp_Toto }, + { 1563, PT_SC, ucp_Ugaritic }, + { 1572, PT_SC, ucp_Unknown }, + { 1580, PT_SC, ucp_Vai }, + { 1584, PT_SC, ucp_Vithkuqi }, + { 1593, PT_SC, ucp_Wancho }, + { 1600, PT_SC, ucp_Warang_Citi }, + { 1612, PT_ALNUM, 0 }, + { 1616, PT_PXSPACE, 0 }, + { 1620, PT_SPACE, 0 }, + { 1624, PT_UCNC, 0 }, + { 1628, PT_WORD, 0 }, + { 1632, PT_SC, ucp_Yezidi }, + { 1639, PT_SC, ucp_Yi }, + { 1642, PT_GC, ucp_Z }, + { 1644, PT_SC, ucp_Zanabazar_Square }, + { 1661, PT_PC, ucp_Zl }, + { 1664, PT_PC, ucp_Zp }, + { 1667, PT_PC, ucp_Zs } }; const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); diff --git a/thirdparty/pcre2/src/pcre2_ucd.c b/thirdparty/pcre2/src/pcre2_ucd.c index 46e23ff06b..0b8ac75bd4 100644 --- a/thirdparty/pcre2/src/pcre2_ucd.c +++ b/thirdparty/pcre2/src/pcre2_ucd.c @@ -20,7 +20,7 @@ needed. */ /* Unicode character database. */ /* This file was autogenerated by the MultiStage2.py script. */ -/* Total size: 101044 bytes, block size: 128. */ +/* Total size: 102844 bytes, block size: 128. */ /* The tables herein are needed only when UCP support is built, and in PCRE2 that happens automatically with UTF support. @@ -39,7 +39,7 @@ const uint16_t PRIV(ucd_stage2)[] = {0}; const uint32_t PRIV(ucd_caseless_sets)[] = {0}; #else -const char *PRIV(unicode_version) = "13.0.0"; +const char *PRIV(unicode_version) = "14.0.0"; /* If the 32-bit library is run in non-32-bit mode, character values greater than 0x10ffff may be encountered. For these we set up a @@ -116,16 +116,16 @@ set of decimal digits. It is used to ensure that all the digits in a script run come from the same set. 
*/ const uint32_t PRIV(ucd_digit_sets)[] = { - 65, /* Number of subsequent values */ + 66, /* Number of subsequent values */ 0x00039, 0x00669, 0x006f9, 0x007c9, 0x0096f, 0x009ef, 0x00a6f, 0x00aef, 0x00b6f, 0x00bef, 0x00c6f, 0x00cef, 0x00d6f, 0x00def, 0x00e59, 0x00ed9, 0x00f29, 0x01049, 0x01099, 0x017e9, 0x01819, 0x0194f, 0x019d9, 0x01a89, 0x01a99, 0x01b59, 0x01bb9, 0x01c49, 0x01c59, 0x0a629, 0x0a8d9, 0x0a909, 0x0a9d9, 0x0a9f9, 0x0aa59, 0x0abf9, 0x0ff19, 0x104a9, 0x10d39, 0x1106f, 0x110f9, 0x1113f, 0x111d9, 0x112f9, 0x11459, 0x114d9, 0x11659, 0x116c9, - 0x11739, 0x118e9, 0x11959, 0x11c59, 0x11d59, 0x11da9, 0x16a69, 0x16b59, - 0x1d7d7, 0x1d7e1, 0x1d7eb, 0x1d7f5, 0x1d7ff, 0x1e149, 0x1e2f9, 0x1e959, - 0x1fbf9, + 0x11739, 0x118e9, 0x11959, 0x11c59, 0x11d59, 0x11da9, 0x16a69, 0x16ac9, + 0x16b59, 0x1d7d7, 0x1d7e1, 0x1d7eb, 0x1d7f5, 0x1d7ff, 0x1e149, 0x1e2f9, + 0x1e959, 0x1fbf9, }; /* This vector is a list of lists of scripts for the Script Extension @@ -135,55 +135,59 @@ const uint8_t PRIV(ucd_script_sets)[] = { /* 0 */ 0, /* 1 */ 1, 11, 0, /* 4 */ 1, 144, 0, - /* 7 */ 1, 50, 0, - /* 10 */ 1, 56, 0, - /* 13 */ 3, 15, 0, - /* 16 */ 4, 23, 0, - /* 19 */ 6, 84, 0, - /* 22 */ 12, 36, 0, - /* 25 */ 13, 18, 0, - /* 28 */ 13, 34, 0, - /* 31 */ 13, 118, 0, - /* 34 */ 13, 50, 0, - /* 37 */ 15, 107, 0, - /* 40 */ 15, 150, 0, - /* 43 */ 15, 100, 0, - /* 46 */ 15, 54, 0, - /* 49 */ 17, 34, 0, - /* 52 */ 107, 54, 0, - /* 55 */ 21, 108, 0, - /* 58 */ 22, 129, 0, - /* 61 */ 23, 34, 0, - /* 64 */ 27, 30, 0, - /* 67 */ 29, 150, 0, - /* 70 */ 34, 38, 0, - /* 73 */ 38, 65, 0, - /* 76 */ 1, 50, 56, 0, - /* 80 */ 1, 56, 156, 0, - /* 84 */ 3, 96, 49, 0, - /* 88 */ 96, 39, 53, 0, - /* 92 */ 12, 110, 36, 0, - /* 96 */ 15, 107, 29, 0, - /* 100 */ 15, 107, 34, 0, - /* 104 */ 23, 27, 30, 0, - /* 108 */ 69, 34, 39, 0, - /* 112 */ 3, 15, 107, 29, 0, - /* 117 */ 7, 25, 52, 51, 0, - /* 122 */ 15, 142, 85, 111, 0, - /* 127 */ 1, 144, 50, 56, 156, 0, - /* 133 */ 4, 24, 23, 27, 30, 0, - /* 139 */ 4, 24, 
23, 27, 30, 61, 0, - /* 146 */ 15, 29, 37, 44, 54, 55, 0, - /* 153 */ 132, 1, 95, 112, 121, 144, 148, 50, 0, - /* 162 */ 3, 15, 107, 29, 150, 44, 55, 124, 0, - /* 171 */ 15, 142, 21, 22, 108, 85, 111, 114, 109, 102, 124, 0, - /* 183 */ 3, 15, 107, 21, 22, 29, 34, 37, 44, 54, 55, 124, 0, - /* 196 */ 3, 15, 107, 21, 22, 29, 34, 37, 44, 100, 54, 55, 124, 0, - /* 210 */ 15, 142, 21, 22, 108, 29, 85, 111, 114, 150, 109, 102, 124, 0, - /* 224 */ 15, 142, 21, 22, 108, 29, 85, 111, 37, 114, 150, 109, 102, 124, 0, - /* 239 */ 3, 15, 142, 143, 138, 107, 21, 22, 29, 111, 37, 150, 44, 109, 48, 49, 102, 54, 55, 124, 0, - /* 260 */ 3, 15, 142, 143, 138, 107, 21, 22, 29, 35, 111, 37, 150, 44, 109, 48, 49, 102, 54, 55, 124, 0, - /* 282 */ + /* 7 */ 1, 64, 0, + /* 10 */ 1, 50, 0, + /* 13 */ 1, 56, 0, + /* 16 */ 3, 15, 0, + /* 19 */ 4, 23, 0, + /* 22 */ 6, 84, 0, + /* 25 */ 12, 36, 0, + /* 28 */ 13, 18, 0, + /* 31 */ 13, 34, 0, + /* 34 */ 13, 118, 0, + /* 37 */ 13, 50, 0, + /* 40 */ 15, 107, 0, + /* 43 */ 15, 150, 0, + /* 46 */ 15, 100, 0, + /* 49 */ 15, 54, 0, + /* 52 */ 17, 34, 0, + /* 55 */ 107, 54, 0, + /* 58 */ 21, 108, 0, + /* 61 */ 22, 129, 0, + /* 64 */ 23, 34, 0, + /* 67 */ 27, 30, 0, + /* 70 */ 29, 150, 0, + /* 73 */ 34, 38, 0, + /* 76 */ 112, 158, 0, + /* 79 */ 38, 65, 0, + /* 82 */ 1, 50, 56, 0, + /* 86 */ 1, 56, 156, 0, + /* 90 */ 3, 96, 49, 0, + /* 94 */ 96, 39, 53, 0, + /* 98 */ 157, 12, 36, 0, + /* 102 */ 12, 110, 36, 0, + /* 106 */ 15, 107, 29, 0, + /* 110 */ 15, 107, 34, 0, + /* 114 */ 23, 27, 30, 0, + /* 118 */ 69, 34, 39, 0, + /* 122 */ 3, 15, 107, 29, 0, + /* 127 */ 7, 25, 52, 51, 0, + /* 132 */ 15, 142, 85, 111, 0, + /* 137 */ 4, 24, 23, 27, 30, 0, + /* 143 */ 1, 64, 144, 50, 56, 156, 0, + /* 150 */ 4, 24, 23, 27, 30, 61, 0, + /* 157 */ 15, 29, 37, 44, 54, 55, 0, + /* 164 */ 132, 1, 64, 144, 50, 56, 156, 0, + /* 172 */ 3, 15, 107, 29, 150, 44, 55, 124, 0, + /* 181 */ 132, 1, 95, 112, 158, 121, 144, 148, 50, 0, + /* 191 */ 15, 142, 21, 22, 108, 85, 111, 114, 
109, 102, 124, 0, + /* 203 */ 3, 15, 107, 21, 22, 29, 34, 37, 44, 54, 55, 124, 0, + /* 216 */ 3, 15, 107, 21, 22, 29, 34, 37, 44, 100, 54, 55, 124, 0, + /* 230 */ 15, 142, 21, 22, 108, 29, 85, 111, 114, 150, 109, 102, 124, 0, + /* 244 */ 15, 142, 21, 22, 108, 29, 85, 111, 37, 114, 150, 109, 102, 124, 0, + /* 259 */ 3, 15, 142, 143, 138, 107, 21, 22, 29, 111, 37, 150, 44, 109, 48, 49, 102, 54, 55, 124, 0, + /* 280 */ 3, 15, 142, 143, 138, 107, 21, 22, 29, 35, 111, 37, 150, 44, 109, 48, 49, 102, 54, 55, 124, 0, + /* 302 */ }; /* These are the main two-stage UCD tables. The fields in each record are: @@ -192,7 +196,7 @@ offset to multichar other cases or zero (8 bits), offset to other case or zero (32 bits, signed), script extension (16 bits, signed), and a dummy 16-bit field to make the whole thing a multiple of 4 bytes. */ -const ucd_record PRIV(ucd_records)[] = { /* 11700 bytes, record size 12 */ +const ucd_record PRIV(ucd_records)[] = { /* 11964 bytes, record size 12 */ { 10, 0, 2, 0, 0, 10, 256, }, /* 0 */ { 10, 0, 2, 0, 0, 10, 0, }, /* 1 */ { 10, 0, 1, 0, 0, 10, 0, }, /* 2 */ @@ -390,9 +394,9 @@ const ucd_record PRIV(ucd_records)[] = { /* 11700 bytes, record size 12 */ { 13, 9, 12, 88, 1, 13, 0, }, /* 194 */ { 13, 5, 12, 88, -1, 13, 0, }, /* 195 */ { 13, 26, 12, 0, 0, 13, 0, }, /* 196 */ - { 13, 12, 3, 0, 0, -31, 0, }, /* 197 */ - { 13, 12, 3, 0, 0, -25, 0, }, /* 198 */ - { 28, 12, 3, 0, 0, -28, 0, }, /* 199 */ + { 13, 12, 3, 0, 0, -34, 0, }, /* 197 */ + { 13, 12, 3, 0, 0, -28, 0, }, /* 198 */ + { 28, 12, 3, 0, 0, -31, 0, }, /* 199 */ { 13, 11, 3, 0, 0, 13, 0, }, /* 200 */ { 13, 9, 12, 0, 15, 13, 0, }, /* 201 */ { 13, 5, 12, 0, -15, 13, 0, }, /* 202 */ @@ -413,761 +417,783 @@ const ucd_record PRIV(ucd_records)[] = { /* 11700 bytes, record size 12 */ { 1, 25, 12, 0, 0, 1, 0, }, /* 217 */ { 1, 21, 12, 0, 0, 1, 0, }, /* 218 */ { 1, 23, 12, 0, 0, 1, 0, }, /* 219 */ - { 10, 21, 12, 0, 0, -127, 0, }, /* 220 */ + { 10, 21, 12, 0, 0, -143, 0, }, /* 220 */ { 1, 26, 12, 
0, 0, 1, 0, }, /* 221 */ { 1, 12, 3, 0, 0, 1, 0, }, /* 222 */ - { 1, 1, 2, 0, 0, -76, 0, }, /* 223 */ - { 1, 7, 12, 0, 0, 1, 0, }, /* 224 */ - { 10, 6, 12, 0, 0, -153, 0, }, /* 225 */ - { 28, 12, 3, 0, 0, -7, 0, }, /* 226 */ - { 1, 13, 12, 0, 0, -80, 0, }, /* 227 */ - { 1, 21, 12, 0, 0, -4, 0, }, /* 228 */ - { 1, 6, 12, 0, 0, 1, 0, }, /* 229 */ - { 1, 13, 12, 0, 0, 1, 0, }, /* 230 */ - { 50, 21, 12, 0, 0, 50, 0, }, /* 231 */ - { 50, 1, 4, 0, 0, 50, 0, }, /* 232 */ - { 50, 7, 12, 0, 0, 50, 0, }, /* 233 */ - { 50, 12, 3, 0, 0, 50, 0, }, /* 234 */ - { 56, 7, 12, 0, 0, 56, 0, }, /* 235 */ - { 56, 12, 3, 0, 0, 56, 0, }, /* 236 */ - { 64, 13, 12, 0, 0, 64, 0, }, /* 237 */ - { 64, 7, 12, 0, 0, 64, 0, }, /* 238 */ - { 64, 12, 3, 0, 0, 64, 0, }, /* 239 */ - { 64, 6, 12, 0, 0, 64, 0, }, /* 240 */ - { 64, 26, 12, 0, 0, 64, 0, }, /* 241 */ - { 64, 21, 12, 0, 0, 64, 0, }, /* 242 */ - { 64, 23, 12, 0, 0, 64, 0, }, /* 243 */ - { 90, 7, 12, 0, 0, 90, 0, }, /* 244 */ - { 90, 12, 3, 0, 0, 90, 0, }, /* 245 */ - { 90, 6, 12, 0, 0, 90, 0, }, /* 246 */ - { 90, 21, 12, 0, 0, 90, 0, }, /* 247 */ - { 95, 7, 12, 0, 0, 95, 0, }, /* 248 */ - { 95, 12, 3, 0, 0, 95, 0, }, /* 249 */ - { 95, 21, 12, 0, 0, 95, 0, }, /* 250 */ - { 15, 12, 3, 0, 0, 15, 0, }, /* 251 */ - { 15, 10, 5, 0, 0, 15, 0, }, /* 252 */ - { 15, 7, 12, 0, 0, 15, 0, }, /* 253 */ - { 28, 12, 3, 0, 0, -196, 0, }, /* 254 */ - { 28, 12, 3, 0, 0, -183, 0, }, /* 255 */ - { 10, 21, 12, 0, 0, -239, 0, }, /* 256 */ - { 10, 21, 12, 0, 0, -260, 0, }, /* 257 */ - { 15, 13, 12, 0, 0, -122, 0, }, /* 258 */ - { 15, 21, 12, 0, 0, 15, 0, }, /* 259 */ - { 15, 6, 12, 0, 0, 15, 0, }, /* 260 */ - { 3, 7, 12, 0, 0, 3, 0, }, /* 261 */ - { 3, 12, 3, 0, 0, 3, 0, }, /* 262 */ - { 3, 10, 5, 0, 0, 3, 0, }, /* 263 */ - { 3, 10, 3, 0, 0, 3, 0, }, /* 264 */ - { 3, 13, 12, 0, 0, -84, 0, }, /* 265 */ - { 3, 23, 12, 0, 0, 3, 0, }, /* 266 */ - { 3, 15, 12, 0, 0, 3, 0, }, /* 267 */ - { 3, 26, 12, 0, 0, 3, 0, }, /* 268 */ - { 3, 21, 12, 0, 0, 3, 0, }, /* 269 */ - { 
22, 12, 3, 0, 0, 22, 0, }, /* 270 */ - { 22, 10, 5, 0, 0, 22, 0, }, /* 271 */ - { 22, 7, 12, 0, 0, 22, 0, }, /* 272 */ - { 22, 13, 12, 0, 0, -58, 0, }, /* 273 */ - { 22, 21, 12, 0, 0, 22, 0, }, /* 274 */ - { 21, 12, 3, 0, 0, 21, 0, }, /* 275 */ - { 21, 10, 5, 0, 0, 21, 0, }, /* 276 */ - { 21, 7, 12, 0, 0, 21, 0, }, /* 277 */ - { 21, 13, 12, 0, 0, -55, 0, }, /* 278 */ - { 21, 21, 12, 0, 0, 21, 0, }, /* 279 */ - { 21, 23, 12, 0, 0, 21, 0, }, /* 280 */ - { 44, 12, 3, 0, 0, 44, 0, }, /* 281 */ - { 44, 10, 5, 0, 0, 44, 0, }, /* 282 */ - { 44, 7, 12, 0, 0, 44, 0, }, /* 283 */ - { 44, 10, 3, 0, 0, 44, 0, }, /* 284 */ - { 44, 13, 12, 0, 0, 44, 0, }, /* 285 */ - { 44, 26, 12, 0, 0, 44, 0, }, /* 286 */ - { 44, 15, 12, 0, 0, 44, 0, }, /* 287 */ - { 54, 12, 3, 0, 0, 54, 0, }, /* 288 */ - { 54, 7, 12, 0, 0, 54, 0, }, /* 289 */ - { 54, 10, 3, 0, 0, 54, 0, }, /* 290 */ - { 54, 10, 5, 0, 0, 54, 0, }, /* 291 */ - { 54, 13, 12, 0, 0, -52, 0, }, /* 292 */ - { 54, 15, 12, 0, 0, -52, 0, }, /* 293 */ - { 54, 26, 12, 0, 0, -52, 0, }, /* 294 */ - { 54, 26, 12, 0, 0, 54, 0, }, /* 295 */ - { 54, 23, 12, 0, 0, 54, 0, }, /* 296 */ - { 55, 12, 3, 0, 0, 55, 0, }, /* 297 */ - { 55, 10, 5, 0, 0, 55, 0, }, /* 298 */ - { 55, 7, 12, 0, 0, 55, 0, }, /* 299 */ - { 55, 13, 12, 0, 0, 55, 0, }, /* 300 */ - { 55, 21, 12, 0, 0, 55, 0, }, /* 301 */ - { 55, 15, 12, 0, 0, 55, 0, }, /* 302 */ - { 55, 26, 12, 0, 0, 55, 0, }, /* 303 */ - { 29, 7, 12, 0, 0, 29, 0, }, /* 304 */ - { 29, 12, 3, 0, 0, 29, 0, }, /* 305 */ - { 29, 10, 5, 0, 0, 29, 0, }, /* 306 */ - { 29, 21, 12, 0, 0, 29, 0, }, /* 307 */ - { 29, 10, 3, 0, 0, 29, 0, }, /* 308 */ - { 29, 13, 12, 0, 0, -67, 0, }, /* 309 */ - { 37, 12, 3, 0, 0, 37, 0, }, /* 310 */ - { 37, 10, 5, 0, 0, 37, 0, }, /* 311 */ - { 37, 7, 12, 0, 0, 37, 0, }, /* 312 */ - { 37, 10, 3, 0, 0, 37, 0, }, /* 313 */ - { 37, 7, 4, 0, 0, 37, 0, }, /* 314 */ - { 37, 26, 12, 0, 0, 37, 0, }, /* 315 */ - { 37, 15, 12, 0, 0, 37, 0, }, /* 316 */ - { 37, 13, 12, 0, 0, 37, 0, }, /* 317 */ - { 48, 
12, 3, 0, 0, 48, 0, }, /* 318 */ - { 48, 10, 5, 0, 0, 48, 0, }, /* 319 */ - { 48, 7, 12, 0, 0, 48, 0, }, /* 320 */ - { 48, 10, 3, 0, 0, 48, 0, }, /* 321 */ - { 48, 13, 12, 0, 0, 48, 0, }, /* 322 */ - { 48, 21, 12, 0, 0, 48, 0, }, /* 323 */ - { 57, 7, 12, 0, 0, 57, 0, }, /* 324 */ - { 57, 12, 3, 0, 0, 57, 0, }, /* 325 */ - { 57, 7, 5, 0, 0, 57, 0, }, /* 326 */ - { 57, 6, 12, 0, 0, 57, 0, }, /* 327 */ - { 57, 21, 12, 0, 0, 57, 0, }, /* 328 */ - { 57, 13, 12, 0, 0, 57, 0, }, /* 329 */ - { 33, 7, 12, 0, 0, 33, 0, }, /* 330 */ - { 33, 12, 3, 0, 0, 33, 0, }, /* 331 */ - { 33, 7, 5, 0, 0, 33, 0, }, /* 332 */ - { 33, 6, 12, 0, 0, 33, 0, }, /* 333 */ - { 33, 13, 12, 0, 0, 33, 0, }, /* 334 */ - { 58, 7, 12, 0, 0, 58, 0, }, /* 335 */ - { 58, 26, 12, 0, 0, 58, 0, }, /* 336 */ - { 58, 21, 12, 0, 0, 58, 0, }, /* 337 */ - { 58, 12, 3, 0, 0, 58, 0, }, /* 338 */ - { 58, 13, 12, 0, 0, 58, 0, }, /* 339 */ - { 58, 15, 12, 0, 0, 58, 0, }, /* 340 */ - { 58, 22, 12, 0, 0, 58, 0, }, /* 341 */ - { 58, 18, 12, 0, 0, 58, 0, }, /* 342 */ - { 58, 10, 5, 0, 0, 58, 0, }, /* 343 */ - { 39, 7, 12, 0, 0, 39, 0, }, /* 344 */ - { 39, 10, 12, 0, 0, 39, 0, }, /* 345 */ - { 39, 12, 3, 0, 0, 39, 0, }, /* 346 */ - { 39, 10, 5, 0, 0, 39, 0, }, /* 347 */ - { 39, 13, 12, 0, 0, -88, 0, }, /* 348 */ - { 39, 21, 12, 0, 0, 39, 0, }, /* 349 */ - { 39, 13, 12, 0, 0, 39, 0, }, /* 350 */ - { 39, 26, 12, 0, 0, 39, 0, }, /* 351 */ - { 17, 9, 12, 0, 7264, 17, 0, }, /* 352 */ - { 17, 5, 12, 0, 3008, 17, 0, }, /* 353 */ - { 10, 21, 12, 0, 0, -49, 0, }, /* 354 */ - { 17, 6, 12, 0, 0, 17, 0, }, /* 355 */ - { 24, 7, 6, 0, 0, 24, 0, }, /* 356 */ - { 24, 7, 7, 0, 0, 24, 0, }, /* 357 */ - { 24, 7, 8, 0, 0, 24, 0, }, /* 358 */ - { 16, 7, 12, 0, 0, 16, 0, }, /* 359 */ - { 16, 12, 3, 0, 0, 16, 0, }, /* 360 */ - { 16, 21, 12, 0, 0, 16, 0, }, /* 361 */ - { 16, 15, 12, 0, 0, 16, 0, }, /* 362 */ - { 16, 26, 12, 0, 0, 16, 0, }, /* 363 */ - { 9, 9, 12, 0, 38864, 9, 0, }, /* 364 */ - { 9, 9, 12, 0, 8, 9, 0, }, /* 365 */ - { 9, 5, 12, 0, 
-8, 9, 0, }, /* 366 */ - { 8, 17, 12, 0, 0, 8, 0, }, /* 367 */ - { 8, 7, 12, 0, 0, 8, 0, }, /* 368 */ - { 8, 26, 12, 0, 0, 8, 0, }, /* 369 */ - { 8, 21, 12, 0, 0, 8, 0, }, /* 370 */ - { 41, 29, 12, 0, 0, 41, 0, }, /* 371 */ - { 41, 7, 12, 0, 0, 41, 0, }, /* 372 */ - { 41, 22, 12, 0, 0, 41, 0, }, /* 373 */ - { 41, 18, 12, 0, 0, 41, 0, }, /* 374 */ - { 46, 7, 12, 0, 0, 46, 0, }, /* 375 */ - { 46, 14, 12, 0, 0, 46, 0, }, /* 376 */ - { 51, 7, 12, 0, 0, 51, 0, }, /* 377 */ - { 51, 12, 3, 0, 0, 51, 0, }, /* 378 */ - { 25, 7, 12, 0, 0, 25, 0, }, /* 379 */ - { 25, 12, 3, 0, 0, 25, 0, }, /* 380 */ - { 10, 21, 12, 0, 0, -117, 0, }, /* 381 */ - { 7, 7, 12, 0, 0, 7, 0, }, /* 382 */ - { 7, 12, 3, 0, 0, 7, 0, }, /* 383 */ - { 52, 7, 12, 0, 0, 52, 0, }, /* 384 */ - { 52, 12, 3, 0, 0, 52, 0, }, /* 385 */ - { 32, 7, 12, 0, 0, 32, 0, }, /* 386 */ - { 32, 12, 3, 0, 0, 32, 0, }, /* 387 */ - { 32, 10, 5, 0, 0, 32, 0, }, /* 388 */ - { 32, 21, 12, 0, 0, 32, 0, }, /* 389 */ - { 32, 6, 12, 0, 0, 32, 0, }, /* 390 */ - { 32, 23, 12, 0, 0, 32, 0, }, /* 391 */ - { 32, 13, 12, 0, 0, 32, 0, }, /* 392 */ - { 32, 15, 12, 0, 0, 32, 0, }, /* 393 */ - { 38, 21, 12, 0, 0, 38, 0, }, /* 394 */ - { 10, 21, 12, 0, 0, -73, 0, }, /* 395 */ - { 38, 17, 12, 0, 0, 38, 0, }, /* 396 */ - { 38, 12, 3, 0, 0, 38, 0, }, /* 397 */ - { 38, 1, 2, 0, 0, 38, 0, }, /* 398 */ - { 38, 13, 12, 0, 0, 38, 0, }, /* 399 */ - { 38, 7, 12, 0, 0, 38, 0, }, /* 400 */ - { 38, 6, 12, 0, 0, 38, 0, }, /* 401 */ - { 35, 7, 12, 0, 0, 35, 0, }, /* 402 */ - { 35, 12, 3, 0, 0, 35, 0, }, /* 403 */ - { 35, 10, 5, 0, 0, 35, 0, }, /* 404 */ - { 35, 26, 12, 0, 0, 35, 0, }, /* 405 */ - { 35, 21, 12, 0, 0, 35, 0, }, /* 406 */ - { 35, 13, 12, 0, 0, 35, 0, }, /* 407 */ - { 53, 7, 12, 0, 0, 53, 0, }, /* 408 */ - { 40, 7, 12, 0, 0, 40, 0, }, /* 409 */ - { 40, 13, 12, 0, 0, 40, 0, }, /* 410 */ - { 40, 15, 12, 0, 0, 40, 0, }, /* 411 */ - { 40, 26, 12, 0, 0, 40, 0, }, /* 412 */ - { 32, 26, 12, 0, 0, 32, 0, }, /* 413 */ - { 6, 7, 12, 0, 0, 6, 0, }, /* 414 
*/ - { 6, 12, 3, 0, 0, 6, 0, }, /* 415 */ - { 6, 10, 5, 0, 0, 6, 0, }, /* 416 */ - { 6, 21, 12, 0, 0, 6, 0, }, /* 417 */ - { 91, 7, 12, 0, 0, 91, 0, }, /* 418 */ - { 91, 10, 5, 0, 0, 91, 0, }, /* 419 */ - { 91, 12, 3, 0, 0, 91, 0, }, /* 420 */ - { 91, 10, 12, 0, 0, 91, 0, }, /* 421 */ - { 91, 13, 12, 0, 0, 91, 0, }, /* 422 */ - { 91, 21, 12, 0, 0, 91, 0, }, /* 423 */ - { 91, 6, 12, 0, 0, 91, 0, }, /* 424 */ - { 28, 11, 3, 0, 0, 28, 0, }, /* 425 */ - { 62, 12, 3, 0, 0, 62, 0, }, /* 426 */ - { 62, 10, 5, 0, 0, 62, 0, }, /* 427 */ - { 62, 7, 12, 0, 0, 62, 0, }, /* 428 */ - { 62, 10, 3, 0, 0, 62, 0, }, /* 429 */ - { 62, 13, 12, 0, 0, 62, 0, }, /* 430 */ - { 62, 21, 12, 0, 0, 62, 0, }, /* 431 */ - { 62, 26, 12, 0, 0, 62, 0, }, /* 432 */ - { 76, 12, 3, 0, 0, 76, 0, }, /* 433 */ - { 76, 10, 5, 0, 0, 76, 0, }, /* 434 */ - { 76, 7, 12, 0, 0, 76, 0, }, /* 435 */ - { 76, 13, 12, 0, 0, 76, 0, }, /* 436 */ - { 93, 7, 12, 0, 0, 93, 0, }, /* 437 */ - { 93, 12, 3, 0, 0, 93, 0, }, /* 438 */ - { 93, 10, 5, 0, 0, 93, 0, }, /* 439 */ - { 93, 21, 12, 0, 0, 93, 0, }, /* 440 */ - { 70, 7, 12, 0, 0, 70, 0, }, /* 441 */ - { 70, 10, 5, 0, 0, 70, 0, }, /* 442 */ - { 70, 12, 3, 0, 0, 70, 0, }, /* 443 */ - { 70, 21, 12, 0, 0, 70, 0, }, /* 444 */ - { 70, 13, 12, 0, 0, 70, 0, }, /* 445 */ - { 73, 13, 12, 0, 0, 73, 0, }, /* 446 */ - { 73, 7, 12, 0, 0, 73, 0, }, /* 447 */ - { 73, 6, 12, 0, 0, 73, 0, }, /* 448 */ - { 73, 21, 12, 0, 0, 73, 0, }, /* 449 */ - { 13, 5, 12, 63, -6222, 13, 0, }, /* 450 */ - { 13, 5, 12, 67, -6221, 13, 0, }, /* 451 */ - { 13, 5, 12, 71, -6212, 13, 0, }, /* 452 */ - { 13, 5, 12, 75, -6210, 13, 0, }, /* 453 */ - { 13, 5, 12, 79, -6210, 13, 0, }, /* 454 */ - { 13, 5, 12, 79, -6211, 13, 0, }, /* 455 */ - { 13, 5, 12, 84, -6204, 13, 0, }, /* 456 */ - { 13, 5, 12, 88, -6180, 13, 0, }, /* 457 */ - { 13, 5, 12, 108, 35267, 13, 0, }, /* 458 */ - { 17, 9, 12, 0, -3008, 17, 0, }, /* 459 */ - { 76, 21, 12, 0, 0, 76, 0, }, /* 460 */ - { 28, 12, 3, 0, 0, -112, 0, }, /* 461 */ - { 28, 
12, 3, 0, 0, 15, 0, }, /* 462 */ - { 10, 21, 12, 0, 0, -37, 0, }, /* 463 */ - { 28, 12, 3, 0, 0, -13, 0, }, /* 464 */ - { 28, 12, 3, 0, 0, -43, 0, }, /* 465 */ - { 28, 12, 3, 0, 0, -146, 0, }, /* 466 */ - { 10, 10, 5, 0, 0, -13, 0, }, /* 467 */ - { 10, 7, 12, 0, 0, -40, 0, }, /* 468 */ - { 10, 7, 12, 0, 0, -13, 0, }, /* 469 */ - { 10, 7, 12, 0, 0, 15, 0, }, /* 470 */ - { 10, 7, 12, 0, 0, -162, 0, }, /* 471 */ - { 10, 7, 12, 0, 0, -37, 0, }, /* 472 */ - { 28, 12, 3, 0, 0, -96, 0, }, /* 473 */ - { 10, 10, 5, 0, 0, 3, 0, }, /* 474 */ - { 28, 12, 3, 0, 0, -37, 0, }, /* 475 */ - { 10, 7, 12, 0, 0, 150, 0, }, /* 476 */ - { 13, 5, 12, 0, 0, 13, 0, }, /* 477 */ - { 13, 6, 12, 0, 0, 13, 0, }, /* 478 */ - { 34, 5, 12, 0, 35332, 34, 0, }, /* 479 */ - { 34, 5, 12, 0, 3814, 34, 0, }, /* 480 */ - { 34, 5, 12, 0, 35384, 34, 0, }, /* 481 */ - { 28, 12, 3, 0, 0, -34, 0, }, /* 482 */ - { 34, 9, 12, 92, 1, 34, 0, }, /* 483 */ - { 34, 5, 12, 92, -1, 34, 0, }, /* 484 */ - { 34, 5, 12, 92, -58, 34, 0, }, /* 485 */ - { 34, 9, 12, 0, -7615, 34, 0, }, /* 486 */ - { 20, 5, 12, 0, 8, 20, 0, }, /* 487 */ - { 20, 9, 12, 0, -8, 20, 0, }, /* 488 */ - { 20, 5, 12, 0, 74, 20, 0, }, /* 489 */ - { 20, 5, 12, 0, 86, 20, 0, }, /* 490 */ - { 20, 5, 12, 0, 100, 20, 0, }, /* 491 */ - { 20, 5, 12, 0, 128, 20, 0, }, /* 492 */ - { 20, 5, 12, 0, 112, 20, 0, }, /* 493 */ - { 20, 5, 12, 0, 126, 20, 0, }, /* 494 */ - { 20, 8, 12, 0, -8, 20, 0, }, /* 495 */ - { 20, 5, 12, 0, 9, 20, 0, }, /* 496 */ - { 20, 9, 12, 0, -74, 20, 0, }, /* 497 */ - { 20, 8, 12, 0, -9, 20, 0, }, /* 498 */ - { 20, 5, 12, 21, -7173, 20, 0, }, /* 499 */ - { 20, 9, 12, 0, -86, 20, 0, }, /* 500 */ - { 20, 9, 12, 0, -100, 20, 0, }, /* 501 */ - { 20, 9, 12, 0, -112, 20, 0, }, /* 502 */ - { 20, 9, 12, 0, -128, 20, 0, }, /* 503 */ - { 20, 9, 12, 0, -126, 20, 0, }, /* 504 */ - { 28, 1, 3, 0, 0, 28, 0, }, /* 505 */ - { 28, 1, 13, 0, 0, 28, 0, }, /* 506 */ - { 10, 27, 2, 0, 0, 10, 0, }, /* 507 */ - { 10, 28, 2, 0, 0, 10, 0, }, /* 508 */ - { 10, 29, 
12, 0, 0, -70, 0, }, /* 509 */ - { 10, 21, 14, 0, 0, 10, 0, }, /* 510 */ - { 0, 2, 2, 0, 0, 0, 0, }, /* 511 */ - { 28, 12, 3, 0, 0, -100, 0, }, /* 512 */ - { 10, 9, 12, 0, 0, 10, 0, }, /* 513 */ - { 10, 5, 12, 0, 0, 10, 0, }, /* 514 */ - { 20, 9, 12, 96, -7517, 20, 0, }, /* 515 */ - { 34, 9, 12, 100, -8383, 34, 0, }, /* 516 */ - { 34, 9, 12, 104, -8262, 34, 0, }, /* 517 */ - { 34, 9, 12, 0, 28, 34, 0, }, /* 518 */ - { 10, 7, 12, 0, 0, 10, 0, }, /* 519 */ - { 10, 5, 14, 0, 0, 10, 0, }, /* 520 */ - { 34, 5, 12, 0, -28, 34, 0, }, /* 521 */ - { 34, 14, 12, 0, 16, 34, 0, }, /* 522 */ - { 34, 14, 12, 0, -16, 34, 0, }, /* 523 */ - { 34, 14, 12, 0, 0, 34, 0, }, /* 524 */ - { 10, 25, 14, 0, 0, 10, 0, }, /* 525 */ - { 10, 26, 12, 0, 26, 10, 0, }, /* 526 */ - { 10, 26, 14, 0, 26, 10, 0, }, /* 527 */ - { 10, 26, 12, 0, -26, 10, 0, }, /* 528 */ - { 5, 26, 12, 0, 0, 5, 0, }, /* 529 */ - { 18, 9, 12, 0, 48, 18, 0, }, /* 530 */ - { 18, 5, 12, 0, -48, 18, 0, }, /* 531 */ - { 34, 9, 12, 0, -10743, 34, 0, }, /* 532 */ - { 34, 9, 12, 0, -3814, 34, 0, }, /* 533 */ - { 34, 9, 12, 0, -10727, 34, 0, }, /* 534 */ - { 34, 5, 12, 0, -10795, 34, 0, }, /* 535 */ - { 34, 5, 12, 0, -10792, 34, 0, }, /* 536 */ - { 34, 9, 12, 0, -10780, 34, 0, }, /* 537 */ - { 34, 9, 12, 0, -10749, 34, 0, }, /* 538 */ - { 34, 9, 12, 0, -10783, 34, 0, }, /* 539 */ - { 34, 9, 12, 0, -10782, 34, 0, }, /* 540 */ - { 34, 9, 12, 0, -10815, 34, 0, }, /* 541 */ - { 11, 5, 12, 0, 0, 11, 0, }, /* 542 */ - { 11, 26, 12, 0, 0, 11, 0, }, /* 543 */ - { 11, 12, 3, 0, 0, 11, 0, }, /* 544 */ - { 11, 21, 12, 0, 0, 11, 0, }, /* 545 */ - { 11, 15, 12, 0, 0, 11, 0, }, /* 546 */ - { 17, 5, 12, 0, -7264, 17, 0, }, /* 547 */ - { 59, 7, 12, 0, 0, 59, 0, }, /* 548 */ - { 59, 6, 12, 0, 0, 59, 0, }, /* 549 */ - { 59, 21, 12, 0, 0, 59, 0, }, /* 550 */ - { 59, 12, 3, 0, 0, 59, 0, }, /* 551 */ - { 13, 12, 3, 0, 0, 13, 0, }, /* 552 */ - { 10, 21, 12, 0, 0, -25, 0, }, /* 553 */ - { 23, 26, 12, 0, 0, 23, 0, }, /* 554 */ - { 10, 21, 12, 0, 0, -139, 
0, }, /* 555 */ - { 10, 21, 12, 0, 0, -133, 0, }, /* 556 */ - { 23, 6, 12, 0, 0, 23, 0, }, /* 557 */ - { 10, 7, 12, 0, 0, 23, 0, }, /* 558 */ - { 23, 14, 12, 0, 0, 23, 0, }, /* 559 */ - { 10, 22, 12, 0, 0, -139, 0, }, /* 560 */ - { 10, 18, 12, 0, 0, -139, 0, }, /* 561 */ - { 10, 26, 12, 0, 0, -133, 0, }, /* 562 */ - { 10, 17, 12, 0, 0, -133, 0, }, /* 563 */ - { 10, 22, 12, 0, 0, -133, 0, }, /* 564 */ - { 10, 18, 12, 0, 0, -133, 0, }, /* 565 */ - { 28, 12, 3, 0, 0, -16, 0, }, /* 566 */ - { 24, 10, 3, 0, 0, 24, 0, }, /* 567 */ - { 10, 17, 14, 0, 0, -133, 0, }, /* 568 */ - { 10, 6, 12, 0, 0, -64, 0, }, /* 569 */ - { 10, 7, 12, 0, 0, -104, 0, }, /* 570 */ - { 10, 21, 14, 0, 0, -104, 0, }, /* 571 */ - { 10, 26, 12, 0, 0, 23, 0, }, /* 572 */ - { 27, 7, 12, 0, 0, 27, 0, }, /* 573 */ - { 28, 12, 3, 0, 0, -64, 0, }, /* 574 */ - { 10, 24, 12, 0, 0, -64, 0, }, /* 575 */ - { 27, 6, 12, 0, 0, 27, 0, }, /* 576 */ - { 10, 17, 12, 0, 0, -64, 0, }, /* 577 */ - { 30, 7, 12, 0, 0, 30, 0, }, /* 578 */ - { 30, 6, 12, 0, 0, 30, 0, }, /* 579 */ - { 4, 7, 12, 0, 0, 4, 0, }, /* 580 */ - { 24, 7, 12, 0, 0, 24, 0, }, /* 581 */ - { 10, 15, 12, 0, 0, 23, 0, }, /* 582 */ - { 24, 26, 12, 0, 0, 24, 0, }, /* 583 */ - { 10, 26, 14, 0, 0, 23, 0, }, /* 584 */ - { 30, 26, 12, 0, 0, 30, 0, }, /* 585 */ - { 23, 7, 12, 0, 0, 23, 0, }, /* 586 */ - { 61, 7, 12, 0, 0, 61, 0, }, /* 587 */ - { 61, 6, 12, 0, 0, 61, 0, }, /* 588 */ - { 61, 26, 12, 0, 0, 61, 0, }, /* 589 */ - { 86, 7, 12, 0, 0, 86, 0, }, /* 590 */ - { 86, 6, 12, 0, 0, 86, 0, }, /* 591 */ - { 86, 21, 12, 0, 0, 86, 0, }, /* 592 */ - { 77, 7, 12, 0, 0, 77, 0, }, /* 593 */ - { 77, 6, 12, 0, 0, 77, 0, }, /* 594 */ - { 77, 21, 12, 0, 0, 77, 0, }, /* 595 */ - { 77, 13, 12, 0, 0, 77, 0, }, /* 596 */ - { 13, 9, 12, 108, 1, 13, 0, }, /* 597 */ - { 13, 5, 12, 108, -35267, 13, 0, }, /* 598 */ - { 13, 7, 12, 0, 0, 13, 0, }, /* 599 */ - { 13, 21, 12, 0, 0, 13, 0, }, /* 600 */ - { 79, 7, 12, 0, 0, 79, 0, }, /* 601 */ - { 79, 14, 12, 0, 0, 79, 0, }, /* 602 */ - 
{ 79, 12, 3, 0, 0, 79, 0, }, /* 603 */ - { 79, 21, 12, 0, 0, 79, 0, }, /* 604 */ - { 10, 24, 12, 0, 0, -61, 0, }, /* 605 */ - { 34, 9, 12, 0, -35332, 34, 0, }, /* 606 */ - { 34, 9, 12, 0, -42280, 34, 0, }, /* 607 */ - { 34, 5, 12, 0, 48, 34, 0, }, /* 608 */ - { 34, 9, 12, 0, -42308, 34, 0, }, /* 609 */ - { 34, 9, 12, 0, -42319, 34, 0, }, /* 610 */ - { 34, 9, 12, 0, -42315, 34, 0, }, /* 611 */ - { 34, 9, 12, 0, -42305, 34, 0, }, /* 612 */ - { 34, 9, 12, 0, -42258, 34, 0, }, /* 613 */ - { 34, 9, 12, 0, -42282, 34, 0, }, /* 614 */ - { 34, 9, 12, 0, -42261, 34, 0, }, /* 615 */ - { 34, 9, 12, 0, 928, 34, 0, }, /* 616 */ - { 34, 9, 12, 0, -48, 34, 0, }, /* 617 */ - { 34, 9, 12, 0, -42307, 34, 0, }, /* 618 */ - { 34, 9, 12, 0, -35384, 34, 0, }, /* 619 */ - { 49, 7, 12, 0, 0, 49, 0, }, /* 620 */ - { 49, 12, 3, 0, 0, 49, 0, }, /* 621 */ - { 49, 10, 5, 0, 0, 49, 0, }, /* 622 */ - { 49, 26, 12, 0, 0, 49, 0, }, /* 623 */ - { 10, 15, 12, 0, 0, -224, 0, }, /* 624 */ - { 10, 15, 12, 0, 0, -210, 0, }, /* 625 */ - { 10, 26, 12, 0, 0, -171, 0, }, /* 626 */ - { 10, 23, 12, 0, 0, -171, 0, }, /* 627 */ - { 65, 7, 12, 0, 0, 65, 0, }, /* 628 */ - { 65, 21, 12, 0, 0, 65, 0, }, /* 629 */ - { 75, 10, 5, 0, 0, 75, 0, }, /* 630 */ - { 75, 7, 12, 0, 0, 75, 0, }, /* 631 */ - { 75, 12, 3, 0, 0, 75, 0, }, /* 632 */ - { 75, 21, 12, 0, 0, 75, 0, }, /* 633 */ - { 75, 13, 12, 0, 0, 75, 0, }, /* 634 */ - { 15, 12, 3, 0, 0, -13, 0, }, /* 635 */ - { 15, 7, 12, 0, 0, -46, 0, }, /* 636 */ - { 69, 13, 12, 0, 0, 69, 0, }, /* 637 */ - { 69, 7, 12, 0, 0, 69, 0, }, /* 638 */ - { 69, 12, 3, 0, 0, 69, 0, }, /* 639 */ - { 10, 21, 12, 0, 0, -108, 0, }, /* 640 */ - { 69, 21, 12, 0, 0, 69, 0, }, /* 641 */ - { 74, 7, 12, 0, 0, 74, 0, }, /* 642 */ - { 74, 12, 3, 0, 0, 74, 0, }, /* 643 */ - { 74, 10, 5, 0, 0, 74, 0, }, /* 644 */ - { 74, 21, 12, 0, 0, 74, 0, }, /* 645 */ - { 84, 12, 3, 0, 0, 84, 0, }, /* 646 */ - { 84, 10, 5, 0, 0, 84, 0, }, /* 647 */ - { 84, 7, 12, 0, 0, 84, 0, }, /* 648 */ - { 84, 21, 12, 0, 0, 84, 0, 
}, /* 649 */ - { 10, 6, 12, 0, 0, -19, 0, }, /* 650 */ - { 84, 13, 12, 0, 0, 84, 0, }, /* 651 */ - { 39, 6, 12, 0, 0, 39, 0, }, /* 652 */ - { 68, 7, 12, 0, 0, 68, 0, }, /* 653 */ - { 68, 12, 3, 0, 0, 68, 0, }, /* 654 */ - { 68, 10, 5, 0, 0, 68, 0, }, /* 655 */ - { 68, 13, 12, 0, 0, 68, 0, }, /* 656 */ - { 68, 21, 12, 0, 0, 68, 0, }, /* 657 */ - { 92, 7, 12, 0, 0, 92, 0, }, /* 658 */ - { 92, 12, 3, 0, 0, 92, 0, }, /* 659 */ - { 92, 6, 12, 0, 0, 92, 0, }, /* 660 */ - { 92, 21, 12, 0, 0, 92, 0, }, /* 661 */ - { 87, 7, 12, 0, 0, 87, 0, }, /* 662 */ - { 87, 10, 5, 0, 0, 87, 0, }, /* 663 */ - { 87, 12, 3, 0, 0, 87, 0, }, /* 664 */ - { 87, 21, 12, 0, 0, 87, 0, }, /* 665 */ - { 87, 6, 12, 0, 0, 87, 0, }, /* 666 */ - { 34, 5, 12, 0, -928, 34, 0, }, /* 667 */ - { 9, 5, 12, 0, -38864, 9, 0, }, /* 668 */ - { 87, 13, 12, 0, 0, 87, 0, }, /* 669 */ - { 24, 7, 9, 0, 0, 24, 0, }, /* 670 */ - { 24, 7, 10, 0, 0, 24, 0, }, /* 671 */ - { 0, 4, 12, 0, 0, 0, 0, }, /* 672 */ - { 0, 3, 12, 0, 0, 0, 0, }, /* 673 */ - { 26, 25, 12, 0, 0, 26, 0, }, /* 674 */ - { 1, 24, 12, 0, 0, 1, 0, }, /* 675 */ - { 1, 7, 12, 0, 0, -10, 0, }, /* 676 */ - { 1, 26, 12, 0, 0, -10, 0, }, /* 677 */ - { 10, 6, 3, 0, 0, -64, 0, }, /* 678 */ - { 36, 7, 12, 0, 0, 36, 0, }, /* 679 */ - { 10, 21, 12, 0, 0, -22, 0, }, /* 680 */ - { 10, 15, 12, 0, 0, -92, 0, }, /* 681 */ - { 10, 26, 12, 0, 0, -22, 0, }, /* 682 */ - { 20, 14, 12, 0, 0, 20, 0, }, /* 683 */ - { 20, 15, 12, 0, 0, 20, 0, }, /* 684 */ - { 20, 26, 12, 0, 0, 20, 0, }, /* 685 */ - { 71, 7, 12, 0, 0, 71, 0, }, /* 686 */ - { 67, 7, 12, 0, 0, 67, 0, }, /* 687 */ - { 28, 12, 3, 0, 0, -1, 0, }, /* 688 */ - { 10, 15, 12, 0, 0, -1, 0, }, /* 689 */ - { 42, 7, 12, 0, 0, 42, 0, }, /* 690 */ - { 42, 15, 12, 0, 0, 42, 0, }, /* 691 */ - { 19, 7, 12, 0, 0, 19, 0, }, /* 692 */ - { 19, 14, 12, 0, 0, 19, 0, }, /* 693 */ - { 118, 7, 12, 0, 0, 118, 0, }, /* 694 */ - { 118, 12, 3, 0, 0, 118, 0, }, /* 695 */ - { 60, 7, 12, 0, 0, 60, 0, }, /* 696 */ - { 60, 21, 12, 0, 0, 60, 0, }, /* 
697 */ - { 43, 7, 12, 0, 0, 43, 0, }, /* 698 */ - { 43, 21, 12, 0, 0, 43, 0, }, /* 699 */ - { 43, 14, 12, 0, 0, 43, 0, }, /* 700 */ - { 14, 9, 12, 0, 40, 14, 0, }, /* 701 */ - { 14, 5, 12, 0, -40, 14, 0, }, /* 702 */ - { 47, 7, 12, 0, 0, 47, 0, }, /* 703 */ - { 45, 7, 12, 0, 0, 45, 0, }, /* 704 */ - { 45, 13, 12, 0, 0, 45, 0, }, /* 705 */ - { 136, 9, 12, 0, 40, 136, 0, }, /* 706 */ - { 136, 5, 12, 0, -40, 136, 0, }, /* 707 */ - { 106, 7, 12, 0, 0, 106, 0, }, /* 708 */ - { 104, 7, 12, 0, 0, 104, 0, }, /* 709 */ - { 104, 21, 12, 0, 0, 104, 0, }, /* 710 */ - { 110, 7, 12, 0, 0, 110, 0, }, /* 711 */ - { 12, 7, 12, 0, 0, 12, 0, }, /* 712 */ - { 81, 7, 12, 0, 0, 81, 0, }, /* 713 */ - { 81, 21, 12, 0, 0, 81, 0, }, /* 714 */ - { 81, 15, 12, 0, 0, 81, 0, }, /* 715 */ - { 120, 7, 12, 0, 0, 120, 0, }, /* 716 */ - { 120, 26, 12, 0, 0, 120, 0, }, /* 717 */ - { 120, 15, 12, 0, 0, 120, 0, }, /* 718 */ - { 116, 7, 12, 0, 0, 116, 0, }, /* 719 */ - { 116, 15, 12, 0, 0, 116, 0, }, /* 720 */ - { 128, 7, 12, 0, 0, 128, 0, }, /* 721 */ - { 128, 15, 12, 0, 0, 128, 0, }, /* 722 */ - { 66, 7, 12, 0, 0, 66, 0, }, /* 723 */ - { 66, 15, 12, 0, 0, 66, 0, }, /* 724 */ - { 66, 21, 12, 0, 0, 66, 0, }, /* 725 */ - { 72, 7, 12, 0, 0, 72, 0, }, /* 726 */ - { 72, 21, 12, 0, 0, 72, 0, }, /* 727 */ - { 98, 7, 12, 0, 0, 98, 0, }, /* 728 */ - { 97, 7, 12, 0, 0, 97, 0, }, /* 729 */ - { 97, 15, 12, 0, 0, 97, 0, }, /* 730 */ - { 31, 7, 12, 0, 0, 31, 0, }, /* 731 */ - { 31, 12, 3, 0, 0, 31, 0, }, /* 732 */ - { 31, 15, 12, 0, 0, 31, 0, }, /* 733 */ - { 31, 21, 12, 0, 0, 31, 0, }, /* 734 */ - { 88, 7, 12, 0, 0, 88, 0, }, /* 735 */ - { 88, 15, 12, 0, 0, 88, 0, }, /* 736 */ - { 88, 21, 12, 0, 0, 88, 0, }, /* 737 */ - { 117, 7, 12, 0, 0, 117, 0, }, /* 738 */ - { 117, 15, 12, 0, 0, 117, 0, }, /* 739 */ - { 112, 7, 12, 0, 0, 112, 0, }, /* 740 */ - { 112, 26, 12, 0, 0, 112, 0, }, /* 741 */ - { 112, 12, 3, 0, 0, 112, 0, }, /* 742 */ - { 112, 15, 12, 0, 0, 112, 0, }, /* 743 */ - { 112, 21, 12, 0, 0, 112, 0, }, /* 744 
*/ - { 78, 7, 12, 0, 0, 78, 0, }, /* 745 */ - { 78, 21, 12, 0, 0, 78, 0, }, /* 746 */ - { 83, 7, 12, 0, 0, 83, 0, }, /* 747 */ - { 83, 15, 12, 0, 0, 83, 0, }, /* 748 */ - { 82, 7, 12, 0, 0, 82, 0, }, /* 749 */ - { 82, 15, 12, 0, 0, 82, 0, }, /* 750 */ - { 121, 7, 12, 0, 0, 121, 0, }, /* 751 */ - { 121, 21, 12, 0, 0, 121, 0, }, /* 752 */ - { 121, 15, 12, 0, 0, 121, 0, }, /* 753 */ - { 89, 7, 12, 0, 0, 89, 0, }, /* 754 */ - { 130, 9, 12, 0, 64, 130, 0, }, /* 755 */ - { 130, 5, 12, 0, -64, 130, 0, }, /* 756 */ - { 130, 15, 12, 0, 0, 130, 0, }, /* 757 */ - { 144, 7, 12, 0, 0, 144, 0, }, /* 758 */ - { 144, 12, 3, 0, 0, 144, 0, }, /* 759 */ - { 144, 13, 12, 0, 0, 144, 0, }, /* 760 */ - { 1, 15, 12, 0, 0, 1, 0, }, /* 761 */ - { 156, 7, 12, 0, 0, 156, 0, }, /* 762 */ - { 156, 12, 3, 0, 0, 156, 0, }, /* 763 */ - { 156, 17, 12, 0, 0, 156, 0, }, /* 764 */ - { 147, 7, 12, 0, 0, 147, 0, }, /* 765 */ - { 147, 15, 12, 0, 0, 147, 0, }, /* 766 */ - { 148, 7, 12, 0, 0, 148, 0, }, /* 767 */ - { 148, 12, 3, 0, 0, 148, 0, }, /* 768 */ - { 148, 15, 12, 0, 0, 148, 0, }, /* 769 */ - { 148, 21, 12, 0, 0, 148, 0, }, /* 770 */ - { 153, 7, 12, 0, 0, 153, 0, }, /* 771 */ - { 153, 15, 12, 0, 0, 153, 0, }, /* 772 */ - { 149, 7, 12, 0, 0, 149, 0, }, /* 773 */ - { 94, 10, 5, 0, 0, 94, 0, }, /* 774 */ - { 94, 12, 3, 0, 0, 94, 0, }, /* 775 */ - { 94, 7, 12, 0, 0, 94, 0, }, /* 776 */ - { 94, 21, 12, 0, 0, 94, 0, }, /* 777 */ - { 94, 15, 12, 0, 0, 94, 0, }, /* 778 */ - { 94, 13, 12, 0, 0, 94, 0, }, /* 779 */ - { 85, 12, 3, 0, 0, 85, 0, }, /* 780 */ - { 85, 10, 5, 0, 0, 85, 0, }, /* 781 */ - { 85, 7, 12, 0, 0, 85, 0, }, /* 782 */ - { 85, 21, 12, 0, 0, 85, 0, }, /* 783 */ - { 85, 1, 4, 0, 0, 85, 0, }, /* 784 */ - { 101, 7, 12, 0, 0, 101, 0, }, /* 785 */ - { 101, 13, 12, 0, 0, 101, 0, }, /* 786 */ - { 96, 12, 3, 0, 0, 96, 0, }, /* 787 */ - { 96, 7, 12, 0, 0, 96, 0, }, /* 788 */ - { 96, 10, 5, 0, 0, 96, 0, }, /* 789 */ - { 96, 13, 12, 0, 0, 96, 0, }, /* 790 */ - { 96, 21, 12, 0, 0, 96, 0, }, /* 791 */ - { 
111, 7, 12, 0, 0, 111, 0, }, /* 792 */ - { 111, 12, 3, 0, 0, 111, 0, }, /* 793 */ - { 111, 21, 12, 0, 0, 111, 0, }, /* 794 */ - { 100, 12, 3, 0, 0, 100, 0, }, /* 795 */ - { 100, 10, 5, 0, 0, 100, 0, }, /* 796 */ - { 100, 7, 12, 0, 0, 100, 0, }, /* 797 */ - { 100, 7, 4, 0, 0, 100, 0, }, /* 798 */ - { 100, 21, 12, 0, 0, 100, 0, }, /* 799 */ - { 100, 13, 12, 0, 0, 100, 0, }, /* 800 */ - { 48, 15, 12, 0, 0, 48, 0, }, /* 801 */ - { 108, 7, 12, 0, 0, 108, 0, }, /* 802 */ - { 108, 10, 5, 0, 0, 108, 0, }, /* 803 */ - { 108, 12, 3, 0, 0, 108, 0, }, /* 804 */ - { 108, 21, 12, 0, 0, 108, 0, }, /* 805 */ - { 129, 7, 12, 0, 0, 129, 0, }, /* 806 */ - { 129, 21, 12, 0, 0, 129, 0, }, /* 807 */ - { 109, 7, 12, 0, 0, 109, 0, }, /* 808 */ - { 109, 12, 3, 0, 0, 109, 0, }, /* 809 */ - { 109, 10, 5, 0, 0, 109, 0, }, /* 810 */ - { 109, 13, 12, 0, 0, 109, 0, }, /* 811 */ - { 107, 12, 3, 0, 0, 107, 0, }, /* 812 */ - { 107, 12, 3, 0, 0, -52, 0, }, /* 813 */ - { 107, 10, 5, 0, 0, 107, 0, }, /* 814 */ - { 107, 10, 5, 0, 0, -52, 0, }, /* 815 */ - { 107, 7, 12, 0, 0, 107, 0, }, /* 816 */ - { 28, 12, 3, 0, 0, -52, 0, }, /* 817 */ - { 107, 10, 3, 0, 0, 107, 0, }, /* 818 */ - { 135, 7, 12, 0, 0, 135, 0, }, /* 819 */ - { 135, 10, 5, 0, 0, 135, 0, }, /* 820 */ - { 135, 12, 3, 0, 0, 135, 0, }, /* 821 */ - { 135, 21, 12, 0, 0, 135, 0, }, /* 822 */ - { 135, 13, 12, 0, 0, 135, 0, }, /* 823 */ - { 124, 7, 12, 0, 0, 124, 0, }, /* 824 */ - { 124, 10, 3, 0, 0, 124, 0, }, /* 825 */ - { 124, 10, 5, 0, 0, 124, 0, }, /* 826 */ - { 124, 12, 3, 0, 0, 124, 0, }, /* 827 */ - { 124, 21, 12, 0, 0, 124, 0, }, /* 828 */ - { 124, 13, 12, 0, 0, 124, 0, }, /* 829 */ - { 123, 7, 12, 0, 0, 123, 0, }, /* 830 */ - { 123, 10, 3, 0, 0, 123, 0, }, /* 831 */ - { 123, 10, 5, 0, 0, 123, 0, }, /* 832 */ - { 123, 12, 3, 0, 0, 123, 0, }, /* 833 */ - { 123, 21, 12, 0, 0, 123, 0, }, /* 834 */ - { 114, 7, 12, 0, 0, 114, 0, }, /* 835 */ - { 114, 10, 5, 0, 0, 114, 0, }, /* 836 */ - { 114, 12, 3, 0, 0, 114, 0, }, /* 837 */ - { 114, 21, 12, 
0, 0, 114, 0, }, /* 838 */ - { 114, 13, 12, 0, 0, 114, 0, }, /* 839 */ - { 102, 7, 12, 0, 0, 102, 0, }, /* 840 */ - { 102, 12, 3, 0, 0, 102, 0, }, /* 841 */ - { 102, 10, 5, 0, 0, 102, 0, }, /* 842 */ - { 102, 13, 12, 0, 0, 102, 0, }, /* 843 */ - { 126, 7, 12, 0, 0, 126, 0, }, /* 844 */ - { 126, 12, 3, 0, 0, 126, 0, }, /* 845 */ - { 126, 10, 5, 0, 0, 126, 0, }, /* 846 */ - { 126, 13, 12, 0, 0, 126, 0, }, /* 847 */ - { 126, 15, 12, 0, 0, 126, 0, }, /* 848 */ - { 126, 21, 12, 0, 0, 126, 0, }, /* 849 */ - { 126, 26, 12, 0, 0, 126, 0, }, /* 850 */ - { 142, 7, 12, 0, 0, 142, 0, }, /* 851 */ - { 142, 10, 5, 0, 0, 142, 0, }, /* 852 */ - { 142, 12, 3, 0, 0, 142, 0, }, /* 853 */ - { 142, 21, 12, 0, 0, 142, 0, }, /* 854 */ - { 125, 9, 12, 0, 32, 125, 0, }, /* 855 */ - { 125, 5, 12, 0, -32, 125, 0, }, /* 856 */ - { 125, 13, 12, 0, 0, 125, 0, }, /* 857 */ - { 125, 15, 12, 0, 0, 125, 0, }, /* 858 */ - { 125, 7, 12, 0, 0, 125, 0, }, /* 859 */ - { 154, 7, 12, 0, 0, 154, 0, }, /* 860 */ - { 154, 10, 3, 0, 0, 154, 0, }, /* 861 */ - { 154, 10, 5, 0, 0, 154, 0, }, /* 862 */ - { 154, 12, 3, 0, 0, 154, 0, }, /* 863 */ - { 154, 7, 4, 0, 0, 154, 0, }, /* 864 */ - { 154, 21, 12, 0, 0, 154, 0, }, /* 865 */ - { 154, 13, 12, 0, 0, 154, 0, }, /* 866 */ - { 150, 7, 12, 0, 0, 150, 0, }, /* 867 */ - { 150, 10, 5, 0, 0, 150, 0, }, /* 868 */ - { 150, 12, 3, 0, 0, 150, 0, }, /* 869 */ - { 150, 21, 12, 0, 0, 150, 0, }, /* 870 */ - { 141, 7, 12, 0, 0, 141, 0, }, /* 871 */ - { 141, 12, 3, 0, 0, 141, 0, }, /* 872 */ - { 141, 10, 5, 0, 0, 141, 0, }, /* 873 */ - { 141, 7, 4, 0, 0, 141, 0, }, /* 874 */ - { 141, 21, 12, 0, 0, 141, 0, }, /* 875 */ - { 140, 7, 12, 0, 0, 140, 0, }, /* 876 */ - { 140, 12, 3, 0, 0, 140, 0, }, /* 877 */ - { 140, 10, 5, 0, 0, 140, 0, }, /* 878 */ - { 140, 7, 4, 0, 0, 140, 0, }, /* 879 */ - { 140, 21, 12, 0, 0, 140, 0, }, /* 880 */ - { 122, 7, 12, 0, 0, 122, 0, }, /* 881 */ - { 133, 7, 12, 0, 0, 133, 0, }, /* 882 */ - { 133, 10, 5, 0, 0, 133, 0, }, /* 883 */ - { 133, 12, 3, 0, 0, 
133, 0, }, /* 884 */ - { 133, 21, 12, 0, 0, 133, 0, }, /* 885 */ - { 133, 13, 12, 0, 0, 133, 0, }, /* 886 */ - { 133, 15, 12, 0, 0, 133, 0, }, /* 887 */ - { 134, 21, 12, 0, 0, 134, 0, }, /* 888 */ - { 134, 7, 12, 0, 0, 134, 0, }, /* 889 */ - { 134, 12, 3, 0, 0, 134, 0, }, /* 890 */ - { 134, 10, 5, 0, 0, 134, 0, }, /* 891 */ - { 138, 7, 12, 0, 0, 138, 0, }, /* 892 */ - { 138, 12, 3, 0, 0, 138, 0, }, /* 893 */ - { 138, 7, 4, 0, 0, 138, 0, }, /* 894 */ - { 138, 13, 12, 0, 0, 138, 0, }, /* 895 */ - { 143, 7, 12, 0, 0, 143, 0, }, /* 896 */ - { 143, 10, 5, 0, 0, 143, 0, }, /* 897 */ - { 143, 12, 3, 0, 0, 143, 0, }, /* 898 */ - { 143, 13, 12, 0, 0, 143, 0, }, /* 899 */ - { 145, 7, 12, 0, 0, 145, 0, }, /* 900 */ - { 145, 12, 3, 0, 0, 145, 0, }, /* 901 */ - { 145, 10, 5, 0, 0, 145, 0, }, /* 902 */ - { 145, 21, 12, 0, 0, 145, 0, }, /* 903 */ - { 54, 15, 12, 0, 0, 54, 0, }, /* 904 */ - { 54, 21, 12, 0, 0, 54, 0, }, /* 905 */ - { 63, 7, 12, 0, 0, 63, 0, }, /* 906 */ - { 63, 14, 12, 0, 0, 63, 0, }, /* 907 */ - { 63, 21, 12, 0, 0, 63, 0, }, /* 908 */ - { 80, 7, 12, 0, 0, 80, 0, }, /* 909 */ - { 80, 1, 2, 0, 0, 80, 0, }, /* 910 */ - { 127, 7, 12, 0, 0, 127, 0, }, /* 911 */ - { 115, 7, 12, 0, 0, 115, 0, }, /* 912 */ - { 115, 13, 12, 0, 0, 115, 0, }, /* 913 */ - { 115, 21, 12, 0, 0, 115, 0, }, /* 914 */ - { 103, 7, 12, 0, 0, 103, 0, }, /* 915 */ - { 103, 12, 3, 0, 0, 103, 0, }, /* 916 */ - { 103, 21, 12, 0, 0, 103, 0, }, /* 917 */ - { 119, 7, 12, 0, 0, 119, 0, }, /* 918 */ - { 119, 12, 3, 0, 0, 119, 0, }, /* 919 */ - { 119, 21, 12, 0, 0, 119, 0, }, /* 920 */ - { 119, 26, 12, 0, 0, 119, 0, }, /* 921 */ - { 119, 6, 12, 0, 0, 119, 0, }, /* 922 */ - { 119, 13, 12, 0, 0, 119, 0, }, /* 923 */ - { 119, 15, 12, 0, 0, 119, 0, }, /* 924 */ - { 146, 9, 12, 0, 32, 146, 0, }, /* 925 */ - { 146, 5, 12, 0, -32, 146, 0, }, /* 926 */ - { 146, 15, 12, 0, 0, 146, 0, }, /* 927 */ - { 146, 21, 12, 0, 0, 146, 0, }, /* 928 */ - { 99, 7, 12, 0, 0, 99, 0, }, /* 929 */ - { 99, 12, 3, 0, 0, 99, 0, }, /* 930 
*/ - { 99, 10, 5, 0, 0, 99, 0, }, /* 931 */ - { 99, 6, 12, 0, 0, 99, 0, }, /* 932 */ - { 137, 6, 12, 0, 0, 137, 0, }, /* 933 */ - { 139, 6, 12, 0, 0, 139, 0, }, /* 934 */ - { 155, 12, 3, 0, 0, 155, 0, }, /* 935 */ - { 23, 10, 5, 0, 0, 23, 0, }, /* 936 */ - { 137, 7, 12, 0, 0, 137, 0, }, /* 937 */ - { 155, 7, 12, 0, 0, 155, 0, }, /* 938 */ - { 139, 7, 12, 0, 0, 139, 0, }, /* 939 */ - { 105, 7, 12, 0, 0, 105, 0, }, /* 940 */ - { 105, 26, 12, 0, 0, 105, 0, }, /* 941 */ - { 105, 12, 3, 0, 0, 105, 0, }, /* 942 */ - { 105, 21, 12, 0, 0, 105, 0, }, /* 943 */ - { 10, 1, 2, 0, 0, 105, 0, }, /* 944 */ - { 10, 10, 3, 0, 0, 10, 0, }, /* 945 */ - { 10, 10, 5, 0, 0, 10, 0, }, /* 946 */ - { 20, 12, 3, 0, 0, 20, 0, }, /* 947 */ - { 131, 26, 12, 0, 0, 131, 0, }, /* 948 */ - { 131, 12, 3, 0, 0, 131, 0, }, /* 949 */ - { 131, 21, 12, 0, 0, 131, 0, }, /* 950 */ - { 18, 12, 3, 0, 0, 18, 0, }, /* 951 */ - { 151, 7, 12, 0, 0, 151, 0, }, /* 952 */ - { 151, 12, 3, 0, 0, 151, 0, }, /* 953 */ - { 151, 6, 12, 0, 0, 151, 0, }, /* 954 */ - { 151, 13, 12, 0, 0, 151, 0, }, /* 955 */ - { 151, 26, 12, 0, 0, 151, 0, }, /* 956 */ - { 152, 7, 12, 0, 0, 152, 0, }, /* 957 */ - { 152, 12, 3, 0, 0, 152, 0, }, /* 958 */ - { 152, 13, 12, 0, 0, 152, 0, }, /* 959 */ - { 152, 23, 12, 0, 0, 152, 0, }, /* 960 */ - { 113, 7, 12, 0, 0, 113, 0, }, /* 961 */ - { 113, 15, 12, 0, 0, 113, 0, }, /* 962 */ - { 113, 12, 3, 0, 0, 113, 0, }, /* 963 */ - { 132, 9, 12, 0, 34, 132, 0, }, /* 964 */ - { 132, 5, 12, 0, -34, 132, 0, }, /* 965 */ - { 132, 12, 3, 0, 0, 132, 0, }, /* 966 */ - { 132, 6, 12, 0, 0, 132, 0, }, /* 967 */ - { 132, 13, 12, 0, 0, 132, 0, }, /* 968 */ - { 132, 21, 12, 0, 0, 132, 0, }, /* 969 */ - { 0, 2, 14, 0, 0, 0, 0, }, /* 970 */ - { 10, 26, 11, 0, 0, 10, 0, }, /* 971 */ - { 27, 26, 12, 0, 0, 27, 0, }, /* 972 */ - { 10, 24, 3, 0, 0, 10, 0, }, /* 973 */ - { 10, 1, 3, 0, 0, 10, 0, }, /* 974 */ + { 1, 1, 2, 0, 0, -82, 0, }, /* 223 */ + { 10, 21, 12, 0, 0, -164, 0, }, /* 224 */ + { 1, 7, 12, 0, 0, 1, 0, }, /* 
225 */ + { 10, 6, 12, 0, 0, -181, 0, }, /* 226 */ + { 28, 12, 3, 0, 0, -10, 0, }, /* 227 */ + { 1, 13, 12, 0, 0, -86, 0, }, /* 228 */ + { 1, 21, 12, 0, 0, -4, 0, }, /* 229 */ + { 1, 6, 12, 0, 0, 1, 0, }, /* 230 */ + { 1, 13, 12, 0, 0, 1, 0, }, /* 231 */ + { 50, 21, 12, 0, 0, 50, 0, }, /* 232 */ + { 50, 1, 4, 0, 0, 50, 0, }, /* 233 */ + { 50, 7, 12, 0, 0, 50, 0, }, /* 234 */ + { 50, 12, 3, 0, 0, 50, 0, }, /* 235 */ + { 56, 7, 12, 0, 0, 56, 0, }, /* 236 */ + { 56, 12, 3, 0, 0, 56, 0, }, /* 237 */ + { 64, 13, 12, 0, 0, 64, 0, }, /* 238 */ + { 64, 7, 12, 0, 0, 64, 0, }, /* 239 */ + { 64, 12, 3, 0, 0, 64, 0, }, /* 240 */ + { 64, 6, 12, 0, 0, 64, 0, }, /* 241 */ + { 64, 26, 12, 0, 0, 64, 0, }, /* 242 */ + { 64, 21, 12, 0, 0, 64, 0, }, /* 243 */ + { 64, 23, 12, 0, 0, 64, 0, }, /* 244 */ + { 90, 7, 12, 0, 0, 90, 0, }, /* 245 */ + { 90, 12, 3, 0, 0, 90, 0, }, /* 246 */ + { 90, 6, 12, 0, 0, 90, 0, }, /* 247 */ + { 90, 21, 12, 0, 0, 90, 0, }, /* 248 */ + { 95, 7, 12, 0, 0, 95, 0, }, /* 249 */ + { 95, 12, 3, 0, 0, 95, 0, }, /* 250 */ + { 95, 21, 12, 0, 0, 95, 0, }, /* 251 */ + { 1, 24, 12, 0, 0, 1, 0, }, /* 252 */ + { 15, 12, 3, 0, 0, 15, 0, }, /* 253 */ + { 15, 10, 5, 0, 0, 15, 0, }, /* 254 */ + { 15, 7, 12, 0, 0, 15, 0, }, /* 255 */ + { 28, 12, 3, 0, 0, -216, 0, }, /* 256 */ + { 28, 12, 3, 0, 0, -203, 0, }, /* 257 */ + { 10, 21, 12, 0, 0, -259, 0, }, /* 258 */ + { 10, 21, 12, 0, 0, -280, 0, }, /* 259 */ + { 15, 13, 12, 0, 0, -132, 0, }, /* 260 */ + { 15, 21, 12, 0, 0, 15, 0, }, /* 261 */ + { 15, 6, 12, 0, 0, 15, 0, }, /* 262 */ + { 3, 7, 12, 0, 0, 3, 0, }, /* 263 */ + { 3, 12, 3, 0, 0, 3, 0, }, /* 264 */ + { 3, 10, 5, 0, 0, 3, 0, }, /* 265 */ + { 3, 10, 3, 0, 0, 3, 0, }, /* 266 */ + { 3, 13, 12, 0, 0, -90, 0, }, /* 267 */ + { 3, 23, 12, 0, 0, 3, 0, }, /* 268 */ + { 3, 15, 12, 0, 0, 3, 0, }, /* 269 */ + { 3, 26, 12, 0, 0, 3, 0, }, /* 270 */ + { 3, 21, 12, 0, 0, 3, 0, }, /* 271 */ + { 22, 12, 3, 0, 0, 22, 0, }, /* 272 */ + { 22, 10, 5, 0, 0, 22, 0, }, /* 273 */ + { 22, 7, 12, 
0, 0, 22, 0, }, /* 274 */ + { 22, 13, 12, 0, 0, -61, 0, }, /* 275 */ + { 22, 21, 12, 0, 0, 22, 0, }, /* 276 */ + { 21, 12, 3, 0, 0, 21, 0, }, /* 277 */ + { 21, 10, 5, 0, 0, 21, 0, }, /* 278 */ + { 21, 7, 12, 0, 0, 21, 0, }, /* 279 */ + { 21, 13, 12, 0, 0, -58, 0, }, /* 280 */ + { 21, 21, 12, 0, 0, 21, 0, }, /* 281 */ + { 21, 23, 12, 0, 0, 21, 0, }, /* 282 */ + { 44, 12, 3, 0, 0, 44, 0, }, /* 283 */ + { 44, 10, 5, 0, 0, 44, 0, }, /* 284 */ + { 44, 7, 12, 0, 0, 44, 0, }, /* 285 */ + { 44, 10, 3, 0, 0, 44, 0, }, /* 286 */ + { 44, 13, 12, 0, 0, 44, 0, }, /* 287 */ + { 44, 26, 12, 0, 0, 44, 0, }, /* 288 */ + { 44, 15, 12, 0, 0, 44, 0, }, /* 289 */ + { 54, 12, 3, 0, 0, 54, 0, }, /* 290 */ + { 54, 7, 12, 0, 0, 54, 0, }, /* 291 */ + { 54, 10, 3, 0, 0, 54, 0, }, /* 292 */ + { 54, 10, 5, 0, 0, 54, 0, }, /* 293 */ + { 54, 13, 12, 0, 0, -55, 0, }, /* 294 */ + { 54, 15, 12, 0, 0, -55, 0, }, /* 295 */ + { 54, 26, 12, 0, 0, -55, 0, }, /* 296 */ + { 54, 26, 12, 0, 0, 54, 0, }, /* 297 */ + { 54, 23, 12, 0, 0, 54, 0, }, /* 298 */ + { 55, 12, 3, 0, 0, 55, 0, }, /* 299 */ + { 55, 10, 5, 0, 0, 55, 0, }, /* 300 */ + { 55, 7, 12, 0, 0, 55, 0, }, /* 301 */ + { 55, 13, 12, 0, 0, 55, 0, }, /* 302 */ + { 55, 21, 12, 0, 0, 55, 0, }, /* 303 */ + { 55, 15, 12, 0, 0, 55, 0, }, /* 304 */ + { 55, 26, 12, 0, 0, 55, 0, }, /* 305 */ + { 29, 7, 12, 0, 0, 29, 0, }, /* 306 */ + { 29, 12, 3, 0, 0, 29, 0, }, /* 307 */ + { 29, 10, 5, 0, 0, 29, 0, }, /* 308 */ + { 29, 21, 12, 0, 0, 29, 0, }, /* 309 */ + { 29, 10, 3, 0, 0, 29, 0, }, /* 310 */ + { 29, 13, 12, 0, 0, -70, 0, }, /* 311 */ + { 37, 12, 3, 0, 0, 37, 0, }, /* 312 */ + { 37, 10, 5, 0, 0, 37, 0, }, /* 313 */ + { 37, 7, 12, 0, 0, 37, 0, }, /* 314 */ + { 37, 10, 3, 0, 0, 37, 0, }, /* 315 */ + { 37, 7, 4, 0, 0, 37, 0, }, /* 316 */ + { 37, 26, 12, 0, 0, 37, 0, }, /* 317 */ + { 37, 15, 12, 0, 0, 37, 0, }, /* 318 */ + { 37, 13, 12, 0, 0, 37, 0, }, /* 319 */ + { 48, 12, 3, 0, 0, 48, 0, }, /* 320 */ + { 48, 10, 5, 0, 0, 48, 0, }, /* 321 */ + { 48, 7, 12, 0, 
0, 48, 0, }, /* 322 */ + { 48, 10, 3, 0, 0, 48, 0, }, /* 323 */ + { 48, 13, 12, 0, 0, 48, 0, }, /* 324 */ + { 48, 21, 12, 0, 0, 48, 0, }, /* 325 */ + { 57, 7, 12, 0, 0, 57, 0, }, /* 326 */ + { 57, 12, 3, 0, 0, 57, 0, }, /* 327 */ + { 57, 7, 5, 0, 0, 57, 0, }, /* 328 */ + { 57, 6, 12, 0, 0, 57, 0, }, /* 329 */ + { 57, 21, 12, 0, 0, 57, 0, }, /* 330 */ + { 57, 13, 12, 0, 0, 57, 0, }, /* 331 */ + { 33, 7, 12, 0, 0, 33, 0, }, /* 332 */ + { 33, 12, 3, 0, 0, 33, 0, }, /* 333 */ + { 33, 7, 5, 0, 0, 33, 0, }, /* 334 */ + { 33, 6, 12, 0, 0, 33, 0, }, /* 335 */ + { 33, 13, 12, 0, 0, 33, 0, }, /* 336 */ + { 58, 7, 12, 0, 0, 58, 0, }, /* 337 */ + { 58, 26, 12, 0, 0, 58, 0, }, /* 338 */ + { 58, 21, 12, 0, 0, 58, 0, }, /* 339 */ + { 58, 12, 3, 0, 0, 58, 0, }, /* 340 */ + { 58, 13, 12, 0, 0, 58, 0, }, /* 341 */ + { 58, 15, 12, 0, 0, 58, 0, }, /* 342 */ + { 58, 22, 12, 0, 0, 58, 0, }, /* 343 */ + { 58, 18, 12, 0, 0, 58, 0, }, /* 344 */ + { 58, 10, 5, 0, 0, 58, 0, }, /* 345 */ + { 39, 7, 12, 0, 0, 39, 0, }, /* 346 */ + { 39, 10, 12, 0, 0, 39, 0, }, /* 347 */ + { 39, 12, 3, 0, 0, 39, 0, }, /* 348 */ + { 39, 10, 5, 0, 0, 39, 0, }, /* 349 */ + { 39, 13, 12, 0, 0, -94, 0, }, /* 350 */ + { 39, 21, 12, 0, 0, 39, 0, }, /* 351 */ + { 39, 13, 12, 0, 0, 39, 0, }, /* 352 */ + { 39, 26, 12, 0, 0, 39, 0, }, /* 353 */ + { 17, 9, 12, 0, 7264, 17, 0, }, /* 354 */ + { 17, 5, 12, 0, 3008, 17, 0, }, /* 355 */ + { 10, 21, 12, 0, 0, -52, 0, }, /* 356 */ + { 17, 6, 12, 0, 0, 17, 0, }, /* 357 */ + { 24, 7, 6, 0, 0, 24, 0, }, /* 358 */ + { 24, 7, 7, 0, 0, 24, 0, }, /* 359 */ + { 24, 7, 8, 0, 0, 24, 0, }, /* 360 */ + { 16, 7, 12, 0, 0, 16, 0, }, /* 361 */ + { 16, 12, 3, 0, 0, 16, 0, }, /* 362 */ + { 16, 21, 12, 0, 0, 16, 0, }, /* 363 */ + { 16, 15, 12, 0, 0, 16, 0, }, /* 364 */ + { 16, 26, 12, 0, 0, 16, 0, }, /* 365 */ + { 9, 9, 12, 0, 38864, 9, 0, }, /* 366 */ + { 9, 9, 12, 0, 8, 9, 0, }, /* 367 */ + { 9, 5, 12, 0, -8, 9, 0, }, /* 368 */ + { 8, 17, 12, 0, 0, 8, 0, }, /* 369 */ + { 8, 7, 12, 0, 0, 8, 0, }, 
/* 370 */ + { 8, 26, 12, 0, 0, 8, 0, }, /* 371 */ + { 8, 21, 12, 0, 0, 8, 0, }, /* 372 */ + { 41, 29, 12, 0, 0, 41, 0, }, /* 373 */ + { 41, 7, 12, 0, 0, 41, 0, }, /* 374 */ + { 41, 22, 12, 0, 0, 41, 0, }, /* 375 */ + { 41, 18, 12, 0, 0, 41, 0, }, /* 376 */ + { 46, 7, 12, 0, 0, 46, 0, }, /* 377 */ + { 46, 14, 12, 0, 0, 46, 0, }, /* 378 */ + { 51, 7, 12, 0, 0, 51, 0, }, /* 379 */ + { 51, 12, 3, 0, 0, 51, 0, }, /* 380 */ + { 51, 10, 5, 0, 0, 51, 0, }, /* 381 */ + { 25, 7, 12, 0, 0, 25, 0, }, /* 382 */ + { 25, 12, 3, 0, 0, 25, 0, }, /* 383 */ + { 25, 10, 5, 0, 0, 25, 0, }, /* 384 */ + { 10, 21, 12, 0, 0, -127, 0, }, /* 385 */ + { 7, 7, 12, 0, 0, 7, 0, }, /* 386 */ + { 7, 12, 3, 0, 0, 7, 0, }, /* 387 */ + { 52, 7, 12, 0, 0, 52, 0, }, /* 388 */ + { 52, 12, 3, 0, 0, 52, 0, }, /* 389 */ + { 32, 7, 12, 0, 0, 32, 0, }, /* 390 */ + { 32, 12, 3, 0, 0, 32, 0, }, /* 391 */ + { 32, 10, 5, 0, 0, 32, 0, }, /* 392 */ + { 32, 21, 12, 0, 0, 32, 0, }, /* 393 */ + { 32, 6, 12, 0, 0, 32, 0, }, /* 394 */ + { 32, 23, 12, 0, 0, 32, 0, }, /* 395 */ + { 32, 13, 12, 0, 0, 32, 0, }, /* 396 */ + { 32, 15, 12, 0, 0, 32, 0, }, /* 397 */ + { 38, 21, 12, 0, 0, 38, 0, }, /* 398 */ + { 10, 21, 12, 0, 0, -79, 0, }, /* 399 */ + { 38, 17, 12, 0, 0, 38, 0, }, /* 400 */ + { 38, 12, 3, 0, 0, 38, 0, }, /* 401 */ + { 38, 1, 2, 0, 0, 38, 0, }, /* 402 */ + { 38, 13, 12, 0, 0, 38, 0, }, /* 403 */ + { 38, 7, 12, 0, 0, 38, 0, }, /* 404 */ + { 38, 6, 12, 0, 0, 38, 0, }, /* 405 */ + { 35, 7, 12, 0, 0, 35, 0, }, /* 406 */ + { 35, 12, 3, 0, 0, 35, 0, }, /* 407 */ + { 35, 10, 5, 0, 0, 35, 0, }, /* 408 */ + { 35, 26, 12, 0, 0, 35, 0, }, /* 409 */ + { 35, 21, 12, 0, 0, 35, 0, }, /* 410 */ + { 35, 13, 12, 0, 0, 35, 0, }, /* 411 */ + { 53, 7, 12, 0, 0, 53, 0, }, /* 412 */ + { 40, 7, 12, 0, 0, 40, 0, }, /* 413 */ + { 40, 13, 12, 0, 0, 40, 0, }, /* 414 */ + { 40, 15, 12, 0, 0, 40, 0, }, /* 415 */ + { 40, 26, 12, 0, 0, 40, 0, }, /* 416 */ + { 32, 26, 12, 0, 0, 32, 0, }, /* 417 */ + { 6, 7, 12, 0, 0, 6, 0, }, /* 418 */ + { 6, 
12, 3, 0, 0, 6, 0, }, /* 419 */ + { 6, 10, 5, 0, 0, 6, 0, }, /* 420 */ + { 6, 21, 12, 0, 0, 6, 0, }, /* 421 */ + { 91, 7, 12, 0, 0, 91, 0, }, /* 422 */ + { 91, 10, 5, 0, 0, 91, 0, }, /* 423 */ + { 91, 12, 3, 0, 0, 91, 0, }, /* 424 */ + { 91, 10, 12, 0, 0, 91, 0, }, /* 425 */ + { 91, 13, 12, 0, 0, 91, 0, }, /* 426 */ + { 91, 21, 12, 0, 0, 91, 0, }, /* 427 */ + { 91, 6, 12, 0, 0, 91, 0, }, /* 428 */ + { 28, 11, 3, 0, 0, 28, 0, }, /* 429 */ + { 62, 12, 3, 0, 0, 62, 0, }, /* 430 */ + { 62, 10, 5, 0, 0, 62, 0, }, /* 431 */ + { 62, 7, 12, 0, 0, 62, 0, }, /* 432 */ + { 62, 10, 3, 0, 0, 62, 0, }, /* 433 */ + { 62, 13, 12, 0, 0, 62, 0, }, /* 434 */ + { 62, 21, 12, 0, 0, 62, 0, }, /* 435 */ + { 62, 26, 12, 0, 0, 62, 0, }, /* 436 */ + { 76, 12, 3, 0, 0, 76, 0, }, /* 437 */ + { 76, 10, 5, 0, 0, 76, 0, }, /* 438 */ + { 76, 7, 12, 0, 0, 76, 0, }, /* 439 */ + { 76, 13, 12, 0, 0, 76, 0, }, /* 440 */ + { 93, 7, 12, 0, 0, 93, 0, }, /* 441 */ + { 93, 12, 3, 0, 0, 93, 0, }, /* 442 */ + { 93, 10, 5, 0, 0, 93, 0, }, /* 443 */ + { 93, 21, 12, 0, 0, 93, 0, }, /* 444 */ + { 70, 7, 12, 0, 0, 70, 0, }, /* 445 */ + { 70, 10, 5, 0, 0, 70, 0, }, /* 446 */ + { 70, 12, 3, 0, 0, 70, 0, }, /* 447 */ + { 70, 21, 12, 0, 0, 70, 0, }, /* 448 */ + { 70, 13, 12, 0, 0, 70, 0, }, /* 449 */ + { 73, 13, 12, 0, 0, 73, 0, }, /* 450 */ + { 73, 7, 12, 0, 0, 73, 0, }, /* 451 */ + { 73, 6, 12, 0, 0, 73, 0, }, /* 452 */ + { 73, 21, 12, 0, 0, 73, 0, }, /* 453 */ + { 13, 5, 12, 63, -6222, 13, 0, }, /* 454 */ + { 13, 5, 12, 67, -6221, 13, 0, }, /* 455 */ + { 13, 5, 12, 71, -6212, 13, 0, }, /* 456 */ + { 13, 5, 12, 75, -6210, 13, 0, }, /* 457 */ + { 13, 5, 12, 79, -6210, 13, 0, }, /* 458 */ + { 13, 5, 12, 79, -6211, 13, 0, }, /* 459 */ + { 13, 5, 12, 84, -6204, 13, 0, }, /* 460 */ + { 13, 5, 12, 88, -6180, 13, 0, }, /* 461 */ + { 13, 5, 12, 108, 35267, 13, 0, }, /* 462 */ + { 17, 9, 12, 0, -3008, 17, 0, }, /* 463 */ + { 76, 21, 12, 0, 0, 76, 0, }, /* 464 */ + { 28, 12, 3, 0, 0, -122, 0, }, /* 465 */ + { 28, 12, 3, 0, 
0, 15, 0, }, /* 466 */ + { 10, 21, 12, 0, 0, -40, 0, }, /* 467 */ + { 28, 12, 3, 0, 0, -16, 0, }, /* 468 */ + { 28, 12, 3, 0, 0, -46, 0, }, /* 469 */ + { 28, 12, 3, 0, 0, -157, 0, }, /* 470 */ + { 10, 10, 5, 0, 0, -16, 0, }, /* 471 */ + { 10, 7, 12, 0, 0, -43, 0, }, /* 472 */ + { 10, 7, 12, 0, 0, -16, 0, }, /* 473 */ + { 10, 7, 12, 0, 0, 15, 0, }, /* 474 */ + { 10, 7, 12, 0, 0, -172, 0, }, /* 475 */ + { 10, 7, 12, 0, 0, -40, 0, }, /* 476 */ + { 28, 12, 3, 0, 0, -106, 0, }, /* 477 */ + { 10, 10, 5, 0, 0, 3, 0, }, /* 478 */ + { 28, 12, 3, 0, 0, -40, 0, }, /* 479 */ + { 10, 7, 12, 0, 0, 150, 0, }, /* 480 */ + { 13, 5, 12, 0, 0, 13, 0, }, /* 481 */ + { 13, 6, 12, 0, 0, 13, 0, }, /* 482 */ + { 34, 5, 12, 0, 35332, 34, 0, }, /* 483 */ + { 34, 5, 12, 0, 3814, 34, 0, }, /* 484 */ + { 34, 5, 12, 0, 35384, 34, 0, }, /* 485 */ + { 28, 12, 3, 0, 0, -37, 0, }, /* 486 */ + { 28, 12, 3, 0, 0, 50, 0, }, /* 487 */ + { 34, 9, 12, 92, 1, 34, 0, }, /* 488 */ + { 34, 5, 12, 92, -1, 34, 0, }, /* 489 */ + { 34, 5, 12, 92, -58, 34, 0, }, /* 490 */ + { 34, 9, 12, 0, -7615, 34, 0, }, /* 491 */ + { 20, 5, 12, 0, 8, 20, 0, }, /* 492 */ + { 20, 9, 12, 0, -8, 20, 0, }, /* 493 */ + { 20, 5, 12, 0, 74, 20, 0, }, /* 494 */ + { 20, 5, 12, 0, 86, 20, 0, }, /* 495 */ + { 20, 5, 12, 0, 100, 20, 0, }, /* 496 */ + { 20, 5, 12, 0, 128, 20, 0, }, /* 497 */ + { 20, 5, 12, 0, 112, 20, 0, }, /* 498 */ + { 20, 5, 12, 0, 126, 20, 0, }, /* 499 */ + { 20, 8, 12, 0, -8, 20, 0, }, /* 500 */ + { 20, 5, 12, 0, 9, 20, 0, }, /* 501 */ + { 20, 9, 12, 0, -74, 20, 0, }, /* 502 */ + { 20, 8, 12, 0, -9, 20, 0, }, /* 503 */ + { 20, 5, 12, 21, -7173, 20, 0, }, /* 504 */ + { 20, 9, 12, 0, -86, 20, 0, }, /* 505 */ + { 20, 9, 12, 0, -100, 20, 0, }, /* 506 */ + { 20, 9, 12, 0, -112, 20, 0, }, /* 507 */ + { 20, 9, 12, 0, -128, 20, 0, }, /* 508 */ + { 20, 9, 12, 0, -126, 20, 0, }, /* 509 */ + { 28, 1, 3, 0, 0, 28, 0, }, /* 510 */ + { 28, 1, 13, 0, 0, 28, 0, }, /* 511 */ + { 10, 27, 2, 0, 0, 10, 0, }, /* 512 */ + { 10, 28, 2, 0, 0, 
10, 0, }, /* 513 */ + { 10, 29, 12, 0, 0, -73, 0, }, /* 514 */ + { 10, 21, 14, 0, 0, 10, 0, }, /* 515 */ + { 0, 2, 2, 0, 0, 0, 0, }, /* 516 */ + { 28, 12, 3, 0, 0, -110, 0, }, /* 517 */ + { 10, 9, 12, 0, 0, 10, 0, }, /* 518 */ + { 10, 5, 12, 0, 0, 10, 0, }, /* 519 */ + { 20, 9, 12, 96, -7517, 20, 0, }, /* 520 */ + { 34, 9, 12, 100, -8383, 34, 0, }, /* 521 */ + { 34, 9, 12, 104, -8262, 34, 0, }, /* 522 */ + { 34, 9, 12, 0, 28, 34, 0, }, /* 523 */ + { 10, 7, 12, 0, 0, 10, 0, }, /* 524 */ + { 10, 5, 14, 0, 0, 10, 0, }, /* 525 */ + { 34, 5, 12, 0, -28, 34, 0, }, /* 526 */ + { 34, 14, 12, 0, 16, 34, 0, }, /* 527 */ + { 34, 14, 12, 0, -16, 34, 0, }, /* 528 */ + { 34, 14, 12, 0, 0, 34, 0, }, /* 529 */ + { 10, 25, 14, 0, 0, 10, 0, }, /* 530 */ + { 10, 26, 12, 0, 26, 10, 0, }, /* 531 */ + { 10, 26, 14, 0, 26, 10, 0, }, /* 532 */ + { 10, 26, 12, 0, -26, 10, 0, }, /* 533 */ + { 5, 26, 12, 0, 0, 5, 0, }, /* 534 */ + { 18, 9, 12, 0, 48, 18, 0, }, /* 535 */ + { 18, 5, 12, 0, -48, 18, 0, }, /* 536 */ + { 34, 9, 12, 0, -10743, 34, 0, }, /* 537 */ + { 34, 9, 12, 0, -3814, 34, 0, }, /* 538 */ + { 34, 9, 12, 0, -10727, 34, 0, }, /* 539 */ + { 34, 5, 12, 0, -10795, 34, 0, }, /* 540 */ + { 34, 5, 12, 0, -10792, 34, 0, }, /* 541 */ + { 34, 9, 12, 0, -10780, 34, 0, }, /* 542 */ + { 34, 9, 12, 0, -10749, 34, 0, }, /* 543 */ + { 34, 9, 12, 0, -10783, 34, 0, }, /* 544 */ + { 34, 9, 12, 0, -10782, 34, 0, }, /* 545 */ + { 34, 9, 12, 0, -10815, 34, 0, }, /* 546 */ + { 11, 5, 12, 0, 0, 11, 0, }, /* 547 */ + { 11, 26, 12, 0, 0, 11, 0, }, /* 548 */ + { 11, 12, 3, 0, 0, 11, 0, }, /* 549 */ + { 11, 21, 12, 0, 0, 11, 0, }, /* 550 */ + { 11, 15, 12, 0, 0, 11, 0, }, /* 551 */ + { 17, 5, 12, 0, -7264, 17, 0, }, /* 552 */ + { 59, 7, 12, 0, 0, 59, 0, }, /* 553 */ + { 59, 6, 12, 0, 0, 59, 0, }, /* 554 */ + { 59, 21, 12, 0, 0, 59, 0, }, /* 555 */ + { 59, 12, 3, 0, 0, 59, 0, }, /* 556 */ + { 13, 12, 3, 0, 0, 13, 0, }, /* 557 */ + { 10, 21, 12, 0, 0, -28, 0, }, /* 558 */ + { 23, 26, 12, 0, 0, 23, 0, }, /* 
559 */ + { 10, 21, 12, 0, 0, -150, 0, }, /* 560 */ + { 10, 21, 12, 0, 0, -137, 0, }, /* 561 */ + { 23, 6, 12, 0, 0, 23, 0, }, /* 562 */ + { 10, 7, 12, 0, 0, 23, 0, }, /* 563 */ + { 23, 14, 12, 0, 0, 23, 0, }, /* 564 */ + { 10, 22, 12, 0, 0, -150, 0, }, /* 565 */ + { 10, 18, 12, 0, 0, -150, 0, }, /* 566 */ + { 10, 26, 12, 0, 0, -137, 0, }, /* 567 */ + { 10, 17, 12, 0, 0, -137, 0, }, /* 568 */ + { 10, 22, 12, 0, 0, -137, 0, }, /* 569 */ + { 10, 18, 12, 0, 0, -137, 0, }, /* 570 */ + { 28, 12, 3, 0, 0, -19, 0, }, /* 571 */ + { 24, 10, 3, 0, 0, 24, 0, }, /* 572 */ + { 10, 17, 14, 0, 0, -137, 0, }, /* 573 */ + { 10, 6, 12, 0, 0, -67, 0, }, /* 574 */ + { 10, 7, 12, 0, 0, -114, 0, }, /* 575 */ + { 10, 21, 14, 0, 0, -114, 0, }, /* 576 */ + { 10, 26, 12, 0, 0, 23, 0, }, /* 577 */ + { 27, 7, 12, 0, 0, 27, 0, }, /* 578 */ + { 28, 12, 3, 0, 0, -67, 0, }, /* 579 */ + { 10, 24, 12, 0, 0, -67, 0, }, /* 580 */ + { 27, 6, 12, 0, 0, 27, 0, }, /* 581 */ + { 10, 17, 12, 0, 0, -67, 0, }, /* 582 */ + { 30, 7, 12, 0, 0, 30, 0, }, /* 583 */ + { 30, 6, 12, 0, 0, 30, 0, }, /* 584 */ + { 4, 7, 12, 0, 0, 4, 0, }, /* 585 */ + { 24, 7, 12, 0, 0, 24, 0, }, /* 586 */ + { 10, 15, 12, 0, 0, 23, 0, }, /* 587 */ + { 24, 26, 12, 0, 0, 24, 0, }, /* 588 */ + { 10, 26, 14, 0, 0, 23, 0, }, /* 589 */ + { 30, 26, 12, 0, 0, 30, 0, }, /* 590 */ + { 23, 7, 12, 0, 0, 23, 0, }, /* 591 */ + { 61, 7, 12, 0, 0, 61, 0, }, /* 592 */ + { 61, 6, 12, 0, 0, 61, 0, }, /* 593 */ + { 61, 26, 12, 0, 0, 61, 0, }, /* 594 */ + { 86, 7, 12, 0, 0, 86, 0, }, /* 595 */ + { 86, 6, 12, 0, 0, 86, 0, }, /* 596 */ + { 86, 21, 12, 0, 0, 86, 0, }, /* 597 */ + { 77, 7, 12, 0, 0, 77, 0, }, /* 598 */ + { 77, 6, 12, 0, 0, 77, 0, }, /* 599 */ + { 77, 21, 12, 0, 0, 77, 0, }, /* 600 */ + { 77, 13, 12, 0, 0, 77, 0, }, /* 601 */ + { 13, 9, 12, 108, 1, 13, 0, }, /* 602 */ + { 13, 5, 12, 108, -35267, 13, 0, }, /* 603 */ + { 13, 7, 12, 0, 0, 13, 0, }, /* 604 */ + { 13, 21, 12, 0, 0, 13, 0, }, /* 605 */ + { 79, 7, 12, 0, 0, 79, 0, }, /* 606 */ + { 79, 
14, 12, 0, 0, 79, 0, }, /* 607 */ + { 79, 12, 3, 0, 0, 79, 0, }, /* 608 */ + { 79, 21, 12, 0, 0, 79, 0, }, /* 609 */ + { 10, 24, 12, 0, 0, -64, 0, }, /* 610 */ + { 34, 9, 12, 0, -35332, 34, 0, }, /* 611 */ + { 34, 9, 12, 0, -42280, 34, 0, }, /* 612 */ + { 34, 5, 12, 0, 48, 34, 0, }, /* 613 */ + { 34, 9, 12, 0, -42308, 34, 0, }, /* 614 */ + { 34, 9, 12, 0, -42319, 34, 0, }, /* 615 */ + { 34, 9, 12, 0, -42315, 34, 0, }, /* 616 */ + { 34, 9, 12, 0, -42305, 34, 0, }, /* 617 */ + { 34, 9, 12, 0, -42258, 34, 0, }, /* 618 */ + { 34, 9, 12, 0, -42282, 34, 0, }, /* 619 */ + { 34, 9, 12, 0, -42261, 34, 0, }, /* 620 */ + { 34, 9, 12, 0, 928, 34, 0, }, /* 621 */ + { 34, 9, 12, 0, -48, 34, 0, }, /* 622 */ + { 34, 9, 12, 0, -42307, 34, 0, }, /* 623 */ + { 34, 9, 12, 0, -35384, 34, 0, }, /* 624 */ + { 49, 7, 12, 0, 0, 49, 0, }, /* 625 */ + { 49, 12, 3, 0, 0, 49, 0, }, /* 626 */ + { 49, 10, 5, 0, 0, 49, 0, }, /* 627 */ + { 49, 26, 12, 0, 0, 49, 0, }, /* 628 */ + { 10, 15, 12, 0, 0, -244, 0, }, /* 629 */ + { 10, 15, 12, 0, 0, -230, 0, }, /* 630 */ + { 10, 26, 12, 0, 0, -191, 0, }, /* 631 */ + { 10, 23, 12, 0, 0, -191, 0, }, /* 632 */ + { 65, 7, 12, 0, 0, 65, 0, }, /* 633 */ + { 65, 21, 12, 0, 0, 65, 0, }, /* 634 */ + { 75, 10, 5, 0, 0, 75, 0, }, /* 635 */ + { 75, 7, 12, 0, 0, 75, 0, }, /* 636 */ + { 75, 12, 3, 0, 0, 75, 0, }, /* 637 */ + { 75, 21, 12, 0, 0, 75, 0, }, /* 638 */ + { 75, 13, 12, 0, 0, 75, 0, }, /* 639 */ + { 15, 12, 3, 0, 0, -16, 0, }, /* 640 */ + { 15, 7, 12, 0, 0, -49, 0, }, /* 641 */ + { 69, 13, 12, 0, 0, 69, 0, }, /* 642 */ + { 69, 7, 12, 0, 0, 69, 0, }, /* 643 */ + { 69, 12, 3, 0, 0, 69, 0, }, /* 644 */ + { 10, 21, 12, 0, 0, -118, 0, }, /* 645 */ + { 69, 21, 12, 0, 0, 69, 0, }, /* 646 */ + { 74, 7, 12, 0, 0, 74, 0, }, /* 647 */ + { 74, 12, 3, 0, 0, 74, 0, }, /* 648 */ + { 74, 10, 5, 0, 0, 74, 0, }, /* 649 */ + { 74, 21, 12, 0, 0, 74, 0, }, /* 650 */ + { 84, 12, 3, 0, 0, 84, 0, }, /* 651 */ + { 84, 10, 5, 0, 0, 84, 0, }, /* 652 */ + { 84, 7, 12, 0, 0, 84, 0, }, /* 
653 */ + { 84, 21, 12, 0, 0, 84, 0, }, /* 654 */ + { 10, 6, 12, 0, 0, -22, 0, }, /* 655 */ + { 84, 13, 12, 0, 0, 84, 0, }, /* 656 */ + { 39, 6, 12, 0, 0, 39, 0, }, /* 657 */ + { 68, 7, 12, 0, 0, 68, 0, }, /* 658 */ + { 68, 12, 3, 0, 0, 68, 0, }, /* 659 */ + { 68, 10, 5, 0, 0, 68, 0, }, /* 660 */ + { 68, 13, 12, 0, 0, 68, 0, }, /* 661 */ + { 68, 21, 12, 0, 0, 68, 0, }, /* 662 */ + { 92, 7, 12, 0, 0, 92, 0, }, /* 663 */ + { 92, 12, 3, 0, 0, 92, 0, }, /* 664 */ + { 92, 6, 12, 0, 0, 92, 0, }, /* 665 */ + { 92, 21, 12, 0, 0, 92, 0, }, /* 666 */ + { 87, 7, 12, 0, 0, 87, 0, }, /* 667 */ + { 87, 10, 5, 0, 0, 87, 0, }, /* 668 */ + { 87, 12, 3, 0, 0, 87, 0, }, /* 669 */ + { 87, 21, 12, 0, 0, 87, 0, }, /* 670 */ + { 87, 6, 12, 0, 0, 87, 0, }, /* 671 */ + { 34, 5, 12, 0, -928, 34, 0, }, /* 672 */ + { 9, 5, 12, 0, -38864, 9, 0, }, /* 673 */ + { 87, 13, 12, 0, 0, 87, 0, }, /* 674 */ + { 24, 7, 9, 0, 0, 24, 0, }, /* 675 */ + { 24, 7, 10, 0, 0, 24, 0, }, /* 676 */ + { 0, 4, 12, 0, 0, 0, 0, }, /* 677 */ + { 0, 3, 12, 0, 0, 0, 0, }, /* 678 */ + { 26, 25, 12, 0, 0, 26, 0, }, /* 679 */ + { 10, 18, 12, 0, 0, -7, 0, }, /* 680 */ + { 10, 22, 12, 0, 0, -7, 0, }, /* 681 */ + { 1, 7, 12, 0, 0, -13, 0, }, /* 682 */ + { 1, 26, 12, 0, 0, -13, 0, }, /* 683 */ + { 10, 6, 3, 0, 0, -67, 0, }, /* 684 */ + { 36, 7, 12, 0, 0, 36, 0, }, /* 685 */ + { 10, 21, 12, 0, 0, -98, 0, }, /* 686 */ + { 10, 21, 12, 0, 0, -25, 0, }, /* 687 */ + { 10, 15, 12, 0, 0, -102, 0, }, /* 688 */ + { 10, 26, 12, 0, 0, -25, 0, }, /* 689 */ + { 20, 14, 12, 0, 0, 20, 0, }, /* 690 */ + { 20, 15, 12, 0, 0, 20, 0, }, /* 691 */ + { 20, 26, 12, 0, 0, 20, 0, }, /* 692 */ + { 71, 7, 12, 0, 0, 71, 0, }, /* 693 */ + { 67, 7, 12, 0, 0, 67, 0, }, /* 694 */ + { 28, 12, 3, 0, 0, -1, 0, }, /* 695 */ + { 10, 15, 12, 0, 0, -1, 0, }, /* 696 */ + { 42, 7, 12, 0, 0, 42, 0, }, /* 697 */ + { 42, 15, 12, 0, 0, 42, 0, }, /* 698 */ + { 19, 7, 12, 0, 0, 19, 0, }, /* 699 */ + { 19, 14, 12, 0, 0, 19, 0, }, /* 700 */ + { 118, 7, 12, 0, 0, 118, 0, }, /* 
701 */ + { 118, 12, 3, 0, 0, 118, 0, }, /* 702 */ + { 60, 7, 12, 0, 0, 60, 0, }, /* 703 */ + { 60, 21, 12, 0, 0, 60, 0, }, /* 704 */ + { 43, 7, 12, 0, 0, 43, 0, }, /* 705 */ + { 43, 21, 12, 0, 0, 43, 0, }, /* 706 */ + { 43, 14, 12, 0, 0, 43, 0, }, /* 707 */ + { 14, 9, 12, 0, 40, 14, 0, }, /* 708 */ + { 14, 5, 12, 0, -40, 14, 0, }, /* 709 */ + { 47, 7, 12, 0, 0, 47, 0, }, /* 710 */ + { 45, 7, 12, 0, 0, 45, 0, }, /* 711 */ + { 45, 13, 12, 0, 0, 45, 0, }, /* 712 */ + { 136, 9, 12, 0, 40, 136, 0, }, /* 713 */ + { 136, 5, 12, 0, -40, 136, 0, }, /* 714 */ + { 106, 7, 12, 0, 0, 106, 0, }, /* 715 */ + { 104, 7, 12, 0, 0, 104, 0, }, /* 716 */ + { 104, 21, 12, 0, 0, 104, 0, }, /* 717 */ + { 161, 9, 12, 0, 39, 161, 0, }, /* 718 */ + { 161, 5, 12, 0, -39, 161, 0, }, /* 719 */ + { 110, 7, 12, 0, 0, 110, 0, }, /* 720 */ + { 12, 7, 12, 0, 0, 12, 0, }, /* 721 */ + { 81, 7, 12, 0, 0, 81, 0, }, /* 722 */ + { 81, 21, 12, 0, 0, 81, 0, }, /* 723 */ + { 81, 15, 12, 0, 0, 81, 0, }, /* 724 */ + { 120, 7, 12, 0, 0, 120, 0, }, /* 725 */ + { 120, 26, 12, 0, 0, 120, 0, }, /* 726 */ + { 120, 15, 12, 0, 0, 120, 0, }, /* 727 */ + { 116, 7, 12, 0, 0, 116, 0, }, /* 728 */ + { 116, 15, 12, 0, 0, 116, 0, }, /* 729 */ + { 128, 7, 12, 0, 0, 128, 0, }, /* 730 */ + { 128, 15, 12, 0, 0, 128, 0, }, /* 731 */ + { 66, 7, 12, 0, 0, 66, 0, }, /* 732 */ + { 66, 15, 12, 0, 0, 66, 0, }, /* 733 */ + { 66, 21, 12, 0, 0, 66, 0, }, /* 734 */ + { 72, 7, 12, 0, 0, 72, 0, }, /* 735 */ + { 72, 21, 12, 0, 0, 72, 0, }, /* 736 */ + { 98, 7, 12, 0, 0, 98, 0, }, /* 737 */ + { 97, 7, 12, 0, 0, 97, 0, }, /* 738 */ + { 97, 15, 12, 0, 0, 97, 0, }, /* 739 */ + { 31, 7, 12, 0, 0, 31, 0, }, /* 740 */ + { 31, 12, 3, 0, 0, 31, 0, }, /* 741 */ + { 31, 15, 12, 0, 0, 31, 0, }, /* 742 */ + { 31, 21, 12, 0, 0, 31, 0, }, /* 743 */ + { 88, 7, 12, 0, 0, 88, 0, }, /* 744 */ + { 88, 15, 12, 0, 0, 88, 0, }, /* 745 */ + { 88, 21, 12, 0, 0, 88, 0, }, /* 746 */ + { 117, 7, 12, 0, 0, 117, 0, }, /* 747 */ + { 117, 15, 12, 0, 0, 117, 0, }, /* 748 */ 
+ { 112, 7, 12, 0, 0, 112, 0, }, /* 749 */ + { 112, 26, 12, 0, 0, 112, 0, }, /* 750 */ + { 112, 12, 3, 0, 0, 112, 0, }, /* 751 */ + { 112, 15, 12, 0, 0, 112, 0, }, /* 752 */ + { 112, 21, 12, 0, 0, 112, 0, }, /* 753 */ + { 112, 21, 12, 0, 0, -76, 0, }, /* 754 */ + { 78, 7, 12, 0, 0, 78, 0, }, /* 755 */ + { 78, 21, 12, 0, 0, 78, 0, }, /* 756 */ + { 83, 7, 12, 0, 0, 83, 0, }, /* 757 */ + { 83, 15, 12, 0, 0, 83, 0, }, /* 758 */ + { 82, 7, 12, 0, 0, 82, 0, }, /* 759 */ + { 82, 15, 12, 0, 0, 82, 0, }, /* 760 */ + { 121, 7, 12, 0, 0, 121, 0, }, /* 761 */ + { 121, 21, 12, 0, 0, 121, 0, }, /* 762 */ + { 121, 15, 12, 0, 0, 121, 0, }, /* 763 */ + { 89, 7, 12, 0, 0, 89, 0, }, /* 764 */ + { 130, 9, 12, 0, 64, 130, 0, }, /* 765 */ + { 130, 5, 12, 0, -64, 130, 0, }, /* 766 */ + { 130, 15, 12, 0, 0, 130, 0, }, /* 767 */ + { 144, 7, 12, 0, 0, 144, 0, }, /* 768 */ + { 144, 12, 3, 0, 0, 144, 0, }, /* 769 */ + { 144, 13, 12, 0, 0, 144, 0, }, /* 770 */ + { 1, 15, 12, 0, 0, 1, 0, }, /* 771 */ + { 156, 7, 12, 0, 0, 156, 0, }, /* 772 */ + { 156, 12, 3, 0, 0, 156, 0, }, /* 773 */ + { 156, 17, 12, 0, 0, 156, 0, }, /* 774 */ + { 147, 7, 12, 0, 0, 147, 0, }, /* 775 */ + { 147, 15, 12, 0, 0, 147, 0, }, /* 776 */ + { 148, 7, 12, 0, 0, 148, 0, }, /* 777 */ + { 148, 12, 3, 0, 0, 148, 0, }, /* 778 */ + { 148, 15, 12, 0, 0, 148, 0, }, /* 779 */ + { 148, 21, 12, 0, 0, 148, 0, }, /* 780 */ + { 158, 7, 12, 0, 0, 158, 0, }, /* 781 */ + { 158, 12, 3, 0, 0, 158, 0, }, /* 782 */ + { 158, 21, 12, 0, 0, 158, 0, }, /* 783 */ + { 153, 7, 12, 0, 0, 153, 0, }, /* 784 */ + { 153, 15, 12, 0, 0, 153, 0, }, /* 785 */ + { 149, 7, 12, 0, 0, 149, 0, }, /* 786 */ + { 94, 10, 5, 0, 0, 94, 0, }, /* 787 */ + { 94, 12, 3, 0, 0, 94, 0, }, /* 788 */ + { 94, 7, 12, 0, 0, 94, 0, }, /* 789 */ + { 94, 21, 12, 0, 0, 94, 0, }, /* 790 */ + { 94, 15, 12, 0, 0, 94, 0, }, /* 791 */ + { 94, 13, 12, 0, 0, 94, 0, }, /* 792 */ + { 85, 12, 3, 0, 0, 85, 0, }, /* 793 */ + { 85, 10, 5, 0, 0, 85, 0, }, /* 794 */ + { 85, 7, 12, 0, 0, 85, 0, }, 
/* 795 */ + { 85, 21, 12, 0, 0, 85, 0, }, /* 796 */ + { 85, 1, 4, 0, 0, 85, 0, }, /* 797 */ + { 101, 7, 12, 0, 0, 101, 0, }, /* 798 */ + { 101, 13, 12, 0, 0, 101, 0, }, /* 799 */ + { 96, 12, 3, 0, 0, 96, 0, }, /* 800 */ + { 96, 7, 12, 0, 0, 96, 0, }, /* 801 */ + { 96, 10, 5, 0, 0, 96, 0, }, /* 802 */ + { 96, 13, 12, 0, 0, 96, 0, }, /* 803 */ + { 96, 21, 12, 0, 0, 96, 0, }, /* 804 */ + { 111, 7, 12, 0, 0, 111, 0, }, /* 805 */ + { 111, 12, 3, 0, 0, 111, 0, }, /* 806 */ + { 111, 21, 12, 0, 0, 111, 0, }, /* 807 */ + { 100, 12, 3, 0, 0, 100, 0, }, /* 808 */ + { 100, 10, 5, 0, 0, 100, 0, }, /* 809 */ + { 100, 7, 12, 0, 0, 100, 0, }, /* 810 */ + { 100, 7, 4, 0, 0, 100, 0, }, /* 811 */ + { 100, 21, 12, 0, 0, 100, 0, }, /* 812 */ + { 100, 13, 12, 0, 0, 100, 0, }, /* 813 */ + { 48, 15, 12, 0, 0, 48, 0, }, /* 814 */ + { 108, 7, 12, 0, 0, 108, 0, }, /* 815 */ + { 108, 10, 5, 0, 0, 108, 0, }, /* 816 */ + { 108, 12, 3, 0, 0, 108, 0, }, /* 817 */ + { 108, 21, 12, 0, 0, 108, 0, }, /* 818 */ + { 129, 7, 12, 0, 0, 129, 0, }, /* 819 */ + { 129, 21, 12, 0, 0, 129, 0, }, /* 820 */ + { 109, 7, 12, 0, 0, 109, 0, }, /* 821 */ + { 109, 12, 3, 0, 0, 109, 0, }, /* 822 */ + { 109, 10, 5, 0, 0, 109, 0, }, /* 823 */ + { 109, 13, 12, 0, 0, 109, 0, }, /* 824 */ + { 107, 12, 3, 0, 0, 107, 0, }, /* 825 */ + { 107, 12, 3, 0, 0, -55, 0, }, /* 826 */ + { 107, 10, 5, 0, 0, 107, 0, }, /* 827 */ + { 107, 10, 5, 0, 0, -55, 0, }, /* 828 */ + { 107, 7, 12, 0, 0, 107, 0, }, /* 829 */ + { 28, 12, 3, 0, 0, -55, 0, }, /* 830 */ + { 107, 10, 3, 0, 0, 107, 0, }, /* 831 */ + { 135, 7, 12, 0, 0, 135, 0, }, /* 832 */ + { 135, 10, 5, 0, 0, 135, 0, }, /* 833 */ + { 135, 12, 3, 0, 0, 135, 0, }, /* 834 */ + { 135, 21, 12, 0, 0, 135, 0, }, /* 835 */ + { 135, 13, 12, 0, 0, 135, 0, }, /* 836 */ + { 124, 7, 12, 0, 0, 124, 0, }, /* 837 */ + { 124, 10, 3, 0, 0, 124, 0, }, /* 838 */ + { 124, 10, 5, 0, 0, 124, 0, }, /* 839 */ + { 124, 12, 3, 0, 0, 124, 0, }, /* 840 */ + { 124, 21, 12, 0, 0, 124, 0, }, /* 841 */ + { 124, 13, 12, 
0, 0, 124, 0, }, /* 842 */ + { 123, 7, 12, 0, 0, 123, 0, }, /* 843 */ + { 123, 10, 3, 0, 0, 123, 0, }, /* 844 */ + { 123, 10, 5, 0, 0, 123, 0, }, /* 845 */ + { 123, 12, 3, 0, 0, 123, 0, }, /* 846 */ + { 123, 21, 12, 0, 0, 123, 0, }, /* 847 */ + { 114, 7, 12, 0, 0, 114, 0, }, /* 848 */ + { 114, 10, 5, 0, 0, 114, 0, }, /* 849 */ + { 114, 12, 3, 0, 0, 114, 0, }, /* 850 */ + { 114, 21, 12, 0, 0, 114, 0, }, /* 851 */ + { 114, 13, 12, 0, 0, 114, 0, }, /* 852 */ + { 102, 7, 12, 0, 0, 102, 0, }, /* 853 */ + { 102, 12, 3, 0, 0, 102, 0, }, /* 854 */ + { 102, 10, 5, 0, 0, 102, 0, }, /* 855 */ + { 102, 21, 12, 0, 0, 102, 0, }, /* 856 */ + { 102, 13, 12, 0, 0, 102, 0, }, /* 857 */ + { 126, 7, 12, 0, 0, 126, 0, }, /* 858 */ + { 126, 12, 3, 0, 0, 126, 0, }, /* 859 */ + { 126, 10, 12, 0, 0, 126, 0, }, /* 860 */ + { 126, 10, 5, 0, 0, 126, 0, }, /* 861 */ + { 126, 13, 12, 0, 0, 126, 0, }, /* 862 */ + { 126, 15, 12, 0, 0, 126, 0, }, /* 863 */ + { 126, 21, 12, 0, 0, 126, 0, }, /* 864 */ + { 126, 26, 12, 0, 0, 126, 0, }, /* 865 */ + { 142, 7, 12, 0, 0, 142, 0, }, /* 866 */ + { 142, 10, 5, 0, 0, 142, 0, }, /* 867 */ + { 142, 12, 3, 0, 0, 142, 0, }, /* 868 */ + { 142, 21, 12, 0, 0, 142, 0, }, /* 869 */ + { 125, 9, 12, 0, 32, 125, 0, }, /* 870 */ + { 125, 5, 12, 0, -32, 125, 0, }, /* 871 */ + { 125, 13, 12, 0, 0, 125, 0, }, /* 872 */ + { 125, 15, 12, 0, 0, 125, 0, }, /* 873 */ + { 125, 7, 12, 0, 0, 125, 0, }, /* 874 */ + { 154, 7, 12, 0, 0, 154, 0, }, /* 875 */ + { 154, 10, 3, 0, 0, 154, 0, }, /* 876 */ + { 154, 10, 5, 0, 0, 154, 0, }, /* 877 */ + { 154, 12, 3, 0, 0, 154, 0, }, /* 878 */ + { 154, 7, 4, 0, 0, 154, 0, }, /* 879 */ + { 154, 21, 12, 0, 0, 154, 0, }, /* 880 */ + { 154, 13, 12, 0, 0, 154, 0, }, /* 881 */ + { 150, 7, 12, 0, 0, 150, 0, }, /* 882 */ + { 150, 10, 5, 0, 0, 150, 0, }, /* 883 */ + { 150, 12, 3, 0, 0, 150, 0, }, /* 884 */ + { 150, 21, 12, 0, 0, 150, 0, }, /* 885 */ + { 141, 7, 12, 0, 0, 141, 0, }, /* 886 */ + { 141, 12, 3, 0, 0, 141, 0, }, /* 887 */ + { 141, 10, 5, 0, 
0, 141, 0, }, /* 888 */ + { 141, 7, 4, 0, 0, 141, 0, }, /* 889 */ + { 141, 21, 12, 0, 0, 141, 0, }, /* 890 */ + { 140, 7, 12, 0, 0, 140, 0, }, /* 891 */ + { 140, 12, 3, 0, 0, 140, 0, }, /* 892 */ + { 140, 10, 5, 0, 0, 140, 0, }, /* 893 */ + { 140, 7, 4, 0, 0, 140, 0, }, /* 894 */ + { 140, 21, 12, 0, 0, 140, 0, }, /* 895 */ + { 122, 7, 12, 0, 0, 122, 0, }, /* 896 */ + { 133, 7, 12, 0, 0, 133, 0, }, /* 897 */ + { 133, 10, 5, 0, 0, 133, 0, }, /* 898 */ + { 133, 12, 3, 0, 0, 133, 0, }, /* 899 */ + { 133, 21, 12, 0, 0, 133, 0, }, /* 900 */ + { 133, 13, 12, 0, 0, 133, 0, }, /* 901 */ + { 133, 15, 12, 0, 0, 133, 0, }, /* 902 */ + { 134, 21, 12, 0, 0, 134, 0, }, /* 903 */ + { 134, 7, 12, 0, 0, 134, 0, }, /* 904 */ + { 134, 12, 3, 0, 0, 134, 0, }, /* 905 */ + { 134, 10, 5, 0, 0, 134, 0, }, /* 906 */ + { 138, 7, 12, 0, 0, 138, 0, }, /* 907 */ + { 138, 12, 3, 0, 0, 138, 0, }, /* 908 */ + { 138, 7, 4, 0, 0, 138, 0, }, /* 909 */ + { 138, 13, 12, 0, 0, 138, 0, }, /* 910 */ + { 143, 7, 12, 0, 0, 143, 0, }, /* 911 */ + { 143, 10, 5, 0, 0, 143, 0, }, /* 912 */ + { 143, 12, 3, 0, 0, 143, 0, }, /* 913 */ + { 143, 13, 12, 0, 0, 143, 0, }, /* 914 */ + { 145, 7, 12, 0, 0, 145, 0, }, /* 915 */ + { 145, 12, 3, 0, 0, 145, 0, }, /* 916 */ + { 145, 10, 5, 0, 0, 145, 0, }, /* 917 */ + { 145, 21, 12, 0, 0, 145, 0, }, /* 918 */ + { 54, 15, 12, 0, 0, 54, 0, }, /* 919 */ + { 54, 21, 12, 0, 0, 54, 0, }, /* 920 */ + { 63, 7, 12, 0, 0, 63, 0, }, /* 921 */ + { 63, 14, 12, 0, 0, 63, 0, }, /* 922 */ + { 63, 21, 12, 0, 0, 63, 0, }, /* 923 */ + { 157, 7, 12, 0, 0, 157, 0, }, /* 924 */ + { 157, 21, 12, 0, 0, 157, 0, }, /* 925 */ + { 80, 7, 12, 0, 0, 80, 0, }, /* 926 */ + { 80, 1, 2, 0, 0, 80, 0, }, /* 927 */ + { 127, 7, 12, 0, 0, 127, 0, }, /* 928 */ + { 115, 7, 12, 0, 0, 115, 0, }, /* 929 */ + { 115, 13, 12, 0, 0, 115, 0, }, /* 930 */ + { 115, 21, 12, 0, 0, 115, 0, }, /* 931 */ + { 159, 7, 12, 0, 0, 159, 0, }, /* 932 */ + { 159, 13, 12, 0, 0, 159, 0, }, /* 933 */ + { 103, 7, 12, 0, 0, 103, 0, }, /* 934 
*/ + { 103, 12, 3, 0, 0, 103, 0, }, /* 935 */ + { 103, 21, 12, 0, 0, 103, 0, }, /* 936 */ + { 119, 7, 12, 0, 0, 119, 0, }, /* 937 */ + { 119, 12, 3, 0, 0, 119, 0, }, /* 938 */ + { 119, 21, 12, 0, 0, 119, 0, }, /* 939 */ + { 119, 26, 12, 0, 0, 119, 0, }, /* 940 */ + { 119, 6, 12, 0, 0, 119, 0, }, /* 941 */ + { 119, 13, 12, 0, 0, 119, 0, }, /* 942 */ + { 119, 15, 12, 0, 0, 119, 0, }, /* 943 */ + { 146, 9, 12, 0, 32, 146, 0, }, /* 944 */ + { 146, 5, 12, 0, -32, 146, 0, }, /* 945 */ + { 146, 15, 12, 0, 0, 146, 0, }, /* 946 */ + { 146, 21, 12, 0, 0, 146, 0, }, /* 947 */ + { 99, 7, 12, 0, 0, 99, 0, }, /* 948 */ + { 99, 12, 3, 0, 0, 99, 0, }, /* 949 */ + { 99, 10, 5, 0, 0, 99, 0, }, /* 950 */ + { 99, 6, 12, 0, 0, 99, 0, }, /* 951 */ + { 137, 6, 12, 0, 0, 137, 0, }, /* 952 */ + { 139, 6, 12, 0, 0, 139, 0, }, /* 953 */ + { 23, 21, 12, 0, 0, 23, 0, }, /* 954 */ + { 155, 12, 3, 0, 0, 155, 0, }, /* 955 */ + { 23, 10, 5, 0, 0, 23, 0, }, /* 956 */ + { 137, 7, 12, 0, 0, 137, 0, }, /* 957 */ + { 155, 7, 12, 0, 0, 155, 0, }, /* 958 */ + { 139, 7, 12, 0, 0, 139, 0, }, /* 959 */ + { 105, 7, 12, 0, 0, 105, 0, }, /* 960 */ + { 105, 26, 12, 0, 0, 105, 0, }, /* 961 */ + { 105, 12, 3, 0, 0, 105, 0, }, /* 962 */ + { 105, 21, 12, 0, 0, 105, 0, }, /* 963 */ + { 10, 1, 2, 0, 0, 105, 0, }, /* 964 */ + { 10, 10, 3, 0, 0, 10, 0, }, /* 965 */ + { 10, 10, 5, 0, 0, 10, 0, }, /* 966 */ + { 20, 12, 3, 0, 0, 20, 0, }, /* 967 */ + { 131, 26, 12, 0, 0, 131, 0, }, /* 968 */ + { 131, 12, 3, 0, 0, 131, 0, }, /* 969 */ + { 131, 21, 12, 0, 0, 131, 0, }, /* 970 */ + { 18, 12, 3, 0, 0, 18, 0, }, /* 971 */ + { 151, 7, 12, 0, 0, 151, 0, }, /* 972 */ + { 151, 12, 3, 0, 0, 151, 0, }, /* 973 */ + { 151, 6, 12, 0, 0, 151, 0, }, /* 974 */ + { 151, 13, 12, 0, 0, 151, 0, }, /* 975 */ + { 151, 26, 12, 0, 0, 151, 0, }, /* 976 */ + { 160, 7, 12, 0, 0, 160, 0, }, /* 977 */ + { 160, 12, 3, 0, 0, 160, 0, }, /* 978 */ + { 152, 7, 12, 0, 0, 152, 0, }, /* 979 */ + { 152, 12, 3, 0, 0, 152, 0, }, /* 980 */ + { 152, 13, 12, 0, 0, 
152, 0, }, /* 981 */ + { 152, 23, 12, 0, 0, 152, 0, }, /* 982 */ + { 113, 7, 12, 0, 0, 113, 0, }, /* 983 */ + { 113, 15, 12, 0, 0, 113, 0, }, /* 984 */ + { 113, 12, 3, 0, 0, 113, 0, }, /* 985 */ + { 132, 9, 12, 0, 34, 132, 0, }, /* 986 */ + { 132, 5, 12, 0, -34, 132, 0, }, /* 987 */ + { 132, 12, 3, 0, 0, 132, 0, }, /* 988 */ + { 132, 6, 12, 0, 0, 132, 0, }, /* 989 */ + { 132, 13, 12, 0, 0, 132, 0, }, /* 990 */ + { 132, 21, 12, 0, 0, 132, 0, }, /* 991 */ + { 0, 2, 14, 0, 0, 0, 0, }, /* 992 */ + { 10, 26, 11, 0, 0, 10, 0, }, /* 993 */ + { 27, 26, 12, 0, 0, 27, 0, }, /* 994 */ + { 10, 24, 3, 0, 0, 10, 0, }, /* 995 */ + { 10, 1, 3, 0, 0, 10, 0, }, /* 996 */ }; const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ @@ -1190,51 +1216,51 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+8000 */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+8800 */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+9000 */ - 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,100, /* U+9800 */ -101,102,102,102,102,102,102,102,102,103,104,104,105,106,107,108, /* U+A000 */ -109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,117, /* U+A800 */ -118,119,120,121,122,123,117,118,119,120,121,122,123,117,118,119, /* U+B000 */ -120,121,122,123,117,118,119,120,121,122,123,117,118,119,120,121, /* U+B800 */ -122,123,117,118,119,120,121,122,123,117,118,119,120,121,122,123, /* U+C000 */ -117,118,119,120,121,122,123,117,118,119,120,121,122,123,117,118, /* U+C800 */ -119,120,121,122,123,117,118,119,120,121,122,123,117,118,119,124, /* U+D000 */ -125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+D800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+E000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+E800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F000 */ -126,126, 98, 
98,127,128,129,130,131,131,132,133,134,135,136,137, /* U+F800 */ -138,139,140,141,142,143,144,145,146,147,148,142,149,149,150,142, /* U+10000 */ -151,152,153,154,155,156,157,158,159,160,161,142,162,163,164,165, /* U+10800 */ -166,167,168,169,170,171,172,142,173,174,142,175,176,177,178,142, /* U+11000 */ -179,180,181,182,183,184,142,142,185,186,187,188,142,189,142,190, /* U+11800 */ -191,191,191,191,191,191,191,192,193,191,194,142,142,142,142,142, /* U+12000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+12800 */ -195,195,195,195,195,195,195,195,196,142,142,142,142,142,142,142, /* U+13000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+13800 */ -142,142,142,142,142,142,142,142,197,197,197,197,198,142,142,142, /* U+14000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+14800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+15000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+15800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+16000 */ -199,199,199,199,200,201,202,203,142,142,142,142,204,205,206,207, /* U+16800 */ -208,208,208,208,208,208,208,208,208,208,208,208,208,208,208,208, /* U+17000 */ -208,208,208,208,208,208,208,208,208,208,208,208,208,208,208,208, /* U+17800 */ -208,208,208,208,208,208,208,208,208,208,208,208,208,208,208,209, /* U+18000 */ -208,208,208,208,208,208,210,210,210,211,212,142,142,142,142,142, /* U+18800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+19000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+19800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+1A000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+1A800 */ -213,214,215,216,216,217,142,142,142,142,142,142,142,142,142,142, /* U+1B000 */ -142,142,142,142,142,142,142,142,218,219,142,142,142,142,142,142, /* U+1B800 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+1C000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+1C800 */ - 71,220,221,222,223,224,225,142,226,227,228,229,230,231,232,233, /* U+1D000 */ -234,234,234,234,235,236,142,142,142,142,142,142,142,142,142,142, /* U+1D800 */ -237,142,238,142,142,239,142,142,142,142,142,142,142,142,142,142, /* U+1E000 */ -240,241,242,142,142,142,142,142,243,244,245,142,246,247,142,142, /* U+1E800 */ -248,249,250,251,252,253,254,255,254,254,256,254,257,258,259,260, /* U+1F000 */ -261,262,263,264,265,266, 71,267,253,253,253,253,253,253,253,268, /* U+1F800 */ + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+9800 */ +100,101,101,101,101,101,101,101,101,102,103,103,104,105,106,107, /* U+A000 */ +108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,116, /* U+A800 */ +117,118,119,120,121,122,116,117,118,119,120,121,122,116,117,118, /* U+B000 */ +119,120,121,122,116,117,118,119,120,121,122,116,117,118,119,120, /* U+B800 */ +121,122,116,117,118,119,120,121,122,116,117,118,119,120,121,122, /* U+C000 */ +116,117,118,119,120,121,122,116,117,118,119,120,121,122,116,117, /* U+C800 */ +118,119,120,121,122,116,117,118,119,120,121,122,116,117,118,123, /* U+D000 */ +124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+D800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+E000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+E800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F000 */ +125,125, 98, 98,126,127,128,129,130,130,131,132,133,134,135,136, /* U+F800 */ +137,138,139,140,141,142,143,144,145,146,147,148,149,149,150,151, /* U+10000 */ +152,153,154,155,156,157,158,159,160,161,162,141,163,164,165,166, /* U+10800 */ +167,168,169,170,171,172,173,141,174,175,141,176,177,178,179,141, /* U+11000 */ +180,181,182,183,184,185,141,141,186,187,188,189,141,190,141,191, /* U+11800 */ 
+192,192,192,192,192,192,192,193,194,192,195,141,141,141,141,141, /* U+12000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,196, /* U+12800 */ +197,197,197,197,197,197,197,197,198,141,141,141,141,141,141,141, /* U+13000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+13800 */ +141,141,141,141,141,141,141,141,199,199,199,199,200,141,141,141, /* U+14000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+14800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+15000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+15800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+16000 */ +201,201,201,201,202,203,204,205,141,141,141,141,206,207,208,209, /* U+16800 */ +210,210,210,210,210,210,210,210,210,210,210,210,210,210,210,210, /* U+17000 */ +210,210,210,210,210,210,210,210,210,210,210,210,210,210,210,210, /* U+17800 */ +210,210,210,210,210,210,210,210,210,210,210,210,210,210,210,211, /* U+18000 */ +210,210,210,210,210,210,212,212,212,213,214,141,141,141,141,141, /* U+18800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+19000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+19800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+1A000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,215, /* U+1A800 */ +216,217,218,219,219,220,141,141,141,141,141,141,141,141,141,141, /* U+1B000 */ +141,141,141,141,141,141,141,141,221,222,141,141,141,141,141,141, /* U+1B800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+1C000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,223,224, /* U+1C800 */ + 71,225,226,227,228,229,230,141,231,232,233,234,235,236,237,238, /* U+1D000 */ +239,239,239,239,240,241,141,141,141,141,141,141,141,141,242,141, /* U+1D800 */ +243,141,244,141,141,245,141,141,141,141,141,141,141,141,141,246, /* U+1E000 */ 
+247,248,249,141,141,141,141,141,250,251,252,141,253,254,141,141, /* U+1E800 */ +255,256,257,258,259,260,261,262,261,261,263,261,264,265,266,267, /* U+1F000 */ +268,269,270,261,271,272, 71,273,260,260,260,260,260,260,260,274, /* U+1F800 */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+20000 */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+20800 */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+21000 */ @@ -1255,469 +1281,469 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+28800 */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+29000 */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+29800 */ - 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,269, 98, 98, /* U+2A000 */ + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,275, 98, 98, /* U+2A000 */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2A800 */ - 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,270, 98, /* U+2B000 */ -271, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2B800 */ + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,276, 98, /* U+2B000 */ +277, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2B800 */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2C000 */ - 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,272, 98, 98, /* U+2C800 */ + 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,278, 98, 98, /* U+2C800 */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2D000 */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2D800 */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2E000 */ - 98, 98, 98, 98, 98, 98, 98,273,142,142,142,142,142,142,142,142, /* U+2E800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+2F000 */ - 98, 98, 98, 
98,274,142,142,142,142,142,142,142,142,142,142,142, /* U+2F800 */ + 98, 98, 98, 98, 98, 98, 98,279,141,141,141,141,141,141,141,141, /* U+2E800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+2F000 */ + 98, 98, 98, 98,280,141,141,141,141,141,141,141,141,141,141,141, /* U+2F800 */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+30000 */ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+30800 */ - 98, 98, 98, 98, 98, 98,275,142,142,142,142,142,142,142,142,142, /* U+31000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+31800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+32000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+32800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+33000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+33800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+34000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+34800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+35000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+35800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+36000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+36800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+37000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+37800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+38000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+38800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+39000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+39800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3A000 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3A800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3B000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3B800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3C000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3C800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3D000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3D800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3E000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3E800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3F000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+3F800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+40000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+40800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+41000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+41800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+42000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+42800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+43000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+43800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+44000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+44800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+45000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+45800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+46000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+46800 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+47000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+47800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+48000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+48800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+49000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+49800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4A000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4A800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4B000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4B800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4C000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4C800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4D000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4D800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4E000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4E800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4F000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+4F800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+50000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+50800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+51000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+51800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+52000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+52800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+53000 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+53800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+54000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+54800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+55000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+55800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+56000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+56800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+57000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+57800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+58000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+58800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+59000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+59800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5A000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5A800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5B000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5B800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5C000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5C800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5D000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5D800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5E000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5E800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5F000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+5F800 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+60000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+60800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+61000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+61800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+62000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+62800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+63000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+63800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+64000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+64800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+65000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+65800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+66000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+66800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+67000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+67800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+68000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+68800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+69000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+69800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6A000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6A800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6B000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6B800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6C000 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6C800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6D000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6D800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6E000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6E800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6F000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+6F800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+70000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+70800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+71000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+71800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+72000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+72800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+73000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+73800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+74000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+74800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+75000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+75800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+76000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+76800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+77000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+77800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+78000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+78800 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+79000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+79800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7A000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7A800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7B000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7B800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7C000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7C800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7D000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7D800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7E000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7E800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7F000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+7F800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+80000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+80800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+81000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+81800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+82000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+82800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+83000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+83800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+84000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+84800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+85000 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+85800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+86000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+86800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+87000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+87800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+88000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+88800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+89000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+89800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8A000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8A800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8B000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8B800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8C000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8C800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8D000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8D800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8E000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8E800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8F000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+8F800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+90000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+90800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+91000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+91800 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+92000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+92800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+93000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+93800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+94000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+94800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+95000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+95800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+96000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+96800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+97000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+97800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+98000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+98800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+99000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+99800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9A000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9A800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9B000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9B800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9C000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9C800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9D000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9D800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9E000 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9E800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9F000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+9F800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A0000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A0800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A1000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A1800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A2000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A2800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A3000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A3800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A4000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A4800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A5000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A5800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A6000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A6800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A7000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A7800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A8000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A8800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A9000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+A9800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AA000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AA800 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AB000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AB800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AC000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AC800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AD000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AD800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AE000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AE800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AF000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+AF800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B0000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B0800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B1000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B1800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B2000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B2800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B3000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B3800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B4000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B4800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B5000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B5800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B6000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B6800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B7000 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B7800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B8000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B8800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B9000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+B9800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BA000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BA800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BB000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BB800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BC000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BC800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BD000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BD800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BE000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BE800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BF000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+BF800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C0000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C0800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C1000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C1800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C2000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C2800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C3000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C3800 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C4000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C4800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C5000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C5800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C6000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C6800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C7000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C7800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C8000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C8800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C9000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+C9800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CA000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CA800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CB000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CB800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CC000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CC800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CD000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CD800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CE000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CE800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CF000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+CF800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D0000 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D0800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D1000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D1800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D2000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D2800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D3000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D3800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D4000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D4800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D5000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D5800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D6000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D6800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D7000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D7800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D8000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D8800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D9000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+D9800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DA000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DA800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DB000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DB800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DC000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DC800 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DD000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DD800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DE000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DE800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DF000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DF800 */ -276,277,278,279,277,277,277,277,277,277,277,277,277,277,277,277, /* U+E0000 */ -277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,277, /* U+E0800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E1000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E1800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E2000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E2800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E3000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E3800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E4000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E4800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E5000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E5800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E6000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E6800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E7000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E7800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E8000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E8800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E9000 */ 
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E9800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EA000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EA800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EB000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EB800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EC000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EC800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+ED000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+ED800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EE000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EE800 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EF000 */ -142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+EF800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F0000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F0800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F1000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F1800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F2000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F2800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F3000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F3800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F4000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F4800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F5000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F5800 */ 
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F6000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F6800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F7000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F7800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F8000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F8800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F9000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F9800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FA000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FA800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FB000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FB800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FC000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FC800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FD000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FD800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FE000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FE800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FF000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,280, /* U+FF800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+100000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+100800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+101000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+101800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* 
U+102000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+102800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+103000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+103800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+104000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+104800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+105000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+105800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+106000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+106800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+107000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+107800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+108000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+108800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+109000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+109800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10A000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10A800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10B000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10B800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10C000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10C800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10D000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10D800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10E000 */ 
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10E800 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10F000 */ -126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,280, /* U+10F800 */ + 98, 98, 98, 98, 98, 98,281,141,141,141,141,141,141,141,141,141, /* U+31000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+31800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+32000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+32800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+33000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+33800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+34000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+34800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+35000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+35800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+36000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+36800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+37000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+37800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+38000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+38800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+39000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+39800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3A000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3A800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3B000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3B800 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3C000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3C800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3D000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3D800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3E000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3E800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3F000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3F800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+40000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+40800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+41000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+41800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+42000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+42800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+43000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+43800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+44000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+44800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+45000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+45800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+46000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+46800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+47000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+47800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+48000 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+48800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+49000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+49800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4A000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4A800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4B000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4B800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4C000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4C800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4D000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4D800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4E000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4E800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4F000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4F800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+50000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+50800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+51000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+51800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+52000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+52800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+53000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+53800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+54000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+54800 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+55000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+55800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+56000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+56800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+57000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+57800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+58000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+58800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+59000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+59800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5A000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5A800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5B000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5B800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5C000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5C800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5D000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5D800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5E000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5E800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5F000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5F800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+60000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+60800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+61000 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+61800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+62000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+62800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+63000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+63800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+64000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+64800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+65000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+65800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+66000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+66800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+67000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+67800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+68000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+68800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+69000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+69800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6A000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6A800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6B000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6B800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6C000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6C800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6D000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6D800 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6E000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6E800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6F000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6F800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+70000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+70800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+71000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+71800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+72000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+72800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+73000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+73800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+74000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+74800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+75000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+75800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+76000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+76800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+77000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+77800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+78000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+78800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+79000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+79800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7A000 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7A800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7B000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7B800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7C000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7C800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7D000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7D800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7E000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7E800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7F000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7F800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+80000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+80800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+81000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+81800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+82000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+82800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+83000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+83800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+84000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+84800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+85000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+85800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+86000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+86800 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+87000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+87800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+88000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+88800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+89000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+89800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8A000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8A800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8B000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8B800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8C000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8C800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8D000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8D800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8E000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8E800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8F000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8F800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+90000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+90800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+91000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+91800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+92000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+92800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+93000 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+93800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+94000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+94800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+95000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+95800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+96000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+96800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+97000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+97800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+98000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+98800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+99000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+99800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9A000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9A800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9B000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9B800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9C000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9C800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9D000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9D800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9E000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9E800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9F000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9F800 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A0000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A0800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A1000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A1800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A2000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A2800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A3000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A3800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A4000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A4800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A5000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A5800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A6000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A6800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A7000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A7800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A8000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A8800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A9000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A9800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AA000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AA800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AB000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AB800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AC000 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AC800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AD000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AD800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AE000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AE800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AF000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AF800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B0000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B0800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B1000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B1800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B2000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B2800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B3000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B3800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B4000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B4800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B5000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B5800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B6000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B6800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B7000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B7800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B8000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B8800 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B9000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B9800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BA000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BA800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BB000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BB800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BC000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BC800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BD000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BD800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BE000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BE800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BF000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BF800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C0000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C0800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C1000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C1800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C2000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C2800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C3000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C3800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C4000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C4800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C5000 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C5800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C6000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C6800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C7000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C7800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C8000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C8800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C9000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C9800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CA000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CA800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CB000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CB800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CC000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CC800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CD000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CD800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CE000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CE800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CF000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CF800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D0000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D0800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D1000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D1800 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D2000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D2800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D3000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D3800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D4000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D4800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D5000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D5800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D6000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D6800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D7000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D7800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D8000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D8800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D9000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D9800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DA000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DA800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DB000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DB800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DC000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DC800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DD000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DD800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DE000 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DE800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DF000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DF800 */ +282,283,284,285,283,283,283,283,283,283,283,283,283,283,283,283, /* U+E0000 */ +283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283, /* U+E0800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E1000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E1800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E2000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E2800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E3000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E3800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E4000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E4800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E5000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E5800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E6000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E6800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E7000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E7800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E8000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E8800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E9000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E9800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EA000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EA800 
*/ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EB000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EB800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EC000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EC800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+ED000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+ED800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EE000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EE800 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EF000 */ +141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EF800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F0000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F0800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F1000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F1800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F2000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F2800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F3000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F3800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F4000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F4800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F5000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F5800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F6000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F6800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F7000 
*/ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F7800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F8000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F8800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F9000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F9800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FA000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FA800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FB000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FB800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FC000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FC800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FD000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FD800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FE000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FE800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FF000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,286, /* U+FF800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+100000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+100800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+101000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+101800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+102000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+102800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+103000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* 
U+103800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+104000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+104800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+105000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+105800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+106000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+106800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+107000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+107800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+108000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+108800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+109000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+109800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10A000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10A800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10B000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10B800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10C000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10C800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10D000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10D800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10E000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10E800 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10F000 */ +125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,286, /* U+10F800 */ }; -const uint16_t PRIV(ucd_stage2)[] = { 
/* 71936 bytes, block = 128 */ +const uint16_t PRIV(ucd_stage2)[] = { /* 73472 bytes, block = 128 */ /* block 0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -1840,463 +1866,463 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ /* block 12 */ 215,215,215,215,215,216,217,217,217,218,218,219,220,218,221,221, -222,222,222,222,222,222,222,222,222,222,222,220,223,120,218,220, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -225,224,224,224,224,224,224,224,224,224,224,226,226,226,226,226, -226,226,226,226,226,226,222,222,222,222,222,222,222,222,222,222, -227,227,227,227,227,227,227,227,227,227,218,218,218,218,224,224, -226,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, +222,222,222,222,222,222,222,222,222,222,222,220,223,218,218,224, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +226,225,225,225,225,225,225,225,225,225,225,227,227,227,227,227, +227,227,227,227,227,227,222,222,222,222,222,222,222,222,222,222, +228,228,228,228,228,228,228,228,228,228,218,218,218,218,225,225, +227,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, /* block 13 */ -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,228,224,222,222,222,222,222,222,222,216,221,222, -222,222,222,222,222,229,229,222,222,221,222,222,222,222,224,224, -230,230,230,230,230,230,230,230,230,230,224,224,224,221,221,224, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, 
+225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,229,225,222,222,222,222,222,222,222,216,221,222, +222,222,222,222,222,230,230,222,222,221,222,222,222,222,225,225, +231,231,231,231,231,231,231,231,231,231,225,225,225,221,221,225, /* block 14 */ -231,231,231,231,231,231,231,231,231,231,231,231,231,231,120,232, -233,234,233,233,233,233,233,233,233,233,233,233,233,233,233,233, -233,233,233,233,233,233,233,233,233,233,233,233,233,233,233,233, +232,232,232,232,232,232,232,232,232,232,232,232,232,232,120,233, +234,235,234,234,234,234,234,234,234,234,234,234,234,234,234,234, 234,234,234,234,234,234,234,234,234,234,234,234,234,234,234,234, -234,234,234,234,234,234,234,234,234,234,234,120,120,233,233,233, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, +235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,235, +235,235,235,235,235,235,235,235,235,235,235,120,120,234,234,234, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, /* block 15 */ -235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,235, -235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,235, -235,235,235,235,235,235,236,236,236,236,236,236,236,236,236,236, -236,235,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -237,237,237,237,237,237,237,237,237,237,238,238,238,238,238,238, -238,238,238,238,238,238,238,238,238,238,238,238,238,238,238,238, -238,238,238,238,238,238,238,238,238,238,238,239,239,239,239,239, -239,239,239,239,240,240,241,242,242,242,240,120,120,239,243,243, 
+236,236,236,236,236,236,236,236,236,236,236,236,236,236,236,236, +236,236,236,236,236,236,236,236,236,236,236,236,236,236,236,236, +236,236,236,236,236,236,237,237,237,237,237,237,237,237,237,237, +237,236,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +238,238,238,238,238,238,238,238,238,238,239,239,239,239,239,239, +239,239,239,239,239,239,239,239,239,239,239,239,239,239,239,239, +239,239,239,239,239,239,239,239,239,239,239,240,240,240,240,240, +240,240,240,240,241,241,242,243,243,243,241,120,120,240,244,244, /* block 16 */ -244,244,244,244,244,244,244,244,244,244,244,244,244,244,244,244, -244,244,244,244,244,244,245,245,245,245,246,245,245,245,245,245, -245,245,245,245,246,245,245,245,246,245,245,245,245,245,120,120, -247,247,247,247,247,247,247,247,247,247,247,247,247,247,247,120, -248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248, -248,248,248,248,248,248,248,248,248,249,249,249,120,120,250,120, -233,233,233,233,233,233,233,233,233,233,233,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +245,245,245,245,245,245,245,245,245,245,245,245,245,245,245,245, +245,245,245,245,245,245,246,246,246,246,247,246,246,246,246,246, +246,246,246,246,247,246,246,246,247,246,246,246,246,246,120,120, +248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,120, +249,249,249,249,249,249,249,249,249,249,249,249,249,249,249,249, +249,249,249,249,249,249,249,249,249,250,250,250,120,120,251,120, +234,234,234,234,234,234,234,234,234,234,234,120,120,120,120,120, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, /* block 17 */ -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,120,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,120,120,120,120,120,120,120,120, 
-120,120,120,222,222,222,222,222,222,222,222,222,222,222,222,222, +225,225,225,225,225,225,225,225,252,225,225,225,225,225,225,120, +215,215,120,120,120,120,120,120,222,222,222,222,222,222,222,222, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,230,222,222,222,222,222,222, +222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,222, 222,222,216,222,222,222,222,222,222,222,222,222,222,222,222,222, 222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,222, /* block 18 */ -251,251,251,252,253,253,253,253,253,253,253,253,253,253,253,253, -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, -253,253,253,253,253,253,253,253,253,253,251,252,251,253,252,252, -252,251,251,251,251,251,251,251,251,252,252,252,252,251,252,252, -253,254,255,113,113,251,251,251,253,253,253,253,253,253,253,253, -253,253,251,251,256,257,258,258,258,258,258,258,258,258,258,258, -259,260,253,253,253,253,253,253,253,253,253,253,253,253,253,253, +253,253,253,254,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +255,255,255,255,255,255,255,255,255,255,253,254,253,255,254,254, +254,253,253,253,253,253,253,253,253,254,254,254,254,253,254,254, +255,256,257,113,113,253,253,253,255,255,255,255,255,255,255,255, +255,255,253,253,258,259,260,260,260,260,260,260,260,260,260,260, +261,262,255,255,255,255,255,255,255,255,255,255,255,255,255,255, /* block 19 */ -261,262,263,263,120,261,261,261,261,261,261,261,261,120,120,261, -261,120,120,261,261,261,261,261,261,261,261,261,261,261,261,261, -261,261,261,261,261,261,261,261,261,120,261,261,261,261,261,261, -261,120,261,120,120,120,261,261,261,261,120,120,262,261,264,263, 
-263,262,262,262,262,120,120,263,263,120,120,263,263,262,261,120, -120,120,120,120,120,120,120,264,120,120,120,120,261,261,120,261, -261,261,262,262,120,120,265,265,265,265,265,265,265,265,265,265, -261,261,266,266,267,267,267,267,267,267,268,266,261,269,262,120, +263,264,265,265,120,263,263,263,263,263,263,263,263,120,120,263, +263,120,120,263,263,263,263,263,263,263,263,263,263,263,263,263, +263,263,263,263,263,263,263,263,263,120,263,263,263,263,263,263, +263,120,263,120,120,120,263,263,263,263,120,120,264,263,266,265, +265,264,264,264,264,120,120,265,265,120,120,265,265,264,263,120, +120,120,120,120,120,120,120,266,120,120,120,120,263,263,120,263, +263,263,264,264,120,120,267,267,267,267,267,267,267,267,267,267, +263,263,268,268,269,269,269,269,269,269,270,268,263,271,264,120, /* block 20 */ -120,270,270,271,120,272,272,272,272,272,272,120,120,120,120,272, -272,120,120,272,272,272,272,272,272,272,272,272,272,272,272,272, -272,272,272,272,272,272,272,272,272,120,272,272,272,272,272,272, -272,120,272,272,120,272,272,120,272,272,120,120,270,120,271,271, -271,270,270,120,120,120,120,270,270,120,120,270,270,270,120,120, -120,270,120,120,120,120,120,120,120,272,272,272,272,120,272,120, -120,120,120,120,120,120,273,273,273,273,273,273,273,273,273,273, -270,270,272,272,272,270,274,120,120,120,120,120,120,120,120,120, +120,272,272,273,120,274,274,274,274,274,274,120,120,120,120,274, +274,120,120,274,274,274,274,274,274,274,274,274,274,274,274,274, +274,274,274,274,274,274,274,274,274,120,274,274,274,274,274,274, +274,120,274,274,120,274,274,120,274,274,120,120,272,120,273,273, +273,272,272,120,120,120,120,272,272,120,120,272,272,272,120,120, +120,272,120,120,120,120,120,120,120,274,274,274,274,120,274,120, +120,120,120,120,120,120,275,275,275,275,275,275,275,275,275,275, +272,272,274,274,274,272,276,120,120,120,120,120,120,120,120,120, /* block 21 */ -120,275,275,276,120,277,277,277,277,277,277,277,277,277,120,277, 
-277,277,120,277,277,277,277,277,277,277,277,277,277,277,277,277, -277,277,277,277,277,277,277,277,277,120,277,277,277,277,277,277, -277,120,277,277,120,277,277,277,277,277,120,120,275,277,276,276, -276,275,275,275,275,275,120,275,275,276,120,276,276,275,120,120, -277,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -277,277,275,275,120,120,278,278,278,278,278,278,278,278,278,278, -279,280,120,120,120,120,120,120,120,277,275,275,275,275,275,275, +120,277,277,278,120,279,279,279,279,279,279,279,279,279,120,279, +279,279,120,279,279,279,279,279,279,279,279,279,279,279,279,279, +279,279,279,279,279,279,279,279,279,120,279,279,279,279,279,279, +279,120,279,279,120,279,279,279,279,279,120,120,277,279,278,278, +278,277,277,277,277,277,120,277,277,278,120,278,278,277,120,120, +279,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +279,279,277,277,120,120,280,280,280,280,280,280,280,280,280,280, +281,282,120,120,120,120,120,120,120,279,277,277,277,277,277,277, /* block 22 */ -120,281,282,282,120,283,283,283,283,283,283,283,283,120,120,283, -283,120,120,283,283,283,283,283,283,283,283,283,283,283,283,283, -283,283,283,283,283,283,283,283,283,120,283,283,283,283,283,283, -283,120,283,283,120,283,283,283,283,283,120,120,281,283,284,281, -282,281,281,281,281,120,120,282,282,120,120,282,282,281,120,120, -120,120,120,120,120,281,281,284,120,120,120,120,283,283,120,283, -283,283,281,281,120,120,285,285,285,285,285,285,285,285,285,285, -286,283,287,287,287,287,287,287,120,120,120,120,120,120,120,120, +120,283,284,284,120,285,285,285,285,285,285,285,285,120,120,285, +285,120,120,285,285,285,285,285,285,285,285,285,285,285,285,285, +285,285,285,285,285,285,285,285,285,120,285,285,285,285,285,285, +285,120,285,285,120,285,285,285,285,285,120,120,283,285,286,283, +284,283,283,283,283,120,120,284,284,120,120,284,284,283,120,120, +120,120,120,120,120,283,283,286,120,120,120,120,285,285,120,285, +285,285,283,283,120,120,287,287,287,287,287,287,287,287,287,287, 
+288,285,289,289,289,289,289,289,120,120,120,120,120,120,120,120, /* block 23 */ -120,120,288,289,120,289,289,289,289,289,289,120,120,120,289,289, -289,120,289,289,289,289,120,120,120,289,289,120,289,120,289,289, -120,120,120,289,289,120,120,120,289,289,289,120,120,120,289,289, -289,289,289,289,289,289,289,289,289,289,120,120,120,120,290,291, -288,291,291,120,120,120,291,291,291,120,291,291,291,288,120,120, -289,120,120,120,120,120,120,290,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,292,292,292,292,292,292,292,292,292,292, -293,293,293,294,295,295,295,295,295,296,295,120,120,120,120,120, +120,120,290,291,120,291,291,291,291,291,291,120,120,120,291,291, +291,120,291,291,291,291,120,120,120,291,291,120,291,120,291,291, +120,120,120,291,291,120,120,120,291,291,291,120,120,120,291,291, +291,291,291,291,291,291,291,291,291,291,120,120,120,120,292,293, +290,293,293,120,120,120,293,293,293,120,293,293,293,290,120,120, +291,120,120,120,120,120,120,292,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,294,294,294,294,294,294,294,294,294,294, +295,295,295,296,297,297,297,297,297,298,297,120,120,120,120,120, /* block 24 */ -297,298,298,298,297,299,299,299,299,299,299,299,299,120,299,299, -299,120,299,299,299,299,299,299,299,299,299,299,299,299,299,299, -299,299,299,299,299,299,299,299,299,120,299,299,299,299,299,299, -299,299,299,299,299,299,299,299,299,299,120,120,120,299,297,297, -297,298,298,298,298,120,297,297,297,120,297,297,297,297,120,120, -120,120,120,120,120,297,297,120,299,299,299,120,120,120,120,120, -299,299,297,297,120,120,300,300,300,300,300,300,300,300,300,300, -120,120,120,120,120,120,120,301,302,302,302,302,302,302,302,303, +299,300,300,300,299,301,301,301,301,301,301,301,301,120,301,301, +301,120,301,301,301,301,301,301,301,301,301,301,301,301,301,301, +301,301,301,301,301,301,301,301,301,120,301,301,301,301,301,301, +301,301,301,301,301,301,301,301,301,301,120,120,299,301,299,299, 
+299,300,300,300,300,120,299,299,299,120,299,299,299,299,120,120, +120,120,120,120,120,299,299,120,301,301,301,120,120,301,120,120, +301,301,299,299,120,120,302,302,302,302,302,302,302,302,302,302, +120,120,120,120,120,120,120,303,304,304,304,304,304,304,304,305, /* block 25 */ -304,305,306,306,307,304,304,304,304,304,304,304,304,120,304,304, -304,120,304,304,304,304,304,304,304,304,304,304,304,304,304,304, -304,304,304,304,304,304,304,304,304,120,304,304,304,304,304,304, -304,304,304,304,120,304,304,304,304,304,120,120,305,304,306,305, -306,306,308,306,306,120,305,306,306,120,306,306,305,305,120,120, -120,120,120,120,120,308,308,120,120,120,120,120,120,120,304,120, -304,304,305,305,120,120,309,309,309,309,309,309,309,309,309,309, -120,304,304,120,120,120,120,120,120,120,120,120,120,120,120,120, +306,307,308,308,309,306,306,306,306,306,306,306,306,120,306,306, +306,120,306,306,306,306,306,306,306,306,306,306,306,306,306,306, +306,306,306,306,306,306,306,306,306,120,306,306,306,306,306,306, +306,306,306,306,120,306,306,306,306,306,120,120,307,306,308,307, +308,308,310,308,308,120,307,308,308,120,308,308,307,307,120,120, +120,120,120,120,120,310,310,120,120,120,120,120,120,306,306,120, +306,306,307,307,120,120,311,311,311,311,311,311,311,311,311,311, +120,306,306,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 26 */ -310,310,311,311,312,312,312,312,312,312,312,312,312,120,312,312, -312,120,312,312,312,312,312,312,312,312,312,312,312,312,312,312, -312,312,312,312,312,312,312,312,312,312,312,312,312,312,312,312, -312,312,312,312,312,312,312,312,312,312,312,310,310,312,313,311, -311,310,310,310,310,120,311,311,311,120,311,311,311,310,314,315, -120,120,120,120,312,312,312,313,316,316,316,316,316,316,316,312, -312,312,310,310,120,120,317,317,317,317,317,317,317,317,317,317, -316,316,316,316,316,316,316,316,316,315,312,312,312,312,312,312, +312,312,313,313,314,314,314,314,314,314,314,314,314,120,314,314, 
+314,120,314,314,314,314,314,314,314,314,314,314,314,314,314,314, +314,314,314,314,314,314,314,314,314,314,314,314,314,314,314,314, +314,314,314,314,314,314,314,314,314,314,314,312,312,314,315,313, +313,312,312,312,312,120,313,313,313,120,313,313,313,312,316,317, +120,120,120,120,314,314,314,315,318,318,318,318,318,318,318,314, +314,314,312,312,120,120,319,319,319,319,319,319,319,319,319,319, +318,318,318,318,318,318,318,318,318,317,314,314,314,314,314,314, /* block 27 */ -120,318,319,319,120,320,320,320,320,320,320,320,320,320,320,320, -320,320,320,320,320,320,320,120,120,120,320,320,320,320,320,320, -320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320, -320,320,120,320,320,320,320,320,320,320,320,320,120,320,120,120, -320,320,320,320,320,320,320,120,120,120,318,120,120,120,120,321, -319,319,318,318,318,120,318,120,319,319,319,319,319,319,319,321, -120,120,120,120,120,120,322,322,322,322,322,322,322,322,322,322, -120,120,319,319,323,120,120,120,120,120,120,120,120,120,120,120, +120,320,321,321,120,322,322,322,322,322,322,322,322,322,322,322, +322,322,322,322,322,322,322,120,120,120,322,322,322,322,322,322, +322,322,322,322,322,322,322,322,322,322,322,322,322,322,322,322, +322,322,120,322,322,322,322,322,322,322,322,322,120,322,120,120, +322,322,322,322,322,322,322,120,120,120,320,120,120,120,120,323, +321,321,320,320,320,120,320,120,321,321,321,321,321,321,321,323, +120,120,120,120,120,120,324,324,324,324,324,324,324,324,324,324, +120,120,321,321,325,120,120,120,120,120,120,120,120,120,120,120, /* block 28 */ -120,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324, -324,325,324,326,325,325,325,325,325,325,325,120,120,120,120, 6, -324,324,324,324,324,324,327,325,325,325,325,325,325,325,325,328, -329,329,329,329,329,329,329,329,329,329,328,328,120,120,120,120, 
+120,326,326,326,326,326,326,326,326,326,326,326,326,326,326,326, +326,326,326,326,326,326,326,326,326,326,326,326,326,326,326,326, +326,326,326,326,326,326,326,326,326,326,326,326,326,326,326,326, +326,327,326,328,327,327,327,327,327,327,327,120,120,120,120, 6, +326,326,326,326,326,326,329,327,327,327,327,327,327,327,327,330, +331,331,331,331,331,331,331,331,331,331,330,330,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 29 */ -120,330,330,120,330,120,330,330,330,330,330,120,330,330,330,330, -330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,330, -330,330,330,330,120,330,120,330,330,330,330,330,330,330,330,330, -330,331,330,332,331,331,331,331,331,331,331,331,331,330,120,120, -330,330,330,330,330,120,333,120,331,331,331,331,331,331,120,120, -334,334,334,334,334,334,334,334,334,334,120,120,330,330,330,330, +120,332,332,120,332,120,332,332,332,332,332,120,332,332,332,332, +332,332,332,332,332,332,332,332,332,332,332,332,332,332,332,332, +332,332,332,332,120,332,120,332,332,332,332,332,332,332,332,332, +332,333,332,334,333,333,333,333,333,333,333,333,333,332,120,120, +332,332,332,332,332,120,335,120,333,333,333,333,333,333,120,120, +336,336,336,336,336,336,336,336,336,336,120,120,332,332,332,332, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 30 */ -335,336,336,336,337,337,337,337,337,337,337,337,337,337,337,337, -337,337,337,336,337,336,336,336,338,338,336,336,336,336,336,336, -339,339,339,339,339,339,339,339,339,339,340,340,340,340,340,340, -340,340,340,340,336,338,336,338,336,338,341,342,341,342,343,343, -335,335,335,335,335,335,335,335,120,335,335,335,335,335,335,335, -335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335, -335,335,335,335,335,335,335,335,335,335,335,335,335,120,120,120, 
-120,338,338,338,338,338,338,338,338,338,338,338,338,338,338,343, +337,338,338,338,339,339,339,339,339,339,339,339,339,339,339,339, +339,339,339,338,339,338,338,338,340,340,338,338,338,338,338,338, +341,341,341,341,341,341,341,341,341,341,342,342,342,342,342,342, +342,342,342,342,338,340,338,340,338,340,343,344,343,344,345,345, +337,337,337,337,337,337,337,337,120,337,337,337,337,337,337,337, +337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337, +337,337,337,337,337,337,337,337,337,337,337,337,337,120,120,120, +120,340,340,340,340,340,340,340,340,340,340,340,340,340,340,345, /* block 31 */ -338,338,338,338,338,337,338,338,335,335,335,335,335,338,338,338, -338,338,338,338,338,338,338,338,120,338,338,338,338,338,338,338, -338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,338, -338,338,338,338,338,338,338,338,338,338,338,338,338,120,336,336, -336,336,336,336,336,336,338,336,336,336,336,336,336,120,336,336, -337,337,337,337,337, 20, 20, 20, 20,337,337,120,120,120,120,120, +340,340,340,340,340,339,340,340,337,337,337,337,337,340,340,340, +340,340,340,340,340,340,340,340,120,340,340,340,340,340,340,340, +340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340, +340,340,340,340,340,340,340,340,340,340,340,340,340,120,338,338, +338,338,338,338,338,338,340,338,338,338,338,338,338,120,338,338, +339,339,339,339,339, 20, 20, 20, 20,339,339,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 32 */ -344,344,344,344,344,344,344,344,344,344,344,344,344,344,344,344, -344,344,344,344,344,344,344,344,344,344,344,344,344,344,344,344, -344,344,344,344,344,344,344,344,344,344,344,345,345,346,346,346, -346,347,346,346,346,346,346,346,345,346,346,347,347,346,346,344, -348,348,348,348,348,348,348,348,348,348,349,349,349,349,349,349, -344,344,344,344,344,344,347,347,346,346,344,344,344,344,346,346, 
-346,344,345,345,345,344,344,345,345,345,345,345,345,345,344,344, -344,346,346,346,346,344,344,344,344,344,344,344,344,344,344,344, +346,346,346,346,346,346,346,346,346,346,346,346,346,346,346,346, +346,346,346,346,346,346,346,346,346,346,346,346,346,346,346,346, +346,346,346,346,346,346,346,346,346,346,346,347,347,348,348,348, +348,349,348,348,348,348,348,348,347,348,348,349,349,348,348,346, +350,350,350,350,350,350,350,350,350,350,351,351,351,351,351,351, +346,346,346,346,346,346,349,349,348,348,346,346,346,346,348,348, +348,346,347,347,347,346,346,347,347,347,347,347,347,347,346,346, +346,348,348,348,348,346,346,346,346,346,346,346,346,346,346,346, /* block 33 */ -344,344,346,345,347,346,346,345,345,345,345,345,345,346,344,345, -350,350,350,350,350,350,350,350,350,350,345,345,345,346,351,351, -352,352,352,352,352,352,352,352,352,352,352,352,352,352,352,352, -352,352,352,352,352,352,352,352,352,352,352,352,352,352,352,352, -352,352,352,352,352,352,120,352,120,120,120,120,120,352,120,120, -353,353,353,353,353,353,353,353,353,353,353,353,353,353,353,353, -353,353,353,353,353,353,353,353,353,353,353,353,353,353,353,353, -353,353,353,353,353,353,353,353,353,353,353,354,355,353,353,353, +346,346,348,347,349,348,348,347,347,347,347,347,347,348,346,347, +352,352,352,352,352,352,352,352,352,352,347,347,347,348,353,353, +354,354,354,354,354,354,354,354,354,354,354,354,354,354,354,354, +354,354,354,354,354,354,354,354,354,354,354,354,354,354,354,354, +354,354,354,354,354,354,120,354,120,120,120,120,120,354,120,120, +355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355, +355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355, +355,355,355,355,355,355,355,355,355,355,355,356,357,355,355,355, /* block 34 */ -356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356, -356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356, -356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356, 
-356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356, -356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356, -356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356, -357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357, -357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357, - -/* block 35 */ -357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357, -357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357, -357,357,357,357,357,357,357,357,358,358,358,358,358,358,358,358, 358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358, 358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358, 358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358, 358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358, 358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358, - -/* block 36 */ -359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, -359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, +358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358, 359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, 359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, -359,359,359,359,359,359,359,359,359,120,359,359,359,359,120,120, -359,359,359,359,359,359,359,120,359,120,359,359,359,359,120,120, + +/* block 35 */ 359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, 359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, +359,359,359,359,359,359,359,359,360,360,360,360,360,360,360,360, +360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360, +360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360, +360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360, +360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360, +360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360, + +/* block 36 */ +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, 
+361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, +361,361,361,361,361,361,361,361,361,120,361,361,361,361,120,120, +361,361,361,361,361,361,361,120,361,120,361,361,361,361,120,120, +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, /* block 37 */ -359,359,359,359,359,359,359,359,359,120,359,359,359,359,120,120, -359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, -359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, -359,120,359,359,359,359,120,120,359,359,359,359,359,359,359,120, -359,120,359,359,359,359,120,120,359,359,359,359,359,359,359,359, -359,359,359,359,359,359,359,120,359,359,359,359,359,359,359,359, -359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, -359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, +361,361,361,361,361,361,361,361,361,120,361,361,361,361,120,120, +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, +361,120,361,361,361,361,120,120,361,361,361,361,361,361,361,120, +361,120,361,361,361,361,120,120,361,361,361,361,361,361,361,361, +361,361,361,361,361,361,361,120,361,361,361,361,361,361,361,361, +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, /* block 38 */ -359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, -359,120,359,359,359,359,120,120,359,359,359,359,359,359,359,359, -359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, -359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, -359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, -359,359,359,359,359,359,359,359,359,359,359,120,120,360,360,360, 
-361,361,361,361,361,361,361,361,361,362,362,362,362,362,362,362, -362,362,362,362,362,362,362,362,362,362,362,362,362,120,120,120, +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, +361,120,361,361,361,361,120,120,361,361,361,361,361,361,361,361, +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, +361,361,361,361,361,361,361,361,361,361,361,120,120,362,362,362, +363,363,363,363,363,363,363,363,363,364,364,364,364,364,364,364, +364,364,364,364,364,364,364,364,364,364,364,364,364,120,120,120, /* block 39 */ -359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, -363,363,363,363,363,363,363,363,363,363,120,120,120,120,120,120, -364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364, -364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364, -364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364, -364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364, -364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364, -365,365,365,365,365,365,120,120,366,366,366,366,366,366,120,120, +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, +365,365,365,365,365,365,365,365,365,365,120,120,120,120,120,120, +366,366,366,366,366,366,366,366,366,366,366,366,366,366,366,366, +366,366,366,366,366,366,366,366,366,366,366,366,366,366,366,366, +366,366,366,366,366,366,366,366,366,366,366,366,366,366,366,366, +366,366,366,366,366,366,366,366,366,366,366,366,366,366,366,366, +366,366,366,366,366,366,366,366,366,366,366,366,366,366,366,366, +367,367,367,367,367,367,120,120,368,368,368,368,368,368,120,120, /* block 40 */ -367,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, 
-368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, +369,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, /* block 41 */ -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, /* block 42 */ 
-368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,369,370,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,371,372,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, /* block 43 */ -371,372,372,372,372,372,372,372,372,372,372,372,372,372,372,372, -372,372,372,372,372,372,372,372,372,372,372,373,374,120,120,120, -375,375,375,375,375,375,375,375,375,375,375,375,375,375,375,375, -375,375,375,375,375,375,375,375,375,375,375,375,375,375,375,375, -375,375,375,375,375,375,375,375,375,375,375,375,375,375,375,375, -375,375,375,375,375,375,375,375,375,375,375,375,375,375,375,375, -375,375,375,375,375,375,375,375,375,375,375, 5, 5, 5,376,376, -376,375,375,375,375,375,375,375,375,120,120,120,120,120,120,120, +373,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374, +374,374,374,374,374,374,374,374,374,374,374,375,376,120,120,120, +377,377,377,377,377,377,377,377,377,377,377,377,377,377,377,377, +377,377,377,377,377,377,377,377,377,377,377,377,377,377,377,377, +377,377,377,377,377,377,377,377,377,377,377,377,377,377,377,377, +377,377,377,377,377,377,377,377,377,377,377,377,377,377,377,377, 
+377,377,377,377,377,377,377,377,377,377,377, 5, 5, 5,378,378, +378,377,377,377,377,377,377,377,377,120,120,120,120,120,120,120, /* block 44 */ -377,377,377,377,377,377,377,377,377,377,377,377,377,120,377,377, -377,377,378,378,378,120,120,120,120,120,120,120,120,120,120,120, 379,379,379,379,379,379,379,379,379,379,379,379,379,379,379,379, -379,379,380,380,380,381,381,120,120,120,120,120,120,120,120,120, +379,379,380,380,380,381,120,120,120,120,120,120,120,120,120,379, 382,382,382,382,382,382,382,382,382,382,382,382,382,382,382,382, -382,382,383,383,120,120,120,120,120,120,120,120,120,120,120,120, -384,384,384,384,384,384,384,384,384,384,384,384,384,120,384,384, -384,120,385,385,120,120,120,120,120,120,120,120,120,120,120,120, +382,382,383,383,384,385,385,120,120,120,120,120,120,120,120,120, +386,386,386,386,386,386,386,386,386,386,386,386,386,386,386,386, +386,386,387,387,120,120,120,120,120,120,120,120,120,120,120,120, +388,388,388,388,388,388,388,388,388,388,388,388,388,120,388,388, +388,120,389,389,120,120,120,120,120,120,120,120,120,120,120,120, /* block 45 */ -386,386,386,386,386,386,386,386,386,386,386,386,386,386,386,386, -386,386,386,386,386,386,386,386,386,386,386,386,386,386,386,386, -386,386,386,386,386,386,386,386,386,386,386,386,386,386,386,386, -386,386,386,386,387,387,388,387,387,387,387,387,387,387,388,388, -388,388,388,388,388,388,387,388,388,387,387,387,387,387,387,387, -387,387,387,387,389,389,389,390,389,389,389,391,386,387,120,120, -392,392,392,392,392,392,392,392,392,392,120,120,120,120,120,120, -393,393,393,393,393,393,393,393,393,393,120,120,120,120,120,120, +390,390,390,390,390,390,390,390,390,390,390,390,390,390,390,390, +390,390,390,390,390,390,390,390,390,390,390,390,390,390,390,390, +390,390,390,390,390,390,390,390,390,390,390,390,390,390,390,390, +390,390,390,390,391,391,392,391,391,391,391,391,391,391,392,392, +392,392,392,392,392,392,391,392,392,391,391,391,391,391,391,391, 
+391,391,391,391,393,393,393,394,393,393,393,395,390,391,120,120, +396,396,396,396,396,396,396,396,396,396,120,120,120,120,120,120, +397,397,397,397,397,397,397,397,397,397,120,120,120,120,120,120, /* block 46 */ -394,394,395,395,394,395,396,394,394,394,394,397,397,397,398,120, -399,399,399,399,399,399,399,399,399,399,120,120,120,120,120,120, -400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400, -400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400, -400,400,400,401,400,400,400,400,400,400,400,400,400,400,400,400, -400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400, -400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400, -400,400,400,400,400,400,400,400,400,120,120,120,120,120,120,120, +398,398,399,399,398,399,400,398,398,398,398,401,401,401,402,401, +403,403,403,403,403,403,403,403,403,403,120,120,120,120,120,120, +404,404,404,404,404,404,404,404,404,404,404,404,404,404,404,404, +404,404,404,404,404,404,404,404,404,404,404,404,404,404,404,404, +404,404,404,405,404,404,404,404,404,404,404,404,404,404,404,404, +404,404,404,404,404,404,404,404,404,404,404,404,404,404,404,404, +404,404,404,404,404,404,404,404,404,404,404,404,404,404,404,404, +404,404,404,404,404,404,404,404,404,120,120,120,120,120,120,120, /* block 47 */ -400,400,400,400,400,397,397,400,400,400,400,400,400,400,400,400, -400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400, -400,400,400,400,400,400,400,400,400,397,400,120,120,120,120,120, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368, -368,368,368,368,368,368,120,120,120,120,120,120,120,120,120,120, +404,404,404,404,404,401,401,404,404,404,404,404,404,404,404,404, +404,404,404,404,404,404,404,404,404,404,404,404,404,404,404,404, 
+404,404,404,404,404,404,404,404,404,401,404,120,120,120,120,120, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,370,370,370,120,120,120,120,120,120,120,120,120,120, /* block 48 */ -402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,402, -402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,120, -403,403,403,404,404,404,404,403,403,404,404,404,120,120,120,120, -404,404,403,404,404,404,404,404,404,403,403,403,120,120,120,120, -405,120,120,120,406,406,407,407,407,407,407,407,407,407,407,407, -408,408,408,408,408,408,408,408,408,408,408,408,408,408,408,408, -408,408,408,408,408,408,408,408,408,408,408,408,408,408,120,120, -408,408,408,408,408,120,120,120,120,120,120,120,120,120,120,120, +406,406,406,406,406,406,406,406,406,406,406,406,406,406,406,406, +406,406,406,406,406,406,406,406,406,406,406,406,406,406,406,120, +407,407,407,408,408,408,408,407,407,408,408,408,120,120,120,120, +408,408,407,408,408,408,408,408,408,407,407,407,120,120,120,120, +409,120,120,120,410,410,411,411,411,411,411,411,411,411,411,411, +412,412,412,412,412,412,412,412,412,412,412,412,412,412,412,412, +412,412,412,412,412,412,412,412,412,412,412,412,412,412,120,120, +412,412,412,412,412,120,120,120,120,120,120,120,120,120,120,120, /* block 49 */ -409,409,409,409,409,409,409,409,409,409,409,409,409,409,409,409, -409,409,409,409,409,409,409,409,409,409,409,409,409,409,409,409, -409,409,409,409,409,409,409,409,409,409,409,409,120,120,120,120, -409,409,409,409,409,409,409,409,409,409,409,409,409,409,409,409, -409,409,409,409,409,409,409,409,409,409,120,120,120,120,120,120, -410,410,410,410,410,410,410,410,410,410,411,120,120,120,412,412, 413,413,413,413,413,413,413,413,413,413,413,413,413,413,413,413, 
413,413,413,413,413,413,413,413,413,413,413,413,413,413,413,413, +413,413,413,413,413,413,413,413,413,413,413,413,120,120,120,120, +413,413,413,413,413,413,413,413,413,413,413,413,413,413,413,413, +413,413,413,413,413,413,413,413,413,413,120,120,120,120,120,120, +414,414,414,414,414,414,414,414,414,414,415,120,120,120,416,416, +417,417,417,417,417,417,417,417,417,417,417,417,417,417,417,417, +417,417,417,417,417,417,417,417,417,417,417,417,417,417,417,417, /* block 50 */ -414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,414, -414,414,414,414,414,414,414,415,415,416,416,415,120,120,417,417, -418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,418, -418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,418, 418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,418, -418,418,418,418,418,419,420,419,420,420,420,420,420,420,420,120, -420,421,420,421,421,420,420,420,420,420,420,420,420,419,419,419, -419,419,419,420,420,420,420,420,420,420,420,420,420,120,120,420, +418,418,418,418,418,418,418,419,419,420,420,419,120,120,421,421, +422,422,422,422,422,422,422,422,422,422,422,422,422,422,422,422, +422,422,422,422,422,422,422,422,422,422,422,422,422,422,422,422, +422,422,422,422,422,422,422,422,422,422,422,422,422,422,422,422, +422,422,422,422,422,423,424,423,424,424,424,424,424,424,424,120, +424,425,424,425,425,424,424,424,424,424,424,424,424,423,423,423, +423,423,423,424,424,424,424,424,424,424,424,424,424,120,120,424, /* block 51 */ -422,422,422,422,422,422,422,422,422,422,120,120,120,120,120,120, -422,422,422,422,422,422,422,422,422,422,120,120,120,120,120,120, -423,423,423,423,423,423,423,424,423,423,423,423,423,423,120,120, -113,113,113,113,113,113,113,113,113,113,113,113,113,113,425,113, -113,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +426,426,426,426,426,426,426,426,426,426,120,120,120,120,120,120, +426,426,426,426,426,426,426,426,426,426,120,120,120,120,120,120, 
+427,427,427,427,427,427,427,428,427,427,427,427,427,427,120,120, +113,113,113,113,113,113,113,113,113,113,113,113,113,113,429,113, +113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 52 */ -426,426,426,426,427,428,428,428,428,428,428,428,428,428,428,428, -428,428,428,428,428,428,428,428,428,428,428,428,428,428,428,428, -428,428,428,428,428,428,428,428,428,428,428,428,428,428,428,428, -428,428,428,428,426,429,426,426,426,426,426,427,426,427,427,427, -427,427,426,427,427,428,428,428,428,428,428,428,120,120,120,120, -430,430,430,430,430,430,430,430,430,430,431,431,431,431,431,431, -431,432,432,432,432,432,432,432,432,432,432,426,426,426,426,426, -426,426,426,426,432,432,432,432,432,432,432,432,432,120,120,120, +430,430,430,430,431,432,432,432,432,432,432,432,432,432,432,432, +432,432,432,432,432,432,432,432,432,432,432,432,432,432,432,432, +432,432,432,432,432,432,432,432,432,432,432,432,432,432,432,432, +432,432,432,432,430,433,430,430,430,430,430,431,430,431,431,431, +431,431,430,431,431,432,432,432,432,432,432,432,432,120,120,120, +434,434,434,434,434,434,434,434,434,434,435,435,435,435,435,435, +435,436,436,436,436,436,436,436,436,436,436,430,430,430,430,430, +430,430,430,430,436,436,436,436,436,436,436,436,436,435,435,120, /* block 53 */ -433,433,434,435,435,435,435,435,435,435,435,435,435,435,435,435, -435,435,435,435,435,435,435,435,435,435,435,435,435,435,435,435, -435,434,433,433,433,433,434,434,433,433,434,433,433,433,435,435, -436,436,436,436,436,436,436,436,436,436,435,435,435,435,435,435, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, -437,437,437,437,437,437,438,439,438,438,439,439,439,438,439,438, 
-438,438,439,439,120,120,120,120,120,120,120,120,440,440,440,440, - -/* block 54 */ +437,437,438,439,439,439,439,439,439,439,439,439,439,439,439,439, +439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439, +439,438,437,437,437,437,438,438,437,437,438,437,437,437,439,439, +440,440,440,440,440,440,440,440,440,440,439,439,439,439,439,439, 441,441,441,441,441,441,441,441,441,441,441,441,441,441,441,441, 441,441,441,441,441,441,441,441,441,441,441,441,441,441,441,441, -441,441,441,441,442,442,442,442,442,442,442,442,443,443,443,443, -443,443,443,443,442,442,443,443,120,120,120,444,444,444,444,444, -445,445,445,445,445,445,445,445,445,445,120,120,120,441,441,441, -446,446,446,446,446,446,446,446,446,446,447,447,447,447,447,447, -447,447,447,447,447,447,447,447,447,447,447,447,447,447,447,447, -447,447,447,447,447,447,447,447,448,448,448,448,448,448,449,449, +441,441,441,441,441,441,442,443,442,442,443,443,443,442,443,442, +442,442,443,443,120,120,120,120,120,120,120,120,444,444,444,444, + +/* block 54 */ +445,445,445,445,445,445,445,445,445,445,445,445,445,445,445,445, +445,445,445,445,445,445,445,445,445,445,445,445,445,445,445,445, +445,445,445,445,446,446,446,446,446,446,446,446,447,447,447,447, +447,447,447,447,446,446,447,447,120,120,120,448,448,448,448,448, +449,449,449,449,449,449,449,449,449,449,120,120,120,445,445,445, +450,450,450,450,450,450,450,450,450,450,451,451,451,451,451,451, +451,451,451,451,451,451,451,451,451,451,451,451,451,451,451,451, +451,451,451,451,451,451,451,451,452,452,452,452,452,452,453,453, /* block 55 */ -450,451,452,453,454,455,456,457,458,120,120,120,120,120,120,120, -459,459,459,459,459,459,459,459,459,459,459,459,459,459,459,459, -459,459,459,459,459,459,459,459,459,459,459,459,459,459,459,459, -459,459,459,459,459,459,459,459,459,459,459,120,120,459,459,459, -460,460,460,460,460,460,460,460,120,120,120,120,120,120,120,120, -461,462,461,463,462,464,464,465,464,465,466,462,465,465,462,462, 
-465,467,462,462,462,462,462,462,462,468,469,470,470,464,470,470, -470,470,471,472,473,469,469,474,475,475,476,120,120,120,120,120, +454,455,456,457,458,459,460,461,462,120,120,120,120,120,120,120, +463,463,463,463,463,463,463,463,463,463,463,463,463,463,463,463, +463,463,463,463,463,463,463,463,463,463,463,463,463,463,463,463, +463,463,463,463,463,463,463,463,463,463,463,120,120,463,463,463, +464,464,464,464,464,464,464,464,120,120,120,120,120,120,120,120, +465,466,465,467,466,468,468,469,468,469,470,466,469,469,466,466, +469,471,466,466,466,466,466,466,466,472,473,474,474,468,474,474, +474,474,475,476,477,473,473,478,479,479,480,120,120,120,120,120, /* block 56 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35,128,128,128,128,128,477,110,110,110,110, + 35, 35, 35, 35, 35, 35,128,128,128,128,128,481,110,110,110,110, 110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, 110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, 110,110,110,110,110,110,110,110,110,110,110,110,110,121,121,121, 121,121,110,110,110,110,121,121,121,121,121, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35,478,479, 35, 35, 35,480, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35,482,483, 35, 35, 35,484, 35, 35, /* block 57 */ - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,481, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,485, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,110,110,110,110,110, 110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, 110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,121, 114,114,113,113,113,113,113,113,113,113,113,113,113,113,113,113, 113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, 113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, -113,113,113,113,113,113,113,113,482,113,120,113,113,113,113,113, +113,113,113,113,113,113,113,113,486,113,487,113,113,113,113,113, /* 
block 58 */ 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, @@ -2305,12 +2331,12 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, -483,484, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, +488,489, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, /* block 59 */ 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, - 32, 33, 32, 33, 32, 33, 35, 35, 35, 35, 35,485, 35, 35,486, 35, + 32, 33, 32, 33, 32, 33, 35, 35, 35, 35, 35,490, 35, 35,491, 35, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, @@ -2319,33 +2345,33 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, /* block 60 */ -487,487,487,487,487,487,487,487,488,488,488,488,488,488,488,488, -487,487,487,487,487,487,120,120,488,488,488,488,488,488,120,120, -487,487,487,487,487,487,487,487,488,488,488,488,488,488,488,488, -487,487,487,487,487,487,487,487,488,488,488,488,488,488,488,488, -487,487,487,487,487,487,120,120,488,488,488,488,488,488,120,120, -128,487,128,487,128,487,128,487,120,488,120,488,120,488,120,488, -487,487,487,487,487,487,487,487,488,488,488,488,488,488,488,488, -489,489,490,490,490,490,491,491,492,492,493,493,494,494,120,120, +492,492,492,492,492,492,492,492,493,493,493,493,493,493,493,493, +492,492,492,492,492,492,120,120,493,493,493,493,493,493,120,120, +492,492,492,492,492,492,492,492,493,493,493,493,493,493,493,493, +492,492,492,492,492,492,492,492,493,493,493,493,493,493,493,493, +492,492,492,492,492,492,120,120,493,493,493,493,493,493,120,120, 
+128,492,128,492,128,492,128,492,120,493,120,493,120,493,120,493, +492,492,492,492,492,492,492,492,493,493,493,493,493,493,493,493, +494,494,495,495,495,495,496,496,497,497,498,498,499,499,120,120, /* block 61 */ -487,487,487,487,487,487,487,487,495,495,495,495,495,495,495,495, -487,487,487,487,487,487,487,487,495,495,495,495,495,495,495,495, -487,487,487,487,487,487,487,487,495,495,495,495,495,495,495,495, -487,487,128,496,128,120,128,128,488,488,497,497,498,119,499,119, -119,119,128,496,128,120,128,128,500,500,500,500,498,119,119,119, -487,487,128,128,120,120,128,128,488,488,501,501,120,119,119,119, -487,487,128,128,128,169,128,128,488,488,502,502,174,119,119,119, -120,120,128,496,128,120,128,128,503,503,504,504,498,119,119,120, +492,492,492,492,492,492,492,492,500,500,500,500,500,500,500,500, +492,492,492,492,492,492,492,492,500,500,500,500,500,500,500,500, +492,492,492,492,492,492,492,492,500,500,500,500,500,500,500,500, +492,492,128,501,128,120,128,128,493,493,502,502,503,119,504,119, +119,119,128,501,128,120,128,128,505,505,505,505,503,119,119,119, +492,492,128,128,120,120,128,128,493,493,506,506,120,119,119,119, +492,492,128,128,128,169,128,128,493,493,507,507,174,119,119,119, +120,120,128,501,128,120,128,128,508,508,509,509,503,119,119,120, /* block 62 */ - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 24,505,506, 24, 24, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 24,510,511, 24, 24, 10, 10, 10, 10, 10, 10, 5, 5, 23, 27, 7, 23, 23, 27, 7, 23, - 5, 5, 5, 5, 5, 5, 5, 5,507,508, 24, 24, 24, 24, 24,509, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 23, 27, 5,510, 5, 5, 16, - 16, 5, 5, 5, 9, 7, 8, 5, 5,510, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5,512,513, 24, 24, 24, 24, 24,514, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 23, 27, 5,515, 5, 5, 16, + 16, 5, 5, 5, 9, 7, 8, 5, 5,515, 5, 5, 5, 5, 5, 5, 5, 5, 9, 5, 16, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, - 24, 24, 24, 24, 24,511, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24,516, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25,110,120,120, 25, 25, 25, 25, 25, 
25, 9, 9, 9, 7, 8,110, /* block 63 */ @@ -2353,24 +2379,24 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 110,110,110,110,110,110,110,110,110,110,110,110,110,120,120,120, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -113,113,113,113,113,113,113,113,113,113,113,113,113,425,425,425, -425,113,425,425,425,113,113,113,113,113,113,113,113,113,113,113, -512,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, + 6,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +113,113,113,113,113,113,113,113,113,113,113,113,113,429,429,429, +429,113,429,429,429,113,113,113,113,113,113,113,113,113,113,113, +517,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 64 */ - 20, 20,513, 20, 20, 20, 20,513, 20, 20,514,513,513,513,514,514, -513,513,513,514, 20,513, 20, 20, 9,513,513,513,513,513, 20, 20, - 20, 20, 21, 20,513, 20,515, 20,513, 20,516,517,513,513, 20,514, -513,513,518,513,514,519,519,519,519,520, 20, 20,514,514,513,513, - 9, 9, 9, 9, 9,513,514,514,514,514, 20, 9, 20, 20,521, 20, + 20, 20,518, 20, 20, 20, 20,518, 20, 20,519,518,518,518,519,519, +518,518,518,519, 20,518, 20, 20, 9,518,518,518,518,518, 20, 20, + 20, 20, 21, 20,518, 20,520, 20,518, 20,521,522,518,518, 20,519, +518,518,523,518,519,524,524,524,524,525, 20, 20,519,519,518,518, + 9, 9, 9, 9, 9,518,519,519,519,519, 20, 9, 20, 20,526, 20, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, -522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522, -523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523, +527,527,527,527,527,527,527,527,527,527,527,527,527,527,527,527, +528,528,528,528,528,528,528,528,528,528,528,528,528,528,528,528, /* block 65 */ -524,524,524, 32, 33,524,524,524,524, 25, 20, 20,120,120,120,120, - 9, 9, 9, 9,525, 21, 21, 21, 21, 21, 9, 9, 20, 20, 20, 20, +529,529,529, 32, 33,529,529,529,529, 25, 20, 
20,120,120,120,120, + 9, 9, 9, 9,530, 21, 21, 21, 21, 21, 9, 9, 20, 20, 20, 20, 9, 20, 20, 9, 20, 20, 9, 20, 20, 21, 21, 20, 20, 20, 9, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 9, 9, @@ -2422,10 +2448,10 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20,526,526,526,526,526,526,526,526,526,526, -526,526,527,526,526,526,526,526,526,526,526,526,526,526,526,526, -528,528,528,528,528,528,528,528,528,528,528,528,528,528,528,528, -528,528,528,528,528,528,528,528,528,528, 25, 25, 25, 25, 25, 25, + 20, 20, 20, 20, 20, 20,531,531,531,531,531,531,531,531,531,531, +531,531,532,531,531,531,531,531,531,531,531,531,531,531,531,531, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,533, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, /* block 71 */ @@ -2446,7 +2472,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 21, 9, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 9, 9, 9,525,525,525,525, 9, + 20, 20, 20, 20, 20, 20, 20, 20, 9, 9, 9,530,530,530,530, 9, /* block 73 */ 21, 21, 21, 21, 21, 21, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, @@ -2455,7 +2481,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,525, + 21, 
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,530, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, /* block 74 */ @@ -2489,20 +2515,20 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /* block 77 */ -529,529,529,529,529,529,529,529,529,529,529,529,529,529,529,529, -529,529,529,529,529,529,529,529,529,529,529,529,529,529,529,529, -529,529,529,529,529,529,529,529,529,529,529,529,529,529,529,529, -529,529,529,529,529,529,529,529,529,529,529,529,529,529,529,529, -529,529,529,529,529,529,529,529,529,529,529,529,529,529,529,529, -529,529,529,529,529,529,529,529,529,529,529,529,529,529,529,529, -529,529,529,529,529,529,529,529,529,529,529,529,529,529,529,529, -529,529,529,529,529,529,529,529,529,529,529,529,529,529,529,529, +534,534,534,534,534,534,534,534,534,534,534,534,534,534,534,534, +534,534,534,534,534,534,534,534,534,534,534,534,534,534,534,534, +534,534,534,534,534,534,534,534,534,534,534,534,534,534,534,534, +534,534,534,534,534,534,534,534,534,534,534,534,534,534,534,534, +534,534,534,534,534,534,534,534,534,534,534,534,534,534,534,534, +534,534,534,534,534,534,534,534,534,534,534,534,534,534,534,534, +534,534,534,534,534,534,534,534,534,534,534,534,534,534,534,534, +534,534,534,534,534,534,534,534,534,534,534,534,534,534,534,534, /* block 78 */ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9,525,525, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9,530,530, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, @@ -2539,14 +2565,14 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, /* block 82 */ -530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530, 
-530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530, -530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,120, -531,531,531,531,531,531,531,531,531,531,531,531,531,531,531,531, -531,531,531,531,531,531,531,531,531,531,531,531,531,531,531,531, -531,531,531,531,531,531,531,531,531,531,531,531,531,531,531,120, - 32, 33,532,533,534,535,536, 32, 33, 32, 33, 32, 33,537,538,539, -540, 35, 32, 33, 35, 32, 33, 35, 35, 35, 35, 35,110,110,541,541, +535,535,535,535,535,535,535,535,535,535,535,535,535,535,535,535, +535,535,535,535,535,535,535,535,535,535,535,535,535,535,535,535, +535,535,535,535,535,535,535,535,535,535,535,535,535,535,535,535, +536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536, +536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536, +536,536,536,536,536,536,536,536,536,536,536,536,536,536,536,536, + 32, 33,537,538,539,540,541, 32, 33, 32, 33, 32, 33,542,543,544, +545, 35, 32, 33, 35, 32, 33, 35, 35, 35, 35, 35,110,110,546,546, /* block 83 */ 165,166,165,166,165,166,165,166,165,166,165,166,165,166,165,166, @@ -2555,138 +2581,138 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 165,166,165,166,165,166,165,166,165,166,165,166,165,166,165,166, 165,166,165,166,165,166,165,166,165,166,165,166,165,166,165,166, 165,166,165,166,165,166,165,166,165,166,165,166,165,166,165,166, -165,166,165,166,542,543,543,543,543,543,543,165,166,165,166,544, -544,544,165,166,120,120,120,120,120,545,545,545,545,546,545,545, +165,166,165,166,547,548,548,548,548,548,548,165,166,165,166,549, +549,549,165,166,120,120,120,120,120,550,550,550,550,551,550,550, /* block 84 */ -547,547,547,547,547,547,547,547,547,547,547,547,547,547,547,547, -547,547,547,547,547,547,547,547,547,547,547,547,547,547,547,547, -547,547,547,547,547,547,120,547,120,120,120,120,120,547,120,120, -548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,548, -548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,548, 
-548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,548, -548,548,548,548,548,548,548,548,120,120,120,120,120,120,120,549, -550,120,120,120,120,120,120,120,120,120,120,120,120,120,120,551, - -/* block 85 */ -359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, -359,359,359,359,359,359,359,120,120,120,120,120,120,120,120,120, -359,359,359,359,359,359,359,120,359,359,359,359,359,359,359,120, -359,359,359,359,359,359,359,120,359,359,359,359,359,359,359,120, -359,359,359,359,359,359,359,120,359,359,359,359,359,359,359,120, -359,359,359,359,359,359,359,120,359,359,359,359,359,359,359,120, 552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, 552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, +552,552,552,552,552,552,120,552,120,120,120,120,120,552,120,120, +553,553,553,553,553,553,553,553,553,553,553,553,553,553,553,553, +553,553,553,553,553,553,553,553,553,553,553,553,553,553,553,553, +553,553,553,553,553,553,553,553,553,553,553,553,553,553,553,553, +553,553,553,553,553,553,553,553,120,120,120,120,120,120,120,554, +555,120,120,120,120,120,120,120,120,120,120,120,120,120,120,556, + +/* block 85 */ +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361, +361,361,361,361,361,361,361,120,120,120,120,120,120,120,120,120, +361,361,361,361,361,361,361,120,361,361,361,361,361,361,361,120, +361,361,361,361,361,361,361,120,361,361,361,361,361,361,361,120, +361,361,361,361,361,361,361,120,361,361,361,361,361,361,361,120, +361,361,361,361,361,361,361,120,361,361,361,361,361,361,361,120, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, +557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557, /* block 86 */ 5, 5, 23, 27, 23, 27, 5, 5, 5, 23, 27, 5, 23, 27, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 5, 5, 10, 5, 23, 27, 5, 5, 23, 27, 7, 8, 7, 8, 7, 8, 7, 8, 5, 5, 5, 5, 5,111, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 5, 5, 5, 5, - 10, 5, 7,553, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 20, 20, 
5,120,120,120,120,120,120,120,120,120,120,120,120,120, + 10, 5, 7,558, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 20, 20, 5, 5, 5, 7, 8, 7, 8, 7, 8, 7, 8, 10,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 87 */ -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,120,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,120,120,120,120,120,120,120,120,120,120,120,120, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,120,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,120,120,120,120,120,120,120,120,120,120,120,120, /* block 88 */ -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, 
+559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, /* block 89 */ -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,554,554,554,554,554,554,554,554,554,554, -554,554,554,554,554,554,120,120,120,120,120,120,120,120,120,120, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,559,559,559,559,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,120,120,120,120, /* block 90 */ - 4,555,555,556, 20,557,558,559,560,561,560,561,560,561,560,561, -560,561, 20,562,560,561,560,561,560,561,560,561,563,564,565,565, - 20,559,559,559,559,559,559,559,559,559,566,566,566,566,567,567, -568,569,569,569,569,569, 20,562,559,559,559,557,570,571,572,572, -120,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, - -/* block 91 */ 
-573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,120,120,574,574,575,575,576,576,573, -577,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, + 4,560,560,561, 20,562,563,564,565,566,565,566,565,566,565,566, +565,566, 20,567,565,566,565,566,565,566,565,566,568,569,570,570, + 20,564,564,564,564,564,564,564,564,564,571,571,571,571,572,572, +573,574,574,574,574,574, 20,567,564,564,564,562,575,576,577,577, +120,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, 578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, 578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, 578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, + +/* block 91 */ 578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, -578,578,578,578,578,578,578,578,578,578,578,555,569,579,579,578, +578,578,578,578,578,578,578,120,120,579,579,580,580,581,581,578, +582,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583, +583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583, +583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583, +583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583, +583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583, +583,583,583,583,583,583,583,583,583,583,583,560,574,584,584,583, /* block 92 */ -120,120,120,120,120,580,580,580,580,580,580,580,580,580,580,580, -580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580, -580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580, -120,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, +120,120,120,120,120,585,585,585,585,585,585,585,585,585,585,585, 
+585,585,585,585,585,585,585,585,585,585,585,585,585,585,585,585, +585,585,585,585,585,585,585,585,585,585,585,585,585,585,585,585, +120,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, /* block 93 */ -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,120, -572,572,582,582,582,582,572,572,572,572,572,572,572,572,572,572, -580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580, -580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580, -572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572, -572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572, -572,572,572,572,120,120,120,120,120,120,120,120,120,120,120,120, -578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,120, +577,577,587,587,587,587,577,577,577,577,577,577,577,577,577,577, +585,585,585,585,585,585,585,585,585,585,585,585,585,585,585,585, +585,585,585,585,585,585,585,585,585,585,585,585,585,585,585,585, +577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577, +577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577, +577,577,577,577,120,120,120,120,120,120,120,120,120,120,120,120, +583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583, /* block 94 */ -583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583, -583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,120, -582,582,582,582,582,582,582,582,582,582,572,572,572,572,572,572, -572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572, -572,572,572,572,572,572,572,572, 25, 25, 25, 25, 25, 25, 25, 25, +588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588, 
+588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,120, +587,587,587,587,587,587,587,587,587,587,577,577,577,577,577,577, +577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577, +577,577,577,577,577,577,577,577, 25, 25, 25, 25, 25, 25, 25, 25, 20, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, -583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583, -583,583,583,583,583,583,583,583,583,583,583,583,583,583,583, 20, +588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588, +588,588,588,588,588,588,588,588,588,588,588,588,588,588,588, 20, /* block 95 */ -582,582,582,582,582,582,582,582,582,582,572,572,572,572,572,572, -572,572,572,572,572,572,572,584,572,584,572,572,572,572,572,572, -572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572, -572, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, -572,572,572,572,572,572,572,572,572,572,572,572, 20, 20, 20, 20, -585,585,585,585,585,585,585,585,585,585,585,585,585,585,585,585, -585,585,585,585,585,585,585,585,585,585,585,585,585,585,585,585, -585,585,585,585,585,585,585,585,585,585,585,585,585,585,585,572, +587,587,587,587,587,587,587,587,587,587,577,577,577,577,577,577, +577,577,577,577,577,577,577,589,577,589,577,577,577,577,577,577, +577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577, +577, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +577,577,577,577,577,577,577,577,577,577,577,577, 20, 20, 20, 20, +590,590,590,590,590,590,590,590,590,590,590,590,590,590,590,590, +590,590,590,590,590,590,590,590,590,590,590,590,590,590,590,590, +590,590,590,590,590,590,590,590,590,590,590,590,590,590,590,577, /* block 96 */ -585,585,585,585,585,585,585,585,585,585,585,585,585,585,585,585, -585,585,585,585,585,585,585,585,585,585,585,585,585,585,585,585, -585,585,585,585,585,585,585,585,585,585,585,585,585,585,585,585, -585,585,585,585,585,585,585,585,585,585,585,585,585,585,585,585, 
-585,585,585,585,585,585,585,585,585,585,585,585,585,585,585,585, -585,585,585,585,585,585,585,585,572,572,572,572,572,572,572,572, -572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572, -572, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,572,572,572,572,572, +590,590,590,590,590,590,590,590,590,590,590,590,590,590,590,590, +590,590,590,590,590,590,590,590,590,590,590,590,590,590,590,590, +590,590,590,590,590,590,590,590,590,590,590,590,590,590,590,590, +590,590,590,590,590,590,590,590,590,590,590,590,590,590,590,590, +590,590,590,590,590,590,590,590,590,590,590,590,590,590,590,590, +590,590,590,590,590,590,590,590,577,577,577,577,577,577,577,577, +577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577, +577, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,577,577,577,577,577, /* block 97 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, @@ -2695,450 +2721,440 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, -572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572, -572,572,572,572,572,572,572,572,572,572,572,572,572,572,572, 20, +577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577, +577,577,577,577,577,577,577,577,577,577,577,577,577,577,577, 20, /* block 98 */ -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, 
+591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, /* block 99 */ -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, /* block 100 */ -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,120,120,120, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, 
+592,592,592,592,592,593,592,592,592,592,592,592,592,592,592,592, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, /* block 101 */ -587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, -587,587,587,587,587,588,587,587,587,587,587,587,587,587,587,587, -587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, -587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, -587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, -587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, -587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, -587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, +592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, /* block 102 */ -587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, -587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, -587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, -587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, -587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, -587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, 
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, -587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, +592,592,592,592,592,592,592,592,592,592,592,592,592,120,120,120, +594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,594, +594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,594, +594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,594, +594,594,594,594,594,594,594,120,120,120,120,120,120,120,120,120, +595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595, +595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595, +595,595,595,595,595,595,595,595,596,596,596,596,596,596,597,597, /* block 103 */ -587,587,587,587,587,587,587,587,587,587,587,587,587,120,120,120, -589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589, -589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589, -589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589, -589,589,589,589,589,589,589,120,120,120,120,120,120,120,120,120, -590,590,590,590,590,590,590,590,590,590,590,590,590,590,590,590, -590,590,590,590,590,590,590,590,590,590,590,590,590,590,590,590, -590,590,590,590,590,590,590,590,591,591,591,591,591,591,592,592, +598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598, +598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598, +598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598, +598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598, +598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598, +598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598, +598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598, +598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598, /* block 104 */ -593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593, -593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593, -593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593, 
-593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593, -593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593, -593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593, -593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593, -593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593, - -/* block 105 */ -593,593,593,593,593,593,593,593,593,593,593,593,594,595,595,595, -593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593, -596,596,596,596,596,596,596,596,596,596,593,593,120,120,120,120, +598,598,598,598,598,598,598,598,598,598,598,598,599,600,600,600, +598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598, +601,601,601,601,601,601,601,601,601,601,598,598,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -192,193,192,193,192,193,192,193,192,193,597,598,192,193,192,193, +192,193,192,193,192,193,192,193,192,193,602,603,192,193,192,193, 192,193,192,193,192,193,192,193,192,193,192,193,192,193,192,193, -192,193,192,193,192,193,192,193,192,193,192,193,192,193,599,198, -200,200,200,600,552,552,552,552,552,552,552,552,552,552,600,478, +192,193,192,193,192,193,192,193,192,193,192,193,192,193,604,198, +200,200,200,605,557,557,557,557,557,557,557,557,557,557,605,482, -/* block 106 */ +/* block 105 */ 192,193,192,193,192,193,192,193,192,193,192,193,192,193,192,193, -192,193,192,193,192,193,192,193,192,193,192,193,478,478,552,552, -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, -601,601,601,601,601,601,602,602,602,602,602,602,602,602,602,602, -603,603,604,604,604,604,604,604,120,120,120,120,120,120,120,120, +192,193,192,193,192,193,192,193,192,193,192,193,482,482,557,557, +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, 
+606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, +606,606,606,606,606,606,607,607,607,607,607,607,607,607,607,607, +608,608,609,609,609,609,609,609,120,120,120,120,120,120,120,120, -/* block 107 */ -605,605,605,605,605,605,605,605, 15, 15, 15, 15, 15, 15, 15, 15, +/* block 106 */ +610,610,610,610,610,610,610,610, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,111,111,111,111,111,111,111,111,111, 15, 15, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 35, 35, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, -110, 35, 35, 35, 35, 35, 35, 35, 35, 32, 33, 32, 33,606, 32, 33, +110, 35, 35, 35, 35, 35, 35, 35, 35, 32, 33, 32, 33,611, 32, 33, -/* block 108 */ - 32, 33, 32, 33, 32, 33, 32, 33,111, 15, 15, 32, 33,607, 35, 22, - 32, 33, 32, 33,608, 35, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, - 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,609,610,611,612,609, 35, -613,614,615,616, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, -120,120, 32, 33,617,618,619, 32, 33, 32, 33,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +/* block 107 */ + 32, 33, 32, 33, 32, 33, 32, 33,111, 15, 15, 32, 33,612, 35, 22, + 32, 33, 32, 33,613, 35, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, + 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,614,615,616,617,614, 35, +618,619,620,621, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, + 32, 33, 32, 33,622,623,624, 32, 33, 32, 33,120,120,120,120,120, + 32, 33,120, 35,120, 35, 32, 33, 32, 33,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120, 32, 33, 22,110,110, 35, 22, 22, 22, 22, 22, +120,120,110,110,110, 32, 33, 
22,110,110, 35, 22, 22, 22, 22, 22, + +/* block 108 */ +625,625,626,625,625,625,626,625,625,625,625,626,625,625,625,625, +625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,625, +625,625,625,627,627,626,626,627,628,628,628,628,626,120,120,120, +629,629,629,630,630,630,631,631,632,631,120,120,120,120,120,120, +633,633,633,633,633,633,633,633,633,633,633,633,633,633,633,633, +633,633,633,633,633,633,633,633,633,633,633,633,633,633,633,633, +633,633,633,633,633,633,633,633,633,633,633,633,633,633,633,633, +633,633,633,633,634,634,634,634,120,120,120,120,120,120,120,120, /* block 109 */ -620,620,621,620,620,620,621,620,620,620,620,621,620,620,620,620, -620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620, -620,620,620,622,622,621,621,622,623,623,623,623,621,120,120,120, -624,624,624,625,625,625,626,626,627,626,120,120,120,120,120,120, -628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628, -628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628, -628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628, -628,628,628,628,629,629,629,629,120,120,120,120,120,120,120,120, +635,635,636,636,636,636,636,636,636,636,636,636,636,636,636,636, +636,636,636,636,636,636,636,636,636,636,636,636,636,636,636,636, +636,636,636,636,636,636,636,636,636,636,636,636,636,636,636,636, +636,636,636,636,635,635,635,635,635,635,635,635,635,635,635,635, +635,635,635,635,637,637,120,120,120,120,120,120,120,120,638,638, +639,639,639,639,639,639,639,639,639,639,120,120,120,120,120,120, +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, +253,640,255,641,255,255,255,255,261,261,261,255,261,255,255,253, /* block 110 */ -630,630,631,631,631,631,631,631,631,631,631,631,631,631,631,631, -631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,631, -631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,631, -631,631,631,631,630,630,630,630,630,630,630,630,630,630,630,630, 
-630,630,630,630,632,632,120,120,120,120,120,120,120,120,633,633, -634,634,634,634,634,634,634,634,634,634,120,120,120,120,120,120, -251,251,251,251,251,251,251,251,251,251,251,251,251,251,251,251, -251,635,253,636,253,253,253,253,259,259,259,253,259,253,253,251, +642,642,642,642,642,642,642,642,642,642,643,643,643,643,643,643, +643,643,643,643,643,643,643,643,643,643,643,643,643,643,643,643, +643,643,643,643,643,643,644,644,644,644,644,644,644,644,645,646, +647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,647, +647,647,647,647,647,647,647,648,648,648,648,648,648,648,648,648, +648,648,649,649,120,120,120,120,120,120,120,120,120,120,120,650, +358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358, +358,358,358,358,358,358,358,358,358,358,358,358,358,120,120,120, /* block 111 */ -637,637,637,637,637,637,637,637,637,637,638,638,638,638,638,638, -638,638,638,638,638,638,638,638,638,638,638,638,638,638,638,638, -638,638,638,638,638,638,639,639,639,639,639,639,639,639,640,641, -642,642,642,642,642,642,642,642,642,642,642,642,642,642,642,642, -642,642,642,642,642,642,642,643,643,643,643,643,643,643,643,643, -643,643,644,644,120,120,120,120,120,120,120,120,120,120,120,645, -356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356, -356,356,356,356,356,356,356,356,356,356,356,356,356,120,120,120, - -/* block 112 */ -646,646,646,647,648,648,648,648,648,648,648,648,648,648,648,648, -648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,648, -648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,648, -648,648,648,646,647,647,646,646,646,646,647,647,646,646,647,647, -647,649,649,649,649,649,649,649,649,649,649,649,649,649,120,650, -651,651,651,651,651,651,651,651,651,651,120,120,120,120,649,649, -344,344,344,344,344,346,652,344,344,344,344,344,344,344,344,344, -350,350,350,350,350,350,350,350,350,350,344,344,344,344,344,120, - -/* block 113 */ +651,651,651,652,653,653,653,653,653,653,653,653,653,653,653,653, 
653,653,653,653,653,653,653,653,653,653,653,653,653,653,653,653, 653,653,653,653,653,653,653,653,653,653,653,653,653,653,653,653, -653,653,653,653,653,653,653,653,653,654,654,654,654,654,654,655, -655,654,654,655,655,654,654,120,120,120,120,120,120,120,120,120, -653,653,653,654,653,653,653,653,653,653,653,653,654,655,120,120, -656,656,656,656,656,656,656,656,656,656,120,120,657,657,657,657, -344,344,344,344,344,344,344,344,344,344,344,344,344,344,344,344, -652,344,344,344,344,344,344,351,351,351,344,345,346,345,344,344, +653,653,653,651,652,652,651,651,651,651,652,652,651,651,652,652, +652,654,654,654,654,654,654,654,654,654,654,654,654,654,120,655, +656,656,656,656,656,656,656,656,656,656,120,120,120,120,654,654, +346,346,346,346,346,348,657,346,346,346,346,346,346,346,346,346, +352,352,352,352,352,352,352,352,352,352,346,346,346,346,346,120, -/* block 114 */ -658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658, +/* block 112 */ 658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658, 658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658, -659,658,659,659,659,658,658,659,659,658,658,658,658,658,659,659, -658,659,658,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,658,658,660,661,661, -662,662,662,662,662,662,662,662,662,662,662,663,664,664,663,663, -665,665,662,666,666,663,664,120,120,120,120,120,120,120,120,120, +658,658,658,658,658,658,658,658,658,659,659,659,659,659,659,660, +660,659,659,660,660,659,659,120,120,120,120,120,120,120,120,120, +658,658,658,659,658,658,658,658,658,658,658,658,659,660,120,120, +661,661,661,661,661,661,661,661,661,661,120,120,662,662,662,662, +346,346,346,346,346,346,346,346,346,346,346,346,346,346,346,346, +657,346,346,346,346,346,346,353,353,353,346,347,348,347,346,346, -/* block 115 */ -120,359,359,359,359,359,359,120,120,359,359,359,359,359,359,120, -120,359,359,359,359,359,359,120,120,120,120,120,120,120,120,120, 
-359,359,359,359,359,359,359,120,359,359,359,359,359,359,359,120, +/* block 113 */ +663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,663, +663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,663, +663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,663, +664,663,664,664,664,663,663,664,664,663,663,663,663,663,664,664, +663,664,663,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,663,663,665,666,666, +667,667,667,667,667,667,667,667,667,667,667,668,669,669,668,668, +670,670,667,671,671,668,669,120,120,120,120,120,120,120,120,120, + +/* block 114 */ +120,361,361,361,361,361,361,120,120,361,361,361,361,361,361,120, +120,361,361,361,361,361,361,120,120,120,120,120,120,120,120,120, +361,361,361,361,361,361,361,120,361,361,361,361,361,361,361,120, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35,667, 35, 35, 35, 35, 35, 35, 35, 15,110,110,110,110, + 35, 35, 35,672, 35, 35, 35, 35, 35, 35, 35, 15,110,110,110,110, 35, 35, 35, 35, 35,128, 35, 35, 35,110, 15, 15,120,120,120,120, -668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,668, +673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, + +/* block 115 */ +673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, +673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, +673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, +673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, +667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667, +667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667, +667,667,667,668,668,669,668,668,669,668,668,670,668,669,120,120, +674,674,674,674,674,674,674,674,674,674,120,120,120,120,120,120, /* block 116 */ -668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,668, -668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,668, 
-668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,668, -668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,668, -662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,662, -662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,662, -662,662,662,663,663,664,663,663,664,663,663,665,663,664,120,120, -669,669,669,669,669,669,669,669,669,669,120,120,120,120,120,120, +675,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,675,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,675,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,675,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +675,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, /* block 117 */ -670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, +676,676,676,676,676,676,676,676,676,676,676,676,675,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,675,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,675,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +675,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,675,676,676,676, /* 
block 118 */ -671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,675,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,675,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +675,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,675,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, /* block 119 */ -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, +676,676,676,676,676,676,676,676,675,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,675,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +675,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, 
+676,676,676,676,676,676,676,676,676,676,676,676,675,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,675,676,676,676,676,676,676,676, /* block 120 */ -671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,675,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +675,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,675,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,675,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, /* block 121 */ -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, +676,676,676,676,675,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, 
+675,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,675,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,675,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,675,676,676,676,676,676,676,676,676,676,676,676, /* block 122 */ -671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +675,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,675,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,675,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,675,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, /* block 123 */ -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671, 
-671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, +676,676,676,676,676,676,676,676,675,676,676,676,676,676,676,676, +676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676, +676,676,676,676,120,120,120,120,120,120,120,120,120,120,120,120, +359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, +359,359,359,359,359,359,359,120,120,120,120,360,360,360,360,360, +360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360, +360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360, +360,360,360,360,360,360,360,360,360,360,360,360,120,120,120,120, /* block 124 */ -671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671, -671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671, -671,671,671,671,120,120,120,120,120,120,120,120,120,120,120,120, -357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357, -357,357,357,357,357,357,357,120,120,120,120,358,358,358,358,358, -358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358, -358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358, -358,358,358,358,358,358,358,358,358,358,358,358,120,120,120,120, +677,677,677,677,677,677,677,677,677,677,677,677,677,677,677,677, +677,677,677,677,677,677,677,677,677,677,677,677,677,677,677,677, +677,677,677,677,677,677,677,677,677,677,677,677,677,677,677,677, +677,677,677,677,677,677,677,677,677,677,677,677,677,677,677,677, +677,677,677,677,677,677,677,677,677,677,677,677,677,677,677,677, +677,677,677,677,677,677,677,677,677,677,677,677,677,677,677,677, +677,677,677,677,677,677,677,677,677,677,677,677,677,677,677,677, +677,677,677,677,677,677,677,677,677,677,677,677,677,677,677,677, /* block 125 */ -672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672, -672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672, -672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672, -672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672, 
-672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672, -672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672, -672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672, -672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, /* block 126 */ -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,120,120, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, /* block 127 */ -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, 
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,120,120, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, - -/* block 128 */ -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,120,120,120,120,120,120, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 129 */ +/* block 128 */ 35, 35, 35, 35, 35, 35, 35,120,120,120,120,120,120,120,120,120, 120,120,120,206,206,206,206,206,120,120,120,120,120,214,211,214, -214,214,214,214,214,214,214,214,214,674,214,214,214,214,214,214, +214,214,214,214,214,214,214,214,214,679,214,214,214,214,214,214, 214,214,214,214,214,214,214,120,214,214,214,214,214,120,214,120, 214,214,120,214,214,120,214,214,214,214,214,214,214,214,214,214, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, 
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, + +/* block 129 */ +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,252,252,252,252,252,252,252,252,252,252,252,252,252,252, +252,252,252,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, /* block 130 */ -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,675,675,675,675,675,675,675,675,675,675,675,675,675,675, -675,675,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, /* block 131 */ -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, 
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,680,681, +221,221,221,221,221,221,221,221,221,221,221,221,221,221,221,221, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, /* block 132 */ -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224, 8, 7, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +120,120,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,120,120,120,120,120,120,120,221, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, - -/* block 133 */ -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, 
-120,120,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -224,224,676,224,224,224,224,224,224,224,224,224,219,677,120,120, +225,225,682,225,225,225,225,225,225,225,225,225,219,683,221,221, -/* block 134 */ +/* block 133 */ 113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, 5, 5, 5, 5, 5, 5, 5, 7, 8, 5,120,120,120,120,120,120, -113,113,113,113,113,113,113,113,113,113,113,113,113,113,552,552, +113,113,113,113,113,113,113,113,113,113,113,113,113,113,557,557, 5, 10, 10, 16, 16, 7, 8, 7, 8, 7, 8, 7, 8, 7, 8, 7, - 8, 7, 8, 7, 8,556,556, 7, 8, 5, 5, 5, 5, 16, 16, 16, + 8, 7, 8, 7, 8,561,561, 7, 8, 5, 5, 5, 5, 16, 16, 16, 5, 5, 5,120, 5, 5, 5, 5, 10, 7, 8, 7, 8, 7, 8, 5, 5, 5, 9, 10, 9, 9, 9,120, 5, 6, 5, 5,120,120,120,120, -224,224,224,224,224,120,224,224,224,224,224,224,224,224,224,224, +225,225,225,225,225,120,225,225,225,225,225,225,225,225,225,225, -/* block 135 */ -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,120,120, 24, +/* block 134 */ +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, 
+225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,120,120, 24, -/* block 136 */ +/* block 135 */ 120, 5, 5, 5, 6, 5, 5, 5, 7, 8, 5, 9, 5, 10, 5, 5, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 5, 5, 9, 9, 9, 5, 5, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 7, 5, 8, 15, 16, 15, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 7, 9, 8, 9, 7, - 8,555,560,561,555,555,578,578,578,578,578,578,578,578,578,578, -569,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, + 8,560,565,566,560,560,583,583,583,583,583,583,583,583,583,583, +574,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583, -/* block 137 */ -578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, -578,578,578,578,578,578,578,578,578,578,578,578,578,578,678,678, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581, -581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,120, -120,120,581,581,581,581,581,581,120,120,581,581,581,581,581,581, -120,120,581,581,581,581,581,581,120,120,581,581,581,120,120,120, +/* block 136 */ +583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583, +583,583,583,583,583,583,583,583,583,583,583,583,583,583,684,684, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,120, +120,120,586,586,586,586,586,586,120,120,586,586,586,586,586,586, +120,120,586,586,586,586,586,586,120,120,586,586,586,120,120,120, 6, 6, 9, 15, 20, 6, 6,120, 20, 9, 9, 9, 9, 20, 20,120, -511,511,511,511,511,511,511,511,511, 24, 24, 24, 20, 20,120,120, +516,516,516,516,516,516,516,516,516, 24, 24, 24, 20, 20,120,120, -/* 
block 138 */ -679,679,679,679,679,679,679,679,679,679,679,679,120,679,679,679, -679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679, -679,679,679,679,679,679,679,120,679,679,679,679,679,679,679,679, -679,679,679,679,679,679,679,679,679,679,679,120,679,679,120,679, -679,679,679,679,679,679,679,679,679,679,679,679,679,679,120,120, -679,679,679,679,679,679,679,679,679,679,679,679,679,679,120,120, +/* block 137 */ +685,685,685,685,685,685,685,685,685,685,685,685,120,685,685,685, +685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685, +685,685,685,685,685,685,685,120,685,685,685,685,685,685,685,685, +685,685,685,685,685,685,685,685,685,685,685,120,685,685,120,685, +685,685,685,685,685,685,685,685,685,685,685,685,685,685,120,120, +685,685,685,685,685,685,685,685,685,685,685,685,685,685,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +/* block 138 */ +685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685, +685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685, +685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685, +685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685, +685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685, +685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685, +685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685, +685,685,685,685,685,685,685,685,685,685,685,120,120,120,120,120, + /* block 139 */ -679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679, -679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679, -679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679, -679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679, -679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679, -679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679, -679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679, 
-679,679,679,679,679,679,679,679,679,679,679,120,120,120,120,120, +686,686,687,120,120,120,120,688,688,688,688,688,688,688,688,688, +688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,688, +688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,688, +688,688,688,688,120,120,120,689,689,689,689,689,689,689,689,689, +690,690,690,690,690,690,690,690,690,690,690,690,690,690,690,690, +690,690,690,690,690,690,690,690,690,690,690,690,690,690,690,690, +690,690,690,690,690,690,690,690,690,690,690,690,690,690,690,690, +690,690,690,690,690,691,691,691,691,692,692,692,692,692,692,692, /* block 140 */ -680,680,680,120,120,120,120,681,681,681,681,681,681,681,681,681, -681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,681, -681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,681, -681,681,681,681,120,120,120,682,682,682,682,682,682,682,682,682, -683,683,683,683,683,683,683,683,683,683,683,683,683,683,683,683, -683,683,683,683,683,683,683,683,683,683,683,683,683,683,683,683, -683,683,683,683,683,683,683,683,683,683,683,683,683,683,683,683, -683,683,683,683,683,684,684,684,684,685,685,685,685,685,685,685, - -/* block 141 */ -685,685,685,685,685,685,685,685,685,685,684,684,685,685,685,120, +692,692,692,692,692,692,692,692,692,692,691,691,692,692,692,120, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,120,120,120, -685,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +692,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,113,120,120, -/* block 142 */ +/* block 141 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, @@ -3148,499 +3164,519 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 143 */ -686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,686, -686,686,686,686,686,686,686,686,686,686,686,686,686,120,120,120, -687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687, -687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687, -687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687, -687,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -688,689,689,689,689,689,689,689,689,689,689,689,689,689,689,689, -689,689,689,689,689,689,689,689,689,689,689,689,120,120,120,120, - -/* block 144 */ -690,690,690,690,690,690,690,690,690,690,690,690,690,690,690,690, -690,690,690,690,690,690,690,690,690,690,690,690,690,690,690,690, -691,691,691,691,120,120,120,120,120,120,120,120,120,690,690,690, -692,692,692,692,692,692,692,692,692,692,692,692,692,692,692,692, -692,693,692,692,692,692,692,692,692,692,693,120,120,120,120,120, +/* block 142 */ +693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,693, +693,693,693,693,693,693,693,693,693,693,693,693,693,120,120,120, 694,694,694,694,694,694,694,694,694,694,694,694,694,694,694,694, 694,694,694,694,694,694,694,694,694,694,694,694,694,694,694,694, -694,694,694,694,694,694,695,695,695,695,695,120,120,120,120,120, - -/* block 145 */ -696,696,696,696,696,696,696,696,696,696,696,696,696,696,696,696, -696,696,696,696,696,696,696,696,696,696,696,696,696,696,120,697, -698,698,698,698,698,698,698,698,698,698,698,698,698,698,698,698, -698,698,698,698,698,698,698,698,698,698,698,698,698,698,698,698, -698,698,698,698,120,120,120,120,698,698,698,698,698,698,698,698, -699,700,700,700,700,700,120,120,120,120,120,120,120,120,120,120, 
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +694,694,694,694,694,694,694,694,694,694,694,694,694,694,694,694, +694,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +695,696,696,696,696,696,696,696,696,696,696,696,696,696,696,696, +696,696,696,696,696,696,696,696,696,696,696,696,120,120,120,120, -/* block 146 */ +/* block 143 */ +697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697, +697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697, +698,698,698,698,120,120,120,120,120,120,120,120,120,697,697,697, +699,699,699,699,699,699,699,699,699,699,699,699,699,699,699,699, +699,700,699,699,699,699,699,699,699,699,700,120,120,120,120,120, 701,701,701,701,701,701,701,701,701,701,701,701,701,701,701,701, 701,701,701,701,701,701,701,701,701,701,701,701,701,701,701,701, -701,701,701,701,701,701,701,701,702,702,702,702,702,702,702,702, -702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,702, -702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,702, -703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703, -703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703, -703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703, +701,701,701,701,701,701,702,702,702,702,702,120,120,120,120,120, -/* block 147 */ -704,704,704,704,704,704,704,704,704,704,704,704,704,704,704,704, -704,704,704,704,704,704,704,704,704,704,704,704,704,704,120,120, -705,705,705,705,705,705,705,705,705,705,120,120,120,120,120,120, -706,706,706,706,706,706,706,706,706,706,706,706,706,706,706,706, -706,706,706,706,706,706,706,706,706,706,706,706,706,706,706,706, -706,706,706,706,120,120,120,120,707,707,707,707,707,707,707,707, -707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,707, -707,707,707,707,707,707,707,707,707,707,707,707,120,120,120,120, +/* block 144 */ +703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703, 
+703,703,703,703,703,703,703,703,703,703,703,703,703,703,120,704, +705,705,705,705,705,705,705,705,705,705,705,705,705,705,705,705, +705,705,705,705,705,705,705,705,705,705,705,705,705,705,705,705, +705,705,705,705,120,120,120,120,705,705,705,705,705,705,705,705, +706,707,707,707,707,707,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 148 */ +/* block 145 */ 708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, 708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, -708,708,708,708,708,708,708,708,120,120,120,120,120,120,120,120, +708,708,708,708,708,708,708,708,709,709,709,709,709,709,709,709, 709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, 709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, -709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709, -709,709,709,709,120,120,120,120,120,120,120,120,120,120,120,710, +710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710, +710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710, +710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710, + +/* block 146 */ +711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, +711,711,711,711,711,711,711,711,711,711,711,711,711,711,120,120, +712,712,712,712,712,712,712,712,712,712,120,120,120,120,120,120, +713,713,713,713,713,713,713,713,713,713,713,713,713,713,713,713, +713,713,713,713,713,713,713,713,713,713,713,713,713,713,713,713, +713,713,713,713,120,120,120,120,714,714,714,714,714,714,714,714, +714,714,714,714,714,714,714,714,714,714,714,714,714,714,714,714, +714,714,714,714,714,714,714,714,714,714,714,714,120,120,120,120, + +/* block 147 */ +715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715, +715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715, +715,715,715,715,715,715,715,715,120,120,120,120,120,120,120,120, 
+716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,716, +716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,716, +716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,716, +716,716,716,716,120,120,120,120,120,120,120,120,120,120,120,717, +718,718,718,718,718,718,718,718,718,718,718,120,718,718,718,718, + +/* block 148 */ +718,718,718,718,718,718,718,718,718,718,718,120,718,718,718,718, +718,718,718,120,718,718,120,719,719,719,719,719,719,719,719,719, +719,719,120,719,719,719,719,719,719,719,719,719,719,719,719,719, +719,719,120,719,719,719,719,719,719,719,120,719,719,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 149 */ -711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, -711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, -711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, -711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, -711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, -711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, -711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, -711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, +720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720, +720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720, +720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720, +720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720, +720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720, +720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720, +720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720, +720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720, /* block 150 */ 
-711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, -711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, -711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, -711,711,711,711,711,711,711,120,120,120,120,120,120,120,120,120, -711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711, -711,711,711,711,711,711,120,120,120,120,120,120,120,120,120,120, -711,711,711,711,711,711,711,711,120,120,120,120,120,120,120,120, +720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720, +720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720, +720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720, +720,720,720,720,720,720,720,120,120,120,120,120,120,120,120,120, +720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720, +720,720,720,720,720,720,120,120,120,120,120,120,120,120,120,120, +720,720,720,720,720,720,720,720,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 151 */ -712,712,712,712,712,712,120,120,712,120,712,712,712,712,712,712, -712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712, -712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712, -712,712,712,712,712,712,120,712,712,120,120,120,712,120,120,712, -713,713,713,713,713,713,713,713,713,713,713,713,713,713,713,713, -713,713,713,713,713,713,120,714,715,715,715,715,715,715,715,715, -716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,716, -716,716,716,716,716,716,716,717,717,718,718,718,718,718,718,718, - -/* block 152 */ -719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719, -719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,120, -120,120,120,120,120,120,120,720,720,720,720,720,720,720,720,720, +110,110,110,110,110,110,120,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, +110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110, 
+110,120,110,110,110,110,110,110,110,110,110,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, + +/* block 152 */ +721,721,721,721,721,721,120,120,721,120,721,721,721,721,721,721, +721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,721, 721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,721, -721,721,721,120,721,721,120,120,120,120,120,722,722,722,722,722, +721,721,721,721,721,721,120,721,721,120,120,120,721,120,120,721, +722,722,722,722,722,722,722,722,722,722,722,722,722,722,722,722, +722,722,722,722,722,722,120,723,724,724,724,724,724,724,724,724, +725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725, +725,725,725,725,725,725,725,726,726,727,727,727,727,727,727,727, /* block 153 */ -723,723,723,723,723,723,723,723,723,723,723,723,723,723,723,723, -723,723,723,723,723,723,724,724,724,724,724,724,120,120,120,725, -726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726, -726,726,726,726,726,726,726,726,726,726,120,120,120,120,120,727, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +728,728,728,728,728,728,728,728,728,728,728,728,728,728,728,728, +728,728,728,728,728,728,728,728,728,728,728,728,728,728,728,120, +120,120,120,120,120,120,120,729,729,729,729,729,729,729,729,729, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730, +730,730,730,120,730,730,120,120,120,120,120,731,731,731,731,731, /* block 154 */ -728,728,728,728,728,728,728,728,728,728,728,728,728,728,728,728, -728,728,728,728,728,728,728,728,728,728,728,728,728,728,728,728, 
-729,729,729,729,729,729,729,729,729,729,729,729,729,729,729,729, -729,729,729,729,729,729,729,729,120,120,120,120,730,730,729,729, -730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730, -120,120,730,730,730,730,730,730,730,730,730,730,730,730,730,730, -730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730, -730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730, +732,732,732,732,732,732,732,732,732,732,732,732,732,732,732,732, +732,732,732,732,732,732,733,733,733,733,733,733,120,120,120,734, +735,735,735,735,735,735,735,735,735,735,735,735,735,735,735,735, +735,735,735,735,735,735,735,735,735,735,120,120,120,120,120,736, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 155 */ -731,732,732,732,120,732,732,120,120,120,120,120,732,732,732,732, -731,731,731,731,120,731,731,731,120,731,731,731,731,731,731,731, -731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,731, -731,731,731,731,731,731,120,120,732,732,732,120,120,120,120,732, -733,733,733,733,733,733,733,733,733,120,120,120,120,120,120,120, -734,734,734,734,734,734,734,734,734,120,120,120,120,120,120,120, -735,735,735,735,735,735,735,735,735,735,735,735,735,735,735,735, -735,735,735,735,735,735,735,735,735,735,735,735,735,736,736,737, +737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737, +737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737, +738,738,738,738,738,738,738,738,738,738,738,738,738,738,738,738, +738,738,738,738,738,738,738,738,120,120,120,120,739,739,738,738, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, +120,120,739,739,739,739,739,739,739,739,739,739,739,739,739,739, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, +739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, /* 
block 156 */ -738,738,738,738,738,738,738,738,738,738,738,738,738,738,738,738, -738,738,738,738,738,738,738,738,738,738,738,738,738,739,739,739, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -740,740,740,740,740,740,740,740,741,740,740,740,740,740,740,740, +740,741,741,741,120,741,741,120,120,120,120,120,741,741,741,741, +740,740,740,740,120,740,740,740,120,740,740,740,740,740,740,740, 740,740,740,740,740,740,740,740,740,740,740,740,740,740,740,740, -740,740,740,740,740,742,742,120,120,120,120,743,743,743,743,743, -744,744,744,744,744,744,744,120,120,120,120,120,120,120,120,120, +740,740,740,740,740,740,120,120,741,741,741,120,120,120,120,741, +742,742,742,742,742,742,742,742,742,120,120,120,120,120,120,120, +743,743,743,743,743,743,743,743,743,120,120,120,120,120,120,120, +744,744,744,744,744,744,744,744,744,744,744,744,744,744,744,744, +744,744,744,744,744,744,744,744,744,744,744,744,744,745,745,746, /* block 157 */ -745,745,745,745,745,745,745,745,745,745,745,745,745,745,745,745, -745,745,745,745,745,745,745,745,745,745,745,745,745,745,745,745, -745,745,745,745,745,745,745,745,745,745,745,745,745,745,745,745, -745,745,745,745,745,745,120,120,120,746,746,746,746,746,746,746, 747,747,747,747,747,747,747,747,747,747,747,747,747,747,747,747, -747,747,747,747,747,747,120,120,748,748,748,748,748,748,748,748, +747,747,747,747,747,747,747,747,747,747,747,747,747,748,748,748, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +749,749,749,749,749,749,749,749,750,749,749,749,749,749,749,749, 749,749,749,749,749,749,749,749,749,749,749,749,749,749,749,749, -749,749,749,120,120,120,120,120,750,750,750,750,750,750,750,750, +749,749,749,749,749,751,751,120,120,120,120,752,752,752,752,752, +753,753,754,753,753,753,753,120,120,120,120,120,120,120,120,120, /* block 158 */ 
-751,751,751,751,751,751,751,751,751,751,751,751,751,751,751,751, -751,751,120,120,120,120,120,120,120,752,752,752,752,120,120,120, -120,120,120,120,120,120,120,120,120,753,753,753,753,753,753,753, +755,755,755,755,755,755,755,755,755,755,755,755,755,755,755,755, +755,755,755,755,755,755,755,755,755,755,755,755,755,755,755,755, +755,755,755,755,755,755,755,755,755,755,755,755,755,755,755,755, +755,755,755,755,755,755,120,120,120,756,756,756,756,756,756,756, +757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,757, +757,757,757,757,757,757,120,120,758,758,758,758,758,758,758,758, +759,759,759,759,759,759,759,759,759,759,759,759,759,759,759,759, +759,759,759,120,120,120,120,120,760,760,760,760,760,760,760,760, + +/* block 159 */ +761,761,761,761,761,761,761,761,761,761,761,761,761,761,761,761, +761,761,120,120,120,120,120,120,120,762,762,762,762,120,120,120, +120,120,120,120,120,120,120,120,120,763,763,763,763,763,763,763, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 159 */ -754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754, -754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754, -754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754, -754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754, -754,754,754,754,754,754,754,754,754,120,120,120,120,120,120,120, +/* block 160 */ +764,764,764,764,764,764,764,764,764,764,764,764,764,764,764,764, +764,764,764,764,764,764,764,764,764,764,764,764,764,764,764,764, +764,764,764,764,764,764,764,764,764,764,764,764,764,764,764,764, +764,764,764,764,764,764,764,764,764,764,764,764,764,764,764,764, +764,764,764,764,764,764,764,764,764,120,120,120,120,120,120,120, 
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 160 */ -755,755,755,755,755,755,755,755,755,755,755,755,755,755,755,755, -755,755,755,755,755,755,755,755,755,755,755,755,755,755,755,755, -755,755,755,755,755,755,755,755,755,755,755,755,755,755,755,755, -755,755,755,120,120,120,120,120,120,120,120,120,120,120,120,120, -756,756,756,756,756,756,756,756,756,756,756,756,756,756,756,756, -756,756,756,756,756,756,756,756,756,756,756,756,756,756,756,756, -756,756,756,756,756,756,756,756,756,756,756,756,756,756,756,756, -756,756,756,120,120,120,120,120,120,120,757,757,757,757,757,757, - /* block 161 */ -758,758,758,758,758,758,758,758,758,758,758,758,758,758,758,758, -758,758,758,758,758,758,758,758,758,758,758,758,758,758,758,758, -758,758,758,758,759,759,759,759,120,120,120,120,120,120,120,120, -760,760,760,760,760,760,760,760,760,760,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765, +765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765, +765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765, +765,765,765,120,120,120,120,120,120,120,120,120,120,120,120,120, +766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766, +766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766, +766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766, +766,766,766,120,120,120,120,120,120,120,767,767,767,767,767,767, /* block 162 */ +768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,768, +768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,768, 
+768,768,768,768,769,769,769,769,120,120,120,120,120,120,120,120, +770,770,770,770,770,770,770,770,770,770,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -761,761,761,761,761,761,761,761,761,761,761,761,761,761,761,761, -761,761,761,761,761,761,761,761,761,761,761,761,761,761,761,120, /* block 163 */ -762,762,762,762,762,762,762,762,762,762,762,762,762,762,762,762, -762,762,762,762,762,762,762,762,762,762,762,762,762,762,762,762, -762,762,762,762,762,762,762,762,762,762,120,763,763,764,120,120, -762,762,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +771,771,771,771,771,771,771,771,771,771,771,771,771,771,771,771, +771,771,771,771,771,771,771,771,771,771,771,771,771,771,771,120, /* block 164 */ -765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765, -765,765,765,765,765,765,765,765,765,765,765,765,765,766,766,766, -766,766,766,766,766,766,766,765,120,120,120,120,120,120,120,120, -767,767,767,767,767,767,767,767,767,767,767,767,767,767,767,767, -767,767,767,767,767,767,768,768,768,768,768,768,768,768,768,768, -768,769,769,769,769,770,770,770,770,770,120,120,120,120,120,120, +772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772, 
+772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772, +772,772,772,772,772,772,772,772,772,772,120,773,773,774,120,120, +772,772,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 165 */ +775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775, +775,775,775,775,775,775,775,775,775,775,775,775,775,776,776,776, +776,776,776,776,776,776,776,775,120,120,120,120,120,120,120,120, +777,777,777,777,777,777,777,777,777,777,777,777,777,777,777,777, +777,777,777,777,777,777,778,778,778,778,778,778,778,778,778,778, +778,779,779,779,779,780,780,780,780,780,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, + +/* block 166 */ +781,781,782,782,782,782,783,783,783,783,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -771,771,771,771,771,771,771,771,771,771,771,771,771,771,771,771, -771,771,771,771,771,772,772,772,772,772,772,772,120,120,120,120, +784,784,784,784,784,784,784,784,784,784,784,784,784,784,784,784, +784,784,784,784,784,785,785,785,785,785,785,785,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773, -773,773,773,773,773,773,773,120,120,120,120,120,120,120,120,120, - -/* block 166 */ -774,775,774,776,776,776,776,776,776,776,776,776,776,776,776,776, -776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776, -776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776, -776,776,776,776,776,776,776,776,775,775,775,775,775,775,775,775, 
-775,775,775,775,775,775,775,777,777,777,777,777,777,777,120,120, -120,120,778,778,778,778,778,778,778,778,778,778,778,778,778,778, -778,778,778,778,778,778,779,779,779,779,779,779,779,779,779,779, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,775, +786,786,786,786,786,786,786,786,786,786,786,786,786,786,786,786, +786,786,786,786,786,786,786,120,120,120,120,120,120,120,120,120, /* block 167 */ -780,780,781,782,782,782,782,782,782,782,782,782,782,782,782,782, -782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782, -782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782, -781,781,781,780,780,780,780,781,781,780,780,783,783,784,783,783, -783,783,120,120,120,120,120,120,120,120,120,120,120,784,120,120, -785,785,785,785,785,785,785,785,785,785,785,785,785,785,785,785, -785,785,785,785,785,785,785,785,785,120,120,120,120,120,120,120, -786,786,786,786,786,786,786,786,786,786,120,120,120,120,120,120, +787,788,787,789,789,789,789,789,789,789,789,789,789,789,789,789, +789,789,789,789,789,789,789,789,789,789,789,789,789,789,789,789, +789,789,789,789,789,789,789,789,789,789,789,789,789,789,789,789, +789,789,789,789,789,789,789,789,788,788,788,788,788,788,788,788, +788,788,788,788,788,788,788,790,790,790,790,790,790,790,120,120, +120,120,791,791,791,791,791,791,791,791,791,791,791,791,791,791, +791,791,791,791,791,791,792,792,792,792,792,792,792,792,792,792, +788,789,789,788,788,789,120,120,120,120,120,120,120,120,120,788, /* block 168 */ -787,787,787,788,788,788,788,788,788,788,788,788,788,788,788,788, -788,788,788,788,788,788,788,788,788,788,788,788,788,788,788,788, -788,788,788,788,788,788,788,787,787,787,787,787,789,787,787,787, -787,787,787,787,787,120,790,790,790,790,790,790,790,790,790,790, -791,791,791,791,788,789,789,788,120,120,120,120,120,120,120,120, -792,792,792,792,792,792,792,792,792,792,792,792,792,792,792,792, -792,792,792,792,792,792,792,792,792,792,792,792,792,792,792,792, 
-792,792,792,793,794,794,792,120,120,120,120,120,120,120,120,120, +793,793,794,795,795,795,795,795,795,795,795,795,795,795,795,795, +795,795,795,795,795,795,795,795,795,795,795,795,795,795,795,795, +795,795,795,795,795,795,795,795,795,795,795,795,795,795,795,795, +794,794,794,793,793,793,793,794,794,793,793,796,796,797,796,796, +796,796,793,120,120,120,120,120,120,120,120,120,120,797,120,120, +798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,798, +798,798,798,798,798,798,798,798,798,120,120,120,120,120,120,120, +799,799,799,799,799,799,799,799,799,799,120,120,120,120,120,120, /* block 169 */ -795,795,796,797,797,797,797,797,797,797,797,797,797,797,797,797, -797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797, -797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797, -797,797,797,796,796,796,795,795,795,795,795,795,795,795,795,796, -796,797,798,798,797,799,799,799,799,795,795,795,795,799,796,795, -800,800,800,800,800,800,800,800,800,800,797,799,797,799,799,799, -120,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801, -801,801,801,801,801,120,120,120,120,120,120,120,120,120,120,120, +800,800,800,801,801,801,801,801,801,801,801,801,801,801,801,801, +801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801, +801,801,801,801,801,801,801,800,800,800,800,800,802,800,800,800, +800,800,800,800,800,120,803,803,803,803,803,803,803,803,803,803, +804,804,804,804,801,802,802,801,120,120,120,120,120,120,120,120, +805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805, +805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805, +805,805,805,806,807,807,805,120,120,120,120,120,120,120,120,120, /* block 170 */ -802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802, -802,802,120,802,802,802,802,802,802,802,802,802,802,802,802,802, -802,802,802,802,802,802,802,802,802,802,802,802,803,803,803,804, -804,804,803,803,804,803,804,804,805,805,805,805,805,805,804,120, 
+808,808,809,810,810,810,810,810,810,810,810,810,810,810,810,810, +810,810,810,810,810,810,810,810,810,810,810,810,810,810,810,810, +810,810,810,810,810,810,810,810,810,810,810,810,810,810,810,810, +810,810,810,809,809,809,808,808,808,808,808,808,808,808,808,809, +809,810,811,811,810,812,812,812,812,808,808,808,808,812,809,808, +813,813,813,813,813,813,813,813,813,813,810,812,810,812,812,812, +120,814,814,814,814,814,814,814,814,814,814,814,814,814,814,814, +814,814,814,814,814,120,120,120,120,120,120,120,120,120,120,120, + +/* block 171 */ +815,815,815,815,815,815,815,815,815,815,815,815,815,815,815,815, +815,815,120,815,815,815,815,815,815,815,815,815,815,815,815,815, +815,815,815,815,815,815,815,815,815,815,815,815,816,816,816,817, +817,817,816,816,817,816,817,817,818,818,818,818,818,818,817,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 171 */ -806,806,806,806,806,806,806,120,806,120,806,806,806,806,120,806, -806,806,806,806,806,806,806,806,806,806,806,806,806,806,120,806, -806,806,806,806,806,806,806,806,806,807,120,120,120,120,120,120, -808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808, -808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808, -808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,809, -810,810,810,809,809,809,809,809,809,809,809,120,120,120,120,120, -811,811,811,811,811,811,811,811,811,811,120,120,120,120,120,120, - /* block 172 */ -812,813,814,815,120,816,816,816,816,816,816,816,816,120,120,816, -816,120,120,816,816,816,816,816,816,816,816,816,816,816,816,816, -816,816,816,816,816,816,816,816,816,120,816,816,816,816,816,816, -816,120,816,816,120,816,816,816,816,816,120,817,813,816,818,814, -812,814,814,814,814,120,120,814,814,120,120,814,814,814,120,120, 
-816,120,120,120,120,120,120,818,120,120,120,120,120,816,816,816, -816,816,814,814,120,120,812,812,812,812,812,812,812,120,120,120, -812,812,812,812,812,120,120,120,120,120,120,120,120,120,120,120, +819,819,819,819,819,819,819,120,819,120,819,819,819,819,120,819, +819,819,819,819,819,819,819,819,819,819,819,819,819,819,120,819, +819,819,819,819,819,819,819,819,819,820,120,120,120,120,120,120, +821,821,821,821,821,821,821,821,821,821,821,821,821,821,821,821, +821,821,821,821,821,821,821,821,821,821,821,821,821,821,821,821, +821,821,821,821,821,821,821,821,821,821,821,821,821,821,821,822, +823,823,823,822,822,822,822,822,822,822,822,120,120,120,120,120, +824,824,824,824,824,824,824,824,824,824,120,120,120,120,120,120, /* block 173 */ -819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,819, -819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,819, -819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,819, -819,819,819,819,819,820,820,820,821,821,821,821,821,821,821,821, -820,820,821,821,821,820,821,819,819,819,819,822,822,822,822,822, -823,823,823,823,823,823,823,823,823,823,822,822,120,822,821,819, -819,819,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +825,826,827,828,120,829,829,829,829,829,829,829,829,120,120,829, +829,120,120,829,829,829,829,829,829,829,829,829,829,829,829,829, +829,829,829,829,829,829,829,829,829,120,829,829,829,829,829,829, +829,120,829,829,120,829,829,829,829,829,120,830,826,829,831,827, +825,827,827,827,827,120,120,827,827,120,120,827,827,827,120,120, +829,120,120,120,120,120,120,831,120,120,120,120,120,829,829,829, +829,829,827,827,120,120,825,825,825,825,825,825,825,120,120,120, +825,825,825,825,825,120,120,120,120,120,120,120,120,120,120,120, /* block 174 */ -824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,824, -824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,824, 
-824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,824, -825,826,826,827,827,827,827,827,827,826,827,826,826,825,826,827, -827,826,827,827,824,824,828,824,120,120,120,120,120,120,120,120, -829,829,829,829,829,829,829,829,829,829,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +832,832,832,832,832,832,832,832,832,832,832,832,832,832,832,832, +832,832,832,832,832,832,832,832,832,832,832,832,832,832,832,832, +832,832,832,832,832,832,832,832,832,832,832,832,832,832,832,832, +832,832,832,832,832,833,833,833,834,834,834,834,834,834,834,834, +833,833,834,834,834,833,834,832,832,832,832,835,835,835,835,835, +836,836,836,836,836,836,836,836,836,836,835,835,120,835,834,832, +832,832,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 175 */ -830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830, -830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830, -830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,831, -832,832,833,833,833,833,120,120,832,832,832,832,833,833,832,833, -833,834,834,834,834,834,834,834,834,834,834,834,834,834,834,834, -834,834,834,834,834,834,834,834,830,830,830,830,833,833,120,120, +837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837, +837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837, +837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837, +838,839,839,840,840,840,840,840,840,839,840,839,839,838,839,840, +840,839,840,840,837,837,841,837,120,120,120,120,120,120,120,120, +842,842,842,842,842,842,842,842,842,842,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 176 */ -835,835,835,835,835,835,835,835,835,835,835,835,835,835,835,835, -835,835,835,835,835,835,835,835,835,835,835,835,835,835,835,835, 
-835,835,835,835,835,835,835,835,835,835,835,835,835,835,835,835, -836,836,836,837,837,837,837,837,837,837,837,836,836,837,836,837, -837,838,838,838,835,120,120,120,120,120,120,120,120,120,120,120, -839,839,839,839,839,839,839,839,839,839,120,120,120,120,120,120, -394,394,394,394,394,394,394,394,394,394,394,394,394,120,120,120, +843,843,843,843,843,843,843,843,843,843,843,843,843,843,843,843, +843,843,843,843,843,843,843,843,843,843,843,843,843,843,843,843, +843,843,843,843,843,843,843,843,843,843,843,843,843,843,843,844, +845,845,846,846,846,846,120,120,845,845,845,845,846,846,845,846, +846,847,847,847,847,847,847,847,847,847,847,847,847,847,847,847, +847,847,847,847,847,847,847,847,843,843,843,843,846,846,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 177 */ -840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,840, -840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,840, -840,840,840,840,840,840,840,840,840,840,840,841,842,841,842,842, -841,841,841,841,841,841,842,841,840,120,120,120,120,120,120,120, -843,843,843,843,843,843,843,843,843,843,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +848,848,848,848,848,848,848,848,848,848,848,848,848,848,848,848, +848,848,848,848,848,848,848,848,848,848,848,848,848,848,848,848, +848,848,848,848,848,848,848,848,848,848,848,848,848,848,848,848, +849,849,849,850,850,850,850,850,850,850,850,849,849,850,849,850, +850,851,851,851,848,120,120,120,120,120,120,120,120,120,120,120, +852,852,852,852,852,852,852,852,852,852,120,120,120,120,120,120, +398,398,398,398,398,398,398,398,398,398,398,398,398,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 178 */ -844,844,844,844,844,844,844,844,844,844,844,844,844,844,844,844, 
-844,844,844,844,844,844,844,844,844,844,844,120,120,845,845,845, -846,846,845,845,845,845,846,845,845,845,845,845,120,120,120,120, -847,847,847,847,847,847,847,847,847,847,848,848,849,849,849,850, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,853, +853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,853, +853,853,853,853,853,853,853,853,853,853,853,854,855,854,855,855, +854,854,854,854,854,854,855,854,853,856,120,120,120,120,120,120, +857,857,857,857,857,857,857,857,857,857,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 179 */ -851,851,851,851,851,851,851,851,851,851,851,851,851,851,851,851, -851,851,851,851,851,851,851,851,851,851,851,851,851,851,851,851, -851,851,851,851,851,851,851,851,851,851,851,851,852,852,852,853, -853,853,853,853,853,853,853,853,852,853,853,854,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +858,858,858,858,858,858,858,858,858,858,858,858,858,858,858,858, +858,858,858,858,858,858,858,858,858,858,858,120,120,859,859,859, +860,860,859,859,859,859,861,859,859,859,859,859,120,120,120,120, +862,862,862,862,862,862,862,862,862,862,863,863,864,864,864,865, +858,858,858,858,858,858,858,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 180 */ +866,866,866,866,866,866,866,866,866,866,866,866,866,866,866,866, +866,866,866,866,866,866,866,866,866,866,866,866,866,866,866,866, +866,866,866,866,866,866,866,866,866,866,866,866,867,867,867,868, +868,868,868,868,868,868,868,868,867,868,868,869,120,120,120,120, 
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -855,855,855,855,855,855,855,855,855,855,855,855,855,855,855,855, -855,855,855,855,855,855,855,855,855,855,855,855,855,855,855,855, -856,856,856,856,856,856,856,856,856,856,856,856,856,856,856,856, -856,856,856,856,856,856,856,856,856,856,856,856,856,856,856,856, -857,857,857,857,857,857,857,857,857,857,858,858,858,858,858,858, -858,858,858,120,120,120,120,120,120,120,120,120,120,120,120,859, /* block 181 */ -860,860,860,860,860,860,860,120,120,860,120,120,860,860,860,860, -860,860,860,860,120,860,860,120,860,860,860,860,860,860,860,860, -860,860,860,860,860,860,860,860,860,860,860,860,860,860,860,860, -861,862,862,862,862,862,120,862,862,120,120,863,863,862,863,864, -862,864,862,863,865,865,865,120,120,120,120,120,120,120,120,120, -866,866,866,866,866,866,866,866,866,866,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +870,870,870,870,870,870,870,870,870,870,870,870,870,870,870,870, +870,870,870,870,870,870,870,870,870,870,870,870,870,870,870,870, +871,871,871,871,871,871,871,871,871,871,871,871,871,871,871,871, +871,871,871,871,871,871,871,871,871,871,871,871,871,871,871,871, +872,872,872,872,872,872,872,872,872,872,873,873,873,873,873,873, +873,873,873,120,120,120,120,120,120,120,120,120,120,120,120,874, /* block 182 */ +875,875,875,875,875,875,875,120,120,875,120,120,875,875,875,875, +875,875,875,875,120,875,875,120,875,875,875,875,875,875,875,875, +875,875,875,875,875,875,875,875,875,875,875,875,875,875,875,875, +876,877,877,877,877,877,120,877,877,120,120,878,878,877,878,879, +877,879,877,878,880,880,880,120,120,120,120,120,120,120,120,120, 
+881,881,881,881,881,881,881,881,881,881,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -867,867,867,867,867,867,867,867,120,120,867,867,867,867,867,867, -867,867,867,867,867,867,867,867,867,867,867,867,867,867,867,867, -867,867,867,867,867,867,867,867,867,867,867,867,867,867,867,867, -867,868,868,868,869,869,869,869,120,120,869,869,868,868,868,868, -869,867,870,867,868,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 183 */ -871,872,872,872,872,872,872,872,872,872,872,871,871,871,871,871, -871,871,871,871,871,871,871,871,871,871,871,871,871,871,871,871, -871,871,871,871,871,871,871,871,871,871,871,871,871,871,871,871, -871,871,871,872,872,872,872,872,872,873,874,872,872,872,872,875, -875,875,875,875,875,875,875,872,120,120,120,120,120,120,120,120, -876,877,877,877,877,877,877,878,878,877,877,877,876,876,876,876, -876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876, -876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +882,882,882,882,882,882,882,882,120,120,882,882,882,882,882,882, +882,882,882,882,882,882,882,882,882,882,882,882,882,882,882,882, +882,882,882,882,882,882,882,882,882,882,882,882,882,882,882,882, +882,883,883,883,884,884,884,884,120,120,884,884,883,883,883,883, +884,882,885,882,883,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 184 */ -876,876,876,876,879,879,879,879,879,879,877,877,877,877,877,877, -877,877,877,877,877,877,877,878,877,877,880,880,880,876,880,880, -880,880,880,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 
-881,881,881,881,881,881,881,881,881,881,881,881,881,881,881,881, -881,881,881,881,881,881,881,881,881,881,881,881,881,881,881,881, -881,881,881,881,881,881,881,881,881,881,881,881,881,881,881,881, -881,881,881,881,881,881,881,881,881,120,120,120,120,120,120,120, +886,887,887,887,887,887,887,887,887,887,887,886,886,886,886,886, +886,886,886,886,886,886,886,886,886,886,886,886,886,886,886,886, +886,886,886,886,886,886,886,886,886,886,886,886,886,886,886,886, +886,886,886,887,887,887,887,887,887,888,889,887,887,887,887,890, +890,890,890,890,890,890,890,887,120,120,120,120,120,120,120,120, +891,892,892,892,892,892,892,893,893,892,892,892,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, /* block 185 */ -882,882,882,882,882,882,882,882,882,120,882,882,882,882,882,882, -882,882,882,882,882,882,882,882,882,882,882,882,882,882,882,882, -882,882,882,882,882,882,882,882,882,882,882,882,882,882,882,883, -884,884,884,884,884,884,884,120,884,884,884,884,884,884,883,884, -882,885,885,885,885,885,120,120,120,120,120,120,120,120,120,120, -886,886,886,886,886,886,886,886,886,886,887,887,887,887,887,887, -887,887,887,887,887,887,887,887,887,887,887,887,887,120,120,120, -888,888,889,889,889,889,889,889,889,889,889,889,889,889,889,889, +891,891,891,891,894,894,894,894,894,894,892,892,892,892,892,892, +892,892,892,892,892,892,892,893,892,892,895,895,895,891,895,895, +895,895,895,120,120,120,120,120,120,120,120,120,120,120,120,120, +370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896, +896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896, +896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896, +896,896,896,896,896,896,896,896,896,120,120,120,120,120,120,120, /* block 186 */ -889,889,889,889,889,889,889,889,889,889,889,889,889,889,889,889, 
-120,120,890,890,890,890,890,890,890,890,890,890,890,890,890,890, -890,890,890,890,890,890,890,890,120,891,890,890,890,890,890,890, -890,891,890,890,891,890,890,120,120,120,120,120,120,120,120,120, +897,897,897,897,897,897,897,897,897,120,897,897,897,897,897,897, +897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897, +897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,898, +899,899,899,899,899,899,899,120,899,899,899,899,899,899,898,899, +897,900,900,900,900,900,120,120,120,120,120,120,120,120,120,120, +901,901,901,901,901,901,901,901,901,901,902,902,902,902,902,902, +902,902,902,902,902,902,902,902,902,902,902,902,902,120,120,120, +903,903,904,904,904,904,904,904,904,904,904,904,904,904,904,904, + +/* block 187 */ +904,904,904,904,904,904,904,904,904,904,904,904,904,904,904,904, +120,120,905,905,905,905,905,905,905,905,905,905,905,905,905,905, +905,905,905,905,905,905,905,905,120,906,905,905,905,905,905,905, +905,906,905,905,906,905,905,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 187 */ -892,892,892,892,892,892,892,120,892,892,120,892,892,892,892,892, -892,892,892,892,892,892,892,892,892,892,892,892,892,892,892,892, -892,892,892,892,892,892,892,892,892,892,892,892,892,892,892,892, -892,893,893,893,893,893,893,120,120,120,893,120,893,893,120,893, -893,893,893,893,893,893,894,893,120,120,120,120,120,120,120,120, -895,895,895,895,895,895,895,895,895,895,120,120,120,120,120,120, -896,896,896,896,896,896,120,896,896,120,896,896,896,896,896,896, -896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896, - /* block 188 */ -896,896,896,896,896,896,896,896,896,896,897,897,897,897,897,120, -898,898,120,897,897,898,897,898,896,120,120,120,120,120,120,120, 
-899,899,899,899,899,899,899,899,899,899,120,120,120,120,120,120, +907,907,907,907,907,907,907,120,907,907,120,907,907,907,907,907, +907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907, +907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907, +907,908,908,908,908,908,908,120,120,120,908,120,908,908,120,908, +908,908,908,908,908,908,909,908,120,120,120,120,120,120,120,120, +910,910,910,910,910,910,910,910,910,910,120,120,120,120,120,120, +911,911,911,911,911,911,120,911,911,120,911,911,911,911,911,911, +911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, + +/* block 189 */ +911,911,911,911,911,911,911,911,911,911,912,912,912,912,912,120, +913,913,120,912,912,913,912,913,911,120,120,120,120,120,120,120, +914,914,914,914,914,914,914,914,914,914,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 189 */ +/* block 190 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -900,900,900,900,900,900,900,900,900,900,900,900,900,900,900,900, -900,900,900,901,901,902,902,903,903,120,120,120,120,120,120,120, +915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915, +915,915,915,916,916,917,917,918,918,120,120,120,120,120,120,120, -/* block 190 */ +/* block 191 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -590,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -904,904,904,904,904,904,904,904,904,904,904,904,904,904,904,904, -293,293,904,293,904,295,295,295,295,295,295,295,295,296,296,296, -296,295,295,295,295,295,295,295,295,295,295,295,295,295,295,295, -295,295,120,120,120,120,120,120,120,120,120,120,120,120,120,905, - -/* block 191 */ -906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906, -906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906, -906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906, -906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906, -906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906, -906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906, -906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906, -906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906, +595,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +919,919,919,919,919,919,919,919,919,919,919,919,919,919,919,919, +295,295,919,295,919,297,297,297,297,297,297,297,297,298,298,298, +298,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,120,120,120,120,120,120,120,120,120,120,120,120,120,920, /* block 192 */ -906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906, -906,906,906,906,906,906,906,906,906,906,120,120,120,120,120,120, +921,921,921,921,921,921,921,921,921,921,921,921,921,921,921,921, +921,921,921,921,921,921,921,921,921,921,921,921,921,921,921,921, +921,921,921,921,921,921,921,921,921,921,921,921,921,921,921,921, +921,921,921,921,921,921,921,921,921,921,921,921,921,921,921,921, +921,921,921,921,921,921,921,921,921,921,921,921,921,921,921,921, +921,921,921,921,921,921,921,921,921,921,921,921,921,921,921,921, +921,921,921,921,921,921,921,921,921,921,921,921,921,921,921,921, +921,921,921,921,921,921,921,921,921,921,921,921,921,921,921,921, + +/* block 193 */ 
+921,921,921,921,921,921,921,921,921,921,921,921,921,921,921,921, +921,921,921,921,921,921,921,921,921,921,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, @@ -3648,108 +3684,118 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 193 */ -907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907, -907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907, -907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907, -907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907, -907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907, -907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907, -907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,120, -908,908,908,908,908,120,120,120,120,120,120,120,120,120,120,120, - /* block 194 */ -906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906, -906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906, -906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906, -906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906, -906,906,906,906,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922, +922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922, +922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922, +922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922, +922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922, 
+922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922, +922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,120, +923,923,923,923,923,120,120,120,120,120,120,120,120,120,120,120, /* block 195 */ -909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, -909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, -909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, -909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, -909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, -909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, -909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, -909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, - -/* block 196 */ -909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, -909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, -909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,120, -910,910,910,910,910,910,910,910,910,120,120,120,120,120,120,120, +921,921,921,921,921,921,921,921,921,921,921,921,921,921,921,921, +921,921,921,921,921,921,921,921,921,921,921,921,921,921,921,921, +921,921,921,921,921,921,921,921,921,921,921,921,921,921,921,921, +921,921,921,921,921,921,921,921,921,921,921,921,921,921,921,921, +921,921,921,921,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, + +/* block 196 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924, +924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924, +924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924, +924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924, +924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924, 
+924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924, +924,925,925,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 197 */ -911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, -911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, -911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, -911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, -911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, -911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, -911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, -911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, +926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,926, +926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,926, +926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,926, +926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,926, +926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,926, +926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,926, +926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,926, +926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,926, /* block 198 */ -911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, -911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, -911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, -911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, -911,911,911,911,911,911,911,120,120,120,120,120,120,120,120,120, +926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,926, +926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,926, +926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,120, +927,927,927,927,927,927,927,927,927,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 199 */ -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, +928,928,928,928,928,928,928,928,928,928,928,928,928,928,928,928, +928,928,928,928,928,928,928,928,928,928,928,928,928,928,928,928, +928,928,928,928,928,928,928,928,928,928,928,928,928,928,928,928, +928,928,928,928,928,928,928,928,928,928,928,928,928,928,928,928, +928,928,928,928,928,928,928,928,928,928,928,928,928,928,928,928, +928,928,928,928,928,928,928,928,928,928,928,928,928,928,928,928, +928,928,928,928,928,928,928,928,928,928,928,928,928,928,928,928, +928,928,928,928,928,928,928,928,928,928,928,928,928,928,928,928, /* block 200 */ -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, -601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601, -601,601,601,601,601,601,601,601,601,120,120,120,120,120,120,120, -912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,912, -912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,120, -913,913,913,913,913,913,913,913,913,913,120,120,120,120,914,914, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, - -/* block 201 */ +928,928,928,928,928,928,928,928,928,928,928,928,928,928,928,928, +928,928,928,928,928,928,928,928,928,928,928,928,928,928,928,928, +928,928,928,928,928,928,928,928,928,928,928,928,928,928,928,928, 
+928,928,928,928,928,928,928,928,928,928,928,928,928,928,928,928, +928,928,928,928,928,928,928,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915, -915,915,915,915,915,915,915,915,915,915,915,915,915,915,120,120, -916,916,916,916,916,917,120,120,120,120,120,120,120,120,120,120, + +/* block 201 */ +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, /* block 202 */ -918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918, -918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918, -918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918, -919,919,919,919,919,919,919,920,920,920,920,920,921,921,921,921, -922,922,922,922,920,921,120,120,120,120,120,120,120,120,120,120, -923,923,923,923,923,923,923,923,923,923,120,924,924,924,924,924, -924,924,120,918,918,918,918,918,918,918,918,918,918,918,918,918, -918,918,918,918,918,918,918,918,120,120,120,120,120,918,918,918, +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, +606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606, 
+606,606,606,606,606,606,606,606,606,120,120,120,120,120,120,120, +929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,929, +929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,120, +930,930,930,930,930,930,930,930,930,930,120,120,120,120,931,931, +932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,932, /* block 203 */ -918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918, +932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,932, +932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,932, +932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,932, +932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,120, +933,933,933,933,933,933,933,933,933,933,120,120,120,120,120,120, +934,934,934,934,934,934,934,934,934,934,934,934,934,934,934,934, +934,934,934,934,934,934,934,934,934,934,934,934,934,934,120,120, +935,935,935,935,935,936,120,120,120,120,120,120,120,120,120,120, + +/* block 204 */ +937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, +937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, +937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, +938,938,938,938,938,938,938,939,939,939,939,939,940,940,940,940, +941,941,941,941,939,940,120,120,120,120,120,120,120,120,120,120, +942,942,942,942,942,942,942,942,942,942,120,943,943,943,943,943, +943,943,120,937,937,937,937,937,937,937,937,937,937,937,937,937, +937,937,937,937,937,937,937,937,120,120,120,120,120,937,937,937, + +/* block 205 */ +937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, @@ -3758,19 +3804,19 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 
204 */ +/* block 206 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -925,925,925,925,925,925,925,925,925,925,925,925,925,925,925,925, -925,925,925,925,925,925,925,925,925,925,925,925,925,925,925,925, -926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,926, -926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,926, +944,944,944,944,944,944,944,944,944,944,944,944,944,944,944,944, +944,944,944,944,944,944,944,944,944,944,944,944,944,944,944,944, +945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945, +945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945, -/* block 205 */ -927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,927, -927,927,927,927,927,927,927,928,928,928,928,120,120,120,120,120, +/* block 207 */ +946,946,946,946,946,946,946,946,946,946,946,946,946,946,946,946, +946,946,946,946,946,946,946,947,947,947,947,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, @@ -3778,68 +3824,68 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 206 */ -929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,929, -929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,929, -929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,929, -929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,929, -929,929,929,929,929,929,929,929,929,929,929,120,120,120,120,930, -929,931,931,931,931,931,931,931,931,931,931,931,931,931,931,931, 
-931,931,931,931,931,931,931,931,931,931,931,931,931,931,931,931, -931,931,931,931,931,931,931,931,931,931,931,931,931,931,931,931, +/* block 208 */ +948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948, +948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948, +948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948, +948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948, +948,948,948,948,948,948,948,948,948,948,948,120,120,120,120,949, +948,950,950,950,950,950,950,950,950,950,950,950,950,950,950,950, +950,950,950,950,950,950,950,950,950,950,950,950,950,950,950,950, +950,950,950,950,950,950,950,950,950,950,950,950,950,950,950,950, -/* block 207 */ -931,931,931,931,931,931,931,931,120,120,120,120,120,120,120,930, -930,930,930,932,932,932,932,932,932,932,932,932,932,932,932,932, +/* block 209 */ +950,950,950,950,950,950,950,950,120,120,120,120,120,120,120,949, +949,949,949,951,951,951,951,951,951,951,951,951,951,951,951,951, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -933,934, 5,111,935,120,120,120,120,120,120,120,120,120,120,120, -936,936,120,120,120,120,120,120,120,120,120,120,120,120,120,120, - -/* block 208 */ -937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, -937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, -937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, -937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, -937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, -937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, -937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, -937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, - -/* block 209 */ -937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, 
-937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, -937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, -937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, -937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, -937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, -937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937, -937,937,937,937,937,937,937,937,120,120,120,120,120,120,120,120, +952,953,954,562,955,120,120,120,120,120,120,120,120,120,120,120, +956,956,120,120,120,120,120,120,120,120,120,120,120,120,120,120, /* block 210 */ -938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, -938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, -938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, -938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, -938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, -938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, -938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, -938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, /* block 211 */ -938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, -938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, -938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, -938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, 
-938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, -938,938,938,938,938,938,120,120,120,120,120,120,120,120,120,120, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, +957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, +957,957,957,957,957,957,957,957,120,120,120,120,120,120,120,120, + +/* block 212 */ +958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958, +958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958, +958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958, +958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958, +958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958, +958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958, +958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958, +958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958, + +/* block 213 */ +958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958, +958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958, +958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958, +958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958, +958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958, +958,958,958,958,958,958,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 212 */ -937,937,937,937,937,937,937,937,937,120,120,120,120,120,120,120, +/* block 214 */ +957,957,957,957,957,957,957,957,957,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, @@ -3848,77 +3894,107 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 213 */ -578,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, - -/* block 214 */ -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, - /* block 215 */ -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573, -573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -573,573,573,120,120,120,120,120,120,120,120,120,120,120,120,120, -120,120,120,120,578,578,578,578,120,120,120,120,120,120,120,120, -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, 
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +584,584,584,584,120,584,584,584,584,584,584,584,120,584,584,120, /* block 216 */ -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, +583,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, /* block 217 */ -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, -939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939, -939,939,939,939,939,939,939,939,939,939,939,939,120,120,120,120, 
+578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, /* block 218 */ -940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,940, -940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,940, -940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,940, -940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,940, -940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,940, -940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,940, -940,940,940,940,940,940,940,940,940,940,940,120,120,120,120,120, -940,940,940,940,940,940,940,940,940,940,940,940,940,120,120,120, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578, +583,583,583,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +578,578,578,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,583,583,583,583,120,120,120,120,120,120,120,120, +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, /* block 219 */ -940,940,940,940,940,940,940,940,940,120,120,120,120,120,120,120, -940,940,940,940,940,940,940,940,940,940,120,120,941,942,942,943, -944,944,944,944,120,120,120,120,120,120,120,120,120,120,120,120, +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, 
+959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, + +/* block 220 */ +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, +959,959,959,959,959,959,959,959,959,959,959,959,959,959,959,959, +959,959,959,959,959,959,959,959,959,959,959,959,120,120,120,120, + +/* block 221 */ +960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, +960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, +960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, +960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, +960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, +960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, +960,960,960,960,960,960,960,960,960,960,960,120,120,120,120,120, +960,960,960,960,960,960,960,960,960,960,960,960,960,120,120,120, + +/* block 222 */ +960,960,960,960,960,960,960,960,960,120,120,120,120,120,120,120, +960,960,960,960,960,960,960,960,960,960,120,120,961,962,962,963, +964,964,964,964,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 220 */ +/* block 223 */ +113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, +113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, +113,113,113,113,113,113,113,113,113,113,113,113,113,113,120,120, +113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, +113,113,113,113,113,113,113,120,120,120,120,120,120,120,120,120, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + +/* block 224 */ + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, + +/* block 225 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, @@ -3928,37 +4004,37 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,120,120,120,120,120,120,120,120,120,120, -/* block 221 */ +/* block 226 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,120,120, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 
20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20,945,946,113,113,113, 20, 20, 20,946,945,945, -945,945,945, 24, 24, 24, 24, 24, 24, 24, 24,113,113,113,113,113, + 20, 20, 20, 20, 20,965,966,113,113,113, 20, 20, 20,966,965,965, +965,965,965, 24, 24, 24, 24, 24, 24, 24, 24,113,113,113,113,113, -/* block 222 */ +/* block 227 */ 113,113,113, 20, 20,113,113,113,113,113,113,113, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,113,113,113,113, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20,120,120,120,120,120,120,120, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 223 */ -685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685, -685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685, -685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685, -685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685, -685,685,947,947,947,685,120,120,120,120,120,120,120,120,120,120, +/* block 228 */ +692,692,692,692,692,692,692,692,692,692,692,692,692,692,692,692, +692,692,692,692,692,692,692,692,692,692,692,692,692,692,692,692, +692,692,692,692,692,692,692,692,692,692,692,692,692,692,692,692, +692,692,692,692,692,692,692,692,692,692,692,692,692,692,692,692, +692,692,967,967,967,692,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 224 */ +/* block 229 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, @@ -3968,187 +4044,207 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 225 */ +/* block 230 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,120,120,120,120,120,120,120,120,120, -582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,582, -582,582, 25, 25, 25, 25, 25, 25, 25,120,120,120,120,120,120,120, +587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587, +587,587, 25, 25, 25, 25, 25, 25, 25,120,120,120,120,120,120,120, -/* block 226 */ -513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, -513,513,513,513,513,513,513,513,513,513,514,514,514,514,514,514, -514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514, -514,514,514,514,513,513,513,513,513,513,513,513,513,513,513,513, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,514,514, -514,514,514,514,514,120,514,514,514,514,514,514,514,514,514,514, -514,514,514,514,514,514,514,514,513,513,513,513,513,513,513,513, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, +/* block 231 */ +518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518, +518,518,518,518,518,518,518,518,518,518,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,518,518,518,518,518,518,518,518,518,518,518,518, +518,518,518,518,518,518,518,518,518,518,518,518,518,518,519,519, +519,519,519,519,519,120,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,518,518,518,518,518,518,518,518, 
+518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518, -/* block 227 */ -513,513,514,514,514,514,514,514,514,514,514,514,514,514,514,514, -514,514,514,514,514,514,514,514,514,514,514,514,513,120,513,513, -120,120,513,120,120,513,513,120,120,513,513,513,513,120,513,513, -513,513,513,513,513,513,514,514,514,514,120,514,120,514,514,514, -514,514,514,514,120,514,514,514,514,514,514,514,514,514,514,514, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, -513,513,513,513,513,513,513,513,513,513,514,514,514,514,514,514, -514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514, +/* block 232 */ +518,518,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,518,120,518,518, +120,120,518,120,120,518,518,120,120,518,518,518,518,120,518,518, +518,518,518,518,518,518,519,519,519,519,120,519,120,519,519,519, +519,519,519,519,120,519,519,519,519,519,519,519,519,519,519,519, +518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518, +518,518,518,518,518,518,518,518,518,518,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519, -/* block 228 */ -514,514,514,514,513,513,120,513,513,513,513,120,120,513,513,513, -513,513,513,513,513,120,513,513,513,513,513,513,513,120,514,514, -514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514, -514,514,514,514,514,514,514,514,513,513,120,513,513,513,513,120, -513,513,513,513,513,120,513,120,120,120,513,513,513,513,513,513, -513,120,514,514,514,514,514,514,514,514,514,514,514,514,514,514, -514,514,514,514,514,514,514,514,514,514,514,514,513,513,513,513, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, +/* block 233 */ +519,519,519,519,518,518,120,518,518,518,518,120,120,518,518,518, +518,518,518,518,518,120,518,518,518,518,518,518,518,120,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,518,518,120,518,518,518,518,120, 
+518,518,518,518,518,120,518,120,120,120,518,518,518,518,518,518, +518,120,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,518,518,518,518, +518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518, -/* block 229 */ -513,513,513,513,513,513,514,514,514,514,514,514,514,514,514,514, -514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, -513,513,513,513,513,513,513,513,513,513,514,514,514,514,514,514, -514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514, -514,514,514,514,513,513,513,513,513,513,513,513,513,513,513,513, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,514,514, -514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514, +/* block 234 */ +518,518,518,518,518,518,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518, +518,518,518,518,518,518,518,518,518,518,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,518,518,518,518,518,518,518,518,518,518,518,518, +518,518,518,518,518,518,518,518,518,518,518,518,518,518,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519, -/* block 230 */ -514,514,514,514,514,514,514,514,513,513,513,513,513,513,513,513, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, -513,513,514,514,514,514,514,514,514,514,514,514,514,514,514,514, -514,514,514,514,514,514,514,514,514,514,514,514,513,513,513,513, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, -513,513,513,513,513,513,514,514,514,514,514,514,514,514,514,514, -514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, +/* block 235 */ +519,519,519,519,519,519,519,519,518,518,518,518,518,518,518,518, 
+518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518, +518,518,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,518,518,518,518, +518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518, +518,518,518,518,518,518,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518, -/* block 231 */ -513,513,513,513,513,513,513,513,513,513,514,514,514,514,514,514, -514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514, -514,514,514,514,514,514,120,120,513,513,513,513,513,513,513,513, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, -513, 9,514,514,514,514,514,514,514,514,514,514,514,514,514,514, -514,514,514,514,514,514,514,514,514,514,514, 9,514,514,514,514, -514,514,513,513,513,513,513,513,513,513,513,513,513,513,513,513, -513,513,513,513,513,513,513,513,513,513,513, 9,514,514,514,514, +/* block 236 */ +518,518,518,518,518,518,518,518,518,518,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,519,519,120,120,518,518,518,518,518,518,518,518, +518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518, +518, 9,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,519,519,519, 9,519,519,519,519, +519,519,518,518,518,518,518,518,518,518,518,518,518,518,518,518, +518,518,518,518,518,518,518,518,518,518,518, 9,519,519,519,519, -/* block 232 */ -514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514, -514,514,514,514,514, 9,514,514,514,514,514,514,513,513,513,513, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, -513,513,513,513,513, 9,514,514,514,514,514,514,514,514,514,514, -514,514,514,514,514,514,514,514,514,514,514,514,514,514,514, 9, -514,514,514,514,514,514,513,513,513,513,513,513,513,513,513,513, 
-513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, 9, -514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514, +/* block 237 */ +519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,519, 9,519,519,519,519,519,519,518,518,518,518, +518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518, +518,518,518,518,518, 9,519,519,519,519,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,519,519,519, 9, +519,519,519,519,519,519,518,518,518,518,518,518,518,518,518,518, +518,518,518,518,518,518,518,518,518,518,518,518,518,518,518, 9, +519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519, -/* block 233 */ -514,514,514,514,514,514,514,514,514, 9,514,514,514,514,514,514, -513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, -513,513,513,513,513,513,513,513,513, 9,514,514,514,514,514,514, -514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514, -514,514,514, 9,514,514,514,514,514,514,513,514,120,120, 11, 11, +/* block 238 */ +519,519,519,519,519,519,519,519,519, 9,519,519,519,519,519,519, +518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518, +518,518,518,518,518,518,518,518,518, 9,519,519,519,519,519,519, +519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519, +519,519,519, 9,519,519,519,519,519,519,518,519,120,120, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, -/* block 234 */ -948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948, -948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948, -948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948, -948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948, -948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948, -948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948, 
-948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948, -948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948, +/* block 239 */ +968,968,968,968,968,968,968,968,968,968,968,968,968,968,968,968, +968,968,968,968,968,968,968,968,968,968,968,968,968,968,968,968, +968,968,968,968,968,968,968,968,968,968,968,968,968,968,968,968, +968,968,968,968,968,968,968,968,968,968,968,968,968,968,968,968, +968,968,968,968,968,968,968,968,968,968,968,968,968,968,968,968, +968,968,968,968,968,968,968,968,968,968,968,968,968,968,968,968, +968,968,968,968,968,968,968,968,968,968,968,968,968,968,968,968, +968,968,968,968,968,968,968,968,968,968,968,968,968,968,968,968, -/* block 235 */ -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,948,948,948,948,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,948,948,948, -948,948,948,948,948,949,948,948,948,948,948,948,948,948,948,948, +/* block 240 */ +969,969,969,969,969,969,969,969,969,969,969,969,969,969,969,969, +969,969,969,969,969,969,969,969,969,969,969,969,969,969,969,969, +969,969,969,969,969,969,969,969,969,969,969,969,969,969,969,969, +969,969,969,969,969,969,969,968,968,968,968,969,969,969,969,969, +969,969,969,969,969,969,969,969,969,969,969,969,969,969,969,969, +969,969,969,969,969,969,969,969,969,969,969,969,969,969,969,969, +969,969,969,969,969,969,969,969,969,969,969,969,969,968,968,968, +968,968,968,968,968,969,968,968,968,968,968,968,968,968,968,968, -/* block 236 */ -948,948,948,948,949,948,948,950,950,950,950,950,120,120,120,120, -120,120,120,120,120,120,120,120,120,120,120,949,949,949,949,949, -120,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, +/* block 241 */ 
+968,968,968,968,969,968,968,970,970,970,970,970,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,969,969,969,969,969, +120,969,969,969,969,969,969,969,969,969,969,969,969,969,969,969, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 237 */ -951,951,951,951,951,951,951,120,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,120,120,951,951,951,951,951, -951,951,120,951,951,120,951,951,951,951,951,120,120,120,120,120, +/* block 242 */ + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 22, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 238 */ -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,120,120,120, -953,953,953,953,953,953,953,954,954,954,954,954,954,954,120,120, -955,955,955,955,955,955,955,955,955,955,120,120,120,120,952,956, +/* block 243 */ +971,971,971,971,971,971,971,120,971,971,971,971,971,971,971,971, +971,971,971,971,971,971,971,971,971,120,120,971,971,971,971,971, +971,971,120,971,971,120,971,971,971,971,971,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 239 */ +/* block 244 */ +972,972,972,972,972,972,972,972,972,972,972,972,972,972,972,972, +972,972,972,972,972,972,972,972,972,972,972,972,972,972,972,972, +972,972,972,972,972,972,972,972,972,972,972,972,972,120,120,120, +973,973,973,973,973,973,973,974,974,974,974,974,974,974,120,120, +975,975,975,975,975,975,975,975,975,975,120,120,120,120,972,976, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, + +/* block 245 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, -957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, -957,957,957,957,957,957,957,957,957,957,957,957,958,958,958,958, -959,959,959,959,959,959,959,959,959,959,120,120,120,120,120,960, +977,977,977,977,977,977,977,977,977,977,977,977,977,977,977,977, +977,977,977,977,977,977,977,977,977,977,977,977,977,977,978,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +979,979,979,979,979,979,979,979,979,979,979,979,979,979,979,979, +979,979,979,979,979,979,979,979,979,979,979,979,979,979,979,979, +979,979,979,979,979,979,979,979,979,979,979,979,980,980,980,980, +981,981,981,981,981,981,981,981,981,981,120,120,120,120,120,982, -/* block 240 */ -961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961, -961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961, -961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961, -961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961, -961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961, -961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961, 
-961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961, -961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961, +/* block 246 */ +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +361,361,361,361,361,361,361,120,361,361,361,361,120,361,361,120, +361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,120, -/* block 241 */ -961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961, -961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961, -961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961, -961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961, -961,961,961,961,961,120,120,962,962,962,962,962,962,962,962,962, -963,963,963,963,963,963,963,120,120,120,120,120,120,120,120,120, +/* block 247 */ +983,983,983,983,983,983,983,983,983,983,983,983,983,983,983,983, +983,983,983,983,983,983,983,983,983,983,983,983,983,983,983,983, +983,983,983,983,983,983,983,983,983,983,983,983,983,983,983,983, +983,983,983,983,983,983,983,983,983,983,983,983,983,983,983,983, +983,983,983,983,983,983,983,983,983,983,983,983,983,983,983,983, +983,983,983,983,983,983,983,983,983,983,983,983,983,983,983,983, +983,983,983,983,983,983,983,983,983,983,983,983,983,983,983,983, +983,983,983,983,983,983,983,983,983,983,983,983,983,983,983,983, + +/* block 248 */ +983,983,983,983,983,983,983,983,983,983,983,983,983,983,983,983, +983,983,983,983,983,983,983,983,983,983,983,983,983,983,983,983, +983,983,983,983,983,983,983,983,983,983,983,983,983,983,983,983, +983,983,983,983,983,983,983,983,983,983,983,983,983,983,983,983, +983,983,983,983,983,120,120,984,984,984,984,984,984,984,984,984, 
+985,985,985,985,985,985,985,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 242 */ -964,964,964,964,964,964,964,964,964,964,964,964,964,964,964,964, -964,964,964,964,964,964,964,964,964,964,964,964,964,964,964,964, -964,964,965,965,965,965,965,965,965,965,965,965,965,965,965,965, -965,965,965,965,965,965,965,965,965,965,965,965,965,965,965,965, -965,965,965,965,966,966,966,966,966,966,966,967,120,120,120,120, -968,968,968,968,968,968,968,968,968,968,120,120,120,120,969,969, +/* block 249 */ +986,986,986,986,986,986,986,986,986,986,986,986,986,986,986,986, +986,986,986,986,986,986,986,986,986,986,986,986,986,986,986,986, +986,986,987,987,987,987,987,987,987,987,987,987,987,987,987,987, +987,987,987,987,987,987,987,987,987,987,987,987,987,987,987,987, +987,987,987,987,988,988,988,988,988,988,988,989,120,120,120,120, +990,990,990,990,990,990,990,990,990,990,120,120,120,120,991,991, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 243 */ +/* block 250 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, @@ -4158,7 +4254,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, -/* block 244 */ +/* block 251 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 20, 25, 25, 25, @@ -4168,7 +4264,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 245 */ +/* block 252 */ 120, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 20, 25, @@ -4178,47 +4274,47 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 246 */ -224,224,224,224,120,224,224,224,224,224,224,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224, -120,224,224,120,224,120,120,224,120,224,224,224,224,224,224,224, -224,224,224,120,224,224,224,224,120,224,120,224,120,120,120,120, -120,120,224,120,120,120,120,224,120,224,120,224,120,224,224,224, -120,224,224,120,224,120,120,224,120,224,120,224,120,224,120,224, -120,224,224,120,224,120,120,224,224,224,224,120,224,224,224,224, -224,224,224,120,224,224,224,224,120,224,224,224,224,120,224,120, +/* block 253 */ +225,225,225,225,120,225,225,225,225,225,225,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225, +120,225,225,120,225,120,120,225,120,225,225,225,225,225,225,225, +225,225,225,120,225,225,225,225,120,225,120,225,120,120,120,120, +120,120,225,120,120,120,120,225,120,225,120,225,120,225,225,225, +120,225,225,120,225,120,120,225,120,225,120,225,120,225,120,225, +120,225,225,120,225,120,120,225,225,225,225,120,225,225,225,225, +225,225,225,120,225,225,225,225,120,225,225,225,225,120,225,120, -/* block 247 */ -224,224,224,224,224,224,224,224,224,224,120,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,120,120,120,120, -120,224,224,224,120,224,224,224,224,224,120,224,224,224,224,224, -224,224,224,224,224,224,224,224,224,224,224,224,120,120,120,120, +/* block 254 */ +225,225,225,225,225,225,225,225,225,225,120,225,225,225,225,225, 
+225,225,225,225,225,225,225,225,225,225,225,225,120,120,120,120, +120,225,225,225,120,225,225,225,225,225,120,225,225,225,225,225, +225,225,225,225,225,225,225,225,225,225,225,225,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 217,217,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 248 */ +/* block 255 */ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,970,970,970,970, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,992,992,992,992, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -/* block 249 */ +/* block 256 */ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21,970,970,970,970,970,970,970,970,970,970,970,970, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,970, -970, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -970, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -970, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21,992,992,992,992,992,992,992,992,992,992,992,992, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,992, +992, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, +992, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, +992, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21,970,970,970,970,970,970,970,970,970,970, + 21, 21, 21, 21, 21, 
21,992,992,992,992,992,992,992,992,992,992, -/* block 250 */ +/* block 257 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, @@ -4228,37 +4324,37 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, -/* block 251 */ +/* block 258 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,971,971,971,971,971,971,971,971,971,971, -971,971,971,971,971,971,971,971,971,971,971,971,971,971,971,971, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,993,993,993,993,993,993,993,993,993,993, +993,993,993,993,993,993,993,993,993,993,993,993,993,993,993,993, -/* block 252 */ -972, 21, 21,970,970,970,970,970,970,970,970,970,970,970,970,970, +/* block 259 */ +994, 21, 21,992,992,992,992,992,992,992,992,992,992,992,992,992, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, - 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20,970,970,970,970, - 20, 20, 20, 20, 20, 20, 20, 20, 20,970,970,970,970,970,970,970, -584,584,970,970,970,970,970,970,970,970,970,970,970,970,970,970, - 21, 21, 21, 21, 21, 
21,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, + 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20,992,992,992,992, + 20, 20, 20, 20, 20, 20, 20, 20, 20,992,992,992,992,992,992,992, +589,589,992,992,992,992,992,992,992,992,992,992,992,992,992,992, + 21, 21, 21, 21, 21, 21,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, -/* block 253 */ -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, +/* block 260 */ +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, -/* block 254 */ +/* block 261 */ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, @@ -4268,7 +4364,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -/* block 255 */ +/* 
block 262 */ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, @@ -4276,9 +4372,9 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,973,973,973,973,973, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,995,995,995,995,995, -/* block 256 */ +/* block 263 */ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, @@ -4288,7 +4384,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -/* block 257 */ +/* block 264 */ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, @@ -4298,17 +4394,17 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, -/* block 258 */ +/* block 265 */ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21,970,970,970,970,970,970,970,970, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,970,970,970, - 
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,970,970,970, + 21, 21, 21, 21, 21, 21, 21, 21,992,992,992,992,992, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,992,992,992, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,992,992,992, -/* block 259 */ +/* block 266 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, @@ -4316,39 +4412,39 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20,970,970,970,970,970,970,970,970,970,970,970,970, + 20, 20, 20, 20,992,992,992,992,992,992,992,992,992,992,992,992, -/* block 260 */ +/* block 267 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 21, 21, 21, 21,970,970,970,970,970,970,970, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, + 20, 20, 20, 20, 20, 21, 21, 21, 21,992,992,992,992,992,992,992, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,992,992,992,992, + 21,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, -/* block 261 */ - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,970,970,970,970, +/* block 268 */ + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,992,992,992,992, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 
20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20,970,970,970,970,970,970,970,970, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,970,970,970,970,970,970, + 20, 20, 20, 20, 20, 20, 20, 20,992,992,992,992,992,992,992,992, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,992,992,992,992,992,992, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, -/* block 262 */ - 20, 20, 20, 20, 20, 20, 20, 20,970,970,970,970,970,970,970,970, +/* block 269 */ + 20, 20, 20, 20, 20, 20, 20, 20,992,992,992,992,992,992,992,992, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,970,970, - 21, 21,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,992,992, + 21, 21,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, -/* block 263 */ +/* block 270 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, @@ -4356,39 +4452,29 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 21, 21, 21, 21, 21, 21, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21,970, 21, 21, 21, 21, 
21, 21, - -/* block 264 */ - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,970, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, -/* block 265 */ +/* block 271 */ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21,970,970,970,970,970,970,970,970,970,970,970,970, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,970,970, - 21, 21, 21, 21, 21,970,970,970, 21, 21, 21,970,970,970,970,970, + 21, 21, 21, 21,992,992,992,992,992,992,992,992,992,992,992,992, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,992,992, + 21, 21, 21, 21, 21,992,992,992, 21, 21, 21, 21, 21,992,992,992, -/* block 266 */ - 21, 21, 21, 21, 21, 21, 21,970,970,970,970,970,970,970,970,970, +/* block 272 */ + 21, 21, 21, 21, 21, 21, 21,992,992,992,992,992,992,992,992,992, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21,970,970,970,970,970,970,970, - 21, 21, 21, 21, 21, 21, 21,970,970,970,970,970,970,970,970,970, - 21, 21, 21,970,970,970,970,970,970,970,970,970,970,970,970,970, - 21, 21, 21, 21, 21, 21, 21,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,992,992,992, + 
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,992,992,992,992,992, + 21, 21, 21, 21, 21, 21,992,992,992,992,992,992,992,992,992,992, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,992,992,992,992,992,992, + 21, 21, 21, 21, 21, 21, 21, 21,992,992,992,992,992,992,992,992, + 21, 21, 21, 21, 21, 21, 21,992,992,992,992,992,992,992,992,992, -/* block 267 */ +/* block 273 */ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,120, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, @@ -4398,69 +4484,69 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,120,120,120,120,120,120, -/* block 268 */ -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970, -970,970,970,970,970,970,970,970,970,970,970,970,970,970,120,120, +/* block 274 */ +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,992,992, +992,992,992,992,992,992,992,992,992,992,992,992,992,992,120,120, -/* block 269 */ -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, 
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,120,120, +/* block 275 */ +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 270 */ -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,120,120,120,120,120,120,120,120,120,120,120, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +/* block 276 */ +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,120,120,120,120,120,120,120, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, -/* block 271 */ 
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,120,120, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +/* block 277 */ +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,120,120, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, -/* block 272 */ -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, +/* block 278 */ +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, 
+591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, -/* block 273 */ -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, +/* block 279 */ +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 274 */ -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,120,120, +/* block 280 */ +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, @@ -4468,37 +4554,37 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 275 */ 
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586, -586,586,586,586,586,586,586,586,586,586,586,120,120,120,120,120, +/* block 281 */ +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, 120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120, -/* block 276 */ -511, 24,511,511,511,511,511,511,511,511,511,511,511,511,511,511, -511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511, -974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,974, -974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,974, -974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,974, -974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,974, -974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,974, -974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,974, +/* block 282 */ +516, 24,516,516,516,516,516,516,516,516,516,516,516,516,516,516, +516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, +996,996,996,996,996,996,996,996,996,996,996,996,996,996,996,996, +996,996,996,996,996,996,996,996,996,996,996,996,996,996,996,996, +996,996,996,996,996,996,996,996,996,996,996,996,996,996,996,996, +996,996,996,996,996,996,996,996,996,996,996,996,996,996,996,996, +996,996,996,996,996,996,996,996,996,996,996,996,996,996,996,996, +996,996,996,996,996,996,996,996,996,996,996,996,996,996,996,996, -/* block 277 */ 
-511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511, -511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511, -511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511, -511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511, -511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511, -511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511, -511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511, -511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511, +/* block 283 */ +516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, +516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, +516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, +516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, +516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, +516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, +516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, +516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, -/* block 278 */ +/* block 284 */ 113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, 113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, 113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, @@ -4508,7 +4594,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, 113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, -/* block 279 */ +/* block 285 */ 113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, 113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, 113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, @@ -4516,17 +4602,17 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */ 113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, 113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, 
113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113, -511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511, - -/* block 280 */ -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673, -673,673,673,673,673,673,673,673,673,673,673,673,673,673,120,120, +516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516, + +/* block 286 */ +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678, +678,678,678,678,678,678,678,678,678,678,678,678,678,678,120,120, }; diff --git a/thirdparty/pcre2/src/pcre2_ucp.h b/thirdparty/pcre2/src/pcre2_ucp.h index 9538062c71..d84f269e87 100644 --- a/thirdparty/pcre2/src/pcre2_ucp.h +++ b/thirdparty/pcre2/src/pcre2_ucp.h @@ -291,7 +291,13 @@ enum { ucp_Chorasmian, ucp_Dives_Akuru, ucp_Khitan_Small_Script, - ucp_Yezidi + ucp_Yezidi, + /* New for Unicode 14.0.0 */ + ucp_Cypro_Minoan, + ucp_Old_Uyghur, + ucp_Tangsa, + ucp_Toto, + ucp_Vithkuqi }; #endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */ diff --git a/thirdparty/pcre2/src/sljit/sljitConfigInternal.h b/thirdparty/pcre2/src/sljit/sljitConfigInternal.h index eb1132db30..7bb9990a59 100644 --- a/thirdparty/pcre2/src/sljit/sljitConfigInternal.h +++ 
b/thirdparty/pcre2/src/sljit/sljitConfigInternal.h @@ -158,6 +158,8 @@ extern "C" { #define SLJIT_CONFIG_MIPS_64 1 #elif defined(__sparc__) || defined(__sparc) #define SLJIT_CONFIG_SPARC_32 1 +#elif defined(__s390x__) +#define SLJIT_CONFIG_S390X 1 #else /* Unsupported architecture */ #define SLJIT_CONFIG_UNSUPPORTED 1 @@ -759,6 +761,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \ (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS) +/********************************/ +/* CPU status flags management. */ +/********************************/ + +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ + || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + || (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ + || (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +#define SLJIT_HAS_STATUS_FLAGS_STATE 1 +#endif + /*************************************/ /* Debug and verbose related macros. 
*/ /*************************************/ diff --git a/thirdparty/pcre2/src/sljit/sljitExecAllocator.c b/thirdparty/pcre2/src/sljit/sljitExecAllocator.c index 61a32f23e9..6e5bf78e45 100644 --- a/thirdparty/pcre2/src/sljit/sljitExecAllocator.c +++ b/thirdparty/pcre2/src/sljit/sljitExecAllocator.c @@ -79,6 +79,7 @@ */ #ifdef _WIN32 +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) static SLJIT_INLINE void* alloc_chunk(sljit_uw size) { @@ -91,96 +92,108 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) VirtualFree(chunk, 0, MEM_RELEASE); } -#else - -#ifdef __APPLE__ -#ifdef MAP_ANON -/* Configures TARGET_OS_OSX when appropriate */ -#include <TargetConditionals.h> - -#if TARGET_OS_OSX && defined(MAP_JIT) -#include <sys/utsname.h> -#endif /* TARGET_OS_OSX && MAP_JIT */ - -#ifdef MAP_JIT +#else /* POSIX */ +#if defined(__APPLE__) && defined(MAP_JIT) /* On macOS systems, returns MAP_JIT if it is defined _and_ we're running on a - version where it's OK to have more than one JIT block. + version where it's OK to have more than one JIT block or where MAP_JIT is + required. On non-macOS systems, returns MAP_JIT if it is defined. */ +#include <TargetConditionals.h> +#if TARGET_OS_OSX +#if defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86 +#ifdef MAP_ANON +#include <sys/utsname.h> +#include <stdlib.h> + +#define SLJIT_MAP_JIT (get_map_jit_flag()) + static SLJIT_INLINE int get_map_jit_flag() { -#if TARGET_OS_OSX - sljit_sw page_size = get_page_alignment() + 1; + sljit_sw page_size; void *ptr; + struct utsname name; static int map_jit_flag = -1; - /* - The following code is thread safe because multiple initialization - sets map_jit_flag to the same value and the code has no side-effects. - Changing the kernel version witout system restart is (very) unlikely. 
- */ - if (map_jit_flag == -1) { - struct utsname name; - + if (map_jit_flag < 0) { map_jit_flag = 0; uname(&name); - /* Kernel version for 10.14.0 (Mojave) */ + /* Kernel version for 10.14.0 (Mojave) or later */ if (atoi(name.release) >= 18) { + page_size = get_page_alignment() + 1; /* Only use MAP_JIT if a hardened runtime is used */ + ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANON, -1, 0); - ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); - - if (ptr == MAP_FAILED) { - map_jit_flag = MAP_JIT; - } else { + if (ptr != MAP_FAILED) munmap(ptr, page_size); - } + else + map_jit_flag = MAP_JIT; } } - return map_jit_flag; -#else /* !TARGET_OS_OSX */ - return MAP_JIT; -#endif /* TARGET_OS_OSX */ } - -#endif /* MAP_JIT */ #endif /* MAP_ANON */ -#endif /* __APPLE__ */ +#else /* !SLJIT_CONFIG_X86 */ +#if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) +#error Unsupported architecture +#endif /* SLJIT_CONFIG_ARM */ +#include <pthread.h> + +#define SLJIT_MAP_JIT (MAP_JIT) +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \ + apple_update_wx_flags(enable_exec) + +static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec) +{ + pthread_jit_write_protect_np(enable_exec); +} +#endif /* SLJIT_CONFIG_X86 */ +#else /* !TARGET_OS_OSX */ +#define SLJIT_MAP_JIT (MAP_JIT) +#endif /* TARGET_OS_OSX */ +#endif /* __APPLE__ && MAP_JIT */ +#ifndef SLJIT_UPDATE_WX_FLAGS +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) +#endif /* !SLJIT_UPDATE_WX_FLAGS */ +#ifndef SLJIT_MAP_JIT +#define SLJIT_MAP_JIT (0) +#endif /* !SLJIT_MAP_JIT */ static SLJIT_INLINE void* alloc_chunk(sljit_uw size) { void *retval; - const int prot = PROT_READ | PROT_WRITE | PROT_EXEC; - -#ifdef MAP_ANON + int prot = PROT_READ | PROT_WRITE | PROT_EXEC; + int flags = MAP_PRIVATE; + int fd = -1; - int flags = MAP_PRIVATE | MAP_ANON; - -#ifdef MAP_JIT - flags |= get_map_jit_flag(); +#ifdef PROT_MAX + prot |= PROT_MAX(prot); #endif - retval = 
mmap(NULL, size, prot, flags, -1, 0); +#ifdef MAP_ANON + flags |= MAP_ANON | SLJIT_MAP_JIT; #else /* !MAP_ANON */ if (SLJIT_UNLIKELY((dev_zero < 0) && open_dev_zero())) return NULL; - retval = mmap(NULL, size, prot, MAP_PRIVATE, dev_zero, 0); + fd = dev_zero; #endif /* MAP_ANON */ + retval = mmap(NULL, size, prot, flags, fd, 0); if (retval == MAP_FAILED) - retval = NULL; - else { - if (mprotect(retval, size, prot) < 0) { - munmap(retval, size); - retval = NULL; - } + return NULL; + + if (mprotect(retval, size, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) { + munmap(retval, size); + return NULL; } + SLJIT_UPDATE_WX_FLAGS(retval, (uint8_t *)retval + size, 0); + return retval; } @@ -189,7 +202,7 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) munmap(chunk, size); } -#endif +#endif /* windows */ /* --------------------------------------------------------------------- */ /* Common functions */ @@ -261,6 +274,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) while (free_block) { if (free_block->size >= size) { chunk_size = free_block->size; + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); if (chunk_size > size + 64) { /* We just cut a block from the end of the free block. */ chunk_size -= size; @@ -326,6 +340,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) allocated_size -= header->size; /* Connecting free blocks together if possible. */ + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); /* If header->prev_size == 0, free_block will equal to header. In this case, free_block->header.size will be > 0. 
*/ @@ -358,6 +373,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) } } + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1); SLJIT_ALLOCATOR_UNLOCK(); } @@ -367,6 +383,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) struct free_block* next_free_block; SLJIT_ALLOCATOR_LOCK(); + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); free_block = free_blocks; while (free_block) { @@ -381,5 +398,6 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) } SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks)); + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1); SLJIT_ALLOCATOR_UNLOCK(); } diff --git a/thirdparty/pcre2/src/sljit/sljitLir.c b/thirdparty/pcre2/src/sljit/sljitLir.c index d817c90b3a..a24a99ab87 100644 --- a/thirdparty/pcre2/src/sljit/sljitLir.c +++ b/thirdparty/pcre2/src/sljit/sljitLir.c @@ -532,13 +532,21 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_la put_label->label = label; } +#define SLJIT_CURRENT_FLAGS_ALL \ + (SLJIT_CURRENT_FLAGS_I32_OP | SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE) + SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(current_flags); +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + compiler->status_flags_state = current_flags; +#endif + #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - if ((current_flags & ~(VARIABLE_FLAG_MASK | SLJIT_I32_OP | SLJIT_SET_Z)) == 0) { + compiler->last_flags = 0; + if ((current_flags & ~(VARIABLE_FLAG_MASK | SLJIT_SET_Z | SLJIT_CURRENT_FLAGS_ALL)) == 0) { compiler->last_flags = GET_FLAG_TYPE(current_flags) | (current_flags & (SLJIT_I32_OP | SLJIT_SET_Z)); } #endif @@ -968,7 +976,7 @@ static const char* fop2_names[] = { }; #define JUMP_POSTFIX(type) \ - ((type & 0xff) <= SLJIT_MUL_NOT_OVERFLOW ? ((type & SLJIT_I32_OP) ? "32" : "") \ + ((type & 0xff) <= SLJIT_NOT_OVERFLOW ? 
((type & SLJIT_I32_OP) ? "32" : "") \ : ((type & 0xff) <= SLJIT_ORDERED_F64 ? ((type & SLJIT_F32_OP) ? ".f32" : ".f64") : "")) static char* jump_names[] = { @@ -978,7 +986,6 @@ static char* jump_names[] = { (char*)"sig_less", (char*)"sig_greater_equal", (char*)"sig_greater", (char*)"sig_less_equal", (char*)"overflow", (char*)"not_overflow", - (char*)"mul_overflow", (char*)"mul_not_overflow", (char*)"carry", (char*)"", (char*)"equal", (char*)"not_equal", (char*)"less", (char*)"greater_equal", @@ -1278,7 +1285,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler case SLJIT_MUL: CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) - || GET_FLAG_TYPE(op) == SLJIT_MUL_OVERFLOW); + || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW); break; case SLJIT_ADD: CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) @@ -1601,9 +1608,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); else CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) - || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) - || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW)); - CHECK_ARGUMENT((type & SLJIT_I32_OP) == (compiler->last_flags & SLJIT_I32_OP)); + || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); } #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -1818,8 +1823,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); else CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) - || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) - || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW)); + || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == 
SLJIT_OVERFLOW)); FUNCTION_CHECK_DST(dst, dstw, 0); @@ -1858,8 +1862,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); else CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) - || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) - || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW)); + || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { diff --git a/thirdparty/pcre2/src/sljit/sljitLir.h b/thirdparty/pcre2/src/sljit/sljitLir.h index 93d2804675..0eb62fc21b 100644 --- a/thirdparty/pcre2/src/sljit/sljitLir.h +++ b/thirdparty/pcre2/src/sljit/sljitLir.h @@ -412,6 +412,10 @@ struct sljit_compiler { /* Executable size for statistical purposes. */ sljit_uw executable_size; +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + sljit_s32 status_flags_state; +#endif + #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) sljit_s32 args; sljit_s32 locals_offset; @@ -460,7 +464,7 @@ struct sljit_compiler { #if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) /* Need to allocate register save area to make calls. */ - sljit_s32 have_save_area; + sljit_s32 mode; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -996,7 +1000,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #define SLJIT_SUBC (SLJIT_OP2_BASE + 3) #define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_I32_OP) /* Note: integer mul - Flags: MUL_OVERFLOW */ + Flags: OVERFLOW */ #define SLJIT_MUL (SLJIT_OP2_BASE + 4) #define SLJIT_MUL32 (SLJIT_MUL | SLJIT_I32_OP) /* Flags: Z */ @@ -1141,89 +1145,69 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi /* Integer comparison types. 
*/ #define SLJIT_EQUAL 0 -#define SLJIT_EQUAL32 (SLJIT_EQUAL | SLJIT_I32_OP) -#define SLJIT_ZERO 0 -#define SLJIT_ZERO32 (SLJIT_ZERO | SLJIT_I32_OP) +#define SLJIT_ZERO SLJIT_EQUAL #define SLJIT_NOT_EQUAL 1 -#define SLJIT_NOT_EQUAL32 (SLJIT_NOT_EQUAL | SLJIT_I32_OP) -#define SLJIT_NOT_ZERO 1 -#define SLJIT_NOT_ZERO32 (SLJIT_NOT_ZERO | SLJIT_I32_OP) +#define SLJIT_NOT_ZERO SLJIT_NOT_EQUAL #define SLJIT_LESS 2 -#define SLJIT_LESS32 (SLJIT_LESS | SLJIT_I32_OP) #define SLJIT_SET_LESS SLJIT_SET(SLJIT_LESS) #define SLJIT_GREATER_EQUAL 3 -#define SLJIT_GREATER_EQUAL32 (SLJIT_GREATER_EQUAL | SLJIT_I32_OP) #define SLJIT_SET_GREATER_EQUAL SLJIT_SET(SLJIT_GREATER_EQUAL) #define SLJIT_GREATER 4 -#define SLJIT_GREATER32 (SLJIT_GREATER | SLJIT_I32_OP) #define SLJIT_SET_GREATER SLJIT_SET(SLJIT_GREATER) #define SLJIT_LESS_EQUAL 5 -#define SLJIT_LESS_EQUAL32 (SLJIT_LESS_EQUAL | SLJIT_I32_OP) #define SLJIT_SET_LESS_EQUAL SLJIT_SET(SLJIT_LESS_EQUAL) #define SLJIT_SIG_LESS 6 -#define SLJIT_SIG_LESS32 (SLJIT_SIG_LESS | SLJIT_I32_OP) #define SLJIT_SET_SIG_LESS SLJIT_SET(SLJIT_SIG_LESS) #define SLJIT_SIG_GREATER_EQUAL 7 -#define SLJIT_SIG_GREATER_EQUAL32 (SLJIT_SIG_GREATER_EQUAL | SLJIT_I32_OP) #define SLJIT_SET_SIG_GREATER_EQUAL SLJIT_SET(SLJIT_SIG_GREATER_EQUAL) #define SLJIT_SIG_GREATER 8 -#define SLJIT_SIG_GREATER32 (SLJIT_SIG_GREATER | SLJIT_I32_OP) #define SLJIT_SET_SIG_GREATER SLJIT_SET(SLJIT_SIG_GREATER) #define SLJIT_SIG_LESS_EQUAL 9 -#define SLJIT_SIG_LESS_EQUAL32 (SLJIT_SIG_LESS_EQUAL | SLJIT_I32_OP) #define SLJIT_SET_SIG_LESS_EQUAL SLJIT_SET(SLJIT_SIG_LESS_EQUAL) #define SLJIT_OVERFLOW 10 -#define SLJIT_OVERFLOW32 (SLJIT_OVERFLOW | SLJIT_I32_OP) #define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW) #define SLJIT_NOT_OVERFLOW 11 -#define SLJIT_NOT_OVERFLOW32 (SLJIT_NOT_OVERFLOW | SLJIT_I32_OP) - -#define SLJIT_MUL_OVERFLOW 12 -#define SLJIT_MUL_OVERFLOW32 (SLJIT_MUL_OVERFLOW | SLJIT_I32_OP) -#define SLJIT_SET_MUL_OVERFLOW SLJIT_SET(SLJIT_MUL_OVERFLOW) -#define 
SLJIT_MUL_NOT_OVERFLOW 13 -#define SLJIT_MUL_NOT_OVERFLOW32 (SLJIT_MUL_NOT_OVERFLOW | SLJIT_I32_OP) /* There is no SLJIT_CARRY or SLJIT_NOT_CARRY. */ -#define SLJIT_SET_CARRY SLJIT_SET(14) +#define SLJIT_SET_CARRY SLJIT_SET(12) /* Floating point comparison types. */ -#define SLJIT_EQUAL_F64 16 +#define SLJIT_EQUAL_F64 14 #define SLJIT_EQUAL_F32 (SLJIT_EQUAL_F64 | SLJIT_F32_OP) #define SLJIT_SET_EQUAL_F SLJIT_SET(SLJIT_EQUAL_F64) -#define SLJIT_NOT_EQUAL_F64 17 +#define SLJIT_NOT_EQUAL_F64 15 #define SLJIT_NOT_EQUAL_F32 (SLJIT_NOT_EQUAL_F64 | SLJIT_F32_OP) #define SLJIT_SET_NOT_EQUAL_F SLJIT_SET(SLJIT_NOT_EQUAL_F64) -#define SLJIT_LESS_F64 18 +#define SLJIT_LESS_F64 16 #define SLJIT_LESS_F32 (SLJIT_LESS_F64 | SLJIT_F32_OP) #define SLJIT_SET_LESS_F SLJIT_SET(SLJIT_LESS_F64) -#define SLJIT_GREATER_EQUAL_F64 19 +#define SLJIT_GREATER_EQUAL_F64 17 #define SLJIT_GREATER_EQUAL_F32 (SLJIT_GREATER_EQUAL_F64 | SLJIT_F32_OP) #define SLJIT_SET_GREATER_EQUAL_F SLJIT_SET(SLJIT_GREATER_EQUAL_F64) -#define SLJIT_GREATER_F64 20 +#define SLJIT_GREATER_F64 18 #define SLJIT_GREATER_F32 (SLJIT_GREATER_F64 | SLJIT_F32_OP) #define SLJIT_SET_GREATER_F SLJIT_SET(SLJIT_GREATER_F64) -#define SLJIT_LESS_EQUAL_F64 21 +#define SLJIT_LESS_EQUAL_F64 19 #define SLJIT_LESS_EQUAL_F32 (SLJIT_LESS_EQUAL_F64 | SLJIT_F32_OP) #define SLJIT_SET_LESS_EQUAL_F SLJIT_SET(SLJIT_LESS_EQUAL_F64) -#define SLJIT_UNORDERED_F64 22 +#define SLJIT_UNORDERED_F64 20 #define SLJIT_UNORDERED_F32 (SLJIT_UNORDERED_F64 | SLJIT_F32_OP) #define SLJIT_SET_UNORDERED_F SLJIT_SET(SLJIT_UNORDERED_F64) -#define SLJIT_ORDERED_F64 23 +#define SLJIT_ORDERED_F64 21 #define SLJIT_ORDERED_F32 (SLJIT_ORDERED_F64 | SLJIT_F32_OP) #define SLJIT_SET_ORDERED_F SLJIT_SET(SLJIT_ORDERED_F64) /* Unconditional jump types. */ -#define SLJIT_JUMP 24 +#define SLJIT_JUMP 22 /* Fast calling method. See sljit_emit_fast_enter / SLJIT_FAST_RETURN. 
*/ -#define SLJIT_FAST_CALL 25 +#define SLJIT_FAST_CALL 23 /* Called function must be declared with the SLJIT_FUNC attribute. */ -#define SLJIT_CALL 26 +#define SLJIT_CALL 24 /* Called function must be declared with cdecl attribute. This is the default attribute for C functions. */ -#define SLJIT_CALL_CDECL 27 +#define SLJIT_CALL_CDECL 25 /* The target can be changed during runtime (see: sljit_set_jump_addr). */ #define SLJIT_REWRITABLE_JUMP 0x1000 @@ -1534,8 +1518,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, void *instruction, sljit_s32 size); -/* Define the currently available CPU status flags. It is usually used after an - sljit_emit_op_custom call to define which flags are set. */ +/* Flags were set by a 32 bit operation. */ +#define SLJIT_CURRENT_FLAGS_I32_OP SLJIT_I32_OP + +/* Flags were set by an ADD, ADDC, SUB, SUBC, or NEG operation. */ +#define SLJIT_CURRENT_FLAGS_ADD_SUB 0x01 + +/* Flags were set by a SUB with unused destination. + Must be combined with SLJIT_CURRENT_FLAGS_ADD_SUB. */ +#define SLJIT_CURRENT_FLAGS_COMPARE 0x02 + +/* Define the currently available CPU status flags. It is usually used after + an sljit_emit_label or sljit_emit_op_custom operations to define which CPU + status flags are available. + + The current_flags must be a valid combination of SLJIT_SET_* and + SLJIT_CURRENT_FLAGS_* constants. 
*/ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags); diff --git a/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c b/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c index ae8479f031..74cf55fcd2 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c @@ -1197,6 +1197,8 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_ADD: SLJIT_ASSERT(!(flags & INV_IMM)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); @@ -1207,6 +1209,8 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_SUB: SLJIT_ASSERT(!(flags & INV_IMM)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? 
SUB : RSB) | (flags & SET_FLAGS) @@ -1220,6 +1224,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_MUL: SLJIT_ASSERT(!(flags & INV_IMM)); SLJIT_ASSERT(!(src2 & SRC2_IMM)); + compiler->status_flags_state = 0; if (!HAS_FLAGS(op)) return push_inst(compiler, MUL | (reg_map[dst] << 16) | (reg_map[src2] << 8) | reg_map[src1]); @@ -2153,16 +2158,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * /* Conditional instructions */ /* --------------------------------------------------------------------- */ -static sljit_uw get_cc(sljit_s32 type) +static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: - case SLJIT_MUL_NOT_OVERFLOW: case SLJIT_EQUAL_F64: return 0x00000000; case SLJIT_NOT_EQUAL: - case SLJIT_MUL_OVERFLOW: case SLJIT_NOT_EQUAL_F64: return 0x10000000; @@ -2195,10 +2198,16 @@ static sljit_uw get_cc(sljit_s32 type) return 0xd0000000; case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x10000000; + case SLJIT_UNORDERED_F64: return 0x60000000; case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x00000000; + case SLJIT_ORDERED_F64: return 0x70000000; @@ -2242,7 +2251,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (type >= SLJIT_FAST_CALL) PTR_FAIL_IF(prepare_blx(compiler)); PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, - type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0)); + type <= SLJIT_JUMP ? 
TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0)); if (jump->flags & SLJIT_REWRITABLE_JUMP) { jump->addr = compiler->size; @@ -2260,7 +2269,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (type >= SLJIT_FAST_CALL) jump->flags |= IS_BL; PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); - PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type))); + PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type))); jump->addr = compiler->size; #endif return jump; @@ -2589,7 +2598,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co ADJUST_LOCAL_OFFSET(dst, dstw); op = GET_OPCODE(op); - cc = get_cc(type & 0xff); + cc = get_cc(compiler, type & 0xff); dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; if (op < SLJIT_ADD) { @@ -2629,7 +2638,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil dst_reg &= ~SLJIT_I32_OP; - cc = get_cc(type & 0xff); + cc = get_cc(compiler, type & 0xff); if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { tmp = get_imm(srcw); diff --git a/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c b/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c index 52267e7df7..3f0f5fcc30 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c @@ -644,6 +644,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s imm = -imm; /* Fall through. */ case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; if (imm == 0) { CHECK_FLAGS(1 << 29); return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg)); @@ -781,6 +782,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s break; /* Set flags. 
*/ case SLJIT_NEG: SLJIT_ASSERT(arg1 == TMP_REG1); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; if (flags & SET_FLAGS) inv_bits |= 1 << 29; return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); @@ -789,17 +791,20 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)); case SLJIT_ADD: CHECK_FLAGS(1 << 29); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_ADDC: CHECK_FLAGS(1 << 29); return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_SUB: CHECK_FLAGS(1 << 29); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_SUBC: CHECK_FLAGS(1 << 29); return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_MUL: + compiler->status_flags_state = 0; if (!(flags & SET_FLAGS)) return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)); if (flags & INT_OP) { @@ -1600,16 +1605,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * /* Conditional instructions */ /* --------------------------------------------------------------------- */ -static sljit_uw get_cc(sljit_s32 type) +static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: - case SLJIT_MUL_NOT_OVERFLOW: case SLJIT_EQUAL_F64: return 0x1; case SLJIT_NOT_EQUAL: - case SLJIT_MUL_OVERFLOW: case SLJIT_NOT_EQUAL_F64: return 0x0; @@ -1642,10 +1645,16 @@ static sljit_uw get_cc(sljit_s32 type) return 0xc; case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x0; + case SLJIT_UNORDERED_F64: return 0x7; case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & 
SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x1; + case SLJIT_ORDERED_F64: return 0x6; @@ -1685,7 +1694,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (type < SLJIT_JUMP) { jump->flags |= IS_COND; - PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(type))); + PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(compiler, type))); } else if (type >= SLJIT_FAST_CALL) jump->flags |= IS_BL; @@ -1799,7 +1808,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); - cc = get_cc(type & 0xff); + cc = get_cc(compiler, type & 0xff); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; if (GET_OPCODE(op) < SLJIT_ADD) { @@ -1854,7 +1863,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil srcw = 0; } - cc = get_cc(type & 0xff); + cc = get_cc(compiler, type & 0xff); dst_reg &= ~SLJIT_I32_OP; return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src)); diff --git a/thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c b/thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c index 4624882f42..e35dbe99b3 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c @@ -610,6 +610,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s Although some clever things could be done here, "NOT IMM" does not worth the efforts. */ break; case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; nimm = -(sljit_sw)imm; if (IS_2_LO_REGS(reg, dst)) { if (imm <= 0x7) @@ -643,6 +644,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s break; case SLJIT_SUB: /* SUB operation can be replaced by ADD because of the negative carry flag. 
*/ + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; if (flags & ARG1_IMM) { if (imm == 0 && IS_2_LO_REGS(reg, dst)) return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg)); @@ -801,6 +803,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2))); return SLJIT_SUCCESS; case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; if (IS_3_LO_REGS(dst, arg1, arg2)) return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2)); if (dst == arg1 && !(flags & SET_FLAGS)) @@ -811,6 +814,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; if (flags & UNUSED_RETURN) { if (IS_2_LO_REGS(arg1, arg2)) return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2)); @@ -824,6 +828,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_MUL: + compiler->status_flags_state = 0; if (!(flags & SET_FLAGS)) return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2)); SLJIT_ASSERT(dst != TMP_REG2); @@ -1760,16 +1765,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * /* Conditional instructions */ /* --------------------------------------------------------------------- */ -static sljit_uw get_cc(sljit_s32 type) +static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: - case SLJIT_MUL_NOT_OVERFLOW: case SLJIT_EQUAL_F64: return 0x0; case SLJIT_NOT_EQUAL: - case SLJIT_MUL_OVERFLOW: case SLJIT_NOT_EQUAL_F64: return 
0x1; @@ -1802,10 +1805,16 @@ static sljit_uw get_cc(sljit_s32 type) return 0xd; case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x1; + case SLJIT_UNORDERED_F64: return 0x6; case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x0; + case SLJIT_ORDERED_F64: return 0x7; @@ -1847,7 +1856,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); if (type < SLJIT_JUMP) { jump->flags |= IS_COND; - cc = get_cc(type); + cc = get_cc(compiler, type); jump->flags |= cc << 8; PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); } @@ -2177,7 +2186,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co ADJUST_LOCAL_OFFSET(dst, dstw); op = GET_OPCODE(op); - cc = get_cc(type & 0xff); + cc = get_cc(compiler, type & 0xff); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; if (op < SLJIT_ADD) { @@ -2229,7 +2238,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil dst_reg &= ~SLJIT_I32_OP; - cc = get_cc(type & 0xff); + cc = get_cc(compiler, type & 0xff); if (!(src & SLJIT_IMM)) { FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); diff --git a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c index f887ee1311..a90345f1f8 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c @@ -367,7 +367,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_MUL: SLJIT_ASSERT(!(flags & SRC2_IMM)); - if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) { + if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) { #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); #else /* SLJIT_MIPS_REV < 1 */ diff --git a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c 
b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c index 5ab9b7d06b..1f22e49ed9 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c @@ -458,7 +458,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_MUL: SLJIT_ASSERT(!(flags & SRC2_IMM)); - if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) { + if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) { #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)); #elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) diff --git a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c index ecf4dac4c8..fd747695a7 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c @@ -1377,6 +1377,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_NEG: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); case SLJIT_CLZ: @@ -1424,13 +1425,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADD: case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: + compiler->status_flags_state = 0; return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_AND: @@ -1860,7 +1864,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct 
sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_SIG_LESS: case SLJIT_SIG_GREATER: case SLJIT_OVERFLOW: - case SLJIT_MUL_OVERFLOW: BR_Z(OTHER_FLAG); break; case SLJIT_GREATER_EQUAL: @@ -1868,7 +1871,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_SIG_GREATER_EQUAL: case SLJIT_SIG_LESS_EQUAL: case SLJIT_NOT_OVERFLOW: - case SLJIT_MUL_NOT_OVERFLOW: BR_NZ(OTHER_FLAG); break; case SLJIT_NOT_EQUAL_F64: @@ -2127,8 +2129,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); src_ar = dst_ar; break; - case SLJIT_MUL_OVERFLOW: - case SLJIT_MUL_NOT_OVERFLOW: + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB) { + src_ar = OTHER_FLAG; + break; + } FAIL_IF(push_inst(compiler, SLTIU | SA(OTHER_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); src_ar = dst_ar; type ^= 0x1; /* Flip type bit for the XORI below. 
*/ @@ -2219,7 +2225,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil case SLJIT_SIG_LESS: case SLJIT_SIG_GREATER: case SLJIT_OVERFLOW: - case SLJIT_MUL_OVERFLOW: ins = MOVN | TA(OTHER_FLAG); break; case SLJIT_GREATER_EQUAL: @@ -2227,7 +2232,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil case SLJIT_SIG_GREATER_EQUAL: case SLJIT_SIG_LESS_EQUAL: case SLJIT_NOT_OVERFLOW: - case SLJIT_MUL_NOT_OVERFLOW: ins = MOVZ | TA(OTHER_FLAG); break; case SLJIT_EQUAL_F64: diff --git a/thirdparty/pcre2/src/sljit/sljitNativePPC_32.c b/thirdparty/pcre2/src/sljit/sljitNativePPC_32.c index 7d9ec5338f..6ddb5508ec 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativePPC_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativePPC_32.c @@ -119,9 +119,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); } + SLJIT_ASSERT(!(flags & ALT_FORM4)); if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); - if (flags & ALT_FORM4) + if (flags & ALT_FORM5) return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2)); @@ -143,24 +144,29 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl } if (flags & ALT_FORM2) { + if (flags & ALT_FORM3) { + FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2))); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM3) { /* Setting XER SO is not enough, CR SO is also needed. 
*/ return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); } - if (flags & ALT_FORM3) { + if (flags & ALT_FORM4) { /* Flags does not set: BIN_IMM_EXTS unnecessary. */ SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (flags & ALT_FORM4) { - if (flags & ALT_FORM5) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm); - } - return push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2)); - } - if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); if (flags & ALT_FORM5) diff --git a/thirdparty/pcre2/src/sljit/sljitNativePPC_64.c b/thirdparty/pcre2/src/sljit/sljitNativePPC_64.c index 92147d2a5d..cbdf2dd8a2 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativePPC_64.c +++ b/thirdparty/pcre2/src/sljit/sljitNativePPC_64.c @@ -252,10 +252,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl BIN_IMM_EXTS(); return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); } + if (flags & ALT_FORM4) { + if (flags & ALT_FORM5) + FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm)); + else + FAIL_IF(push_inst(compiler, ADD | D(dst) | A(src1) | B(src2))); + return push_inst(compiler, CMPI | A(dst) | 0); + } if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); BIN_EXTS(); - if (flags & ALT_FORM4) + if (flags & ALT_FORM5) return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2)); @@ -278,6 +285,19 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl } if (flags & ALT_FORM2) { + if (flags & ALT_FORM3) { + FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 
0 : 1)) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM3) { if (flags & ALT_SIGN_EXT) { FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); src1 = TMP_REG1; @@ -291,20 +311,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; } - if (flags & ALT_FORM3) { + if (flags & ALT_FORM4) { /* Flags does not set: BIN_IMM_EXTS unnecessary. */ SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (flags & ALT_FORM4) { - if (flags & ALT_FORM5) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm); - } - return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 
0 : 1)) | A(src1) | B(src2)); - } - if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); BIN_EXTS(); diff --git a/thirdparty/pcre2/src/sljit/sljitNativePPC_common.c b/thirdparty/pcre2/src/sljit/sljitNativePPC_common.c index d84562ce09..2174dbb07b 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativePPC_common.c +++ b/thirdparty/pcre2/src/sljit/sljitNativePPC_common.c @@ -1324,6 +1324,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ((src) & SLJIT_IMM) #endif +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_ADD_FORM1(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \ + || (op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_I32_OP | SLJIT_SET_Z | SLJIT_SET_CARRY)) +#define TEST_SUB_FORM2(op) \ + ((GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) \ + || (op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_I32_OP | SLJIT_SET_Z)) +#define TEST_SUB_FORM3(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \ + || (op & (SLJIT_I32_OP | SLJIT_SET_Z)) == (SLJIT_I32_OP | SLJIT_SET_Z)) +#else +#define TEST_ADD_FORM1(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) +#define TEST_SUB_FORM2(op) \ + (GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) +#define TEST_SUB_FORM3(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) +#endif + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, @@ -1362,7 +1381,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADD: - if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) + if (TEST_ADD_FORM1(op)) return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { @@ -1392,6 +1411,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); } } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((op & (SLJIT_I32_OP | SLJIT_SET_Z)) == (SLJIT_I32_OP | SLJIT_SET_Z)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src2, src2w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); + } +#endif if (HAS_FLAGS(op)) { if (TEST_SL_IMM(src2, src2w)) { compiler->imm = src2w & 0xffff; @@ -1402,7 +1435,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); } } - return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM4 : 0), dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? 
ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); case SLJIT_ADDC: return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w); @@ -1424,18 +1457,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); } - if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) + if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, src2, src2w); + } - if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { - if (TEST_SL_IMM(src2, -src2w)) { - compiler->imm = (-src2w) & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); - } - if (TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); + if (TEST_SUB_FORM2(op)) { + if ((src2 & SLJIT_IMM) && src2w >= -SIMM_MAX && src2w <= SIMM_MAX) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); + } + + if (TEST_SUB_FORM3(op)) + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); + + if (TEST_SL_IMM(src2, -src2w)) { + compiler->imm = (-src2w) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | (!HAS_FLAGS(op) ? 
ALT_FORM2 : ALT_FORM3), dst, dstw, src1, src1w, TMP_REG2, 0); + } + + if (TEST_SL_IMM(src1, src1w) && !(op & SLJIT_SET_Z)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); + } + + if (!HAS_FLAGS(op)) { if (TEST_SH_IMM(src2, -src2w)) { compiler->imm = ((-src2w) >> 16) & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1447,18 +1498,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile } } - if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) != GET_FLAG_TYPE(SLJIT_SET_CARRY)) { - if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0); - } - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); - } - - if (TEST_SL_IMM(src2, -src2w)) { - compiler->imm = (-src2w) & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); - } /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */ return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? 
ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); @@ -1536,6 +1575,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_SUCCESS; } +#undef TEST_ADD_FORM1 +#undef TEST_SUB_FORM2 +#undef TEST_SUB_FORM3 + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -1941,11 +1984,9 @@ static sljit_ins get_bo_bi_flags(sljit_s32 type) return (4 << 21) | ((4 + 1) << 16); case SLJIT_OVERFLOW: - case SLJIT_MUL_OVERFLOW: return (12 << 21) | (3 << 16); case SLJIT_NOT_OVERFLOW: - case SLJIT_MUL_NOT_OVERFLOW: return (4 << 21) | (3 << 16); case SLJIT_EQUAL_F64: @@ -2143,12 +2184,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co break; case SLJIT_OVERFLOW: - case SLJIT_MUL_OVERFLOW: cr_bit = 3; break; case SLJIT_NOT_OVERFLOW: - case SLJIT_MUL_NOT_OVERFLOW: cr_bit = 3; invert = 1; break; diff --git a/thirdparty/pcre2/src/sljit/sljitNativeS390X.c b/thirdparty/pcre2/src/sljit/sljitNativeS390X.c index a8b65112d4..716491ec72 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeS390X.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeS390X.c @@ -42,10 +42,10 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) typedef sljit_uw sljit_ins; /* Instruction tags (most significant halfword). 
*/ -const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; +static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { - 14, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1 + 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1 }; /* there are also a[2-15] available, but they are slower to access and @@ -66,22 +66,22 @@ typedef sljit_uw sljit_gpr; * will be retired ASAP (TODO: carenas) */ -const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */ -const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */ -const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */ -const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */ -const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */ -const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */ -const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */ -const sljit_gpr r7 = 7; /* reg_map[6] */ -const sljit_gpr r8 = 8; /* reg_map[7] */ -const sljit_gpr r9 = 9; /* reg_map[8] */ -const sljit_gpr r10 = 10; /* reg_map[9] */ -const sljit_gpr r11 = 11; /* reg_map[10] */ -const sljit_gpr r12 = 12; /* reg_map[11]: GOT */ -const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */ -const sljit_gpr r14 = 14; /* reg_map[0]: return address and flag register */ -const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */ +static const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */ +static const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */ +static const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */ +static const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */ +static const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */ +static const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */ +static const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */ +static const 
sljit_gpr r7 = 7; /* reg_map[6] */ +static const sljit_gpr r8 = 8; /* reg_map[7] */ +static const sljit_gpr r9 = 9; /* reg_map[8] */ +static const sljit_gpr r10 = 10; /* reg_map[9] */ +static const sljit_gpr r11 = 11; /* reg_map[10] */ +static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */ +static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */ +static const sljit_gpr r14 = 14; /* reg_map[0]: return address and flag register */ +static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */ /* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */ /* TODO(carenas): r12 might conflict in PIC code, reserve? */ @@ -100,8 +100,8 @@ const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack point /* Link registers. The normal link register is r14, but since we use that for flags we need to use r0 instead to do fast calls so that flags are preserved. */ -const sljit_gpr link_r = 14; /* r14 */ -const sljit_gpr fast_link_r = 0; /* r0 */ +static const sljit_gpr link_r = 14; /* r14 */ +static const sljit_gpr fast_link_r = 0; /* r0 */ /* Flag register layout: @@ -110,7 +110,7 @@ const sljit_gpr fast_link_r = 0; /* r0 */ | ZERO | 0 | 0 | C C |///////| +---------------+---+---+-------+-------+ */ -const sljit_gpr flag_r = 14; /* r14 */ +static const sljit_gpr flag_r = 14; /* r14 */ struct sljit_s390x_const { struct sljit_const const_; /* must be first */ @@ -120,8 +120,7 @@ struct sljit_s390x_const { /* Convert SLJIT register to hardware register. 
*/ static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r) { - SLJIT_ASSERT(r != SLJIT_UNUSED); - SLJIT_ASSERT(r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0]))); + SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0]))); return reg_map[r]; } @@ -172,51 +171,93 @@ static sljit_s32 encode_inst(void **ptr, sljit_ins ins) return SLJIT_SUCCESS; } +#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \ + (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE)) == SLJIT_CURRENT_FLAGS_ADD_SUB) + /* Map the given type to a 4-bit condition code mask. */ -static SLJIT_INLINE sljit_u8 get_cc(sljit_s32 type) { - const sljit_u8 eq = 1 << 3; /* equal {,to zero} */ - const sljit_u8 lt = 1 << 2; /* less than {,zero} */ - const sljit_u8 gt = 1 << 1; /* greater than {,zero} */ - const sljit_u8 ov = 1 << 0; /* {overflow,NaN} */ +static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) { + const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */ + const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */ + const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */ + const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */ switch (type) { case SLJIT_EQUAL: + if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) { + sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state); + if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL) + return cc0; + if (type == SLJIT_OVERFLOW) + return (cc0 | cc3); + return (cc0 | cc2); + } + case SLJIT_EQUAL_F64: - return eq; + return cc0; case SLJIT_NOT_EQUAL: + if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) { + sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state); + if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL) + return (cc1 | cc2 | cc3); + if (type == SLJIT_OVERFLOW) + return (cc1 | cc2); + return (cc1 | cc3); + } + case SLJIT_NOT_EQUAL_F64: - return ~eq; + return (cc1 | cc2 | cc3); case SLJIT_LESS: + return cc1; + + case SLJIT_GREATER_EQUAL: + return 
(cc0 | cc2 | cc3); + + case SLJIT_GREATER: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE) + return cc2; + return cc3; + + case SLJIT_LESS_EQUAL: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE) + return (cc0 | cc1); + return (cc0 | cc1 | cc2); + case SLJIT_SIG_LESS: case SLJIT_LESS_F64: - return lt; + return cc1; - case SLJIT_LESS_EQUAL: case SLJIT_SIG_LESS_EQUAL: case SLJIT_LESS_EQUAL_F64: - return (lt | eq); + return (cc0 | cc1); - case SLJIT_GREATER: case SLJIT_SIG_GREATER: - case SLJIT_GREATER_F64: - return gt; + /* Overflow is considered greater, see SLJIT_SUB. */ + return cc2 | cc3; - case SLJIT_GREATER_EQUAL: case SLJIT_SIG_GREATER_EQUAL: - case SLJIT_GREATER_EQUAL_F64: - return (gt | eq); + return (cc0 | cc2 | cc3); case SLJIT_OVERFLOW: - case SLJIT_MUL_OVERFLOW: + if (compiler->status_flags_state & SLJIT_SET_Z) + return (cc2 | cc3); + case SLJIT_UNORDERED_F64: - return ov; + return cc3; case SLJIT_NOT_OVERFLOW: - case SLJIT_MUL_NOT_OVERFLOW: + if (compiler->status_flags_state & SLJIT_SET_Z) + return (cc0 | cc1); + case SLJIT_ORDERED_F64: - return ~ov; + return (cc0 | cc1 | cc2); + + case SLJIT_GREATER_F64: + return cc2; + + case SLJIT_GREATER_EQUAL_F64: + return (cc0 | cc2); } SLJIT_UNREACHABLE(); @@ -346,19 +387,20 @@ HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY) #define is_u32(d) (0 <= (d) && (d) <= 0xffffffffL) #define CHECK_SIGNED(v, bitlen) \ - ((v) == (((v) << (sizeof(v) * 8 - bitlen)) >> (sizeof(v) * 8 - bitlen))) + ((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1))) +#define is_s8(d) CHECK_SIGNED((d), 8) #define is_s16(d) CHECK_SIGNED((d), 16) #define is_s20(d) CHECK_SIGNED((d), 20) -#define is_s32(d) CHECK_SIGNED((d), 32) +#define is_s32(d) ((d) == (sljit_s32)(d)) -static SLJIT_INLINE sljit_uw disp_s20(sljit_s32 d) +static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d) { + SLJIT_ASSERT(is_s20(d)); + sljit_uw dh = (d >> 12) & 0xff; sljit_uw dl = (d << 8) & 0xfff00; - - 
SLJIT_ASSERT(is_s20(d)); - return dh | dl; + return (dh | dl) << 8; } /* TODO(carenas): variadic macro is not strictly needed */ @@ -372,12 +414,6 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \ return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \ } -/* ADD */ -SLJIT_S390X_RR(ar, 0x1a00) - -/* ADD LOGICAL */ -SLJIT_S390X_RR(alr, 0x1e00) - /* AND */ SLJIT_S390X_RR(nr, 0x1400) @@ -387,12 +423,6 @@ SLJIT_S390X_RR(basr, 0x0d00) /* BRANCH ON CONDITION */ SLJIT_S390X_RR(bcr, 0x0700) /* TODO(mundaym): type for mask? */ -/* COMPARE */ -SLJIT_S390X_RR(cr, 0x1900) - -/* COMPARE LOGICAL */ -SLJIT_S390X_RR(clr, 0x1500) - /* DIVIDE */ SLJIT_S390X_RR(dr, 0x1d00) @@ -408,12 +438,6 @@ SLJIT_S390X_RR(lcr, 0x1300) /* OR */ SLJIT_S390X_RR(or, 0x1600) -/* SUBTRACT */ -SLJIT_S390X_RR(sr, 0x1b00) - -/* SUBTRACT LOGICAL */ -SLJIT_S390X_RR(slr, 0x1f00) - #undef SLJIT_S390X_RR /* RRE form instructions */ @@ -423,25 +447,9 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \ return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \ } -/* ADD */ -SLJIT_S390X_RRE(agr, 0xb9080000) - -/* ADD LOGICAL */ -SLJIT_S390X_RRE(algr, 0xb90a0000) - -/* ADD LOGICAL WITH CARRY */ -SLJIT_S390X_RRE(alcr, 0xb9980000) -SLJIT_S390X_RRE(alcgr, 0xb9880000) - /* AND */ SLJIT_S390X_RRE(ngr, 0xb9800000) -/* COMPARE */ -SLJIT_S390X_RRE(cgr, 0xb9200000) - -/* COMPARE LOGICAL */ -SLJIT_S390X_RRE(clgr, 0xb9210000) - /* DIVIDE LOGICAL */ SLJIT_S390X_RRE(dlr, 0xb9970000) SLJIT_S390X_RRE(dlgr, 0xb9870000) @@ -482,8 +490,6 @@ SLJIT_S390X_RRE(llghr, 0xb9850000) SLJIT_S390X_RRE(mlgr, 0xb9860000) /* MULTIPLY SINGLE */ -SLJIT_S390X_RRE(msr, 0xb2520000) -SLJIT_S390X_RRE(msgr, 0xb90c0000) SLJIT_S390X_RRE(msgfr, 0xb91c0000) /* OR */ @@ -492,13 +498,6 @@ SLJIT_S390X_RRE(ogr, 0xb9810000) /* SUBTRACT */ SLJIT_S390X_RRE(sgr, 0xb9090000) -/* SUBTRACT LOGICAL */ -SLJIT_S390X_RRE(slgr, 0xb90b0000) - -/* SUBTRACT LOGICAL WITH BORROW */ -SLJIT_S390X_RRE(slbr, 0xb9990000) -SLJIT_S390X_RRE(slbgr, 0xb9890000) - 
#undef SLJIT_S390X_RRE /* RI-a form instructions */ @@ -509,13 +508,8 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ } /* ADD HALFWORD IMMEDIATE */ -SLJIT_S390X_RIA(ahi, 0xa70a0000, sljit_s16) SLJIT_S390X_RIA(aghi, 0xa70b0000, sljit_s16) -/* COMPARE HALFWORD IMMEDIATE */ -SLJIT_S390X_RIA(chi, 0xa70e0000, sljit_s16) -SLJIT_S390X_RIA(cghi, 0xa70f0000, sljit_s16) - /* LOAD HALFWORD IMMEDIATE */ SLJIT_S390X_RIA(lhi, 0xa7080000, sljit_s16) SLJIT_S390X_RIA(lghi, 0xa7090000, sljit_s16) @@ -533,9 +527,6 @@ SLJIT_S390X_RIA(mghi, 0xa70d0000, sljit_s16) /* OR IMMEDIATE */ SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16) -/* TEST UNDER MASK */ -SLJIT_S390X_RIA(tmlh, 0xa7000000, sljit_u16) - #undef SLJIT_S390X_RIA /* RIL-a form instructions (requires extended immediate facility) */ @@ -547,30 +538,13 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ } /* ADD IMMEDIATE */ -SLJIT_S390X_RILA(afi, 0xc20900000000, sljit_s32) SLJIT_S390X_RILA(agfi, 0xc20800000000, sljit_s32) /* ADD IMMEDIATE HIGH */ SLJIT_S390X_RILA(aih, 0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */ -/* ADD LOGICAL IMMEDIATE */ -SLJIT_S390X_RILA(alfi, 0xc20b00000000, sljit_u32) -SLJIT_S390X_RILA(algfi, 0xc20a00000000, sljit_u32) - /* AND IMMEDIATE */ SLJIT_S390X_RILA(nihf, 0xc00a00000000, sljit_u32) -SLJIT_S390X_RILA(nilf, 0xc00b00000000, sljit_u32) - -/* COMPARE IMMEDIATE */ -SLJIT_S390X_RILA(cfi, 0xc20d00000000, sljit_s32) -SLJIT_S390X_RILA(cgfi, 0xc20c00000000, sljit_s32) - -/* COMPARE IMMEDIATE HIGH */ -SLJIT_S390X_RILA(cih, 0xcc0d00000000, sljit_s32) /* TODO(mundaym): high-word facility? 
*/ - -/* COMPARE LOGICAL IMMEDIATE */ -SLJIT_S390X_RILA(clfi, 0xc20f00000000, sljit_u32) -SLJIT_S390X_RILA(clgfi, 0xc20e00000000, sljit_u32) /* EXCLUSIVE OR IMMEDIATE */ SLJIT_S390X_RILA(xilf, 0xc00700000000, sljit_u32) @@ -586,8 +560,8 @@ SLJIT_S390X_RILA(lgfi, 0xc00100000000, sljit_s32) SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32) SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32) -/* OR IMMEDIATE */ -SLJIT_S390X_RILA(oilf, 0xc00d00000000, sljit_u32) +/* SUBTRACT LOGICAL IMMEDIATE */ +SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32) #undef SLJIT_S390X_RILA @@ -606,18 +580,6 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_u16 d, sljit_gpr x, sljit_gpr b return (pattern) | ri | xi | bi | di; \ } -/* ADD */ -SLJIT_S390X_RXA(a, 0x5a000000) - -/* ADD LOGICAL */ -SLJIT_S390X_RXA(al, 0x5e000000) - -/* AND */ -SLJIT_S390X_RXA(n, 0x54000000) - -/* EXCLUSIVE OR */ -SLJIT_S390X_RXA(x, 0x57000000) - /* LOAD */ SLJIT_S390X_RXA(l, 0x58000000) @@ -630,9 +592,6 @@ SLJIT_S390X_RXA(lh, 0x48000000) /* MULTIPLY SINGLE */ SLJIT_S390X_RXA(ms, 0x71000000) -/* OR */ -SLJIT_S390X_RXA(o, 0x56000000) - /* STORE */ SLJIT_S390X_RXA(st, 0x50000000) @@ -642,12 +601,6 @@ SLJIT_S390X_RXA(stc, 0x42000000) /* STORE HALFWORD */ SLJIT_S390X_RXA(sth, 0x40000000) -/* SUBTRACT */ -SLJIT_S390X_RXA(s, 0x5b000000) - -/* SUBTRACT LOGICAL */ -SLJIT_S390X_RXA(sl, 0x5f000000) - #undef SLJIT_S390X_RXA /* RXY-a instructions */ @@ -660,31 +613,11 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b ri = (sljit_ins)(r & 0xf) << 36; \ xi = (sljit_ins)(x & 0xf) << 32; \ bi = (sljit_ins)(b & 0xf) << 28; \ - di = (sljit_ins)disp_s20(d) << 8; \ + di = disp_s20(d); \ \ return (pattern) | ri | xi | bi | di; \ } -/* ADD */ -SLJIT_S390X_RXYA(ay, 0xe3000000005a, have_ldisp()) -SLJIT_S390X_RXYA(ag, 0xe30000000008, 1) - -/* ADD LOGICAL */ -SLJIT_S390X_RXYA(aly, 0xe3000000005e, have_ldisp()) -SLJIT_S390X_RXYA(alg, 0xe3000000000a, 1) - -/* ADD LOGICAL WITH CARRY */ 
-SLJIT_S390X_RXYA(alc, 0xe30000000098, 1) -SLJIT_S390X_RXYA(alcg, 0xe30000000088, 1) - -/* AND */ -SLJIT_S390X_RXYA(ny, 0xe30000000054, have_ldisp()) -SLJIT_S390X_RXYA(ng, 0xe30000000080, 1) - -/* EXCLUSIVE OR */ -SLJIT_S390X_RXYA(xy, 0xe30000000057, have_ldisp()) -SLJIT_S390X_RXYA(xg, 0xe30000000082, 1) - /* LOAD */ SLJIT_S390X_RXYA(ly, 0xe30000000058, have_ldisp()) SLJIT_S390X_RXYA(lg, 0xe30000000004, 1) @@ -713,10 +646,6 @@ SLJIT_S390X_RXYA(llgh, 0xe30000000091, 1) SLJIT_S390X_RXYA(msy, 0xe30000000051, have_ldisp()) SLJIT_S390X_RXYA(msg, 0xe3000000000c, 1) -/* OR */ -SLJIT_S390X_RXYA(oy, 0xe30000000056, have_ldisp()) -SLJIT_S390X_RXYA(og, 0xe30000000081, 1) - /* STORE */ SLJIT_S390X_RXYA(sty, 0xe30000000050, have_ldisp()) SLJIT_S390X_RXYA(stg, 0xe30000000024, 1) @@ -727,41 +656,8 @@ SLJIT_S390X_RXYA(stcy, 0xe30000000072, have_ldisp()) /* STORE HALFWORD */ SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp()) -/* SUBTRACT */ -SLJIT_S390X_RXYA(sy, 0xe3000000005b, have_ldisp()) -SLJIT_S390X_RXYA(sg, 0xe30000000009, 1) - -/* SUBTRACT LOGICAL */ -SLJIT_S390X_RXYA(sly, 0xe3000000005f, have_ldisp()) -SLJIT_S390X_RXYA(slg, 0xe3000000000b, 1) - -/* SUBTRACT LOGICAL WITH BORROW */ -SLJIT_S390X_RXYA(slb, 0xe30000000099, 1) -SLJIT_S390X_RXYA(slbg, 0xe30000000089, 1) - #undef SLJIT_S390X_RXYA -/* RS-a instructions */ -#define SLJIT_S390X_RSA(name, pattern) \ -SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw d, sljit_gpr b) \ -{ \ - sljit_ins r1 = (sljit_ins)(reg & 0xf) << 20; \ - sljit_ins b2 = (sljit_ins)(b & 0xf) << 12; \ - sljit_ins d2 = (sljit_ins)(d & 0xfff); \ - return (pattern) | r1 | b2 | d2; \ -} - -/* SHIFT LEFT SINGLE LOGICAL */ -SLJIT_S390X_RSA(sll, 0x89000000) - -/* SHIFT RIGHT SINGLE */ -SLJIT_S390X_RSA(sra, 0x8a000000) - -/* SHIFT RIGHT SINGLE LOGICAL */ -SLJIT_S390X_RSA(srl, 0x88000000) - -#undef SLJIT_S390X_RSA - /* RSY-a instructions */ #define SLJIT_S390X_RSYA(name, pattern, cond) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_sw 
d, sljit_gpr b) \ @@ -772,7 +668,7 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_sw d, sljit_gp r1 = (sljit_ins)(dst & 0xf) << 36; \ r3 = (sljit_ins)(src & 0xf) << 32; \ b2 = (sljit_ins)(b & 0xf) << 28; \ - d2 = (sljit_ins)disp_s20(d) << 8; \ + d2 = disp_s20(d); \ \ return (pattern) | r1 | r3 | b2 | d2; \ } @@ -786,9 +682,6 @@ SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1) /* SHIFT RIGHT SINGLE */ SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1) -/* SHIFT RIGHT SINGLE LOGICAL */ -SLJIT_S390X_RSYA(srlg, 0xeb000000000c, 1) - /* STORE MULTIPLE */ SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1) @@ -831,26 +724,6 @@ SLJIT_S390X_RIEF(risbhg, 0xec000000005d) #undef SLJIT_S390X_RIEF -/* RRF-a instructions */ -#define SLJIT_S390X_RRFA(name, pattern, cond) \ -SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src1, sljit_gpr src2) \ -{ \ - sljit_ins r1, r2, r3; \ -\ - SLJIT_ASSERT(cond); \ - r1 = (sljit_ins)(dst & 0xf) << 4; \ - r2 = (sljit_ins)(src1 & 0xf); \ - r3 = (sljit_ins)(src2 & 0xf) << 12; \ -\ - return (pattern) | r3 | r1 | r2; \ -} - -/* MULTIPLY */ -SLJIT_S390X_RRFA(msrkc, 0xb9fd0000, have_misc2()) -SLJIT_S390X_RRFA(msgrkc, 0xb9ed0000, have_misc2()) - -#undef SLJIT_S390X_RRFA - /* RRF-c instructions (require load/store-on-condition 1 facility) */ #define SLJIT_S390X_RRFC(name, pattern) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \ @@ -919,6 +792,13 @@ SLJIT_S390X_INSTRUCTION(br, sljit_gpr target) return 0x07f0 | target; } +SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target) +{ + sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20; + sljit_ins ri2 = (sljit_ins)target & 0xffff; + return 0xa7040000L | m1 | ri2; +} + SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target) { sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36; @@ -940,6 +820,12 @@ SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst) return 0xb2220000 | ((sljit_ins)(dst & 0xf) << 4); } +/* SET PROGRAM MASK */ +SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst) +{ + return 
0x0400 | ((sljit_ins)(dst & 0xf) << 4); +} + /* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */ SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) { @@ -948,30 +834,20 @@ SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, s #undef SLJIT_S390X_INSTRUCTION -/* load condition code as needed to match type */ -static sljit_s32 push_load_cc(struct sljit_compiler *compiler, sljit_s32 type) +static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r) { - type &= ~SLJIT_I32_OP; - switch (type) { - case SLJIT_ZERO: - case SLJIT_NOT_ZERO: - return push_inst(compiler, cih(flag_r, 0)); - break; - default: - return push_inst(compiler, tmlh(flag_r, 0x3000)); - break; - } - return SLJIT_SUCCESS; -} - -static sljit_s32 push_store_zero_flag(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr source) -{ - /* insert low 32-bits into high 32-bits of flag register */ - FAIL_IF(push_inst(compiler, risbhgz(flag_r, source, 0, 31, 32))); - if (!(op & SLJIT_I32_OP)) { - /* OR high 32-bits with high 32-bits of flag register */ - return push_inst(compiler, rosbg(flag_r, source, 0, 31, 0)); - } + /* Condition codes: bits 18 and 19. + Transformation: + 0 (zero and no overflow) : unchanged + 1 (non-zero and no overflow) : unchanged + 2 (zero and overflow) : decreased by 1 + 3 (non-zero and overflow) : decreased by 1 if non-zero */ + FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_I32_OP) ? 1 : 2) + 2 + 3 + 1))); + FAIL_IF(push_inst(compiler, ipm(flag_r))); + FAIL_IF(push_inst(compiler, (op & SLJIT_I32_OP) ? or(dst_r, dst_r) : ogr(dst_r, dst_r))); + FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3))); + FAIL_IF(push_inst(compiler, slfi(flag_r, 0x10000000))); + FAIL_IF(push_inst(compiler, spm(flag_r))); return SLJIT_SUCCESS; } @@ -1088,18 +964,19 @@ static sljit_s32 make_addr_bx(struct sljit_compiler *compiler, #define WHEN(cond, r, i1, i2, addr) \ (cond) ? 
EVAL(i1, r, addr) : EVAL(i2, r, addr) +/* May clobber tmp1. */ static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst, sljit_s32 src, sljit_sw srcw, - sljit_gpr tmp /* clobbered */, sljit_s32 is_32bit) + sljit_s32 is_32bit) { struct addr addr; sljit_ins ins; SLJIT_ASSERT(src & SLJIT_MEM); if (have_ldisp() || !is_32bit) - FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp)); + FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1)); else - FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp)); + FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); if (is_32bit) ins = WHEN(is_u12(addr.offset), dst, l, ly, addr); @@ -1109,18 +986,19 @@ static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst, return push_inst(compiler, ins); } +/* May clobber tmp1. */ static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src, sljit_s32 dst, sljit_sw dstw, - sljit_gpr tmp /* clobbered */, sljit_s32 is_32bit) + sljit_s32 is_32bit) { struct addr addr; sljit_ins ins; SLJIT_ASSERT(dst & SLJIT_MEM); if (have_ldisp() || !is_32bit) - FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp)); + FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1)); else - FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp)); + FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1)); if (is_32bit) ins = WHEN(is_u12(addr.offset), src, st, sty, addr); @@ -1132,6 +1010,358 @@ static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src, #undef WHEN +static sljit_s32 emit_move(struct sljit_compiler *compiler, + sljit_gpr dst_r, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_ASSERT(!SLOW_IS_REG(src) || dst_r != gpr(src & REG_MASK)); + + if (src & SLJIT_IMM) + return push_load_imm_inst(compiler, dst_r, srcw); + + if (src & SLJIT_MEM) + return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_I32_OP) != 0); + + sljit_gpr src_r = gpr(src & REG_MASK); + return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? 
lr(dst_r, src_r) : lgr(dst_r, src_r)); +} + +static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_gpr dst_r = tmp0; + sljit_gpr src_r = tmp1; + sljit_s32 needs_move = 1; + + if (SLOW_IS_REG(dst)) { + dst_r = gpr(dst & REG_MASK); + + if (dst == src1) + needs_move = 0; + else if (dst == src2) { + dst_r = tmp0; + needs_move = 2; + } + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (FAST_IS_REG(src2)) + src_r = gpr(src2 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + + FAIL_IF(push_inst(compiler, ins | (dst_r << 4) | src_r)); + + if (needs_move != 2) + return SLJIT_SUCCESS; + + dst_r = gpr(dst & REG_MASK); + return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); +} + +static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_gpr dst_r = SLOW_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0; + sljit_gpr src1_r = tmp0; + sljit_gpr src2_r = tmp1; + + if (FAST_IS_REG(src1)) + src1_r = gpr(src1 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if (FAST_IS_REG(src2)) + src2_r = gpr(src2 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + + return push_inst(compiler, ins | (dst_r << 4) | src1_r | (src2_r << 12)); +} + +typedef enum { + RI_A, + RIL_A, +} emit_ril_type; + +static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_sw src2w, + emit_ril_type type) +{ + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + + if (SLOW_IS_REG(dst)) { + dst_r = gpr(dst & REG_MASK); + + if (dst == src1) + needs_move = 0; + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (type == RIL_A) + return push_inst(compiler, ins | (dst_r << 36) | (src2w & 0xffffffff)); + return push_inst(compiler, ins | (dst_r << 20) | (src2w & 0xffff)); +} + +static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_sw src2w) +{ + sljit_gpr dst_r = SLOW_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0; + sljit_gpr src_r = tmp0; + + if (!SLOW_IS_REG(src1)) + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + else + src_r = gpr(src1 & REG_MASK); + + return push_inst(compiler, ins | (dst_r << 36) | (src_r << 32) | (src2w & 0xffff) << 16); +} + +typedef enum { + RX_A, + RXY_A, +} emit_rx_type; + +static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w, + emit_rx_type type) +{ + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + sljit_gpr base, index; + + SLJIT_ASSERT(src2 & SLJIT_MEM); + + if (SLOW_IS_REG(dst)) { + dst_r = gpr(dst); + + if (dst == src1) + needs_move = 0; + else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) { + dst_r = tmp0; + needs_move = 2; + } + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + base = gpr(src2 & REG_MASK); + index = tmp0; + + if (src2 & OFFS_REG_MASK) { + index = gpr(OFFS_REG(src2)); + + if (src2w != 0) { + FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0))); + src2w = 0; + index = tmp1; + } + } else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w)); + + if (src2 & REG_MASK) + index = tmp1; + else + base = tmp1; + src2w = 0; + } + + if (type == RX_A) + ins |= (dst_r << 20) | (index << 16) | (base << 12) | src2w; + else + ins |= (dst_r << 36) | (index << 32) | (base << 28) | disp_s20(src2w); + + FAIL_IF(push_inst(compiler, ins)); + + if (needs_move != 2) + return SLJIT_SUCCESS; + + dst_r = gpr(dst); + return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? 
lr(dst_r, tmp0) : lgr(dst_r, tmp0)); +} + +static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, sljit_sw dstw, + sljit_sw srcw) +{ + SLJIT_ASSERT(dst & SLJIT_MEM); + + sljit_gpr dst_r = tmp1; + + if (dst & OFFS_REG_MASK) { + sljit_gpr index = tmp1; + + if ((dstw & 0x3) == 0) + index = gpr(OFFS_REG(dst)); + else + FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0))); + + FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, index))); + dstw = 0; + } + else if (!is_s20(dstw)) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw)); + + if (dst & REG_MASK) + FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, tmp1))); + + dstw = 0; + } + else + dst_r = gpr(dst & REG_MASK); + + return push_inst(compiler, ins | ((srcw & 0xff) << 32) | (dst_r << 28) | disp_s20(dstw)); +} + +struct ins_forms { + sljit_ins op_r; + sljit_ins op_gr; + sljit_ins op_rk; + sljit_ins op_grk; + sljit_ins op; + sljit_ins op_y; + sljit_ins op_g; +}; + +static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 mode = compiler->mode; + sljit_ins ins, ins_k; + + if ((src1 | src2) & SLJIT_MEM) { + sljit_ins ins12, ins20; + + if (mode & SLJIT_I32_OP) { + ins12 = forms->op; + ins20 = forms->op_y; + } + else { + ins12 = 0; + ins20 = forms->op_g; + } + + if (ins12 && ins20) { + /* Extra instructions needed for address computation can be executed independently. */ + if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM) + || ((src1 & OFFS_REG_MASK) ? 
(src1w & 0x3) == 0 : is_s20(src1w)))) { + if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w)) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + + if (src1 & SLJIT_MEM) { + if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w)) + return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A); + + return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A); + } + } + else if (ins12 || ins20) { + emit_rx_type rx_type; + + if (ins12) { + rx_type = RX_A; + ins = ins12; + } + else { + rx_type = RXY_A; + ins = ins20; + } + + if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM) + || ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w))))) + return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type); + + if (src1 & SLJIT_MEM) + return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type); + } + } + + if (mode & SLJIT_I32_OP) { + ins = forms->op_r; + ins_k = forms->op_rk; + } + else { + ins = forms->op_gr; + ins_k = forms->op_grk; + } + + SLJIT_ASSERT(ins != 0 || ins_k != 0); + + if (ins && SLOW_IS_REG(dst)) { + if (dst == src1) + return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w); + + if (dst == src2) + return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w); + } + + if (ins_k == 0) + return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w); + + return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w); +} + +static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 mode = compiler->mode; + sljit_ins ins; + + if (src2 & SLJIT_MEM) { + sljit_ins ins12, ins20; + + if (mode & SLJIT_I32_OP) { + ins12 = forms->op; + ins20 = forms->op_y; + } + else { + ins12 = 0; + ins20 = forms->op_g; + } + + if (ins12 && 
ins20) { + if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w)) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + else if (ins12) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + else if (ins20) + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + + ins = (mode & SLJIT_I32_OP) ? forms->op_rk : forms->op_grk; + + if (ins == 0 || (SLOW_IS_REG(dst) && dst == src1)) + return emit_rr(compiler, (mode & SLJIT_I32_OP) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w); + + return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) { struct sljit_label *label; @@ -1465,7 +1695,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile op = GET_OPCODE(op) | (op & SLJIT_I32_OP); switch (op) { case SLJIT_BREAKPOINT: - /* TODO(mundaym): insert real breakpoint? */ + /* The following invalid instruction is emitted by gdb. */ + return push_inst(compiler, 0x0001 /* 2-byte trap */); case SLJIT_NOP: return push_inst(compiler, 0x0700 /* 2-byte nop */); case SLJIT_LMUL_UW: @@ -1559,6 +1790,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile /* TODO(carenas): implement prefetch? 
*/ return SLJIT_SUCCESS; } + if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) { /* LOAD REGISTER */ if (FAST_IS_REG(dst) && FAST_IS_REG(src)) { @@ -1609,11 +1841,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile SLJIT_UNREACHABLE(); } FAIL_IF(push_inst(compiler, ins)); - if (HAS_FLAGS(op)) { - /* only handle zero flag */ - SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK)); - return push_store_zero_flag(compiler, op, dst_r); - } return SLJIT_SUCCESS; } /* LOAD IMMEDIATE */ @@ -1690,11 +1917,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile SLJIT_UNREACHABLE(); } FAIL_IF(push_inst(compiler, ins)); - if (HAS_FLAGS(op)) { - /* only handle zero flag */ - SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK)); - return push_store_zero_flag(compiler, op, reg); - } return SLJIT_SUCCESS; } /* STORE and STORE IMMEDIATE */ @@ -1723,11 +1945,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_MOV_P: case SLJIT_MOV: FAIL_IF(push_inst(compiler, LEVAL(stg))); - if (HAS_FLAGS(op)) { - /* only handle zero flag */ - SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK)); - return push_store_zero_flag(compiler, op, reg); - } return SLJIT_SUCCESS; default: SLJIT_UNREACHABLE(); @@ -1767,11 +1984,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); FAIL_IF(push_inst(compiler, EVAL(stg, tmp0, mem))); - if (HAS_FLAGS(op)) { - /* only handle zero flag */ - SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK)); - return push_store_zero_flag(compiler, op, tmp0); - } return SLJIT_SUCCESS; default: SLJIT_UNREACHABLE(); @@ -1785,7 +1997,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile dst_r = SLOW_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0; src_r = FAST_IS_REG(src) ? 
gpr(REG_MASK & src) : tmp0; if (src & SLJIT_MEM) - FAIL_IF(load_word(compiler, src_r, src, srcw, tmp1, src & SLJIT_I32_OP)); + FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_I32_OP)); + + compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); /* TODO(mundaym): optimize loads and stores */ switch (opcode | (op & SLJIT_I32_OP)) { @@ -1810,9 +2024,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile } break; case SLJIT_NEG: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; FAIL_IF(push_inst(compiler, lcgr(dst_r, src_r))); break; case SLJIT_NEG32: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; FAIL_IF(push_inst(compiler, lcr(dst_r, src_r))); break; case SLJIT_CLZ: @@ -1839,17 +2055,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile SLJIT_UNREACHABLE(); } - /* write condition code to emulated flag register */ - if (op & VARIABLE_FLAG_MASK) - FAIL_IF(push_inst(compiler, ipm(flag_r))); - - /* write zero flag to emulated flag register */ - if (op & SLJIT_SET_Z) - FAIL_IF(push_store_zero_flag(compiler, op, dst_r)); + if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW)) + FAIL_IF(update_zero_overflow(compiler, op, dst_r)); /* TODO(carenas): doesn't need FAIL_IF */ if ((dst != SLJIT_UNUSED) && (dst & SLJIT_MEM)) - FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP)); + FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP)); return SLJIT_SUCCESS; } @@ -1887,530 +2098,554 @@ static SLJIT_INLINE int sets_signed_flag(sljit_s32 op) return 0; } -/* Report whether we have an instruction for: - op dst src imm - where dst and src are separate registers. */ -static int have_op_3_imm(sljit_s32 op, sljit_sw imm) { - return 0; /* TODO(mundaym): implement */ -} - -/* Report whether we have an instruction for: - op reg imm - where reg is both a source and the destination. 
*/ -static int have_op_2_imm(sljit_s32 op, sljit_sw imm) { - switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) { - case SLJIT_ADD32: - case SLJIT_ADD: - if (!HAS_FLAGS(op) || sets_signed_flag(op)) - return have_eimm() ? is_s32(imm) : is_s16(imm); +static const struct ins_forms add_forms = { + 0x1a00, /* ar */ + 0xb9080000, /* agr */ + 0xb9f80000, /* ark */ + 0xb9e80000, /* agrk */ + 0x5a000000, /* a */ + 0xe3000000005a, /* ay */ + 0xe30000000008, /* ag */ +}; - return have_eimm() && is_u32(imm); - case SLJIT_MUL32: - case SLJIT_MUL: - /* TODO(mundaym): general extension check */ - /* for ms{,g}fi */ - if (op & VARIABLE_FLAG_MASK) - return 0; - - return have_genext() && is_s16(imm); - case SLJIT_OR32: - case SLJIT_XOR32: - case SLJIT_AND32: - /* only use if have extended immediate facility */ - /* this ensures flags are set correctly */ - return have_eimm(); - case SLJIT_AND: - case SLJIT_OR: - case SLJIT_XOR: - /* TODO(mundaym): make this more flexible */ - /* avoid using immediate variations, flags */ - /* won't be set correctly */ - return 0; - case SLJIT_ADDC32: - case SLJIT_ADDC: - /* no ADD LOGICAL WITH CARRY IMMEDIATE */ - return 0; - case SLJIT_SUB: - case SLJIT_SUB32: - case SLJIT_SUBC: - case SLJIT_SUBC32: - /* no SUBTRACT IMMEDIATE */ - /* TODO(mundaym): SUBTRACT LOGICAL IMMEDIATE */ - return 0; - } - return 0; -} +static const struct ins_forms logical_add_forms = { + 0x1e00, /* alr */ + 0xb90a0000, /* algr */ + 0xb9fa0000, /* alrk */ + 0xb9ea0000, /* algrk */ + 0x5e000000, /* al */ + 0xe3000000005e, /* aly */ + 0xe3000000000a, /* alg */ +}; -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, +static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); - ADJUST_LOCAL_OFFSET(dst, dstw); - 
ADJUST_LOCAL_OFFSET(src1, src1w); - ADJUST_LOCAL_OFFSET(src2, src2w); - - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - - sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW; + int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); + const struct ins_forms *forms; + sljit_ins ins; - if (is_commutative(op)) { - #define SWAP_ARGS \ - do { \ - sljit_s32 t = src1; \ - sljit_sw tw = src1w; \ - src1 = src2; \ - src1w = src2w; \ - src2 = t; \ - src2w = tw; \ - } while(0); - - /* prefer immediate in src2 */ - if (src1 & SLJIT_IMM) { - SWAP_ARGS + if (src2 & SLJIT_IMM) { + if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { + if (sets_overflow) + ins = (op & SLJIT_I32_OP) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + else + ins = (op & SLJIT_I32_OP) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + return emit_siy(compiler, ins, dst, dstw, src2w); } - /* prefer to have src1 use same register as dst */ - if (FAST_IS_REG(src2) && gpr(src2 & REG_MASK) == dst_r) { - SWAP_ARGS + if (is_s16(src2w)) { + if (sets_overflow) + ins = (op & SLJIT_I32_OP) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + else + ins = (op & SLJIT_I32_OP) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w)); + goto done; } - /* prefer memory argument in src2 */ - if (FAST_IS_REG(src2) && (src1 & SLJIT_MEM)) { - SWAP_ARGS + if (!sets_overflow) { + if ((op & SLJIT_I32_OP) || is_u32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 
0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; + } + if (is_u32(-src2w)) { + FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A)); + goto done; + } + } + else if ((op & SLJIT_I32_OP) || is_s32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; } - #undef SWAP_ARGS } - /* src1 must be in a register */ - sljit_gpr src1_r = FAST_IS_REG(src1) ? gpr(src1 & REG_MASK) : tmp0; - if (src1 & SLJIT_IMM) - FAIL_IF(push_load_imm_inst(compiler, src1_r, src1w)); - - if (src1 & SLJIT_MEM) - FAIL_IF(load_word(compiler, src1_r, src1, src1w, tmp1, op & SLJIT_I32_OP)); - - /* emit comparison before subtract */ - if (GET_OPCODE(op) == SLJIT_SUB && (op & VARIABLE_FLAG_MASK)) { - sljit_sw cmp = 0; - switch (GET_FLAG_TYPE(op)) { - case SLJIT_LESS: - case SLJIT_LESS_EQUAL: - case SLJIT_GREATER: - case SLJIT_GREATER_EQUAL: - cmp = 1; /* unsigned */ - break; - case SLJIT_EQUAL: - case SLJIT_SIG_LESS: - case SLJIT_SIG_LESS_EQUAL: - case SLJIT_SIG_GREATER: - case SLJIT_SIG_GREATER_EQUAL: - cmp = -1; /* signed */ - break; - } - if (cmp) { - /* clear flags - no need to generate now */ - op &= ~VARIABLE_FLAG_MASK; - sljit_gpr src2_r = FAST_IS_REG(src2) ? 
gpr(src2 & REG_MASK) : tmp1; - if (src2 & SLJIT_IMM) { - #define LEVAL(i) i(src1_r, src2w) - if (cmp > 0 && is_u32(src2w)) { - /* unsigned */ - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, clfi, clgfi))); - } - else if (cmp < 0 && is_s16(src2w)) { - /* signed */ - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, chi, cghi))); - } - else if (cmp < 0 && is_s32(src2w)) { - /* signed */ - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, cfi, cgfi))); - } - #undef LEVAL - #define LEVAL(i) i(src1_r, src2_r) - else { - FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w)); - if (cmp > 0) { - /* unsigned */ - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, clr, clgr))); - } - if (cmp < 0) { - /* signed */ - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, cr, cgr))); - } + forms = sets_overflow ? &add_forms : &logical_add_forms; + FAIL_IF(emit_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w)); + +done: + if (sets_zero_overflow) + FAIL_IF(update_zero_overflow(compiler, op, SLOW_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0)); + + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + + return SLJIT_SUCCESS; +} + +static const struct ins_forms sub_forms = { + 0x1b00, /* sr */ + 0xb9090000, /* sgr */ + 0xb9f90000, /* srk */ + 0xb9e90000, /* sgrk */ + 0x5b000000, /* s */ + 0xe3000000005b, /* sy */ + 0xe30000000009, /* sg */ +}; + +static const struct ins_forms logical_sub_forms = { + 0x1f00, /* slr */ + 0xb90b0000, /* slgr */ + 0xb9fb0000, /* slrk */ + 0xb9eb0000, /* slgrk */ + 0x5f000000, /* sl */ + 0xe3000000005f, /* sly */ + 0xe3000000000b, /* slg */ +}; + +static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + int sets_signed = sets_signed_flag(op); + int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); + const struct ins_forms *forms; + sljit_ins ins; + + if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + int compare_signed = GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS; + + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE; + + if (src2 & SLJIT_IMM) { + if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) + { + if ((op & SLJIT_I32_OP) || is_s32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */; + return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); } } else { - if (src2 & SLJIT_MEM) { - /* TODO(mundaym): comparisons with memory */ - /* load src2 into register */ - FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP)); + if ((op & SLJIT_I32_OP) || is_u32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 
0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */; + return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); } - if (cmp > 0) { - /* unsigned */ - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, clr, clgr))); - } - if (cmp < 0) { - /* signed */ - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, cr, cgr))); - } - #undef LEVAL + if (is_s16(src2w)) + return emit_rie_d(compiler, 0xec00000000db /* alghsik */, SLJIT_UNUSED, src1, src1w, src2w); } - FAIL_IF(push_inst(compiler, ipm(flag_r))); } - } + else if (src2 & SLJIT_MEM) { + if ((op & SLJIT_I32_OP) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) { + ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */; + return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A); + } - if (!HAS_FLAGS(op) && dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; + if (compare_signed) + ins = (op & SLJIT_I32_OP) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */; + else + ins = (op & SLJIT_I32_OP) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */; + return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A); + } - /* need to specify signed or logical operation */ - int signed_flags = sets_signed_flag(op); + if (compare_signed) + ins = (op & SLJIT_I32_OP) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */; + else + ins = (op & SLJIT_I32_OP) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */; + return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w); + } - if (is_shift(op)) { - /* handle shifts first, they have more constraints than other operations */ - sljit_sw d = 0; - sljit_gpr b = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : r0; - if (src2 & SLJIT_IMM) - d = src2w & ((op & SLJIT_I32_OP) ? 31 : 63); + if (src2 & SLJIT_IMM) { + sljit_sw neg_src2w = -src2w; - if (src2 & SLJIT_MEM) { - /* shift amount (b) cannot be in r0 (i.e. 
tmp0) */ - FAIL_IF(load_word(compiler, tmp1, src2, src2w, tmp1, op & SLJIT_I32_OP)); - b = tmp1; - } - /* src1 and dst share the same register in the base 32-bit ISA */ - /* TODO(mundaym): not needed when distinct-operand facility is available */ - int workaround_alias = op & SLJIT_I32_OP && src1_r != dst_r; - if (workaround_alias) { - /* put src1 into tmp0 so we can overwrite it */ - FAIL_IF(push_inst(compiler, lr(tmp0, src1_r))); - src1_r = tmp0; - } - switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) { - case SLJIT_SHL: - FAIL_IF(push_inst(compiler, sllg(dst_r, src1_r, d, b))); - break; - case SLJIT_SHL32: - FAIL_IF(push_inst(compiler, sll(src1_r, d, b))); - break; - case SLJIT_LSHR: - FAIL_IF(push_inst(compiler, srlg(dst_r, src1_r, d, b))); - break; - case SLJIT_LSHR32: - FAIL_IF(push_inst(compiler, srl(src1_r, d, b))); - break; - case SLJIT_ASHR: - FAIL_IF(push_inst(compiler, srag(dst_r, src1_r, d, b))); - break; - case SLJIT_ASHR32: - FAIL_IF(push_inst(compiler, sra(src1_r, d, b))); - break; - default: - SLJIT_UNREACHABLE(); + if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) { + if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { + if (sets_signed) + ins = (op & SLJIT_I32_OP) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + else + ins = (op & SLJIT_I32_OP) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + return emit_siy(compiler, ins, dst, dstw, neg_src2w); + } + + if (is_s16(neg_src2w)) { + if (sets_signed) + ins = (op & SLJIT_I32_OP) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + else + ins = (op & SLJIT_I32_OP) ? 
0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w)); + goto done; + } } - if (workaround_alias && dst_r != src1_r) - FAIL_IF(push_inst(compiler, lr(dst_r, src1_r))); - } - else if ((GET_OPCODE(op) == SLJIT_MUL) && HAS_FLAGS(op)) { - /* multiply instructions do not generally set flags so we need to manually */ - /* detect overflow conditions */ - /* TODO(mundaym): 64-bit overflow */ - SLJIT_ASSERT(GET_FLAG_TYPE(op) == SLJIT_MUL_OVERFLOW || - GET_FLAG_TYPE(op) == SLJIT_MUL_NOT_OVERFLOW); - sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1; - if (src2 & SLJIT_IMM) { - /* load src2 into register */ - FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w)); + if (!sets_signed) { + if ((op & SLJIT_I32_OP) || is_u32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; + } + if (is_u32(neg_src2w)) { + FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A)); + goto done; + } } - if (src2 & SLJIT_MEM) { - /* load src2 into register */ - FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP)); + else if ((op & SLJIT_I32_OP) || is_s32(neg_src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A)); + goto done; } - if (have_misc2()) { - #define LEVAL(i) i(dst_r, src1_r, src2_r) - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, msrkc, msgrkc))); - #undef LEVAL + } + + forms = sets_signed ? &sub_forms : &logical_sub_forms; + FAIL_IF(emit_non_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w)); + +done: + if (sets_signed) { + sljit_gpr dst_r = SLOW_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0; + + if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) { + /* In case of overflow, the sign bit of the two source operands must be different, and + - the first operand is greater if the sign bit of the result is set + - the first operand is less if the sign bit of the result is not set + The -result operation sets the correct sign, because the result cannot be zero. + The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */ + FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2))); + FAIL_IF(push_inst(compiler, (op & SLJIT_I32_OP) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r))); } - else if (op & SLJIT_I32_OP) { - op &= ~VARIABLE_FLAG_MASK; - FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r))); - FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r))); - if (dst_r != tmp0) { - FAIL_IF(push_inst(compiler, lr(dst_r, tmp0))); - } - FAIL_IF(push_inst(compiler, aih(tmp0, 1))); - FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U))); - FAIL_IF(push_inst(compiler, ipm(flag_r))); - FAIL_IF(push_inst(compiler, oilh(flag_r, 0x2000))); + else if (op & SLJIT_SET_Z) + FAIL_IF(update_zero_overflow(compiler, op, dst_r)); + } + + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + + return SLJIT_SUCCESS; +} + +static const struct ins_forms multiply_forms = { + 0xb2520000, /* msr */ + 0xb90c0000, /* msgr */ + 0xb9fd0000, /* msrkc */ + 0xb9ed0000, /* msgrkc */ + 0x71000000, /* ms */ + 0xe30000000051, /* msy */ + 0xe3000000000c, /* msg */ +}; + +static const struct ins_forms multiply_overflow_forms = { + 0, + 0, + 0xb9fd0000, /* msrkc */ + 0xb9ed0000, /* msgrkc */ + 0, + 0xe30000000053, /* msc */ + 0xe30000000083, /* msgc */ +}; + +static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins ins; + + if (HAS_FLAGS(op)) { + /* if have_misc2 fails, this operation should 
be emulated. 32 bit emulation: + FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r))); + FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r))); + if (dst_r != tmp0) { + FAIL_IF(push_inst(compiler, lr(dst_r, tmp0))); } - else - return SLJIT_ERR_UNSUPPORTED; + FAIL_IF(push_inst(compiler, aih(tmp0, 1))); + FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U))); + FAIL_IF(push_inst(compiler, ipm(flag_r))); + FAIL_IF(push_inst(compiler, oilh(flag_r, 0x2000))); */ + return emit_commutative(compiler, &multiply_overflow_forms, dst, dstw, src1, src1w, src2, src2w); } - else if ((GET_OPCODE(op) == SLJIT_SUB) && (op & SLJIT_SET_Z) && !signed_flags) { - /* subtract logical instructions do not set the right flags unfortunately */ - /* instead, negate src2 and issue an add logical */ - /* TODO(mundaym): distinct operand facility where needed */ - if (src1_r != dst_r && src1_r != tmp0) { - #define LEVAL(i) i(tmp0, src1_r) - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, lr, lgr))); - src1_r = tmp0; - #undef LEVAL - } - sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1; - if (src2 & SLJIT_IMM) { - /* load src2 into register */ - FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w)); + + if (src2 & SLJIT_IMM) { + if (is_s16(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */; + return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A); } - if (src2 & SLJIT_MEM) { - /* load src2 into register */ - FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP)); + + if (is_s32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 
0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */; + return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A); } - if (op & SLJIT_I32_OP) { - FAIL_IF(push_inst(compiler, lcr(tmp1, src2_r))); - FAIL_IF(push_inst(compiler, alr(src1_r, tmp1))); - if (src1_r != dst_r) - FAIL_IF(push_inst(compiler, lr(dst_r, src1_r))); + } + + return emit_commutative(compiler, &multiply_forms, dst, dstw, src1, src1w, src2, src2w); +} + +static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_uw imm, sljit_s32 count16) +{ + sljit_s32 mode = compiler->mode; + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + + if (SLOW_IS_REG(dst)) { + dst_r = gpr(dst & REG_MASK); + if (dst == src1) + needs_move = 0; + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (type == SLJIT_AND) { + if (!(mode & SLJIT_I32_OP)) + FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | (dst_r << 36) | (imm >> 32))); + return push_inst(compiler, 0xc00b00000000 /* nilf */ | (dst_r << 36) | (imm & 0xffffffff)); + } + else if (type == SLJIT_OR) { + if (count16 >= 3) { + FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | (dst_r << 36) | (imm >> 32))); + return push_inst(compiler, 0xc00d00000000 /* oilf */ | (dst_r << 36) | (imm & 0xffffffff)); } - else { - FAIL_IF(push_inst(compiler, lcgr(tmp1, src2_r))); - FAIL_IF(push_inst(compiler, algr(src1_r, tmp1))); - if (src1_r != dst_r) - FAIL_IF(push_inst(compiler, lgr(dst_r, src1_r))); + + if (count16 >= 2) { + if ((imm & 0x00000000ffffffffull) == 0) + return push_inst(compiler, 0xc00c00000000 /* oihf */ | (dst_r << 36) | (imm >> 32)); + if ((imm & 0xffffffff00000000ull) == 0) + return push_inst(compiler, 0xc00d00000000 /* oilf */ | (dst_r << 36) | (imm & 0xffffffff)); } + + if ((imm & 0xffff000000000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | (dst_r << 20) | (imm >> 48))); + if ((imm & 
0x0000ffff00000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | (dst_r << 20) | ((imm >> 32) & 0xffff))); + if ((imm & 0x00000000ffff0000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | (dst_r << 20) | ((imm >> 16) & 0xffff))); + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + return push_inst(compiler, 0xa50b0000 /* oill */ | (dst_r << 20) | (imm & 0xffff)); + return SLJIT_SUCCESS; } - else if ((src2 & SLJIT_IMM) && (src1_r == dst_r) && have_op_2_imm(op, src2w)) { - switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) { - #define LEVAL(i) i(dst_r, src2w) - case SLJIT_ADD: - if (!HAS_FLAGS(op) || signed_flags) { - FAIL_IF(push_inst(compiler, - WHEN2(is_s16(src2w), aghi, agfi))); - } - else - FAIL_IF(push_inst(compiler, LEVAL(algfi))); - break; - case SLJIT_ADD32: - if (!HAS_FLAGS(op) || signed_flags) - FAIL_IF(push_inst(compiler, - WHEN2(is_s16(src2w), ahi, afi))); + if ((imm & 0xffffffff00000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | (dst_r << 36) | (imm >> 32))); + if ((imm & 0x00000000ffffffffull) != 0 || imm == 0) + return push_inst(compiler, 0xc00700000000 /* xilf */ | (dst_r << 36) | (imm & 0xffffffff)); + return SLJIT_SUCCESS; +} + +static const struct ins_forms bitwise_and_forms = { + 0x1400, /* nr */ + 0xb9800000, /* ngr */ + 0xb9f40000, /* nrk */ + 0xb9e40000, /* ngrk */ + 0x54000000, /* n */ + 0xe30000000054, /* ny */ + 0xe30000000080, /* ng */ +}; + +static const struct ins_forms bitwise_or_forms = { + 0x1600, /* or */ + 0xb9810000, /* ogr */ + 0xb9f60000, /* ork */ + 0xb9e60000, /* ogrk */ + 0x56000000, /* o */ + 0xe30000000056, /* oy */ + 0xe30000000081, /* og */ +}; + +static const struct ins_forms bitwise_xor_forms = { + 0x1700, /* xr */ + 0xb9820000, /* xgr */ + 0xb9f70000, /* xrk */ + 0xb9e70000, /* xgrk */ + 0x57000000, /* x */ + 0xe30000000057, /* xy */ + 0xe30000000082, /* xg */ +}; + +static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 
dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 type = GET_OPCODE(op); + const struct ins_forms *forms; + + if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == SLJIT_UNUSED))) { + sljit_s32 count16 = 0; + sljit_uw imm = (sljit_uw)src2w; + + if (op & SLJIT_I32_OP) + imm &= 0xffffffffull; + + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + count16++; + if ((imm & 0x00000000ffff0000ull) != 0) + count16++; + if ((imm & 0x0000ffff00000000ull) != 0) + count16++; + if ((imm & 0xffff000000000000ull) != 0) + count16++; + + if (type == SLJIT_AND && dst == SLJIT_UNUSED && count16 == 1) { + sljit_gpr src_r = tmp0; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1 & REG_MASK); else - FAIL_IF(push_inst(compiler, LEVAL(alfi))); - - break; - #undef LEVAL /* TODO(carenas): move down and refactor? */ - case SLJIT_MUL: - FAIL_IF(push_inst(compiler, mhi(dst_r, src2w))); - break; - case SLJIT_MUL32: - FAIL_IF(push_inst(compiler, mghi(dst_r, src2w))); - break; - case SLJIT_OR32: - FAIL_IF(push_inst(compiler, oilf(dst_r, src2w))); - break; - case SLJIT_XOR32: - FAIL_IF(push_inst(compiler, xilf(dst_r, src2w))); - break; - case SLJIT_AND32: - FAIL_IF(push_inst(compiler, nilf(dst_r, src2w))); - break; - default: - SLJIT_UNREACHABLE(); + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + return push_inst(compiler, 0xa7010000 | (src_r << 20) | imm); + if ((imm & 0x00000000ffff0000ull) != 0) + return push_inst(compiler, 0xa7000000 | (src_r << 20) | (imm >> 16)); + if ((imm & 0x0000ffff00000000ull) != 0) + return push_inst(compiler, 0xa7030000 | (src_r << 20) | (imm >> 32)); + return push_inst(compiler, 0xa7020000 | (src_r << 20) | (imm >> 48)); } + + if (!(op & SLJIT_SET_Z)) + return sljit_emit_bitwise_imm(compiler, type, dst, dstw, src1, src1w, imm, count16); } - else if ((src2 & SLJIT_IMM) && have_op_3_imm(op, src2w)) { - abort(); /* TODO(mundaym): 
implement */ + + if (type == SLJIT_AND) + forms = &bitwise_and_forms; + else if (type == SLJIT_OR) + forms = &bitwise_or_forms; + else + forms = &bitwise_xor_forms; + + return emit_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w); +} + +static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 type = GET_OPCODE(op); + sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr src_r = tmp0; + sljit_gpr base_r = tmp0; + sljit_ins imm = 0; + sljit_ins ins; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if (src2 & SLJIT_IMM) + imm = src2w & ((op & SLJIT_I32_OP) ? 0x1f : 0x3f); + else if (FAST_IS_REG(src2)) + base_r = gpr(src2 & REG_MASK); + else { + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + base_r = tmp1; } - else if ((src2 & SLJIT_MEM) && (dst_r == src1_r)) { - /* most 32-bit instructions can only handle 12-bit immediate offsets */ - int need_u12 = !have_ldisp() && - (op & SLJIT_I32_OP) && - (GET_OPCODE(op) != SLJIT_ADDC) && - (GET_OPCODE(op) != SLJIT_SUBC); - struct addr mem; - if (need_u12) - FAIL_IF(make_addr_bx(compiler, &mem, src2, src2w, tmp1)); + + if ((op & SLJIT_I32_OP) && dst_r == src_r) { + if (type == SLJIT_SHL) + ins = 0x89000000 /* sll */; + else if (type == SLJIT_LSHR) + ins = 0x88000000 /* srl */; else - FAIL_IF(make_addr_bxy(compiler, &mem, src2, src2w, tmp1)); - - int can_u12 = is_u12(mem.offset) ? 
1 : 0; - sljit_ins ins = 0; - switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) { - /* 64-bit ops */ - #define LEVAL(i) EVAL(i, dst_r, mem) - case SLJIT_ADD: - ins = WHEN2(signed_flags, ag, alg); - break; - case SLJIT_SUB: - ins = WHEN2(signed_flags, sg, slg); - break; - case SLJIT_ADDC: - ins = LEVAL(alcg); - break; - case SLJIT_SUBC: - ins = LEVAL(slbg); - break; - case SLJIT_MUL: - ins = LEVAL(msg); - break; - case SLJIT_OR: - ins = LEVAL(og); - break; - case SLJIT_XOR: - ins = LEVAL(xg); - break; - case SLJIT_AND: - ins = LEVAL(ng); - break; - /* 32-bit ops */ - case SLJIT_ADD32: - if (signed_flags) - ins = WHEN2(can_u12, a, ay); - else - ins = WHEN2(can_u12, al, aly); - break; - case SLJIT_SUB32: - if (signed_flags) - ins = WHEN2(can_u12, s, sy); - else - ins = WHEN2(can_u12, sl, sly); - break; - case SLJIT_ADDC32: - ins = LEVAL(alc); - break; - case SLJIT_SUBC32: - ins = LEVAL(slb); - break; - case SLJIT_MUL32: - ins = WHEN2(can_u12, ms, msy); - break; - case SLJIT_OR32: - ins = WHEN2(can_u12, o, oy); - break; - case SLJIT_XOR32: - ins = WHEN2(can_u12, x, xy); - break; - case SLJIT_AND32: - ins = WHEN2(can_u12, n, ny); - break; - #undef LEVAL - default: - SLJIT_UNREACHABLE(); - } - FAIL_IF(push_inst(compiler, ins)); + ins = 0x8a000000 /* sra */; + + FAIL_IF(push_inst(compiler, ins | (dst_r << 20) | (base_r << 12) | imm)); } else { - sljit_gpr src2_r = FAST_IS_REG(src2) ? 
gpr(src2 & REG_MASK) : tmp1; - if (src2 & SLJIT_IMM) { - /* load src2 into register */ - FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w)); - } - if (src2 & SLJIT_MEM) { - /* load src2 into register */ - FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP)); - } - /* TODO(mundaym): distinct operand facility where needed */ - #define LEVAL(i) i(tmp0, src1_r) - if (src1_r != dst_r && src1_r != tmp0) { - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, lr, lgr))); - src1_r = tmp0; - } - #undef LEVAL - sljit_ins ins = 0; - switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) { - #define LEVAL(i) i(src1_r, src2_r) - /* 64-bit ops */ - case SLJIT_ADD: - ins = WHEN2(signed_flags, agr, algr); - break; - case SLJIT_SUB: - ins = WHEN2(signed_flags, sgr, slgr); - break; - case SLJIT_ADDC: - ins = LEVAL(alcgr); - break; - case SLJIT_SUBC: - ins = LEVAL(slbgr); - break; - case SLJIT_MUL: - ins = LEVAL(msgr); - break; - case SLJIT_AND: - ins = LEVAL(ngr); - break; - case SLJIT_OR: - ins = LEVAL(ogr); - break; - case SLJIT_XOR: - ins = LEVAL(xgr); - break; - /* 32-bit ops */ - case SLJIT_ADD32: - ins = WHEN2(signed_flags, ar, alr); - break; - case SLJIT_SUB32: - ins = WHEN2(signed_flags, sr, slr); - break; - case SLJIT_ADDC32: - ins = LEVAL(alcr); - break; - case SLJIT_SUBC32: - ins = LEVAL(slbr); - break; - case SLJIT_MUL32: - ins = LEVAL(msr); - break; - case SLJIT_AND32: - ins = LEVAL(nr); - break; - case SLJIT_OR32: - ins = LEVAL(or); - break; - case SLJIT_XOR32: - ins = LEVAL(xr); - break; - #undef LEVAL - default: - SLJIT_UNREACHABLE(); - } - FAIL_IF(push_inst(compiler, ins)); - #define LEVAL(i) i(dst_r, src1_r) - if (src1_r != dst_r) - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, lr, lgr))); - #undef LEVAL + if (type == SLJIT_SHL) + ins = (op & SLJIT_I32_OP) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */; + else if (type == SLJIT_LSHR) + ins = (op & SLJIT_I32_OP) ? 
0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */; + else + ins = (op & SLJIT_I32_OP) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */; + + FAIL_IF(push_inst(compiler, ins | (dst_r << 36) | (src_r << 32) | (base_r << 28) | (imm << 16))); } - /* write condition code to emulated flag register */ - if (op & VARIABLE_FLAG_MASK) - FAIL_IF(push_inst(compiler, ipm(flag_r))); + if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR) + return push_inst(compiler, (op & SLJIT_I32_OP) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)); + + return SLJIT_SUCCESS; +} + +static const struct ins_forms addc_forms = { + 0xb9980000, /* alcr */ + 0xb9880000, /* alcgr */ + 0, + 0, + 0, + 0xe30000000098, /* alc */ + 0xe30000000088, /* alcg */ +}; + +static const struct ins_forms subc_forms = { + 0xb9990000, /* slbr */ + 0xb9890000, /* slbgr */ + 0, + 0, + 0, + 0xe30000000099, /* slb */ + 0xe30000000089, /* slbg */ +}; + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; - /* write zero flag to emulated flag register */ - if (op & SLJIT_SET_Z) - FAIL_IF(push_store_zero_flag(compiler, op, dst_r)); + compiler->mode = op & SLJIT_I32_OP; + compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); + + if (GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_SUBC) /* range check: only opcodes from SLJIT_ADD through SLJIT_SUBC get the add/sub flag state */ + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; + + if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) { + src1 ^= src2; + src2 ^= src1; + src1 ^= src2; + + src1w ^= src2w; + src2w ^= src1w; + src1w ^= src2w; + } - /* finally write the result to memory if required */ - if (dst & 
SLJIT_MEM) { - SLJIT_ASSERT(dst_r != tmp1); - /* TODO(carenas): s/FAIL_IF/ return */ - FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP)); + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ADDC: + FAIL_IF(emit_commutative(compiler, &addc_forms, dst, dstw, src1, src1w, src2, src2w)); + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return SLJIT_SUCCESS; + case SLJIT_SUB: + return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUBC: + FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, dstw, src1, src1w, src2, src2w)); + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return SLJIT_SUCCESS; + case SLJIT_MUL: + FAIL_IF(sljit_emit_multiply(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + break; + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + FAIL_IF(sljit_emit_bitwise(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + break; + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: + FAIL_IF(sljit_emit_shift(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + break; } + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); return SLJIT_SUCCESS; } @@ -2428,7 +2663,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src( case SLJIT_FAST_RETURN: src_r = FAST_IS_REG(src) ? 
gpr(src) : tmp1; if (src & SLJIT_MEM) - FAIL_IF(load_word(compiler, tmp1, src, srcw, tmp1, 0)); + FAIL_IF(load_word(compiler, tmp1, src, srcw, 0)); return push_inst(compiler, br(src_r)); case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: @@ -2507,7 +2742,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * return push_inst(compiler, lgr(gpr(dst), fast_link_r)); /* memory */ - return store_word(compiler, fast_link_r, dst, dstw, tmp1, 0); + return store_word(compiler, fast_link_r, dst, dstw, 0); } /* --------------------------------------------------------------------- */ @@ -2532,15 +2767,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { - sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(type & 0xff) : 0xf; + sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_jump(compiler, type)); - /* reload condition code */ - if (mask != 0xf) - PTR_FAIL_IF(push_load_cc(compiler, type & 0xff)); - /* record jump */ struct sljit_jump *jump = (struct sljit_jump *) ensure_abuf(compiler, sizeof(struct sljit_jump)); @@ -2585,7 +2816,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi FAIL_IF(push_load_imm_inst(compiler, src_r, srcw)); } else if (src & SLJIT_MEM) - FAIL_IF(load_word(compiler, src_r, src, srcw, tmp1, 0 /* 64-bit */)); + FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */)); /* emit jump instruction */ if (type >= SLJIT_FAST_CALL) @@ -2613,7 +2844,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co sljit_s32 dst, sljit_sw dstw, sljit_s32 type) { - sljit_u8 mask = get_cc(type & 0xff); + sljit_u8 mask = get_cc(compiler, type & 0xff); CHECK_ERROR(); CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); @@ -2624,9 +2855,11 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co case SLJIT_AND: case SLJIT_OR: case SLJIT_XOR: + compiler->status_flags_state = op & SLJIT_SET_Z; + /* dst is also source operand */ if (dst & SLJIT_MEM) - FAIL_IF(load_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP)); + FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP)); break; case SLJIT_MOV: @@ -2638,9 +2871,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co SLJIT_UNREACHABLE(); } - if (mask != 0xf) - FAIL_IF(push_load_cc(compiler, type & 0xff)); - /* TODO(mundaym): fold into cmov helper function? */ #define LEVAL(i) i(loc_r, 1, mask) if (have_lscond2()) { @@ -2671,14 +2901,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co #undef LEVAL } - /* set zero flag if needed */ - if (op & SLJIT_SET_Z) - FAIL_IF(push_store_zero_flag(compiler, op, dst_r)); - /* store result to memory if required */ - /* TODO(carenas): s/FAIL_IF/ return */ if (dst & SLJIT_MEM) - FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP)); + return store_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP); return SLJIT_SUCCESS; } @@ -2687,16 +2912,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil sljit_s32 dst_reg, sljit_s32 src, sljit_sw srcw) { - sljit_u8 mask = get_cc(type & 0xff); + sljit_u8 mask = get_cc(compiler, type & 0xff); sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_I32_OP); sljit_gpr src_r = FAST_IS_REG(src) ? 
gpr(src) : tmp0; CHECK_ERROR(); CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); - if (mask != 0xf) - FAIL_IF(push_load_cc(compiler, type & 0xff)); - if (src & SLJIT_IMM) { /* TODO(mundaym): fast path with lscond2 */ FAIL_IF(push_load_imm_inst(compiler, src_r, srcw)); @@ -2750,7 +2972,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi } if (dst & SLJIT_MEM) - PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, 0 /* always 64-bit */)); + PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */)); return (struct sljit_const*)const_; } @@ -2797,7 +3019,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label( } if (dst & SLJIT_MEM) - PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, 0)); + PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0)); return put_label; } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c b/thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c index e5167f02ba..28886405af 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c @@ -93,18 +93,21 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS); case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); case SLJIT_ADDC: return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); case SLJIT_SUBC: return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & 
SET_FLAGS)); case SLJIT_MUL: + compiler->status_flags_state = 0; FAIL_IF(push_inst(compiler, SMUL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); if (!(flags & SET_FLAGS)) return SLJIT_SUCCESS; diff --git a/thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c b/thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c index 544d80d028..e833f09d7a 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c @@ -1275,16 +1275,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi return label; } -static sljit_ins get_cc(sljit_s32 type) +static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: - case SLJIT_MUL_NOT_OVERFLOW: case SLJIT_NOT_EQUAL_F64: /* Unordered. */ return DA(0x1); case SLJIT_NOT_EQUAL: - case SLJIT_MUL_OVERFLOW: case SLJIT_EQUAL_F64: return DA(0x9); @@ -1317,10 +1315,16 @@ static sljit_ins get_cc(sljit_s32 type) return DA(0x2); case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return DA(0x9); + case SLJIT_UNORDERED_F64: return DA(0x7); case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return DA(0x1); + case SLJIT_ORDERED_F64: return DA(0xf); @@ -1347,7 +1351,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET)) jump->flags |= IS_MOVABLE; #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(type ^ 1) | 5, UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(compiler, type ^ 1) | 5, UNMOVABLE_INS)); #else #error "Implementation required" #endif @@ -1357,7 +1361,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && 
!(compiler->delay_slot & FCC_IS_SET)) jump->flags |= IS_MOVABLE; #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(type ^ 1) | 5, UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(compiler, type ^ 1) | 5, UNMOVABLE_INS)); #else #error "Implementation required" #endif @@ -1474,9 +1478,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co type &= 0xff; if (type < SLJIT_EQUAL_F64) - FAIL_IF(push_inst(compiler, BICC | get_cc(type) | 3, UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, BICC | get_cc(compiler, type) | 3, UNMOVABLE_INS)); else - FAIL_IF(push_inst(compiler, FBFCC | get_cc(type) | 3, UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, FBFCC | get_cc(compiler, type) | 3, UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS)); diff --git a/thirdparty/pcre2/src/sljit/sljitNativeX86_common.c b/thirdparty/pcre2/src/sljit/sljitNativeX86_common.c index ddcc5ebf76..515d98aefd 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeX86_common.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeX86_common.c @@ -411,11 +411,9 @@ static sljit_u8 get_jump_code(sljit_s32 type) return 0x8e /* jle */; case SLJIT_OVERFLOW: - case SLJIT_MUL_OVERFLOW: return 0x80 /* jo */; case SLJIT_NOT_OVERFLOW: - case SLJIT_MUL_NOT_OVERFLOW: return 0x81 /* jno */; case SLJIT_UNORDERED_F64: diff --git a/thirdparty/pcre2/src/sljit/sljitUtils.c b/thirdparty/pcre2/src/sljit/sljitUtils.c index 08ca35cf37..9bce714735 100644 --- a/thirdparty/pcre2/src/sljit/sljitUtils.c +++ b/thirdparty/pcre2/src/sljit/sljitUtils.c @@ -48,7 +48,7 @@ static HANDLE allocator_lock; static SLJIT_INLINE void allocator_grab_lock(void) { HANDLE lock; - if (SLJIT_UNLIKELY(!allocator_lock)) { + if (SLJIT_UNLIKELY(!InterlockedCompareExchangePointer(&allocator_lock, NULL, NULL))) { lock = CreateMutex(NULL, FALSE, NULL); if 
(InterlockedCompareExchangePointer(&allocator_lock, lock, NULL)) CloseHandle(lock); @@ -146,9 +146,13 @@ static SLJIT_INLINE sljit_sw get_page_alignment(void) { #include <unistd.h> static SLJIT_INLINE sljit_sw get_page_alignment(void) { - static sljit_sw sljit_page_align; - if (!sljit_page_align) { + static sljit_sw sljit_page_align = -1; + if (sljit_page_align < 0) { +#ifdef _SC_PAGESIZE sljit_page_align = sysconf(_SC_PAGESIZE); +#else + sljit_page_align = getpagesize(); +#endif /* Should never happen. */ if (sljit_page_align < 0) sljit_page_align = 4096; diff --git a/thirdparty/pcre2/src/sljit/sljitWXExecAllocator.c b/thirdparty/pcre2/src/sljit/sljitWXExecAllocator.c index 6ef71f7d83..72d5b8dd2b 100644 --- a/thirdparty/pcre2/src/sljit/sljitWXExecAllocator.c +++ b/thirdparty/pcre2/src/sljit/sljitWXExecAllocator.c @@ -121,14 +121,18 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) static pthread_mutex_t se_lock = PTHREAD_MUTEX_INITIALIZER; #endif static int se_protected = !SLJIT_PROT_WX; + int prot = PROT_READ | PROT_WRITE | SLJIT_PROT_WX; sljit_uw* ptr; if (SLJIT_UNLIKELY(se_protected < 0)) return NULL; +#ifdef PROT_MAX + prot |= PROT_MAX(PROT_READ | PROT_WRITE | PROT_EXEC); +#endif + size += sizeof(sljit_uw); - ptr = (sljit_uw*)mmap(NULL, size, PROT_READ | PROT_WRITE | SLJIT_PROT_WX, - MAP_PRIVATE | MAP_ANON, -1, 0); + ptr = (sljit_uw*)mmap(NULL, size, prot, MAP_PRIVATE | MAP_ANON, -1, 0); if (ptr == MAP_FAILED) return NULL; diff --git a/thirdparty/tinyexr/tinyexr.cc b/thirdparty/tinyexr/tinyexr.cc index fef8f66c98..70115ea5c2 100644 --- a/thirdparty/tinyexr/tinyexr.cc +++ b/thirdparty/tinyexr/tinyexr.cc @@ -4,5 +4,9 @@ #endif #endif +// -- GODOT start -- +#include <zlib.h> // Should come before including tinyexr. 
+// -- GODOT end -- + #define TINYEXR_IMPLEMENTATION #include "tinyexr.h" diff --git a/thirdparty/tinyexr/tinyexr.h b/thirdparty/tinyexr/tinyexr.h index a3e7b23161..969f07ad79 100644 --- a/thirdparty/tinyexr/tinyexr.h +++ b/thirdparty/tinyexr/tinyexr.h @@ -1,7 +1,7 @@ #ifndef TINYEXR_H_ #define TINYEXR_H_ /* -Copyright (c) 2014 - 2020, Syoyo Fujita and many contributors. +Copyright (c) 2014 - 2021, Syoyo Fujita and many contributors. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -65,6 +65,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // End of OpenEXR license ------------------------------------------------- + // // // Do this: @@ -88,7 +89,21 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. extern "C" { #endif -// Use embedded miniz or not to decode ZIP format pixel. Linking with zlib +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ + defined(__i386) || defined(__i486__) || defined(__i486) || \ + defined(i386) || defined(__ia64__) || defined(__x86_64__) +#define TINYEXR_X86_OR_X64_CPU 1 +#else +#define TINYEXR_X86_OR_X64_CPU 0 +#endif + +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || TINYEXR_X86_OR_X64_CPU +#define TINYEXR_LITTLE_ENDIAN 1 +#else +#define TINYEXR_LITTLE_ENDIAN 0 +#endif + +// Use miniz or not to decode ZIP format pixel. Linking with zlib // required if this flas is 0. #ifndef TINYEXR_USE_MINIZ #define TINYEXR_USE_MINIZ (1) @@ -162,9 +177,13 @@ extern "C" { typedef struct _EXRVersion { int version; // this must be 2 - int tiled; // tile format image + // tile format image; + // not zero for only a single-part "normal" tiled file (according to spec.) + int tiled; int long_name; // long name attribute - int non_image; // deep image(EXR 2.0) + // deep image(EXR 2.0); + // for a multi-part file, indicates that at least one part is of type deep* (according to spec.) 
+ int non_image; int multipart; // multi-part(EXR 2.0) } EXRVersion; @@ -222,6 +241,8 @@ typedef struct _EXRHeader { int tile_rounding_mode; int long_name; + // for a single-part file, agree with the version field bit 11 + // for a multi-part file, it is consistent with the type of part int non_image; int multipart; unsigned int header_len; @@ -244,7 +265,11 @@ typedef struct _EXRHeader { // ParseEXRHeaderFrom(Meomory|File), then users // can edit it(only valid for HALF pixel type // channel) - + // name attribute required for multipart files; + // must be unique and non empty (according to spec.); + // use EXRSetNameAttr for setting value; + // max 255 character allowed - excluding terminating zero + char name[256]; } EXRHeader; typedef struct _EXRMultiPartHeader { @@ -256,6 +281,10 @@ typedef struct _EXRMultiPartHeader { typedef struct _EXRImage { EXRTile *tiles; // Tiled pixel data. The application must reconstruct image // from tiles manually. NULL if scanline format. + struct _EXRImage* next_level; // NULL if scanline format or image is the last level. + int level_x; // x level index + int level_y; // y level index + unsigned char **images; // image[channels][pixels]. NULL if tiled format. 
int width; @@ -339,9 +368,15 @@ extern int SaveEXR(const float *data, const int width, const int height, const int components, const int save_as_fp16, const char *filename, const char **err); +// Returns the number of resolution levels of the image (including the base) +extern int EXRNumLevels(const EXRImage* exr_image); + // Initialize EXRHeader struct extern void InitEXRHeader(EXRHeader *exr_header); +// Set name attribute of EXRHeader struct (it makes a copy) +extern void EXRSetNameAttr(EXRHeader *exr_header, const char* name); + // Initialize EXRImage struct extern void InitEXRImage(EXRImage *exr_image); @@ -465,6 +500,30 @@ extern size_t SaveEXRImageToMemory(const EXRImage *image, const EXRHeader *exr_header, unsigned char **memory, const char **err); +// Saves multi-channel, multi-frame OpenEXR image to a memory. +// Image is compressed using EXRImage.compression value. +// File global attributes (eg. display_window) must be set in the first header. +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int SaveEXRMultipartImageToFile(const EXRImage *images, + const EXRHeader **exr_headers, + unsigned int num_parts, + const char *filename, const char **err); + +// Saves multi-channel, multi-frame OpenEXR image to a memory. +// Image is compressed using EXRImage.compression value. +// File global attributes (eg. display_window) must be set in the first header. +// Return the number of bytes if success. +// Return zero and will set error string in `err` when there's an +// error. +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern size_t SaveEXRMultipartImageToMemory(const EXRImage *images, + const EXRHeader **exr_headers, + unsigned int num_parts, + unsigned char **memory, const char **err); // Loads single-frame OpenEXR deep image. 
// Application must free memory of variables in DeepImage(image, offset_table) // Returns negative value and may set error string in `err` when there's an @@ -514,6 +573,9 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height, #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif +#ifndef NOMINMAX +#define NOMINMAX +#endif #include <windows.h> // for UTF-8 #endif @@ -530,8 +592,11 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height, #include <limits> #include <string> #include <vector> +#include <set> -#if __cplusplus > 199711L +// https://stackoverflow.com/questions/5047971/how-do-i-check-for-c11-support +#if __cplusplus > 199711L || (defined(_MSC_VER) && _MSC_VER >= 1900) +#define TINYEXR_HAS_CXX11 (1) // C++11 #include <cstdint> @@ -547,6 +612,7 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height, #endif #if TINYEXR_USE_MINIZ +#include <miniz.h> #else // Issue #46. Please include your own zlib-compatible API header before // including `tinyexr.h` @@ -588,6467 +654,6 @@ typedef long long tinyexr_int64; #endif #endif -#if TINYEXR_USE_MINIZ - -namespace miniz { - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wc++11-long-long" -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wpadded" -#pragma clang diagnostic ignored "-Wsign-conversion" -#pragma clang diagnostic ignored "-Wc++11-extensions" -#pragma clang diagnostic ignored "-Wconversion" -#pragma clang diagnostic ignored "-Wunused-function" -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#pragma clang diagnostic ignored "-Wundef" - -#if __has_warning("-Wcomma") -#pragma clang diagnostic ignored "-Wcomma" -#endif - -#if __has_warning("-Wmacro-redefined") -#pragma clang diagnostic ignored "-Wmacro-redefined" -#endif - -#if __has_warning("-Wcast-qual") -#pragma clang diagnostic ignored "-Wcast-qual" -#endif - -#if 
__has_warning("-Wzero-as-null-pointer-constant") -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif - -#if __has_warning("-Wtautological-constant-compare") -#pragma clang diagnostic ignored "-Wtautological-constant-compare" -#endif - -#if __has_warning("-Wextra-semi-stmt") -#pragma clang diagnostic ignored "-Wextra-semi-stmt" -#endif - -#endif - -/* miniz.c v1.15 - public domain deflate/inflate, zlib-subset, ZIP - reading/writing/appending, PNG writing - See "unlicense" statement at the end of this file. - Rich Geldreich <richgel99@gmail.com>, last updated Oct. 13, 2013 - Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: - http://www.ietf.org/rfc/rfc1951.txt - - Most API's defined in miniz.c are optional. For example, to disable the - archive related functions just define - MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO - (see the list below for more macros). - - * Change History - 10/13/13 v1.15 r4 - Interim bugfix release while I work on the next major - release with Zip64 support (almost there!): - - Critical fix for the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY bug - (thanks kahmyong.moon@hp.com) which could cause locate files to not find - files. This bug - would only have occurred in earlier versions if you explicitly used this - flag, OR if you used mz_zip_extract_archive_file_to_heap() or - mz_zip_add_mem_to_archive_file_in_place() - (which used this flag). If you can't switch to v1.15 but want to fix - this bug, just remove the uses of this flag from both helper funcs (and of - course don't use the flag). - - Bugfix in mz_zip_reader_extract_to_mem_no_alloc() from kymoon when - pUser_read_buf is not NULL and compressed size is > uncompressed size - - Fixing mz_zip_reader_extract_*() funcs so they don't try to extract - compressed data from directory entries, to account for weird zipfiles which - contain zero-size compressed data on dir entries. 
- Hopefully this fix won't cause any issues on weird zip archives, - because it assumes the low 16-bits of zip external attributes are DOS - attributes (which I believe they always are in practice). - - Fixing mz_zip_reader_is_file_a_directory() so it doesn't check the - internal attributes, just the filename and external attributes - - mz_zip_reader_init_file() - missing MZ_FCLOSE() call if the seek failed - - Added cmake support for Linux builds which builds all the examples, - tested with clang v3.3 and gcc v4.6. - - Clang fix for tdefl_write_image_to_png_file_in_memory() from toffaletti - - Merged MZ_FORCEINLINE fix from hdeanclark - - Fix <time.h> include before config #ifdef, thanks emil.brink - - Added tdefl_write_image_to_png_file_in_memory_ex(): supports Y flipping - (super useful for OpenGL apps), and explicit control over the compression - level (so you can - set it to 1 for real-time compression). - - Merged in some compiler fixes from paulharris's github repro. - - Retested this build under Windows (VS 2010, including static analysis), - tcc 0.9.26, gcc v4.6 and clang v3.3. - - Added example6.c, which dumps an image of the mandelbrot set to a PNG - file. - - Modified example2 to help test the - MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY flag more. - - In r3: Bugfix to mz_zip_writer_add_file() found during merge: Fix - possible src file fclose() leak if alignment bytes+local header file write - faiiled - - In r4: Minor bugfix to mz_zip_writer_add_from_zip_reader(): - Was pushing the wrong central dir header offset, appears harmless in this - release, but it became a problem in the zip64 branch - 5/20/12 v1.14 - MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE, - #include <time.h> (thanks fermtect). - 5/19/12 v1.13 - From jason@cornsyrup.org and kelwert@mtu.edu - Fix - mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit. 
- - Temporarily/locally slammed in "typedef unsigned long mz_ulong" and - re-ran a randomized regression test on ~500k files. - - Eliminated a bunch of warnings when compiling with GCC 32-bit/64. - - Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze - (static analysis) option and fixed all warnings (except for the silly - "Use of the comma-operator in a tested expression.." analysis warning, - which I purposely use to work around a MSVC compiler warning). - - Created 32-bit and 64-bit Codeblocks projects/workspace. Built and - tested Linux executables. The codeblocks workspace is compatible with - Linux+Win32/x64. - - Added miniz_tester solution/project, which is a useful little app - derived from LZHAM's tester app that I use as part of the regression test. - - Ran miniz.c and tinfl.c through another series of regression testing on - ~500,000 files and archives. - - Modified example5.c so it purposely disables a bunch of high-level - functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the - MINIZ_NO_STDIO bug report.) - - Fix ftell() usage in examples so they exit with an error on files which - are too large (a limitation of the examples, not miniz itself). - 4/12/12 v1.12 - More comments, added low-level example5.c, fixed a couple - minor level_and_flags issues in the archive API's. - level_and_flags can now be set to MZ_DEFAULT_COMPRESSION. Thanks to Bruce - Dawson <bruced@valvesoftware.com> for the feedback/bug report. - 5/28/11 v1.11 - Added statement from unlicense.org - 5/27/11 v1.10 - Substantial compressor optimizations: - - Level 1 is now ~4x faster than before. The L1 compressor's throughput - now varies between 70-110MB/sec. on a - - Core i7 (actual throughput varies depending on the type of data, and x64 - vs. x86). - - Improved baseline L2-L9 compression perf. Also, greatly improved - compression perf. issues on some file types. - - Refactored the compression code for better readability and - maintainability. 
- - Added level 10 compression level (L10 has slightly better ratio than - level 9, but could have a potentially large - drop in throughput on some files). - 5/15/11 v1.09 - Initial stable release. - - * Low-level Deflate/Inflate implementation notes: - - Compression: Use the "tdefl" API's. The compressor supports raw, static, - and dynamic blocks, lazy or - greedy parsing, match length filtering, RLE-only, and Huffman-only streams. - It performs and compresses - approximately as well as zlib. - - Decompression: Use the "tinfl" API's. The entire decompressor is - implemented as a single function - coroutine: see tinfl_decompress(). It supports decompression into a 32KB - (or larger power of 2) wrapping buffer, or into a memory - block large enough to hold the entire file. - - The low-level tdefl/tinfl API's do not make any use of dynamic memory - allocation. - - * zlib-style API notes: - - miniz.c implements a fairly large subset of zlib. There's enough - functionality present for it to be a drop-in - zlib replacement in many apps: - The z_stream struct, optional memory allocation callbacks - deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound - inflateInit/inflateInit2/inflate/inflateEnd - compress, compress2, compressBound, uncompress - CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly - routines. - Supports raw deflate streams or standard zlib streams with adler-32 - checking. - - Limitations: - The callback API's are not implemented yet. No support for gzip headers or - zlib static dictionaries. - I've tried to closely emulate zlib's various flavors of stream flushing - and return status codes, but - there are no guarantees that miniz.c pulls this off perfectly. - - * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, - originally written by - Alex Evans. Supports 1-4 bytes/pixel images. 
- - * ZIP archive API notes: - - The ZIP archive API's where designed with simplicity and efficiency in - mind, with just enough abstraction to - get the job done with minimal fuss. There are simple API's to retrieve file - information, read files from - existing archives, create new archives, append new files to existing - archives, or clone archive data from - one archive to another. It supports archives located in memory or the heap, - on disk (using stdio.h), - or you can specify custom file read/write callbacks. - - - Archive reading: Just call this function to read a single file from a - disk archive: - - void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const - char *pArchive_name, - size_t *pSize, mz_uint zip_flags); - - For more complex cases, use the "mz_zip_reader" functions. Upon opening an - archive, the entire central - directory is located and read as-is into memory, and subsequent file access - only occurs when reading individual files. - - - Archives file scanning: The simple way is to use this function to scan a - loaded archive for a specific file: - - int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, - const char *pComment, mz_uint flags); - - The locate operation can optionally check file comments too, which (as one - example) can be used to identify - multiple versions of the same file in an archive. This function uses a - simple linear search through the central - directory, so it's not very fast. - - Alternately, you can iterate through all the files in an archive (using - mz_zip_reader_get_num_files()) and - retrieve detailed info on each file by calling mz_zip_reader_file_stat(). - - - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer - immediately writes compressed file data - to disk and builds an exact image of the central directory in memory. The - central directory image is written - all at once at the end of the archive file when the archive is finalized. 
- - The archive writer can optionally align each file's local header and file - data to any power of 2 alignment, - which can be useful when the archive will be read from optical media. Also, - the writer supports placing - arbitrary data blobs at the very beginning of ZIP archives. Archives - written using either feature are still - readable by any ZIP tool. - - - Archive appending: The simple way to add a single file to an archive is - to call this function: - - mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, - const char *pArchive_name, - const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 - comment_size, mz_uint level_and_flags); - - The archive will be created if it doesn't already exist, otherwise it'll be - appended to. - Note the appending is done in-place and is not an atomic operation, so if - something goes wrong - during the operation it's possible the archive could be left without a - central directory (although the local - file headers and file data will be fine, so the archive will be - recoverable). - - For more complex archive modification scenarios: - 1. The safest way is to use a mz_zip_reader to read the existing archive, - cloning only those bits you want to - preserve into a new archive using using the - mz_zip_writer_add_from_zip_reader() function (which compiles the - compressed file data as-is). When you're done, delete the old archive and - rename the newly written archive, and - you're done. This is safe but requires a bunch of temporary disk space or - heap memory. - - 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using - mz_zip_writer_init_from_reader(), - append new files as needed, then finalize the archive which will write an - updated central directory to the - original archive. (This is basically what - mz_zip_add_mem_to_archive_file_in_place() does.) 
There's a - possibility that the archive's central directory could be lost with this - method if anything goes wrong, though. - - - ZIP archive support limitations: - No zip64 or spanning support. Extraction functions can only handle - unencrypted, stored or deflated files. - Requires streams capable of seeking. - - * This is a header file library, like stb_image.c. To get only a header file, - either cut and paste the - below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then - include miniz.c from it. - - * Important: For best perf. be sure to customize the below macros for your - target platform: - #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 - #define MINIZ_LITTLE_ENDIAN 1 - #define MINIZ_HAS_64BIT_REGISTERS 1 - - * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before - including miniz.c to ensure miniz - uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be - able to process large files - (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). -*/ - -#ifndef MINIZ_HEADER_INCLUDED -#define MINIZ_HEADER_INCLUDED - -//#include <stdlib.h> - -// Defines to completely disable specific portions of miniz.c: -// If all macros here are defined the only functionality remaining will be -// CRC-32, adler-32, tinfl, and tdefl. - -// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on -// stdio for file I/O. -//#define MINIZ_NO_STDIO - -// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able -// to get the current time, or -// get/set file times, and the C run-time funcs that get/set times won't be -// called. -// The current downside is the times written to your archives will be from 1979. -#define MINIZ_NO_TIME - -// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. -#define MINIZ_NO_ARCHIVE_APIS - -// Define MINIZ_NO_ARCHIVE_APIS to disable all writing related ZIP archive -// API's. 
-//#define MINIZ_NO_ARCHIVE_WRITING_APIS - -// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression -// API's. -//#define MINIZ_NO_ZLIB_APIS - -// Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent -// conflicts against stock zlib. -//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES - -// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. -// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom -// user alloc/free/realloc -// callbacks to the zlib and archive API's, and a few stand-alone helper API's -// which don't provide custom user -// functions (such as tdefl_compress_mem_to_heap() and -// tinfl_decompress_mem_to_heap()) won't work. -//#define MINIZ_NO_MALLOC - -#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) -// TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc -// on Linux -#define MINIZ_NO_TIME -#endif - -#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) -//#include <time.h> -#endif - -#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ - defined(__i386) || defined(__i486__) || defined(__i486) || \ - defined(i386) || defined(__ia64__) || defined(__x86_64__) -// MINIZ_X86_OR_X64_CPU is only used to help set the below macros. -#define MINIZ_X86_OR_X64_CPU 1 -#endif - -#if defined(__sparcv9) -// Big endian -#else -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU -// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. -#define MINIZ_LITTLE_ENDIAN 1 -#endif -#endif - -#if MINIZ_X86_OR_X64_CPU -// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient -// integer loads and stores from unaligned addresses. 
-//#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 -#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES \ - 0 // disable to suppress compiler warnings -#endif - -#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || \ - defined(_LP64) || defined(__LP64__) || defined(__ia64__) || \ - defined(__x86_64__) -// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are -// reasonably fast (and don't involve compiler generated calls to helper -// functions). -#define MINIZ_HAS_64BIT_REGISTERS 1 -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -// ------------------- zlib-style API Definitions. - -// For more compatibility with zlib, miniz.c uses unsigned long for some -// parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! -typedef unsigned long mz_ulong; - -// mz_free() internally uses the MZ_FREE() macro (which by default calls free() -// unless you've modified the MZ_MALLOC macro) to release a block allocated from -// the heap. -void mz_free(void *p); - -#define MZ_ADLER32_INIT (1) -// mz_adler32() returns the initial adler-32 value to use when called with -// ptr==NULL. -mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); - -#define MZ_CRC32_INIT (0) -// mz_crc32() returns the initial CRC-32 value to use when called with -// ptr==NULL. -mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); - -// Compression strategies. -enum { - MZ_DEFAULT_STRATEGY = 0, - MZ_FILTERED = 1, - MZ_HUFFMAN_ONLY = 2, - MZ_RLE = 3, - MZ_FIXED = 4 -}; - -// Method -#define MZ_DEFLATED 8 - -#ifndef MINIZ_NO_ZLIB_APIS - -// Heap allocation callbacks. -// Note that mz_alloc_func parameter types purpsosely differ from zlib's: -// items/size is size_t, not unsigned long. 
-typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); -typedef void (*mz_free_func)(void *opaque, void *address); -typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, - size_t size); - -#define MZ_VERSION "9.1.15" -#define MZ_VERNUM 0x91F0 -#define MZ_VER_MAJOR 9 -#define MZ_VER_MINOR 1 -#define MZ_VER_REVISION 15 -#define MZ_VER_SUBREVISION 0 - -// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The -// other values are for advanced use (refer to the zlib docs). -enum { - MZ_NO_FLUSH = 0, - MZ_PARTIAL_FLUSH = 1, - MZ_SYNC_FLUSH = 2, - MZ_FULL_FLUSH = 3, - MZ_FINISH = 4, - MZ_BLOCK = 5 -}; - -// Return status codes. MZ_PARAM_ERROR is non-standard. -enum { - MZ_OK = 0, - MZ_STREAM_END = 1, - MZ_NEED_DICT = 2, - MZ_ERRNO = -1, - MZ_STREAM_ERROR = -2, - MZ_DATA_ERROR = -3, - MZ_MEM_ERROR = -4, - MZ_BUF_ERROR = -5, - MZ_VERSION_ERROR = -6, - MZ_PARAM_ERROR = -10000 -}; - -// Compression levels: 0-9 are the standard zlib-style levels, 10 is best -// possible compression (not zlib compatible, and may be very slow), -// MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. -enum { - MZ_NO_COMPRESSION = 0, - MZ_BEST_SPEED = 1, - MZ_BEST_COMPRESSION = 9, - MZ_UBER_COMPRESSION = 10, - MZ_DEFAULT_LEVEL = 6, - MZ_DEFAULT_COMPRESSION = -1 -}; - -// Window bits -#define MZ_DEFAULT_WINDOW_BITS 15 - -struct mz_internal_state; - -// Compression/decompression stream struct. 
-typedef struct mz_stream_s { - const unsigned char *next_in; // pointer to next byte to read - unsigned int avail_in; // number of bytes available at next_in - mz_ulong total_in; // total number of bytes consumed so far - - unsigned char *next_out; // pointer to next byte to write - unsigned int avail_out; // number of bytes that can be written to next_out - mz_ulong total_out; // total number of bytes produced so far - - char *msg; // error msg (unused) - struct mz_internal_state *state; // internal state, allocated by zalloc/zfree - - mz_alloc_func - zalloc; // optional heap allocation function (defaults to malloc) - mz_free_func zfree; // optional heap free function (defaults to free) - void *opaque; // heap alloc function user pointer - - int data_type; // data_type (unused) - mz_ulong adler; // adler32 of the source or uncompressed data - mz_ulong reserved; // not used -} mz_stream; - -typedef mz_stream *mz_streamp; - -// Returns the version string of miniz.c. -const char *mz_version(void); - -// mz_deflateInit() initializes a compressor with default options: -// Parameters: -// pStream must point to an initialized mz_stream struct. -// level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. -// level 1 enables a specially optimized compression function that's been -// optimized purely for performance, not ratio. -// (This special func. is currently only enabled when -// MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) -// Return values: -// MZ_OK on success. -// MZ_STREAM_ERROR if the stream is bogus. -// MZ_PARAM_ERROR if the input parameters are bogus. -// MZ_MEM_ERROR on out of memory. 
-int mz_deflateInit(mz_streamp pStream, int level); - -// mz_deflateInit2() is like mz_deflate(), except with more control: -// Additional parameters: -// method must be MZ_DEFLATED -// window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with -// zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no -// header or footer) -// mem_level must be between [1, 9] (it's checked but ignored by miniz.c) -int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, - int mem_level, int strategy); - -// Quickly resets a compressor without having to reallocate anything. Same as -// calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). -int mz_deflateReset(mz_streamp pStream); - -// mz_deflate() compresses the input to output, consuming as much of the input -// and producing as much output as possible. -// Parameters: -// pStream is the stream to read from and write to. You must initialize/update -// the next_in, avail_in, next_out, and avail_out members. -// flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or -// MZ_FINISH. -// Return values: -// MZ_OK on success (when flushing, or if more input is needed but not -// available, and/or there's more output to be written but the output buffer -// is full). -// MZ_STREAM_END if all input has been consumed and all output bytes have been -// written. Don't call mz_deflate() on the stream anymore. -// MZ_STREAM_ERROR if the stream is bogus. -// MZ_PARAM_ERROR if one of the parameters is invalid. -// MZ_BUF_ERROR if no forward progress is possible because the input and/or -// output buffers are empty. (Fill up the input buffer or free up some output -// space and try again.) -int mz_deflate(mz_streamp pStream, int flush); - -// mz_deflateEnd() deinitializes a compressor: -// Return values: -// MZ_OK on success. -// MZ_STREAM_ERROR if the stream is bogus. 
-int mz_deflateEnd(mz_streamp pStream); - -// mz_deflateBound() returns a (very) conservative upper bound on the amount of -// data that could be generated by deflate(), assuming flush is set to only -// MZ_NO_FLUSH or MZ_FINISH. -mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); - -// Single-call compression functions mz_compress() and mz_compress2(): -// Returns MZ_OK on success, or one of the error codes from mz_deflate() on -// failure. -int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len); -int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len, int level); - -// mz_compressBound() returns a (very) conservative upper bound on the amount of -// data that could be generated by calling mz_compress(). -mz_ulong mz_compressBound(mz_ulong source_len); - -// Initializes a decompressor. -int mz_inflateInit(mz_streamp pStream); - -// mz_inflateInit2() is like mz_inflateInit() with an additional option that -// controls the window size and whether or not the stream has been wrapped with -// a zlib header/footer: -// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -// -MZ_DEFAULT_WINDOW_BITS (raw deflate). -int mz_inflateInit2(mz_streamp pStream, int window_bits); - -// Decompresses the input stream to the output, consuming only as much of the -// input as needed, and writing as much to the output as possible. -// Parameters: -// pStream is the stream to read from and write to. You must initialize/update -// the next_in, avail_in, next_out, and avail_out members. -// flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. -// On the first call, if flush is MZ_FINISH it's assumed the input and output -// buffers are both sized large enough to decompress the entire stream in a -// single call (this is slightly faster). 
-// MZ_FINISH implies that there are no more source bytes available beside -// what's already in the input buffer, and that the output buffer is large -// enough to hold the rest of the decompressed data. -// Return values: -// MZ_OK on success. Either more input is needed but not available, and/or -// there's more output to be written but the output buffer is full. -// MZ_STREAM_END if all needed input has been consumed and all output bytes -// have been written. For zlib streams, the adler-32 of the decompressed data -// has also been verified. -// MZ_STREAM_ERROR if the stream is bogus. -// MZ_DATA_ERROR if the deflate stream is invalid. -// MZ_PARAM_ERROR if one of the parameters is invalid. -// MZ_BUF_ERROR if no forward progress is possible because the input buffer is -// empty but the inflater needs more input to continue, or if the output -// buffer is not large enough. Call mz_inflate() again -// with more input data, or with more room in the output buffer (except when -// using single call decompression, described above). -int mz_inflate(mz_streamp pStream, int flush); - -// Deinitializes a decompressor. -int mz_inflateEnd(mz_streamp pStream); - -// Single-call decompression. -// Returns MZ_OK on success, or one of the error codes from mz_inflate() on -// failure. -int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len); - -// Returns a string description of the specified error code, or NULL if the -// error code is invalid. -const char *mz_error(int err); - -// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used -// as a drop-in replacement for the subset of zlib that miniz.c supports. -// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you -// use zlib in the same project. 
-#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES -typedef unsigned char Byte; -typedef unsigned int uInt; -typedef mz_ulong uLong; -typedef Byte Bytef; -typedef uInt uIntf; -typedef char charf; -typedef int intf; -typedef void *voidpf; -typedef uLong uLongf; -typedef void *voidp; -typedef void *const voidpc; -#define Z_NULL 0 -#define Z_NO_FLUSH MZ_NO_FLUSH -#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH -#define Z_SYNC_FLUSH MZ_SYNC_FLUSH -#define Z_FULL_FLUSH MZ_FULL_FLUSH -#define Z_FINISH MZ_FINISH -#define Z_BLOCK MZ_BLOCK -#define Z_OK MZ_OK -#define Z_STREAM_END MZ_STREAM_END -#define Z_NEED_DICT MZ_NEED_DICT -#define Z_ERRNO MZ_ERRNO -#define Z_STREAM_ERROR MZ_STREAM_ERROR -#define Z_DATA_ERROR MZ_DATA_ERROR -#define Z_MEM_ERROR MZ_MEM_ERROR -#define Z_BUF_ERROR MZ_BUF_ERROR -#define Z_VERSION_ERROR MZ_VERSION_ERROR -#define Z_PARAM_ERROR MZ_PARAM_ERROR -#define Z_NO_COMPRESSION MZ_NO_COMPRESSION -#define Z_BEST_SPEED MZ_BEST_SPEED -#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION -#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION -#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY -#define Z_FILTERED MZ_FILTERED -#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY -#define Z_RLE MZ_RLE -#define Z_FIXED MZ_FIXED -#define Z_DEFLATED MZ_DEFLATED -#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS -#define alloc_func mz_alloc_func -#define free_func mz_free_func -#define internal_state mz_internal_state -#define z_stream mz_stream -#define deflateInit mz_deflateInit -#define deflateInit2 mz_deflateInit2 -#define deflateReset mz_deflateReset -#define deflate mz_deflate -#define deflateEnd mz_deflateEnd -#define deflateBound mz_deflateBound -#define compress mz_compress -#define compress2 mz_compress2 -#define compressBound mz_compressBound -#define inflateInit mz_inflateInit -#define inflateInit2 mz_inflateInit2 -#define inflate mz_inflate -#define inflateEnd mz_inflateEnd -#define uncompress mz_uncompress -#define crc32 mz_crc32 -#define adler32 mz_adler32 -#define MAX_WBITS 15 -#define 
MAX_MEM_LEVEL 9 -#define zError mz_error -#define ZLIB_VERSION MZ_VERSION -#define ZLIB_VERNUM MZ_VERNUM -#define ZLIB_VER_MAJOR MZ_VER_MAJOR -#define ZLIB_VER_MINOR MZ_VER_MINOR -#define ZLIB_VER_REVISION MZ_VER_REVISION -#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION -#define zlibVersion mz_version -#define zlib_version mz_version() -#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES - -#endif // MINIZ_NO_ZLIB_APIS - -// ------------------- Types and macros - -typedef unsigned char mz_uint8; -typedef signed short mz_int16; -typedef unsigned short mz_uint16; -typedef unsigned int mz_uint32; -typedef unsigned int mz_uint; -typedef long long mz_int64; -typedef unsigned long long mz_uint64; -typedef int mz_bool; - -#define MZ_FALSE (0) -#define MZ_TRUE (1) - -// An attempt to work around MSVC's spammy "warning C4127: conditional -// expression is constant" message. -#ifdef _MSC_VER -#define MZ_MACRO_END while (0, 0) -#else -#define MZ_MACRO_END while (0) -#endif - -// ------------------- ZIP archive reading/writing - -#ifndef MINIZ_NO_ARCHIVE_APIS - -enum { - MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024, - MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260, - MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256 -}; - -typedef struct { - mz_uint32 m_file_index; - mz_uint32 m_central_dir_ofs; - mz_uint16 m_version_made_by; - mz_uint16 m_version_needed; - mz_uint16 m_bit_flag; - mz_uint16 m_method; -#ifndef MINIZ_NO_TIME - time_t m_time; -#endif - mz_uint32 m_crc32; - mz_uint64 m_comp_size; - mz_uint64 m_uncomp_size; - mz_uint16 m_internal_attr; - mz_uint32 m_external_attr; - mz_uint64 m_local_header_ofs; - mz_uint32 m_comment_size; - char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; - char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; -} mz_zip_archive_file_stat; - -typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, - void *pBuf, size_t n); -typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, - const void *pBuf, size_t n); - -struct 
mz_zip_internal_state_tag; -typedef struct mz_zip_internal_state_tag mz_zip_internal_state; - -typedef enum { - MZ_ZIP_MODE_INVALID = 0, - MZ_ZIP_MODE_READING = 1, - MZ_ZIP_MODE_WRITING = 2, - MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 -} mz_zip_mode; - -typedef struct mz_zip_archive_tag { - mz_uint64 m_archive_size; - mz_uint64 m_central_directory_file_ofs; - mz_uint m_total_files; - mz_zip_mode m_zip_mode; - - mz_uint m_file_offset_alignment; - - mz_alloc_func m_pAlloc; - mz_free_func m_pFree; - mz_realloc_func m_pRealloc; - void *m_pAlloc_opaque; - - mz_file_read_func m_pRead; - mz_file_write_func m_pWrite; - void *m_pIO_opaque; - - mz_zip_internal_state *m_pState; - -} mz_zip_archive; - -typedef enum { - MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, - MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, - MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, - MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800 -} mz_zip_flags; - -// ZIP archive reading - -// Inits a ZIP archive reader. -// These functions read and validate the archive's central directory. -mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, - mz_uint32 flags); -mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, - size_t size, mz_uint32 flags); - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, - mz_uint32 flags); -#endif - -// Returns the total number of files in the archive. -mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip); - -// Returns detailed information about an archive file entry. -mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, - mz_zip_archive_file_stat *pStat); - -// Determines if an archive file entry is a directory entry. -mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, - mz_uint file_index); -mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, - mz_uint file_index); - -// Retrieves the filename of an archive file entry. 
-// Returns the number of bytes written to pFilename, or if filename_buf_size is -// 0 this function returns the number of bytes needed to fully store the -// filename. -mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, - char *pFilename, mz_uint filename_buf_size); - -// Attempts to locates a file in the archive's central directory. -// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH -// Returns -1 if the file cannot be found. -int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, - const char *pComment, mz_uint flags); - -// Extracts a archive file to a memory buffer using no memory allocation. -mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, - mz_uint file_index, void *pBuf, - size_t buf_size, mz_uint flags, - void *pUser_read_buf, - size_t user_read_buf_size); -mz_bool mz_zip_reader_extract_file_to_mem_no_alloc( - mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, - mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); - -// Extracts a archive file to a memory buffer. -mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, - void *pBuf, size_t buf_size, - mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, - const char *pFilename, void *pBuf, - size_t buf_size, mz_uint flags); - -// Extracts a archive file to a dynamically allocated heap buffer. -void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, - size_t *pSize, mz_uint flags); -void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, - const char *pFilename, size_t *pSize, - mz_uint flags); - -// Extracts a archive file using a callback function to output the file's data. 
-mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, - mz_uint file_index, - mz_file_write_func pCallback, - void *pOpaque, mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, - const char *pFilename, - mz_file_write_func pCallback, - void *pOpaque, mz_uint flags); - -#ifndef MINIZ_NO_STDIO -// Extracts a archive file to a disk file and sets its last accessed and -// modified times. -// This function only extracts files, not archive directory records. -mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, - const char *pDst_filename, mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, - const char *pArchive_filename, - const char *pDst_filename, - mz_uint flags); -#endif - -// Ends archive reading, freeing all allocations, and closing the input archive -// file if mz_zip_reader_init_file() was used. -mz_bool mz_zip_reader_end(mz_zip_archive *pZip); - -// ZIP archive writing - -#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -// Inits a ZIP archive writer. -mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); -mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, - size_t size_to_reserve_at_beginning, - size_t initial_allocation_size); - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, - mz_uint64 size_to_reserve_at_beginning); -#endif - -// Converts a ZIP archive reader object into a writer object, to allow efficient -// in-place file appends to occur on an existing archive. -// For archives opened using mz_zip_reader_init_file, pFilename must be the -// archive's filename so it can be reopened for writing. If the file can't be -// reopened, mz_zip_reader_end() will be called. -// For archives opened using mz_zip_reader_init_mem, the memory block must be -// growable using the realloc callback (which defaults to realloc unless you've -// overridden it). 
-// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's -// user provided m_pWrite function cannot be NULL. -// Note: In-place archive modification is not recommended unless you know what -// you're doing, because if execution stops or something goes wrong before -// the archive is finalized the file's central directory will be hosed. -mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, - const char *pFilename); - -// Adds the contents of a memory buffer to an archive. These functions record -// the current local time into the archive. -// To add a directory entry, call this method with an archive name ending in a -// forwardslash with empty buffer. -// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, -// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or -// just set to MZ_DEFAULT_COMPRESSION. -mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, - const void *pBuf, size_t buf_size, - mz_uint level_and_flags); -mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, - const char *pArchive_name, const void *pBuf, - size_t buf_size, const void *pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, mz_uint64 uncomp_size, - mz_uint32 uncomp_crc32); - -#ifndef MINIZ_NO_STDIO -// Adds the contents of a disk file to an archive. This function also records -// the disk file's modified time into the archive. -// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, -// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or -// just set to MZ_DEFAULT_COMPRESSION. -mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, - const char *pSrc_filename, const void *pComment, - mz_uint16 comment_size, mz_uint level_and_flags); -#endif - -// Adds a file to an archive by fully cloning the data from another archive. 
-// This function fully clones the source file's compressed data (no -// recompression), along with its full filename, extra data, and comment fields. -mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, - mz_zip_archive *pSource_zip, - mz_uint file_index); - -// Finalizes the archive by writing the central directory records followed by -// the end of central directory record. -// After an archive is finalized, the only valid call on the mz_zip_archive -// struct is mz_zip_writer_end(). -// An archive must be manually finalized by calling this function for it to be -// valid. -mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); -mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, - size_t *pSize); - -// Ends archive writing, freeing all allocations, and closing the output file if -// mz_zip_writer_init_file() was used. -// Note for the archive to be valid, it must have been finalized before ending. -mz_bool mz_zip_writer_end(mz_zip_archive *pZip); - -// Misc. high-level helper functions: - -// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) -// appends a memory blob to a ZIP archive. -// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, -// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or -// just set to MZ_DEFAULT_COMPRESSION. -mz_bool mz_zip_add_mem_to_archive_file_in_place( - const char *pZip_filename, const char *pArchive_name, const void *pBuf, - size_t buf_size, const void *pComment, mz_uint16 comment_size, - mz_uint level_and_flags); - -// Reads a single file from an archive into a heap block. -// Returns NULL on failure. 
-void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, - const char *pArchive_name, - size_t *pSize, mz_uint zip_flags); - -#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -#endif // #ifndef MINIZ_NO_ARCHIVE_APIS - -// ------------------- Low-level Decompression API Definitions - -// Decompression flags used by tinfl_decompress(). -// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and -// ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the -// input is a raw deflate stream. -// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available -// beyond the end of the supplied input buffer. If clear, the input buffer -// contains all remaining input. -// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large -// enough to hold the entire decompressed stream. If clear, the output buffer is -// at least the size of the dictionary (typically 32KB). -// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the -// decompressed bytes. -enum { - TINFL_FLAG_PARSE_ZLIB_HEADER = 1, - TINFL_FLAG_HAS_MORE_INPUT = 2, - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, - TINFL_FLAG_COMPUTE_ADLER32 = 8 -}; - -// High level decompression functions: -// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block -// allocated via malloc(). -// On entry: -// pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data -// to decompress. -// On return: -// Function returns a pointer to the decompressed data, or NULL on failure. -// *pOut_len will be set to the decompressed data's size, which could be larger -// than src_buf_len on uncompressible data. -// The caller must call mz_free() on the returned block when it's no longer -// needed. -void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, - size_t *pOut_len, int flags); - -// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block -// in memory. 
-// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes -// written on success. -#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) -size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, - const void *pSrc_buf, size_t src_buf_len, - int flags); - -// tinfl_decompress_mem_to_callback() decompresses a block in memory to an -// internal 32KB buffer, and a user provided callback function will be called to -// flush the buffer. -// Returns 1 on success or 0 on failure. -typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); -int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, - tinfl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags); - -struct tinfl_decompressor_tag; -typedef struct tinfl_decompressor_tag tinfl_decompressor; - -// Max size of LZ dictionary. -#define TINFL_LZ_DICT_SIZE 32768 - -// Return status. -typedef enum { - TINFL_STATUS_BAD_PARAM = -3, - TINFL_STATUS_ADLER32_MISMATCH = -2, - TINFL_STATUS_FAILED = -1, - TINFL_STATUS_DONE = 0, - TINFL_STATUS_NEEDS_MORE_INPUT = 1, - TINFL_STATUS_HAS_MORE_OUTPUT = 2 -} tinfl_status; - -// Initializes the decompressor to its initial state. -#define tinfl_init(r) \ - do { \ - (r)->m_state = 0; \ - } \ - MZ_MACRO_END -#define tinfl_get_adler32(r) (r)->m_check_adler32 - -// Main low-level decompressor coroutine function. This is the only function -// actually needed for decompression. All the other functions are just -// high-level helpers for improved usability. -// This is a universal API, i.e. it can be used as a building block to build any -// desired higher level decompression API. In the limit case, it can be called -// once per every byte input or output. 
-tinfl_status tinfl_decompress(tinfl_decompressor *r, - const mz_uint8 *pIn_buf_next, - size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, - mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, - const mz_uint32 decomp_flags); - -// Internal/private bits follow. -enum { - TINFL_MAX_HUFF_TABLES = 3, - TINFL_MAX_HUFF_SYMBOLS_0 = 288, - TINFL_MAX_HUFF_SYMBOLS_1 = 32, - TINFL_MAX_HUFF_SYMBOLS_2 = 19, - TINFL_FAST_LOOKUP_BITS = 10, - TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS -}; - -typedef struct { - mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; - mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], - m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; -} tinfl_huff_table; - -#if MINIZ_HAS_64BIT_REGISTERS -#define TINFL_USE_64BIT_BITBUF 1 -#endif - -#if TINFL_USE_64BIT_BITBUF -typedef mz_uint64 tinfl_bit_buf_t; -#define TINFL_BITBUF_SIZE (64) -#else -typedef mz_uint32 tinfl_bit_buf_t; -#define TINFL_BITBUF_SIZE (32) -#endif - -struct tinfl_decompressor_tag { - mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, - m_check_adler32, m_dist, m_counter, m_num_extra, - m_table_sizes[TINFL_MAX_HUFF_TABLES]; - tinfl_bit_buf_t m_bit_buf; - size_t m_dist_from_out_buf_start; - tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; - mz_uint8 m_raw_header[4], - m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; -}; - -// ------------------- Low-level Compression API Definitions - -// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly -// slower, and raw/dynamic blocks will be output more frequently). -#define TDEFL_LESS_MEMORY 0 - -// tdefl_init() compression flags logically OR'd together (low 12 bits contain -// the max. number of probes per dictionary search): -// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes -// per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap -// compression), 4095=Huffman+LZ (slowest/best compression). 
-enum { - TDEFL_HUFFMAN_ONLY = 0, - TDEFL_DEFAULT_MAX_PROBES = 128, - TDEFL_MAX_PROBES_MASK = 0xFFF -}; - -// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before -// the deflate data, and the Adler-32 of the source data at the end. Otherwise, -// you'll get raw deflate data. -// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even -// when not writing zlib headers). -// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more -// efficient lazy parsing. -// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's -// initialization time to the minimum, but the output may vary from run to run -// given the same input (depending on the contents of memory). -// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) -// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. -// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. -// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. -// The low 12 bits are reserved to control the max # of hash probes per -// dictionary lookup (see TDEFL_MAX_PROBES_MASK). -enum { - TDEFL_WRITE_ZLIB_HEADER = 0x01000, - TDEFL_COMPUTE_ADLER32 = 0x02000, - TDEFL_GREEDY_PARSING_FLAG = 0x04000, - TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, - TDEFL_RLE_MATCHES = 0x10000, - TDEFL_FILTER_MATCHES = 0x20000, - TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, - TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 -}; - -// High level compression functions: -// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block -// allocated via malloc(). -// On entry: -// pSrc_buf, src_buf_len: Pointer and size of source block to compress. -// flags: The max match finder probes (default is 128) logically OR'd against -// the above flags. Higher probes are slower but improve compression. -// On return: -// Function returns a pointer to the compressed data, or NULL on failure. 
-// *pOut_len will be set to the compressed data's size, which could be larger -// than src_buf_len on uncompressible data. -// The caller must free() the returned block when it's no longer needed. -void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, - size_t *pOut_len, int flags); - -// tdefl_compress_mem_to_mem() compresses a block in memory to another block in -// memory. -// Returns 0 on failure. -size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, - const void *pSrc_buf, size_t src_buf_len, - int flags); - -// Compresses an image to a compressed PNG file in memory. -// On entry: -// pImage, w, h, and num_chans describe the image to compress. num_chans may be -// 1, 2, 3, or 4. -// The image pitch in bytes per scanline will be w*num_chans. The leftmost -// pixel on the top scanline is stored first in memory. -// level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, -// MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL -// If flip is true, the image will be flipped on the Y axis (useful for OpenGL -// apps). -// On return: -// Function returns a pointer to the compressed data, or NULL on failure. -// *pLen_out will be set to the size of the PNG image file. -// The caller must mz_free() the returned heap block (which will typically be -// larger than *pLen_out) when it's no longer needed. -void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, - int h, int num_chans, - size_t *pLen_out, - mz_uint level, mz_bool flip); -void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, - int num_chans, size_t *pLen_out); - -// Output stream interface. The compressor uses this interface to write -// compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. -typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len, - void *pUser); - -// tdefl_compress_mem_to_output() compresses a block to an output stream. 
The -// above helpers use this function internally. -mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, - tdefl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags); - -enum { - TDEFL_MAX_HUFF_TABLES = 3, - TDEFL_MAX_HUFF_SYMBOLS_0 = 288, - TDEFL_MAX_HUFF_SYMBOLS_1 = 32, - TDEFL_MAX_HUFF_SYMBOLS_2 = 19, - TDEFL_LZ_DICT_SIZE = 32768, - TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, - TDEFL_MIN_MATCH_LEN = 3, - TDEFL_MAX_MATCH_LEN = 258 -}; - -// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed -// output block (using static/fixed Huffman codes). -#if TDEFL_LESS_MEMORY -enum { - TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, - TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, - TDEFL_MAX_HUFF_SYMBOLS = 288, - TDEFL_LZ_HASH_BITS = 12, - TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, - TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, - TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS -}; -#else -enum { - TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, - TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, - TDEFL_MAX_HUFF_SYMBOLS = 288, - TDEFL_LZ_HASH_BITS = 15, - TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, - TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, - TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS -}; -#endif - -// The low-level tdefl functions below may be used directly if the above helper -// functions aren't flexible enough. The low-level functions don't make any heap -// allocations, unlike the above helper functions. -typedef enum { - TDEFL_STATUS_BAD_PARAM = -2, - TDEFL_STATUS_PUT_BUF_FAILED = -1, - TDEFL_STATUS_OKAY = 0, - TDEFL_STATUS_DONE = 1 -} tdefl_status; - -// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums -typedef enum { - TDEFL_NO_FLUSH = 0, - TDEFL_SYNC_FLUSH = 2, - TDEFL_FULL_FLUSH = 3, - TDEFL_FINISH = 4 -} tdefl_flush; - -// tdefl's compression state structure. 
-typedef struct { - tdefl_put_buf_func_ptr m_pPut_buf_func; - void *m_pPut_buf_user; - mz_uint m_flags, m_max_probes[2]; - int m_greedy_parsing; - mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; - mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; - mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, - m_bit_buffer; - mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, - m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, - m_wants_to_finish; - tdefl_status m_prev_return_status; - const void *m_pIn_buf; - void *m_pOut_buf; - size_t *m_pIn_buf_size, *m_pOut_buf_size; - tdefl_flush m_flush; - const mz_uint8 *m_pSrc; - size_t m_src_buf_left, m_out_buf_ofs; - mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; - mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; - mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; - mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; - mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; -} tdefl_compressor; - -// Initializes the compressor. -// There is no corresponding deinit() function because the tdefl API's do not -// dynamically allocate memory. -// pBut_buf_func: If NULL, output data will be supplied to the specified -// callback. In this case, the user should call the tdefl_compress_buffer() API -// for compression. -// If pBut_buf_func is NULL the user should always call the tdefl_compress() -// API. -// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, -// etc.) 
-tdefl_status tdefl_init(tdefl_compressor *d, - tdefl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags); - -// Compresses a block of data, consuming as much of the specified input buffer -// as possible, and writing as much compressed data to the specified output -// buffer as possible. -tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, - size_t *pIn_buf_size, void *pOut_buf, - size_t *pOut_buf_size, tdefl_flush flush); - -// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a -// non-NULL tdefl_put_buf_func_ptr. -// tdefl_compress_buffer() always consumes the entire input buffer. -tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, - size_t in_buf_size, tdefl_flush flush); - -tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); -mz_uint32 tdefl_get_adler32(tdefl_compressor *d); - -// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't -// defined, because it uses some of its macros. -#ifndef MINIZ_NO_ZLIB_APIS -// Create tdefl_compress() flags given zlib-style compression parameters. -// level may range from [0,10] (where 10 is absolute max compression, but may be -// much slower on some files) -// window_bits may be -15 (raw deflate) or 15 (zlib) -// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, -// MZ_RLE, or MZ_FIXED -mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, - int strategy); -#endif // #ifndef MINIZ_NO_ZLIB_APIS - -#ifdef __cplusplus -} -#endif - -#endif // MINIZ_HEADER_INCLUDED - -// ------------------- End of Header: Implementation follows. (If you only want -// the header, define MINIZ_HEADER_FILE_ONLY.) - -#ifndef MINIZ_HEADER_FILE_ONLY - -typedef unsigned char mz_validate_uint16[sizeof(mz_uint16) == 2 ? 1 : -1]; -typedef unsigned char mz_validate_uint32[sizeof(mz_uint32) == 4 ? 1 : -1]; -typedef unsigned char mz_validate_uint64[sizeof(mz_uint64) == 8 ? 
1 : -1]; - -//#include <assert.h> -//#include <string.h> - -#define MZ_ASSERT(x) assert(x) - -#ifdef MINIZ_NO_MALLOC -#define MZ_MALLOC(x) NULL -#define MZ_FREE(x) (void)x, ((void)0) -#define MZ_REALLOC(p, x) NULL -#else -#define MZ_MALLOC(x) malloc(x) -#define MZ_FREE(x) free(x) -#define MZ_REALLOC(p, x) realloc(p, x) -#endif - -#define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b)) -#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b)) -#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN -#define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) -#define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) -#else -#define MZ_READ_LE16(p) \ - ((mz_uint32)(((const mz_uint8 *)(p))[0]) | \ - ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) -#define MZ_READ_LE32(p) \ - ((mz_uint32)(((const mz_uint8 *)(p))[0]) | \ - ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | \ - ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | \ - ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) -#endif - -#ifdef _MSC_VER -#define MZ_FORCEINLINE __forceinline -#elif defined(__GNUC__) -#define MZ_FORCEINLINE inline __attribute__((__always_inline__)) -#else -#define MZ_FORCEINLINE inline -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -// ------------------- zlib-style API's - -mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) { - mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); - size_t block_len = buf_len % 5552; - if (!ptr) return MZ_ADLER32_INIT; - while (buf_len) { - for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { - s1 += ptr[0], s2 += s1; - s1 += ptr[1], s2 += s1; - s1 += ptr[2], s2 += s1; - s1 += ptr[3], s2 += s1; - s1 += ptr[4], s2 += s1; - s1 += ptr[5], s2 += s1; - s1 += ptr[6], s2 += s1; - s1 += ptr[7], s2 += s1; - } - for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; - s1 %= 65521U, s2 %= 65521U; - buf_len -= block_len; - block_len = 5552; - } - return (s2 << 16) + s1; -} - -// 
Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C -// implementation that balances processor cache usage against speed": -// http://www.geocities.com/malbrain/ -mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) { - static const mz_uint32 s_crc32[16] = { - 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, - 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, - 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c}; - mz_uint32 crcu32 = (mz_uint32)crc; - if (!ptr) return MZ_CRC32_INIT; - crcu32 = ~crcu32; - while (buf_len--) { - mz_uint8 b = *ptr++; - crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; - crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; - } - return ~crcu32; -} - -void mz_free(void *p) { MZ_FREE(p); } - -#ifndef MINIZ_NO_ZLIB_APIS - -static void *def_alloc_func(void *opaque, size_t items, size_t size) { - (void)opaque, (void)items, (void)size; - return MZ_MALLOC(items * size); -} -static void def_free_func(void *opaque, void *address) { - (void)opaque, (void)address; - MZ_FREE(address); -} -// static void *def_realloc_func(void *opaque, void *address, size_t items, -// size_t size) { -// (void)opaque, (void)address, (void)items, (void)size; -// return MZ_REALLOC(address, items * size); -//} - -const char *mz_version(void) { return MZ_VERSION; } - -int mz_deflateInit(mz_streamp pStream, int level) { - return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, - MZ_DEFAULT_STRATEGY); -} - -int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, - int mem_level, int strategy) { - tdefl_compressor *pComp; - mz_uint comp_flags = - TDEFL_COMPUTE_ADLER32 | - tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); - - if (!pStream) return MZ_STREAM_ERROR; - if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || - ((window_bits != MZ_DEFAULT_WINDOW_BITS) && - (-window_bits != 
MZ_DEFAULT_WINDOW_BITS))) - return MZ_PARAM_ERROR; - - pStream->data_type = 0; - pStream->adler = MZ_ADLER32_INIT; - pStream->msg = NULL; - pStream->reserved = 0; - pStream->total_in = 0; - pStream->total_out = 0; - if (!pStream->zalloc) pStream->zalloc = def_alloc_func; - if (!pStream->zfree) pStream->zfree = def_free_func; - - pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, - sizeof(tdefl_compressor)); - if (!pComp) return MZ_MEM_ERROR; - - pStream->state = (struct mz_internal_state *)pComp; - - if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) { - mz_deflateEnd(pStream); - return MZ_PARAM_ERROR; - } - - return MZ_OK; -} - -int mz_deflateReset(mz_streamp pStream) { - if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || - (!pStream->zfree)) - return MZ_STREAM_ERROR; - pStream->total_in = pStream->total_out = 0; - tdefl_init((tdefl_compressor *)pStream->state, NULL, NULL, - ((tdefl_compressor *)pStream->state)->m_flags); - return MZ_OK; -} - -int mz_deflate(mz_streamp pStream, int flush) { - size_t in_bytes, out_bytes; - mz_ulong orig_total_in, orig_total_out; - int mz_status = MZ_OK; - - if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || - (!pStream->next_out)) - return MZ_STREAM_ERROR; - if (!pStream->avail_out) return MZ_BUF_ERROR; - - if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; - - if (((tdefl_compressor *)pStream->state)->m_prev_return_status == - TDEFL_STATUS_DONE) - return (flush == MZ_FINISH) ? 
MZ_STREAM_END : MZ_BUF_ERROR; - - orig_total_in = pStream->total_in; - orig_total_out = pStream->total_out; - for (;;) { - tdefl_status defl_status; - in_bytes = pStream->avail_in; - out_bytes = pStream->avail_out; - - defl_status = tdefl_compress((tdefl_compressor *)pStream->state, - pStream->next_in, &in_bytes, pStream->next_out, - &out_bytes, (tdefl_flush)flush); - pStream->next_in += (mz_uint)in_bytes; - pStream->avail_in -= (mz_uint)in_bytes; - pStream->total_in += (mz_uint)in_bytes; - pStream->adler = tdefl_get_adler32((tdefl_compressor *)pStream->state); - - pStream->next_out += (mz_uint)out_bytes; - pStream->avail_out -= (mz_uint)out_bytes; - pStream->total_out += (mz_uint)out_bytes; - - if (defl_status < 0) { - mz_status = MZ_STREAM_ERROR; - break; - } else if (defl_status == TDEFL_STATUS_DONE) { - mz_status = MZ_STREAM_END; - break; - } else if (!pStream->avail_out) - break; - else if ((!pStream->avail_in) && (flush != MZ_FINISH)) { - if ((flush) || (pStream->total_in != orig_total_in) || - (pStream->total_out != orig_total_out)) - break; - return MZ_BUF_ERROR; // Can't make forward progress without some input. - } - } - return mz_status; -} - -int mz_deflateEnd(mz_streamp pStream) { - if (!pStream) return MZ_STREAM_ERROR; - if (pStream->state) { - pStream->zfree(pStream->opaque, pStream->state); - pStream->state = NULL; - } - return MZ_OK; -} - -mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) { - (void)pStream; - // This is really over conservative. (And lame, but it's actually pretty - // tricky to compute a true upper bound given the way tdefl's blocking works.) - return MZ_MAX(128 + (source_len * 110) / 100, - 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); -} - -int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len, int level) { - int status; - mz_stream stream; - memset(&stream, 0, sizeof(stream)); - - // In case mz_ulong is 64-bits (argh I hate longs). 
- if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; - - stream.next_in = pSource; - stream.avail_in = (mz_uint32)source_len; - stream.next_out = pDest; - stream.avail_out = (mz_uint32)*pDest_len; - - status = mz_deflateInit(&stream, level); - if (status != MZ_OK) return status; - - status = mz_deflate(&stream, MZ_FINISH); - if (status != MZ_STREAM_END) { - mz_deflateEnd(&stream); - return (status == MZ_OK) ? MZ_BUF_ERROR : status; - } - - *pDest_len = stream.total_out; - return mz_deflateEnd(&stream); -} - -int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len) { - return mz_compress2(pDest, pDest_len, pSource, source_len, - MZ_DEFAULT_COMPRESSION); -} - -mz_ulong mz_compressBound(mz_ulong source_len) { - return mz_deflateBound(NULL, source_len); -} - -typedef struct { - tinfl_decompressor m_decomp; - mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; - int m_window_bits; - mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; - tinfl_status m_last_status; -} inflate_state; - -int mz_inflateInit2(mz_streamp pStream, int window_bits) { - inflate_state *pDecomp; - if (!pStream) return MZ_STREAM_ERROR; - if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && - (-window_bits != MZ_DEFAULT_WINDOW_BITS)) - return MZ_PARAM_ERROR; - - pStream->data_type = 0; - pStream->adler = 0; - pStream->msg = NULL; - pStream->total_in = 0; - pStream->total_out = 0; - pStream->reserved = 0; - if (!pStream->zalloc) pStream->zalloc = def_alloc_func; - if (!pStream->zfree) pStream->zfree = def_free_func; - - pDecomp = (inflate_state *)pStream->zalloc(pStream->opaque, 1, - sizeof(inflate_state)); - if (!pDecomp) return MZ_MEM_ERROR; - - pStream->state = (struct mz_internal_state *)pDecomp; - - tinfl_init(&pDecomp->m_decomp); - pDecomp->m_dict_ofs = 0; - pDecomp->m_dict_avail = 0; - pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; - pDecomp->m_first_call = 1; - pDecomp->m_has_flushed = 0; - pDecomp->m_window_bits = 
window_bits; - - return MZ_OK; -} - -int mz_inflateInit(mz_streamp pStream) { - return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); -} - -int mz_inflate(mz_streamp pStream, int flush) { - inflate_state *pState; - mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; - size_t in_bytes, out_bytes, orig_avail_in; - tinfl_status status; - - if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR; - if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; - if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) - return MZ_STREAM_ERROR; - - pState = (inflate_state *)pStream->state; - if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; - orig_avail_in = pStream->avail_in; - - first_call = pState->m_first_call; - pState->m_first_call = 0; - if (pState->m_last_status < 0) return MZ_DATA_ERROR; - - if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; - pState->m_has_flushed |= (flush == MZ_FINISH); - - if ((flush == MZ_FINISH) && (first_call)) { - // MZ_FINISH on the first call implies that the input and output buffers are - // large enough to hold the entire compressed/decompressed file. 
- decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; - in_bytes = pStream->avail_in; - out_bytes = pStream->avail_out; - status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, - pStream->next_out, pStream->next_out, &out_bytes, - decomp_flags); - pState->m_last_status = status; - pStream->next_in += (mz_uint)in_bytes; - pStream->avail_in -= (mz_uint)in_bytes; - pStream->total_in += (mz_uint)in_bytes; - pStream->adler = tinfl_get_adler32(&pState->m_decomp); - pStream->next_out += (mz_uint)out_bytes; - pStream->avail_out -= (mz_uint)out_bytes; - pStream->total_out += (mz_uint)out_bytes; - - if (status < 0) - return MZ_DATA_ERROR; - else if (status != TINFL_STATUS_DONE) { - pState->m_last_status = TINFL_STATUS_FAILED; - return MZ_BUF_ERROR; - } - return MZ_STREAM_END; - } - // flush != MZ_FINISH then we must assume there's more input. - if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; - - if (pState->m_dict_avail) { - n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); - memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); - pStream->next_out += n; - pStream->avail_out -= n; - pStream->total_out += n; - pState->m_dict_avail -= n; - pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); - return ((pState->m_last_status == TINFL_STATUS_DONE) && - (!pState->m_dict_avail)) - ? 
MZ_STREAM_END - : MZ_OK; - } - - for (;;) { - in_bytes = pStream->avail_in; - out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; - - status = tinfl_decompress( - &pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, - pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); - pState->m_last_status = status; - - pStream->next_in += (mz_uint)in_bytes; - pStream->avail_in -= (mz_uint)in_bytes; - pStream->total_in += (mz_uint)in_bytes; - pStream->adler = tinfl_get_adler32(&pState->m_decomp); - - pState->m_dict_avail = (mz_uint)out_bytes; - - n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); - memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); - pStream->next_out += n; - pStream->avail_out -= n; - pStream->total_out += n; - pState->m_dict_avail -= n; - pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); - - if (status < 0) - return MZ_DATA_ERROR; // Stream is corrupted (there could be some - // uncompressed data left in the output dictionary - - // oh well). - else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) - return MZ_BUF_ERROR; // Signal caller that we can't make forward progress - // without supplying more input or by setting flush - // to MZ_FINISH. - else if (flush == MZ_FINISH) { - // The output buffer MUST be large to hold the remaining uncompressed data - // when flush==MZ_FINISH. - if (status == TINFL_STATUS_DONE) - return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; - // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's - // at least 1 more byte on the way. If there's no more room left in the - // output buffer then something is wrong. - else if (!pStream->avail_out) - return MZ_BUF_ERROR; - } else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || - (!pStream->avail_out) || (pState->m_dict_avail)) - break; - } - - return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) - ? 
MZ_STREAM_END - : MZ_OK; -} - -int mz_inflateEnd(mz_streamp pStream) { - if (!pStream) return MZ_STREAM_ERROR; - if (pStream->state) { - pStream->zfree(pStream->opaque, pStream->state); - pStream->state = NULL; - } - return MZ_OK; -} - -int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len) { - mz_stream stream; - int status; - memset(&stream, 0, sizeof(stream)); - - // In case mz_ulong is 64-bits (argh I hate longs). - if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; - - stream.next_in = pSource; - stream.avail_in = (mz_uint32)source_len; - stream.next_out = pDest; - stream.avail_out = (mz_uint32)*pDest_len; - - status = mz_inflateInit(&stream); - if (status != MZ_OK) return status; - - status = mz_inflate(&stream, MZ_FINISH); - if (status != MZ_STREAM_END) { - mz_inflateEnd(&stream); - return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR - : status; - } - *pDest_len = stream.total_out; - - return mz_inflateEnd(&stream); -} - -const char *mz_error(int err) { - static struct { - int m_err; - const char *m_pDesc; - } s_error_descs[] = {{MZ_OK, ""}, - {MZ_STREAM_END, "stream end"}, - {MZ_NEED_DICT, "need dictionary"}, - {MZ_ERRNO, "file error"}, - {MZ_STREAM_ERROR, "stream error"}, - {MZ_DATA_ERROR, "data error"}, - {MZ_MEM_ERROR, "out of memory"}, - {MZ_BUF_ERROR, "buf error"}, - {MZ_VERSION_ERROR, "version error"}, - {MZ_PARAM_ERROR, "parameter error"}}; - mz_uint i; - for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) - if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; - return NULL; -} - -#endif // MINIZ_NO_ZLIB_APIS - -// ------------------- Low-level Decompression (completely independent from all -// compression API's) - -#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) -#define TINFL_MEMSET(p, c, l) memset(p, c, l) - -#define TINFL_CR_BEGIN \ - switch (r->m_state) { \ - case 0: -#define TINFL_CR_RETURN(state_index, result) \ - do 
{ \ - status = result; \ - r->m_state = state_index; \ - goto common_exit; \ - case state_index:; \ - } \ - MZ_MACRO_END -#define TINFL_CR_RETURN_FOREVER(state_index, result) \ - do { \ - for (;;) { \ - TINFL_CR_RETURN(state_index, result); \ - } \ - } \ - MZ_MACRO_END -#define TINFL_CR_FINISH } - -// TODO: If the caller has indicated that there's no more input, and we attempt -// to read beyond the input buf, then something is wrong with the input because -// the inflator never -// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of -// the stream with 0's in this scenario. -#define TINFL_GET_BYTE(state_index, c) \ - do { \ - if (pIn_buf_cur >= pIn_buf_end) { \ - for (;;) { \ - if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \ - TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \ - if (pIn_buf_cur < pIn_buf_end) { \ - c = *pIn_buf_cur++; \ - break; \ - } \ - } else { \ - c = 0; \ - break; \ - } \ - } \ - } else \ - c = *pIn_buf_cur++; \ - } \ - MZ_MACRO_END - -#define TINFL_NEED_BITS(state_index, n) \ - do { \ - mz_uint c; \ - TINFL_GET_BYTE(state_index, c); \ - bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ - num_bits += 8; \ - } while (num_bits < (mz_uint)(n)) -#define TINFL_SKIP_BITS(state_index, n) \ - do { \ - if (num_bits < (mz_uint)(n)) { \ - TINFL_NEED_BITS(state_index, n); \ - } \ - bit_buf >>= (n); \ - num_bits -= (n); \ - } \ - MZ_MACRO_END -#define TINFL_GET_BITS(state_index, b, n) \ - do { \ - if (num_bits < (mz_uint)(n)) { \ - TINFL_NEED_BITS(state_index, n); \ - } \ - b = bit_buf & ((1 << (n)) - 1); \ - bit_buf >>= (n); \ - num_bits -= (n); \ - } \ - MZ_MACRO_END - -// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes -// remaining in the input buffer falls below 2. -// It reads just enough bytes from the input stream that are needed to decode -// the next Huffman code (and absolutely no more). 
It works by trying to fully -// decode a -// Huffman code by using whatever bits are currently present in the bit buffer. -// If this fails, it reads another byte, and tries again until it succeeds or -// until the -// bit buffer contains >=15 bits (deflate's max. Huffman code size). -#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \ - do { \ - temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \ - if (temp >= 0) { \ - code_len = temp >> 9; \ - if ((code_len) && (num_bits >= code_len)) break; \ - } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \ - code_len = TINFL_FAST_LOOKUP_BITS; \ - do { \ - temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ - } while ((temp < 0) && (num_bits >= (code_len + 1))); \ - if (temp >= 0) break; \ - } \ - TINFL_GET_BYTE(state_index, c); \ - bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ - num_bits += 8; \ - } while (num_bits < 15); - -// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex -// than you would initially expect because the zlib API expects the decompressor -// to never read -// beyond the final byte of the deflate stream. (In other words, when this macro -// wants to read another byte from the input, it REALLY needs another byte in -// order to fully -// decode the next Huffman code.) Handling this properly is particularly -// important on raw deflate (non-zlib) streams, which aren't followed by a byte -// aligned adler-32. -// The slow path is only executed at the very end of the input buffer. 
-#define TINFL_HUFF_DECODE(state_index, sym, pHuff) \ - do { \ - int temp; \ - mz_uint code_len, c; \ - if (num_bits < 15) { \ - if ((pIn_buf_end - pIn_buf_cur) < 2) { \ - TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \ - } else { \ - bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | \ - (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); \ - pIn_buf_cur += 2; \ - num_bits += 16; \ - } \ - } \ - if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= \ - 0) \ - code_len = temp >> 9, temp &= 511; \ - else { \ - code_len = TINFL_FAST_LOOKUP_BITS; \ - do { \ - temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ - } while (temp < 0); \ - } \ - sym = temp; \ - bit_buf >>= code_len; \ - num_bits -= code_len; \ - } \ - MZ_MACRO_END - -tinfl_status tinfl_decompress(tinfl_decompressor *r, - const mz_uint8 *pIn_buf_next, - size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, - mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, - const mz_uint32 decomp_flags) { - static const int s_length_base[31] = { - 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, - 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; - static const int s_length_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, - 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, - 4, 4, 5, 5, 5, 5, 0, 0, 0}; - static const int s_dist_base[32] = { - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, - 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, - 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0}; - static const int s_dist_extra[32] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, - 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, - 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; - static const mz_uint8 s_length_dezigzag[19] = { - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - static const int s_min_table_sizes[3] = {257, 1, 4}; - - tinfl_status status = TINFL_STATUS_FAILED; - mz_uint32 num_bits, dist, counter, num_extra; - tinfl_bit_buf_t bit_buf; - const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const 
pIn_buf_end = - pIn_buf_next + *pIn_buf_size; - mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = - pOut_buf_next + *pOut_buf_size; - size_t out_buf_size_mask = - (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) - ? (size_t)-1 - : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, - dist_from_out_buf_start; - - // Ensure the output buffer's size is a power of 2, unless the output buffer - // is large enough to hold the entire output file (in which case it doesn't - // matter). - if (((out_buf_size_mask + 1) & out_buf_size_mask) || - (pOut_buf_next < pOut_buf_start)) { - *pIn_buf_size = *pOut_buf_size = 0; - return TINFL_STATUS_BAD_PARAM; - } - - num_bits = r->m_num_bits; - bit_buf = r->m_bit_buf; - dist = r->m_dist; - counter = r->m_counter; - num_extra = r->m_num_extra; - dist_from_out_buf_start = r->m_dist_from_out_buf_start; - TINFL_CR_BEGIN - - bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; - r->m_z_adler32 = r->m_check_adler32 = 1; - if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) { - TINFL_GET_BYTE(1, r->m_zhdr0); - TINFL_GET_BYTE(2, r->m_zhdr1); - counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || - (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); - if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) - counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || - ((out_buf_size_mask + 1) < - (size_t)(1ULL << (8U + (r->m_zhdr0 >> 4))))); - if (counter) { - TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); - } - } - - do { - TINFL_GET_BITS(3, r->m_final, 3); - r->m_type = r->m_final >> 1; - if (r->m_type == 0) { - TINFL_SKIP_BITS(5, num_bits & 7); - for (counter = 0; counter < 4; ++counter) { - if (num_bits) - TINFL_GET_BITS(6, r->m_raw_header[counter], 8); - else - TINFL_GET_BYTE(7, r->m_raw_header[counter]); - } - if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != - (mz_uint)(0xFFFF ^ - (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { - 
TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); - } - while ((counter) && (num_bits)) { - TINFL_GET_BITS(51, dist, 8); - while (pOut_buf_cur >= pOut_buf_end) { - TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); - } - *pOut_buf_cur++ = (mz_uint8)dist; - counter--; - } - while (counter) { - size_t n; - while (pOut_buf_cur >= pOut_buf_end) { - TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); - } - while (pIn_buf_cur >= pIn_buf_end) { - if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { - TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT); - } else { - TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED); - } - } - n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), - (size_t)(pIn_buf_end - pIn_buf_cur)), - counter); - TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); - pIn_buf_cur += n; - pOut_buf_cur += n; - counter -= (mz_uint)n; - } - } else if (r->m_type == 3) { - TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); - } else { - if (r->m_type == 1) { - mz_uint8 *p = r->m_tables[0].m_code_size; - mz_uint i; - r->m_table_sizes[0] = 288; - r->m_table_sizes[1] = 32; - TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); - for (i = 0; i <= 143; ++i) *p++ = 8; - for (; i <= 255; ++i) *p++ = 9; - for (; i <= 279; ++i) *p++ = 7; - for (; i <= 287; ++i) *p++ = 8; - } else { - for (counter = 0; counter < 3; counter++) { - TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); - r->m_table_sizes[counter] += s_min_table_sizes[counter]; - } - MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); - for (counter = 0; counter < r->m_table_sizes[2]; counter++) { - mz_uint s; - TINFL_GET_BITS(14, s, 3); - r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; - } - r->m_table_sizes[2] = 19; - } - for (; (int)r->m_type >= 0; r->m_type--) { - int tree_next, tree_cur; - tinfl_huff_table *pTable; - mz_uint i, j, used_syms, total, sym_index, next_code[17], - total_syms[16]; - pTable = &r->m_tables[r->m_type]; - MZ_CLEAR_OBJ(total_syms); - MZ_CLEAR_OBJ(pTable->m_look_up); - 
MZ_CLEAR_OBJ(pTable->m_tree); - for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) - total_syms[pTable->m_code_size[i]]++; - used_syms = 0, total = 0; - next_code[0] = next_code[1] = 0; - for (i = 1; i <= 15; ++i) { - used_syms += total_syms[i]; - next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); - } - if ((65536 != total) && (used_syms > 1)) { - TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); - } - for (tree_next = -1, sym_index = 0; - sym_index < r->m_table_sizes[r->m_type]; ++sym_index) { - mz_uint rev_code = 0, l, cur_code, - code_size = pTable->m_code_size[sym_index]; - if (!code_size) continue; - cur_code = next_code[code_size]++; - for (l = code_size; l > 0; l--, cur_code >>= 1) - rev_code = (rev_code << 1) | (cur_code & 1); - if (code_size <= TINFL_FAST_LOOKUP_BITS) { - mz_int16 k = (mz_int16)((code_size << 9) | sym_index); - while (rev_code < TINFL_FAST_LOOKUP_SIZE) { - pTable->m_look_up[rev_code] = k; - rev_code += (1 << code_size); - } - continue; - } - if (0 == - (tree_cur = pTable->m_look_up[rev_code & - (TINFL_FAST_LOOKUP_SIZE - 1)])) { - pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = - (mz_int16)tree_next; - tree_cur = tree_next; - tree_next -= 2; - } - rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); - for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) { - tree_cur -= ((rev_code >>= 1) & 1); - if (!pTable->m_tree[-tree_cur - 1]) { - pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; - tree_cur = tree_next; - tree_next -= 2; - } else - tree_cur = pTable->m_tree[-tree_cur - 1]; - } - tree_cur -= ((rev_code >>= 1) & 1); - pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; - } - if (r->m_type == 2) { - for (counter = 0; - counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);) { - mz_uint s; - TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); - if (dist < 16) { - r->m_len_codes[counter++] = (mz_uint8)dist; - continue; - } - if ((dist == 16) && (!counter)) { - TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); - } - num_extra = 
"\02\03\07"[dist - 16]; - TINFL_GET_BITS(18, s, num_extra); - s += "\03\03\013"[dist - 16]; - TINFL_MEMSET(r->m_len_codes + counter, - (dist == 16) ? r->m_len_codes[counter - 1] : 0, s); - counter += s; - } - if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) { - TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); - } - TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, - r->m_table_sizes[0]); - TINFL_MEMCPY(r->m_tables[1].m_code_size, - r->m_len_codes + r->m_table_sizes[0], - r->m_table_sizes[1]); - } - } - for (;;) { - mz_uint8 *pSrc; - for (;;) { - if (((pIn_buf_end - pIn_buf_cur) < 4) || - ((pOut_buf_end - pOut_buf_cur) < 2)) { - TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); - if (counter >= 256) break; - while (pOut_buf_cur >= pOut_buf_end) { - TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); - } - *pOut_buf_cur++ = (mz_uint8)counter; - } else { - int sym2; - mz_uint code_len; -#if TINFL_USE_64BIT_BITBUF - if (num_bits < 30) { - bit_buf |= - (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); - pIn_buf_cur += 4; - num_bits += 32; - } -#else - if (num_bits < 15) { - bit_buf |= - (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); - pIn_buf_cur += 2; - num_bits += 16; - } -#endif - if ((sym2 = - r->m_tables[0] - .m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= - 0) - code_len = sym2 >> 9; - else { - code_len = TINFL_FAST_LOOKUP_BITS; - do { - sym2 = r->m_tables[0] - .m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; - } while (sym2 < 0); - } - counter = sym2; - bit_buf >>= code_len; - num_bits -= code_len; - if (counter & 256) break; - -#if !TINFL_USE_64BIT_BITBUF - if (num_bits < 15) { - bit_buf |= - (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); - pIn_buf_cur += 2; - num_bits += 16; - } -#endif - if ((sym2 = - r->m_tables[0] - .m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= - 0) - code_len = sym2 >> 9; - else { - code_len = TINFL_FAST_LOOKUP_BITS; - do { - sym2 = r->m_tables[0] - .m_tree[~sym2 + 
((bit_buf >> code_len++) & 1)]; - } while (sym2 < 0); - } - bit_buf >>= code_len; - num_bits -= code_len; - - pOut_buf_cur[0] = (mz_uint8)counter; - if (sym2 & 256) { - pOut_buf_cur++; - counter = sym2; - break; - } - pOut_buf_cur[1] = (mz_uint8)sym2; - pOut_buf_cur += 2; - } - } - if ((counter &= 511) == 256) break; - - num_extra = s_length_extra[counter - 257]; - counter = s_length_base[counter - 257]; - if (num_extra) { - mz_uint extra_bits; - TINFL_GET_BITS(25, extra_bits, num_extra); - counter += extra_bits; - } - - TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); - num_extra = s_dist_extra[dist]; - dist = s_dist_base[dist]; - if (num_extra) { - mz_uint extra_bits; - TINFL_GET_BITS(27, extra_bits, num_extra); - dist += extra_bits; - } - - dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; - if ((dist > dist_from_out_buf_start) && - (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) { - TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); - } - - pSrc = pOut_buf_start + - ((dist_from_out_buf_start - dist) & out_buf_size_mask); - - if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) { - while (counter--) { - while (pOut_buf_cur >= pOut_buf_end) { - TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); - } - *pOut_buf_cur++ = - pOut_buf_start[(dist_from_out_buf_start++ - dist) & - out_buf_size_mask]; - } - continue; - } -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES - else if ((counter >= 9) && (counter <= dist)) { - const mz_uint8 *pSrc_end = pSrc + (counter & ~7); - do { - ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; - ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; - pOut_buf_cur += 8; - } while ((pSrc += 8) < pSrc_end); - if ((counter &= 7) < 3) { - if (counter) { - pOut_buf_cur[0] = pSrc[0]; - if (counter > 1) pOut_buf_cur[1] = pSrc[1]; - pOut_buf_cur += counter; - } - continue; - } - } -#endif - do { - pOut_buf_cur[0] = pSrc[0]; - pOut_buf_cur[1] = pSrc[1]; - pOut_buf_cur[2] = pSrc[2]; - pOut_buf_cur += 3; - pSrc += 
3; - } while ((int)(counter -= 3) > 2); - if ((int)counter > 0) { - pOut_buf_cur[0] = pSrc[0]; - if ((int)counter > 1) pOut_buf_cur[1] = pSrc[1]; - pOut_buf_cur += counter; - } - } - } - } while (!(r->m_final & 1)); - if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) { - TINFL_SKIP_BITS(32, num_bits & 7); - for (counter = 0; counter < 4; ++counter) { - mz_uint s; - if (num_bits) - TINFL_GET_BITS(41, s, 8); - else - TINFL_GET_BYTE(42, s); - r->m_z_adler32 = (r->m_z_adler32 << 8) | s; - } - } - TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); - TINFL_CR_FINISH - -common_exit: - r->m_num_bits = num_bits; - r->m_bit_buf = bit_buf; - r->m_dist = dist; - r->m_counter = counter; - r->m_num_extra = num_extra; - r->m_dist_from_out_buf_start = dist_from_out_buf_start; - *pIn_buf_size = pIn_buf_cur - pIn_buf_next; - *pOut_buf_size = pOut_buf_cur - pOut_buf_next; - if ((decomp_flags & - (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && - (status >= 0)) { - const mz_uint8 *ptr = pOut_buf_next; - size_t buf_len = *pOut_buf_size; - mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, - s2 = r->m_check_adler32 >> 16; - size_t block_len = buf_len % 5552; - while (buf_len) { - for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { - s1 += ptr[0], s2 += s1; - s1 += ptr[1], s2 += s1; - s1 += ptr[2], s2 += s1; - s1 += ptr[3], s2 += s1; - s1 += ptr[4], s2 += s1; - s1 += ptr[5], s2 += s1; - s1 += ptr[6], s2 += s1; - s1 += ptr[7], s2 += s1; - } - for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; - s1 %= 65521U, s2 %= 65521U; - buf_len -= block_len; - block_len = 5552; - } - r->m_check_adler32 = (s2 << 16) + s1; - if ((status == TINFL_STATUS_DONE) && - (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && - (r->m_check_adler32 != r->m_z_adler32)) - status = TINFL_STATUS_ADLER32_MISMATCH; - } - return status; -} - -// Higher level helper functions. 
-void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, - size_t *pOut_len, int flags) { - tinfl_decompressor decomp; - void *pBuf = NULL, *pNew_buf; - size_t src_buf_ofs = 0, out_buf_capacity = 0; - *pOut_len = 0; - tinfl_init(&decomp); - for (;;) { - size_t src_buf_size = src_buf_len - src_buf_ofs, - dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; - tinfl_status status = tinfl_decompress( - &decomp, (const mz_uint8 *)pSrc_buf + src_buf_ofs, &src_buf_size, - (mz_uint8 *)pBuf, pBuf ? (mz_uint8 *)pBuf + *pOut_len : NULL, - &dst_buf_size, - (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); - if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) { - MZ_FREE(pBuf); - *pOut_len = 0; - return NULL; - } - src_buf_ofs += src_buf_size; - *pOut_len += dst_buf_size; - if (status == TINFL_STATUS_DONE) break; - new_out_buf_capacity = out_buf_capacity * 2; - if (new_out_buf_capacity < 128) new_out_buf_capacity = 128; - pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); - if (!pNew_buf) { - MZ_FREE(pBuf); - *pOut_len = 0; - return NULL; - } - pBuf = pNew_buf; - out_buf_capacity = new_out_buf_capacity; - } - return pBuf; -} - -size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, - const void *pSrc_buf, size_t src_buf_len, - int flags) { - tinfl_decompressor decomp; - tinfl_status status; - tinfl_init(&decomp); - status = - tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf, &src_buf_len, - (mz_uint8 *)pOut_buf, (mz_uint8 *)pOut_buf, &out_buf_len, - (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); - return (status != TINFL_STATUS_DONE) ? 
TINFL_DECOMPRESS_MEM_TO_MEM_FAILED - : out_buf_len; -} - -int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, - tinfl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags) { - int result = 0; - tinfl_decompressor decomp; - mz_uint8 *pDict = (mz_uint8 *)MZ_MALLOC(TINFL_LZ_DICT_SIZE); - size_t in_buf_ofs = 0, dict_ofs = 0; - if (!pDict) return TINFL_STATUS_FAILED; - tinfl_init(&decomp); - for (;;) { - size_t in_buf_size = *pIn_buf_size - in_buf_ofs, - dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; - tinfl_status status = - tinfl_decompress(&decomp, (const mz_uint8 *)pIn_buf + in_buf_ofs, - &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, - (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); - in_buf_ofs += in_buf_size; - if ((dst_buf_size) && - (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) - break; - if (status != TINFL_STATUS_HAS_MORE_OUTPUT) { - result = (status == TINFL_STATUS_DONE); - break; - } - dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); - } - MZ_FREE(pDict); - *pIn_buf_size = in_buf_ofs; - return result; -} - -// ------------------- Low-level Compression (independent from all decompression -// API's) - -// Purposely making these tables static for faster init and thread safety. 
// Literal/length symbol (257..285) for each stored match-length byte. The
// block writer indexes this with the length byte read from the LZ code buffer
// and uses the result to look up the Huffman code in table 0.
// NOTE(review): index 0 maps to symbol 257, so the stored byte is presumably
// (match length - minimum match length) -- confirm where the codes are
// recorded.
static const mz_uint16 s_tdefl_len_sym[256] = {
    257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268,
    268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272,
    272, 272, 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274,
    274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276,
    276, 276, 276, 276, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277,
    277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
    278, 278, 278, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, 279,
    279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280,
    280, 280, 280, 280, 280, 280, 280, 280, 281, 281, 281, 281, 281, 281, 281,
    281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281,
    281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 282, 282, 282, 282, 282,
    282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282,
    282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 283, 283, 283,
    283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283,
    283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 284,
    284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284,
    284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284,
    285};

// Number of extra bits that follow the length symbol, indexed by the same
// stored match-length byte as s_tdefl_len_sym. The block writer emits the low
// bits of the length byte, masked to this width.
static const mz_uint8 s_tdefl_len_extra[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0};

// Distance symbol for stored match distances below 512, indexed directly by
// the stored distance value.
static const mz_uint8 s_tdefl_small_dist_sym[512] = {
    0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8,
    8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17};

// Number of extra distance bits for stored match distances below 512, indexed
// the same way as s_tdefl_small_dist_sym.
static const mz_uint8 s_tdefl_small_dist_extra[512] = {
    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};

// Distance symbol for stored match distances of 512 and above, indexed by
// (stored distance >> 8).
static const mz_uint8 s_tdefl_large_dist_sym[128] = {
    0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24,
    24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26,
    26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27,
    27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
    29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29};

// Number of extra distance bits for stored match distances of 512 and above,
// indexed the same way as s_tdefl_large_dist_sym.
static const mz_uint8 s_tdefl_large_dist_extra[128] = {
    0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13};

// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted
// values.
-typedef struct { - mz_uint16 m_key, m_sym_index; -} tdefl_sym_freq; -static tdefl_sym_freq *tdefl_radix_sort_syms(mz_uint num_syms, - tdefl_sym_freq *pSyms0, - tdefl_sym_freq *pSyms1) { - mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; - tdefl_sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1; - MZ_CLEAR_OBJ(hist); - for (i = 0; i < num_syms; i++) { - mz_uint freq = pSyms0[i].m_key; - hist[freq & 0xFF]++; - hist[256 + ((freq >> 8) & 0xFF)]++; - } - while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) - total_passes--; - for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) { - const mz_uint32 *pHist = &hist[pass << 8]; - mz_uint offsets[256], cur_ofs = 0; - for (i = 0; i < 256; i++) { - offsets[i] = cur_ofs; - cur_ofs += pHist[i]; - } - for (i = 0; i < num_syms; i++) - pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = - pCur_syms[i]; - { - tdefl_sym_freq *t = pCur_syms; - pCur_syms = pNew_syms; - pNew_syms = t; - } - } - return pCur_syms; -} - -// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, -// alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. 
// Rewrites, in place, the m_key field of each of the n entries of A: on entry
// m_key is a symbol weight, on return it is that symbol's depth in the code
// tree built from those weights. m_sym_index is never touched.
// The caller in this file passes the array returned by tdefl_radix_sort_syms
// (ascending by m_key) and then tallies the resulting m_key values as code
// sizes.
// NOTE(review): the merge phase compares only the fronts of the leaf and
// internal-node queues, so it presumably requires ascending input -- confirm.
// NOTE(review): weight sums are truncated to 16 bits by the mz_uint16 casts;
// presumably callers keep the total within range -- confirm.
static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) {
  int root, leaf, next, avbl, used, dpth;
  if (n == 0)
    return;
  else if (n == 1) {
    // A lone symbol is given depth 1.
    A[0].m_key = 1;
    return;
  }
  // Phase 1: build the internal nodes. A[next] receives the sum of the two
  // smallest remaining weights, each taken either from the next unmerged leaf
  // (A[leaf]) or from the oldest unconsumed internal node (A[root]). A
  // consumed internal node has its key overwritten with the index of its
  // parent.
  A[0].m_key += A[1].m_key;
  root = 0;
  leaf = 2;
  for (next = 1; next < n - 1; next++) {
    // First operand of the new node.
    if (leaf >= n || A[root].m_key < A[leaf].m_key) {
      A[next].m_key = A[root].m_key;
      A[root++].m_key = (mz_uint16)next;
    } else
      A[next].m_key = A[leaf++].m_key;
    // Second operand, added to the first.
    if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key)) {
      A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key);
      A[root++].m_key = (mz_uint16)next;
    } else
      A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key);
  }
  // Phase 2: turn parent indices into depths. A[n - 2] is the tree root
  // (depth 0); every other internal node is one deeper than its parent.
  A[n - 2].m_key = 0;
  for (next = n - 3; next >= 0; next--)
    A[next].m_key = A[A[next].m_key].m_key + 1;
  // Phase 3: walk the tree level by level. avbl is the number of node slots at
  // depth dpth; the used ones are internal nodes, the rest are leaves, which
  // get depth dpth written from the end of the array backwards.
  avbl = 1;
  used = dpth = 0;
  root = n - 2;
  next = n - 1;
  while (avbl > 0) {
    // Count the internal nodes sitting at the current depth.
    while (root >= 0 && (int)A[root].m_key == dpth) {
      used++;
      root--;
    }
    // Every remaining slot at this depth is a leaf.
    while (avbl > used) {
      A[next--].m_key = (mz_uint16)(dpth);
      avbl--;
    }
    // Each internal node contributes two slots to the next level.
    avbl = 2 * used;
    dpth++;
    used = 0;
  }
}

// Limits canonical Huffman code table's max code size.
-enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 }; -static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, - int code_list_len, - int max_code_size) { - int i; - mz_uint32 total = 0; - if (code_list_len <= 1) return; - for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) - pNum_codes[max_code_size] += pNum_codes[i]; - for (i = max_code_size; i > 0; i--) - total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); - while (total != (1UL << max_code_size)) { - pNum_codes[max_code_size]--; - for (i = max_code_size - 1; i > 0; i--) - if (pNum_codes[i]) { - pNum_codes[i]--; - pNum_codes[i + 1] += 2; - break; - } - total--; - } -} - -static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, - int table_len, int code_size_limit, - int static_table) { - int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; - mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; - MZ_CLEAR_OBJ(num_codes); - if (static_table) { - for (i = 0; i < table_len; i++) - num_codes[d->m_huff_code_sizes[table_num][i]]++; - } else { - tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], - *pSyms; - int num_used_syms = 0; - const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0]; - for (i = 0; i < table_len; i++) - if (pSym_count[i]) { - syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; - syms0[num_used_syms++].m_sym_index = (mz_uint16)i; - } - - pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); - tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); - - for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++; - - tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, - code_size_limit); - - MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); - MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); - for (i = 1, j = num_used_syms; i <= code_size_limit; i++) - for (l = num_codes[i]; l > 0; l--) - d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); - } - - next_code[1] = 0; - 
for (j = 0, i = 2; i <= code_size_limit; i++) - next_code[i] = j = ((j + num_codes[i - 1]) << 1); - - for (i = 0; i < table_len; i++) { - mz_uint rev_code = 0, code, code_size; - if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue; - code = next_code[code_size]++; - for (l = code_size; l > 0; l--, code >>= 1) - rev_code = (rev_code << 1) | (code & 1); - d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; - } -} - -#define TDEFL_PUT_BITS(b, l) \ - do { \ - mz_uint bits = b; \ - mz_uint len = l; \ - MZ_ASSERT(bits <= ((1U << len) - 1U)); \ - d->m_bit_buffer |= (bits << d->m_bits_in); \ - d->m_bits_in += len; \ - while (d->m_bits_in >= 8) { \ - if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ - *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ - d->m_bit_buffer >>= 8; \ - d->m_bits_in -= 8; \ - } \ - } \ - MZ_MACRO_END - -#define TDEFL_RLE_PREV_CODE_SIZE() \ - { \ - if (rle_repeat_count) { \ - if (rle_repeat_count < 3) { \ - d->m_huff_count[2][prev_code_size] = (mz_uint16)( \ - d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ - while (rle_repeat_count--) \ - packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ - } else { \ - d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); \ - packed_code_sizes[num_packed_code_sizes++] = 16; \ - packed_code_sizes[num_packed_code_sizes++] = \ - (mz_uint8)(rle_repeat_count - 3); \ - } \ - rle_repeat_count = 0; \ - } \ - } - -#define TDEFL_RLE_ZERO_CODE_SIZE() \ - { \ - if (rle_z_count) { \ - if (rle_z_count < 3) { \ - d->m_huff_count[2][0] = \ - (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); \ - while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \ - } else if (rle_z_count <= 10) { \ - d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); \ - packed_code_sizes[num_packed_code_sizes++] = 17; \ - packed_code_sizes[num_packed_code_sizes++] = \ - (mz_uint8)(rle_z_count - 3); \ - } else { \ - d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 
1); \ - packed_code_sizes[num_packed_code_sizes++] = 18; \ - packed_code_sizes[num_packed_code_sizes++] = \ - (mz_uint8)(rle_z_count - 11); \ - } \ - rle_z_count = 0; \ - } \ - } - -static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - -static void tdefl_start_dynamic_block(tdefl_compressor *d) { - int num_lit_codes, num_dist_codes, num_bit_lengths; - mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, - rle_repeat_count, packed_code_sizes_index; - mz_uint8 - code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], - packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], - prev_code_size = 0xFF; - - d->m_huff_count[0][256] = 1; - - tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); - tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); - - for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) - if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break; - for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) - if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break; - - memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); - memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], - num_dist_codes); - total_code_sizes_to_pack = num_lit_codes + num_dist_codes; - num_packed_code_sizes = 0; - rle_z_count = 0; - rle_repeat_count = 0; - - memset(&d->m_huff_count[2][0], 0, - sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); - for (i = 0; i < total_code_sizes_to_pack; i++) { - mz_uint8 code_size = code_sizes_to_pack[i]; - if (!code_size) { - TDEFL_RLE_PREV_CODE_SIZE(); - if (++rle_z_count == 138) { - TDEFL_RLE_ZERO_CODE_SIZE(); - } - } else { - TDEFL_RLE_ZERO_CODE_SIZE(); - if (code_size != prev_code_size) { - TDEFL_RLE_PREV_CODE_SIZE(); - d->m_huff_count[2][code_size] = - (mz_uint16)(d->m_huff_count[2][code_size] + 1); - 
packed_code_sizes[num_packed_code_sizes++] = code_size; - } else if (++rle_repeat_count == 6) { - TDEFL_RLE_PREV_CODE_SIZE(); - } - } - prev_code_size = code_size; - } - if (rle_repeat_count) { - TDEFL_RLE_PREV_CODE_SIZE(); - } else { - TDEFL_RLE_ZERO_CODE_SIZE(); - } - - tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); - - TDEFL_PUT_BITS(2, 2); - - TDEFL_PUT_BITS(num_lit_codes - 257, 5); - TDEFL_PUT_BITS(num_dist_codes - 1, 5); - - for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) - if (d->m_huff_code_sizes - [2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) - break; - num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); - TDEFL_PUT_BITS(num_bit_lengths - 4, 4); - for (i = 0; (int)i < num_bit_lengths; i++) - TDEFL_PUT_BITS( - d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); - - for (packed_code_sizes_index = 0; - packed_code_sizes_index < num_packed_code_sizes;) { - mz_uint code = packed_code_sizes[packed_code_sizes_index++]; - MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); - TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); - if (code >= 16) - TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], - "\02\03\07"[code - 16]); - } -} - -static void tdefl_start_static_block(tdefl_compressor *d) { - mz_uint i; - mz_uint8 *p = &d->m_huff_code_sizes[0][0]; - - for (i = 0; i <= 143; ++i) *p++ = 8; - for (; i <= 255; ++i) *p++ = 9; - for (; i <= 279; ++i) *p++ = 7; - for (; i <= 287; ++i) *p++ = 8; - - memset(d->m_huff_code_sizes[1], 5, 32); - - tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); - tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); - - TDEFL_PUT_BITS(1, 2); -} - -static const mz_uint mz_bitmasks[17] = { - 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, - 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF}; - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && \ - MINIZ_HAS_64BIT_REGISTERS -static 
mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) { - mz_uint flags; - mz_uint8 *pLZ_codes; - mz_uint8 *pOutput_buf = d->m_pOutput_buf; - mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; - mz_uint64 bit_buffer = d->m_bit_buffer; - mz_uint bits_in = d->m_bits_in; - -#define TDEFL_PUT_BITS_FAST(b, l) \ - { \ - bit_buffer |= (((mz_uint64)(b)) << bits_in); \ - bits_in += (l); \ - } - - flags = 1; - for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; - flags >>= 1) { - if (flags == 1) flags = *pLZ_codes++ | 0x100; - - if (flags & 1) { - mz_uint s0, s1, n0, n1, sym, num_extra_bits; - mz_uint match_len = pLZ_codes[0], - match_dist = *(const mz_uint16 *)(pLZ_codes + 1); - pLZ_codes += 3; - - MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], - d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], - s_tdefl_len_extra[match_len]); - - // This sequence coaxes MSVC into using cmov's vs. jmp's. - s0 = s_tdefl_small_dist_sym[match_dist & 511]; - n0 = s_tdefl_small_dist_extra[match_dist & 511]; - s1 = s_tdefl_large_dist_sym[match_dist >> 8]; - n1 = s_tdefl_large_dist_extra[match_dist >> 8]; - sym = (match_dist < 512) ? s0 : s1; - num_extra_bits = (match_dist < 512) ? 
n0 : n1; - - MZ_ASSERT(d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], - d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], - num_extra_bits); - } else { - mz_uint lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], - d->m_huff_code_sizes[0][lit]); - - if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) { - flags >>= 1; - lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], - d->m_huff_code_sizes[0][lit]); - - if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) { - flags >>= 1; - lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], - d->m_huff_code_sizes[0][lit]); - } - } - } - - if (pOutput_buf >= d->m_pOutput_buf_end) return MZ_FALSE; - - *(mz_uint64 *)pOutput_buf = bit_buffer; - pOutput_buf += (bits_in >> 3); - bit_buffer >>= (bits_in & ~7); - bits_in &= 7; - } - -#undef TDEFL_PUT_BITS_FAST - - d->m_pOutput_buf = pOutput_buf; - d->m_bits_in = 0; - d->m_bit_buffer = 0; - - while (bits_in) { - mz_uint32 n = MZ_MIN(bits_in, 16); - TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); - bit_buffer >>= n; - bits_in -= n; - } - - TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); - - return (d->m_pOutput_buf < d->m_pOutput_buf_end); -} -#else -static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) { - mz_uint flags; - mz_uint8 *pLZ_codes; - - flags = 1; - for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; - flags >>= 1) { - if (flags == 1) flags = *pLZ_codes++ | 0x100; - if (flags & 1) { - mz_uint sym, num_extra_bits; - mz_uint match_len = pLZ_codes[0], - match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); - pLZ_codes += 3; - - MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], - 
d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], - s_tdefl_len_extra[match_len]); - - if (match_dist < 512) { - sym = s_tdefl_small_dist_sym[match_dist]; - num_extra_bits = s_tdefl_small_dist_extra[match_dist]; - } else { - sym = s_tdefl_large_dist_sym[match_dist >> 8]; - num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; - } - MZ_ASSERT(d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); - } else { - mz_uint lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); - } - } - - TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); - - return (d->m_pOutput_buf < d->m_pOutput_buf_end); -} -#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && - // MINIZ_HAS_64BIT_REGISTERS - -static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) { - if (static_block) - tdefl_start_static_block(d); - else - tdefl_start_dynamic_block(d); - return tdefl_compress_lz_codes(d); -} - -static int tdefl_flush_block(tdefl_compressor *d, int flush) { - mz_uint saved_bit_buf, saved_bits_in; - mz_uint8 *pSaved_output_buf; - mz_bool comp_block_succeeded = MZ_FALSE; - int n, use_raw_block = - ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && - (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; - mz_uint8 *pOutput_buf_start = - ((d->m_pPut_buf_func == NULL) && - ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) - ? 
((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) - : d->m_output_buf; - - d->m_pOutput_buf = pOutput_buf_start; - d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; - - MZ_ASSERT(!d->m_output_flush_remaining); - d->m_output_flush_ofs = 0; - d->m_output_flush_remaining = 0; - - *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); - d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); - - if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) { - TDEFL_PUT_BITS(0x78, 8); - TDEFL_PUT_BITS(0x01, 8); - } - - TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); - - pSaved_output_buf = d->m_pOutput_buf; - saved_bit_buf = d->m_bit_buffer; - saved_bits_in = d->m_bits_in; - - if (!use_raw_block) - comp_block_succeeded = - tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || - (d->m_total_lz_bytes < 48)); - - // If the block gets expanded, forget the current contents of the output - // buffer and send a raw block instead. - if (((use_raw_block) || - ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= - d->m_total_lz_bytes))) && - ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size)) { - mz_uint i; - d->m_pOutput_buf = pSaved_output_buf; - d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; - TDEFL_PUT_BITS(0, 2); - if (d->m_bits_in) { - TDEFL_PUT_BITS(0, 8 - d->m_bits_in); - } - for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) { - TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); - } - for (i = 0; i < d->m_total_lz_bytes; ++i) { - TDEFL_PUT_BITS( - d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], - 8); - } - } - // Check for the extremely unlikely (if not impossible) case of the compressed - // block not fitting into the output buffer when using dynamic codes. 
- else if (!comp_block_succeeded) { - d->m_pOutput_buf = pSaved_output_buf; - d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; - tdefl_compress_block(d, MZ_TRUE); - } - - if (flush) { - if (flush == TDEFL_FINISH) { - if (d->m_bits_in) { - TDEFL_PUT_BITS(0, 8 - d->m_bits_in); - } - if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { - mz_uint i, a = d->m_adler32; - for (i = 0; i < 4; i++) { - TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); - a <<= 8; - } - } - } else { - mz_uint i, z = 0; - TDEFL_PUT_BITS(0, 3); - if (d->m_bits_in) { - TDEFL_PUT_BITS(0, 8 - d->m_bits_in); - } - for (i = 2; i; --i, z ^= 0xFFFF) { - TDEFL_PUT_BITS(z & 0xFFFF, 16); - } - } - } - - MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); - - memset(&d->m_huff_count[0][0], 0, - sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); - memset(&d->m_huff_count[1][0], 0, - sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); - - d->m_pLZ_code_buf = d->m_lz_code_buf + 1; - d->m_pLZ_flags = d->m_lz_code_buf; - d->m_num_flags_left = 8; - d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; - d->m_total_lz_bytes = 0; - d->m_block_index++; - - if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) { - if (d->m_pPut_buf_func) { - *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; - if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) - return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); - } else if (pOutput_buf_start == d->m_output_buf) { - int bytes_to_copy = (int)MZ_MIN( - (size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); - memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, - bytes_to_copy); - d->m_out_buf_ofs += bytes_to_copy; - if ((n -= bytes_to_copy) != 0) { - d->m_output_flush_ofs = bytes_to_copy; - d->m_output_flush_remaining = n; - } - } else { - d->m_out_buf_ofs += n; - } - } - - return d->m_output_flush_remaining; -} - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES -#define TDEFL_READ_UNALIGNED_WORD(p) *(const 
mz_uint16 *)(p) -static MZ_FORCEINLINE void tdefl_find_match( - tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, - mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) { - mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, - match_len = *pMatch_len, probe_pos = pos, next_probe_pos, - probe_len; - mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; - const mz_uint16 *s = (const mz_uint16 *)(d->m_dict + pos), *p, *q; - mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), - s01 = TDEFL_READ_UNALIGNED_WORD(s); - MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); - if (max_match_len <= match_len) return; - for (;;) { - for (;;) { - if (--num_probes_left == 0) return; -#define TDEFL_PROBE \ - next_probe_pos = d->m_next[probe_pos]; \ - if ((!next_probe_pos) || \ - ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ - return; \ - probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ - if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) \ - break; - TDEFL_PROBE; - TDEFL_PROBE; - TDEFL_PROBE; - } - if (!dist) break; - q = (const mz_uint16 *)(d->m_dict + probe_pos); - if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; - p = s; - probe_len = 32; - do { - } while ( - (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && - (--probe_len > 0)); - if (!probe_len) { - *pMatch_dist = dist; - *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); - break; - } else if ((probe_len = ((mz_uint)(p - s) * 2) + - (mz_uint)(*(const mz_uint8 *)p == - *(const mz_uint8 *)q)) > match_len) { - *pMatch_dist = dist; - if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == - max_match_len) - break; - c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + 
match_len - 1]); - } - } -} -#else -static MZ_FORCEINLINE void tdefl_find_match( - tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, - mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) { - mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, - match_len = *pMatch_len, probe_pos = pos, next_probe_pos, - probe_len; - mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; - const mz_uint8 *s = d->m_dict + pos, *p, *q; - mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; - MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); - if (max_match_len <= match_len) return; - for (;;) { - for (;;) { - if (--num_probes_left == 0) return; -#define TDEFL_PROBE \ - next_probe_pos = d->m_next[probe_pos]; \ - if ((!next_probe_pos) || \ - ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ - return; \ - probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ - if ((d->m_dict[probe_pos + match_len] == c0) && \ - (d->m_dict[probe_pos + match_len - 1] == c1)) \ - break; - TDEFL_PROBE; - TDEFL_PROBE; - TDEFL_PROBE; - } - if (!dist) break; - p = s; - q = d->m_dict + probe_pos; - for (probe_len = 0; probe_len < max_match_len; probe_len++) - if (*p++ != *q++) break; - if (probe_len > match_len) { - *pMatch_dist = dist; - if ((*pMatch_len = match_len = probe_len) == max_match_len) return; - c0 = d->m_dict[pos + match_len]; - c1 = d->m_dict[pos + match_len - 1]; - } - } -} -#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN -static mz_bool tdefl_compress_fast(tdefl_compressor *d) { - // Faster, minimally featured LZRW1-style match+parse loop with better - // register utilization. Intended for applications where raw throughput is - // valued more highly than ratio. 
- mz_uint lookahead_pos = d->m_lookahead_pos, - lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, - total_lz_bytes = d->m_total_lz_bytes, - num_flags_left = d->m_num_flags_left; - mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; - mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; - - while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) { - const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; - mz_uint dst_pos = - (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; - mz_uint num_bytes_to_process = (mz_uint)MZ_MIN( - d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); - d->m_src_buf_left -= num_bytes_to_process; - lookahead_size += num_bytes_to_process; - - while (num_bytes_to_process) { - mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); - memcpy(d->m_dict + dst_pos, d->m_pSrc, n); - if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) - memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, - MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); - d->m_pSrc += n; - dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; - num_bytes_to_process -= n; - } - - dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); - if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) - break; - - while (lookahead_size >= 4) { - mz_uint cur_match_dist, cur_match_len = 1; - mz_uint8 *pCur_dict = d->m_dict + cur_pos; - mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF; - mz_uint hash = - (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & - TDEFL_LEVEL1_HASH_SIZE_MASK; - mz_uint probe_pos = d->m_hash[hash]; - d->m_hash[hash] = (mz_uint16)lookahead_pos; - - if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= - dict_size) && - ((*(const mz_uint32 *)(d->m_dict + - (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & - 0xFFFFFF) == first_trigram)) { - const mz_uint16 *p = (const mz_uint16 *)pCur_dict; - const mz_uint16 *q = 
(const mz_uint16 *)(d->m_dict + probe_pos); - mz_uint32 probe_len = 32; - do { - } while ((TDEFL_READ_UNALIGNED_WORD(++p) == - TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == - TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == - TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == - TDEFL_READ_UNALIGNED_WORD(++q)) && - (--probe_len > 0)); - cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + - (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); - if (!probe_len) - cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; - - if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || - ((cur_match_len == TDEFL_MIN_MATCH_LEN) && - (cur_match_dist >= 8U * 1024U))) { - cur_match_len = 1; - *pLZ_code_buf++ = (mz_uint8)first_trigram; - *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); - d->m_huff_count[0][(mz_uint8)first_trigram]++; - } else { - mz_uint32 s0, s1; - cur_match_len = MZ_MIN(cur_match_len, lookahead_size); - - MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && - (cur_match_dist >= 1) && - (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); - - cur_match_dist--; - - pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); - *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; - pLZ_code_buf += 3; - *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); - - s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; - s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; - d->m_huff_count[1][(cur_match_dist < 512) ? 
s0 : s1]++; - - d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - - TDEFL_MIN_MATCH_LEN]]++; - } - } else { - *pLZ_code_buf++ = (mz_uint8)first_trigram; - *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); - d->m_huff_count[0][(mz_uint8)first_trigram]++; - } - - if (--num_flags_left == 0) { - num_flags_left = 8; - pLZ_flags = pLZ_code_buf++; - } - - total_lz_bytes += cur_match_len; - lookahead_pos += cur_match_len; - dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE); - cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; - MZ_ASSERT(lookahead_size >= cur_match_len); - lookahead_size -= cur_match_len; - - if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { - int n; - d->m_lookahead_pos = lookahead_pos; - d->m_lookahead_size = lookahead_size; - d->m_dict_size = dict_size; - d->m_total_lz_bytes = total_lz_bytes; - d->m_pLZ_code_buf = pLZ_code_buf; - d->m_pLZ_flags = pLZ_flags; - d->m_num_flags_left = num_flags_left; - if ((n = tdefl_flush_block(d, 0)) != 0) - return (n < 0) ? 
MZ_FALSE : MZ_TRUE; - total_lz_bytes = d->m_total_lz_bytes; - pLZ_code_buf = d->m_pLZ_code_buf; - pLZ_flags = d->m_pLZ_flags; - num_flags_left = d->m_num_flags_left; - } - } - - while (lookahead_size) { - mz_uint8 lit = d->m_dict[cur_pos]; - - total_lz_bytes++; - *pLZ_code_buf++ = lit; - *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); - if (--num_flags_left == 0) { - num_flags_left = 8; - pLZ_flags = pLZ_code_buf++; - } - - d->m_huff_count[0][lit]++; - - lookahead_pos++; - dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE); - cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; - lookahead_size--; - - if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { - int n; - d->m_lookahead_pos = lookahead_pos; - d->m_lookahead_size = lookahead_size; - d->m_dict_size = dict_size; - d->m_total_lz_bytes = total_lz_bytes; - d->m_pLZ_code_buf = pLZ_code_buf; - d->m_pLZ_flags = pLZ_flags; - d->m_num_flags_left = num_flags_left; - if ((n = tdefl_flush_block(d, 0)) != 0) - return (n < 0) ? MZ_FALSE : MZ_TRUE; - total_lz_bytes = d->m_total_lz_bytes; - pLZ_code_buf = d->m_pLZ_code_buf; - pLZ_flags = d->m_pLZ_flags; - num_flags_left = d->m_num_flags_left; - } - } - } - - d->m_lookahead_pos = lookahead_pos; - d->m_lookahead_size = lookahead_size; - d->m_dict_size = dict_size; - d->m_total_lz_bytes = total_lz_bytes; - d->m_pLZ_code_buf = pLZ_code_buf; - d->m_pLZ_flags = pLZ_flags; - d->m_num_flags_left = num_flags_left; - return MZ_TRUE; -} -#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN - -static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, - mz_uint8 lit) { - d->m_total_lz_bytes++; - *d->m_pLZ_code_buf++ = lit; - *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); - if (--d->m_num_flags_left == 0) { - d->m_num_flags_left = 8; - d->m_pLZ_flags = d->m_pLZ_code_buf++; - } - d->m_huff_count[0][lit]++; -} - -static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, - mz_uint match_len, - mz_uint match_dist) { - mz_uint32 s0, s1; - - 
MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && - (match_dist <= TDEFL_LZ_DICT_SIZE)); - - d->m_total_lz_bytes += match_len; - - d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); - - match_dist -= 1; - d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); - d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); - d->m_pLZ_code_buf += 3; - - *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); - if (--d->m_num_flags_left == 0) { - d->m_num_flags_left = 8; - d->m_pLZ_flags = d->m_pLZ_code_buf++; - } - - s0 = s_tdefl_small_dist_sym[match_dist & 511]; - s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; - d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++; - - if (match_len >= TDEFL_MIN_MATCH_LEN) - d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; -} - -static mz_bool tdefl_compress_normal(tdefl_compressor *d) { - const mz_uint8 *pSrc = d->m_pSrc; - size_t src_buf_left = d->m_src_buf_left; - tdefl_flush flush = d->m_flush; - - while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) { - mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; - // Update dictionary and hash chains. Keeps the lookahead size equal to - // TDEFL_MAX_MATCH_LEN. 
- if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) { - mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & - TDEFL_LZ_DICT_SIZE_MASK, - ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; - mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] - << TDEFL_LZ_HASH_SHIFT) ^ - d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; - mz_uint num_bytes_to_process = (mz_uint)MZ_MIN( - src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); - const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process; - src_buf_left -= num_bytes_to_process; - d->m_lookahead_size += num_bytes_to_process; - while (pSrc != pSrc_end) { - mz_uint8 c = *pSrc++; - d->m_dict[dst_pos] = c; - if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) - d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; - hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); - d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; - d->m_hash[hash] = (mz_uint16)(ins_pos); - dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; - ins_pos++; - } - } else { - while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) { - mz_uint8 c = *pSrc++; - mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & - TDEFL_LZ_DICT_SIZE_MASK; - src_buf_left--; - d->m_dict[dst_pos] = c; - if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) - d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; - if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) { - mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; - mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] - << (TDEFL_LZ_HASH_SHIFT * 2)) ^ - (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] - << TDEFL_LZ_HASH_SHIFT) ^ - c) & - (TDEFL_LZ_HASH_SIZE - 1); - d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; - d->m_hash[hash] = (mz_uint16)(ins_pos); - } - } - } - d->m_dict_size = - MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); - if ((!flush) && (d->m_lookahead_size < 
TDEFL_MAX_MATCH_LEN)) break; - - // Simple lazy/greedy parsing state machine. - len_to_move = 1; - cur_match_dist = 0; - cur_match_len = - d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); - cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; - if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) { - if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) { - mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; - cur_match_len = 0; - while (cur_match_len < d->m_lookahead_size) { - if (d->m_dict[cur_pos + cur_match_len] != c) break; - cur_match_len++; - } - if (cur_match_len < TDEFL_MIN_MATCH_LEN) - cur_match_len = 0; - else - cur_match_dist = 1; - } - } else { - tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, - d->m_lookahead_size, &cur_match_dist, &cur_match_len); - } - if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && - (cur_match_dist >= 8U * 1024U)) || - (cur_pos == cur_match_dist) || - ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) { - cur_match_dist = cur_match_len = 0; - } - if (d->m_saved_match_len) { - if (cur_match_len > d->m_saved_match_len) { - tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); - if (cur_match_len >= 128) { - tdefl_record_match(d, cur_match_len, cur_match_dist); - d->m_saved_match_len = 0; - len_to_move = cur_match_len; - } else { - d->m_saved_lit = d->m_dict[cur_pos]; - d->m_saved_match_dist = cur_match_dist; - d->m_saved_match_len = cur_match_len; - } - } else { - tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); - len_to_move = d->m_saved_match_len - 1; - d->m_saved_match_len = 0; - } - } else if (!cur_match_dist) - tdefl_record_literal(d, - d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); - else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || - (cur_match_len >= 128)) { - tdefl_record_match(d, cur_match_len, cur_match_dist); - len_to_move = cur_match_len; - } else { - d->m_saved_lit = 
d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; - d->m_saved_match_dist = cur_match_dist; - d->m_saved_match_len = cur_match_len; - } - // Move the lookahead forward by len_to_move bytes. - d->m_lookahead_pos += len_to_move; - MZ_ASSERT(d->m_lookahead_size >= len_to_move); - d->m_lookahead_size -= len_to_move; - d->m_dict_size = - MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE); - // Check if it's time to flush the current LZ codes to the internal output - // buffer. - if ((d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || - ((d->m_total_lz_bytes > 31 * 1024) && - (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= - d->m_total_lz_bytes) || - (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))) { - int n; - d->m_pSrc = pSrc; - d->m_src_buf_left = src_buf_left; - if ((n = tdefl_flush_block(d, 0)) != 0) - return (n < 0) ? MZ_FALSE : MZ_TRUE; - } - } - - d->m_pSrc = pSrc; - d->m_src_buf_left = src_buf_left; - return MZ_TRUE; -} - -static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) { - if (d->m_pIn_buf_size) { - *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; - } - - if (d->m_pOut_buf_size) { - size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, - d->m_output_flush_remaining); - memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, - d->m_output_buf + d->m_output_flush_ofs, n); - d->m_output_flush_ofs += (mz_uint)n; - d->m_output_flush_remaining -= (mz_uint)n; - d->m_out_buf_ofs += n; - - *d->m_pOut_buf_size = d->m_out_buf_ofs; - } - - return (d->m_finished && !d->m_output_flush_remaining) ? 
TDEFL_STATUS_DONE - : TDEFL_STATUS_OKAY; -} - -tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, - size_t *pIn_buf_size, void *pOut_buf, - size_t *pOut_buf_size, tdefl_flush flush) { - if (!d) { - if (pIn_buf_size) *pIn_buf_size = 0; - if (pOut_buf_size) *pOut_buf_size = 0; - return TDEFL_STATUS_BAD_PARAM; - } - - d->m_pIn_buf = pIn_buf; - d->m_pIn_buf_size = pIn_buf_size; - d->m_pOut_buf = pOut_buf; - d->m_pOut_buf_size = pOut_buf_size; - d->m_pSrc = (const mz_uint8 *)(pIn_buf); - d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0; - d->m_out_buf_ofs = 0; - d->m_flush = flush; - - if (((d->m_pPut_buf_func != NULL) == - ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || - (d->m_prev_return_status != TDEFL_STATUS_OKAY) || - (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || - (pIn_buf_size && *pIn_buf_size && !pIn_buf) || - (pOut_buf_size && *pOut_buf_size && !pOut_buf)) { - if (pIn_buf_size) *pIn_buf_size = 0; - if (pOut_buf_size) *pOut_buf_size = 0; - return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); - } - d->m_wants_to_finish |= (flush == TDEFL_FINISH); - - if ((d->m_output_flush_remaining) || (d->m_finished)) - return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN - if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && - ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && - ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | - TDEFL_RLE_MATCHES)) == 0)) { - if (!tdefl_compress_fast(d)) return d->m_prev_return_status; - } else -#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN - { - if (!tdefl_compress_normal(d)) return d->m_prev_return_status; - } - - if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && - (pIn_buf)) - d->m_adler32 = - (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, - d->m_pSrc - (const mz_uint8 *)pIn_buf); - - if ((flush) && (!d->m_lookahead_size) && 
(!d->m_src_buf_left) && - (!d->m_output_flush_remaining)) { - if (tdefl_flush_block(d, flush) < 0) return d->m_prev_return_status; - d->m_finished = (flush == TDEFL_FINISH); - if (flush == TDEFL_FULL_FLUSH) { - MZ_CLEAR_OBJ(d->m_hash); - MZ_CLEAR_OBJ(d->m_next); - d->m_dict_size = 0; - } - } - - return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); -} - -tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, - size_t in_buf_size, tdefl_flush flush) { - MZ_ASSERT(d->m_pPut_buf_func); - return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); -} - -tdefl_status tdefl_init(tdefl_compressor *d, - tdefl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags) { - d->m_pPut_buf_func = pPut_buf_func; - d->m_pPut_buf_user = pPut_buf_user; - d->m_flags = (mz_uint)(flags); - d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; - d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; - d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; - if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash); - d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = - d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; - d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = - d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; - d->m_pLZ_code_buf = d->m_lz_code_buf + 1; - d->m_pLZ_flags = d->m_lz_code_buf; - d->m_num_flags_left = 8; - d->m_pOutput_buf = d->m_output_buf; - d->m_pOutput_buf_end = d->m_output_buf; - d->m_prev_return_status = TDEFL_STATUS_OKAY; - d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; - d->m_adler32 = 1; - d->m_pIn_buf = NULL; - d->m_pOut_buf = NULL; - d->m_pIn_buf_size = NULL; - d->m_pOut_buf_size = NULL; - d->m_flush = TDEFL_NO_FLUSH; - d->m_pSrc = NULL; - d->m_src_buf_left = 0; - d->m_out_buf_ofs = 0; - memset(&d->m_huff_count[0][0], 0, - sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); - 
memset(&d->m_huff_count[1][0], 0, - sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); - return TDEFL_STATUS_OKAY; -} - -tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) { - return d->m_prev_return_status; -} - -mz_uint32 tdefl_get_adler32(tdefl_compressor *d) { return d->m_adler32; } - -mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, - tdefl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags) { - tdefl_compressor *pComp; - mz_bool succeeded; - if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; - pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); - if (!pComp) return MZ_FALSE; - succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == - TDEFL_STATUS_OKAY); - succeeded = - succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == - TDEFL_STATUS_DONE); - MZ_FREE(pComp); - return succeeded; -} - -typedef struct { - size_t m_size, m_capacity; - mz_uint8 *m_pBuf; - mz_bool m_expandable; -} tdefl_output_buffer; - -static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, - void *pUser) { - tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; - size_t new_size = p->m_size + len; - if (new_size > p->m_capacity) { - size_t new_capacity = p->m_capacity; - mz_uint8 *pNew_buf; - if (!p->m_expandable) return MZ_FALSE; - do { - new_capacity = MZ_MAX(128U, new_capacity << 1U); - } while (new_size > new_capacity); - pNew_buf = (mz_uint8 *)MZ_REALLOC(p->m_pBuf, new_capacity); - if (!pNew_buf) return MZ_FALSE; - p->m_pBuf = pNew_buf; - p->m_capacity = new_capacity; - } - memcpy((mz_uint8 *)p->m_pBuf + p->m_size, pBuf, len); - p->m_size = new_size; - return MZ_TRUE; -} - -void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, - size_t *pOut_len, int flags) { - tdefl_output_buffer out_buf; - MZ_CLEAR_OBJ(out_buf); - if (!pOut_len) - return MZ_FALSE; - else - *pOut_len = 0; - out_buf.m_expandable = MZ_TRUE; - if 
(!tdefl_compress_mem_to_output( - pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) - return NULL; - *pOut_len = out_buf.m_size; - return out_buf.m_pBuf; -} - -size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, - const void *pSrc_buf, size_t src_buf_len, - int flags) { - tdefl_output_buffer out_buf; - MZ_CLEAR_OBJ(out_buf); - if (!pOut_buf) return 0; - out_buf.m_pBuf = (mz_uint8 *)pOut_buf; - out_buf.m_capacity = out_buf_len; - if (!tdefl_compress_mem_to_output( - pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) - return 0; - return out_buf.m_size; -} - -#ifndef MINIZ_NO_ZLIB_APIS -static const mz_uint s_tdefl_num_probes[11] = {0, 1, 6, 32, 16, 32, - 128, 256, 512, 768, 1500}; - -// level may actually range from [0,10] (10 is a "hidden" max level, where we -// want a bit more compression and it's fine if throughput to fall off a cliff -// on some files). -mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, - int strategy) { - mz_uint comp_flags = - s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | - ((level <= 3) ? 
TDEFL_GREEDY_PARSING_FLAG : 0); - if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; - - if (!level) - comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; - else if (strategy == MZ_FILTERED) - comp_flags |= TDEFL_FILTER_MATCHES; - else if (strategy == MZ_HUFFMAN_ONLY) - comp_flags &= ~TDEFL_MAX_PROBES_MASK; - else if (strategy == MZ_FIXED) - comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; - else if (strategy == MZ_RLE) - comp_flags |= TDEFL_RLE_MATCHES; - - return comp_flags; -} -#endif // MINIZ_NO_ZLIB_APIS - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4204) // nonstandard extension used : non-constant - // aggregate initializer (also supported by GNU - // C and C99, so no big deal) -#pragma warning(disable : 4244) // 'initializing': conversion from '__int64' to - // 'int', possible loss of data -#pragma warning(disable : 4267) // 'argument': conversion from '__int64' to - // 'int', possible loss of data -#pragma warning(disable : 4996) // 'strdup': The POSIX name for this item is - // deprecated. Instead, use the ISO C and C++ - // conformant name: _strdup. -#endif - -// Simple PNG writer function by Alex Evans, 2011. Released into the public -// domain: https://gist.github.com/908299, more context at -// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. -// This is actually a modification of Alex's original code so PNG files -// generated by this function pass pngcheck. -void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, - int h, int num_chans, - size_t *pLen_out, - mz_uint level, mz_bool flip) { - // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was - // defined. 
- static const mz_uint s_tdefl_png_num_probes[11] = { - 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500}; - tdefl_compressor *pComp = - (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); - tdefl_output_buffer out_buf; - int i, bpl = w * num_chans, y, z; - mz_uint32 c; - *pLen_out = 0; - if (!pComp) return NULL; - MZ_CLEAR_OBJ(out_buf); - out_buf.m_expandable = MZ_TRUE; - out_buf.m_capacity = 57 + MZ_MAX(64, (1 + bpl) * h); - if (NULL == (out_buf.m_pBuf = (mz_uint8 *)MZ_MALLOC(out_buf.m_capacity))) { - MZ_FREE(pComp); - return NULL; - } - // write dummy header - for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); - // compress image data - tdefl_init( - pComp, tdefl_output_buffer_putter, &out_buf, - s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER); - for (y = 0; y < h; ++y) { - tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); - tdefl_compress_buffer(pComp, - (mz_uint8 *)pImage + (flip ? (h - 1 - y) : y) * bpl, - bpl, TDEFL_NO_FLUSH); - } - if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != - TDEFL_STATUS_DONE) { - MZ_FREE(pComp); - MZ_FREE(out_buf.m_pBuf); - return NULL; - } - // write real header - *pLen_out = out_buf.m_size - 41; - { - static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06}; - mz_uint8 pnghdr[41] = {0x89, - 0x50, - 0x4e, - 0x47, - 0x0d, - 0x0a, - 0x1a, - 0x0a, - 0x00, - 0x00, - 0x00, - 0x0d, - 0x49, - 0x48, - 0x44, - 0x52, - 0, - 0, - (mz_uint8)(w >> 8), - (mz_uint8)w, - 0, - 0, - (mz_uint8)(h >> 8), - (mz_uint8)h, - 8, - chans[num_chans], - 0, - 0, - 0, - 0, - 0, - 0, - 0, - (mz_uint8)(*pLen_out >> 24), - (mz_uint8)(*pLen_out >> 16), - (mz_uint8)(*pLen_out >> 8), - (mz_uint8)*pLen_out, - 0x49, - 0x44, - 0x41, - 0x54}; - c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, pnghdr + 12, 17); - for (i = 0; i < 4; ++i, c <<= 8) - ((mz_uint8 *)(pnghdr + 29))[i] = (mz_uint8)(c >> 24); - memcpy(out_buf.m_pBuf, pnghdr, 41); - } - // write footer (IDAT CRC-32, followed by IEND chunk) - if 
(!tdefl_output_buffer_putter( - "\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { - *pLen_out = 0; - MZ_FREE(pComp); - MZ_FREE(out_buf.m_pBuf); - return NULL; - } - c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4, - *pLen_out + 4); - for (i = 0; i < 4; ++i, c <<= 8) - (out_buf.m_pBuf + out_buf.m_size - 16)[i] = (mz_uint8)(c >> 24); - // compute final size of file, grab compressed data buffer and return - *pLen_out += 57; - MZ_FREE(pComp); - return out_buf.m_pBuf; -} -void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, - int num_chans, size_t *pLen_out) { - // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we - // can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's - // where #defined out) - return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, - pLen_out, 6, MZ_FALSE); -} - -// ------------------- .ZIP archive reading - -#ifndef MINIZ_NO_ARCHIVE_APIS -#error "No arvhive APIs" - -#ifdef MINIZ_NO_STDIO -#define MZ_FILE void * -#else -#include <stdio.h> -#include <sys/stat.h> - -#if defined(_MSC_VER) || defined(__MINGW64__) -static FILE *mz_fopen(const char *pFilename, const char *pMode) { - FILE *pFile = NULL; - fopen_s(&pFile, pFilename, pMode); - return pFile; -} -static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) { - FILE *pFile = NULL; - if (freopen_s(&pFile, pPath, pMode, pStream)) return NULL; - return pFile; -} -#ifndef MINIZ_NO_TIME -#include <sys/utime.h> -#endif -#define MZ_FILE FILE -#define MZ_FOPEN mz_fopen -#define MZ_FCLOSE fclose -#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 _ftelli64 -#define MZ_FSEEK64 _fseeki64 -#define MZ_FILE_STAT_STRUCT _stat -#define MZ_FILE_STAT _stat -#define MZ_FFLUSH fflush -#define MZ_FREOPEN mz_freopen -#define MZ_DELETE_FILE remove -#elif defined(__MINGW32__) -#ifndef MINIZ_NO_TIME -#include <sys/utime.h> -#endif -#define MZ_FILE FILE -#define 
MZ_FOPEN(f, m) fopen(f, m) -#define MZ_FCLOSE fclose -#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 ftello64 -#define MZ_FSEEK64 fseeko64 -#define MZ_FILE_STAT_STRUCT _stat -#define MZ_FILE_STAT _stat -#define MZ_FFLUSH fflush -#define MZ_FREOPEN(f, m, s) freopen(f, m, s) -#define MZ_DELETE_FILE remove -#elif defined(__TINYC__) -#ifndef MINIZ_NO_TIME -#include <sys/utime.h> -#endif -#define MZ_FILE FILE -#define MZ_FOPEN(f, m) fopen(f, m) -#define MZ_FCLOSE fclose -#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 ftell -#define MZ_FSEEK64 fseek -#define MZ_FILE_STAT_STRUCT stat -#define MZ_FILE_STAT stat -#define MZ_FFLUSH fflush -#define MZ_FREOPEN(f, m, s) freopen(f, m, s) -#define MZ_DELETE_FILE remove -#elif defined(__GNUC__) && defined(_LARGEFILE64_SOURCE) && _LARGEFILE64_SOURCE -#ifndef MINIZ_NO_TIME -#include <utime.h> -#endif -#define MZ_FILE FILE -#define MZ_FOPEN(f, m) fopen64(f, m) -#define MZ_FCLOSE fclose -#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 ftello64 -#define MZ_FSEEK64 fseeko64 -#define MZ_FILE_STAT_STRUCT stat64 -#define MZ_FILE_STAT stat64 -#define MZ_FFLUSH fflush -#define MZ_FREOPEN(p, m, s) freopen64(p, m, s) -#define MZ_DELETE_FILE remove -#else -#ifndef MINIZ_NO_TIME -#include <utime.h> -#endif -#define MZ_FILE FILE -#define MZ_FOPEN(f, m) fopen(f, m) -#define MZ_FCLOSE fclose -#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 ftello -#define MZ_FSEEK64 fseeko -#define MZ_FILE_STAT_STRUCT stat -#define MZ_FILE_STAT stat -#define MZ_FFLUSH fflush -#define MZ_FREOPEN(f, m, s) freopen(f, m, s) -#define MZ_DELETE_FILE remove -#endif // #ifdef _MSC_VER -#endif // #ifdef MINIZ_NO_STDIO - -#define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c)) - -// Various ZIP archive enums. To completely avoid cross platform compiler -// alignment and platform endian issues, miniz.c doesn't use structs for any of -// this stuff. 
-enum { - // ZIP archive identifiers and record sizes - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50, - MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50, - MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50, - MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30, - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46, - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22, - // Central directory header record offsets - MZ_ZIP_CDH_SIG_OFS = 0, - MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4, - MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6, - MZ_ZIP_CDH_BIT_FLAG_OFS = 8, - MZ_ZIP_CDH_METHOD_OFS = 10, - MZ_ZIP_CDH_FILE_TIME_OFS = 12, - MZ_ZIP_CDH_FILE_DATE_OFS = 14, - MZ_ZIP_CDH_CRC32_OFS = 16, - MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20, - MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24, - MZ_ZIP_CDH_FILENAME_LEN_OFS = 28, - MZ_ZIP_CDH_EXTRA_LEN_OFS = 30, - MZ_ZIP_CDH_COMMENT_LEN_OFS = 32, - MZ_ZIP_CDH_DISK_START_OFS = 34, - MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36, - MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38, - MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42, - // Local directory header offsets - MZ_ZIP_LDH_SIG_OFS = 0, - MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4, - MZ_ZIP_LDH_BIT_FLAG_OFS = 6, - MZ_ZIP_LDH_METHOD_OFS = 8, - MZ_ZIP_LDH_FILE_TIME_OFS = 10, - MZ_ZIP_LDH_FILE_DATE_OFS = 12, - MZ_ZIP_LDH_CRC32_OFS = 14, - MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18, - MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22, - MZ_ZIP_LDH_FILENAME_LEN_OFS = 26, - MZ_ZIP_LDH_EXTRA_LEN_OFS = 28, - // End of central directory offsets - MZ_ZIP_ECDH_SIG_OFS = 0, - MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4, - MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6, - MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8, - MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10, - MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12, - MZ_ZIP_ECDH_CDIR_OFS_OFS = 16, - MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20, -}; - -typedef struct { - void *m_p; - size_t m_size, m_capacity; - mz_uint m_element_size; -} mz_zip_array; - -struct mz_zip_internal_state_tag { - mz_zip_array m_central_dir; - mz_zip_array m_central_dir_offsets; - mz_zip_array m_sorted_central_dir_offsets; - MZ_FILE *m_pFile; - void *m_pMem; - size_t 
m_mem_size; - size_t m_mem_capacity; -}; - -#define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) \ - (array_ptr)->m_element_size = element_size -#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) \ - ((element_type *)((array_ptr)->m_p))[index] - -static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip, - mz_zip_array *pArray) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p); - memset(pArray, 0, sizeof(mz_zip_array)); -} - -static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip, - mz_zip_array *pArray, - size_t min_new_capacity, - mz_uint growing) { - void *pNew_p; - size_t new_capacity = min_new_capacity; - MZ_ASSERT(pArray->m_element_size); - if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE; - if (growing) { - new_capacity = MZ_MAX(1, pArray->m_capacity); - while (new_capacity < min_new_capacity) new_capacity *= 2; - } - if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, - pArray->m_element_size, new_capacity))) - return MZ_FALSE; - pArray->m_p = pNew_p; - pArray->m_capacity = new_capacity; - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip, - mz_zip_array *pArray, - size_t new_capacity, - mz_uint growing) { - if (new_capacity > pArray->m_capacity) { - if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing)) - return MZ_FALSE; - } - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip, - mz_zip_array *pArray, - size_t new_size, - mz_uint growing) { - if (new_size > pArray->m_capacity) { - if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing)) - return MZ_FALSE; - } - pArray->m_size = new_size; - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip, - mz_zip_array *pArray, - size_t n) { - return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE); -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive 
*pZip, - mz_zip_array *pArray, - const void *pElements, - size_t n) { - size_t orig_size = pArray->m_size; - if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE)) - return MZ_FALSE; - memcpy((mz_uint8 *)pArray->m_p + orig_size * pArray->m_element_size, - pElements, n * pArray->m_element_size); - return MZ_TRUE; -} - -#ifndef MINIZ_NO_TIME -static time_t mz_zip_dos_to_time_t(int dos_time, int dos_date) { - struct tm tm; - memset(&tm, 0, sizeof(tm)); - tm.tm_isdst = -1; - tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900; - tm.tm_mon = ((dos_date >> 5) & 15) - 1; - tm.tm_mday = dos_date & 31; - tm.tm_hour = (dos_time >> 11) & 31; - tm.tm_min = (dos_time >> 5) & 63; - tm.tm_sec = (dos_time << 1) & 62; - return mktime(&tm); -} - -static void mz_zip_time_to_dos_time(time_t time, mz_uint16 *pDOS_time, - mz_uint16 *pDOS_date) { -#ifdef _MSC_VER - struct tm tm_struct; - struct tm *tm = &tm_struct; - errno_t err = localtime_s(tm, &time); - if (err) { - *pDOS_date = 0; - *pDOS_time = 0; - return; - } -#else - struct tm *tm = localtime(&time); -#endif - *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + - ((tm->tm_sec) >> 1)); - *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + - ((tm->tm_mon + 1) << 5) + tm->tm_mday); -} -#endif - -#ifndef MINIZ_NO_STDIO -static mz_bool mz_zip_get_file_modified_time(const char *pFilename, - mz_uint16 *pDOS_time, - mz_uint16 *pDOS_date) { -#ifdef MINIZ_NO_TIME - (void)pFilename; - *pDOS_date = *pDOS_time = 0; -#else - struct MZ_FILE_STAT_STRUCT file_stat; - // On Linux with x86 glibc, this call will fail on large files (>= 0x80000000 - // bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh. 
- if (MZ_FILE_STAT(pFilename, &file_stat) != 0) return MZ_FALSE; - mz_zip_time_to_dos_time(file_stat.st_mtime, pDOS_time, pDOS_date); -#endif // #ifdef MINIZ_NO_TIME - return MZ_TRUE; -} - -#ifndef MINIZ_NO_TIME -static mz_bool mz_zip_set_file_times(const char *pFilename, time_t access_time, - time_t modified_time) { - struct utimbuf t; - t.actime = access_time; - t.modtime = modified_time; - return !utime(pFilename, &t); -} -#endif // #ifndef MINIZ_NO_TIME -#endif // #ifndef MINIZ_NO_STDIO - -static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, - mz_uint32 flags) { - (void)flags; - if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) - return MZ_FALSE; - - if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; - if (!pZip->m_pFree) pZip->m_pFree = def_free_func; - if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; - - pZip->m_zip_mode = MZ_ZIP_MODE_READING; - pZip->m_archive_size = 0; - pZip->m_central_directory_file_ofs = 0; - pZip->m_total_files = 0; - - if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) - return MZ_FALSE; - memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, - sizeof(mz_uint8)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, - sizeof(mz_uint32)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, - sizeof(mz_uint32)); - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool -mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, - const mz_zip_array *pCentral_dir_offsets, - mz_uint l_index, mz_uint r_index) { - const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT( - pCentral_dir_array, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, - l_index)), - *pE; - const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT( - pCentral_dir_array, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, 
mz_uint32, r_index)); - mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), - r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS); - mz_uint8 l = 0, r = 0; - pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - pE = pL + MZ_MIN(l_len, r_len); - while (pL < pE) { - if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break; - pL++; - pR++; - } - return (pL == pE) ? (l_len < r_len) : (l < r); -} - -#define MZ_SWAP_UINT32(a, b) \ - do { \ - mz_uint32 t = a; \ - a = b; \ - b = t; \ - } \ - MZ_MACRO_END - -// Heap sort of lowercased filenames, used to help accelerate plain central -// directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), -// but it could allocate memory.) -static void mz_zip_reader_sort_central_dir_offsets_by_filename( - mz_zip_archive *pZip) { - mz_zip_internal_state *pState = pZip->m_pState; - const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; - const mz_zip_array *pCentral_dir = &pState->m_central_dir; - mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT( - &pState->m_sorted_central_dir_offsets, mz_uint32, 0); - const int size = pZip->m_total_files; - int start = (size - 2) >> 1, end; - while (start >= 0) { - int child, root = start; - for (;;) { - if ((child = (root << 1) + 1) >= size) break; - child += - (((child + 1) < size) && - (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, - pIndices[child], pIndices[child + 1]))); - if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, - pIndices[root], pIndices[child])) - break; - MZ_SWAP_UINT32(pIndices[root], pIndices[child]); - root = child; - } - start--; - } - - end = size - 1; - while (end > 0) { - int child, root = 0; - MZ_SWAP_UINT32(pIndices[end], pIndices[0]); - for (;;) { - if ((child = (root << 1) + 1) >= end) break; - child += - (((child + 1) < end) && - mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, - pIndices[child], pIndices[child + 1])); - if 
(!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, - pIndices[root], pIndices[child])) - break; - MZ_SWAP_UINT32(pIndices[root], pIndices[child]); - root = child; - } - end--; - } -} - -static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip, - mz_uint32 flags) { - mz_uint cdir_size, num_this_disk, cdir_disk_index; - mz_uint64 cdir_ofs; - mz_int64 cur_file_ofs; - const mz_uint8 *p; - mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; - mz_uint8 *pBuf = (mz_uint8 *)buf_u32; - mz_bool sort_central_dir = - ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0); - // Basic sanity checks - reject files which are too small, and check the first - // 4 bytes of the file to make sure a local header is there. - if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) - return MZ_FALSE; - // Find the end of central directory record by scanning the file from the end - // towards the beginning. - cur_file_ofs = - MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0); - for (;;) { - int i, - n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n) - return MZ_FALSE; - for (i = n - 4; i >= 0; --i) - if (MZ_READ_LE32(pBuf + i) == MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) break; - if (i >= 0) { - cur_file_ofs += i; - break; - } - if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= - (0xFFFF + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) - return MZ_FALSE; - cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0); - } - // Read and verify the end of central directory record. 
- if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) - return MZ_FALSE; - if ((MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) || - ((pZip->m_total_files = - MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS)) != - MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS))) - return MZ_FALSE; - - num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS); - cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS); - if (((num_this_disk | cdir_disk_index) != 0) && - ((num_this_disk != 1) || (cdir_disk_index != 1))) - return MZ_FALSE; - - if ((cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS)) < - pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) - return MZ_FALSE; - - cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS); - if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size) return MZ_FALSE; - - pZip->m_central_directory_file_ofs = cdir_ofs; - - if (pZip->m_total_files) { - mz_uint i, n; - - // Read the entire central directory into a heap block, and allocate another - // heap block to hold the unsorted central dir file record offsets, and - // another to hold the sorted indices. - if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, - MZ_FALSE)) || - (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, - pZip->m_total_files, MZ_FALSE))) - return MZ_FALSE; - - if (sort_central_dir) { - if (!mz_zip_array_resize(pZip, - &pZip->m_pState->m_sorted_central_dir_offsets, - pZip->m_total_files, MZ_FALSE)) - return MZ_FALSE; - } - - if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, - pZip->m_pState->m_central_dir.m_p, - cdir_size) != cdir_size) - return MZ_FALSE; - - // Now create an index into the central directory file records, do some - // basic sanity checking on each record, and check for zip64 entries (which - // are not yet supported). 
- p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p; - for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) { - mz_uint total_header_size, comp_size, decomp_size, disk_index; - if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) || - (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG)) - return MZ_FALSE; - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, - i) = - (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p); - if (sort_central_dir) - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, - mz_uint32, i) = i; - comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); - decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); - if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && - (decomp_size != comp_size)) || - (decomp_size && !comp_size) || (decomp_size == 0xFFFFFFFF) || - (comp_size == 0xFFFFFFFF)) - return MZ_FALSE; - disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS); - if ((disk_index != num_this_disk) && (disk_index != 1)) return MZ_FALSE; - if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size) - return MZ_FALSE; - if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) + - MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) > - n) - return MZ_FALSE; - n -= total_header_size; - p += total_header_size; - } - } - - if (sort_central_dir) - mz_zip_reader_sort_central_dir_offsets_by_filename(pZip); - - return MZ_TRUE; -} - -mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, - mz_uint32 flags) { - if ((!pZip) || (!pZip->m_pRead)) return MZ_FALSE; - if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; - pZip->m_archive_size = size; - if (!mz_zip_reader_read_central_dir(pZip, flags)) { - mz_zip_reader_end(pZip); - return MZ_FALSE; - } - return MZ_TRUE; -} - -static size_t mz_zip_mem_read_func(void 
*pOpaque, mz_uint64 file_ofs, - void *pBuf, size_t n) { - mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; - size_t s = (file_ofs >= pZip->m_archive_size) - ? 0 - : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n); - memcpy(pBuf, (const mz_uint8 *)pZip->m_pState->m_pMem + file_ofs, s); - return s; -} - -mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, - size_t size, mz_uint32 flags) { - if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; - pZip->m_archive_size = size; - pZip->m_pRead = mz_zip_mem_read_func; - pZip->m_pIO_opaque = pZip; -#ifdef __cplusplus - pZip->m_pState->m_pMem = const_cast<void *>(pMem); -#else - pZip->m_pState->m_pMem = (void *)pMem; -#endif - pZip->m_pState->m_mem_size = size; - if (!mz_zip_reader_read_central_dir(pZip, flags)) { - mz_zip_reader_end(pZip); - return MZ_FALSE; - } - return MZ_TRUE; -} - -#ifndef MINIZ_NO_STDIO -static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs, - void *pBuf, size_t n) { - mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; - mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); - if (((mz_int64)file_ofs < 0) || - (((cur_ofs != (mz_int64)file_ofs)) && - (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) - return 0; - return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile); -} - -mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, - mz_uint32 flags) { - mz_uint64 file_size; - MZ_FILE *pFile = MZ_FOPEN(pFilename, "rb"); - if (!pFile) return MZ_FALSE; - if (MZ_FSEEK64(pFile, 0, SEEK_END)) { - MZ_FCLOSE(pFile); - return MZ_FALSE; - } - file_size = MZ_FTELL64(pFile); - if (!mz_zip_reader_init_internal(pZip, flags)) { - MZ_FCLOSE(pFile); - return MZ_FALSE; - } - pZip->m_pRead = mz_zip_file_read_func; - pZip->m_pIO_opaque = pZip; - pZip->m_pState->m_pFile = pFile; - pZip->m_archive_size = file_size; - if (!mz_zip_reader_read_central_dir(pZip, flags)) { - mz_zip_reader_end(pZip); - return MZ_FALSE; - } - return MZ_TRUE; -} 
-#endif // #ifndef MINIZ_NO_STDIO - -mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip) { - return pZip ? pZip->m_total_files : 0; -} - -static MZ_FORCEINLINE const mz_uint8 *mz_zip_reader_get_cdh( - mz_zip_archive *pZip, mz_uint file_index) { - if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files) || - (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) - return NULL; - return &MZ_ZIP_ARRAY_ELEMENT( - &pZip->m_pState->m_central_dir, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, - file_index)); -} - -mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, - mz_uint file_index) { - mz_uint m_bit_flag; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - if (!p) return MZ_FALSE; - m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); - return (m_bit_flag & 1); -} - -mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, - mz_uint file_index) { - mz_uint filename_len, external_attr; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - if (!p) return MZ_FALSE; - - // First see if the filename ends with a '/' character. - filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); - if (filename_len) { - if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/') - return MZ_TRUE; - } - - // Bugfix: This code was also checking if the internal attribute was non-zero, - // which wasn't correct. - // Most/all zip writers (hopefully) set DOS file/directory attributes in the - // low 16-bits, so check for the DOS directory flag and ignore the source OS - // ID in the created by field. - // FIXME: Remove this check? Is it necessary - we already check the filename. 
- external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); - if ((external_attr & 0x10) != 0) return MZ_TRUE; - - return MZ_FALSE; -} - -mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, - mz_zip_archive_file_stat *pStat) { - mz_uint n; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - if ((!p) || (!pStat)) return MZ_FALSE; - - // Unpack the central directory record. - pStat->m_file_index = file_index; - pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT( - &pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index); - pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS); - pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS); - pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); - pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); -#ifndef MINIZ_NO_TIME - pStat->m_time = - mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS), - MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS)); -#endif - pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS); - pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); - pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); - pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS); - pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); - pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); - - // Copy as much of the filename and comment as possible. 
- n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); - n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1); - memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); - pStat->m_filename[n] = '\0'; - - n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS); - n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1); - pStat->m_comment_size = n; - memcpy(pStat->m_comment, - p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS), - n); - pStat->m_comment[n] = '\0'; - - return MZ_TRUE; -} - -mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, - char *pFilename, mz_uint filename_buf_size) { - mz_uint n; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - if (!p) { - if (filename_buf_size) pFilename[0] = '\0'; - return 0; - } - n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); - if (filename_buf_size) { - n = MZ_MIN(n, filename_buf_size - 1); - memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); - pFilename[n] = '\0'; - } - return n + 1; -} - -static MZ_FORCEINLINE mz_bool mz_zip_reader_string_equal(const char *pA, - const char *pB, - mz_uint len, - mz_uint flags) { - mz_uint i; - if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE) return 0 == memcmp(pA, pB, len); - for (i = 0; i < len; ++i) - if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i])) return MZ_FALSE; - return MZ_TRUE; -} - -static MZ_FORCEINLINE int mz_zip_reader_filename_compare( - const mz_zip_array *pCentral_dir_array, - const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR, - mz_uint r_len) { - const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT( - pCentral_dir_array, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, - l_index)), - *pE; - mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS); - mz_uint8 l = 0, r = 0; - pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - pE = pL + MZ_MIN(l_len, r_len); - while (pL < pE) { - if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) 
break; - pL++; - pR++; - } - return (pL == pE) ? (int)(l_len - r_len) : (l - r); -} - -static int mz_zip_reader_locate_file_binary_search(mz_zip_archive *pZip, - const char *pFilename) { - mz_zip_internal_state *pState = pZip->m_pState; - const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; - const mz_zip_array *pCentral_dir = &pState->m_central_dir; - mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT( - &pState->m_sorted_central_dir_offsets, mz_uint32, 0); - const int size = pZip->m_total_files; - const mz_uint filename_len = (mz_uint)strlen(pFilename); - int l = 0, h = size - 1; - while (l <= h) { - int m = (l + h) >> 1, file_index = pIndices[m], - comp = - mz_zip_reader_filename_compare(pCentral_dir, pCentral_dir_offsets, - file_index, pFilename, filename_len); - if (!comp) - return file_index; - else if (comp < 0) - l = m + 1; - else - h = m - 1; - } - return -1; -} - -int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, - const char *pComment, mz_uint flags) { - mz_uint file_index; - size_t name_len, comment_len; - if ((!pZip) || (!pZip->m_pState) || (!pName) || - (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) - return -1; - if (((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) && - (!pComment) && (pZip->m_pState->m_sorted_central_dir_offsets.m_size)) - return mz_zip_reader_locate_file_binary_search(pZip, pName); - name_len = strlen(pName); - if (name_len > 0xFFFF) return -1; - comment_len = pComment ? 
strlen(pComment) : 0; - if (comment_len > 0xFFFF) return -1; - for (file_index = 0; file_index < pZip->m_total_files; file_index++) { - const mz_uint8 *pHeader = &MZ_ZIP_ARRAY_ELEMENT( - &pZip->m_pState->m_central_dir, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, - file_index)); - mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS); - const char *pFilename = - (const char *)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - if (filename_len < name_len) continue; - if (comment_len) { - mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS), - file_comment_len = - MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS); - const char *pFile_comment = pFilename + filename_len + file_extra_len; - if ((file_comment_len != comment_len) || - (!mz_zip_reader_string_equal(pComment, pFile_comment, - file_comment_len, flags))) - continue; - } - if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) { - int ofs = filename_len - 1; - do { - if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') || - (pFilename[ofs] == ':')) - break; - } while (--ofs >= 0); - ofs++; - pFilename += ofs; - filename_len -= ofs; - } - if ((filename_len == name_len) && - (mz_zip_reader_string_equal(pName, pFilename, filename_len, flags))) - return file_index; - } - return -1; -} - -mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, - mz_uint file_index, void *pBuf, - size_t buf_size, mz_uint flags, - void *pUser_read_buf, - size_t user_read_buf_size) { - int status = TINFL_STATUS_DONE; - mz_uint64 needed_size, cur_file_ofs, comp_remaining, - out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail; - mz_zip_archive_file_stat file_stat; - void *pRead_buf; - mz_uint32 - local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / - sizeof(mz_uint32)]; - mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; - tinfl_decompressor inflator; - - if ((buf_size) && (!pBuf)) return MZ_FALSE; - - if 
(!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; - - // Empty file, or a directory (but not always a directory - I've seen odd zips - // with directories that have compressed data which inflates to 0 bytes) - if (!file_stat.m_comp_size) return MZ_TRUE; - - // Entry is a subdirectory (I've seen old zips with dir entries which have - // compressed deflate data which inflates to 0 bytes, but these entries claim - // to uncompress to 512 bytes in the headers). - // I'm torn how to handle this case - should it fail instead? - if (mz_zip_reader_is_file_a_directory(pZip, file_index)) return MZ_TRUE; - - // Encryption and patch files are not supported. - if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE; - - // This function only supports stored and deflate. - if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && - (file_stat.m_method != MZ_DEFLATED)) - return MZ_FALSE; - - // Ensure supplied output buffer is large enough. - needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size - : file_stat.m_uncomp_size; - if (buf_size < needed_size) return MZ_FALSE; - - // Read and parse the local directory entry. - cur_file_ofs = file_stat.m_local_header_ofs; - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return MZ_FALSE; - if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) - return MZ_FALSE; - - cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); - if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) - return MZ_FALSE; - - if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) { - // The file is stored or the caller has requested the compressed data. 
- if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, - (size_t)needed_size) != needed_size) - return MZ_FALSE; - return ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) != 0) || - (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, - (size_t)file_stat.m_uncomp_size) == file_stat.m_crc32); - } - - // Decompress the file either directly from memory or from a file input - // buffer. - tinfl_init(&inflator); - - if (pZip->m_pState->m_pMem) { - // Read directly from the archive in memory. - pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; - read_buf_size = read_buf_avail = file_stat.m_comp_size; - comp_remaining = 0; - } else if (pUser_read_buf) { - // Use a user provided read buffer. - if (!user_read_buf_size) return MZ_FALSE; - pRead_buf = (mz_uint8 *)pUser_read_buf; - read_buf_size = user_read_buf_size; - read_buf_avail = 0; - comp_remaining = file_stat.m_comp_size; - } else { - // Temporarily allocate a read buffer. - read_buf_size = - MZ_MIN(file_stat.m_comp_size, (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); -#ifdef _MSC_VER - if (((0, sizeof(size_t) == sizeof(mz_uint32))) && - (read_buf_size > 0x7FFFFFFF)) -#else - if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) -#endif - return MZ_FALSE; - if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, - (size_t)read_buf_size))) - return MZ_FALSE; - read_buf_avail = 0; - comp_remaining = file_stat.m_comp_size; - } - - do { - size_t in_buf_size, - out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs); - if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) { - read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, - (size_t)read_buf_avail) != read_buf_avail) { - status = TINFL_STATUS_FAILED; - break; - } - cur_file_ofs += read_buf_avail; - comp_remaining -= read_buf_avail; - read_buf_ofs = 0; - } - in_buf_size = (size_t)read_buf_avail; - status = tinfl_decompress( - &inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, 
&in_buf_size, - (mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size, - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | - (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0)); - read_buf_avail -= in_buf_size; - read_buf_ofs += in_buf_size; - out_buf_ofs += out_buf_size; - } while (status == TINFL_STATUS_NEEDS_MORE_INPUT); - - if (status == TINFL_STATUS_DONE) { - // Make sure the entire file was decompressed, and check its CRC. - if ((out_buf_ofs != file_stat.m_uncomp_size) || - (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, - (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32)) - status = TINFL_STATUS_FAILED; - } - - if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf)) - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - - return status == TINFL_STATUS_DONE; -} - -mz_bool mz_zip_reader_extract_file_to_mem_no_alloc( - mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, - mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) { - int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); - if (file_index < 0) return MZ_FALSE; - return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, - flags, pUser_read_buf, - user_read_buf_size); -} - -mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, - void *pBuf, size_t buf_size, - mz_uint flags) { - return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, - flags, NULL, 0); -} - -mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, - const char *pFilename, void *pBuf, - size_t buf_size, mz_uint flags) { - return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf, - buf_size, flags, NULL, 0); -} - -void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, - size_t *pSize, mz_uint flags) { - mz_uint64 comp_size, uncomp_size, alloc_size; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - void *pBuf; - - if (pSize) *pSize = 0; - if (!p) return NULL; - 
- comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); - uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); - - alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? comp_size : uncomp_size; -#ifdef _MSC_VER - if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) -#else - if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) -#endif - return NULL; - if (NULL == - (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size))) - return NULL; - - if (!mz_zip_reader_extract_to_mem(pZip, file_index, pBuf, (size_t)alloc_size, - flags)) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return NULL; - } - - if (pSize) *pSize = (size_t)alloc_size; - return pBuf; -} - -void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, - const char *pFilename, size_t *pSize, - mz_uint flags) { - int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); - if (file_index < 0) { - if (pSize) *pSize = 0; - return MZ_FALSE; - } - return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags); -} - -mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, - mz_uint file_index, - mz_file_write_func pCallback, - void *pOpaque, mz_uint flags) { - int status = TINFL_STATUS_DONE; - mz_uint file_crc32 = MZ_CRC32_INIT; - mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, - out_buf_ofs = 0, cur_file_ofs; - mz_zip_archive_file_stat file_stat; - void *pRead_buf = NULL; - void *pWrite_buf = NULL; - mz_uint32 - local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / - sizeof(mz_uint32)]; - mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; - - if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; - - // Empty file, or a directory (but not always a directory - I've seen odd zips - // with directories that have compressed data which inflates to 0 bytes) - if (!file_stat.m_comp_size) return MZ_TRUE; - - // Entry is a subdirectory 
(I've seen old zips with dir entries which have - // compressed deflate data which inflates to 0 bytes, but these entries claim - // to uncompress to 512 bytes in the headers). - // I'm torn how to handle this case - should it fail instead? - if (mz_zip_reader_is_file_a_directory(pZip, file_index)) return MZ_TRUE; - - // Encryption and patch files are not supported. - if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE; - - // This function only supports stored and deflate. - if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && - (file_stat.m_method != MZ_DEFLATED)) - return MZ_FALSE; - - // Read and parse the local directory entry. - cur_file_ofs = file_stat.m_local_header_ofs; - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return MZ_FALSE; - if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) - return MZ_FALSE; - - cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); - if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) - return MZ_FALSE; - - // Decompress the file either directly from memory or from a file input - // buffer. - if (pZip->m_pState->m_pMem) { - pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; - read_buf_size = read_buf_avail = file_stat.m_comp_size; - comp_remaining = 0; - } else { - read_buf_size = - MZ_MIN(file_stat.m_comp_size, (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); - if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, - (size_t)read_buf_size))) - return MZ_FALSE; - read_buf_avail = 0; - comp_remaining = file_stat.m_comp_size; - } - - if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) { - // The file is stored or the caller has requested the compressed data. 
- if (pZip->m_pState->m_pMem) { -#ifdef _MSC_VER - if (((0, sizeof(size_t) == sizeof(mz_uint32))) && - (file_stat.m_comp_size > 0xFFFFFFFF)) -#else - if (((sizeof(size_t) == sizeof(mz_uint32))) && - (file_stat.m_comp_size > 0xFFFFFFFF)) -#endif - return MZ_FALSE; - if (pCallback(pOpaque, out_buf_ofs, pRead_buf, - (size_t)file_stat.m_comp_size) != file_stat.m_comp_size) - status = TINFL_STATUS_FAILED; - else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) - file_crc32 = - (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, - (size_t)file_stat.m_comp_size); - cur_file_ofs += file_stat.m_comp_size; - out_buf_ofs += file_stat.m_comp_size; - comp_remaining = 0; - } else { - while (comp_remaining) { - read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, - (size_t)read_buf_avail) != read_buf_avail) { - status = TINFL_STATUS_FAILED; - break; - } - - if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) - file_crc32 = (mz_uint32)mz_crc32( - file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail); - - if (pCallback(pOpaque, out_buf_ofs, pRead_buf, - (size_t)read_buf_avail) != read_buf_avail) { - status = TINFL_STATUS_FAILED; - break; - } - cur_file_ofs += read_buf_avail; - out_buf_ofs += read_buf_avail; - comp_remaining -= read_buf_avail; - } - } - } else { - tinfl_decompressor inflator; - tinfl_init(&inflator); - - if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, - TINFL_LZ_DICT_SIZE))) - status = TINFL_STATUS_FAILED; - else { - do { - mz_uint8 *pWrite_buf_cur = - (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); - size_t in_buf_size, - out_buf_size = - TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); - if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) { - read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, - (size_t)read_buf_avail) != read_buf_avail) { - status = TINFL_STATUS_FAILED; 
- break; - } - cur_file_ofs += read_buf_avail; - comp_remaining -= read_buf_avail; - read_buf_ofs = 0; - } - - in_buf_size = (size_t)read_buf_avail; - status = tinfl_decompress( - &inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, - (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size, - comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0); - read_buf_avail -= in_buf_size; - read_buf_ofs += in_buf_size; - - if (out_buf_size) { - if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) != - out_buf_size) { - status = TINFL_STATUS_FAILED; - break; - } - file_crc32 = - (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size); - if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size) { - status = TINFL_STATUS_FAILED; - break; - } - } - } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) || - (status == TINFL_STATUS_HAS_MORE_OUTPUT)); - } - } - - if ((status == TINFL_STATUS_DONE) && - (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) { - // Make sure the entire file was decompressed, and check its CRC. 
- if ((out_buf_ofs != file_stat.m_uncomp_size) || - (file_crc32 != file_stat.m_crc32)) - status = TINFL_STATUS_FAILED; - } - - if (!pZip->m_pState->m_pMem) pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - if (pWrite_buf) pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf); - - return status == TINFL_STATUS_DONE; -} - -mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, - const char *pFilename, - mz_file_write_func pCallback, - void *pOpaque, mz_uint flags) { - int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); - if (file_index < 0) return MZ_FALSE; - return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque, - flags); -} - -#ifndef MINIZ_NO_STDIO -static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs, - const void *pBuf, size_t n) { - (void)ofs; - return MZ_FWRITE(pBuf, 1, n, (MZ_FILE *)pOpaque); -} - -mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, - const char *pDst_filename, - mz_uint flags) { - mz_bool status; - mz_zip_archive_file_stat file_stat; - MZ_FILE *pFile; - if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; - pFile = MZ_FOPEN(pDst_filename, "wb"); - if (!pFile) return MZ_FALSE; - status = mz_zip_reader_extract_to_callback( - pZip, file_index, mz_zip_file_write_callback, pFile, flags); - if (MZ_FCLOSE(pFile) == EOF) return MZ_FALSE; -#ifndef MINIZ_NO_TIME - if (status) - mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time); -#endif - return status; -} -#endif // #ifndef MINIZ_NO_STDIO - -mz_bool mz_zip_reader_end(mz_zip_archive *pZip) { - if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || - (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) - return MZ_FALSE; - - if (pZip->m_pState) { - mz_zip_internal_state *pState = pZip->m_pState; - pZip->m_pState = NULL; - mz_zip_array_clear(pZip, &pState->m_central_dir); - mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); - 
mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); - -#ifndef MINIZ_NO_STDIO - if (pState->m_pFile) { - MZ_FCLOSE(pState->m_pFile); - pState->m_pFile = NULL; - } -#endif // #ifndef MINIZ_NO_STDIO - - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - } - pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; - - return MZ_TRUE; -} - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, - const char *pArchive_filename, - const char *pDst_filename, - mz_uint flags) { - int file_index = - mz_zip_reader_locate_file(pZip, pArchive_filename, NULL, flags); - if (file_index < 0) return MZ_FALSE; - return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags); -} -#endif - -// ------------------- .ZIP archive writing - -#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -static void mz_write_le16(mz_uint8 *p, mz_uint16 v) { - p[0] = (mz_uint8)v; - p[1] = (mz_uint8)(v >> 8); -} -static void mz_write_le32(mz_uint8 *p, mz_uint32 v) { - p[0] = (mz_uint8)v; - p[1] = (mz_uint8)(v >> 8); - p[2] = (mz_uint8)(v >> 16); - p[3] = (mz_uint8)(v >> 24); -} -#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v)) -#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v)) - -mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size) { - if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) || - (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) - return MZ_FALSE; - - if (pZip->m_file_offset_alignment) { - // Ensure user specified file offset alignment is a power of 2. 
- if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1)) - return MZ_FALSE; - } - - if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; - if (!pZip->m_pFree) pZip->m_pFree = def_free_func; - if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; - - pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; - pZip->m_archive_size = existing_size; - pZip->m_central_directory_file_ofs = 0; - pZip->m_total_files = 0; - - if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) - return MZ_FALSE; - memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, - sizeof(mz_uint8)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, - sizeof(mz_uint32)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, - sizeof(mz_uint32)); - return MZ_TRUE; -} - -static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs, - const void *pBuf, size_t n) { - mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; - mz_zip_internal_state *pState = pZip->m_pState; - mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size); -#ifdef _MSC_VER - if ((!n) || - ((0, sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) -#else - if ((!n) || - ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) -#endif - return 0; - if (new_size > pState->m_mem_capacity) { - void *pNew_block; - size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity); - while (new_capacity < new_size) new_capacity *= 2; - if (NULL == (pNew_block = pZip->m_pRealloc( - pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity))) - return 0; - pState->m_pMem = pNew_block; - pState->m_mem_capacity = new_capacity; - } - memcpy((mz_uint8 *)pState->m_pMem + file_ofs, pBuf, n); - pState->m_mem_size = (size_t)new_size; - return n; -} - -mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, - size_t 
size_to_reserve_at_beginning, - size_t initial_allocation_size) { - pZip->m_pWrite = mz_zip_heap_write_func; - pZip->m_pIO_opaque = pZip; - if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE; - if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size, - size_to_reserve_at_beginning))) { - if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, initial_allocation_size))) { - mz_zip_writer_end(pZip); - return MZ_FALSE; - } - pZip->m_pState->m_mem_capacity = initial_allocation_size; - } - return MZ_TRUE; -} - -#ifndef MINIZ_NO_STDIO -static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs, - const void *pBuf, size_t n) { - mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; - mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); - if (((mz_int64)file_ofs < 0) || - (((cur_ofs != (mz_int64)file_ofs)) && - (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) - return 0; - return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile); -} - -mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, - mz_uint64 size_to_reserve_at_beginning) { - MZ_FILE *pFile; - pZip->m_pWrite = mz_zip_file_write_func; - pZip->m_pIO_opaque = pZip; - if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE; - if (NULL == (pFile = MZ_FOPEN(pFilename, "wb"))) { - mz_zip_writer_end(pZip); - return MZ_FALSE; - } - pZip->m_pState->m_pFile = pFile; - if (size_to_reserve_at_beginning) { - mz_uint64 cur_ofs = 0; - char buf[4096]; - MZ_CLEAR_OBJ(buf); - do { - size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n) { - mz_zip_writer_end(pZip); - return MZ_FALSE; - } - cur_ofs += n; - size_to_reserve_at_beginning -= n; - } while (size_to_reserve_at_beginning); - } - return MZ_TRUE; -} -#endif // #ifndef MINIZ_NO_STDIO - -mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, - const char 
*pFilename) { - mz_zip_internal_state *pState; - if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) - return MZ_FALSE; - // No sense in trying to write to an archive that's already at the support max - // size - if ((pZip->m_total_files == 0xFFFF) || - ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) - return MZ_FALSE; - - pState = pZip->m_pState; - - if (pState->m_pFile) { -#ifdef MINIZ_NO_STDIO - pFilename; - return MZ_FALSE; -#else - // Archive is being read from stdio - try to reopen as writable. - if (pZip->m_pIO_opaque != pZip) return MZ_FALSE; - if (!pFilename) return MZ_FALSE; - pZip->m_pWrite = mz_zip_file_write_func; - if (NULL == - (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) { - // The mz_zip_archive is now in a bogus state because pState->m_pFile is - // NULL, so just close it. - mz_zip_reader_end(pZip); - return MZ_FALSE; - } -#endif // #ifdef MINIZ_NO_STDIO - } else if (pState->m_pMem) { - // Archive lives in a memory block. Assume it's from the heap that we can - // resize using the realloc callback. - if (pZip->m_pIO_opaque != pZip) return MZ_FALSE; - pState->m_mem_capacity = pState->m_mem_size; - pZip->m_pWrite = mz_zip_heap_write_func; - } - // Archive is being read via a user provided read function - make sure the - // user has specified a write function too. - else if (!pZip->m_pWrite) - return MZ_FALSE; - - // Start writing new files at the archive's current central directory - // location. 
- pZip->m_archive_size = pZip->m_central_directory_file_ofs; - pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; - pZip->m_central_directory_file_ofs = 0; - - return MZ_TRUE; -} - -mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, - const void *pBuf, size_t buf_size, - mz_uint level_and_flags) { - return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0, - level_and_flags, 0, 0); -} - -typedef struct { - mz_zip_archive *m_pZip; - mz_uint64 m_cur_archive_file_ofs; - mz_uint64 m_comp_size; -} mz_zip_writer_add_state; - -static mz_bool mz_zip_writer_add_put_buf_callback(const void *pBuf, int len, - void *pUser) { - mz_zip_writer_add_state *pState = (mz_zip_writer_add_state *)pUser; - if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque, - pState->m_cur_archive_file_ofs, pBuf, - len) != len) - return MZ_FALSE; - pState->m_cur_archive_file_ofs += len; - pState->m_comp_size += len; - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_create_local_dir_header( - mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, - mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size, - mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, - mz_uint16 dos_time, mz_uint16 dos_date) { - (void)pZip; - memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 
20 : 0); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, comp_size); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, uncomp_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size); - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_create_central_dir_header( - mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, - mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size, - mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, - mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, - mz_uint64 local_header_ofs, mz_uint32 ext_attributes) { - (void)pZip; - memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 
20 : 0); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, comp_size); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, uncomp_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_header_ofs); - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_add_to_central_dir( - mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size, - const void *pExtra, mz_uint16 extra_size, const void *pComment, - mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, - mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, - mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, - mz_uint32 ext_attributes) { - mz_zip_internal_state *pState = pZip->m_pState; - mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size; - size_t orig_central_dir_size = pState->m_central_dir.m_size; - mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; - - // No zip64 support yet - if ((local_header_ofs > 0xFFFFFFFF) || - (((mz_uint64)pState->m_central_dir.m_size + - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size + - comment_size) > 0xFFFFFFFF)) - return MZ_FALSE; - - if (!mz_zip_writer_create_central_dir_header( - pZip, central_dir_header, filename_size, extra_size, comment_size, - uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, - dos_date, local_header_ofs, ext_attributes)) - return MZ_FALSE; - - if ((!mz_zip_array_push_back(pZip, 
&pState->m_central_dir, central_dir_header, - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) || - (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, - filename_size)) || - (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, - extra_size)) || - (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, - comment_size)) || - (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, - &central_dir_ofs, 1))) { - // Try to push the central directory array back into its original state. - mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, - MZ_FALSE); - return MZ_FALSE; - } - - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name) { - // Basic ZIP archive filename validity checks: Valid filenames cannot start - // with a forward slash, cannot contain a drive letter, and cannot use - // DOS-style backward slashes. - if (*pArchive_name == '/') return MZ_FALSE; - while (*pArchive_name) { - if ((*pArchive_name == '\\') || (*pArchive_name == ':')) return MZ_FALSE; - pArchive_name++; - } - return MZ_TRUE; -} - -static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment( - mz_zip_archive *pZip) { - mz_uint32 n; - if (!pZip->m_file_offset_alignment) return 0; - n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1)); - return (pZip->m_file_offset_alignment - n) & - (pZip->m_file_offset_alignment - 1); -} - -static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip, - mz_uint64 cur_file_ofs, mz_uint32 n) { - char buf[4096]; - memset(buf, 0, MZ_MIN(sizeof(buf), n)); - while (n) { - mz_uint32 s = MZ_MIN(sizeof(buf), n); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s) - return MZ_FALSE; - cur_file_ofs += s; - n -= s; - } - return MZ_TRUE; -} - -mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, - const char *pArchive_name, const void *pBuf, - size_t buf_size, const void *pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, 
mz_uint64 uncomp_size, - mz_uint32 uncomp_crc32) { - mz_uint16 method = 0, dos_time = 0, dos_date = 0; - mz_uint level, ext_attributes = 0, num_alignment_padding_bytes; - mz_uint64 local_dir_header_ofs = pZip->m_archive_size, - cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0; - size_t archive_name_size; - mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; - tdefl_compressor *pComp = NULL; - mz_bool store_data_uncompressed; - mz_zip_internal_state *pState; - - if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; - level = level_and_flags & 0xF; - store_data_uncompressed = - ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)); - - if ((!pZip) || (!pZip->m_pState) || - (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) || - (!pArchive_name) || ((comment_size) && (!pComment)) || - (pZip->m_total_files == 0xFFFF) || (level > MZ_UBER_COMPRESSION)) - return MZ_FALSE; - - pState = pZip->m_pState; - - if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size)) - return MZ_FALSE; - // No zip64 support yet - if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) return MZ_FALSE; - if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; - -#ifndef MINIZ_NO_TIME - { - time_t cur_time; - time(&cur_time); - mz_zip_time_to_dos_time(cur_time, &dos_time, &dos_date); - } -#endif // #ifndef MINIZ_NO_TIME - - archive_name_size = strlen(pArchive_name); - if (archive_name_size > 0xFFFF) return MZ_FALSE; - - num_alignment_padding_bytes = - mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); - - // no zip64 support yet - if ((pZip->m_total_files == 0xFFFF) || - ((pZip->m_archive_size + num_alignment_padding_bytes + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - comment_size + archive_name_size) > 0xFFFFFFFF)) - return MZ_FALSE; - - if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) { - // Set DOS Subdirectory attribute bit. 
- ext_attributes |= 0x10; - // Subdirectories cannot contain data. - if ((buf_size) || (uncomp_size)) return MZ_FALSE; - } - - // Try to do any allocations before writing to the archive, so if an - // allocation fails the file remains unmodified. (A good idea if we're doing - // an in-place modification.) - if ((!mz_zip_array_ensure_room( - pZip, &pState->m_central_dir, - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size)) || - (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1))) - return MZ_FALSE; - - if ((!store_data_uncompressed) && (buf_size)) { - if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)))) - return MZ_FALSE; - } - - if (!mz_zip_writer_write_zeros( - pZip, cur_archive_file_ofs, - num_alignment_padding_bytes + sizeof(local_dir_header))) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return MZ_FALSE; - } - local_dir_header_ofs += num_alignment_padding_bytes; - if (pZip->m_file_offset_alignment) { - MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == - 0); - } - cur_archive_file_ofs += - num_alignment_padding_bytes + sizeof(local_dir_header); - - MZ_CLEAR_OBJ(local_dir_header); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, - archive_name_size) != archive_name_size) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return MZ_FALSE; - } - cur_archive_file_ofs += archive_name_size; - - if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) { - uncomp_crc32 = - (mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, buf_size); - uncomp_size = buf_size; - if (uncomp_size <= 3) { - level = 0; - store_data_uncompressed = MZ_TRUE; - } - } - - if (store_data_uncompressed) { - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, - buf_size) != buf_size) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return MZ_FALSE; - } - - cur_archive_file_ofs += buf_size; - comp_size = buf_size; - - if 
(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) method = MZ_DEFLATED; - } else if (buf_size) { - mz_zip_writer_add_state state; - - state.m_pZip = pZip; - state.m_cur_archive_file_ofs = cur_archive_file_ofs; - state.m_comp_size = 0; - - if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, - tdefl_create_comp_flags_from_zip_params( - level, -15, MZ_DEFAULT_STRATEGY)) != - TDEFL_STATUS_OKAY) || - (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) != - TDEFL_STATUS_DONE)) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return MZ_FALSE; - } - - comp_size = state.m_comp_size; - cur_archive_file_ofs = state.m_cur_archive_file_ofs; - - method = MZ_DEFLATED; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - pComp = NULL; - - // no zip64 support yet - if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) - return MZ_FALSE; - - if (!mz_zip_writer_create_local_dir_header( - pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, - comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) - return MZ_FALSE; - - if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, - sizeof(local_dir_header)) != sizeof(local_dir_header)) - return MZ_FALSE; - - if (!mz_zip_writer_add_to_central_dir( - pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, - comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, - dos_time, dos_date, local_dir_header_ofs, ext_attributes)) - return MZ_FALSE; - - pZip->m_total_files++; - pZip->m_archive_size = cur_archive_file_ofs; - - return MZ_TRUE; -} - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, - const char *pSrc_filename, const void *pComment, - mz_uint16 comment_size, - mz_uint level_and_flags) { - mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes; - mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0; - mz_uint64 local_dir_header_ofs = 
pZip->m_archive_size, - cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = 0, - comp_size = 0; - size_t archive_name_size; - mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; - MZ_FILE *pSrc_file = NULL; - - if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; - level = level_and_flags & 0xF; - - if ((!pZip) || (!pZip->m_pState) || - (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) || - ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) - return MZ_FALSE; - if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) return MZ_FALSE; - if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; - - archive_name_size = strlen(pArchive_name); - if (archive_name_size > 0xFFFF) return MZ_FALSE; - - num_alignment_padding_bytes = - mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); - - // no zip64 support yet - if ((pZip->m_total_files == 0xFFFF) || - ((pZip->m_archive_size + num_alignment_padding_bytes + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - comment_size + archive_name_size) > 0xFFFFFFFF)) - return MZ_FALSE; - - if (!mz_zip_get_file_modified_time(pSrc_filename, &dos_time, &dos_date)) - return MZ_FALSE; - - pSrc_file = MZ_FOPEN(pSrc_filename, "rb"); - if (!pSrc_file) return MZ_FALSE; - MZ_FSEEK64(pSrc_file, 0, SEEK_END); - uncomp_size = MZ_FTELL64(pSrc_file); - MZ_FSEEK64(pSrc_file, 0, SEEK_SET); - - if (uncomp_size > 0xFFFFFFFF) { - // No zip64 support yet - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - if (uncomp_size <= 3) level = 0; - - if (!mz_zip_writer_write_zeros( - pZip, cur_archive_file_ofs, - num_alignment_padding_bytes + sizeof(local_dir_header))) { - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - local_dir_header_ofs += num_alignment_padding_bytes; - if (pZip->m_file_offset_alignment) { - MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == - 0); - } - cur_archive_file_ofs += - num_alignment_padding_bytes + sizeof(local_dir_header); - - 
MZ_CLEAR_OBJ(local_dir_header); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, - archive_name_size) != archive_name_size) { - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - cur_archive_file_ofs += archive_name_size; - - if (uncomp_size) { - mz_uint64 uncomp_remaining = uncomp_size; - void *pRead_buf = - pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE); - if (!pRead_buf) { - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - - if (!level) { - while (uncomp_remaining) { - mz_uint n = - (mz_uint)MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining); - if ((MZ_FREAD(pRead_buf, 1, n, pSrc_file) != n) || - (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, - n) != n)) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - uncomp_crc32 = - (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n); - uncomp_remaining -= n; - cur_archive_file_ofs += n; - } - comp_size = uncomp_size; - } else { - mz_bool result = MZ_FALSE; - mz_zip_writer_add_state state; - tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)); - if (!pComp) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - - state.m_pZip = pZip; - state.m_cur_archive_file_ofs = cur_archive_file_ofs; - state.m_comp_size = 0; - - if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, - tdefl_create_comp_flags_from_zip_params( - level, -15, MZ_DEFAULT_STRATEGY)) != - TDEFL_STATUS_OKAY) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - - for (;;) { - size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining, - (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); - tdefl_status status; - - if (MZ_FREAD(pRead_buf, 1, in_buf_size, pSrc_file) != in_buf_size) - break; - - uncomp_crc32 = (mz_uint32)mz_crc32( - 
uncomp_crc32, (const mz_uint8 *)pRead_buf, in_buf_size); - uncomp_remaining -= in_buf_size; - - status = tdefl_compress_buffer( - pComp, pRead_buf, in_buf_size, - uncomp_remaining ? TDEFL_NO_FLUSH : TDEFL_FINISH); - if (status == TDEFL_STATUS_DONE) { - result = MZ_TRUE; - break; - } else if (status != TDEFL_STATUS_OKAY) - break; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - - if (!result) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - - comp_size = state.m_comp_size; - cur_archive_file_ofs = state.m_cur_archive_file_ofs; - - method = MZ_DEFLATED; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - } - - MZ_FCLOSE(pSrc_file); - pSrc_file = NULL; - - // no zip64 support yet - if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) - return MZ_FALSE; - - if (!mz_zip_writer_create_local_dir_header( - pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, - comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) - return MZ_FALSE; - - if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, - sizeof(local_dir_header)) != sizeof(local_dir_header)) - return MZ_FALSE; - - if (!mz_zip_writer_add_to_central_dir( - pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, - comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, - dos_time, dos_date, local_dir_header_ofs, ext_attributes)) - return MZ_FALSE; - - pZip->m_total_files++; - pZip->m_archive_size = cur_archive_file_ofs; - - return MZ_TRUE; -} -#endif // #ifndef MINIZ_NO_STDIO - -mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, - mz_zip_archive *pSource_zip, - mz_uint file_index) { - mz_uint n, bit_flags, num_alignment_padding_bytes; - mz_uint64 comp_bytes_remaining, local_dir_header_ofs; - mz_uint64 cur_src_file_ofs, cur_dst_file_ofs; - mz_uint32 - local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / - sizeof(mz_uint32)]; - mz_uint8 
*pLocal_header = (mz_uint8 *)local_header_u32; - mz_uint8 central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; - size_t orig_central_dir_size; - mz_zip_internal_state *pState; - void *pBuf; - const mz_uint8 *pSrc_central_header; - - if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) - return MZ_FALSE; - if (NULL == - (pSrc_central_header = mz_zip_reader_get_cdh(pSource_zip, file_index))) - return MZ_FALSE; - pState = pZip->m_pState; - - num_alignment_padding_bytes = - mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); - - // no zip64 support yet - if ((pZip->m_total_files == 0xFFFF) || - ((pZip->m_archive_size + num_alignment_padding_bytes + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) > - 0xFFFFFFFF)) - return MZ_FALSE; - - cur_src_file_ofs = - MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS); - cur_dst_file_ofs = pZip->m_archive_size; - - if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, - pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return MZ_FALSE; - if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) - return MZ_FALSE; - cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; - - if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs, - num_alignment_padding_bytes)) - return MZ_FALSE; - cur_dst_file_ofs += num_alignment_padding_bytes; - local_dir_header_ofs = cur_dst_file_ofs; - if (pZip->m_file_offset_alignment) { - MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == - 0); - } - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header, - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return MZ_FALSE; - cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; - - n = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); - comp_bytes_remaining = - n + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); 
- - if (NULL == (pBuf = pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, - (size_t)MZ_MAX(sizeof(mz_uint32) * 4, - MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, - comp_bytes_remaining))))) - return MZ_FALSE; - - while (comp_bytes_remaining) { - n = (mz_uint)MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining); - if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, - n) != n) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return MZ_FALSE; - } - cur_src_file_ofs += n; - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return MZ_FALSE; - } - cur_dst_file_ofs += n; - - comp_bytes_remaining -= n; - } - - bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); - if (bit_flags & 8) { - // Copy data descriptor - if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, - sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return MZ_FALSE; - } - - n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == 0x08074b50) ? 
4 : 3); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return MZ_FALSE; - } - - cur_src_file_ofs += n; - cur_dst_file_ofs += n; - } - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - - // no zip64 support yet - if (cur_dst_file_ofs > 0xFFFFFFFF) return MZ_FALSE; - - orig_central_dir_size = pState->m_central_dir.m_size; - - memcpy(central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); - MZ_WRITE_LE32(central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, - local_dir_header_ofs); - if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_header, - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) - return MZ_FALSE; - - n = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS) + - MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS); - if (!mz_zip_array_push_back( - pZip, &pState->m_central_dir, - pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n)) { - mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, - MZ_FALSE); - return MZ_FALSE; - } - - if (pState->m_central_dir.m_size > 0xFFFFFFFF) return MZ_FALSE; - n = (mz_uint32)orig_central_dir_size; - if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) { - mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, - MZ_FALSE); - return MZ_FALSE; - } - - pZip->m_total_files++; - pZip->m_archive_size = cur_dst_file_ofs; - - return MZ_TRUE; -} - -mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip) { - mz_zip_internal_state *pState; - mz_uint64 central_dir_ofs, central_dir_size; - mz_uint8 hdr[MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE]; - - if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) - return MZ_FALSE; - - pState = pZip->m_pState; - - // no zip64 support yet - if ((pZip->m_total_files > 0xFFFF) || - ((pZip->m_archive_size + pState->m_central_dir.m_size + - 
MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) - return MZ_FALSE; - - central_dir_ofs = 0; - central_dir_size = 0; - if (pZip->m_total_files) { - // Write central directory - central_dir_ofs = pZip->m_archive_size; - central_dir_size = pState->m_central_dir.m_size; - pZip->m_central_directory_file_ofs = central_dir_ofs; - if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs, - pState->m_central_dir.m_p, - (size_t)central_dir_size) != central_dir_size) - return MZ_FALSE; - pZip->m_archive_size += central_dir_size; - } - - // Write end of central directory record - MZ_CLEAR_OBJ(hdr); - MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS, - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG); - MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, - pZip->m_total_files); - MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files); - MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, central_dir_size); - MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, central_dir_ofs); - - if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, - sizeof(hdr)) != sizeof(hdr)) - return MZ_FALSE; -#ifndef MINIZ_NO_STDIO - if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF)) return MZ_FALSE; -#endif // #ifndef MINIZ_NO_STDIO - - pZip->m_archive_size += sizeof(hdr); - - pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED; - return MZ_TRUE; -} - -mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, - size_t *pSize) { - if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pSize)) return MZ_FALSE; - if (pZip->m_pWrite != mz_zip_heap_write_func) return MZ_FALSE; - if (!mz_zip_writer_finalize_archive(pZip)) return MZ_FALSE; - - *pBuf = pZip->m_pState->m_pMem; - *pSize = pZip->m_pState->m_mem_size; - pZip->m_pState->m_pMem = NULL; - pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0; - return MZ_TRUE; -} - -mz_bool mz_zip_writer_end(mz_zip_archive *pZip) { - mz_zip_internal_state *pState; - mz_bool status = MZ_TRUE; - if ((!pZip) || 
(!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || - ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) && - (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED))) - return MZ_FALSE; - - pState = pZip->m_pState; - pZip->m_pState = NULL; - mz_zip_array_clear(pZip, &pState->m_central_dir); - mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); - mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); - -#ifndef MINIZ_NO_STDIO - if (pState->m_pFile) { - MZ_FCLOSE(pState->m_pFile); - pState->m_pFile = NULL; - } -#endif // #ifndef MINIZ_NO_STDIO - - if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem)) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem); - pState->m_pMem = NULL; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; - return status; -} - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_add_mem_to_archive_file_in_place( - const char *pZip_filename, const char *pArchive_name, const void *pBuf, - size_t buf_size, const void *pComment, mz_uint16 comment_size, - mz_uint level_and_flags) { - mz_bool status, created_new_archive = MZ_FALSE; - mz_zip_archive zip_archive; - struct MZ_FILE_STAT_STRUCT file_stat; - MZ_CLEAR_OBJ(zip_archive); - if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; - if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) || - ((comment_size) && (!pComment)) || - ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION)) - return MZ_FALSE; - if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; - if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0) { - // Create a new archive. - if (!mz_zip_writer_init_file(&zip_archive, pZip_filename, 0)) - return MZ_FALSE; - created_new_archive = MZ_TRUE; - } else { - // Append to an existing archive. 
- if (!mz_zip_reader_init_file( - &zip_archive, pZip_filename, - level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) - return MZ_FALSE; - if (!mz_zip_writer_init_from_reader(&zip_archive, pZip_filename)) { - mz_zip_reader_end(&zip_archive); - return MZ_FALSE; - } - } - status = - mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size, - pComment, comment_size, level_and_flags, 0, 0); - // Always finalize, even if adding failed for some reason, so we have a valid - // central directory. (This may not always succeed, but we can try.) - if (!mz_zip_writer_finalize_archive(&zip_archive)) status = MZ_FALSE; - if (!mz_zip_writer_end(&zip_archive)) status = MZ_FALSE; - if ((!status) && (created_new_archive)) { - // It's a new archive and something went wrong, so just delete it. - int ignoredStatus = MZ_DELETE_FILE(pZip_filename); - (void)ignoredStatus; - } - return status; -} - -void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, - const char *pArchive_name, - size_t *pSize, mz_uint flags) { - int file_index; - mz_zip_archive zip_archive; - void *p = NULL; - - if (pSize) *pSize = 0; - - if ((!pZip_filename) || (!pArchive_name)) return NULL; - - MZ_CLEAR_OBJ(zip_archive); - if (!mz_zip_reader_init_file( - &zip_archive, pZip_filename, - flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) - return NULL; - - if ((file_index = mz_zip_reader_locate_file(&zip_archive, pArchive_name, NULL, - flags)) >= 0) - p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags); - - mz_zip_reader_end(&zip_archive); - return p; -} - -#endif // #ifndef MINIZ_NO_STDIO - -#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -#endif // #ifndef MINIZ_NO_ARCHIVE_APIS - -#ifdef __cplusplus -} -#endif - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -#endif // MINIZ_HEADER_FILE_ONLY - -/* - This is free and unencumbered software released into the public domain. 
- - Anyone is free to copy, modify, publish, use, compile, sell, or - distribute this software, either in source code form or as a compiled - binary, for any purpose, commercial or non-commercial, and by any - means. - - In jurisdictions that recognize copyright laws, the author or authors - of this software dedicate any and all copyright interest in the - software to the public domain. We make this dedication for the benefit - of the public at large and to the detriment of our heirs and - successors. We intend this dedication to be an overt act of - relinquishment in perpetuity of all present and future rights to this - software under copyright law. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - OTHER DEALINGS IN THE SOFTWARE. - - For more information, please refer to <http://unlicense.org/> -*/ - -// ---------------------- end of miniz ---------------------------------------- - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -} // namespace miniz -#else - -// Reuse MINIZ_LITTE_ENDIAN macro - -#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ - defined(__i386) || defined(__i486__) || defined(__i486) || \ - defined(i386) || defined(__ia64__) || defined(__x86_64__) -// MINIZ_X86_OR_X64_CPU is only used to help set the below macros. -#define MINIZ_X86_OR_X64_CPU 1 -#endif - -#if defined(__sparcv9) -// Big endian -#else -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU -// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. 
-#define MINIZ_LITTLE_ENDIAN 1 -#endif -#endif - -#endif // TINYEXR_USE_MINIZ - // static bool IsBigEndian(void) { // union { // unsigned int i; @@ -7079,7 +684,7 @@ static void cpy2(unsigned short *dst_val, const unsigned short *src_val) { } static void swap2(unsigned short *val) { -#ifdef MINIZ_LITTLE_ENDIAN +#ifdef TINYEXR_LITTLE_ENDIAN (void)val; #else unsigned short tmp = *val; @@ -7138,7 +743,7 @@ static void cpy4(float *dst_val, const float *src_val) { #endif static void swap4(unsigned int *val) { -#ifdef MINIZ_LITTLE_ENDIAN +#ifdef TINYEXR_LITTLE_ENDIAN (void)val; #else unsigned int tmp = *val; @@ -7153,7 +758,7 @@ static void swap4(unsigned int *val) { } static void swap4(int *val) { -#ifdef MINIZ_LITTLE_ENDIAN +#ifdef TINYEXR_LITTLE_ENDIAN (void)val; #else int tmp = *val; @@ -7168,7 +773,7 @@ static void swap4(int *val) { } static void swap4(float *val) { -#ifdef MINIZ_LITTLE_ENDIAN +#ifdef TINYEXR_LITTLE_ENDIAN (void)val; #else float tmp = *val; @@ -7199,7 +804,7 @@ static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 #endif static void swap8(tinyexr::tinyexr_uint64 *val) { -#ifdef MINIZ_LITTLE_ENDIAN +#ifdef TINYEXR_LITTLE_ENDIAN (void)val; #else tinyexr::tinyexr_uint64 tmp = (*val); @@ -7218,12 +823,11 @@ static void swap8(tinyexr::tinyexr_uint64 *val) { } // https://gist.github.com/rygorous/2156668 -// Reuse MINIZ_LITTLE_ENDIAN flag from miniz. 
union FP32 { unsigned int u; float f; struct { -#if MINIZ_LITTLE_ENDIAN +#if TINYEXR_LITTLE_ENDIAN unsigned int Mantissa : 23; unsigned int Exponent : 8; unsigned int Sign : 1; @@ -7243,7 +847,7 @@ union FP32 { union FP16 { unsigned short u; struct { -#if MINIZ_LITTLE_ENDIAN +#if TINYEXR_LITTLE_ENDIAN unsigned int Mantissa : 10; unsigned int Exponent : 5; unsigned int Sign : 1; @@ -7351,7 +955,7 @@ static const char *ReadString(std::string *s, const char *ptr, size_t len) { } if (size_t(q - ptr) >= len) { - (*s) = std::string(); + (*s).clear(); return NULL; } @@ -7438,6 +1042,7 @@ static void WriteAttributeToMemory(std::vector<unsigned char> *out, typedef struct { std::string name; // less than 255 bytes long int pixel_type; + int requested_pixel_type; int x_sampling; int y_sampling; unsigned char p_linear; @@ -7465,6 +1070,7 @@ struct HeaderInfo { int chunk_count; // Tiled format + int tiled; // Non-zero if the part is tiled. int tile_size_x; int tile_size_y; int tile_level_mode; @@ -7474,6 +1080,11 @@ struct HeaderInfo { int compression_type; + // required for multi-part or non-image files + std::string name; + // required for multi-part or non-image files + std::string type; + void clear() { channels.clear(); attributes.clear(); @@ -7495,6 +1106,7 @@ struct HeaderInfo { chunk_count = 0; // Tiled format + tiled = 0; tile_size_x = 0; tile_size_y = 0; tile_level_mode = 0; @@ -7502,6 +1114,9 @@ struct HeaderInfo { header_len = 0; compression_type = 0; + + name.clear(); + type.clear(); } }; @@ -7558,7 +1173,7 @@ static void WriteChannelInfo(std::vector<unsigned char> &data, // Calculate total size. 
for (size_t c = 0; c < channels.size(); c++) { - sz += strlen(channels[c].name.c_str()) + 1; // +1 for \0 + sz += channels[c].name.length() + 1; // +1 for \0 sz += 16; // 4 * int } data.resize(sz + 1); @@ -7566,12 +1181,12 @@ static void WriteChannelInfo(std::vector<unsigned char> &data, unsigned char *p = &data.at(0); for (size_t c = 0; c < channels.size(); c++) { - memcpy(p, channels[c].name.c_str(), strlen(channels[c].name.c_str())); - p += strlen(channels[c].name.c_str()); + memcpy(p, channels[c].name.c_str(), channels[c].name.length()); + p += channels[c].name.length(); (*p) = '\0'; p++; - int pixel_type = channels[c].pixel_type; + int pixel_type = channels[c].requested_pixel_type; int x_sampling = channels[c].x_sampling; int y_sampling = channels[c].y_sampling; tinyexr::swap4(&pixel_type); @@ -7650,11 +1265,11 @@ static void CompressZip(unsigned char *dst, // Compress the data using miniz // - miniz::mz_ulong outSize = miniz::mz_compressBound(src_size); - int ret = miniz::mz_compress( + mz_ulong outSize = mz_compressBound(src_size); + int ret = mz_compress( dst, &outSize, static_cast<const unsigned char *>(&tmpBuf.at(0)), src_size); - assert(ret == miniz::MZ_OK); + assert(ret == MZ_OK); (void)ret; compressedSize = outSize; @@ -7687,8 +1302,8 @@ static bool DecompressZip(unsigned char *dst, #if TINYEXR_USE_MINIZ int ret = - miniz::mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); - if (miniz::MZ_OK != ret) { + mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); + if (MZ_OK != ret) { return false; } #else @@ -8989,7 +2604,8 @@ static bool getCode(int po, int rlc, long long &c, int &lc, const char *&in, if (po == rlc) { if (lc < 8) { /* TinyEXR issue 78 */ - if ((in + 1) >= in_end) { + /* TinyEXR issue 160. 
in + 1 -> in */ + if (in >= in_end) { return false; } @@ -9332,7 +2948,7 @@ static bool CompressPiz(unsigned char *outPtr, unsigned int *outSize, unsigned short minNonZero; unsigned short maxNonZero; -#if !MINIZ_LITTLE_ENDIAN +#if !TINYEXR_LITTLE_ENDIAN // @todo { PIZ compression on BigEndian architecture. } assert(0); return false; @@ -9355,7 +2971,7 @@ static bool CompressPiz(unsigned char *outPtr, unsigned int *outSize, // cd.ys = c.channel().ySampling; size_t pixelSize = sizeof(int); // UINT and FLOAT - if (channelInfo[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + if (channelInfo[c].requested_pixel_type == TINYEXR_PIXELTYPE_HALF) { pixelSize = sizeof(short); } @@ -9445,10 +3061,10 @@ static bool CompressPiz(unsigned char *outPtr, unsigned int *outSize, } static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, - size_t tmpBufSize, size_t inLen, int num_channels, + size_t tmpBufSizeInBytes, size_t inLen, int num_channels, const EXRChannelInfo *channels, int data_width, int num_lines) { - if (inLen == tmpBufSize) { + if (inLen == tmpBufSizeInBytes) { // Data is not compressed(Issue 40). memcpy(outPtr, inPtr, inLen); return true; @@ -9458,7 +3074,7 @@ static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, unsigned short minNonZero; unsigned short maxNonZero; -#if !MINIZ_LITTLE_ENDIAN +#if !TINYEXR_LITTLE_ENDIAN // @todo { PIZ compression on BigEndian architecture. 
} assert(0); return false; @@ -9501,7 +3117,7 @@ static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, return false; } - std::vector<unsigned short> tmpBuffer(tmpBufSize); + std::vector<unsigned short> tmpBuffer(tmpBufSizeInBytes / sizeof(unsigned short)); hufUncompress(reinterpret_cast<const char *>(ptr), length, &tmpBuffer); // @@ -9543,7 +3159,7 @@ static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, // Expand the pixel data to their original range // - applyLut(lut.data(), &tmpBuffer.at(0), static_cast<int>(tmpBufSize)); + applyLut(lut.data(), &tmpBuffer.at(0), static_cast<int>(tmpBufSizeInBytes / sizeof(unsigned short))); for (int y = 0; y < num_lines; y++) { for (size_t i = 0; i < channelData.size(); ++i) { @@ -9802,6 +3418,9 @@ static bool CompressZfp(std::vector<unsigned char> *outBuf, // ----------------------------------------------------------------- // +// heuristics +#define TINYEXR_DIMENSION_THRESHOLD (1024 * 8192) + // TODO(syoyo): Refactor function arguments. static bool DecodePixelData(/* out */ unsigned char **out_images, const int *requested_pixel_types, @@ -10432,8 +4051,8 @@ static bool DecodeTiledPixelData( const EXRAttribute *attributes, size_t num_channels, const EXRChannelInfo *channels, const std::vector<size_t> &channel_offset_list) { - if (tile_size_x > data_width || tile_size_y > data_height || - tile_size_x * tile_offset_x > data_width || + // Here, data_width and data_height are the dimensions of the current (sub)level. 
+ if (tile_size_x * tile_offset_x > data_width || tile_size_y * tile_offset_y > data_height) { return false; } @@ -10541,6 +4160,7 @@ static inline std::wstring UTF8ToWchar(const std::string &str) { } #endif + static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, const EXRVersion *version, std::string *err, const unsigned char *buf, size_t size) { @@ -10579,6 +4199,11 @@ static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, bool has_pixel_aspect_ratio = false; bool has_screen_window_center = false; bool has_screen_window_width = false; + bool has_name = false; + bool has_type = false; + + info->name.clear(); + info->type.clear(); info->data_window.min_x = 0; info->data_window.min_y = 0; @@ -10594,6 +4219,7 @@ static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, info->screen_window_width = -1.0f; info->pixel_aspect_ratio = -1.0f; + info->tiled = 0; info->tile_size_x = -1; info->tile_size_y = -1; info->tile_level_mode = -1; @@ -10628,7 +4254,8 @@ static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, marker += marker_size; size -= marker_size; - if (version->tiled && attr_name.compare("tiles") == 0) { + // For a multipart file, the version field 9th bit is 0. 
+ if ((version->tiled || version->multipart || version->non_image) && attr_name.compare("tiles") == 0) { unsigned int x_size, y_size; unsigned char tile_mode; assert(data.size() == 9); @@ -10652,7 +4279,7 @@ static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, // mode = levelMode + roundingMode * 16 info->tile_level_mode = tile_mode & 0x3; info->tile_rounding_mode = (tile_mode >> 4) & 0x1; - + info->tiled = 1; } else if (attr_name.compare("compression") == 0) { bool ok = false; if (data[0] < TINYEXR_COMPRESSIONTYPE_PIZ) { @@ -10771,6 +4398,22 @@ static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, memcpy(&info->chunk_count, &data.at(0), sizeof(int)); tinyexr::swap4(&info->chunk_count); } + } else if (attr_name.compare("name") == 0) { + if (!data.empty() && data[0]) { + data.push_back(0); + size_t len = strlen(reinterpret_cast<const char*>(&data[0])); + info->name.resize(len); + info->name.assign(reinterpret_cast<const char*>(&data[0]), len); + has_name = true; + } + } else if (attr_name.compare("type") == 0) { + if (!data.empty() && data[0]) { + data.push_back(0); + size_t len = strlen(reinterpret_cast<const char*>(&data[0])); + info->type.resize(len); + info->type.assign(reinterpret_cast<const char*>(&data[0]), len); + has_type = true; + } } else { // Custom attribute(up to TINYEXR_MAX_CUSTOM_ATTRIBUTES) if (info->attributes.size() < TINYEXR_MAX_CUSTOM_ATTRIBUTES) { @@ -10835,6 +4478,17 @@ static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, << std::endl; } + if (version->multipart || version->non_image) { + if (!has_name) { + ss_err << "\"name\" attribute not found in the header." + << std::endl; + } + if (!has_type) { + ss_err << "\"type\" attribute not found in the header." 
+ << std::endl; + } + } + if (!(ss_err.str().empty())) { if (err) { (*err) += ss_err.str(); @@ -10865,12 +4519,30 @@ static void ConvertHeader(EXRHeader *exr_header, const HeaderInfo &info) { exr_header->data_window.max_y = info.data_window.max_y; exr_header->line_order = info.line_order; exr_header->compression_type = info.compression_type; - + exr_header->tiled = info.tiled; exr_header->tile_size_x = info.tile_size_x; exr_header->tile_size_y = info.tile_size_y; exr_header->tile_level_mode = info.tile_level_mode; exr_header->tile_rounding_mode = info.tile_rounding_mode; + EXRSetNameAttr(exr_header, info.name.c_str()); + + if (!info.type.empty()) { + if (info.type == "scanlineimage") { + assert(!exr_header->tiled); + } else if (info.type == "tiledimage") { + assert(exr_header->tiled); + } else if (info.type == "deeptile") { + exr_header->non_image = 1; + assert(exr_header->tiled); + } else if (info.type == "deepscanline") { + exr_header->non_image = 1; + assert(!exr_header->tiled); + } else { + assert(false); + } + } + exr_header->num_channels = static_cast<int>(info.channels.size()); exr_header->channels = static_cast<EXRChannelInfo *>(malloc( @@ -10932,8 +4604,216 @@ static void ConvertHeader(EXRHeader *exr_header, const HeaderInfo &info) { exr_header->header_len = info.header_len; } +struct OffsetData { + OffsetData() : num_x_levels(0), num_y_levels(0) {} + std::vector<std::vector<std::vector <tinyexr::tinyexr_uint64> > > offsets; + int num_x_levels; + int num_y_levels; +}; + +int LevelIndex(int lx, int ly, int tile_level_mode, int num_x_levels) { + switch (tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + return 0; + + case TINYEXR_TILE_MIPMAP_LEVELS: + return lx; + + case TINYEXR_TILE_RIPMAP_LEVELS: + return lx + ly * num_x_levels; + + default: + assert(false); + } + return 0; +} + +static int LevelSize(int toplevel_size, int level, int tile_rounding_mode) { + assert(level >= 0); + + int b = (int)(1u << (unsigned)level); + int level_size = toplevel_size / b; 
+ + if (tile_rounding_mode == TINYEXR_TILE_ROUND_UP && level_size * b < toplevel_size) + level_size += 1; + + return std::max(level_size, 1); +} + +static int DecodeTiledLevel(EXRImage* exr_image, const EXRHeader* exr_header, + const OffsetData& offset_data, + const std::vector<size_t>& channel_offset_list, + int pixel_data_size, + const unsigned char* head, const size_t size, + std::string* err) { + int num_channels = exr_header->num_channels; + + int level_index = LevelIndex(exr_image->level_x, exr_image->level_y, exr_header->tile_level_mode, offset_data.num_x_levels); + int num_y_tiles = (int)offset_data.offsets[level_index].size(); + assert(num_y_tiles); + int num_x_tiles = (int)offset_data.offsets[level_index][0].size(); + assert(num_x_tiles); + int num_tiles = num_x_tiles * num_y_tiles; + + int err_code = TINYEXR_SUCCESS; + + enum { + EF_SUCCESS = 0, + EF_INVALID_DATA = 1, + EF_INSUFFICIENT_DATA = 2, + EF_FAILED_TO_DECODE = 4 + }; +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::atomic<unsigned> error_flag(EF_SUCCESS); +#else + unsigned error_flag(EF_SUCCESS); +#endif + + // Although the spec says : "...the data window is subdivided into an array of smaller rectangles...", + // the IlmImf library allows the dimensions of the tile to be larger (or equal) than the dimensions of the data window. 
+#if 0 + if ((exr_header->tile_size_x > exr_image->width || exr_header->tile_size_y > exr_image->height) && + exr_image->level_x == 0 && exr_image->level_y == 0) { + if (err) { + (*err) += "Failed to decode tile data.\n"; + } + err_code = TINYEXR_ERROR_INVALID_DATA; + } +#endif + exr_image->tiles = static_cast<EXRTile*>( + calloc(sizeof(EXRTile), static_cast<size_t>(num_tiles))); + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::vector<std::thread> workers; + std::atomic<int> tile_count(0); + + int num_threads = std::max(1, int(std::thread::hardware_concurrency())); + if (num_threads > int(num_tiles)) { + num_threads = int(num_tiles); + } + + for (int t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&]() + { + int tile_idx = 0; + while ((tile_idx = tile_count++) < num_tiles) { + +#else +#if TINYEXR_USE_OPENMP +#pragma omp parallel for +#endif + for (int tile_idx = 0; tile_idx < num_tiles; tile_idx++) { +#endif + // Allocate memory for each tile. + exr_image->tiles[tile_idx].images = tinyexr::AllocateImage( + num_channels, exr_header->channels, + exr_header->requested_pixel_types, exr_header->tile_size_x, + exr_header->tile_size_y); + + int x_tile = tile_idx % num_x_tiles; + int y_tile = tile_idx / num_x_tiles; + // 16 byte: tile coordinates + // 4 byte : data size + // ~ : data(uncompressed or compressed) + tinyexr::tinyexr_uint64 offset = offset_data.offsets[level_index][y_tile][x_tile]; + if (offset + sizeof(int) * 5 > size) { + // Insufficient data size. 
+ error_flag |= EF_INSUFFICIENT_DATA; + continue; + } + + size_t data_size = + size_t(size - (offset + sizeof(int) * 5)); + const unsigned char* data_ptr = + reinterpret_cast<const unsigned char*>(head + offset); + + int tile_coordinates[4]; + memcpy(tile_coordinates, data_ptr, sizeof(int) * 4); + tinyexr::swap4(&tile_coordinates[0]); + tinyexr::swap4(&tile_coordinates[1]); + tinyexr::swap4(&tile_coordinates[2]); + tinyexr::swap4(&tile_coordinates[3]); + + if (tile_coordinates[2] != exr_image->level_x) { + // Invalid data. + error_flag |= EF_INVALID_DATA; + continue; + } + if (tile_coordinates[3] != exr_image->level_y) { + // Invalid data. + error_flag |= EF_INVALID_DATA; + continue; + } + + int data_len; + memcpy(&data_len, data_ptr + 16, + sizeof(int)); // 16 = sizeof(tile_coordinates) + tinyexr::swap4(&data_len); + + if (data_len < 2 || size_t(data_len) > data_size) { + // Insufficient data size. + error_flag |= EF_INSUFFICIENT_DATA; + continue; + } + + // Move to data addr: 20 = 16 + 4; + data_ptr += 20; + bool ret = tinyexr::DecodeTiledPixelData( + exr_image->tiles[tile_idx].images, + &(exr_image->tiles[tile_idx].width), + &(exr_image->tiles[tile_idx].height), + exr_header->requested_pixel_types, data_ptr, + static_cast<size_t>(data_len), exr_header->compression_type, + exr_header->line_order, + exr_image->width, exr_image->height, + tile_coordinates[0], tile_coordinates[1], exr_header->tile_size_x, + exr_header->tile_size_y, static_cast<size_t>(pixel_data_size), + static_cast<size_t>(exr_header->num_custom_attributes), + exr_header->custom_attributes, + static_cast<size_t>(exr_header->num_channels), + exr_header->channels, channel_offset_list); + + if (!ret) { + // Failed to decode tile data. 
+ error_flag |= EF_FAILED_TO_DECODE; + } + + exr_image->tiles[tile_idx].offset_x = tile_coordinates[0]; + exr_image->tiles[tile_idx].offset_y = tile_coordinates[1]; + exr_image->tiles[tile_idx].level_x = tile_coordinates[2]; + exr_image->tiles[tile_idx].level_y = tile_coordinates[3]; + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + } + })); + } // num_thread loop + + for (auto& t : workers) { + t.join(); + } + +#else + } // parallel for +#endif + + // Even in the event of an error, the reserved memory may be freed. + exr_image->num_channels = num_channels; + exr_image->num_tiles = static_cast<int>(num_tiles); + + if (error_flag) err_code = TINYEXR_ERROR_INVALID_DATA; + if (err) { + if (error_flag & EF_INSUFFICIENT_DATA) { + (*err) += "Insufficient data length.\n"; + } + if (error_flag & EF_FAILED_TO_DECODE) { + (*err) += "Failed to decode tile data.\n"; + } + } + return err_code; +} + static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, - const std::vector<tinyexr::tinyexr_uint64> &offsets, + const OffsetData& offset_data, const unsigned char *head, const size_t size, std::string *err) { int num_channels = exr_header->num_channels; @@ -10971,8 +4851,7 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, // Do not allow too large data_width and data_height. header invalid? { - const int threshold = 1024 * 8192; // heuristics - if ((data_width > threshold) || (data_height > threshold)) { + if ((data_width > TINYEXR_DIMENSION_THRESHOLD) || (data_height > TINYEXR_DIMENSION_THRESHOLD)) { if (err) { std::stringstream ss; ss << "data_with or data_height too large. 
data_width: " << data_width @@ -10982,8 +4861,21 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, } return TINYEXR_ERROR_INVALID_DATA; } + if (exr_header->tiled) { + if ((exr_header->tile_size_x > TINYEXR_DIMENSION_THRESHOLD) || (exr_header->tile_size_y > TINYEXR_DIMENSION_THRESHOLD)) { + if (err) { + std::stringstream ss; + ss << "tile with or tile height too large. tile width: " << exr_header->tile_size_x + << ", " + << "tile height = " << exr_header->tile_size_y << std::endl; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_DATA; + } + } } + const std::vector<tinyexr::tinyexr_uint64>& offsets = offset_data.offsets[0][0]; size_t num_blocks = offsets.size(); std::vector<size_t> channel_offset_list; @@ -10998,7 +4890,11 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, return TINYEXR_ERROR_INVALID_DATA; } - bool invalid_data = false; // TODO(LTE): Use atomic lock for MT safety. +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::atomic<bool> invalid_data(false); +#else + bool invalid_data(false); +#endif if (exr_header->tiled) { // value check @@ -11019,136 +4915,60 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, } return TINYEXR_ERROR_INVALID_HEADER; } - - size_t num_tiles = offsets.size(); // = # of blocks - - exr_image->tiles = static_cast<EXRTile *>( - calloc(sizeof(EXRTile), static_cast<size_t>(num_tiles))); - - int err_code = TINYEXR_SUCCESS; - -#if (__cplusplus > 199711L) && (TINYEXR_USE_THREAD > 0) - - std::vector<std::thread> workers; - std::atomic<size_t> tile_count(0); - - int num_threads = std::max(1, int(std::thread::hardware_concurrency())); - if (num_threads > int(num_tiles)) { - num_threads = int(num_tiles); - } - - for (int t = 0; t < num_threads; t++) { - workers.emplace_back(std::thread([&]() { - size_t tile_idx = 0; - while ((tile_idx = tile_count++) < num_tiles) { - -#else - for (size_t tile_idx = 0; tile_idx < num_tiles; tile_idx++) { -#endif - // 
Allocate memory for each tile. - exr_image->tiles[tile_idx].images = tinyexr::AllocateImage( - num_channels, exr_header->channels, - exr_header->requested_pixel_types, exr_header->tile_size_x, - exr_header->tile_size_y); - - // 16 byte: tile coordinates - // 4 byte : data size - // ~ : data(uncompressed or compressed) - if (offsets[tile_idx] + sizeof(int) * 5 > size) { - // TODO(LTE): atomic - if (err) { - (*err) += "Insufficient data size.\n"; - } - err_code = TINYEXR_ERROR_INVALID_DATA; - break; - } - - size_t data_size = - size_t(size - (offsets[tile_idx] + sizeof(int) * 5)); - const unsigned char *data_ptr = - reinterpret_cast<const unsigned char *>(head + offsets[tile_idx]); - - int tile_coordinates[4]; - memcpy(tile_coordinates, data_ptr, sizeof(int) * 4); - tinyexr::swap4(&tile_coordinates[0]); - tinyexr::swap4(&tile_coordinates[1]); - tinyexr::swap4(&tile_coordinates[2]); - tinyexr::swap4(&tile_coordinates[3]); - - // @todo{ LoD } - if (tile_coordinates[2] != 0) { - err_code = TINYEXR_ERROR_UNSUPPORTED_FEATURE; - break; - } - if (tile_coordinates[3] != 0) { - err_code = TINYEXR_ERROR_UNSUPPORTED_FEATURE; - break; - } - - int data_len; - memcpy(&data_len, data_ptr + 16, - sizeof(int)); // 16 = sizeof(tile_coordinates) - tinyexr::swap4(&data_len); - - if (data_len < 4 || size_t(data_len) > data_size) { - // TODO(LTE): atomic - if (err) { - (*err) += "Insufficient data length.\n"; - } - err_code = TINYEXR_ERROR_INVALID_DATA; - break; - } - - // Move to data addr: 20 = 16 + 4; - data_ptr += 20; - - bool ret = tinyexr::DecodeTiledPixelData( - exr_image->tiles[tile_idx].images, - &(exr_image->tiles[tile_idx].width), - &(exr_image->tiles[tile_idx].height), - exr_header->requested_pixel_types, data_ptr, - static_cast<size_t>(data_len), exr_header->compression_type, - exr_header->line_order, data_width, data_height, - tile_coordinates[0], tile_coordinates[1], exr_header->tile_size_x, - exr_header->tile_size_y, static_cast<size_t>(pixel_data_size), - 
static_cast<size_t>(exr_header->num_custom_attributes), - exr_header->custom_attributes, - static_cast<size_t>(exr_header->num_channels), - exr_header->channels, channel_offset_list); - - if (!ret) { - // TODO(LTE): atomic - if (err) { - (*err) += "Failed to decode tile data.\n"; - } - err_code = TINYEXR_ERROR_INVALID_DATA; + if (exr_header->tile_level_mode != TINYEXR_TILE_RIPMAP_LEVELS) { + EXRImage* level_image = NULL; + for (int level = 0; level < offset_data.num_x_levels; ++level) { + if (!level_image) { + level_image = exr_image; + } else { + level_image->next_level = new EXRImage; + InitEXRImage(level_image->next_level); + level_image = level_image->next_level; + } + level_image->width = + LevelSize(exr_header->data_window.max_x - exr_header->data_window.min_x + 1, level, exr_header->tile_rounding_mode); + level_image->height = + LevelSize(exr_header->data_window.max_y - exr_header->data_window.min_y + 1, level, exr_header->tile_rounding_mode); + level_image->level_x = level; + level_image->level_y = level; + + int ret = DecodeTiledLevel(level_image, exr_header, + offset_data, + channel_offset_list, + pixel_data_size, + head, size, + err); + if (ret != TINYEXR_SUCCESS) return ret; + } + } else { + EXRImage* level_image = NULL; + for (int level_y = 0; level_y < offset_data.num_y_levels; ++level_y) + for (int level_x = 0; level_x < offset_data.num_x_levels; ++level_x) { + if (!level_image) { + level_image = exr_image; + } else { + level_image->next_level = new EXRImage; + InitEXRImage(level_image->next_level); + level_image = level_image->next_level; } - exr_image->tiles[tile_idx].offset_x = tile_coordinates[0]; - exr_image->tiles[tile_idx].offset_y = tile_coordinates[1]; - exr_image->tiles[tile_idx].level_x = tile_coordinates[2]; - exr_image->tiles[tile_idx].level_y = tile_coordinates[3]; - -#if (__cplusplus > 199711L) && (TINYEXR_USE_THREAD > 0) + level_image->width = + LevelSize(exr_header->data_window.max_x - exr_header->data_window.min_x + 1, level_x, 
exr_header->tile_rounding_mode); + level_image->height = + LevelSize(exr_header->data_window.max_y - exr_header->data_window.min_y + 1, level_y, exr_header->tile_rounding_mode); + level_image->level_x = level_x; + level_image->level_y = level_y; + + int ret = DecodeTiledLevel(level_image, exr_header, + offset_data, + channel_offset_list, + pixel_data_size, + head, size, + err); + if (ret != TINYEXR_SUCCESS) return ret; } - })); - } // num_thread loop - - for (auto &t : workers) { - t.join(); } - -#else - } -#endif - - if (err_code != TINYEXR_SUCCESS) { - return err_code; - } - - exr_image->num_tiles = static_cast<int>(num_tiles); } else { // scanline format - // Don't allow too large image(256GB * pixel_data_size or more). Workaround // for #104. size_t total_data_len = @@ -11170,7 +4990,7 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, num_channels, exr_header->channels, exr_header->requested_pixel_types, data_width, data_height); -#if (__cplusplus > 199711L) && (TINYEXR_USE_THREAD > 0) +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) std::vector<std::thread> workers; std::atomic<int> y_count(0); @@ -11271,7 +5091,7 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, } } -#if (__cplusplus > 199711L) && (TINYEXR_USE_THREAD > 0) +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) } })); } @@ -11286,7 +5106,6 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, if (invalid_data) { if (err) { - std::stringstream ss; (*err) += "Invalid data found when decoding pixels.\n"; } return TINYEXR_ERROR_INVALID_DATA; @@ -11343,6 +5162,370 @@ static bool ReconstructLineOffsets( return true; } + +static int FloorLog2(unsigned x) { + // + // For x > 0, floorLog2(y) returns floor(log(x)/log(2)). + // + int y = 0; + while (x > 1) { + y += 1; + x >>= 1u; + } + return y; +} + + +static int CeilLog2(unsigned x) { + // + // For x > 0, ceilLog2(y) returns ceil(log(x)/log(2)). 
+ // + int y = 0; + int r = 0; + while (x > 1) { + if (x & 1) + r = 1; + + y += 1; + x >>= 1u; + } + return y + r; +} + +static int RoundLog2(int x, int tile_rounding_mode) { + return (tile_rounding_mode == TINYEXR_TILE_ROUND_DOWN) ? FloorLog2(static_cast<unsigned>(x)) : CeilLog2(static_cast<unsigned>(x)); +} + +static int CalculateNumXLevels(const EXRHeader* exr_header) { + int min_x = exr_header->data_window.min_x; + int max_x = exr_header->data_window.max_x; + int min_y = exr_header->data_window.min_y; + int max_y = exr_header->data_window.max_y; + + int num = 0; + switch (exr_header->tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + + num = 1; + break; + + case TINYEXR_TILE_MIPMAP_LEVELS: + + { + int w = max_x - min_x + 1; + int h = max_y - min_y + 1; + num = RoundLog2(std::max(w, h), exr_header->tile_rounding_mode) + 1; + } + break; + + case TINYEXR_TILE_RIPMAP_LEVELS: + + { + int w = max_x - min_x + 1; + num = RoundLog2(w, exr_header->tile_rounding_mode) + 1; + } + break; + + default: + + assert(false); + } + + return num; +} + +static int CalculateNumYLevels(const EXRHeader* exr_header) { + int min_x = exr_header->data_window.min_x; + int max_x = exr_header->data_window.max_x; + int min_y = exr_header->data_window.min_y; + int max_y = exr_header->data_window.max_y; + int num = 0; + + switch (exr_header->tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + + num = 1; + break; + + case TINYEXR_TILE_MIPMAP_LEVELS: + + { + int w = max_x - min_x + 1; + int h = max_y - min_y + 1; + num = RoundLog2(std::max(w, h), exr_header->tile_rounding_mode) + 1; + } + break; + + case TINYEXR_TILE_RIPMAP_LEVELS: + + { + int h = max_y - min_y + 1; + num = RoundLog2(h, exr_header->tile_rounding_mode) + 1; + } + break; + + default: + + assert(false); + } + + return num; +} + +static void CalculateNumTiles(std::vector<int>& numTiles, + int toplevel_size, + int size, + int tile_rounding_mode) { + for (unsigned i = 0; i < numTiles.size(); i++) { + int l = LevelSize(toplevel_size, i, 
tile_rounding_mode); + assert(l <= std::numeric_limits<int>::max() - size + 1); + + numTiles[i] = (l + size - 1) / size; + } +} + +static void PrecalculateTileInfo(std::vector<int>& num_x_tiles, + std::vector<int>& num_y_tiles, + const EXRHeader* exr_header) { + int min_x = exr_header->data_window.min_x; + int max_x = exr_header->data_window.max_x; + int min_y = exr_header->data_window.min_y; + int max_y = exr_header->data_window.max_y; + + int num_x_levels = CalculateNumXLevels(exr_header); + int num_y_levels = CalculateNumYLevels(exr_header); + + num_x_tiles.resize(num_x_levels); + num_y_tiles.resize(num_y_levels); + + CalculateNumTiles(num_x_tiles, + max_x - min_x + 1, + exr_header->tile_size_x, + exr_header->tile_rounding_mode); + + CalculateNumTiles(num_y_tiles, + max_y - min_y + 1, + exr_header->tile_size_y, + exr_header->tile_rounding_mode); +} + +static void InitSingleResolutionOffsets(OffsetData& offset_data, size_t num_blocks) { + offset_data.offsets.resize(1); + offset_data.offsets[0].resize(1); + offset_data.offsets[0][0].resize(num_blocks); + offset_data.num_x_levels = 1; + offset_data.num_y_levels = 1; +} + +// Return sum of tile blocks. 
+static int InitTileOffsets(OffsetData& offset_data, + const EXRHeader* exr_header, + const std::vector<int>& num_x_tiles, + const std::vector<int>& num_y_tiles) { + int num_tile_blocks = 0; + offset_data.num_x_levels = static_cast<int>(num_x_tiles.size()); + offset_data.num_y_levels = static_cast<int>(num_y_tiles.size()); + switch (exr_header->tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + case TINYEXR_TILE_MIPMAP_LEVELS: + assert(offset_data.num_x_levels == offset_data.num_y_levels); + offset_data.offsets.resize(offset_data.num_x_levels); + + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) { + offset_data.offsets[l].resize(num_y_tiles[l]); + + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) { + offset_data.offsets[l][dy].resize(num_x_tiles[l]); + num_tile_blocks += num_x_tiles[l]; + } + } + break; + + case TINYEXR_TILE_RIPMAP_LEVELS: + + offset_data.offsets.resize(static_cast<size_t>(offset_data.num_x_levels) * static_cast<size_t>(offset_data.num_y_levels)); + + for (int ly = 0; ly < offset_data.num_y_levels; ++ly) { + for (int lx = 0; lx < offset_data.num_x_levels; ++lx) { + int l = ly * offset_data.num_x_levels + lx; + offset_data.offsets[l].resize(num_y_tiles[ly]); + + for (size_t dy = 0; dy < offset_data.offsets[l].size(); ++dy) { + offset_data.offsets[l][dy].resize(num_x_tiles[lx]); + num_tile_blocks += num_x_tiles[lx]; + } + } + } + break; + + default: + assert(false); + } + return num_tile_blocks; +} + +static bool IsAnyOffsetsAreInvalid(const OffsetData& offset_data) { + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) + if (reinterpret_cast<const tinyexr::tinyexr_int64&>(offset_data.offsets[l][dy][dx]) <= 0) + return true; + + return false; +} + +static bool isValidTile(const EXRHeader* exr_header, + const OffsetData& offset_data, + int dx, int dy, int lx, int 
ly) { + if (lx < 0 || ly < 0 || dx < 0 || dy < 0) return false; + int num_x_levels = offset_data.num_x_levels; + int num_y_levels = offset_data.num_y_levels; + switch (exr_header->tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + + if (lx == 0 && + ly == 0 && + offset_data.offsets.size() > 0 && + offset_data.offsets[0].size() > static_cast<size_t>(dy) && + offset_data.offsets[0][dy].size() > static_cast<size_t>(dx)) { + return true; + } + + break; + + case TINYEXR_TILE_MIPMAP_LEVELS: + + if (lx < num_x_levels && + ly < num_y_levels && + offset_data.offsets.size() > static_cast<size_t>(lx) && + offset_data.offsets[lx].size() > static_cast<size_t>(dy) && + offset_data.offsets[lx][dy].size() > static_cast<size_t>(dx)) { + return true; + } + + break; + + case TINYEXR_TILE_RIPMAP_LEVELS: + { + size_t idx = static_cast<size_t>(lx) + static_cast<size_t>(ly)* static_cast<size_t>(num_x_levels); + if (lx < num_x_levels && + ly < num_y_levels && + (offset_data.offsets.size() > idx) && + offset_data.offsets[idx].size() > static_cast<size_t>(dy) && + offset_data.offsets[idx][dy].size() > static_cast<size_t>(dx)) { + return true; + } + } + + break; + + default: + + return false; + } + + return false; +} + +static void ReconstructTileOffsets(OffsetData& offset_data, + const EXRHeader* exr_header, + const unsigned char* head, const unsigned char* marker, const size_t /*size*/, + bool isMultiPartFile, + bool isDeep) { + int numXLevels = offset_data.num_x_levels; + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) { + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) { + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) { + tinyexr::tinyexr_uint64 tileOffset = marker - head; + + if (isMultiPartFile) { + //int partNumber; + marker += sizeof(int); + } + + int tileX; + memcpy(&tileX, marker, sizeof(int)); + tinyexr::swap4(&tileX); + marker += sizeof(int); + + int tileY; + memcpy(&tileY, marker, sizeof(int)); + 
tinyexr::swap4(&tileY); + marker += sizeof(int); + + int levelX; + memcpy(&levelX, marker, sizeof(int)); + tinyexr::swap4(&levelX); + marker += sizeof(int); + + int levelY; + memcpy(&levelY, marker, sizeof(int)); + tinyexr::swap4(&levelY); + marker += sizeof(int); + + if (isDeep) { + tinyexr::tinyexr_int64 packed_offset_table_size; + memcpy(&packed_offset_table_size, marker, sizeof(tinyexr::tinyexr_int64)); + tinyexr::swap8(reinterpret_cast<tinyexr::tinyexr_uint64*>(&packed_offset_table_size)); + marker += sizeof(tinyexr::tinyexr_int64); + + tinyexr::tinyexr_int64 packed_sample_size; + memcpy(&packed_sample_size, marker, sizeof(tinyexr::tinyexr_int64)); + tinyexr::swap8(reinterpret_cast<tinyexr::tinyexr_uint64*>(&packed_sample_size)); + marker += sizeof(tinyexr::tinyexr_int64); + + // next Int64 is unpacked sample size - skip that too + marker += packed_offset_table_size + packed_sample_size + 8; + + } else { + + int dataSize; + memcpy(&dataSize, marker, sizeof(int)); + tinyexr::swap4(&dataSize); + marker += sizeof(int); + marker += dataSize; + } + + if (!isValidTile(exr_header, offset_data, + tileX, tileY, levelX, levelY)) + return; + + int level_idx = LevelIndex(levelX, levelY, exr_header->tile_level_mode, numXLevels); + offset_data.offsets[level_idx][tileY][tileX] = tileOffset; + } + } + } +} + +// marker output is also +static int ReadOffsets(OffsetData& offset_data, + const unsigned char* head, + const unsigned char*& marker, + const size_t size, + const char** err) { + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) { + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) { + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) { + tinyexr::tinyexr_uint64 offset; + if ((marker + sizeof(tinyexr_uint64)) >= (head + size)) { + tinyexr::SetErrorMessage("Insufficient data size in offset table.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64)); + 
tinyexr::swap8(&offset); + if (offset >= size) { + tinyexr::SetErrorMessage("Invalid offset value in DecodeEXRImage.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + marker += sizeof(tinyexr::tinyexr_uint64); // = 8 + offset_data.offsets[l][dy][dx] = offset; + } + } + } + return TINYEXR_SUCCESS; +} + static int DecodeEXRImage(EXRImage *exr_image, const EXRHeader *exr_header, const unsigned char *head, const unsigned char *marker, const size_t size, @@ -11383,100 +5566,114 @@ static int DecodeEXRImage(EXRImage *exr_image, const EXRHeader *exr_header, // Do not allow too large data_width and data_height. header invalid? { - const int threshold = 1024 * 8192; // heuristics - if (data_width > threshold) { + if (data_width > TINYEXR_DIMENSION_THRESHOLD) { tinyexr::SetErrorMessage("data width too large.", err); return TINYEXR_ERROR_INVALID_DATA; } - if (data_height > threshold) { + if (data_height > TINYEXR_DIMENSION_THRESHOLD) { tinyexr::SetErrorMessage("data height too large.", err); return TINYEXR_ERROR_INVALID_DATA; } } - // Read offset tables. - size_t num_blocks = 0; - - if (exr_header->chunk_count > 0) { - // Use `chunkCount` attribute. 
- num_blocks = static_cast<size_t>(exr_header->chunk_count); - } else if (exr_header->tiled) { - // @todo { LoD } - if (exr_header->tile_size_x > data_width || exr_header->tile_size_x < 1 || - exr_header->tile_size_y > data_height || exr_header->tile_size_y < 1) { - tinyexr::SetErrorMessage("tile sizes are invalid.", err); + if (exr_header->tiled) { + if (exr_header->tile_size_x > TINYEXR_DIMENSION_THRESHOLD) { + tinyexr::SetErrorMessage("tile width too large.", err); return TINYEXR_ERROR_INVALID_DATA; } - - size_t num_x_tiles = static_cast<size_t>(data_width) / - static_cast<size_t>(exr_header->tile_size_x); - if (num_x_tiles * static_cast<size_t>(exr_header->tile_size_x) < - static_cast<size_t>(data_width)) { - num_x_tiles++; + if (exr_header->tile_size_y > TINYEXR_DIMENSION_THRESHOLD) { + tinyexr::SetErrorMessage("tile height too large.", err); + return TINYEXR_ERROR_INVALID_DATA; } - size_t num_y_tiles = static_cast<size_t>(data_height) / - static_cast<size_t>(exr_header->tile_size_y); - if (num_y_tiles * static_cast<size_t>(exr_header->tile_size_y) < - static_cast<size_t>(data_height)) { - num_y_tiles++; + } + + // Read offset tables. + OffsetData offset_data; + size_t num_blocks = 0; + // For a multi-resolution image, the size of the offset table will be calculated from the other attributes of the header. + // If chunk_count > 0 then chunk_count must be equal to the calculated tile count. 
+ if (exr_header->tiled) { + { + std::vector<int> num_x_tiles, num_y_tiles; + PrecalculateTileInfo(num_x_tiles, num_y_tiles, exr_header); + num_blocks = InitTileOffsets(offset_data, exr_header, num_x_tiles, num_y_tiles); + if (exr_header->chunk_count > 0) { + if (exr_header->chunk_count != static_cast<int>(num_blocks)) { + tinyexr::SetErrorMessage("Invalid offset table size.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } } - num_blocks = num_x_tiles * num_y_tiles; + int ret = ReadOffsets(offset_data, head, marker, size, err); + if (ret != TINYEXR_SUCCESS) return ret; + if (IsAnyOffsetsAreInvalid(offset_data)) { + ReconstructTileOffsets(offset_data, exr_header, + head, marker, size, + exr_header->multipart, exr_header->non_image); + } + } else if (exr_header->chunk_count > 0) { + // Use `chunkCount` attribute. + num_blocks = static_cast<size_t>(exr_header->chunk_count); + InitSingleResolutionOffsets(offset_data, num_blocks); } else { num_blocks = static_cast<size_t>(data_height) / - static_cast<size_t>(num_scanline_blocks); + static_cast<size_t>(num_scanline_blocks); if (num_blocks * static_cast<size_t>(num_scanline_blocks) < - static_cast<size_t>(data_height)) { + static_cast<size_t>(data_height)) { num_blocks++; } - } - - std::vector<tinyexr::tinyexr_uint64> offsets(num_blocks); - - for (size_t y = 0; y < num_blocks; y++) { - tinyexr::tinyexr_uint64 offset; - // Issue #81 - if ((marker + sizeof(tinyexr_uint64)) >= (head + size)) { - tinyexr::SetErrorMessage("Insufficient data size in offset table.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64)); - tinyexr::swap8(&offset); - if (offset >= size) { - tinyexr::SetErrorMessage("Invalid offset value in DecodeEXRImage.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - marker += sizeof(tinyexr::tinyexr_uint64); // = 8 - offsets[y] = offset; + InitSingleResolutionOffsets(offset_data, num_blocks); } - // If line offsets are invalid, we try to reconstruct it. 
- // See OpenEXR/IlmImf/ImfScanLineInputFile.cpp::readLineOffsets() for details. - for (size_t y = 0; y < num_blocks; y++) { - if (offsets[y] <= 0) { - // TODO(syoyo) Report as warning? - // if (err) { - // stringstream ss; - // ss << "Incomplete lineOffsets." << std::endl; - // (*err) += ss.str(); - //} - bool ret = + if (!exr_header->tiled) { + std::vector<tinyexr::tinyexr_uint64>& offsets = offset_data.offsets[0][0]; + for (size_t y = 0; y < num_blocks; y++) { + tinyexr::tinyexr_uint64 offset; + // Issue #81 + if ((marker + sizeof(tinyexr_uint64)) >= (head + size)) { + tinyexr::SetErrorMessage("Insufficient data size in offset table.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64)); + tinyexr::swap8(&offset); + if (offset >= size) { + tinyexr::SetErrorMessage("Invalid offset value in DecodeEXRImage.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + marker += sizeof(tinyexr::tinyexr_uint64); // = 8 + offsets[y] = offset; + } + + // If line offsets are invalid, we try to reconstruct it. + // See OpenEXR/IlmImf/ImfScanLineInputFile.cpp::readLineOffsets() for details. + for (size_t y = 0; y < num_blocks; y++) { + if (offsets[y] <= 0) { + // TODO(syoyo) Report as warning? + // if (err) { + // stringstream ss; + // ss << "Incomplete lineOffsets." 
<< std::endl; + // (*err) += ss.str(); + //} + bool ret = ReconstructLineOffsets(&offsets, num_blocks, head, marker, size); - if (ret) { - // OK - break; - } else { - tinyexr::SetErrorMessage( + if (ret) { + // OK + break; + } else { + tinyexr::SetErrorMessage( "Cannot reconstruct lineOffset table in DecodeEXRImage.", err); - return TINYEXR_ERROR_INVALID_DATA; + return TINYEXR_ERROR_INVALID_DATA; + } } } } { std::string e; - int ret = DecodeChunk(exr_image, exr_header, offsets, head, size, &e); + int ret = DecodeChunk(exr_image, exr_header, offset_data, head, size, &e); if (ret != TINYEXR_SUCCESS) { if (!e.empty()) { @@ -11530,7 +5727,7 @@ struct LayerChannel { }; static void ChannelsInLayer(const EXRHeader &exr_header, - const std::string layer_name, + const std::string &layer_name, std::vector<LayerChannel> &channels) { channels.clear(); for (int c = 0; c < exr_header.num_channels; c++) { @@ -11881,8 +6078,8 @@ int ParseEXRHeaderFromMemory(EXRHeader *exr_header, const EXRVersion *version, ConvertHeader(exr_header, info); - // transfoer `tiled` from version. - exr_header->tiled = version->tiled; + exr_header->multipart = version->multipart ? 1 : 0; + exr_header->non_image = version->non_image ? 1 : 0; return ret; } @@ -12087,7 +6284,7 @@ int LoadEXRImageFromFile(EXRImage *exr_image, const EXRHeader *exr_header, FILE *fp = NULL; #ifdef _WIN32 -#if defined(_MSC_VER) || defined(__MINGW32__) // MSVC, MinGW gcc or clang +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang. errno_t errcode = _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"rb"); if (errcode != 0) { @@ -12096,7 +6293,7 @@ int LoadEXRImageFromFile(EXRImage *exr_image, const EXRHeader *exr_header, return TINYEXR_ERROR_CANT_OPEN_FILE; } #else - // Unknown compiler + // Unknown compiler or MinGW without MINGW_HAS_SECURE_API. 
fp = fopen(filename, "rb"); #endif #else @@ -12155,207 +6352,400 @@ int LoadEXRImageFromMemory(EXRImage *exr_image, const EXRHeader *exr_header, err); } -size_t SaveEXRImageToMemory(const EXRImage *exr_image, - const EXRHeader *exr_header, - unsigned char **memory_out, const char **err) { - if (exr_image == NULL || memory_out == NULL || - exr_header->compression_type < 0) { - tinyexr::SetErrorMessage("Invalid argument for SaveEXRImageToMemory", err); - return 0; - } +namespace tinyexr +{ -#if !TINYEXR_USE_PIZ - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { - tinyexr::SetErrorMessage("PIZ compression is not supported in this build", - err); - return 0; - } -#endif +// out_data must be allocated initially with the block-header size +// of the current image(-part) type +static bool EncodePixelData(/* out */ std::vector<unsigned char>& out_data, + const unsigned char* const* images, + int compression_type, + int /*line_order*/, + int width, // for tiled : tile.width + int /*height*/, // for tiled : header.tile_size_y + int x_stride, // for tiled : header.tile_size_x + int line_no, // for tiled : 0 + int num_lines, // for tiled : tile.height + size_t pixel_data_size, + const std::vector<ChannelInfo>& channels, + const std::vector<size_t>& channel_offset_list, + const void* compression_param = 0) // zfp compression param +{ + size_t buf_size = static_cast<size_t>(width) * + static_cast<size_t>(num_lines) * + static_cast<size_t>(pixel_data_size); + //int last2bit = (buf_size & 3); + // buf_size must be multiple of four + //if(last2bit) buf_size += 4 - last2bit; + std::vector<unsigned char> buf(buf_size); -#if !TINYEXR_USE_ZFP - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { - tinyexr::SetErrorMessage("ZFP compression is not supported in this build", - err); - return 0; - } -#endif + size_t start_y = static_cast<size_t>(line_no); + for (size_t c = 0; c < channels.size(); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) 
{ + if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + float *line_ptr = reinterpret_cast<float *>(&buf.at( + static_cast<size_t>(pixel_data_size * y * width) + + channel_offset_list[c] * + static_cast<size_t>(width))); + for (int x = 0; x < width; x++) { + tinyexr::FP16 h16; + h16.u = reinterpret_cast<const unsigned short * const *>( + images)[c][(y + start_y) * x_stride + x]; -#if TINYEXR_USE_ZFP - for (size_t i = 0; i < static_cast<size_t>(exr_header->num_channels); i++) { - if (exr_header->requested_pixel_types[i] != TINYEXR_PIXELTYPE_FLOAT) { - tinyexr::SetErrorMessage("Pixel type must be FLOAT for ZFP compression", - err); - return 0; + tinyexr::FP32 f32 = half_to_float(h16); + + tinyexr::swap4(&f32.f); + + // line_ptr[x] = f32.f; + tinyexr::cpy4(line_ptr + x, &(f32.f)); + } + } + } else if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + unsigned short *line_ptr = reinterpret_cast<unsigned short *>( + &buf.at(static_cast<size_t>(pixel_data_size * y * + width) + + channel_offset_list[c] * + static_cast<size_t>(width))); + for (int x = 0; x < width; x++) { + unsigned short val = reinterpret_cast<const unsigned short * const *>( + images)[c][(y + start_y) * x_stride + x]; + + tinyexr::swap2(&val); + + // line_ptr[x] = val; + tinyexr::cpy2(line_ptr + x, &val); + } + } + } else { + assert(0); + } + + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + unsigned short *line_ptr = reinterpret_cast<unsigned short *>( + &buf.at(static_cast<size_t>(pixel_data_size * y * + width) + + channel_offset_list[c] * + static_cast<size_t>(width))); + for (int x = 0; x < width; x++) { + tinyexr::FP32 f32; + f32.f = reinterpret_cast<const float * const *>( + images)[c][(y + 
start_y) * x_stride + x]; + + tinyexr::FP16 h16; + h16 = float_to_half_full(f32); + + tinyexr::swap2(reinterpret_cast<unsigned short *>(&h16.u)); + + // line_ptr[x] = h16.u; + tinyexr::cpy2(line_ptr + x, &(h16.u)); + } + } + } else if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + float *line_ptr = reinterpret_cast<float *>(&buf.at( + static_cast<size_t>(pixel_data_size * y * width) + + channel_offset_list[c] * + static_cast<size_t>(width))); + for (int x = 0; x < width; x++) { + float val = reinterpret_cast<const float * const *>( + images)[c][(y + start_y) * x_stride + x]; + + tinyexr::swap4(&val); + + // line_ptr[x] = val; + tinyexr::cpy4(line_ptr + x, &val); + } + } + } else { + assert(0); + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + unsigned int *line_ptr = reinterpret_cast<unsigned int *>(&buf.at( + static_cast<size_t>(pixel_data_size * y * width) + + channel_offset_list[c] * static_cast<size_t>(width))); + for (int x = 0; x < width; x++) { + unsigned int val = reinterpret_cast<const unsigned int * const *>( + images)[c][(y + start_y) * x_stride + x]; + + tinyexr::swap4(&val); + + // line_ptr[x] = val; + tinyexr::cpy4(line_ptr + x, &val); + } + } } } + + if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(uncompressed) + out_data.insert(out_data.end(), buf.begin(), buf.end()); + + } else if ((compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { +#if TINYEXR_USE_MINIZ + std::vector<unsigned char> block(mz_compressBound( + static_cast<unsigned long>(buf.size()))); +#else + std::vector<unsigned char> block( + compressBound(static_cast<uLong>(buf.size()))); #endif + tinyexr::tinyexr_uint64 outSize = block.size(); - std::vector<unsigned char> memory; + 
tinyexr::CompressZip(&block.at(0), outSize, + reinterpret_cast<const unsigned char *>(&buf.at(0)), + static_cast<unsigned long>(buf.size())); - // Header - { - const char header[] = {0x76, 0x2f, 0x31, 0x01}; - memory.insert(memory.end(), header, header + 4); - } + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + unsigned int data_len = static_cast<unsigned int>(outSize); // truncate - // Version, scanline. - { - char marker[] = {2, 0, 0, 0}; - /* @todo - if (exr_header->tiled) { - marker[1] |= 0x2; - } - if (exr_header->long_name) { - marker[1] |= 0x4; - } - if (exr_header->non_image) { - marker[1] |= 0x8; - } - if (exr_header->multipart) { - marker[1] |= 0x10; - } - */ - memory.insert(memory.end(), marker, marker + 4); - } + out_data.insert(out_data.end(), block.begin(), block.begin() + data_len); - int num_scanlines = 1; - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { - num_scanlines = 16; - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { - num_scanlines = 32; - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { - num_scanlines = 16; - } + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) { + // (buf.size() * 3) / 2 would be enough. + std::vector<unsigned char> block((buf.size() * 3) / 2); - // Write attributes. 
- std::vector<tinyexr::ChannelInfo> channels; - { - std::vector<unsigned char> data; + tinyexr::tinyexr_uint64 outSize = block.size(); - for (int c = 0; c < exr_header->num_channels; c++) { - tinyexr::ChannelInfo info; - info.p_linear = 0; - info.pixel_type = exr_header->requested_pixel_types[c]; - info.x_sampling = 1; - info.y_sampling = 1; - info.name = std::string(exr_header->channels[c].name); - channels.push_back(info); - } + tinyexr::CompressRle(&block.at(0), outSize, + reinterpret_cast<const unsigned char *>(&buf.at(0)), + static_cast<unsigned long>(buf.size())); - tinyexr::WriteChannelInfo(data, channels); + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + unsigned int data_len = static_cast<unsigned int>(outSize); // truncate + out_data.insert(out_data.end(), block.begin(), block.begin() + data_len); - tinyexr::WriteAttributeToMemory(&memory, "channels", "chlist", &data.at(0), - static_cast<int>(data.size())); - } + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { +#if TINYEXR_USE_PIZ + unsigned int bufLen = + 8192 + static_cast<unsigned int>( + 2 * static_cast<unsigned int>( + buf.size())); // @fixme { compute good bound. 
} + std::vector<unsigned char> block(bufLen); + unsigned int outSize = static_cast<unsigned int>(block.size()); + + CompressPiz(&block.at(0), &outSize, + reinterpret_cast<const unsigned char *>(&buf.at(0)), + buf.size(), channels, width, num_lines); + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + unsigned int data_len = outSize; + out_data.insert(out_data.end(), block.begin(), block.begin() + data_len); - { - int comp = exr_header->compression_type; - tinyexr::swap4(&comp); - tinyexr::WriteAttributeToMemory( - &memory, "compression", "compression", - reinterpret_cast<const unsigned char *>(&comp), 1); - } +#else + assert(0); +#endif + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { +#if TINYEXR_USE_ZFP + const ZFPCompressionParam* zfp_compression_param = reinterpret_cast<const ZFPCompressionParam*>(compression_param); + std::vector<unsigned char> block; + unsigned int outSize; - { - int data[4] = {0, 0, exr_image->width - 1, exr_image->height - 1}; - tinyexr::swap4(&data[0]); - tinyexr::swap4(&data[1]); - tinyexr::swap4(&data[2]); - tinyexr::swap4(&data[3]); - tinyexr::WriteAttributeToMemory( - &memory, "dataWindow", "box2i", - reinterpret_cast<const unsigned char *>(data), sizeof(int) * 4); - tinyexr::WriteAttributeToMemory( - &memory, "displayWindow", "box2i", - reinterpret_cast<const unsigned char *>(data), sizeof(int) * 4); - } + tinyexr::CompressZfp( + &block, &outSize, reinterpret_cast<const float *>(&buf.at(0)), + width, num_lines, static_cast<int>(channels.size()), *zfp_compression_param); - { - unsigned char line_order = 0; // @fixme { read line_order from EXRHeader } - tinyexr::WriteAttributeToMemory(&memory, "lineOrder", "lineOrder", - &line_order, 1); - } + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + unsigned int data_len = outSize; + out_data.insert(out_data.end(), block.begin(), block.begin() + data_len); - { - float aspectRatio = 1.0f; - tinyexr::swap4(&aspectRatio); - 
tinyexr::WriteAttributeToMemory( - &memory, "pixelAspectRatio", "float", - reinterpret_cast<const unsigned char *>(&aspectRatio), sizeof(float)); +#else + (void)compression_param; + assert(0); +#endif + } else { + assert(0); + return false; } - { - float center[2] = {0.0f, 0.0f}; - tinyexr::swap4(¢er[0]); - tinyexr::swap4(¢er[1]); - tinyexr::WriteAttributeToMemory( - &memory, "screenWindowCenter", "v2f", - reinterpret_cast<const unsigned char *>(center), 2 * sizeof(float)); + return true; +} + +static int EncodeTiledLevel(const EXRImage* level_image, const EXRHeader* exr_header, + const std::vector<tinyexr::ChannelInfo>& channels, + std::vector<std::vector<unsigned char> >& data_list, + size_t start_index, // for data_list + int num_x_tiles, int num_y_tiles, + const std::vector<size_t>& channel_offset_list, + int pixel_data_size, + const void* compression_param, // must be set if zfp compression is enabled + std::string* err) { + int num_tiles = num_x_tiles * num_y_tiles; + assert(num_tiles == level_image->num_tiles); + + if ((exr_header->tile_size_x > level_image->width || exr_header->tile_size_y > level_image->height) && + level_image->level_x == 0 && level_image->level_y == 0) { + if (err) { + (*err) += "Failed to encode tile data.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; } - { - float w = static_cast<float>(exr_image->width); - tinyexr::swap4(&w); - tinyexr::WriteAttributeToMemory(&memory, "screenWindowWidth", "float", - reinterpret_cast<const unsigned char *>(&w), - sizeof(float)); + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::atomic<bool> invalid_data(false); +#else + bool invalid_data(false); +#endif + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::vector<std::thread> workers; + std::atomic<int> tile_count(0); + + int num_threads = std::max(1, int(std::thread::hardware_concurrency())); + if (num_threads > int(num_tiles)) { + num_threads = int(num_tiles); } - // Custom attributes - if (exr_header->num_custom_attributes > 0) { - 
for (int i = 0; i < exr_header->num_custom_attributes; i++) { - tinyexr::WriteAttributeToMemory( - &memory, exr_header->custom_attributes[i].name, - exr_header->custom_attributes[i].type, - reinterpret_cast<const unsigned char *>( - exr_header->custom_attributes[i].value), - exr_header->custom_attributes[i].size); + for (int t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&]() { + int i = 0; + while ((i = tile_count++) < num_tiles) { + +#else + // Use signed int since some OpenMP compiler doesn't allow unsigned type for + // `parallel for` +#if TINYEXR_USE_OPENMP +#pragma omp parallel for +#endif + for (int i = 0; i < num_tiles; i++) { + +#endif + size_t tile_idx = static_cast<size_t>(i); + size_t data_idx = tile_idx + start_index; + + int x_tile = i % num_x_tiles; + int y_tile = i / num_x_tiles; + + EXRTile& tile = level_image->tiles[tile_idx]; + + const unsigned char* const* images = + static_cast<const unsigned char* const*>(tile.images); + + data_list[data_idx].resize(5*sizeof(int)); + size_t data_header_size = data_list[data_idx].size(); + bool ret = EncodePixelData(data_list[data_idx], + images, + exr_header->compression_type, + 0, // increasing y + tile.width, + exr_header->tile_size_y, + exr_header->tile_size_x, + 0, + tile.height, + pixel_data_size, + channels, + channel_offset_list, + compression_param); + if (!ret) { + invalid_data = true; + continue; } + assert(data_list[data_idx].size() > data_header_size); + int data_len = static_cast<int>(data_list[data_idx].size() - data_header_size); + //tileX, tileY, levelX, levelY // pixel_data_size(int) + memcpy(&data_list[data_idx][0], &x_tile, sizeof(int)); + memcpy(&data_list[data_idx][4], &y_tile, sizeof(int)); + memcpy(&data_list[data_idx][8], &level_image->level_x, sizeof(int)); + memcpy(&data_list[data_idx][12], &level_image->level_y, sizeof(int)); + memcpy(&data_list[data_idx][16], &data_len, sizeof(int)); + + swap4(reinterpret_cast<int*>(&data_list[data_idx][0])); + 
swap4(reinterpret_cast<int*>(&data_list[data_idx][4])); + swap4(reinterpret_cast<int*>(&data_list[data_idx][8])); + swap4(reinterpret_cast<int*>(&data_list[data_idx][12])); + swap4(reinterpret_cast<int*>(&data_list[data_idx][16])); + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) } +})); + } - { // end of header - unsigned char e = 0; - memory.push_back(e); + for (auto &t : workers) { + t.join(); + } +#else + } // omp parallel +#endif + + if (invalid_data) { + if (err) { + (*err) += "Failed to encode tile data.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; } + return TINYEXR_SUCCESS; +} - int num_blocks = exr_image->height / num_scanlines; - if (num_blocks * num_scanlines < exr_image->height) { - num_blocks++; +static int NumScanlines(int compression_type) { + int num_scanlines = 1; + if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanlines = 16; + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + num_scanlines = 32; + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + num_scanlines = 16; } + return num_scanlines; +} - std::vector<tinyexr::tinyexr_uint64> offsets(static_cast<size_t>(num_blocks)); +static int EncodeChunk(const EXRImage* exr_image, const EXRHeader* exr_header, + const std::vector<ChannelInfo>& channels, + int num_blocks, + tinyexr_uint64 chunk_offset, // starting offset of current chunk + bool is_multipart, + OffsetData& offset_data, // output block offsets, must be initialized + std::vector<std::vector<unsigned char> >& data_list, // output + tinyexr_uint64& total_size, // output: ending offset of current chunk + std::string* err) { + int num_scanlines = NumScanlines(exr_header->compression_type); - size_t headerSize = memory.size(); - tinyexr::tinyexr_uint64 offset = - headerSize + - static_cast<size_t>(num_blocks) * - sizeof( - tinyexr::tinyexr_int64); // sizeof(header) + sizeof(offsetTable) + data_list.resize(num_blocks); - std::vector<std::vector<unsigned char> > data_list( - 
static_cast<size_t>(num_blocks)); std::vector<size_t> channel_offset_list( - static_cast<size_t>(exr_header->num_channels)); + static_cast<size_t>(exr_header->num_channels)); int pixel_data_size = 0; - size_t channel_offset = 0; - for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) { - channel_offset_list[c] = channel_offset; - if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - pixel_data_size += sizeof(unsigned short); - channel_offset += sizeof(unsigned short); - } else if (exr_header->requested_pixel_types[c] == - TINYEXR_PIXELTYPE_FLOAT) { - pixel_data_size += sizeof(float); - channel_offset += sizeof(float); - } else if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT) { - pixel_data_size += sizeof(unsigned int); - channel_offset += sizeof(unsigned int); - } else { - assert(0); + { + size_t channel_offset = 0; + for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) { + channel_offset_list[c] = channel_offset; + if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_HALF) { + pixel_data_size += sizeof(unsigned short); + channel_offset += sizeof(unsigned short); + } else if (channels[c].requested_pixel_type == + TINYEXR_PIXELTYPE_FLOAT) { + pixel_data_size += sizeof(float); + channel_offset += sizeof(float); + } else if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_UINT) { + pixel_data_size += sizeof(unsigned int); + channel_offset += sizeof(unsigned int); + } else { + assert(0); + } } } + const void* compression_param = 0; #if TINYEXR_USE_ZFP tinyexr::ZFPCompressionParam zfp_compression_param; @@ -12364,304 +6754,517 @@ size_t SaveEXRImageToMemory(const EXRImage *exr_image, { std::string e; bool ret = tinyexr::FindZFPCompressionParam( - &zfp_compression_param, exr_header->custom_attributes, - exr_header->num_custom_attributes, &e); + &zfp_compression_param, exr_header->custom_attributes, + exr_header->num_custom_attributes, &e); if (!ret) { // Use predefined compression 
parameter. zfp_compression_param.type = 0; zfp_compression_param.rate = 2; } + compression_param = &zfp_compression_param; } #endif - // TODO(LTE): C++11 thread + tinyexr_uint64 offset = chunk_offset; + tinyexr_uint64 doffset = is_multipart ? 4u : 0u; -// Use signed int since some OpenMP compiler doesn't allow unsigned type for -// `parallel for` -#if TINYEXR_USE_OPENMP -#pragma omp parallel for -#endif - for (int i = 0; i < num_blocks; i++) { - size_t ii = static_cast<size_t>(i); - int start_y = num_scanlines * i; - int endY = (std::min)(num_scanlines * (i + 1), exr_image->height); - int h = endY - start_y; - - std::vector<unsigned char> buf( - static_cast<size_t>(exr_image->width * h * pixel_data_size)); + if (exr_image->tiles) { + const EXRImage* level_image = exr_image; + size_t block_idx = 0; + tinyexr::tinyexr_uint64 block_data_size = 0; + int num_levels = (exr_header->tile_level_mode != TINYEXR_TILE_RIPMAP_LEVELS) ? + offset_data.num_x_levels : (offset_data.num_x_levels * offset_data.num_y_levels); + for (int level_index = 0; level_index < num_levels; ++level_index) { + if (!level_image) { + if (err) { + (*err) += "Invalid number of tiled levels for EncodeChunk\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } - for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) { - if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - float *line_ptr = reinterpret_cast<float *>(&buf.at( - static_cast<size_t>(pixel_data_size * y * exr_image->width) + - channel_offset_list[c] * - static_cast<size_t>(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - tinyexr::FP16 h16; - h16.u = reinterpret_cast<unsigned short **>( - exr_image->images)[c][(y + start_y) * exr_image->width + x]; - - tinyexr::FP32 f32 = half_to_float(h16); - - tinyexr::swap4(&f32.f); - - // line_ptr[x] = f32.f; - tinyexr::cpy4(line_ptr 
+ x, &(f32.f)); - } - } - } else if (exr_header->requested_pixel_types[c] == - TINYEXR_PIXELTYPE_HALF) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - unsigned short *line_ptr = reinterpret_cast<unsigned short *>( - &buf.at(static_cast<size_t>(pixel_data_size * y * - exr_image->width) + - channel_offset_list[c] * - static_cast<size_t>(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - unsigned short val = reinterpret_cast<unsigned short **>( - exr_image->images)[c][(y + start_y) * exr_image->width + x]; - - tinyexr::swap2(&val); - - // line_ptr[x] = val; - tinyexr::cpy2(line_ptr + x, &val); - } - } - } else { - assert(0); + int level_index_from_image = LevelIndex(level_image->level_x, level_image->level_y, + exr_header->tile_level_mode, offset_data.num_x_levels); + if (level_index_from_image != level_index) { + if (err) { + (*err) += "Incorrect level ordering in tiled image\n"; } + return TINYEXR_ERROR_INVALID_DATA; + } + int num_y_tiles = (int)offset_data.offsets[level_index].size(); + assert(num_y_tiles); + int num_x_tiles = (int)offset_data.offsets[level_index][0].size(); + assert(num_x_tiles); + + std::string e; + int ret = EncodeTiledLevel(level_image, + exr_header, + channels, + data_list, + block_idx, + num_x_tiles, + num_y_tiles, + channel_offset_list, + pixel_data_size, + compression_param, + &e); + if (ret != TINYEXR_SUCCESS) { + if (!e.empty() && err) { + (*err) += e; + } + return ret; + } - } else if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { - if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - unsigned short *line_ptr = reinterpret_cast<unsigned short *>( - &buf.at(static_cast<size_t>(pixel_data_size * y * - exr_image->width) + - channel_offset_list[c] * - static_cast<size_t>(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - tinyexr::FP32 f32; - f32.f = reinterpret_cast<float **>( - exr_image->images)[c][(y + 
start_y) * exr_image->width + x]; - - tinyexr::FP16 h16; - h16 = float_to_half_full(f32); - - tinyexr::swap2(reinterpret_cast<unsigned short *>(&h16.u)); - - // line_ptr[x] = h16.u; - tinyexr::cpy2(line_ptr + x, &(h16.u)); - } - } - } else if (exr_header->requested_pixel_types[c] == - TINYEXR_PIXELTYPE_FLOAT) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - float *line_ptr = reinterpret_cast<float *>(&buf.at( - static_cast<size_t>(pixel_data_size * y * exr_image->width) + - channel_offset_list[c] * - static_cast<size_t>(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - float val = reinterpret_cast<float **>( - exr_image->images)[c][(y + start_y) * exr_image->width + x]; - - tinyexr::swap4(&val); - - // line_ptr[x] = val; - tinyexr::cpy4(line_ptr + x, &val); - } - } - } else { - assert(0); + for (size_t j = 0; j < static_cast<size_t>(num_y_tiles); ++j) + for (size_t i = 0; i < static_cast<size_t>(num_x_tiles); ++i) { + offset_data.offsets[level_index][j][i] = offset; + swap8(reinterpret_cast<tinyexr_uint64*>(&offset_data.offsets[level_index][j][i])); + offset += data_list[block_idx].size() + doffset; + block_data_size += data_list[block_idx].size(); + ++block_idx; } - } else if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_UINT) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - unsigned int *line_ptr = reinterpret_cast<unsigned int *>(&buf.at( - static_cast<size_t>(pixel_data_size * y * exr_image->width) + - channel_offset_list[c] * static_cast<size_t>(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - unsigned int val = reinterpret_cast<unsigned int **>( - exr_image->images)[c][(y + start_y) * exr_image->width + x]; + level_image = level_image->next_level; + } + assert(static_cast<int>(block_idx) == num_blocks); + total_size = offset; + } else { // scanlines + std::vector<tinyexr::tinyexr_uint64>& offsets = offset_data.offsets[0][0]; - tinyexr::swap4(&val); +#if TINYEXR_HAS_CXX11 && 
(TINYEXR_USE_THREAD > 0) + std::atomic<bool> invalid_data(false); + std::vector<std::thread> workers; + std::atomic<int> block_count(0); - // line_ptr[x] = val; - tinyexr::cpy4(line_ptr + x, &val); - } - } + int num_threads = std::min(std::max(1, int(std::thread::hardware_concurrency())), num_blocks); + + for (int t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&]() { + int i = 0; + while ((i = block_count++) < num_blocks) { + +#else + bool invalid_data(false); +#if TINYEXR_USE_OPENMP +#pragma omp parallel for +#endif + for (int i = 0; i < num_blocks; i++) { + +#endif + int start_y = num_scanlines * i; + int end_Y = (std::min)(num_scanlines * (i + 1), exr_image->height); + int num_lines = end_Y - start_y; + + const unsigned char* const* images = + static_cast<const unsigned char* const*>(exr_image->images); + + data_list[i].resize(2*sizeof(int)); + size_t data_header_size = data_list[i].size(); + + bool ret = EncodePixelData(data_list[i], + images, + exr_header->compression_type, + 0, // increasing y + exr_image->width, + exr_image->height, + exr_image->width, + start_y, + num_lines, + pixel_data_size, + channels, + channel_offset_list, + compression_param); + if (!ret) { + invalid_data = true; + continue; // "break" cannot be used with OpenMP } + assert(data_list[i].size() > data_header_size); + int data_len = static_cast<int>(data_list[i].size() - data_header_size); + memcpy(&data_list[i][0], &start_y, sizeof(int)); + memcpy(&data_list[i][4], &data_len, sizeof(int)); + + swap4(reinterpret_cast<int*>(&data_list[i][0])); + swap4(reinterpret_cast<int*>(&data_list[i][4])); +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + } + })); } - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(uncompressed) - std::vector<unsigned char> header(8); - unsigned int data_len = static_cast<unsigned int>(buf.size()); - memcpy(&header.at(0), &start_y, sizeof(int)); - 
memcpy(&header.at(4), &data_len, sizeof(unsigned int)); + for (auto &t : workers) { + t.join(); + } +#else + } // omp parallel +#endif + + if (invalid_data) { + if (err) { + (*err) += "Failed to encode scanline data.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } - tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0))); - tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4))); + for (size_t i = 0; i < static_cast<size_t>(num_blocks); i++) { + offsets[i] = offset; + tinyexr::swap8(reinterpret_cast<tinyexr::tinyexr_uint64 *>(&offsets[i])); + offset += data_list[i].size() + doffset; + } - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - data_list[ii].insert(data_list[ii].end(), buf.begin(), - buf.begin() + data_len); + total_size = static_cast<size_t>(offset); + } + return TINYEXR_SUCCESS; +} - } else if ((exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || - (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { -#if TINYEXR_USE_MINIZ - std::vector<unsigned char> block(tinyexr::miniz::mz_compressBound( - static_cast<unsigned long>(buf.size()))); +// can save a single or multi-part image (no deep* formats) +static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images, + const EXRHeader** exr_headers, + unsigned int num_parts, + unsigned char** memory_out, const char** err) { + if (exr_images == NULL || exr_headers == NULL || num_parts == 0 || + memory_out == NULL) { + SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory", + err); + return 0; + } + { + for (unsigned int i = 0; i < num_parts; ++i) { + if (exr_headers[i]->compression_type < 0) { + SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory", + err); + return 0; + } +#if !TINYEXR_USE_PIZ + if (exr_headers[i]->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + SetErrorMessage("PIZ compression is not supported in this build", + err); + return 0; + } +#endif +#if !TINYEXR_USE_ZFP + if (exr_headers[i]->compression_type 
== TINYEXR_COMPRESSIONTYPE_ZFP) { + SetErrorMessage("ZFP compression is not supported in this build", + err); + return 0; + } #else - std::vector<unsigned char> block( - compressBound(static_cast<uLong>(buf.size()))); + for (int c = 0; c < exr_header->num_channels; ++c) { + if (exr_headers[i]->requested_pixel_types[c] != TINYEXR_PIXELTYPE_FLOAT) { + SetErrorMessage("Pixel type must be FLOAT for ZFP compression", + err); + return 0; + } + } #endif - tinyexr::tinyexr_uint64 outSize = block.size(); - - tinyexr::CompressZip(&block.at(0), outSize, - reinterpret_cast<const unsigned char *>(&buf.at(0)), - static_cast<unsigned long>(buf.size())); + } + } - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(compressed) - std::vector<unsigned char> header(8); - unsigned int data_len = static_cast<unsigned int>(outSize); // truncate - memcpy(&header.at(0), &start_y, sizeof(int)); - memcpy(&header.at(4), &data_len, sizeof(unsigned int)); + std::vector<unsigned char> memory; - tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0))); - tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4))); + // Header + { + const char header[] = { 0x76, 0x2f, 0x31, 0x01 }; + memory.insert(memory.end(), header, header + 4); + } - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - data_list[ii].insert(data_list[ii].end(), block.begin(), - block.begin() + data_len); + // Version + // using value from the first header + int long_name = exr_headers[0]->long_name; + { + char marker[] = { 2, 0, 0, 0 }; + /* @todo + if (exr_header->non_image) { + marker[1] |= 0x8; + } + */ + // tiled + if (num_parts == 1 && exr_images[0].tiles) { + marker[1] |= 0x2; + } + // long_name + if (long_name) { + marker[1] |= 0x4; + } + // multipart + if (num_parts > 1) { + marker[1] |= 0x10; + } + memory.insert(memory.end(), marker, marker + 4); + } - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_RLE) { - // (buf.size() * 3) / 2 would be enough. 
- std::vector<unsigned char> block((buf.size() * 3) / 2); + int total_chunk_count = 0; + std::vector<int> chunk_count(num_parts); + std::vector<OffsetData> offset_data(num_parts); + for (unsigned int i = 0; i < num_parts; ++i) { + if (!exr_images[i].tiles) { + int num_scanlines = NumScanlines(exr_headers[i]->compression_type); + chunk_count[i] = + (exr_images[i].height + num_scanlines - 1) / num_scanlines; + InitSingleResolutionOffsets(offset_data[i], chunk_count[i]); + total_chunk_count += chunk_count[i]; + } else { + { + std::vector<int> num_x_tiles, num_y_tiles; + PrecalculateTileInfo(num_x_tiles, num_y_tiles, exr_headers[i]); + chunk_count[i] = + InitTileOffsets(offset_data[i], exr_headers[i], num_x_tiles, num_y_tiles); + total_chunk_count += chunk_count[i]; + } + } + } + // Write attributes to memory buffer. + std::vector< std::vector<tinyexr::ChannelInfo> > channels(num_parts); + { + std::set<std::string> partnames; + for (unsigned int i = 0; i < num_parts; ++i) { + //channels + { + std::vector<unsigned char> data; + + for (int c = 0; c < exr_headers[i]->num_channels; c++) { + tinyexr::ChannelInfo info; + info.p_linear = 0; + info.pixel_type = exr_headers[i]->pixel_types[c]; + info.requested_pixel_type = exr_headers[i]->requested_pixel_types[c]; + info.x_sampling = 1; + info.y_sampling = 1; + info.name = std::string(exr_headers[i]->channels[c].name); + channels[i].push_back(info); + } - tinyexr::tinyexr_uint64 outSize = block.size(); + tinyexr::WriteChannelInfo(data, channels[i]); - tinyexr::CompressRle(&block.at(0), outSize, - reinterpret_cast<const unsigned char *>(&buf.at(0)), - static_cast<unsigned long>(buf.size())); + tinyexr::WriteAttributeToMemory(&memory, "channels", "chlist", &data.at(0), + static_cast<int>(data.size())); + } - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(compressed) - std::vector<unsigned char> header(8); - unsigned int data_len = static_cast<unsigned int>(outSize); // truncate - memcpy(&header.at(0), &start_y, 
sizeof(int)); - memcpy(&header.at(4), &data_len, sizeof(unsigned int)); + { + int comp = exr_headers[i]->compression_type; + swap4(&comp); + WriteAttributeToMemory( + &memory, "compression", "compression", + reinterpret_cast<const unsigned char*>(&comp), 1); + } - tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0))); - tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4))); + { + int data[4] = { 0, 0, exr_images[i].width - 1, exr_images[i].height - 1 }; + swap4(&data[0]); + swap4(&data[1]); + swap4(&data[2]); + swap4(&data[3]); + WriteAttributeToMemory( + &memory, "dataWindow", "box2i", + reinterpret_cast<const unsigned char*>(data), sizeof(int) * 4); + + int data0[4] = { 0, 0, exr_images[0].width - 1, exr_images[0].height - 1 }; + swap4(&data0[0]); + swap4(&data0[1]); + swap4(&data0[2]); + swap4(&data0[3]); + // Note: must be the same across parts (currently, using value from the first header) + WriteAttributeToMemory( + &memory, "displayWindow", "box2i", + reinterpret_cast<const unsigned char*>(data0), sizeof(int) * 4); + } - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - data_list[ii].insert(data_list[ii].end(), block.begin(), - block.begin() + data_len); + { + unsigned char line_order = 0; // @fixme { read line_order from EXRHeader } + WriteAttributeToMemory(&memory, "lineOrder", "lineOrder", + &line_order, 1); + } - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { -#if TINYEXR_USE_PIZ - unsigned int bufLen = - 8192 + static_cast<unsigned int>( - 2 * static_cast<unsigned int>( - buf.size())); // @fixme { compute good bound. 
} - std::vector<unsigned char> block(bufLen); - unsigned int outSize = static_cast<unsigned int>(block.size()); - - CompressPiz(&block.at(0), &outSize, - reinterpret_cast<const unsigned char *>(&buf.at(0)), - buf.size(), channels, exr_image->width, h); - - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(compressed) - std::vector<unsigned char> header(8); - unsigned int data_len = outSize; - memcpy(&header.at(0), &start_y, sizeof(int)); - memcpy(&header.at(4), &data_len, sizeof(unsigned int)); - - tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0))); - tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4))); - - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - data_list[ii].insert(data_list[ii].end(), block.begin(), - block.begin() + data_len); + { + // Note: must be the same across parts + float aspectRatio = 1.0f; + swap4(&aspectRatio); + WriteAttributeToMemory( + &memory, "pixelAspectRatio", "float", + reinterpret_cast<const unsigned char*>(&aspectRatio), sizeof(float)); + } -#else - assert(0); -#endif - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { -#if TINYEXR_USE_ZFP - std::vector<unsigned char> block; - unsigned int outSize; + { + float center[2] = { 0.0f, 0.0f }; + swap4(&center[0]); + swap4(&center[1]); + WriteAttributeToMemory( + &memory, "screenWindowCenter", "v2f", + reinterpret_cast<const unsigned char*>(center), 2 * sizeof(float)); + } - tinyexr::CompressZfp( - &block, &outSize, reinterpret_cast<const float *>(&buf.at(0)), - exr_image->width, h, exr_header->num_channels, zfp_compression_param); + { + float w = 1.0f; + swap4(&w); + WriteAttributeToMemory(&memory, "screenWindowWidth", "float", + reinterpret_cast<const unsigned char*>(&w), + sizeof(float)); + } - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(compressed) - std::vector<unsigned char> header(8); - unsigned int data_len = outSize; - memcpy(&header.at(0), &start_y, sizeof(int)); - 
memcpy(&header.at(4), &data_len, sizeof(unsigned int)); + if (exr_images[i].tiles) { + unsigned char tile_mode = static_cast<unsigned char>(exr_headers[i]->tile_level_mode & 0x3); + if (exr_headers[i]->tile_rounding_mode) tile_mode |= (1u << 4u); + //unsigned char data[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + unsigned int datai[3] = { 0, 0, 0 }; + unsigned char* data = reinterpret_cast<unsigned char*>(&datai[0]); + datai[0] = static_cast<unsigned int>(exr_headers[i]->tile_size_x); + datai[1] = static_cast<unsigned int>(exr_headers[i]->tile_size_y); + data[8] = tile_mode; + swap4(reinterpret_cast<unsigned int*>(&data[0])); + swap4(reinterpret_cast<unsigned int*>(&data[4])); + WriteAttributeToMemory( + &memory, "tiles", "tiledesc", + reinterpret_cast<const unsigned char*>(data), 9); + } - tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0))); - tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4))); + // must be present for multi-part files - according to spec. + if (num_parts > 1) { + // name + { + size_t len = 0; + if ((len = strlen(exr_headers[i]->name)) > 0) { + partnames.emplace(exr_headers[i]->name); + if (partnames.size() != i + 1) { + SetErrorMessage("'name' attributes must be unique for a multi-part file", err); + return 0; + } + WriteAttributeToMemory( + &memory, "name", "string", + reinterpret_cast<const unsigned char*>(exr_headers[i]->name), + static_cast<int>(len)); + } else { + SetErrorMessage("Invalid 'name' attribute for a multi-part file", err); + return 0; + } + } + // type + { + const char* type = "scanlineimage"; + if (exr_images[i].tiles) type = "tiledimage"; + WriteAttributeToMemory( + &memory, "type", "string", + reinterpret_cast<const unsigned char*>(type), + static_cast<int>(strlen(type))); + } + // chunkCount + { + WriteAttributeToMemory( + &memory, "chunkCount", "int", + reinterpret_cast<const unsigned char*>(&chunk_count[i]), + 4); + } + } - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - 
data_list[ii].insert(data_list[ii].end(), block.begin(), - block.begin() + data_len); + // Custom attributes + if (exr_headers[i]->num_custom_attributes > 0) { + for (int j = 0; j < exr_headers[i]->num_custom_attributes; j++) { + tinyexr::WriteAttributeToMemory( + &memory, exr_headers[i]->custom_attributes[j].name, + exr_headers[i]->custom_attributes[j].type, + reinterpret_cast<const unsigned char*>( + exr_headers[i]->custom_attributes[j].value), + exr_headers[i]->custom_attributes[j].size); + } + } -#else - assert(0); -#endif - } else { - assert(0); + { // end of header + memory.push_back(0); + } } - } // omp parallel - - for (size_t i = 0; i < static_cast<size_t>(num_blocks); i++) { - offsets[i] = offset; - tinyexr::swap8(reinterpret_cast<tinyexr::tinyexr_uint64 *>(&offsets[i])); - offset += data_list[i].size(); + } + if (num_parts > 1) { + // end of header list + memory.push_back(0); } - size_t totalSize = static_cast<size_t>(offset); - { - memory.insert( - memory.end(), reinterpret_cast<unsigned char *>(&offsets.at(0)), - reinterpret_cast<unsigned char *>(&offsets.at(0)) + - sizeof(tinyexr::tinyexr_uint64) * static_cast<size_t>(num_blocks)); + tinyexr_uint64 chunk_offset = memory.size() + size_t(total_chunk_count) * sizeof(tinyexr_uint64); + + tinyexr_uint64 total_size = 0; + std::vector< std::vector< std::vector<unsigned char> > > data_lists(num_parts); + for (unsigned int i = 0; i < num_parts; ++i) { + std::string e; + int ret = EncodeChunk(&exr_images[i], exr_headers[i], + channels[i], + chunk_count[i], + // starting offset of current chunk after part-number + chunk_offset, + num_parts > 1, + offset_data[i], // output: block offsets, must be initialized + data_lists[i], // output + total_size, // output + &e); + if (ret != TINYEXR_SUCCESS) { + if (!e.empty()) { + tinyexr::SetErrorMessage(e, err); + } + return 0; + } + chunk_offset = total_size; } - if (memory.size() == 0) { + // Allocating required memory + if (total_size == 0) { // something went wrong 
tinyexr::SetErrorMessage("Output memory size is zero", err); return 0; } - - (*memory_out) = static_cast<unsigned char *>(malloc(totalSize)); - memcpy((*memory_out), &memory.at(0), memory.size()); - unsigned char *memory_ptr = *memory_out + memory.size(); - - for (size_t i = 0; i < static_cast<size_t>(num_blocks); i++) { - memcpy(memory_ptr, &data_list[i].at(0), data_list[i].size()); - memory_ptr += data_list[i].size(); + (*memory_out) = static_cast<unsigned char*>(malloc(total_size)); + + // Writing header + memcpy((*memory_out), &memory[0], memory.size()); + unsigned char* memory_ptr = *memory_out + memory.size(); + size_t sum = memory.size(); + + // Writing offset data for chunks + for (unsigned int i = 0; i < num_parts; ++i) { + if (exr_images[i].tiles) { + const EXRImage* level_image = &exr_images[i]; + int num_levels = (exr_headers[i]->tile_level_mode != TINYEXR_TILE_RIPMAP_LEVELS) ? + offset_data[i].num_x_levels : (offset_data[i].num_x_levels * offset_data[i].num_y_levels); + for (int level_index = 0; level_index < num_levels; ++level_index) { + for (size_t j = 0; j < offset_data[i].offsets[level_index].size(); ++j) { + size_t num_bytes = sizeof(tinyexr_uint64) * offset_data[i].offsets[level_index][j].size(); + sum += num_bytes; + assert(sum <= total_size); + memcpy(memory_ptr, + reinterpret_cast<unsigned char*>(&offset_data[i].offsets[level_index][j][0]), + num_bytes); + memory_ptr += num_bytes; + } + level_image = level_image->next_level; + } + } else { + size_t num_bytes = sizeof(tinyexr::tinyexr_uint64) * static_cast<size_t>(chunk_count[i]); + sum += num_bytes; + assert(sum <= total_size); + std::vector<tinyexr::tinyexr_uint64>& offsets = offset_data[i].offsets[0][0]; + memcpy(memory_ptr, reinterpret_cast<unsigned char*>(&offsets[0]), num_bytes); + memory_ptr += num_bytes; + } + } + + // Writing chunk data + for (unsigned int i = 0; i < num_parts; ++i) { + for (size_t j = 0; j < static_cast<size_t>(chunk_count[i]); ++j) { + if (num_parts > 1) { + sum += 
4; + assert(sum <= total_size); + unsigned int part_number = i; + swap4(&part_number); + memcpy(memory_ptr, &part_number, 4); + memory_ptr += 4; + } + sum += data_lists[i][j].size(); + assert(sum <= total_size); + memcpy(memory_ptr, &data_lists[i][j][0], data_lists[i][j].size()); + memory_ptr += data_lists[i][j].size(); + } } + assert(sum == total_size); + return total_size; // OK +} + +} // tinyexr - return totalSize; // OK +size_t SaveEXRImageToMemory(const EXRImage* exr_image, + const EXRHeader* exr_header, + unsigned char** memory_out, const char** err) { + return tinyexr::SaveEXRNPartImageToMemory(exr_image, &exr_header, 1, memory_out, err); } int SaveEXRImageToFile(const EXRImage *exr_image, const EXRHeader *exr_header, @@ -12690,7 +7293,7 @@ int SaveEXRImageToFile(const EXRImage *exr_image, const EXRHeader *exr_header, FILE *fp = NULL; #ifdef _WIN32 -#if defined(_MSC_VER) || defined(__MINGW32__) // MSVC, MinGW gcc or clang +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang errno_t errcode = _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"wb"); if (errcode != 0) { @@ -12699,7 +7302,7 @@ int SaveEXRImageToFile(const EXRImage *exr_image, const EXRHeader *exr_header, return TINYEXR_ERROR_CANT_WRITE_FILE; } #else - // Unknown compiler + // Unknown compiler or MinGW without MINGW_HAS_SECURE_API. 
fp = fopen(filename, "wb"); #endif #else @@ -12733,6 +7336,75 @@ int SaveEXRImageToFile(const EXRImage *exr_image, const EXRHeader *exr_header, return TINYEXR_SUCCESS; } +size_t SaveEXRMultipartImageToMemory(const EXRImage* exr_images, + const EXRHeader** exr_headers, + unsigned int num_parts, + unsigned char** memory_out, const char** err) { + if (exr_images == NULL || exr_headers == NULL || num_parts < 2 || + memory_out == NULL) { + tinyexr::SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory", + err); + return 0; + } + return tinyexr::SaveEXRNPartImageToMemory(exr_images, exr_headers, num_parts, memory_out, err); +} + +int SaveEXRMultipartImageToFile(const EXRImage* exr_images, + const EXRHeader** exr_headers, + unsigned int num_parts, + const char* filename, + const char** err) { + if (exr_images == NULL || exr_headers == NULL || num_parts < 2) { + tinyexr::SetErrorMessage("Invalid argument for SaveEXRMultipartImageToFile", + err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + FILE *fp = NULL; +#ifdef _WIN32 +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang. + errno_t errcode = + _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"wb"); + if (errcode != 0) { + tinyexr::SetErrorMessage("Cannot write a file: " + std::string(filename), + err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } +#else + // Unknown compiler or MinGW without MINGW_HAS_SECURE_API. 
+ fp = fopen(filename, "wb"); +#endif +#else + fp = fopen(filename, "wb"); +#endif + if (!fp) { + tinyexr::SetErrorMessage("Cannot write a file: " + std::string(filename), + err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } + + unsigned char *mem = NULL; + size_t mem_size = SaveEXRMultipartImageToMemory(exr_images, exr_headers, num_parts, &mem, err); + if (mem_size == 0) { + return TINYEXR_ERROR_SERIALZATION_FAILED; + } + + size_t written_size = 0; + if ((mem_size > 0) && mem) { + written_size = fwrite(mem, 1, mem_size, fp); + } + free(mem); + + fclose(fp); + + if (written_size != mem_size) { + tinyexr::SetErrorMessage("Cannot write a file", err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } + + return TINYEXR_SUCCESS; +} + int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { if (deep_image == NULL) { tinyexr::SetErrorMessage("Invalid argument for LoadDeepEXR", err); @@ -12741,7 +7413,7 @@ int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { #ifdef _WIN32 FILE *fp = NULL; -#if defined(_MSC_VER) || defined(__MINGW32__) // MSVC, MinGW gcc or clang +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang. errno_t errcode = _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"rb"); if (errcode != 0) { @@ -12750,7 +7422,7 @@ int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { return TINYEXR_ERROR_CANT_OPEN_FILE; } #else - // Unknown compiler + // Unknown compiler or MinGW without MINGW_HAS_SECURE_API. fp = fopen(filename, "rb"); #endif if (!fp) { @@ -12917,9 +7589,6 @@ int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { int data_width = dw - dx + 1; int data_height = dh - dy + 1; - std::vector<float> image( - static_cast<size_t>(data_width * data_height * 4)); // 4 = RGBA - // Read offset tables. 
int num_blocks = data_height / num_scanline_blocks; if (num_blocks * num_scanline_blocks < data_height) { @@ -13138,6 +7807,9 @@ void InitEXRImage(EXRImage *exr_image) { exr_image->images = NULL; exr_image->tiles = NULL; + exr_image->next_level = NULL; + exr_image->level_x = 0; + exr_image->level_y = 0; exr_image->num_tiles = 0; } @@ -13184,14 +7856,43 @@ int FreeEXRHeader(EXRHeader *exr_header) { free(exr_header->custom_attributes); } + EXRSetNameAttr(exr_header, NULL); + return TINYEXR_SUCCESS; } +void EXRSetNameAttr(EXRHeader* exr_header, const char* name) { + if (exr_header == NULL) { + return; + } + memset(exr_header->name, 0, 256); + if (name != NULL) { + size_t len = std::min(strlen(name), (size_t)255); + if (len) { + memcpy(exr_header->name, name, len); + } + } +} + +int EXRNumLevels(const EXRImage* exr_image) { + if (exr_image == NULL) return 0; + if(exr_image->images) return 1; // scanlines + int levels = 1; + const EXRImage* level_image = exr_image; + while((level_image = level_image->next_level)) ++levels; + return levels; +} + int FreeEXRImage(EXRImage *exr_image) { if (exr_image == NULL) { return TINYEXR_ERROR_INVALID_ARGUMENT; } + if (exr_image->next_level) { + FreeEXRImage(exr_image->next_level); + delete exr_image->next_level; + } + for (int i = 0; i < exr_image->num_channels; i++) { if (exr_image->images && exr_image->images[i]) { free(exr_image->images[i]); @@ -13229,7 +7930,7 @@ int ParseEXRHeaderFromFile(EXRHeader *exr_header, const EXRVersion *exr_version, FILE *fp = NULL; #ifdef _WIN32 -#if defined(_MSC_VER) || defined(__MINGW32__) // MSVC, MinGW gcc or clang +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang. 
errno_t errcode = _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"rb"); if (errcode != 0) { @@ -13237,7 +7938,7 @@ int ParseEXRHeaderFromFile(EXRHeader *exr_header, const EXRVersion *exr_version, return TINYEXR_ERROR_INVALID_FILE; } #else - // Unknown compiler + // Unknown compiler or MinGW without MINGW_HAS_SECURE_API. fp = fopen(filename, "rb"); #endif #else @@ -13333,11 +8034,11 @@ int ParseEXRMultipartHeaderFromMemory(EXRHeader ***exr_headers, static_cast<EXRHeader **>(malloc(sizeof(EXRHeader *) * infos.size())); for (size_t i = 0; i < infos.size(); i++) { EXRHeader *exr_header = static_cast<EXRHeader *>(malloc(sizeof(EXRHeader))); + memset(exr_header, 0, sizeof(EXRHeader)); ConvertHeader(exr_header, infos[i]); - // transfoer `tiled` from version. - exr_header->tiled = exr_version->tiled; + exr_header->multipart = exr_version->multipart ? 1 : 0; (*exr_headers)[i] = exr_header; } @@ -13359,7 +8060,7 @@ int ParseEXRMultipartHeaderFromFile(EXRHeader ***exr_headers, int *num_headers, FILE *fp = NULL; #ifdef _WIN32 -#if defined(_MSC_VER) || defined(__MINGW32__) // MSVC, MinGW gcc or clang +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang. errno_t errcode = _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"rb"); if (errcode != 0) { @@ -13367,7 +8068,7 @@ int ParseEXRMultipartHeaderFromFile(EXRHeader ***exr_headers, int *num_headers, return TINYEXR_ERROR_INVALID_FILE; } #else - // Unknown compiler + // Unknown compiler or MinGW without MINGW_HAS_SECURE_API. fp = fopen(filename, "rb"); #endif #else @@ -13465,14 +8166,14 @@ int ParseEXRVersionFromFile(EXRVersion *version, const char *filename) { FILE *fp = NULL; #ifdef _WIN32 -#if defined(_MSC_VER) || defined(__MINGW32__) // MSVC, MinGW gcc or clang +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang. 
errno_t err = _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"rb"); if (err != 0) { // TODO(syoyo): return wfopen_s erro code return TINYEXR_ERROR_CANT_OPEN_FILE; } #else - // Unknown compiler + // Unknown compiler or MinGW without MINGW_HAS_SECURE_API. fp = fopen(filename, "rb"); #endif #else @@ -13543,51 +8244,82 @@ int LoadEXRMultipartImageFromMemory(EXRImage *exr_images, // http://www.openexr.com/openexrfilelayout.pdf // Load chunk offset table. - std::vector<std::vector<tinyexr::tinyexr_uint64> > chunk_offset_table_list; + std::vector<tinyexr::OffsetData> chunk_offset_table_list; + chunk_offset_table_list.reserve(num_parts); for (size_t i = 0; i < static_cast<size_t>(num_parts); i++) { - std::vector<tinyexr::tinyexr_uint64> offset_table( - static_cast<size_t>(exr_headers[i]->chunk_count)); - - for (size_t c = 0; c < offset_table.size(); c++) { - tinyexr::tinyexr_uint64 offset; - memcpy(&offset, marker, 8); - tinyexr::swap8(&offset); + chunk_offset_table_list.resize(chunk_offset_table_list.size() + 1); + tinyexr::OffsetData& offset_data = chunk_offset_table_list.back(); + if (!exr_headers[i]->tiled || exr_headers[i]->tile_level_mode == TINYEXR_TILE_ONE_LEVEL) { + tinyexr::InitSingleResolutionOffsets(offset_data, exr_headers[i]->chunk_count); + std::vector<tinyexr::tinyexr_uint64>& offset_table = offset_data.offsets[0][0]; + + for (size_t c = 0; c < offset_table.size(); c++) { + tinyexr::tinyexr_uint64 offset; + memcpy(&offset, marker, 8); + tinyexr::swap8(&offset); + + if (offset >= size) { + tinyexr::SetErrorMessage("Invalid offset size in EXR header chunks.", + err); + return TINYEXR_ERROR_INVALID_DATA; + } - if (offset >= size) { - tinyexr::SetErrorMessage("Invalid offset size in EXR header chunks.", - err); - return TINYEXR_ERROR_INVALID_DATA; + offset_table[c] = offset + 4; // +4 to skip 'part number' + marker += 8; + } + } else { + { + std::vector<int> num_x_tiles, num_y_tiles; + tinyexr::PrecalculateTileInfo(num_x_tiles, num_y_tiles, 
exr_headers[i]); + int num_blocks = InitTileOffsets(offset_data, exr_headers[i], num_x_tiles, num_y_tiles); + if (num_blocks != exr_headers[i]->chunk_count) { + tinyexr::SetErrorMessage("Invalid offset table size.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) { + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) { + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) { + tinyexr::tinyexr_uint64 offset; + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64)); + tinyexr::swap8(&offset); + if (offset >= size) { + tinyexr::SetErrorMessage("Invalid offset size in EXR header chunks.", + err); + return TINYEXR_ERROR_INVALID_DATA; + } + offset_data.offsets[l][dy][dx] = offset + 4; // +4 to skip 'part number' + marker += sizeof(tinyexr::tinyexr_uint64); // = 8 + } + } } - - offset_table[c] = offset + 4; // +4 to skip 'part number' - marker += 8; } - - chunk_offset_table_list.push_back(offset_table); } // Decode image. for (size_t i = 0; i < static_cast<size_t>(num_parts); i++) { - std::vector<tinyexr::tinyexr_uint64> &offset_table = - chunk_offset_table_list[i]; + tinyexr::OffsetData &offset_data = chunk_offset_table_list[i]; // First check 'part number' is identitical to 'i' - for (size_t c = 0; c < offset_table.size(); c++) { - const unsigned char *part_number_addr = - memory + offset_table[c] - 4; // -4 to move to 'part number' field. 
- unsigned int part_no; - memcpy(&part_no, part_number_addr, sizeof(unsigned int)); // 4 - tinyexr::swap4(&part_no); - - if (part_no != i) { - tinyexr::SetErrorMessage("Invalid `part number' in EXR header chunks.", - err); - return TINYEXR_ERROR_INVALID_DATA; - } - } + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) { + + const unsigned char *part_number_addr = + memory + offset_data.offsets[l][dy][dx] - 4; // -4 to move to 'part number' field. + unsigned int part_no; + memcpy(&part_no, part_number_addr, sizeof(unsigned int)); // 4 + tinyexr::swap4(&part_no); + + if (part_no != i) { + tinyexr::SetErrorMessage("Invalid `part number' in EXR header chunks.", + err); + return TINYEXR_ERROR_INVALID_DATA; + } + } std::string e; - int ret = tinyexr::DecodeChunk(&exr_images[i], exr_headers[i], offset_table, + int ret = tinyexr::DecodeChunk(&exr_images[i], exr_headers[i], offset_data, memory, size, &e); if (ret != TINYEXR_SUCCESS) { if (!e.empty()) { @@ -13612,7 +8344,7 @@ int LoadEXRMultipartImageFromFile(EXRImage *exr_images, FILE *fp = NULL; #ifdef _WIN32 -#if defined(_MSC_VER) || defined(__MINGW32__) // MSVC, MinGW gcc or clang +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang. errno_t errcode = _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"rb"); if (errcode != 0) { @@ -13620,7 +8352,7 @@ int LoadEXRMultipartImageFromFile(EXRImage *exr_images, return TINYEXR_ERROR_CANT_OPEN_FILE; } #else - // Unknown compiler + // Unknown compiler or MinGW without MINGW_HAS_SECURE_API. 
fp = fopen(filename, "rb"); #endif #else diff --git a/thirdparty/wslay/includes/config.h b/thirdparty/wslay/config.h index 771ad12528..f291801ba1 100644 --- a/thirdparty/wslay/includes/config.h +++ b/thirdparty/wslay/config.h @@ -1,8 +1,10 @@ #ifndef CONFIG_H #define CONFIG_H +// -- GODOT start -- #ifdef BIG_ENDIAN_ENABLED #define WORDS_BIGENDIAN #endif +// -- GODOT end -- #endif /* CONFIG_H */ diff --git a/thirdparty/wslay/msvcfix.diff b/thirdparty/wslay/patches/msvcfix.diff index 28721844f4..f58b6d44f0 100644 --- a/thirdparty/wslay/msvcfix.diff +++ b/thirdparty/wslay/patches/msvcfix.diff @@ -1,8 +1,8 @@ diff --git a/thirdparty/wslay/includes/wslay/wslay.h b/thirdparty/wslay/includes/wslay/wslay.h -index 2fde81a4e..9c751b05b 100644 +index 77a4e8253f..ac6873613f 100644 --- a/thirdparty/wslay/includes/wslay/wslay.h +++ b/thirdparty/wslay/includes/wslay/wslay.h -@@ -33,6 +33,12 @@ extern "C" { +@@ -33,6 +33,13 @@ extern "C" { #include <stdlib.h> #include <sys/types.h> @@ -12,6 +12,7 @@ index 2fde81a4e..9c751b05b 100644 +typedef SSIZE_T ssize_t; +#endif +/* GODOT END */ - ++ /* * wslay/wslayver.h is generated from wslay/wslayver.h.in by + * configure. 
The projects which do not use autotools can set diff --git a/thirdparty/wslay/includes/wslay/wslay.h b/thirdparty/wslay/wslay/wslay.h index 9c751b05b7..ac6873613f 100644 --- a/thirdparty/wslay/includes/wslay/wslay.h +++ b/thirdparty/wslay/wslay/wslay.h @@ -25,7 +25,7 @@ #ifndef WSLAY_H #define WSLAY_H -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -155,10 +155,10 @@ enum wslay_opcode { * These macros assume that rsv is constructed by ((RSV1 << 2) | * (RSV2 << 1) | RSV3) */ -#define WSLAY_RSV_NONE ((uint8_t) 0) -#define WSLAY_RSV1_BIT (((uint8_t) 1) << 2) -#define WSLAY_RSV2_BIT (((uint8_t) 1) << 1) -#define WSLAY_RSV3_BIT (((uint8_t) 1) << 0) +#define WSLAY_RSV_NONE ((uint8_t)0) +#define WSLAY_RSV1_BIT (((uint8_t)1) << 2) +#define WSLAY_RSV2_BIT (((uint8_t)1) << 1) +#define WSLAY_RSV3_BIT (((uint8_t)1) << 0) #define wslay_get_rsv1(rsv) ((rsv >> 2) & 1) #define wslay_get_rsv2(rsv) ((rsv >> 1) & 1) @@ -172,7 +172,7 @@ struct wslay_frame_iocb { * RFC6455 requires 0 unless extensions are negotiated. */ uint8_t rsv; - /* 4 bit opcode */ + /* 4 bit opcode */ uint8_t opcode; /* payload length [0, 2**63-1] */ uint64_t payload_length; @@ -229,6 +229,33 @@ ssize_t wslay_frame_send(wslay_frame_context_ptr ctx, struct wslay_frame_iocb *iocb); /* + * Write WebSocket frame specified in iocb to buf of length + * buflen. ctx must be initialized using wslay_frame_context_init() + * function. iocb->fin must be 1 if this is a fin frame, otherwise 0. + * iocb->rsv is reserved bits. iocb->opcode must be the opcode of + * this frame. iocb->mask must be 1 if this is masked frame, + * otherwise 0. iocb->payload_length is the payload_length of this + * frame. iocb->data must point to the payload data to be + * sent. iocb->data_length must be the length of the data. Unlike + * wslay_frame_send, this function does not call send_callback + * function. This function calls gen_mask_callback function if it + * needs new mask key. 
This function returns the number of bytes + * written to a buffer. Unlike wslay_frame_send, it includes the + * number of header bytes. Instead, the number of payload bytes + * written is assigned to *pwpayloadlen if this function succeeds. If + * there is not enough space left in a buffer, it returns 0. If the + * library detects error in iocb, this function returns + * WSLAY_ERR_INVALID_ARGUMENT. If callback functions report a + * failure, this function returns WSLAY_ERR_INVALID_CALLBACK. This + * function does not always send all given data in iocb. If there are + * remaining data to be sent, adjust data and data_length in iocb + * accordingly and call this function again. + */ +ssize_t wslay_frame_write(wslay_frame_context_ptr ctx, + struct wslay_frame_iocb *iocb, uint8_t *buf, + size_t buflen, size_t *pwpayloadlen); + +/* * Receives WebSocket frame and stores it in iocb. This function * returns the number of payload bytes received. This does not * include header bytes. In this case, iocb will be populated as @@ -276,9 +303,9 @@ struct wslay_event_on_msg_recv_arg { * Callback function invoked by wslay_event_recv() when a message is * completely received. */ -typedef void (*wslay_event_on_msg_recv_callback) -(wslay_event_context_ptr ctx, - const struct wslay_event_on_msg_recv_arg *arg, void *user_data); +typedef void (*wslay_event_on_msg_recv_callback)( + wslay_event_context_ptr ctx, const struct wslay_event_on_msg_recv_arg *arg, + void *user_data); struct wslay_event_on_frame_recv_start_arg { /* fin bit; 1 for final frame, or 0. */ @@ -296,9 +323,9 @@ struct wslay_event_on_frame_recv_start_arg { * starts to be received. This callback function is only invoked once * for each frame. 
*/ -typedef void (*wslay_event_on_frame_recv_start_callback) -(wslay_event_context_ptr ctx, - const struct wslay_event_on_frame_recv_start_arg *arg, void *user_data); +typedef void (*wslay_event_on_frame_recv_start_callback)( + wslay_event_context_ptr ctx, + const struct wslay_event_on_frame_recv_start_arg *arg, void *user_data); struct wslay_event_on_frame_recv_chunk_arg { /* chunk of payload data */ @@ -311,16 +338,16 @@ struct wslay_event_on_frame_recv_chunk_arg { * Callback function invoked by wslay_event_recv() when a chunk of * frame payload is received. */ -typedef void (*wslay_event_on_frame_recv_chunk_callback) -(wslay_event_context_ptr ctx, - const struct wslay_event_on_frame_recv_chunk_arg *arg, void *user_data); +typedef void (*wslay_event_on_frame_recv_chunk_callback)( + wslay_event_context_ptr ctx, + const struct wslay_event_on_frame_recv_chunk_arg *arg, void *user_data); /* * Callback function invoked by wslay_event_recv() when a frame is * completely received. */ -typedef void (*wslay_event_on_frame_recv_end_callback) -(wslay_event_context_ptr ctx, void *user_data); +typedef void (*wslay_event_on_frame_recv_end_callback)( + wslay_event_context_ptr ctx, void *user_data); /* * Callback function invoked by wslay_event_recv() when it wants to @@ -394,9 +421,9 @@ struct wslay_event_callbacks { * WSLAY_ERR_NOMEM * Out of memory. */ -int wslay_event_context_server_init -(wslay_event_context_ptr *ctx, - const struct wslay_event_callbacks *callbacks, void *user_data); +int wslay_event_context_server_init( + wslay_event_context_ptr *ctx, const struct wslay_event_callbacks *callbacks, + void *user_data); /* * Initializes ctx as WebSocket client. user_data is an arbitrary @@ -409,9 +436,9 @@ int wslay_event_context_server_init * WSLAY_ERR_NOMEM * Out of memory. 
*/ -int wslay_event_context_client_init -(wslay_event_context_ptr *ctx, - const struct wslay_event_callbacks *callbacks, void *user_data); +int wslay_event_context_client_init( + wslay_event_context_ptr *ctx, const struct wslay_event_callbacks *callbacks, + void *user_data); /* * Releases allocated resources for ctx. @@ -462,8 +489,8 @@ void wslay_event_config_set_max_recv_msg_length(wslay_event_context_ptr ctx, * or wslay_event_context_server_init() or * wslay_event_context_client_init() are replaced with callbacks. */ -void wslay_event_config_set_callbacks -(wslay_event_context_ptr ctx, const struct wslay_event_callbacks *callbacks); +void wslay_event_config_set_callbacks( + wslay_event_context_ptr ctx, const struct wslay_event_callbacks *callbacks); /* * Receives messages from peer. When receiving @@ -538,6 +565,50 @@ int wslay_event_recv(wslay_event_context_ptr ctx); */ int wslay_event_send(wslay_event_context_ptr ctx); +/* + * Writes queued messages to a buffer. Unlike wslay_event_send(), this + * function writes messages into the given buffer. It does not use + * wslay_event_send_callback function. Single call of + * wslay_event_write() writes multiple messages until there is not + * enough space left in a buffer. + * + * If ctx is initialized for WebSocket client use, wslay_event_write() + * uses wslay_event_genmask_callback to get new mask key. + * + * buf is a pointer to buffer and its capacity is given in buflen. It + * should have at least 14 bytes. + * + * When a message queued using wslay_event_queue_fragmented_msg() is + * sent, wslay_event_write() invokes + * wslay_event_fragmented_msg_callback for that message. + * + * After close control frame is sent, this function calls + * wslay_event_set_write_enabled() with second argument 0 to disable + * further transmission to peer. + * + * If there are any pending messages, wslay_event_want_write() returns + * 1, otherwise returns 0. 
+ * + * In case of a fatal errror which leads to negative return code, this + * function calls wslay_event_set_write_enabled() with second argument + * 0 to disable further transmission to peer. + * + * wslay_event_write() returns the number of bytes written to a buffer + * if it succeeds, or one of the following negative error codes: + * + * WSLAY_ERR_CALLBACK_FAILURE + * User defined callback function is failed. + * + * WSLAY_ERR_NOMEM + * Out of memory. + * + * When negative error code is returned, application must not make any + * further call of wslay_event_write() and must close WebSocket + * connection. + */ +ssize_t wslay_event_write(wslay_event_context_ptr ctx, uint8_t *buf, + size_t buflen); + struct wslay_event_msg { uint8_t opcode; const uint8_t *msg; @@ -594,10 +665,9 @@ union wslay_event_msg_source { * moment, return 0. If there is an error, return -1 and set error * code WSLAY_ERR_CALLBACK_FAILURE using wslay_event_set_error(). */ -typedef ssize_t (*wslay_event_fragmented_msg_callback) -(wslay_event_context_ptr ctx, - uint8_t *buf, size_t len, const union wslay_event_msg_source *source, - int *eof, void *user_data); +typedef ssize_t (*wslay_event_fragmented_msg_callback)( + wslay_event_context_ptr ctx, uint8_t *buf, size_t len, + const union wslay_event_msg_source *source, int *eof, void *user_data); struct wslay_event_fragmented_msg { /* opcode */ @@ -631,15 +701,16 @@ struct wslay_event_fragmented_msg { * WSLAY_ERR_NOMEM * Out of memory. */ -int wslay_event_queue_fragmented_msg -(wslay_event_context_ptr ctx, const struct wslay_event_fragmented_msg *arg); +int wslay_event_queue_fragmented_msg( + wslay_event_context_ptr ctx, const struct wslay_event_fragmented_msg *arg); /* * Extended version of wslay_event_queue_fragmented_msg which allows to set * reserved bits. 
*/ -int wslay_event_queue_fragmented_msg_ex(wslay_event_context_ptr ctx, - const struct wslay_event_fragmented_msg *arg, uint8_t rsv); +int wslay_event_queue_fragmented_msg_ex( + wslay_event_context_ptr ctx, const struct wslay_event_fragmented_msg *arg, + uint8_t rsv); /* * Queues close control frame. This function is provided just for @@ -669,8 +740,7 @@ int wslay_event_queue_fragmented_msg_ex(wslay_event_context_ptr ctx, * WSLAY_ERR_NOMEM * Out of memory. */ -int wslay_event_queue_close(wslay_event_context_ptr ctx, - uint16_t status_code, +int wslay_event_queue_close(wslay_event_context_ptr ctx, uint16_t status_code, const uint8_t *reason, size_t reason_length); /* diff --git a/thirdparty/wslay/includes/wslay/wslayver.h b/thirdparty/wslay/wslay/wslayver.h index 28f2018039..28f2018039 100644 --- a/thirdparty/wslay/includes/wslay/wslayver.h +++ b/thirdparty/wslay/wslay/wslayver.h diff --git a/thirdparty/wslay/wslay_event.c b/thirdparty/wslay/wslay_event.c index 140f7c01da..4c29fe4000 100644 --- a/thirdparty/wslay/wslay_event.c +++ b/thirdparty/wslay/wslay_event.c @@ -28,9 +28,9 @@ #include <assert.h> #include <stdio.h> -#include "wslay_queue.h" #include "wslay_frame.h" #include "wslay_net.h" +#include "wslay_macro.h" /* Start of utf8 dfa */ /* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de> * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. @@ -60,6 +60,7 @@ #define UTF8_ACCEPT 0 #define UTF8_REJECT 12 +/* clang-format off */ static const uint8_t utf8d[] = { /* * The first part of the table maps bytes to character classes that @@ -84,14 +85,13 @@ static const uint8_t utf8d[] = { 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,12,12,12,12,12, }; +/* clang-format on */ -static uint32_t -decode(uint32_t* state, uint32_t* codep, uint32_t byte) { +static uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte) { uint32_t type = utf8d[byte]; - *codep = (*state != UTF8_ACCEPT) ? 
- (byte & 0x3fu) | (*codep << 6) : - (0xff >> type) & (byte); + *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6) + : (0xff >> type) & (byte); *state = utf8d[256 + *state + type]; return *state; @@ -100,117 +100,96 @@ decode(uint32_t* state, uint32_t* codep, uint32_t byte) { /* End of utf8 dfa */ static ssize_t wslay_event_frame_recv_callback(uint8_t *buf, size_t len, - int flags, void *user_data) -{ + int flags, void *user_data) { struct wslay_event_frame_user_data *e = - (struct wslay_event_frame_user_data*)user_data; + (struct wslay_event_frame_user_data *)user_data; return e->ctx->callbacks.recv_callback(e->ctx, buf, len, flags, e->user_data); } static ssize_t wslay_event_frame_send_callback(const uint8_t *data, size_t len, - int flags, void *user_data) -{ + int flags, void *user_data) { struct wslay_event_frame_user_data *e = - (struct wslay_event_frame_user_data*)user_data; + (struct wslay_event_frame_user_data *)user_data; return e->ctx->callbacks.send_callback(e->ctx, data, len, flags, e->user_data); } static int wslay_event_frame_genmask_callback(uint8_t *buf, size_t len, - void *user_data) -{ + void *user_data) { struct wslay_event_frame_user_data *e = - (struct wslay_event_frame_user_data*)user_data; + (struct wslay_event_frame_user_data *)user_data; return e->ctx->callbacks.genmask_callback(e->ctx, buf, len, e->user_data); } -static int wslay_event_byte_chunk_init -(struct wslay_event_byte_chunk **chunk, size_t len) -{ - *chunk = (struct wslay_event_byte_chunk*)malloc - (sizeof(struct wslay_event_byte_chunk)); - if(*chunk == NULL) { +static int wslay_event_byte_chunk_init(struct wslay_event_byte_chunk **chunk, + size_t len) { + *chunk = malloc(sizeof(struct wslay_event_byte_chunk) + len); + if (*chunk == NULL) { return WSLAY_ERR_NOMEM; } memset(*chunk, 0, sizeof(struct wslay_event_byte_chunk)); - if(len) { - (*chunk)->data = (uint8_t*)malloc(len); - if((*chunk)->data == NULL) { - free(*chunk); - return WSLAY_ERR_NOMEM; - } + if (len) { + 
(*chunk)->data = (uint8_t *)(*chunk) + sizeof(**chunk); (*chunk)->data_length = len; } return 0; } -static void wslay_event_byte_chunk_free(struct wslay_event_byte_chunk *c) -{ - if(!c) { - return; - } - free(c->data); +static void wslay_event_byte_chunk_free(struct wslay_event_byte_chunk *c) { free(c); } static void wslay_event_byte_chunk_copy(struct wslay_event_byte_chunk *c, - size_t off, - const uint8_t *data, size_t data_length) -{ - memcpy(c->data+off, data, data_length); + size_t off, const uint8_t *data, + size_t data_length) { + memcpy(c->data + off, data, data_length); } -static void wslay_event_imsg_set(struct wslay_event_imsg *m, - uint8_t fin, uint8_t rsv, uint8_t opcode) -{ +static void wslay_event_imsg_set(struct wslay_event_imsg *m, uint8_t fin, + uint8_t rsv, uint8_t opcode) { m->fin = fin; m->rsv = rsv; m->opcode = opcode; m->msg_length = 0; } -static void wslay_event_imsg_chunks_free(struct wslay_event_imsg *m) -{ - if(!m->chunks) { - return; - } - while(!wslay_queue_empty(m->chunks)) { - wslay_event_byte_chunk_free(wslay_queue_top(m->chunks)); - wslay_queue_pop(m->chunks); +static void wslay_event_imsg_chunks_free(struct wslay_event_imsg *m) { + while (!wslay_queue_empty(&m->chunks)) { + struct wslay_event_byte_chunk *chunk = wslay_struct_of( + wslay_queue_top(&m->chunks), struct wslay_event_byte_chunk, qe); + wslay_queue_pop(&m->chunks); + wslay_event_byte_chunk_free(chunk); } } -static void wslay_event_imsg_reset(struct wslay_event_imsg *m) -{ +static void wslay_event_imsg_reset(struct wslay_event_imsg *m) { m->opcode = 0xffu; m->utf8state = UTF8_ACCEPT; wslay_event_imsg_chunks_free(m); } -static int wslay_event_imsg_append_chunk(struct wslay_event_imsg *m, size_t len) -{ - if(len == 0) { +static int wslay_event_imsg_append_chunk(struct wslay_event_imsg *m, + size_t len) { + if (len == 0) { return 0; } else { int r; struct wslay_event_byte_chunk *chunk; - if((r = wslay_event_byte_chunk_init(&chunk, len)) != 0) { - return r; - } - if((r = 
wslay_queue_push(m->chunks, chunk)) != 0) { + if ((r = wslay_event_byte_chunk_init(&chunk, len)) != 0) { return r; } + wslay_queue_push(&m->chunks, &chunk->qe); m->msg_length += len; return 0; } } -static int wslay_event_omsg_non_fragmented_init -(struct wslay_event_omsg **m, uint8_t opcode, uint8_t rsv, - const uint8_t *msg, size_t msg_length) -{ - *m = (struct wslay_event_omsg*)malloc(sizeof(struct wslay_event_omsg)); - if(!*m) { +static int wslay_event_omsg_non_fragmented_init(struct wslay_event_omsg **m, + uint8_t opcode, uint8_t rsv, + const uint8_t *msg, + size_t msg_length) { + *m = malloc(sizeof(struct wslay_event_omsg) + msg_length); + if (!*m) { return WSLAY_ERR_NOMEM; } memset(*m, 0, sizeof(struct wslay_event_omsg)); @@ -218,28 +197,22 @@ static int wslay_event_omsg_non_fragmented_init (*m)->opcode = opcode; (*m)->rsv = rsv; (*m)->type = WSLAY_NON_FRAGMENTED; - if(msg_length) { - (*m)->data = (uint8_t*)malloc(msg_length); - if(!(*m)->data) { - free(*m); - return WSLAY_ERR_NOMEM; - } + if (msg_length) { + (*m)->data = (uint8_t *)(*m) + sizeof(**m); memcpy((*m)->data, msg, msg_length); (*m)->data_length = msg_length; } return 0; } -static int wslay_event_omsg_fragmented_init -(struct wslay_event_omsg **m, uint8_t opcode, uint8_t rsv, - const union wslay_event_msg_source source, - wslay_event_fragmented_msg_callback read_callback) -{ - *m = (struct wslay_event_omsg*)malloc(sizeof(struct wslay_event_omsg)); - if(!*m) { +static int wslay_event_omsg_fragmented_init( + struct wslay_event_omsg **m, uint8_t opcode, uint8_t rsv, + const union wslay_event_msg_source source, + wslay_event_fragmented_msg_callback read_callback) { + *m = calloc(1, sizeof(struct wslay_event_omsg)); + if (!*m) { return WSLAY_ERR_NOMEM; } - memset(*m, 0, sizeof(struct wslay_event_omsg)); (*m)->opcode = opcode; (*m)->rsv = rsv; (*m)->type = WSLAY_FRAGMENTED; @@ -248,31 +221,25 @@ static int wslay_event_omsg_fragmented_init return 0; } -static void wslay_event_omsg_free(struct 
wslay_event_omsg *m) -{ - if(!m) { - return; - } - free(m->data); - free(m); -} +static void wslay_event_omsg_free(struct wslay_event_omsg *m) { free(m); } -static uint8_t* wslay_event_flatten_queue(struct wslay_queue *queue, size_t len) -{ - if(len == 0) { +static uint8_t *wslay_event_flatten_queue(struct wslay_queue *queue, + size_t len) { + if (len == 0) { return NULL; } else { size_t off = 0; - uint8_t *buf = (uint8_t*)malloc(len); - if(!buf) { + uint8_t *buf = malloc(len); + if (!buf) { return NULL; } - while(!wslay_queue_empty(queue)) { - struct wslay_event_byte_chunk *chunk = wslay_queue_top(queue); - memcpy(buf+off, chunk->data, chunk->data_length); + while (!wslay_queue_empty(queue)) { + struct wslay_event_byte_chunk *chunk = wslay_struct_of( + wslay_queue_top(queue), struct wslay_event_byte_chunk, qe); + wslay_queue_pop(queue); + memcpy(buf + off, chunk->data, chunk->data_length); off += chunk->data_length; wslay_event_byte_chunk_free(chunk); - wslay_queue_pop(queue); assert(off <= len); } assert(len == off); @@ -280,17 +247,15 @@ static uint8_t* wslay_event_flatten_queue(struct wslay_queue *queue, size_t len) } } -static int wslay_event_is_msg_queueable(wslay_event_context_ptr ctx) -{ +static int wslay_event_is_msg_queueable(wslay_event_context_ptr ctx) { return ctx->write_enabled && (ctx->close_status & WSLAY_CLOSE_QUEUED) == 0; } int wslay_event_queue_close(wslay_event_context_ptr ctx, uint16_t status_code, - const uint8_t *reason, size_t reason_length) -{ - if(!wslay_event_is_msg_queueable(ctx)) { + const uint8_t *reason, size_t reason_length) { + if (!wslay_event_is_msg_queueable(ctx)) { return WSLAY_ERR_NO_MORE_MSG; - } else if(reason_length > 123) { + } else if (reason_length > 123) { return WSLAY_ERR_INVALID_ARGUMENT; } else { uint8_t msg[128]; @@ -298,231 +263,203 @@ int wslay_event_queue_close(wslay_event_context_ptr ctx, uint16_t status_code, struct wslay_event_msg arg; uint16_t ncode; int r; - if(status_code == 0) { + if (status_code == 0) { 
msg_length = 0; } else { ncode = htons(status_code); memcpy(msg, &ncode, 2); - if(reason_length) { - memcpy(msg+2, reason, reason_length); + if (reason_length) { + memcpy(msg + 2, reason, reason_length); } - msg_length = reason_length+2; + msg_length = reason_length + 2; } arg.opcode = WSLAY_CONNECTION_CLOSE; arg.msg = msg; arg.msg_length = msg_length; r = wslay_event_queue_msg(ctx, &arg); - if(r == 0) { + if (r == 0) { ctx->close_status |= WSLAY_CLOSE_QUEUED; } return r; } } -static int wslay_event_queue_close_wrapper -(wslay_event_context_ptr ctx, uint16_t status_code, - const uint8_t *reason, size_t reason_length) -{ +static int wslay_event_queue_close_wrapper(wslay_event_context_ptr ctx, + uint16_t status_code, + const uint8_t *reason, + size_t reason_length) { int r; ctx->read_enabled = 0; - if((r = wslay_event_queue_close(ctx, status_code, reason, reason_length)) && - r != WSLAY_ERR_NO_MORE_MSG) { + if ((r = wslay_event_queue_close(ctx, status_code, reason, reason_length)) && + r != WSLAY_ERR_NO_MORE_MSG) { return r; } return 0; } -static int wslay_event_verify_rsv_bits(wslay_event_context_ptr ctx, uint8_t rsv) -{ +static int wslay_event_verify_rsv_bits(wslay_event_context_ptr ctx, + uint8_t rsv) { return ((rsv & ~ctx->allowed_rsv_bits) == 0); } int wslay_event_queue_msg(wslay_event_context_ptr ctx, - const struct wslay_event_msg *arg) -{ + const struct wslay_event_msg *arg) { return wslay_event_queue_msg_ex(ctx, arg, WSLAY_RSV_NONE); } int wslay_event_queue_msg_ex(wslay_event_context_ptr ctx, - const struct wslay_event_msg *arg, uint8_t rsv) -{ + const struct wslay_event_msg *arg, uint8_t rsv) { int r; struct wslay_event_omsg *omsg; - if(!wslay_event_is_msg_queueable(ctx)) { + if (!wslay_event_is_msg_queueable(ctx)) { return WSLAY_ERR_NO_MORE_MSG; } /* RSV1 is not allowed for control frames */ - if((wslay_is_ctrl_frame(arg->opcode) && - (arg->msg_length > 125 || wslay_get_rsv1(rsv))) - || !wslay_event_verify_rsv_bits(ctx, rsv)) { + if 
((wslay_is_ctrl_frame(arg->opcode) && + (arg->msg_length > 125 || wslay_get_rsv1(rsv))) || + !wslay_event_verify_rsv_bits(ctx, rsv)) { return WSLAY_ERR_INVALID_ARGUMENT; } - if((r = wslay_event_omsg_non_fragmented_init - (&omsg, arg->opcode, rsv, arg->msg, arg->msg_length)) != 0) { + if ((r = wslay_event_omsg_non_fragmented_init( + &omsg, arg->opcode, rsv, arg->msg, arg->msg_length)) != 0) { return r; } - if(wslay_is_ctrl_frame(arg->opcode)) { - if((r = wslay_queue_push(ctx->send_ctrl_queue, omsg)) != 0) { - return r; - } + if (wslay_is_ctrl_frame(arg->opcode)) { + wslay_queue_push(&ctx->send_ctrl_queue, &omsg->qe); } else { - if((r = wslay_queue_push(ctx->send_queue, omsg)) != 0) { - return r; - } + wslay_queue_push(&ctx->send_queue, &omsg->qe); } ++ctx->queued_msg_count; ctx->queued_msg_length += arg->msg_length; return 0; } -int wslay_event_queue_fragmented_msg -(wslay_event_context_ptr ctx, const struct wslay_event_fragmented_msg *arg) -{ +int wslay_event_queue_fragmented_msg( + wslay_event_context_ptr ctx, const struct wslay_event_fragmented_msg *arg) { return wslay_event_queue_fragmented_msg_ex(ctx, arg, WSLAY_RSV_NONE); } -int wslay_event_queue_fragmented_msg_ex(wslay_event_context_ptr ctx, - const struct wslay_event_fragmented_msg *arg, uint8_t rsv) -{ +int wslay_event_queue_fragmented_msg_ex( + wslay_event_context_ptr ctx, const struct wslay_event_fragmented_msg *arg, + uint8_t rsv) { int r; struct wslay_event_omsg *omsg; - if(!wslay_event_is_msg_queueable(ctx)) { + if (!wslay_event_is_msg_queueable(ctx)) { return WSLAY_ERR_NO_MORE_MSG; } - if(wslay_is_ctrl_frame(arg->opcode) || - !wslay_event_verify_rsv_bits(ctx, rsv)) { + if (wslay_is_ctrl_frame(arg->opcode) || + !wslay_event_verify_rsv_bits(ctx, rsv)) { return WSLAY_ERR_INVALID_ARGUMENT; } - if((r = wslay_event_omsg_fragmented_init - (&omsg, arg->opcode, rsv, arg->source, arg->read_callback)) != 0) { - return r; - } - if((r = wslay_queue_push(ctx->send_queue, omsg)) != 0) { + if ((r = 
wslay_event_omsg_fragmented_init( + &omsg, arg->opcode, rsv, arg->source, arg->read_callback)) != 0) { return r; } + wslay_queue_push(&ctx->send_queue, &omsg->qe); ++ctx->queued_msg_count; return 0; } -void wslay_event_config_set_callbacks -(wslay_event_context_ptr ctx, const struct wslay_event_callbacks *callbacks) -{ +void wslay_event_config_set_callbacks( + wslay_event_context_ptr ctx, + const struct wslay_event_callbacks *callbacks) { ctx->callbacks = *callbacks; } -static int wslay_event_context_init -(wslay_event_context_ptr *ctx, - const struct wslay_event_callbacks *callbacks, - void *user_data) -{ +static int +wslay_event_context_init(wslay_event_context_ptr *ctx, + const struct wslay_event_callbacks *callbacks, + void *user_data) { int i, r; struct wslay_frame_callbacks frame_callbacks = { - wslay_event_frame_send_callback, - wslay_event_frame_recv_callback, - wslay_event_frame_genmask_callback - }; - *ctx = (wslay_event_context_ptr)malloc(sizeof(struct wslay_event_context)); - if(!*ctx) { + wslay_event_frame_send_callback, wslay_event_frame_recv_callback, + wslay_event_frame_genmask_callback}; + *ctx = calloc(1, sizeof(struct wslay_event_context)); + if (!*ctx) { return WSLAY_ERR_NOMEM; } - memset(*ctx, 0, sizeof(struct wslay_event_context)); wslay_event_config_set_callbacks(*ctx, callbacks); (*ctx)->user_data = user_data; (*ctx)->frame_user_data.ctx = *ctx; (*ctx)->frame_user_data.user_data = user_data; - if((r = wslay_frame_context_init(&(*ctx)->frame_ctx, &frame_callbacks, - &(*ctx)->frame_user_data)) != 0) { + if ((r = wslay_frame_context_init(&(*ctx)->frame_ctx, &frame_callbacks, + &(*ctx)->frame_user_data)) != 0) { wslay_event_context_free(*ctx); return r; } (*ctx)->read_enabled = (*ctx)->write_enabled = 1; - (*ctx)->send_queue = wslay_queue_new(); - if(!(*ctx)->send_queue) { - wslay_event_context_free(*ctx); - return WSLAY_ERR_NOMEM; - } - (*ctx)->send_ctrl_queue = wslay_queue_new(); - if(!(*ctx)->send_ctrl_queue) { - 
wslay_event_context_free(*ctx); - return WSLAY_ERR_NOMEM; - } + wslay_queue_init(&(*ctx)->send_queue); + wslay_queue_init(&(*ctx)->send_ctrl_queue); (*ctx)->queued_msg_count = 0; (*ctx)->queued_msg_length = 0; - for(i = 0; i < 2; ++i) { + for (i = 0; i < 2; ++i) { + wslay_queue_init(&(*ctx)->imsgs[i].chunks); wslay_event_imsg_reset(&(*ctx)->imsgs[i]); - (*ctx)->imsgs[i].chunks = wslay_queue_new(); - if(!(*ctx)->imsgs[i].chunks) { - wslay_event_context_free(*ctx); - return WSLAY_ERR_NOMEM; - } } (*ctx)->imsg = &(*ctx)->imsgs[0]; (*ctx)->obufmark = (*ctx)->obuflimit = (*ctx)->obuf; (*ctx)->status_code_sent = WSLAY_CODE_ABNORMAL_CLOSURE; (*ctx)->status_code_recv = WSLAY_CODE_ABNORMAL_CLOSURE; - (*ctx)->max_recv_msg_length = (1u << 31)-1; + (*ctx)->max_recv_msg_length = (1u << 31) - 1; return 0; } -int wslay_event_context_server_init -(wslay_event_context_ptr *ctx, - const struct wslay_event_callbacks *callbacks, - void *user_data) -{ +int wslay_event_context_server_init( + wslay_event_context_ptr *ctx, const struct wslay_event_callbacks *callbacks, + void *user_data) { int r; - if((r = wslay_event_context_init(ctx, callbacks, user_data)) != 0) { + if ((r = wslay_event_context_init(ctx, callbacks, user_data)) != 0) { return r; } (*ctx)->server = 1; return 0; } -int wslay_event_context_client_init -(wslay_event_context_ptr *ctx, - const struct wslay_event_callbacks *callbacks, - void *user_data) -{ +int wslay_event_context_client_init( + wslay_event_context_ptr *ctx, const struct wslay_event_callbacks *callbacks, + void *user_data) { int r; - if((r = wslay_event_context_init(ctx, callbacks, user_data)) != 0) { + if ((r = wslay_event_context_init(ctx, callbacks, user_data)) != 0) { return r; } (*ctx)->server = 0; return 0; } -void wslay_event_context_free(wslay_event_context_ptr ctx) -{ +void wslay_event_context_free(wslay_event_context_ptr ctx) { int i; - if(!ctx) { + if (!ctx) { return; } - for(i = 0; i < 2; ++i) { + for (i = 0; i < 2; ++i) { 
wslay_event_imsg_chunks_free(&ctx->imsgs[i]); - wslay_queue_free(ctx->imsgs[i].chunks); + wslay_queue_deinit(&ctx->imsgs[i].chunks); } - if(ctx->send_queue) { - while(!wslay_queue_empty(ctx->send_queue)) { - wslay_event_omsg_free(wslay_queue_top(ctx->send_queue)); - wslay_queue_pop(ctx->send_queue); - } - wslay_queue_free(ctx->send_queue); + + while (!wslay_queue_empty(&ctx->send_queue)) { + struct wslay_event_omsg *omsg = wslay_struct_of( + wslay_queue_top(&ctx->send_queue), struct wslay_event_omsg, qe); + wslay_queue_pop(&ctx->send_queue); + wslay_event_omsg_free(omsg); } - if(ctx->send_ctrl_queue) { - while(!wslay_queue_empty(ctx->send_ctrl_queue)) { - wslay_event_omsg_free(wslay_queue_top(ctx->send_ctrl_queue)); - wslay_queue_pop(ctx->send_ctrl_queue); - } - wslay_queue_free(ctx->send_ctrl_queue); + wslay_queue_deinit(&ctx->send_queue); + + while (!wslay_queue_empty(&ctx->send_ctrl_queue)) { + struct wslay_event_omsg *omsg = wslay_struct_of( + wslay_queue_top(&ctx->send_ctrl_queue), struct wslay_event_omsg, qe); + wslay_queue_pop(&ctx->send_ctrl_queue); + wslay_event_omsg_free(omsg); } + wslay_queue_deinit(&ctx->send_ctrl_queue); + wslay_frame_context_free(ctx->frame_ctx); wslay_event_omsg_free(ctx->omsg); free(ctx); } -static void wslay_event_call_on_frame_recv_start_callback -(wslay_event_context_ptr ctx, const struct wslay_frame_iocb *iocb) -{ - if(ctx->callbacks.on_frame_recv_start_callback) { +static void wslay_event_call_on_frame_recv_start_callback( + wslay_event_context_ptr ctx, const struct wslay_frame_iocb *iocb) { + if (ctx->callbacks.on_frame_recv_start_callback) { struct wslay_event_on_frame_recv_start_arg arg; arg.fin = iocb->fin; arg.rsv = iocb->rsv; @@ -532,10 +469,9 @@ static void wslay_event_call_on_frame_recv_start_callback } } -static void wslay_event_call_on_frame_recv_chunk_callback -(wslay_event_context_ptr ctx, const struct wslay_frame_iocb *iocb) -{ - if(ctx->callbacks.on_frame_recv_chunk_callback) { +static void 
wslay_event_call_on_frame_recv_chunk_callback( + wslay_event_context_ptr ctx, const struct wslay_frame_iocb *iocb) { + if (ctx->callbacks.on_frame_recv_chunk_callback) { struct wslay_event_on_frame_recv_chunk_arg arg; arg.data = iocb->data; arg.data_length = iocb->data_length; @@ -543,205 +479,202 @@ static void wslay_event_call_on_frame_recv_chunk_callback } } -static void wslay_event_call_on_frame_recv_end_callback -(wslay_event_context_ptr ctx) -{ - if(ctx->callbacks.on_frame_recv_end_callback) { +static void +wslay_event_call_on_frame_recv_end_callback(wslay_event_context_ptr ctx) { + if (ctx->callbacks.on_frame_recv_end_callback) { ctx->callbacks.on_frame_recv_end_callback(ctx, ctx->user_data); } } -static int wslay_event_is_valid_status_code(uint16_t status_code) -{ - return (1000 <= status_code && status_code <= 1011 && - status_code != 1004 && status_code != 1005 && status_code != 1006) || - (3000 <= status_code && status_code <= 4999); +static int wslay_event_is_valid_status_code(uint16_t status_code) { + return (1000 <= status_code && status_code <= 1011 && status_code != 1004 && + status_code != 1005 && status_code != 1006) || + (3000 <= status_code && status_code <= 4999); } -static int wslay_event_config_get_no_buffering(wslay_event_context_ptr ctx) -{ +static int wslay_event_config_get_no_buffering(wslay_event_context_ptr ctx) { return (ctx->config & WSLAY_CONFIG_NO_BUFFERING) > 0; } -int wslay_event_recv(wslay_event_context_ptr ctx) -{ +int wslay_event_recv(wslay_event_context_ptr ctx) { struct wslay_frame_iocb iocb; ssize_t r; - while(ctx->read_enabled) { + while (ctx->read_enabled) { memset(&iocb, 0, sizeof(iocb)); r = wslay_frame_recv(ctx->frame_ctx, &iocb); - if(r >= 0) { + if (r >= 0) { int new_frame = 0; /* RSV1 is not allowed on control and continuation frames */ - if((!wslay_event_verify_rsv_bits(ctx, iocb.rsv)) || - (wslay_get_rsv1(iocb.rsv) && (wslay_is_ctrl_frame(iocb.opcode) || - iocb.opcode == WSLAY_CONTINUATION_FRAME)) || - (ctx->server 
&& !iocb.mask) || (!ctx->server && iocb.mask)) { - if((r = wslay_event_queue_close_wrapper - (ctx, WSLAY_CODE_PROTOCOL_ERROR, NULL, 0)) != 0) { - return r; + if ((!wslay_event_verify_rsv_bits(ctx, iocb.rsv)) || + (wslay_get_rsv1(iocb.rsv) && + (wslay_is_ctrl_frame(iocb.opcode) || + iocb.opcode == WSLAY_CONTINUATION_FRAME)) || + (ctx->server && !iocb.mask) || (!ctx->server && iocb.mask)) { + if ((r = wslay_event_queue_close_wrapper(ctx, WSLAY_CODE_PROTOCOL_ERROR, + NULL, 0)) != 0) { + return (int)r; } break; } - if(ctx->imsg->opcode == 0xffu) { - if(iocb.opcode == WSLAY_TEXT_FRAME || - iocb.opcode == WSLAY_BINARY_FRAME || - iocb.opcode == WSLAY_CONNECTION_CLOSE || - iocb.opcode == WSLAY_PING || - iocb.opcode == WSLAY_PONG) { + if (ctx->imsg->opcode == 0xffu) { + if (iocb.opcode == WSLAY_TEXT_FRAME || + iocb.opcode == WSLAY_BINARY_FRAME || + iocb.opcode == WSLAY_CONNECTION_CLOSE || + iocb.opcode == WSLAY_PING || iocb.opcode == WSLAY_PONG) { wslay_event_imsg_set(ctx->imsg, iocb.fin, iocb.rsv, iocb.opcode); new_frame = 1; } else { - if((r = wslay_event_queue_close_wrapper - (ctx, WSLAY_CODE_PROTOCOL_ERROR, NULL, 0)) != 0) { - return r; + if ((r = wslay_event_queue_close_wrapper( + ctx, WSLAY_CODE_PROTOCOL_ERROR, NULL, 0)) != 0) { + return (int)r; } break; } - } else if(ctx->ipayloadlen == 0 && ctx->ipayloadoff == 0) { - if(iocb.opcode == WSLAY_CONTINUATION_FRAME) { + } else if (ctx->ipayloadlen == 0 && ctx->ipayloadoff == 0) { + if (iocb.opcode == WSLAY_CONTINUATION_FRAME) { ctx->imsg->fin = iocb.fin; - } else if(iocb.opcode == WSLAY_CONNECTION_CLOSE || - iocb.opcode == WSLAY_PING || - iocb.opcode == WSLAY_PONG) { + } else if (iocb.opcode == WSLAY_CONNECTION_CLOSE || + iocb.opcode == WSLAY_PING || iocb.opcode == WSLAY_PONG) { ctx->imsg = &ctx->imsgs[1]; wslay_event_imsg_set(ctx->imsg, iocb.fin, iocb.rsv, iocb.opcode); } else { - if((r = wslay_event_queue_close_wrapper - (ctx, WSLAY_CODE_PROTOCOL_ERROR, NULL, 0)) != 0) { - return r; + if ((r = 
wslay_event_queue_close_wrapper( + ctx, WSLAY_CODE_PROTOCOL_ERROR, NULL, 0)) != 0) { + return (int)r; } break; } new_frame = 1; } - if(new_frame) { - if(ctx->imsg->msg_length+iocb.payload_length > - ctx->max_recv_msg_length) { - if((r = wslay_event_queue_close_wrapper - (ctx, WSLAY_CODE_MESSAGE_TOO_BIG, NULL, 0)) != 0) { - return r; + if (new_frame) { + if (ctx->imsg->msg_length + iocb.payload_length > + ctx->max_recv_msg_length) { + if ((r = wslay_event_queue_close_wrapper( + ctx, WSLAY_CODE_MESSAGE_TOO_BIG, NULL, 0)) != 0) { + return (int)r; } break; } ctx->ipayloadlen = iocb.payload_length; wslay_event_call_on_frame_recv_start_callback(ctx, &iocb); - if(!wslay_event_config_get_no_buffering(ctx) || - wslay_is_ctrl_frame(iocb.opcode)) { - if((r = wslay_event_imsg_append_chunk(ctx->imsg, - iocb.payload_length)) != 0) { + if (!wslay_event_config_get_no_buffering(ctx) || + wslay_is_ctrl_frame(iocb.opcode)) { + if ((r = wslay_event_imsg_append_chunk(ctx->imsg, + iocb.payload_length)) != 0) { ctx->read_enabled = 0; - return r; + return (int)r; } } } /* If RSV1 bit is set then it is too early for utf-8 validation */ - if((!wslay_get_rsv1(ctx->imsg->rsv) && - ctx->imsg->opcode == WSLAY_TEXT_FRAME) || - ctx->imsg->opcode == WSLAY_CONNECTION_CLOSE) { + if ((!wslay_get_rsv1(ctx->imsg->rsv) && + ctx->imsg->opcode == WSLAY_TEXT_FRAME) || + ctx->imsg->opcode == WSLAY_CONNECTION_CLOSE) { size_t i; - if(ctx->imsg->opcode == WSLAY_CONNECTION_CLOSE) { + if (ctx->imsg->opcode == WSLAY_CONNECTION_CLOSE) { i = 2; } else { i = 0; } - for(; i < iocb.data_length; ++i) { + for (; i < iocb.data_length; ++i) { uint32_t codep; - if(decode(&ctx->imsg->utf8state, &codep, - iocb.data[i]) == UTF8_REJECT) { - if((r = wslay_event_queue_close_wrapper - (ctx, WSLAY_CODE_INVALID_FRAME_PAYLOAD_DATA, NULL, 0)) != 0) { - return r; + if (decode(&ctx->imsg->utf8state, &codep, iocb.data[i]) == + UTF8_REJECT) { + if ((r = wslay_event_queue_close_wrapper( + ctx, WSLAY_CODE_INVALID_FRAME_PAYLOAD_DATA, NULL, 
0)) != + 0) { + return (int)r; } break; } } } - if(ctx->imsg->utf8state == UTF8_REJECT) { + if (ctx->imsg->utf8state == UTF8_REJECT) { break; } wslay_event_call_on_frame_recv_chunk_callback(ctx, &iocb); - if(iocb.data_length > 0) { - if(!wslay_event_config_get_no_buffering(ctx) || - wslay_is_ctrl_frame(iocb.opcode)) { + if (iocb.data_length > 0) { + if (!wslay_event_config_get_no_buffering(ctx) || + wslay_is_ctrl_frame(iocb.opcode)) { struct wslay_event_byte_chunk *chunk; - chunk = wslay_queue_tail(ctx->imsg->chunks); - wslay_event_byte_chunk_copy(chunk, ctx->ipayloadoff, - iocb.data, iocb.data_length); + chunk = wslay_struct_of(wslay_queue_tail(&ctx->imsg->chunks), + struct wslay_event_byte_chunk, qe); + wslay_event_byte_chunk_copy(chunk, ctx->ipayloadoff, iocb.data, + iocb.data_length); } ctx->ipayloadoff += iocb.data_length; } - if(ctx->ipayloadoff == ctx->ipayloadlen) { - if(ctx->imsg->fin && - (ctx->imsg->opcode == WSLAY_TEXT_FRAME || - ctx->imsg->opcode == WSLAY_CONNECTION_CLOSE) && - ctx->imsg->utf8state != UTF8_ACCEPT) { - if((r = wslay_event_queue_close_wrapper - (ctx, WSLAY_CODE_INVALID_FRAME_PAYLOAD_DATA, NULL, 0)) != 0) { - return r; + if (ctx->ipayloadoff == ctx->ipayloadlen) { + if (ctx->imsg->fin && + (ctx->imsg->opcode == WSLAY_TEXT_FRAME || + ctx->imsg->opcode == WSLAY_CONNECTION_CLOSE) && + ctx->imsg->utf8state != UTF8_ACCEPT) { + if ((r = wslay_event_queue_close_wrapper( + ctx, WSLAY_CODE_INVALID_FRAME_PAYLOAD_DATA, NULL, 0)) != 0) { + return (int)r; } break; } wslay_event_call_on_frame_recv_end_callback(ctx); - if(ctx->imsg->fin) { - if(ctx->callbacks.on_msg_recv_callback || - ctx->imsg->opcode == WSLAY_CONNECTION_CLOSE || - ctx->imsg->opcode == WSLAY_PING) { + if (ctx->imsg->fin) { + if (ctx->callbacks.on_msg_recv_callback || + ctx->imsg->opcode == WSLAY_CONNECTION_CLOSE || + ctx->imsg->opcode == WSLAY_PING) { struct wslay_event_on_msg_recv_arg arg; uint16_t status_code = 0; uint8_t *msg = NULL; size_t msg_length = 0; - 
if(!wslay_event_config_get_no_buffering(ctx) || - wslay_is_ctrl_frame(iocb.opcode)) { - msg = wslay_event_flatten_queue(ctx->imsg->chunks, + if (!wslay_event_config_get_no_buffering(ctx) || + wslay_is_ctrl_frame(iocb.opcode)) { + msg = wslay_event_flatten_queue(&ctx->imsg->chunks, ctx->imsg->msg_length); - if(ctx->imsg->msg_length && !msg) { + if (ctx->imsg->msg_length && !msg) { ctx->read_enabled = 0; return WSLAY_ERR_NOMEM; } msg_length = ctx->imsg->msg_length; } - if(ctx->imsg->opcode == WSLAY_CONNECTION_CLOSE) { + if (ctx->imsg->opcode == WSLAY_CONNECTION_CLOSE) { const uint8_t *reason; size_t reason_length; - if(ctx->imsg->msg_length >= 2) { + if (ctx->imsg->msg_length >= 2) { memcpy(&status_code, msg, 2); status_code = ntohs(status_code); - if(!wslay_event_is_valid_status_code(status_code)) { + if (!wslay_event_is_valid_status_code(status_code)) { free(msg); - if((r = wslay_event_queue_close_wrapper - (ctx, WSLAY_CODE_PROTOCOL_ERROR, NULL, 0)) != 0) { - return r; + if ((r = wslay_event_queue_close_wrapper( + ctx, WSLAY_CODE_PROTOCOL_ERROR, NULL, 0)) != 0) { + return (int)r; } break; } - reason = msg+2; - reason_length = ctx->imsg->msg_length-2; + reason = msg + 2; + reason_length = ctx->imsg->msg_length - 2; } else { reason = NULL; reason_length = 0; } ctx->close_status |= WSLAY_CLOSE_RECEIVED; ctx->status_code_recv = - status_code == 0 ? WSLAY_CODE_NO_STATUS_RCVD : status_code; - if((r = wslay_event_queue_close_wrapper - (ctx, status_code, reason, reason_length)) != 0) { + status_code == 0 ? 
WSLAY_CODE_NO_STATUS_RCVD : status_code; + if ((r = wslay_event_queue_close_wrapper(ctx, status_code, reason, + reason_length)) != 0) { free(msg); - return r; + return (int)r; } - } else if(ctx->imsg->opcode == WSLAY_PING) { + } else if (ctx->imsg->opcode == WSLAY_PING) { struct wslay_event_msg pong_arg; pong_arg.opcode = WSLAY_PONG; pong_arg.msg = msg; pong_arg.msg_length = ctx->imsg->msg_length; - if((r = wslay_event_queue_msg(ctx, &pong_arg)) && - r != WSLAY_ERR_NO_MORE_MSG) { + if ((r = wslay_event_queue_msg(ctx, &pong_arg)) && + r != WSLAY_ERR_NO_MORE_MSG) { ctx->read_enabled = 0; free(msg); - return r; + return (int)r; } } - if(ctx->callbacks.on_msg_recv_callback) { + if (ctx->callbacks.on_msg_recv_callback) { arg.rsv = ctx->imsg->rsv; arg.opcode = ctx->imsg->opcode; arg.msg = msg; @@ -753,17 +686,17 @@ int wslay_event_recv(wslay_event_context_ptr ctx) free(msg); } wslay_event_imsg_reset(ctx->imsg); - if(ctx->imsg == &ctx->imsgs[1]) { + if (ctx->imsg == &ctx->imsgs[1]) { ctx->imsg = &ctx->imsgs[0]; } } ctx->ipayloadlen = ctx->ipayloadoff = 0; } } else { - if(r != WSLAY_ERR_WANT_READ || - (ctx->error != WSLAY_ERR_WOULDBLOCK && ctx->error != 0)) { - if((r = wslay_event_queue_close_wrapper(ctx, 0, NULL, 0)) != 0) { - return r; + if (r != WSLAY_ERR_WANT_READ || + (ctx->error != WSLAY_ERR_WOULDBLOCK && ctx->error != 0)) { + if ((r = wslay_event_queue_close_wrapper(ctx, 0, NULL, 0)) != 0) { + return (int)r; } return WSLAY_ERR_CALLBACK_FAILURE; } @@ -773,26 +706,25 @@ int wslay_event_recv(wslay_event_context_ptr ctx) return 0; } -static void wslay_event_on_non_fragmented_msg_popped -(wslay_event_context_ptr ctx) -{ +static void +wslay_event_on_non_fragmented_msg_popped(wslay_event_context_ptr ctx) { ctx->omsg->fin = 1; ctx->opayloadlen = ctx->omsg->data_length; ctx->opayloadoff = 0; } -static struct wslay_event_omsg* wslay_event_send_ctrl_queue_pop -(wslay_event_context_ptr ctx) -{ +static struct wslay_event_omsg * 
+wslay_event_send_ctrl_queue_pop(wslay_event_context_ptr ctx) { /* * If Close control frame is queued, we don't send any control frame * other than Close. */ - if(ctx->close_status & WSLAY_CLOSE_QUEUED) { - while(!wslay_queue_empty(ctx->send_ctrl_queue)) { - struct wslay_event_omsg *msg = wslay_queue_top(ctx->send_ctrl_queue); - wslay_queue_pop(ctx->send_ctrl_queue); - if(msg->opcode == WSLAY_CONNECTION_CLOSE) { + if (ctx->close_status & WSLAY_CLOSE_QUEUED) { + while (!wslay_queue_empty(&ctx->send_ctrl_queue)) { + struct wslay_event_omsg *msg = wslay_struct_of( + wslay_queue_top(&ctx->send_ctrl_queue), struct wslay_event_omsg, qe); + wslay_queue_pop(&ctx->send_ctrl_queue); + if (msg->opcode == WSLAY_CONNECTION_CLOSE) { return msg; } else { wslay_event_omsg_free(msg); @@ -800,71 +732,73 @@ static struct wslay_event_omsg* wslay_event_send_ctrl_queue_pop } return NULL; } else { - struct wslay_event_omsg *msg = wslay_queue_top(ctx->send_ctrl_queue); - wslay_queue_pop(ctx->send_ctrl_queue); + struct wslay_event_omsg *msg = wslay_struct_of( + wslay_queue_top(&ctx->send_ctrl_queue), struct wslay_event_omsg, qe); + wslay_queue_pop(&ctx->send_ctrl_queue); return msg; } } -int wslay_event_send(wslay_event_context_ptr ctx) -{ +int wslay_event_send(wslay_event_context_ptr ctx) { struct wslay_frame_iocb iocb; ssize_t r; - while(ctx->write_enabled && - (!wslay_queue_empty(ctx->send_queue) || - !wslay_queue_empty(ctx->send_ctrl_queue) || ctx->omsg)) { - if(!ctx->omsg) { - if(wslay_queue_empty(ctx->send_ctrl_queue)) { - ctx->omsg = wslay_queue_top(ctx->send_queue); - wslay_queue_pop(ctx->send_queue); + while (ctx->write_enabled && + (!wslay_queue_empty(&ctx->send_queue) || + !wslay_queue_empty(&ctx->send_ctrl_queue) || ctx->omsg)) { + if (!ctx->omsg) { + if (wslay_queue_empty(&ctx->send_ctrl_queue)) { + ctx->omsg = wslay_struct_of(wslay_queue_top(&ctx->send_queue), + struct wslay_event_omsg, qe); + wslay_queue_pop(&ctx->send_queue); } else { ctx->omsg = 
wslay_event_send_ctrl_queue_pop(ctx); - if(ctx->omsg == NULL) { + if (ctx->omsg == NULL) { break; } } - if(ctx->omsg->type == WSLAY_NON_FRAGMENTED) { + if (ctx->omsg->type == WSLAY_NON_FRAGMENTED) { wslay_event_on_non_fragmented_msg_popped(ctx); } - } else if(!wslay_is_ctrl_frame(ctx->omsg->opcode) && - ctx->frame_ctx->ostate == PREP_HEADER && - !wslay_queue_empty(ctx->send_ctrl_queue)) { - if((r = wslay_queue_push_front(ctx->send_queue, ctx->omsg)) != 0) { - ctx->write_enabled = 0; - return r; - } + } else if (!wslay_is_ctrl_frame(ctx->omsg->opcode) && + ctx->frame_ctx->ostate == PREP_HEADER && + !wslay_queue_empty(&ctx->send_ctrl_queue)) { + wslay_queue_push_front(&ctx->send_queue, &ctx->omsg->qe); ctx->omsg = wslay_event_send_ctrl_queue_pop(ctx); - if(ctx->omsg == NULL) { + if (ctx->omsg == NULL) { break; } /* ctrl message has WSLAY_NON_FRAGMENTED */ wslay_event_on_non_fragmented_msg_popped(ctx); } - if(ctx->omsg->type == WSLAY_NON_FRAGMENTED) { + if (ctx->omsg->type == WSLAY_NON_FRAGMENTED) { memset(&iocb, 0, sizeof(iocb)); iocb.fin = 1; iocb.opcode = ctx->omsg->opcode; iocb.rsv = ctx->omsg->rsv; - iocb.mask = ctx->server^1; - iocb.data = ctx->omsg->data+ctx->opayloadoff; - iocb.data_length = ctx->opayloadlen-ctx->opayloadoff; + iocb.mask = ctx->server ^ 1; + iocb.data = ctx->omsg->data; + iocb.data_length = ctx->opayloadlen; + if (ctx->opayloadoff) { + iocb.data += ctx->opayloadoff; + iocb.data_length -= ctx->opayloadoff; + } iocb.payload_length = ctx->opayloadlen; r = wslay_frame_send(ctx->frame_ctx, &iocb); - if(r >= 0) { - ctx->opayloadoff += r; - if(ctx->opayloadoff == ctx->opayloadlen) { + if (r >= 0) { + ctx->opayloadoff += (uint64_t)r; + if (ctx->opayloadoff == ctx->opayloadlen) { --ctx->queued_msg_count; ctx->queued_msg_length -= ctx->omsg->data_length; - if(ctx->omsg->opcode == WSLAY_CONNECTION_CLOSE) { + if (ctx->omsg->opcode == WSLAY_CONNECTION_CLOSE) { uint16_t status_code = 0; ctx->write_enabled = 0; ctx->close_status |= WSLAY_CLOSE_SENT; - 
if(ctx->omsg->data_length >= 2) { + if (ctx->omsg->data_length >= 2) { memcpy(&status_code, ctx->omsg->data, 2); status_code = ntohs(status_code); } ctx->status_code_sent = - status_code == 0 ? WSLAY_CODE_NO_STATUS_RCVD : status_code; + status_code == 0 ? WSLAY_CODE_NO_STATUS_RCVD : status_code; } wslay_event_omsg_free(ctx->omsg); ctx->omsg = NULL; @@ -872,30 +806,29 @@ int wslay_event_send(wslay_event_context_ptr ctx) break; } } else { - if(r != WSLAY_ERR_WANT_WRITE || - (ctx->error != WSLAY_ERR_WOULDBLOCK && ctx->error != 0)) { + if (r != WSLAY_ERR_WANT_WRITE || + (ctx->error != WSLAY_ERR_WOULDBLOCK && ctx->error != 0)) { ctx->write_enabled = 0; return WSLAY_ERR_CALLBACK_FAILURE; } break; } } else { - if(ctx->omsg->fin == 0 && ctx->obuflimit == ctx->obufmark) { + if (ctx->omsg->fin == 0 && ctx->obuflimit == ctx->obufmark) { int eof = 0; r = ctx->omsg->read_callback(ctx, ctx->obuf, sizeof(ctx->obuf), - &ctx->omsg->source, - &eof, ctx->user_data); - if(r == 0 && eof == 0) { + &ctx->omsg->source, &eof, ctx->user_data); + if (r == 0 && eof == 0) { break; - } else if(r < 0) { + } else if (r < 0) { ctx->write_enabled = 0; return WSLAY_ERR_CALLBACK_FAILURE; } - ctx->obuflimit = ctx->obuf+r; - if(eof) { + ctx->obuflimit = ctx->obuf + r; + if (eof) { ctx->omsg->fin = 1; } - ctx->opayloadlen = r; + ctx->opayloadlen = (uint64_t)r; ctx->opayloadoff = 0; } memset(&iocb, 0, sizeof(iocb)); @@ -904,29 +837,28 @@ int wslay_event_send(wslay_event_context_ptr ctx) iocb.rsv = ctx->omsg->rsv; iocb.mask = ctx->server ? 
0 : 1; iocb.data = ctx->obufmark; - iocb.data_length = ctx->obuflimit-ctx->obufmark; + iocb.data_length = (size_t)(ctx->obuflimit - ctx->obufmark); iocb.payload_length = ctx->opayloadlen; r = wslay_frame_send(ctx->frame_ctx, &iocb); - if(r >= 0) { + if (r >= 0) { ctx->obufmark += r; - if(ctx->obufmark == ctx->obuflimit) { + if (ctx->obufmark == ctx->obuflimit) { ctx->obufmark = ctx->obuflimit = ctx->obuf; - if(ctx->omsg->fin) { + if (ctx->omsg->fin) { --ctx->queued_msg_count; wslay_event_omsg_free(ctx->omsg); ctx->omsg = NULL; } else { ctx->omsg->opcode = WSLAY_CONTINUATION_FRAME; /* RSV1 is not set on continuation frames */ - ctx->omsg->rsv = ctx->omsg->rsv & ~WSLAY_RSV1_BIT; + ctx->omsg->rsv = (uint8_t)(ctx->omsg->rsv & ~WSLAY_RSV1_BIT); } } else { break; } } else { - if(r != WSLAY_ERR_WANT_WRITE || - (ctx->error != WSLAY_ERR_WOULDBLOCK && - ctx->error != 0)) { + if (r != WSLAY_ERR_WANT_WRITE || + (ctx->error != WSLAY_ERR_WOULDBLOCK && ctx->error != 0)) { ctx->write_enabled = 0; return WSLAY_ERR_CALLBACK_FAILURE; } @@ -937,91 +869,214 @@ int wslay_event_send(wslay_event_context_ptr ctx) return 0; } -void wslay_event_set_error(wslay_event_context_ptr ctx, int val) -{ +ssize_t wslay_event_write(wslay_event_context_ptr ctx, uint8_t *buf, + size_t buflen) { + struct wslay_frame_iocb iocb; + ssize_t r; + uint8_t *buf_last = buf; + size_t wpayloadlen; + while (ctx->write_enabled && + (!wslay_queue_empty(&ctx->send_queue) || + !wslay_queue_empty(&ctx->send_ctrl_queue) || ctx->omsg)) { + if (!ctx->omsg) { + if (wslay_queue_empty(&ctx->send_ctrl_queue)) { + ctx->omsg = wslay_struct_of(wslay_queue_top(&ctx->send_queue), + struct wslay_event_omsg, qe); + wslay_queue_pop(&ctx->send_queue); + } else { + ctx->omsg = wslay_event_send_ctrl_queue_pop(ctx); + if (ctx->omsg == NULL) { + break; + } + } + if (ctx->omsg->type == WSLAY_NON_FRAGMENTED) { + wslay_event_on_non_fragmented_msg_popped(ctx); + } + } else if (!wslay_is_ctrl_frame(ctx->omsg->opcode) && + ctx->frame_ctx->ostate 
== PREP_HEADER && + !wslay_queue_empty(&ctx->send_ctrl_queue)) { + wslay_queue_push_front(&ctx->send_queue, &ctx->omsg->qe); + ctx->omsg = wslay_event_send_ctrl_queue_pop(ctx); + if (ctx->omsg == NULL) { + break; + } + /* ctrl message has WSLAY_NON_FRAGMENTED */ + wslay_event_on_non_fragmented_msg_popped(ctx); + } + if (ctx->omsg->type == WSLAY_NON_FRAGMENTED) { + memset(&iocb, 0, sizeof(iocb)); + iocb.fin = 1; + iocb.opcode = ctx->omsg->opcode; + iocb.rsv = ctx->omsg->rsv; + iocb.mask = ctx->server ^ 1; + iocb.data = ctx->omsg->data; + iocb.data_length = ctx->opayloadlen; + if (ctx->opayloadoff) { + iocb.data += ctx->opayloadoff; + iocb.data_length -= ctx->opayloadoff; + } + iocb.payload_length = ctx->opayloadlen; + r = wslay_frame_write(ctx->frame_ctx, &iocb, buf_last, buflen, + &wpayloadlen); + if (r > 0) { + assert((size_t)r <= buflen); + + buf_last += r; + buflen -= (size_t)r; + + ctx->opayloadoff += wpayloadlen; + if (ctx->opayloadoff == ctx->opayloadlen) { + --ctx->queued_msg_count; + ctx->queued_msg_length -= ctx->omsg->data_length; + if (ctx->omsg->opcode == WSLAY_CONNECTION_CLOSE) { + uint16_t status_code = 0; + ctx->write_enabled = 0; + ctx->close_status |= WSLAY_CLOSE_SENT; + if (ctx->omsg->data_length >= 2) { + memcpy(&status_code, ctx->omsg->data, 2); + status_code = ntohs(status_code); + } + ctx->status_code_sent = + status_code == 0 ? 
WSLAY_CODE_NO_STATUS_RCVD : status_code; + } + wslay_event_omsg_free(ctx->omsg); + ctx->omsg = NULL; + } else { + break; + } + } else if (r == 0) { + return buf_last - buf; + } else { + return WSLAY_ERR_CALLBACK_FAILURE; + } + } else { + if (ctx->omsg->fin == 0 && ctx->obuflimit == ctx->obufmark) { + int eof = 0; + r = ctx->omsg->read_callback(ctx, ctx->obuf, sizeof(ctx->obuf), + &ctx->omsg->source, &eof, ctx->user_data); + if (r == 0 && eof == 0) { + break; + } else if (r < 0) { + ctx->write_enabled = 0; + return WSLAY_ERR_CALLBACK_FAILURE; + } + ctx->obuflimit = ctx->obuf + r; + if (eof) { + ctx->omsg->fin = 1; + } + ctx->opayloadlen = (uint64_t)r; + ctx->opayloadoff = 0; + } + memset(&iocb, 0, sizeof(iocb)); + iocb.fin = ctx->omsg->fin; + iocb.opcode = ctx->omsg->opcode; + iocb.rsv = ctx->omsg->rsv; + iocb.mask = ctx->server ? 0 : 1; + iocb.data = ctx->obufmark; + iocb.data_length = (size_t)(ctx->obuflimit - ctx->obufmark); + iocb.payload_length = ctx->opayloadlen; + r = wslay_frame_write(ctx->frame_ctx, &iocb, buf_last, buflen, + &wpayloadlen); + if (r > 0) { + assert((size_t)r <= buflen); + + buf_last += r; + buflen -= (size_t)r; + + ctx->obufmark += wpayloadlen; + if (ctx->obufmark == ctx->obuflimit) { + ctx->obufmark = ctx->obuflimit = ctx->obuf; + if (ctx->omsg->fin) { + --ctx->queued_msg_count; + wslay_event_omsg_free(ctx->omsg); + ctx->omsg = NULL; + } else { + ctx->omsg->opcode = WSLAY_CONTINUATION_FRAME; + /* RSV1 is not set on continuation frames */ + ctx->omsg->rsv = (uint8_t)(ctx->omsg->rsv & ~WSLAY_RSV1_BIT); + } + } else { + break; + } + } else if (r == 0) { + return buf_last - buf; + } else { + return WSLAY_ERR_CALLBACK_FAILURE; + } + } + } + return buf_last - buf; +} + +void wslay_event_set_error(wslay_event_context_ptr ctx, int val) { ctx->error = val; } -int wslay_event_want_read(wslay_event_context_ptr ctx) -{ +int wslay_event_want_read(wslay_event_context_ptr ctx) { return ctx->read_enabled; } -int 
wslay_event_want_write(wslay_event_context_ptr ctx) -{ +int wslay_event_want_write(wslay_event_context_ptr ctx) { return ctx->write_enabled && - (!wslay_queue_empty(ctx->send_queue) || - !wslay_queue_empty(ctx->send_ctrl_queue) || ctx->omsg); + (!wslay_queue_empty(&ctx->send_queue) || + !wslay_queue_empty(&ctx->send_ctrl_queue) || ctx->omsg); } -void wslay_event_shutdown_read(wslay_event_context_ptr ctx) -{ +void wslay_event_shutdown_read(wslay_event_context_ptr ctx) { ctx->read_enabled = 0; } -void wslay_event_shutdown_write(wslay_event_context_ptr ctx) -{ +void wslay_event_shutdown_write(wslay_event_context_ptr ctx) { ctx->write_enabled = 0; } -int wslay_event_get_read_enabled(wslay_event_context_ptr ctx) -{ +int wslay_event_get_read_enabled(wslay_event_context_ptr ctx) { return ctx->read_enabled; } -int wslay_event_get_write_enabled(wslay_event_context_ptr ctx) -{ +int wslay_event_get_write_enabled(wslay_event_context_ptr ctx) { return ctx->write_enabled; } -int wslay_event_get_close_received(wslay_event_context_ptr ctx) -{ +int wslay_event_get_close_received(wslay_event_context_ptr ctx) { return (ctx->close_status & WSLAY_CLOSE_RECEIVED) > 0; } -int wslay_event_get_close_sent(wslay_event_context_ptr ctx) -{ +int wslay_event_get_close_sent(wslay_event_context_ptr ctx) { return (ctx->close_status & WSLAY_CLOSE_SENT) > 0; } void wslay_event_config_set_allowed_rsv_bits(wslay_event_context_ptr ctx, - uint8_t rsv) -{ + uint8_t rsv) { /* We currently only allow WSLAY_RSV1_BIT or WSLAY_RSV_NONE */ ctx->allowed_rsv_bits = rsv & WSLAY_RSV1_BIT; } -void wslay_event_config_set_no_buffering(wslay_event_context_ptr ctx, int val) -{ - if(val) { +void wslay_event_config_set_no_buffering(wslay_event_context_ptr ctx, int val) { + if (val) { ctx->config |= WSLAY_CONFIG_NO_BUFFERING; } else { - ctx->config &= ~WSLAY_CONFIG_NO_BUFFERING; + ctx->config &= (uint32_t)~WSLAY_CONFIG_NO_BUFFERING; } } void wslay_event_config_set_max_recv_msg_length(wslay_event_context_ptr ctx, - uint64_t 
val) -{ + uint64_t val) { ctx->max_recv_msg_length = val; } -uint16_t wslay_event_get_status_code_received(wslay_event_context_ptr ctx) -{ +uint16_t wslay_event_get_status_code_received(wslay_event_context_ptr ctx) { return ctx->status_code_recv; } -uint16_t wslay_event_get_status_code_sent(wslay_event_context_ptr ctx) -{ +uint16_t wslay_event_get_status_code_sent(wslay_event_context_ptr ctx) { return ctx->status_code_sent; } -size_t wslay_event_get_queued_msg_count(wslay_event_context_ptr ctx) -{ +size_t wslay_event_get_queued_msg_count(wslay_event_context_ptr ctx) { return ctx->queued_msg_count; } -size_t wslay_event_get_queued_msg_length(wslay_event_context_ptr ctx) -{ +size_t wslay_event_get_queued_msg_length(wslay_event_context_ptr ctx) { return ctx->queued_msg_length; } diff --git a/thirdparty/wslay/wslay_event.h b/thirdparty/wslay/wslay_event.h index 36feb9036d..e30c3d1940 100644 --- a/thirdparty/wslay/wslay_event.h +++ b/thirdparty/wslay/wslay_event.h @@ -31,10 +31,10 @@ #include <wslay/wslay.h> -struct wslay_stack; -struct wslay_queue; +#include "wslay_queue.h" struct wslay_event_byte_chunk { + struct wslay_queue_entry qe; uint8_t *data; size_t data_length; }; @@ -44,16 +44,14 @@ struct wslay_event_imsg { uint8_t rsv; uint8_t opcode; uint32_t utf8state; - struct wslay_queue *chunks; + struct wslay_queue chunks; size_t msg_length; }; -enum wslay_event_msg_type { - WSLAY_NON_FRAGMENTED, - WSLAY_FRAGMENTED -}; +enum wslay_event_msg_type { WSLAY_NON_FRAGMENTED, WSLAY_FRAGMENTED }; struct wslay_event_omsg { + struct wslay_queue_entry qe; uint8_t fin; uint8_t opcode; uint8_t rsv; @@ -77,9 +75,7 @@ enum wslay_event_close_status { WSLAY_CLOSE_SENT = 1 << 2 }; -enum wslay_event_config { - WSLAY_CONFIG_NO_BUFFERING = 1 << 0 -}; +enum wslay_event_config { WSLAY_CONFIG_NO_BUFFERING = 1 << 0 }; struct wslay_event_context { /* config status, bitwise OR of enum wslay_event_config values*/ @@ -118,9 +114,9 @@ struct wslay_event_context { is currently sent. 
*/ struct wslay_event_omsg *omsg; /* Queue for non-control frames */ - struct wslay_queue/*<wslay_omsg*>*/ *send_queue; + struct wslay_queue /*<wslay_omsg*>*/ send_queue; /* Queue for control frames */ - struct wslay_queue/*<wslay_omsg*>*/ *send_ctrl_queue; + struct wslay_queue /*<wslay_omsg*>*/ send_ctrl_queue; /* Size of send_queue + size of send_ctrl_queue */ size_t queued_msg_count; /* The sum of message length in send_queue */ diff --git a/thirdparty/wslay/wslay_frame.c b/thirdparty/wslay/wslay_frame.c index 445e750ca5..435044a3f4 100644 --- a/thirdparty/wslay/wslay_frame.c +++ b/thirdparty/wslay/wslay_frame.c @@ -34,10 +34,9 @@ int wslay_frame_context_init(wslay_frame_context_ptr *ctx, const struct wslay_frame_callbacks *callbacks, - void *user_data) -{ - *ctx = (wslay_frame_context_ptr)malloc(sizeof(struct wslay_frame_context)); - if(*ctx == NULL) { + void *user_data) { + *ctx = malloc(sizeof(struct wslay_frame_context)); + if (*ctx == NULL) { return -1; } memset(*ctx, 0, sizeof(struct wslay_frame_context)); @@ -50,38 +49,35 @@ int wslay_frame_context_init(wslay_frame_context_ptr *ctx, return 0; } -void wslay_frame_context_free(wslay_frame_context_ptr ctx) -{ - free(ctx); -} +void wslay_frame_context_free(wslay_frame_context_ptr ctx) { free(ctx); } ssize_t wslay_frame_send(wslay_frame_context_ptr ctx, - struct wslay_frame_iocb *iocb) -{ - if(iocb->data_length > iocb->payload_length) { + struct wslay_frame_iocb *iocb) { + if (iocb->data_length > iocb->payload_length) { return WSLAY_ERR_INVALID_ARGUMENT; } - if(ctx->ostate == PREP_HEADER) { + if (ctx->ostate == PREP_HEADER) { uint8_t *hdptr = ctx->oheader; memset(ctx->oheader, 0, sizeof(ctx->oheader)); - *hdptr |= (iocb->fin << 7) & 0x80u; - *hdptr |= (iocb->rsv << 4) & 0x70u; - *hdptr |= iocb->opcode & 0xfu; + *hdptr |= (uint8_t)((uint8_t)(iocb->fin << 7) & 0x80u); + *hdptr |= (uint8_t)((uint8_t)(iocb->rsv << 4) & 0x70u); + /* Suppress stubborn gcc-10 warning */ + *hdptr |= (uint8_t)((uint8_t)(iocb->opcode << 
0) & 0xfu); ++hdptr; - *hdptr |= (iocb->mask << 7) & 0x80u; - if(wslay_is_ctrl_frame(iocb->opcode) && iocb->payload_length > 125) { + *hdptr |= (uint8_t)((uint8_t)(iocb->mask << 7) & 0x80u); + if (wslay_is_ctrl_frame(iocb->opcode) && iocb->payload_length > 125) { return WSLAY_ERR_INVALID_ARGUMENT; } - if(iocb->payload_length < 126) { - *hdptr |= iocb->payload_length; + if (iocb->payload_length < 126) { + *hdptr |= (uint8_t)iocb->payload_length; ++hdptr; - } else if(iocb->payload_length < (1 << 16)) { - uint16_t len = htons(iocb->payload_length); + } else if (iocb->payload_length < (1 << 16)) { + uint16_t len = htons((uint16_t)iocb->payload_length); *hdptr |= 126; ++hdptr; memcpy(hdptr, &len, 2); hdptr += 2; - } else if(iocb->payload_length < (1ull << 63)) { + } else if (iocb->payload_length < (1ull << 63)) { uint64_t len = hton64(iocb->payload_length); *hdptr |= 127; ++hdptr; @@ -91,9 +87,9 @@ ssize_t wslay_frame_send(wslay_frame_context_ptr ctx, /* Too large payload length */ return WSLAY_ERR_INVALID_ARGUMENT; } - if(iocb->mask) { - if(ctx->callbacks.genmask_callback(ctx->omaskkey, 4, - ctx->user_data) != 0) { + if (iocb->mask) { + if (ctx->callbacks.genmask_callback(ctx->omaskkey, 4, ctx->user_data) != + 0) { return WSLAY_ERR_INVALID_CALLBACK; } else { ctx->omask = 1; @@ -107,21 +103,21 @@ ssize_t wslay_frame_send(wslay_frame_context_ptr ctx, ctx->opayloadlen = iocb->payload_length; ctx->opayloadoff = 0; } - if(ctx->ostate == SEND_HEADER) { - ptrdiff_t len = ctx->oheaderlimit-ctx->oheadermark; + if (ctx->ostate == SEND_HEADER) { + ptrdiff_t len = ctx->oheaderlimit - ctx->oheadermark; ssize_t r; int flags = 0; - if(iocb->data_length > 0) { + if (iocb->data_length > 0) { flags |= WSLAY_MSG_MORE; }; - r = ctx->callbacks.send_callback(ctx->oheadermark, len, flags, + r = ctx->callbacks.send_callback(ctx->oheadermark, (size_t)len, flags, ctx->user_data); - if(r > 0) { - if(r > len) { + if (r > 0) { + if (r > len) { return WSLAY_ERR_INVALID_CALLBACK; } else { 
ctx->oheadermark += r; - if(ctx->oheadermark == ctx->oheaderlimit) { + if (ctx->oheadermark == ctx->oheaderlimit) { ctx->ostate = SEND_PAYLOAD; } else { return WSLAY_ERR_WANT_WRITE; @@ -131,34 +127,34 @@ ssize_t wslay_frame_send(wslay_frame_context_ptr ctx, return WSLAY_ERR_WANT_WRITE; } } - if(ctx->ostate == SEND_PAYLOAD) { + if (ctx->ostate == SEND_PAYLOAD) { size_t totallen = 0; - if(iocb->data_length > 0) { - if(ctx->omask) { + if (iocb->data_length > 0) { + if (ctx->omask) { uint8_t temp[4096]; const uint8_t *datamark = iocb->data, - *datalimit = iocb->data+iocb->data_length; - while(datamark < datalimit) { - size_t datalen = datalimit - datamark; - const uint8_t *writelimit = datamark+ - wslay_min(sizeof(temp), datalen); - size_t writelen = writelimit-datamark; + *datalimit = iocb->data + iocb->data_length; + while (datamark < datalimit) { + size_t datalen = (size_t)(datalimit - datamark); + const uint8_t *writelimit = + datamark + wslay_min(sizeof(temp), datalen); + size_t writelen = (size_t)(writelimit - datamark); ssize_t r; size_t i; - for(i = 0; i < writelen; ++i) { - temp[i] = datamark[i]^ctx->omaskkey[(ctx->opayloadoff+i)%4]; + for (i = 0; i < writelen; ++i) { + temp[i] = datamark[i] ^ ctx->omaskkey[(ctx->opayloadoff + i) % 4]; } r = ctx->callbacks.send_callback(temp, writelen, 0, ctx->user_data); - if(r > 0) { - if((size_t)r > writelen) { + if (r > 0) { + if ((size_t)r > writelen) { return WSLAY_ERR_INVALID_CALLBACK; } else { datamark += r; - ctx->opayloadoff += r; - totallen += r; + ctx->opayloadoff += (uint64_t)r; + totallen += (size_t)r; } } else { - if(totallen > 0) { + if (totallen > 0) { break; } else { return WSLAY_ERR_WANT_WRITE; @@ -169,44 +165,148 @@ ssize_t wslay_frame_send(wslay_frame_context_ptr ctx, ssize_t r; r = ctx->callbacks.send_callback(iocb->data, iocb->data_length, 0, ctx->user_data); - if(r > 0) { - if((size_t)r > iocb->data_length) { + if (r > 0) { + if ((size_t)r > iocb->data_length) { return WSLAY_ERR_INVALID_CALLBACK; } else 
{ - ctx->opayloadoff += r; - totallen = r; + ctx->opayloadoff += (uint64_t)r; + totallen = (size_t)r; } } else { return WSLAY_ERR_WANT_WRITE; } } } - if(ctx->opayloadoff == ctx->opayloadlen) { + if (ctx->opayloadoff == ctx->opayloadlen) { ctx->ostate = PREP_HEADER; } - return totallen; + return (ssize_t)totallen; } return WSLAY_ERR_INVALID_ARGUMENT; } -static void wslay_shift_ibuf(wslay_frame_context_ptr ctx) -{ - ptrdiff_t len = ctx->ibuflimit-ctx->ibufmark; - memmove(ctx->ibuf, ctx->ibufmark, len); - ctx->ibuflimit = ctx->ibuf+len; +ssize_t wslay_frame_write(wslay_frame_context_ptr ctx, + struct wslay_frame_iocb *iocb, uint8_t *buf, + size_t buflen, size_t *pwpayloadlen) { + uint8_t *buf_last = buf; + size_t i; + size_t hdlen; + + *pwpayloadlen = 0; + + if (iocb->data_length > iocb->payload_length) { + return WSLAY_ERR_INVALID_ARGUMENT; + } + + switch (ctx->ostate) { + case PREP_HEADER: + case PREP_HEADER_NOBUF: + hdlen = 2; + if (iocb->payload_length < 126) { + /* nothing to do */ + } else if (iocb->payload_length < (1 << 16)) { + hdlen += 2; + } else if (iocb->payload_length < (1ull << 63)) { + hdlen += 8; + } + if (iocb->mask) { + hdlen += 4; + } + + if (buflen < hdlen) { + ctx->ostate = PREP_HEADER_NOBUF; + return 0; + } + + memset(buf_last, 0, hdlen); + *buf_last |= (uint8_t)((uint8_t)(iocb->fin << 7) & 0x80u); + *buf_last |= (uint8_t)((uint8_t)(iocb->rsv << 4) & 0x70u); + /* Suppress stubborn gcc-10 warning */ + *buf_last |= (uint8_t)((uint8_t)(iocb->opcode << 0) & 0xfu); + ++buf_last; + *buf_last |= (uint8_t)((uint8_t)(iocb->mask << 7) & 0x80u); + if (wslay_is_ctrl_frame(iocb->opcode) && iocb->payload_length > 125) { + return WSLAY_ERR_INVALID_ARGUMENT; + } + if (iocb->payload_length < 126) { + *buf_last |= (uint8_t)iocb->payload_length; + ++buf_last; + } else if (iocb->payload_length < (1 << 16)) { + uint16_t len = htons((uint16_t)iocb->payload_length); + *buf_last |= 126; + ++buf_last; + memcpy(buf_last, &len, 2); + buf_last += 2; + } else if 
(iocb->payload_length < (1ull << 63)) { + uint64_t len = hton64(iocb->payload_length); + *buf_last |= 127; + ++buf_last; + memcpy(buf_last, &len, 8); + buf_last += 8; + } else { + /* Too large payload length */ + return WSLAY_ERR_INVALID_ARGUMENT; + } + if (iocb->mask) { + if (ctx->callbacks.genmask_callback(ctx->omaskkey, 4, ctx->user_data) != + 0) { + return WSLAY_ERR_INVALID_CALLBACK; + } else { + ctx->omask = 1; + memcpy(buf_last, ctx->omaskkey, 4); + buf_last += 4; + } + } + ctx->ostate = SEND_PAYLOAD; + ctx->opayloadlen = iocb->payload_length; + ctx->opayloadoff = 0; + + buflen -= (size_t)(buf_last - buf); + /* fall through */ + case SEND_PAYLOAD: + if (iocb->data_length > 0) { + size_t writelen = wslay_min(buflen, iocb->data_length); + + if (ctx->omask) { + for (i = 0; i < writelen; ++i) { + *buf_last++ = + iocb->data[i] ^ ctx->omaskkey[(ctx->opayloadoff + i) % 4]; + } + } else { + memcpy(buf_last, iocb->data, writelen); + buf_last += writelen; + } + + ctx->opayloadoff += writelen; + *pwpayloadlen = writelen; + } + + if (ctx->opayloadoff == ctx->opayloadlen) { + ctx->ostate = PREP_HEADER; + } + + return buf_last - buf; + default: + return WSLAY_ERR_INVALID_ARGUMENT; + } +} + +static void wslay_shift_ibuf(wslay_frame_context_ptr ctx) { + ptrdiff_t len = ctx->ibuflimit - ctx->ibufmark; + memmove(ctx->ibuf, ctx->ibufmark, (size_t)len); + ctx->ibuflimit = ctx->ibuf + len; ctx->ibufmark = ctx->ibuf; } -static ssize_t wslay_recv(wslay_frame_context_ptr ctx) -{ +static ssize_t wslay_recv(wslay_frame_context_ptr ctx) { ssize_t r; - if(ctx->ibufmark != ctx->ibuf) { + if (ctx->ibufmark != ctx->ibuf) { wslay_shift_ibuf(ctx); } - r = ctx->callbacks.recv_callback - (ctx->ibuflimit, ctx->ibuf+sizeof(ctx->ibuf)-ctx->ibuflimit, - 0, ctx->user_data); - if(r > 0) { + r = ctx->callbacks.recv_callback( + ctx->ibuflimit, (size_t)(ctx->ibuf + sizeof(ctx->ibuf) - ctx->ibuflimit), + 0, ctx->user_data); + if (r > 0) { ctx->ibuflimit += r; } else { r = WSLAY_ERR_WANT_READ; @@ -217,17 
+317,16 @@ static ssize_t wslay_recv(wslay_frame_context_ptr ctx) #define WSLAY_AVAIL_IBUF(ctx) ((size_t)(ctx->ibuflimit - ctx->ibufmark)) ssize_t wslay_frame_recv(wslay_frame_context_ptr ctx, - struct wslay_frame_iocb *iocb) -{ + struct wslay_frame_iocb *iocb) { ssize_t r; - if(ctx->istate == RECV_HEADER1) { + if (ctx->istate == RECV_HEADER1) { uint8_t fin, opcode, rsv, payloadlen; - if(WSLAY_AVAIL_IBUF(ctx) < ctx->ireqread) { - if((r = wslay_recv(ctx)) <= 0) { + if (WSLAY_AVAIL_IBUF(ctx) < ctx->ireqread) { + if ((r = wslay_recv(ctx)) <= 0) { return r; } } - if(WSLAY_AVAIL_IBUF(ctx) < ctx->ireqread) { + if (WSLAY_AVAIL_IBUF(ctx) < ctx->ireqread) { return WSLAY_ERR_WANT_READ; } fin = (ctx->ibufmark[0] >> 7) & 1; @@ -240,19 +339,19 @@ ssize_t wslay_frame_recv(wslay_frame_context_ptr ctx, ctx->imask = (ctx->ibufmark[0] >> 7) & 1; payloadlen = ctx->ibufmark[0] & 0x7fu; ++ctx->ibufmark; - if(wslay_is_ctrl_frame(opcode) && (payloadlen > 125 || !fin)) { + if (wslay_is_ctrl_frame(opcode) && (payloadlen > 125 || !fin)) { return WSLAY_ERR_PROTO; } - if(payloadlen == 126) { + if (payloadlen == 126) { ctx->istate = RECV_EXT_PAYLOADLEN; ctx->ireqread = 2; - } else if(payloadlen == 127) { + } else if (payloadlen == 127) { ctx->istate = RECV_EXT_PAYLOADLEN; ctx->ireqread = 8; } else { ctx->ipayloadlen = payloadlen; ctx->ipayloadoff = 0; - if(ctx->imask) { + if (ctx->imask) { ctx->istate = RECV_MASKKEY; ctx->ireqread = 4; } else { @@ -260,42 +359,41 @@ ssize_t wslay_frame_recv(wslay_frame_context_ptr ctx, } } } - if(ctx->istate == RECV_EXT_PAYLOADLEN) { - if(WSLAY_AVAIL_IBUF(ctx) < ctx->ireqread) { - if((r = wslay_recv(ctx)) <= 0) { + if (ctx->istate == RECV_EXT_PAYLOADLEN) { + if (WSLAY_AVAIL_IBUF(ctx) < ctx->ireqread) { + if ((r = wslay_recv(ctx)) <= 0) { return r; } - if(WSLAY_AVAIL_IBUF(ctx) < ctx->ireqread) { + if (WSLAY_AVAIL_IBUF(ctx) < ctx->ireqread) { return WSLAY_ERR_WANT_READ; } } ctx->ipayloadlen = 0; ctx->ipayloadoff = 0; - 
memcpy((uint8_t*)&ctx->ipayloadlen+(8-ctx->ireqread), - ctx->ibufmark, ctx->ireqread); + memcpy((uint8_t *)&ctx->ipayloadlen + (8 - ctx->ireqread), ctx->ibufmark, + ctx->ireqread); ctx->ipayloadlen = ntoh64(ctx->ipayloadlen); ctx->ibufmark += ctx->ireqread; - if(ctx->ireqread == 8) { - if(ctx->ipayloadlen < (1 << 16) || - ctx->ipayloadlen & (1ull << 63)) { + if (ctx->ireqread == 8) { + if (ctx->ipayloadlen < (1 << 16) || ctx->ipayloadlen & (1ull << 63)) { return WSLAY_ERR_PROTO; } - } else if(ctx->ipayloadlen < 126) { + } else if (ctx->ipayloadlen < 126) { return WSLAY_ERR_PROTO; } - if(ctx->imask) { + if (ctx->imask) { ctx->istate = RECV_MASKKEY; ctx->ireqread = 4; } else { ctx->istate = RECV_PAYLOAD; } } - if(ctx->istate == RECV_MASKKEY) { - if(WSLAY_AVAIL_IBUF(ctx) < ctx->ireqread) { - if((r = wslay_recv(ctx)) <= 0) { + if (ctx->istate == RECV_MASKKEY) { + if (WSLAY_AVAIL_IBUF(ctx) < ctx->ireqread) { + if ((r = wslay_recv(ctx)) <= 0) { return r; } - if(WSLAY_AVAIL_IBUF(ctx) < ctx->ireqread) { + if (WSLAY_AVAIL_IBUF(ctx) < ctx->ireqread) { return WSLAY_ERR_WANT_READ; } } @@ -303,25 +401,25 @@ ssize_t wslay_frame_recv(wslay_frame_context_ptr ctx, ctx->ibufmark += 4; ctx->istate = RECV_PAYLOAD; } - if(ctx->istate == RECV_PAYLOAD) { + if (ctx->istate == RECV_PAYLOAD) { uint8_t *readlimit, *readmark; - uint64_t rempayloadlen = ctx->ipayloadlen-ctx->ipayloadoff; - if(WSLAY_AVAIL_IBUF(ctx) == 0 && rempayloadlen > 0) { - if((r = wslay_recv(ctx)) <= 0) { + uint64_t rempayloadlen = ctx->ipayloadlen - ctx->ipayloadoff; + if (WSLAY_AVAIL_IBUF(ctx) == 0 && rempayloadlen > 0) { + if ((r = wslay_recv(ctx)) <= 0) { return r; } } readmark = ctx->ibufmark; - readlimit = WSLAY_AVAIL_IBUF(ctx) < rempayloadlen ? - ctx->ibuflimit : ctx->ibufmark+rempayloadlen; - if(ctx->imask) { - for(; ctx->ibufmark != readlimit; - ++ctx->ibufmark, ++ctx->ipayloadoff) { + readlimit = WSLAY_AVAIL_IBUF(ctx) < rempayloadlen + ? 
ctx->ibuflimit + : ctx->ibufmark + rempayloadlen; + if (ctx->imask) { + for (; ctx->ibufmark != readlimit; ++ctx->ibufmark, ++ctx->ipayloadoff) { ctx->ibufmark[0] ^= ctx->imaskkey[ctx->ipayloadoff % 4]; } } else { ctx->ibufmark = readlimit; - ctx->ipayloadoff += readlimit-readmark; + ctx->ipayloadoff += (uint64_t)(readlimit - readmark); } iocb->fin = ctx->iom.fin; iocb->rsv = ctx->iom.rsv; @@ -329,12 +427,12 @@ ssize_t wslay_frame_recv(wslay_frame_context_ptr ctx, iocb->payload_length = ctx->ipayloadlen; iocb->mask = ctx->imask; iocb->data = readmark; - iocb->data_length = ctx->ibufmark-readmark; - if(ctx->ipayloadlen == ctx->ipayloadoff) { + iocb->data_length = (size_t)(ctx->ibufmark - readmark); + if (ctx->ipayloadlen == ctx->ipayloadoff) { ctx->istate = RECV_HEADER1; ctx->ireqread = 2; } - return iocb->data_length; + return (ssize_t)iocb->data_length; } return WSLAY_ERR_INVALID_ARGUMENT; } diff --git a/thirdparty/wslay/wslay_frame.h b/thirdparty/wslay/wslay_frame.h index 6a75858cc7..3ac98112b5 100644 --- a/thirdparty/wslay/wslay_frame.h +++ b/thirdparty/wslay/wslay_frame.h @@ -33,6 +33,7 @@ enum wslay_frame_state { PREP_HEADER, + PREP_HEADER_NOBUF, SEND_HEADER, SEND_PAYLOAD, RECV_HEADER1, diff --git a/thirdparty/wslay/wslay_stack.h b/thirdparty/wslay/wslay_macro.h index 16e4e968eb..a06cff98e1 100644 --- a/thirdparty/wslay/wslay_stack.h +++ b/thirdparty/wslay/wslay_macro.h @@ -1,7 +1,7 @@ /* * Wslay - The WebSocket Library * - * Copyright (c) 2011, 2012 Tatsuhiro Tsujikawa + * Copyright (c) 2020 Tatsuhiro Tsujikawa * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the @@ -22,8 +22,8 @@ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef WSLAY_STACK_H -#define WSLAY_STACK_H +#ifndef WSLAY_MACRO_H +#define WSLAY_MACRO_H #ifdef HAVE_CONFIG_H # include "config.h" @@ -31,20 +31,9 @@ #include <wslay/wslay.h> -struct wslay_stack_cell { - void *data; - struct wslay_stack_cell *next; -}; +#include <stddef.h> -struct wslay_stack { - struct wslay_stack_cell *top; -}; +#define wslay_struct_of(ptr, type, member) \ + ((type *)(void *)((char *)(ptr)-offsetof(type, member))) -struct wslay_stack* wslay_stack_new(); -void wslay_stack_free(struct wslay_stack *stack); -int wslay_stack_push(struct wslay_stack *stack, void *data); -void wslay_stack_pop(struct wslay_stack *stack); -void* wslay_stack_top(struct wslay_stack *stack); -int wslay_stack_empty(struct wslay_stack *stack); - -#endif /* WSLAY_STACK_H */ +#endif /* WSLAY_MACRO_H */ diff --git a/thirdparty/wslay/wslay_net.c b/thirdparty/wslay/wslay_net.c index d3867c21fb..e256158b4c 100644 --- a/thirdparty/wslay/wslay_net.c +++ b/thirdparty/wslay/wslay_net.c @@ -26,10 +26,9 @@ #ifndef WORDS_BIGENDIAN -uint64_t wslay_byteswap64(uint64_t x) -{ +uint64_t wslay_byteswap64(uint64_t x) { uint64_t u = ntohl(x & 0xffffffffllu); - uint64_t l = ntohl(x >> 32); + uint64_t l = ntohl((uint32_t)(x >> 32)); return (u << 32) | l; } diff --git a/thirdparty/wslay/wslay_queue.c b/thirdparty/wslay/wslay_queue.c index 8d2669687d..ebde3ba83d 100644 --- a/thirdparty/wslay/wslay_queue.c +++ b/thirdparty/wslay/wslay_queue.c @@ -27,91 +27,51 @@ #include <string.h> #include <assert.h> -struct wslay_queue* wslay_queue_new(void) -{ - struct wslay_queue *queue = (struct wslay_queue*)malloc - (sizeof(struct wslay_queue)); - if(!queue) { - return NULL; - } - queue->top = queue->tail = NULL; - return queue; -} +#include "wslay_macro.h" -void wslay_queue_free(struct wslay_queue *queue) -{ - if(!queue) { - return; - } else { - struct wslay_queue_cell *p = queue->top; - while(p) { - struct wslay_queue_cell *next = p->next; - free(p); - p = next; - } - free(queue); - } +void 
wslay_queue_init(struct wslay_queue *queue) { + queue->top = NULL; + queue->tail = &queue->top; } -int wslay_queue_push(struct wslay_queue *queue, void *data) -{ - struct wslay_queue_cell *new_cell = (struct wslay_queue_cell*)malloc - (sizeof(struct wslay_queue_cell)); - if(!new_cell) { - return WSLAY_ERR_NOMEM; - } - new_cell->data = data; - new_cell->next = NULL; - if(queue->tail) { - queue->tail->next = new_cell; - queue->tail = new_cell; +void wslay_queue_deinit(struct wslay_queue *queue) { (void)queue; } - } else { - queue->top = queue->tail = new_cell; - } - return 0; +void wslay_queue_push(struct wslay_queue *queue, + struct wslay_queue_entry *ent) { + ent->next = NULL; + *queue->tail = ent; + queue->tail = &ent->next; } -int wslay_queue_push_front(struct wslay_queue *queue, void *data) -{ - struct wslay_queue_cell *new_cell = (struct wslay_queue_cell*)malloc - (sizeof(struct wslay_queue_cell)); - if(!new_cell) { - return WSLAY_ERR_NOMEM; - } - new_cell->data = data; - new_cell->next = queue->top; - queue->top = new_cell; - if(!queue->tail) { - queue->tail = queue->top; +void wslay_queue_push_front(struct wslay_queue *queue, + struct wslay_queue_entry *ent) { + ent->next = queue->top; + queue->top = ent; + + if (ent->next == NULL) { + queue->tail = &ent->next; } - return 0; } -void wslay_queue_pop(struct wslay_queue *queue) -{ - struct wslay_queue_cell *top = queue->top; - assert(top); - queue->top = top->next; - if(top == queue->tail) { - queue->tail = NULL; +void wslay_queue_pop(struct wslay_queue *queue) { + assert(queue->top); + queue->top = queue->top->next; + if (queue->top == NULL) { + queue->tail = &queue->top; } - free(top); } -void* wslay_queue_top(struct wslay_queue *queue) -{ +struct wslay_queue_entry *wslay_queue_top(struct wslay_queue *queue) { assert(queue->top); - return queue->top->data; + return queue->top; } -void* wslay_queue_tail(struct wslay_queue *queue) -{ - assert(queue->tail); - return queue->tail->data; +struct wslay_queue_entry 
*wslay_queue_tail(struct wslay_queue *queue) { + assert(queue->top); + return wslay_struct_of(queue->tail, struct wslay_queue_entry, next); } -int wslay_queue_empty(struct wslay_queue *queue) -{ +int wslay_queue_empty(struct wslay_queue *queue) { + assert(queue->top || queue->tail == &queue->top); return queue->top == NULL; } diff --git a/thirdparty/wslay/wslay_queue.h b/thirdparty/wslay/wslay_queue.h index 55e78a042e..fa16aea489 100644 --- a/thirdparty/wslay/wslay_queue.h +++ b/thirdparty/wslay/wslay_queue.h @@ -31,23 +31,23 @@ #include <wslay/wslay.h> -struct wslay_queue_cell { - void *data; - struct wslay_queue_cell *next; +struct wslay_queue_entry { + struct wslay_queue_entry *next; }; struct wslay_queue { - struct wslay_queue_cell *top; - struct wslay_queue_cell *tail; + struct wslay_queue_entry *top; + struct wslay_queue_entry **tail; }; -struct wslay_queue* wslay_queue_new(void); -void wslay_queue_free(struct wslay_queue *queue); -int wslay_queue_push(struct wslay_queue *queue, void *data); -int wslay_queue_push_front(struct wslay_queue *queue, void *data); +void wslay_queue_init(struct wslay_queue *queue); +void wslay_queue_deinit(struct wslay_queue *queue); +void wslay_queue_push(struct wslay_queue *queue, struct wslay_queue_entry *ent); +void wslay_queue_push_front(struct wslay_queue *queue, + struct wslay_queue_entry *ent); void wslay_queue_pop(struct wslay_queue *queue); -void* wslay_queue_top(struct wslay_queue *queue); -void* wslay_queue_tail(struct wslay_queue *queue); +struct wslay_queue_entry *wslay_queue_top(struct wslay_queue *queue); +struct wslay_queue_entry *wslay_queue_tail(struct wslay_queue *queue); int wslay_queue_empty(struct wslay_queue *queue); #endif /* WSLAY_QUEUE_H */ diff --git a/thirdparty/wslay/wslay_stack.c b/thirdparty/wslay/wslay_stack.c deleted file mode 100644 index 0e05d74031..0000000000 --- a/thirdparty/wslay/wslay_stack.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Wslay - The WebSocket Library - * - * Copyright (c) 2011, 2012 
Tatsuhiro Tsujikawa - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -#include "wslay_stack.h" - -#include <string.h> -#include <assert.h> - -struct wslay_stack* wslay_stack_new() -{ - struct wslay_stack *stack = (struct wslay_stack*)malloc - (sizeof(struct wslay_stack)); - if(!stack) { - return NULL; - } - stack->top = NULL; - return stack; -} - -void wslay_stack_free(struct wslay_stack *stack) -{ - struct wslay_stack_cell *p; - if(!stack) { - return; - } - p = stack->top; - while(p) { - struct wslay_stack_cell *next = p->next; - free(p); - p = next; - } - free(stack); -} - -int wslay_stack_push(struct wslay_stack *stack, void *data) -{ - struct wslay_stack_cell *new_cell = (struct wslay_stack_cell*)malloc - (sizeof(struct wslay_stack_cell)); - if(!new_cell) { - return WSLAY_ERR_NOMEM; - } - new_cell->data = data; - new_cell->next = stack->top; - stack->top = new_cell; - return 0; -} - -void wslay_stack_pop(struct wslay_stack *stack) -{ - struct wslay_stack_cell *top = stack->top; - assert(top); - stack->top = top->next; - free(top); -} - -void* wslay_stack_top(struct wslay_stack *stack) -{ - assert(stack->top); - return stack->top->data; -} - -int wslay_stack_empty(struct wslay_stack *stack) -{ - return stack->top == NULL; -} diff --git a/thirdparty/xatlas/xatlas.cpp b/thirdparty/xatlas/xatlas.cpp index 9f66ae0067..d92ef1a83a 100644 --- a/thirdparty/xatlas/xatlas.cpp +++ b/thirdparty/xatlas/xatlas.cpp @@ -40,14 +40,14 @@ Copyright (c) 2012 Brandon Pelfrey #if XATLAS_C_API #include "xatlas_c.h" #endif -#include <assert.h> -#include <float.h> // FLT_MAX -#include <limits.h> -#include <math.h> #include <atomic> #include <condition_variable> #include <mutex> #include <thread> +#include <assert.h> +#include <float.h> // FLT_MAX +#include <limits.h> +#include <math.h> #define __STDC_LIMIT_MACROS #include <stdint.h> #include <stdio.h> @@ -76,10 +76,7 @@ Copyright (c) 2012 Brandon Pelfrey #define XA_XSTR(x) XA_STR(x) #ifndef XA_ASSERT -#define XA_ASSERT(exp) \ - if (!(exp)) { \ - XA_PRINT_WARNING("\rASSERT: %s %s %d\n", XA_XSTR(exp), 
__FILE__, __LINE__); \ - } +#define XA_ASSERT(exp) if (!(exp)) { XA_PRINT_WARNING("\rASSERT: %s %s %d\n", XA_XSTR(exp), __FILE__, __LINE__); } #endif #ifndef XA_DEBUG_ASSERT @@ -87,13 +84,13 @@ Copyright (c) 2012 Brandon Pelfrey #endif #ifndef XA_PRINT -#define XA_PRINT(...) \ +#define XA_PRINT(...) \ if (xatlas::internal::s_print && xatlas::internal::s_printVerbose) \ xatlas::internal::s_print(__VA_ARGS__); #endif #ifndef XA_PRINT_WARNING -#define XA_PRINT_WARNING(...) \ +#define XA_PRINT_WARNING(...) \ if (xatlas::internal::s_print) \ xatlas::internal::s_print(__VA_ARGS__); #endif @@ -145,14 +142,18 @@ Copyright (c) 2012 Brandon Pelfrey #define XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION 0 #define XA_DEBUG_EXPORT_OBJ_RECOMPUTED_CHARTS 0 -#define XA_DEBUG_EXPORT_OBJ (0 || XA_DEBUG_EXPORT_OBJ_FACE_GROUPS || XA_DEBUG_EXPORT_OBJ_CHART_GROUPS || XA_DEBUG_EXPORT_OBJ_PLANAR_REGIONS || XA_DEBUG_EXPORT_OBJ_CHARTS || XA_DEBUG_EXPORT_OBJ_TJUNCTION || XA_DEBUG_EXPORT_OBJ_CHARTS_AFTER_PARAMETERIZATION || XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION || XA_DEBUG_EXPORT_OBJ_RECOMPUTED_CHARTS) +#define XA_DEBUG_EXPORT_OBJ (0 \ + || XA_DEBUG_EXPORT_OBJ_FACE_GROUPS \ + || XA_DEBUG_EXPORT_OBJ_CHART_GROUPS \ + || XA_DEBUG_EXPORT_OBJ_PLANAR_REGIONS \ + || XA_DEBUG_EXPORT_OBJ_CHARTS \ + || XA_DEBUG_EXPORT_OBJ_TJUNCTION \ + || XA_DEBUG_EXPORT_OBJ_CHARTS_AFTER_PARAMETERIZATION \ + || XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION \ + || XA_DEBUG_EXPORT_OBJ_RECOMPUTED_CHARTS) #ifdef _MSC_VER -#define XA_FOPEN(_file, _filename, _mode) \ - { \ - if (fopen_s(&_file, _filename, _mode) != 0) \ - _file = NULL; \ - } +#define XA_FOPEN(_file, _filename, _mode) { if (fopen_s(&_file, _filename, _mode) != 0) _file = NULL; } #define XA_SPRINTF(_buffer, _size, _format, ...) 
sprintf_s(_buffer, _size, _format, __VA_ARGS__) #else #define XA_FOPEN(_file, _filename, _mode) _file = fopen(_filename, _mode) @@ -172,12 +173,11 @@ typedef uint64_t Duration; #define XA_PROFILE_START(var) const std::chrono::time_point<std::chrono::high_resolution_clock> var##Start = std::chrono::high_resolution_clock::now(); #define XA_PROFILE_END(var) internal::s_profile.var += uint64_t(std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - var##Start).count()); -#define XA_PROFILE_PRINT_AND_RESET(label, var) \ - XA_PRINT("%s%.2f seconds (%g ms)\n", label, internal::durationToSeconds(internal::s_profile.var), internal::durationToMs(internal::s_profile.var)); \ - internal::s_profile.var = 0u; +#define XA_PROFILE_PRINT_AND_RESET(label, var) XA_PRINT("%s%.2f seconds (%g ms)\n", label, internal::durationToSeconds(internal::s_profile.var), internal::durationToMs(internal::s_profile.var)); internal::s_profile.var = 0u; #define XA_PROFILE_ALLOC 0 -struct ProfileData { +struct ProfileData +{ #if XA_PROFILE_ALLOC std::atomic<Duration> alloc; #endif @@ -232,11 +232,13 @@ struct ProfileData { static ProfileData s_profile; -static double durationToMs(Duration c) { +static double durationToMs(Duration c) +{ return (double)c * 0.001; } -static double durationToSeconds(Duration c) { +static double durationToSeconds(Duration c) +{ return (double)c * 0.000001; } #else @@ -246,8 +248,10 @@ static double durationToSeconds(Duration c) { #define XA_PROFILE_ALLOC 0 #endif -struct MemTag { - enum { +struct MemTag +{ + enum + { Default, BitImage, BVH, @@ -270,7 +274,8 @@ struct MemTag { }; #if XA_DEBUG_HEAP -struct AllocHeader { +struct AllocHeader +{ size_t size; const char *file; int line; @@ -283,10 +288,11 @@ struct AllocHeader { static std::mutex s_allocMutex; static AllocHeader *s_allocRoot = nullptr; static size_t s_allocTotalCount = 0, s_allocTotalSize = 0, s_allocPeakSize = 0, s_allocCount[MemTag::Count] = { 0 }, 
s_allocTotalTagSize[MemTag::Count] = { 0 }, s_allocPeakTagSize[MemTag::Count] = { 0 }; -static uint32_t s_allocId = 0; +static uint32_t s_allocId =0 ; static constexpr uint32_t kAllocRedzone = 0x12345678; -static void *Realloc(void *ptr, size_t size, int tag, const char *file, int line) { +static void *Realloc(void *ptr, size_t size, int tag, const char *file, int line) +{ std::unique_lock<std::mutex> lock(s_allocMutex); if (!size && !ptr) return nullptr; @@ -347,7 +353,8 @@ static void *Realloc(void *ptr, size_t size, int tag, const char *file, int line return newPtr + sizeof(AllocHeader); } -static void ReportLeaks() { +static void ReportLeaks() +{ printf("Checking for memory leaks...\n"); bool anyLeaks = false; AllocHeader *header = s_allocRoot; @@ -375,7 +382,8 @@ static void ReportLeaks() { s_allocTotalTagSize[i] = s_allocPeakTagSize[i] = 0; } -static void PrintMemoryUsage() { +static void PrintMemoryUsage() +{ XA_PRINT("Total allocations: %zu\n", s_allocTotalCount); XA_PRINT("Memory usage: %0.2fMB current, %0.2fMB peak\n", internal::s_allocTotalSize / 1024.0f / 1024.0f, internal::s_allocPeakSize / 1024.0f / 1024.0f); static const char *labels[] = { // Sync with MemTag @@ -404,7 +412,8 @@ static void PrintMemoryUsage() { #define XA_PRINT_MEM_USAGE internal::PrintMemoryUsage(); #else -static void *Realloc(void *ptr, size_t size, int /*tag*/, const char * /*file*/, int /*line*/) { +static void *Realloc(void *ptr, size_t size, int /*tag*/, const char * /*file*/, int /*line*/) +{ if (size == 0 && !ptr) return nullptr; if (size == 0 && s_free) { @@ -430,75 +439,89 @@ static constexpr float kEpsilon = 0.0001f; static constexpr float kAreaEpsilon = FLT_EPSILON; static constexpr float kNormalEpsilon = 0.001f; -static int align(int x, int a) { +static int align(int x, int a) +{ return (x + a - 1) & ~(a - 1); } template <typename T> -static T max(const T &a, const T &b) { +static T max(const T &a, const T &b) +{ return a > b ? 
a : b; } template <typename T> -static T min(const T &a, const T &b) { +static T min(const T &a, const T &b) +{ return a < b ? a : b; } template <typename T> -static T max3(const T &a, const T &b, const T &c) { +static T max3(const T &a, const T &b, const T &c) +{ return max(a, max(b, c)); } /// Return the maximum of the three arguments. template <typename T> -static T min3(const T &a, const T &b, const T &c) { +static T min3(const T &a, const T &b, const T &c) +{ return min(a, min(b, c)); } /// Clamp between two values. template <typename T> -static T clamp(const T &x, const T &a, const T &b) { +static T clamp(const T &x, const T &a, const T &b) +{ return min(max(x, a), b); } template <typename T> -static void swap(T &a, T &b) { +static void swap(T &a, T &b) +{ T temp = a; a = b; b = temp; } -union FloatUint32 { +union FloatUint32 +{ float f; uint32_t u; }; -static bool isFinite(float f) { +static bool isFinite(float f) +{ FloatUint32 fu; fu.f = f; return fu.u != 0x7F800000u && fu.u != 0x7F800001u; } -static bool isNan(float f) { +static bool isNan(float f) +{ return f != f; } // Robust floating point comparisons: // http://realtimecollisiondetection.net/blog/?p=89 -static bool equal(const float f0, const float f1, const float epsilon) { +static bool equal(const float f0, const float f1, const float epsilon) +{ //return fabs(f0-f1) <= epsilon; return fabs(f0 - f1) <= epsilon * max3(1.0f, fabsf(f0), fabsf(f1)); } -static int ftoi_ceil(float val) { +static int ftoi_ceil(float val) +{ return (int)ceilf(val); } -static bool isZero(const float f, const float epsilon) { +static bool isZero(const float f, const float epsilon) +{ return fabs(f) <= epsilon; } -static float square(float f) { +static float square(float f) +{ return f * f; } @@ -508,8 +531,9 @@ static float square(float f) { * @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x * @note nextPowerOfTwo(x) = 2 << log2(x-1) */ -static uint32_t nextPowerOfTwo(uint32_t x) { - XA_DEBUG_ASSERT(x != 0); +static 
uint32_t nextPowerOfTwo(uint32_t x) +{ + XA_DEBUG_ASSERT( x != 0 ); // On modern CPUs this is supposed to be as fast as using the bsr instruction. x--; x |= x >> 1; @@ -520,34 +544,38 @@ static uint32_t nextPowerOfTwo(uint32_t x) { return x + 1; } -class Vector2 { +class Vector2 +{ public: Vector2() {} - explicit Vector2(float f) : - x(f), y(f) {} - Vector2(float _x, float _y) : - x(_x), y(_y) {} + explicit Vector2(float f) : x(f), y(f) {} + Vector2(float _x, float _y): x(_x), y(_y) {} - Vector2 operator-() const { + Vector2 operator-() const + { return Vector2(-x, -y); } - void operator+=(const Vector2 &v) { + void operator+=(const Vector2 &v) + { x += v.x; y += v.y; } - void operator-=(const Vector2 &v) { + void operator-=(const Vector2 &v) + { x -= v.x; y -= v.y; } - void operator*=(float s) { + void operator*=(float s) + { x *= s; y *= s; } - void operator*=(const Vector2 &v) { + void operator*=(const Vector2 &v) + { x *= v.x; y *= v.y; } @@ -555,11 +583,13 @@ public: float x, y; }; -static bool operator==(const Vector2 &a, const Vector2 &b) { +static bool operator==(const Vector2 &a, const Vector2 &b) +{ return a.x == b.x && a.y == b.y; } -static bool operator!=(const Vector2 &a, const Vector2 &b) { +static bool operator!=(const Vector2 &a, const Vector2 &b) +{ return a.x != b.x || a.y != b.y; } @@ -568,33 +598,40 @@ static bool operator!=(const Vector2 &a, const Vector2 &b) { return Vector2(a.x + b.x, a.y + b.y); }*/ -static Vector2 operator-(const Vector2 &a, const Vector2 &b) { +static Vector2 operator-(const Vector2 &a, const Vector2 &b) +{ return Vector2(a.x - b.x, a.y - b.y); } -static Vector2 operator*(const Vector2 &v, float s) { +static Vector2 operator*(const Vector2 &v, float s) +{ return Vector2(v.x * s, v.y * s); } -static float dot(const Vector2 &a, const Vector2 &b) { +static float dot(const Vector2 &a, const Vector2 &b) +{ return a.x * b.x + a.y * b.y; } -static float lengthSquared(const Vector2 &v) { +static float lengthSquared(const Vector2 
&v) +{ return v.x * v.x + v.y * v.y; } -static float length(const Vector2 &v) { +static float length(const Vector2 &v) +{ return sqrtf(lengthSquared(v)); } #if XA_DEBUG -static bool isNormalized(const Vector2 &v, float epsilon = kNormalEpsilon) { +static bool isNormalized(const Vector2 &v, float epsilon = kNormalEpsilon) +{ return equal(length(v), 1, epsilon); } #endif -static Vector2 normalize(const Vector2 &v) { +static Vector2 normalize(const Vector2 &v) +{ const float l = length(v); XA_DEBUG_ASSERT(l > 0.0f); // Never negative. const Vector2 n = v * (1.0f / l); @@ -602,30 +639,36 @@ static Vector2 normalize(const Vector2 &v) { return n; } -static Vector2 normalizeSafe(const Vector2 &v, const Vector2 &fallback) { +static Vector2 normalizeSafe(const Vector2 &v, const Vector2 &fallback) +{ const float l = length(v); if (l > 0.0f) // Never negative. return v * (1.0f / l); return fallback; } -static bool equal(const Vector2 &v1, const Vector2 &v2, float epsilon) { +static bool equal(const Vector2 &v1, const Vector2 &v2, float epsilon) +{ return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon); } -static Vector2 min(const Vector2 &a, const Vector2 &b) { +static Vector2 min(const Vector2 &a, const Vector2 &b) +{ return Vector2(min(a.x, b.x), min(a.y, b.y)); } -static Vector2 max(const Vector2 &a, const Vector2 &b) { +static Vector2 max(const Vector2 &a, const Vector2 &b) +{ return Vector2(max(a.x, b.x), max(a.y, b.y)); } -static bool isFinite(const Vector2 &v) { +static bool isFinite(const Vector2 &v) +{ return isFinite(v.x) && isFinite(v.y); } -static float triangleArea(const Vector2 &a, const Vector2 &b, const Vector2 &c) { +static float triangleArea(const Vector2 &a, const Vector2 &b, const Vector2 &c) +{ // IC: While it may be appealing to use the following expression: //return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y) * 0.5f; // That's actually a terrible idea. 
Small triangles far from the origin can end up producing fairly large floating point @@ -639,7 +682,8 @@ static float triangleArea(const Vector2 &a, const Vector2 &b, const Vector2 &c) return (v0.x * v1.y - v0.y * v1.x) * 0.5f; } -static bool linesIntersect(const Vector2 &a1, const Vector2 &a2, const Vector2 &b1, const Vector2 &b2, float epsilon) { +static bool linesIntersect(const Vector2 &a1, const Vector2 &a2, const Vector2 &b1, const Vector2 &b2, float epsilon) +{ const Vector2 v0 = a2 - a1; const Vector2 v1 = b2 - b1; const float denom = -v1.x * v0.y + v0.x * v1.y; @@ -647,70 +691,76 @@ static bool linesIntersect(const Vector2 &a1, const Vector2 &a2, const Vector2 & return false; const float s = (-v0.y * (a1.x - b1.x) + v0.x * (a1.y - b1.y)) / denom; if (s > epsilon && s < 1.0f - epsilon) { - const float t = (v1.x * (a1.y - b1.y) - v1.y * (a1.x - b1.x)) / denom; + const float t = ( v1.x * (a1.y - b1.y) - v1.y * (a1.x - b1.x)) / denom; return t > epsilon && t < 1.0f - epsilon; } return false; } -struct Vector2i { +struct Vector2i +{ Vector2i() {} - Vector2i(int32_t _x, int32_t _y) : - x(_x), y(_y) {} + Vector2i(int32_t _x, int32_t _y) : x(_x), y(_y) {} int32_t x, y; }; -class Vector3 { +class Vector3 +{ public: Vector3() {} - explicit Vector3(float f) : - x(f), y(f), z(f) {} - Vector3(float _x, float _y, float _z) : - x(_x), y(_y), z(_z) {} - Vector3(const Vector2 &v, float _z) : - x(v.x), y(v.y), z(_z) {} - - Vector2 xy() const { + explicit Vector3(float f) : x(f), y(f), z(f) {} + Vector3(float _x, float _y, float _z) : x(_x), y(_y), z(_z) {} + Vector3(const Vector2 &v, float _z) : x(v.x), y(v.y), z(_z) {} + + Vector2 xy() const + { return Vector2(x, y); } - Vector3 operator-() const { + Vector3 operator-() const + { return Vector3(-x, -y, -z); } - void operator+=(const Vector3 &v) { + void operator+=(const Vector3 &v) + { x += v.x; y += v.y; z += v.z; } - void operator-=(const Vector3 &v) { + void operator-=(const Vector3 &v) + { x -= v.x; y -= v.y; z -= v.z; 
} - void operator*=(float s) { + void operator*=(float s) + { x *= s; y *= s; z *= s; } - void operator/=(float s) { + void operator/=(float s) + { float is = 1.0f / s; x *= is; y *= is; z *= is; } - void operator*=(const Vector3 &v) { + void operator*=(const Vector3 &v) + { x *= v.x; y *= v.y; z *= v.z; } - void operator/=(const Vector3 &v) { + void operator/=(const Vector3 &v) + { x /= v.x; y /= v.y; z /= v.z; @@ -719,47 +769,58 @@ public: float x, y, z; }; -static Vector3 operator+(const Vector3 &a, const Vector3 &b) { +static Vector3 operator+(const Vector3 &a, const Vector3 &b) +{ return Vector3(a.x + b.x, a.y + b.y, a.z + b.z); } -static Vector3 operator-(const Vector3 &a, const Vector3 &b) { +static Vector3 operator-(const Vector3 &a, const Vector3 &b) +{ return Vector3(a.x - b.x, a.y - b.y, a.z - b.z); } -static bool operator==(const Vector3 &a, const Vector3 &b) { +static bool operator==(const Vector3 &a, const Vector3 &b) +{ return a.x == b.x && a.y == b.y && a.z == b.z; } -static Vector3 cross(const Vector3 &a, const Vector3 &b) { +static Vector3 cross(const Vector3 &a, const Vector3 &b) +{ return Vector3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); } -static Vector3 operator*(const Vector3 &v, float s) { +static Vector3 operator*(const Vector3 &v, float s) +{ return Vector3(v.x * s, v.y * s, v.z * s); } -static Vector3 operator/(const Vector3 &v, float s) { +static Vector3 operator/(const Vector3 &v, float s) +{ return v * (1.0f / s); } -static float dot(const Vector3 &a, const Vector3 &b) { +static float dot(const Vector3 &a, const Vector3 &b) +{ return a.x * b.x + a.y * b.y + a.z * b.z; } -static float lengthSquared(const Vector3 &v) { +static float lengthSquared(const Vector3 &v) +{ return v.x * v.x + v.y * v.y + v.z * v.z; } -static float length(const Vector3 &v) { +static float length(const Vector3 &v) +{ return sqrtf(lengthSquared(v)); } -static bool isNormalized(const Vector3 &v, float epsilon = kNormalEpsilon) { +static 
bool isNormalized(const Vector3 &v, float epsilon = kNormalEpsilon) +{ return equal(length(v), 1.0f, epsilon); } -static Vector3 normalize(const Vector3 &v) { +static Vector3 normalize(const Vector3 &v) +{ const float l = length(v); XA_DEBUG_ASSERT(l > 0.0f); // Never negative. const Vector3 n = v * (1.0f / l); @@ -767,103 +828,116 @@ static Vector3 normalize(const Vector3 &v) { return n; } -static Vector3 normalizeSafe(const Vector3 &v, const Vector3 &fallback) { +static Vector3 normalizeSafe(const Vector3 &v, const Vector3 &fallback) +{ const float l = length(v); if (l > 0.0f) // Never negative. return v * (1.0f / l); return fallback; } -static bool equal(const Vector3 &v0, const Vector3 &v1, float epsilon) { +static bool equal(const Vector3 &v0, const Vector3 &v1, float epsilon) +{ return fabs(v0.x - v1.x) <= epsilon && fabs(v0.y - v1.y) <= epsilon && fabs(v0.z - v1.z) <= epsilon; } -static Vector3 min(const Vector3 &a, const Vector3 &b) { +static Vector3 min(const Vector3 &a, const Vector3 &b) +{ return Vector3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); } -static Vector3 max(const Vector3 &a, const Vector3 &b) { +static Vector3 max(const Vector3 &a, const Vector3 &b) +{ return Vector3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); } #if XA_DEBUG -bool isFinite(const Vector3 &v) { +bool isFinite(const Vector3 &v) +{ return isFinite(v.x) && isFinite(v.y) && isFinite(v.z); } #endif -struct Extents2 { +struct Extents2 +{ Vector2 min, max; Extents2() {} - Extents2(Vector2 p1, Vector2 p2) { + Extents2(Vector2 p1, Vector2 p2) + { min = xatlas::internal::min(p1, p2); max = xatlas::internal::max(p1, p2); } - void reset() { + void reset() + { min.x = min.y = FLT_MAX; max.x = max.y = -FLT_MAX; } - void add(Vector2 p) { + void add(Vector2 p) + { min = xatlas::internal::min(min, p); max = xatlas::internal::max(max, p); } - Vector2 midpoint() const { + Vector2 midpoint() const + { return Vector2(min.x + (max.x - min.x) * 0.5f, min.y + (max.y - min.y) * 0.5f); } - static 
bool intersect(const Extents2 &e1, const Extents2 &e2) { + static bool intersect(const Extents2 &e1, const Extents2 &e2) + { return e1.min.x <= e2.max.x && e1.max.x >= e2.min.x && e1.min.y <= e2.max.y && e1.max.y >= e2.min.y; } }; // From Fast-BVH -struct AABB { - AABB() : - min(FLT_MAX, FLT_MAX, FLT_MAX), max(-FLT_MAX, -FLT_MAX, -FLT_MAX) {} - AABB(const Vector3 &_min, const Vector3 &_max) : - min(_min), max(_max) {} - AABB(const Vector3 &p, float radius = 0.0f) : - min(p), max(p) { - if (radius > 0.0f) - expand(radius); - } - - bool intersect(const AABB &other) const { +struct AABB +{ + AABB() : min(FLT_MAX, FLT_MAX, FLT_MAX), max(-FLT_MAX, -FLT_MAX, -FLT_MAX) {} + AABB(const Vector3 &_min, const Vector3 &_max) : min(_min), max(_max) { } + AABB(const Vector3 &p, float radius = 0.0f) : min(p), max(p) { if (radius > 0.0f) expand(radius); } + + bool intersect(const AABB &other) const + { return min.x <= other.max.x && max.x >= other.min.x && min.y <= other.max.y && max.y >= other.min.y && min.z <= other.max.z && max.z >= other.min.z; } - void expandToInclude(const Vector3 &p) { + void expandToInclude(const Vector3 &p) + { min = internal::min(min, p); max = internal::max(max, p); } - void expandToInclude(const AABB &aabb) { + void expandToInclude(const AABB &aabb) + { min = internal::min(min, aabb.min); max = internal::max(max, aabb.max); } - void expand(float amount) { + void expand(float amount) + { min -= Vector3(amount); max += Vector3(amount); } - Vector3 centroid() const { + Vector3 centroid() const + { return min + (max - min) * 0.5f; } - uint32_t maxDimension() const { + uint32_t maxDimension() const + { const Vector3 extent = max - min; uint32_t result = 0; if (extent.y > extent.x) { result = 1; if (extent.z > extent.y) result = 2; - } else if (extent.z > extent.x) + } + else if(extent.z > extent.x) result = 2; return result; } @@ -871,9 +945,10 @@ struct AABB { Vector3 min, max; }; -struct ArrayBase { - ArrayBase(uint32_t _elementSize, int memTag = 
MemTag::Default) : - buffer(nullptr), elementSize(_elementSize), size(0), capacity(0) { +struct ArrayBase +{ + ArrayBase(uint32_t _elementSize, int memTag = MemTag::Default) : buffer(nullptr), elementSize(_elementSize), size(0), capacity(0) + { #if XA_DEBUG_HEAP this->memTag = memTag; #else @@ -881,15 +956,18 @@ struct ArrayBase { #endif } - ~ArrayBase() { + ~ArrayBase() + { XA_FREE(buffer); } - XA_INLINE void clear() { + XA_INLINE void clear() + { size = 0; } - void copyFrom(const uint8_t *data, uint32_t length) { + void copyFrom(const uint8_t *data, uint32_t length) + { XA_DEBUG_ASSERT(data); XA_DEBUG_ASSERT(length > 0); resize(length, true); @@ -897,7 +975,8 @@ struct ArrayBase { memcpy(buffer, data, length * elementSize); } - void copyTo(ArrayBase &other) const { + void copyTo(ArrayBase &other) const + { XA_DEBUG_ASSERT(elementSize == other.elementSize); XA_DEBUG_ASSERT(size > 0); other.resize(size, true); @@ -905,7 +984,8 @@ struct ArrayBase { memcpy(other.buffer, buffer, size * elementSize); } - void destroy() { + void destroy() + { size = 0; XA_FREE(buffer); buffer = nullptr; @@ -914,7 +994,8 @@ struct ArrayBase { } // Insert the given element at the given index shifting all the elements up. 
- void insertAt(uint32_t index, const uint8_t *value) { + void insertAt(uint32_t index, const uint8_t *value) + { XA_DEBUG_ASSERT(index >= 0 && index <= size); XA_DEBUG_ASSERT(value); resize(size + 1, false); @@ -925,7 +1006,8 @@ struct ArrayBase { memcpy(&buffer[index * elementSize], value, elementSize); } - void moveTo(ArrayBase &other) { + void moveTo(ArrayBase &other) + { XA_DEBUG_ASSERT(elementSize == other.elementSize); other.destroy(); other.buffer = buffer; @@ -939,12 +1021,14 @@ struct ArrayBase { elementSize = size = capacity = 0; } - void pop_back() { + void pop_back() + { XA_DEBUG_ASSERT(size > 0); resize(size - 1, false); } - void push_back(const uint8_t *value) { + void push_back(const uint8_t *value) + { XA_DEBUG_ASSERT(value < buffer || value >= buffer + size); XA_DEBUG_ASSERT(value); resize(size + 1, false); @@ -953,7 +1037,8 @@ struct ArrayBase { memcpy(&buffer[(size - 1) * elementSize], value, elementSize); } - void push_back(const ArrayBase &other) { + void push_back(const ArrayBase &other) + { XA_DEBUG_ASSERT(elementSize == other.elementSize); if (other.size > 0) { const uint32_t oldSize = size; @@ -965,7 +1050,8 @@ struct ArrayBase { } // Remove the element at the given index. This is an expensive operation! - void removeAt(uint32_t index) { + void removeAt(uint32_t index) + { XA_DEBUG_ASSERT(index >= 0 && index < size); XA_DEBUG_ASSERT(buffer); if (buffer) { @@ -977,7 +1063,8 @@ struct ArrayBase { } // Element at index is swapped with the last element, then the array length is decremented. 
- void removeAtFast(uint32_t index) { + void removeAtFast(uint32_t index) + { XA_DEBUG_ASSERT(index >= 0 && index < size); XA_DEBUG_ASSERT(buffer); if (buffer) { @@ -988,12 +1075,14 @@ struct ArrayBase { } } - void reserve(uint32_t desiredSize) { + void reserve(uint32_t desiredSize) + { if (desiredSize > capacity) setArrayCapacity(desiredSize); } - void resize(uint32_t newSize, bool exact) { + void resize(uint32_t newSize, bool exact) + { size = newSize; if (size > capacity) { // First allocation is always exact. Otherwise, following allocations grow array to 150% of desired size. @@ -1006,7 +1095,8 @@ struct ArrayBase { } } - void setArrayCapacity(uint32_t newCapacity) { + void setArrayCapacity(uint32_t newCapacity) + { XA_DEBUG_ASSERT(newCapacity >= size); if (newCapacity == 0) { // free the buffer. @@ -1026,7 +1116,8 @@ struct ArrayBase { } #if XA_DEBUG_HEAP - void setMemTag(int _memTag) { + void setMemTag(int _memTag) + { this->memTag = _memTag; } #endif @@ -1040,27 +1131,30 @@ struct ArrayBase { #endif }; -template <typename T> -class Array { +template<typename T> +class Array +{ public: - Array(int memTag = MemTag::Default) : - m_base(sizeof(T), memTag) {} - Array(const Array &) = delete; + Array(int memTag = MemTag::Default) : m_base(sizeof(T), memTag) {} + Array(const Array&) = delete; Array &operator=(const Array &) = delete; - XA_INLINE const T &operator[](uint32_t index) const { + XA_INLINE const T &operator[](uint32_t index) const + { XA_DEBUG_ASSERT(index < m_base.size); XA_DEBUG_ASSERT(m_base.buffer); return ((const T *)m_base.buffer)[index]; } - XA_INLINE T &operator[](uint32_t index) { + XA_INLINE T &operator[](uint32_t index) + { XA_DEBUG_ASSERT(index < m_base.size); XA_DEBUG_ASSERT(m_base.buffer); return ((T *)m_base.buffer)[index]; } - XA_INLINE const T &back() const { + XA_INLINE const T &back() const + { XA_DEBUG_ASSERT(!isEmpty()); return ((const T *)m_base.buffer)[m_base.size - 1]; } @@ -1068,7 +1162,8 @@ public: XA_INLINE T *begin() { return 
(T *)m_base.buffer; } XA_INLINE void clear() { m_base.clear(); } - bool contains(const T &value) const { + bool contains(const T &value) const + { for (uint32_t i = 0; i < m_base.size; i++) { if (((const T *)m_base.buffer)[i] == value) return true; @@ -1093,23 +1188,27 @@ public: void reserve(uint32_t desiredSize) { m_base.reserve(desiredSize); } void resize(uint32_t newSize) { m_base.resize(newSize, true); } - void runCtors() { + void runCtors() + { for (uint32_t i = 0; i < m_base.size; i++) new (&((T *)m_base.buffer)[i]) T; } - void runDtors() { + void runDtors() + { for (uint32_t i = 0; i < m_base.size; i++) ((T *)m_base.buffer)[i].~T(); } - void fill(const T &value) { + void fill(const T &value) + { auto buffer = (T *)m_base.buffer; for (uint32_t i = 0; i < m_base.size; i++) buffer[i] = value; } - void fillBytes(uint8_t value) { + void fillBytes(uint8_t value) + { if (m_base.buffer && m_base.size > 0) memset(m_base.buffer, (int)value, m_base.size * m_base.elementSize); } @@ -1120,7 +1219,8 @@ public: XA_INLINE uint32_t size() const { return m_base.size; } - XA_INLINE void zeroOutMemory() { + XA_INLINE void zeroOutMemory() + { if (m_base.buffer && m_base.size > 0) memset(m_base.buffer, 0, m_base.elementSize * m_base.size); } @@ -1129,57 +1229,37 @@ private: ArrayBase m_base; }; -template <typename T> -struct ArrayView { - ArrayView() : - data(nullptr), length(0) {} - ArrayView(Array<T> &a) : - data(a.data()), length(a.size()) {} - ArrayView(T *_data, uint32_t _length) : - data(_data), length(_length) {} - ArrayView &operator=(Array<T> &a) { - data = a.data(); - length = a.size(); - return *this; - } - XA_INLINE const T &operator[](uint32_t index) const { - XA_DEBUG_ASSERT(index < length); - return data[index]; - } - XA_INLINE T &operator[](uint32_t index) { - XA_DEBUG_ASSERT(index < length); - return data[index]; - } +template<typename T> +struct ArrayView +{ + ArrayView() : data(nullptr), length(0) {} + ArrayView(Array<T> &a) : data(a.data()), length(a.size()) 
{} + ArrayView(T *_data, uint32_t _length) : data(_data), length(_length) {} + ArrayView &operator=(Array<T> &a) { data = a.data(); length = a.size(); return *this; } + XA_INLINE const T &operator[](uint32_t index) const { XA_DEBUG_ASSERT(index < length); return data[index]; } + XA_INLINE T &operator[](uint32_t index) { XA_DEBUG_ASSERT(index < length); return data[index]; } T *data; uint32_t length; }; -template <typename T> -struct ConstArrayView { - ConstArrayView() : - data(nullptr), length(0) {} - ConstArrayView(const Array<T> &a) : - data(a.data()), length(a.size()) {} - ConstArrayView(ArrayView<T> av) : - data(av.data), length(av.length) {} - ConstArrayView(const T *_data, uint32_t _length) : - data(_data), length(_length) {} - ConstArrayView &operator=(const Array<T> &a) { - data = a.data(); - length = a.size(); - return *this; - } - XA_INLINE const T &operator[](uint32_t index) const { - XA_DEBUG_ASSERT(index < length); - return data[index]; - } +template<typename T> +struct ConstArrayView +{ + ConstArrayView() : data(nullptr), length(0) {} + ConstArrayView(const Array<T> &a) : data(a.data()), length(a.size()) {} + ConstArrayView(ArrayView<T> av) : data(av.data), length(av.length) {} + ConstArrayView(const T *_data, uint32_t _length) : data(_data), length(_length) {} + ConstArrayView &operator=(const Array<T> &a) { data = a.data(); length = a.size(); return *this; } + XA_INLINE const T &operator[](uint32_t index) const { XA_DEBUG_ASSERT(index < length); return data[index]; } const T *data; uint32_t length; }; /// Basis class to compute tangent space basis, ortogonalizations and to transform vectors from one space to another. -struct Basis { - XA_NODISCARD static Vector3 computeTangent(const Vector3 &normal) { +struct Basis +{ + XA_NODISCARD static Vector3 computeTangent(const Vector3 &normal) + { XA_ASSERT(isNormalized(normal)); // Choose minimum axis. 
Vector3 tangent; @@ -1195,7 +1275,8 @@ struct Basis { return tangent; } - XA_NODISCARD static Vector3 computeBitangent(const Vector3 &normal, const Vector3 &tangent) { + XA_NODISCARD static Vector3 computeBitangent(const Vector3 &normal, const Vector3 &tangent) + { return cross(normal, tangent); } @@ -1205,36 +1286,42 @@ struct Basis { }; // Simple bit array. -class BitArray { +class BitArray +{ public: - BitArray() : - m_size(0) {} + BitArray() : m_size(0) {} - BitArray(uint32_t sz) { + BitArray(uint32_t sz) + { resize(sz); } - void resize(uint32_t new_size) { + void resize(uint32_t new_size) + { m_size = new_size; m_wordArray.resize((m_size + 31) >> 5); } - bool get(uint32_t index) const { + bool get(uint32_t index) const + { XA_DEBUG_ASSERT(index < m_size); return (m_wordArray[index >> 5] & (1 << (index & 31))) != 0; } - void set(uint32_t index) { + void set(uint32_t index) + { XA_DEBUG_ASSERT(index < m_size); m_wordArray[index >> 5] |= (1 << (index & 31)); } - void unset(uint32_t index) { + void unset(uint32_t index) + { XA_DEBUG_ASSERT(index < m_size); m_wordArray[index >> 5] &= ~(1 << (index & 31)); } - void zeroOutMemory() { + void zeroOutMemory() + { m_wordArray.zeroOutMemory(); } @@ -1243,13 +1330,13 @@ private: Array<uint32_t> m_wordArray; }; -class BitImage { +class BitImage +{ public: - BitImage() : - m_width(0), m_height(0), m_rowStride(0), m_data(MemTag::BitImage) {} + BitImage() : m_width(0), m_height(0), m_rowStride(0), m_data(MemTag::BitImage) {} - BitImage(uint32_t w, uint32_t h) : - m_width(w), m_height(h), m_data(MemTag::BitImage) { + BitImage(uint32_t w, uint32_t h) : m_width(w), m_height(h), m_data(MemTag::BitImage) + { m_rowStride = (m_width + 63) >> 6; m_data.resize(m_rowStride * m_height); m_data.zeroOutMemory(); @@ -1260,14 +1347,16 @@ public: uint32_t width() const { return m_width; } uint32_t height() const { return m_height; } - void copyTo(BitImage &other) { + void copyTo(BitImage &other) + { other.m_width = m_width; other.m_height = 
m_height; other.m_rowStride = m_rowStride; m_data.copyTo(other.m_data); } - void resize(uint32_t w, uint32_t h, bool discard) { + void resize(uint32_t w, uint32_t h, bool discard) + { const uint32_t rowStride = (w + 63) >> 6; if (discard) { m_data.resize(rowStride * h); @@ -1291,24 +1380,28 @@ public: m_rowStride = rowStride; } - bool get(uint32_t x, uint32_t y) const { + bool get(uint32_t x, uint32_t y) const + { XA_DEBUG_ASSERT(x < m_width && y < m_height); const uint32_t index = (x >> 6) + y * m_rowStride; return (m_data[index] & (UINT64_C(1) << (uint64_t(x) & UINT64_C(63)))) != 0; } - void set(uint32_t x, uint32_t y) { + void set(uint32_t x, uint32_t y) + { XA_DEBUG_ASSERT(x < m_width && y < m_height); const uint32_t index = (x >> 6) + y * m_rowStride; m_data[index] |= UINT64_C(1) << (uint64_t(x) & UINT64_C(63)); XA_DEBUG_ASSERT(get(x, y)); } - void zeroOutMemory() { + void zeroOutMemory() + { m_data.zeroOutMemory(); } - bool canBlit(const BitImage &image, uint32_t offsetX, uint32_t offsetY) const { + bool canBlit(const BitImage &image, uint32_t offsetX, uint32_t offsetY) const + { for (uint32_t y = 0; y < image.m_height; y++) { const uint32_t thisY = y + offsetY; if (thisY >= m_height) @@ -1332,7 +1425,8 @@ public: return true; } - void dilate(uint32_t padding) { + void dilate(uint32_t padding) + { BitImage tmp(m_width, m_height); for (uint32_t p = 0; p < padding; p++) { tmp.zeroOutMemory(); @@ -1342,21 +1436,15 @@ public: if (!b) { if (x > 0) { b |= get(x - 1, y); - if (y > 0) - b |= get(x - 1, y - 1); - if (y < m_height - 1) - b |= get(x - 1, y + 1); + if (y > 0) b |= get(x - 1, y - 1); + if (y < m_height - 1) b |= get(x - 1, y + 1); } - if (y > 0) - b |= get(x, y - 1); - if (y < m_height - 1) - b |= get(x, y + 1); + if (y > 0) b |= get(x, y - 1); + if (y < m_height - 1) b |= get(x, y + 1); if (x < m_width - 1) { b |= get(x + 1, y); - if (y > 0) - b |= get(x + 1, y - 1); - if (y < m_height - 1) - b |= get(x + 1, y + 1); + if (y > 0) b |= get(x + 1, y - 1); + 
if (y < m_height - 1) b |= get(x + 1, y + 1); } } if (b) @@ -1375,10 +1463,11 @@ private: }; // From Fast-BVH -class BVH { +class BVH +{ public: - BVH(const Array<AABB> &objectAabbs, uint32_t leafSize = 4) : - m_objectIds(MemTag::BVH), m_nodes(MemTag::BVH) { + BVH(const Array<AABB> &objectAabbs, uint32_t leafSize = 4) : m_objectIds(MemTag::BVH), m_nodes(MemTag::BVH) + { m_objectAabbs = &objectAabbs; if (m_objectAabbs->isEmpty()) return; @@ -1398,7 +1487,7 @@ public: Node node; m_nodes.reserve(objectAabbs.size() * 2); uint32_t nNodes = 0; - while (stackptr > 0) { + while(stackptr > 0) { // Pop the next item off of the stack const BuildEntry &bnode = todo[--stackptr]; const uint32_t start = bnode.start; @@ -1411,7 +1500,7 @@ public: // Calculate the bounding box for this node AABB bb(objectAabbs[m_objectIds[start]]); AABB bc(objectAabbs[m_objectIds[start]].centroid()); - for (uint32_t p = start + 1; p < end; ++p) { + for(uint32_t p = start + 1; p < end; ++p) { bb.expandToInclude(objectAabbs[m_objectIds[p]]); bc.expandToInclude(objectAabbs[m_objectIds[p]].centroid()); } @@ -1427,7 +1516,7 @@ public: m_nodes[bnode.parent].rightOffset--; // When this is the second touch, this is the right child. // The right child sets up the offset for the flat tree. - if (m_nodes[bnode.parent].rightOffset == kTouchedTwice) + if (m_nodes[bnode.parent].rightOffset == kTouchedTwice ) m_nodes[bnode.parent].rightOffset = nNodes - 1 - bnode.parent; } // If this is a leaf, no need to subdivide. @@ -1462,20 +1551,21 @@ public: } } - void query(const AABB &queryAabb, Array<uint32_t> &result) const { + void query(const AABB &queryAabb, Array<uint32_t> &result) const + { result.clear(); // Working set uint32_t todo[64]; int32_t stackptr = 0; // "Push" on the root node to the working set todo[stackptr] = 0; - while (stackptr >= 0) { + while(stackptr >= 0) { // Pop off the next node to work on. 
const int ni = todo[stackptr--]; const Node &node = m_nodes[ni]; // Is leaf -> Intersect if (node.rightOffset == 0) { - for (uint32_t o = 0; o < node.nPrims; ++o) { + for(uint32_t o = 0; o < node.nPrims; ++o) { const uint32_t obj = node.start + o; if (queryAabb.intersect((*m_objectAabbs)[m_objectIds[obj]])) result.push_back(m_objectIds[obj]); @@ -1492,12 +1582,14 @@ public: } private: - struct BuildEntry { + struct BuildEntry + { uint32_t parent; // If non-zero then this is the index of the parent. (used in offsets) uint32_t start, end; // The range of objects in the object list covered by this node. }; - struct Node { + struct Node + { AABB aabb; uint32_t start, nPrims, rightOffset; }; @@ -1507,8 +1599,10 @@ private: Array<Node> m_nodes; }; -struct Fit { - static bool computeBasis(ConstArrayView<Vector3> points, Basis *basis) { +struct Fit +{ + static bool computeBasis(ConstArrayView<Vector3> points, Basis *basis) + { if (computeLeastSquaresNormal(points, &basis->normal)) { basis->tangent = Basis::computeTangent(basis->normal); basis->bitangent = Basis::computeBitangent(basis->normal, basis->tangent); @@ -1522,7 +1616,8 @@ private: // Fast, and accurate to within a few degrees. // Returns None if the points do not span a plane. 
// https://www.ilikebigbits.com/2015_03_04_plane_from_points.html - static bool computeLeastSquaresNormal(ConstArrayView<Vector3> points, Vector3 *normal) { + static bool computeLeastSquaresNormal(ConstArrayView<Vector3> points, Vector3 *normal) + { XA_DEBUG_ASSERT(points.length >= 3); if (points.length == 3) { *normal = normalize(cross(points[2] - points[0], points[1] - points[0])); @@ -1587,7 +1682,7 @@ private: // Pick path with best conditioning: Vector3 dir(0.0f); if (det_max == det_x) - dir = Vector3(det_x, xz * yz - xy * zz, xy * yz - xz * yy); + dir = Vector3(det_x,xz * yz - xy * zz,xy * yz - xz * yy); else if (det_max == det_y) dir = Vector3(xz * yz - xy * zz, det_y, xy * xz - yz * xx); else if (det_max == det_z) @@ -1600,7 +1695,8 @@ private: return isNormalized(*normal); } - static bool computeEigen(ConstArrayView<Vector3> points, Basis *basis) { + static bool computeEigen(ConstArrayView<Vector3> points, Basis *basis) + { float matrix[6]; computeCovariance(points, matrix); if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0) @@ -1615,7 +1711,8 @@ private: return true; } - static Vector3 computeCentroid(ConstArrayView<Vector3> points) { + static Vector3 computeCentroid(ConstArrayView<Vector3> points) + { Vector3 centroid(0.0f); for (uint32_t i = 0; i < points.length; i++) centroid += points[i]; @@ -1623,7 +1720,8 @@ private: return centroid; } - static Vector3 computeCovariance(ConstArrayView<Vector3> points, float *covariance) { + static Vector3 computeCovariance(ConstArrayView<Vector3> points, float * covariance) + { // compute the centroid Vector3 centroid = computeCentroid(points); // compute covariance matrix @@ -1645,7 +1743,8 @@ private: // Tridiagonal solver from Charles Bloom. // Householder transforms followed by QL decomposition. // Seems to be based on the code from Numerical Recipes in C. 
- static bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]) { + static bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]) + { XA_DEBUG_ASSERT(matrix != nullptr && eigenValues != nullptr && eigenVectors != nullptr); float subd[3]; float diag[3]; @@ -1670,7 +1769,7 @@ private: // eigenvectors are the columns; make them the rows : for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { - (&eigenVectors[j].x)[i] = (float)work[i][j]; + (&eigenVectors[j].x)[i] = (float) work[i][j]; } } // shuffle to sort by singular value : @@ -1692,7 +1791,8 @@ private: } private: - static void EigenSolver3_Tridiagonal(float mat[3][3], float *diag, float *subd) { + static void EigenSolver3_Tridiagonal(float mat[3][3], float *diag, float *subd) + { // Householder reduction T = Q^t M Q // Input: // mat, symmetric 3x3 matrix M @@ -1744,7 +1844,8 @@ private: } } - static bool EigenSolver3_QLAlgorithm(float mat[3][3], float *diag, float *subd) { + static bool EigenSolver3_QLAlgorithm(float mat[3][3], float *diag, float *subd) + { // QL iteration with implicit shifting to reduce matrix from tridiagonal // to diagonal const int maxiter = 32; @@ -1754,21 +1855,21 @@ private: int m; for (m = ell; m <= 1; m++) { float dd = fabsf(diag[m]) + fabsf(diag[m + 1]); - if (fabsf(subd[m]) + dd == dd) + if ( fabsf(subd[m]) + dd == dd ) break; } - if (m == ell) + if ( m == ell ) break; float g = (diag[ell + 1] - diag[ell]) / (2 * subd[ell]); float r = sqrtf(g * g + 1); - if (g < 0) + if ( g < 0 ) g = diag[m] - diag[ell] + subd[ell] / (g - r); else g = diag[m] - diag[ell] + subd[ell] / (g + r); float s = 1, c = 1, p = 0; for (int i = m - 1; i >= ell; i--) { float f = s * subd[i], b = c * subd[i]; - if (fabsf(f) >= fabsf(g)) { + if ( fabsf(f) >= fabsf(g) ) { c = g / f; r = sqrtf(c * c + 1); subd[i + 1] = f * r; @@ -1794,7 +1895,7 @@ private: subd[ell] = g; subd[m] = 0; } - if (iter == maxiter) + if ( iter == maxiter 
) // should not get here under normal circumstances return false; } @@ -1802,48 +1903,56 @@ private: } }; -static uint32_t sdbmHash(const void *data_in, uint32_t size, uint32_t h = 5381) { - const uint8_t *data = (const uint8_t *)data_in; +static uint32_t sdbmHash(const void *data_in, uint32_t size, uint32_t h = 5381) +{ + const uint8_t *data = (const uint8_t *) data_in; uint32_t i = 0; while (i < size) { - h = (h << 16) + (h << 6) - h + (uint32_t)data[i++]; + h = (h << 16) + (h << 6) - h + (uint32_t ) data[i++]; } return h; } template <typename T> -static uint32_t hash(const T &t, uint32_t h = 5381) { +static uint32_t hash(const T &t, uint32_t h = 5381) +{ return sdbmHash(&t, sizeof(T), h); } template <typename Key> -struct Hash { +struct Hash +{ uint32_t operator()(const Key &k) const { return hash(k); } }; template <typename Key> -struct PassthroughHash { +struct PassthroughHash +{ uint32_t operator()(const Key &k) const { return (uint32_t)k; } }; template <typename Key> -struct Equal { +struct Equal +{ bool operator()(const Key &k0, const Key &k1) const { return k0 == k1; } }; -template <typename Key, typename H = Hash<Key>, typename E = Equal<Key>> -class HashMap { +template<typename Key, typename H = Hash<Key>, typename E = Equal<Key> > +class HashMap +{ public: - HashMap(int memTag, uint32_t size) : - m_memTag(memTag), m_size(size), m_numSlots(0), m_slots(nullptr), m_keys(memTag), m_next(memTag) { + HashMap(int memTag, uint32_t size) : m_memTag(memTag), m_size(size), m_numSlots(0), m_slots(nullptr), m_keys(memTag), m_next(memTag) + { } - ~HashMap() { + ~HashMap() + { if (m_slots) XA_FREE(m_slots); } - void destroy() { + void destroy() + { if (m_slots) { XA_FREE(m_slots); m_slots = nullptr; @@ -1852,7 +1961,8 @@ public: m_next.destroy(); } - uint32_t add(const Key &key) { + uint32_t add(const Key &key) + { if (!m_slots) alloc(); const uint32_t hash = computeHash(key); @@ -1862,18 +1972,21 @@ public: return m_keys.size() - 1; } - uint32_t get(const Key &key) 
const { + uint32_t get(const Key &key) const + { if (!m_slots) return UINT32_MAX; return find(key, m_slots[computeHash(key)]); } - uint32_t getNext(const Key &key, uint32_t current) const { + uint32_t getNext(const Key &key, uint32_t current) const + { return find(key, m_next[current]); } private: - void alloc() { + void alloc() + { XA_DEBUG_ASSERT(m_size > 0); m_numSlots = nextPowerOfTwo(m_size); auto minNumSlots = uint32_t(m_size * 1.3); @@ -1886,12 +1999,14 @@ private: m_next.reserve(m_size); } - uint32_t computeHash(const Key &key) const { + uint32_t computeHash(const Key &key) const + { H hash; return hash(key) & (m_numSlots - 1); } - uint32_t find(const Key &key, uint32_t current) const { + uint32_t find(const Key &key, uint32_t current) const + { E equal; while (current != UINT32_MAX) { if (equal(m_keys[current], key)) @@ -1909,8 +2024,9 @@ private: Array<uint32_t> m_next; }; -template <typename T> -static void insertionSort(T *data, uint32_t length) { +template<typename T> +static void insertionSort(T *data, uint32_t length) +{ for (int32_t i = 1; i < (int32_t)length; i++) { T x = data[i]; int32_t j = i - 1; @@ -1922,18 +2038,21 @@ static void insertionSort(T *data, uint32_t length) { } } -class KISSRng { +class KISSRng +{ public: KISSRng() { reset(); } - void reset() { + void reset() + { x = 123456789; y = 362436000; z = 521288629; c = 7654321; } - uint32_t getRange(uint32_t range) { + uint32_t getRange(uint32_t range) + { if (range == 0) return 0; x = 69069 * x + 12345; @@ -1952,9 +2071,11 @@ private: // Based on Pierre Terdiman's and Michael Herf's source code. // http://www.codercorner.com/RadixSortRevisited.htm // http://www.stereopsis.com/radix.html -class RadixSort { +class RadixSort +{ public: - void sort(ConstArrayView<float> input) { + void sort(ConstArrayView<float> input) + { if (input.length == 0) { m_buffer1.clear(); m_buffer2.clear(); @@ -1983,7 +2104,8 @@ public: } // Access to results. m_ranks is a list of indices in sorted order, i.e. 
in the order you may further process your data - const uint32_t *ranks() const { + const uint32_t *ranks() const + { XA_DEBUG_ASSERT(m_validRanks); return m_ranks; } @@ -1993,17 +2115,20 @@ private: Array<uint32_t> m_buffer1, m_buffer2; bool m_validRanks = false; - void floatFlip(uint32_t &f) { + void floatFlip(uint32_t &f) + { int32_t mask = (int32_t(f) >> 31) | 0x80000000; // Warren Hunt, Manchor Ko. f ^= mask; } - void ifloatFlip(uint32_t &f) { + void ifloatFlip(uint32_t &f) + { uint32_t mask = ((f >> 31) - 1) | 0x80000000; // Michael Herf. f ^= mask; } - void createHistograms(ConstArrayView<uint32_t> input, uint32_t *histogram) { + void createHistograms(ConstArrayView<uint32_t> input, uint32_t *histogram) + { const uint32_t bucketCount = sizeof(uint32_t); // Init bucket pointers. uint32_t *h[bucketCount]; @@ -2014,14 +2139,15 @@ private: memset(histogram, 0, 256 * bucketCount * sizeof(uint32_t)); // @@ Add support for signed integers. // Build histograms. - const uint8_t *p = (const uint8_t *)input.data; // @@ Does this break aliasing rules? + const uint8_t *p = (const uint8_t *)input.data; // @@ Does this break aliasing rules? 
const uint8_t *pe = p + input.length * sizeof(uint32_t); while (p != pe) { h[0][*p++]++, h[1][*p++]++, h[2][*p++]++, h[3][*p++]++; } } - void insertionSort(ConstArrayView<float> input) { + void insertionSort(ConstArrayView<float> input) + { if (!m_validRanks) { m_ranks[0] = 0; for (uint32_t i = 1; i != input.length; ++i) { @@ -2051,7 +2177,8 @@ private: } } - void radixSort(ConstArrayView<uint32_t> input) { + void radixSort(ConstArrayView<uint32_t> input) + { const uint32_t P = sizeof(uint32_t); // pass count // Allocate histograms & offsets on the stack uint32_t histogram[256 * P]; @@ -2069,8 +2196,7 @@ private: } // Create offsets link[0] = m_ranks2; - for (uint32_t i = 1; i < 256; i++) - link[i] = link[i - 1] + h[i - 1]; + for (uint32_t i = 1; i < 256; i++) link[i] = link[i - 1] + h[i - 1]; // Perform Radix Sort if (!m_validRanks) { for (uint32_t i = 0; i < input.length; i++) { @@ -2096,21 +2222,25 @@ private: }; // Wrapping this in a class allows temporary arrays to be re-used. -class BoundingBox2D { +class BoundingBox2D +{ public: Vector2 majorAxis, minorAxis, minCorner, maxCorner; - void clear() { + void clear() + { m_boundaryVertices.clear(); } - void appendBoundaryVertex(Vector2 v) { + void appendBoundaryVertex(Vector2 v) + { m_boundaryVertices.push_back(v); } // This should compute convex hull and use rotating calipers to find the best box. Currently it uses a brute force method. // If vertices are empty, the boundary vertices are used. - void compute(ConstArrayView<Vector2> vertices = ConstArrayView<Vector2>()) { + void compute(ConstArrayView<Vector2> vertices = ConstArrayView<Vector2>()) + { XA_DEBUG_ASSERT(!m_boundaryVertices.isEmpty()); if (vertices.length == 0) vertices = m_boundaryVertices; @@ -2157,7 +2287,8 @@ public: private: // Compute the convex hull using Graham Scan. 
- void convexHull(ConstArrayView<Vector2> input, Array<Vector2> &output, float epsilon) { + void convexHull(ConstArrayView<Vector2> input, Array<Vector2> &output, float epsilon) + { m_coords.resize(input.length); for (uint32_t i = 0; i < input.length; i++) m_coords[i] = input[i].x; @@ -2186,7 +2317,7 @@ private: XA_DEBUG_ASSERT(m_top.size() >= 2); output.push_back(m_top[0]); output.push_back(m_top[1]); - for (uint32_t i = 2; i < m_top.size();) { + for (uint32_t i = 2; i < m_top.size(); ) { Vector2 a = output[output.size() - 2]; Vector2 b = output[output.size() - 1]; Vector2 c = m_top[i]; @@ -2202,7 +2333,7 @@ private: XA_DEBUG_ASSERT(m_bottom.size() >= 2); output.push_back(m_bottom[1]); // Filter bottom list. - for (uint32_t i = 2; i < m_bottom.size();) { + for (uint32_t i = 2; i < m_bottom.size(); ) { Vector2 a = output[output.size() - 2]; Vector2 b = output[output.size() - 1]; Vector2 c = m_bottom[i]; @@ -2225,45 +2356,45 @@ private: RadixSort m_radix; }; -struct EdgeKey { - EdgeKey(const EdgeKey &k) : - v0(k.v0), v1(k.v1) {} - EdgeKey(uint32_t _v0, uint32_t _v1) : - v0(_v0), v1(_v1) {} +struct EdgeKey +{ + EdgeKey(const EdgeKey &k) : v0(k.v0), v1(k.v1) {} + EdgeKey(uint32_t _v0, uint32_t _v1) : v0(_v0), v1(_v1) {} bool operator==(const EdgeKey &k) const { return v0 == k.v0 && v1 == k.v1; } uint32_t v0; uint32_t v1; }; -struct EdgeHash { +struct EdgeHash +{ uint32_t operator()(const EdgeKey &k) const { return k.v0 * 32768u + k.v1; } }; -static uint32_t meshEdgeFace(uint32_t edge) { - return edge / 3; -} -static uint32_t meshEdgeIndex0(uint32_t edge) { - return edge; -} +static uint32_t meshEdgeFace(uint32_t edge) { return edge / 3; } +static uint32_t meshEdgeIndex0(uint32_t edge) { return edge; } -static uint32_t meshEdgeIndex1(uint32_t edge) { +static uint32_t meshEdgeIndex1(uint32_t edge) +{ const uint32_t faceFirstEdge = edge / 3 * 3; return faceFirstEdge + (edge - faceFirstEdge + 1) % 3; } -struct MeshFlags { - enum { - HasIgnoredFaces = 1 << 0, - HasNormals 
= 1 << 1, - HasMaterials = 1 << 2 +struct MeshFlags +{ + enum + { + HasIgnoredFaces = 1<<0, + HasNormals = 1<<1, + HasMaterials = 1<<2 }; }; -class Mesh { +class Mesh +{ public: - Mesh(float epsilon, uint32_t approxVertexCount, uint32_t approxFaceCount, uint32_t flags = 0, uint32_t id = UINT32_MAX) : - m_epsilon(epsilon), m_flags(flags), m_id(id), m_faceIgnore(MemTag::Mesh), m_faceMaterials(MemTag::Mesh), m_indices(MemTag::MeshIndices), m_positions(MemTag::MeshPositions), m_normals(MemTag::MeshNormals), m_texcoords(MemTag::MeshTexcoords), m_nextColocalVertex(MemTag::MeshColocals), m_firstColocalVertex(MemTag::MeshColocals), m_boundaryEdges(MemTag::MeshBoundaries), m_oppositeEdges(MemTag::MeshBoundaries), m_edgeMap(MemTag::MeshEdgeMap, approxFaceCount * 3) { + Mesh(float epsilon, uint32_t approxVertexCount, uint32_t approxFaceCount, uint32_t flags = 0, uint32_t id = UINT32_MAX) : m_epsilon(epsilon), m_flags(flags), m_id(id), m_faceIgnore(MemTag::Mesh), m_faceMaterials(MemTag::Mesh), m_indices(MemTag::MeshIndices), m_positions(MemTag::MeshPositions), m_normals(MemTag::MeshNormals), m_texcoords(MemTag::MeshTexcoords), m_nextColocalVertex(MemTag::MeshColocals), m_firstColocalVertex(MemTag::MeshColocals), m_boundaryEdges(MemTag::MeshBoundaries), m_oppositeEdges(MemTag::MeshBoundaries), m_edgeMap(MemTag::MeshEdgeMap, approxFaceCount * 3) + { m_indices.reserve(approxFaceCount * 3); m_positions.reserve(approxVertexCount); m_texcoords.reserve(approxVertexCount); @@ -2278,7 +2409,8 @@ public: uint32_t flags() const { return m_flags; } uint32_t id() const { return m_id; } - void addVertex(const Vector3 &pos, const Vector3 &normal = Vector3(0.0f), const Vector2 &texcoord = Vector2(0.0f)) { + void addVertex(const Vector3 &pos, const Vector3 &normal = Vector3(0.0f), const Vector2 &texcoord = Vector2(0.0f)) + { XA_DEBUG_ASSERT(isFinite(pos)); m_positions.push_back(pos); if (m_flags & MeshFlags::HasNormals) @@ -2286,7 +2418,8 @@ public: m_texcoords.push_back(texcoord); } - void 
addFace(const uint32_t *indices, bool ignore = false, uint32_t material = UINT32_MAX) { + void addFace(const uint32_t *indices, bool ignore = false, uint32_t material = UINT32_MAX) + { if (m_flags & MeshFlags::HasIgnoredFaces) m_faceIgnore.push_back(ignore); if (m_flags & MeshFlags::HasMaterials) @@ -2301,7 +2434,8 @@ public: } } - void createColocalsBVH() { + void createColocalsBVH() + { const uint32_t vertexCount = m_positions.size(); Array<AABB> aabbs(MemTag::BVH); aabbs.resize(vertexCount); @@ -2342,7 +2476,8 @@ public: } } - void createColocalsHash() { + void createColocalsHash() + { const uint32_t vertexCount = m_positions.size(); HashMap<Vector3> positionToVertexMap(MemTag::Default, vertexCount); for (uint32_t i = 0; i < vertexCount; i++) @@ -2380,14 +2515,16 @@ public: } } - void createColocals() { + void createColocals() + { if (m_epsilon <= FLT_EPSILON) createColocalsHash(); else createColocalsBVH(); } - void createBoundaries() { + void createBoundaries() + { const uint32_t edgeCount = m_indices.size(); const uint32_t vertexCount = m_positions.size(); m_oppositeEdges.resize(edgeCount); @@ -2418,7 +2555,8 @@ public: } /// Find edge, test all colocals. - uint32_t findEdge(uint32_t vertex0, uint32_t vertex1) const { + uint32_t findEdge(uint32_t vertex0, uint32_t vertex1) const + { // Try to find exact vertex match first. { EdgeKey key(vertex0, vertex1); @@ -2459,12 +2597,14 @@ public: // Edge map can be destroyed when no longer used to reduce memory usage. 
It's used by: // * Mesh::createBoundaries() // * Mesh::edgeMap() (used by MeshFaceGroups) - void destroyEdgeMap() { + void destroyEdgeMap() + { m_edgeMap.destroy(); } #if XA_DEBUG_EXPORT_OBJ - void writeObjVertices(FILE *file) const { + void writeObjVertices(FILE *file) const + { for (uint32_t i = 0; i < m_positions.size(); i++) fprintf(file, "v %g %g %g\n", m_positions[i].x, m_positions[i].y, m_positions[i].z); if (m_flags & MeshFlags::HasNormals) { @@ -2475,7 +2615,8 @@ public: fprintf(file, "vt %g %g\n", m_texcoords[i].x, m_texcoords[i].y); } - void writeObjFace(FILE *file, uint32_t face, uint32_t offset = 0) const { + void writeObjFace(FILE *file, uint32_t face, uint32_t offset = 0) const + { fprintf(file, "f "); for (uint32_t j = 0; j < 3; j++) { const uint32_t index = m_indices[face * 3 + j] + 1 + offset; // 1-indexed @@ -2483,7 +2624,8 @@ public: } } - void writeObjBoundaryEges(FILE *file) const { + void writeObjBoundaryEges(FILE *file) const + { if (m_oppositeEdges.isEmpty()) return; // Boundaries haven't been created. fprintf(file, "o boundary_edges\n"); @@ -2494,7 +2636,8 @@ public: } } - void writeObjFile(const char *filename) const { + void writeObjFile(const char *filename) const + { FILE *file; XA_FOPEN(file, filename, "w"); if (!file) @@ -2509,7 +2652,8 @@ public: } #endif - float computeSurfaceArea() const { + float computeSurfaceArea() const + { float area = 0; for (uint32_t f = 0; f < faceCount(); f++) area += computeFaceArea(f); @@ -2518,21 +2662,24 @@ public: } // Returned value is always positive, even if some triangles are flipped. - float computeParametricArea() const { + float computeParametricArea() const + { float area = 0; for (uint32_t f = 0; f < faceCount(); f++) area += fabsf(computeFaceParametricArea(f)); // May be negative, depends on texcoord winding. 
return area; } - float computeFaceArea(uint32_t face) const { + float computeFaceArea(uint32_t face) const + { const Vector3 &p0 = m_positions[m_indices[face * 3 + 0]]; const Vector3 &p1 = m_positions[m_indices[face * 3 + 1]]; const Vector3 &p2 = m_positions[m_indices[face * 3 + 2]]; return length(cross(p1 - p0, p2 - p0)) * 0.5f; } - Vector3 computeFaceCentroid(uint32_t face) const { + Vector3 computeFaceCentroid(uint32_t face) const + { Vector3 sum(0.0f); for (uint32_t i = 0; i < 3; i++) sum += m_positions[m_indices[face * 3 + i]]; @@ -2541,7 +2688,8 @@ public: // Average of the edge midpoints weighted by the edge length. // I want a point inside the triangle, but closer to the cirumcenter. - Vector3 computeFaceCenter(uint32_t face) const { + Vector3 computeFaceCenter(uint32_t face) const + { const Vector3 &p0 = m_positions[m_indices[face * 3 + 0]]; const Vector3 &p1 = m_positions[m_indices[face * 3 + 1]]; const Vector3 &p2 = m_positions[m_indices[face * 3 + 2]]; @@ -2554,7 +2702,8 @@ public: return m0 + m1 + m2; } - Vector3 computeFaceNormal(uint32_t face) const { + Vector3 computeFaceNormal(uint32_t face) const + { const Vector3 &p0 = m_positions[m_indices[face * 3 + 0]]; const Vector3 &p1 = m_positions[m_indices[face * 3 + 1]]; const Vector3 &p2 = m_positions[m_indices[face * 3 + 2]]; @@ -2564,7 +2713,8 @@ public: return normalizeSafe(normalAreaScaled, Vector3(0, 0, 1)); } - float computeFaceParametricArea(uint32_t face) const { + float computeFaceParametricArea(uint32_t face) const + { const Vector2 &t0 = m_texcoords[m_indices[face * 3 + 0]]; const Vector2 &t1 = m_texcoords[m_indices[face * 3 + 1]]; const Vector2 &t2 = m_texcoords[m_indices[face * 3 + 2]]; @@ -2572,7 +2722,8 @@ public: } // @@ This is not exactly accurate, we should compare the texture coordinates... 
- bool isSeam(uint32_t edge) const { + bool isSeam(uint32_t edge) const + { const uint32_t oppositeEdge = m_oppositeEdges[edge]; if (oppositeEdge == UINT32_MAX) return false; // boundary edge @@ -2583,7 +2734,8 @@ public: return m_indices[e0] != m_indices[oe1] || m_indices[e1] != m_indices[oe0]; } - bool isTextureSeam(uint32_t edge) const { + bool isTextureSeam(uint32_t edge) const + { const uint32_t oppositeEdge = m_oppositeEdges[edge]; if (oppositeEdge == UINT32_MAX) return false; // boundary edge @@ -2594,7 +2746,8 @@ public: return m_texcoords[m_indices[e0]] != m_texcoords[m_indices[oe1]] || m_texcoords[m_indices[e1]] != m_texcoords[m_indices[oe0]]; } - uint32_t firstColocalVertex(uint32_t vertex) const { + uint32_t firstColocalVertex(uint32_t vertex) const + { XA_DEBUG_ASSERT(m_firstColocalVertex.size() == m_positions.size()); return m_firstColocalVertex[vertex]; } @@ -2609,10 +2762,7 @@ public: XA_INLINE uint32_t vertexAt(uint32_t i) const { return m_indices[i]; } XA_INLINE const Vector3 &position(uint32_t vertex) const { return m_positions[vertex]; } XA_INLINE ConstArrayView<Vector3> positions() const { return m_positions; } - XA_INLINE const Vector3 &normal(uint32_t vertex) const { - XA_DEBUG_ASSERT(m_flags & MeshFlags::HasNormals); - return m_normals[vertex]; - } + XA_INLINE const Vector3 &normal(uint32_t vertex) const { XA_DEBUG_ASSERT(m_flags & MeshFlags::HasNormals); return m_normals[vertex]; } XA_INLINE const Vector2 &texcoord(uint32_t vertex) const { return m_texcoords[vertex]; } XA_INLINE Vector2 &texcoord(uint32_t vertex) { return m_texcoords[vertex]; } XA_INLINE const ConstArrayView<Vector2> texcoords() const { return m_texcoords; } @@ -2625,6 +2775,7 @@ public: XA_INLINE const HashMap<EdgeKey, EdgeHash> &edgeMap() const { return m_edgeMap; } private: + float m_epsilon; uint32_t m_flags; uint32_t m_id; @@ -2647,21 +2798,24 @@ private: HashMap<EdgeKey, EdgeHash> m_edgeMap; public: - class FaceEdgeIterator { + class FaceEdgeIterator + { public: - 
FaceEdgeIterator(const Mesh *mesh, uint32_t face) : - m_mesh(mesh), m_face(face), m_relativeEdge(0) { + FaceEdgeIterator (const Mesh *mesh, uint32_t face) : m_mesh(mesh), m_face(face), m_relativeEdge(0) + { m_edge = m_face * 3; } - void advance() { + void advance() + { if (m_relativeEdge < 3) { m_edge++; m_relativeEdge++; } } - bool isDone() const { + bool isDone() const + { return m_relativeEdge == 3; } @@ -2673,7 +2827,8 @@ public: uint32_t face() const { return m_face; } uint32_t oppositeEdge() const { return m_mesh->m_oppositeEdges[m_edge]; } - uint32_t oppositeFace() const { + uint32_t oppositeFace() const + { const uint32_t oedge = m_mesh->m_oppositeEdges[m_edge]; if (oedge == UINT32_MAX) return UINT32_MAX; @@ -2697,18 +2852,19 @@ public: }; }; -struct MeshFaceGroups { +struct MeshFaceGroups +{ typedef uint32_t Handle; static constexpr Handle kInvalid = UINT32_MAX; - MeshFaceGroups(const Mesh *mesh) : - m_mesh(mesh), m_groups(MemTag::Mesh), m_firstFace(MemTag::Mesh), m_nextFace(MemTag::Mesh), m_faceCount(MemTag::Mesh) {} + MeshFaceGroups(const Mesh *mesh) : m_mesh(mesh), m_groups(MemTag::Mesh), m_firstFace(MemTag::Mesh), m_nextFace(MemTag::Mesh), m_faceCount(MemTag::Mesh) {} XA_INLINE Handle groupAt(uint32_t face) const { return m_groups[face]; } XA_INLINE uint32_t groupCount() const { return m_faceCount.size(); } XA_INLINE uint32_t nextFace(uint32_t face) const { return m_nextFace[face]; } XA_INLINE uint32_t faceCount(uint32_t group) const { return m_faceCount[group]; } - void compute() { + void compute() + { m_groups.resize(m_mesh->faceCount()); m_groups.fillBytes(0xff); // Set all faces to kInvalid uint32_t firstUnassignedFace = 0; @@ -2767,23 +2923,27 @@ struct MeshFaceGroups { } } - class Iterator { + class Iterator + { public: - Iterator(const MeshFaceGroups *meshFaceGroups, Handle group) : - m_meshFaceGroups(meshFaceGroups) { + Iterator(const MeshFaceGroups *meshFaceGroups, Handle group) : m_meshFaceGroups(meshFaceGroups) + { XA_DEBUG_ASSERT(group != 
kInvalid); m_current = m_meshFaceGroups->m_firstFace[group]; } - void advance() { + void advance() + { m_current = m_meshFaceGroups->m_nextFace[m_current]; } - bool isDone() const { + bool isDone() const + { return m_current == UINT32_MAX; } - uint32_t face() const { + uint32_t face() const + { return m_current; } @@ -2803,7 +2963,8 @@ private: constexpr MeshFaceGroups::Handle MeshFaceGroups::kInvalid; #if XA_CHECK_T_JUNCTIONS -static bool lineIntersectsPoint(const Vector3 &point, const Vector3 &lineStart, const Vector3 &lineEnd, float *t, float epsilon) { +static bool lineIntersectsPoint(const Vector3 &point, const Vector3 &lineStart, const Vector3 &lineEnd, float *t, float epsilon) +{ float tt; if (!t) t = &tt; @@ -2821,7 +2982,8 @@ static bool lineIntersectsPoint(const Vector3 &point, const Vector3 &lineStart, } // Returns the number of T-junctions found. -static int meshCheckTJunctions(const Mesh &inputMesh) { +static int meshCheckTJunctions(const Mesh &inputMesh) +{ int count = 0; const uint32_t vertexCount = inputMesh.vertexCount(); const uint32_t edgeCount = inputMesh.edgeCount(); @@ -2845,10 +3007,12 @@ static int meshCheckTJunctions(const Mesh &inputMesh) { #endif // References invalid faces and vertices in a mesh. -struct InvalidMeshGeometry { +struct InvalidMeshGeometry +{ // If meshFaceGroups is not null, invalid faces have the face group MeshFaceGroups::kInvalid. // If meshFaceGroups is null, invalid faces are Mesh::isFaceIgnored. - void extract(const Mesh *mesh, const MeshFaceGroups *meshFaceGroups) { + void extract(const Mesh *mesh, const MeshFaceGroups *meshFaceGroups) + { // Copy invalid faces. m_faces.clear(); const uint32_t meshFaceCount = mesh->faceCount(); @@ -2886,28 +3050,32 @@ private: Array<uint32_t> m_vertexToSourceVertexMap; // Map face vertices to vertices of the source mesh. 
}; -struct Progress { - Progress(ProgressCategory category, ProgressFunc func, void *userData, uint32_t maxValue) : - cancel(false), m_category(category), m_func(func), m_userData(userData), m_value(0), m_maxValue(maxValue), m_percent(0) { +struct Progress +{ + Progress(ProgressCategory category, ProgressFunc func, void *userData, uint32_t maxValue) : cancel(false), m_category(category), m_func(func), m_userData(userData), m_value(0), m_maxValue(maxValue), m_percent(0) + { if (m_func) { if (!m_func(category, 0, userData)) cancel = true; } } - ~Progress() { + ~Progress() + { if (m_func) { if (!m_func(m_category, 100, m_userData)) cancel = true; } } - void increment(uint32_t value) { + void increment(uint32_t value) + { m_value += value; update(); } - void setMaxValue(uint32_t maxValue) { + void setMaxValue(uint32_t maxValue) + { m_maxValue = maxValue; update(); } @@ -2915,15 +3083,15 @@ struct Progress { std::atomic<bool> cancel; private: - void update() { + void update() + { if (!m_func) return; const uint32_t newPercent = uint32_t(ceilf(m_value.load() / (float)m_maxValue.load() * 100.0f)); if (newPercent != m_percent) { // Atomic max. 
uint32_t oldPercent = m_percent; - while (oldPercent < newPercent && !m_percent.compare_exchange_weak(oldPercent, newPercent)) { - } + while (oldPercent < newPercent && !m_percent.compare_exchange_weak(oldPercent, newPercent)) {} if (!m_func(m_category, m_percent, m_userData)) cancel = true; } @@ -2935,31 +3103,32 @@ private: std::atomic<uint32_t> m_value, m_maxValue, m_percent; }; -struct Spinlock { - void lock() { - while (m_lock.test_and_set(std::memory_order_acquire)) { - } - } +struct Spinlock +{ + void lock() { while(m_lock.test_and_set(std::memory_order_acquire)) {} } void unlock() { m_lock.clear(std::memory_order_release); } private: std::atomic_flag m_lock = ATOMIC_FLAG_INIT; }; -struct TaskGroupHandle { +struct TaskGroupHandle +{ uint32_t value = UINT32_MAX; }; -struct Task { +struct Task +{ void (*func)(void *groupUserData, void *taskUserData); void *userData; // Passed to func as taskUserData. }; #if XA_MULTITHREADED -class TaskScheduler { +class TaskScheduler +{ public: - TaskScheduler() : - m_shutdown(false) { + TaskScheduler() : m_shutdown(false) + { m_threadIndex = 0; // Max with current task scheduler usage is 1 per thread + 1 deep nesting, but allow for some slop. m_maxGroups = std::thread::hardware_concurrency() * 4; @@ -2978,7 +3147,8 @@ public: } } - ~TaskScheduler() { + ~TaskScheduler() + { m_shutdown = true; for (uint32_t i = 0; i < m_workers.size(); i++) { Worker &worker = m_workers[i]; @@ -2996,12 +3166,14 @@ public: XA_FREE(m_groups); } - uint32_t threadCount() const { + uint32_t threadCount() const + { return max(1u, std::thread::hardware_concurrency()); // Including the main thread. } // userData is passed to Task::func as groupUserData. - TaskGroupHandle createTaskGroup(void *userData = nullptr, uint32_t reserveSize = 0) { + TaskGroupHandle createTaskGroup(void *userData = nullptr, uint32_t reserveSize = 0) + { // Claim the first free group. 
for (uint32_t i = 0; i < m_maxGroups; i++) { TaskGroup &group = m_groups[i]; @@ -3025,7 +3197,8 @@ public: return handle; } - void run(TaskGroupHandle handle, const Task &task) { + void run(TaskGroupHandle handle, const Task &task) + { XA_DEBUG_ASSERT(handle.value != UINT32_MAX); TaskGroup &group = m_groups[handle.value]; group.queueLock.lock(); @@ -3039,7 +3212,8 @@ public: } } - void wait(TaskGroupHandle *handle) { + void wait(TaskGroupHandle *handle) + { if (handle->value == UINT32_MAX) { XA_DEBUG_ASSERT(false); return; @@ -3067,7 +3241,8 @@ public: static uint32_t currentThreadIndex() { return m_threadIndex; } private: - struct TaskGroup { + struct TaskGroup + { std::atomic<bool> free; Array<Task> queue; // Items are never removed. queueHead is incremented to pop items. uint32_t queueHead = 0; @@ -3076,7 +3251,8 @@ private: void *userData; }; - struct Worker { + struct Worker + { std::thread *thread = nullptr; std::mutex mutex; std::condition_variable cv; @@ -3089,11 +3265,12 @@ private: uint32_t m_maxGroups; static thread_local uint32_t m_threadIndex; - static void workerThread(TaskScheduler *scheduler, Worker *worker, uint32_t threadIndex) { + static void workerThread(TaskScheduler *scheduler, Worker *worker, uint32_t threadIndex) + { m_threadIndex = threadIndex; std::unique_lock<std::mutex> lock(worker->mutex); for (;;) { - worker->cv.wait(lock, [=] { return worker->wakeup.load(); }); + worker->cv.wait(lock, [=]{ return worker->wakeup.load(); }); worker->wakeup = false; for (;;) { if (scheduler->m_shutdown) @@ -3124,18 +3301,22 @@ private: thread_local uint32_t TaskScheduler::m_threadIndex; #else -class TaskScheduler { +class TaskScheduler +{ public: - ~TaskScheduler() { + ~TaskScheduler() + { for (uint32_t i = 0; i < m_groups.size(); i++) destroyGroup({ i }); } - uint32_t threadCount() const { + uint32_t threadCount() const + { return 1; } - TaskGroupHandle createTaskGroup(void *userData = nullptr, uint32_t reserveSize = 0) { + TaskGroupHandle 
createTaskGroup(void *userData = nullptr, uint32_t reserveSize = 0) + { TaskGroup *group = XA_NEW(MemTag::Default, TaskGroup); group->queue.reserve(reserveSize); group->userData = userData; @@ -3145,11 +3326,13 @@ public: return handle; } - void run(TaskGroupHandle handle, Task task) { + void run(TaskGroupHandle handle, Task task) + { m_groups[handle.value]->queue.push_back(task); } - void wait(TaskGroupHandle *handle) { + void wait(TaskGroupHandle *handle) + { if (handle->value == UINT32_MAX) { XA_DEBUG_ASSERT(false); return; @@ -3165,7 +3348,8 @@ public: static uint32_t currentThreadIndex() { return 0; } private: - void destroyGroup(TaskGroupHandle handle) { + void destroyGroup(TaskGroupHandle handle) + { TaskGroup *group = m_groups[handle.value]; if (group) { group->~TaskGroup(); @@ -3174,7 +3358,8 @@ private: } } - struct TaskGroup { + struct TaskGroup + { Array<Task> queue; void *userData; }; @@ -3188,7 +3373,8 @@ const uint8_t TGA_TYPE_RGB = 2; const uint8_t TGA_ORIGIN_UPPER = 0x20; #pragma pack(push, 1) -struct TgaHeader { +struct TgaHeader +{ uint8_t id_length; uint8_t colormap_type; uint8_t image_type; @@ -3205,7 +3391,8 @@ struct TgaHeader { }; #pragma pack(pop) -static void WriteTga(const char *filename, const uint8_t *data, uint32_t width, uint32_t height) { +static void WriteTga(const char *filename, const uint8_t *data, uint32_t width, uint32_t height) +{ XA_DEBUG_ASSERT(sizeof(TgaHeader) == TgaHeader::Size); FILE *f; XA_FOPEN(f, filename, "wb"); @@ -3230,10 +3417,12 @@ static void WriteTga(const char *filename, const uint8_t *data, uint32_t width, } #endif -template <typename T> -class ThreadLocal { +template<typename T> +class ThreadLocal +{ public: - ThreadLocal() { + ThreadLocal() + { #if XA_MULTITHREADED const uint32_t n = std::thread::hardware_concurrency(); #else @@ -3244,7 +3433,8 @@ public: new (&m_array[i]) T; } - ~ThreadLocal() { + ~ThreadLocal() + { #if XA_MULTITHREADED const uint32_t n = std::thread::hardware_concurrency(); #else @@ 
-3255,7 +3445,8 @@ public: XA_FREE(m_array); } - T &get() const { + T &get() const + { return m_array[TaskScheduler::currentThreadIndex()]; } @@ -3264,10 +3455,12 @@ private: }; // Implemented as a struct so the temporary arrays can be reused. -struct Triangulator { +struct Triangulator +{ // This is doing a simple ear-clipping algorithm that skips invalid triangles. Ideally, we should // also sort the ears by angle, start with the ones that have the smallest angle and proceed in order. - void triangulatePolygon(ConstArrayView<Vector3> vertices, ConstArrayView<uint32_t> inputIndices, Array<uint32_t> &outputIndices) { + void triangulatePolygon(ConstArrayView<Vector3> vertices, ConstArrayView<uint32_t> inputIndices, Array<uint32_t> &outputIndices) + { m_polygonVertices.clear(); m_polygonVertices.reserve(inputIndices.length); outputIndices.clear(); @@ -3276,7 +3469,8 @@ struct Triangulator { outputIndices.push_back(inputIndices[0]); outputIndices.push_back(inputIndices[1]); outputIndices.push_back(inputIndices[2]); - } else { + } + else { // Build 2D polygon projecting vertices onto normal plane. // Faces are not necesarily planar, this is for example the case, when the face comes from filling a hole. In such cases // it's much better to use the best fit plane. @@ -3348,7 +3542,8 @@ struct Triangulator { } private: - static bool pointInTriangle(const Vector2 &p, const Vector2 &a, const Vector2 &b, const Vector2 &c) { + static bool pointInTriangle(const Vector2 &p, const Vector2 &a, const Vector2 &b, const Vector2 &c) + { return triangleArea(a, b, p) >= kAreaEpsilon && triangleArea(b, c, p) >= kAreaEpsilon && triangleArea(c, a, p) >= kAreaEpsilon; } @@ -3357,10 +3552,12 @@ private: Array<Vector2> m_polygonPoints; }; -class UniformGrid2 { +class UniformGrid2 +{ public: // indices are optional. 
- void reset(ConstArrayView<Vector2> positions, ConstArrayView<uint32_t> indices = ConstArrayView<uint32_t>(), uint32_t reserveEdgeCount = 0) { + void reset(ConstArrayView<Vector2> positions, ConstArrayView<uint32_t> indices = ConstArrayView<uint32_t>(), uint32_t reserveEdgeCount = 0) + { m_edges.clear(); if (reserveEdgeCount > 0) m_edges.reserve(reserveEdgeCount); @@ -3369,12 +3566,14 @@ public: m_cellDataOffsets.clear(); } - void append(uint32_t edge) { + void append(uint32_t edge) + { XA_DEBUG_ASSERT(m_cellDataOffsets.isEmpty()); m_edges.push_back(edge); } - bool intersect(Vector2 v1, Vector2 v2, float epsilon) { + bool intersect(Vector2 v1, Vector2 v2, float epsilon) + { const uint32_t edgeCount = m_edges.size(); bool bruteForce = edgeCount <= 20; if (!bruteForce && m_cellDataOffsets.isEmpty()) @@ -3401,7 +3600,8 @@ public: } // If edges is empty, checks for intersection with all edges in the grid. - bool intersect(float epsilon, ConstArrayView<uint32_t> edges = ConstArrayView<uint32_t>(), ConstArrayView<uint32_t> ignoreEdges = ConstArrayView<uint32_t>()) { + bool intersect(float epsilon, ConstArrayView<uint32_t> edges = ConstArrayView<uint32_t>(), ConstArrayView<uint32_t> ignoreEdges = ConstArrayView<uint32_t>()) + { bool bruteForce = m_edges.size() <= 20; if (!bruteForce && m_cellDataOffsets.isEmpty()) bruteForce = !createGrid(); @@ -3471,7 +3671,8 @@ public: } #if XA_DEBUG_EXPORT_BOUNDARY_GRID - void debugExport(const char *filename) { + void debugExport(const char *filename) + { Array<uint8_t> image; image.resize(m_gridWidth * m_gridHeight * 3); for (uint32_t y = 0; y < m_gridHeight; y++) { @@ -3493,7 +3694,8 @@ public: #endif private: - bool createGrid() { + bool createGrid() + { // Compute edge extents. Min will be the grid origin. 
const uint32_t edgeCount = m_edges.size(); Extents2 edgeExtents; @@ -3545,7 +3747,8 @@ private: return true; } - void computePotentialEdges(Vector2 p1, Vector2 p2) { + void computePotentialEdges(Vector2 p1, Vector2 p2) + { m_potentialEdges.clear(); traverse(p1, p2); for (uint32_t j = 0; j < m_traversedCellOffsets.size(); j++) { @@ -3563,7 +3766,8 @@ private: } // "A Fast Voxel Traversal Algorithm for Ray Tracing" - void traverse(Vector2 p1, Vector2 p2) { + void traverse(Vector2 p1, Vector2 p2) + { const Vector2 dir = p2 - p1; const Vector2 normal = normalizeSafe(dir, Vector2(0.0f)); const int stepX = dir.x >= 0 ? 1 : -1; @@ -3584,12 +3788,14 @@ private: if (normal.x > kEpsilon || normal.x < -kEpsilon) { tMaxX = (distToNextCellX * stepX) / normal.x; tDeltaX = (m_cellSize * stepX) / normal.x; - } else + } + else tMaxX = tDeltaX = FLT_MAX; if (normal.y > kEpsilon || normal.y < -kEpsilon) { tMaxY = (distToNextCellY * stepY) / normal.y; tDeltaY = (m_cellSize * stepY) / normal.y; - } else + } + else tMaxY = tDeltaY = FLT_MAX; m_traversedCellOffsets.clear(); m_traversedCellOffsets.push_back(firstCell[0] + firstCell[1] * m_gridWidth); @@ -3616,23 +3822,28 @@ private: } } - uint32_t cellX(float x) const { + uint32_t cellX(float x) const + { return min((uint32_t)max(0.0f, (x - m_gridOrigin.x) / m_cellSize), m_gridWidth - 1u); } - uint32_t cellY(float y) const { + uint32_t cellY(float y) const + { return min((uint32_t)max(0.0f, (y - m_gridOrigin.y) / m_cellSize), m_gridHeight - 1u); } - Vector2 edgePosition0(uint32_t edge) const { + Vector2 edgePosition0(uint32_t edge) const + { return m_positions[vertexAt(meshEdgeIndex0(edge))]; } - Vector2 edgePosition1(uint32_t edge) const { + Vector2 edgePosition1(uint32_t edge) const + { return m_positions[vertexAt(meshEdgeIndex1(edge))]; } - uint32_t vertexAt(uint32_t index) const { + uint32_t vertexAt(uint32_t index) const + { return m_indices.length > 0 ? 
m_indices[index] : index; } @@ -3648,13 +3859,15 @@ private: Array<uint32_t> m_traversedCellOffsets; }; -struct UvMeshChart { +struct UvMeshChart +{ Array<uint32_t> faces; Array<uint32_t> indices; uint32_t material; }; -struct UvMesh { +struct UvMesh +{ UvMeshDecl decl; BitArray faceIgnore; Array<uint32_t> faceMaterials; @@ -3664,7 +3877,8 @@ struct UvMesh { Array<uint32_t> vertexToChartMap; }; -struct UvMeshInstance { +struct UvMeshInstance +{ UvMesh *mesh; Array<Vector2> texcoords; }; @@ -3712,30 +3926,27 @@ struct UvMeshInstance { * FRANCE */ namespace opennl { -#define NL_NEW(T) XA_ALLOC(MemTag::OpenNL, T) -#define NL_NEW_ARRAY(T, NB) XA_ALLOC_ARRAY(MemTag::OpenNL, T, NB) -#define NL_RENEW_ARRAY(T, x, NB) XA_REALLOC(MemTag::OpenNL, x, T, NB) -#define NL_DELETE(x) \ - XA_FREE(x); \ - x = nullptr -#define NL_DELETE_ARRAY(x) \ - XA_FREE(x); \ - x = nullptr -#define NL_CLEAR(x, T) memset(x, 0, sizeof(T)); -#define NL_CLEAR_ARRAY(T, x, NB) memset(x, 0, (size_t)(NB) * sizeof(T)) -#define NL_NEW_VECTOR(dim) XA_ALLOC_ARRAY(MemTag::OpenNL, double, dim) -#define NL_DELETE_VECTOR(ptr) XA_FREE(ptr) +#define NL_NEW(T) XA_ALLOC(MemTag::OpenNL, T) +#define NL_NEW_ARRAY(T,NB) XA_ALLOC_ARRAY(MemTag::OpenNL, T, NB) +#define NL_RENEW_ARRAY(T,x,NB) XA_REALLOC(MemTag::OpenNL, x, T, NB) +#define NL_DELETE(x) XA_FREE(x); x = nullptr +#define NL_DELETE_ARRAY(x) XA_FREE(x); x = nullptr +#define NL_CLEAR(x, T) memset(x, 0, sizeof(T)); +#define NL_CLEAR_ARRAY(T,x,NB) memset(x, 0, (size_t)(NB)*sizeof(T)) +#define NL_NEW_VECTOR(dim) XA_ALLOC_ARRAY(MemTag::OpenNL, double, dim) +#define NL_DELETE_VECTOR(ptr) XA_FREE(ptr) struct NLMatrixStruct; -typedef NLMatrixStruct *NLMatrix; +typedef NLMatrixStruct * NLMatrix; typedef void (*NLDestroyMatrixFunc)(NLMatrix M); -typedef void (*NLMultMatrixVectorFunc)(NLMatrix M, const double *x, double *y); +typedef void (*NLMultMatrixVectorFunc)(NLMatrix M, const double* x, double* y); #define NL_MATRIX_SPARSE_DYNAMIC 0x1001 -#define NL_MATRIX_CRS 0x1002 
-#define NL_MATRIX_OTHER 0x1006 +#define NL_MATRIX_CRS 0x1002 +#define NL_MATRIX_OTHER 0x1006 -struct NLMatrixStruct { +struct NLMatrixStruct +{ uint32_t m; uint32_t n; uint32_t type; @@ -3745,35 +3956,39 @@ struct NLMatrixStruct { /* Dynamic arrays for sparse row/columns */ -struct NLCoeff { +struct NLCoeff +{ uint32_t index; double value; }; -struct NLRowColumn { +struct NLRowColumn +{ uint32_t size; uint32_t capacity; - NLCoeff *coeff; + NLCoeff* coeff; }; /* Compressed Row Storage */ -struct NLCRSMatrix { +struct NLCRSMatrix +{ uint32_t m; uint32_t n; uint32_t type; NLDestroyMatrixFunc destroy_func; NLMultMatrixVectorFunc mult_func; - double *val; - uint32_t *rowptr; - uint32_t *colind; + double* val; + uint32_t* rowptr; + uint32_t* colind; uint32_t nslices; - uint32_t *sliceptr; + uint32_t* sliceptr; }; /* SparseMatrix data structure */ -struct NLSparseMatrix { +struct NLSparseMatrix +{ uint32_t m; uint32_t n; uint32_t type; @@ -3781,23 +3996,25 @@ struct NLSparseMatrix { NLMultMatrixVectorFunc mult_func; uint32_t diag_size; uint32_t diag_capacity; - NLRowColumn *row; - NLRowColumn *column; - double *diag; + NLRowColumn* row; + NLRowColumn* column; + double* diag; uint32_t row_capacity; uint32_t column_capacity; }; /* NLContext data structure */ -struct NLBufferBinding { - void *base_address; +struct NLBufferBinding +{ + void* base_address; uint32_t stride; }; -#define NL_BUFFER_ITEM(B, i) *(double *)((void *)((char *)((B).base_address) + ((i) * (B).stride))) +#define NL_BUFFER_ITEM(B,i) *(double*)((void*)((char*)((B).base_address)+((i)*(B).stride))) -struct NLContext { +struct NLContext +{ NLBufferBinding *variable_buffer; double *variable_value; bool *variable_is_locked; @@ -3821,30 +4038,35 @@ struct NLContext { double error; }; -static void nlDeleteMatrix(NLMatrix M) { +static void nlDeleteMatrix(NLMatrix M) +{ if (!M) return; M->destroy_func(M); NL_DELETE(M); } -static void nlMultMatrixVector(NLMatrix M, const double *x, double *y) { +static void 
nlMultMatrixVector(NLMatrix M, const double* x, double* y) +{ M->mult_func(M, x, y); } -static void nlRowColumnConstruct(NLRowColumn *c) { +static void nlRowColumnConstruct(NLRowColumn* c) +{ c->size = 0; c->capacity = 0; c->coeff = nullptr; } -static void nlRowColumnDestroy(NLRowColumn *c) { +static void nlRowColumnDestroy(NLRowColumn* c) +{ NL_DELETE_ARRAY(c->coeff); c->size = 0; c->capacity = 0; } -static void nlRowColumnGrow(NLRowColumn *c) { +static void nlRowColumnGrow(NLRowColumn* c) +{ if (c->capacity != 0) { c->capacity = 2 * c->capacity; c->coeff = NL_RENEW_ARRAY(NLCoeff, c->coeff, c->capacity); @@ -3855,7 +4077,8 @@ static void nlRowColumnGrow(NLRowColumn *c) { } } -static void nlRowColumnAdd(NLRowColumn *c, uint32_t index, double value) { +static void nlRowColumnAdd(NLRowColumn* c, uint32_t index, double value) +{ for (uint32_t i = 0; i < c->size; i++) { if (c->coeff[i].index == index) { c->coeff[i].value += value; @@ -3870,7 +4093,8 @@ static void nlRowColumnAdd(NLRowColumn *c, uint32_t index, double value) { } /* Does not check whether the index already exists */ -static void nlRowColumnAppend(NLRowColumn *c, uint32_t index, double value) { +static void nlRowColumnAppend(NLRowColumn* c, uint32_t index, double value) +{ if (c->size == c->capacity) nlRowColumnGrow(c); c->coeff[c->size].index = index; @@ -3878,27 +4102,32 @@ static void nlRowColumnAppend(NLRowColumn *c, uint32_t index, double value) { c->size++; } -static void nlRowColumnZero(NLRowColumn *c) { +static void nlRowColumnZero(NLRowColumn* c) +{ c->size = 0; } -static void nlRowColumnClear(NLRowColumn *c) { +static void nlRowColumnClear(NLRowColumn* c) +{ c->size = 0; c->capacity = 0; NL_DELETE_ARRAY(c->coeff); } -static int nlCoeffCompare(const void *p1, const void *p2) { - return (((NLCoeff *)(p2))->index < ((NLCoeff *)(p1))->index); +static int nlCoeffCompare(const void* p1, const void* p2) +{ + return (((NLCoeff*)(p2))->index < ((NLCoeff*)(p1))->index); } -static void 
nlRowColumnSort(NLRowColumn *c) { +static void nlRowColumnSort(NLRowColumn* c) +{ qsort(c->coeff, c->size, sizeof(NLCoeff), nlCoeffCompare); } /* CRSMatrix data structure */ -static void nlCRSMatrixDestroy(NLCRSMatrix *M) { +static void nlCRSMatrixDestroy(NLCRSMatrix* M) +{ NL_DELETE_ARRAY(M->val); NL_DELETE_ARRAY(M->rowptr); NL_DELETE_ARRAY(M->colind); @@ -3908,7 +4137,8 @@ static void nlCRSMatrixDestroy(NLCRSMatrix *M) { M->nslices = 0; } -static void nlCRSMatrixMultSlice(NLCRSMatrix *M, const double *x, double *y, uint32_t Ibegin, uint32_t Iend) { +static void nlCRSMatrixMultSlice(NLCRSMatrix* M, const double* x, double* y, uint32_t Ibegin, uint32_t Iend) +{ for (uint32_t i = Ibegin; i < Iend; ++i) { double sum = 0.0; for (uint32_t j = M->rowptr[i]; j < M->rowptr[i + 1]; ++j) @@ -3917,13 +4147,15 @@ static void nlCRSMatrixMultSlice(NLCRSMatrix *M, const double *x, double *y, uin } } -static void nlCRSMatrixMult(NLCRSMatrix *M, const double *x, double *y) { +static void nlCRSMatrixMult(NLCRSMatrix* M, const double* x, double* y) +{ int nslices = (int)(M->nslices); for (int slice = 0; slice < nslices; ++slice) nlCRSMatrixMultSlice(M, x, y, M->sliceptr[slice], M->sliceptr[slice + 1]); } -static void nlCRSMatrixConstruct(NLCRSMatrix *M, uint32_t m, uint32_t n, uint32_t nnz, uint32_t nslices) { +static void nlCRSMatrixConstruct(NLCRSMatrix* M, uint32_t m, uint32_t n, uint32_t nnz, uint32_t nslices) +{ M->m = m; M->n = n; M->type = NL_MATRIX_CRS; @@ -3942,19 +4174,22 @@ static void nlCRSMatrixConstruct(NLCRSMatrix *M, uint32_t m, uint32_t n, uint32_ /* SparseMatrix data structure */ -static void nlSparseMatrixDestroyRowColumns(NLSparseMatrix *M) { +static void nlSparseMatrixDestroyRowColumns(NLSparseMatrix* M) +{ for (uint32_t i = 0; i < M->m; i++) nlRowColumnDestroy(&(M->row[i])); NL_DELETE_ARRAY(M->row); } -static void nlSparseMatrixDestroy(NLSparseMatrix *M) { +static void nlSparseMatrixDestroy(NLSparseMatrix* M) +{ XA_DEBUG_ASSERT(M->type == 
NL_MATRIX_SPARSE_DYNAMIC); nlSparseMatrixDestroyRowColumns(M); NL_DELETE_ARRAY(M->diag); } -static void nlSparseMatrixAdd(NLSparseMatrix *M, uint32_t i, uint32_t j, double value) { +static void nlSparseMatrixAdd(NLSparseMatrix* M, uint32_t i, uint32_t j, double value) +{ XA_DEBUG_ASSERT(i >= 0 && i <= M->m - 1); XA_DEBUG_ASSERT(j >= 0 && j <= M->n - 1); if (i == j) @@ -3963,21 +4198,24 @@ static void nlSparseMatrixAdd(NLSparseMatrix *M, uint32_t i, uint32_t j, double } /* Returns the number of non-zero coefficients */ -static uint32_t nlSparseMatrixNNZ(NLSparseMatrix *M) { +static uint32_t nlSparseMatrixNNZ(NLSparseMatrix* M) +{ uint32_t nnz = 0; for (uint32_t i = 0; i < M->m; i++) nnz += M->row[i].size; return nnz; } -static void nlSparseMatrixSort(NLSparseMatrix *M) { +static void nlSparseMatrixSort(NLSparseMatrix* M) +{ for (uint32_t i = 0; i < M->m; i++) nlRowColumnSort(&(M->row[i])); } /* SparseMatrix x Vector routines, internal helper routines */ -static void nlSparseMatrix_mult_rows(NLSparseMatrix *A, const double *x, double *y) { +static void nlSparseMatrix_mult_rows(NLSparseMatrix* A, const double* x, double* y) +{ /* * Note: OpenMP does not like unsigned ints * (causes some floating point exceptions), @@ -3985,8 +4223,8 @@ static void nlSparseMatrix_mult_rows(NLSparseMatrix *A, const double *x, double * indices. 
*/ int m = (int)(A->m); - NLCoeff *c = nullptr; - NLRowColumn *Ri = nullptr; + NLCoeff* c = nullptr; + NLRowColumn* Ri = nullptr; for (int i = 0; i < m; i++) { Ri = &(A->row[i]); y[i] = 0; @@ -3997,12 +4235,14 @@ static void nlSparseMatrix_mult_rows(NLSparseMatrix *A, const double *x, double } } -static void nlSparseMatrixMult(NLSparseMatrix *A, const double *x, double *y) { +static void nlSparseMatrixMult(NLSparseMatrix* A, const double* x, double* y) +{ XA_DEBUG_ASSERT(A->type == NL_MATRIX_SPARSE_DYNAMIC); nlSparseMatrix_mult_rows(A, x, y); } -static void nlSparseMatrixConstruct(NLSparseMatrix *M, uint32_t m, uint32_t n) { +static void nlSparseMatrixConstruct(NLSparseMatrix* M, uint32_t m, uint32_t n) +{ M->m = m; M->n = n; M->type = NL_MATRIX_SPARSE_DYNAMIC; @@ -4022,23 +4262,24 @@ static void nlSparseMatrixConstruct(NLSparseMatrix *M, uint32_t m, uint32_t n) { NL_CLEAR_ARRAY(double, M->diag, M->diag_size); } -static NLMatrix nlCRSMatrixNewFromSparseMatrix(NLSparseMatrix *M) { +static NLMatrix nlCRSMatrixNewFromSparseMatrix(NLSparseMatrix* M) +{ uint32_t nnz = nlSparseMatrixNNZ(M); uint32_t nslices = 8; /* TODO: get number of cores */ uint32_t slice, cur_bound, cur_NNZ, cur_row; uint32_t k; uint32_t slice_size = nnz / nslices; - NLCRSMatrix *CRS = NL_NEW(NLCRSMatrix); + NLCRSMatrix* CRS = NL_NEW(NLCRSMatrix); NL_CLEAR(CRS, NLCRSMatrix); nlCRSMatrixConstruct(CRS, M->m, M->n, nnz, nslices); nlSparseMatrixSort(M); /* Convert matrix to CRS format */ k = 0; for (uint32_t i = 0; i < M->m; ++i) { - NLRowColumn *Ri = &(M->row[i]); + NLRowColumn* Ri = &(M->row[i]); CRS->rowptr[i] = k; for (uint32_t ij = 0; ij < Ri->size; ij++) { - NLCoeff *c = &(Ri->coeff[ij]); + NLCoeff* c = &(Ri->coeff[ij]); CRS->val[k] = c->value; CRS->colind[k] = c->index; ++k; @@ -4053,8 +4294,8 @@ static NLMatrix nlCRSMatrixNewFromSparseMatrix(NLSparseMatrix *M) { CRS->sliceptr[0] = 0; for (slice = 1; slice < nslices; ++slice) { while (cur_NNZ < cur_bound && cur_row < M->m) { - ++cur_row; cur_NNZ 
+= CRS->rowptr[cur_row + 1] - CRS->rowptr[cur_row]; + ++cur_row; } CRS->sliceptr[slice] = cur_row; cur_bound += slice_size; @@ -4064,17 +4305,19 @@ static NLMatrix nlCRSMatrixNewFromSparseMatrix(NLSparseMatrix *M) { return (NLMatrix)CRS; } -static void nlMatrixCompress(NLMatrix *M) { +static void nlMatrixCompress(NLMatrix* M) +{ NLMatrix CRS = nullptr; if ((*M)->type != NL_MATRIX_SPARSE_DYNAMIC) return; - CRS = nlCRSMatrixNewFromSparseMatrix((NLSparseMatrix *)*M); + CRS = nlCRSMatrixNewFromSparseMatrix((NLSparseMatrix*)*M); nlDeleteMatrix(*M); *M = CRS; } -static NLContext *nlNewContext() { - NLContext *result = NL_NEW(NLContext); +static NLContext *nlNewContext() +{ + NLContext* result = NL_NEW(NLContext); NL_CLEAR(result, NLContext); result->max_iterations = 100; result->threshold = 1e-6; @@ -4083,7 +4326,8 @@ static NLContext *nlNewContext() { return result; } -static void nlDeleteContext(NLContext *context) { +static void nlDeleteContext(NLContext *context) +{ nlDeleteMatrix(context->M); context->M = nullptr; nlDeleteMatrix(context->P); @@ -4101,19 +4345,22 @@ static void nlDeleteContext(NLContext *context) { NL_DELETE(context); } -static double ddot(int n, const double *x, const double *y) { +static double ddot(int n, const double *x, const double *y) +{ double sum = 0.0; for (int i = 0; i < n; i++) sum += x[i] * y[i]; return sum; } -static void daxpy(int n, double a, const double *x, double *y) { +static void daxpy(int n, double a, const double *x, double *y) +{ for (int i = 0; i < n; i++) y[i] = a * x[i] + y[i]; } -static void dscal(int n, double a, double *x) { +static void dscal(int n, double a, double *x) +{ for (int i = 0; i < n; i++) x[i] *= a; } @@ -4136,16 +4383,17 @@ static void dscal(int n, double a, double *x) { * versions of matrix x vector product (CPU/GPU, sparse/dense ...) 
*/ -static uint32_t nlSolveSystem_PRE_CG(NLMatrix M, NLMatrix P, double *b, double *x, double eps, uint32_t max_iter, double *sq_bnorm, double *sq_rnorm) { - int N = (int)M->n; - double *r = NL_NEW_VECTOR(N); - double *d = NL_NEW_VECTOR(N); - double *h = NL_NEW_VECTOR(N); +static uint32_t nlSolveSystem_PRE_CG(NLMatrix M, NLMatrix P, double* b, double* x, double eps, uint32_t max_iter, double *sq_bnorm, double *sq_rnorm) +{ + int N = (int)M->n; + double* r = NL_NEW_VECTOR(N); + double* d = NL_NEW_VECTOR(N); + double* h = NL_NEW_VECTOR(N); double *Ad = h; uint32_t its = 0; double rh, alpha, beta; double b_square = ddot(N, b, b); - double err = eps * eps * b_square; + double err = eps * eps*b_square; double curr_err; nlMultMatrixVector(M, x, r); daxpy(N, -1., b, r); @@ -4175,12 +4423,13 @@ static uint32_t nlSolveSystem_PRE_CG(NLMatrix M, NLMatrix P, double *b, double * return its; } -static uint32_t nlSolveSystemIterative(NLContext *context, NLMatrix M, NLMatrix P, double *b_in, double *x_in, double eps, uint32_t max_iter) { +static uint32_t nlSolveSystemIterative(NLContext *context, NLMatrix M, NLMatrix P, double* b_in, double* x_in, double eps, uint32_t max_iter) +{ uint32_t result = 0; double rnorm = 0.0; double bnorm = 0.0; - double *b = b_in; - double *x = x_in; + double* b = b_in; + double* x = x_in; XA_DEBUG_ASSERT(M->m == M->n); double sq_bnorm, sq_rnorm; result = nlSolveSystem_PRE_CG(M, P, b, x, eps, max_iter, &sq_bnorm, &sq_rnorm); @@ -4195,9 +4444,10 @@ static uint32_t nlSolveSystemIterative(NLContext *context, NLMatrix M, NLMatrix return result; } -static bool nlSolveIterative(NLContext *context) { - double *b = context->b; - double *x = context->x; +static bool nlSolveIterative(NLContext *context) +{ + double* b = context->b; + double* x = context->x; uint32_t n = context->n; NLMatrix M = context->M; NLMatrix P = context->P; @@ -4209,30 +4459,34 @@ static bool nlSolveIterative(NLContext *context) { return true; } -struct NLJacobiPreconditioner { +struct 
NLJacobiPreconditioner +{ uint32_t m; uint32_t n; uint32_t type; NLDestroyMatrixFunc destroy_func; NLMultMatrixVectorFunc mult_func; - double *diag_inv; + double* diag_inv; }; -static void nlJacobiPreconditionerDestroy(NLJacobiPreconditioner *M) { +static void nlJacobiPreconditionerDestroy(NLJacobiPreconditioner* M) +{ NL_DELETE_ARRAY(M->diag_inv); } -static void nlJacobiPreconditionerMult(NLJacobiPreconditioner *M, const double *x, double *y) { +static void nlJacobiPreconditionerMult(NLJacobiPreconditioner* M, const double* x, double* y) +{ for (uint32_t i = 0; i < M->n; ++i) y[i] = x[i] * M->diag_inv[i]; } -static NLMatrix nlNewJacobiPreconditioner(NLMatrix M_in) { - NLSparseMatrix *M = nullptr; - NLJacobiPreconditioner *result = nullptr; +static NLMatrix nlNewJacobiPreconditioner(NLMatrix M_in) +{ + NLSparseMatrix* M = nullptr; + NLJacobiPreconditioner* result = nullptr; XA_DEBUG_ASSERT(M_in->type == NL_MATRIX_SPARSE_DYNAMIC); XA_DEBUG_ASSERT(M_in->m == M_in->n); - M = (NLSparseMatrix *)M_in; + M = (NLSparseMatrix*)M_in; result = NL_NEW(NLJacobiPreconditioner); NL_CLEAR(result, NLJacobiPreconditioner); result->m = M->m; @@ -4250,7 +4504,8 @@ static NLMatrix nlNewJacobiPreconditioner(NLMatrix M_in) { #define NL_NB_VARIABLES 0x101 #define NL_MAX_ITERATIONS 0x103 -static void nlSolverParameteri(NLContext *context, uint32_t pname, int param) { +static void nlSolverParameteri(NLContext *context, uint32_t pname, int param) +{ if (pname == NL_NB_VARIABLES) { XA_DEBUG_ASSERT(param > 0); context->nb_variables = (uint32_t)param; @@ -4261,22 +4516,26 @@ static void nlSolverParameteri(NLContext *context, uint32_t pname, int param) { } } -static void nlSetVariable(NLContext *context, uint32_t index, double value) { +static void nlSetVariable(NLContext *context, uint32_t index, double value) +{ XA_DEBUG_ASSERT(index >= 0 && index <= context->nb_variables - 1); NL_BUFFER_ITEM(context->variable_buffer[0], index) = value; } -static double nlGetVariable(NLContext *context, 
uint32_t index) { +static double nlGetVariable(NLContext *context, uint32_t index) +{ XA_DEBUG_ASSERT(index >= 0 && index <= context->nb_variables - 1); return NL_BUFFER_ITEM(context->variable_buffer[0], index); } -static void nlLockVariable(NLContext *context, uint32_t index) { +static void nlLockVariable(NLContext *context, uint32_t index) +{ XA_DEBUG_ASSERT(index >= 0 && index <= context->nb_variables - 1); context->variable_is_locked[index] = true; } -static void nlVariablesToVector(NLContext *context) { +static void nlVariablesToVector(NLContext *context) +{ uint32_t n = context->n; XA_DEBUG_ASSERT(context->x); for (uint32_t k = 0; k < context->nb_systems; ++k) { @@ -4291,7 +4550,8 @@ static void nlVariablesToVector(NLContext *context) { } } -static void nlVectorToVariables(NLContext *context) { +static void nlVectorToVariables(NLContext *context) +{ uint32_t n = context->n; XA_DEBUG_ASSERT(context->x); for (uint32_t k = 0; k < context->nb_systems; ++k) { @@ -4306,7 +4566,8 @@ static void nlVectorToVariables(NLContext *context) { } } -static void nlCoefficient(NLContext *context, uint32_t index, double value) { +static void nlCoefficient(NLContext *context, uint32_t index, double value) +{ XA_DEBUG_ASSERT(index >= 0 && index <= context->nb_variables - 1); if (context->variable_is_locked[index]) { /* @@ -4323,11 +4584,12 @@ static void nlCoefficient(NLContext *context, uint32_t index, double value) { } } -#define NL_SYSTEM 0x0 -#define NL_MATRIX 0x1 -#define NL_ROW 0x2 +#define NL_SYSTEM 0x0 +#define NL_MATRIX 0x1 +#define NL_ROW 0x2 -static void nlBegin(NLContext *context, uint32_t prim) { +static void nlBegin(NLContext *context, uint32_t prim) +{ if (prim == NL_SYSTEM) { XA_DEBUG_ASSERT(context->nb_variables > 0); context->variable_buffer = NL_NEW_ARRAY(NLBufferBinding, context->nb_systems); @@ -4336,8 +4598,8 @@ static void nlBegin(NLContext *context, uint32_t prim) { NL_CLEAR_ARRAY(double, context->variable_value, context->nb_variables * 
context->nb_systems); for (uint32_t k = 0; k < context->nb_systems; ++k) { context->variable_buffer[k].base_address = - context->variable_value + - k * context->nb_variables; + context->variable_value + + k * context->nb_variables; context->variable_buffer[k].stride = sizeof(double); } context->variable_is_locked = NL_NEW_ARRAY(bool, context->nb_variables); @@ -4360,11 +4622,11 @@ static void nlBegin(NLContext *context, uint32_t prim) { context->max_iterations = n * 5; context->M = (NLMatrix)(NL_NEW(NLSparseMatrix)); NL_CLEAR(context->M, NLSparseMatrix); - nlSparseMatrixConstruct((NLSparseMatrix *)(context->M), n, n); - context->x = NL_NEW_ARRAY(double, n * context->nb_systems); - NL_CLEAR_ARRAY(double, context->x, n * context->nb_systems); - context->b = NL_NEW_ARRAY(double, n * context->nb_systems); - NL_CLEAR_ARRAY(double, context->b, n * context->nb_systems); + nlSparseMatrixConstruct((NLSparseMatrix*)(context->M), n, n); + context->x = NL_NEW_ARRAY(double, n*context->nb_systems); + NL_CLEAR_ARRAY(double, context->x, n*context->nb_systems); + context->b = NL_NEW_ARRAY(double, n*context->nb_systems); + NL_CLEAR_ARRAY(double, context->b, n*context->nb_systems); nlVariablesToVector(context); nlRowColumnConstruct(&context->af); nlRowColumnConstruct(&context->al); @@ -4375,15 +4637,16 @@ static void nlBegin(NLContext *context, uint32_t prim) { } } -static void nlEnd(NLContext *context, uint32_t prim) { +static void nlEnd(NLContext *context, uint32_t prim) +{ if (prim == NL_MATRIX) { nlRowColumnClear(&context->af); nlRowColumnClear(&context->al); } else if (prim == NL_ROW) { - NLRowColumn *af = &context->af; - NLRowColumn *al = &context->al; - NLSparseMatrix *M = (NLSparseMatrix *)context->M; - double *b = context->b; + NLRowColumn* af = &context->af; + NLRowColumn* al = &context->al; + NLSparseMatrix* M = (NLSparseMatrix*)context->M; + double* b = context->b; uint32_t nf = af->size; uint32_t nl = al->size; uint32_t n = context->n; @@ -4404,13 +4667,14 @@ static void 
nlEnd(NLContext *context, uint32_t prim) { S += al->coeff[jj].value * NL_BUFFER_ITEM(context->variable_buffer[k], j); } for (uint32_t jj = 0; jj < nf; jj++) - b[k * n + af->coeff[jj].index] -= af->coeff[jj].value * S; + b[k*n + af->coeff[jj].index] -= af->coeff[jj].value * S; } context->current_row++; } } -static bool nlSolve(NLContext *context) { +static bool nlSolve(NLContext *context) +{ nlDeleteMatrix(context->P); context->P = nlNewJacobiPreconditioner(context->M); nlMatrixCompress(&context->M); @@ -4421,9 +4685,11 @@ static bool nlSolve(NLContext *context) { } // namespace opennl namespace raster { -class ClippedTriangle { +class ClippedTriangle +{ public: - ClippedTriangle(const Vector2 &a, const Vector2 &b, const Vector2 &c) { + ClippedTriangle(const Vector2 &a, const Vector2 &b, const Vector2 &c) + { m_numVertices = 3; m_activeVertexBuffer = 0; m_verticesA[0] = a; @@ -4434,20 +4700,20 @@ public: m_area = 0; } - void clipHorizontalPlane(float offset, float clipdirection) { - Vector2 *v = m_vertexBuffers[m_activeVertexBuffer]; + void clipHorizontalPlane(float offset, float clipdirection) + { + Vector2 *v = m_vertexBuffers[m_activeVertexBuffer]; m_activeVertexBuffer ^= 1; Vector2 *v2 = m_vertexBuffers[m_activeVertexBuffer]; v[m_numVertices] = v[0]; - float dy2, dy1 = offset - v[0].y; - int dy2in, dy1in = clipdirection * dy1 >= 0; - uint32_t p = 0; + float dy2, dy1 = offset - v[0].y; + int dy2in, dy1in = clipdirection * dy1 >= 0; + uint32_t p = 0; for (uint32_t k = 0; k < m_numVertices; k++) { - dy2 = offset - v[k + 1].y; + dy2 = offset - v[k + 1].y; dy2in = clipdirection * dy2 >= 0; - if (dy1in) - v2[p++] = v[k]; - if (dy1in + dy2in == 1) { // not both in/out + if (dy1in) v2[p++] = v[k]; + if ( dy1in + dy2in == 1 ) { // not both in/out float dx = v[k + 1].x - v[k].x; float dy = v[k + 1].y - v[k].y; v2[p++] = Vector2(v[k].x + dy1 * (dx / dy), offset); @@ -4458,20 +4724,20 @@ public: m_numVertices = p; } - void clipVerticalPlane(float offset, float 
clipdirection) { - Vector2 *v = m_vertexBuffers[m_activeVertexBuffer]; + void clipVerticalPlane(float offset, float clipdirection) + { + Vector2 *v = m_vertexBuffers[m_activeVertexBuffer]; m_activeVertexBuffer ^= 1; Vector2 *v2 = m_vertexBuffers[m_activeVertexBuffer]; v[m_numVertices] = v[0]; - float dx2, dx1 = offset - v[0].x; - int dx2in, dx1in = clipdirection * dx1 >= 0; - uint32_t p = 0; + float dx2, dx1 = offset - v[0].x; + int dx2in, dx1in = clipdirection * dx1 >= 0; + uint32_t p = 0; for (uint32_t k = 0; k < m_numVertices; k++) { dx2 = offset - v[k + 1].x; dx2in = clipdirection * dx2 >= 0; - if (dx1in) - v2[p++] = v[k]; - if (dx1in + dx2in == 1) { // not both in/out + if (dx1in) v2[p++] = v[k]; + if ( dx1in + dx2in == 1 ) { // not both in/out float dx = v[k + 1].x - v[k].x; float dy = v[k + 1].y - v[k].y; v2[p++] = Vector2(offset, v[k].y + dx1 * (dy / dx)); @@ -4482,8 +4748,9 @@ public: m_numVertices = p; } - void computeArea() { - Vector2 *v = m_vertexBuffers[m_activeVertexBuffer]; + void computeArea() + { + Vector2 *v = m_vertexBuffers[m_activeVertexBuffer]; v[m_numVertices] = v[0]; m_area = 0; float centroidx = 0, centroidy = 0; @@ -4497,7 +4764,8 @@ public: m_area = 0.5f * fabsf(m_area); } - void clipAABox(float x0, float y0, float x1, float y1) { + void clipAABox(float x0, float y0, float x1, float y1) + { clipVerticalPlane(x0, -1); clipHorizontalPlane(y0, -1); clipVerticalPlane(x1, 1); @@ -4505,7 +4773,8 @@ public: computeArea(); } - float area() const { + float area() const + { return m_area; } @@ -4522,9 +4791,10 @@ private: typedef bool (*SamplingCallback)(void *param, int x, int y); /// A triangle for rasterization. 
-struct Triangle { - Triangle(const Vector2 &_v0, const Vector2 &_v1, const Vector2 &_v2) : - v1(_v0), v2(_v2), v3(_v1), n1(0.0f), n2(0.0f), n3(0.0f) { +struct Triangle +{ + Triangle(const Vector2 &_v0, const Vector2 &_v1, const Vector2 &_v2) : v1(_v0), v2(_v2), v3(_v1), n1(0.0f), n2(0.0f), n3(0.0f) + { // make sure every triangle is front facing. flipBackface(); // Compute deltas. @@ -4532,7 +4802,8 @@ struct Triangle { computeUnitInwardNormals(); } - bool isValid() { + bool isValid() + { const Vector2 e0 = v3 - v1; const Vector2 e1 = v2 - v1; const float area = e0.y * e1.x - e1.y * e0.x; @@ -4540,17 +4811,18 @@ struct Triangle { } // extents has to be multiple of BK_SIZE!! - bool drawAA(const Vector2 &extents, SamplingCallback cb, void *param) { - const float PX_INSIDE = 1.0f / sqrtf(2.0f); - const float PX_OUTSIDE = -1.0f / sqrtf(2.0f); + bool drawAA(const Vector2 &extents, SamplingCallback cb, void *param) + { + const float PX_INSIDE = 1.0f/sqrtf(2.0f); + const float PX_OUTSIDE = -1.0f/sqrtf(2.0f); const float BK_SIZE = 8; - const float BK_INSIDE = sqrtf(BK_SIZE * BK_SIZE / 2.0f); - const float BK_OUTSIDE = -sqrtf(BK_SIZE * BK_SIZE / 2.0f); + const float BK_INSIDE = sqrtf(BK_SIZE*BK_SIZE/2.0f); + const float BK_OUTSIDE = -sqrtf(BK_SIZE*BK_SIZE/2.0f); // Bounding rectangle float minx = floorf(max(min3(v1.x, v2.x, v3.x), 0.0f)); float miny = floorf(max(min3(v1.y, v2.y, v3.y), 0.0f)); - float maxx = ceilf(min(max3(v1.x, v2.x, v3.x), extents.x - 1.0f)); - float maxy = ceilf(min(max3(v1.y, v2.y, v3.y), extents.y - 1.0f)); + float maxx = ceilf( min(max3(v1.x, v2.x, v3.x), extents.x - 1.0f)); + float maxy = ceilf( min(max3(v1.y, v2.y, v3.y), extents.y - 1.0f)); // There's no reason to align the blocks to the viewport, instead we align them to the origin of the triangle bounds. 
minx = floorf(minx); miny = floorf(miny); @@ -4575,10 +4847,9 @@ struct Triangle { float bC = C2 + n2.x * xc + n2.y * yc; float cC = C3 + n3.x * xc + n3.y * yc; // Skip block when outside an edge - if ((aC <= BK_OUTSIDE) || (bC <= BK_OUTSIDE) || (cC <= BK_OUTSIDE)) - continue; + if ( (aC <= BK_OUTSIDE) || (bC <= BK_OUTSIDE) || (cC <= BK_OUTSIDE) ) continue; // Accept whole block when totally covered - if ((aC >= BK_INSIDE) && (bC >= BK_INSIDE) && (cC >= BK_INSIDE)) { + if ( (aC >= BK_INSIDE) && (bC >= BK_INSIDE) && (cC >= BK_INSIDE) ) { for (float y = y0; y < y0 + BK_SIZE; y++) { for (float x = x0; x < x0 + BK_SIZE; x++) { if (!cb(param, (int)x, (int)y)) @@ -4621,9 +4892,10 @@ struct Triangle { } private: - void flipBackface() { + void flipBackface() + { // check if triangle is backfacing, if so, swap two vertices - if (((v3.x - v1.x) * (v2.y - v1.y) - (v3.y - v1.y) * (v2.x - v1.x)) < 0) { + if ( ((v3.x - v1.x) * (v2.y - v1.y) - (v3.y - v1.y) * (v2.x - v1.x)) < 0 ) { Vector2 hv = v1; v1 = v2; v2 = hv; // swap pos @@ -4631,7 +4903,8 @@ private: } // compute unit inward normals for each edge. - void computeUnitInwardNormals() { + void computeUnitInwardNormals() + { n1 = v1 - v2; n1 = Vector2(-n1.y, n1.x); n1 = n1 * (1.0f / sqrtf(dot(n1, n1))); @@ -4649,7 +4922,8 @@ private: }; // Process the given triangle. Returns false if rasterization was interrupted by the callback. -static bool drawTriangle(const Vector2 &extents, const Vector2 v[3], SamplingCallback cb, void *param) { +static bool drawTriangle(const Vector2 &extents, const Vector2 v[3], SamplingCallback cb, void *param) +{ Triangle tri(v[0], v[1], v[2]); // @@ It would be nice to have a conservative drawing mode that enlarges the triangle extents by one texel and is able to handle degenerate triangles. // @@ Maybe the simplest thing to do would be raster triangle edges. @@ -4664,19 +4938,22 @@ namespace segment { // - Insertion is o(n) // - Smallest element goes at the end, so that popping it is o(1). 
-struct CostQueue { - CostQueue(uint32_t size = UINT32_MAX) : - m_maxSize(size), m_pairs(MemTag::SegmentAtlasChartCandidates) {} +struct CostQueue +{ + CostQueue(uint32_t size = UINT32_MAX) : m_maxSize(size), m_pairs(MemTag::SegmentAtlasChartCandidates) {} - float peekCost() const { + float peekCost() const + { return m_pairs.back().cost; } - uint32_t peekFace() const { + uint32_t peekFace() const + { return m_pairs.back().face; } - void push(float cost, uint32_t face) { + void push(float cost, uint32_t face) + { const Pair p = { cost, face }; if (m_pairs.isEmpty() || cost < peekCost()) m_pairs.push_back(p); @@ -4693,25 +4970,29 @@ struct CostQueue { } } - uint32_t pop() { + uint32_t pop() + { XA_DEBUG_ASSERT(!m_pairs.isEmpty()); uint32_t f = m_pairs.back().face; m_pairs.pop_back(); return f; } - XA_INLINE void clear() { + XA_INLINE void clear() + { m_pairs.clear(); } - XA_INLINE uint32_t count() const { + XA_INLINE uint32_t count() const + { return m_pairs.size(); } private: const uint32_t m_maxSize; - struct Pair { + struct Pair + { float cost; uint32_t face; }; @@ -4719,7 +5000,8 @@ private: Array<Pair> m_pairs; }; -struct AtlasData { +struct AtlasData +{ ChartOptions options; const Mesh *mesh = nullptr; Array<float> edgeDihedralAngles; @@ -4729,10 +5011,10 @@ struct AtlasData { Array<Vector3> faceNormals; BitArray isFaceInChart; - AtlasData() : - edgeDihedralAngles(MemTag::SegmentAtlasMeshData), edgeLengths(MemTag::SegmentAtlasMeshData), faceAreas(MemTag::SegmentAtlasMeshData), faceNormals(MemTag::SegmentAtlasMeshData) {} + AtlasData() : edgeDihedralAngles(MemTag::SegmentAtlasMeshData), edgeLengths(MemTag::SegmentAtlasMeshData), faceAreas(MemTag::SegmentAtlasMeshData), faceNormals(MemTag::SegmentAtlasMeshData) {} - void compute() { + void compute() + { const uint32_t faceCount = mesh->faceCount(); const uint32_t edgeCount = mesh->edgeCount(); edgeDihedralAngles.resize(edgeCount); @@ -4773,18 +5055,20 @@ struct AtlasData { }; // If MeshDecl::vertexUvData is set 
on input meshes, find charts by floodfilling faces in world/model space without crossing UV seams. -struct OriginalUvCharts { - OriginalUvCharts(AtlasData &data) : - m_data(data) {} +struct OriginalUvCharts +{ + OriginalUvCharts(AtlasData &data) : m_data(data) {} uint32_t chartCount() const { return m_charts.size(); } const Basis &chartBasis(uint32_t chartIndex) const { return m_chartBasis[chartIndex]; } - ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const { + ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const + { const Chart &chart = m_charts[chartIndex]; return ConstArrayView<uint32_t>(&m_chartFaces[chart.firstFace], chart.faceCount); } - void compute() { + void compute() + { m_charts.clear(); m_chartFaces.clear(); const Mesh *mesh = m_data.mesh; @@ -4805,7 +5089,8 @@ struct OriginalUvCharts { } // Compute basis for each chart. m_chartBasis.resize(m_charts.size()); - for (uint32_t c = 0; c < m_charts.size(); c++) { + for (uint32_t c = 0; c < m_charts.size(); c++) + { const Chart &chart = m_charts[c]; m_tempPoints.resize(chart.faceCount * 3); for (uint32_t f = 0; f < chart.faceCount; f++) { @@ -4818,11 +5103,13 @@ struct OriginalUvCharts { } private: - struct Chart { + struct Chart + { uint32_t firstFace, faceCount; }; - void floodfillFaces(Chart &chart) { + void floodfillFaces(Chart &chart) + { const bool isFaceAreaNegative = m_data.faceUvAreas[m_chartFaces[chart.firstFace]] < 0.0f; for (;;) { bool newFaceAdded = false; @@ -4868,13 +5155,14 @@ static uint32_t s_planarRegionsCurrentRegion; static uint32_t s_planarRegionsCurrentVertex; #endif -struct PlanarCharts { - PlanarCharts(AtlasData &data) : - m_data(data), m_nextRegionFace(MemTag::SegmentAtlasPlanarRegions), m_faceToRegionId(MemTag::SegmentAtlasPlanarRegions) {} +struct PlanarCharts +{ + PlanarCharts(AtlasData &data) : m_data(data), m_nextRegionFace(MemTag::SegmentAtlasPlanarRegions), m_faceToRegionId(MemTag::SegmentAtlasPlanarRegions) {} const Basis &chartBasis(uint32_t chartIndex) 
const { return m_chartBasis[chartIndex]; } uint32_t chartCount() const { return m_charts.size(); } - ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const { + ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const + { const Chart &chart = m_charts[chartIndex]; return ConstArrayView<uint32_t>(&m_chartFaces[chart.firstFace], chart.faceCount); } @@ -4883,7 +5171,8 @@ struct PlanarCharts { uint32_t nextRegionFace(uint32_t face) const { return m_nextRegionFace[face]; } float regionArea(uint32_t region) const { return m_regionAreas[region]; } - void compute() { + void compute() + { const uint32_t faceCount = m_data.mesh->faceCount(); // Precompute regions of coplanar incident faces. m_regionFirstFace.clear(); @@ -4983,7 +5272,8 @@ struct PlanarCharts { if (!createChart) break; face = m_nextRegionFace[face]; - } while (face != firstRegionFace); + } + while (face != firstRegionFace); // Create a chart. if (createChart) { Chart chart; @@ -4995,13 +5285,15 @@ struct PlanarCharts { m_chartFaces.push_back(face); chart.faceCount++; face = m_nextRegionFace[face]; - } while (face != firstRegionFace); + } + while (face != firstRegionFace); m_charts.push_back(chart); } } // Compute basis for each chart using the first face normal (all faces have the same normal). 
m_chartBasis.resize(m_charts.size()); - for (uint32_t c = 0; c < m_charts.size(); c++) { + for (uint32_t c = 0; c < m_charts.size(); c++) + { const uint32_t face = m_chartFaces[m_charts[c].firstFace]; Basis &basis = m_chartBasis[c]; basis.normal = m_data.faceNormals[face]; @@ -5011,7 +5303,8 @@ struct PlanarCharts { } private: - struct Chart { + struct Chart + { uint32_t firstFace, faceCount; }; @@ -5025,11 +5318,12 @@ private: Array<Basis> m_chartBasis; }; -struct ClusteredCharts { - ClusteredCharts(AtlasData &data, const PlanarCharts &planarCharts) : - m_data(data), m_planarCharts(planarCharts), m_texcoords(MemTag::SegmentAtlasMeshData), m_bestTriangles(10), m_placingSeeds(false) {} +struct ClusteredCharts +{ + ClusteredCharts(AtlasData &data, const PlanarCharts &planarCharts) : m_data(data), m_planarCharts(planarCharts), m_texcoords(MemTag::SegmentAtlasMeshData), m_bestTriangles(10), m_placingSeeds(false) {} - ~ClusteredCharts() { + ~ClusteredCharts() + { const uint32_t chartCount = m_charts.size(); for (uint32_t i = 0; i < chartCount; i++) { m_charts[i]->~Chart(); @@ -5041,7 +5335,8 @@ struct ClusteredCharts { ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const { return m_charts[chartIndex]->faces; } const Basis &chartBasis(uint32_t chartIndex) const { return m_charts[chartIndex]->basis; } - void compute() { + void compute() + { const uint32_t faceCount = m_data.mesh->faceCount(); m_facesLeft = 0; for (uint32_t i = 0; i < faceCount; i++) { @@ -5087,9 +5382,9 @@ struct ClusteredCharts { } private: - struct Chart { - Chart() : - faces(MemTag::SegmentAtlasChartFaces) {} + struct Chart + { + Chart() : faces(MemTag::SegmentAtlasChartFaces) {} int id = -1; Basis basis; // Best fit normal. 
@@ -5103,7 +5398,8 @@ private: uint32_t seed; }; - void placeSeeds(float threshold) { + void placeSeeds(float threshold) + { XA_PROFILE_START(clusteredChartsPlaceSeeds) m_placingSeeds = true; // Instead of using a predefiened number of seeds: @@ -5119,7 +5415,8 @@ private: } // Returns true if any of the charts can grow more. - void growCharts(float threshold) { + void growCharts(float threshold) + { XA_PROFILE_START(clusteredChartsGrow) for (;;) { if (m_facesLeft == 0) @@ -5165,7 +5462,8 @@ private: XA_PROFILE_END(clusteredChartsGrow) } - void resetCharts() { + void resetCharts() + { XA_PROFILE_START(clusteredChartsReset) const uint32_t faceCount = m_data.mesh->faceCount(); for (uint32_t i = 0; i < faceCount; i++) { @@ -5196,7 +5494,8 @@ private: XA_PROFILE_END(clusteredChartsReset) } - bool relocateSeeds() { + bool relocateSeeds() + { XA_PROFILE_START(clusteredChartsRelocateSeeds) bool anySeedChanged = false; const uint32_t chartCount = m_charts.size(); @@ -5209,7 +5508,8 @@ private: return anySeedChanged; } - void fillHoles(float threshold) { + void fillHoles(float threshold) + { XA_PROFILE_START(clusteredChartsFillHoles) while (m_facesLeft > 0) createChart(threshold); @@ -5217,7 +5517,8 @@ private: } #if XA_MERGE_CHARTS - void mergeCharts() { + void mergeCharts() + { XA_PROFILE_START(clusteredChartsMerge) const uint32_t chartCount = m_charts.size(); // Merge charts progressively until there's none left to merge. 
@@ -5286,7 +5587,7 @@ private: if (m_sharedBoundaryLengthsNoSeams[cc] > 0.0f && equal(m_sharedBoundaryLengthsNoSeams[cc], chart2->boundaryLength, kEpsilon)) goto merge; if (m_sharedBoundaryLengths[cc] > 0.2f * max(0.0f, chart->boundaryLength - externalBoundaryLength) || - m_sharedBoundaryLengths[cc] > 0.75f * chart2->boundaryLength) + m_sharedBoundaryLengths[cc] > 0.75f * chart2->boundaryLength) goto merge; continue; merge: @@ -5324,7 +5625,8 @@ private: #endif private: - void createChart(float threshold) { + void createChart(float threshold) + { Chart *chart = XA_NEW(MemTag::Default, Chart); chart->id = (int)m_charts.size(); m_charts.push_back(chart); @@ -5355,13 +5657,15 @@ private: } } - bool isChartBoundaryEdge(const Chart *chart, uint32_t edge) const { + bool isChartBoundaryEdge(const Chart *chart, uint32_t edge) const + { const uint32_t oppositeEdge = m_data.mesh->oppositeEdge(edge); const uint32_t oppositeFace = meshEdgeFace(oppositeEdge); return oppositeEdge == UINT32_MAX || m_faceCharts[oppositeFace] != chart->id; } - bool computeChartBasis(Chart *chart, Basis *basis) { + bool computeChartBasis(Chart *chart, Basis *basis) + { const uint32_t faceCount = chart->faces.size(); m_tempPoints.resize(chart->faces.size() * 3); for (uint32_t i = 0; i < faceCount; i++) { @@ -5372,7 +5676,8 @@ private: return Fit::computeBasis(m_tempPoints, basis); } - bool isFaceFlipped(uint32_t face) const { + bool isFaceFlipped(uint32_t face) const + { const Vector2 &v1 = m_texcoords[face * 3 + 0]; const Vector2 &v2 = m_texcoords[face * 3 + 1]; const Vector2 &v3 = m_texcoords[face * 3 + 2]; @@ -5380,7 +5685,8 @@ private: return parametricArea < 0.0f; } - void parameterizeChart(const Chart *chart) { + void parameterizeChart(const Chart *chart) + { const uint32_t faceCount = chart->faces.size(); for (uint32_t i = 0; i < faceCount; i++) { const uint32_t face = chart->faces[i]; @@ -5393,7 +5699,8 @@ private: } // m_faceCharts for the chart faces must be set to the chart ID. 
Needed to compute boundary edges. - bool isChartParameterizationValid(const Chart *chart) { + bool isChartParameterizationValid(const Chart *chart) + { const uint32_t faceCount = chart->faces.size(); // Check for flipped faces in the parameterization. OK if all are flipped. uint32_t flippedFaceCount = 0; @@ -5427,7 +5734,8 @@ private: return true; } - bool addFaceToChart(Chart *chart, uint32_t face) { + bool addFaceToChart(Chart *chart, uint32_t face) + { XA_DEBUG_ASSERT(!m_data.isFaceInChart.get(face)); const uint32_t oldFaceCount = chart->faces.size(); const bool firstFace = oldFaceCount == 0; @@ -5505,7 +5813,8 @@ private: } // Returns true if the seed has changed. - bool relocateSeed(Chart *chart) { + bool relocateSeed(Chart *chart) + { // Find the first N triangles that fit the proxy best. const uint32_t faceCount = chart->faces.size(); m_bestTriangles.clear(); @@ -5535,7 +5844,8 @@ private: } // Cost is combined metrics * weights. - float computeCost(Chart *chart, uint32_t face) const { + float computeCost(Chart *chart, uint32_t face) const + { // Estimate boundary length and area: const float newChartArea = computeArea(chart, face); const float newBoundaryLength = computeBoundaryLength(chart, face); @@ -5571,20 +5881,23 @@ private: // Returns a value in [0-1]. // 0 if face normal is coplanar to the chart's best fit normal. // 1 if face normal is perpendicular. - float computeNormalDeviationMetric(Chart *chart, uint32_t face) const { + float computeNormalDeviationMetric(Chart *chart, uint32_t face) const + { // All faces in coplanar regions have the same normal, can use any face. 
const Vector3 faceNormal = m_data.faceNormals[face]; // Use plane fitting metric for now: return min(1.0f - dot(faceNormal, chart->basis.normal), 1.0f); // @@ normal deviations should be weighted by face area } - float computeRoundnessMetric(Chart *chart, float newBoundaryLength, float newChartArea) const { + float computeRoundnessMetric(Chart *chart, float newBoundaryLength, float newChartArea) const + { const float oldRoundness = square(chart->boundaryLength) / chart->area; const float newRoundness = square(newBoundaryLength) / newChartArea; return 1.0f - oldRoundness / newRoundness; } - float computeStraightnessMetric(Chart *chart, uint32_t firstFace) const { + float computeStraightnessMetric(Chart *chart, uint32_t firstFace) const + { float l_out = 0.0f; // Length of firstFace planar region boundary that doesn't border the chart. float l_in = 0.0f; // Length that does border the chart. const uint32_t planarRegionId = m_planarCharts.regionIdFromFace(firstFace); @@ -5613,7 +5926,8 @@ private: #endif } - bool isNormalSeam(uint32_t edge) const { + bool isNormalSeam(uint32_t edge) const + { const uint32_t oppositeEdge = m_data.mesh->oppositeEdge(edge); if (oppositeEdge == UINT32_MAX) return false; // boundary edge @@ -5633,7 +5947,8 @@ private: return !equal(m_data.faceNormals[f0], m_data.faceNormals[f1], kNormalEpsilon); } - float computeNormalSeamMetric(Chart *chart, uint32_t firstFace) const { + float computeNormalSeamMetric(Chart *chart, uint32_t firstFace) const + { float seamFactor = 0.0f, totalLength = 0.0f; uint32_t face = firstFace; for (;;) { @@ -5673,7 +5988,8 @@ private: return seamFactor / totalLength; } - float computeTextureSeamMetric(Chart *chart, uint32_t firstFace) const { + float computeTextureSeamMetric(Chart *chart, uint32_t firstFace) const + { float seamLength = 0.0f, totalLength = 0.0f; uint32_t face = firstFace; for (;;) { @@ -5699,7 +6015,8 @@ private: return seamLength / totalLength; } - float computeArea(Chart *chart, uint32_t firstFace) 
const { + float computeArea(Chart *chart, uint32_t firstFace) const + { float area = chart->area; uint32_t face = firstFace; for (;;) { @@ -5711,7 +6028,8 @@ private: return area; } - float computeBoundaryLength(Chart *chart, uint32_t firstFace) const { + float computeBoundaryLength(Chart *chart, uint32_t firstFace) const + { float boundaryLength = chart->boundaryLength; // Add new edges, subtract edges shared with the chart. const uint32_t planarRegionId = m_planarCharts.regionIdFromFace(firstFace); @@ -5732,10 +6050,11 @@ private: if (face == firstFace) break; } - return max(0.0f, boundaryLength); // @@ Hack! + return max(0.0f, boundaryLength); // @@ Hack! } - bool mergeChart(Chart *owner, Chart *chart, float sharedBoundaryLength) { + bool mergeChart(Chart *owner, Chart *chart, float sharedBoundaryLength) + { const uint32_t oldOwnerFaceCount = owner->faces.size(); const uint32_t chartFaceCount = chart->faces.size(); owner->faces.push_back(chart->faces); @@ -5793,8 +6112,10 @@ private: bool m_placingSeeds; }; -struct ChartGeneratorType { - enum Enum { +struct ChartGeneratorType +{ + enum Enum + { OriginalUv, Planar, Clustered, @@ -5802,15 +6123,17 @@ struct ChartGeneratorType { }; }; -struct Atlas { - Atlas() : - m_originalUvCharts(m_data), m_planarCharts(m_data), m_clusteredCharts(m_data, m_planarCharts) {} +struct Atlas +{ + Atlas() : m_originalUvCharts(m_data), m_planarCharts(m_data), m_clusteredCharts(m_data, m_planarCharts) {} - uint32_t chartCount() const { + uint32_t chartCount() const + { return m_originalUvCharts.chartCount() + m_planarCharts.chartCount() + m_clusteredCharts.chartCount(); } - ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const { + ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const + { if (chartIndex < m_originalUvCharts.chartCount()) return m_originalUvCharts.chartFaces(chartIndex); chartIndex -= m_originalUvCharts.chartCount(); @@ -5820,7 +6143,8 @@ struct Atlas { return m_clusteredCharts.chartFaces(chartIndex); } 
- const Basis &chartBasis(uint32_t chartIndex) const { + const Basis &chartBasis(uint32_t chartIndex) const + { if (chartIndex < m_originalUvCharts.chartCount()) return m_originalUvCharts.chartBasis(chartIndex); chartIndex -= m_originalUvCharts.chartCount(); @@ -5830,7 +6154,8 @@ struct Atlas { return m_clusteredCharts.chartBasis(chartIndex); } - ChartGeneratorType::Enum chartGeneratorType(uint32_t chartIndex) const { + ChartGeneratorType::Enum chartGeneratorType(uint32_t chartIndex) const + { if (chartIndex < m_originalUvCharts.chartCount()) return ChartGeneratorType::OriginalUv; chartIndex -= m_originalUvCharts.chartCount(); @@ -5839,7 +6164,8 @@ struct Atlas { return ChartGeneratorType::Clustered; } - void reset(const Mesh *mesh, const ChartOptions &options) { + void reset(const Mesh *mesh, const ChartOptions &options) + { XA_PROFILE_START(buildAtlasInit) m_data.options = options; m_data.mesh = mesh; @@ -5847,7 +6173,8 @@ struct Atlas { XA_PROFILE_END(buildAtlasInit) } - void compute() { + void compute() + { if (m_data.options.useInputMeshUvs) { XA_PROFILE_START(originalUvCharts) m_originalUvCharts.compute(); @@ -5868,17 +6195,19 @@ private: ClusteredCharts m_clusteredCharts; }; -struct ComputeUvMeshChartsTaskArgs { +struct ComputeUvMeshChartsTaskArgs +{ UvMesh *mesh; Progress *progress; }; // Charts are found by floodfilling faces without crossing UV seams. 
-struct ComputeUvMeshChartsTask { - ComputeUvMeshChartsTask(ComputeUvMeshChartsTaskArgs *args) : - m_mesh(args->mesh), m_progress(args->progress), m_uvToEdgeMap(MemTag::Default, m_mesh->indices.size()), m_faceAssigned(m_mesh->indices.size() / 3) {} +struct ComputeUvMeshChartsTask +{ + ComputeUvMeshChartsTask(ComputeUvMeshChartsTaskArgs *args) : m_mesh(args->mesh), m_progress(args->progress), m_uvToEdgeMap(MemTag::Default, m_mesh->indices.size()), m_faceAssigned(m_mesh->indices.size() / 3) {} - void run() { + void run() + { const uint32_t vertexCount = m_mesh->texcoords.size(); const uint32_t indexCount = m_mesh->indices.size(); const uint32_t faceCount = indexCount / 3; @@ -5932,7 +6261,8 @@ struct ComputeUvMeshChartsTask { private: // The chart at chartIndex doesn't have to exist yet. - bool canAddFaceToChart(uint32_t chartIndex, uint32_t face) const { + bool canAddFaceToChart(uint32_t chartIndex, uint32_t face) const + { if (m_faceAssigned.get(face)) return false; // Already assigned to a chart. if (m_mesh->faceIgnore.get(face)) @@ -5949,7 +6279,8 @@ private: return true; } - void addFaceToChart(uint32_t chartIndex, uint32_t face) { + void addFaceToChart(uint32_t chartIndex, uint32_t face) + { UvMeshChart *chart = m_mesh->charts[chartIndex]; m_faceAssigned.set(face); chart->faces.push_back(face); @@ -5960,20 +6291,22 @@ private: } } - UvMesh *const m_mesh; - Progress *const m_progress; + UvMesh * const m_mesh; + Progress * const m_progress; HashMap<Vector2> m_uvToEdgeMap; // Face is edge / 3. 
BitArray m_faceAssigned; }; -static void runComputeUvMeshChartsTask(void * /*groupUserData*/, void *taskUserData) { +static void runComputeUvMeshChartsTask(void * /*groupUserData*/, void *taskUserData) +{ XA_PROFILE_START(computeChartsThread) ComputeUvMeshChartsTask task((ComputeUvMeshChartsTaskArgs *)taskUserData); task.run(); XA_PROFILE_END(computeChartsThread) } -static bool computeUvMeshCharts(TaskScheduler *taskScheduler, ArrayView<UvMesh *> meshes, ProgressFunc progressFunc, void *progressUserData) { +static bool computeUvMeshCharts(TaskScheduler *taskScheduler, ArrayView<UvMesh *> meshes, ProgressFunc progressFunc, void *progressUserData) +{ uint32_t totalFaceCount = 0; for (uint32_t i = 0; i < meshes.length; i++) totalFaceCount += meshes[i]->indices.size() / 3; @@ -5981,7 +6314,8 @@ static bool computeUvMeshCharts(TaskScheduler *taskScheduler, ArrayView<UvMesh * TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(nullptr, meshes.length); Array<ComputeUvMeshChartsTaskArgs> taskArgs; taskArgs.resize(meshes.length); - for (uint32_t i = 0; i < meshes.length; i++) { + for (uint32_t i = 0; i < meshes.length; i++) + { ComputeUvMeshChartsTaskArgs &args = taskArgs[i]; args.mesh = meshes[i]; args.progress = &progress; @@ -5999,7 +6333,8 @@ static bool computeUvMeshCharts(TaskScheduler *taskScheduler, ArrayView<UvMesh * namespace param { // Fast sweep in 3 directions -static bool findApproximateDiameterVertices(Mesh *mesh, uint32_t *a, uint32_t *b) { +static bool findApproximateDiameterVertices(Mesh *mesh, uint32_t *a, uint32_t *b) +{ XA_DEBUG_ASSERT(a != nullptr); XA_DEBUG_ASSERT(b != nullptr); const uint32_t vertexCount = mesh->vertexCount(); @@ -6056,7 +6391,8 @@ static bool findApproximateDiameterVertices(Mesh *mesh, uint32_t *a, uint32_t *b // From OpenNL LSCM example. // Computes the coordinates of the vertices of a triangle in a local 2D orthonormal basis of the triangle's plane. 
-static void projectTriangle(Vector3 p0, Vector3 p1, Vector3 p2, Vector2 *z0, Vector2 *z1, Vector2 *z2) { +static void projectTriangle(Vector3 p0, Vector3 p1, Vector3 p2, Vector2 *z0, Vector2 *z1, Vector2 *z2) +{ Vector3 X = normalize(p1 - p0); Vector3 Z = normalize(cross(X, p2 - p0)); Vector3 Y = cross(Z, X); @@ -6068,24 +6404,28 @@ static void projectTriangle(Vector3 p0, Vector3 p1, Vector3 p2, Vector2 *z0, Vec // Conformal relations from Brecht Van Lommel (based on ABF): -static float vec_angle_cos(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3) { +static float vec_angle_cos(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3) +{ Vector3 d1 = v1 - v2; Vector3 d2 = v3 - v2; return clamp(dot(d1, d2) / (length(d1) * length(d2)), -1.0f, 1.0f); } -static float vec_angle(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3) { +static float vec_angle(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3) +{ float dot = vec_angle_cos(v1, v2, v3); return acosf(dot); } -static void triangle_angles(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3, float *a1, float *a2, float *a3) { +static void triangle_angles(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3, float *a1, float *a2, float *a3) +{ *a1 = vec_angle(v3, v1, v2); *a2 = vec_angle(v1, v2, v3); *a3 = kPi - *a2 - *a1; } -static bool setup_abf_relations(opennl::NLContext *context, int id0, int id1, int id2, const Vector3 &p0, const Vector3 &p1, const Vector3 &p2) { +static bool setup_abf_relations(opennl::NLContext *context, int id0, int id1, int id2, const Vector3 &p0, const Vector3 &p1, const Vector3 &p2) +{ // @@ IC: Wouldn't it be more accurate to return cos and compute 1-cos^2? // It does indeed seem to be a little bit more robust. // @@ Need to revisit this more carefully! 
@@ -6142,7 +6482,8 @@ static bool setup_abf_relations(opennl::NLContext *context, int id0, int id1, in return true; } -static bool computeLeastSquaresConformalMap(Mesh *mesh) { +static bool computeLeastSquaresConformalMap(Mesh *mesh) +{ uint32_t lockedVertex0, lockedVertex1; if (!findApproximateDiameterVertices(mesh, &lockedVertex0, &lockedVertex1)) { // Mesh has no boundaries. @@ -6189,16 +6530,16 @@ static bool computeLeastSquaresConformalMap(Mesh *mesh) { // Note : b = 0 // Real part opennl::nlBegin(context, NL_ROW); - opennl::nlCoefficient(context, u0_id, -a + c); - opennl::nlCoefficient(context, v0_id, b - d); - opennl::nlCoefficient(context, u1_id, -c); - opennl::nlCoefficient(context, v1_id, d); + opennl::nlCoefficient(context, u0_id, -a+c) ; + opennl::nlCoefficient(context, v0_id, b-d) ; + opennl::nlCoefficient(context, u1_id, -c) ; + opennl::nlCoefficient(context, v1_id, d) ; opennl::nlCoefficient(context, u2_id, a); opennl::nlEnd(context, NL_ROW); // Imaginary part opennl::nlBegin(context, NL_ROW); - opennl::nlCoefficient(context, u0_id, -b + d); - opennl::nlCoefficient(context, v0_id, -a + c); + opennl::nlCoefficient(context, u0_id, -b+d); + opennl::nlCoefficient(context, v0_id, -a+c); opennl::nlCoefficient(context, u1_id, -d); opennl::nlCoefficient(context, v1_id, -c); opennl::nlCoefficient(context, v2_id, a); @@ -6222,8 +6563,10 @@ static bool computeLeastSquaresConformalMap(Mesh *mesh) { return true; } -struct PiecewiseParam { - void reset(const Mesh *mesh) { +struct PiecewiseParam +{ + void reset(const Mesh *mesh) + { m_mesh = mesh; const uint32_t faceCount = m_mesh->faceCount(); const uint32_t vertexCount = m_mesh->vertexCount(); @@ -6241,7 +6584,8 @@ struct PiecewiseParam { ConstArrayView<uint32_t> chartFaces() const { return m_patch; } ConstArrayView<Vector2> texcoords() const { return m_texcoords; } - bool computeChart() { + bool computeChart() + { // Clear per-patch state. 
m_patch.clear(); m_candidates.clear(); @@ -6370,7 +6714,8 @@ struct PiecewiseParam { } private: - struct Candidate { + struct Candidate + { uint32_t face, vertex; Candidate *prev, *next; // The previous/next candidate with the same vertex. Vector2 position; @@ -6380,14 +6725,10 @@ private: float patchVertexOrient; }; - struct CandidateIterator { - CandidateIterator(Candidate *head) : - m_current(head) { XA_DEBUG_ASSERT(!head->prev); } - void advance() { - if (m_current != nullptr) { - m_current = m_current->next; - } - } + struct CandidateIterator + { + CandidateIterator(Candidate *head) : m_current(head) { XA_DEBUG_ASSERT(!head->prev); } + void advance() { if (m_current != nullptr) { m_current = m_current->next; } } bool isDone() const { return !m_current; } Candidate *current() { return m_current; } @@ -6406,7 +6747,8 @@ private: UniformGrid2 m_boundaryGrid; Array<uint32_t> m_newBoundaryEdges, m_ignoreBoundaryEdges; // Temp arrays used when testing for boundary intersection. - void addFaceToPatch(uint32_t face) { + void addFaceToPatch(uint32_t face) + { XA_DEBUG_ASSERT(!m_faceInPatch.get(face)); XA_DEBUG_ASSERT(!m_faceInAnyPatch.get(face)); m_patch.push_back(face); @@ -6446,7 +6788,8 @@ private: } } - void addCandidateFace(uint32_t patchEdge, float patchVertexOrient, uint32_t face, uint32_t edge, uint32_t freeVertex) { + void addCandidateFace(uint32_t patchEdge, float patchVertexOrient, uint32_t face, uint32_t edge, uint32_t freeVertex) + { XA_DEBUG_ASSERT(!m_faceToCandidate[face]); Vector2 texcoords[3]; orthoProjectFace(face, texcoords); @@ -6549,7 +6892,8 @@ private: it.current()->maxCost = maxCost; } - Candidate *linkedCandidateHead(Candidate *candidate) { + Candidate *linkedCandidateHead(Candidate *candidate) + { Candidate *current = candidate; for (;;) { if (!current->prev) @@ -6559,7 +6903,8 @@ private: return current; } - void removeLinkedCandidates(Candidate *head) { + void removeLinkedCandidates(Candidate *head) + { XA_DEBUG_ASSERT(!head->prev); 
Candidate *current = head; while (current) { @@ -6576,7 +6921,8 @@ private: } } - void orthoProjectFace(uint32_t face, Vector2 *texcoords) const { + void orthoProjectFace(uint32_t face, Vector2 *texcoords) const + { const Vector3 normal = -m_mesh->computeFaceNormal(face); const Vector3 tangent = normalize(m_mesh->position(m_mesh->vertexAt(face * 3 + 1)) - m_mesh->position(m_mesh->vertexAt(face * 3 + 0))); const Vector3 bitangent = cross(normal, tangent); @@ -6586,14 +6932,16 @@ private: } } - float parametricArea(const Vector2 *texcoords) const { + float parametricArea(const Vector2 *texcoords) const + { const Vector2 &v1 = texcoords[0]; const Vector2 &v2 = texcoords[1]; const Vector2 &v3 = texcoords[2]; return ((v2.x - v1.x) * (v3.y - v1.y) - (v3.x - v1.x) * (v2.y - v1.y)) * 0.5f; } - float computeStretch(Vector3 p1, Vector3 p2, Vector3 p3, Vector2 t1, Vector2 t2, Vector2 t3) const { + float computeStretch(Vector3 p1, Vector3 p2, Vector3 p3, Vector2 t1, Vector2 t2, Vector2 t3) const + { float parametricArea = ((t2.y - t1.y) * (t3.x - t1.x) - (t3.y - t1.y) * (t2.x - t1.x)) * 0.5f; if (isZero(parametricArea, kAreaEpsilon)) return FLT_MAX; @@ -6607,13 +6955,15 @@ private: } // Return value is positive if the point is one side of the edge, negative if on the other side. - float orientToEdge(Vector2 edgeVertex0, Vector2 edgeVertex1, Vector2 point) const { + float orientToEdge(Vector2 edgeVertex0, Vector2 edgeVertex1, Vector2 point) const + { return (edgeVertex0.x - point.x) * (edgeVertex1.y - point.y) - (edgeVertex0.y - point.y) * (edgeVertex1.x - point.x); } }; // Estimate quality of existing parameterization. 
-struct Quality { +struct Quality +{ // computeBoundaryIntersection bool boundaryIntersection = false; @@ -6630,7 +6980,8 @@ struct Quality { float conformalMetric = 0.0f; float authalicMetric = 0.0f; - void computeBoundaryIntersection(const Mesh *mesh, UniformGrid2 &boundaryGrid) { + void computeBoundaryIntersection(const Mesh *mesh, UniformGrid2 &boundaryGrid) + { const Array<uint32_t> &boundaryEdges = mesh->boundaryEdges(); const uint32_t boundaryEdgeCount = boundaryEdges.size(); boundaryGrid.reset(mesh->texcoords(), mesh->indices(), boundaryEdgeCount); @@ -6646,7 +6997,8 @@ struct Quality { #endif } - void computeFlippedFaces(const Mesh *mesh, Array<uint32_t> *flippedFaces) { + void computeFlippedFaces(const Mesh *mesh, Array<uint32_t> *flippedFaces) + { totalTriangleCount = flippedTriangleCount = zeroAreaTriangleCount = 0; if (flippedFaces) flippedFaces->clear(); @@ -6682,7 +7034,8 @@ struct Quality { flippedFaces->clear(); flippedTriangleCount = 0; } - if (flippedTriangleCount > totalTriangleCount / 2) { + if (flippedTriangleCount > totalTriangleCount / 2) + { // If more than half the triangles are flipped, reverse the flipped / not flipped classification. flippedTriangleCount = totalTriangleCount - flippedTriangleCount; if (flippedFaces) { @@ -6704,7 +7057,8 @@ struct Quality { } } - void computeMetrics(const Mesh *mesh) { + void computeMetrics(const Mesh *mesh) + { totalGeometricArea = totalParametricArea = 0.0f; stretchMetric = maxStretchMetric = conformalMetric = authalicMetric = 0.0f; const uint32_t faceCount = mesh->faceCount(); @@ -6736,7 +7090,7 @@ struct Quality { const float a = dot(Ss, Ss); // E const float b = dot(Ss, St); // F const float c = dot(St, St); // G - // Compute eigen-values of the first fundamental form: + // Compute eigen-values of the first fundamental form: const float sigma1 = sqrtf(0.5f * max(0.0f, a + c - sqrtf(square(a - c) + 4 * square(b)))); // gamma uppercase, min eigenvalue. 
const float sigma2 = sqrtf(0.5f * max(0.0f, a + c + sqrtf(square(a - c) + 4 * square(b)))); // gamma lowercase, max eigenvalue. XA_ASSERT(sigma2 > sigma1 || equal(sigma1, sigma2, kEpsilon)); @@ -6767,22 +7121,24 @@ struct Quality { if (totalGeometricArea > 0.0f) { const float normFactor = sqrtf(totalParametricArea / totalGeometricArea); stretchMetric = sqrtf(stretchMetric / totalGeometricArea) * normFactor; - maxStretchMetric *= normFactor; + maxStretchMetric *= normFactor; conformalMetric = sqrtf(conformalMetric / totalGeometricArea); authalicMetric = sqrtf(authalicMetric / totalGeometricArea); } } }; -struct ChartCtorBuffers { +struct ChartCtorBuffers +{ Array<uint32_t> chartMeshIndices; Array<uint32_t> unifiedMeshIndices; }; -class Chart { +class Chart +{ public: - Chart(const Basis &basis, segment::ChartGeneratorType::Enum generatorType, ConstArrayView<uint32_t> faces, const Mesh *sourceMesh, uint32_t chartGroupId, uint32_t chartId) : - m_basis(basis), m_unifiedMesh(nullptr), m_type(ChartType::LSCM), m_generatorType(generatorType), m_tjunctionCount(0), m_originalVertexCount(0), m_isInvalid(false) { + Chart(const Basis &basis, segment::ChartGeneratorType::Enum generatorType, ConstArrayView<uint32_t> faces, const Mesh *sourceMesh, uint32_t chartGroupId, uint32_t chartId) : m_basis(basis), m_unifiedMesh(nullptr), m_type(ChartType::LSCM), m_generatorType(generatorType), m_tjunctionCount(0), m_originalVertexCount(0), m_isInvalid(false) + { XA_UNUSED(chartGroupId); XA_UNUSED(chartId); m_faceToSourceFaceMap.copyFrom(faces.data, faces.length); @@ -6813,8 +7169,7 @@ public: m_chartVertexToUnifiedVertexMap.push_back(unifiedVertex); m_originalVertexCount++; } - m_originalIndices[f * 3 + i] = sourceVertexToChartVertexMap.get(sourceVertex); - ; + m_originalIndices[f * 3 + i] = sourceVertexToChartVertexMap.get(sourceVertex);; XA_DEBUG_ASSERT(m_originalIndices[f * 3 + i] != UINT32_MAX); unifiedIndices[i] = sourceVertexToUnifiedVertexMap.get(sourceUnifiedVertex); 
XA_DEBUG_ASSERT(unifiedIndices[i] != UINT32_MAX); @@ -6838,8 +7193,8 @@ public: #endif } - Chart(ChartCtorBuffers &buffers, const Chart *parent, const Mesh *parentMesh, ConstArrayView<uint32_t> faces, ConstArrayView<Vector2> texcoords, const Mesh *sourceMesh) : - m_unifiedMesh(nullptr), m_type(ChartType::Piecewise), m_generatorType(segment::ChartGeneratorType::Piecewise), m_tjunctionCount(0), m_originalVertexCount(0), m_isInvalid(false) { + Chart(ChartCtorBuffers &buffers, const Chart *parent, const Mesh *parentMesh, ConstArrayView<uint32_t> faces, ConstArrayView<Vector2> texcoords, const Mesh *sourceMesh) : m_unifiedMesh(nullptr), m_type(ChartType::Piecewise), m_generatorType(segment::ChartGeneratorType::Piecewise), m_tjunctionCount(0), m_originalVertexCount(0), m_isInvalid(false) + { const uint32_t faceCount = faces.length; m_faceToSourceFaceMap.resize(faceCount); for (uint32_t i = 0; i < faceCount; i++) @@ -6886,7 +7241,8 @@ public: backupTexcoords(); } - ~Chart() { + ~Chart() + { if (m_unifiedMesh) { m_unifiedMesh->~Mesh(); XA_FREE(m_unifiedMesh); @@ -6914,7 +7270,8 @@ public: ConstArrayView<uint32_t> originalVertices() const { return m_originalIndices; } - void parameterize(const ChartOptions &options, UniformGrid2 &boundaryGrid) { + void parameterize(const ChartOptions &options, UniformGrid2 &boundaryGrid) + { const uint32_t unifiedVertexCount = m_unifiedMesh->vertexCount(); if (m_generatorType == segment::ChartGeneratorType::OriginalUv) { } else { @@ -6938,7 +7295,8 @@ public: XA_PROFILE_START(parameterizeChartsLSCM) if (options.paramFunc) { options.paramFunc(&m_unifiedMesh->position(0).x, &m_unifiedMesh->texcoord(0).x, m_unifiedMesh->vertexCount(), m_unifiedMesh->indices().data, m_unifiedMesh->indexCount()); - } else + } + else computeLeastSquaresConformalMap(m_unifiedMesh); XA_PROFILE_END(parameterizeChartsLSCM) XA_PROFILE_START(parameterizeChartsEvaluateQuality) @@ -6980,7 +7338,8 @@ public: backupTexcoords(); } - Vector2 computeParametricBounds() const { 
+ Vector2 computeParametricBounds() const + { Vector2 minCorner(FLT_MAX, FLT_MAX); Vector2 maxCorner(-FLT_MAX, -FLT_MAX); const uint32_t vertexCount = m_unifiedMesh->vertexCount(); @@ -6992,7 +7351,8 @@ public: } #if XA_CHECK_PIECEWISE_CHART_QUALITY - void evaluateQuality(UniformGrid2 &boundaryGrid) { + void evaluateQuality(UniformGrid2 &boundaryGrid) + { m_quality.computeBoundaryIntersection(m_unifiedMesh, boundaryGrid); #if XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION m_quality.computeFlippedFaces(m_unifiedMesh, &m_paramFlippedFaces); @@ -7004,12 +7364,14 @@ public: } #endif - void restoreTexcoords() { + void restoreTexcoords() + { memcpy(m_unifiedMesh->texcoords().data, m_backupTexcoords.data(), m_unifiedMesh->vertexCount() * sizeof(Vector2)); } private: - void backupTexcoords() { + void backupTexcoords() + { m_backupTexcoords.resize(m_unifiedMesh->vertexCount()); memcpy(m_backupTexcoords.data(), m_unifiedMesh->texcoords().data, m_unifiedMesh->vertexCount() * sizeof(Vector2)); } @@ -7040,7 +7402,8 @@ private: bool m_isInvalid; }; -struct CreateAndParameterizeChartTaskGroupArgs { +struct CreateAndParameterizeChartTaskGroupArgs +{ Progress *progress; ThreadLocal<UniformGrid2> *boundaryGrid; ThreadLocal<ChartCtorBuffers> *chartBuffers; @@ -7048,7 +7411,8 @@ struct CreateAndParameterizeChartTaskGroupArgs { ThreadLocal<PiecewiseParam> *pp; }; -struct CreateAndParameterizeChartTaskArgs { +struct CreateAndParameterizeChartTaskArgs +{ const Basis *basis; Chart *chart; // output Array<Chart *> charts; // output (if more than one chart) @@ -7059,7 +7423,8 @@ struct CreateAndParameterizeChartTaskArgs { uint32_t chartId; }; -static void runCreateAndParameterizeChartTask(void *groupUserData, void *taskUserData) { +static void runCreateAndParameterizeChartTask(void *groupUserData, void *taskUserData) +{ XA_PROFILE_START(createChartMeshAndParameterizeThread) auto groupArgs = (CreateAndParameterizeChartTaskGroupArgs *)groupUserData; auto args = 
(CreateAndParameterizeChartTaskArgs *)taskUserData; @@ -7130,13 +7495,15 @@ static void runCreateAndParameterizeChartTask(void *groupUserData, void *taskUse } // Set of charts corresponding to mesh faces in the same face group. -class ChartGroup { +class ChartGroup +{ public: - ChartGroup(uint32_t id, const Mesh *sourceMesh, const MeshFaceGroups *sourceMeshFaceGroups, MeshFaceGroups::Handle faceGroup) : - m_id(id), m_sourceMesh(sourceMesh), m_sourceMeshFaceGroups(sourceMeshFaceGroups), m_faceGroup(faceGroup) { + ChartGroup(uint32_t id, const Mesh *sourceMesh, const MeshFaceGroups *sourceMeshFaceGroups, MeshFaceGroups::Handle faceGroup) : m_id(id), m_sourceMesh(sourceMesh), m_sourceMeshFaceGroups(sourceMeshFaceGroups), m_faceGroup(faceGroup) + { } - ~ChartGroup() { + ~ChartGroup() + { for (uint32_t i = 0; i < m_charts.size(); i++) { m_charts[i]->~Chart(); XA_FREE(m_charts[i]); @@ -7147,7 +7514,8 @@ public: Chart *chartAt(uint32_t i) const { return m_charts[i]; } uint32_t faceCount() const { return m_sourceMeshFaceGroups->faceCount(m_faceGroup); } - void computeCharts(TaskScheduler *taskScheduler, const ChartOptions &options, Progress *progress, segment::Atlas &atlas, ThreadLocal<UniformGrid2> *boundaryGrid, ThreadLocal<ChartCtorBuffers> *chartBuffers, ThreadLocal<PiecewiseParam> *piecewiseParam) { + void computeCharts(TaskScheduler *taskScheduler, const ChartOptions &options, Progress *progress, segment::Atlas &atlas, ThreadLocal<UniformGrid2> *boundaryGrid, ThreadLocal<ChartCtorBuffers> *chartBuffers, ThreadLocal<PiecewiseParam> *piecewiseParam) + { // This function may be called multiple times, so destroy existing charts. for (uint32_t i = 0; i < m_charts.size(); i++) { m_charts[i]->~Chart(); @@ -7291,7 +7659,8 @@ public: } private: - Mesh *createMesh() { + Mesh *createMesh() + { XA_DEBUG_ASSERT(m_faceGroup != MeshFaceGroups::kInvalid); // Create new mesh from the source mesh, using faces that belong to this group. 
m_faceToSourceFaceMap.reserve(m_sourceMeshFaceGroups->faceCount(m_faceGroup)); @@ -7345,14 +7714,15 @@ private: } const uint32_t m_id; - const Mesh *const m_sourceMesh; - const MeshFaceGroups *const m_sourceMeshFaceGroups; + const Mesh * const m_sourceMesh; + const MeshFaceGroups * const m_sourceMeshFaceGroups; const MeshFaceGroups::Handle m_faceGroup; Array<uint32_t> m_faceToSourceFaceMap; // List of faces of the source mesh that belong to this chart group. Array<Chart *> m_charts; }; -struct ChartGroupComputeChartsTaskGroupArgs { +struct ChartGroupComputeChartsTaskGroupArgs +{ ThreadLocal<segment::Atlas> *atlas; const ChartOptions *options; Progress *progress; @@ -7362,7 +7732,8 @@ struct ChartGroupComputeChartsTaskGroupArgs { ThreadLocal<PiecewiseParam> *piecewiseParam; }; -static void runChartGroupComputeChartsTask(void *groupUserData, void *taskUserData) { +static void runChartGroupComputeChartsTask(void *groupUserData, void *taskUserData) +{ auto args = (ChartGroupComputeChartsTaskGroupArgs *)groupUserData; auto chartGroup = (ChartGroup *)taskUserData; if (args->progress->cancel) @@ -7372,7 +7743,8 @@ static void runChartGroupComputeChartsTask(void *groupUserData, void *taskUserDa XA_PROFILE_END(chartGroupComputeChartsThread) } -struct MeshComputeChartsTaskGroupArgs { +struct MeshComputeChartsTaskGroupArgs +{ ThreadLocal<segment::Atlas> *atlas; const ChartOptions *options; Progress *progress; @@ -7382,7 +7754,8 @@ struct MeshComputeChartsTaskGroupArgs { ThreadLocal<PiecewiseParam> *piecewiseParam; }; -struct MeshComputeChartsTaskArgs { +struct MeshComputeChartsTaskArgs +{ const Mesh *sourceMesh; Array<ChartGroup *> *chartGroups; // output InvalidMeshGeometry *invalidMeshGeometry; // output @@ -7392,7 +7765,8 @@ struct MeshComputeChartsTaskArgs { static uint32_t s_faceGroupsCurrentVertex = 0; #endif -static void runMeshComputeChartsTask(void *groupUserData, void *taskUserData) { +static void runMeshComputeChartsTask(void *groupUserData, void *taskUserData) +{ 
auto groupArgs = (MeshComputeChartsTaskGroupArgs *)groupUserData; auto args = (MeshComputeChartsTaskArgs *)taskUserData; if (groupArgs->progress->cancel) @@ -7491,12 +7865,13 @@ cleanup: } /// An atlas is a set of chart groups. -class Atlas { +class Atlas +{ public: - Atlas() : - m_chartsComputed(false) {} + Atlas() : m_chartsComputed(false) {} - ~Atlas() { + ~Atlas() + { for (uint32_t i = 0; i < m_meshChartGroups.size(); i++) { for (uint32_t j = 0; j < m_meshChartGroups[i].size(); j++) { m_meshChartGroups[i][j]->~ChartGroup(); @@ -7513,11 +7888,13 @@ public: uint32_t chartGroupCount(uint32_t mesh) const { return m_meshChartGroups[mesh].size(); } const ChartGroup *chartGroupAt(uint32_t mesh, uint32_t group) const { return m_meshChartGroups[mesh][group]; } - void addMesh(const Mesh *mesh) { + void addMesh(const Mesh *mesh) + { m_meshes.push_back(mesh); } - bool computeCharts(TaskScheduler *taskScheduler, const ChartOptions &options, ProgressFunc progressFunc, void *progressUserData) { + bool computeCharts(TaskScheduler *taskScheduler, const ChartOptions &options, ProgressFunc progressFunc, void *progressUserData) + { XA_PROFILE_START(computeChartsReal) #if XA_DEBUG_EXPORT_OBJ_PLANAR_REGIONS segment::s_planarRegionsCurrentRegion = segment::s_planarRegionsCurrentVertex = 0; @@ -7591,7 +7968,7 @@ public: private: Array<const Mesh *> m_meshes; Array<InvalidMeshGeometry> m_invalidMeshGeometry; // 1 per mesh. 
- Array<Array<ChartGroup *>> m_meshChartGroups; + Array<Array<ChartGroup *> > m_meshChartGroups; bool m_chartsComputed; }; @@ -7599,15 +7976,17 @@ private: namespace pack { -class AtlasImage { +class AtlasImage +{ public: - AtlasImage(uint32_t width, uint32_t height) : - m_width(width), m_height(height) { + AtlasImage(uint32_t width, uint32_t height) : m_width(width), m_height(height) + { m_data.resize(m_width * m_height); memset(m_data.data(), 0, sizeof(uint32_t) * m_data.size()); } - void resize(uint32_t width, uint32_t height) { + void resize(uint32_t width, uint32_t height) + { Array<uint32_t> data; data.resize(width * height); memset(data.data(), 0, sizeof(uint32_t) * data.size()); @@ -7618,7 +7997,8 @@ public: data.moveTo(m_data); } - void addChart(uint32_t chartIndex, const BitImage *image, const BitImage *imageBilinear, const BitImage *imagePadding, int atlas_w, int atlas_h, int offset_x, int offset_y) { + void addChart(uint32_t chartIndex, const BitImage *image, const BitImage *imageBilinear, const BitImage *imagePadding, int atlas_w, int atlas_h, int offset_x, int offset_y) + { const int w = image->width(); const int h = image->height(); for (int y = 0; y < h; y++) { @@ -7644,13 +8024,15 @@ public: } } - void copyTo(uint32_t *dest, uint32_t destWidth, uint32_t destHeight, int padding) const { + void copyTo(uint32_t *dest, uint32_t destWidth, uint32_t destHeight, int padding) const + { for (uint32_t y = 0; y < destHeight; y++) memcpy(&dest[y * destWidth], &m_data[padding + (y + padding) * m_width], destWidth * sizeof(uint32_t)); } #if XA_DEBUG_EXPORT_ATLAS_IMAGES - void writeTga(const char *filename, uint32_t width, uint32_t height) const { + void writeTga(const char *filename, uint32_t width, uint32_t height) const + { Array<uint8_t> image; image.resize(width * height * 3); for (uint32_t y = 0; y < height; y++) { @@ -7692,7 +8074,8 @@ private: Array<uint32_t> m_data; }; -struct Chart { +struct Chart +{ int32_t atlasIndex; uint32_t material; 
ConstArrayView<uint32_t> indices; @@ -7711,12 +8094,14 @@ struct Chart { uint32_t uniqueVertexCount() const { return uniqueVertices.isEmpty() ? vertices.length : uniqueVertices.size(); } }; -struct AddChartTaskArgs { +struct AddChartTaskArgs +{ param::Chart *paramChart; Chart *chart; // out }; -static void runAddChartTask(void *groupUserData, void *taskUserData) { +static void runAddChartTask(void *groupUserData, void *taskUserData) +{ XA_PROFILE_START(packChartsAddChartsThread) auto boundingBox = (ThreadLocal<BoundingBox2D> *)groupUserData; auto args = (AddChartTaskArgs *)taskUserData; @@ -7753,8 +8138,10 @@ static void runAddChartTask(void *groupUserData, void *taskUserData) { XA_PROFILE_END(packChartsAddChartsThread) } -struct Atlas { - ~Atlas() { +struct Atlas +{ + ~Atlas() + { for (uint32_t i = 0; i < m_atlasImages.size(); i++) { m_atlasImages[i]->~AtlasImage(); XA_FREE(m_atlasImages[i]); @@ -7778,7 +8165,8 @@ struct Atlas { const Array<AtlasImage *> &getImages() const { return m_atlasImages; } float getUtilization(uint32_t atlas) const { return m_utilization[atlas]; } - void addCharts(TaskScheduler *taskScheduler, param::Atlas *paramAtlas) { + void addCharts(TaskScheduler *taskScheduler, param::Atlas *paramAtlas) + { // Count charts. uint32_t chartCount = 0; for (uint32_t i = 0; i < paramAtlas->meshCount(); i++) { @@ -7819,7 +8207,8 @@ struct Atlas { m_charts[i] = taskArgs[i].chart; } - void addUvMeshCharts(UvMeshInstance *mesh) { + void addUvMeshCharts(UvMeshInstance *mesh) + { // Copy texcoords from mesh. mesh->texcoords.resize(mesh->mesh->texcoords.size()); memcpy(mesh->texcoords.data(), mesh->mesh->texcoords.data(), mesh->texcoords.size() * sizeof(Vector2)); @@ -7882,7 +8271,8 @@ struct Atlas { } // Pack charts in the smallest possible rectangle. 
- bool packCharts(const PackOptions &options, ProgressFunc progressFunc, void *progressUserData) { + bool packCharts(const PackOptions &options, ProgressFunc progressFunc, void *progressUserData) + { if (progressFunc) { if (!progressFunc(ProgressCategory::PackCharts, 0, progressUserData)) return false; @@ -8116,7 +8506,8 @@ struct Atlas { int best_x = 0, best_y = 0; int best_cw = 0, best_ch = 0; int best_r = 0; - for (;;) { + for (;;) + { #if XA_DEBUG bool firstChartInBitImage = false; #endif @@ -8152,7 +8543,8 @@ struct Atlas { if (best_x + best_cw > atlasSizes[currentAtlas].x || best_y + best_ch > atlasSizes[currentAtlas].y) { for (uint32_t j = 0; j < chartStartPositions.size(); j++) chartStartPositions[j] = Vector2i(0, 0); - } else { + } + else { chartStartPositions[currentAtlas] = Vector2i(best_x, best_y); } } @@ -8240,7 +8632,8 @@ struct Atlas { } if (m_utilization.size() > 1) { XA_PRINT(" %u: %f%% utilization\n", i, m_utilization[i] * 100.0f); - } else { + } + else { XA_PRINT(" %f%% utilization\n", m_utilization[i] * 100.0f); } } @@ -8259,14 +8652,16 @@ struct Atlas { } private: - bool findChartLocation(const PackOptions &options, const Vector2i &startPosition, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, uint32_t maxResolution) { + bool findChartLocation(const PackOptions &options, const Vector2i &startPosition, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, uint32_t maxResolution) + { const int attempts = 4096; if (options.bruteForce || attempts >= w * h) return findChartLocation_bruteForce(options, startPosition, atlasBitImage, chartBitImage, chartBitImageRotated, w, h, best_x, best_y, best_w, best_h, best_r, maxResolution); return findChartLocation_random(options, atlasBitImage, chartBitImage, 
chartBitImageRotated, w, h, best_x, best_y, best_w, best_h, best_r, attempts, maxResolution); } - bool findChartLocation_bruteForce(const PackOptions &options, const Vector2i &startPosition, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, uint32_t maxResolution) { + bool findChartLocation_bruteForce(const PackOptions &options, const Vector2i &startPosition, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, uint32_t maxResolution) + { const int stepSize = options.blockAlign ? 4 : 1; int best_metric = INT_MAX; // Try two different orientations. @@ -8311,7 +8706,8 @@ private: return best_metric != INT_MAX; } - bool findChartLocation_random(const PackOptions &options, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, int attempts, uint32_t maxResolution) { + bool findChartLocation_random(const PackOptions &options, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, int attempts, uint32_t maxResolution) + { bool result = false; const int BLOCK_SIZE = 4; int best_metric = INT_MAX; @@ -8366,7 +8762,8 @@ private: return result; } - void addChart(BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int atlas_w, int atlas_h, int offset_x, int offset_y, int r) { + void addChart(BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int atlas_w, int atlas_h, int offset_x, int offset_y, int r) + { XA_DEBUG_ASSERT(r == 0 || r == 1); const BitImage *image = r == 0 ? 
chartBitImage : chartBitImageRotated; const int w = image->width(); @@ -8389,7 +8786,8 @@ private: } } - void bilinearExpand(const Chart *chart, BitImage *source, BitImage *dest, BitImage *destRotated, UniformGrid2 &boundaryEdgeGrid) const { + void bilinearExpand(const Chart *chart, BitImage *source, BitImage *dest, BitImage *destRotated, UniformGrid2 &boundaryEdgeGrid) const + { boundaryEdgeGrid.reset(chart->vertices, chart->indices); if (chart->boundaryEdges) { const uint32_t edgeCount = chart->boundaryEdges->size(); @@ -8444,11 +8842,13 @@ private: } } - struct DrawTriangleCallbackArgs { + struct DrawTriangleCallbackArgs + { BitImage *chartBitImage, *chartBitImageRotated; }; - static bool drawTriangleCallback(void *param, int x, int y) { + static bool drawTriangleCallback(void *param, int x, int y) + { auto args = (DrawTriangleCallbackArgs *)param; args->chartBitImage->set(x, y); if (args->chartBitImageRotated) @@ -8471,13 +8871,15 @@ private: } // namespace internal // Used to map triangulated polygons back to polygons. -struct MeshPolygonMapping { +struct MeshPolygonMapping +{ internal::Array<uint8_t> faceVertexCount; // Copied from MeshDecl::faceVertexCount. internal::Array<uint32_t> triangleToPolygonMap; // Triangle index (mesh face index) to polygon index. internal::Array<uint32_t> triangleToPolygonIndicesMap; // Triangle indices to polygon indices. 
}; -struct Context { +struct Context +{ Atlas atlas; internal::Progress *addMeshProgress = nullptr; internal::TaskGroupHandle addMeshTaskGroup; @@ -8492,14 +8894,16 @@ struct Context { bool uvMeshChartsComputed = false; }; -Atlas *Create() { +Atlas *Create() +{ Context *ctx = XA_NEW(internal::MemTag::Default, Context); memset(&ctx->atlas, 0, sizeof(Atlas)); ctx->taskScheduler = XA_NEW(internal::MemTag::Default, internal::TaskScheduler); return &ctx->atlas; } -static void DestroyOutputMeshes(Context *ctx) { +static void DestroyOutputMeshes(Context *ctx) +{ if (!ctx->atlas.meshes) return; for (int i = 0; i < (int)ctx->atlas.meshCount; i++) { @@ -8520,7 +8924,8 @@ static void DestroyOutputMeshes(Context *ctx) { ctx->atlas.meshes = nullptr; } -void Destroy(Atlas *atlas) { +void Destroy(Atlas *atlas) +{ XA_DEBUG_ASSERT(atlas); Context *ctx = (Context *)atlas; if (atlas->utilization) @@ -8567,14 +8972,15 @@ void Destroy(Atlas *atlas) { #endif } -static void runAddMeshTask(void *groupUserData, void *taskUserData) { +static void runAddMeshTask(void *groupUserData, void *taskUserData) +{ XA_PROFILE_START(addMeshThread) auto ctx = (Context *)groupUserData; auto mesh = (internal::Mesh *)taskUserData; internal::Progress *progress = ctx->addMeshProgress; if (progress->cancel) { XA_PROFILE_END(addMeshThread) - return; + return; } XA_PROFILE_START(addMeshCreateColocals) mesh->createColocals(); @@ -8587,32 +8993,37 @@ static void runAddMeshTask(void *groupUserData, void *taskUserData) { XA_PROFILE_END(addMeshThread) } -static internal::Vector3 DecodePosition(const MeshDecl &meshDecl, uint32_t index) { +static internal::Vector3 DecodePosition(const MeshDecl &meshDecl, uint32_t index) +{ XA_DEBUG_ASSERT(meshDecl.vertexPositionData); XA_DEBUG_ASSERT(meshDecl.vertexPositionStride > 0); return *((const internal::Vector3 *)&((const uint8_t *)meshDecl.vertexPositionData)[meshDecl.vertexPositionStride * index]); } -static internal::Vector3 DecodeNormal(const MeshDecl &meshDecl, uint32_t 
index) { +static internal::Vector3 DecodeNormal(const MeshDecl &meshDecl, uint32_t index) +{ XA_DEBUG_ASSERT(meshDecl.vertexNormalData); XA_DEBUG_ASSERT(meshDecl.vertexNormalStride > 0); return *((const internal::Vector3 *)&((const uint8_t *)meshDecl.vertexNormalData)[meshDecl.vertexNormalStride * index]); } -static internal::Vector2 DecodeUv(const MeshDecl &meshDecl, uint32_t index) { +static internal::Vector2 DecodeUv(const MeshDecl &meshDecl, uint32_t index) +{ XA_DEBUG_ASSERT(meshDecl.vertexUvData); XA_DEBUG_ASSERT(meshDecl.vertexUvStride > 0); return *((const internal::Vector2 *)&((const uint8_t *)meshDecl.vertexUvData)[meshDecl.vertexUvStride * index]); } -static uint32_t DecodeIndex(IndexFormat format, const void *indexData, int32_t offset, uint32_t i) { +static uint32_t DecodeIndex(IndexFormat format, const void *indexData, int32_t offset, uint32_t i) +{ XA_DEBUG_ASSERT(indexData); if (format == IndexFormat::UInt16) return uint16_t((int32_t)((const uint16_t *)indexData)[i] + offset); return uint32_t((int32_t)((const uint32_t *)indexData)[i] + offset); } -AddMeshError AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountHint) { +AddMeshError AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountHint) +{ XA_DEBUG_ASSERT(atlas); if (!atlas) { XA_PRINT_WARNING("AddMesh: atlas is null.\n"); @@ -8630,7 +9041,8 @@ AddMeshError AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountH // Don't know how many times AddMesh will be called, so progress needs to adjusted each time. 
if (!ctx->addMeshProgress) { ctx->addMeshProgress = XA_NEW_ARGS(internal::MemTag::Default, internal::Progress, ProgressCategory::AddMesh, ctx->progressFunc, ctx->progressUserData, 1); - } else { + } + else { ctx->addMeshProgress->setMaxValue(internal::max(ctx->meshes.size() + 1, meshCountHint)); } XA_PROFILE_START(addMeshCopyData) @@ -8804,7 +9216,8 @@ AddMeshError AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountH return AddMeshError::Success; } -void AddMeshJoin(Atlas *atlas) { +void AddMeshJoin(Atlas *atlas) +{ XA_DEBUG_ASSERT(atlas); if (!atlas) { XA_PRINT_WARNING("AddMeshJoin: atlas is null.\n"); @@ -8847,7 +9260,8 @@ void AddMeshJoin(Atlas *atlas) { } } -AddMeshError AddUvMesh(Atlas *atlas, const UvMeshDecl &decl) { +AddMeshError AddUvMesh(Atlas *atlas, const UvMeshDecl &decl) +{ XA_DEBUG_ASSERT(atlas); if (!atlas) { XA_PRINT_WARNING("AddUvMesh: atlas is null.\n"); @@ -8948,7 +9362,8 @@ AddMeshError AddUvMesh(Atlas *atlas, const UvMeshDecl &decl) { return AddMeshError::Success; } -void ComputeCharts(Atlas *atlas, ChartOptions options) { +void ComputeCharts(Atlas *atlas, ChartOptions options) +{ if (!atlas) { XA_PRINT_WARNING("ComputeCharts: atlas is null.\n"); return; @@ -9136,7 +9551,8 @@ void ComputeCharts(Atlas *atlas, ChartOptions options) { XA_PRINT_MEM_USAGE } -void PackCharts(Atlas *atlas, PackOptions packOptions) { +void PackCharts(Atlas *atlas, PackOptions packOptions) +{ // Validate arguments and context state. 
if (!atlas) { XA_PRINT_WARNING("PackCharts: atlas is null.\n"); @@ -9177,7 +9593,8 @@ void PackCharts(Atlas *atlas, PackOptions packOptions) { if (!ctx->uvMeshInstances.isEmpty()) { for (uint32_t i = 0; i < ctx->uvMeshInstances.size(); i++) packAtlas.addUvMeshCharts(ctx->uvMeshInstances[i]); - } else + } + else packAtlas.addCharts(ctx->taskScheduler, &ctx->paramAtlas); XA_PROFILE_END(packChartsAddCharts) XA_PROFILE_START(packCharts) @@ -9455,7 +9872,8 @@ void PackCharts(Atlas *atlas, PackOptions packOptions) { XA_PRINT_MEM_USAGE } -void Generate(Atlas *atlas, ChartOptions chartOptions, PackOptions packOptions) { +void Generate(Atlas *atlas, ChartOptions chartOptions, PackOptions packOptions) +{ if (!atlas) { XA_PRINT_WARNING("Generate: atlas is null.\n"); return; @@ -9469,7 +9887,8 @@ void Generate(Atlas *atlas, ChartOptions chartOptions, PackOptions packOptions) PackCharts(atlas, packOptions); } -void SetProgressCallback(Atlas *atlas, ProgressFunc progressFunc, void *progressUserData) { +void SetProgressCallback(Atlas *atlas, ProgressFunc progressFunc, void *progressUserData) +{ if (!atlas) { XA_PRINT_WARNING("SetProgressCallback: atlas is null.\n"); return; @@ -9479,17 +9898,20 @@ void SetProgressCallback(Atlas *atlas, ProgressFunc progressFunc, void *progress ctx->progressUserData = progressUserData; } -void SetAlloc(ReallocFunc reallocFunc, FreeFunc freeFunc) { +void SetAlloc(ReallocFunc reallocFunc, FreeFunc freeFunc) +{ internal::s_realloc = reallocFunc; internal::s_free = freeFunc; } -void SetPrint(PrintFunc print, bool verbose) { +void SetPrint(PrintFunc print, bool verbose) +{ internal::s_print = print; internal::s_printVerbose = verbose; } -const char *StringForEnum(AddMeshError error) { +const char *StringForEnum(AddMeshError error) +{ if (error == AddMeshError::Error) return "Unspecified error"; if (error == AddMeshError::IndexOutOfRange) @@ -9501,7 +9923,8 @@ const char *StringForEnum(AddMeshError error) { return "Success"; } -const char 
*StringForEnum(ProgressCategory category) { +const char *StringForEnum(ProgressCategory category) +{ if (category == ProgressCategory::AddMesh) return "Adding mesh(es)"; if (category == ProgressCategory::ComputeCharts) @@ -9529,76 +9952,93 @@ static_assert(sizeof(xatlas::PackOptions) == sizeof(xatlasPackOptions), "xatlasP extern "C" { #endif -xatlasAtlas *xatlasCreate() { +xatlasAtlas *xatlasCreate() +{ return (xatlasAtlas *)xatlas::Create(); } -void xatlasDestroy(xatlasAtlas *atlas) { +void xatlasDestroy(xatlasAtlas *atlas) +{ xatlas::Destroy((xatlas::Atlas *)atlas); } -xatlasAddMeshError xatlasAddMesh(xatlasAtlas *atlas, const xatlasMeshDecl *meshDecl, uint32_t meshCountHint) { +xatlasAddMeshError xatlasAddMesh(xatlasAtlas *atlas, const xatlasMeshDecl *meshDecl, uint32_t meshCountHint) +{ return (xatlasAddMeshError)xatlas::AddMesh((xatlas::Atlas *)atlas, *(const xatlas::MeshDecl *)meshDecl, meshCountHint); } -void xatlasAddMeshJoin(xatlasAtlas *atlas) { +void xatlasAddMeshJoin(xatlasAtlas *atlas) +{ xatlas::AddMeshJoin((xatlas::Atlas *)atlas); } -xatlasAddMeshError xatlasAddUvMesh(xatlasAtlas *atlas, const xatlasUvMeshDecl *decl) { +xatlasAddMeshError xatlasAddUvMesh(xatlasAtlas *atlas, const xatlasUvMeshDecl *decl) +{ return (xatlasAddMeshError)xatlas::AddUvMesh((xatlas::Atlas *)atlas, *(const xatlas::UvMeshDecl *)decl); } -void xatlasComputeCharts(xatlasAtlas *atlas, const xatlasChartOptions *chartOptions) { +void xatlasComputeCharts(xatlasAtlas *atlas, const xatlasChartOptions *chartOptions) +{ xatlas::ComputeCharts((xatlas::Atlas *)atlas, chartOptions ? *(xatlas::ChartOptions *)chartOptions : xatlas::ChartOptions()); } -void xatlasPackCharts(xatlasAtlas *atlas, const xatlasPackOptions *packOptions) { +void xatlasPackCharts(xatlasAtlas *atlas, const xatlasPackOptions *packOptions) +{ xatlas::PackCharts((xatlas::Atlas *)atlas, packOptions ? 
*(xatlas::PackOptions *)packOptions : xatlas::PackOptions()); } -void xatlasGenerate(xatlasAtlas *atlas, const xatlasChartOptions *chartOptions, const xatlasPackOptions *packOptions) { +void xatlasGenerate(xatlasAtlas *atlas, const xatlasChartOptions *chartOptions, const xatlasPackOptions *packOptions) +{ xatlas::Generate((xatlas::Atlas *)atlas, chartOptions ? *(xatlas::ChartOptions *)chartOptions : xatlas::ChartOptions(), packOptions ? *(xatlas::PackOptions *)packOptions : xatlas::PackOptions()); } -void xatlasSetProgressCallback(xatlasAtlas *atlas, xatlasProgressFunc progressFunc, void *progressUserData) { +void xatlasSetProgressCallback(xatlasAtlas *atlas, xatlasProgressFunc progressFunc, void *progressUserData) +{ xatlas::ProgressFunc pf; *(void **)&pf = (void *)progressFunc; xatlas::SetProgressCallback((xatlas::Atlas *)atlas, pf, progressUserData); } -void xatlasSetAlloc(xatlasReallocFunc reallocFunc, xatlasFreeFunc freeFunc) { +void xatlasSetAlloc(xatlasReallocFunc reallocFunc, xatlasFreeFunc freeFunc) +{ xatlas::SetAlloc((xatlas::ReallocFunc)reallocFunc, (xatlas::FreeFunc)freeFunc); } -void xatlasSetPrint(xatlasPrintFunc print, bool verbose) { +void xatlasSetPrint(xatlasPrintFunc print, bool verbose) +{ xatlas::SetPrint((xatlas::PrintFunc)print, verbose); } -const char *xatlasAddMeshErrorString(xatlasAddMeshError error) { +const char *xatlasAddMeshErrorString(xatlasAddMeshError error) +{ return xatlas::StringForEnum((xatlas::AddMeshError)error); } -const char *xatlasProgressCategoryString(xatlasProgressCategory category) { +const char *xatlasProgressCategoryString(xatlasProgressCategory category) +{ return xatlas::StringForEnum((xatlas::ProgressCategory)category); } -void xatlasMeshDeclInit(xatlasMeshDecl *meshDecl) { +void xatlasMeshDeclInit(xatlasMeshDecl *meshDecl) +{ xatlas::MeshDecl init; memcpy(meshDecl, &init, sizeof(init)); } -void xatlasUvMeshDeclInit(xatlasUvMeshDecl *uvMeshDecl) { +void xatlasUvMeshDeclInit(xatlasUvMeshDecl *uvMeshDecl) +{ 
xatlas::UvMeshDecl init; memcpy(uvMeshDecl, &init, sizeof(init)); } -void xatlasChartOptionsInit(xatlasChartOptions *chartOptions) { +void xatlasChartOptionsInit(xatlasChartOptions *chartOptions) +{ xatlas::ChartOptions init; memcpy(chartOptions, &init, sizeof(init)); } -void xatlasPackOptionsInit(xatlasPackOptions *packOptions) { +void xatlasPackOptionsInit(xatlasPackOptions *packOptions) +{ xatlas::PackOptions init; memcpy(packOptions, &init, sizeof(init)); } diff --git a/thirdparty/xatlas/xatlas.h b/thirdparty/xatlas/xatlas.h index fc40d9d49c..d66a96db21 100644 --- a/thirdparty/xatlas/xatlas.h +++ b/thirdparty/xatlas/xatlas.h @@ -36,7 +36,8 @@ Copyright NVIDIA Corporation 2006 -- Ignacio Castano <icastano@nvidia.com> namespace xatlas { -enum class ChartType { +enum class ChartType +{ Planar, Ortho, LSCM, @@ -45,7 +46,8 @@ enum class ChartType { }; // A group of connected faces, belonging to a single atlas. -struct Chart { +struct Chart +{ uint32_t *faceArray; uint32_t atlasIndex; // Sub-atlas index. uint32_t faceCount; @@ -54,7 +56,8 @@ struct Chart { }; // Output vertex. -struct Vertex { +struct Vertex +{ int32_t atlasIndex; // Sub-atlas index. -1 if the vertex doesn't exist in any atlas. int32_t chartIndex; // -1 if the vertex doesn't exist in any chart. float uv[2]; // Not normalized - values are in Atlas width and height range. @@ -62,7 +65,8 @@ struct Vertex { }; // Output mesh. -struct Mesh { +struct Mesh +{ Chart *chartArray; uint32_t *indexArray; Vertex *vertexArray; @@ -77,7 +81,8 @@ static const uint32_t kImageIsBilinearBit = 0x40000000; static const uint32_t kImageIsPaddingBit = 0x20000000; // Empty on creation. Populated after charts are packed. -struct Atlas { +struct Atlas +{ uint32_t *image; Mesh *meshes; // The output meshes, corresponding to each AddMesh call. float *utilization; // Normalized atlas texel utilization array. E.g. a value of 0.8 means 20% empty space. atlasCount in length. 
@@ -94,13 +99,15 @@ Atlas *Create(); void Destroy(Atlas *atlas); -enum class IndexFormat { +enum class IndexFormat +{ UInt16, UInt32 }; // Input mesh declaration. -struct MeshDecl { +struct MeshDecl +{ const void *vertexPositionData = nullptr; const void *vertexNormalData = nullptr; // optional const void *vertexUvData = nullptr; // optional. The input UVs are provided as a hint to the chart generator. @@ -131,7 +138,8 @@ struct MeshDecl { float epsilon = 1.192092896e-07F; }; -enum class AddMeshError { +enum class AddMeshError +{ Success, // No error. Error, // Unspecified error. IndexOutOfRange, // An index is >= MeshDecl vertexCount. @@ -145,7 +153,8 @@ AddMeshError AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountH // Wait for AddMesh async processing to finish. ComputeCharts / Generate call this internally. void AddMeshJoin(Atlas *atlas); -struct UvMeshDecl { +struct UvMeshDecl +{ const void *vertexUvData = nullptr; const void *indexData = nullptr; // optional const uint32_t *faceMaterialData = nullptr; // Optional. Overlapping UVs should be assigned a different material. Must be indexCount / 3 in length. @@ -161,7 +170,8 @@ AddMeshError AddUvMesh(Atlas *atlas, const UvMeshDecl &decl); // Custom parameterization function. texcoords initial values are an orthogonal parameterization. typedef void (*ParameterizeFunc)(const float *positions, float *texcoords, uint32_t vertexCount, const uint32_t *indices, uint32_t indexCount); -struct ChartOptions { +struct ChartOptions +{ ParameterizeFunc paramFunc = nullptr; float maxChartArea = 0.0f; // Don't grow charts to be larger than this. 0 means no limit. @@ -184,7 +194,8 @@ struct ChartOptions { // Call after all AddMesh calls. Can be called multiple times to recompute charts with different options. void ComputeCharts(Atlas *atlas, ChartOptions options = ChartOptions()); -struct PackOptions { +struct PackOptions +{ // Charts larger than this will be scaled down. 0 means no limit. 
uint32_t maxChartSize = 0; @@ -227,7 +238,8 @@ void PackCharts(Atlas *atlas, PackOptions packOptions = PackOptions()); void Generate(Atlas *atlas, ChartOptions chartOptions = ChartOptions(), PackOptions packOptions = PackOptions()); // Progress tracking. -enum class ProgressCategory { +enum class ProgressCategory +{ AddMesh, ComputeCharts, PackCharts, diff --git a/thirdparty/zstd/common/bitstream.h b/thirdparty/zstd/common/bitstream.h index d9a2730104..2e5a933ad3 100644 --- a/thirdparty/zstd/common/bitstream.h +++ b/thirdparty/zstd/common/bitstream.h @@ -1,7 +1,7 @@ /* ****************************************************************** * bitstream * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/thirdparty/zstd/common/compiler.h b/thirdparty/zstd/common/compiler.h index 3e454f38c1..a951d0adea 100644 --- a/thirdparty/zstd/common/compiler.h +++ b/thirdparty/zstd/common/compiler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -90,6 +90,7 @@ # endif #endif + /* target attribute */ #ifndef __has_attribute #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ diff --git a/thirdparty/zstd/common/cpu.h b/thirdparty/zstd/common/cpu.h index cb210593ea..8acd33be3c 100644 --- a/thirdparty/zstd/common/cpu.h +++ b/thirdparty/zstd/common/cpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, Facebook, Inc. + * Copyright (c) Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/common/debug.c b/thirdparty/zstd/common/debug.c index f303f4a2e5..bb863c9ea6 100644 --- a/thirdparty/zstd/common/debug.c +++ b/thirdparty/zstd/common/debug.c @@ -1,7 +1,7 @@ /* ****************************************************************** * debug * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/thirdparty/zstd/common/debug.h b/thirdparty/zstd/common/debug.h index 8b5734366c..3b2a320a18 100644 --- a/thirdparty/zstd/common/debug.h +++ b/thirdparty/zstd/common/debug.h @@ -1,7 +1,7 @@ /* ****************************************************************** * debug * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/thirdparty/zstd/common/entropy_common.c b/thirdparty/zstd/common/entropy_common.c index f9fcb1acfc..41cd69566b 100644 --- a/thirdparty/zstd/common/entropy_common.c +++ b/thirdparty/zstd/common/entropy_common.c @@ -1,6 +1,6 @@ /* ****************************************************************** * Common functions of New Generation Entropy library - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/thirdparty/zstd/common/error_private.c b/thirdparty/zstd/common/error_private.c index 45bba5305b..6d1135f8c3 100644 --- a/thirdparty/zstd/common/error_private.c +++ b/thirdparty/zstd/common/error_private.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 
+ * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/common/error_private.h b/thirdparty/zstd/common/error_private.h index 71b37b8dfa..6d8b9f7763 100644 --- a/thirdparty/zstd/common/error_private.h +++ b/thirdparty/zstd/common/error_private.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -21,8 +21,8 @@ extern "C" { /* **************************************** * Dependencies ******************************************/ -#include "zstd_deps.h" /* size_t */ -#include "zstd_errors.h" /* enum list */ +#include "../zstd_errors.h" /* enum list */ +#include "zstd_deps.h" /* size_t */ /* **************************************** diff --git a/thirdparty/zstd/common/fse.h b/thirdparty/zstd/common/fse.h index dd5fc44e80..19dd4febcd 100644 --- a/thirdparty/zstd/common/fse.h +++ b/thirdparty/zstd/common/fse.h @@ -1,7 +1,7 @@ /* ****************************************************************** * FSE : Finite State Entropy codec * Public Prototypes declaration - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. 
* * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -352,7 +352,7 @@ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); /**< build a fake FSE_DTable, designed to always generate the same symbolValue */ -#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)) +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ diff --git a/thirdparty/zstd/common/fse_decompress.c b/thirdparty/zstd/common/fse_decompress.c index c164430f99..f4ff58fa0a 100644 --- a/thirdparty/zstd/common/fse_decompress.c +++ b/thirdparty/zstd/common/fse_decompress.c @@ -1,6 +1,6 @@ /* ****************************************************************** * FSE : Finite State Entropy decoder - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. 
* * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -310,6 +310,12 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0); } +typedef struct { + short ncount[FSE_MAX_SYMBOL_VALUE + 1]; + FSE_DTable dtable[1]; /* Dynamically sized */ +} FSE_DecompressWksp; + + FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, @@ -318,33 +324,37 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( { const BYTE* const istart = (const BYTE*)cSrc; const BYTE* ip = istart; - short counting[FSE_MAX_SYMBOL_VALUE+1]; unsigned tableLog; unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; - FSE_DTable* const dtable = (FSE_DTable*)workSpace; + FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace; + + DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); + if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC); /* normal FSE decoding mode */ - size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); - if (FSE_isError(NCountLength)) return NCountLength; - if (tableLog > maxLog) return ERROR(tableLog_tooLarge); - assert(NCountLength <= cSrcSize); - ip += NCountLength; - cSrcSize -= NCountLength; + { + size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); + if (FSE_isError(NCountLength)) return NCountLength; + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + assert(NCountLength <= cSrcSize); + ip += NCountLength; + cSrcSize -= NCountLength; + } if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); - workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog); - wkspSize -= FSE_DTABLE_SIZE(tableLog); + workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog); + 
wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); - CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) ); + CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); { - const void* ptr = dtable; + const void* ptr = wksp->dtable; const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; const U32 fastMode = DTableH->fastMode; /* select fast mode (static) */ - if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1); - return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0); + if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1); + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0); } } diff --git a/thirdparty/zstd/common/huf.h b/thirdparty/zstd/common/huf.h index 1afef90c7c..3d47ced030 100644 --- a/thirdparty/zstd/common/huf.h +++ b/thirdparty/zstd/common/huf.h @@ -1,7 +1,7 @@ /* ****************************************************************** * huff0 huffman codec, * part of Finite State Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -192,6 +192,7 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. 
In which case, CTable will overwrite count content */ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); @@ -278,7 +279,7 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); * a required workspace size greater than that specified in the following * macro. */ -#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) +#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) #ifndef HUF_FORCE_DECOMPRESS_X2 diff --git a/thirdparty/zstd/common/mem.h b/thirdparty/zstd/common/mem.h index 4728ef767b..9f3b81ab9d 100644 --- a/thirdparty/zstd/common/mem.h +++ b/thirdparty/zstd/common/mem.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -143,9 +143,7 @@ MEM_STATIC size_t MEM_swapST(size_t in); * Prefer these methods in priority order (0 > 1 > 2) */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define MEM_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -308,7 +306,7 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) MEM_STATIC U32 MEM_readLE24(const void* memPtr) { - return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); + return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16); } MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) diff --git a/thirdparty/zstd/common/pool.c b/thirdparty/zstd/common/pool.c index 4c1b83376f..ea70b8b65a 100644 --- a/thirdparty/zstd/common/pool.c +++ b/thirdparty/zstd/common/pool.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/common/pool.h b/thirdparty/zstd/common/pool.h index 63954ca6ca..e18aa0708f 100644 --- a/thirdparty/zstd/common/pool.h +++ b/thirdparty/zstd/common/pool.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/common/xxhash.c b/thirdparty/zstd/common/xxhash.c index e708df3c33..926b33604e 100644 --- a/thirdparty/zstd/common/xxhash.c +++ b/thirdparty/zstd/common/xxhash.c @@ -1,6 +1,6 @@ /* * xxHash - Fast Hash algorithm - * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - xxHash homepage: http://www.xxhash.com @@ -30,9 +30,7 @@ * Prefer these methods in priority order (0 > 1 > 2) */ #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define XXH_FORCE_MEMORY_ACCESS 2 -# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ +# if (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ defined(__ICCARM__) # define XXH_FORCE_MEMORY_ACCESS 1 diff --git a/thirdparty/zstd/common/xxhash.h b/thirdparty/zstd/common/xxhash.h index eceb55d5e0..16c1f1617b 100644 --- a/thirdparty/zstd/common/xxhash.h +++ b/thirdparty/zstd/common/xxhash.h @@ -1,7 +1,7 @@ /* * xxHash - Extremely Fast Hash algorithm * Header File - * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. 
* * You can contact the author at : * - xxHash source repository : https://github.com/Cyan4973/xxHash diff --git a/thirdparty/zstd/common/zstd_common.c b/thirdparty/zstd/common/zstd_common.c index 939e9f08fa..3d7e35b309 100644 --- a/thirdparty/zstd/common/zstd_common.c +++ b/thirdparty/zstd/common/zstd_common.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/common/zstd_deps.h b/thirdparty/zstd/common/zstd_deps.h index 0fb8b7818b..14211344a0 100644 --- a/thirdparty/zstd/common/zstd_deps.h +++ b/thirdparty/zstd/common/zstd_deps.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/common/zstd_internal.h b/thirdparty/zstd/common/zstd_internal.h index 0991f20a08..68252e987e 100644 --- a/thirdparty/zstd/common/zstd_internal.h +++ b/thirdparty/zstd/common/zstd_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -36,6 +36,11 @@ # define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ #endif #include "xxhash.h" /* XXH_reset, update, digest */ +#ifndef ZSTD_NO_TRACE +# include "zstd_trace.h" +#else +# define ZSTD_TRACE 0 +#endif #if defined (__cplusplus) extern "C" { @@ -347,11 +352,18 @@ typedef enum { * Private declarations *********************************************/ typedef struct seqDef_s { - U32 offset; /* Offset code of the sequence */ + U32 offset; /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */ U16 litLength; U16 matchLength; } seqDef; +/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */ +typedef enum { + ZSTD_llt_none = 0, /* no longLengthType */ + ZSTD_llt_literalLength = 1, /* represents a long literal */ + ZSTD_llt_matchLength = 2 /* represents a long match */ +} ZSTD_longLengthType_e; + typedef struct { seqDef* sequencesStart; seqDef* sequences; /* ptr to end of sequences */ @@ -363,12 +375,12 @@ typedef struct { size_t maxNbSeq; size_t maxNbLit; - /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength + /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment - * the existing value of the litLength or matchLength by 0x10000. + * the existing value of the litLength or matchLength by 0x10000. */ - U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */ - U32 longLengthPos; /* Index of the sequence to apply long length modification to */ + ZSTD_longLengthType_e longLengthType; + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ } seqStore_t; typedef struct { @@ -378,7 +390,7 @@ typedef struct { /** * Returns the ZSTD_sequenceLength for the given sequences. 
It handles the decoding of long sequences - * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. + * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. */ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) { @@ -386,10 +398,10 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore seqLen.litLength = seq->litLength; seqLen.matchLength = seq->matchLength + MINMATCH; if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { - if (seqStore->longLengthID == 1) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { seqLen.litLength += 0xFFFF; } - if (seqStore->longLengthID == 2) { + if (seqStore->longLengthType == ZSTD_llt_matchLength) { seqLen.matchLength += 0xFFFF; } } diff --git a/thirdparty/zstd/common/zstd_trace.h b/thirdparty/zstd/common/zstd_trace.h new file mode 100644 index 0000000000..2da5640771 --- /dev/null +++ b/thirdparty/zstd/common/zstd_trace.h @@ -0,0 +1,154 @@ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_TRACE_H +#define ZSTD_TRACE_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include <stddef.h> + +/* weak symbol support */ +#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \ + !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \ + !defined(__CYGWIN__) +# define ZSTD_HAVE_WEAK_SYMBOLS 1 +#else +# define ZSTD_HAVE_WEAK_SYMBOLS 0 +#endif +#if ZSTD_HAVE_WEAK_SYMBOLS +# define ZSTD_WEAK_ATTR __attribute__((__weak__)) +#else +# define ZSTD_WEAK_ATTR +#endif + +/* Only enable tracing when weak symbols are available. 
*/ +#ifndef ZSTD_TRACE +# define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS +#endif + +#if ZSTD_TRACE + +struct ZSTD_CCtx_s; +struct ZSTD_DCtx_s; +struct ZSTD_CCtx_params_s; + +typedef struct { + /** + * ZSTD_VERSION_NUMBER + * + * This is guaranteed to be the first member of ZSTD_Trace. + * Otherwise, this struct is not stable between versions. If + * the version number does not match your expectation, you + * should not interpret the rest of the struct. + */ + unsigned version; + /** + * Non-zero if streaming (de)compression is used. + */ + unsigned streaming; + /** + * The dictionary ID. + */ + unsigned dictionaryID; + /** + * Is the dictionary cold? + * Only set on decompression. + */ + unsigned dictionaryIsCold; + /** + * The dictionary size or zero if no dictionary. + */ + size_t dictionarySize; + /** + * The uncompressed size of the data. + */ + size_t uncompressedSize; + /** + * The compressed size of the data. + */ + size_t compressedSize; + /** + * The fully resolved CCtx parameters (NULL on decompression). + */ + struct ZSTD_CCtx_params_s const* params; + /** + * The ZSTD_CCtx pointer (NULL on decompression). + */ + struct ZSTD_CCtx_s const* cctx; + /** + * The ZSTD_DCtx pointer (NULL on compression). + */ + struct ZSTD_DCtx_s const* dctx; +} ZSTD_Trace; + +/** + * A tracing context. It must be 0 when tracing is disabled. + * Otherwise, any non-zero value returned by a tracing begin() + * function is presented to any subsequent calls to end(). + * + * Any non-zero value is treated as tracing is enabled and not + * interpreted by the library. + * + * Two possible uses are: + * * A timestamp for when the begin() function was called. + * * A unique key identifying the (de)compression, like the + * address of the [dc]ctx pointer if you need to track + * more information than just a timestamp. + */ +typedef unsigned long long ZSTD_TraceCtx; + +/** + * Trace the beginning of a compression call. + * @param cctx The cctx pointer for the compression. 
+ * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin( + struct ZSTD_CCtx_s const* cctx); + +/** + * Trace the end of a compression call. + * @param ctx The return value of ZSTD_trace_compress_begin(). + * @param trace The zstd tracing info. + */ +ZSTD_WEAK_ATTR void ZSTD_trace_compress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +/** + * Trace the beginning of a decompression call. + * @param dctx The dctx pointer for the decompression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_decompress_end(). + */ +ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin( + struct ZSTD_DCtx_s const* dctx); + +/** + * Trace the end of a decompression call. + * @param ctx The return value of ZSTD_trace_decompress_begin(). + * @param trace The zstd tracing info. + */ +ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +#endif /* ZSTD_TRACE */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_TRACE_H */ diff --git a/thirdparty/zstd/compress/fse_compress.c b/thirdparty/zstd/compress/fse_compress.c index 304a82b3cc..b4297ec88a 100644 --- a/thirdparty/zstd/compress/fse_compress.c +++ b/thirdparty/zstd/compress/fse_compress.c @@ -1,6 +1,6 @@ /* ****************************************************************** * FSE : Finite State Entropy encoder - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. 
* * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/thirdparty/zstd/compress/hist.c b/thirdparty/zstd/compress/hist.c index a9659d11ad..073c57e752 100644 --- a/thirdparty/zstd/compress/hist.c +++ b/thirdparty/zstd/compress/hist.c @@ -1,7 +1,7 @@ /* ****************************************************************** * hist : Histogram functions * part of Finite State Entropy project - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/thirdparty/zstd/compress/hist.h b/thirdparty/zstd/compress/hist.h index fb9ead6834..228ed48a71 100644 --- a/thirdparty/zstd/compress/hist.h +++ b/thirdparty/zstd/compress/hist.h @@ -1,7 +1,7 @@ /* ****************************************************************** * hist : Histogram functions * part of Finite State Entropy project - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/thirdparty/zstd/compress/huf_compress.c b/thirdparty/zstd/compress/huf_compress.c index 302e08864d..485906e678 100644 --- a/thirdparty/zstd/compress/huf_compress.c +++ b/thirdparty/zstd/compress/huf_compress.c @@ -1,6 +1,6 @@ /* ****************************************************************** * Huffman encoder, part of New Generation Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -59,7 +59,15 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. 
*/ #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 -static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) + +typedef struct { + FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; + U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)]; + unsigned count[HUF_TABLELOG_MAX+1]; + S16 norm[HUF_TABLELOG_MAX+1]; +} HUF_CompressWeightsWksp; + +static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize) { BYTE* const ostart = (BYTE*) dst; BYTE* op = ostart; @@ -67,33 +75,30 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight unsigned maxSymbolValue = HUF_TABLELOG_MAX; U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; + HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace; - FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; - U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)]; - - unsigned count[HUF_TABLELOG_MAX+1]; - S16 norm[HUF_TABLELOG_MAX+1]; + if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC); /* init conditions */ if (wtSize <= 1) return 0; /* Not compressible */ /* Scan input and build symbol stats */ - { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */ + { unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */ if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ } tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); - CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) ); + CHECK_F( 
FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) ); /* Write table description header */ - { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) ); + { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) ); op += hSize; } /* Compress */ - CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); - { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) ); + CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) ); if (cSize == 0) return 0; /* not enough space for compressed data */ op += cSize; } @@ -102,29 +107,33 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight } -/*! HUF_writeCTable() : - `CTable` : Huffman tree to save, using huf representation. 
- @return : size of saved CTable */ -size_t HUF_writeCTable (void* dst, size_t maxDstSize, - const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) -{ +typedef struct { + HUF_CompressWeightsWksp wksp; BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; +} HUF_WriteCTableWksp; + +size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, + const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, + void* workspace, size_t workspaceSize) +{ BYTE* op = (BYTE*)dst; U32 n; + HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace; - /* check conditions */ + /* check conditions */ + if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC); if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); /* convert to weight */ - bitsToWeight[0] = 0; + wksp->bitsToWeight[0] = 0; for (n=1; n<huffLog+1; n++) - bitsToWeight[n] = (BYTE)(huffLog + 1 - n); + wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n); for (n=0; n<maxSymbolValue; n++) - huffWeight[n] = bitsToWeight[CTable[n].nbBits]; + wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits]; /* attempt weights compression by FSE */ - { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) ); + { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) ); if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ op[0] = (BYTE)hSize; return hSize+1; @@ -134,12 +143,22 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); - huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final 
combination */ + wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ for (n=0; n<maxSymbolValue; n+=2) - op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]); + op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]); return ((maxSymbolValue+1)/2) + 1; } +/*! HUF_writeCTable() : + `CTable` : Huffman tree to save, using huf representation. + @return : size of saved CTable */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, + const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) +{ + HUF_WriteCTableWksp wksp; + return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp)); +} + size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) { @@ -732,7 +751,10 @@ static size_t HUF_compressCTable_internal( typedef struct { unsigned count[HUF_SYMBOLVALUE_MAX + 1]; HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; - HUF_buildCTable_wksp_tables buildCTable_wksp; + union { + HUF_buildCTable_wksp_tables buildCTable_wksp; + HUF_WriteCTableWksp writeCTable_wksp; + } wksps; } HUF_compress_tables_t; /* HUF_compress_internal() : @@ -795,7 +817,7 @@ HUF_compress_internal (void* dst, size_t dstSize, huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, maxSymbolValue, huffLog, - &table->buildCTable_wksp, sizeof(table->buildCTable_wksp)); + &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); CHECK_F(maxBits); huffLog = (U32)maxBits; /* Zero unused symbols in CTable, so we can check it for validity */ @@ -804,7 +826,8 @@ HUF_compress_internal (void* dst, size_t dstSize, } /* Write table description header */ - { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); + { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, 
huffLog, + &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) ); /* Check if using previous huffman table is beneficial */ if (repeat && *repeat != HUF_repeat_none) { size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); diff --git a/thirdparty/zstd/compress/zstd_compress.c b/thirdparty/zstd/compress/zstd_compress.c index 386b051df6..b7ee2980a7 100644 --- a/thirdparty/zstd/compress/zstd_compress.c +++ b/thirdparty/zstd/compress/zstd_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -72,6 +72,10 @@ struct ZSTD_CDict_s { ZSTD_customMem customMem; U32 dictID; int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ + ZSTD_useRowMatchFinderMode_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use + * row-based matchfinder. Unless the cdict is reloaded, we will use + * the same greedy/lazy matchfinder at compression time. + */ }; /* typedef'd to ZSTD_CDict within "zstd.h" */ ZSTD_CCtx* ZSTD_createCCtx(void) @@ -202,6 +206,49 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) /* private API call, for dictBuilder only */ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } +/* Returns true if the strategy supports using a row based matchfinder */ +static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) { + return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2); +} + +/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder + * for this compression. 
+ */ +static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_useRowMatchFinderMode_e mode) { + assert(mode != ZSTD_urm_auto); + return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_urm_enableRowMatchFinder); +} + +/* Returns row matchfinder usage enum given an initial mode and cParams */ +static ZSTD_useRowMatchFinderMode_e ZSTD_resolveRowMatchFinderMode(ZSTD_useRowMatchFinderMode_e mode, + const ZSTD_compressionParameters* const cParams) { +#if !defined(ZSTD_NO_INTRINSICS) && (defined(__SSE2__) || defined(__ARM_NEON)) + int const kHasSIMD128 = 1; +#else + int const kHasSIMD128 = 0; +#endif + if (mode != ZSTD_urm_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */ + mode = ZSTD_urm_disableRowMatchFinder; + if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode; + if (kHasSIMD128) { + if (cParams->windowLog > 14) mode = ZSTD_urm_enableRowMatchFinder; + } else { + if (cParams->windowLog > 17) mode = ZSTD_urm_enableRowMatchFinder; + } + return mode; +} + +/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */ +static int ZSTD_allocateChainTable(const ZSTD_strategy strategy, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + const U32 forDDSDict) { + assert(useRowMatchFinder != ZSTD_urm_auto); + /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate. + * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder. + */ + return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder)); +} + /* Returns 1 if compression parameters are such that we should * enable long distance matching (wlog >= 27, strategy >= btopt). * Returns 0 otherwise. 
@@ -210,6 +257,14 @@ static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27; } +/* Returns 1 if compression parameters are such that we should + * enable blockSplitter (wlog >= 17, strategy >= btopt). + * Returns 0 otherwise. + */ +static U32 ZSTD_CParams_useBlockSplitter(const ZSTD_compressionParameters* const cParams) { + return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17; +} + static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( ZSTD_compressionParameters cParams) { @@ -218,6 +273,7 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT); cctxParams.cParams = cParams; + /* Adjust advanced params according to cParams */ if (ZSTD_CParams_shouldEnableLdm(&cParams)) { DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params"); cctxParams.ldmParams.enableLdm = 1; @@ -227,6 +283,12 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( assert(cctxParams.ldmParams.hashRateLog < 32); } + if (ZSTD_CParams_useBlockSplitter(&cParams)) { + DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including block splitting into cctx params"); + cctxParams.splitBlocks = 1; + } + + cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); assert(!ZSTD_checkCParams(cParams)); return cctxParams; } @@ -269,29 +331,48 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) return 0; } +#define ZSTD_NO_CLEVEL 0 + +/** + * Initializes the cctxParams from params and compressionLevel. + * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. 
+ */ +static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel) +{ + assert(!ZSTD_checkCParams(params->cParams)); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. + */ + cctxParams->compressionLevel = compressionLevel; + cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams); + DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d", cctxParams->useRowMatchFinder); +} + size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) { RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); - ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); - assert(!ZSTD_checkCParams(params.cParams)); - cctxParams->cParams = params.cParams; - cctxParams->fParams = params.fParams; - cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL); return 0; } -/* ZSTD_assignParamsToCCtxParams() : - * params is presumed valid at this stage */ -static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( - const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) +/** + * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone. + * @param param Validated zstd parameters. 
+ */ +static void ZSTD_CCtxParams_setZstdParams( + ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) { - ZSTD_CCtx_params ret = *cctxParams; assert(!ZSTD_checkCParams(params->cParams)); - ret.cParams = params->cParams; - ret.fParams = params->fParams; - ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ - return ret; + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. + */ + cctxParams->compressionLevel = ZSTD_NO_CLEVEL; } ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) @@ -468,6 +549,21 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = 1; return bounds; + case ZSTD_c_splitBlocks: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_useRowMatchFinder: + bounds.lowerBound = (int)ZSTD_urm_auto; + bounds.upperBound = (int)ZSTD_urm_enableRowMatchFinder; + return bounds; + + case ZSTD_c_deterministicRefPrefix: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + default: bounds.error = ERROR(parameter_unsupported); return bounds; @@ -529,6 +625,9 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_stableOutBuffer: case ZSTD_c_blockDelimiters: case ZSTD_c_validateSequences: + case ZSTD_c_splitBlocks: + case ZSTD_c_useRowMatchFinder: + case ZSTD_c_deterministicRefPrefix: default: return 0; } @@ -581,6 +680,9 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_stableOutBuffer: case ZSTD_c_blockDelimiters: case ZSTD_c_validateSequences: + case ZSTD_c_splitBlocks: + case ZSTD_c_useRowMatchFinder: + case ZSTD_c_deterministicRefPrefix: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -792,17 +894,32 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->validateSequences 
= value; return CCtxParams->validateSequences; + case ZSTD_c_splitBlocks: + BOUNDCHECK(ZSTD_c_splitBlocks, value); + CCtxParams->splitBlocks = value; + return CCtxParams->splitBlocks; + + case ZSTD_c_useRowMatchFinder: + BOUNDCHECK(ZSTD_c_useRowMatchFinder, value); + CCtxParams->useRowMatchFinder = (ZSTD_useRowMatchFinderMode_e)value; + return CCtxParams->useRowMatchFinder; + + case ZSTD_c_deterministicRefPrefix: + BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value); + CCtxParams->deterministicRefPrefix = !!value; + return CCtxParams->deterministicRefPrefix; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } -size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value) +size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value) { return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); } size_t ZSTD_CCtxParams_getParameter( - ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value) + ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value) { switch(param) { @@ -915,6 +1032,15 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_validateSequences : *value = (int)CCtxParams->validateSequences; break; + case ZSTD_c_splitBlocks : + *value = (int)CCtxParams->splitBlocks; + break; + case ZSTD_c_useRowMatchFinder : + *value = (int)CCtxParams->useRowMatchFinder; + break; + case ZSTD_c_deterministicRefPrefix: + *value = (int)CCtxParams->deterministicRefPrefix; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -1188,15 +1314,26 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); assert(ZSTD_checkCParams(cPar)==0); - if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) - srcSize = minSrcSize; - switch (mode) { - case ZSTD_cpm_noAttachDict: case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + /* If we don't know the source size, don't make any + * 
assumptions about it. We will already have selected + * smaller parameters if a dictionary is in use. + */ + break; case ZSTD_cpm_createCDict: + /* Assume a small source size when creating a dictionary + * with an unkown source size. + */ + if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) + srcSize = minSrcSize; break; case ZSTD_cpm_attachDict: + /* Dictionary has its own dedicated parameters which have + * already been selected. We are selecting parameters + * for only the source. + */ dictSize = 0; break; default: @@ -1213,7 +1350,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, ZSTD_highbit32(tSize-1) + 1; if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; } - { U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize); + if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize); U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1; if (cycleLog > dictAndWindowLog) @@ -1269,9 +1407,14 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( static size_t ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + const U32 enableDedicatedDictSearch, const U32 forCCtx) { - size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + /* chain table size should be 0 for fast or row-hash strategies */ + size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx) + ? ((size_t)1 << cParams->chainLog) + : 0; size_t const hSize = ((size_t)1) << cParams->hashLog; U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; size_t const h3Size = hashLog3 ? 
((size_t)1) << hashLog3 : 0; @@ -1281,24 +1424,34 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + hSize * sizeof(U32) + h3Size * sizeof(U32); size_t const optPotentialSpace = - ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)) - + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); + ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)) + + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); + size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder) + ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16)) + : 0; size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt)) ? 
optPotentialSpace : 0; + size_t const slackSpace = ZSTD_cwksp_slack_space_required(); + + /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */ + ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4); + assert(useRowMatchFinder != ZSTD_urm_auto); + DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", (U32)chainSize, (U32)hSize, (U32)h3Size); - return tableSpace + optSpace; + return tableSpace + optSpace + slackSpace + lazyAdditionalSpace; } static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( const ZSTD_compressionParameters* cParams, const ldmParams_t* ldmParams, const int isStatic, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, const size_t buffInSize, const size_t buffOutSize, const U64 pledgedSrcSize) @@ -1308,16 +1461,16 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( U32 const divider = (cParams->minMatch==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) - + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) + + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); - size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1); + size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1); size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams); size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize); size_t const ldmSeqSpace = ldmParams->enableLdm ? 
- ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; + ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) @@ -1343,25 +1496,45 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) { ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, + &cParams); RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); /* estimateCCtxSize is for one-shot compression. So no buffers should * be needed. However, we still allocate two 0-sized buffers, which can * take space under ASAN. */ return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); + &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); } size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) { - ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCCtxSize_usingCCtxParams(&params); + ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams); + if (ZSTD_rowMatchFinderSupported(cParams.strategy)) { + /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */ + size_t noRowCCtxSize; + size_t rowCCtxSize; + initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder; + noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams); + initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder; + rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams); + return MAX(noRowCCtxSize, rowCCtxSize); + } else { + return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams); + } } static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) { 
- ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); - return ZSTD_estimateCCtxSize_usingCParams(cParams); + int tier = 0; + size_t largestSize = 0; + static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN}; + for (; tier < 4; ++tier) { + /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict); + largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize); + } + return largestSize; } size_t ZSTD_estimateCCtxSize(int compressionLevel) @@ -1369,6 +1542,7 @@ size_t ZSTD_estimateCCtxSize(int compressionLevel) int level; size_t memBudget = 0; for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { + /* Ensure monotonically increasing memory usage as compression level increases */ size_t const newMB = ZSTD_estimateCCtxSize_internal(level); if (newMB > memBudget) memBudget = newMB; } @@ -1387,17 +1561,29 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered) ? 
ZSTD_compressBound(blockSize) + 1 : 0; + ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams); return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, &params->ldmParams, 1, inBuffSize, outBuffSize, + &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, ZSTD_CONTENTSIZE_UNKNOWN); } } size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) { - ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCStreamSize_usingCCtxParams(&params); + ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams); + if (ZSTD_rowMatchFinderSupported(cParams.strategy)) { + /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */ + size_t noRowCCtxSize; + size_t rowCCtxSize; + initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder; + noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder; + rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + return MAX(noRowCCtxSize, rowCCtxSize); + } else { + return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + } } static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) @@ -1522,20 +1708,27 @@ typedef enum { ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e; + static size_t ZSTD_reset_matchState(ZSTD_matchState_t* ms, ZSTD_cwksp* ws, const ZSTD_compressionParameters* cParams, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, const ZSTD_compResetPolicy_e crp, const ZSTD_indexResetPolicy_e forceResetIndex, const ZSTD_resetTarget_e forWho) { - size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 
0 : ((size_t)1 << cParams->chainLog); + /* disable chain table allocation for fast or row-based strategies */ + size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, + ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict)) + ? ((size_t)1 << cParams->chainLog) + : 0; size_t const hSize = ((size_t)1) << cParams->hashLog; U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); + assert(useRowMatchFinder != ZSTD_urm_auto); if (forceResetIndex == ZSTDirp_reset) { ZSTD_window_init(&ms->window); ZSTD_cwksp_mark_tables_dirty(ws); @@ -1574,11 +1767,23 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); } + if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) { + { /* Row match finder needs an additional table of hashes ("tags") */ + size_t const tagTableSize = hSize*sizeof(U16); + ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize); + if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize); + } + { /* Switch to 32-entry rows if searchLog is 5 (or more) */ + U32 const rowLog = cParams->searchLog < 5 ? 4 : 5; + assert(cParams->hashLog > rowLog); + ms->rowHashLog = cParams->hashLog - rowLog; + } + } + ms->cParams = *cParams; RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, "failed a workspace allocation in ZSTD_reset_matchState"); - return 0; } @@ -1595,62 +1800,85 @@ static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); } +/** ZSTD_dictTooBig(): + * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in + * one go generically. 
So we ensure that in that case we reset the tables to zero, + * so that we can load as much of the dictionary as possible. + */ +static int ZSTD_dictTooBig(size_t const loadedDictSize) +{ + return loadedDictSize > ZSTD_CHUNKSIZE_MAX; +} + /*! ZSTD_resetCCtx_internal() : - note : `params` are assumed fully validated at this stage */ + * @param loadedDictSize The size of the dictionary to be loaded + * into the context, if any. If no dictionary is used, or the + * dictionary is being attached / copied, then pass 0. + * note : `params` are assumed fully validated at this stage. + */ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, - ZSTD_CCtx_params params, + ZSTD_CCtx_params const* params, U64 const pledgedSrcSize, + size_t const loadedDictSize, ZSTD_compResetPolicy_e const crp, ZSTD_buffered_policy_e const zbuff) { ZSTD_cwksp* const ws = &zc->workspace; - DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", - (U32)pledgedSrcSize, params.cParams.windowLog); - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d", + (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); zc->isFirstBlock = 1; - if (params.ldmParams.enableLdm) { + /* Set applied params early so we can modify them for LDM, + * and point params at the applied params. 
+ */ + zc->appliedParams = *params; + params = &zc->appliedParams; + + assert(params->useRowMatchFinder != ZSTD_urm_auto); + if (params->ldmParams.enableLdm) { /* Adjust long distance matching parameters */ - ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams); - assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); - assert(params.ldmParams.hashRateLog < 32); - zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength); + ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams); + assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog); + assert(params->ldmParams.hashRateLog < 32); } - { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (params.cParams.minMatch==3) ? 3 : 4; + U32 const divider = (params->cParams.minMatch==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; - size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered) + size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) ? ZSTD_compressBound(blockSize) + 1 : 0; - size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered) + size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered) ? windowSize + blockSize : 0; - size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize); int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window); + int const dictTooBig = ZSTD_dictTooBig(loadedDictSize); ZSTD_indexResetPolicy_e needsIndexReset = - (!indexTooClose && zc->initialized) ? 
ZSTDirp_continue : ZSTDirp_reset; + (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue; size_t const neededSpace = ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &params.cParams, &params.ldmParams, zc->staticSize != 0, + &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, buffInSize, buffOutSize, pledgedSrcSize); + int resizeWorkspace; + FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); - /* Check if workspace is large enough, alloc a new one if needed */ - { + { /* Check if workspace is large enough, alloc a new one if needed */ int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); - + resizeWorkspace = workspaceTooSmall || workspaceWasteful; DEBUGLOG(4, "Need %zu B workspace", neededSpace); DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); - if (workspaceTooSmall || workspaceWasteful) { + if (resizeWorkspace) { DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", ZSTD_cwksp_sizeof(ws) >> 10, neededSpace >> 10); @@ -1678,8 +1906,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ZSTD_cwksp_clear(ws); /* init params */ - zc->appliedParams = params; - zc->blockState.matchState.cParams = params.cParams; + zc->blockState.matchState.cParams = params->cParams; zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; zc->consumedSrcSize = 0; zc->producedCSize = 0; @@ -1692,6 +1919,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, XXH64_reset(&zc->xxhState, 0); zc->stage = ZSTDcs_init; zc->dictID = 0; + zc->dictContentSize = 0; ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); @@ -1709,13 +1937,13 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); /* ldm bucketOffsets table */ - if (params.ldmParams.enableLdm) { + if (params->ldmParams.enableLdm) { /* 
TODO: avoid memset? */ - size_t const ldmBucketSize = - ((size_t)1) << (params.ldmParams.hashLog - - params.ldmParams.bucketSizeLog); - zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize); - ZSTD_memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize); + size_t const numBuckets = + ((size_t)1) << (params->ldmParams.hashLog - + params->ldmParams.bucketSizeLog); + zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets); + ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets); } /* sequences storage */ @@ -1729,32 +1957,28 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, FORWARD_IF_ERROR(ZSTD_reset_matchState( &zc->blockState.matchState, ws, - &params.cParams, + &params->cParams, + params->useRowMatchFinder, crp, needsIndexReset, ZSTD_resetTarget_CCtx), ""); /* ldm hash table */ - if (params.ldmParams.enableLdm) { + if (params->ldmParams.enableLdm) { /* TODO: avoid memset? */ - size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; + size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog; zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); zc->maxNbLdmSequences = maxNbLdmSeq; ZSTD_window_init(&zc->ldmState.window); - ZSTD_window_clear(&zc->ldmState.window); zc->ldmState.loadedDictEnd = 0; } - /* Due to alignment, when reusing a workspace, we can actually consume - * up to 3 extra bytes for alignment. 
See the comments in zstd_cwksp.h - */ - assert(ZSTD_cwksp_used(ws) >= neededSpace && - ZSTD_cwksp_used(ws) <= neededSpace + 3); - + assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace)); DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); + zc->initialized = 1; return 0; @@ -1810,6 +2034,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { + DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu", + (unsigned long long)pledgedSrcSize); { ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; unsigned const windowLog = params.cParams.windowLog; @@ -1825,7 +2051,9 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, cdict->dictContentSize, ZSTD_cpm_attachDict); params.cParams.windowLog = windowLog; - FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize, + /* loadedDictSize */ 0, ZSTDcrp_makeClean, zbuff), ""); assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); } @@ -1852,6 +2080,7 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, } } cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; /* copy block state */ ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); @@ -1868,15 +2097,17 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; assert(!cdict->matchState.dedicatedDictSearch); - - DEBUGLOG(4, "copying dictionary into context"); + DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu", + (unsigned long long)pledgedSrcSize); { unsigned const windowLog = 
params.cParams.windowLog; assert(windowLog != 0); /* Copy only compression parameters related to tables. */ params.cParams = *cdict_cParams; params.cParams.windowLog = windowLog; - FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + params.useRowMatchFinder = cdict->useRowMatchFinder; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize, + /* loadedDictSize */ 0, ZSTDcrp_leaveDirty, zbuff), ""); assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); @@ -1884,17 +2115,30 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, } ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); + assert(params.useRowMatchFinder != ZSTD_urm_auto); /* copy tables */ - { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); + { size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */) + ? 
((size_t)1 << cdict_cParams->chainLog) + : 0; size_t const hSize = (size_t)1 << cdict_cParams->hashLog; ZSTD_memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, hSize * sizeof(U32)); - ZSTD_memcpy(cctx->blockState.matchState.chainTable, + /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */ + if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) { + ZSTD_memcpy(cctx->blockState.matchState.chainTable, cdict->matchState.chainTable, chainSize * sizeof(U32)); + } + /* copy tag table */ + if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) { + size_t const tagTableSize = hSize*sizeof(U16); + ZSTD_memcpy(cctx->blockState.matchState.tagTable, + cdict->matchState.tagTable, + tagTableSize); + } } /* Zero the hashTable3, since the cdict never fills it */ @@ -1915,6 +2159,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, } cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; /* copy block state */ ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); @@ -1957,16 +2202,18 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { - DEBUGLOG(5, "ZSTD_copyCCtx_internal"); RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, "Can't copy a ctx that's not in init stage."); - + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); { ZSTD_CCtx_params params = dstCCtx->requestedParams; /* Copy only compression parameters related to tables. 
*/ params.cParams = srcCCtx->appliedParams.cParams; + assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_urm_auto); + params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder; params.fParams = fParams; - ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, + ZSTD_resetCCtx_internal(dstCCtx, ¶ms, pledgedSrcSize, + /* loadedDictSize */ 0, ZSTDcrp_leaveDirty, zbuff); assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); @@ -1978,7 +2225,11 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); /* copy tables */ - { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); + { size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy, + srcCCtx->appliedParams.useRowMatchFinder, + 0 /* forDDSDict */) + ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog) + : 0; size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; int const h3log = srcCCtx->blockState.matchState.hashLog3; size_t const h3Size = h3log ? 
((size_t)1 << h3log) : 0; @@ -2005,6 +2256,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; } dstCCtx->dictID = srcCCtx->dictID; + dstCCtx->dictContentSize = srcCCtx->dictContentSize; /* copy block state */ ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); @@ -2091,7 +2343,7 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); } - if (params->cParams.strategy != ZSTD_fast) { + if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) { U32 const chainSize = (U32)1 << params->cParams.chainLog; if (params->cParams.strategy == ZSTD_btlazy2) ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); @@ -2128,9 +2380,9 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset); mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv); } - if (seqStorePtr->longLengthID==1) + if (seqStorePtr->longLengthType==ZSTD_llt_literalLength) llCodeTable[seqStorePtr->longLengthPos] = MaxLL; - if (seqStorePtr->longLengthID==2) + if (seqStorePtr->longLengthType==ZSTD_llt_matchLength) mlCodeTable[seqStorePtr->longLengthPos] = MaxML; } @@ -2144,10 +2396,158 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) return (cctxParams->targetCBlockSize != 0); } -/* ZSTD_entropyCompressSequences_internal(): - * actually compresses both literals and sequences */ +/* ZSTD_blockSplitterEnabled(): + * Returns if block splitting param is being used + * If used, compression will do best effort to split a block in order to improve compression ratio. + * Returns 1 if true, 0 otherwise. 
*/ +static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_blockSplitterEnabled(splitBlocks=%d)", cctxParams->splitBlocks); + return (cctxParams->splitBlocks != 0); +} + +/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types + * and size of the sequences statistics + */ +typedef struct { + U32 LLtype; + U32 Offtype; + U32 MLtype; + size_t size; + size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ +} ZSTD_symbolEncodingTypeStats_t; + +/* ZSTD_buildSequencesStatistics(): + * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field. + * Modifies `nextEntropy` to have the appropriate values as a side effect. + * nbSeq must be greater than 0. + * + * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) + */ +static ZSTD_symbolEncodingTypeStats_t +ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, + const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, + BYTE* dst, const BYTE* const dstEnd, + ZSTD_strategy strategy, unsigned* countWorkspace, + void* entropyWorkspace, size_t entropyWkspSize) { + BYTE* const ostart = dst; + const BYTE* const oend = dstEnd; + BYTE* op = ostart; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + ZSTD_symbolEncodingTypeStats_t stats; + + stats.lastCountSize = 0; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + assert(op <= oend); + assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */ + /* build CTable for Literal Lengths */ + { unsigned max = MaxLL; + size_t const 
mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype, + countWorkspace, max, llCodeTable, nbSeq, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, + sizeof(prevEntropy->litlengthCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed"); + stats.size = countSize; + return stats; + } + if (stats.LLtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + /* build CTable for Offsets */ + { unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp( + countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype, + countWorkspace, max, ofCodeTable, nbSeq, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, + sizeof(prevEntropy->offcodeCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed"); + stats.size = countSize; + return stats; + } + if (stats.Offtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + /* build CTable for MatchLengths */ + { unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp( + countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype, + countWorkspace, max, 
mlCodeTable, nbSeq, + ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, + sizeof(prevEntropy->matchlengthCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed"); + stats.size = countSize; + return stats; + } + if (stats.MLtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + stats.size = (size_t)(op-ostart); + return stats; +} + +/* ZSTD_entropyCompressSeqStore_internal(): + * compresses both literals and sequences + * Returns compressed size of block, or a zstd error. + */ MEM_STATIC size_t -ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, +ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, const ZSTD_entropyCTables_t* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, const ZSTD_CCtx_params* cctxParams, @@ -2161,22 +2561,20 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; - U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ const seqDef* const sequences = seqStorePtr->sequencesStart; + const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; const BYTE* const ofCodeTable = seqStorePtr->ofCode; const BYTE* const llCodeTable = seqStorePtr->llCode; const BYTE* const mlCodeTable = seqStorePtr->mlCode; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; - size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - BYTE* seqHead; - BYTE* lastNCount = NULL; + size_t lastCountSize; entropyWorkspace = count + (MaxSeq + 1); entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); - DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq); + DEBUGLOG(4, 
"ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq); ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); assert(entropyWkspSize >= HUF_WORKSPACE_SIZE); @@ -2216,95 +2614,20 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); return (size_t)(op - ostart); } - - /* seqHead : flags for FSE encoding type */ - seqHead = op++; - assert(op <= oend); - - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* build CTable for Literal Lengths */ - { unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - DEBUGLOG(5, "Building LL table"); - nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, - count, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->fse.litlengthCTable, - LL_defaultNorm, LL_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - count, max, llCodeTable, nbSeq, - LL_defaultNorm, LL_defaultNormLog, MaxLL, - prevEntropy->fse.litlengthCTable, - sizeof(prevEntropy->fse.litlengthCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - /* build CTable for Offsets */ - { unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp( - count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - /* We can only use 
the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, - count, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->fse.offcodeCTable, - OF_defaultNorm, OF_defaultNormLog, - defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - count, max, ofCodeTable, nbSeq, - OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - prevEntropy->fse.offcodeCTable, - sizeof(prevEntropy->fse.offcodeCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - /* build CTable for MatchLengths */ - { unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp( - count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, - count, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->fse.matchlengthCTable, - ML_defaultNorm, ML_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_MatchLength, MLFSELog, 
(symbolEncodingType_e)MLtype, - count, max, mlCodeTable, nbSeq, - ML_defaultNorm, ML_defaultNormLog, MaxML, - prevEntropy->fse.matchlengthCTable, - sizeof(prevEntropy->fse.matchlengthCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - - *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + { + ZSTD_symbolEncodingTypeStats_t stats; + BYTE* seqHead = op++; + /* build stats for sequences */ + stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, + &prevEntropy->fse, &nextEntropy->fse, + op, oend, + strategy, count, + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); + *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2)); + lastCountSize = stats.lastCountSize; + op += stats.size; + } { size_t const bitstreamSize = ZSTD_encodeSequences( op, (size_t)(oend - op), @@ -2324,9 +2647,9 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, * In this exceedingly rare case, we will simply emit an uncompressed * block, since it isn't worth optimizing. 
*/ - if (lastNCount && (op - lastNCount) < 4) { - /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ - assert(op - lastNCount == 3); + if (lastCountSize && (lastCountSize + bitstreamSize) < 4) { + /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(lastCountSize + bitstreamSize == 3); DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " "emitting an uncompressed block."); return 0; @@ -2338,7 +2661,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, } MEM_STATIC size_t -ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr, +ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, const ZSTD_entropyCTables_t* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, const ZSTD_CCtx_params* cctxParams, @@ -2347,7 +2670,7 @@ ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr, void* entropyWorkspace, size_t entropyWkspSize, int bmi2) { - size_t const cSize = ZSTD_entropyCompressSequences_internal( + size_t const cSize = ZSTD_entropyCompressSeqStore_internal( seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity, entropyWorkspace, entropyWkspSize, bmi2); @@ -2357,20 +2680,20 @@ ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr, */ if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) return 0; /* block not compressed */ - FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed"); + FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed"); /* Check compressibility */ { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); if (cSize >= maxCSize) return 0; /* block not compressed */ } - DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize); + DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize); return cSize; } /* ZSTD_selectBlockCompressor() : * Not static, but internal use only (used by long distance matcher) * assumption : strat is a valid strategy */ -ZSTD_blockCompressor 
ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e useRowMatchFinder, ZSTD_dictMode_e dictMode) { static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { { ZSTD_compressBlock_fast /* default for 0 */, @@ -2418,7 +2741,28 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); - selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); + if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) { + static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { + { ZSTD_compressBlock_greedy_row, + ZSTD_compressBlock_lazy_row, + ZSTD_compressBlock_lazy2_row }, + { ZSTD_compressBlock_greedy_extDict_row, + ZSTD_compressBlock_lazy_extDict_row, + ZSTD_compressBlock_lazy2_extDict_row }, + { ZSTD_compressBlock_greedy_dictMatchState_row, + ZSTD_compressBlock_lazy_dictMatchState_row, + ZSTD_compressBlock_lazy2_dictMatchState_row }, + { ZSTD_compressBlock_greedy_dedicatedDictSearch_row, + ZSTD_compressBlock_lazy_dedicatedDictSearch_row, + ZSTD_compressBlock_lazy2_dedicatedDictSearch_row } + }; + DEBUGLOG(4, "Selecting a row-based matchfinder"); + assert(useRowMatchFinder != ZSTD_urm_auto); + selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy]; + } else { + selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + } assert(selectedCompressor != NULL); return selectedCompressor; } @@ -2434,7 +2778,7 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) { ssPtr->lit = ssPtr->litStart; ssPtr->sequences = ssPtr->sequencesStart; - ssPtr->longLengthID = 0; + ssPtr->longLengthType = ZSTD_llt_none; } typedef enum { ZSTDbss_compress, 
ZSTDbss_noCompress } ZSTD_buildSeqStore_e; @@ -2487,6 +2831,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ZSTD_ldm_blockCompress(&zc->externSeqStore, ms, &zc->seqStore, zc->blockState.nextCBlock->rep, + zc->appliedParams.useRowMatchFinder, src, srcSize); assert(zc->externSeqStore.pos <= zc->externSeqStore.size); } else if (zc->appliedParams.ldmParams.enableLdm) { @@ -2503,10 +2848,13 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ZSTD_ldm_blockCompress(&ldmSeqStore, ms, &zc->seqStore, zc->blockState.nextCBlock->rep, + zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); } else { /* not long range mode */ - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); ms->ldmSeqStore = NULL; lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } @@ -2540,9 +2888,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) outSeqs[i].rep = 0; if (i == seqStore->longLengthPos) { - if (seqStore->longLengthID == 1) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { outSeqs[i].litLength += 0x10000; - } else if (seqStore->longLengthID == 2) { + } else if (seqStore->longLengthType == ZSTD_llt_matchLength) { outSeqs[i].matchLength += 0x10000; } } @@ -2653,11 +3001,713 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore) return nbSeqs < 4 && nbLits < 10; } -static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) +static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) { - ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; - zc->blockState.prevCBlock = zc->blockState.nextCBlock; - zc->blockState.nextCBlock = tmp; + 
ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock; + bs->prevCBlock = bs->nextCBlock; + bs->nextCBlock = tmp; +} + +/* Writes the block header */ +static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock); +} + +/** ZSTD_buildBlockEntropyStats_literals() : + * Builds entropy for the literals. + * Stores literals block type (raw, rle, compressed, repeat) and + * huffman description table to hufMetadata. + * Requires ENTROPY_WORKSPACE_SIZE workspace + * @return : size of huffman description table or error code */ +static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_hufCTablesMetadata_t* hufMetadata, + const int disableLiteralsCompression, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); + BYTE* const nodeWksp = countWkspStart + countWkspSize; + const size_t nodeWkspSize = wkspEnd-nodeWksp; + unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; + unsigned huffLog = HUF_TABLELOG_DEFAULT; + HUF_repeat repeat = prevHuf->repeatMode; + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralsCompression) { + DEBUGLOG(5, "set_basic - disabled"); + hufMetadata->hType = set_basic; + return 0; + } + + /* small ? 
don't even attempt compression (speed opt) */ +#ifndef COMPRESS_LITERALS_SIZE_MIN +#define COMPRESS_LITERALS_SIZE_MIN 63 +#endif + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { + DEBUGLOG(5, "set_basic - too small"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); + if (largest == srcSize) { + DEBUGLOG(5, "set_rle"); + hufMetadata->hType = set_rle; + return 0; + } + if (largest <= (srcSize >> 7)+4) { + DEBUGLOG(5, "set_basic - no gain"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Validate the previous Huffman table */ + if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + repeat = HUF_repeat_none; + } + + /* Build Huffman Tree */ + ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); + huffLog = (U32)maxBits; + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable_wksp( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 
12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - smaller"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_repeat; + return 0; + } + } + if (newCSize + hSize >= srcSize) { + DEBUGLOG(5, "set_basic - no gains"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; + } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; + } + } +} + + +/* ZSTD_buildDummySequencesStatistics(): + * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic, + * and updates nextEntropy to the appropriate repeatMode. + */ +static ZSTD_symbolEncodingTypeStats_t +ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) { + ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0}; + nextEntropy->litlength_repeatMode = FSE_repeat_none; + nextEntropy->offcode_repeatMode = FSE_repeat_none; + nextEntropy->matchlength_repeatMode = FSE_repeat_none; + return stats; +} + +/** ZSTD_buildBlockEntropyStats_sequences() : + * Builds entropy for the sequences. + * Stores symbol compression modes and fse table to fseMetadata. + * Requires ENTROPY_WORKSPACE_SIZE wksp. 
+ * @return : size of fse tables or error code */ +static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) +{ + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + unsigned* countWorkspace = (unsigned*)workspace; + unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1); + size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace); + ZSTD_symbolEncodingTypeStats_t stats; + + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq); + stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, + prevEntropy, nextEntropy, op, oend, + strategy, countWorkspace, + entropyWorkspace, entropyWorkspaceSize) + : ZSTD_buildDummySequencesStatistics(nextEntropy); + FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); + fseMetadata->llType = (symbolEncodingType_e) stats.LLtype; + fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype; + fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype; + fseMetadata->lastCountSize = stats.lastCountSize; + return stats.size; +} + + +/** ZSTD_buildBlockEntropyStats() : + * Builds entropy for the block. 
+ * Requires workspace size ENTROPY_WORKSPACE_SIZE + * + * @return : 0 on success or error code + */ +size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) +{ + size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; + entropyMetadata->hufMetadata.hufDesSize = + ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize, + &prevEntropy->huf, &nextEntropy->huf, + &entropyMetadata->hufMetadata, + ZSTD_disableLiteralsCompression(cctxParams), + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed"); + entropyMetadata->fseMetadata.fseTablesSize = + ZSTD_buildBlockEntropyStats_sequences(seqStorePtr, + &prevEntropy->fse, &nextEntropy->fse, + cctxParams, + &entropyMetadata->fseMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed"); + return 0; +} + +/* Returns the size estimate for the literals section (header + content) of a block */ +static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; + size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB); + U32 singleStream = litSize < 256; + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + if 
(ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */ + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */ +static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, size_t nbSeq, unsigned maxCode, + const FSE_CTable* fseCTable, + const U32* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. 
*/ + assert(max <= defaultMax); + (void)defaultMax; + cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) { + return nbSeq * 10; + } + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits >> 3; +} + +/* Returns the size estimate for the sequences section (header + content) of a block */ +static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ); + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff, + fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL, + fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML, + fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + 
sequencesSectionHeaderSize; +} + +/* Returns the size estimate for a given stream of literals, of, ll, ml */ +static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return seqSize + literalsSize + ZSTD_blockHeaderSize; +} + +/* Builds entropy statistics and uses them for blocksize estimation. + * + * Returns the estimated compressed size of the seqStore, or a zstd error. 
+ */ +static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, const ZSTD_CCtx* zc) { + ZSTD_entropyCTablesMetadata_t entropyMetadata; + FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), + seqStore->ofCode, seqStore->llCode, seqStore->mlCode, + (size_t)(seqStore->sequences - seqStore->sequencesStart), + &zc->blockState.nextCBlock->entropy, &entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, + (int)(entropyMetadata.hufMetadata.hType == set_compressed), 1); +} + +/* Returns literals bytes represented in a seqStore */ +static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) { + size_t literalsBytes = 0; + size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; + size_t i; + for (i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + literalsBytes += seq.litLength; + if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) { + literalsBytes += 0x10000; + } + } + return literalsBytes; +} + +/* Returns match bytes represented in a seqStore */ +static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) { + size_t matchBytes = 0; + size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; + size_t i; + for (i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + matchBytes += seq.matchLength + MINMATCH; + if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) { + matchBytes += 0x10000; + } + } + return matchBytes; +} + +/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx). + * Stores the result in resultSeqStore. 
+ */ +static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, + const seqStore_t* originalSeqStore, + size_t startIdx, size_t endIdx) { + BYTE* const litEnd = originalSeqStore->lit; + size_t literalsBytes; + size_t literalsBytesPreceding = 0; + + *resultSeqStore = *originalSeqStore; + if (startIdx > 0) { + resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx; + literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); + } + + /* Move longLengthPos into the correct position if necessary */ + if (originalSeqStore->longLengthType != ZSTD_llt_none) { + if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) { + resultSeqStore->longLengthType = ZSTD_llt_none; + } else { + resultSeqStore->longLengthPos -= (U32)startIdx; + } + } + resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx; + resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx; + literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); + resultSeqStore->litStart += literalsBytesPreceding; + if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) { + /* This accounts for possible last literals if the derived chunk reaches the end of the block */ + resultSeqStore->lit = litEnd; + } else { + resultSeqStore->lit = resultSeqStore->litStart+literalsBytes; + } + resultSeqStore->llCode += startIdx; + resultSeqStore->mlCode += startIdx; + resultSeqStore->ofCode += startIdx; +} + +/** + * Returns the raw offset represented by the combination of offCode, ll0, and repcode history. + * offCode must be an offCode representing a repcode, therefore in the range of [0, 2]. 
+ */ +static U32 ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) { + U32 const adjustedOffCode = offCode + ll0; + assert(offCode < ZSTD_REP_NUM); + if (adjustedOffCode == ZSTD_REP_NUM) { + /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */ + assert(rep[0] > 0); + return rep[0] - 1; + } + return rep[adjustedOffCode]; +} + +/** + * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise + * due to emission of RLE/raw blocks that disturb the offset history, and replaces any repcodes within + * the seqStore that may be invalid. + * + * dRepcodes are updated as would be on the decompression side. cRepcodes are updated exactly in + * accordance with the seqStore. + */ +static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes, + seqStore_t* const seqStore, U32 const nbSeq) { + U32 idx = 0; + for (; idx < nbSeq; ++idx) { + seqDef* const seq = seqStore->sequencesStart + idx; + U32 const ll0 = (seq->litLength == 0); + U32 offCode = seq->offset - 1; + assert(seq->offset > 0); + if (offCode <= ZSTD_REP_MOVE) { + U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0); + U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0); + /* Adjust simulated decompression repcode history if we come across a mismatch. Replace + * the repcode with the offset it actually references, determined by the compression + * repcode history. + */ + if (dRawOffset != cRawOffset) { + seq->offset = cRawOffset + ZSTD_REP_NUM; + } + } + /* Compression repcode history is always updated with values directly from the unmodified seqStore. + * Decompression repcode history may use modified seq->offset value taken from compression repcode history. 
+ */ + *dRepcodes = ZSTD_updateRep(dRepcodes->rep, seq->offset - 1, ll0); + *cRepcodes = ZSTD_updateRep(cRepcodes->rep, offCode, ll0); + } +} + +/* ZSTD_compressSeqStore_singleBlock(): + * Compresses a seqStore into a block with a block header, into the buffer dst. + * + * Returns the total size of that block (including header) or a ZSTD error code. + */ +static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, + repcodes_t* const dRep, repcodes_t* const cRep, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock, U32 isPartition) { + const U32 rleMaxLength = 25; + BYTE* op = (BYTE*)dst; + const BYTE* ip = (const BYTE*)src; + size_t cSize; + size_t cSeqsSize; + + /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */ + repcodes_t const dRepOriginal = *dRep; + if (isPartition) + ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart)); + + cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, + srcSize, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!"); + + if (!zc->isFirstBlock && + cSeqsSize < rleMaxLength && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." 
+ * This is only an issue for zstd <= v1.4.3 + */ + cSeqsSize = 1; + } + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + return 0; + } + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + if (cSeqsSize == 0) { + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "Nocompress block failed"); + DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize); + *dRep = dRepOriginal; /* reset simulated decompression repcode history */ + } else if (cSeqsSize == 1) { + cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "RLE compress block failed"); + DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize); + *dRep = dRepOriginal; /* reset simulated decompression repcode history */ + } else { + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + writeBlockHeader(op, cSeqsSize, srcSize, lastBlock); + cSize = ZSTD_blockHeaderSize + cSeqsSize; + DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize); + } + return cSize; +} + +/* Struct to keep track of where we are in our recursive calls. */ +typedef struct { + U32* splitLocations; /* Array of split indices */ + size_t idx; /* The current index within splitLocations being worked on */ +} seqStoreSplits; + +#define MIN_SEQUENCES_BLOCK_SPLITTING 300 +#define MAX_NB_SPLITS 196 + +/* Helper function to perform the recursive search for block splits. + * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half. + * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then + * we do not recurse. 
+ * + * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING. + * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). + * In practice, recursion depth usually doesn't go beyond 4. + * + * Furthermore, the number of splits is capped by MAX_NB_SPLITS. At MAX_NB_SPLITS == 196 with the current existing blockSize + * maximum of 128 KB, this value is actually impossible to reach. + */ +static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx, + const ZSTD_CCtx* zc, const seqStore_t* origSeqStore) { + seqStore_t fullSeqStoreChunk; + seqStore_t firstHalfSeqStore; + seqStore_t secondHalfSeqStore; + size_t estimatedOriginalSize; + size_t estimatedFirstHalfSize; + size_t estimatedSecondHalfSize; + size_t midIdx = (startIdx + endIdx)/2; + + if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= MAX_NB_SPLITS) { + return; + } + ZSTD_deriveSeqStoreChunk(&fullSeqStoreChunk, origSeqStore, startIdx, endIdx); + ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, midIdx); + ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, midIdx, endIdx); + estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&fullSeqStoreChunk, zc); + estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&firstHalfSeqStore, zc); + estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&secondHalfSeqStore, zc); + DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu", + estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize); + if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) { + return; + } + if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) { + 
ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore); + splits->splitLocations[splits->idx] = (U32)midIdx; + splits->idx++; + ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore); + } +} + +/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio. + * + * Returns the number of splits made (which equals the size of the partition table - 1). + */ +static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) { + seqStoreSplits splits = {partitions, 0}; + if (nbSeq <= 4) { + DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split"); + /* Refuse to try and split anything with less than 4 sequences */ + return 0; + } + ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore); + splits.splitLocations[splits.idx] = nbSeq; + DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1); + return splits.idx; +} + +/* ZSTD_compressBlock_splitBlock(): + * Attempts to split a given block into multiple blocks to improve compression ratio. + * + * Returns combined size of all blocks (which includes headers), or a ZSTD error code. + */ +static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, + const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) { + size_t cSize = 0; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + U32 partitions[MAX_NB_SPLITS]; + size_t i = 0; + size_t srcBytesTotal = 0; + size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); + seqStore_t nextSeqStore; + seqStore_t currSeqStore; + + /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history + * may become invalid. 
In order to reconcile potentially invalid repcodes, we keep track of two + * separate repcode histories that simulate repcode history on compression and decompression side, + * and use the histories to determine whether we must replace a particular repcode with its raw offset. + * + * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed + * or RLE. This allows us to retrieve the offset value that an invalid repcode references within + * a nocompress/RLE block. + * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use + * the replacement offset value rather than the original repcode to update the repcode history. + * dRep also will be the final repcode history sent to the next block. + * + * See ZSTD_seqStore_resolveOffCodes() for more details. + */ + repcodes_t dRep; + repcodes_t cRep; + ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); + + if (numSplits == 0) { + size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, + &dRep, &cRep, + op, dstCapacity, + ip, blockSize, + lastBlock, 0 /* isPartition */); + FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!"); + DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits"); + assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); + return cSizeSingleBlock; + } + + ZSTD_deriveSeqStoreChunk(&currSeqStore, &zc->seqStore, 0, partitions[0]); + for (i = 0; i <= numSplits; ++i) { + size_t srcBytes; + size_t cSizeChunk; + U32 const lastPartition = (i == numSplits); + U32 lastBlockEntireSrc = 0; + + srcBytes = 
ZSTD_countSeqStoreLiteralsBytes(&currSeqStore) + ZSTD_countSeqStoreMatchBytes(&currSeqStore); + srcBytesTotal += srcBytes; + if (lastPartition) { + /* This is the final partition, need to account for possible last literals */ + srcBytes += blockSize - srcBytesTotal; + lastBlockEntireSrc = lastBlock; + } else { + ZSTD_deriveSeqStoreChunk(&nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]); + } + + cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, &currSeqStore, + &dRep, &cRep, + op, dstCapacity, + ip, srcBytes, + lastBlockEntireSrc, 1 /* isPartition */); + DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&currSeqStore, zc), cSizeChunk); + FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!"); + + ip += srcBytes; + op += cSizeChunk; + dstCapacity -= cSizeChunk; + cSize += cSizeChunk; + currSeqStore = nextSeqStore; + assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); + } + /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes + * for the next block. 
+ */ + ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t)); + return cSize; +} + +static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 lastBlock) { + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + U32 nbSeq; + size_t cSize; + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock"); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + if (bss == ZSTDbss_noCompress) { + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block"); + return cSize; + } + nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart); + } + + assert(zc->appliedParams.splitBlocks == 1); + cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq); + FORWARD_IF_ERROR(cSize, "Splitting blocks failed!"); + return cSize; } static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, @@ -2683,12 +3733,12 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, if (zc->seqCollector.collectSequences) { ZSTD_copyBlockSequences(zc); - ZSTD_confirmRepcodesAndEntropyTables(zc); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); return 0; } /* encode sequences and literals */ - cSize = ZSTD_entropyCompressSequences(&zc->seqStore, + cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, dst, dstCapacity, @@ -2717,7 +3767,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, out: if (!ZSTD_isError(cSize) && cSize > 1) { - ZSTD_confirmRepcodesAndEntropyTables(zc); + 
ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); } /* We check that dictionaries have offset codes available for the first * block. After the first block, the offcode table might not have large @@ -2770,7 +3820,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { - ZSTD_confirmRepcodesAndEntropyTables(zc); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); return cSize; } } @@ -2810,9 +3860,9 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, void const* ip, void const* iend) { - if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { - U32 const maxDist = (U32)1 << params->cParams.windowLog; - U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); + U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); + U32 const maxDist = (U32)1 << params->cParams.windowLog; + if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) { U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); @@ -2835,7 +3885,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, * Frame is supposed already started (header already produced) * @return : compressed size, or an error code */ -static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, +static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastFrameChunk) @@ -2875,6 +3925,10 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); assert(cSize > 0); assert(cSize <= 
blockSize + ZSTD_blockHeaderSize); + } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed"); + assert(cSize > 0 || cctx->seqCollector.collectSequences == 1); } else { cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, @@ -2954,6 +4008,26 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, return pos; } +/* ZSTD_writeSkippableFrame_advanced() : + * Writes out a skippable frame with the specified magic number variant (16 are supported), + * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data. + * + * Returns the total number of bytes written, or a ZSTD error code. + */ +size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant) { + BYTE* op = (BYTE*)dst; + RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */, + dstSize_tooSmall, "Not enough room for skippable frame"); + RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame"); + RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported"); + + MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant)); + MEM_writeLE32(op+4, (U32)srcSize); + ZSTD_memcpy(op+8, src, srcSize); + return srcSize + ZSTD_SKIPPABLEHEADERSIZE; +} + /* ZSTD_writeLastEmptyBlock() : * output an empty Block with end-of-frame mark to complete a frame * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) @@ -3010,11 +4084,12 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, if (!srcSize) return fhSize; /* do not generate an empty block if no input */ - if (!ZSTD_window_update(&ms->window, src, srcSize)) { + if 
(!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) { + ms->forceNonContiguous = 0; ms->nextToUpdate = ms->window.dictLimit; } if (cctx->appliedParams.ldmParams.enableLdm) { - ZSTD_window_update(&cctx->ldmState.window, src, srcSize); + ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0); } if (!frame) { @@ -3082,63 +4157,86 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, { const BYTE* ip = (const BYTE*) src; const BYTE* const iend = ip + srcSize; + int const loadLdmDict = params->ldmParams.enableLdm && ls != NULL; + + /* Assert that we the ms params match the params we're being given */ + ZSTD_assertEqualCParams(params->cParams, ms->cParams); - ZSTD_window_update(&ms->window, src, srcSize); + if (srcSize > ZSTD_CHUNKSIZE_MAX) { + /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX. + * Dictionaries right at the edge will immediately trigger overflow + * correction, but I don't want to insert extra constraints here. + */ + U32 const maxDictSize = ZSTD_CURRENT_MAX - 1; + /* We must have cleared our windows when our source is this large. */ + assert(ZSTD_window_isEmpty(ms->window)); + if (loadLdmDict) + assert(ZSTD_window_isEmpty(ls->window)); + /* If the dictionary is too large, only load the suffix of the dictionary. */ + if (srcSize > maxDictSize) { + ip = iend - maxDictSize; + src = ip; + srcSize = maxDictSize; + } + } + + DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder); + ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0); ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); + ms->forceNonContiguous = params->deterministicRefPrefix; - if (params->ldmParams.enableLdm && ls != NULL) { - ZSTD_window_update(&ls->window, src, srcSize); + if (loadLdmDict) { + ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0); ls->loadedDictEnd = params->forceWindow ? 
0 : (U32)(iend - ls->window.base); } - /* Assert that we the ms params match the params we're being given */ - ZSTD_assertEqualCParams(params->cParams, ms->cParams); - if (srcSize <= HASH_READ_SIZE) return 0; - while (iend - ip > HASH_READ_SIZE) { - size_t const remaining = (size_t)(iend - ip); - size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); - const BYTE* const ichunk = ip + chunk; + ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend); - ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); + if (loadLdmDict) + ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams); - if (params->ldmParams.enableLdm && ls != NULL) - ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams); - - switch(params->cParams.strategy) - { - case ZSTD_fast: - ZSTD_fillHashTable(ms, ichunk, dtlm); - break; - case ZSTD_dfast: - ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); - break; + switch(params->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, iend, dtlm); + break; + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, iend, dtlm); + break; - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: - if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) { - assert(chunk == remaining); /* must load everything in one go */ - ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE); - } else if (chunk >= HASH_READ_SIZE) { - ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + assert(srcSize >= HASH_READ_SIZE); + if (ms->dedicatedDictSearch) { + assert(ms->chainTable != NULL); + ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE); + } else { + assert(params->useRowMatchFinder != ZSTD_urm_auto); + if (params->useRowMatchFinder == ZSTD_urm_enableRowMatchFinder) { + size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16); + ZSTD_memset(ms->tagTable, 0, tagTableSize); + ZSTD_row_update(ms, iend-HASH_READ_SIZE); +
DEBUGLOG(4, "Using row-based hash table for lazy dict"); + } else { + ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE); + DEBUGLOG(4, "Using chain-based hash table for lazy dict"); } - break; - - case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ - case ZSTD_btopt: - case ZSTD_btultra: - case ZSTD_btultra2: - if (chunk >= HASH_READ_SIZE) - ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk); - break; - - default: - assert(0); /* not possible : not a valid strategy id */ } + break; + + case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + assert(srcSize >= HASH_READ_SIZE); + ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); + break; - ip = ichunk; + default: + assert(0); /* not possible : not a valid strategy id */ } ms->nextToUpdate = (U32)(iend - ms->window.base); @@ -3277,7 +4375,6 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, const BYTE* const dictEnd = dictPtr + dictSize; size_t dictID; size_t eSize; - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); assert(dictSize >= 8); assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); @@ -3348,6 +4445,10 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { + size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize; +#if ZSTD_TRACE + cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? 
ZSTD_trace_compress_begin(cctx) : 0; +#endif DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); /* params are supposed to be fully validated at this point */ assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); @@ -3362,7 +4463,8 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } - FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, + FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + dictContentSize, ZSTDcrp_makeClean, zbuff) , ""); { size_t const dictID = cdict ? ZSTD_compress_insertDictionary( @@ -3377,6 +4479,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; + cctx->dictContentSize = dictContentSize; } return 0; } @@ -3405,8 +4508,8 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize) { - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params); + ZSTD_CCtx_params cctxParams; + ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL); return ZSTD_compressBegin_advanced_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL /*cdict*/, @@ -3415,9 +4518,11 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params); + ZSTD_CCtx_params cctxParams; + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize,
ZSTD_cpm_noAttachDict); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel); + } DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); @@ -3471,6 +4576,30 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) return op-ostart; } +void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize) +{ +#if ZSTD_TRACE + if (cctx->traceCtx && ZSTD_trace_compress_end != NULL) { + int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0; + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + trace.dictionaryID = cctx->dictID; + trace.dictionarySize = cctx->dictContentSize; + trace.uncompressedSize = cctx->consumedSrcSize; + trace.compressedSize = cctx->producedCSize + extraCSize; + trace.params = &cctx->appliedParams; + trace.cctx = cctx; + ZSTD_trace_compress_end(cctx->traceCtx, &trace); + } + cctx->traceCtx = 0; +#else + (void)cctx; + (void)extraCSize; +#endif +} + size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) @@ -3493,25 +4622,10 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, (unsigned)cctx->pledgedSrcSizePlusOne-1, (unsigned)cctx->consumedSrcSize); } + ZSTD_CCtx_trace(cctx, endResult); return cSize + endResult; } -static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - const ZSTD_parameters* params) -{ - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); - DEBUGLOG(4, "ZSTD_compress_internal"); - return ZSTD_compress_advanced_internal(cctx, - dst, dstCapacity, - src, srcSize, - 
dict, dictSize, - &cctxParams); -} - size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -3520,11 +4634,12 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, { DEBUGLOG(4, "ZSTD_compress_advanced"); FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); - return ZSTD_compress_internal(cctx, - dst, dstCapacity, - src, srcSize, - dict, dictSize, - ¶ms); + ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, ¶ms, ZSTD_NO_CLEVEL); + return ZSTD_compress_advanced_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + &cctx->simpleApiParams); } /* Internal */ @@ -3548,11 +4663,13 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); - ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); + assert(params.fParams.contentSizeFlag == 1); + ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, ¶ms, (compressionLevel == 0) ? 
ZSTD_CLEVEL_DEFAULT: compressionLevel); + } DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); - assert(params.fParams.contentSizeFlag == 1); - return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams); } size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, @@ -3596,7 +4713,10 @@ size_t ZSTD_estimateCDictSize_advanced( DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) - + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small + * in case we are using DDS with row-hash. */ + + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams), + /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0) + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); } @@ -3627,9 +4747,6 @@ static size_t ZSTD_initCDict_internal( assert(!ZSTD_checkCParams(params.cParams)); cdict->matchState.cParams = params.cParams; cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch; - if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) { - cdict->matchState.dedicatedDictSearch = 0; - } if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { cdict->dictContent = dictBuffer; } else { @@ -3650,6 +4767,7 @@ static size_t ZSTD_initCDict_internal( &cdict->matchState, &cdict->workspace, ¶ms.cParams, + params.useRowMatchFinder, ZSTDcrp_makeClean, ZSTDirp_reset, ZSTD_resetTarget_CDict), ""); @@ -3673,14 +4791,17 @@ static size_t ZSTD_initCDict_internal( static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_compressionParameters cParams, ZSTD_customMem customMem) + ZSTD_compressionParameters cParams, + ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + U32 enableDedicatedDictSearch, + ZSTD_customMem customMem) { if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; { size_t const workspaceSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + - ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); void* const workspace = ZSTD_customMalloc(workspaceSize, customMem); @@ -3698,8 +4819,8 @@ static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, assert(cdict != NULL); ZSTD_cwksp_move(&cdict->workspace, &ws); cdict->customMem = customMem; - cdict->compressionLevel = 0; /* signals advanced API usage */ - + cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */ + cdict->useRowMatchFinder = useRowMatchFinder; return cdict; } } @@ -3751,10 +4872,13 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); } + DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch); cctxParams.cParams = cParams; + cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); cdict = ZSTD_createCDict_advanced_internal(dictSize, dictLoadMethod, cctxParams.cParams, + cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch, customMem); if (ZSTD_isError( ZSTD_initCDict_internal(cdict, @@ -3823,7 +4947,9 @@ const ZSTD_CDict* ZSTD_initStaticCDict( ZSTD_dictContentType_e dictContentType, ZSTD_compressionParameters cParams) { - size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams); + /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */ + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0); size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) @@ -3848,6 +4974,8 @@ const ZSTD_CDict* ZSTD_initStaticCDict( ZSTD_CCtxParams_init(¶ms, 0); params.cParams = cParams; + params.useRowMatchFinder = useRowMatchFinder; + cdict->useRowMatchFinder = useRowMatchFinder; if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dict, dictSize, @@ -3874,62 +5002,88 @@ unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict) return cdict->dictID; } - -/* ZSTD_compressBegin_usingCDict_advanced() : - * cdict must be != NULL */ -size_t ZSTD_compressBegin_usingCDict_advanced( +/* ZSTD_compressBegin_usingCDict_internal() : + * Implementation of various ZSTD_compressBegin_usingCDict* functions. + */ +static size_t ZSTD_compressBegin_usingCDict_internal( ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) { - DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); + ZSTD_CCtx_params cctxParams; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal"); RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); - { ZSTD_CCtx_params params = cctx->requestedParams; + /* Initialize the cctxParams from the cdict */ + { + ZSTD_parameters params; + params.fParams = fParams; params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || cdict->compressionLevel == 0 ) - && (params.attachDictPref != ZSTD_dictForceLoad) ? + || cdict->compressionLevel == 0 ) ? ZSTD_getCParamsFromCDict(cdict) : ZSTD_getCParams(cdict->compressionLevel, pledgedSrcSize, cdict->dictContentSize); - /* Increase window log to fit the entire dictionary and source if the - * source size is known. Limit the increase to 19, which is the - * window log for compression level 1 with the largest source size. 
- */ - if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { - U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); - U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; - params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog); - } - params.fParams = fParams; - return ZSTD_compressBegin_internal(cctx, - NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, - cdict, - ¶ms, pledgedSrcSize, - ZSTDb_not_buffered); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, cdict->compressionLevel); + } + /* Increase window log to fit the entire dictionary and source if the + * source size is known. Limit the increase to 19, which is the + * window log for compression level 1 with the largest source size. + */ + if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); + U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; + cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog); } + return ZSTD_compressBegin_internal(cctx, + NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, + cdict, + &cctxParams, pledgedSrcSize, + ZSTDb_not_buffered); +} + + +/* ZSTD_compressBegin_usingCDict_advanced() : + * This function is DEPRECATED. 
+ * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict_advanced( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) +{ + return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize); } /* ZSTD_compressBegin_usingCDict() : - * pledgedSrcSize=0 means "unknown" - * if pledgedSrcSize>0, it will enable contentSizeFlag */ + * cdict must be != NULL */ size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) { ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); - return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); } -size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, +/*! ZSTD_compress_usingCDict_internal(): + * Implementation of various ZSTD_compress_usingCDict* functions. + */ +static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) { - FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ + FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); } +/*! ZSTD_compress_usingCDict_advanced(): + * This function is DEPRECATED. + */ +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + /*! 
ZSTD_compress_usingCDict() : * Compression using a digested Dictionary. * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. @@ -3941,7 +5095,7 @@ size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) { ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); + return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); } @@ -4071,7 +5225,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); - zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, ¶ms); + ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, ¶ms); FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); return 0; } @@ -4351,8 +5505,13 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ - if (cctx->cdict) - params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */ + if (cctx->cdict && !cctx->localDict.cdict) { + /* Let the cdict's compression level take priority over the requested params. + * But do not take the cdict's compression level if the "cdict" is actually a localDict + * generated from ZSTD_initLocalDict(). 
+ */ + params.compressionLevel = cctx->cdict->compressionLevel; + } DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ { @@ -4371,11 +5530,21 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, params.ldmParams.enableLdm = 1; } + if (ZSTD_CParams_useBlockSplitter(¶ms.cParams)) { + DEBUGLOG(4, "Block splitter enabled by default (window size >= 128K, strategy >= btopt)"); + params.splitBlocks = 1; + } + + params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams); + #ifdef ZSTD_MULTITHREAD if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ } if (params.nbWorkers > 0) { +#if ZSTD_TRACE + cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0; +#endif /* mt context creation */ if (cctx->mtctx == NULL) { DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", @@ -4389,6 +5558,10 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, cctx->mtctx, prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); + cctx->dictID = cctx->cdict ? cctx->cdict->dictID : 0; + cctx->dictContentSize = cctx->cdict ? 
cctx->cdict->dictContentSize : prefixDict.dictSize; + cctx->consumedSrcSize = 0; + cctx->producedCSize = 0; cctx->streamStage = zcss_load; cctx->appliedParams = params; } else @@ -4450,8 +5623,12 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, size_t const ipos = input->pos; size_t const opos = output->pos; flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); + cctx->consumedSrcSize += (U64)(input->pos - ipos); + cctx->producedCSize += (U64)(output->pos - opos); if ( ZSTD_isError(flushMin) || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ + if (flushMin == 0) + ZSTD_CCtx_trace(cctx, 0); ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); } FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); @@ -4834,7 +6011,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, continue; } - compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore, + compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore, &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, &cctx->appliedParams, op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, @@ -4866,7 +6043,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, } else { U32 cBlockHeader; /* Error checking and repcodes update */ - ZSTD_confirmRepcodesAndEntropyTables(cctx); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState); if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; @@ -4967,6 +6144,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) #define ZSTD_MAX_CLEVEL 22 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } +int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; } static const ZSTD_compressionParameters 
ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { { /* "default" - for any srcSize > 256 KB */ @@ -5099,7 +6277,10 @@ static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const static int ZSTD_dedicatedDictSearch_isSupported( ZSTD_compressionParameters const* cParams) { - return (cParams->strategy >= ZSTD_greedy) && (cParams->strategy <= ZSTD_lazy2); + return (cParams->strategy >= ZSTD_greedy) + && (cParams->strategy <= ZSTD_lazy2) + && (cParams->hashLog > cParams->chainLog) + && (cParams->chainLog <= 24); } /** @@ -5117,6 +6298,9 @@ static void ZSTD_dedicatedDictSearch_revertCParams( case ZSTD_lazy: case ZSTD_lazy2: cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG; + if (cParams->hashLog < ZSTD_HASHLOG_MIN) { + cParams->hashLog = ZSTD_HASHLOG_MIN; + } break; case ZSTD_btlazy2: case ZSTD_btopt: @@ -5165,6 +6349,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, else row = compressionLevel; { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy); /* acceleration factor */ if (compressionLevel < 0) { int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel); diff --git a/thirdparty/zstd/compress/zstd_compress_internal.h b/thirdparty/zstd/compress/zstd_compress_internal.h index c04998b8b1..3b04fd09f6 100644 --- a/thirdparty/zstd/compress/zstd_compress_internal.h +++ b/thirdparty/zstd/compress/zstd_compress_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -81,6 +81,53 @@ typedef struct { ZSTD_fseCTables_t fse; } ZSTD_entropyCTables_t; +/*********************************************** +* Entropy buffer statistics structs and funcs * +***********************************************/ +/** ZSTD_hufCTablesMetadata_t : + * Stores Literals Block Type for a super-block in hType, and + * huffman tree description in hufDesBuffer. + * hufDesSize refers to the size of huffman tree description in bytes. + * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */ +typedef struct { + symbolEncodingType_e hType; + BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; + size_t hufDesSize; +} ZSTD_hufCTablesMetadata_t; + +/** ZSTD_fseCTablesMetadata_t : + * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and + * fse tables in fseTablesBuffer. + * fseTablesSize refers to the size of fse tables in bytes. + * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */ +typedef struct { + symbolEncodingType_e llType; + symbolEncodingType_e ofType; + symbolEncodingType_e mlType; + BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; + size_t fseTablesSize; + size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ +} ZSTD_fseCTablesMetadata_t; + +typedef struct { + ZSTD_hufCTablesMetadata_t hufMetadata; + ZSTD_fseCTablesMetadata_t fseMetadata; +} ZSTD_entropyCTablesMetadata_t; + +/** ZSTD_buildBlockEntropyStats() : + * Builds entropy for the block. 
+ * @return : 0 on success or error code */ +size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize); + +/********************************* +* Compression internals structs * +*********************************/ + typedef struct { U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */ U32 len; /* Raw length of match */ @@ -141,14 +188,21 @@ typedef struct { } ZSTD_compressedBlockState_t; typedef struct { - BYTE const* nextSrc; /* next block here to continue on current prefix */ - BYTE const* base; /* All regular indexes relative to this position */ - BYTE const* dictBase; /* extDict indexes relative to this position */ - U32 dictLimit; /* below that point, need extDict */ - U32 lowLimit; /* below that point, no more valid data */ + BYTE const* nextSrc; /* next block here to continue on current prefix */ + BYTE const* base; /* All regular indexes relative to this position */ + BYTE const* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more valid data */ + U32 nbOverflowCorrections; /* Number of times overflow correction has run since + * ZSTD_window_init(). Useful for debugging coredumps + * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY. + */ } ZSTD_window_t; typedef struct ZSTD_matchState_t ZSTD_matchState_t; + +#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */ + struct ZSTD_matchState_t { ZSTD_window_t window; /* State for window round buffer management */ U32 loadedDictEnd; /* index of end of dictionary, within context's referential. 
@@ -160,9 +214,17 @@ struct ZSTD_matchState_t { */ U32 nextToUpdate; /* index from which to continue table update */ U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ + + U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ + U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ + U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ + U32* hashTable; U32* hashTable3; U32* chainTable; + + U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */ + int dedicatedDictSearch; /* Indicates whether this matchState is using the * dedicated dictionary search structure. */ @@ -184,12 +246,21 @@ typedef struct { } ldmEntry_t; typedef struct { + BYTE const* split; + U32 hash; + U32 checksum; + ldmEntry_t* bucket; +} ldmMatchCandidate_t; + +#define LDM_BATCH_SIZE 64 + +typedef struct { ZSTD_window_t window; /* State for the window round buffer management */ ldmEntry_t* hashTable; U32 loadedDictEnd; BYTE* bucketOffsets; /* Next position in bucket to insert entry */ - U64 hashPower; /* Used to compute the rolling hash. - * Depends on ldmParams.minMatchLength */ + size_t splitIndices[LDM_BATCH_SIZE]; + ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE]; } ldmState_t; typedef struct { @@ -246,6 +317,15 @@ struct ZSTD_CCtx_params_s { ZSTD_sequenceFormat_e blockDelimiters; int validateSequences; + /* Block splitting */ + int splitBlocks; + + /* Param for deciding whether to use row-based matchfinder */ + ZSTD_useRowMatchFinderMode_e useRowMatchFinder; + + /* Always load a dictionary in ext-dict mode (not prefix mode)? 
*/ + int deterministicRefPrefix; + /* Internal use, for createCCtxParams() and freeCCtxParams() only */ ZSTD_customMem customMem; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ @@ -269,7 +349,9 @@ struct ZSTD_CCtx_s { int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ ZSTD_CCtx_params requestedParams; ZSTD_CCtx_params appliedParams; + ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */ U32 dictID; + size_t dictContentSize; ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */ size_t blockSize; @@ -321,6 +403,11 @@ struct ZSTD_CCtx_s { #ifdef ZSTD_MULTITHREAD ZSTDMT_CCtx* mtctx; #endif + + /* Tracing */ +#if ZSTD_TRACE + ZSTD_TraceCtx traceCtx; +#endif }; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; @@ -355,7 +442,7 @@ typedef enum { typedef size_t (*ZSTD_blockCompressor) ( ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode); +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); MEM_STATIC U32 ZSTD_LLcode(U32 litLength) @@ -532,8 +619,8 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera /* literal Length */ if (litLength>0xFFFF) { - assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ - seqStorePtr->longLengthID = 1; + assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ + seqStorePtr->longLengthType = ZSTD_llt_literalLength; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); } seqStorePtr->sequences[0].litLength = (U16)litLength; @@ -543,8 +630,8 @@ void 
ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera /* match Length */ if (mlBase>0xFFFF) { - assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ - seqStorePtr->longLengthID = 2; + assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ + seqStorePtr->longLengthType = ZSTD_llt_matchLength; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); } seqStorePtr->sequences[0].matchLength = (U16)mlBase; @@ -795,6 +882,13 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) window->dictLimit = end; } +MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window) +{ + return window.dictLimit == 1 && + window.lowLimit == 1 && + (window.nextSrc - window.base) == 1; +} + /** * ZSTD_window_hasExtDict(): * Returns non-zero if the window has a non-empty extDict. @@ -818,15 +912,69 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) ZSTD_noDict; } +/* Defining this macro to non-zero tells zstd to run the overflow correction + * code much more frequently. This is very inefficient, and should only be + * used for tests and fuzzers. + */ +#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY +# ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1 +# else +# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0 +# endif +#endif + +/** + * ZSTD_window_canOverflowCorrect(): + * Returns non-zero if the indices are large enough for overflow correction + * to work correctly without impacting compression ratio. 
+ */ +MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window, + U32 cycleLog, + U32 maxDist, + U32 loadedDictEnd, + void const* src) +{ + U32 const cycleSize = 1u << cycleLog; + U32 const curr = (U32)((BYTE const*)src - window.base); + U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize); + + /* Adjust the min index to backoff the overflow correction frequency, + * so we don't waste too much CPU in overflow correction. If this + * computation overflows we don't really care, we just need to make + * sure it is at least minIndexToOverflowCorrect. + */ + U32 const adjustment = window.nbOverflowCorrections + 1; + U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment, + minIndexToOverflowCorrect); + U32 const indexLargeEnough = curr > adjustedIndex; + + /* Only overflow correct early if the dictionary is invalidated already, + * so we don't hurt compression ratio. + */ + U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd; + + return indexLargeEnough && dictionaryInvalidated; +} + /** * ZSTD_window_needOverflowCorrection(): * Returns non-zero if the indices are getting too large and need overflow * protection. */ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, + U32 cycleLog, + U32 maxDist, + U32 loadedDictEnd, + void const* src, void const* srcEnd) { U32 const curr = (U32)((BYTE const*)srcEnd - window.base); + if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { + if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) { + return 1; + } + } return curr > ZSTD_CURRENT_MAX; } @@ -838,7 +986,6 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, * * The least significant cycleLog bits of the indices must remain the same, * which may be 0. Every index up to maxDist in the past must be valid. - * NOTE: (maxDist & cycleMask) must be zero. 
*/ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, U32 maxDist, void const* src) @@ -862,17 +1009,25 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32: * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32. */ - U32 const cycleMask = (1U << cycleLog) - 1; + U32 const cycleSize = 1u << cycleLog; + U32 const cycleMask = cycleSize - 1; U32 const curr = (U32)((BYTE const*)src - window->base); U32 const currentCycle0 = curr & cycleMask; /* Exclude zero so that newCurrent - maxDist >= 1. */ - U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0; - U32 const newCurrent = currentCycle1 + maxDist; + U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0; + U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize); U32 const correction = curr - newCurrent; - assert((maxDist & cycleMask) == 0); + /* maxDist must be a power of two so that: + * (newCurrent & cycleMask) == (curr & cycleMask) + * This is required to not corrupt the chains / binary tree. 
+ */ + assert((maxDist & (maxDist - 1)) == 0); + assert((curr & cycleMask) == (newCurrent & cycleMask)); assert(curr > newCurrent); - /* Loose bound, should be around 1<<29 (see above) */ - assert(correction > 1<<28); + if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { + /* Loose bound, should be around 1<<29 (see above) */ + assert(correction > 1<<28); + } window->base += correction; window->dictBase += correction; @@ -888,6 +1043,8 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, assert(window->lowLimit <= newCurrent); assert(window->dictLimit <= newCurrent); + ++window->nbOverflowCorrections; + DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, window->lowLimit); return correction; @@ -997,6 +1154,7 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { window->dictLimit = 1; /* start from 1, so that 1st position is valid */ window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ window->nextSrc = window->base + 1; /* see issue #1241 */ + window->nbOverflowCorrections = 0; } /** @@ -1007,7 +1165,8 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { * Returns non-zero if the segment is contiguous. 
*/ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, - void const* src, size_t srcSize) + void const* src, size_t srcSize, + int forceNonContiguous) { BYTE const* const ip = (BYTE const*)src; U32 contiguous = 1; @@ -1017,7 +1176,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, assert(window->base != NULL); assert(window->dictBase != NULL); /* Check if blocks follow each other */ - if (src != window->nextSrc) { + if (src != window->nextSrc || forceNonContiguous) { /* not contiguous */ size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); @@ -1200,4 +1359,9 @@ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSe * condition for correct operation : hashLog > 1 */ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); +/** ZSTD_CCtx_trace() : + * Trace the end of a compression call. + */ +void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); + #endif /* ZSTD_COMPRESS_H */ diff --git a/thirdparty/zstd/compress/zstd_compress_literals.c b/thirdparty/zstd/compress/zstd_compress_literals.c index 6dd1c1447a..008337bb1b 100644 --- a/thirdparty/zstd/compress/zstd_compress_literals.c +++ b/thirdparty/zstd/compress/zstd_compress_literals.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -15,7 +15,7 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); @@ -42,7 +42,7 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ @@ -117,7 +117,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, } } - if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { + if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); } diff --git a/thirdparty/zstd/compress/zstd_compress_literals.h b/thirdparty/zstd/compress/zstd_compress_literals.h index 8b08705743..9904c0cd30 100644 --- a/thirdparty/zstd/compress/zstd_compress_literals.h +++ b/thirdparty/zstd/compress/zstd_compress_literals.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/compress/zstd_compress_sequences.c b/thirdparty/zstd/compress/zstd_compress_sequences.c index be30c08c6b..611eabdcbb 100644 --- a/thirdparty/zstd/compress/zstd_compress_sequences.c +++ b/thirdparty/zstd/compress/zstd_compress_sequences.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -85,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t { unsigned cost = 0; unsigned s; + + assert(total > 0); for (s = 0; s <= max; ++s) { unsigned norm = (unsigned)((256 * count[s]) / total); if (count[s] != 0 && norm == 0) @@ -232,6 +234,11 @@ ZSTD_selectEncodingType( return set_compressed; } +typedef struct { + S16 norm[MaxSeq + 1]; + U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)]; +} ZSTD_BuildCTableWksp; + size_t ZSTD_buildCTable(void* dst, size_t dstCapacity, FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, @@ -258,7 +265,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */ return 0; case set_compressed: { - S16 norm[MaxSeq + 1]; + ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace; size_t nbSeq_1 = nbSeq; const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); if (count[codeTable[nbSeq-1]] > 1) { @@ -266,11 +273,12 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, nbSeq_1--; } assert(nbSeq_1 > 1); - assert(entropyWorkspaceSize >= FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog)); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), ""); - { size_t const NCountSize = 
FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ + assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp)); + (void)entropyWorkspaceSize; + FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), ""); + { size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog); /* overflow protected */ FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), ""); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), ""); return NCountSize; } } diff --git a/thirdparty/zstd/compress/zstd_compress_sequences.h b/thirdparty/zstd/compress/zstd_compress_sequences.h index 68c6f9a5ac..7991364c2f 100644 --- a/thirdparty/zstd/compress/zstd_compress_sequences.h +++ b/thirdparty/zstd/compress/zstd_compress_sequences.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/compress/zstd_compress_superblock.c b/thirdparty/zstd/compress/zstd_compress_superblock.c index e23e619eef..e4e45069bc 100644 --- a/thirdparty/zstd/compress/zstd_compress_superblock.c +++ b/thirdparty/zstd/compress/zstd_compress_superblock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -15,288 +15,10 @@ #include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */ #include "hist.h" /* HIST_countFast_wksp */ -#include "zstd_compress_internal.h" +#include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */ #include "zstd_compress_sequences.h" #include "zstd_compress_literals.h" -/*-************************************* -* Superblock entropy buffer structs -***************************************/ -/** ZSTD_hufCTablesMetadata_t : - * Stores Literals Block Type for a super-block in hType, and - * huffman tree description in hufDesBuffer. - * hufDesSize refers to the size of huffman tree description in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ -typedef struct { - symbolEncodingType_e hType; - BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; - size_t hufDesSize; -} ZSTD_hufCTablesMetadata_t; - -/** ZSTD_fseCTablesMetadata_t : - * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and - * fse tables in fseTablesBuffer. - * fseTablesSize refers to the size of fse tables in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ -typedef struct { - symbolEncodingType_e llType; - symbolEncodingType_e ofType; - symbolEncodingType_e mlType; - BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; - size_t fseTablesSize; - size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ -} ZSTD_fseCTablesMetadata_t; - -typedef struct { - ZSTD_hufCTablesMetadata_t hufMetadata; - ZSTD_fseCTablesMetadata_t fseMetadata; -} ZSTD_entropyCTablesMetadata_t; - - -/** ZSTD_buildSuperBlockEntropy_literal() : - * Builds entropy for the super-block literals. - * Stores literals block type (raw, rle, compressed, repeat) and - * huffman description table to hufMetadata. 
- * @return : size of huffman description table or error code */ -static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, - const ZSTD_hufCTables_t* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_hufCTablesMetadata_t* hufMetadata, - const int disableLiteralsCompression, - void* workspace, size_t wkspSize) -{ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); - BYTE* const nodeWksp = countWkspStart + countWkspSize; - const size_t nodeWkspSize = wkspEnd-nodeWksp; - unsigned maxSymbolValue = 255; - unsigned huffLog = HUF_TABLELOG_DEFAULT; - HUF_repeat repeat = prevHuf->repeatMode; - - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); - - /* Prepare nextEntropy assuming reusing the existing table */ - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - - if (disableLiteralsCompression) { - DEBUGLOG(5, "set_basic - disabled"); - hufMetadata->hType = set_basic; - return 0; - } - - /* small ? don't even attempt compression (speed opt) */ -# define COMPRESS_LITERALS_SIZE_MIN 63 - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) { - DEBUGLOG(5, "set_basic - too small"); - hufMetadata->hType = set_basic; - return 0; - } - } - - /* Scan input and build symbol stats */ - { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); - FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); - if (largest == srcSize) { - DEBUGLOG(5, "set_rle"); - hufMetadata->hType = set_rle; - return 0; - } - if (largest <= (srcSize >> 7)+4) { - DEBUGLOG(5, "set_basic - no gain"); - hufMetadata->hType = set_basic; - return 0; - } - } - - /* Validate the previous Huffman table */ - if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { - repeat = HUF_repeat_none; - } - - /* Build Huffman Tree */ - ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); - { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, - maxSymbolValue, huffLog, - nodeWksp, nodeWkspSize); - FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); - huffLog = (U32)maxBits; - { /* Build and write the CTable */ - size_t const newCSize = HUF_estimateCompressedSize( - (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); - size_t const hSize = HUF_writeCTable( - hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), - (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog); - /* Check against repeating the previous CTable */ - if (repeat != HUF_repeat_none) { - size_t const oldCSize = HUF_estimateCompressedSize( - (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); - if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { - DEBUGLOG(5, "set_repeat - smaller"); - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_repeat; - return 0; - } - } - if (newCSize + hSize >= srcSize) { - DEBUGLOG(5, "set_basic - no 
gains"); - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_basic; - return 0; - } - DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); - hufMetadata->hType = set_compressed; - nextHuf->repeatMode = HUF_repeat_check; - return hSize; - } - } -} - -/** ZSTD_buildSuperBlockEntropy_sequences() : - * Builds entropy for the super-block sequences. - * Stores symbol compression modes and fse table to fseMetadata. - * @return : size of fse tables or error code */ -static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, - const ZSTD_fseCTables_t* prevEntropy, - ZSTD_fseCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_fseCTablesMetadata_t* fseMetadata, - void* workspace, size_t wkspSize) -{ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); - BYTE* const cTableWksp = countWkspStart + countWkspSize; - const size_t cTableWkspSize = wkspEnd-cTableWksp; - ZSTD_strategy const strategy = cctxParams->cParams.strategy; - FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; - size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; - BYTE* const ostart = fseMetadata->fseTablesBuffer; - BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); - BYTE* op = ostart; - - assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); - ZSTD_memset(workspace, 0, wkspSize); - - fseMetadata->lastCountSize = 0; - /* convert 
length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* build CTable for Literal Lengths */ - { U32 LLtype; - unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building LL table"); - nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->litlengthCTable, - LL_defaultNorm, LL_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, - prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->llType = (symbolEncodingType_e) LLtype; - } } - /* build CTable for Offsets */ - { U32 Offtype; - unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, - countWksp, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->offcodeCTable, - OF_defaultNorm, OF_defaultNormLog, - defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->ofType = (symbolEncodingType_e) Offtype; - } } - /* build CTable for MatchLengths */ - { U32 MLtype; - unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->matchlengthCTable, - ML_defaultNorm, ML_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, - prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), 
- cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->mlType = (symbolEncodingType_e) MLtype; - } } - assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); - return op-ostart; -} - - -/** ZSTD_buildSuperBlockEntropy() : - * Builds entropy for the super-block. - * @return : 0 on success or error code */ -static size_t -ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize) -{ - size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); - entropyMetadata->hufMetadata.hufDesSize = - ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, - &prevEntropy->huf, &nextEntropy->huf, - &entropyMetadata->hufMetadata, - ZSTD_disableLiteralsCompression(cctxParams), - workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); - entropyMetadata->fseMetadata.fseTablesSize = - ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, - &prevEntropy->fse, &nextEntropy->fse, - cctxParams, - &entropyMetadata->fseMetadata, - workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); - return 0; -} - /** ZSTD_compressSubBlock_literal() : * Compresses literals section for a sub-block. * When we have to write the Huffman table we will sometimes choose a header @@ -304,7 +26,7 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, * before we know the table size + compressed size, so we have a bound on the * table size. If we guessed incorrectly, we fall back to uncompressed literals. 
* - * We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded + * We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded * in writing the header, otherwise it is set to 0. * * hufMetadata->hType has literals block type info. @@ -643,8 +365,9 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, void* workspace, size_t wkspSize, int writeEntropy) { - size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ size_t cSeqSizeEstimate = 0; + if (nbSeq == 0) return sequencesSectionHeaderSize; cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, nbSeq, fseTables->offcodeCTable, NULL, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, @@ -830,7 +553,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, unsigned lastBlock) { ZSTD_entropyCTablesMetadata_t entropyMetadata; - FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, + FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, diff --git a/thirdparty/zstd/compress/zstd_compress_superblock.h b/thirdparty/zstd/compress/zstd_compress_superblock.h index 07f4cb1dc6..176f9b106f 100644 --- a/thirdparty/zstd/compress/zstd_compress_superblock.h +++ b/thirdparty/zstd/compress/zstd_compress_superblock.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/compress/zstd_cwksp.h b/thirdparty/zstd/compress/zstd_cwksp.h index d65170b39c..2656d26ca2 100644 --- a/thirdparty/zstd/compress/zstd_cwksp.h +++ b/thirdparty/zstd/compress/zstd_cwksp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -35,6 +35,10 @@ extern "C" { #define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 #endif + +/* Set our tables and aligneds to align by 64 bytes */ +#define ZSTD_CWKSP_ALIGNMENT_BYTES 64 + /*-************************************* * Structures ***************************************/ @@ -117,10 +121,11 @@ typedef enum { * - Tables: these are any of several different datastructures (hash tables, * chain tables, binary trees) that all respect a common format: they are * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). - * Their sizes depend on the cparams. + * Their sizes depend on the cparams. These tables are 64-byte aligned. * * - Aligned: these buffers are used for various purposes that require 4 byte - * alignment, but don't require any initialization before they're used. + * alignment, but don't require any initialization before they're used. These + * buffers are each aligned to 64 bytes. * * - Buffers: these buffers are used for various purposes that don't require * any alignment or initialization before they're used. This means they can @@ -133,8 +138,7 @@ typedef enum { * * 1. Objects * 2. Buffers - * 3. Aligned - * 4. Tables + * 3. Aligned/Tables * * Attempts to reserve objects of different types out of order will fail. 
*/ @@ -187,6 +191,8 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { * Since tables aren't currently redzoned, you don't need to call through this * to figure out how much space you need for the matchState tables. Everything * else is though. + * + * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size(). */ MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { if (size == 0) @@ -198,30 +204,110 @@ MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { #endif } -MEM_STATIC void ZSTD_cwksp_internal_advance_phase( +/** + * Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes. + * Used to determine the number of bytes required for a given "aligned". + */ +MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) { + return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES)); +} + +/** + * Returns the amount of additional space the cwksp must allocate + * for internal purposes (currently only alignment). + */ +MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) { + /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes + * to align the beginning of tables section, as well as another n_2=[0, 63] bytes + * to align the beginning of the aligned section. + * + * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and + * aligneds being sized in multiples of 64 bytes. + */ + size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES; + return slackSpace; +} + + +/** + * Return the number of additional bytes required to align a pointer to the given number of bytes. + * alignBytes must be a power of two. 
+ */ +MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) { + size_t const alignBytesMask = alignBytes - 1; + size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask; + assert((alignBytes & alignBytesMask) == 0); + assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES); + return bytes; +} + +/** + * Internal function. Do not use directly. + * Reserves the given number of bytes within the aligned/buffer segment of the wksp, which + * counts from the end of the wksp. (as opposed to the object/table segment) + * + * Returns a pointer to the beginning of that space. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes) { + void* const alloc = (BYTE*)ws->allocStart - bytes; + void* const bottom = ws->tableEnd; + DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(alloc >= bottom); + if (alloc < bottom) { + DEBUGLOG(4, "cwksp: alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + if (alloc < ws->tableValidEnd) { + ws->tableValidEnd = alloc; + } + ws->allocStart = alloc; + return alloc; +} + +/** + * Moves the cwksp to the next phase, and does any necessary allocations. + * Returns a 0 on success, or zstd error + */ +MEM_STATIC size_t ZSTD_cwksp_internal_advance_phase( ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { assert(phase >= ws->phase); if (phase > ws->phase) { + /* Going from allocating objects to allocating buffers */ if (ws->phase < ZSTD_cwksp_alloc_buffers && phase >= ZSTD_cwksp_alloc_buffers) { ws->tableValidEnd = ws->objectEnd; } + + /* Going from allocating buffers to allocating aligneds/tables */ if (ws->phase < ZSTD_cwksp_alloc_aligned && phase >= ZSTD_cwksp_alloc_aligned) { - /* If unaligned allocations down from a too-large top have left us - * unaligned, we need to realign our alloc ptr. 
Technically, this - * can consume space that is unaccounted for in the neededSpace - * calculation. However, I believe this can only happen when the - * workspace is too large, and specifically when it is too large - * by a larger margin than the space that will be consumed. */ - /* TODO: cleaner, compiler warning friendly way to do this??? */ - ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1)); - if (ws->allocStart < ws->tableValidEnd) { - ws->tableValidEnd = ws->allocStart; + { /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */ + size_t const bytesToAlign = + ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES); + DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign); + ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */ + RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign), + memory_allocation, "aligned phase - alignment initial allocation failed!"); + } + { /* Align the start of the tables to 64 bytes. 
Use [0, 63] bytes */ + void* const alloc = ws->objectEnd; + size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES); + void* const end = (BYTE*)alloc + bytesToAlign; + DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign); + RETURN_ERROR_IF(end > ws->workspaceEnd, memory_allocation, + "table phase - alignment initial allocation failed!"); + ws->objectEnd = end; + ws->tableEnd = end; + ws->tableValidEnd = end; } } ws->phase = phase; + ZSTD_cwksp_assert_internal_consistency(ws); } + return 0; } /** @@ -237,38 +323,25 @@ MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { MEM_STATIC void* ZSTD_cwksp_reserve_internal( ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { void* alloc; - void* bottom = ws->tableEnd; - ZSTD_cwksp_internal_advance_phase(ws, phase); - alloc = (BYTE *)ws->allocStart - bytes; - - if (bytes == 0) + if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) { return NULL; + } #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) /* over-reserve space */ - alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; + bytes += 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; #endif - DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", - alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); - ZSTD_cwksp_assert_internal_consistency(ws); - assert(alloc >= bottom); - if (alloc < bottom) { - DEBUGLOG(4, "cwksp: alloc failed!"); - ws->allocFailed = 1; - return NULL; - } - if (alloc < ws->tableValidEnd) { - ws->tableValidEnd = alloc; - } - ws->allocStart = alloc; + alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes); #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on * either size. 
*/ - alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; - if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { - __asan_unpoison_memory_region(alloc, bytes); + if (alloc) { + alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } } #endif @@ -283,28 +356,36 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { } /** - * Reserves and returns memory sized on and aligned on sizeof(unsigned). + * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). */ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { - assert((bytes & (sizeof(U32)-1)) == 0); - return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); + void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES), + ZSTD_cwksp_alloc_aligned); + assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); + return ptr; } /** - * Aligned on sizeof(unsigned). These buffers have the special property that + * Aligned on 64 bytes. These buffers have the special property that * their values remain constrained, allowing us to re-use them without * memset()-ing them. 
*/ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; - void* alloc = ws->tableEnd; - void* end = (BYTE *)alloc + bytes; - void* top = ws->allocStart; + void* alloc; + void* end; + void* top; + + if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { + return NULL; + } + alloc = ws->tableEnd; + end = (BYTE *)alloc + bytes; + top = ws->allocStart; DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); assert((bytes & (sizeof(U32)-1)) == 0); - ZSTD_cwksp_internal_advance_phase(ws, phase); ZSTD_cwksp_assert_internal_consistency(ws); assert(end <= top); if (end > top) { @@ -320,6 +401,8 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { } #endif + assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); + assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); return alloc; } @@ -503,7 +586,7 @@ MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { /** * Moves the management of a workspace from one cwksp to another. The src cwksp - * is left in an invalid state (src must be re-init()'ed before its used again). + * is left in an invalid state (src must be re-init()'ed before it's used again). */ MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { *dst = *src; @@ -527,6 +610,24 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { * Functions Checking Free Space ***************************************/ +/* ZSTD_cwksp_estimated_space_within_bounds() : + * Returns whether the estimated space needed for a wksp is within an acceptable limit of the + * actual amount of space used. 
+ */ +MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws, + size_t const estimatedSpace, int resizedWorkspace) { + if (resizedWorkspace) { + /* Resized/newly allocated wksp should have exact bounds */ + return ZSTD_cwksp_used(ws) == estimatedSpace; + } else { + /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes + * than estimatedSpace. See the comments in zstd_cwksp.h for details. + */ + return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63); + } +} + + MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); } diff --git a/thirdparty/zstd/compress/zstd_double_fast.c b/thirdparty/zstd/compress/zstd_double_fast.c index ef12a524f7..d0d3a784dd 100644 --- a/thirdparty/zstd/compress/zstd_double_fast.c +++ b/thirdparty/zstd/compress/zstd_double_fast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -409,7 +409,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ - & (repIndex > dictStartIndex)) + & (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; @@ -477,7 +477,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( U32 const repIndex2 = current2 - offset_2; const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? 
dictBase + repIndex2 : base + repIndex2; if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ - & (repIndex2 > dictStartIndex)) + & (offset_2 < current2 - dictStartIndex)) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; diff --git a/thirdparty/zstd/compress/zstd_double_fast.h b/thirdparty/zstd/compress/zstd_double_fast.h index 14d944d69b..e16b7b03a3 100644 --- a/thirdparty/zstd/compress/zstd_double_fast.h +++ b/thirdparty/zstd/compress/zstd_double_fast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/compress/zstd_fast.c b/thirdparty/zstd/compress/zstd_fast.c index db7ce83d0a..4edc04dccd 100644 --- a/thirdparty/zstd/compress/zstd_fast.c +++ b/thirdparty/zstd/compress/zstd_fast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -242,7 +242,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( assert(endIndex - prefixStartIndex <= maxDistance); (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ - /* ensure there will be no no underflow + /* ensure there will be no underflow * when translating a dict index into a local index */ assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); @@ -416,9 +416,9 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const repMatch = repBase + repIndex; hashTable[h] = curr; /* update hash table */ DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr); - assert(offset_1 <= curr +1); /* check repIndex */ - if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) + if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ + & (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; @@ -453,7 +453,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( U32 const current2 = (U32)(ip-base); U32 const repIndex2 = current2 - offset_2; const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex)) /* intentional overflow */ && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; diff --git a/thirdparty/zstd/compress/zstd_fast.h b/thirdparty/zstd/compress/zstd_fast.h index cf6aaa8e67..0d4a0c1090 100644 --- a/thirdparty/zstd/compress/zstd_fast.h +++ b/thirdparty/zstd/compress/zstd_fast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/compress/zstd_lazy.c b/thirdparty/zstd/compress/zstd_lazy.c index 49ec1b09ef..3d523e8472 100644 --- a/thirdparty/zstd/compress/zstd_lazy.c +++ b/thirdparty/zstd/compress/zstd_lazy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -438,43 +438,9 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( } } - - -/* ********************************* -* Hash Chain +/*********************************** +* Dedicated dict search ***********************************/ -#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] - -/* Update chains up to ip (excluded) - Assumption : always within prefix (i.e. 
not within extDict) */ -FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( - ZSTD_matchState_t* ms, - const ZSTD_compressionParameters* const cParams, - const BYTE* ip, U32 const mls) -{ - U32* const hashTable = ms->hashTable; - const U32 hashLog = cParams->hashLog; - U32* const chainTable = ms->chainTable; - const U32 chainMask = (1 << cParams->chainLog) - 1; - const BYTE* const base = ms->window.base; - const U32 target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; - - while(idx < target) { /* catch up */ - size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; - } - - ms->nextToUpdate = target; - return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; -} - -U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; - return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); -} void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) { @@ -500,11 +466,10 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog); U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; U32 const tmpMinChain = tmpChainSize < target ? 
target - tmpChainSize : idx; - U32 hashIdx; assert(ms->cParams.chainLog <= 24); - assert(ms->cParams.hashLog >= ms->cParams.chainLog); + assert(ms->cParams.hashLog > ms->cParams.chainLog); assert(idx != 0); assert(tmpMinChain <= minChain); @@ -535,7 +500,7 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B if (count == cacheSize) { for (count = 0; count < chainLimit;) { if (i < minChain) { - if (!i || countBeyondMinChain++ > cacheSize) { + if (!i || ++countBeyondMinChain > cacheSize) { /* only allow pulling `cacheSize` number of entries * into the cache or chainTable beyond `minChain`, * to replace the entries pulled out of the @@ -591,6 +556,139 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B ms->nextToUpdate = target; } +/* Returns the longest match length found in the dedicated dict search structure. + * If none are longer than the argument ml, then ml will be returned. + */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts, + const ZSTD_matchState_t* const dms, + const BYTE* const ip, const BYTE* const iLimit, + const BYTE* const prefixStart, const U32 curr, + const U32 dictLimit, const size_t ddsIdx) { + const U32 ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); + const U32 bucketLimit = nbAttempts < bucketSize - 1 ? 
nbAttempts : bucketSize - 1; + U32 ddsAttempt; + U32 matchIndex; + + for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 const chainIndex = chainPackedPointer >> 8; + + PREFETCH_L1(&dms->chainTable[chainIndex]); + } + + for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; + match = ddsBase + matchIndex; + + if (!matchIndex) { + return ml; + } + + /* guaranteed by table construction */ + (void)ddsLowestIndex; + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) { + /* best possible, avoids read overflow on next attempt */ + return ml; + } + } + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 chainIndex = chainPackedPointer >> 8; + U32 const chainLength = chainPackedPointer & 0xFF; + U32 const chainAttempts = nbAttempts - ddsAttempt; + U32 const chainLimit = chainAttempts > chainLength ? 
chainLength : chainAttempts; + U32 chainAttempt; + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { + PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); + } + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->chainTable[chainIndex]; + match = ddsBase + matchIndex; + + /* guaranteed by table construction */ + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + return ml; +} + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. 
not within extDict) */ +FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, + const ZSTD_compressionParameters* const cParams, + const BYTE* ip, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { + const ZSTD_compressionParameters* const cParams = &ms->cParams; + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); +} /* inlining is important to hardwire a hot branch (template emulation) */ FORCE_INLINE_TEMPLATE @@ -661,90 +759,8 @@ size_t ZSTD_HcFindBestMatch_generic ( } if (dictMode == ZSTD_dedicatedDictSearch) { - const U32 ddsLowestIndex = dms->window.dictLimit; - const BYTE* const ddsBase = dms->window.base; - const BYTE* const ddsEnd = dms->window.nextSrc; - const U32 ddsSize = (U32)(ddsEnd - ddsBase); - const U32 ddsIndexDelta = dictLimit - ddsSize; - const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); - const U32 bucketLimit = nbAttempts < bucketSize - 1 ? 
nbAttempts : bucketSize - 1; - U32 ddsAttempt; - - for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { - PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); - } - - { - U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; - U32 const chainIndex = chainPackedPointer >> 8; - - PREFETCH_L1(&dms->chainTable[chainIndex]); - } - - for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { - size_t currentMl=0; - const BYTE* match; - matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; - match = ddsBase + matchIndex; - - if (!matchIndex) { - return ml; - } - - /* guaranteed by table construction */ - (void)ddsLowestIndex; - assert(matchIndex >= ddsLowestIndex); - assert(match+4 <= ddsEnd); - if (MEM_read32(match) == MEM_read32(ip)) { - /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; - } - - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) { - /* best possible, avoids read overflow on next attempt */ - return ml; - } - } - } - - { - U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; - U32 chainIndex = chainPackedPointer >> 8; - U32 const chainLength = chainPackedPointer & 0xFF; - U32 const chainAttempts = nbAttempts - ddsAttempt; - U32 const chainLimit = chainAttempts > chainLength ? 
chainLength : chainAttempts; - U32 chainAttempt; - - for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { - PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); - } - - for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { - size_t currentMl=0; - const BYTE* match; - matchIndex = dms->chainTable[chainIndex]; - match = ddsBase + matchIndex; - - /* guaranteed by table construction */ - assert(matchIndex >= ddsLowestIndex); - assert(match+4 <= ddsEnd); - if (MEM_read32(match) == MEM_read32(ip)) { - /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; - } - - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - } - } + ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms, + ip, iLimit, prefixStart, curr, dictLimit, ddsIdx); } else if (dictMode == ZSTD_dictMatchState) { const U32* const dmsChainTable = dms->chainTable; const U32 dmsChainSize = (1 << dms->cParams.chainLog); @@ -845,11 +861,657 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( } } +/* ********************************* +* (SIMD) Row-based matchfinder +***********************************/ +/* Constants for row-based hash */ +#define ZSTD_ROW_HASH_TAG_OFFSET 1 /* byte offset of hashes in the match state's tagTable from the beginning of a row */ +#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ +#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1) + +#define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1) + +typedef U32 ZSTD_VecMask; /* Clarifies when we are interacting with a U32 representing a mask of matches */ + +#if !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) /* SIMD SSE version */ 
+ +#include <emmintrin.h> +typedef __m128i ZSTD_Vec128; + +/* Returns a 128-bit container with 128-bits from src */ +static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) { + return _mm_loadu_si128((ZSTD_Vec128 const*)src); +} + +/* Returns a ZSTD_Vec128 with the byte "val" packed 16 times */ +static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) { + return _mm_set1_epi8((char)val); +} + +/* Do byte-by-byte comparison result of x and y. Then collapse 128-bit resultant mask + * into a 32-bit mask that is the MSB of each byte. + * */ +static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) { + return (ZSTD_VecMask)_mm_movemask_epi8(_mm_cmpeq_epi8(x, y)); +} + +typedef struct { + __m128i fst; + __m128i snd; +} ZSTD_Vec256; + +static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) { + ZSTD_Vec256 v; + v.fst = ZSTD_Vec128_read(ptr); + v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1); + return v; +} + +static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) { + ZSTD_Vec256 v; + v.fst = ZSTD_Vec128_set8(val); + v.snd = ZSTD_Vec128_set8(val); + return v; +} + +static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) { + ZSTD_VecMask fstMask; + ZSTD_VecMask sndMask; + fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst); + sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd); + return fstMask | (sndMask << 16); +} + +#elif !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) /* SIMD ARM NEON Version */ + +#include <arm_neon.h> +typedef uint8x16_t ZSTD_Vec128; + +static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) { + return vld1q_u8((const BYTE* const)src); +} + +static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) { + return vdupq_n_u8(val); +} + +/* Mimics '_mm_movemask_epi8()' from SSE */ +static U32 ZSTD_vmovmaskq_u8(ZSTD_Vec128 val) { + /* Shift out everything but the MSB bits in each byte */ + uint16x8_t highBits = vreinterpretq_u16_u8(vshrq_n_u8(val, 7)); + /* Merge the even lanes together with vsra (right shift and add) */ + uint32x4_t paired16 = 
vreinterpretq_u32_u16(vsraq_n_u16(highBits, highBits, 7)); + uint64x2_t paired32 = vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14)); + uint8x16_t paired64 = vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28)); + /* Extract the low 8 bits from each lane, merge */ + return vgetq_lane_u8(paired64, 0) | ((U32)vgetq_lane_u8(paired64, 8) << 8); +} + +static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) { + return (ZSTD_VecMask)ZSTD_vmovmaskq_u8(vceqq_u8(x, y)); +} + +typedef struct { + uint8x16_t fst; + uint8x16_t snd; +} ZSTD_Vec256; + +static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) { + ZSTD_Vec256 v; + v.fst = ZSTD_Vec128_read(ptr); + v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1); + return v; +} + +static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) { + ZSTD_Vec256 v; + v.fst = ZSTD_Vec128_set8(val); + v.snd = ZSTD_Vec128_set8(val); + return v; +} + +static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) { + ZSTD_VecMask fstMask; + ZSTD_VecMask sndMask; + fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst); + sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd); + return fstMask | (sndMask << 16); +} + +#else /* Scalar fallback version */ + +#define VEC128_NB_SIZE_T (16 / sizeof(size_t)) +typedef struct { + size_t vec[VEC128_NB_SIZE_T]; +} ZSTD_Vec128; + +static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) { + ZSTD_Vec128 ret; + ZSTD_memcpy(ret.vec, src, VEC128_NB_SIZE_T*sizeof(size_t)); + return ret; +} + +static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) { + ZSTD_Vec128 ret = { {0} }; + int startBit = sizeof(size_t) * 8 - 8; + for (;startBit >= 0; startBit -= 8) { + unsigned j = 0; + for (;j < VEC128_NB_SIZE_T; ++j) { + ret.vec[j] |= ((size_t)val << startBit); + } + } + return ret; +} + +/* Compare x to y, byte by byte, generating a "matches" bitfield */ +static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) { + ZSTD_VecMask res = 0; + unsigned i = 0; + unsigned l = 0; + for (; i < VEC128_NB_SIZE_T; 
++i) { + const size_t cmp1 = x.vec[i]; + const size_t cmp2 = y.vec[i]; + unsigned j = 0; + for (; j < sizeof(size_t); ++j, ++l) { + if (((cmp1 >> j*8) & 0xFF) == ((cmp2 >> j*8) & 0xFF)) { + res |= ((U32)1 << (j+i*sizeof(size_t))); + } + } + } + return res; +} + +#define VEC256_NB_SIZE_T 2*VEC128_NB_SIZE_T +typedef struct { + size_t vec[VEC256_NB_SIZE_T]; +} ZSTD_Vec256; + +static ZSTD_Vec256 ZSTD_Vec256_read(const void* const src) { + ZSTD_Vec256 ret; + ZSTD_memcpy(ret.vec, src, VEC256_NB_SIZE_T*sizeof(size_t)); + return ret; +} + +static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) { + ZSTD_Vec256 ret = { {0} }; + int startBit = sizeof(size_t) * 8 - 8; + for (;startBit >= 0; startBit -= 8) { + unsigned j = 0; + for (;j < VEC256_NB_SIZE_T; ++j) { + ret.vec[j] |= ((size_t)val << startBit); + } + } + return ret; +} + +/* Compare x to y, byte by byte, generating a "matches" bitfield */ +static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) { + ZSTD_VecMask res = 0; + unsigned i = 0; + unsigned l = 0; + for (; i < VEC256_NB_SIZE_T; ++i) { + const size_t cmp1 = x.vec[i]; + const size_t cmp2 = y.vec[i]; + unsigned j = 0; + for (; j < sizeof(size_t); ++j, ++l) { + if (((cmp1 >> j*8) & 0xFF) == ((cmp2 >> j*8) & 0xFF)) { + res |= ((U32)1 << (j+i*sizeof(size_t))); + } + } + } + return res; +} + +#endif /* !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) */ + +/* ZSTD_VecMask_next(): + * Starting from the LSB, returns the idx of the next non-zero bit. + * Basically counting the nb of trailing zeroes. + */ +static U32 ZSTD_VecMask_next(ZSTD_VecMask val) { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + return _BitScanForward(&r, val) ? 
(U32)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (U32)__builtin_ctz(val); +# else + /* Software ctz version: http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup */ + static const U32 multiplyDeBruijnBitPosition[32] = + { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + return multiplyDeBruijnBitPosition[((U32)((v & -(int)v) * 0x077CB531U)) >> 27]; +# endif +} + +/* ZSTD_VecMask_rotateRight(): + * Rotates a bitfield to the right by "rotation" bits. + * If the rotation is greater than totalBits, the returned mask is 0. + */ +FORCE_INLINE_TEMPLATE ZSTD_VecMask +ZSTD_VecMask_rotateRight(ZSTD_VecMask mask, U32 const rotation, U32 const totalBits) { + if (rotation == 0) + return mask; + switch (totalBits) { + default: + assert(0); + case 16: + return (mask >> rotation) | (U16)(mask << (16 - rotation)); + case 32: + return (mask >> rotation) | (U32)(mask << (32 - rotation)); + } +} + +/* ZSTD_row_nextIndex(): + * Returns the next index to insert at within a tagTable row, and updates the "head" + * value to reflect the update. Essentially cycles backwards from [0, {entries per row}) + */ +FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) { + U32 const next = (*tagRow - 1) & rowMask; + *tagRow = (BYTE)next; + return next; +} + +/* ZSTD_isAligned(): + * Checks that a pointer is aligned to "align" bytes which must be a power of 2. + */ +MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) { + assert((align & (align - 1)) == 0); + return (((size_t)ptr) & (align - 1)) == 0; +} + +/* ZSTD_row_prefetch(): + * Performs prefetching for the hashTable and tagTable at a given row. 
+ */ +FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) { + PREFETCH_L1(hashTable + relRow); + if (rowLog == 5) { + PREFETCH_L1(hashTable + relRow + 16); + } + PREFETCH_L1(tagTable + relRow); + assert(rowLog == 4 || rowLog == 5); + assert(ZSTD_isAligned(hashTable + relRow, 64)); /* prefetched hash row always 64-byte aligned */ + assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on a multiple of 32 or 64 bytes */ +} + +/* ZSTD_row_fillHashCache(): + * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries, + * but not beyond iLimit. + */ +static void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base, + U32 const rowLog, U32 const mls, + U32 idx, const BYTE* const iLimit) +{ + U32 const* const hashTable = ms->hashTable; + U16 const* const tagTable = ms->tagTable; + U32 const hashLog = ms->rowHashLog; + U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1); + U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch); + + for (; idx < lim; ++idx) { + U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); + ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash; + } + + DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1], + ms->hashCache[2], ms->hashCache[3], ms->hashCache[4], + ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]); +} + +/* ZSTD_row_nextCachedHash(): + * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at + * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable. 
+ */ +FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable, + U16 const* tagTable, BYTE const* base, + U32 idx, U32 const hashLog, + U32 const rowLog, U32 const mls) +{ + U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); + { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK]; + cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash; + return hash; + } +} + +/* ZSTD_row_update_internal(): + * Inserts the byte at ip into the appropriate position in the hash table. + * Determines the relative row, and the position within the {16, 32} entry row to insert at. + */ +FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip, + U32 const mls, U32 const rowLog, + U32 const rowMask, U32 const useCache) +{ + U32* const hashTable = ms->hashTable; + U16* const tagTable = ms->tagTable; + U32 const hashLog = ms->rowHashLog; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + DEBUGLOG(6, "ZSTD_row_update_internal(): nextToUpdate=%u, current=%u", idx, target); + for (; idx < target; ++idx) { + U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, idx, hashLog, rowLog, mls) + : (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + U32* const row = hashTable + relRow; + BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte. 
+ Explicit cast allows us to get exact desired position within each row */ + U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); + + assert(hash == ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls)); + ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK; + row[pos] = idx; + } + ms->nextToUpdate = target; +} + +/* ZSTD_row_update(): + * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary + * processing. + */ +void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) { + const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5; + const U32 rowMask = (1u << rowLog) - 1; + const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */); + + DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog); + ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */); +} + +/* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches + * the hash at the nth position in a row of the tagTable. + */ +FORCE_INLINE_TEMPLATE +ZSTD_VecMask ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries) { + ZSTD_VecMask matches = 0; + if (rowEntries == 16) { + ZSTD_Vec128 hashes = ZSTD_Vec128_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET); + ZSTD_Vec128 expandedTags = ZSTD_Vec128_set8(tag); + matches = ZSTD_Vec128_cmpMask8(hashes, expandedTags); + } else if (rowEntries == 32) { + ZSTD_Vec256 hashes = ZSTD_Vec256_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET); + ZSTD_Vec256 expandedTags = ZSTD_Vec256_set8(tag); + matches = ZSTD_Vec256_cmpMask8(hashes, expandedTags); + } else { + assert(0); + } + /* Each row is a circular buffer beginning at the value of "head". 
So we must rotate the "matches" bitfield + to match up with the actual layout of the entries within the hashTable */ + return ZSTD_VecMask_rotateRight(matches, head, rowEntries); +} + +/* The high-level approach of the SIMD row based match finder is as follows: + * - Figure out where to insert the new entry: + * - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag" + * - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines + * which row to insert into. + * - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can + * be considered as a circular buffer with a "head" index that resides in the tagTable. + * - Also insert the "tag" into the equivalent row and position in the tagTable. + * - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry. + * The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively, + * for alignment/performance reasons, leaving some bytes unused. + * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and + * generate a bitfield that we can cycle through to check the collisions in the hash table. + * - Pick the longest match. 
+ */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_RowFindBestMatch_generic ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode, + const U32 rowLog) +{ + U32* const hashTable = ms->hashTable; + U16* const tagTable = ms->tagTable; + U32* const hashCache = ms->hashCache; + const U32 hashLog = ms->rowHashLog; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 curr = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = ms->window.lowLimit; + const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; + const U32 rowEntries = (1U << rowLog); + const U32 rowMask = rowEntries - 1; + const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */ + U32 nbAttempts = 1U << cappedSearchLog; + size_t ml=4-1; + + /* DMS/DDS variables that may be referenced later */ + const ZSTD_matchState_t* const dms = ms->dictMatchState; + size_t ddsIdx; + U32 ddsExtraAttempts; /* cctx hash tables are limited in searches, but allow extra searches into DDS */ + U32 dmsTag; + U32* dmsRow; + BYTE* dmsTagRow; + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; + { /* Prefetch DDS hashtable entry */ + ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG; + PREFETCH_L1(&dms->hashTable[ddsIdx]); + } + ddsExtraAttempts = cParams->searchLog > rowLog ?
1U << (cParams->searchLog - rowLog) : 0; + } + + if (dictMode == ZSTD_dictMatchState) { + /* Prefetch DMS rows */ + U32* const dmsHashTable = dms->hashTable; + U16* const dmsTagTable = dms->tagTable; + U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK; + dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow); + dmsRow = dmsHashTable + dmsRelRow; + ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog); + } + + /* Update the hashTable and tagTable up to (but not including) ip */ + ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */); + { /* Get the hash for ip, compute the appropriate row */ + U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls); + U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK; + U32* const row = hashTable + relRow; + BYTE* tagRow = (BYTE*)(tagTable + relRow); + U32 const head = *tagRow & rowMask; + U32 matchBuffer[32 /* maximum nb entries per row */]; + size_t numMatches = 0; + size_t currMatch = 0; + ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries); + + /* Cycle through the matches and prefetch */ + for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { + U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; + U32 const matchIndex = row[matchPos]; + assert(numMatches < rowEntries); + if (matchIndex < lowLimit) + break; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + PREFETCH_L1(base + matchIndex); + } else { + PREFETCH_L1(dictBase + matchIndex); + } + matchBuffer[numMatches++] = matchIndex; + } + + /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop + in ZSTD_row_update_internal() at the next search. 
*/ + { + U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); + tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag; + row[pos] = ms->nextToUpdate++; + } + + /* Return the longest match */ + for (; currMatch < numMatches; ++currMatch) { + U32 const matchIndex = matchBuffer[currMatch]; + size_t currentMl=0; + assert(matchIndex < curr); + assert(matchIndex >= lowLimit); + + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* Save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + + if (dictMode == ZSTD_dedicatedDictSearch) { + ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms, + ip, iLimit, prefixStart, curr, dictLimit, ddsIdx); + } else if (dictMode == ZSTD_dictMatchState) { + /* TODO: Measure and potentially add prefetching to DMS */ + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + + { U32 const head = *dmsTagRow & rowMask; + U32 matchBuffer[32 /* maximum nb row entries */]; + size_t numMatches = 0; + size_t currMatch = 0; + ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries); + + for (; (matches > 
0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { + U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; + U32 const matchIndex = dmsRow[matchPos]; + if (matchIndex < dmsLowestIndex) + break; + PREFETCH_L1(dmsBase + matchIndex); + matchBuffer[numMatches++] = matchIndex; + } + + /* Return the longest match */ + for (; currMatch < numMatches; ++currMatch) { + U32 const matchIndex = matchBuffer[currMatch]; + size_t currentMl=0; + assert(matchIndex >= dmsLowestIndex); + assert(matchIndex < curr); + + { const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + } + + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; + } + } + } + } + return ml; +} + +/* Inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + const ZSTD_dictMode_e dictMode, size_t* offsetPtr, const U32 rowLog) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, dictMode, rowLog); + case 5 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, dictMode, rowLog); + case 7 : + case 6 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, dictMode, rowLog); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectRowLog ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, 
ip, iLimit, ZSTD_noDict, offsetPtr, 5); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dictMatchState_selectRowLog( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 5); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 5); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_extDict_selectRowLog ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 5); + } +} + /* ******************************* * Common parser - lazy strategy *********************************/ -typedef enum { search_hashChain, search_binaryTree } searchMethod_e; +typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e; FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_lazy_generic( @@ -863,10 +1525,11 @@ ZSTD_compressBlock_lazy_generic( const BYTE* ip = istart; const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; - 
const BYTE* const ilimit = iend - 8; + const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8; const BYTE* const base = ms->window.base; const U32 prefixLowestIndex = ms->window.dictLimit; const BYTE* const prefixLowest = base + prefixLowestIndex; + const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5; typedef size_t (*searchMax_f)( ZSTD_matchState_t* ms, @@ -878,26 +1541,30 @@ ZSTD_compressBlock_lazy_generic( * that should never occur (extDict modes go to the other implementation * below and there is no DDSS for binary tree search yet). */ - const searchMax_f searchFuncs[4][2] = { + const searchMax_f searchFuncs[4][3] = { { ZSTD_HcFindBestMatch_selectMLS, - ZSTD_BtFindBestMatch_selectMLS + ZSTD_BtFindBestMatch_selectMLS, + ZSTD_RowFindBestMatch_selectRowLog }, { NULL, + NULL, NULL }, { ZSTD_HcFindBestMatch_dictMatchState_selectMLS, - ZSTD_BtFindBestMatch_dictMatchState_selectMLS + ZSTD_BtFindBestMatch_dictMatchState_selectMLS, + ZSTD_RowFindBestMatch_dictMatchState_selectRowLog }, { ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS, - NULL + NULL, + ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog } }; - searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree]; + searchMax_f const searchMax = searchFuncs[dictMode][(int)searchMethod]; U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; const int isDMS = dictMode == ZSTD_dictMatchState; @@ -915,9 +1582,7 @@ ZSTD_compressBlock_lazy_generic( assert(searchMax != NULL); - DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); - - /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod); ip += (dictAndPrefixLength == 0); if (dictMode == ZSTD_noDict) { U32 const curr = (U32)(ip - base); @@ -933,6 +1598,12 @@ ZSTD_compressBlock_lazy_generic( assert(offset_2 <= dictAndPrefixLength); } + if (searchMethod == search_rowHash) { + 
ZSTD_row_fillHashCache(ms, base, rowLog, + MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), + ms->nextToUpdate, ilimit); + } + /* Match Loop */ #if defined(__GNUC__) && defined(__x86_64__) /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the @@ -1198,6 +1869,70 @@ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); } +/* Row-based matchfinder */ +size_t ZSTD_compressBlock_lazy2_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_greedy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + 
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); +} + + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); +} FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_lazy_extDict_generic( @@ -1210,7 +1945,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const BYTE* ip = istart; const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; + const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8; const BYTE* const base = ms->window.base; const U32 dictLimit = ms->window.dictLimit; const BYTE* const prefixStart = base + dictLimit; @@ -1218,18 +1953,28 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const dictStart = dictBase + ms->window.lowLimit; const U32 windowLog = ms->cParams.windowLog; + const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5; typedef size_t (*searchMax_f)( ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - searchMax_f searchMax = searchMethod==search_binaryTree ? 
ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; - + const searchMax_f searchFuncs[3] = { + ZSTD_HcFindBestMatch_extDict_selectMLS, + ZSTD_BtFindBestMatch_extDict_selectMLS, + ZSTD_RowFindBestMatch_extDict_selectRowLog + }; + searchMax_f searchMax = searchFuncs[(int)searchMethod]; U32 offset_1 = rep[0], offset_2 = rep[1]; - DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); + DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod); /* init */ ip += (ip == prefixStart); + if (searchMethod == search_rowHash) { + ZSTD_row_fillHashCache(ms, base, rowLog, + MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), + ms->nextToUpdate, ilimit); + } /* Match Loop */ #if defined(__GNUC__) && defined(__x86_64__) @@ -1249,7 +1994,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const U32 repIndex = (U32)(curr+1 - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + & (offset_1 < curr+1 - windowLow) ) /* note: we are searching at curr+1 */ if (MEM_read32(ip+1) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1280,7 +2026,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const U32 repIndex = (U32)(curr - offset_1); const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1311,7 +2058,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const U32 repIndex = (U32)(curr - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1357,7 +2105,8 @@ _storeSequence: const U32 repIndex = repCurrent - offset_2; const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; @@ -1410,3 +2159,26 @@ size_t ZSTD_compressBlock_btlazy2_extDict( { return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); } + +size_t ZSTD_compressBlock_greedy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); +} diff --git a/thirdparty/zstd/compress/zstd_lazy.h b/thirdparty/zstd/compress/zstd_lazy.h index d0214d5e73..150f7b390b 100644 --- a/thirdparty/zstd/compress/zstd_lazy.h +++ b/thirdparty/zstd/compress/zstd_lazy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -26,6 +26,7 @@ extern "C" { #define ZSTD_LAZY_DDSS_BUCKET_LOG 2 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); +void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); @@ -43,6 +44,15 @@ size_t ZSTD_compressBlock_lazy( size_t ZSTD_compressBlock_greedy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_btlazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -56,6 +66,15 @@ size_t ZSTD_compressBlock_lazy_dictMatchState( size_t ZSTD_compressBlock_greedy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -66,6 +85,15 @@ size_t 
ZSTD_compressBlock_lazy_dedicatedDictSearch( size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_greedy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -76,9 +104,19 @@ size_t ZSTD_compressBlock_lazy_extDict( size_t ZSTD_compressBlock_lazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_btlazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); + #if defined (__cplusplus) } diff --git a/thirdparty/zstd/compress/zstd_ldm.c b/thirdparty/zstd/compress/zstd_ldm.c index 3f3d7c46ab..fa4ebeabd7 100644 --- a/thirdparty/zstd/compress/zstd_ldm.c +++ b/thirdparty/zstd/compress/zstd_ldm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -11,13 +11,126 @@ #include "zstd_ldm.h" #include "../common/debug.h" +#include "../common/xxhash.h" #include "zstd_fast.h" /* ZSTD_fillHashTable() */ #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ +#include "zstd_ldm_geartab.h" #define LDM_BUCKET_SIZE_LOG 3 #define LDM_MIN_MATCH_LENGTH 64 #define LDM_HASH_RLOG 7 -#define LDM_HASH_CHAR_OFFSET 10 + +typedef struct { + U64 rolling; + U64 stopMask; +} ldmRollingHashState_t; + +/** ZSTD_ldm_gear_init(): + * + * Initializes the rolling hash state such that it will honor the + * settings in params. */ +static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params) +{ + unsigned maxBitsInMask = MIN(params->minMatchLength, 64); + unsigned hashRateLog = params->hashRateLog; + + state->rolling = ~(U32)0; + + /* The choice of the splitting criterion is subject to two conditions: + * 1. it has to trigger on average every 2^(hashRateLog) bytes; + * 2. ideally, it has to depend on a window of minMatchLength bytes. + * + * In the gear hash algorithm, bit n depends on the last n bytes; + * so in order to obtain a good quality splitting criterion it is + * preferable to use bits with high weight. + * + * To match condition 1 we use a mask with hashRateLog bits set + * and, because of the previous remark, we make sure these bits + * have the highest possible weight while still respecting + * condition 2. + */ + if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) { + state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog); + } else { + /* In this degenerate case we simply honor the hash rate. */ + state->stopMask = ((U64)1 << hashRateLog) - 1; + } +} + +/** ZSTD_ldm_gear_reset() + * Feeds [data, data + minMatchLength) into the hash without registering any + * splits. This effectively resets the hash state. 
This is used when skipping + * over data, either at the beginning of a block, or skipping sections. + */ +static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state, + BYTE const* data, size_t minMatchLength) +{ + U64 hash = state->rolling; + size_t n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + } while (0) + while (n + 3 < minMatchLength) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < minMatchLength) { + GEAR_ITER_ONCE(); + } +#undef GEAR_ITER_ONCE +} + +/** ZSTD_ldm_gear_feed(): + * + * Registers in the splits array all the split points found in the first + * size bytes following the data pointer. This function terminates when + * either all the data has been processed or LDM_BATCH_SIZE splits are + * present in the splits array. + * + * Precondition: The splits array must not be full. + * Returns: The number of bytes processed. */ +static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state, + BYTE const* data, size_t size, + size_t* splits, unsigned* numSplits) +{ + size_t n; + U64 hash, mask; + + hash = state->rolling; + mask = state->stopMask; + n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + if (UNLIKELY((hash & mask) == 0)) { \ + splits[*numSplits] = n; \ + *numSplits += 1; \ + if (*numSplits == LDM_BATCH_SIZE) \ + goto done; \ + } \ + } while (0) + + while (n + 3 < size) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < size) { + GEAR_ITER_ONCE(); + } + +#undef GEAR_ITER_ONCE + +done: + state->rolling = hash; + return n; +} void ZSTD_ldm_adjustParameters(ldmParams_t* params, ZSTD_compressionParameters const* cParams) @@ -54,41 +167,6 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) return params.enableLdm ? 
(maxChunkSize / params.minMatchLength) : 0; } -/** ZSTD_ldm_getSmallHash() : - * numBits should be <= 32 - * If numBits==0, returns 0. - * @return : the most significant numBits of value. */ -static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits) -{ - assert(numBits <= 32); - return numBits == 0 ? 0 : (U32)(value >> (64 - numBits)); -} - -/** ZSTD_ldm_getChecksum() : - * numBitsToDiscard should be <= 32 - * @return : the next most significant 32 bits after numBitsToDiscard */ -static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard) -{ - assert(numBitsToDiscard <= 32); - return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF; -} - -/** ZSTD_ldm_getTag() ; - * Given the hash, returns the most significant numTagBits bits - * after (32 + hbits) bits. - * - * If there are not enough bits remaining, return the last - * numTagBits bits. */ -static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits) -{ - assert(numTagBits < 32 && hbits <= 32); - if (32 - hbits < numTagBits) { - return hash & (((U32)1 << numTagBits) - 1); - } else { - return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1); - } -} - /** ZSTD_ldm_getBucket() : * Returns a pointer to the start of the bucket associated with hash. 
*/ static ldmEntry_t* ZSTD_ldm_getBucket( @@ -103,38 +181,12 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, size_t const hash, const ldmEntry_t entry, ldmParams_t const ldmParams) { - BYTE* const bucketOffsets = ldmState->bucketOffsets; - *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry; - bucketOffsets[hash]++; - bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1; -} + BYTE* const pOffset = ldmState->bucketOffsets + hash; + unsigned const offset = *pOffset; + + *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry; + *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1)); -/** ZSTD_ldm_makeEntryAndInsertByTag() : - * - * Gets the small hash, checksum, and tag from the rollingHash. - * - * If the tag matches (1 << ldmParams.hashRateLog)-1, then - * creates an ldmEntry from the offset, and inserts it into the hash table. - * - * hBits is the length of the small hash, which is the most significant hBits - * of rollingHash. The checksum is the next 32 most significant bits, followed - * by ldmParams.hashRateLog bits that make up the tag. */ -static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, - U64 const rollingHash, - U32 const hBits, - U32 const offset, - ldmParams_t const ldmParams) -{ - U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog); - U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1; - if (tag == tagMask) { - U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); - U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); - ldmEntry_t entry; - entry.offset = offset; - entry.checksum = checksum; - ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams); - } } /** ZSTD_ldm_countBackwardsMatch() : @@ -212,43 +264,42 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, return 0; } -/** ZSTD_ldm_fillLdmHashTable() : - * - * Fills hashTable from (lastHashed + 1) to iend (non-inclusive). 
- * lastHash is the rolling hash that corresponds to lastHashed. - * - * Returns the rolling hash corresponding to position iend-1. */ -static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, - U64 lastHash, const BYTE* lastHashed, - const BYTE* iend, const BYTE* base, - U32 hBits, ldmParams_t const ldmParams) -{ - U64 rollingHash = lastHash; - const BYTE* cur = lastHashed + 1; - - while (cur < iend) { - rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1], - cur[ldmParams.minMatchLength-1], - state->hashPower); - ZSTD_ldm_makeEntryAndInsertByTag(state, - rollingHash, hBits, - (U32)(cur - base), ldmParams); - ++cur; - } - return rollingHash; -} - void ZSTD_ldm_fillHashTable( - ldmState_t* state, const BYTE* ip, + ldmState_t* ldmState, const BYTE* ip, const BYTE* iend, ldmParams_t const* params) { + U32 const minMatchLength = params->minMatchLength; + U32 const hBits = params->hashLog - params->bucketSizeLog; + BYTE const* const base = ldmState->window.base; + BYTE const* const istart = ip; + ldmRollingHashState_t hashState; + size_t* const splits = ldmState->splitIndices; + unsigned numSplits; + DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); - if ((size_t)(iend - ip) >= params->minMatchLength) { - U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength); - ZSTD_ldm_fillLdmHashTable( - state, startingHash, ip, iend - params->minMatchLength, state->window.base, - params->hashLog - params->bucketSizeLog, - *params); + + ZSTD_ldm_gear_init(&hashState, params); + while (ip < iend) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + if (ip + splits[n] >= istart + minMatchLength) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = XXH64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + ldmEntry_t entry; + + entry.offset = (U32)(split - base); + entry.checksum = (U32)(xxhash 
>> 32); + ZSTD_ldm_insertEntry(ldmState, hash, entry, *params); + } + } + + ip += hashed; } } @@ -274,11 +325,8 @@ static size_t ZSTD_ldm_generateSequences_internal( /* LDM parameters */ int const extDict = ZSTD_window_hasExtDict(ldmState->window); U32 const minMatchLength = params->minMatchLength; - U64 const hashPower = ldmState->hashPower; + U32 const entsPerBucket = 1U << params->bucketSizeLog; U32 const hBits = params->hashLog - params->bucketSizeLog; - U32 const ldmBucketSize = 1U << params->bucketSizeLog; - U32 const hashRateLog = params->hashRateLog; - U32 const ldmTagMask = (1U << params->hashRateLog) - 1; /* Prefix and extDict parameters */ U32 const dictLimit = ldmState->window.dictLimit; U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; @@ -290,45 +338,69 @@ static size_t ZSTD_ldm_generateSequences_internal( /* Input bounds */ BYTE const* const istart = (BYTE const*)src; BYTE const* const iend = istart + srcSize; - BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE); + BYTE const* const ilimit = iend - HASH_READ_SIZE; /* Input positions */ BYTE const* anchor = istart; BYTE const* ip = istart; - /* Rolling hash */ - BYTE const* lastHashed = NULL; - U64 rollingHash = 0; - - while (ip <= ilimit) { - size_t mLength; - U32 const curr = (U32)(ip - base); - size_t forwardMatchLength = 0, backwardMatchLength = 0; - ldmEntry_t* bestEntry = NULL; - if (ip != istart) { - rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0], - lastHashed[minMatchLength], - hashPower); - } else { - rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength); + /* Rolling hash state */ + ldmRollingHashState_t hashState; + /* Arrays for staged-processing */ + size_t* const splits = ldmState->splitIndices; + ldmMatchCandidate_t* const candidates = ldmState->matchCandidates; + unsigned numSplits; + + if (srcSize < minMatchLength) + return iend - anchor; + + /* Initialize the rolling hash state with the first minMatchLength bytes */ + 
ZSTD_ldm_gear_init(&hashState, params); + ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength); + ip += minMatchLength; + + while (ip < ilimit) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip, + splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = XXH64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + + candidates[n].split = split; + candidates[n].hash = hash; + candidates[n].checksum = (U32)(xxhash >> 32); + candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params); + PREFETCH_L1(candidates[n].bucket); } - lastHashed = ip; - /* Do not insert and do not look for a match */ - if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) { - ip++; - continue; - } + for (n = 0; n < numSplits; n++) { + size_t forwardMatchLength = 0, backwardMatchLength = 0, + bestMatchLength = 0, mLength; + U32 offset; + BYTE const* const split = candidates[n].split; + U32 const checksum = candidates[n].checksum; + U32 const hash = candidates[n].hash; + ldmEntry_t* const bucket = candidates[n].bucket; + ldmEntry_t const* cur; + ldmEntry_t const* bestEntry = NULL; + ldmEntry_t newEntry; + + newEntry.offset = (U32)(split - base); + newEntry.checksum = checksum; + + /* If a split point would generate a sequence overlapping with + * the previous one, we merely register it in the hash table and + * move on */ + if (split < anchor) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } - /* Get the best entry and compute the match lengths */ - { - ldmEntry_t* const bucket = - ZSTD_ldm_getBucket(ldmState, - ZSTD_ldm_getSmallHash(rollingHash, hBits), - *params); - ldmEntry_t* cur; - size_t bestMatchLength = 0; - U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); - - for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { + for (cur = bucket; cur < bucket + 
entsPerBucket; cur++) { size_t curForwardMatchLength, curBackwardMatchLength, curTotalMatchLength; if (cur->checksum != checksum || cur->offset <= lowestIndex) { @@ -342,31 +414,23 @@ static size_t ZSTD_ldm_generateSequences_internal( cur->offset < dictLimit ? dictEnd : iend; BYTE const* const lowMatchPtr = cur->offset < dictLimit ? dictStart : lowPrefixPtr; - - curForwardMatchLength = ZSTD_count_2segments( - ip, pMatch, iend, - matchEnd, lowPrefixPtr); + curForwardMatchLength = + ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr); if (curForwardMatchLength < minMatchLength) { continue; } - curBackwardMatchLength = - ZSTD_ldm_countBackwardsMatch_2segments(ip, anchor, - pMatch, lowMatchPtr, - dictStart, dictEnd); - curTotalMatchLength = curForwardMatchLength + - curBackwardMatchLength; + curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments( + split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd); } else { /* !extDict */ BYTE const* const pMatch = base + cur->offset; - curForwardMatchLength = ZSTD_count(ip, pMatch, iend); + curForwardMatchLength = ZSTD_count(split, pMatch, iend); if (curForwardMatchLength < minMatchLength) { continue; } curBackwardMatchLength = - ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, - lowPrefixPtr); - curTotalMatchLength = curForwardMatchLength + - curBackwardMatchLength; + ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr); } + curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; if (curTotalMatchLength > bestMatchLength) { bestMatchLength = curTotalMatchLength; @@ -375,57 +439,54 @@ static size_t ZSTD_ldm_generateSequences_internal( bestEntry = cur; } } - } - - /* No match found -- continue searching */ - if (bestEntry == NULL) { - ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, - hBits, curr, - *params); - ip++; - continue; - } - /* Match found */ - mLength = forwardMatchLength + backwardMatchLength; - ip -= backwardMatchLength; + /* No match found -- insert an entry 
into the hash table + * and process the next candidate match */ + if (bestEntry == NULL) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } - { - /* Store the sequence: - * ip = curr - backwardMatchLength - * The match is at (bestEntry->offset - backwardMatchLength) - */ - U32 const matchIndex = bestEntry->offset; - U32 const offset = curr - matchIndex; - rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; - - /* Out of sequence storage */ - if (rawSeqStore->size == rawSeqStore->capacity) - return ERROR(dstSize_tooSmall); - seq->litLength = (U32)(ip - anchor); - seq->matchLength = (U32)mLength; - seq->offset = offset; - rawSeqStore->size++; - } + /* Match found */ + offset = (U32)(split - base) - bestEntry->offset; + mLength = forwardMatchLength + backwardMatchLength; + { + rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; + + /* Out of sequence storage */ + if (rawSeqStore->size == rawSeqStore->capacity) + return ERROR(dstSize_tooSmall); + seq->litLength = (U32)(split - backwardMatchLength - anchor); + seq->matchLength = (U32)mLength; + seq->offset = offset; + rawSeqStore->size++; + } - /* Insert the current entry into the hash table */ - ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, - (U32)(lastHashed - base), - *params); + /* Insert the current entry into the hash table --- it must be + * done after the previous block to avoid clobbering bestEntry */ + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); - assert(ip + backwardMatchLength == lastHashed); + anchor = split + forwardMatchLength; - /* Fill the hash table from lastHashed+1 to ip+mLength*/ - /* Heuristic: don't need to fill the entire table at end of block */ - if (ip + mLength <= ilimit) { - rollingHash = ZSTD_ldm_fillLdmHashTable( - ldmState, rollingHash, lastHashed, - ip + mLength, base, hBits, *params); - lastHashed = ip + mLength - 1; + /* If we find a match that ends after the data that we've hashed + * then we have a repeating, 
overlapping, pattern. E.g. all zeros. + * If one repetition of the pattern matches our `stopMask` then all + * repetitions will. We don't need to insert them all into out table, + * only the first one. So skip over overlapping matches. + * This is a major speed boost (20x) for compressing a single byte + * repeated, when that byte ends up in the table. + */ + if (anchor > ip + hashed) { + ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength); + /* Continue the outter loop at anchor (ip + hashed == anchor). */ + ip = anchor - hashed; + break; + } } - ip += mLength; - anchor = ip; + + ip += hashed; } + return iend - anchor; } @@ -474,7 +535,7 @@ size_t ZSTD_ldm_generateSequences( assert(chunkStart < iend); /* 1. Perform overflow correction if necessary. */ - if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { + if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) { U32 const ldmHSize = 1U << params->hashLog; U32 const correction = ZSTD_window_correctOverflow( &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); @@ -596,12 +657,13 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_useRowMatchFinderMode_e useRowMatchFinder, void const* src, size_t srcSize) { const ZSTD_compressionParameters* const cParams = &ms->cParams; unsigned const minMatch = cParams->minMatch; ZSTD_blockCompressor const blockCompressor = - ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); + ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms)); /* Input bounds */ BYTE const* const istart = (BYTE const*)src; BYTE const* const iend = istart + srcSize; @@ -620,7 +682,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, assert(rawSeqStore->pos <= 
rawSeqStore->size); assert(rawSeqStore->size <= rawSeqStore->capacity); - /* Loop through each sequence and apply the block compressor to the lits */ + /* Loop through each sequence and apply the block compressor to the literals */ while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { /* maybeSplitSequence updates rawSeqStore->pos */ rawSeq const sequence = maybeSplitSequence(rawSeqStore, diff --git a/thirdparty/zstd/compress/zstd_ldm.h b/thirdparty/zstd/compress/zstd_ldm.h index 6561024e4c..393466fa9f 100644 --- a/thirdparty/zstd/compress/zstd_ldm.h +++ b/thirdparty/zstd/compress/zstd_ldm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -66,6 +66,7 @@ size_t ZSTD_ldm_generateSequences( */ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_useRowMatchFinderMode_e useRowMatchFinder, void const* src, size_t srcSize); /** @@ -73,7 +74,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, * * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. * Avoids emitting matches less than `minMatch` bytes. - * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). + * Must be called for data that is not passed to ZSTD_ldm_blockCompress(). */ void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch); diff --git a/thirdparty/zstd/compress/zstd_ldm_geartab.h b/thirdparty/zstd/compress/zstd_ldm_geartab.h new file mode 100644 index 0000000000..e5c24d856b --- /dev/null +++ b/thirdparty/zstd/compress/zstd_ldm_geartab.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LDM_GEARTAB_H +#define ZSTD_LDM_GEARTAB_H + +static U64 ZSTD_ldm_gearTab[256] = { + 0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc, + 0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05, + 0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e, + 0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889, + 0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e, + 0x37b628620b628, 0x49a8d455d88caf5, 0x8556d711e6958140, + 0x4f7ae74fc605c1f, 0x829f0c3468bd3a20, 0x4ffdc885c625179e, + 0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f, + 0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391, + 0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210, + 0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be, + 0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a, + 0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b, + 0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4, + 0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb, + 0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312, + 0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01, + 0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc, + 0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967, + 0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553, + 0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f, + 0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2, + 0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d, + 0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a, + 0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74, + 0x429db2591f54aade, 0xc52802a8037d1009, 
0x6acb27381f0b25f3, + 0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1, + 0xff452823dbb010a, 0x9d42ed614f3dd267, 0x5b9313c06257c57b, + 0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568, + 0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a, + 0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1, + 0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9, + 0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463, + 0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba, + 0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9, + 0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61, + 0x24a5483879c453e3, 0x88026889192b4b9, 0x28da96671782dbec, + 0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6, + 0xbc135a0a704b70ba, 0x69cd868f7622ada, 0xbc37ba89e0b9c0ab, + 0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5, + 0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59, + 0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7, + 0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc, + 0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb, + 0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be, + 0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312, + 0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1, + 0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc, + 0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d, + 0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445, + 0x820d471e20b348e, 0x1874383cb83d46dc, 0x97edeec7a1efe11c, + 0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5, + 0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5, + 0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28, + 0xaf846af6ab7d0bf4, 0xe5af208eb666e49, 0x5e6622f73534cd6a, + 0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9, + 0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15, + 0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef, + 
0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2, + 0x9f90e4c5fd508d8, 0xa34e5956fbaf3385, 0x2e2f8e151d3ef375, + 0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3, + 0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595, + 0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389, + 0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4, + 0x4228e364c5b5ed7, 0x9d7a3edf0da43911, 0x8edcfeda24686756, + 0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc, + 0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45, + 0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea, + 0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f, + 0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc, + 0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c, + 0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a, + 0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17, + 0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3, + 0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4, + 0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91, + 0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40, + 0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741, + 0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f, + 0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4, + 0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad, + 0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047, + 0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2, + 0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e, + 0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b, + 0x2b4da14f2613d8f4 +}; + +#endif /* ZSTD_LDM_GEARTAB_H */ diff --git a/thirdparty/zstd/compress/zstd_opt.c b/thirdparty/zstd/compress/zstd_opt.c index e55c459deb..402a7e5c76 100644 --- a/thirdparty/zstd/compress/zstd_opt.c +++ b/thirdparty/zstd/compress/zstd_opt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Przemyslaw 
Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/compress/zstd_opt.h b/thirdparty/zstd/compress/zstd_opt.h index 9aba8a9018..627255f53d 100644 --- a/thirdparty/zstd/compress/zstd_opt.h +++ b/thirdparty/zstd/compress/zstd_opt.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/compress/zstdmt_compress.c b/thirdparty/zstd/compress/zstdmt_compress.c index 50454a50b9..22aa3e1245 100644 --- a/thirdparty/zstd/compress/zstdmt_compress.c +++ b/thirdparty/zstd/compress/zstdmt_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -472,8 +472,6 @@ ZSTDMT_serialState_reset(serialState_t* serialState, ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams); assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); assert(params.ldmParams.hashRateLog < 32); - serialState->ldmState.hashPower = - ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength); } else { ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams)); } @@ -486,10 +484,10 @@ ZSTDMT_serialState_reset(serialState_t* serialState, size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t); unsigned const bucketLog = params.ldmParams.hashLog - params.ldmParams.bucketSizeLog; - size_t const bucketSize = (size_t)1 << bucketLog; unsigned const prevBucketLog = serialState->params.ldmParams.hashLog - serialState->params.ldmParams.bucketSizeLog; + size_t const numBuckets = (size_t)1 << bucketLog; /* Size the seq pool tables */ ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize)); /* Reset the window */ @@ -501,20 +499,20 @@ ZSTDMT_serialState_reset(serialState_t* serialState, } if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) { ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem); - serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(bucketSize, cMem); + serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem); } if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets) return 1; /* Zero the tables */ ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize); - ZSTD_memset(serialState->ldmState.bucketOffsets, 0, bucketSize); + ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets); /* Update window state and fill hash table with dict */ serialState->ldmState.loadedDictEnd = 0; if (dictSize > 0) { if (dictContentType == ZSTD_dct_rawContent) { BYTE const* const dictEnd = (const BYTE*)dict + dictSize; -
ZSTD_window_update(&serialState->ldmState.window, dict, dictSize); + ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0); ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams); serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base); } else { @@ -571,7 +569,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState, assert(seqStore.seq != NULL && seqStore.pos == 0 && seqStore.size == 0 && seqStore.capacity > 0); assert(src.size <= serialState->params.jobSize); - ZSTD_window_update(&serialState->ldmState.window, src.start, src.size); + ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0); error = ZSTD_ldm_generateSequences( &serialState->ldmState, &seqStore, &serialState->params.ldmParams, src.start, src.size); @@ -683,6 +681,8 @@ static void ZSTDMT_compressionJob(void* jobDescription) if (job->jobID != 0) jobParams.fParams.checksumFlag = 0; /* Don't run LDM for the chunks, since we handle it externally */ jobParams.ldmParams.enableLdm = 0; + /* Correct nbWorkers to 0. 
*/ + jobParams.nbWorkers = 0; /* init */ @@ -695,6 +695,10 @@ static void ZSTDMT_compressionJob(void* jobDescription) { size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob); if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError); } + if (!job->firstJob) { + size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0); + if (ZSTD_isError(err)) JOB_ERROR(err); + } { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */ ZSTD_dtlm_fast, @@ -750,6 +754,13 @@ static void ZSTDMT_compressionJob(void* jobDescription) if (ZSTD_isError(cSize)) JOB_ERROR(cSize); lastCBlockSize = cSize; } } + if (!job->firstJob) { + /* Double check that we don't have an ext-dict, because then our + * repcode invalidation doesn't work. + */ + assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); + } + ZSTD_CCtx_trace(cctx, 0); _endJob: ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize); @@ -1239,9 +1250,8 @@ size_t ZSTDMT_initCStream_internal( if (params.rsyncable) { /* Aim for the targetsectionSize as the average job size. */ - U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20); - U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20; - assert(jobSizeMB >= 1); + U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10); + U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10); DEBUGLOG(4, "rsyncLog = %u", rsyncBits); mtctx->rsync.hash = 0; mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1; diff --git a/thirdparty/zstd/compress/zstdmt_compress.h b/thirdparty/zstd/compress/zstdmt_compress.h index 0a9e551c99..2fee2ec745 100644 --- a/thirdparty/zstd/compress/zstdmt_compress.h +++ b/thirdparty/zstd/compress/zstdmt_compress.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 
+ * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -32,11 +32,11 @@ /* === Constants === */ -#ifndef ZSTDMT_NBWORKERS_MAX -# define ZSTDMT_NBWORKERS_MAX 200 +#ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */ +# define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256) #endif -#ifndef ZSTDMT_JOBSIZE_MIN -# define ZSTDMT_JOBSIZE_MIN (1 MB) +#ifndef ZSTDMT_JOBSIZE_MIN /* a different value can be selected at compile time */ +# define ZSTDMT_JOBSIZE_MIN (512 KB) #endif #define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30) #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB)) diff --git a/thirdparty/zstd/decompress/huf_decompress.c b/thirdparty/zstd/decompress/huf_decompress.c index 1418206718..b93c9a003b 100644 --- a/thirdparty/zstd/decompress/huf_decompress.c +++ b/thirdparty/zstd/decompress/huf_decompress.c @@ -1,7 +1,7 @@ /* ****************************************************************** * huff0 huffman decoder, * part of Finite State Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. 
* * You can contact the author at : * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -528,13 +528,15 @@ typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, const U32* rankValOrigin, const int minWeight, const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, - U32 nbBitsBaseline, U16 baseSeq) + U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize) { HUF_DEltX2 DElt; - U32 rankVal[HUF_TABLELOG_MAX + 1]; + U32* rankVal = wksp; + assert(wkspSize >= HUF_TABLELOG_MAX + 1); + (void)wkspSize; /* get pre-calculated rankVal */ - ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1)); /* fill skipped values */ if (minWeight>1) { @@ -569,14 +571,18 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 co static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, const sortedSymbol_t* sortedList, const U32 sortedListSize, const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, - const U32 nbBitsBaseline) + const U32 nbBitsBaseline, U32* wksp, size_t wkspSize) { - U32 rankVal[HUF_TABLELOG_MAX + 1]; + U32* rankVal = wksp; const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ const U32 minBits = nbBitsBaseline - maxWeight; U32 s; - ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + assert(wkspSize >= HUF_TABLELOG_MAX + 1); + wksp += HUF_TABLELOG_MAX + 1; + wkspSize -= HUF_TABLELOG_MAX + 1; + + ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1)); /* fill DTable */ for (s=0; s<sortedListSize; s++) { @@ -594,7 +600,7 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList+sortedRank, sortedListSize-sortedRank, - nbBitsBaseline, 
symbol); + nbBitsBaseline, symbol, wksp, wkspSize); } else { HUF_DEltX2 DElt; MEM_writeLE16(&(DElt.sequence), symbol); @@ -608,6 +614,15 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, } } +typedef struct { + rankValCol_t rankVal[HUF_TABLELOG_MAX]; + U32 rankStats[HUF_TABLELOG_MAX + 1]; + U32 rankStart0[HUF_TABLELOG_MAX + 2]; + sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; + BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; + U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; +} HUF_ReadDTableX2_Workspace; + size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) @@ -620,47 +635,32 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; U32 *rankStart; - rankValCol_t* rankVal; - U32* rankStats; - U32* rankStart0; - sortedSymbol_t* sortedSymbol; - BYTE* weightList; - size_t spaceUsed32 = 0; - - rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; - rankStats = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 1; - rankStart0 = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 2; - sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); - spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; - weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); - - rankStart = rankStart0 + 1; - ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); + HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace; + + if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC); + + rankStart = wksp->rankStart0 + 1; + ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats)); + 
ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0)); DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ - iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0); if (HUF_isError(iSize)) return iSize; /* check result */ if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ /* find maxWeight */ - for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ + for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ /* Get start index of each weight */ { U32 w, nextRankStart = 0; for (w=1; w<maxW+1; w++) { U32 curr = nextRankStart; - nextRankStart += rankStats[w]; + nextRankStart += wksp->rankStats[w]; rankStart[w] = curr; } rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ @@ -670,37 +670,38 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, /* sort symbols by weight */ { U32 s; for (s=0; s<nbSymbols; s++) { - U32 const w = weightList[s]; + U32 const w = wksp->weightList[s]; U32 const r = rankStart[w]++; - sortedSymbol[r].symbol = (BYTE)s; - sortedSymbol[r].weight = (BYTE)w; + wksp->sortedSymbol[r].symbol = (BYTE)s; + wksp->sortedSymbol[r].weight = (BYTE)w; } rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */ } /* Build rankVal */ - { U32* const rankVal0 = rankVal[0]; + { U32* const rankVal0 = wksp->rankVal[0]; { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */ U32 nextRankVal = 0; 
U32 w; for (w=1; w<maxW+1; w++) { U32 curr = nextRankVal; - nextRankVal += rankStats[w] << (w+rescale); + nextRankVal += wksp->rankStats[w] << (w+rescale); rankVal0[w] = curr; } } { U32 const minBits = tableLog+1 - maxW; U32 consumed; for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) { - U32* const rankValPtr = rankVal[consumed]; + U32* const rankValPtr = wksp->rankVal[consumed]; U32 w; for (w = 1; w < maxW+1; w++) { rankValPtr[w] = rankVal0[w] >> consumed; } } } } HUF_fillDTableX2(dt, maxTableLog, - sortedSymbol, sizeOfSort, - rankStart0, rankVal, maxW, - tableLog+1); + wksp->sortedSymbol, sizeOfSort, + wksp->rankStart0, wksp->rankVal, maxW, + tableLog+1, + wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32)); dtd.tableLog = (BYTE)maxTableLog; dtd.tableType = 1; @@ -1225,7 +1226,7 @@ size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cS HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); } -#endif +#endif #ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) diff --git a/thirdparty/zstd/decompress/zstd_ddict.c b/thirdparty/zstd/decompress/zstd_ddict.c index f5cc23b387..ce335477b3 100644 --- a/thirdparty/zstd/decompress/zstd_ddict.c +++ b/thirdparty/zstd/decompress/zstd_ddict.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/decompress/zstd_ddict.h b/thirdparty/zstd/decompress/zstd_ddict.h index 8906a71c94..bd03268b50 100644 --- a/thirdparty/zstd/decompress/zstd_ddict.h +++ b/thirdparty/zstd/decompress/zstd_ddict.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/decompress/zstd_decompress.c b/thirdparty/zstd/decompress/zstd_decompress.c index 21f846bc77..910bc034c0 100644 --- a/thirdparty/zstd/decompress/zstd_decompress.c +++ b/thirdparty/zstd/decompress/zstd_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -62,6 +62,7 @@ #include "../common/fse.h" #define HUF_STATIC_LINKING_ONLY #include "../common/huf.h" +#include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */ #include "../common/zstd_internal.h" /* blockProperties_t */ #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ @@ -72,6 +73,144 @@ #endif + +/************************************* + * Multiple DDicts Hashset internals * + *************************************/ + +#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 +#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. + * Currently, that means a 0.75 load factor. + * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded + * the load factor of the ddict hash set. 
+ */ + +#define DDICT_HASHSET_TABLE_BASE_SIZE 64 +#define DDICT_HASHSET_RESIZE_FACTOR 2 + +/* Hash function to determine starting position of dict insertion within the table + * Returns an index between [0, hashSet->ddictPtrTableSize] + */ +static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) { + const U64 hash = XXH64(&dictID, sizeof(U32), 0); + /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */ + return hash & (hashSet->ddictPtrTableSize - 1); +} + +/* Adds DDict to a hashset without resizing it. + * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set. + * Returns 0 if successful, or a zstd error code if something went wrong. + */ +static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) { + const U32 dictID = ZSTD_getDictID_fromDDict(ddict); + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!"); + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + while (hashSet->ddictPtrTable[idx] != NULL) { + /* Replace existing ddict if inserting ddict with same dictID */ + if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) { + DEBUGLOG(4, "DictID already exists, replacing rather than adding"); + hashSet->ddictPtrTable[idx] = ddict; + return 0; + } + idx &= idxRangeMask; + idx++; + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + hashSet->ddictPtrTable[idx] = ddict; + hashSet->ddictPtrCount++; + return 0; +} + +/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and + * rehashes all values, allocates new table, frees old table. + * Returns 0 on success, otherwise a zstd error code. 
+ */ +static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR; + const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem); + const ZSTD_DDict** oldTable = hashSet->ddictPtrTable; + size_t oldTableSize = hashSet->ddictPtrTableSize; + size_t i; + + DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize); + RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!"); + hashSet->ddictPtrTable = newTable; + hashSet->ddictPtrTableSize = newTableSize; + hashSet->ddictPtrCount = 0; + for (i = 0; i < oldTableSize; ++i) { + if (oldTable[i] != NULL) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), ""); + } + } + ZSTD_customFree((void*)oldTable, customMem); + DEBUGLOG(4, "Finished re-hash"); + return 0; +} + +/* Fetches a DDict with the given dictID + * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL. + */ +static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) { + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + for (;;) { + size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]); + if (currDictID == dictID || currDictID == 0) { + /* currDictID == 0 implies a NULL ddict entry */ + break; + } else { + idx &= idxRangeMask; /* Goes to start of table when we reach the end */ + idx++; + } + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + return hashSet->ddictPtrTable[idx]; +} + +/* Allocates space for and returns a ddict hash set + * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with. 
+ * Returns NULL if allocation failed. + */ +static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) { + ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem); + DEBUGLOG(4, "Allocating new hash set"); + ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem); + ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE; + ret->ddictPtrCount = 0; + if (!ret || !ret->ddictPtrTable) { + return NULL; + } + return ret; +} + +/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself. + * Note: The ZSTD_DDict* within the table are NOT freed. + */ +static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + DEBUGLOG(4, "Freeing ddict hash set"); + if (hashSet && hashSet->ddictPtrTable) { + ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem); + } + if (hashSet) { + ZSTD_customFree(hashSet, customMem); + } +} + +/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set. + * Returns 0 on success, or a ZSTD error. 
+ */ +static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) { + DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize); + if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), ""); + } + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), ""); + return 0; +} + /*-************************************************************* * Context management ***************************************************************/ @@ -101,6 +240,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; dctx->outBufferMode = ZSTD_bm_buffered; dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; + dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; } static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) @@ -120,8 +260,8 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->noForwardProgress = 0; dctx->oversizedDuration = 0; dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + dctx->ddictSet = NULL; ZSTD_DCtx_resetParameters(dctx); - dctx->validateChecksum = 1; #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION dctx->dictContentEndForFuzzing = NULL; #endif @@ -178,6 +318,10 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) if (dctx->legacyContext) ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); #endif + if (dctx->ddictSet) { + ZSTD_freeDDictHashSet(dctx->ddictSet, cMem); + dctx->ddictSet = NULL; + } ZSTD_customFree(dctx, cMem); return 0; } @@ -190,6 +334,29 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ } +/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based 
on + * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then + * accordingly sets the ddict to be used to decompress the frame. + * + * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is. + * + * ZSTD_d_refMultipleDDicts must be enabled for this function to be called. + */ +static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) { + assert(dctx->refMultipleDDicts && dctx->ddictSet); + DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame"); + if (dctx->ddict) { + const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID); + if (frameDDict) { + DEBUGLOG(4, "DDict found!"); + ZSTD_clearDict(dctx); + dctx->dictID = dctx->fParams.dictID; + dctx->ddict = frameDDict; + dctx->dictUses = ZSTD_use_indefinitely; + } + } +} + /*-************************************************************* * Frame header decoding @@ -441,12 +608,19 @@ unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) /** ZSTD_decodeFrameHeader() : * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). + * If multiple DDict references are enabled, also will choose the correct DDict to use. * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) { size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); if (ZSTD_isError(result)) return result; /* invalid header */ RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); + + /* Reference DDict requested by frame if dctx references multiple ddicts */ + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) { + ZSTD_DCtx_selectFrameDDict(dctx); + } + #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION /* Skip the dictID check in fuzzing mode, because it makes the search * harder. 
@@ -456,6 +630,7 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he #endif dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0); + dctx->processedCSize += headerSize; return 0; } @@ -578,7 +753,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) { DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); - ZSTD_checkContinuity(dctx, blockStart); + ZSTD_checkContinuity(dctx, blockStart, blockSize); dctx->previousDstEnd = (const char*)blockStart + blockSize; return blockSize; } @@ -610,6 +785,32 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, return regenSize; } +static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming) +{ +#if ZSTD_TRACE + if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) { + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + if (dctx->ddict) { + trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict); + trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict); + trace.dictionaryIsCold = dctx->ddictIsCold; + } + trace.uncompressedSize = (size_t)uncompressedSize; + trace.compressedSize = (size_t)compressedSize; + trace.dctx = dctx; + ZSTD_trace_decompress_end(dctx->traceCtx, &trace); + } +#else + (void)dctx; + (void)uncompressedSize; + (void)compressedSize; + (void)streaming; +#endif +} + /*! 
ZSTD_decompressFrame() : * @dctx must be properly initialized @@ -619,8 +820,9 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void** srcPtr, size_t *srcSizePtr) { - const BYTE* ip = (const BYTE*)(*srcPtr); - BYTE* const ostart = (BYTE* const)dst; + const BYTE* const istart = (const BYTE*)(*srcPtr); + const BYTE* ip = istart; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart; BYTE* op = ostart; size_t remainingSrcSize = *srcSizePtr; @@ -695,7 +897,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, ip += 4; remainingSrcSize -= 4; } - + ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); /* Allow caller to get size read */ *srcPtr = ip; *srcSizePtr = remainingSrcSize; @@ -764,7 +966,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, * use this in all cases but ddict */ FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); } - ZSTD_checkContinuity(dctx, dst); + ZSTD_checkContinuity(dctx, dst, dstCapacity); { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize); @@ -899,7 +1101,9 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); /* Sanity check */ RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); - if (dstCapacity) ZSTD_checkContinuity(dctx, dst); + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + dctx->processedCSize += srcSize; switch (dctx->stage) { @@ -1004,6 +1208,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c dctx->expected = 4; dctx->stage = ZSTDds_checkChecksum; } else { + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); dctx->expected = 0; /* ends here */ dctx->stage = ZSTDds_getFrameHeaderSize; } @@ -1023,6 +1228,7 @@ size_t 
ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); } + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; return 0; @@ -1176,8 +1382,12 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) { assert(dctx != NULL); +#if ZSTD_TRACE + dctx->traceCtx = (ZSTD_trace_decompress_begin != NULL) ? ZSTD_trace_decompress_begin(dctx) : 0; +#endif dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->processedCSize = 0; dctx->decodedSize = 0; dctx->previousDstEnd = NULL; dctx->prefixStart = NULL; @@ -1391,6 +1601,16 @@ size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) if (ddict) { dctx->ddict = ddict; dctx->dictUses = ZSTD_use_indefinitely; + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) { + if (dctx->ddictSet == NULL) { + dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem); + if (!dctx->ddictSet) { + RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!"); + } + } + assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */ + FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), ""); + } } return 0; } @@ -1436,6 +1656,10 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) bounds.lowerBound = (int)ZSTD_d_validateChecksum; bounds.upperBound = (int)ZSTD_d_ignoreChecksum; return bounds; + case ZSTD_d_refMultipleDDicts: + bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; + bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; + return bounds; default:; } bounds.error = ERROR(parameter_unsupported); @@ 
-1473,6 +1697,9 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value case ZSTD_d_forceIgnoreChecksum: *value = (int)dctx->forceIgnoreChecksum; return 0; + case ZSTD_d_refMultipleDDicts: + *value = (int)dctx->refMultipleDDicts; + return 0; default:; } RETURN_ERROR(parameter_unsupported, ""); @@ -1499,6 +1726,13 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; return 0; + case ZSTD_d_refMultipleDDicts: + CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value); + if (dctx->staticSize != 0) { + RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!"); + } + dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; + return 0; default:; } RETURN_ERROR(parameter_unsupported, ""); @@ -1680,6 +1914,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB } } #endif { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + if (zds->refMultipleDDicts && zds->ddictSet) { + ZSTD_DCtx_selectFrameDDict(zds); + } DEBUGLOG(5, "header size : %u", (U32)hSize); if (ZSTD_isError(hSize)) { #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) diff --git a/thirdparty/zstd/decompress/zstd_decompress_block.c b/thirdparty/zstd/decompress/zstd_decompress_block.c index 19cbdc5c16..349dcdc333 100644 --- a/thirdparty/zstd/decompress/zstd_decompress_block.c +++ b/thirdparty/zstd/decompress/zstd_decompress_block.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -577,7 +577,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, const void* src, size_t srcSize) { - const BYTE* const istart = (const BYTE* const)src; + const BYTE* const istart = (const BYTE*)src; const BYTE* const iend = istart + srcSize; const BYTE* ip = istart; int nbSeq; @@ -658,7 +658,6 @@ typedef struct { size_t litLength; size_t matchLength; size_t offset; - const BYTE* match; } seq_t; typedef struct { @@ -672,9 +671,6 @@ typedef struct { ZSTD_fseState stateOffb; ZSTD_fseState stateML; size_t prevOffset[ZSTD_REP_NUM]; - const BYTE* prefixStart; - const BYTE* dictEnd; - size_t pos; } seqState_t; /*! ZSTD_overlapCopy8() : @@ -936,10 +932,9 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD : 0) typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; -typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) { seq_t seq; ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; @@ -1014,14 +1009,6 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - if (prefetch == ZSTD_p_prefetch) { - size_t const pos = seqState->pos + seq.litLength; - const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; - seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. 
- * No consequence though : no memory access will occur, offset is only used for prefetching */ - seqState->pos = pos + seq.matchLength; - } - /* ANS state update * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). @@ -1108,7 +1095,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; @@ -1122,7 +1109,6 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, /* Regen sequences */ if (nbSeq) { seqState_t seqState; - size_t error = 0; dctx->fseEntropy = 1; { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } RETURN_ERROR_IF( @@ -1156,13 +1142,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, * If you see most cycles served out of the DSB you've hit the good case. * If it is pretty even then you may be in an okay case. * - * I've been able to reproduce this issue on the following CPUs: + * This issue has been reproduced on the following CPUs: * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 * Use Instruments->Counters to get DSB/MITE cycles. * I never got performance swings, but I was able to * go from the good case of mostly DSB to half of the * cycles served from MITE. 
* - Coffeelake: Intel i9-9900k + * - Coffeelake: Intel i7-9700k * * I haven't been able to reproduce the instability or DSB misses on any * of the following CPUS: @@ -1175,33 +1162,35 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, * * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 */ + __asm__(".p2align 6"); + __asm__("nop"); __asm__(".p2align 5"); __asm__("nop"); +# if __GNUC__ >= 9 + /* better for gcc-9 and gcc-10, worse for clang and gcc-8 */ + __asm__(".p2align 3"); +# else __asm__(".p2align 4"); +# endif #endif for ( ; ; ) { - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); #endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - BIT_reloadDStream(&(seqState.DStream)); op += oneSeqSize; - /* gcc and clang both don't like early returns in this loop. - * Instead break and check for an error at the end of the loop. 
- */ - if (UNLIKELY(ZSTD_isError(oneSeqSize))) { - error = oneSeqSize; + if (UNLIKELY(!--nbSeq)) break; - } - if (UNLIKELY(!--nbSeq)) break; + BIT_reloadDStream(&(seqState.DStream)); } /* check if reached exact end */ DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); - if (ZSTD_isError(error)) return error; RETURN_ERROR_IF(nbSeq, corruption_detected, ""); RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); /* save reps for next block */ @@ -1232,6 +1221,24 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + +FORCE_INLINE_TEMPLATE size_t +ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, + const BYTE* const prefixStart, const BYTE* const dictEnd) +{ + prefetchPos += sequence.litLength; + { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart; + const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : memory address is only used for prefetching, not for dereferencing */ + PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ + } + return prefetchPos + sequence.matchLength; +} + +/* This decoding function employs prefetching + * to reduce latency impact of cache misses. 
+ * It's generally employed when block contains a significant portion of long-distance matches + * or when coupled with a "cold" dictionary */ FORCE_INLINE_TEMPLATE size_t ZSTD_decompressSequencesLong_body( ZSTD_DCtx* dctx, @@ -1242,7 +1249,7 @@ ZSTD_decompressSequencesLong_body( { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; @@ -1254,18 +1261,17 @@ ZSTD_decompressSequencesLong_body( /* Regen sequences */ if (nbSeq) { -#define STORED_SEQS 4 +#define STORED_SEQS 8 #define STORED_SEQS_MASK (STORED_SEQS-1) -#define ADVANCED_SEQS 4 +#define ADVANCED_SEQS STORED_SEQS seq_t sequences[STORED_SEQS]; int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); seqState_t seqState; int seqNb; + size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */ + dctx->fseEntropy = 1; { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } - seqState.prefixStart = prefixStart; - seqState.pos = (size_t)(op-prefixStart); - seqState.dictEnd = dictEnd; assert(dst != NULL); assert(iend >= ip); RETURN_ERROR_IF( @@ -1277,21 +1283,23 @@ ZSTD_decompressSequencesLong_body( /* prepare in advance */ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) { - sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch); - PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); + sequences[seqNb] = sequence; } RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, ""); /* decode 
and decompress */ for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) { - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch); + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); #endif if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ + + prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); sequences[seqNb & STORED_SEQS_MASK] = sequence; op += oneSeqSize; } @@ -1517,9 +1525,9 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, } -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) { - if (dst != dctx->previousDstEnd) { /* not contiguous */ + if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ dctx->dictEnd = dctx->previousDstEnd; dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); dctx->prefixStart = dst; @@ -1533,7 +1541,7 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize) { size_t dSize; - ZSTD_checkContinuity(dctx, dst); + ZSTD_checkContinuity(dctx, dst, dstCapacity); dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); dctx->previousDstEnd = (char*)dst + dSize; return dSize; diff --git 
a/thirdparty/zstd/decompress/zstd_decompress_block.h b/thirdparty/zstd/decompress/zstd_decompress_block.h index b5715c168e..049a0cd84c 100644 --- a/thirdparty/zstd/decompress/zstd_decompress_block.h +++ b/thirdparty/zstd/decompress/zstd_decompress_block.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/decompress/zstd_decompress_internal.h b/thirdparty/zstd/decompress/zstd_decompress_internal.h index f80b471e99..ebda0c9031 100644 --- a/thirdparty/zstd/decompress/zstd_decompress_internal.h +++ b/thirdparty/zstd/decompress/zstd_decompress_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -99,6 +99,13 @@ typedef enum { ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ } ZSTD_dictUses_e; +/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ +typedef struct { + const ZSTD_DDict** ddictPtrTable; + size_t ddictPtrTableSize; + size_t ddictPtrCount; +} ZSTD_DDictHashSet; + struct ZSTD_DCtx_s { const ZSTD_seqSymbol* LLTptr; @@ -113,6 +120,7 @@ struct ZSTD_DCtx_s const void* dictEnd; /* end of previous segment */ size_t expected; ZSTD_frameHeader fParams; + U64 processedCSize; U64 decodedSize; blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ ZSTD_dStage stage; @@ -136,6 +144,8 @@ struct ZSTD_DCtx_s U32 dictID; int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ ZSTD_dictUses_e dictUses; + ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ + ZSTD_refMultipleDDicts_e refMultipleDDicts; 
/* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */ /* streaming */ ZSTD_dStreamStage streamStage; @@ -166,6 +176,11 @@ struct ZSTD_DCtx_s void const* dictContentBeginForFuzzing; void const* dictContentEndForFuzzing; #endif + + /* Tracing */ +#if ZSTD_TRACE + ZSTD_TraceCtx traceCtx; +#endif }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ @@ -184,7 +199,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, * If yes, do nothing (continue on current segment). * If not, classify previous segment as "external dictionary", and start a new segment. * This function cannot fail. */ -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst); +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); #endif /* ZSTD_DECOMPRESS_INTERNAL_H */ diff --git a/thirdparty/zstd/zstd.h b/thirdparty/zstd/zstd.h index b0ecdf5538..4651e6c4dc 100644 --- a/thirdparty/zstd/zstd.h +++ b/thirdparty/zstd/zstd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -71,8 +71,8 @@ extern "C" { /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 -#define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 8 +#define ZSTD_VERSION_MINOR 5 +#define ZSTD_VERSION_RELEASE 0 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) /*! ZSTD_versionNumber() : @@ -109,7 +109,6 @@ ZSTDLIB_API const char* ZSTD_versionString(void); #define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX) - /*************************************** * Simple API ***************************************/ @@ -166,7 +165,7 @@ ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. 
*/ ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); -/*! ZSTD_findFrameCompressedSize() : +/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+ * `src` should point to the start of a ZSTD frame or skippable frame. * `srcSize` must be >= first frame size * @return : the compressed size of the first frame starting at `src`, @@ -180,8 +179,9 @@ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize) ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ -ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ /*************************************** @@ -199,7 +199,7 @@ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compres */ typedef struct ZSTD_CCtx_s ZSTD_CCtx; ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); -ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ /*! ZSTD_compressCCtx() : * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. @@ -222,7 +222,7 @@ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, * Use one context per thread for parallel execution. */ typedef struct ZSTD_DCtx_s ZSTD_DCtx; ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); -ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer */ /*! 
ZSTD_decompressDCtx() : * Same as ZSTD_decompress(), @@ -234,9 +234,9 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, const void* src, size_t srcSize); -/*************************************** -* Advanced compression API -***************************************/ +/********************************************* +* Advanced compression API (Requires v1.4.0+) +**********************************************/ /* API design : * Parameters are pushed one by one into an existing context, @@ -266,7 +266,6 @@ typedef enum { ZSTD_fast=1, Only the order (from fast to strong) is guaranteed */ } ZSTD_strategy; - typedef enum { /* compression parameters @@ -332,7 +331,6 @@ typedef enum { * The higher the value of selected strategy, the more complex it is, * resulting in stronger and slower compression. * Special: value 0 means "use default strategy". */ - /* LDM mode parameters */ ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. * This parameter is designed to improve compression ratio @@ -389,7 +387,7 @@ typedef enum { ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. * 0 means default, which is dynamically determined based on compression parameters. - * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. + * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest. * The minimum size is automatically and transparently enforced. */ ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. 
@@ -419,6 +417,8 @@ typedef enum { * ZSTD_c_stableOutBuffer * ZSTD_c_blockDelimiters * ZSTD_c_validateSequences + * ZSTD_c_splitBlocks + * ZSTD_c_useRowMatchFinder * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -434,7 +434,10 @@ typedef enum { ZSTD_c_experimentalParam9=1006, ZSTD_c_experimentalParam10=1007, ZSTD_c_experimentalParam11=1008, - ZSTD_c_experimentalParam12=1009 + ZSTD_c_experimentalParam12=1009, + ZSTD_c_experimentalParam13=1010, + ZSTD_c_experimentalParam14=1011, + ZSTD_c_experimentalParam15=1012 } ZSTD_cParameter; typedef struct { @@ -519,9 +522,9 @@ ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, const void* src, size_t srcSize); -/*************************************** -* Advanced decompression API -***************************************/ +/*********************************************** +* Advanced decompression API (Requires v1.4.0+) +************************************************/ /* The advanced API pushes parameters one by one into an existing DCtx context. * Parameters are sticky, and remain valid for all following frames @@ -546,12 +549,14 @@ typedef enum { * ZSTD_d_format * ZSTD_d_stableOutBuffer * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? 
names directly */ ZSTD_d_experimentalParam1=1000, ZSTD_d_experimentalParam2=1001, - ZSTD_d_experimentalParam3=1002 + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 } ZSTD_dParameter; @@ -665,7 +670,7 @@ typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ /*===== ZSTD_CStream management functions =====*/ ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); -ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); /* accept NULL pointer */ /*===== Streaming compression functions =====*/ typedef enum { @@ -681,7 +686,7 @@ typedef enum { : note : multithreaded compression will block to flush as much output as possible. */ } ZSTD_EndDirective; -/*! ZSTD_compressStream2() : +/*! ZSTD_compressStream2() : Requires v1.4.0+ * Behaves about the same as ZSTD_compressStream, with additional control on end directive. * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) @@ -727,11 +732,11 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output /* ***************************************************************************** - * This following is a legacy streaming API. + * This following is a legacy streaming API, available since v1.0+ . * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). * It is redundant, but remains fully supported. - * Advanced parameters and dictionary compression can only be used through the - * new API. + * Streaming in combination with advanced parameters and dictionary compression + * can only be used through the new API. ******************************************************************************/ /*! 
@@ -786,7 +791,7 @@ typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ /*===== ZSTD_DStream management functions =====*/ ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); -ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer */ /*===== Streaming decompression functions =====*/ @@ -809,7 +814,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output /*! ZSTD_compress_usingDict() : * Compression at an explicit compression level using a Dictionary. * A dictionary can be any arbitrary data segment (also called a prefix), - * or a buffer with specified information (see dictBuilder/zdict.h). + * or a buffer with specified information (see zdict.h). * Note : This function loads the dictionary, resulting in significant startup delay. * It's intended for a dictionary used only once. * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ @@ -852,7 +857,8 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize int compressionLevel); /*! ZSTD_freeCDict() : - * Function frees memory allocated by ZSTD_createCDict(). */ + * Function frees memory allocated by ZSTD_createCDict(). + * If a NULL pointer is passed, no operation is performed. */ ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); /*! ZSTD_compress_usingCDict() : @@ -874,7 +880,8 @@ typedef struct ZSTD_DDict_s ZSTD_DDict; ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); /*! ZSTD_freeDDict() : - * Function frees memory allocated with ZSTD_createDDict() */ + * Function frees memory allocated with ZSTD_createDDict() + * If a NULL pointer is passed, no operation is performed. */ ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); /*! 
ZSTD_decompress_usingDDict() : @@ -890,19 +897,25 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, * Dictionary helper functions *******************************/ -/*! ZSTD_getDictID_fromDict() : +/*! ZSTD_getDictID_fromDict() : Requires v1.4.0+ * Provides the dictID stored within dictionary. * if @return == 0, the dictionary is not conformant with Zstandard specification. * It can still be loaded, but as a content-only dictionary. */ ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); -/*! ZSTD_getDictID_fromDDict() : +/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+ + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! ZSTD_getDictID_fromDDict() : Requires v1.4.0+ * Provides the dictID of the dictionary loaded into `ddict`. * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); -/*! ZSTD_getDictID_fromFrame() : +/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+ * Provides the dictID required to decompressed the frame stored within `src`. * If @return == 0, the dictID could not be decoded. * This could for one of the following reasons : @@ -916,7 +929,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); /******************************************************************************* - * Advanced dictionary and prefix API + * Advanced dictionary and prefix API (Requires v1.4.0+) * * This API allows dictionaries to be used with ZSTD_compress2(), * ZSTD_compressStream2(), and ZSTD_decompress(). 
Dictionaries are sticky, and @@ -925,7 +938,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); ******************************************************************************/ -/*! ZSTD_CCtx_loadDictionary() : +/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+ * Create an internal CDict from `dict` buffer. * Decompression will have to use same dictionary. * @result : 0, or an error code (which can be tested with ZSTD_isError()). @@ -944,11 +957,11 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); * to precisely select how dictionary content must be interpreted. */ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); -/*! ZSTD_CCtx_refCDict() : +/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+ * Reference a prepared dictionary, to be used for all next compressed frames. * Note that compression parameters are enforced from within CDict, * and supersede any compression parameter previously set within CCtx. - * The parameters ignored are labled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. * The dictionary will remain valid for future compressed frames using same CCtx. * @result : 0, or an error code (which can be tested with ZSTD_isError()). @@ -958,7 +971,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, s * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); -/*! ZSTD_CCtx_refPrefix() : +/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+ * Reference a prefix (single-usage dictionary) for next compressed frame. * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). 
* Decompression will need same prefix to properly regenerate data. @@ -979,7 +992,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); -/*! ZSTD_DCtx_loadDictionary() : +/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ * Create an internal DDict from dict buffer, * to be used to decompress next frames. * The dictionary remains valid for all future frames, until explicitly invalidated. @@ -996,9 +1009,16 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, */ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -/*! ZSTD_DCtx_refDDict() : +/*! ZSTD_DCtx_refDDict() : Requires v1.4.0+ * Reference a prepared dictionary, to be used to decompress next frames. * The dictionary remains active for decompression of future frames using same DCtx. + * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Note 1 : Currently, only one dictionary can be managed. * Referencing a new dictionary effectively "discards" any previous one. @@ -1007,7 +1027,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s */ ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); -/*! ZSTD_DCtx_refPrefix() : +/*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+ * Reference a prefix (single-usage dictionary) to decompress next frame. * This is the reverse operation of ZSTD_CCtx_refPrefix(), * and must use the same prefix as the one used during compression. 
@@ -1028,7 +1048,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, /* === Memory management === */ -/*! ZSTD_sizeof_*() : +/*! ZSTD_sizeof_*() : Requires v1.4.0+ * These functions give the _current_ memory usage of selected object. * Note that object memory usage can evolve (increase or decrease) over time. */ ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); @@ -1053,6 +1073,28 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY +/* Deprecation warnings : + * Should these warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. + * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. + */ +#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS +# define ZSTD_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */ +#else +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define ZSTD_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API +# elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) +# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ >= 3) +# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define ZSTD_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message)) +# else +# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") +# define ZSTD_DEPRECATED(message) ZSTDLIB_API +# endif +#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ + /**************************************************************************************** * experimental API (static linking only) **************************************************************************************** @@ -1206,6 +1248,12 @@ 
typedef enum { } ZSTD_forceIgnoreChecksum_e; typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + +typedef enum { /* Note: this enum and the behavior it controls are effectively internal * implementation details of the compressor. They are expected to continue * to evolve and should be considered only in the context of extremely @@ -1253,6 +1301,11 @@ typedef enum { ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ } ZSTD_literalCompressionMode_e; +typedef enum { + ZSTD_urm_auto = 0, /* Automatically determine whether or not we use row matchfinder */ + ZSTD_urm_disableRowMatchFinder = 1, /* Never use row matchfinder */ + ZSTD_urm_enableRowMatchFinder = 2 /* Always use row matchfinder when applicable */ +} ZSTD_useRowMatchFinderMode_e; /*************************************** * Frame size functions @@ -1286,7 +1339,7 @@ ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t * `srcSize` must be the _exact_ size of this series * (i.e. there should be a frame boundary at `src + srcSize`) * @return : - upper-bound for the decompressed size of all data in all successive frames - * - if an error occured: ZSTD_CONTENTSIZE_ERROR + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR * * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. @@ -1372,6 +1425,23 @@ ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size const void* src, size_t srcSize); +/*! ZSTD_writeSkippableFrame() : + * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * Skippable frames begin with a a 4-byte magic number. 
There are 16 possible choices of magic number, + * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * + * Returns an error if destination buffer is not large enough, if the source size is not representable + * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). + * + * @return : number of bytes written or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); + + /*************************************** * Memory management ***************************************/ @@ -1506,13 +1576,14 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS * Note that the lifetime of such pool must exist while being used. * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value * to use an internal thread pool). - * ZSTD_freeThreadPool frees a thread pool. + * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer. */ typedef struct POOL_ctx_s ZSTD_threadPool; ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); -ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */ ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); + /* * This API is temporary and is expected to change or disappear in the future! 
*/ @@ -1523,10 +1594,12 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( const ZSTD_CCtx_params* cctxParams, ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + /*************************************** * Advanced compression functions @@ -1540,12 +1613,6 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictS * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); -/*! ZSTD_getDictID_fromCDict() : - * Provides the dictID of the dictionary loaded into `cdict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); - /*! ZSTD_getCParams() : * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. * `estimatedSrcSize` value is optional, select 0 if not known */ @@ -1572,18 +1639,20 @@ ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParame /*! ZSTD_compress_advanced() : * Note : this function is now DEPRECATED. * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + * This prototype will generate compilation warnings. 
*/ +ZSTD_DEPRECATED("use ZSTD_compress2") +size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, ZSTD_parameters params); /*! ZSTD_compress_usingCDict_advanced() : - * Note : this function is now REDUNDANT. + * Note : this function is now DEPRECATED. * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning in some future version */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const ZSTD_CDict* cdict, @@ -1645,7 +1714,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre /* Controls how the literals are compressed (default is auto). * The value must be of type ZSTD_literalCompressionMode_e. - * See ZSTD_literalCompressionMode_t enum definition for details. + * See ZSTD_literalCompressionMode_e enum definition for details. */ #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 @@ -1797,12 +1866,52 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre */ #define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 +/* ZSTD_c_splitBlocks + * Default is 0 == disabled. Set to 1 to enable block splitting. + * + * Will attempt to split blocks in order to improve compression ratio at the cost of speed. + */ +#define ZSTD_c_splitBlocks ZSTD_c_experimentalParam13 + +/* ZSTD_c_useRowMatchFinder + * Default is ZSTD_urm_auto. + * Controlled with ZSTD_useRowMatchFinderMode_e enum. 
+ * + * By default, in ZSTD_urm_auto, when finalizing the compression parameters, the library + * will decide at runtime whether to use the row-based matchfinder based on support for SIMD + * instructions as well as the windowLog. + * + * Set to ZSTD_urm_disableRowMatchFinder to never use row-based matchfinder. + * Set to ZSTD_urm_enableRowMatchFinder to force usage of row-based matchfinder. + */ +#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14 + +/* ZSTD_c_deterministicRefPrefix + * Default is 0 == disabled. Set to 1 to enable. + * + * Zstd produces different results for prefix compression when the prefix is + * directly adjacent to the data about to be compressed vs. when it isn't. + * This is because zstd detects that the two buffers are contiguous and it can + * use a more efficient match finding algorithm. However, this produces different + * results than when the two buffers are non-contiguous. This flag forces zstd + * to always load the prefix in non-contiguous mode, even if it happens to be + * adjacent to the data, to guarantee determinism. + * + * If you really care about determinism when using a dictionary or prefix, + * like when doing delta compression, you should select this option. It comes + * at a speed penalty of about ~2.5% if the dictionary and data happened to be + * contiguous, and is free if they weren't contiguous. We don't expect that + * intentionally making the dictionary and data contiguous will be worth the + * cost to memcpy() the data. + */ +#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. * @return : 0, or an error code (which can be tested with ZSTD_isError()). 
*/ -ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); /*! ZSTD_CCtx_params : @@ -1817,13 +1926,13 @@ ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param * These parameters will be applied to * all subsequent frames. * - ZSTD_compressStream2() : Do compression using the CCtx. - * - ZSTD_freeCCtxParams() : Free the memory. + * - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer. * * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() * for static allocation of CCtx for single-threaded compression. */ ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); -ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */ /*! ZSTD_CCtxParams_reset() : * Reset params to default values. @@ -1842,7 +1951,7 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compre */ ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); -/*! ZSTD_CCtxParams_setParameter() : +/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+ * Similar to ZSTD_CCtx_setParameter. * Set one compression parameter, selected by enum ZSTD_cParameter. * Parameters must be applied to a ZSTD_CCtx using @@ -1857,7 +1966,7 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_c * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. * @result : 0, or an error code (which can be tested with ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); /*! 
ZSTD_CCtx_setParametersUsingCCtxParams() : * Apply a set of ZSTD_CCtx_params to the compression context. @@ -1983,12 +2092,38 @@ ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param */ #define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 +/* ZSTD_d_refMultipleDDicts + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * If enabled and dctx is allocated on the heap, then additional memory will be allocated + * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict() + * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead + * store all references. At decompression time, the appropriate dictID is selected + * from the set of DDicts based on the dictID in the frame. + * + * Usage is simply calling ZSTD_refDDict() on multiple dict buffers. + * + * Param has values of byte ZSTD_refMultipleDDicts_e + * + * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory + * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. + * Memory is allocated as per ZSTD_DCtx::customMem. + * + * Although this function allocates memory for the table, the user is still responsible for + * memory management of the underlying ZSTD_DDict* themselves. + */ +#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 + + /*! ZSTD_DCtx_setFormat() : + * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). * Instruct the decoder context about what kind of data to decode next. * This instruction is mandatory to decode data without a fully-formed header, * such ZSTD_f_zstd1_magicless for example. * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); +ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); /*! 
ZSTD_decompressStream_simpleArgs() : * Same as ZSTD_decompressStream(), @@ -2012,7 +2147,7 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( /*===== Advanced Streaming compression functions =====*/ /*! ZSTD_initCStream_srcSize() : - * This function is deprecated, and equivalent to: + * This function is DEPRECATED, and equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); @@ -2021,15 +2156,15 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( * pledgedSrcSize must be correct. If it is not known at init time, use * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, * "0" also disables frame content size field. It may be enabled in the future. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t -ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /*! ZSTD_initCStream_usingDict() : - * This function is deprecated, and is equivalent to: + * This function is DEPRECATED, and is equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); @@ -2038,15 +2173,15 @@ ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, * dict == NULL or dictSize < 8, in which case no dict is used. * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. 
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /*! ZSTD_initCStream_advanced() : - * This function is deprecated, and is approximately equivalent to: + * This function is DEPRECATED, and is approximately equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * // Pseudocode: Set each zstd parameter and leave the rest as-is. * for ((param, value) : params) { @@ -2058,23 +2193,24 @@ ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. * pledgedSrcSize must be correct. * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t -ZSTD_initCStream_advanced(ZSTD_CStream* zcs, +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*! ZSTD_initCStream_usingCDict() : - * This function is deprecated, and equivalent to: + * This function is DEPRECATED, and equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_refCDict(zcs, cdict); - * + * * note : cdict will just be referenced, and must outlive compression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. 
*/ -ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /*! ZSTD_initCStream_usingCDict_advanced() : * This function is DEPRECATED, and is approximately equivalent to: @@ -2089,18 +2225,21 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDi * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. * pledgedSrcSize must be correct. If srcSize is not known at init time, use * value ZSTD_CONTENTSIZE_UNKNOWN. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); /*! ZSTD_resetCStream() : - * This function is deprecated, and is equivalent to: + * This function is DEPRECATED, and is equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but + * ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be + * explicitly specified. * * start a new frame, using same parameters from previous frame. * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. 
@@ -2110,9 +2249,10 @@ ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. * @return : 0, or an error code (which can be tested using ZSTD_isError()) - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); typedef struct { @@ -2199,8 +2339,7 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); ZSTD_CCtx object can be re-used multiple times within successive compression operations. Start by initializing a context. - Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, - or ZSTD_compressBegin_advanced(), for finer parameter control. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() Then, consume your input using ZSTD_compressContinue(). 
@@ -2225,15 +2364,17 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /*===== Buffer-less streaming compression functions =====*/ ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - +/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTD_DEPRECATED("use advanced API to access custom 
parameters") +size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ /** Buffer-less streaming decompression (synchronous mode) diff --git a/thirdparty/zstd/common/zstd_errors.h b/thirdparty/zstd/zstd_errors.h index 6d0d003004..fa3686b772 100644 --- a/thirdparty/zstd/common/zstd_errors.h +++ b/thirdparty/zstd/zstd_errors.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the |