diff options
Diffstat (limited to 'thirdparty')
358 files changed, 44612 insertions, 11921 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md index 1c4bad4836..c846094a6a 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -37,6 +37,18 @@ Check the diff of enet.h, protocol.c, and host.c with the 1.3.13 tarball before the next update. +## etc2comp + +- Upstream: https://github.com/google/etc2comp +- Version: 9cd0f9c (git) +- License: Apache + +Files extracted from upstream source: + +- all .cpp and .h files in EtcLib/ +- README.md, LICENSE, AUTHORS + + ## fonts - Upstream: ? @@ -60,7 +72,7 @@ Files extracted from upstream source: ## glad - Upstream: https://github.com/Dav1dde/glad -- Version: 0.1.13a0 +- Version: 0.1.14a0 - License: MIT The files we package are automatically generated. @@ -112,6 +124,10 @@ Files extracted from upstream source: - Version: 05cfdc2 (git) - License: MIT, BSD-3-Clause +Files extracted from upstream source: + +TODO. + ## libvorbis @@ -132,6 +148,10 @@ Files extracted from upstream source: - Version: 1.6.0 - License: BSD-3-Clause +Files extracted from upstream source: + +TODO. + ## libwebp @@ -242,13 +262,29 @@ Collection of single-file libraries used in Godot components. Files extracted from the upstream source: -TODO. +- Our `openssl/`: contains the headers installed in /usr/include/openssl; + gather them in the source tarball with `make links` and + `cp -f include/openssl/*.h ../openssl/openssl/` +- Our `crypto/`: copy of upstream `crypto/`, with some cleanup (see below). +- Our `ssl/`: copy of upstream `ssl/`, with some cleanup (see below). 
+- Cleanup: + ``` + find \( -name "Makefile" -o -name "*.S" -o -name "*.bat" -o -name "*.bc" \ + -o -name "*.com" -o -name "*.cnf" -o -name "*.ec" -o -name "*.fre" \ + -o -name "*.gcc" -o -name "*.in" -o -name "*.lnx" -o -name "*.m4" \ + -o -name "*.pl" -o -name "*.pod" -o -name "*.s" -o -name "*.sh" \ + -o -name "*.sol" -o -name "*test*" \) -delete + cd openssl; for file in *.h; do find ../{crypto,ssl} -name "$file" -delete; done + ``` + For the rest check the `git status` and decide. +- e_os.h +- Apply the Godot-specific patches in the `patches/` folder. ## opus - Upstream: https://opus-codec.org -- Version: 1.1.4 (opus) and 0.8 (opusfile) +- Version: 1.1.5 (opus) and 0.8 (opusfile) - License: BSD-3-Clause Files extracted from upstream source: @@ -272,17 +308,6 @@ Files extracted from upstream source: - LICENSE.TXT -## rg-etc1 - -- Upstream: https://github.com/richgel999/rg-etc1 -- Version: 1.04 -- License: zlib - -Files extracted from upstream source: - -- `rg_etc1.{cpp,h}` - - ## rtaudio - Upstream: http://www.music.mcgill.ca/~gary/rtaudio/ @@ -318,6 +343,21 @@ Files extracted from upstream source: - COPYING and LICENSE +## tinyexr + +- Upstream: https://github.com/syoyo/tinyexr +- Version: 0.9.5+ (git a145d69) +- License: BSD-3-Clause + +Files extracted from upstream source: + +- `tinyexr.{cc,h}` + +Important: Some changes were made to get TinyEXR to build on the ancient +MinGW-w64 toolchain of Travis CI. 
+https://github.com/godotengine/godot/commit/37f5e1dcd94611dd5b670f013abf0323e8b47def + + ## zlib - Upstream: http://www.zlib.net/ @@ -327,3 +367,14 @@ Files extracted from upstream source: Files extracted from upstream source: - all .c and .h files + +## zstd + +- Upstream: https://github.com/facebook/zstd +- Version: 1.2.0 +- License: BSD-3-Clause + +Files extracted from upstream source: + +- all .c and .h under lib/ +- README.md, LICENSE, PATENTS diff --git a/thirdparty/etc2comp/AUTHORS b/thirdparty/etc2comp/AUTHORS new file mode 100644 index 0000000000..32daca27fe --- /dev/null +++ b/thirdparty/etc2comp/AUTHORS @@ -0,0 +1,7 @@ +# This is the list of Etc2Comp authors for copyright purposes.
+#
+# This does not necessarily list everyone who has contributed code, since in
+# some cases, their employer may be the copyright holder. To see the full list
+# of contributors, see the revision history in source control.
+Google Inc.
+Blue Shift Inc.
diff --git a/thirdparty/etc2comp/Etc.cpp b/thirdparty/etc2comp/Etc.cpp new file mode 100644 index 0000000000..a5ee706048 --- /dev/null +++ b/thirdparty/etc2comp/Etc.cpp @@ -0,0 +1,128 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "EtcConfig.h" +#include "Etc.h" +#include "EtcFilter.h" + +#include <string.h> + +namespace Etc +{ + // ---------------------------------------------------------------------------------------------------- + // C-style inteface to the encoder + // + void Encode(float *a_pafSourceRGBA, + unsigned int a_uiSourceWidth, + unsigned int a_uiSourceHeight, + Image::Format a_format, + ErrorMetric a_eErrMetric, + float a_fEffort, + unsigned int a_uiJobs, + unsigned int a_uiMaxJobs, + unsigned char **a_ppaucEncodingBits, + unsigned int *a_puiEncodingBitsBytes, + unsigned int *a_puiExtendedWidth, + unsigned int *a_puiExtendedHeight, + int *a_piEncodingTime_ms, bool a_bVerboseOutput) + { + + Image image(a_pafSourceRGBA, a_uiSourceWidth, + a_uiSourceHeight, + a_eErrMetric); + image.m_bVerboseOutput = a_bVerboseOutput; + image.Encode(a_format, a_eErrMetric, a_fEffort, a_uiJobs, a_uiMaxJobs); + + *a_ppaucEncodingBits = image.GetEncodingBits(); + *a_puiEncodingBitsBytes = image.GetEncodingBitsBytes(); + *a_puiExtendedWidth = image.GetExtendedWidth(); + *a_puiExtendedHeight = image.GetExtendedHeight(); + *a_piEncodingTime_ms = image.GetEncodingTimeMs(); + } + + void 
EncodeMipmaps(float *a_pafSourceRGBA, + unsigned int a_uiSourceWidth, + unsigned int a_uiSourceHeight, + Image::Format a_format, + ErrorMetric a_eErrMetric, + float a_fEffort, + unsigned int a_uiJobs, + unsigned int a_uiMaxJobs, + unsigned int a_uiMaxMipmaps, + unsigned int a_uiMipFilterFlags, + RawImage* a_pMipmapImages, + int *a_piEncodingTime_ms, + bool a_bVerboseOutput) + { + auto mipWidth = a_uiSourceWidth; + auto mipHeight = a_uiSourceHeight; + int totalEncodingTime = 0; + for(unsigned int mip = 0; mip < a_uiMaxMipmaps && mipWidth >= 1 && mipHeight >= 1; mip++) + { + float* pImageData = nullptr; + float* pMipImage = nullptr; + + if(mip == 0) + { + pImageData = a_pafSourceRGBA; + } + else + { + pMipImage = new float[mipWidth*mipHeight*4]; + if(FilterTwoPass(a_pafSourceRGBA, a_uiSourceWidth, a_uiSourceHeight, pMipImage, mipWidth, mipHeight, a_uiMipFilterFlags, Etc::FilterLanczos3) ) + { + pImageData = pMipImage; + } + } + + if ( pImageData ) + { + + Image image(pImageData, mipWidth, mipHeight, a_eErrMetric); + + image.m_bVerboseOutput = a_bVerboseOutput; + image.Encode(a_format, a_eErrMetric, a_fEffort, a_uiJobs, a_uiMaxJobs); + + a_pMipmapImages[mip].paucEncodingBits = std::shared_ptr<unsigned char>(image.GetEncodingBits(), [](unsigned char *p) { delete[] p; }); + a_pMipmapImages[mip].uiEncodingBitsBytes = image.GetEncodingBitsBytes(); + a_pMipmapImages[mip].uiExtendedWidth = image.GetExtendedWidth(); + a_pMipmapImages[mip].uiExtendedHeight = image.GetExtendedHeight(); + + totalEncodingTime += image.GetEncodingTimeMs(); + } + + if(pMipImage) + { + delete[] pMipImage; + } + + if (!pImageData) + { + break; + } + + mipWidth >>= 1; + mipHeight >>= 1; + } + + *a_piEncodingTime_ms = totalEncodingTime; + } + + + // ---------------------------------------------------------------------------------------------------- + // + +} diff --git a/thirdparty/etc2comp/Etc.h b/thirdparty/etc2comp/Etc.h new file mode 100644 index 0000000000..439388d649 --- /dev/null +++ 
b/thirdparty/etc2comp/Etc.h @@ -0,0 +1,71 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "EtcConfig.h" +#include "EtcImage.h" +#include "EtcColor.h" +#include "EtcErrorMetric.h" +#include <memory> + +#define ETCCOMP_MIN_EFFORT_LEVEL (0.0f) +#define ETCCOMP_DEFAULT_EFFORT_LEVEL (40.0f) +#define ETCCOMP_MAX_EFFORT_LEVEL (100.0f) + +namespace Etc +{ + class Block4x4EncodingBits; + + struct RawImage + { + int uiExtendedWidth; + int uiExtendedHeight; + unsigned int uiEncodingBitsBytes; + std::shared_ptr<unsigned char> paucEncodingBits; + }; + + + + // C-style inteface to the encoder + void Encode(float *a_pafSourceRGBA, + unsigned int a_uiSourceWidth, + unsigned int a_uiSourceHeight, + Image::Format a_format, + ErrorMetric a_eErrMetric, + float a_fEffort, + unsigned int a_uiJobs, + unsigned int a_uimaxJobs, + unsigned char **a_ppaucEncodingBits, + unsigned int *a_puiEncodingBitsBytes, + unsigned int *a_puiExtendedWidth, + unsigned int *a_puiExtendedHeight, + int *a_piEncodingTime_ms, bool a_bVerboseOutput = false); + + void EncodeMipmaps(float *a_pafSourceRGBA, + unsigned int a_uiSourceWidth, + unsigned int a_uiSourceHeight, + Image::Format a_format, + ErrorMetric a_eErrMetric, + float a_fEffort, + unsigned int a_uiJobs, + unsigned int a_uiMaxJobs, + unsigned int a_uiMaxMipmaps, + unsigned int a_uiMipFilterFlags, + RawImage* a_pMipmaps, + int *a_piEncodingTime_ms, bool 
a_bVerboseOutput = false); + +} diff --git a/thirdparty/etc2comp/EtcBlock4x4.cpp b/thirdparty/etc2comp/EtcBlock4x4.cpp new file mode 100644 index 0000000000..3082fe60db --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4.cpp @@ -0,0 +1,425 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcBlock4x4.cpp + +Implements the state associated with each 4x4 block of pixels in an image + +Source images that are not a multiple of 4x4 are extended to fill the Block4x4 using pixels with an +alpha of NAN + +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4.h" + +#include "EtcBlock4x4EncodingBits.h" +#include "EtcColor.h" +#include "EtcImage.h" +#include "EtcColorFloatRGBA.h" +#include "EtcBlock4x4Encoding_RGB8.h" +#include "EtcBlock4x4Encoding_RGBA8.h" +#include "EtcBlock4x4Encoding_RGB8A1.h" +#include "EtcBlock4x4Encoding_R11.h" +#include "EtcBlock4x4Encoding_RG11.h" + +#include <stdio.h> +#include <string.h> +#include <assert.h> + +namespace Etc +{ + // ETC pixels are scanned vertically. 
+ // this mapping is for when someone wants to scan the ETC pixels horizontally + const unsigned int Block4x4::s_auiPixelOrderHScan[PIXELS] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; + + // ---------------------------------------------------------------------------------------------------- + // + Block4x4::Block4x4(void) + { + m_pimageSource = nullptr; + m_uiSourceH = 0; + m_uiSourceV = 0; + + m_sourcealphamix = SourceAlphaMix::UNKNOWN; + m_boolBorderPixels = false; + m_boolPunchThroughPixels = false; + + m_pencoding = nullptr; + + m_errormetric = ErrorMetric::NUMERIC; + + } + Block4x4::~Block4x4() + { + m_pimageSource = nullptr; + if (m_pencoding) + { + delete m_pencoding; + m_pencoding = nullptr; + } + } + // ---------------------------------------------------------------------------------------------------- + // initialization prior to encoding from a source image + // [a_uiSourceH,a_uiSourceV] is the location of the block in a_pimageSource + // a_paucEncodingBits is the place to store the final encoding + // a_errormetric is used for finding the best encoding + // + void Block4x4::InitFromSource(Image *a_pimageSource, + unsigned int a_uiSourceH, unsigned int a_uiSourceV, + unsigned char *a_paucEncodingBits, + ErrorMetric a_errormetric) + { + + Block4x4(); + + m_pimageSource = a_pimageSource; + m_uiSourceH = a_uiSourceH; + m_uiSourceV = a_uiSourceV; + m_errormetric = a_errormetric; + + SetSourcePixels(); + + // set block encoder function + switch (m_pimageSource->GetFormat()) + { + case Image::Format::ETC1: + m_pencoding = new Block4x4Encoding_ETC1; + break; + + case Image::Format::RGB8: + case Image::Format::SRGB8: + m_pencoding = new Block4x4Encoding_RGB8; + break; + + case Image::Format::RGBA8: + case Image::Format::SRGBA8: + if (a_errormetric == RGBX) + { + m_pencoding = new Block4x4Encoding_RGBA8; + } + else + { + switch (m_sourcealphamix) + { + case SourceAlphaMix::OPAQUE: + m_pencoding = new Block4x4Encoding_RGBA8_Opaque; + break; + + 
case SourceAlphaMix::TRANSPARENT: + m_pencoding = new Block4x4Encoding_RGBA8_Transparent; + break; + + case SourceAlphaMix::TRANSLUCENT: + m_pencoding = new Block4x4Encoding_RGBA8; + break; + + default: + assert(0); + break; + } + break; + } + break; + + case Image::Format::RGB8A1: + case Image::Format::SRGB8A1: + switch (m_sourcealphamix) + { + case SourceAlphaMix::OPAQUE: + m_pencoding = new Block4x4Encoding_RGB8A1_Opaque; + break; + + case SourceAlphaMix::TRANSPARENT: + m_pencoding = new Block4x4Encoding_RGB8A1_Transparent; + break; + + case SourceAlphaMix::TRANSLUCENT: + if (m_boolPunchThroughPixels) + { + m_pencoding = new Block4x4Encoding_RGB8A1; + } + else + { + m_pencoding = new Block4x4Encoding_RGB8A1_Opaque; + } + break; + + default: + assert(0); + break; + } + break; + + case Image::Format::R11: + case Image::Format::SIGNED_R11: + m_pencoding = new Block4x4Encoding_R11; + break; + case Image::Format::RG11: + case Image::Format::SIGNED_RG11: + m_pencoding = new Block4x4Encoding_RG11; + break; + default: + assert(0); + break; + } + + m_pencoding->InitFromSource(this, m_afrgbaSource, + a_paucEncodingBits, a_errormetric); + + } + + // ---------------------------------------------------------------------------------------------------- + // initialization of encoding state from a prior encoding using encoding bits + // [a_uiSourceH,a_uiSourceV] is the location of the block in a_pimageSource + // a_paucEncodingBits is the place to read the prior encoding + // a_imageformat is used to determine how to interpret a_paucEncodingBits + // a_errormetric was used for the prior encoding + // + void Block4x4::InitFromEtcEncodingBits(Image::Format a_imageformat, + unsigned int a_uiSourceH, unsigned int a_uiSourceV, + unsigned char *a_paucEncodingBits, + Image *a_pimageSource, + ErrorMetric a_errormetric) + { + Block4x4(); + + m_pimageSource = a_pimageSource; + m_uiSourceH = a_uiSourceH; + m_uiSourceV = a_uiSourceV; + m_errormetric = a_errormetric; + + SetSourcePixels(); 
+ + // set block encoder function + switch (a_imageformat) + { + case Image::Format::ETC1: + m_pencoding = new Block4x4Encoding_ETC1; + break; + + case Image::Format::RGB8: + case Image::Format::SRGB8: + m_pencoding = new Block4x4Encoding_RGB8; + break; + + case Image::Format::RGBA8: + case Image::Format::SRGBA8: + m_pencoding = new Block4x4Encoding_RGBA8; + break; + + case Image::Format::RGB8A1: + case Image::Format::SRGB8A1: + m_pencoding = new Block4x4Encoding_RGB8A1; + break; + + case Image::Format::R11: + case Image::Format::SIGNED_R11: + m_pencoding = new Block4x4Encoding_R11; + break; + case Image::Format::RG11: + case Image::Format::SIGNED_RG11: + m_pencoding = new Block4x4Encoding_RG11; + break; + default: + assert(0); + break; + } + + m_pencoding->InitFromEncodingBits(this, a_paucEncodingBits, m_afrgbaSource, + m_pimageSource->GetErrorMetric()); + + } + + // ---------------------------------------------------------------------------------------------------- + // set source pixels from m_pimageSource + // set m_alphamix + // + void Block4x4::SetSourcePixels(void) + { + + Image::Format imageformat = m_pimageSource->GetFormat(); + + // alpha census + unsigned int uiTransparentSourcePixels = 0; + unsigned int uiOpaqueSourcePixels = 0; + + // copy source to consecutive memory locations + // convert from image horizontal scan to block vertical scan + unsigned int uiPixel = 0; + for (unsigned int uiBlockPixelH = 0; uiBlockPixelH < Block4x4::COLUMNS; uiBlockPixelH++) + { + unsigned int uiSourcePixelH = m_uiSourceH + uiBlockPixelH; + + for (unsigned int uiBlockPixelV = 0; uiBlockPixelV < Block4x4::ROWS; uiBlockPixelV++) + { + unsigned int uiSourcePixelV = m_uiSourceV + uiBlockPixelV; + + ColorFloatRGBA *pfrgbaSource = m_pimageSource->GetSourcePixel(uiSourcePixelH, uiSourcePixelV); + + // if pixel extends beyond source image because of block padding + if (pfrgbaSource == nullptr) + { + m_afrgbaSource[uiPixel] = ColorFloatRGBA(0.0f, 0.0f, 0.0f, NAN); // denotes 
border pixel + m_boolBorderPixels = true; + uiTransparentSourcePixels++; + } + else + { + //get teh current pixel data, and store some of the attributes + //before capping values to fit the encoder type + + m_afrgbaSource[uiPixel] = (*pfrgbaSource).ClampRGBA(); + + if (m_afrgbaSource[uiPixel].fA == 1.0f || m_errormetric == RGBX) + { + m_pimageSource->m_iNumOpaquePixels++; + } + else if (m_afrgbaSource[uiPixel].fA == 0.0f) + { + m_pimageSource->m_iNumTransparentPixels++; + } + else if(m_afrgbaSource[uiPixel].fA > 0.0f && m_afrgbaSource[uiPixel].fA < 1.0f) + { + m_pimageSource->m_iNumTranslucentPixels++; + } + else + { + m_pimageSource->m_numOutOfRangeValues.fA++; + } + + if (m_afrgbaSource[uiPixel].fR != 0.0f) + { + m_pimageSource->m_numColorValues.fR++; + //make sure we are getting a float between 0-1 + if (m_afrgbaSource[uiPixel].fR - 1.0f > 0.0f) + { + m_pimageSource->m_numOutOfRangeValues.fR++; + } + } + + if (m_afrgbaSource[uiPixel].fG != 0.0f) + { + m_pimageSource->m_numColorValues.fG++; + if (m_afrgbaSource[uiPixel].fG - 1.0f > 0.0f) + { + m_pimageSource->m_numOutOfRangeValues.fG++; + } + } + if (m_afrgbaSource[uiPixel].fB != 0.0f) + { + m_pimageSource->m_numColorValues.fB++; + if (m_afrgbaSource[uiPixel].fB - 1.0f > 0.0f) + { + m_pimageSource->m_numOutOfRangeValues.fB++; + } + } + // for formats with no alpha, set source alpha to 1 + if (imageformat == Image::Format::ETC1 || + imageformat == Image::Format::RGB8 || + imageformat == Image::Format::SRGB8) + { + m_afrgbaSource[uiPixel].fA = 1.0f; + } + + if (imageformat == Image::Format::R11 || + imageformat == Image::Format::SIGNED_R11) + { + m_afrgbaSource[uiPixel].fA = 1.0f; + m_afrgbaSource[uiPixel].fG = 0.0f; + m_afrgbaSource[uiPixel].fB = 0.0f; + } + + if (imageformat == Image::Format::RG11 || + imageformat == Image::Format::SIGNED_RG11) + { + m_afrgbaSource[uiPixel].fA = 1.0f; + m_afrgbaSource[uiPixel].fB = 0.0f; + } + + + // for RGB8A1, set source alpha to 0.0 or 1.0 + // set punch through flag + if 
(imageformat == Image::Format::RGB8A1 || + imageformat == Image::Format::SRGB8A1) + { + if (m_afrgbaSource[uiPixel].fA >= 0.5f) + { + m_afrgbaSource[uiPixel].fA = 1.0f; + } + else + { + m_afrgbaSource[uiPixel].fA = 0.0f; + m_boolPunchThroughPixels = true; + } + } + + if (m_afrgbaSource[uiPixel].fA == 1.0f || m_errormetric == RGBX) + { + uiOpaqueSourcePixels++; + } + else if (m_afrgbaSource[uiPixel].fA == 0.0f) + { + uiTransparentSourcePixels++; + } + + } + + uiPixel += 1; + } + } + + if (uiOpaqueSourcePixels == PIXELS) + { + m_sourcealphamix = SourceAlphaMix::OPAQUE; + } + else if (uiTransparentSourcePixels == PIXELS) + { + m_sourcealphamix = SourceAlphaMix::TRANSPARENT; + } + else + { + m_sourcealphamix = SourceAlphaMix::TRANSLUCENT; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // return a name for the encoding mode + // + const char * Block4x4::GetEncodingModeName(void) + { + + switch (m_pencoding->GetMode()) + { + case Block4x4Encoding::MODE_ETC1: + return "ETC1"; + case Block4x4Encoding::MODE_T: + return "T"; + case Block4x4Encoding::MODE_H: + return "H"; + case Block4x4Encoding::MODE_PLANAR: + return "PLANAR"; + default: + return "???"; + } + } + + // ---------------------------------------------------------------------------------------------------- + // + +} diff --git a/thirdparty/etc2comp/EtcBlock4x4.h b/thirdparty/etc2comp/EtcBlock4x4.h new file mode 100644 index 0000000000..0fd30c598d --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4.h @@ -0,0 +1,172 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "EtcColor.h" +#include "EtcColorFloatRGBA.h" +#include "EtcErrorMetric.h" +#include "EtcImage.h" +#include "EtcBlock4x4Encoding.h" + +namespace Etc +{ + class Block4x4EncodingBits; + + class Block4x4 + { + public: + + static const unsigned int ROWS = 4; + static const unsigned int COLUMNS = 4; + static const unsigned int PIXELS = ROWS * COLUMNS; + + // the alpha mix for a 4x4 block of pixels + enum class SourceAlphaMix + { + UNKNOWN, + // + OPAQUE, // all 1.0 + TRANSPARENT, // all 0.0 or NAN + TRANSLUCENT // not all opaque or transparent + }; + + typedef void (Block4x4::*EncoderFunctionPtr)(void); + + Block4x4(void); + ~Block4x4(); + void InitFromSource(Image *a_pimageSource, + unsigned int a_uiSourceH, + unsigned int a_uiSourceV, + unsigned char *a_paucEncodingBits, + ErrorMetric a_errormetric); + + void InitFromEtcEncodingBits(Image::Format a_imageformat, + unsigned int a_uiSourceH, + unsigned int a_uiSourceV, + unsigned char *a_paucEncodingBits, + Image *a_pimageSource, + ErrorMetric a_errormetric); + + // return true if final iteration was performed + inline void PerformEncodingIteration(float a_fEffort) + { + m_pencoding->PerformIteration(a_fEffort); + } + + inline void SetEncodingBitsFromEncoding(void) + { + m_pencoding->SetEncodingBits(); + } + + inline unsigned int GetSourceH(void) + { + return m_uiSourceH; + } + + inline unsigned int GetSourceV(void) + { + return m_uiSourceV; + } + + inline float GetError(void) + { + return m_pencoding->GetError(); + } + + static const unsigned int 
s_auiPixelOrderHScan[PIXELS]; + + inline ColorFloatRGBA * GetDecodedColors(void) + { + return m_pencoding->GetDecodedColors(); + } + + inline float * GetDecodedAlphas(void) + { + return m_pencoding->GetDecodedAlphas(); + } + + inline Block4x4Encoding::Mode GetEncodingMode(void) + { + return m_pencoding->GetMode(); + } + + inline bool GetFlip(void) + { + return m_pencoding->GetFlip(); + } + + inline bool IsDifferential(void) + { + return m_pencoding->IsDifferential(); + } + + inline ColorFloatRGBA * GetSource() + { + return m_afrgbaSource; + } + + inline ErrorMetric GetErrorMetric() + { + return m_errormetric; + } + + const char * GetEncodingModeName(void); + + inline Block4x4Encoding * GetEncoding(void) + { + return m_pencoding; + } + + inline SourceAlphaMix GetSourceAlphaMix(void) + { + return m_sourcealphamix; + } + + inline Image * GetImageSource(void) + { + return m_pimageSource; + } + + inline bool HasBorderPixels(void) + { + return m_boolBorderPixels; + } + + inline bool HasPunchThroughPixels(void) + { + return m_boolPunchThroughPixels; + } + + private: + + void SetSourcePixels(void); + + Image *m_pimageSource; + unsigned int m_uiSourceH; + unsigned int m_uiSourceV; + ErrorMetric m_errormetric; + ColorFloatRGBA m_afrgbaSource[PIXELS]; // vertical scan + + SourceAlphaMix m_sourcealphamix; + bool m_boolBorderPixels; // marked as rgba(NAN, NAN, NAN, NAN) + bool m_boolPunchThroughPixels; // RGB8A1 or SRGB8A1 with any pixels with alpha < 0.5 + + Block4x4Encoding *m_pencoding; + + }; + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding.cpp new file mode 100644 index 0000000000..7a9e68c4cf --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4Encoding.cpp @@ -0,0 +1,261 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcBlock4x4Encoding.cpp + +Block4x4Encoding is the abstract base class for the different encoders. Each encoder targets a +particular file format (e.g. ETC1, RGB8, RGBA8, R11) + +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4Encoding.h" + +#include "EtcBlock4x4EncodingBits.h" +#include "EtcBlock4x4.h" + +#include <stdio.h> +#include <string.h> +#include <assert.h> + +namespace Etc +{ + // ---------------------------------------------------------------------------------------------------- + // + const float Block4x4Encoding::LUMA_WEIGHT = 3.0f; + const float Block4x4Encoding::CHROMA_BLUE_WEIGHT = 0.5f; + + // ---------------------------------------------------------------------------------------------------- + // + Block4x4Encoding::Block4x4Encoding(void) + { + + m_pblockParent = nullptr; + + m_pafrgbaSource = nullptr; + + m_boolBorderPixels = false; + + m_fError = -1.0f; + + m_mode = MODE_UNKNOWN; + + m_uiEncodingIterations = 0; + m_boolDone = false; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(-1.0f, -1.0f, -1.0f, -1.0f); + m_afDecodedAlphas[uiPixel] = -1.0f; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // initialize the generic encoding for a 4x4 block + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // init the decoded pixels to -1 to mark them as undefined + // init the error to -1 to mark it as undefined 
+ // + void Block4x4Encoding::Init(Block4x4 *a_pblockParent, + ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric) + { + + m_pblockParent = a_pblockParent; + + m_pafrgbaSource = a_pafrgbaSource; + + m_boolBorderPixels = m_pblockParent->HasBorderPixels(); + + m_fError = -1.0f; + + m_uiEncodingIterations = 0; + + m_errormetric = a_errormetric; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(-1.0f, -1.0f, -1.0f, -1.0f); + m_afDecodedAlphas[uiPixel] = -1.0f; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // calculate the error for the block by summing the pixel errors + // + void Block4x4Encoding::CalcBlockError(void) + { + m_fError = 0.0f; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_fError += CalcPixelError(m_afrgbaDecodedColors[uiPixel], m_afDecodedAlphas[uiPixel], + m_pafrgbaSource[uiPixel]); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // calculate the error between the source pixel and the decoded pixel + // the error amount is base on the error metric + // + float Block4x4Encoding::CalcPixelError(ColorFloatRGBA a_frgbaDecodedColor, float a_fDecodedAlpha, + ColorFloatRGBA a_frgbaSourcePixel) + { + + // if a border pixel + if (isnan(a_frgbaSourcePixel.fA)) + { + return 0.0f; + } + + if (m_errormetric == ErrorMetric::RGBA) + { + assert(a_fDecodedAlpha >= 0.0f); + + float fDRed = (a_fDecodedAlpha * a_frgbaDecodedColor.fR) - + (a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fR); + float fDGreen = (a_fDecodedAlpha * a_frgbaDecodedColor.fG) - + (a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fG); + float fDBlue = (a_fDecodedAlpha * a_frgbaDecodedColor.fB) - + (a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fB); + + float fDAlpha = a_fDecodedAlpha - a_frgbaSourcePixel.fA; + + return fDRed*fDRed + fDGreen*fDGreen + fDBlue*fDBlue + 
fDAlpha*fDAlpha; + } + else if (m_errormetric == ErrorMetric::RGBX) + { + assert(a_fDecodedAlpha >= 0.0f); + + float fDRed = a_frgbaDecodedColor.fR - a_frgbaSourcePixel.fR; + float fDGreen = a_frgbaDecodedColor.fG - a_frgbaSourcePixel.fG; + float fDBlue = a_frgbaDecodedColor.fB - a_frgbaSourcePixel.fB; + float fDAlpha = a_fDecodedAlpha - a_frgbaSourcePixel.fA; + + return fDRed*fDRed + fDGreen*fDGreen + fDBlue*fDBlue + fDAlpha*fDAlpha; + } + else if (m_errormetric == ErrorMetric::REC709) + { + assert(a_fDecodedAlpha >= 0.0f); + + float fLuma1 = a_frgbaSourcePixel.fR*0.2126f + a_frgbaSourcePixel.fG*0.7152f + a_frgbaSourcePixel.fB*0.0722f; + float fChromaR1 = 0.5f * ((a_frgbaSourcePixel.fR - fLuma1) * (1.0f / (1.0f - 0.2126f))); + float fChromaB1 = 0.5f * ((a_frgbaSourcePixel.fB - fLuma1) * (1.0f / (1.0f - 0.0722f))); + + float fLuma2 = a_frgbaDecodedColor.fR*0.2126f + + a_frgbaDecodedColor.fG*0.7152f + + a_frgbaDecodedColor.fB*0.0722f; + float fChromaR2 = 0.5f * ((a_frgbaDecodedColor.fR - fLuma2) * (1.0f / (1.0f - 0.2126f))); + float fChromaB2 = 0.5f * ((a_frgbaDecodedColor.fB - fLuma2) * (1.0f / (1.0f - 0.0722f))); + + float fDeltaL = a_frgbaSourcePixel.fA * fLuma1 - a_fDecodedAlpha * fLuma2; + float fDeltaCr = a_frgbaSourcePixel.fA * fChromaR1 - a_fDecodedAlpha * fChromaR2; + float fDeltaCb = a_frgbaSourcePixel.fA * fChromaB1 - a_fDecodedAlpha * fChromaB2; + + float fDAlpha = a_fDecodedAlpha - a_frgbaSourcePixel.fA; + + // Favor Luma accuracy over Chroma, and Red over Blue + return LUMA_WEIGHT*fDeltaL*fDeltaL + + fDeltaCr*fDeltaCr + + CHROMA_BLUE_WEIGHT*fDeltaCb*fDeltaCb + + fDAlpha*fDAlpha; + #if 0 + float fDRed = a_frgbaDecodedPixel.fR - a_frgbaSourcePixel.fR; + float fDGreen = a_frgbaDecodedPixel.fG - a_frgbaSourcePixel.fG; + float fDBlue = a_frgbaDecodedPixel.fB - a_frgbaSourcePixel.fB; + return 2.0f * 3.0f * fDeltaL * fDeltaL + fDRed*fDRed + fDGreen*fDGreen + fDBlue*fDBlue; +#endif + } + else if (m_errormetric == ErrorMetric::NORMALXYZ) + { + float fDecodedX = 
2.0f * a_frgbaDecodedColor.fR - 1.0f; + float fDecodedY = 2.0f * a_frgbaDecodedColor.fG - 1.0f; + float fDecodedZ = 2.0f * a_frgbaDecodedColor.fB - 1.0f; + + float fDecodedLength = sqrtf(fDecodedX*fDecodedX + fDecodedY*fDecodedY + fDecodedZ*fDecodedZ); + + if (fDecodedLength < 0.5f) + { + return 1.0f; + } + else if (fDecodedLength == 0.0f) + { + fDecodedX = 1.0f; + fDecodedY = 0.0f; + fDecodedZ = 0.0f; + } + else + { + fDecodedX /= fDecodedLength; + fDecodedY /= fDecodedLength; + fDecodedZ /= fDecodedLength; + } + + float fSourceX = 2.0f * a_frgbaSourcePixel.fR - 1.0f; + float fSourceY = 2.0f * a_frgbaSourcePixel.fG - 1.0f; + float fSourceZ = 2.0f * a_frgbaSourcePixel.fB - 1.0f; + + float fSourceLength = sqrtf(fSourceX*fSourceX + fSourceY*fSourceY + fSourceZ*fSourceZ); + + if (fSourceLength == 0.0f) + { + fSourceX = 1.0f; + fSourceY = 0.0f; + fSourceZ = 0.0f; + } + else + { + fSourceX /= fSourceLength; + fSourceY /= fSourceLength; + fSourceZ /= fSourceLength; + } + + float fDotProduct = fSourceX*fDecodedX + fSourceY*fDecodedY + fSourceZ*fDecodedZ; + float fNormalizedDotProduct = 1.0f - 0.5f * (fDotProduct + 1.0f); + float fDotProductError = fNormalizedDotProduct * fNormalizedDotProduct; + + float fLength2 = fDecodedX*fDecodedX + fDecodedY*fDecodedY + fDecodedZ*fDecodedZ; + float fLength2Error = fabsf(1.0f - fLength2); + + float fDeltaW = a_frgbaDecodedColor.fA - a_frgbaSourcePixel.fA; + float fErrorW = fDeltaW * fDeltaW; + + return fDotProductError + fLength2Error + fErrorW; + } + else // ErrorMetric::NUMERIC + { + assert(a_fDecodedAlpha >= 0.0f); + + float fDX = a_frgbaDecodedColor.fR - a_frgbaSourcePixel.fR; + float fDY = a_frgbaDecodedColor.fG - a_frgbaSourcePixel.fG; + float fDZ = a_frgbaDecodedColor.fB - a_frgbaSourcePixel.fB; + float fDW = a_frgbaDecodedColor.fA - a_frgbaSourcePixel.fA; + + return fDX*fDX + fDY*fDY + fDZ*fDZ + fDW*fDW; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // + 
+} // namespace Etc + diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding.h b/thirdparty/etc2comp/EtcBlock4x4Encoding.h new file mode 100644 index 0000000000..c14c3b8616 --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4Encoding.h @@ -0,0 +1,148 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "EtcColorFloatRGBA.h" + +#include "EtcErrorMetric.h" + +#include <assert.h> +#include <float.h> + +namespace Etc +{ + class Block4x4; + + // abstract base class for specific encodings + class Block4x4Encoding + { + public: + + static const unsigned int ROWS = 4; + static const unsigned int COLUMNS = 4; + static const unsigned int PIXELS = ROWS * COLUMNS; + static const float LUMA_WEIGHT; + static const float CHROMA_BLUE_WEIGHT; + + typedef enum + { + MODE_UNKNOWN, + // + MODE_ETC1, + MODE_T, + MODE_H, + MODE_PLANAR, + MODE_R11, + MODE_RG11, + // + MODES + } Mode; + + Block4x4Encoding(void); + //virtual ~Block4x4Encoding(void) =0; + virtual ~Block4x4Encoding(void) {} + virtual void InitFromSource(Block4x4 *a_pblockParent, + ColorFloatRGBA *a_pafrgbaSource, + + unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) = 0; + + virtual void InitFromEncodingBits(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + ColorFloatRGBA *a_pafrgbaSource, + + ErrorMetric a_errormetric) = 0; + + // perform an iteration of the encoding + // the first iteration must generate a complete, 
valid (if poor) encoding + virtual void PerformIteration(float a_fEffort) = 0; + + void CalcBlockError(void); + + inline float GetError(void) + { + assert(m_fError >= 0.0f); + + return m_fError; + } + + inline ColorFloatRGBA * GetDecodedColors(void) + { + return m_afrgbaDecodedColors; + } + + inline float * GetDecodedAlphas(void) + { + return m_afDecodedAlphas; + } + + virtual void SetEncodingBits(void) = 0; + + virtual bool GetFlip(void) = 0; + + virtual bool IsDifferential(void) = 0; + + virtual bool HasSeverelyBentDifferentialColors(void) const = 0; + + inline Mode GetMode(void) + { + return m_mode; + } + + inline bool IsDone(void) + { + return m_boolDone; + } + + inline void SetDoneIfPerfect() + { + if (GetError() == 0.0f) + { + m_boolDone = true; + } + } + + float CalcPixelError(ColorFloatRGBA a_frgbaDecodedColor, float a_fDecodedAlpha, + ColorFloatRGBA a_frgbaSourcePixel); + + protected: + + void Init(Block4x4 *a_pblockParent, + ColorFloatRGBA *a_pafrgbaSource, + + ErrorMetric a_errormetric); + + Block4x4 *m_pblockParent; + ColorFloatRGBA *m_pafrgbaSource; + + bool m_boolBorderPixels; // if block has any border pixels + + ColorFloatRGBA m_afrgbaDecodedColors[PIXELS]; // decoded RGB components, ignore Alpha + float m_afDecodedAlphas[PIXELS]; // decoded alpha component + float m_fError; // error for RGBA relative to m_pafrgbaSource + + // intermediate encoding + Mode m_mode; + + unsigned int m_uiEncodingIterations; + bool m_boolDone; // all iterations have been done + ErrorMetric m_errormetric; + + private: + + }; + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcBlock4x4EncodingBits.h b/thirdparty/etc2comp/EtcBlock4x4EncodingBits.h new file mode 100644 index 0000000000..4065700379 --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4EncodingBits.h @@ -0,0 +1,315 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
// ################################################################################
// Block4x4EncodingBits
// Base class for Block4x4EncodingBits_XXXX
// ################################################################################

class Block4x4EncodingBits
{
public:

	enum class Format
	{
		UNKNOWN,
		//
		RGB8,
		RGBA8,
		R11,
		RG11,
		RGB8A1,
		//
		FORMATS
	};

	// number of bytes one encoded 4x4 block occupies in a_format
	// returns 0 for UNKNOWN, FORMATS or any other unrecognized value
	static unsigned int GetBytesPerBlock(Format a_format)
	{
		switch (a_format)
		{
		case Format::RGB8:
		case Format::R11:
		case Format::RGB8A1:
			return 8;

		case Format::RGBA8:
		case Format::RG11:
			return 16;

		default:
			return 0;
		}
	}

};

// ################################################################################
// Block4x4EncodingBits_RGB8
// Encoding bits for the RGB portion of ETC1, RGB8, RGB8A1 and RGBA8
// ################################################################################
//
// Every struct below is an alternative view of the same BYTES_PER_BLOCK bytes.
// NOTE(review): the field order relies on the compiler's bit-field allocation
// order, which is implementation-defined -- confirm when porting to a new
// compiler or target.

class Block4x4EncodingBits_RGB8
{
public:

	static const unsigned int BYTES_PER_BLOCK = 8;

	// clears all encoding bits
	inline Block4x4EncodingBits_RGB8(void)
	{
		// checked at compile time so a layout mismatch cannot slip through an NDEBUG build
		static_assert(sizeof(Block4x4EncodingBits_RGB8) == BYTES_PER_BLOCK,
						"Block4x4EncodingBits_RGB8 must be exactly BYTES_PER_BLOCK bytes");

		for (unsigned int uiByte = 0; uiByte < BYTES_PER_BLOCK; uiByte++)
		{
			auc[uiByte] = 0;
		}

	}

	// individual mode: two independent 4-bit-per-channel base colors
	typedef struct
	{
		unsigned red2 : 4;
		unsigned red1 : 4;
		//
		unsigned green2 : 4;
		unsigned green1 : 4;
		//
		unsigned blue2 : 4;
		unsigned blue1 : 4;
		//
		unsigned flip : 1;
		unsigned diff : 1;
		unsigned cw2 : 3;
		unsigned cw1 : 3;
		//
		unsigned int selectors;
	} Individual;

	// differential mode: a 5-bit-per-channel base color plus a signed 3-bit delta per channel
	typedef struct
	{
		signed dred2 : 3;
		unsigned red1 : 5;
		//
		signed dgreen2 : 3;
		unsigned green1 : 5;
		//
		signed dblue2 : 3;
		unsigned blue1 : 5;
		//
		unsigned flip : 1;
		unsigned diff : 1;
		unsigned cw2 : 3;
		unsigned cw1 : 3;
		//
		unsigned int selectors;
	} Differential;

	// T mode
	typedef struct
	{
		unsigned red1b : 2;
		unsigned detect2 : 1;
		unsigned red1a : 2;
		unsigned detect1 : 3;
		//
		unsigned blue1 : 4;
		unsigned green1 : 4;
		//
		unsigned green2 : 4;
		unsigned red2 : 4;
		//
		unsigned db : 1;
		unsigned diff : 1;
		unsigned da : 2;
		unsigned blue2 : 4;
		//
		unsigned int selectors;
	} T;

	// H mode
	typedef struct
	{
		unsigned green1a : 3;
		unsigned red1 : 4;
		unsigned detect1 : 1;
		//
		unsigned blue1b : 2;
		unsigned detect3 : 1;
		unsigned blue1a : 1;
		unsigned green1b : 1;
		unsigned detect2 : 3;
		//
		unsigned green2a : 3;
		unsigned red2 : 4;
		unsigned blue1c : 1;
		//
		unsigned db : 1;
		unsigned diff : 1;
		unsigned da : 1;
		unsigned blue2 : 4;
		unsigned green2b : 1;
		//
		unsigned int selectors;
	} H;

	// planar mode: origin, horizontal and vertical colors; there are no selectors
	typedef struct
	{
		unsigned originGreen1 : 1;
		unsigned originRed : 6;
		unsigned detect1 : 1;
		//
		unsigned originBlue1 : 1;
		unsigned originGreen2 : 6;
		unsigned detect2 : 1;
		//
		unsigned originBlue3 : 2;
		unsigned detect4 : 1;
		unsigned originBlue2 : 2;
		unsigned detect3 : 3;
		//
		unsigned horizRed2 : 1;
		unsigned diff : 1;
		unsigned horizRed1 : 5;
		unsigned originBlue4 : 1;
		//
		unsigned horizBlue1: 1;
		unsigned horizGreen : 7;
		//
		unsigned vertRed1 : 3;
		unsigned horizBlue2 : 5;
		//
		unsigned vertGreen1 : 5;
		unsigned vertRed2 : 3;
		//
		unsigned vertBlue : 6;
		unsigned vertGreen2 : 2;
	} Planar;

	union
	{
		unsigned char auc[BYTES_PER_BLOCK];
		// NOTE(review): unsigned long is only 4 bytes on LLP64 targets (e.g. 64-bit Windows),
		// so ul may not span the whole block there -- confirm before relying on it
		unsigned long int ul;
		Individual individual;
		Differential differential;
		T t;
		H h;
		Planar planar;
	};

};
// ################################################################################
// Block4x4EncodingBits_A8
// Encoding bits for the A portion of RGBA8
// ################################################################################

class Block4x4EncodingBits_A8
{
public:

	static const unsigned int BYTES_PER_BLOCK = 8;
	static const unsigned int SELECTOR_BYTES = 6;

	// base value, table/multiplier nibbles, then SELECTOR_BYTES bytes of packed selectors
	typedef struct
	{
		unsigned base : 8;
		unsigned table : 4;
		unsigned multiplier : 4;
		unsigned selectors0 : 8;
		unsigned selectors1 : 8;
		unsigned selectors2 : 8;
		unsigned selectors3 : 8;
		unsigned selectors4 : 8;
		unsigned selectors5 : 8;
	} Data;

	// the bit-fields must pack into exactly one encoded block
	static_assert(sizeof(Data) == BYTES_PER_BLOCK,
					"Block4x4EncodingBits_A8::Data must be exactly BYTES_PER_BLOCK bytes");

	Data data;

};

// ################################################################################
// Block4x4EncodingBits_R11
// Encoding bits for the R portion of R11
// ################################################################################

class Block4x4EncodingBits_R11
{
public:

	static const unsigned int BYTES_PER_BLOCK = 8;
	static const unsigned int SELECTOR_BYTES = 6;

	// same field layout as Block4x4EncodingBits_A8::Data
	typedef struct
	{
		unsigned base : 8;
		unsigned table : 4;
		unsigned multiplier : 4;
		unsigned selectors0 : 8;
		unsigned selectors1 : 8;
		unsigned selectors2 : 8;
		unsigned selectors3 : 8;
		unsigned selectors4 : 8;
		unsigned selectors5 : 8;
	} Data;

	// the bit-fields must pack into exactly one encoded block
	static_assert(sizeof(Data) == BYTES_PER_BLOCK,
					"Block4x4EncodingBits_R11::Data must be exactly BYTES_PER_BLOCK bytes");

	Data data;

};

// ################################################################################
// Block4x4EncodingBits_RG11
// Encoding bits for the R and G portions of RG11
// ################################################################################

class Block4x4EncodingBits_RG11
{
public:

	static const unsigned int BYTES_PER_BLOCK = 16;
	static const unsigned int SELECTOR_BYTES = 12;

	// an 8 byte red portion followed by an 8 byte green portion
	typedef struct
	{
		//Red portion
		unsigned baseR : 8;
		unsigned tableIndexR : 4;
		unsigned multiplierR : 4;
		unsigned selectorsR0 : 8;
		unsigned selectorsR1 : 8;
		unsigned selectorsR2 : 8;
		unsigned selectorsR3 : 8;
		unsigned selectorsR4 : 8;
		unsigned selectorsR5 : 8;
		//Green portion
		unsigned baseG : 8;
		unsigned tableIndexG : 4;
		unsigned multiplierG : 4;
		unsigned selectorsG0 : 8;
		unsigned selectorsG1 : 8;
		unsigned selectorsG2 : 8;
		unsigned selectorsG3 : 8;
		unsigned selectorsG4 : 8;
		unsigned selectorsG5 : 8;
	} Data;

	// the bit-fields must pack into exactly one encoded block
	static_assert(sizeof(Data) == BYTES_PER_BLOCK,
					"Block4x4EncodingBits_RG11::Data must be exactly BYTES_PER_BLOCK bytes");

	Data data;

};
This encoder is also +used for the ETC1 subset of file format RGB8, RGBA8 and RGB8A1 + +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4Encoding_ETC1.h" + +#include "EtcBlock4x4.h" +#include "EtcBlock4x4EncodingBits.h" +#include "EtcDifferentialTrys.h" + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <float.h> +#include <limits> + +namespace Etc +{ + + // pixel processing order if the flip bit = 0 (horizontal split) + const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderFlip0[PIXELS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + + // pixel processing order if the flip bit = 1 (vertical split) + const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderFlip1[PIXELS] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 }; + + // pixel processing order for horizontal scan (ETC normally does a vertical scan) + const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderHScan[PIXELS] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; + + // pixel indices for different block halves + const unsigned int Block4x4Encoding_ETC1::s_auiLeftPixelMapping[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + const unsigned int Block4x4Encoding_ETC1::s_auiRightPixelMapping[8] = { 8, 9, 10, 11, 12, 13, 14, 15 }; + const unsigned int Block4x4Encoding_ETC1::s_auiTopPixelMapping[8] = { 0, 1, 4, 5, 8, 9, 12, 13 }; + const unsigned int Block4x4Encoding_ETC1::s_auiBottomPixelMapping[8] = { 2, 3, 6, 7, 10, 11, 14, 15 }; + + // CW ranges that the ETC1 decoders use + // CW is basically a contrast for the different selector bits, since these values are offsets to the base color + // the first axis in the array is indexed by the CW in the encoding bits + // the second axis in the array is indexed by the selector bits + float Block4x4Encoding_ETC1::s_aafCwTable[CW_RANGES][SELECTORS] = + { + { 2.0f / 255.0f, 8.0f / 255.0f, -2.0f / 255.0f, -8.0f / 255.0f }, + { 5.0f / 255.0f, 17.0f / 255.0f, -5.0f / 255.0f, -17.0f / 255.0f }, + { 9.0f / 255.0f, 29.0f / 
255.0f, -9.0f / 255.0f, -29.0f / 255.0f }, + { 13.0f / 255.0f, 42.0f / 255.0f, -13.0f / 255.0f, -42.0f / 255.0f }, + { 18.0f / 255.0f, 60.0f / 255.0f, -18.0f / 255.0f, -60.0f / 255.0f }, + { 24.0f / 255.0f, 80.0f / 255.0f, -24.0f / 255.0f, -80.0f / 255.0f }, + { 33.0f / 255.0f, 106.0f / 255.0f, -33.0f / 255.0f, -106.0f / 255.0f }, + { 47.0f / 255.0f, 183.0f / 255.0f, -47.0f / 255.0f, -183.0f / 255.0f } + }; + + // ---------------------------------------------------------------------------------------------------- + // + Block4x4Encoding_ETC1::Block4x4Encoding_ETC1(void) + { + m_mode = MODE_ETC1; + m_boolDiff = false; + m_boolFlip = false; + m_frgbaColor1 = ColorFloatRGBA(); + m_frgbaColor2 = ColorFloatRGBA(); + m_uiCW1 = 0; + m_uiCW2 = 0; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = 0; + m_afDecodedAlphas[uiPixel] = 1.0f; + } + + m_boolMostLikelyFlip = false; + + m_fError = -1.0f; + + m_fError1 = -1.0f; + m_fError2 = -1.0f; + m_boolSeverelyBentDifferentialColors = false; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afDecodedAlphas[uiPixel] = 1.0f; + } + + } + + Block4x4Encoding_ETC1::~Block4x4Encoding_ETC1(void) {} + + // ---------------------------------------------------------------------------------------------------- + // initialization prior to encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits + // + void Block4x4Encoding_ETC1::InitFromSource(Block4x4 *a_pblockParent, + ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) + { + + Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric); + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afDecodedAlphas[uiPixel] = 1.0f; + } + + m_fError = -1.0f; + + 
m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits); + + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits of a previous encoding + // + void Block4x4Encoding_ETC1::InitFromEncodingBits(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric) + { + + Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric); + m_fError = -1.0f; + + m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits; + + m_mode = MODE_ETC1; + m_boolDiff = m_pencodingbitsRGB8->individual.diff; + m_boolFlip = m_pencodingbitsRGB8->individual.flip; + if (m_boolDiff) + { + int iR2 = (int)(m_pencodingbitsRGB8->differential.red1 + m_pencodingbitsRGB8->differential.dred2); + if (iR2 < 0) + { + iR2 = 0; + } + else if (iR2 > 31) + { + iR2 = 31; + } + + int iG2 = (int)(m_pencodingbitsRGB8->differential.green1 + m_pencodingbitsRGB8->differential.dgreen2); + if (iG2 < 0) + { + iG2 = 0; + } + else if (iG2 > 31) + { + iG2 = 31; + } + + int iB2 = (int)(m_pencodingbitsRGB8->differential.blue1 + m_pencodingbitsRGB8->differential.dblue2); + if (iB2 < 0) + { + iB2 = 0; + } + else if (iB2 > 31) + { + iB2 = 31; + } + + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5(m_pencodingbitsRGB8->differential.red1, m_pencodingbitsRGB8->differential.green1, m_pencodingbitsRGB8->differential.blue1); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iR2, (unsigned char)iG2, (unsigned char)iB2); + + } + else + { + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(m_pencodingbitsRGB8->individual.red1, 
m_pencodingbitsRGB8->individual.green1, m_pencodingbitsRGB8->individual.blue1); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(m_pencodingbitsRGB8->individual.red2, m_pencodingbitsRGB8->individual.green2, m_pencodingbitsRGB8->individual.blue2); + } + + m_uiCW1 = m_pencodingbitsRGB8->individual.cw1; + m_uiCW2 = m_pencodingbitsRGB8->individual.cw2; + + InitFromEncodingBits_Selectors(); + + Decode(); + + CalcBlockError(); + } + + // ---------------------------------------------------------------------------------------------------- + // init the selectors from a prior encoding + // + void Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(void) + { + + unsigned char *paucSelectors = (unsigned char *)&m_pencodingbitsRGB8->individual.selectors; + + for (unsigned int iPixel = 0; iPixel < PIXELS; iPixel++) + { + unsigned int uiByteMSB = (unsigned int)(1 - (iPixel / 8)); + unsigned int uiByteLSB = (unsigned int)(3 - (iPixel / 8)); + unsigned int uiShift = (unsigned int)(iPixel & 7); + + unsigned int uiSelectorMSB = (unsigned int)((paucSelectors[uiByteMSB] >> uiShift) & 1); + unsigned int uiSelectorLSB = (unsigned int)((paucSelectors[uiByteLSB] >> uiShift) & 1); + + m_auiSelectors[iPixel] = (uiSelectorMSB << 1) + uiSelectorLSB; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + void Block4x4Encoding_ETC1::PerformIteration(float a_fEffort) + { + assert(!m_boolDone); + + switch (m_uiEncodingIterations) + { + case 0: + PerformFirstIteration(); + break; + + case 1: + TryDifferential(m_boolMostLikelyFlip, 1, 0, 0); + break; + + case 2: + TryIndividual(m_boolMostLikelyFlip, 1); + if (a_fEffort <= 49.5f) + { + m_boolDone = true; + } + break; + + case 3: + 
TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0); + if (a_fEffort <= 59.5f) + { + m_boolDone = true; + } + break; + + case 4: + TryIndividual(!m_boolMostLikelyFlip, 1); + if (a_fEffort <= 69.5f) + { + m_boolDone = true; + } + break; + + case 5: + TryDegenerates1(); + if (a_fEffort <= 79.5f) + { + m_boolDone = true; + } + break; + + case 6: + TryDegenerates2(); + if (a_fEffort <= 89.5f) + { + m_boolDone = true; + } + break; + + case 7: + TryDegenerates3(); + if (a_fEffort <= 99.5f) + { + m_boolDone = true; + } + break; + + case 8: + TryDegenerates4(); + m_boolDone = true; + break; + + default: + assert(0); + break; + } + + m_uiEncodingIterations++; + SetDoneIfPerfect(); + } + + // ---------------------------------------------------------------------------------------------------- + // find best initial encoding to ensure block has a valid encoding + // + void Block4x4Encoding_ETC1::PerformFirstIteration(void) + { + CalculateMostLikelyFlip(); + + m_fError = FLT_MAX; + + TryDifferential(m_boolMostLikelyFlip, 0, 0, 0); + SetDoneIfPerfect(); + if (m_boolDone) + { + return; + } + + TryIndividual(m_boolMostLikelyFlip, 0); + SetDoneIfPerfect(); + if (m_boolDone) + { + return; + } + TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0); + SetDoneIfPerfect(); + if (m_boolDone) + { + return; + } + TryIndividual(!m_boolMostLikelyFlip, 0); + + } + + // ---------------------------------------------------------------------------------------------------- + // algorithm: + // create a source average color for the Left, Right, Top and Bottom halves using the 8 pixels in each half + // note: the "gray line" is the line of equal delta RGB that goes thru the average color + // for each half: + // see how close each of the 8 pixels are to the "gray line" that goes thru the source average color + // create an error value that is the sum of the distances from the gray line + // h_error is the sum of Left and Right errors + // v_error is the sum of Top and Bottom errors + // + void 
Block4x4Encoding_ETC1::CalculateMostLikelyFlip(void) + { + static const bool DEBUG_PRINT = false; + + CalculateSourceAverages(); + + float fLeftGrayErrorSum = 0.0f; + float fRightGrayErrorSum = 0.0f; + float fTopGrayErrorSum = 0.0f; + float fBottomGrayErrorSum = 0.0f; + + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + ColorFloatRGBA *pfrgbaLeft = &m_pafrgbaSource[uiPixel]; + ColorFloatRGBA *pfrgbaRight = &m_pafrgbaSource[uiPixel + 8]; + ColorFloatRGBA *pfrgbaTop = &m_pafrgbaSource[s_auiTopPixelMapping[uiPixel]]; + ColorFloatRGBA *pfrgbaBottom = &m_pafrgbaSource[s_auiBottomPixelMapping[uiPixel]]; + + float fLeftGrayError = CalcGrayDistance2(*pfrgbaLeft, m_frgbaSourceAverageLeft); + float fRightGrayError = CalcGrayDistance2(*pfrgbaRight, m_frgbaSourceAverageRight); + float fTopGrayError = CalcGrayDistance2(*pfrgbaTop, m_frgbaSourceAverageTop); + float fBottomGrayError = CalcGrayDistance2(*pfrgbaBottom, m_frgbaSourceAverageBottom); + + fLeftGrayErrorSum += fLeftGrayError; + fRightGrayErrorSum += fRightGrayError; + fTopGrayErrorSum += fTopGrayError; + fBottomGrayErrorSum += fBottomGrayError; + } + + if (DEBUG_PRINT) + { + printf("\n%.2f %.2f\n", fLeftGrayErrorSum + fRightGrayErrorSum, fTopGrayErrorSum + fBottomGrayErrorSum); + } + + m_boolMostLikelyFlip = (fTopGrayErrorSum + fBottomGrayErrorSum) < (fLeftGrayErrorSum + fRightGrayErrorSum); + + } + + // ---------------------------------------------------------------------------------------------------- + // calculate source pixel averages for each 2x2 quadrant in a 4x4 block + // these are used to determine the averages for each of the 4 different halves (left, right, top, bottom) + // ignore pixels that have alpha == NAN (these are border pixels outside of the source image) + // weight the averages based on a pixel's alpha + // + void Block4x4Encoding_ETC1::CalculateSourceAverages(void) + { + static const bool DEBUG_PRINT = false; + + bool boolRGBX = m_pblockParent->GetImageSource()->GetErrorMetric() == 
ErrorMetric::RGBX; + + if (m_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE || boolRGBX) + { + ColorFloatRGBA frgbaSumUL = m_pafrgbaSource[0] + m_pafrgbaSource[1] + m_pafrgbaSource[4] + m_pafrgbaSource[5]; + ColorFloatRGBA frgbaSumLL = m_pafrgbaSource[2] + m_pafrgbaSource[3] + m_pafrgbaSource[6] + m_pafrgbaSource[7]; + ColorFloatRGBA frgbaSumUR = m_pafrgbaSource[8] + m_pafrgbaSource[9] + m_pafrgbaSource[12] + m_pafrgbaSource[13]; + ColorFloatRGBA frgbaSumLR = m_pafrgbaSource[10] + m_pafrgbaSource[11] + m_pafrgbaSource[14] + m_pafrgbaSource[15]; + + m_frgbaSourceAverageLeft = (frgbaSumUL + frgbaSumLL) * 0.125f; + m_frgbaSourceAverageRight = (frgbaSumUR + frgbaSumLR) * 0.125f; + m_frgbaSourceAverageTop = (frgbaSumUL + frgbaSumUR) * 0.125f; + m_frgbaSourceAverageBottom = (frgbaSumLL + frgbaSumLR) * 0.125f; + } + else + { + float afSourceAlpha[PIXELS]; + + // treat alpha NAN as 0.0f + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + afSourceAlpha[uiPixel] = isnan(m_pafrgbaSource[uiPixel].fA) ? 
+ 0.0f : + m_pafrgbaSource[uiPixel].fA; + } + + ColorFloatRGBA afrgbaAlphaWeightedSource[PIXELS]; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + afrgbaAlphaWeightedSource[uiPixel] = m_pafrgbaSource[uiPixel] * afSourceAlpha[uiPixel]; + } + + ColorFloatRGBA frgbaSumUL = afrgbaAlphaWeightedSource[0] + + afrgbaAlphaWeightedSource[1] + + afrgbaAlphaWeightedSource[4] + + afrgbaAlphaWeightedSource[5]; + + ColorFloatRGBA frgbaSumLL = afrgbaAlphaWeightedSource[2] + + afrgbaAlphaWeightedSource[3] + + afrgbaAlphaWeightedSource[6] + + afrgbaAlphaWeightedSource[7]; + + ColorFloatRGBA frgbaSumUR = afrgbaAlphaWeightedSource[8] + + afrgbaAlphaWeightedSource[9] + + afrgbaAlphaWeightedSource[12] + + afrgbaAlphaWeightedSource[13]; + + ColorFloatRGBA frgbaSumLR = afrgbaAlphaWeightedSource[10] + + afrgbaAlphaWeightedSource[11] + + afrgbaAlphaWeightedSource[14] + + afrgbaAlphaWeightedSource[15]; + + float fWeightSumUL = afSourceAlpha[0] + + afSourceAlpha[1] + + afSourceAlpha[4] + + afSourceAlpha[5]; + + float fWeightSumLL = afSourceAlpha[2] + + afSourceAlpha[3] + + afSourceAlpha[6] + + afSourceAlpha[7]; + + float fWeightSumUR = afSourceAlpha[8] + + afSourceAlpha[9] + + afSourceAlpha[12] + + afSourceAlpha[13]; + + float fWeightSumLR = afSourceAlpha[10] + + afSourceAlpha[11] + + afSourceAlpha[14] + + afSourceAlpha[15]; + + ColorFloatRGBA frgbaSumLeft = frgbaSumUL + frgbaSumLL; + ColorFloatRGBA frgbaSumRight = frgbaSumUR + frgbaSumLR; + ColorFloatRGBA frgbaSumTop = frgbaSumUL + frgbaSumUR; + ColorFloatRGBA frgbaSumBottom = frgbaSumLL + frgbaSumLR; + + float fWeightSumLeft = fWeightSumUL + fWeightSumLL; + float fWeightSumRight = fWeightSumUR + fWeightSumLR; + float fWeightSumTop = fWeightSumUL + fWeightSumUR; + float fWeightSumBottom = fWeightSumLL + fWeightSumLR; + + // check to see if there is at least 1 pixel with non-zero alpha + // completely transparent block should not make it to this code + assert((fWeightSumLeft + fWeightSumRight) > 0.0f); + 
assert((fWeightSumTop + fWeightSumBottom) > 0.0f); + + if (fWeightSumLeft > 0.0f) + { + m_frgbaSourceAverageLeft = frgbaSumLeft * (1.0f/fWeightSumLeft); + } + if (fWeightSumRight > 0.0f) + { + m_frgbaSourceAverageRight = frgbaSumRight * (1.0f/fWeightSumRight); + } + if (fWeightSumTop > 0.0f) + { + m_frgbaSourceAverageTop = frgbaSumTop * (1.0f/fWeightSumTop); + } + if (fWeightSumBottom > 0.0f) + { + m_frgbaSourceAverageBottom = frgbaSumBottom * (1.0f/fWeightSumBottom); + } + + if (fWeightSumLeft == 0.0f) + { + assert(fWeightSumRight > 0.0f); + m_frgbaSourceAverageLeft = m_frgbaSourceAverageRight; + } + if (fWeightSumRight == 0.0f) + { + assert(fWeightSumLeft > 0.0f); + m_frgbaSourceAverageRight = m_frgbaSourceAverageLeft; + } + if (fWeightSumTop == 0.0f) + { + assert(fWeightSumBottom > 0.0f); + m_frgbaSourceAverageTop = m_frgbaSourceAverageBottom; + } + if (fWeightSumBottom == 0.0f) + { + assert(fWeightSumTop > 0.0f); + m_frgbaSourceAverageBottom = m_frgbaSourceAverageTop; + } + } + + + + if (DEBUG_PRINT) + { + printf("\ntarget: [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f]\n", + m_frgbaSourceAverageLeft.fR, m_frgbaSourceAverageLeft.fG, m_frgbaSourceAverageLeft.fB, + m_frgbaSourceAverageRight.fR, m_frgbaSourceAverageRight.fG, m_frgbaSourceAverageRight.fB, + m_frgbaSourceAverageTop.fR, m_frgbaSourceAverageTop.fG, m_frgbaSourceAverageTop.fB, + m_frgbaSourceAverageBottom.fR, m_frgbaSourceAverageBottom.fG, m_frgbaSourceAverageBottom.fB); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try an ETC1 differential mode encoding + // use a_boolFlip to set the encoding F bit + // use a_uiRadius to alter basecolor components in the range[-a_uiRadius:a_uiRadius] + // use a_iGrayOffset1 and a_iGrayOffset2 to offset the basecolor to search for degenerate encodings + // replace the encoding if the encoding error is less than previous encoding + // + void 
Block4x4Encoding_ETC1::TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, + int a_iGrayOffset1, int a_iGrayOffset2) + { + + ColorFloatRGBA frgbaColor1; + ColorFloatRGBA frgbaColor2; + + const unsigned int *pauiPixelMapping1; + const unsigned int *pauiPixelMapping2; + + if (a_boolFlip) + { + frgbaColor1 = m_frgbaSourceAverageTop; + frgbaColor2 = m_frgbaSourceAverageBottom; + + pauiPixelMapping1 = s_auiTopPixelMapping; + pauiPixelMapping2 = s_auiBottomPixelMapping; + } + else + { + frgbaColor1 = m_frgbaSourceAverageLeft; + frgbaColor2 = m_frgbaSourceAverageRight; + + pauiPixelMapping1 = s_auiLeftPixelMapping; + pauiPixelMapping2 = s_auiRightPixelMapping; + } + + DifferentialTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, + a_uiRadius, a_iGrayOffset1, a_iGrayOffset2); + + Block4x4Encoding_ETC1 encodingTry = *this; + encodingTry.m_boolFlip = a_boolFlip; + + encodingTry.TryDifferentialHalf(&trys.m_half1); + encodingTry.TryDifferentialHalf(&trys.m_half2); + + // find best halves that are within differential range + DifferentialTrys::Try *ptryBest1 = nullptr; + DifferentialTrys::Try *ptryBest2 = nullptr; + encodingTry.m_fError = FLT_MAX; + + // see if the best of each half are in differential range + int iDRed = trys.m_half2.m_ptryBest->m_iRed - trys.m_half1.m_ptryBest->m_iRed; + int iDGreen = trys.m_half2.m_ptryBest->m_iGreen - trys.m_half1.m_ptryBest->m_iGreen; + int iDBlue = trys.m_half2.m_ptryBest->m_iBlue - trys.m_half1.m_ptryBest->m_iBlue; + if (iDRed >= -4 && iDRed <= 3 && iDGreen >= -4 && iDGreen <= 3 && iDBlue >= -4 && iDBlue <= 3) + { + ptryBest1 = trys.m_half1.m_ptryBest; + ptryBest2 = trys.m_half2.m_ptryBest; + encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError; + } + else + { + // else, find the next best halves that are in differential range + for (DifferentialTrys::Try *ptry1 = &trys.m_half1.m_atry[0]; + ptry1 < &trys.m_half1.m_atry[trys.m_half1.m_uiTrys]; + ptry1++) + { + for 
(DifferentialTrys::Try *ptry2 = &trys.m_half2.m_atry[0]; + ptry2 < &trys.m_half2.m_atry[trys.m_half2.m_uiTrys]; + ptry2++) + { + iDRed = ptry2->m_iRed - ptry1->m_iRed; + bool boolValidRedDelta = iDRed <= 3 && iDRed >= -4; + iDGreen = ptry2->m_iGreen - ptry1->m_iGreen; + bool boolValidGreenDelta = iDGreen <= 3 && iDGreen >= -4; + iDBlue = ptry2->m_iBlue - ptry1->m_iBlue; + bool boolValidBlueDelta = iDBlue <= 3 && iDBlue >= -4; + + if (boolValidRedDelta && boolValidGreenDelta && boolValidBlueDelta) + { + float fError = ptry1->m_fError + ptry2->m_fError; + + if (fError < encodingTry.m_fError) + { + encodingTry.m_fError = fError; + + ptryBest1 = ptry1; + ptryBest2 = ptry2; + } + } + + } + } + assert(encodingTry.m_fError < FLT_MAX); + assert(ptryBest1 != nullptr); + assert(ptryBest2 != nullptr); + } + + if (encodingTry.m_fError < m_fError) + { + m_mode = MODE_ETC1; + m_boolDiff = true; + m_boolFlip = encodingTry.m_boolFlip; + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue); + m_uiCW1 = ptryBest1->m_uiCW; + m_uiCW2 = ptryBest2->m_uiCW; + + for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++) + { + unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder]; + unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder]; + + unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder]; + unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder]; + + m_auiSelectors[uiPixel1] = uiSelector1; + m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder]; + + float fDeltaRGB1 = s_aafCwTable[m_uiCW1][uiSelector1]; + float fDeltaRGB2 = s_aafCwTable[m_uiCW2][uiSelector2]; + + m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB(); + m_afrgbaDecodedColors[uiPixel2] = 
(m_frgbaColor2 + fDeltaRGB2).ClampRGB(); + } + + m_fError1 = ptryBest1->m_fError; + m_fError2 = ptryBest2->m_fError; + m_boolSeverelyBentDifferentialColors = trys.m_boolSeverelyBentColors; + m_fError = m_fError1 + m_fError2; + + // sanity check + { + int iRed1 = m_frgbaColor1.IntRed(31.0f); + int iGreen1 = m_frgbaColor1.IntGreen(31.0f); + int iBlue1 = m_frgbaColor1.IntBlue(31.0f); + + int iRed2 = m_frgbaColor2.IntRed(31.0f); + int iGreen2 = m_frgbaColor2.IntGreen(31.0f); + int iBlue2 = m_frgbaColor2.IntBlue(31.0f); + + iDRed = iRed2 - iRed1; + iDGreen = iGreen2 - iGreen1; + iDBlue = iBlue2 - iBlue1; + + assert(iDRed >= -4 && iDRed < 4); + assert(iDGreen >= -4 && iDGreen < 4); + assert(iDBlue >= -4 && iDBlue < 4); + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try an ETC1 differential mode encoding for a half of a 4x4 block + // vary the basecolor components using a radius + // + void Block4x4Encoding_ETC1::TryDifferentialHalf(DifferentialTrys::Half *a_phalf) + { + + a_phalf->m_ptryBest = nullptr; + float fBestTryError = FLT_MAX; + + a_phalf->m_uiTrys = 0; + for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius; + iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius; + iRed++) + { + assert(iRed >= 0 && iRed <= 31); + + for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius; + iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius; + iGreen++) + { + assert(iGreen >= 0 && iGreen <= 31); + + for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius; + iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius; + iBlue++) + { + assert(iBlue >= 0 && iBlue <= 31); + + DifferentialTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys]; + assert(ptry < &a_phalf->m_atry[DifferentialTrys::Half::MAX_TRYS]); + + ptry->m_iRed = iRed; + ptry->m_iGreen = iGreen; + ptry->m_iBlue = iBlue; + ptry->m_fError = FLT_MAX; + ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iRed, 
(unsigned char)iGreen, (unsigned char)iBlue); + + // try each CW + for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++) + { + unsigned int auiPixelSelectors[PIXELS / 2]; + ColorFloatRGBA afrgbaDecodedPixels[PIXELS / 2]; + float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, + FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + + // pre-compute decoded pixels for each selector + ColorFloatRGBA afrgbaSelectors[SELECTORS]; + assert(SELECTORS == 4); + afrgbaSelectors[0] = (frgbaColor + s_aafCwTable[uiCW][0]).ClampRGB(); + afrgbaSelectors[1] = (frgbaColor + s_aafCwTable[uiCW][1]).ClampRGB(); + afrgbaSelectors[2] = (frgbaColor + s_aafCwTable[uiCW][2]).ClampRGB(); + afrgbaSelectors[3] = (frgbaColor + s_aafCwTable[uiCW][3]).ClampRGB(); + + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[a_phalf->m_pauiPixelMapping[uiPixel]]; + ColorFloatRGBA frgbaDecodedPixel; + + for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) + { + frgbaDecodedPixel = afrgbaSelectors[uiSelector]; + + float fPixelError; + + fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[a_phalf->m_pauiPixelMapping[uiPixel]], + *pfrgbaSourcePixel); + + if (fPixelError < afPixelErrors[uiPixel]) + { + auiPixelSelectors[uiPixel] = uiSelector; + afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel; + afPixelErrors[uiPixel] = fPixelError; + } + + } + } + + // add up all pixel errors + float fCWError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + fCWError += afPixelErrors[uiPixel]; + } + + // if best CW so far + if (fCWError < ptry->m_fError) + { + ptry->m_uiCW = uiCW; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel]; + } + ptry->m_fError = fCWError; + } + + } + + if (ptry->m_fError < fBestTryError) + { + a_phalf->m_ptryBest = ptry; + fBestTryError = ptry->m_fError; + } + + assert(ptry->m_fError < FLT_MAX); + + 
a_phalf->m_uiTrys++; + } + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try an ETC1 individual mode encoding + // use a_boolFlip to set the encoding F bit + // use a_uiRadius to alter basecolor components in the range[-a_uiRadius:a_uiRadius] + // replace the encoding if the encoding error is less than previous encoding + // + void Block4x4Encoding_ETC1::TryIndividual(bool a_boolFlip, unsigned int a_uiRadius) + { + + ColorFloatRGBA frgbaColor1; + ColorFloatRGBA frgbaColor2; + + const unsigned int *pauiPixelMapping1; + const unsigned int *pauiPixelMapping2; + + if (a_boolFlip) + { + frgbaColor1 = m_frgbaSourceAverageTop; + frgbaColor2 = m_frgbaSourceAverageBottom; + + pauiPixelMapping1 = s_auiTopPixelMapping; + pauiPixelMapping2 = s_auiBottomPixelMapping; + } + else + { + frgbaColor1 = m_frgbaSourceAverageLeft; + frgbaColor2 = m_frgbaSourceAverageRight; + + pauiPixelMapping1 = s_auiLeftPixelMapping; + pauiPixelMapping2 = s_auiRightPixelMapping; + } + + IndividualTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, a_uiRadius); + + Block4x4Encoding_ETC1 encodingTry = *this; + encodingTry.m_boolFlip = a_boolFlip; + + encodingTry.TryIndividualHalf(&trys.m_half1); + encodingTry.TryIndividualHalf(&trys.m_half2); + + // use the best of each half + IndividualTrys::Try *ptryBest1 = trys.m_half1.m_ptryBest; + IndividualTrys::Try *ptryBest2 = trys.m_half2.m_ptryBest; + encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError; + + if (encodingTry.m_fError < m_fError) + { + m_mode = MODE_ETC1; + m_boolDiff = false; + m_boolFlip = encodingTry.m_boolFlip; + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned 
char)ptryBest2->m_iBlue); + m_uiCW1 = ptryBest1->m_uiCW; + m_uiCW2 = ptryBest2->m_uiCW; + + for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++) + { + unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder]; + unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder]; + + unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder]; + unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder]; + + m_auiSelectors[uiPixel1] = uiSelector1; + m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder]; + + float fDeltaRGB1 = s_aafCwTable[m_uiCW1][uiSelector1]; + float fDeltaRGB2 = s_aafCwTable[m_uiCW2][uiSelector2]; + + m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB(); + m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB(); + } + + m_fError1 = ptryBest1->m_fError; + m_fError2 = ptryBest2->m_fError; + m_fError = m_fError1 + m_fError2; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try an ETC1 differential mode encoding for a half of a 4x4 block + // vary the basecolor components using a radius + // + void Block4x4Encoding_ETC1::TryIndividualHalf(IndividualTrys::Half *a_phalf) + { + + a_phalf->m_ptryBest = nullptr; + float fBestTryError = FLT_MAX; + + a_phalf->m_uiTrys = 0; + for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius; + iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius; + iRed++) + { + assert(iRed >= 0 && iRed <= 15); + + for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius; + iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius; + iGreen++) + { + assert(iGreen >= 0 && iGreen <= 15); + + for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius; + iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius; + iBlue++) + { + assert(iBlue >= 0 && iBlue <= 15); + + IndividualTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys]; + assert(ptry < 
&a_phalf->m_atry[IndividualTrys::Half::MAX_TRYS]); + + ptry->m_iRed = iRed; + ptry->m_iGreen = iGreen; + ptry->m_iBlue = iBlue; + ptry->m_fError = FLT_MAX; + ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue); + + // try each CW + for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++) + { + unsigned int auiPixelSelectors[PIXELS / 2]; + ColorFloatRGBA afrgbaDecodedPixels[PIXELS / 2]; + float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, + FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + + // pre-compute decoded pixels for each selector + ColorFloatRGBA afrgbaSelectors[SELECTORS]; + assert(SELECTORS == 4); + afrgbaSelectors[0] = (frgbaColor + s_aafCwTable[uiCW][0]).ClampRGB(); + afrgbaSelectors[1] = (frgbaColor + s_aafCwTable[uiCW][1]).ClampRGB(); + afrgbaSelectors[2] = (frgbaColor + s_aafCwTable[uiCW][2]).ClampRGB(); + afrgbaSelectors[3] = (frgbaColor + s_aafCwTable[uiCW][3]).ClampRGB(); + + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[a_phalf->m_pauiPixelMapping[uiPixel]]; + ColorFloatRGBA frgbaDecodedPixel; + + for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) + { + frgbaDecodedPixel = afrgbaSelectors[uiSelector]; + + float fPixelError; + + fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[a_phalf->m_pauiPixelMapping[uiPixel]], + *pfrgbaSourcePixel); + + if (fPixelError < afPixelErrors[uiPixel]) + { + auiPixelSelectors[uiPixel] = uiSelector; + afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel; + afPixelErrors[uiPixel] = fPixelError; + } + + } + } + + // add up all pixel errors + float fCWError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + fCWError += afPixelErrors[uiPixel]; + } + + // if best CW so far + if (fCWError < ptry->m_fError) + { + ptry->m_uiCW = uiCW; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + ptry->m_auiSelectors[uiPixel] 
= auiPixelSelectors[uiPixel]; + } + ptry->m_fError = fCWError; + } + + } + + if (ptry->m_fError < fBestTryError) + { + a_phalf->m_ptryBest = ptry; + fBestTryError = ptry->m_fError; + } + + assert(ptry->m_fError < FLT_MAX); + + a_phalf->m_uiTrys++; + } + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 1 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void Block4x4Encoding_ETC1::TryDegenerates1(void) + { + + TryDifferential(m_boolMostLikelyFlip, 1, -2, 0); + TryDifferential(m_boolMostLikelyFlip, 1, 2, 0); + TryDifferential(m_boolMostLikelyFlip, 1, 0, 2); + TryDifferential(m_boolMostLikelyFlip, 1, 0, -2); + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 2 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void Block4x4Encoding_ETC1::TryDegenerates2(void) + { + + TryDifferential(!m_boolMostLikelyFlip, 1, -2, 0); + TryDifferential(!m_boolMostLikelyFlip, 1, 2, 0); + TryDifferential(!m_boolMostLikelyFlip, 1, 0, 2); + TryDifferential(!m_boolMostLikelyFlip, 1, 0, -2); + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 3 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void 
Block4x4Encoding_ETC1::TryDegenerates3(void) + { + + TryDifferential(m_boolMostLikelyFlip, 1, -2, -2); + TryDifferential(m_boolMostLikelyFlip, 1, -2, 2); + TryDifferential(m_boolMostLikelyFlip, 1, 2, -2); + TryDifferential(m_boolMostLikelyFlip, 1, 2, 2); + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 4 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void Block4x4Encoding_ETC1::TryDegenerates4(void) + { + + TryDifferential(m_boolMostLikelyFlip, 1, -4, 0); + TryDifferential(m_boolMostLikelyFlip, 1, 4, 0); + TryDifferential(m_boolMostLikelyFlip, 1, 0, 4); + TryDifferential(m_boolMostLikelyFlip, 1, 0, -4); + + } + + // ---------------------------------------------------------------------------------------------------- + // find the best selector for each pixel based on a particular basecolor and CW that have been previously set + // calculate the selectors for each half of the block separately + // set the block error as the sum of each half's error + // + void Block4x4Encoding_ETC1::CalculateSelectors() + { + if (m_boolFlip) + { + CalculateHalfOfTheSelectors(0, s_auiTopPixelMapping); + CalculateHalfOfTheSelectors(1, s_auiBottomPixelMapping); + } + else + { + CalculateHalfOfTheSelectors(0, s_auiLeftPixelMapping); + CalculateHalfOfTheSelectors(1, s_auiRightPixelMapping); + } + + m_fError = m_fError1 + m_fError2; + } + + // ---------------------------------------------------------------------------------------------------- + // choose best selectors for half of the block + // calculate the error for half of the block + // + void Block4x4Encoding_ETC1::CalculateHalfOfTheSelectors(unsigned int a_uiHalf, + const unsigned int *pauiPixelMapping) + { + static const bool DEBUG_PRINT = 
false; + + ColorFloatRGBA *pfrgbaColor = a_uiHalf ? &m_frgbaColor2 : &m_frgbaColor1; + unsigned int *puiCW = a_uiHalf ? &m_uiCW2 : &m_uiCW1; + + float *pfHalfError = a_uiHalf ? &m_fError2 : &m_fError1; + *pfHalfError = FLT_MAX; + + // try each CW + for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++) + { + if (DEBUG_PRINT) + { + printf("\ncw=%u\n", uiCW); + } + + unsigned int auiPixelSelectors[PIXELS / 2]; + ColorFloatRGBA afrgbaDecodedPixels[PIXELS / 2]; + float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + if (DEBUG_PRINT) + { + printf("\tsource [%.2f,%.2f,%.2f]\n", m_pafrgbaSource[pauiPixelMapping[uiPixel]].fR, + m_pafrgbaSource[pauiPixelMapping[uiPixel]].fG, m_pafrgbaSource[pauiPixelMapping[uiPixel]].fB); + } + + ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[pauiPixelMapping[uiPixel]]; + ColorFloatRGBA frgbaDecodedPixel; + + for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) + { + float fDeltaRGB = s_aafCwTable[uiCW][uiSelector]; + + frgbaDecodedPixel = (*pfrgbaColor + fDeltaRGB).ClampRGB(); + + float fPixelError; + + fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[pauiPixelMapping[uiPixel]], + *pfrgbaSourcePixel); + + if (DEBUG_PRINT) + { + printf("\tpixel %u, index %u [%.2f,%.2f,%.2f], error %.2f", uiPixel, uiSelector, + frgbaDecodedPixel.fR, + frgbaDecodedPixel.fG, + frgbaDecodedPixel.fB, + fPixelError); + } + + if (fPixelError < afPixelErrors[uiPixel]) + { + if (DEBUG_PRINT) + { + printf(" *"); + } + + auiPixelSelectors[uiPixel] = uiSelector; + afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel; + afPixelErrors[uiPixel] = fPixelError; + } + + if (DEBUG_PRINT) + { + printf("\n"); + } + } + } + + // add up all pixel errors + float fCWError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + fCWError += afPixelErrors[uiPixel]; + } + if (DEBUG_PRINT) + { + printf("\terror 
%.2f\n", fCWError); + } + + // if best CW so far + if (fCWError < *pfHalfError) + { + *pfHalfError = fCWError; + *puiCW = uiCW; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + m_auiSelectors[pauiPixelMapping[uiPixel]] = auiPixelSelectors[uiPixel]; + m_afrgbaDecodedColors[pauiPixelMapping[uiPixel]] = afrgbaDecodedPixels[uiPixel]; + } + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state + // + void Block4x4Encoding_ETC1::SetEncodingBits(void) + { + assert(m_mode == MODE_ETC1); + + if (m_boolDiff) + { + int iRed1 = m_frgbaColor1.IntRed(31.0f); + int iGreen1 = m_frgbaColor1.IntGreen(31.0f); + int iBlue1 = m_frgbaColor1.IntBlue(31.0f); + + int iRed2 = m_frgbaColor2.IntRed(31.0f); + int iGreen2 = m_frgbaColor2.IntGreen(31.0f); + int iBlue2 = m_frgbaColor2.IntBlue(31.0f); + + int iDRed2 = iRed2 - iRed1; + int iDGreen2 = iGreen2 - iGreen1; + int iDBlue2 = iBlue2 - iBlue1; + + assert(iDRed2 >= -4 && iDRed2 < 4); + assert(iDGreen2 >= -4 && iDGreen2 < 4); + assert(iDBlue2 >= -4 && iDBlue2 < 4); + + m_pencodingbitsRGB8->differential.red1 = (unsigned int)iRed1; + m_pencodingbitsRGB8->differential.green1 = (unsigned int)iGreen1; + m_pencodingbitsRGB8->differential.blue1 = (unsigned int)iBlue1; + + m_pencodingbitsRGB8->differential.dred2 = iDRed2; + m_pencodingbitsRGB8->differential.dgreen2 = iDGreen2; + m_pencodingbitsRGB8->differential.dblue2 = iDBlue2; + } + else + { + m_pencodingbitsRGB8->individual.red1 = (unsigned int)m_frgbaColor1.IntRed(15.0f); + m_pencodingbitsRGB8->individual.green1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f); + m_pencodingbitsRGB8->individual.blue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f); + + m_pencodingbitsRGB8->individual.red2 = (unsigned int)m_frgbaColor2.IntRed(15.0f); + m_pencodingbitsRGB8->individual.green2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f); + m_pencodingbitsRGB8->individual.blue2 = (unsigned 
int)m_frgbaColor2.IntBlue(15.0f); + } + + m_pencodingbitsRGB8->individual.cw1 = m_uiCW1; + m_pencodingbitsRGB8->individual.cw2 = m_uiCW2; + + SetEncodingBits_Selectors(); + + m_pencodingbitsRGB8->individual.diff = (unsigned int)m_boolDiff; + m_pencodingbitsRGB8->individual.flip = (unsigned int)m_boolFlip; + + } + + // ---------------------------------------------------------------------------------------------------- + // set the selectors in the encoding bits + // + void Block4x4Encoding_ETC1::SetEncodingBits_Selectors(void) + { + + m_pencodingbitsRGB8->individual.selectors = 0; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + unsigned int uiSelector = m_auiSelectors[uiPixel]; + + // set index msb + m_pencodingbitsRGB8->individual.selectors |= (uiSelector >> 1) << (uiPixel ^ 8); + + // set index lsb + m_pencodingbitsRGB8->individual.selectors |= (uiSelector & 1) << ((16 + uiPixel) ^ 8); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set the decoded colors and decoded alpha based on the encoding state + // + void Block4x4Encoding_ETC1::Decode(void) + { + + const unsigned int *pauiPixelOrder = m_boolFlip ? s_auiPixelOrderFlip1 : s_auiPixelOrderFlip0; + + for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS; uiPixelOrder++) + { + ColorFloatRGBA *pfrgbaCenter = uiPixelOrder < 8 ? &m_frgbaColor1 : &m_frgbaColor2; + unsigned int uiCW = uiPixelOrder < 8 ? 
m_uiCW1 : m_uiCW2; + + unsigned int uiPixel = pauiPixelOrder[uiPixelOrder]; + + float fDelta = s_aafCwTable[uiCW][m_auiSelectors[uiPixel]]; + m_afrgbaDecodedColors[uiPixel] = (*pfrgbaCenter + fDelta).ClampRGB(); + m_afDecodedAlphas[uiPixel] = 1.0f; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.h new file mode 100644 index 0000000000..c0dc84d5d5 --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_ETC1.h @@ -0,0 +1,186 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "EtcBlock4x4Encoding.h" +#include "EtcBlock4x4EncodingBits.h" +#include "EtcDifferentialTrys.h" +#include "EtcIndividualTrys.h" + +namespace Etc +{ + + // base class for Block4x4Encoding_RGB8 + class Block4x4Encoding_ETC1 : public Block4x4Encoding + { + public: + + Block4x4Encoding_ETC1(void); + virtual ~Block4x4Encoding_ETC1(void); + + virtual void InitFromSource(Block4x4 *a_pblockParent, + ColorFloatRGBA *a_pafrgbaSource, + + unsigned char *a_paucEncodingBits, + ErrorMetric a_errormetric); + + virtual void InitFromEncodingBits(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + ColorFloatRGBA *a_pafrgbaSource, + + ErrorMetric a_errormetric); + + virtual void PerformIteration(float a_fEffort); + + inline virtual bool GetFlip(void) + { + return m_boolFlip; + } + + inline virtual bool IsDifferential(void) + { + return m_boolDiff; + } + + virtual void SetEncodingBits(void); + + void Decode(void); + + inline ColorFloatRGBA GetColor1(void) const + { + return m_frgbaColor1; + } + + inline ColorFloatRGBA GetColor2(void) const + { + return m_frgbaColor2; + } + + inline const unsigned int * GetSelectors(void) const + { + return m_auiSelectors; + } + + inline unsigned int GetCW1(void) const + { + return m_uiCW1; + } + + inline unsigned int GetCW2(void) const + { + return m_uiCW2; + } + + inline bool HasSeverelyBentDifferentialColors(void) const + { + return m_boolSeverelyBentDifferentialColors; + } + + protected: + + static const unsigned int s_auiPixelOrderFlip0[PIXELS]; + static const unsigned int s_auiPixelOrderFlip1[PIXELS]; + static const unsigned int s_auiPixelOrderHScan[PIXELS]; + + static const unsigned int s_auiLeftPixelMapping[8]; + static const unsigned int s_auiRightPixelMapping[8]; + static const unsigned int s_auiTopPixelMapping[8]; + static const unsigned int s_auiBottomPixelMapping[8]; + + static const unsigned int SELECTOR_BITS = 2; + static const unsigned int SELECTORS = 1 << SELECTOR_BITS; + + static const 
unsigned int CW_BITS = 3; + static const unsigned int CW_RANGES = 1 << CW_BITS; + + static float s_aafCwTable[CW_RANGES][SELECTORS]; + static unsigned char s_aucDifferentialCwRange[256]; + + static const int MAX_DIFFERENTIAL = 3; + static const int MIN_DIFFERENTIAL = -4; + + void InitFromEncodingBits_Selectors(void); + + void PerformFirstIteration(void); + void CalculateMostLikelyFlip(void); + + void TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, + int a_iGrayOffset1, int a_iGrayOffset2); + void TryDifferentialHalf(DifferentialTrys::Half *a_phalf); + + void TryIndividual(bool a_boolFlip, unsigned int a_uiRadius); + void TryIndividualHalf(IndividualTrys::Half *a_phalf); + + void TryDegenerates1(void); + void TryDegenerates2(void); + void TryDegenerates3(void); + void TryDegenerates4(void); + + void CalculateSelectors(); + void CalculateHalfOfTheSelectors(unsigned int a_uiHalf, + const unsigned int *pauiPixelMapping); + + // calculate the distance2 of r_frgbaPixel from r_frgbaTarget's gray line + inline float CalcGrayDistance2(ColorFloatRGBA &r_frgbaPixel, + ColorFloatRGBA &r_frgbaTarget) + { + float fDeltaGray = ((r_frgbaPixel.fR - r_frgbaTarget.fR) + + (r_frgbaPixel.fG - r_frgbaTarget.fG) + + (r_frgbaPixel.fB - r_frgbaTarget.fB)) / 3.0f; + + ColorFloatRGBA frgbaPointOnGrayLine = (r_frgbaTarget + fDeltaGray).ClampRGB(); + + float fDR = r_frgbaPixel.fR - frgbaPointOnGrayLine.fR; + float fDG = r_frgbaPixel.fG - frgbaPointOnGrayLine.fG; + float fDB = r_frgbaPixel.fB - frgbaPointOnGrayLine.fB; + + return (fDR*fDR) + (fDG*fDG) + (fDB*fDB); + } + + void SetEncodingBits_Selectors(void); + + // intermediate encoding + bool m_boolDiff; + bool m_boolFlip; + ColorFloatRGBA m_frgbaColor1; + ColorFloatRGBA m_frgbaColor2; + unsigned int m_uiCW1; + unsigned int m_uiCW2; + unsigned int m_auiSelectors[PIXELS]; + + // state shared between iterations + ColorFloatRGBA m_frgbaSourceAverageLeft; + ColorFloatRGBA m_frgbaSourceAverageRight; + ColorFloatRGBA 
m_frgbaSourceAverageTop; + ColorFloatRGBA m_frgbaSourceAverageBottom; + bool m_boolMostLikelyFlip; + + // stats + float m_fError1; // error for Etc1 half 1 + float m_fError2; // error for Etc1 half 2 + bool m_boolSeverelyBentDifferentialColors; // only valid if m_boolDiff; + + // final encoding + Block4x4EncodingBits_RGB8 *m_pencodingbitsRGB8; // or RGB8 portion of Block4x4EncodingBits_RGB8A8 + + private: + + void CalculateSourceAverages(void); + + }; + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.cpp new file mode 100644 index 0000000000..4c012fbbf1 --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.cpp @@ -0,0 +1,429 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcBlock4x4Encoding_R11.cpp + +Block4x4Encoding_R11 is the encoder to use when targetting file format R11 and SR11 (signed R11). 
+ +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4Encoding_R11.h" + +#include "EtcBlock4x4EncodingBits.h" +#include "EtcBlock4x4.h" + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <float.h> +#include <limits> + +namespace Etc +{ + + // modifier values to use for R11, SR11, RG11 and SRG11 + float Block4x4Encoding_R11::s_aafModifierTable[MODIFIER_TABLE_ENTRYS][SELECTORS] + { + { -3.0f / 255.0f, -6.0f / 255.0f, -9.0f / 255.0f, -15.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 8.0f / 255.0f, 14.0f / 255.0f }, + { -3.0f / 255.0f, -7.0f / 255.0f, -10.0f / 255.0f, -13.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 9.0f / 255.0f, 12.0f / 255.0f }, + { -2.0f / 255.0f, -5.0f / 255.0f, -8.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 12.0f / 255.0f }, + { -2.0f / 255.0f, -4.0f / 255.0f, -6.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 12.0f / 255.0f }, + + { -3.0f / 255.0f, -6.0f / 255.0f, -8.0f / 255.0f, -12.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f, 11.0f / 255.0f }, + { -3.0f / 255.0f, -7.0f / 255.0f, -9.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 8.0f / 255.0f, 10.0f / 255.0f }, + { -4.0f / 255.0f, -7.0f / 255.0f, -8.0f / 255.0f, -11.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f }, + { -3.0f / 255.0f, -5.0f / 255.0f, -8.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f }, + + { -2.0f / 255.0f, -6.0f / 255.0f, -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f, 9.0f / 255.0f }, + { -2.0f / 255.0f, -5.0f / 255.0f, -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 9.0f / 255.0f }, + { -2.0f / 255.0f, -4.0f / 255.0f, -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 7.0f / 255.0f, 9.0f / 255.0f }, + { -2.0f / 255.0f, -5.0f / 255.0f, -7.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f, 9.0f / 255.0f }, + + { 
-3.0f / 255.0f, -4.0f / 255.0f, -7.0f / 255.0f, -10.0f / 255.0f, 2.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f, 9.0f / 255.0f }, + { -1.0f / 255.0f, -2.0f / 255.0f, -3.0f / 255.0f, -10.0f / 255.0f, 0.0f / 255.0f, 1.0f / 255.0f, 2.0f / 255.0f, 9.0f / 255.0f }, + { -4.0f / 255.0f, -6.0f / 255.0f, -8.0f / 255.0f, -9.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f, 8.0f / 255.0f }, + { -3.0f / 255.0f, -5.0f / 255.0f, -7.0f / 255.0f, -9.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f, 8.0f / 255.0f } + }; + + // ---------------------------------------------------------------------------------------------------- + // + Block4x4Encoding_R11::Block4x4Encoding_R11(void) + { + + m_pencodingbitsR11 = nullptr; + + } + + Block4x4Encoding_R11::~Block4x4Encoding_R11(void) {} + // ---------------------------------------------------------------------------------------------------- + // initialization prior to encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits + // + void Block4x4Encoding_R11::InitFromSource(Block4x4 *a_pblockParent, + ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) + { + Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric); + + m_pencodingbitsR11 = (Block4x4EncodingBits_R11 *)a_paucEncodingBits; + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits of a previous encoding + // + void 
Block4x4Encoding_R11::InitFromEncodingBits(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric) + { + m_pencodingbitsR11 = (Block4x4EncodingBits_R11 *)a_paucEncodingBits; + + // init RGB portion + Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent, + (unsigned char *)m_pencodingbitsR11, + a_pafrgbaSource, + a_errormetric); + + // init R11 portion + { + m_mode = MODE_R11; + if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_R11 || a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11) + { + m_fRedBase = (float)(signed char)m_pencodingbitsR11->data.base; + } + else + { + m_fRedBase = (float)(unsigned char)m_pencodingbitsR11->data.base; + } + m_fRedMultiplier = (float)m_pencodingbitsR11->data.multiplier; + m_uiRedModifierTableIndex = m_pencodingbitsR11->data.table; + + unsigned long long int ulliSelectorBits = 0; + ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors0 << 40; + ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors1 << 32; + ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors2 << 24; + ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors3 << 16; + ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors4 << 8; + ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors5; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + unsigned int uiShift = 45 - (3 * uiPixel); + m_auiRedSelectors[uiPixel] = (ulliSelectorBits >> uiShift) & (SELECTORS - 1); + } + + // decode the red channel + // calc red error + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + float fDecodedPixelData = 0.0f; + if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::R11 || a_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11) + { + fDecodedPixelData = 
DecodePixelRed(m_fRedBase, m_fRedMultiplier, + m_uiRedModifierTableIndex, + m_auiRedSelectors[uiPixel]); + } + else if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_R11 || a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11) + { + fDecodedPixelData = DecodePixelRed(m_fRedBase + 128, m_fRedMultiplier, + m_uiRedModifierTableIndex, + m_auiRedSelectors[uiPixel]); + } + else + { + assert(0); + } + m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(fDecodedPixelData, 0.0f, 0.0f, 1.0f); + } + CalcBlockError(); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + void Block4x4Encoding_R11::PerformIteration(float a_fEffort) + { + assert(!m_boolDone); + m_mode = MODE_R11; + + switch (m_uiEncodingIterations) + { + case 0: + m_fError = FLT_MAX; + m_fRedBlockError = FLT_MAX; // artificially high value + CalculateR11(8, 0.0f, 0.0f); + m_fError = m_fRedBlockError; + break; + + case 1: + CalculateR11(8, 2.0f, 1.0f); + m_fError = m_fRedBlockError; + if (a_fEffort <= 24.5f) + { + m_boolDone = true; + } + break; + + case 2: + CalculateR11(8, 12.0f, 1.0f); + m_fError = m_fRedBlockError; + if (a_fEffort <= 49.5f) + { + m_boolDone = true; + } + break; + + case 3: + CalculateR11(7, 6.0f, 1.0f); + m_fError = m_fRedBlockError; + break; + + case 4: + CalculateR11(6, 3.0f, 1.0f); + m_fError = m_fRedBlockError; + break; + + case 5: + CalculateR11(5, 1.0f, 0.0f); + m_fError = m_fRedBlockError; + m_boolDone = true; + break; + + default: + assert(0); + break; + } + + m_uiEncodingIterations++; + SetDoneIfPerfect(); + } + + // ---------------------------------------------------------------------------------------------------- + // 
find the best combination of base color, multiplier and selectors + // + // a_uiSelectorsUsed limits the number of selector combinations to try + // a_fBaseRadius limits the range of base colors to try + // a_fMultiplierRadius limits the range of multipliers to try + // + void Block4x4Encoding_R11::CalculateR11(unsigned int a_uiSelectorsUsed, + float a_fBaseRadius, float a_fMultiplierRadius) + { + // maps from virtual (monotonic) selector to ETC selector + static const unsigned int auiVirtualSelectorMap[8] = {3, 2, 1, 0, 4, 5, 6, 7}; + + // find min/max red + float fMinRed = 1.0f; + float fMaxRed = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + // ignore border pixels + float fAlpha = m_pafrgbaSource[uiPixel].fA; + if (isnan(fAlpha)) + { + continue; + } + + float fRed = m_pafrgbaSource[uiPixel].fR; + + if (fRed < fMinRed) + { + fMinRed = fRed; + } + if (fRed > fMaxRed) + { + fMaxRed = fRed; + } + } + assert(fMinRed <= fMaxRed); + + float fRedRange = (fMaxRed - fMinRed); + + // try each modifier table entry + for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++) + { + for (unsigned int uiMinVirtualSelector = 0; + uiMinVirtualSelector <= (8- a_uiSelectorsUsed); + uiMinVirtualSelector++) + { + unsigned int uiMaxVirtualSelector = uiMinVirtualSelector + a_uiSelectorsUsed - 1; + + unsigned int uiMinSelector = auiVirtualSelectorMap[uiMinVirtualSelector]; + unsigned int uiMaxSelector = auiVirtualSelectorMap[uiMaxVirtualSelector]; + + float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][uiMinSelector]; + + float fTableEntryRange = s_aafModifierTable[uiTableEntry][uiMaxSelector] - + s_aafModifierTable[uiTableEntry][uiMinSelector]; + + float fCenterRatio = fTableEntryCenter / fTableEntryRange; + + float fCenter = fMinRed + fCenterRatio*fRedRange; + fCenter = roundf(255.0f * fCenter) / 255.0f; + + float fMinBase = fCenter - (a_fBaseRadius / 255.0f); + if (fMinBase < 0.0f) + { + fMinBase = 0.0f; + } + + float 
fMaxBase = fCenter + (a_fBaseRadius / 255.0f); + if (fMaxBase > 1.0f) + { + fMaxBase = 1.0f; + } + + for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f)) + { + float fRangeMultiplier = roundf(fRedRange / fTableEntryRange); + + float fMinMultiplier = fRangeMultiplier - a_fMultiplierRadius; + if (fMinMultiplier < 1.0f) + { + fMinMultiplier = 0.0f; + } + else if (fMinMultiplier > 15.0f) + { + fMinMultiplier = 15.0f; + } + + float fMaxMultiplier = fRangeMultiplier + a_fMultiplierRadius; + if (fMaxMultiplier < 1.0f) + { + fMaxMultiplier = 1.0f; + } + else if (fMaxMultiplier > 15.0f) + { + fMaxMultiplier = 15.0f; + } + + for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f) + { + // find best selector for each pixel + unsigned int auiBestSelectors[PIXELS]; + float afBestRedError[PIXELS]; + float afBestPixelRed[PIXELS]; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + float fBestPixelRedError = FLT_MAX; + + for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) + { + float fPixelRed = DecodePixelRed(fBase * 255.0f, fMultiplier, uiTableEntry, uiSelector); + + ColorFloatRGBA frgba(fPixelRed, m_pafrgbaSource[uiPixel].fG,0.0f,1.0f); + + float fPixelRedError = CalcPixelError(frgba, 1.0f, m_pafrgbaSource[uiPixel]); + + if (fPixelRedError < fBestPixelRedError) + { + fBestPixelRedError = fPixelRedError; + auiBestSelectors[uiPixel] = uiSelector; + afBestRedError[uiPixel] = fBestPixelRedError; + afBestPixelRed[uiPixel] = fPixelRed; + } + } + } + float fBlockError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + fBlockError += afBestRedError[uiPixel]; + } + if (fBlockError < m_fRedBlockError) + { + m_fRedBlockError = fBlockError; + + if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11) + { + m_fRedBase = 255.0f * fBase; + } + else if 
(m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11) + { + m_fRedBase = (fBase * 255) - 128; + } + else + { + assert(0); + } + m_fRedMultiplier = fMultiplier; + m_uiRedModifierTableIndex = uiTableEntry; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiRedSelectors[uiPixel] = auiBestSelectors[uiPixel]; + float fBestPixelRed = afBestPixelRed[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(fBestPixelRed, 0.0f, 0.0f, 1.0f); + m_afDecodedAlphas[uiPixel] = 1.0f; + } + } + } + } + + } + } + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state + // + void Block4x4Encoding_R11::SetEncodingBits(void) + { + if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11) + { + m_pencodingbitsR11->data.base = (unsigned char)roundf(m_fRedBase); + } + else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11) + { + m_pencodingbitsR11->data.base = (signed char)roundf(m_fRedBase); + } + else + { + assert(0); + } + m_pencodingbitsR11->data.table = m_uiRedModifierTableIndex; + m_pencodingbitsR11->data.multiplier = (unsigned char)roundf(m_fRedMultiplier); + + unsigned long long int ulliSelectorBits = 0; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + unsigned int uiShift = 45 - (3 * uiPixel); + ulliSelectorBits |= ((unsigned long long int)m_auiRedSelectors[uiPixel]) << uiShift; + } + + m_pencodingbitsR11->data.selectors0 = ulliSelectorBits >> 40; + m_pencodingbitsR11->data.selectors1 = ulliSelectorBits >> 32; + m_pencodingbitsR11->data.selectors2 = ulliSelectorBits >> 24; + m_pencodingbitsR11->data.selectors3 = ulliSelectorBits >> 16; + 
m_pencodingbitsR11->data.selectors4 = ulliSelectorBits >> 8; + m_pencodingbitsR11->data.selectors5 = ulliSelectorBits; + } + + // ---------------------------------------------------------------------------------------------------- + // +} diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.h new file mode 100644 index 0000000000..b40c1e0036 --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_R11.h @@ -0,0 +1,122 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "EtcBlock4x4Encoding_RGB8.h" + +namespace Etc +{ + class Block4x4EncodingBits_R11; + + // ################################################################################ + // Block4x4Encoding_R11 + // ################################################################################ + + class Block4x4Encoding_R11 : public Block4x4Encoding_RGB8 + { + public: + + Block4x4Encoding_R11(void); + virtual ~Block4x4Encoding_R11(void); + + virtual void InitFromSource(Block4x4 *a_pblockParent, + ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric); + + virtual void InitFromEncodingBits(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric); + + virtual void PerformIteration(float a_fEffort); + + virtual void SetEncodingBits(void); + + inline float GetRedBase(void) const + { + return m_fRedBase; + } + + inline float GetRedMultiplier(void) const + { + return m_fRedMultiplier; + } + + inline int GetRedTableIndex(void) const + { + return m_uiRedModifierTableIndex; + } + + inline const unsigned int * GetRedSelectors(void) const + { + return m_auiRedSelectors; + } + + protected: + + static const unsigned int MODIFIER_TABLE_ENTRYS = 16; + static const unsigned int SELECTOR_BITS = 3; + static const unsigned int SELECTORS = 1 << SELECTOR_BITS; + + static float s_aafModifierTable[MODIFIER_TABLE_ENTRYS][SELECTORS]; + + void CalculateR11(unsigned int a_uiSelectorsUsed, + float a_fBaseRadius, float a_fMultiplierRadius); + + + + + inline float DecodePixelRed(float a_fBase, float a_fMultiplier, + unsigned int a_uiTableIndex, unsigned int a_uiSelector) + { + float fMultiplier = a_fMultiplier; + if (fMultiplier <= 0.0f) + { + fMultiplier = 1.0f / 8.0f; + } + + float fPixelRed = a_fBase * 8 + 4 + + 8 * fMultiplier*s_aafModifierTable[a_uiTableIndex][a_uiSelector]*255; + fPixelRed /= 2047.0f; + + if (fPixelRed < 0.0f) + { + fPixelRed = 0.0f; + } + 
else if (fPixelRed > 1.0f) + { + fPixelRed = 1.0f; + } + + return fPixelRed; + } + + Block4x4EncodingBits_R11 *m_pencodingbitsR11; + + float m_fRedBase; + float m_fRedMultiplier; + float m_fRedBlockError; + unsigned int m_uiRedModifierTableIndex; + unsigned int m_auiRedSelectors[PIXELS]; + + + }; + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.cpp new file mode 100644 index 0000000000..417835db51 --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.cpp @@ -0,0 +1,447 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcBlock4x4Encoding_RG11.cpp + +Block4x4Encoding_RG11 is the encoder to use when targetting file format RG11 and SRG11 (signed RG11). 
+ +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4Encoding_RG11.h" + +#include "EtcBlock4x4EncodingBits.h" +#include "EtcBlock4x4.h" + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <float.h> +#include <limits> + +namespace Etc +{ + // ---------------------------------------------------------------------------------------------------- + // + Block4x4Encoding_RG11::Block4x4Encoding_RG11(void) + { + m_pencodingbitsRG11 = nullptr; + } + + Block4x4Encoding_RG11::~Block4x4Encoding_RG11(void) {} + // ---------------------------------------------------------------------------------------------------- + // initialization prior to encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits + // + void Block4x4Encoding_RG11::InitFromSource(Block4x4 *a_pblockParent, + ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) + { + Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric); + + m_pencodingbitsRG11 = (Block4x4EncodingBits_RG11 *)a_paucEncodingBits; + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits of a previous encoding + // + void Block4x4Encoding_RG11::InitFromEncodingBits(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric) + { + + m_pencodingbitsRG11 = (Block4x4EncodingBits_RG11 *)a_paucEncodingBits; + + // init RGB portion + 
Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent, + (unsigned char *)m_pencodingbitsRG11, + a_pafrgbaSource, + a_errormetric); + m_fError = 0.0f; + + { + m_mode = MODE_RG11; + if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11) + { + m_fRedBase = (float)(signed char)m_pencodingbitsRG11->data.baseR; + m_fGrnBase = (float)(signed char)m_pencodingbitsRG11->data.baseG; + } + else + { + m_fRedBase = (float)(unsigned char)m_pencodingbitsRG11->data.baseR; + m_fGrnBase = (float)(unsigned char)m_pencodingbitsRG11->data.baseG; + } + m_fRedMultiplier = (float)m_pencodingbitsRG11->data.multiplierR; + m_fGrnMultiplier = (float)m_pencodingbitsRG11->data.multiplierG; + m_uiRedModifierTableIndex = m_pencodingbitsRG11->data.tableIndexR; + m_uiGrnModifierTableIndex = m_pencodingbitsRG11->data.tableIndexG; + + unsigned long long int ulliSelectorBitsR = 0; + ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR0 << 40; + ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR1 << 32; + ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR2 << 24; + ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR3 << 16; + ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR4 << 8; + ulliSelectorBitsR |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsR5; + + unsigned long long int ulliSelectorBitsG = 0; + ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG0 << 40; + ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG1 << 32; + ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG2 << 24; + ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG3 << 16; + ulliSelectorBitsG |= (unsigned long long int)m_pencodingbitsRG11->data.selectorsG4 << 8; + ulliSelectorBitsG |= (unsigned long long 
int)m_pencodingbitsRG11->data.selectorsG5; + + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + unsigned int uiShift = 45 - (3 * uiPixel); + m_auiRedSelectors[uiPixel] = (ulliSelectorBitsR >> uiShift) & (SELECTORS - 1); + m_auiGrnSelectors[uiPixel] = (ulliSelectorBitsG >> uiShift) & (SELECTORS - 1); + } + + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + float fRedDecodedData = 0.0f; + float fGrnDecodedData = 0.0f; + if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11) + { + fRedDecodedData = DecodePixelRed(m_fRedBase, m_fRedMultiplier, m_uiRedModifierTableIndex, m_auiRedSelectors[uiPixel]); + fGrnDecodedData = DecodePixelRed(m_fGrnBase, m_fGrnMultiplier, m_uiGrnModifierTableIndex, m_auiGrnSelectors[uiPixel]); + } + else if (a_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11) + { + fRedDecodedData = DecodePixelRed(m_fRedBase + 128, m_fRedMultiplier, m_uiRedModifierTableIndex, m_auiRedSelectors[uiPixel]); + fGrnDecodedData = DecodePixelRed(m_fGrnBase + 128, m_fGrnMultiplier, m_uiGrnModifierTableIndex, m_auiGrnSelectors[uiPixel]); + } + else + { + assert(0); + } + m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(fRedDecodedData, fGrnDecodedData, 0.0f, 1.0f); + } + + } + + CalcBlockError(); + } + + // ---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + void Block4x4Encoding_RG11::PerformIteration(float a_fEffort) + { + assert(!m_boolDone); + + switch (m_uiEncodingIterations) + { + case 0: + m_fError = FLT_MAX; + m_fGrnBlockError = FLT_MAX; // artificially high value + m_fRedBlockError = FLT_MAX; + CalculateR11(8, 0.0f, 0.0f); + CalculateG11(8, 0.0f, 0.0f); + m_fError = 
(m_fGrnBlockError + m_fRedBlockError); + break; + + case 1: + CalculateR11(8, 2.0f, 1.0f); + CalculateG11(8, 2.0f, 1.0f); + m_fError = (m_fGrnBlockError + m_fRedBlockError); + if (a_fEffort <= 24.5f) + { + m_boolDone = true; + } + break; + + case 2: + CalculateR11(8, 12.0f, 1.0f); + CalculateG11(8, 12.0f, 1.0f); + m_fError = (m_fGrnBlockError + m_fRedBlockError); + if (a_fEffort <= 49.5f) + { + m_boolDone = true; + } + break; + + case 3: + CalculateR11(7, 6.0f, 1.0f); + CalculateG11(7, 6.0f, 1.0f); + m_fError = (m_fGrnBlockError + m_fRedBlockError); + break; + + case 4: + CalculateR11(6, 3.0f, 1.0f); + CalculateG11(6, 3.0f, 1.0f); + m_fError = (m_fGrnBlockError + m_fRedBlockError); + break; + + case 5: + CalculateR11(5, 1.0f, 0.0f); + CalculateG11(5, 1.0f, 0.0f); + m_fError = (m_fGrnBlockError + m_fRedBlockError); + m_boolDone = true; + break; + + default: + assert(0); + break; + } + + m_uiEncodingIterations++; + SetDoneIfPerfect(); + } + + // ---------------------------------------------------------------------------------------------------- + // find the best combination of base color, multiplier and selectors + // + // a_uiSelectorsUsed limits the number of selector combinations to try + // a_fBaseRadius limits the range of base colors to try + // a_fMultiplierRadius limits the range of multipliers to try + // + void Block4x4Encoding_RG11::CalculateG11(unsigned int a_uiSelectorsUsed, + float a_fBaseRadius, float a_fMultiplierRadius) + { + // maps from virtual (monotonic) selector to etc selector + static const unsigned int auiVirtualSelectorMap[8] = { 3, 2, 1, 0, 4, 5, 6, 7 }; + + // find min/max Grn + float fMinGrn = 1.0f; + float fMaxGrn = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + // ignore border pixels + float fAlpha = m_pafrgbaSource[uiPixel].fA; + if (isnan(fAlpha)) + { + continue; + } + + float fGrn = m_pafrgbaSource[uiPixel].fG; + + if (fGrn < fMinGrn) + { + fMinGrn = fGrn; + } + if (fGrn > fMaxGrn) + { + fMaxGrn = fGrn; + 
} + } + assert(fMinGrn <= fMaxGrn); + + float fGrnRange = (fMaxGrn - fMinGrn); + + // try each modifier table entry + for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++) + { + for (unsigned int uiMinVirtualSelector = 0; + uiMinVirtualSelector <= (8 - a_uiSelectorsUsed); + uiMinVirtualSelector++) + { + unsigned int uiMaxVirtualSelector = uiMinVirtualSelector + a_uiSelectorsUsed - 1; + + unsigned int uiMinSelector = auiVirtualSelectorMap[uiMinVirtualSelector]; + unsigned int uiMaxSelector = auiVirtualSelectorMap[uiMaxVirtualSelector]; + + float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][uiMinSelector]; + + float fTableEntryRange = s_aafModifierTable[uiTableEntry][uiMaxSelector] - + s_aafModifierTable[uiTableEntry][uiMinSelector]; + + float fCenterRatio = fTableEntryCenter / fTableEntryRange; + + float fCenter = fMinGrn + fCenterRatio*fGrnRange; + fCenter = roundf(255.0f * fCenter) / 255.0f; + + float fMinBase = fCenter - (a_fBaseRadius / 255.0f); + if (fMinBase < 0.0f) + { + fMinBase = 0.0f; + } + + float fMaxBase = fCenter + (a_fBaseRadius / 255.0f); + if (fMaxBase > 1.0f) + { + fMaxBase = 1.0f; + } + + for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f)) + { + float fRangeMultiplier = roundf(fGrnRange / fTableEntryRange); + + float fMinMultiplier = fRangeMultiplier - a_fMultiplierRadius; + if (fMinMultiplier < 1.0f) + { + fMinMultiplier = 0.0f; + } + else if (fMinMultiplier > 15.0f) + { + fMinMultiplier = 15.0f; + } + + float fMaxMultiplier = fRangeMultiplier + a_fMultiplierRadius; + if (fMaxMultiplier < 1.0f) + { + fMaxMultiplier = 1.0f; + } + else if (fMaxMultiplier > 15.0f) + { + fMaxMultiplier = 15.0f; + } + + for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f) + { + // find best selector for each pixel + unsigned int auiBestSelectors[PIXELS]; + float afBestGrnError[PIXELS]; + float afBestPixelGrn[PIXELS]; + + for (unsigned int uiPixel = 0; 
uiPixel < PIXELS; uiPixel++) + { + float fBestPixelGrnError = FLT_MAX; + + for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) + { + //DecodePixelRed is not red channel specific + float fPixelGrn = DecodePixelRed(fBase * 255.0f, fMultiplier, uiTableEntry, uiSelector); + + ColorFloatRGBA frgba(m_pafrgbaSource[uiPixel].fR, fPixelGrn, 0.0f, 1.0f); + + float fPixelGrnError = CalcPixelError(frgba, 1.0f, m_pafrgbaSource[uiPixel]); + + if (fPixelGrnError < fBestPixelGrnError) + { + fBestPixelGrnError = fPixelGrnError; + auiBestSelectors[uiPixel] = uiSelector; + afBestGrnError[uiPixel] = fBestPixelGrnError; + afBestPixelGrn[uiPixel] = fPixelGrn; + } + } + } + float fBlockError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + fBlockError += afBestGrnError[uiPixel]; + } + + if (fBlockError < m_fGrnBlockError) + { + m_fGrnBlockError = fBlockError; + + if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11) + { + m_fGrnBase = 255.0f * fBase; + } + else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11) + { + m_fGrnBase = (fBase * 255) - 128; + } + else + { + assert(0); + } + m_fGrnMultiplier = fMultiplier; + m_uiGrnModifierTableIndex = uiTableEntry; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiGrnSelectors[uiPixel] = auiBestSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel].fG = afBestPixelGrn[uiPixel]; + m_afDecodedAlphas[uiPixel] = 1.0f; + } + } + } + } + + } + } + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state + // + void Block4x4Encoding_RG11::SetEncodingBits(void) + { + unsigned long long int ulliSelectorBitsR = 0; + unsigned long long int ulliSelectorBitsG = 0; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + unsigned int uiShift = 45 - (3 * uiPixel); + ulliSelectorBitsR |= ((unsigned long long 
int)m_auiRedSelectors[uiPixel]) << uiShift; + ulliSelectorBitsG |= ((unsigned long long int)m_auiGrnSelectors[uiPixel]) << uiShift; + } + if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11) + { + m_pencodingbitsRG11->data.baseR = (unsigned char)roundf(m_fRedBase); + } + else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11) + { + m_pencodingbitsRG11->data.baseR = (signed char)roundf(m_fRedBase); + } + else + { + assert(0); + } + m_pencodingbitsRG11->data.tableIndexR = m_uiRedModifierTableIndex; + m_pencodingbitsRG11->data.multiplierR = (unsigned char)roundf(m_fRedMultiplier); + + m_pencodingbitsRG11->data.selectorsR0 = ulliSelectorBitsR >> 40; + m_pencodingbitsRG11->data.selectorsR1 = ulliSelectorBitsR >> 32; + m_pencodingbitsRG11->data.selectorsR2 = ulliSelectorBitsR >> 24; + m_pencodingbitsRG11->data.selectorsR3 = ulliSelectorBitsR >> 16; + m_pencodingbitsRG11->data.selectorsR4 = ulliSelectorBitsR >> 8; + m_pencodingbitsRG11->data.selectorsR5 = ulliSelectorBitsR; + + if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11) + { + m_pencodingbitsRG11->data.baseG = (unsigned char)roundf(m_fGrnBase); + } + else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11) + { + m_pencodingbitsRG11->data.baseG = (signed char)roundf(m_fGrnBase); + } + else + { + assert(0); + } + m_pencodingbitsRG11->data.tableIndexG = m_uiGrnModifierTableIndex; + m_pencodingbitsRG11->data.multiplierG = (unsigned char)roundf(m_fGrnMultiplier); + + m_pencodingbitsRG11->data.selectorsG0 = ulliSelectorBitsG >> 40; + m_pencodingbitsRG11->data.selectorsG1 = ulliSelectorBitsG >> 32; + m_pencodingbitsRG11->data.selectorsG2 = ulliSelectorBitsG >> 24; + m_pencodingbitsRG11->data.selectorsG3 = ulliSelectorBitsG >> 16; + m_pencodingbitsRG11->data.selectorsG4 = ulliSelectorBitsG >> 8; + m_pencodingbitsRG11->data.selectorsG5 = ulliSelectorBitsG; + + } + + // 
---------------------------------------------------------------------------------------------------- + // +} diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.h new file mode 100644 index 0000000000..d4993b8c5f --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_RG11.h @@ -0,0 +1,86 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "EtcBlock4x4Encoding_RGB8.h" +#include "EtcBlock4x4Encoding_R11.h" + +namespace Etc +{ + class Block4x4EncodingBits_RG11; + + // ################################################################################ + // Block4x4Encoding_RG11 + // ################################################################################ + + class Block4x4Encoding_RG11 : public Block4x4Encoding_R11 + { + float m_fGrnBase; + float m_fGrnMultiplier; + float m_fGrnBlockError; + unsigned int m_auiGrnSelectors[PIXELS]; + unsigned int m_uiGrnModifierTableIndex; + public: + + Block4x4Encoding_RG11(void); + virtual ~Block4x4Encoding_RG11(void); + + virtual void InitFromSource(Block4x4 *a_pblockParent, + ColorFloatRGBA *a_pafrgbaSource, + + unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric); + + virtual void InitFromEncodingBits(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + ColorFloatRGBA *a_pafrgbaSource, + + ErrorMetric a_errormetric); + + virtual void PerformIteration(float 
a_fEffort); + + virtual void SetEncodingBits(void); + + Block4x4EncodingBits_RG11 *m_pencodingbitsRG11; + + void CalculateG11(unsigned int a_uiSelectorsUsed, float a_fBaseRadius, float a_fMultiplierRadius); + + inline float GetGrnBase(void) const + { + return m_fGrnBase; + } + + inline float GetGrnMultiplier(void) const + { + return m_fGrnMultiplier; + } + + inline int GetGrnTableIndex(void) const + { + return m_uiGrnModifierTableIndex; + } + + inline const unsigned int * GetGrnSelectors(void) const + { + return m_auiGrnSelectors; + } + + }; + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp new file mode 100644 index 0000000000..5656556db9 --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.cpp @@ -0,0 +1,1730 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcBlock4x4Encoding_RGB8.cpp + +Block4x4Encoding_RGB8 is the encoder to use for the ETC2 extensions when targetting file format RGB8. +This encoder is also used for the ETC2 subset of file format RGBA8. + +Block4x4Encoding_ETC1 encodes the ETC1 subset of RGB8. 
+ +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4Encoding_RGB8.h" + +#include "EtcBlock4x4EncodingBits.h" +#include "EtcBlock4x4.h" +#include "EtcMath.h" + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <float.h> +#include <limits> + +namespace Etc +{ + float Block4x4Encoding_RGB8::s_afTHDistanceTable[TH_DISTANCES] = + { + 3.0f / 255.0f, + 6.0f / 255.0f, + 11.0f / 255.0f, + 16.0f / 255.0f, + 23.0f / 255.0f, + 32.0f / 255.0f, + 41.0f / 255.0f, + 64.0f / 255.0f + }; + + // ---------------------------------------------------------------------------------------------------- + // + Block4x4Encoding_RGB8::Block4x4Encoding_RGB8(void) + { + + m_pencodingbitsRGB8 = nullptr; + + } + + Block4x4Encoding_RGB8::~Block4x4Encoding_RGB8(void) {} + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits of a previous encoding + // + void Block4x4Encoding_RGB8::InitFromEncodingBits(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric) + { + + // handle ETC1 modes + Block4x4Encoding_ETC1::InitFromEncodingBits(a_pblockParent, + a_paucEncodingBits, a_pafrgbaSource,a_errormetric); + + m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits; + + // detect if there is a T, H or Planar mode present + if (m_pencodingbitsRGB8->differential.diff) + { + int iRed1 = (int)m_pencodingbitsRGB8->differential.red1; + int iDRed2 = m_pencodingbitsRGB8->differential.dred2; + int iRed2 = iRed1 + iDRed2; + + int iGreen1 = (int)m_pencodingbitsRGB8->differential.green1; + int iDGreen2 = m_pencodingbitsRGB8->differential.dgreen2; + 
int iGreen2 = iGreen1 + iDGreen2; + + int iBlue1 = (int)m_pencodingbitsRGB8->differential.blue1; + int iDBlue2 = m_pencodingbitsRGB8->differential.dblue2; + int iBlue2 = iBlue1 + iDBlue2; + + if (iRed2 < 0 || iRed2 > 31) + { + InitFromEncodingBits_T(); + } + else if (iGreen2 < 0 || iGreen2 > 31) + { + InitFromEncodingBits_H(); + } + else if (iBlue2 < 0 || iBlue2 > 31) + { + InitFromEncodingBits_Planar(); + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding if T mode is detected + // + void Block4x4Encoding_RGB8::InitFromEncodingBits_T(void) + { + + m_mode = MODE_T; + + unsigned char ucRed1 = (unsigned char)((m_pencodingbitsRGB8->t.red1a << 2) + + m_pencodingbitsRGB8->t.red1b); + unsigned char ucGreen1 = m_pencodingbitsRGB8->t.green1; + unsigned char ucBlue1 = m_pencodingbitsRGB8->t.blue1; + + unsigned char ucRed2 = m_pencodingbitsRGB8->t.red2; + unsigned char ucGreen2 = m_pencodingbitsRGB8->t.green2; + unsigned char ucBlue2 = m_pencodingbitsRGB8->t.blue2; + + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2); + + m_uiCW1 = (m_pencodingbitsRGB8->t.da << 1) + m_pencodingbitsRGB8->t.db; + + Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(); + + DecodePixels_T(); + + CalcBlockError(); + + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding if H mode is detected + // + void Block4x4Encoding_RGB8::InitFromEncodingBits_H(void) + { + + m_mode = MODE_H; + + unsigned char ucRed1 = m_pencodingbitsRGB8->h.red1; + unsigned char ucGreen1 = (unsigned char)((m_pencodingbitsRGB8->h.green1a << 1) + + m_pencodingbitsRGB8->h.green1b); + unsigned char ucBlue1 = (unsigned char)((m_pencodingbitsRGB8->h.blue1a << 3) + + 
(m_pencodingbitsRGB8->h.blue1b << 1) + + m_pencodingbitsRGB8->h.blue1c); + + unsigned char ucRed2 = m_pencodingbitsRGB8->h.red2; + unsigned char ucGreen2 = (unsigned char)((m_pencodingbitsRGB8->h.green2a << 1) + + m_pencodingbitsRGB8->h.green2b); + unsigned char ucBlue2 = m_pencodingbitsRGB8->h.blue2; + + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2); + + // used to determine the LSB of the CW + unsigned int uiRGB1 = (unsigned int)(((int)ucRed1 << 16) + ((int)ucGreen1 << 8) + (int)ucBlue1); + unsigned int uiRGB2 = (unsigned int)(((int)ucRed2 << 16) + ((int)ucGreen2 << 8) + (int)ucBlue2); + + m_uiCW1 = (m_pencodingbitsRGB8->h.da << 2) + (m_pencodingbitsRGB8->h.db << 1); + if (uiRGB1 >= uiRGB2) + { + m_uiCW1++; + } + + Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(); + + DecodePixels_H(); + + CalcBlockError(); + + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding if Planar mode is detected + // + void Block4x4Encoding_RGB8::InitFromEncodingBits_Planar(void) + { + + m_mode = MODE_PLANAR; + + unsigned char ucOriginRed = m_pencodingbitsRGB8->planar.originRed; + unsigned char ucOriginGreen = (unsigned char)((m_pencodingbitsRGB8->planar.originGreen1 << 6) + + m_pencodingbitsRGB8->planar.originGreen2); + unsigned char ucOriginBlue = (unsigned char)((m_pencodingbitsRGB8->planar.originBlue1 << 5) + + (m_pencodingbitsRGB8->planar.originBlue2 << 3) + + (m_pencodingbitsRGB8->planar.originBlue3 << 1) + + m_pencodingbitsRGB8->planar.originBlue4); + + unsigned char ucHorizRed = (unsigned char)((m_pencodingbitsRGB8->planar.horizRed1 << 1) + + m_pencodingbitsRGB8->planar.horizRed2); + unsigned char ucHorizGreen = m_pencodingbitsRGB8->planar.horizGreen; + unsigned char ucHorizBlue = (unsigned char)((m_pencodingbitsRGB8->planar.horizBlue1 << 5) + + 
m_pencodingbitsRGB8->planar.horizBlue2); + + unsigned char ucVertRed = (unsigned char)((m_pencodingbitsRGB8->planar.vertRed1 << 3) + + m_pencodingbitsRGB8->planar.vertRed2); + unsigned char ucVertGreen = (unsigned char)((m_pencodingbitsRGB8->planar.vertGreen1 << 2) + + m_pencodingbitsRGB8->planar.vertGreen2); + unsigned char ucVertBlue = m_pencodingbitsRGB8->planar.vertBlue; + + m_frgbaColor1 = ColorFloatRGBA::ConvertFromR6G7B6(ucOriginRed, ucOriginGreen, ucOriginBlue); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromR6G7B6(ucHorizRed, ucHorizGreen, ucHorizBlue); + m_frgbaColor3 = ColorFloatRGBA::ConvertFromR6G7B6(ucVertRed, ucVertGreen, ucVertBlue); + + DecodePixels_Planar(); + + CalcBlockError(); + + } + + // ---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + void Block4x4Encoding_RGB8::PerformIteration(float a_fEffort) + { + assert(!m_boolDone); + + switch (m_uiEncodingIterations) + { + case 0: + Block4x4Encoding_ETC1::PerformFirstIteration(); + if (m_boolDone) + { + break; + } + TryPlanar(0); + SetDoneIfPerfect(); + if (m_boolDone) + { + break; + } + TryTAndH(0); + break; + + case 1: + Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 1, 0, 0); + break; + + case 2: + Block4x4Encoding_ETC1::TryIndividual(m_boolMostLikelyFlip, 1); + break; + + case 3: + Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0); + break; + + case 4: + Block4x4Encoding_ETC1::TryIndividual(!m_boolMostLikelyFlip, 1); + break; + + case 5: + TryPlanar(1); + if (a_fEffort <= 49.5f) + { + m_boolDone = true; + } + break; + + case 6: + TryTAndH(1); + if (a_fEffort <= 59.5f) + { + m_boolDone = true; + } + break; + + case 7: + Block4x4Encoding_ETC1::TryDegenerates1(); 
+ if (a_fEffort <= 69.5f) + { + m_boolDone = true; + } + break; + + case 8: + Block4x4Encoding_ETC1::TryDegenerates2(); + if (a_fEffort <= 79.5f) + { + m_boolDone = true; + } + break; + + case 9: + Block4x4Encoding_ETC1::TryDegenerates3(); + if (a_fEffort <= 89.5f) + { + m_boolDone = true; + } + break; + + case 10: + Block4x4Encoding_ETC1::TryDegenerates4(); + m_boolDone = true; + break; + + default: + assert(0); + break; + } + + m_uiEncodingIterations++; + + SetDoneIfPerfect(); + } + + // ---------------------------------------------------------------------------------------------------- + // try encoding in Planar mode + // save this encoding if it improves the error + // + void Block4x4Encoding_RGB8::TryPlanar(unsigned int a_uiRadius) + { + Block4x4Encoding_RGB8 encodingTry = *this; + + // init "try" + { + encodingTry.m_mode = MODE_PLANAR; + encodingTry.m_boolDiff = true; + encodingTry.m_boolFlip = false; + } + + encodingTry.CalculatePlanarCornerColors(); + + encodingTry.DecodePixels_Planar(); + + encodingTry.CalcBlockError(); + + if (a_uiRadius > 0) + { + encodingTry.TwiddlePlanar(); + } + + if (encodingTry.m_fError < m_fError) + { + m_mode = MODE_PLANAR; + m_boolDiff = true; + m_boolFlip = false; + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_frgbaColor3 = encodingTry.m_frgbaColor3; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try encoding in T mode or H mode + // save this encoding if it improves the error + // + void Block4x4Encoding_RGB8::TryTAndH(unsigned int a_uiRadius) + { + + CalculateBaseColorsForTAndH(); + + TryT(a_uiRadius); + + TryH(a_uiRadius); + + } + + // 
---------------------------------------------------------------------------------------------------- + // calculate original values for base colors + // store them in m_frgbaOriginalColor1 and m_frgbaOriginalColor2 + // + void Block4x4Encoding_RGB8::CalculateBaseColorsForTAndH(void) + { + + bool boolRGBX = m_pblockParent->GetImageSource()->GetErrorMetric() == ErrorMetric::RGBX; + + ColorFloatRGBA frgbaBlockAverage = (m_frgbaSourceAverageLeft + m_frgbaSourceAverageRight) * 0.5f; + + // find pixel farthest from average gray line + unsigned int uiFarthestPixel = 0; + float fFarthestGrayDistance2 = 0.0f; + unsigned int uiTransparentPixels = 0; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + // don't count transparent + if (m_pafrgbaSource[uiPixel].fA == 0.0f && !boolRGBX) + { + uiTransparentPixels++; + } + else + { + float fGrayDistance2 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], frgbaBlockAverage); + + if (fGrayDistance2 > fFarthestGrayDistance2) + { + uiFarthestPixel = uiPixel; + fFarthestGrayDistance2 = fGrayDistance2; + } + } + } + // a transparent block should not reach this method + assert(uiTransparentPixels < PIXELS); + + // set the original base colors to: + // half way to the farthest pixel and + // the mirror color on the other side of the average + ColorFloatRGBA frgbaOffset = (m_pafrgbaSource[uiFarthestPixel] - frgbaBlockAverage) * 0.5f; + m_frgbaOriginalColor1_TAndH = (frgbaBlockAverage + frgbaOffset).QuantizeR4G4B4(); + m_frgbaOriginalColor2_TAndH = (frgbaBlockAverage - frgbaOffset).ClampRGB().QuantizeR4G4B4(); // the "other side" might be out of range + + // move base colors to find best fit + for (unsigned int uiIteration = 0; uiIteration < 10; uiIteration++) + { + // find the center of pixels closest to each color + float fPixelsCloserToColor1 = 0.0f; + ColorFloatRGBA frgbSumPixelsCloserToColor1; + float fPixelsCloserToColor2 = 0.0f; + ColorFloatRGBA frgbSumPixelsCloserToColor2; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; 
uiPixel++) + { + // don't count transparent pixels + if (m_pafrgbaSource[uiPixel].fA == 0.0f) + { + continue; + } + + float fGrayDistance2ToColor1 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], m_frgbaOriginalColor1_TAndH); + float fGrayDistance2ToColor2 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], m_frgbaOriginalColor2_TAndH); + + ColorFloatRGBA frgbaAlphaWeightedSource = m_pafrgbaSource[uiPixel] * m_pafrgbaSource[uiPixel].fA; + + if (fGrayDistance2ToColor1 <= fGrayDistance2ToColor2) + { + fPixelsCloserToColor1 += m_pafrgbaSource[uiPixel].fA; + frgbSumPixelsCloserToColor1 = frgbSumPixelsCloserToColor1 + frgbaAlphaWeightedSource; + } + else + { + fPixelsCloserToColor2 += m_pafrgbaSource[uiPixel].fA; + frgbSumPixelsCloserToColor2 = frgbSumPixelsCloserToColor2 + frgbaAlphaWeightedSource; + } + } + if (fPixelsCloserToColor1 == 0.0f || fPixelsCloserToColor2 == 0.0f) + { + break; + } + + ColorFloatRGBA frgbAvgColor1Pixels = (frgbSumPixelsCloserToColor1 * (1.0f / fPixelsCloserToColor1)).QuantizeR4G4B4(); + ColorFloatRGBA frgbAvgColor2Pixels = (frgbSumPixelsCloserToColor2 * (1.0f / fPixelsCloserToColor2)).QuantizeR4G4B4(); + + if (frgbAvgColor1Pixels.fR == m_frgbaOriginalColor1_TAndH.fR && + frgbAvgColor1Pixels.fG == m_frgbaOriginalColor1_TAndH.fG && + frgbAvgColor1Pixels.fB == m_frgbaOriginalColor1_TAndH.fB && + frgbAvgColor2Pixels.fR == m_frgbaOriginalColor2_TAndH.fR && + frgbAvgColor2Pixels.fG == m_frgbaOriginalColor2_TAndH.fG && + frgbAvgColor2Pixels.fB == m_frgbaOriginalColor2_TAndH.fB) + { + break; + } + + m_frgbaOriginalColor1_TAndH = frgbAvgColor1Pixels; + m_frgbaOriginalColor2_TAndH = frgbAvgColor2Pixels; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try encoding in T mode + // save this encoding if it improves the error + // + // since pixels that use base color1 don't use the distance table, color1 and color2 can be twiddled independently + // better encoding can be found if 
TWIDDLE_RADIUS is set to 2, but it will be much slower + // + void Block4x4Encoding_RGB8::TryT(unsigned int a_uiRadius) + { + Block4x4Encoding_RGB8 encodingTry = *this; + + // init "try" + { + encodingTry.m_mode = MODE_T; + encodingTry.m_boolDiff = true; + encodingTry.m_boolFlip = false; + encodingTry.m_fError = FLT_MAX; + } + + int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f); + int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f); + int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f); + + int iMinRed1 = iColor1Red - (int)a_uiRadius; + if (iMinRed1 < 0) + { + iMinRed1 = 0; + } + int iMaxRed1 = iColor1Red + (int)a_uiRadius; + if (iMaxRed1 > 15) + { + iMinRed1 = 15; + } + + int iMinGreen1 = iColor1Green - (int)a_uiRadius; + if (iMinGreen1 < 0) + { + iMinGreen1 = 0; + } + int iMaxGreen1 = iColor1Green + (int)a_uiRadius; + if (iMaxGreen1 > 15) + { + iMinGreen1 = 15; + } + + int iMinBlue1 = iColor1Blue - (int)a_uiRadius; + if (iMinBlue1 < 0) + { + iMinBlue1 = 0; + } + int iMaxBlue1 = iColor1Blue + (int)a_uiRadius; + if (iMaxBlue1 > 15) + { + iMinBlue1 = 15; + } + + int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f); + int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f); + int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f); + + int iMinRed2 = iColor2Red - (int)a_uiRadius; + if (iMinRed2 < 0) + { + iMinRed2 = 0; + } + int iMaxRed2 = iColor2Red + (int)a_uiRadius; + if (iMaxRed2 > 15) + { + iMinRed2 = 15; + } + + int iMinGreen2 = iColor2Green - (int)a_uiRadius; + if (iMinGreen2 < 0) + { + iMinGreen2 = 0; + } + int iMaxGreen2 = iColor2Green + (int)a_uiRadius; + if (iMaxGreen2 > 15) + { + iMinGreen2 = 15; + } + + int iMinBlue2 = iColor2Blue - (int)a_uiRadius; + if (iMinBlue2 < 0) + { + iMinBlue2 = 0; + } + int iMaxBlue2 = iColor2Blue + (int)a_uiRadius; + if (iMaxBlue2 > 15) + { + iMinBlue2 = 15; + } + + for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++) + { + encodingTry.m_uiCW1 = uiDistance; + 
+ // twiddle m_frgbaOriginalColor2_TAndH + // twiddle color2 first, since it affects 3 selectors, while color1 only affects one selector + // + for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++) + { + for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++) + { + for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++) + { + for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++) + { + if (uiBaseColorSwaps == 0) + { + encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH; + encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); + } + else + { + encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); + encodingTry.m_frgbaColor2 = m_frgbaOriginalColor1_TAndH; + } + + encodingTry.TryT_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + } + } + } + } + } + + // twiddle m_frgbaOriginalColor1_TAndH + for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++) + { + for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++) + { + for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++) + { + for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++) + { + if (uiBaseColorSwaps == 0) + { + encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); + encodingTry.m_frgbaColor2 = 
m_frgbaOriginalColor2_TAndH; + } + else + { + encodingTry.m_frgbaColor1 = m_frgbaOriginalColor2_TAndH; + encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); + } + + encodingTry.TryT_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + } + } + } + } + } + + } + + } + + // ---------------------------------------------------------------------------------------------------- + // find best selector combination for TryT + // called on an encodingTry + // + void Block4x4Encoding_RGB8::TryT_BestSelectorCombination(void) + { + + float fDistance = s_afTHDistanceTable[m_uiCW1]; + + unsigned int auiBestPixelSelectors[PIXELS]; + float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, + FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + ColorFloatRGBA afrgbaBestDecodedPixels[PIXELS]; + ColorFloatRGBA afrgbaDecodedPixel[SELECTORS]; + + assert(SELECTORS == 4); + afrgbaDecodedPixel[0] = m_frgbaColor1; + afrgbaDecodedPixel[1] = (m_frgbaColor2 + fDistance).ClampRGB(); + afrgbaDecodedPixel[2] = m_frgbaColor2; + afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB(); + + // try each selector + for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) + { + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + + float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], 
m_afDecodedAlphas[uiPixel], + m_pafrgbaSource[uiPixel]); + + if (fPixelError < afBestPixelErrors[uiPixel]) + { + afBestPixelErrors[uiPixel] = fPixelError; + auiBestPixelSelectors[uiPixel] = uiSelector; + afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector]; + } + } + } + + + // add up all of the pixel errors + float fBlockError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + fBlockError += afBestPixelErrors[uiPixel]; + } + + if (fBlockError < m_fError) + { + m_fError = fBlockError; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel]; + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try encoding in T mode + // save this encoding if it improves the error + // + // since all pixels use the distance table, color1 and color2 can NOT be twiddled independently + // TWIDDLE_RADIUS of 2 is WAY too slow + // + void Block4x4Encoding_RGB8::TryH(unsigned int a_uiRadius) + { + Block4x4Encoding_RGB8 encodingTry = *this; + + // init "try" + { + encodingTry.m_mode = MODE_H; + encodingTry.m_boolDiff = true; + encodingTry.m_boolFlip = false; + encodingTry.m_fError = FLT_MAX; + } + + int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f); + int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f); + int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f); + + int iMinRed1 = iColor1Red - (int)a_uiRadius; + if (iMinRed1 < 0) + { + iMinRed1 = 0; + } + int iMaxRed1 = iColor1Red + (int)a_uiRadius; + if (iMaxRed1 > 15) + { + iMinRed1 = 15; + } + + int iMinGreen1 = iColor1Green - (int)a_uiRadius; + if (iMinGreen1 < 0) + { + iMinGreen1 = 0; + } + int iMaxGreen1 = iColor1Green + (int)a_uiRadius; + if (iMaxGreen1 > 15) + { + iMinGreen1 = 15; + } + + int iMinBlue1 = iColor1Blue - (int)a_uiRadius; + if (iMinBlue1 < 0) + { + 
iMinBlue1 = 0; + } + int iMaxBlue1 = iColor1Blue + (int)a_uiRadius; + if (iMaxBlue1 > 15) + { + iMinBlue1 = 15; + } + + int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f); + int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f); + int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f); + + int iMinRed2 = iColor2Red - (int)a_uiRadius; + if (iMinRed2 < 0) + { + iMinRed2 = 0; + } + int iMaxRed2 = iColor2Red + (int)a_uiRadius; + if (iMaxRed2 > 15) + { + iMinRed2 = 15; + } + + int iMinGreen2 = iColor2Green - (int)a_uiRadius; + if (iMinGreen2 < 0) + { + iMinGreen2 = 0; + } + int iMaxGreen2 = iColor2Green + (int)a_uiRadius; + if (iMaxGreen2 > 15) + { + iMinGreen2 = 15; + } + + int iMinBlue2 = iColor2Blue - (int)a_uiRadius; + if (iMinBlue2 < 0) + { + iMinBlue2 = 0; + } + int iMaxBlue2 = iColor2Blue + (int)a_uiRadius; + if (iMaxBlue2 > 15) + { + iMinBlue2 = 15; + } + + for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++) + { + encodingTry.m_uiCW1 = uiDistance; + + // twiddle m_frgbaOriginalColor1_TAndH + for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++) + { + for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++) + { + for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++) + { + encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); + encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH; + + // if color1 == color2, H encoding issues can pop up, so abort + if (iRed1 == iColor2Red && iGreen1 == iColor2Green && iBlue1 == iColor2Blue) + { + continue; + } + + encodingTry.TryH_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) 
+ { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + } + } + } + } + + // twiddle m_frgbaOriginalColor2_TAndH + for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++) + { + for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++) + { + for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++) + { + encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH; + encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); + + // if color1 == color2, H encoding issues can pop up, so abort + if (iRed2 == iColor1Red && iGreen2 == iColor1Green && iBlue2 == iColor1Blue) + { + continue; + } + + encodingTry.TryH_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + } + } + } + } + + } + + } + + // ---------------------------------------------------------------------------------------------------- + // find best selector combination for TryH + // called on an encodingTry + // + void Block4x4Encoding_RGB8::TryH_BestSelectorCombination(void) + { + + float fDistance = s_afTHDistanceTable[m_uiCW1]; + + unsigned int auiBestPixelSelectors[PIXELS]; + float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, + FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + ColorFloatRGBA 
afrgbaBestDecodedPixels[PIXELS]; + ColorFloatRGBA afrgbaDecodedPixel[SELECTORS]; + + assert(SELECTORS == 4); + afrgbaDecodedPixel[0] = (m_frgbaColor1 + fDistance).ClampRGB(); + afrgbaDecodedPixel[1] = (m_frgbaColor1 - fDistance).ClampRGB(); + afrgbaDecodedPixel[2] = (m_frgbaColor2 + fDistance).ClampRGB(); + afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB(); + + // try each selector + for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) + { + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + + float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel], + m_pafrgbaSource[uiPixel]); + + if (fPixelError < afBestPixelErrors[uiPixel]) + { + afBestPixelErrors[uiPixel] = fPixelError; + auiBestPixelSelectors[uiPixel] = uiSelector; + afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector]; + } + } + } + + + // add up all of the pixel errors + float fBlockError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + fBlockError += afBestPixelErrors[uiPixel]; + } + + if (fBlockError < m_fError) + { + m_fError = fBlockError; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel]; + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // use linear regression to find the best fit for colors along the edges of the 4x4 block + // + void Block4x4Encoding_RGB8::CalculatePlanarCornerColors(void) + { + ColorFloatRGBA afrgbaRegression[MAX_PLANAR_REGRESSION_SIZE]; + ColorFloatRGBA frgbaSlope; + ColorFloatRGBA frgbaOffset; + + // top edge + afrgbaRegression[0] = m_pafrgbaSource[0]; + afrgbaRegression[1] = m_pafrgbaSource[4]; + afrgbaRegression[2] = m_pafrgbaSource[8]; + afrgbaRegression[3] = m_pafrgbaSource[12]; + ColorRegression(afrgbaRegression, 4, &frgbaSlope, 
&frgbaOffset); + m_frgbaColor1 = frgbaOffset; + m_frgbaColor2 = (frgbaSlope * 4.0f) + frgbaOffset; + + // left edge + afrgbaRegression[0] = m_pafrgbaSource[0]; + afrgbaRegression[1] = m_pafrgbaSource[1]; + afrgbaRegression[2] = m_pafrgbaSource[2]; + afrgbaRegression[3] = m_pafrgbaSource[3]; + ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset); + m_frgbaColor1 = (m_frgbaColor1 + frgbaOffset) * 0.5f; // average with top edge + m_frgbaColor3 = (frgbaSlope * 4.0f) + frgbaOffset; + + // right edge + afrgbaRegression[0] = m_pafrgbaSource[12]; + afrgbaRegression[1] = m_pafrgbaSource[13]; + afrgbaRegression[2] = m_pafrgbaSource[14]; + afrgbaRegression[3] = m_pafrgbaSource[15]; + ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset); + m_frgbaColor2 = (m_frgbaColor2 + frgbaOffset) * 0.5f; // average with top edge + + // bottom edge + afrgbaRegression[0] = m_pafrgbaSource[3]; + afrgbaRegression[1] = m_pafrgbaSource[7]; + afrgbaRegression[2] = m_pafrgbaSource[11]; + afrgbaRegression[3] = m_pafrgbaSource[15]; + ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset); + m_frgbaColor3 = (m_frgbaColor3 + frgbaOffset) * 0.5f; // average with left edge + + // quantize corner colors to 6/7/6 + m_frgbaColor1 = m_frgbaColor1.QuantizeR6G7B6(); + m_frgbaColor2 = m_frgbaColor2.QuantizeR6G7B6(); + m_frgbaColor3 = m_frgbaColor3.QuantizeR6G7B6(); + + } + + // ---------------------------------------------------------------------------------------------------- + // try different corner colors by slightly changing R, G and B independently + // + // R, G and B decoding and errors are independent, so R, G and B twiddles can be independent + // + // return true if improvement + // + bool Block4x4Encoding_RGB8::TwiddlePlanar(void) + { + bool boolImprovement = false; + + while (TwiddlePlanarR()) + { + boolImprovement = true; + } + + while (TwiddlePlanarG()) + { + boolImprovement = true; + } + + while (TwiddlePlanarB()) + { + boolImprovement = true; + } + + return 
boolImprovement; + } + + // ---------------------------------------------------------------------------------------------------- + // try different corner colors by slightly changing R + // + bool Block4x4Encoding_RGB8::TwiddlePlanarR() + { + bool boolImprovement = false; + + Block4x4Encoding_RGB8 encodingTry = *this; + + // init "try" + { + encodingTry.m_mode = MODE_PLANAR; + encodingTry.m_boolDiff = true; + encodingTry.m_boolFlip = false; + } + + int iOriginRed = encodingTry.m_frgbaColor1.IntRed(63.0f); + int iHorizRed = encodingTry.m_frgbaColor2.IntRed(63.0f); + int iVertRed = encodingTry.m_frgbaColor3.IntRed(63.0f); + + for (int iTryOriginRed = iOriginRed - 1; iTryOriginRed <= iOriginRed + 1; iTryOriginRed++) + { + // check for out of range + if (iTryOriginRed < 0 || iTryOriginRed > 63) + { + continue; + } + + encodingTry.m_frgbaColor1.fR = ((iTryOriginRed << 2) + (iTryOriginRed >> 4)) / 255.0f; + + for (int iTryHorizRed = iHorizRed - 1; iTryHorizRed <= iHorizRed + 1; iTryHorizRed++) + { + // check for out of range + if (iTryHorizRed < 0 || iTryHorizRed > 63) + { + continue; + } + + encodingTry.m_frgbaColor2.fR = ((iTryHorizRed << 2) + (iTryHorizRed >> 4)) / 255.0f; + + for (int iTryVertRed = iVertRed - 1; iTryVertRed <= iVertRed + 1; iTryVertRed++) + { + // check for out of range + if (iTryVertRed < 0 || iTryVertRed > 63) + { + continue; + } + + // don't bother with null twiddle + if (iTryOriginRed == iOriginRed && iTryHorizRed == iHorizRed && iTryVertRed == iVertRed) + { + continue; + } + + encodingTry.m_frgbaColor3.fR = ((iTryVertRed << 2) + (iTryVertRed >> 4)) / 255.0f; + + encodingTry.DecodePixels_Planar(); + + encodingTry.CalcBlockError(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = MODE_PLANAR; + m_boolDiff = true; + m_boolFlip = false; + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_frgbaColor3 = encodingTry.m_frgbaColor3; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + 
// NOTE(review): the statements down to the first "return boolImprovement;" are the tail of a
// function whose head lies before this span (presumably TwiddlePlanarR -- confirm).
							m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
						}

						m_fError = encodingTry.m_fError;

						boolImprovement = true;
					}
				}
			}
		}

		return boolImprovement;
	}

	// ----------------------------------------------------------------------------------------------------
	// try different corner colors by slightly changing G
	//
	// every combination of -1/0/+1 steps on the 7-bit green of the three planar corner colors is tried
	// (out-of-range values and the unchanged combination are skipped)
	// each candidate is decoded and measured; a candidate with a lower error replaces the current encoding
	//
	// return true if at least one candidate was adopted
	//
	bool Block4x4Encoding_RGB8::TwiddlePlanarG()
	{
		bool boolImprovement = false;

		// work on a copy so that rejected candidates never touch this encoding
		Block4x4Encoding_RGB8 encodingTry = *this;

		// init "try"
		{
			encodingTry.m_mode = MODE_PLANAR;
			encodingTry.m_boolDiff = true;
			encodingTry.m_boolFlip = false;
		}

		// NOTE(review): IntGreen() is defined elsewhere; presumably it quantizes the channel to 0..127 -- confirm
		int iOriginGreen = encodingTry.m_frgbaColor1.IntGreen(127.0f);
		int iHorizGreen = encodingTry.m_frgbaColor2.IntGreen(127.0f);
		int iVertGreen = encodingTry.m_frgbaColor3.IntGreen(127.0f);

		for (int iTryOriginGreen = iOriginGreen - 1; iTryOriginGreen <= iOriginGreen + 1; iTryOriginGreen++)
		{
			// check for out of range
			if (iTryOriginGreen < 0 || iTryOriginGreen > 127)
			{
				continue;
			}

			// widen 7 bits to 8 bits by copying the top bit into the new low bit
			encodingTry.m_frgbaColor1.fG = ((iTryOriginGreen << 1) + (iTryOriginGreen >> 6)) / 255.0f;

			for (int iTryHorizGreen = iHorizGreen - 1; iTryHorizGreen <= iHorizGreen + 1; iTryHorizGreen++)
			{
				// check for out of range
				if (iTryHorizGreen < 0 || iTryHorizGreen > 127)
				{
					continue;
				}

				encodingTry.m_frgbaColor2.fG = ((iTryHorizGreen << 1) + (iTryHorizGreen >> 6)) / 255.0f;

				for (int iTryVertGreen = iVertGreen - 1; iTryVertGreen <= iVertGreen + 1; iTryVertGreen++)
				{
					// check for out of range
					if (iTryVertGreen < 0 || iTryVertGreen > 127)
					{
						continue;
					}

					// don't bother with null twiddle
					if (iTryOriginGreen == iOriginGreen &&
						iTryHorizGreen == iHorizGreen &&
						iTryVertGreen == iVertGreen)
					{
						continue;
					}

					encodingTry.m_frgbaColor3.fG = ((iTryVertGreen << 1) + (iTryVertGreen >> 6)) / 255.0f;

					encodingTry.DecodePixels_Planar();

					encodingTry.CalcBlockError();

					// adopt the candidate only if it is strictly better than what we have
					if (encodingTry.m_fError < m_fError)
					{
						m_mode = MODE_PLANAR;
						m_boolDiff = true;
						m_boolFlip = false;
						m_frgbaColor1 = encodingTry.m_frgbaColor1;
						m_frgbaColor2 = encodingTry.m_frgbaColor2;
						m_frgbaColor3 = encodingTry.m_frgbaColor3;

						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
						{
							m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
						}

						m_fError = encodingTry.m_fError;

						boolImprovement = true;
					}
				}
			}
		}

		return boolImprovement;
	}

	// ----------------------------------------------------------------------------------------------------
	// try different corner colors by slightly changing B
	//
	// same search as TwiddlePlanarG, applied to the 6-bit blue of the three planar corner colors
	//
	// return true if at least one candidate was adopted
	//
	bool Block4x4Encoding_RGB8::TwiddlePlanarB()
	{
		bool boolImprovement = false;

		// work on a copy so that rejected candidates never touch this encoding
		Block4x4Encoding_RGB8 encodingTry = *this;

		// init "try"
		{
			encodingTry.m_mode = MODE_PLANAR;
			encodingTry.m_boolDiff = true;
			encodingTry.m_boolFlip = false;
		}

		// NOTE(review): IntBlue() is defined elsewhere; presumably it quantizes the channel to 0..63 -- confirm
		int iOriginBlue = encodingTry.m_frgbaColor1.IntBlue(63.0f);
		int iHorizBlue = encodingTry.m_frgbaColor2.IntBlue(63.0f);
		int iVertBlue = encodingTry.m_frgbaColor3.IntBlue(63.0f);

		for (int iTryOriginBlue = iOriginBlue - 1; iTryOriginBlue <= iOriginBlue + 1; iTryOriginBlue++)
		{
			// check for out of range
			if (iTryOriginBlue < 0 || iTryOriginBlue > 63)
			{
				continue;
			}

			// widen 6 bits to 8 bits by copying the top two bits into the new low bits
			encodingTry.m_frgbaColor1.fB = ((iTryOriginBlue << 2) + (iTryOriginBlue >> 4)) / 255.0f;

			for (int iTryHorizBlue = iHorizBlue - 1; iTryHorizBlue <= iHorizBlue + 1; iTryHorizBlue++)
			{
				// check for out of range
				if (iTryHorizBlue < 0 || iTryHorizBlue > 63)
				{
					continue;
				}

				encodingTry.m_frgbaColor2.fB = ((iTryHorizBlue << 2) + (iTryHorizBlue >> 4)) / 255.0f;

				for (int iTryVertBlue = iVertBlue - 1; iTryVertBlue <= iVertBlue + 1; iTryVertBlue++)
				{
					// check for out of range
					if (iTryVertBlue < 0 || iTryVertBlue > 63)
					{
						continue;
					}

					// don't bother with null twiddle
					if (iTryOriginBlue == iOriginBlue && iTryHorizBlue == iHorizBlue && iTryVertBlue == iVertBlue)
					{
						continue;
					}

					encodingTry.m_frgbaColor3.fB = ((iTryVertBlue << 2) + (iTryVertBlue >> 4)) / 255.0f;

					encodingTry.DecodePixels_Planar();

					encodingTry.CalcBlockError();

					// adopt the candidate only if it is strictly better than what we have
					if (encodingTry.m_fError < m_fError)
					{
						m_mode = MODE_PLANAR;
						m_boolDiff = true;
						m_boolFlip = false;
						m_frgbaColor1 = encodingTry.m_frgbaColor1;
						m_frgbaColor2 = encodingTry.m_frgbaColor2;
						m_frgbaColor3 = encodingTry.m_frgbaColor3;

						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
						{
							m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
						}

						m_fError = encodingTry.m_fError;

						boolImprovement = true;
					}
				}
			}
		}

		return boolImprovement;
	}

	// ----------------------------------------------------------------------------------------------------
	// set the encoding bits based on encoding state
	//
	// dispatches on m_mode to the matching per-mode writer; any other mode is a programming error
	//
	void Block4x4Encoding_RGB8::SetEncodingBits(void)
	{

		switch (m_mode)
		{
		case MODE_ETC1:
			Block4x4Encoding_ETC1::SetEncodingBits();
			break;

		case MODE_T:
			SetEncodingBits_T();
			break;

		case MODE_H:
			SetEncodingBits_H();
			break;

		case MODE_PLANAR:
			SetEncodingBits_Planar();
			break;

		default:
			assert(false);
		}

	}

	// ----------------------------------------------------------------------------------------------------
	// set the encoding bits based on encoding state for T mode
	//
	// the two base colors are written as 4 bits per channel; red1 and the distance index m_uiCW1 are
	// split across two bitfields each
	//
	void Block4x4Encoding_RGB8::SetEncodingBits_T(void)
	{
		static const bool SANITY_CHECK = true;

		assert(m_mode == MODE_T);
		assert(m_boolDiff == true);

		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);

		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);

		// NOTE(review): field widths are declared elsewhere; the assignments presumably rely on
		// bitfield truncation to keep only the low bits of each value -- confirm
		m_pencodingbitsRGB8->t.red1a = uiRed1 >> 2;
		m_pencodingbitsRGB8->t.red1b = uiRed1;
		m_pencodingbitsRGB8->t.green1 = uiGreen1;
		m_pencodingbitsRGB8->t.blue1 = uiBlue1;

		m_pencodingbitsRGB8->t.red2 = uiRed2;
		m_pencodingbitsRGB8->t.green2 = uiGreen2;
		m_pencodingbitsRGB8->t.blue2 = uiBlue2;

		m_pencodingbitsRGB8->t.da = m_uiCW1 >> 1;
		m_pencodingbitsRGB8->t.db = m_uiCW1;

		m_pencodingbitsRGB8->t.diff = 1;

		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();

		// create an invalid R differential to trigger T mode
		// (the detect bits are chosen after reading the same bytes back through the differential view)
		m_pencodingbitsRGB8->t.detect1 = 0;
		m_pencodingbitsRGB8->t.detect2 = 0;
		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
		if (iRed2 >= 4)
		{
			m_pencodingbitsRGB8->t.detect1 = 7;
			m_pencodingbitsRGB8->t.detect2 = 0;
		}
		else
		{
			m_pencodingbitsRGB8->t.detect1 = 0;
			m_pencodingbitsRGB8->t.detect2 = 1;
		}

		if (SANITY_CHECK)
		{
			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;

			// make sure red overflows
			assert(iRed2 < 0 || iRed2 > 31);
		}

	}

	// ----------------------------------------------------------------------------------------------------
	// set the encoding bits based on encoding state for H mode
	//
	// colors and selectors may need to swap in order to generate lsb of distance index
	//
	void Block4x4Encoding_RGB8::SetEncodingBits_H(void)
	{
		static const bool SANITY_CHECK = true;

		assert(m_mode == MODE_H);
		assert(m_boolDiff == true);

		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);

		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);

		// pack each color into one integer so the two colors can be ordered
		unsigned int uiColor1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1;
		unsigned int uiColor2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2;

		// only the upper bits of m_uiCW1 are stored (da/db below); the low bit is conveyed by
		// the order in which the two colors are written
		bool boolOddDistance = m_uiCW1 & 1;
		bool boolSwapColors = (uiColor1 < uiColor2) ^ !boolOddDistance;

		if (boolSwapColors)
		{
			m_pencodingbitsRGB8->h.red1 = uiRed2;
			m_pencodingbitsRGB8->h.green1a = uiGreen2 >> 1;
			m_pencodingbitsRGB8->h.green1b = uiGreen2;
			m_pencodingbitsRGB8->h.blue1a = uiBlue2 >> 3;
			m_pencodingbitsRGB8->h.blue1b = uiBlue2 >> 1;
			m_pencodingbitsRGB8->h.blue1c = uiBlue2;

			m_pencodingbitsRGB8->h.red2 = uiRed1;
			m_pencodingbitsRGB8->h.green2a = uiGreen1 >> 1;
			m_pencodingbitsRGB8->h.green2b = uiGreen1;
			m_pencodingbitsRGB8->h.blue2 = uiBlue1;

			m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
			m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
		}
		else
		{
			m_pencodingbitsRGB8->h.red1 = uiRed1;
			m_pencodingbitsRGB8->h.green1a = uiGreen1 >> 1;
			m_pencodingbitsRGB8->h.green1b = uiGreen1;
			m_pencodingbitsRGB8->h.blue1a = uiBlue1 >> 3;
			m_pencodingbitsRGB8->h.blue1b = uiBlue1 >> 1;
			m_pencodingbitsRGB8->h.blue1c = uiBlue1;

			m_pencodingbitsRGB8->h.red2 = uiRed2;
			m_pencodingbitsRGB8->h.green2a = uiGreen2 >> 1;
			m_pencodingbitsRGB8->h.green2b = uiGreen2;
			m_pencodingbitsRGB8->h.blue2 = uiBlue2;

			m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
			m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
		}

		m_pencodingbitsRGB8->h.diff = 1;

		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();

		// the colors were written in swapped order, so flip the low 16 selector bits to match
		if (boolSwapColors)
		{
			m_pencodingbitsRGB8->h.selectors ^= 0x0000FFFF;
		}

		// create a valid R differential and an invalid G differential to trigger H mode
		m_pencodingbitsRGB8->h.detect1 = 0;
		m_pencodingbitsRGB8->h.detect2 = 0;
		m_pencodingbitsRGB8->h.detect3 = 0;
		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
		int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
		if (iRed2 < 0 || iRed2 > 31)
		{
			m_pencodingbitsRGB8->h.detect1 = 1;
		}
		if (iGreen2 >= 4)
		{
			m_pencodingbitsRGB8->h.detect2 = 7;
			m_pencodingbitsRGB8->h.detect3 = 0;
		}
		else
		{
			m_pencodingbitsRGB8->h.detect2 = 0;
			m_pencodingbitsRGB8->h.detect3 = 1;
		}

		if (SANITY_CHECK)
		{
			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
			iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;

			// make sure red doesn't overflow and green does
			assert(iRed2 >= 0 && iRed2 <= 31);
			assert(iGreen2 < 0 || iGreen2 > 31);
		}

	}

	// ----------------------------------------------------------------------------------------------------
	// set the encoding bits based on encoding state for Planar mode
	//
	// the three corner colors (origin, horizontal, vertical) are written as 6-bit red, 7-bit green and
	// 6-bit blue; several channels are split across more than one bitfield
	//
	void Block4x4Encoding_RGB8::SetEncodingBits_Planar(void)
	{
		static const bool SANITY_CHECK = true;

		assert(m_mode == MODE_PLANAR);
		assert(m_boolDiff == true);

		unsigned int uiOriginRed = (unsigned int)m_frgbaColor1.IntRed(63.0f);
		unsigned int uiOriginGreen = (unsigned int)m_frgbaColor1.IntGreen(127.0f);
		unsigned int uiOriginBlue = (unsigned int)m_frgbaColor1.IntBlue(63.0f);

		unsigned int uiHorizRed = (unsigned int)m_frgbaColor2.IntRed(63.0f);
		unsigned int uiHorizGreen = (unsigned int)m_frgbaColor2.IntGreen(127.0f);
		unsigned int uiHorizBlue = (unsigned int)m_frgbaColor2.IntBlue(63.0f);

		unsigned int uiVertRed = (unsigned int)m_frgbaColor3.IntRed(63.0f);
		unsigned int uiVertGreen = (unsigned int)m_frgbaColor3.IntGreen(127.0f);
		unsigned int uiVertBlue = (unsigned int)m_frgbaColor3.IntBlue(63.0f);

		// NOTE(review): field widths are declared elsewhere; the assignments presumably rely on
		// bitfield truncation to keep only the low bits of each shifted value -- confirm
		m_pencodingbitsRGB8->planar.originRed = uiOriginRed;
		m_pencodingbitsRGB8->planar.originGreen1 = uiOriginGreen >> 6;
		m_pencodingbitsRGB8->planar.originGreen2 = uiOriginGreen;
		m_pencodingbitsRGB8->planar.originBlue1 = uiOriginBlue >> 5;
		m_pencodingbitsRGB8->planar.originBlue2 = uiOriginBlue >> 3;
		m_pencodingbitsRGB8->planar.originBlue3 = uiOriginBlue >> 1;
		m_pencodingbitsRGB8->planar.originBlue4 = uiOriginBlue;

		m_pencodingbitsRGB8->planar.horizRed1 = uiHorizRed >> 1;
		m_pencodingbitsRGB8->planar.horizRed2 = uiHorizRed;
		m_pencodingbitsRGB8->planar.horizGreen = uiHorizGreen;
		m_pencodingbitsRGB8->planar.horizBlue1 = uiHorizBlue >> 5;
		m_pencodingbitsRGB8->planar.horizBlue2 = uiHorizBlue;

		m_pencodingbitsRGB8->planar.vertRed1 = uiVertRed >> 3;
		m_pencodingbitsRGB8->planar.vertRed2 = uiVertRed;
		m_pencodingbitsRGB8->planar.vertGreen1 = uiVertGreen >> 2;
		m_pencodingbitsRGB8->planar.vertGreen2 = uiVertGreen;
		m_pencodingbitsRGB8->planar.vertBlue = uiVertBlue;

		m_pencodingbitsRGB8->planar.diff = 1;

		// create valid RG differentials and an invalid B differential to trigger planar mode
		m_pencodingbitsRGB8->planar.detect1 = 0;
		m_pencodingbitsRGB8->planar.detect2 = 0;
		m_pencodingbitsRGB8->planar.detect3 = 0;
		m_pencodingbitsRGB8->planar.detect4 = 0;
		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
		int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
		int iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2;
		if (iRed2 < 0 || iRed2 > 31)
		{
			m_pencodingbitsRGB8->planar.detect1 = 1;
		}
		if (iGreen2 < 0 || iGreen2 > 31)
		{
			m_pencodingbitsRGB8->planar.detect2 = 1;
		}
		if (iBlue2 >= 4)
		{
			m_pencodingbitsRGB8->planar.detect3 = 7;
			m_pencodingbitsRGB8->planar.detect4 = 0;
		}
		else
		{
			m_pencodingbitsRGB8->planar.detect3 = 0;
			m_pencodingbitsRGB8->planar.detect4 = 1;
		}

		if (SANITY_CHECK)
		{
			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
			iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
			iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2;

			// make sure red and green don't overflow and blue does
			assert(iRed2 >= 0 && iRed2 <= 31);
			assert(iGreen2 >= 0 && iGreen2 <= 31);
			assert(iBlue2 < 0 || iBlue2 > 31);
		}

	}

	// ----------------------------------------------------------------------------------------------------
	// set the decoded colors and decoded alpha based on the encoding state for T mode
	//
	// selector 0 yields color1; selectors 1, 2 and 3 yield color2 plus, exactly, and minus the distance
	// looked up from s_afTHDistanceTable[m_uiCW1]
	//
	void Block4x4Encoding_RGB8::DecodePixels_T(void)
	{

		float fDistance = s_afTHDistanceTable[m_uiCW1];
		// same offset on r, g and b; the fourth component is left at zero
		ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f);

		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
		{
			switch (m_auiSelectors[uiPixel])
			{
			case 0:
				m_afrgbaDecodedColors[uiPixel] = m_frgbaColor1;
				break;

			case 1:
				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB();
				break;

			case 2:
				m_afrgbaDecodedColors[uiPixel] = m_frgbaColor2;
				break;

			case 3:
				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB();
				break;
			}

		}

	}

	// ----------------------------------------------------------------------------------------------------
	// set the decoded colors and decoded alpha based on the encoding state for H mode
	//
	// selectors 0 and 1 yield color1 plus and minus the distance; selectors 2 and 3 do the same for color2
	//
	void Block4x4Encoding_RGB8::DecodePixels_H(void)
	{

		float fDistance = s_afTHDistanceTable[m_uiCW1];
		// same offset on r, g and b; the fourth component is left at zero
		ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f);

		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
		{
			switch (m_auiSelectors[uiPixel])
			{
			case 0:
				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 + frgbaDistance).ClampRGB();
				break;

			case 1:
				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 - frgbaDistance).ClampRGB();
				break;

			case 2:
				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB();
				break;

			case 3:
				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB();
				break;
			}

		}

	}

	// ----------------------------------------------------------------------------------------------------
	// set the decoded colors and decoded alpha based on the encoding state for Planar mode
+ // + void Block4x4Encoding_RGB8::DecodePixels_Planar(void) + { + + int iRO = (int)roundf(m_frgbaColor1.fR * 255.0f); + int iGO = (int)roundf(m_frgbaColor1.fG * 255.0f); + int iBO = (int)roundf(m_frgbaColor1.fB * 255.0f); + + int iRH = (int)roundf(m_frgbaColor2.fR * 255.0f); + int iGH = (int)roundf(m_frgbaColor2.fG * 255.0f); + int iBH = (int)roundf(m_frgbaColor2.fB * 255.0f); + + int iRV = (int)roundf(m_frgbaColor3.fR * 255.0f); + int iGV = (int)roundf(m_frgbaColor3.fG * 255.0f); + int iBV = (int)roundf(m_frgbaColor3.fB * 255.0f); + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + int iX = (int)(uiPixel >> 2); + int iY = (int)(uiPixel & 3); + + int iR = (iX*(iRH - iRO) + iY*(iRV - iRO) + 4*iRO + 2) >> 2; + int iG = (iX*(iGH - iGO) + iY*(iGV - iGO) + 4*iGO + 2) >> 2; + int iB = (iX*(iBH - iBO) + iY*(iBV - iBO) + 4*iBO + 2) >> 2; + + ColorFloatRGBA frgba; + frgba.fR = (float)iR / 255.0f; + frgba.fG = (float)iG / 255.0f; + frgba.fB = (float)iB / 255.0f; + frgba.fA = 1.0f; + + m_afrgbaDecodedColors[uiPixel] = frgba.ClampRGB(); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // perform a linear regression for the a_uiPixels in a_pafrgbaPixels[] + // + // output the closest color line using a_pfrgbaSlope and a_pfrgbaOffset + // + void Block4x4Encoding_RGB8::ColorRegression(ColorFloatRGBA *a_pafrgbaPixels, unsigned int a_uiPixels, + ColorFloatRGBA *a_pfrgbaSlope, ColorFloatRGBA *a_pfrgbaOffset) + { + typedef struct + { + float f[4]; + } Float4; + + Float4 *paf4Pixels = (Float4 *)(a_pafrgbaPixels); + Float4 *pf4Slope = (Float4 *)(a_pfrgbaSlope); + Float4 *pf4Offset = (Float4 *)(a_pfrgbaOffset); + + float afX[MAX_PLANAR_REGRESSION_SIZE]; + float afY[MAX_PLANAR_REGRESSION_SIZE]; + + // handle r, g and b separately. 
don't bother with a + for (unsigned int uiComponent = 0; uiComponent < 3; uiComponent++) + { + for (unsigned int uiPixel = 0; uiPixel < a_uiPixels; uiPixel++) + { + afX[uiPixel] = (float)uiPixel; + afY[uiPixel] = paf4Pixels[uiPixel].f[uiComponent]; + + } + Etc::Regression(afX, afY, a_uiPixels, + &(pf4Slope->f[uiComponent]), &(pf4Offset->f[uiComponent])); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // +} diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.h new file mode 100644 index 0000000000..03754d5e3b --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8.h @@ -0,0 +1,96 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "EtcBlock4x4Encoding_ETC1.h" + +namespace Etc +{ + + class Block4x4Encoding_RGB8 : public Block4x4Encoding_ETC1 + { + public: + + Block4x4Encoding_RGB8(void); + virtual ~Block4x4Encoding_RGB8(void); + + virtual void InitFromEncodingBits(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + ColorFloatRGBA *a_pafrgbaSource, + + ErrorMetric a_errormetric); + + virtual void PerformIteration(float a_fEffort); + + virtual void SetEncodingBits(void); + + inline ColorFloatRGBA GetColor3(void) const + { + return m_frgbaColor3; + } + + protected: + + static const unsigned int PLANAR_CORNER_COLORS = 3; + static const unsigned int MAX_PLANAR_REGRESSION_SIZE = 4; + static const unsigned int TH_DISTANCES = 8; + + static float s_afTHDistanceTable[TH_DISTANCES]; + + void TryPlanar(unsigned int a_uiRadius); + void TryTAndH(unsigned int a_uiRadius); + + void InitFromEncodingBits_Planar(void); + + ColorFloatRGBA m_frgbaColor3; // used for planar + + void SetEncodingBits_T(void); + void SetEncodingBits_H(void); + void SetEncodingBits_Planar(void); + + // state shared between iterations + ColorFloatRGBA m_frgbaOriginalColor1_TAndH; + ColorFloatRGBA m_frgbaOriginalColor2_TAndH; + + void CalculateBaseColorsForTAndH(void); + void TryT(unsigned int a_uiRadius); + void TryT_BestSelectorCombination(void); + void TryH(unsigned int a_uiRadius); + void TryH_BestSelectorCombination(void); + + private: + + void InitFromEncodingBits_T(void); + void InitFromEncodingBits_H(void); + + void CalculatePlanarCornerColors(void); + + void ColorRegression(ColorFloatRGBA *a_pafrgbaPixels, unsigned int a_uiPixels, + ColorFloatRGBA *a_pfrgbaSlope, ColorFloatRGBA *a_pfrgbaOffset); + + bool TwiddlePlanar(void); + bool TwiddlePlanarR(); + bool TwiddlePlanarG(); + bool TwiddlePlanarB(); + + void DecodePixels_T(void); + void DecodePixels_H(void); + void DecodePixels_Planar(void); + + }; + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp 
b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp new file mode 100644 index 0000000000..ba2b42fb05 --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp @@ -0,0 +1,1819 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcBlock4x4Encoding_RGB8A1.cpp contains: + Block4x4Encoding_RGB8A1 + Block4x4Encoding_RGB8A1_Opaque + Block4x4Encoding_RGB8A1_Transparent + +These encoders are used when targetting file format RGB8A1. 
+ +Block4x4Encoding_RGB8A1_Opaque is used when all pixels in the 4x4 block are opaque +Block4x4Encoding_RGB8A1_Transparent is used when all pixels in the 4x4 block are transparent +Block4x4Encoding_RGB8A1 is used when there is a mixture of alphas in the 4x4 block + +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4Encoding_RGB8A1.h" + +#include "EtcBlock4x4.h" +#include "EtcBlock4x4EncodingBits.h" +#include "EtcBlock4x4Encoding_RGB8.h" + +#include <stdio.h> +#include <string.h> +#include <assert.h> + +namespace Etc +{ + + // #################################################################################################### + // Block4x4Encoding_RGB8A1 + // #################################################################################################### + + float Block4x4Encoding_RGB8A1::s_aafCwOpaqueUnsetTable[CW_RANGES][SELECTORS] = + { + { 0.0f / 255.0f, 8.0f / 255.0f, 0.0f / 255.0f, -8.0f / 255.0f }, + { 0.0f / 255.0f, 17.0f / 255.0f, 0.0f / 255.0f, -17.0f / 255.0f }, + { 0.0f / 255.0f, 29.0f / 255.0f, 0.0f / 255.0f, -29.0f / 255.0f }, + { 0.0f / 255.0f, 42.0f / 255.0f, 0.0f / 255.0f, -42.0f / 255.0f }, + { 0.0f / 255.0f, 60.0f / 255.0f, 0.0f / 255.0f, -60.0f / 255.0f }, + { 0.0f / 255.0f, 80.0f / 255.0f, 0.0f / 255.0f, -80.0f / 255.0f }, + { 0.0f / 255.0f, 106.0f / 255.0f, 0.0f / 255.0f, -106.0f / 255.0f }, + { 0.0f / 255.0f, 183.0f / 255.0f, 0.0f / 255.0f, -183.0f / 255.0f } + }; + + // ---------------------------------------------------------------------------------------------------- + // + Block4x4Encoding_RGB8A1::Block4x4Encoding_RGB8A1(void) + { + m_pencodingbitsRGB8 = nullptr; + m_boolOpaque = false; + m_boolTransparent = false; + m_boolPunchThroughPixels = true; + + } + Block4x4Encoding_RGB8A1::~Block4x4Encoding_RGB8A1(void) {} + // ---------------------------------------------------------------------------------------------------- + // initialization prior to encoding + // a_pblockParent points to the block associated with this encoding + // 
// a_errormetric is used to choose the best encoding
	// a_pafrgbaSource points to a 4x4 block subset of the source image
	// a_paucEncodingBits points to the final encoding bits
	//
	// after the RGB8 initialization, the alpha flags are taken from the parent block and every
	// source alpha is thresholded at 0.5 into a decoded alpha of 1 or 0
	//
	void Block4x4Encoding_RGB8A1::InitFromSource(Block4x4 *a_pblockParent,
													ColorFloatRGBA *a_pafrgbaSource,
													unsigned char *a_paucEncodingBits,
													ErrorMetric a_errormetric)
	{

		Block4x4Encoding_RGB8::InitFromSource(a_pblockParent,
												a_pafrgbaSource,
												a_paucEncodingBits,
												a_errormetric);

		m_boolOpaque = a_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE;
		m_boolTransparent = a_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::TRANSPARENT;
		m_boolPunchThroughPixels = a_pblockParent->HasPunchThroughPixels();

		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
		{
			if (m_pafrgbaSource[uiPixel].fA >= 0.5f)
			{
				m_afDecodedAlphas[uiPixel] = 1.0f;
			}
			else
			{
				m_afDecodedAlphas[uiPixel] = 0.0f;
			}
		}

	}

	// ----------------------------------------------------------------------------------------------------
	// initialization from the encoding bits of a previous encoding
	// a_pblockParent points to the block associated with this encoding
	// a_errormetric is used to choose the best encoding
	// a_pafrgbaSource points to a 4x4 block subset of the source image
	// a_paucEncodingBits points to the final encoding bits of a previous encoding
	//
	// the bits are first decoded as an ETC1 mode; if a base + delta sum leaves 0..31 the block is
	// re-initialized as T (red), H (green) or Planar (blue), tested in that order
	//
	void Block4x4Encoding_RGB8A1::InitFromEncodingBits(Block4x4 *a_pblockParent,
														unsigned char *a_paucEncodingBits,
														ColorFloatRGBA *a_pafrgbaSource,
														ErrorMetric a_errormetric)
	{


		InitFromEncodingBits_ETC1(a_pblockParent,
									a_paucEncodingBits,
									a_pafrgbaSource,
									a_errormetric);

		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;

		// detect if there is a T, H or Planar mode present
		int iRed1 = m_pencodingbitsRGB8->differential.red1;
		int iDRed2 = m_pencodingbitsRGB8->differential.dred2;
		int iRed2 = iRed1 + iDRed2;

		int iGreen1 = m_pencodingbitsRGB8->differential.green1;
		int iDGreen2 = m_pencodingbitsRGB8->differential.dgreen2;
		int iGreen2 = iGreen1 + iDGreen2;

		int iBlue1 = m_pencodingbitsRGB8->differential.blue1;
		int iDBlue2 = m_pencodingbitsRGB8->differential.dblue2;
		int iBlue2 = iBlue1 + iDBlue2;

		if (iRed2 < 0 || iRed2 > 31)
		{
			InitFromEncodingBits_T();
		}
		else if (iGreen2 < 0 || iGreen2 > 31)
		{
			InitFromEncodingBits_H();
		}
		else if (iBlue2 < 0 || iBlue2 > 31)
		{
			Block4x4Encoding_RGB8::InitFromEncodingBits_Planar();
		}

	}

	// ----------------------------------------------------------------------------------------------------
	// initialization from the encoding bits of a previous encoding assuming the encoding is an ETC1 mode.
	// if it isn't an ETC1 mode, this will be overwritten later
	//
	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_ETC1(Block4x4 *a_pblockParent,
															unsigned char *a_paucEncodingBits,
															ColorFloatRGBA *a_pafrgbaSource,
															ErrorMetric a_errormetric)
	{
		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,
								a_errormetric);

		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;

		m_mode = MODE_ETC1;
		m_boolDiff = true;
		m_boolFlip = m_pencodingbitsRGB8->differential.flip;
		// the "diff" bit position is read as the opaque flag here
		m_boolOpaque = m_pencodingbitsRGB8->differential.diff;

		// second color = first color + delta, clamped to the 5-bit range
		int iR2 = m_pencodingbitsRGB8->differential.red1 + m_pencodingbitsRGB8->differential.dred2;
		if (iR2 < 0)
		{
			iR2 = 0;
		}
		else if (iR2 > 31)
		{
			iR2 = 31;
		}

		int iG2 = m_pencodingbitsRGB8->differential.green1 + m_pencodingbitsRGB8->differential.dgreen2;
		if (iG2 < 0)
		{
			iG2 = 0;
		}
		else if (iG2 > 31)
		{
			iG2 = 31;
		}

		int iB2 = m_pencodingbitsRGB8->differential.blue1 + m_pencodingbitsRGB8->differential.dblue2;
		if (iB2 < 0)
		{
			iB2 = 0;
		}
		else if (iB2 > 31)
		{
			iB2 = 31;
		}

		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5(m_pencodingbitsRGB8->differential.red1, m_pencodingbitsRGB8->differential.green1, m_pencodingbitsRGB8->differential.blue1);
		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iR2, (unsigned char)iG2, (unsigned char)iB2);

		m_uiCW1 = m_pencodingbitsRGB8->differential.cw1;
		m_uiCW2 = m_pencodingbitsRGB8->differential.cw2;

		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();

		Decode_ETC1();

		CalcBlockError();

	}

	// ----------------------------------------------------------------------------------------------------
	// initialization from the encoding bits of a previous encoding if T mode is detected
	//
	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_T(void)
	{
		m_mode = MODE_T;

		// red1 is split across two bitfields; reassemble it
		unsigned char ucRed1 = (unsigned char)((m_pencodingbitsRGB8->t.red1a << 2) +
												m_pencodingbitsRGB8->t.red1b);
		unsigned char ucGreen1 = m_pencodingbitsRGB8->t.green1;
		unsigned char ucBlue1 = m_pencodingbitsRGB8->t.blue1;

		unsigned char ucRed2 = m_pencodingbitsRGB8->t.red2;
		unsigned char ucGreen2 = m_pencodingbitsRGB8->t.green2;
		unsigned char ucBlue2 = m_pencodingbitsRGB8->t.blue2;

		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1);
		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2);

		// the distance index is split across da and db
		m_uiCW1 = (m_pencodingbitsRGB8->t.da << 1) + m_pencodingbitsRGB8->t.db;

		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();

		DecodePixels_T();

		CalcBlockError();
	}

	// ----------------------------------------------------------------------------------------------------
	// initialization from the encoding bits of a previous encoding if H mode is detected
	//
	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_H(void)
	{
		m_mode = MODE_H;

		// green1, blue1 and green2 are split across several bitfields; reassemble them
		unsigned char ucRed1 = m_pencodingbitsRGB8->h.red1;
		unsigned char ucGreen1 = (unsigned char)((m_pencodingbitsRGB8->h.green1a << 1) +
													m_pencodingbitsRGB8->h.green1b);
		unsigned char ucBlue1 = (unsigned char)((m_pencodingbitsRGB8->h.blue1a << 3) +
												(m_pencodingbitsRGB8->h.blue1b << 1) +
												m_pencodingbitsRGB8->h.blue1c);

		unsigned char ucRed2 = m_pencodingbitsRGB8->h.red2;
		unsigned char ucGreen2 = (unsigned char)((m_pencodingbitsRGB8->h.green2a << 1) +
													m_pencodingbitsRGB8->h.green2b);
		unsigned char ucBlue2 = m_pencodingbitsRGB8->h.blue2;

		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1);
		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2);

		// used to determine the LSB of the CW
		unsigned int uiRGB1 = (unsigned int)(((int)ucRed1 << 16) + ((int)ucGreen1 << 8) + (int)ucBlue1);
		unsigned int uiRGB2 = (unsigned int)(((int)ucRed2 << 16) + ((int)ucGreen2 << 8) + (int)ucBlue2);

		// da and db hold the upper bits; the low bit is set when color1 >= color2
		m_uiCW1 = (m_pencodingbitsRGB8->h.da << 2) + (m_pencodingbitsRGB8->h.db << 1);
		if (uiRGB1 >= uiRGB2)
		{
			m_uiCW1++;
		}

		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();

		DecodePixels_H();

		CalcBlockError();
	}

	// ----------------------------------------------------------------------------------------------------
	// for ETC1 modes, set the decoded colors and decoded alpha based on the encoding state
	//
	void Block4x4Encoding_RGB8A1::Decode_ETC1(void)
	{

		const unsigned int *pauiPixelOrder = m_boolFlip ? s_auiPixelOrderFlip1 : s_auiPixelOrderFlip0;

		for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS; uiPixelOrder++)
		{
			// the first 8 entries of the pixel order use color1/cw1, the rest use color2/cw2
			ColorFloatRGBA *pfrgbaCenter = uiPixelOrder < 8 ? &m_frgbaColor1 : &m_frgbaColor2;
			unsigned int uiCW = uiPixelOrder < 8 ? m_uiCW1 : m_uiCW2;

			unsigned int uiPixel = pauiPixelOrder[uiPixelOrder];

			// a non-opaque block takes its offsets from the RGB8A1-specific table
			float fDelta;
			if (m_boolOpaque)
				fDelta = Block4x4Encoding_ETC1::s_aafCwTable[uiCW][m_auiSelectors[uiPixel]];
			else
				fDelta = s_aafCwOpaqueUnsetTable[uiCW][m_auiSelectors[uiPixel]];

			// in a non-opaque block the transparent selector decodes to a default color with alpha 0
			if (m_boolOpaque == false && m_auiSelectors[uiPixel] == TRANSPARENT_SELECTOR)
			{
				m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
				m_afDecodedAlphas[uiPixel] = 0.0f;
			}
			else
			{
				m_afrgbaDecodedColors[uiPixel] = (*pfrgbaCenter + fDelta).ClampRGB();
				m_afDecodedAlphas[uiPixel] = 1.0f;
			}
		}

	}

	// ----------------------------------------------------------------------------------------------------
	// for T mode, set the decoded colors and decoded alpha based on the encoding state
	//
	// selector 2 decodes as transparent when the block is not opaque; otherwise it yields color2
	//
	void Block4x4Encoding_RGB8A1::DecodePixels_T(void)
	{

		float fDistance = s_afTHDistanceTable[m_uiCW1];
		// same offset on r, g and b; the fourth component is left at zero
		ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f);

		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
		{
			switch (m_auiSelectors[uiPixel])
			{
			case 0:
				m_afrgbaDecodedColors[uiPixel] = m_frgbaColor1;
				m_afDecodedAlphas[uiPixel] = 1.0f;
				break;

			case 1:
				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB();
				m_afDecodedAlphas[uiPixel] = 1.0f;
				break;

			case 2:
				if (m_boolOpaque == false)
				{
					m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
					m_afDecodedAlphas[uiPixel] = 0.0f;
				}
				else
				{
					m_afrgbaDecodedColors[uiPixel] = m_frgbaColor2;
					m_afDecodedAlphas[uiPixel] = 1.0f;
				}
				break;

			case 3:
				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB();
				m_afDecodedAlphas[uiPixel] = 1.0f;
				break;
			}

		}

	}

	// ----------------------------------------------------------------------------------------------------
	// for H mode, set the decoded colors and decoded alpha based on the encoding state
	//
	// selector 2 decodes as transparent when the block is not opaque; otherwise it yields color2 + distance
	//
	void Block4x4Encoding_RGB8A1::DecodePixels_H(void)
	{

		float fDistance = s_afTHDistanceTable[m_uiCW1];
		// same offset on r, g and b; the fourth component is left at zero
		ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f);

		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
		{
			switch (m_auiSelectors[uiPixel])
			{
			case 0:
				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 + frgbaDistance).ClampRGB();
				m_afDecodedAlphas[uiPixel] = 1.0f;
				break;

			case 1:
				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 - frgbaDistance).ClampRGB();
				m_afDecodedAlphas[uiPixel] = 1.0f;
				break;

			case 2:
				if (m_boolOpaque == false)
				{
					m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
					m_afDecodedAlphas[uiPixel] = 0.0f;
				}
				else
				{
					m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB();
					m_afDecodedAlphas[uiPixel] = 1.0f;
				}
				break;

			case 3:
				m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB();
				m_afDecodedAlphas[uiPixel] = 1.0f;
				break;
			}

		}

	}


	// ----------------------------------------------------------------------------------------------------
	// perform a single encoding iteration
	// replace the encoding if a better encoding was found
	// subsequent iterations generally take longer for each iteration
	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
	//
	// RGB8A1 can't use individual mode
	// RGB8A1 with transparent pixels can't use planar mode
	//
	// m_uiEncodingIterations selects the step; the a_fEffort thresholds decide after which step to stop
	//
	void Block4x4Encoding_RGB8A1::PerformIteration(float a_fEffort)
	{
		// this encoder only handles blocks that are neither fully opaque nor fully transparent
		assert(!m_boolOpaque);
		assert(!m_boolTransparent);
		assert(!m_boolDone);

		switch (m_uiEncodingIterations)
		{
		case 0:
			PerformFirstIteration();
			break;

		case 1:
			TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
			break;

		case 2:
			TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
			if (a_fEffort <= 39.5f)
			{
				m_boolDone = true;
			}
			break;

		case 3:
			Block4x4Encoding_RGB8::CalculateBaseColorsForTAndH();
			TryT(1);
			TryH(1);
			if (a_fEffort <= 49.5f)
			{
				m_boolDone = true;
			}
			break;

		case 4:
			TryDegenerates1();
			if (a_fEffort <= 59.5f)
			{
				m_boolDone = true;
			}
			break;

		case 5:
			TryDegenerates2();
			if (a_fEffort <= 69.5f)
			{
				m_boolDone = true;
			}
			break;

		case 6:
			TryDegenerates3();
			if (a_fEffort <= 79.5f)
			{
				m_boolDone = true;
			}
			break;

		case 7:
			// last step: always finish here
			TryDegenerates4();
			m_boolDone = true;
			break;

		default:
			assert(0);
			break;
		}

		m_uiEncodingIterations++;

		SetDoneIfPerfect();

	}

	// ----------------------------------------------------------------------------------------------------
	// find best initial encoding to ensure block has a valid encoding
	//
	void Block4x4Encoding_RGB8A1::PerformFirstIteration(void)
	{
		Block4x4Encoding_ETC1::CalculateMostLikelyFlip();

		// start from the worst possible error so that the first try is always accepted
		m_fError = FLT_MAX;

		TryDifferential(m_boolMostLikelyFlip, 0, 0, 0);
		SetDoneIfPerfect();
		if (m_boolDone)
		{
			return;
		}
		TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0);
		SetDoneIfPerfect();

	}

	// ----------------------------------------------------------------------------------------------------
	// mostly copied from ETC1
	// differences:
	//		Block4x4Encoding_RGB8A1 encodingTry = *this;
	//
	void Block4x4Encoding_RGB8A1::TryDifferential(bool a_boolFlip, unsigned int a_uiRadius,
													int a_iGrayOffset1, int a_iGrayOffset2)
	{

		ColorFloatRGBA frgbaColor1;
		ColorFloatRGBA frgbaColor2;

		const unsigned int *pauiPixelMapping1;
		const unsigned int *pauiPixelMapping2;

		// flipped blocks are split top/bottom, unflipped blocks left/right
		if (a_boolFlip)
		{
			frgbaColor1 = m_frgbaSourceAverageTop;
			frgbaColor2 = m_frgbaSourceAverageBottom;

			pauiPixelMapping1 = s_auiTopPixelMapping;
			pauiPixelMapping2 = s_auiBottomPixelMapping;
		}
		else
		{
			frgbaColor1 = m_frgbaSourceAverageLeft;
			frgbaColor2 = m_frgbaSourceAverageRight;

			pauiPixelMapping1 = s_auiLeftPixelMapping;
			pauiPixelMapping2 = s_auiRightPixelMapping;
		}

		DifferentialTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2,
								a_uiRadius, a_iGrayOffset1, a_iGrayOffset2);

		Block4x4Encoding_RGB8A1 encodingTry = *this;
		encodingTry.m_boolFlip = a_boolFlip;

		encodingTry.TryDifferentialHalf(&trys.m_half1);
		encodingTry.TryDifferentialHalf(&trys.m_half2);

		// find best halves that are within differential range
		DifferentialTrys::Try *ptryBest1 = nullptr;
		DifferentialTrys::Try *ptryBest2 = nullptr;
		encodingTry.m_fError = FLT_MAX;

		// see if the best of each half are in differential range
		int iDRed = trys.m_half2.m_ptryBest->m_iRed - trys.m_half1.m_ptryBest->m_iRed;
		int iDGreen = trys.m_half2.m_ptryBest->m_iGreen - trys.m_half1.m_ptryBest->m_iGreen;
		int iDBlue = trys.m_half2.m_ptryBest->m_iBlue - trys.m_half1.m_ptryBest->m_iBlue;
		if (iDRed >= -4 && iDRed <= 3 && iDGreen >= -4 && iDGreen <= 3 && iDBlue >= -4 && iDBlue <= 3)
		{
			ptryBest1 = trys.m_half1.m_ptryBest;
			ptryBest2 = trys.m_half2.m_ptryBest;
			encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError;
		}
		else
		{
			// else, find the next best halves that are in differential range
			// (exhaustive search over every pairing of a half-1 try with a half-2 try)
			for (DifferentialTrys::Try *ptry1 = &trys.m_half1.m_atry[0];
				ptry1 < &trys.m_half1.m_atry[trys.m_half1.m_uiTrys];
				ptry1++)
			{
				for (DifferentialTrys::Try *ptry2 = &trys.m_half2.m_atry[0];
					ptry2 < &trys.m_half2.m_atry[trys.m_half2.m_uiTrys];
					ptry2++)
				{
					iDRed = ptry2->m_iRed - ptry1->m_iRed;
					bool boolValidRedDelta = iDRed <= 3 && iDRed >= -4;
					iDGreen = ptry2->m_iGreen - ptry1->m_iGreen;
					bool boolValidGreenDelta = iDGreen <= 3 && iDGreen >= -4;
					iDBlue = ptry2->m_iBlue - ptry1->m_iBlue;
					bool boolValidBlueDelta = iDBlue <= 3 && iDBlue >= -4;

					if (boolValidRedDelta && boolValidGreenDelta && boolValidBlueDelta)
					{
						float fError = ptry1->m_fError + ptry2->m_fError;

						if (fError < encodingTry.m_fError)
						{
							encodingTry.m_fError = fError;

							ptryBest1 = ptry1;
							ptryBest2 = ptry2;
						}
					}

				}
			}
			// at least one pairing is expected to be within differential range
			assert(encodingTry.m_fError < FLT_MAX);
			assert(ptryBest1 != nullptr);
			assert(ptryBest2 != nullptr);
		}

		// NOTE(review): the body of this "if" and the rest of the function lie beyond this span
		if (encodingTry.m_fError < m_fError)
		{
m_mode = MODE_ETC1; + m_boolDiff = true; + m_boolFlip = encodingTry.m_boolFlip; + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue); + m_uiCW1 = ptryBest1->m_uiCW; + m_uiCW2 = ptryBest2->m_uiCW; + + m_fError = 0.0f; + for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++) + { + unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder]; + unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder]; + + unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder]; + unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder]; + + m_auiSelectors[uiPixel1] = uiSelector1; + m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder]; + + if (uiSelector1 == TRANSPARENT_SELECTOR) + { + m_afrgbaDecodedColors[uiPixel1] = ColorFloatRGBA(); + m_afDecodedAlphas[uiPixel1] = 0.0f; + } + else + { + float fDeltaRGB1 = s_aafCwOpaqueUnsetTable[m_uiCW1][uiSelector1]; + m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB(); + m_afDecodedAlphas[uiPixel1] = 1.0f; + } + + if (uiSelector2 == TRANSPARENT_SELECTOR) + { + m_afrgbaDecodedColors[uiPixel2] = ColorFloatRGBA(); + m_afDecodedAlphas[uiPixel2] = 0.0f; + } + else + { + float fDeltaRGB2 = s_aafCwOpaqueUnsetTable[m_uiCW2][uiSelector2]; + m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB(); + m_afDecodedAlphas[uiPixel2] = 1.0f; + } + + float fDeltaA1 = m_afDecodedAlphas[uiPixel1] - m_pafrgbaSource[uiPixel1].fA; + m_fError += fDeltaA1 * fDeltaA1; + float fDeltaA2 = m_afDecodedAlphas[uiPixel2] - m_pafrgbaSource[uiPixel2].fA; + m_fError += fDeltaA2 * fDeltaA2; + } + + m_fError1 = ptryBest1->m_fError; + m_fError2 = ptryBest2->m_fError; + m_boolSeverelyBentDifferentialColors = trys.m_boolSeverelyBentColors; + 
m_fError = m_fError1 + m_fError2; + + // sanity check + { + int iRed1 = m_frgbaColor1.IntRed(31.0f); + int iGreen1 = m_frgbaColor1.IntGreen(31.0f); + int iBlue1 = m_frgbaColor1.IntBlue(31.0f); + + int iRed2 = m_frgbaColor2.IntRed(31.0f); + int iGreen2 = m_frgbaColor2.IntGreen(31.0f); + int iBlue2 = m_frgbaColor2.IntBlue(31.0f); + + iDRed = iRed2 - iRed1; + iDGreen = iGreen2 - iGreen1; + iDBlue = iBlue2 - iBlue1; + + assert(iDRed >= -4 && iDRed < 4); + assert(iDGreen >= -4 && iDGreen < 4); + assert(iDBlue >= -4 && iDBlue < 4); + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // mostly copied from ETC1 + // differences: + // uses s_aafCwOpaqueUnsetTable + // color for selector set to 0,0,0,0 + // + void Block4x4Encoding_RGB8A1::TryDifferentialHalf(DifferentialTrys::Half *a_phalf) + { + + a_phalf->m_ptryBest = nullptr; + float fBestTryError = FLT_MAX; + + a_phalf->m_uiTrys = 0; + for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius; + iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius; + iRed++) + { + assert(iRed >= 0 && iRed <= 31); + + for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius; + iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius; + iGreen++) + { + assert(iGreen >= 0 && iGreen <= 31); + + for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius; + iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius; + iBlue++) + { + assert(iBlue >= 0 && iBlue <= 31); + + DifferentialTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys]; + assert(ptry < &a_phalf->m_atry[DifferentialTrys::Half::MAX_TRYS]); + + ptry->m_iRed = iRed; + ptry->m_iGreen = iGreen; + ptry->m_iBlue = iBlue; + ptry->m_fError = FLT_MAX; + ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue); + + // try each CW + for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++) + { + unsigned int auiPixelSelectors[PIXELS / 2]; + ColorFloatRGBA 
afrgbaDecodedColors[PIXELS / 2]; + float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, + FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + + // pre-compute decoded pixels for each selector + ColorFloatRGBA afrgbaSelectors[SELECTORS]; + assert(SELECTORS == 4); + afrgbaSelectors[0] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][0]).ClampRGB(); + afrgbaSelectors[1] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][1]).ClampRGB(); + afrgbaSelectors[2] = ColorFloatRGBA(); + afrgbaSelectors[3] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][3]).ClampRGB(); + + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[a_phalf->m_pauiPixelMapping[uiPixel]]; + ColorFloatRGBA frgbaDecodedPixel; + + for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) + { + if (pfrgbaSourcePixel->fA < 0.5f) + { + uiSelector = TRANSPARENT_SELECTOR; + } + else if (uiSelector == TRANSPARENT_SELECTOR) + { + continue; + } + + frgbaDecodedPixel = afrgbaSelectors[uiSelector]; + + float fPixelError; + + fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[a_phalf->m_pauiPixelMapping[uiPixel]], + *pfrgbaSourcePixel); + + if (fPixelError < afPixelErrors[uiPixel]) + { + auiPixelSelectors[uiPixel] = uiSelector; + afrgbaDecodedColors[uiPixel] = frgbaDecodedPixel; + afPixelErrors[uiPixel] = fPixelError; + } + + if (uiSelector == TRANSPARENT_SELECTOR) + { + break; + } + } + } + + // add up all pixel errors + float fCWError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + fCWError += afPixelErrors[uiPixel]; + } + + // if best CW so far + if (fCWError < ptry->m_fError) + { + ptry->m_uiCW = uiCW; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel]; + } + ptry->m_fError = fCWError; + } + + } + + if (ptry->m_fError < fBestTryError) + { + a_phalf->m_ptryBest = ptry; + fBestTryError = ptry->m_fError; + } + + assert(ptry->m_fError < 
FLT_MAX); + + a_phalf->m_uiTrys++; + } + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try encoding in T mode + // save this encoding if it improves the error + // + // since pixels that use base color1 don't use the distance table, color1 and color2 can be twiddled independently + // better encoding can be found if TWIDDLE_RADIUS is set to 2, but it will be much slower + // + void Block4x4Encoding_RGB8A1::TryT(unsigned int a_uiRadius) + { + Block4x4Encoding_RGB8A1 encodingTry = *this; + + // init "try" + { + encodingTry.m_mode = MODE_T; + encodingTry.m_boolDiff = true; + encodingTry.m_boolFlip = false; + encodingTry.m_fError = FLT_MAX; + } + + int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f); + int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f); + int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f); + + int iMinRed1 = iColor1Red - (int)a_uiRadius; + if (iMinRed1 < 0) + { + iMinRed1 = 0; + } + int iMaxRed1 = iColor1Red + (int)a_uiRadius; + if (iMaxRed1 > 15) + { + iMinRed1 = 15; + } + + int iMinGreen1 = iColor1Green - (int)a_uiRadius; + if (iMinGreen1 < 0) + { + iMinGreen1 = 0; + } + int iMaxGreen1 = iColor1Green + (int)a_uiRadius; + if (iMaxGreen1 > 15) + { + iMinGreen1 = 15; + } + + int iMinBlue1 = iColor1Blue - (int)a_uiRadius; + if (iMinBlue1 < 0) + { + iMinBlue1 = 0; + } + int iMaxBlue1 = iColor1Blue + (int)a_uiRadius; + if (iMaxBlue1 > 15) + { + iMinBlue1 = 15; + } + + int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f); + int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f); + int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f); + + int iMinRed2 = iColor2Red - (int)a_uiRadius; + if (iMinRed2 < 0) + { + iMinRed2 = 0; + } + int iMaxRed2 = iColor2Red + (int)a_uiRadius; + if (iMaxRed2 > 15) + { + iMinRed2 = 15; + } + + int iMinGreen2 = iColor2Green - (int)a_uiRadius; + if (iMinGreen2 < 0) + { + iMinGreen2 = 0; + } + int 
iMaxGreen2 = iColor2Green + (int)a_uiRadius; + if (iMaxGreen2 > 15) + { + iMinGreen2 = 15; + } + + int iMinBlue2 = iColor2Blue - (int)a_uiRadius; + if (iMinBlue2 < 0) + { + iMinBlue2 = 0; + } + int iMaxBlue2 = iColor2Blue + (int)a_uiRadius; + if (iMaxBlue2 > 15) + { + iMinBlue2 = 15; + } + + for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++) + { + encodingTry.m_uiCW1 = uiDistance; + + // twiddle m_frgbaOriginalColor2_TAndH + // twiddle color2 first, since it affects 3 selectors, while color1 only affects one selector + // + for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++) + { + for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++) + { + for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++) + { + for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++) + { + if (uiBaseColorSwaps == 0) + { + encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH; + encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); + } + else + { + encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); + encodingTry.m_frgbaColor2 = m_frgbaOriginalColor1_TAndH; + } + + encodingTry.TryT_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + } + } + } + } + } + + // twiddle m_frgbaOriginalColor1_TAndH + for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++) + { + for (int iGreen1 = 
iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++) + { + for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++) + { + for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++) + { + if (uiBaseColorSwaps == 0) + { + encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); + encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH; + } + else + { + encodingTry.m_frgbaColor1 = m_frgbaOriginalColor2_TAndH; + encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); + } + + encodingTry.TryT_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + } + } + } + } + } + + } + + } + + // ---------------------------------------------------------------------------------------------------- + // find best selector combination for TryT + // called on an encodingTry + // + void Block4x4Encoding_RGB8A1::TryT_BestSelectorCombination(void) + { + + float fDistance = s_afTHDistanceTable[m_uiCW1]; + + unsigned int auiBestPixelSelectors[PIXELS]; + float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, + FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + ColorFloatRGBA afrgbaBestDecodedPixels[PIXELS]; + ColorFloatRGBA afrgbaDecodedPixel[SELECTORS]; + + assert(SELECTORS == 4); + afrgbaDecodedPixel[0] = m_frgbaColor1; + afrgbaDecodedPixel[1] = 
(m_frgbaColor2 + fDistance).ClampRGB(); + afrgbaDecodedPixel[2] = ColorFloatRGBA(); + afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB(); + + // try each selector + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + unsigned int uiMinSelector = 0; + unsigned int uiMaxSelector = SELECTORS - 1; + + if (m_pafrgbaSource[uiPixel].fA < 0.5f) + { + uiMinSelector = 2; + uiMaxSelector = 2; + } + + for (unsigned int uiSelector = uiMinSelector; uiSelector <= uiMaxSelector; uiSelector++) + { + float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel], + m_pafrgbaSource[uiPixel]); + + if (fPixelError < afBestPixelErrors[uiPixel]) + { + afBestPixelErrors[uiPixel] = fPixelError; + auiBestPixelSelectors[uiPixel] = uiSelector; + afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector]; + } + } + } + + + // add up all of the pixel errors + float fBlockError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + fBlockError += afBestPixelErrors[uiPixel]; + } + + if (fBlockError < m_fError) + { + m_fError = fBlockError; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel]; + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try encoding in H mode + // save this encoding if it improves the error + // + // since all pixels use the distance table, color1 and color2 can NOT be twiddled independently + // TWIDDLE_RADIUS of 2 is WAY too slow + // + void Block4x4Encoding_RGB8A1::TryH(unsigned int a_uiRadius) + { + Block4x4Encoding_RGB8A1 encodingTry = *this; + + // init "try" + { + encodingTry.m_mode = MODE_H; + encodingTry.m_boolDiff = true; + encodingTry.m_boolFlip = false; + encodingTry.m_fError = FLT_MAX; + } + + int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f); + int 
iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f); + int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f); + + int iMinRed1 = iColor1Red - (int)a_uiRadius; + if (iMinRed1 < 0) + { + iMinRed1 = 0; + } + int iMaxRed1 = iColor1Red + (int)a_uiRadius; + if (iMaxRed1 > 15) + { + iMinRed1 = 15; + } + + int iMinGreen1 = iColor1Green - (int)a_uiRadius; + if (iMinGreen1 < 0) + { + iMinGreen1 = 0; + } + int iMaxGreen1 = iColor1Green + (int)a_uiRadius; + if (iMaxGreen1 > 15) + { + iMinGreen1 = 15; + } + + int iMinBlue1 = iColor1Blue - (int)a_uiRadius; + if (iMinBlue1 < 0) + { + iMinBlue1 = 0; + } + int iMaxBlue1 = iColor1Blue + (int)a_uiRadius; + if (iMaxBlue1 > 15) + { + iMinBlue1 = 15; + } + + int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f); + int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f); + int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f); + + int iMinRed2 = iColor2Red - (int)a_uiRadius; + if (iMinRed2 < 0) + { + iMinRed2 = 0; + } + int iMaxRed2 = iColor2Red + (int)a_uiRadius; + if (iMaxRed2 > 15) + { + iMinRed2 = 15; + } + + int iMinGreen2 = iColor2Green - (int)a_uiRadius; + if (iMinGreen2 < 0) + { + iMinGreen2 = 0; + } + int iMaxGreen2 = iColor2Green + (int)a_uiRadius; + if (iMaxGreen2 > 15) + { + iMinGreen2 = 15; + } + + int iMinBlue2 = iColor2Blue - (int)a_uiRadius; + if (iMinBlue2 < 0) + { + iMinBlue2 = 0; + } + int iMaxBlue2 = iColor2Blue + (int)a_uiRadius; + if (iMaxBlue2 > 15) + { + iMinBlue2 = 15; + } + + for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++) + { + encodingTry.m_uiCW1 = uiDistance; + + // twiddle m_frgbaOriginalColor1_TAndH + for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++) + { + for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++) + { + for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++) + { + encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); + 
encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH; + + // if color1 == color2, H encoding issues can pop up, so abort + if (iRed1 == iColor2Red && iGreen1 == iColor2Green && iBlue1 == iColor2Blue) + { + continue; + } + + encodingTry.TryH_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + } + } + } + } + + // twiddle m_frgbaOriginalColor2_TAndH + for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++) + { + for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++) + { + for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++) + { + encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH; + encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); + + // if color1 == color2, H encoding issues can pop up, so abort + if (iRed2 == iColor1Red && iGreen2 == iColor1Green && iBlue2 == iColor1Blue) + { + continue; + } + + encodingTry.TryH_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + 
} + } + } + } + + } + + } + + // ---------------------------------------------------------------------------------------------------- + // find best selector combination for TryH + // called on an encodingTry + // + void Block4x4Encoding_RGB8A1::TryH_BestSelectorCombination(void) + { + + // abort if colors and CW will pose an encoding problem + { + unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(255.0f); + unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(255.0f); + unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(255.0f); + unsigned int uiColorValue1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1; + + unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(255.0f); + unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(255.0f); + unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(255.0f); + unsigned int uiColorValue2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2; + + unsigned int uiCWLsb = m_uiCW1 & 1; + + if ((uiColorValue1 >= (uiColorValue2 & uiCWLsb)) == 0 || + (uiColorValue1 < (uiColorValue2 & uiCWLsb)) == 1) + { + return; + } + } + + float fDistance = s_afTHDistanceTable[m_uiCW1]; + + unsigned int auiBestPixelSelectors[PIXELS]; + float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, + FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + ColorFloatRGBA afrgbaBestDecodedPixels[PIXELS]; + ColorFloatRGBA afrgbaDecodedPixel[SELECTORS]; + + assert(SELECTORS == 4); + afrgbaDecodedPixel[0] = (m_frgbaColor1 + fDistance).ClampRGB(); + afrgbaDecodedPixel[1] = (m_frgbaColor1 - fDistance).ClampRGB(); + afrgbaDecodedPixel[2] = ColorFloatRGBA();; + afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB(); + + + // try each selector + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + unsigned int uiMinSelector = 0; + unsigned int uiMaxSelector = SELECTORS - 1; + + if (m_pafrgbaSource[uiPixel].fA < 0.5f) + { + uiMinSelector = 2; + 
uiMaxSelector = 2; + } + + for (unsigned int uiSelector = uiMinSelector; uiSelector <= uiMaxSelector; uiSelector++) + { + float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel], + m_pafrgbaSource[uiPixel]); + + if (fPixelError < afBestPixelErrors[uiPixel]) + { + afBestPixelErrors[uiPixel] = fPixelError; + auiBestPixelSelectors[uiPixel] = uiSelector; + afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector]; + } + } + } + + + // add up all of the pixel errors + float fBlockError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + fBlockError += afBestPixelErrors[uiPixel]; + } + + if (fBlockError < m_fError) + { + m_fError = fBlockError; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel]; + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 1 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void Block4x4Encoding_RGB8A1::TryDegenerates1(void) + { + + TryDifferential(m_boolMostLikelyFlip, 1, -2, 0); + TryDifferential(m_boolMostLikelyFlip, 1, 2, 0); + TryDifferential(m_boolMostLikelyFlip, 1, 0, 2); + TryDifferential(m_boolMostLikelyFlip, 1, 0, -2); + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 2 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void 
Block4x4Encoding_RGB8A1::TryDegenerates2(void) + { + + TryDifferential(!m_boolMostLikelyFlip, 1, -2, 0); + TryDifferential(!m_boolMostLikelyFlip, 1, 2, 0); + TryDifferential(!m_boolMostLikelyFlip, 1, 0, 2); + TryDifferential(!m_boolMostLikelyFlip, 1, 0, -2); + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 3 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void Block4x4Encoding_RGB8A1::TryDegenerates3(void) + { + + TryDifferential(m_boolMostLikelyFlip, 1, -2, -2); + TryDifferential(m_boolMostLikelyFlip, 1, -2, 2); + TryDifferential(m_boolMostLikelyFlip, 1, 2, -2); + TryDifferential(m_boolMostLikelyFlip, 1, 2, 2); + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 4 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void Block4x4Encoding_RGB8A1::TryDegenerates4(void) + { + + TryDifferential(m_boolMostLikelyFlip, 1, -4, 0); + TryDifferential(m_boolMostLikelyFlip, 1, 4, 0); + TryDifferential(m_boolMostLikelyFlip, 1, 0, 4); + TryDifferential(m_boolMostLikelyFlip, 1, 0, -4); + + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state + // + void Block4x4Encoding_RGB8A1::SetEncodingBits(void) + { + switch (m_mode) + { + case MODE_ETC1: + SetEncodingBits_ETC1(); + break; + + case MODE_T: + SetEncodingBits_T(); + break; + + case MODE_H: + SetEncodingBits_H(); + break; + + case MODE_PLANAR: + 
Block4x4Encoding_RGB8::SetEncodingBits_Planar(); + break; + + default: + assert(false); + } + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state if ETC1 mode + // + void Block4x4Encoding_RGB8A1::SetEncodingBits_ETC1(void) + { + + // there is no individual mode in RGB8A1 + assert(m_boolDiff); + + int iRed1 = m_frgbaColor1.IntRed(31.0f); + int iGreen1 = m_frgbaColor1.IntGreen(31.0f); + int iBlue1 = m_frgbaColor1.IntBlue(31.0f); + + int iRed2 = m_frgbaColor2.IntRed(31.0f); + int iGreen2 = m_frgbaColor2.IntGreen(31.0f); + int iBlue2 = m_frgbaColor2.IntBlue(31.0f); + + int iDRed2 = iRed2 - iRed1; + int iDGreen2 = iGreen2 - iGreen1; + int iDBlue2 = iBlue2 - iBlue1; + + assert(iDRed2 >= -4 && iDRed2 < 4); + assert(iDGreen2 >= -4 && iDGreen2 < 4); + assert(iDBlue2 >= -4 && iDBlue2 < 4); + + m_pencodingbitsRGB8->differential.red1 = iRed1; + m_pencodingbitsRGB8->differential.green1 = iGreen1; + m_pencodingbitsRGB8->differential.blue1 = iBlue1; + + m_pencodingbitsRGB8->differential.dred2 = iDRed2; + m_pencodingbitsRGB8->differential.dgreen2 = iDGreen2; + m_pencodingbitsRGB8->differential.dblue2 = iDBlue2; + + m_pencodingbitsRGB8->individual.cw1 = m_uiCW1; + m_pencodingbitsRGB8->individual.cw2 = m_uiCW2; + + SetEncodingBits_Selectors(); + + // in RGB8A1 encoding bits, opaque replaces differential + m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels; + + m_pencodingbitsRGB8->individual.flip = m_boolFlip; + + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state if T mode + // + void Block4x4Encoding_RGB8A1::SetEncodingBits_T(void) + { + static const bool SANITY_CHECK = true; + + assert(m_mode == MODE_T); + assert(m_boolDiff == true); + + unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f); + unsigned int uiGreen1 = (unsigned 
int)m_frgbaColor1.IntGreen(15.0f); + unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f); + + unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f); + unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f); + unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f); + + m_pencodingbitsRGB8->t.red1a = uiRed1 >> 2; + m_pencodingbitsRGB8->t.red1b = uiRed1; + m_pencodingbitsRGB8->t.green1 = uiGreen1; + m_pencodingbitsRGB8->t.blue1 = uiBlue1; + + m_pencodingbitsRGB8->t.red2 = uiRed2; + m_pencodingbitsRGB8->t.green2 = uiGreen2; + m_pencodingbitsRGB8->t.blue2 = uiBlue2; + + m_pencodingbitsRGB8->t.da = m_uiCW1 >> 1; + m_pencodingbitsRGB8->t.db = m_uiCW1; + + // in RGB8A1 encoding bits, opaque replaces differential + m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels; + + Block4x4Encoding_ETC1::SetEncodingBits_Selectors(); + + // create an invalid R differential to trigger T mode + m_pencodingbitsRGB8->t.detect1 = 0; + m_pencodingbitsRGB8->t.detect2 = 0; + int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; + if (iRed2 >= 4) + { + m_pencodingbitsRGB8->t.detect1 = 7; + m_pencodingbitsRGB8->t.detect2 = 0; + } + else + { + m_pencodingbitsRGB8->t.detect1 = 0; + m_pencodingbitsRGB8->t.detect2 = 1; + } + + if (SANITY_CHECK) + { + iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; + + // make sure red overflows + assert(iRed2 < 0 || iRed2 > 31); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state if H mode + // + // colors and selectors may need to swap in order to generate lsb of distance index + // + void Block4x4Encoding_RGB8A1::SetEncodingBits_H(void) + { + static const bool SANITY_CHECK = true; + + assert(m_mode == MODE_H); + assert(m_boolDiff == true); + + unsigned int uiRed1 = (unsigned 
int)m_frgbaColor1.IntRed(15.0f); + unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f); + unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f); + + unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f); + unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f); + unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f); + + unsigned int uiColor1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1; + unsigned int uiColor2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2; + + bool boolOddDistance = m_uiCW1 & 1; + bool boolSwapColors = (uiColor1 < uiColor2) ^ !boolOddDistance; + + if (boolSwapColors) + { + m_pencodingbitsRGB8->h.red1 = uiRed2; + m_pencodingbitsRGB8->h.green1a = uiGreen2 >> 1; + m_pencodingbitsRGB8->h.green1b = uiGreen2; + m_pencodingbitsRGB8->h.blue1a = uiBlue2 >> 3; + m_pencodingbitsRGB8->h.blue1b = uiBlue2 >> 1; + m_pencodingbitsRGB8->h.blue1c = uiBlue2; + + m_pencodingbitsRGB8->h.red2 = uiRed1; + m_pencodingbitsRGB8->h.green2a = uiGreen1 >> 1; + m_pencodingbitsRGB8->h.green2b = uiGreen1; + m_pencodingbitsRGB8->h.blue2 = uiBlue1; + + m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2; + m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1; + } + else + { + m_pencodingbitsRGB8->h.red1 = uiRed1; + m_pencodingbitsRGB8->h.green1a = uiGreen1 >> 1; + m_pencodingbitsRGB8->h.green1b = uiGreen1; + m_pencodingbitsRGB8->h.blue1a = uiBlue1 >> 3; + m_pencodingbitsRGB8->h.blue1b = uiBlue1 >> 1; + m_pencodingbitsRGB8->h.blue1c = uiBlue1; + + m_pencodingbitsRGB8->h.red2 = uiRed2; + m_pencodingbitsRGB8->h.green2a = uiGreen2 >> 1; + m_pencodingbitsRGB8->h.green2b = uiGreen2; + m_pencodingbitsRGB8->h.blue2 = uiBlue2; + + m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2; + m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1; + } + + // in RGB8A1 encoding bits, opaque replaces differential + m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels; + + Block4x4Encoding_ETC1::SetEncodingBits_Selectors(); + + if (boolSwapColors) + { + 
m_pencodingbitsRGB8->h.selectors ^= 0x0000FFFF; + } + + // create an invalid R differential to trigger T mode + m_pencodingbitsRGB8->h.detect1 = 0; + m_pencodingbitsRGB8->h.detect2 = 0; + m_pencodingbitsRGB8->h.detect3 = 0; + int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; + int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2; + if (iRed2 < 0 || iRed2 > 31) + { + m_pencodingbitsRGB8->h.detect1 = 1; + } + if (iGreen2 >= 4) + { + m_pencodingbitsRGB8->h.detect2 = 7; + m_pencodingbitsRGB8->h.detect3 = 0; + } + else + { + m_pencodingbitsRGB8->h.detect2 = 0; + m_pencodingbitsRGB8->h.detect3 = 1; + } + + if (SANITY_CHECK) + { + iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; + iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2; + + // make sure red doesn't overflow and green does + assert(iRed2 >= 0 && iRed2 <= 31); + assert(iGreen2 < 0 || iGreen2 > 31); + } + + } + + // #################################################################################################### + // Block4x4Encoding_RGB8A1_Opaque + // #################################################################################################### + + // ---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + void Block4x4Encoding_RGB8A1_Opaque::PerformIteration(float a_fEffort) + { + assert(!m_boolPunchThroughPixels); + assert(!m_boolTransparent); + assert(!m_boolDone); + + switch (m_uiEncodingIterations) + { + case 0: + PerformFirstIteration(); + break; + + case 1: + 
Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 1, 0, 0); + break; + + case 2: + Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0); + break; + + case 3: + Block4x4Encoding_RGB8::TryPlanar(1); + break; + + case 4: + Block4x4Encoding_RGB8::TryTAndH(1); + if (a_fEffort <= 49.5f) + { + m_boolDone = true; + } + break; + + case 5: + Block4x4Encoding_ETC1::TryDegenerates1(); + if (a_fEffort <= 59.5f) + { + m_boolDone = true; + } + break; + + case 6: + Block4x4Encoding_ETC1::TryDegenerates2(); + if (a_fEffort <= 69.5f) + { + m_boolDone = true; + } + break; + + case 7: + Block4x4Encoding_ETC1::TryDegenerates3(); + if (a_fEffort <= 79.5f) + { + m_boolDone = true; + } + break; + + case 8: + Block4x4Encoding_ETC1::TryDegenerates4(); + m_boolDone = true; + break; + + default: + assert(0); + break; + } + + m_uiEncodingIterations++; + SetDoneIfPerfect(); + } + + // ---------------------------------------------------------------------------------------------------- + // find best initial encoding to ensure block has a valid encoding + // + void Block4x4Encoding_RGB8A1_Opaque::PerformFirstIteration(void) + { + + // set decoded alphas + // calculate alpha error + m_fError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afDecodedAlphas[uiPixel] = 1.0f; + + float fDeltaA = 1.0f - m_pafrgbaSource[uiPixel].fA; + m_fError += fDeltaA * fDeltaA; + } + + CalculateMostLikelyFlip(); + + m_fError = FLT_MAX; + + Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 0, 0, 0); + SetDoneIfPerfect(); + if (m_boolDone) + { + return; + } + Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0); + SetDoneIfPerfect(); + if (m_boolDone) + { + return; + } + Block4x4Encoding_RGB8::TryPlanar(0); + SetDoneIfPerfect(); + if (m_boolDone) + { + return; + } + Block4x4Encoding_RGB8::TryTAndH(0); + SetDoneIfPerfect(); + } + + // #################################################################################################### 
+ // Block4x4Encoding_RGB8A1_Transparent + // #################################################################################################### + + // ---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + void Block4x4Encoding_RGB8A1_Transparent::PerformIteration(float ) + { + assert(!m_boolOpaque); + assert(m_boolTransparent); + assert(!m_boolDone); + assert(m_uiEncodingIterations == 0); + + m_mode = MODE_ETC1; + m_boolDiff = true; + m_boolFlip = false; + + m_uiCW1 = 0; + m_uiCW2 = 0; + + m_frgbaColor1 = ColorFloatRGBA(); + m_frgbaColor2 = ColorFloatRGBA(); + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = TRANSPARENT_SELECTOR; + + m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(); + m_afDecodedAlphas[uiPixel] = 0.0f; + } + + CalcBlockError(); + + m_boolDone = true; + m_uiEncodingIterations++; + + } + + // ---------------------------------------------------------------------------------------------------- + // +} diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.h new file mode 100644 index 0000000000..ff26e462f8 --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGB8A1.h @@ -0,0 +1,129 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "EtcBlock4x4Encoding_RGB8.h" +#include "EtcErrorMetric.h" +#include "EtcBlock4x4EncodingBits.h" + +namespace Etc +{ + + // ################################################################################ + // Block4x4Encoding_RGB8A1 + // RGB8A1 if not completely opaque or transparent + // ################################################################################ + + class Block4x4Encoding_RGB8A1 : public Block4x4Encoding_RGB8 + { + public: + + static const unsigned int TRANSPARENT_SELECTOR = 2; + + Block4x4Encoding_RGB8A1(void); + virtual ~Block4x4Encoding_RGB8A1(void); + + virtual void InitFromSource(Block4x4 *a_pblockParent, + ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, + ErrorMetric a_errormetric); + + virtual void InitFromEncodingBits(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric); + + virtual void PerformIteration(float a_fEffort); + + virtual void SetEncodingBits(void); + + void InitFromEncodingBits_ETC1(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric); + + void InitFromEncodingBits_T(void); + void InitFromEncodingBits_H(void); + + void PerformFirstIteration(void); + + void Decode_ETC1(void); + void DecodePixels_T(void); + void DecodePixels_H(void); + void SetEncodingBits_ETC1(void); + void SetEncodingBits_T(void); + void SetEncodingBits_H(void); + + protected: + + bool m_boolOpaque; // all source pixels have alpha >= 0.5 + 
bool m_boolTransparent; // all source pixels have alpha < 0.5 + bool m_boolPunchThroughPixels; // some source pixels have alpha < 0.5 + + static float s_aafCwOpaqueUnsetTable[CW_RANGES][SELECTORS]; + + private: + + void TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, + int a_iGrayOffset1, int a_iGrayOffset2); + void TryDifferentialHalf(DifferentialTrys::Half *a_phalf); + + void TryT(unsigned int a_uiRadius); + void TryT_BestSelectorCombination(void); + void TryH(unsigned int a_uiRadius); + void TryH_BestSelectorCombination(void); + + void TryDegenerates1(void); + void TryDegenerates2(void); + void TryDegenerates3(void); + void TryDegenerates4(void); + + }; + + // ################################################################################ + // Block4x4Encoding_RGB8A1_Opaque + // RGB8A1 if all pixels have alpha==1 + // ################################################################################ + + class Block4x4Encoding_RGB8A1_Opaque : public Block4x4Encoding_RGB8A1 + { + public: + + virtual void PerformIteration(float a_fEffort); + + void PerformFirstIteration(void); + + private: + + }; + + // ################################################################################ + // Block4x4Encoding_RGB8A1_Transparent + // RGB8A1 if all pixels have alpha==0 + // ################################################################################ + + class Block4x4Encoding_RGB8A1_Transparent : public Block4x4Encoding_RGB8A1 + { + public: + + virtual void PerformIteration(float a_fEffort); + + private: + + }; + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.cpp b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.cpp new file mode 100644 index 0000000000..600c7ab405 --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.cpp @@ -0,0 +1,474 @@ +/* + * Copyright 2015 The Etc2Comp Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcBlock4x4Encoding_RGBA8.cpp contains: + Block4x4Encoding_RGBA8 + Block4x4Encoding_RGBA8_Opaque + Block4x4Encoding_RGBA8_Transparent + +These encoders are used when targetting file format RGBA8. + +Block4x4Encoding_RGBA8_Opaque is used when all pixels in the 4x4 block are opaque +Block4x4Encoding_RGBA8_Transparent is used when all pixels in the 4x4 block are transparent +Block4x4Encoding_RGBA8 is used when there is a mixture of alphas in the 4x4 block + +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4Encoding_RGBA8.h" + +#include "EtcBlock4x4EncodingBits.h" +#include "EtcBlock4x4.h" + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <float.h> +#include <limits> + +namespace Etc +{ + + // #################################################################################################### + // Block4x4Encoding_RGBA8 + // #################################################################################################### + + float Block4x4Encoding_RGBA8::s_aafModifierTable[MODIFIER_TABLE_ENTRYS][ALPHA_SELECTORS] + { + { -3.0f / 255.0f, -6.0f / 255.0f, -9.0f / 255.0f, -15.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 8.0f / 255.0f, 14.0f / 255.0f }, + { -3.0f / 255.0f, -7.0f / 255.0f, -10.0f / 255.0f, -13.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 9.0f / 255.0f, 12.0f / 255.0f }, + { -2.0f / 255.0f, -5.0f / 255.0f, -8.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 
7.0f / 255.0f, 12.0f / 255.0f }, + { -2.0f / 255.0f, -4.0f / 255.0f, -6.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 12.0f / 255.0f }, + + { -3.0f / 255.0f, -6.0f / 255.0f, -8.0f / 255.0f, -12.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f, 11.0f / 255.0f }, + { -3.0f / 255.0f, -7.0f / 255.0f, -9.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 8.0f / 255.0f, 10.0f / 255.0f }, + { -4.0f / 255.0f, -7.0f / 255.0f, -8.0f / 255.0f, -11.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f }, + { -3.0f / 255.0f, -5.0f / 255.0f, -8.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f }, + + { -2.0f / 255.0f, -6.0f / 255.0f, -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f, 9.0f / 255.0f }, + { -2.0f / 255.0f, -5.0f / 255.0f, -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 9.0f / 255.0f }, + { -2.0f / 255.0f, -4.0f / 255.0f, -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 7.0f / 255.0f, 9.0f / 255.0f }, + { -2.0f / 255.0f, -5.0f / 255.0f, -7.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f, 9.0f / 255.0f }, + + { -3.0f / 255.0f, -4.0f / 255.0f, -7.0f / 255.0f, -10.0f / 255.0f, 2.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f, 9.0f / 255.0f }, + { -1.0f / 255.0f, -2.0f / 255.0f, -3.0f / 255.0f, -10.0f / 255.0f, 0.0f / 255.0f, 1.0f / 255.0f, 2.0f / 255.0f, 9.0f / 255.0f }, + { -4.0f / 255.0f, -6.0f / 255.0f, -8.0f / 255.0f, -9.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f, 8.0f / 255.0f }, + { -3.0f / 255.0f, -5.0f / 255.0f, -7.0f / 255.0f, -9.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f, 8.0f / 255.0f } + }; + + // ---------------------------------------------------------------------------------------------------- + // + Block4x4Encoding_RGBA8::Block4x4Encoding_RGBA8(void) + { + + m_pencodingbitsA8 = nullptr; + + } + 
Block4x4Encoding_RGBA8::~Block4x4Encoding_RGBA8(void) {} + // ---------------------------------------------------------------------------------------------------- + // initialization prior to encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits + // + void Block4x4Encoding_RGBA8::InitFromSource(Block4x4 *a_pblockParent, + ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) + { + Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,a_errormetric); + + m_pencodingbitsA8 = (Block4x4EncodingBits_A8 *)a_paucEncodingBits; + m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits + sizeof(Block4x4EncodingBits_A8)); + + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits of a previous encoding + // + void Block4x4Encoding_RGBA8::InitFromEncodingBits(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric) + { + + m_pencodingbitsA8 = (Block4x4EncodingBits_A8 *)a_paucEncodingBits; + m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits + sizeof(Block4x4EncodingBits_A8)); + + // init RGB portion + Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent, + (unsigned char *) m_pencodingbitsRGB8, + a_pafrgbaSource, + a_errormetric); + + // init A8 portion + // has to be done after InitFromEncodingBits() + { + m_fBase = m_pencodingbitsA8->data.base / 
255.0f; + m_fMultiplier = (float)m_pencodingbitsA8->data.multiplier; + m_uiModifierTableIndex = m_pencodingbitsA8->data.table; + + unsigned long long int ulliSelectorBits = 0; + ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors0 << 40; + ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors1 << 32; + ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors2 << 24; + ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors3 << 16; + ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors4 << 8; + ulliSelectorBits |= (unsigned long long int)m_pencodingbitsA8->data.selectors5; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + unsigned int uiShift = 45 - (3 * uiPixel); + m_auiAlphaSelectors[uiPixel] = (ulliSelectorBits >> uiShift) & (ALPHA_SELECTORS - 1); + } + + // decode the alphas + // calc alpha error + m_fError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afDecodedAlphas[uiPixel] = DecodePixelAlpha(m_fBase, m_fMultiplier, + m_uiModifierTableIndex, + m_auiAlphaSelectors[uiPixel]); + + float fDeltaAlpha = m_afDecodedAlphas[uiPixel] - m_pafrgbaSource[uiPixel].fA; + m_fError += fDeltaAlpha * fDeltaAlpha; + } + } + + // redo error calc to include alpha + CalcBlockError(); + + } + + // ---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + // similar to Block4x4Encoding_RGB8_Base::Encode_RGB8(), but with alpha added + // + void Block4x4Encoding_RGBA8::PerformIteration(float a_fEffort) + { + assert(!m_boolDone); + + if (m_uiEncodingIterations == 0) + { + if (a_fEffort < 24.9f) + { + CalculateA8(0.0f); + } + 
else if (a_fEffort < 49.9f) + { + CalculateA8(1.0f); + } + else + { + CalculateA8(2.0f); + } + } + + Block4x4Encoding_RGB8::PerformIteration(a_fEffort); + + } + + // ---------------------------------------------------------------------------------------------------- + // find the best combination of base alpga, multiplier and selectors + // + // a_fRadius limits the range of base alpha to try + // + void Block4x4Encoding_RGBA8::CalculateA8(float a_fRadius) + { + + // find min/max alpha + float fMinAlpha = 1.0f; + float fMaxAlpha = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + float fAlpha = m_pafrgbaSource[uiPixel].fA; + + // ignore border pixels + if (isnan(fAlpha)) + { + continue; + } + + if (fAlpha < fMinAlpha) + { + fMinAlpha = fAlpha; + } + if (fAlpha > fMaxAlpha) + { + fMaxAlpha = fAlpha; + } + } + assert(fMinAlpha <= fMaxAlpha); + + float fAlphaRange = fMaxAlpha - fMinAlpha; + + // try each modifier table entry + m_fError = FLT_MAX; // artificially high value + for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++) + { + static const unsigned int MIN_VALUE_SELECTOR = 3; + static const unsigned int MAX_VALUE_SELECTOR = 7; + + float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][MIN_VALUE_SELECTOR]; + + float fTableEntryRange = s_aafModifierTable[uiTableEntry][MAX_VALUE_SELECTOR] - + s_aafModifierTable[uiTableEntry][MIN_VALUE_SELECTOR]; + + float fCenterRatio = fTableEntryCenter / fTableEntryRange; + + float fCenter = fMinAlpha + fCenterRatio*fAlphaRange; + fCenter = roundf(255.0f * fCenter) / 255.0f; + + float fMinBase = fCenter - (a_fRadius / 255.0f); + if (fMinBase < 0.0f) + { + fMinBase = 0.0f; + } + + float fMaxBase = fCenter + (a_fRadius / 255.0f); + if (fMaxBase > 1.0f) + { + fMaxBase = 1.0f; + } + + for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f)) + { + + float fRangeMultiplier = roundf(fAlphaRange / fTableEntryRange); + + float fMinMultiplier = 
fRangeMultiplier - a_fRadius; + if (fMinMultiplier < 1.0f) + { + fMinMultiplier = 1.0f; + } + else if (fMinMultiplier > 15.0f) + { + fMinMultiplier = 15.0f; + } + + float fMaxMultiplier = fRangeMultiplier + a_fRadius; + if (fMaxMultiplier < 1.0f) + { + fMaxMultiplier = 1.0f; + } + else if (fMaxMultiplier > 15.0f) + { + fMaxMultiplier = 15.0f; + } + + for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f) + { + // find best selector for each pixel + unsigned int auiBestSelectors[PIXELS]; + float afBestAlphaError[PIXELS]; + float afBestDecodedAlphas[PIXELS]; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + float fBestPixelAlphaError = FLT_MAX; + for (unsigned int uiSelector = 0; uiSelector < ALPHA_SELECTORS; uiSelector++) + { + float fDecodedAlpha = DecodePixelAlpha(fBase, fMultiplier, uiTableEntry, uiSelector); + + // border pixels (NAN) should have zero error + float fPixelDeltaAlpha = isnan(m_pafrgbaSource[uiPixel].fA) ? + 0.0f : + fDecodedAlpha - m_pafrgbaSource[uiPixel].fA; + + float fPixelAlphaError = fPixelDeltaAlpha * fPixelDeltaAlpha; + + if (fPixelAlphaError < fBestPixelAlphaError) + { + fBestPixelAlphaError = fPixelAlphaError; + auiBestSelectors[uiPixel] = uiSelector; + afBestAlphaError[uiPixel] = fBestPixelAlphaError; + afBestDecodedAlphas[uiPixel] = fDecodedAlpha; + } + } + } + + float fBlockError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + fBlockError += afBestAlphaError[uiPixel]; + } + + if (fBlockError < m_fError) + { + m_fError = fBlockError; + + m_fBase = fBase; + m_fMultiplier = fMultiplier; + m_uiModifierTableIndex = uiTableEntry; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiAlphaSelectors[uiPixel] = auiBestSelectors[uiPixel]; + m_afDecodedAlphas[uiPixel] = afBestDecodedAlphas[uiPixel]; + } + } + } + } + + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set the 
encoding bits based on encoding state + // + void Block4x4Encoding_RGBA8::SetEncodingBits(void) + { + + // set the RGB8 portion + Block4x4Encoding_RGB8::SetEncodingBits(); + + // set the A8 portion + { + m_pencodingbitsA8->data.base = (unsigned char)roundf(255.0f * m_fBase); + m_pencodingbitsA8->data.table = m_uiModifierTableIndex; + m_pencodingbitsA8->data.multiplier = (unsigned char)roundf(m_fMultiplier); + + unsigned long long int ulliSelectorBits = 0; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + unsigned int uiShift = 45 - (3 * uiPixel); + ulliSelectorBits |= ((unsigned long long int)m_auiAlphaSelectors[uiPixel]) << uiShift; + } + + m_pencodingbitsA8->data.selectors0 = ulliSelectorBits >> 40; + m_pencodingbitsA8->data.selectors1 = ulliSelectorBits >> 32; + m_pencodingbitsA8->data.selectors2 = ulliSelectorBits >> 24; + m_pencodingbitsA8->data.selectors3 = ulliSelectorBits >> 16; + m_pencodingbitsA8->data.selectors4 = ulliSelectorBits >> 8; + m_pencodingbitsA8->data.selectors5 = ulliSelectorBits; + } + + } + + // #################################################################################################### + // Block4x4Encoding_RGBA8_Opaque + // #################################################################################################### + + // ---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + void Block4x4Encoding_RGBA8_Opaque::PerformIteration(float a_fEffort) + { + assert(!m_boolDone); + + if (m_uiEncodingIterations == 0) + { + m_fError = 0.0f; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afDecodedAlphas[uiPixel] = 1.0f; + } + } + + Block4x4Encoding_RGB8::PerformIteration(a_fEffort); + + } + + 
// ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state + // + void Block4x4Encoding_RGBA8_Opaque::SetEncodingBits(void) + { + + // set the RGB8 portion + Block4x4Encoding_RGB8::SetEncodingBits(); + + // set the A8 portion + m_pencodingbitsA8->data.base = 255; + m_pencodingbitsA8->data.table = 15; + m_pencodingbitsA8->data.multiplier = 15; + m_pencodingbitsA8->data.selectors0 = 0xFF; + m_pencodingbitsA8->data.selectors1 = 0xFF; + m_pencodingbitsA8->data.selectors2 = 0xFF; + m_pencodingbitsA8->data.selectors3 = 0xFF; + m_pencodingbitsA8->data.selectors4 = 0xFF; + m_pencodingbitsA8->data.selectors5 = 0xFF; + + } + + // #################################################################################################### + // Block4x4Encoding_RGBA8_Transparent + // #################################################################################################### + + // ---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + void Block4x4Encoding_RGBA8_Transparent::PerformIteration(float ) + { + assert(!m_boolDone); + assert(m_uiEncodingIterations == 0); + + m_mode = MODE_ETC1; + m_boolDiff = true; + m_boolFlip = false; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(); + m_afDecodedAlphas[uiPixel] = 0.0f; + } + + m_fError = 0.0f; + + m_boolDone = true; + m_uiEncodingIterations++; + + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state + // + void 
Block4x4Encoding_RGBA8_Transparent::SetEncodingBits(void) + { + + Block4x4Encoding_RGB8::SetEncodingBits(); + + // set the A8 portion + m_pencodingbitsA8->data.base = 0; + m_pencodingbitsA8->data.table = 0; + m_pencodingbitsA8->data.multiplier = 1; + m_pencodingbitsA8->data.selectors0 = 0; + m_pencodingbitsA8->data.selectors1 = 0; + m_pencodingbitsA8->data.selectors2 = 0; + m_pencodingbitsA8->data.selectors3 = 0; + m_pencodingbitsA8->data.selectors4 = 0; + m_pencodingbitsA8->data.selectors5 = 0; + + } + + // ---------------------------------------------------------------------------------------------------- + // +} diff --git a/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.h b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.h new file mode 100644 index 0000000000..5765d36b90 --- /dev/null +++ b/thirdparty/etc2comp/EtcBlock4x4Encoding_RGBA8.h @@ -0,0 +1,121 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "EtcBlock4x4Encoding_RGB8.h" + +namespace Etc +{ + class Block4x4EncodingBits_A8; + + // ################################################################################ + // Block4x4Encoding_RGBA8 + // RGBA8 if not completely opaque or transparent + // ################################################################################ + + class Block4x4Encoding_RGBA8 : public Block4x4Encoding_RGB8 + { + public: + + Block4x4Encoding_RGBA8(void); + virtual ~Block4x4Encoding_RGBA8(void); + + virtual void InitFromSource(Block4x4 *a_pblockParent, + ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric); + + virtual void InitFromEncodingBits(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric); + + virtual void PerformIteration(float a_fEffort); + + virtual void SetEncodingBits(void); + + protected: + + static const unsigned int MODIFIER_TABLE_ENTRYS = 16; + static const unsigned int ALPHA_SELECTOR_BITS = 3; + static const unsigned int ALPHA_SELECTORS = 1 << ALPHA_SELECTOR_BITS; + + static float s_aafModifierTable[MODIFIER_TABLE_ENTRYS][ALPHA_SELECTORS]; + + void CalculateA8(float a_fRadius); + + Block4x4EncodingBits_A8 *m_pencodingbitsA8; // A8 portion of Block4x4EncodingBits_RGBA8 + + float m_fBase; + float m_fMultiplier; + unsigned int m_uiModifierTableIndex; + unsigned int m_auiAlphaSelectors[PIXELS]; + + private: + + inline float DecodePixelAlpha(float a_fBase, float a_fMultiplier, + unsigned int a_uiTableIndex, unsigned int a_uiSelector) + { + float fPixelAlpha = a_fBase + + a_fMultiplier*s_aafModifierTable[a_uiTableIndex][a_uiSelector]; + if (fPixelAlpha < 0.0f) + { + fPixelAlpha = 0.0f; + } + else if (fPixelAlpha > 1.0f) + { + fPixelAlpha = 1.0f; + } + + return fPixelAlpha; + } + + }; + + // ################################################################################ + // Block4x4Encoding_RGBA8_Opaque + // RGBA8 if 
all pixels have alpha==1 + // ################################################################################ + + class Block4x4Encoding_RGBA8_Opaque : public Block4x4Encoding_RGBA8 + { + public: + + virtual void PerformIteration(float a_fEffort); + + virtual void SetEncodingBits(void); + + }; + + // ################################################################################ + // Block4x4Encoding_RGBA8_Transparent + // RGBA8 if all pixels have alpha==0 + // ################################################################################ + + class Block4x4Encoding_RGBA8_Transparent : public Block4x4Encoding_RGBA8 + { + public: + + virtual void PerformIteration(float a_fEffort); + + virtual void SetEncodingBits(void); + + }; + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcColor.h b/thirdparty/etc2comp/EtcColor.h new file mode 100644 index 0000000000..7ceae05b65 --- /dev/null +++ b/thirdparty/etc2comp/EtcColor.h @@ -0,0 +1,64 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include <math.h> + +namespace Etc +{ + + inline float LogToLinear(float a_fLog) + { + static const float ALPHA = 0.055f; + static const float ONE_PLUS_ALPHA = 1.0f + ALPHA; + + if (a_fLog <= 0.04045f) + { + return a_fLog / 12.92f; + } + else + { + return powf((a_fLog + ALPHA) / ONE_PLUS_ALPHA, 2.4f); + } + } + + inline float LinearToLog(float &a_fLinear) + { + static const float ALPHA = 0.055f; + static const float ONE_PLUS_ALPHA = 1.0f + ALPHA; + + if (a_fLinear <= 0.0031308f) + { + return 12.92f * a_fLinear; + } + else + { + return ONE_PLUS_ALPHA * powf(a_fLinear, (1.0f/2.4f)) - ALPHA; + } + } + + class ColorR8G8B8A8 + { + public: + + unsigned char ucR; + unsigned char ucG; + unsigned char ucB; + unsigned char ucA; + + }; +} diff --git a/thirdparty/etc2comp/EtcColorFloatRGBA.h b/thirdparty/etc2comp/EtcColorFloatRGBA.h new file mode 100644 index 0000000000..f2ca2c1f71 --- /dev/null +++ b/thirdparty/etc2comp/EtcColorFloatRGBA.h @@ -0,0 +1,321 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "EtcConfig.h" +#include "EtcColor.h" + +#include <math.h> + +namespace Etc +{ + + class ColorFloatRGBA + { + public: + + ColorFloatRGBA(void) + { + fR = fG = fB = fA = 0.0f; + } + + ColorFloatRGBA(float a_fR, float a_fG, float a_fB, float a_fA) + { + fR = a_fR; + fG = a_fG; + fB = a_fB; + fA = a_fA; + } + + inline ColorFloatRGBA operator+(ColorFloatRGBA& a_rfrgba) + { + ColorFloatRGBA frgba; + frgba.fR = fR + a_rfrgba.fR; + frgba.fG = fG + a_rfrgba.fG; + frgba.fB = fB + a_rfrgba.fB; + frgba.fA = fA + a_rfrgba.fA; + return frgba; + } + + inline ColorFloatRGBA operator+(float a_f) + { + ColorFloatRGBA frgba; + frgba.fR = fR + a_f; + frgba.fG = fG + a_f; + frgba.fB = fB + a_f; + frgba.fA = fA; + return frgba; + } + + inline ColorFloatRGBA operator-(float a_f) + { + ColorFloatRGBA frgba; + frgba.fR = fR - a_f; + frgba.fG = fG - a_f; + frgba.fB = fB - a_f; + frgba.fA = fA; + return frgba; + } + + inline ColorFloatRGBA operator-(ColorFloatRGBA& a_rfrgba) + { + ColorFloatRGBA frgba; + frgba.fR = fR - a_rfrgba.fR; + frgba.fG = fG - a_rfrgba.fG; + frgba.fB = fB - a_rfrgba.fB; + frgba.fA = fA - a_rfrgba.fA; + return frgba; + } + + inline ColorFloatRGBA operator*(float a_f) + { + ColorFloatRGBA frgba; + frgba.fR = fR * a_f; + frgba.fG = fG * a_f; + frgba.fB = fB * a_f; + frgba.fA = fA; + + return frgba; + } + + inline ColorFloatRGBA ScaleRGB(float a_f) + { + ColorFloatRGBA frgba; + frgba.fR = a_f * fR; + frgba.fG = a_f * fG; + frgba.fB = a_f * fB; + frgba.fA = fA; + + return frgba; + } + + inline ColorFloatRGBA RoundRGB(void) + { + ColorFloatRGBA frgba; + frgba.fR = roundf(fR); + frgba.fG = roundf(fG); + frgba.fB = roundf(fB); + + return frgba; + } + + inline ColorFloatRGBA ToLinear() + { + ColorFloatRGBA frgbaLinear; + frgbaLinear.fR = LogToLinear(fR); + frgbaLinear.fG = LogToLinear(fG); + frgbaLinear.fB = LogToLinear(fB); + frgbaLinear.fA = fA; + + return frgbaLinear; + } + + inline ColorFloatRGBA ToLog(void) + { + ColorFloatRGBA frgbaLog; + 
frgbaLog.fR = LinearToLog(fR); + frgbaLog.fG = LinearToLog(fG); + frgbaLog.fB = LinearToLog(fB); + frgbaLog.fA = fA; + + return frgbaLog; + } + + inline static ColorFloatRGBA ConvertFromRGBA8(unsigned char a_ucR, + unsigned char a_ucG, unsigned char a_ucB, unsigned char a_ucA) + { + ColorFloatRGBA frgba; + + frgba.fR = (float)a_ucR / 255.0f; + frgba.fG = (float)a_ucG / 255.0f; + frgba.fB = (float)a_ucB / 255.0f; + frgba.fA = (float)a_ucA / 255.0f; + + return frgba; + } + + inline static ColorFloatRGBA ConvertFromRGB4(unsigned char a_ucR4, + unsigned char a_ucG4, + unsigned char a_ucB4) + { + ColorFloatRGBA frgba; + + unsigned char ucR8 = (unsigned char)((a_ucR4 << 4) + a_ucR4); + unsigned char ucG8 = (unsigned char)((a_ucG4 << 4) + a_ucG4); + unsigned char ucB8 = (unsigned char)((a_ucB4 << 4) + a_ucB4); + + frgba.fR = (float)ucR8 / 255.0f; + frgba.fG = (float)ucG8 / 255.0f; + frgba.fB = (float)ucB8 / 255.0f; + frgba.fA = 1.0f; + + return frgba; + } + + inline static ColorFloatRGBA ConvertFromRGB5(unsigned char a_ucR5, + unsigned char a_ucG5, + unsigned char a_ucB5) + { + ColorFloatRGBA frgba; + + unsigned char ucR8 = (unsigned char)((a_ucR5 << 3) + (a_ucR5 >> 2)); + unsigned char ucG8 = (unsigned char)((a_ucG5 << 3) + (a_ucG5 >> 2)); + unsigned char ucB8 = (unsigned char)((a_ucB5 << 3) + (a_ucB5 >> 2)); + + frgba.fR = (float)ucR8 / 255.0f; + frgba.fG = (float)ucG8 / 255.0f; + frgba.fB = (float)ucB8 / 255.0f; + frgba.fA = 1.0f; + + return frgba; + } + + inline static ColorFloatRGBA ConvertFromR6G7B6(unsigned char a_ucR6, + unsigned char a_ucG7, + unsigned char a_ucB6) + { + ColorFloatRGBA frgba; + + unsigned char ucR8 = (unsigned char)((a_ucR6 << 2) + (a_ucR6 >> 4)); + unsigned char ucG8 = (unsigned char)((a_ucG7 << 1) + (a_ucG7 >> 6)); + unsigned char ucB8 = (unsigned char)((a_ucB6 << 2) + (a_ucB6 >> 4)); + + frgba.fR = (float)ucR8 / 255.0f; + frgba.fG = (float)ucG8 / 255.0f; + frgba.fB = (float)ucB8 / 255.0f; + frgba.fA = 1.0f; + + return frgba; + } + + // 
quantize to 4 bits, expand to 8 bits + inline ColorFloatRGBA QuantizeR4G4B4(void) const + { + ColorFloatRGBA frgba = *this; + + // quantize to 4 bits + frgba = frgba.ClampRGB().ScaleRGB(15.0f).RoundRGB(); + unsigned int uiR4 = (unsigned int)frgba.fR; + unsigned int uiG4 = (unsigned int)frgba.fG; + unsigned int uiB4 = (unsigned int)frgba.fB; + + // expand to 8 bits + frgba.fR = (float) ((uiR4 << 4) + uiR4); + frgba.fG = (float) ((uiG4 << 4) + uiG4); + frgba.fB = (float) ((uiB4 << 4) + uiB4); + + frgba = frgba.ScaleRGB(1.0f/255.0f); + + return frgba; + } + + // quantize to 5 bits, expand to 8 bits + inline ColorFloatRGBA QuantizeR5G5B5(void) const + { + ColorFloatRGBA frgba = *this; + + // quantize to 5 bits + frgba = frgba.ClampRGB().ScaleRGB(31.0f).RoundRGB(); + unsigned int uiR5 = (unsigned int)frgba.fR; + unsigned int uiG5 = (unsigned int)frgba.fG; + unsigned int uiB5 = (unsigned int)frgba.fB; + + // expand to 8 bits + frgba.fR = (float)((uiR5 << 3) + (uiR5 >> 2)); + frgba.fG = (float)((uiG5 << 3) + (uiG5 >> 2)); + frgba.fB = (float)((uiB5 << 3) + (uiB5 >> 2)); + + frgba = frgba.ScaleRGB(1.0f / 255.0f); + + return frgba; + } + + // quantize to 6/7/6 bits, expand to 8 bits + inline ColorFloatRGBA QuantizeR6G7B6(void) const + { + ColorFloatRGBA frgba = *this; + + // quantize to 6/7/6 bits + ColorFloatRGBA frgba6 = frgba.ClampRGB().ScaleRGB(63.0f).RoundRGB(); + ColorFloatRGBA frgba7 = frgba.ClampRGB().ScaleRGB(127.0f).RoundRGB(); + unsigned int uiR6 = (unsigned int)frgba6.fR; + unsigned int uiG7 = (unsigned int)frgba7.fG; + unsigned int uiB6 = (unsigned int)frgba6.fB; + + // expand to 8 bits + frgba.fR = (float)((uiR6 << 2) + (uiR6 >> 4)); + frgba.fG = (float)((uiG7 << 1) + (uiG7 >> 6)); + frgba.fB = (float)((uiB6 << 2) + (uiB6 >> 4)); + + frgba = frgba.ScaleRGB(1.0f / 255.0f); + + return frgba; + } + + inline ColorFloatRGBA ClampRGB(void) + { + ColorFloatRGBA frgba = *this; + if (frgba.fR < 0.0f) { frgba.fR = 0.0f; } + if (frgba.fR > 1.0f) { frgba.fR = 1.0f; } + if 
(frgba.fG < 0.0f) { frgba.fG = 0.0f; } + if (frgba.fG > 1.0f) { frgba.fG = 1.0f; } + if (frgba.fB < 0.0f) { frgba.fB = 0.0f; } + if (frgba.fB > 1.0f) { frgba.fB = 1.0f; } + + return frgba; + } + + inline ColorFloatRGBA ClampRGBA(void) + { + ColorFloatRGBA frgba = *this; + if (frgba.fR < 0.0f) { frgba.fR = 0.0f; } + if (frgba.fR > 1.0f) { frgba.fR = 1.0f; } + if (frgba.fG < 0.0f) { frgba.fG = 0.0f; } + if (frgba.fG > 1.0f) { frgba.fG = 1.0f; } + if (frgba.fB < 0.0f) { frgba.fB = 0.0f; } + if (frgba.fB > 1.0f) { frgba.fB = 1.0f; } + if (frgba.fA < 0.0f) { frgba.fA = 0.0f; } + if (frgba.fA > 1.0f) { frgba.fA = 1.0f; } + + return frgba; + } + + inline int IntRed(float a_fScale) + { + return (int)roundf(fR * a_fScale); + } + + inline int IntGreen(float a_fScale) + { + return (int)roundf(fG * a_fScale); + } + + inline int IntBlue(float a_fScale) + { + return (int)roundf(fB * a_fScale); + } + + inline int IntAlpha(float a_fScale) + { + return (int)roundf(fA * a_fScale); + } + + float fR, fG, fB, fA; + }; + +} + diff --git a/thirdparty/etc2comp/EtcConfig.h b/thirdparty/etc2comp/EtcConfig.h new file mode 100644 index 0000000000..3bfe1d99a8 --- /dev/null +++ b/thirdparty/etc2comp/EtcConfig.h @@ -0,0 +1,67 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#ifdef _WIN32 +#define ETC_WINDOWS (1) +#else +#define ETC_WINDOWS (0) +#endif + +#if __APPLE__ +#define ETC_OSX (1) +#else +#define ETC_OSX (0) +#endif + +#if __unix__ +#define ETC_UNIX (1) +#else +#define ETC_UNIX (0) +#endif + + +// short names for common types +#include <stdint.h> +typedef int8_t i8; +typedef int16_t i16; +typedef int32_t i32; +typedef int64_t i64; + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +typedef float f32; +typedef double f64; + +// Keep asserts enabled in release builds during development +#undef NDEBUG + +// 0=disable. stb_image can be used if you need to compress +//other image formats like jpg +#define USE_STB_IMAGE_LOAD 0 + +#if ETC_WINDOWS +#include <sdkddkver.h> +#define _CRT_SECURE_NO_WARNINGS (1) +#include <tchar.h> +#endif + +#include <stdio.h> + diff --git a/thirdparty/etc2comp/EtcDifferentialTrys.cpp b/thirdparty/etc2comp/EtcDifferentialTrys.cpp new file mode 100644 index 0000000000..ef4cd103d9 --- /dev/null +++ b/thirdparty/etc2comp/EtcDifferentialTrys.cpp @@ -0,0 +1,173 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* +EtcDifferentialTrys.cpp + +Gathers the results of the various encoding trys for both halves of a 4x4 block for Differential mode + +*/ + +#include "EtcConfig.h" +#include "EtcDifferentialTrys.h" + +#include <assert.h> + +namespace Etc +{ + + // ---------------------------------------------------------------------------------------------------- + // construct a list of trys (encoding attempts) + // + // a_frgbaColor1 is the basecolor for the first half + // a_frgbaColor2 is the basecolor for the second half + // a_pauiPixelMapping1 is the pixel order for the first half + // a_pauiPixelMapping2 is the pixel order for the second half + // a_uiRadius is the amount to vary the base colors + // + DifferentialTrys::DifferentialTrys(ColorFloatRGBA a_frgbaColor1, ColorFloatRGBA a_frgbaColor2, + const unsigned int *a_pauiPixelMapping1, + const unsigned int *a_pauiPixelMapping2, + unsigned int a_uiRadius, + int a_iGrayOffset1, int a_iGrayOffset2) + { + assert(a_uiRadius <= MAX_RADIUS); + + m_boolSeverelyBentColors = false; + + ColorFloatRGBA frgbaQuantizedColor1 = a_frgbaColor1.QuantizeR5G5B5(); + ColorFloatRGBA frgbaQuantizedColor2 = a_frgbaColor2.QuantizeR5G5B5(); + + // quantize base colors + // ensure that trys with a_uiRadius don't overflow + int iRed1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntRed(31.0f)+a_iGrayOffset1, a_uiRadius); + int iGreen1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntGreen(31.0f) + a_iGrayOffset1, a_uiRadius); + int iBlue1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntBlue(31.0f) + a_iGrayOffset1, a_uiRadius); + int iRed2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntRed(31.0f) + a_iGrayOffset2, a_uiRadius); + int iGreen2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntGreen(31.0f) + a_iGrayOffset2, a_uiRadius); + int iBlue2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntBlue(31.0f) + a_iGrayOffset2, a_uiRadius); + + int iDeltaRed = iRed2 - iRed1; + int iDeltaGreen = iGreen2 - iGreen1; + int iDeltaBlue = iBlue2 - iBlue1; + + // make sure components 
are within range + { + if (iDeltaRed > 3) + { + if (iDeltaRed > 7) + { + m_boolSeverelyBentColors = true; + } + + iRed1 += (iDeltaRed - 3) / 2; + iRed2 = iRed1 + 3; + iDeltaRed = 3; + } + else if (iDeltaRed < -4) + { + if (iDeltaRed < -8) + { + m_boolSeverelyBentColors = true; + } + + iRed1 += (iDeltaRed + 4) / 2; + iRed2 = iRed1 - 4; + iDeltaRed = -4; + } + assert(iRed1 >= (signed)(0 + a_uiRadius) && iRed1 <= (signed)(31 - a_uiRadius)); + assert(iRed2 >= (signed)(0 + a_uiRadius) && iRed2 <= (signed)(31 - a_uiRadius)); + assert(iDeltaRed >= -4 && iDeltaRed <= 3); + + if (iDeltaGreen > 3) + { + if (iDeltaGreen > 7) + { + m_boolSeverelyBentColors = true; + } + + iGreen1 += (iDeltaGreen - 3) / 2; + iGreen2 = iGreen1 + 3; + iDeltaGreen = 3; + } + else if (iDeltaGreen < -4) + { + if (iDeltaGreen < -8) + { + m_boolSeverelyBentColors = true; + } + + iGreen1 += (iDeltaGreen + 4) / 2; + iGreen2 = iGreen1 - 4; + iDeltaGreen = -4; + } + assert(iGreen1 >= (signed)(0 + a_uiRadius) && iGreen1 <= (signed)(31 - a_uiRadius)); + assert(iGreen2 >= (signed)(0 + a_uiRadius) && iGreen2 <= (signed)(31 - a_uiRadius)); + assert(iDeltaGreen >= -4 && iDeltaGreen <= 3); + + if (iDeltaBlue > 3) + { + if (iDeltaBlue > 7) + { + m_boolSeverelyBentColors = true; + } + + iBlue1 += (iDeltaBlue - 3) / 2; + iBlue2 = iBlue1 + 3; + iDeltaBlue = 3; + } + else if (iDeltaBlue < -4) + { + if (iDeltaBlue < -8) + { + m_boolSeverelyBentColors = true; + } + + iBlue1 += (iDeltaBlue + 4) / 2; + iBlue2 = iBlue1 - 4; + iDeltaBlue = -4; + } + assert(iBlue1 >= (signed)(0+a_uiRadius) && iBlue1 <= (signed)(31 - a_uiRadius)); + assert(iBlue2 >= (signed)(0 + a_uiRadius) && iBlue2 <= (signed)(31 - a_uiRadius)); + assert(iDeltaBlue >= -4 && iDeltaBlue <= 3); + } + + m_half1.Init(iRed1, iGreen1, iBlue1, a_pauiPixelMapping1, a_uiRadius); + m_half2.Init(iRed2, iGreen2, iBlue2, a_pauiPixelMapping2, a_uiRadius); + + } + + // ---------------------------------------------------------------------------------------------------- + 
// + void DifferentialTrys::Half::Init(int a_iRed, int a_iGreen, int a_iBlue, + const unsigned int *a_pauiPixelMapping, unsigned int a_uiRadius) + { + + m_iRed = a_iRed; + m_iGreen = a_iGreen; + m_iBlue = a_iBlue; + + m_pauiPixelMapping = a_pauiPixelMapping; + m_uiRadius = a_uiRadius; + + m_uiTrys = 0; + + } + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcDifferentialTrys.h b/thirdparty/etc2comp/EtcDifferentialTrys.h new file mode 100644 index 0000000000..71860908ff --- /dev/null +++ b/thirdparty/etc2comp/EtcDifferentialTrys.h @@ -0,0 +1,97 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "EtcColorFloatRGBA.h" + +namespace Etc +{ + + class DifferentialTrys + { + public: + + static const unsigned int MAX_RADIUS = 2; + + DifferentialTrys(ColorFloatRGBA a_frgbaColor1, + ColorFloatRGBA a_frgbaColor2, + const unsigned int *a_pauiPixelMapping1, + const unsigned int *a_pauiPixelMapping2, + unsigned int a_uiRadius, + int a_iGrayOffset1, int a_iGrayOffset2); + + inline static int MoveAwayFromEdge(int a_i, int a_iDistance) + { + if (a_i < (0+ a_iDistance)) + { + return (0 + a_iDistance); + } + else if (a_i > (31- a_iDistance)) + { + return (31 - a_iDistance); + } + + return a_i; + } + + class Try + { + public : + static const unsigned int SELECTORS = 8; // per half + + int m_iRed; + int m_iGreen; + int m_iBlue; + unsigned int m_uiCW; + unsigned int m_auiSelectors[SELECTORS]; + float m_fError; + }; + + class Half + { + public: + + static const unsigned int MAX_TRYS = 125; + + void Init(int a_iRed, int a_iGreen, int a_iBlue, + const unsigned int *a_pauiPixelMapping, + unsigned int a_uiRadius); + + // center of trys + int m_iRed; + int m_iGreen; + int m_iBlue; + + const unsigned int *m_pauiPixelMapping; + unsigned int m_uiRadius; + + unsigned int m_uiTrys; + Try m_atry[MAX_TRYS]; + + Try *m_ptryBest; + }; + + Half m_half1; + Half m_half2; + + bool m_boolSeverelyBentColors; + }; + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcErrorMetric.h b/thirdparty/etc2comp/EtcErrorMetric.h new file mode 100644 index 0000000000..df4dcab4fb --- /dev/null +++ b/thirdparty/etc2comp/EtcErrorMetric.h @@ -0,0 +1,54 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace Etc +{ + + enum ErrorMetric + { + RGBA, + RGBX, + REC709, + NUMERIC, + NORMALXYZ, + // + ERROR_METRICS, + // + BT709 = REC709 + }; + + inline const char *ErrorMetricToString(ErrorMetric errorMetric) + { + switch (errorMetric) + { + case RGBA: + return "RGBA"; + case RGBX: + return "RGBX"; + case REC709: + return "REC709"; + case NUMERIC: + return "NUMERIC"; + case NORMALXYZ: + return "NORMALXYZ"; + case ERROR_METRICS: + default: + return "UNKNOWN"; + } + } +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcFile.cpp b/thirdparty/etc2comp/EtcFile.cpp new file mode 100644 index 0000000000..831a3aac45 --- /dev/null +++ b/thirdparty/etc2comp/EtcFile.cpp @@ -0,0 +1,390 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifdef _WIN32 +#define _CRT_SECURE_NO_WARNINGS (1) +#endif + +#include "EtcConfig.h" + + +#include "EtcFile.h" + +#include "EtcFileHeader.h" +#include "EtcColor.h" +#include "Etc.h" +#include "EtcBlock4x4EncodingBits.h" + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> + +using namespace Etc; + +// ---------------------------------------------------------------------------------------------------- +// +File::File(const char *a_pstrFilename, Format a_fileformat, Image::Format a_imageformat, + unsigned char *a_paucEncodingBits, unsigned int a_uiEncodingBitsBytes, + unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight, + unsigned int a_uiExtendedWidth, unsigned int a_uiExtendedHeight) +{ + if (a_pstrFilename == nullptr) + { + m_pstrFilename = const_cast<char *>(""); + } + else + { + m_pstrFilename = new char[strlen(a_pstrFilename) + 1]; + strcpy(m_pstrFilename, a_pstrFilename); + } + + m_fileformat = a_fileformat; + if (m_fileformat == Format::INFER_FROM_FILE_EXTENSION) + { + // ***** TODO: add this later ***** + m_fileformat = Format::KTX; + } + + m_imageformat = a_imageformat; + + m_uiNumMipmaps = 1; + m_pMipmapImages = new RawImage[m_uiNumMipmaps]; + m_pMipmapImages[0].paucEncodingBits = std::shared_ptr<unsigned char>(a_paucEncodingBits, [](unsigned char *p) { delete[] p; } ); + m_pMipmapImages[0].uiEncodingBitsBytes = a_uiEncodingBitsBytes; + m_pMipmapImages[0].uiExtendedWidth = a_uiExtendedWidth; + m_pMipmapImages[0].uiExtendedHeight = a_uiExtendedHeight; + + m_uiSourceWidth = a_uiSourceWidth; + m_uiSourceHeight = a_uiSourceHeight; + + switch (m_fileformat) + { + case Format::PKM: + m_pheader = new FileHeader_Pkm(this); + break; + + case Format::KTX: + m_pheader = new FileHeader_Ktx(this); + break; + + default: + assert(0); + break; + } + +} + +// ---------------------------------------------------------------------------------------------------- +// +File::File(const char *a_pstrFilename, Format a_fileformat, 
Image::Format a_imageformat, + unsigned int a_uiNumMipmaps, RawImage *a_pMipmapImages, + unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight) +{ + if (a_pstrFilename == nullptr) + { + m_pstrFilename = const_cast<char *>(""); + } + else + { + m_pstrFilename = new char[strlen(a_pstrFilename) + 1]; + strcpy(m_pstrFilename, a_pstrFilename); + } + + m_fileformat = a_fileformat; + if (m_fileformat == Format::INFER_FROM_FILE_EXTENSION) + { + // ***** TODO: add this later ***** + m_fileformat = Format::KTX; + } + + m_imageformat = a_imageformat; + + m_uiNumMipmaps = a_uiNumMipmaps; + m_pMipmapImages = new RawImage[m_uiNumMipmaps]; + + for(unsigned int mip = 0; mip < m_uiNumMipmaps; mip++) + { + m_pMipmapImages[mip] = a_pMipmapImages[mip]; + } + + m_uiSourceWidth = a_uiSourceWidth; + m_uiSourceHeight = a_uiSourceHeight; + + switch (m_fileformat) + { + case Format::PKM: + m_pheader = new FileHeader_Pkm(this); + break; + + case Format::KTX: + m_pheader = new FileHeader_Ktx(this); + break; + + default: + assert(0); + break; + } + +} + +// ---------------------------------------------------------------------------------------------------- +// +File::File(const char *a_pstrFilename, Format a_fileformat) +{ + if (a_pstrFilename == nullptr) + { + return; + } + else + { + m_pstrFilename = new char[strlen(a_pstrFilename) + 1]; + strcpy(m_pstrFilename, a_pstrFilename); + } + + m_fileformat = a_fileformat; + if (m_fileformat == Format::INFER_FROM_FILE_EXTENSION) + { + // ***** TODO: add this later ***** + m_fileformat = Format::KTX; + } + + FILE *pfile = fopen(m_pstrFilename, "rb"); + if (pfile == nullptr) + { + printf("ERROR: Couldn't open %s", m_pstrFilename); + exit(1); + } + fseek(pfile, 0, SEEK_END); + unsigned int fileSize = ftell(pfile); + fseek(pfile, 0, SEEK_SET); + size_t szResult; + + m_pheader = new FileHeader_Ktx(this); + szResult = fread( ((FileHeader_Ktx*)m_pheader)->GetData(), 1, sizeof(FileHeader_Ktx::Data), pfile); + assert(szResult > 0); + + m_uiNumMipmaps = 
1; + m_pMipmapImages = new RawImage[m_uiNumMipmaps]; + + if (((FileHeader_Ktx*)m_pheader)->GetData()->m_u32BytesOfKeyValueData > 0) + fseek(pfile, ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32BytesOfKeyValueData, SEEK_CUR); + szResult = fread(&m_pMipmapImages->uiEncodingBitsBytes, 1, sizeof(unsigned int), pfile); + assert(szResult > 0); + + m_pMipmapImages->paucEncodingBits = std::shared_ptr<unsigned char>(new unsigned char[m_pMipmapImages->uiEncodingBitsBytes], [](unsigned char *p) { delete[] p; } ); + assert(ftell(pfile) + m_pMipmapImages->uiEncodingBitsBytes <= fileSize); + szResult = fread(m_pMipmapImages->paucEncodingBits.get(), 1, m_pMipmapImages->uiEncodingBitsBytes, pfile); + assert(szResult == m_pMipmapImages->uiEncodingBitsBytes); + + uint32_t uiInternalFormat = ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32GlInternalFormat; + uint32_t uiBaseInternalFormat = ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32GlBaseInternalFormat; + + if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC1_RGB8 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC1_RGB8) + { + m_imageformat = Image::Format::ETC1; + } + else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_RGB8 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RGB8) + { + m_imageformat = Image::Format::RGB8; + } + else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_RGB8A1 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RGB8A1) + { + m_imageformat = Image::Format::RGB8A1; + } + else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_RGBA8 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RGBA8) + { + m_imageformat = Image::Format::RGBA8; + } + else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_R11 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_R11) + { + m_imageformat = 
Image::Format::R11; + } + else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_SIGNED_R11 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_R11) + { + m_imageformat = Image::Format::SIGNED_R11; + } + else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_RG11 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RG11) + { + m_imageformat = Image::Format::RG11; + } + else if (uiInternalFormat == (uint32_t)FileHeader_Ktx::InternalFormat::ETC2_SIGNED_RG11 && uiBaseInternalFormat == (uint32_t)FileHeader_Ktx::BaseInternalFormat::ETC2_RG11) + { + m_imageformat = Image::Format::SIGNED_RG11; + } + else + { + m_imageformat = Image::Format::UNKNOWN; + } + + m_uiSourceWidth = ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32PixelWidth; + m_uiSourceHeight = ((FileHeader_Ktx*)m_pheader)->GetData()->m_u32PixelHeight; + m_pMipmapImages->uiExtendedWidth = Image::CalcExtendedDimension((unsigned short)m_uiSourceWidth); + m_pMipmapImages->uiExtendedHeight = Image::CalcExtendedDimension((unsigned short)m_uiSourceHeight); + + unsigned int uiBlocks = m_pMipmapImages->uiExtendedWidth * m_pMipmapImages->uiExtendedHeight / 16; + Block4x4EncodingBits::Format encodingbitsformat = Image::DetermineEncodingBitsFormat(m_imageformat); + unsigned int expectedbytes = uiBlocks * Block4x4EncodingBits::GetBytesPerBlock(encodingbitsformat); + assert(expectedbytes == m_pMipmapImages->uiEncodingBitsBytes); + + fclose(pfile); +} + +File::~File() +{ + if (m_pMipmapImages != nullptr) + { + delete [] m_pMipmapImages; + } + + if(m_pstrFilename != nullptr) + { + delete[] m_pstrFilename; + m_pstrFilename = nullptr; + } + if (m_pheader != nullptr) + { + delete m_pheader; + m_pheader = nullptr; + } +} + +void File::UseSingleBlock(int a_iPixelX, int a_iPixelY) +{ + if (a_iPixelX <= -1 || a_iPixelY <= -1) + return; + if (a_iPixelX >(int) m_uiSourceWidth) + { + //if we are using a ktx thats the size of a single block or less 
+ //then make sure we use the 4x4 image as the single block + if (m_uiSourceWidth <= 4) + { + a_iPixelX = 0; + } + else + { + printf("blockAtHV: H coordinate out of range, capped to image width\n"); + a_iPixelX = m_uiSourceWidth - 1; + } + } + if (a_iPixelY >(int) m_uiSourceHeight) + { + //if we are using a ktx thats the size of a single block or less + //then make sure we use the 4x4 image as the single block + if (m_uiSourceHeight <= 4) + { + a_iPixelY= 0; + } + else + { + printf("blockAtHV: V coordinate out of range, capped to image height\n"); + a_iPixelY = m_uiSourceHeight - 1; + } + } + + unsigned int origWidth = m_uiSourceWidth; + unsigned int origHeight = m_uiSourceHeight; + + m_uiSourceWidth = 4; + m_uiSourceHeight = 4; + + Block4x4EncodingBits::Format encodingbitsformat = Image::DetermineEncodingBitsFormat(m_imageformat); + unsigned int uiEncodingBitsBytesPerBlock = Block4x4EncodingBits::GetBytesPerBlock(encodingbitsformat); + + int numMipmaps = 1; + RawImage* pMipmapImages = new RawImage[numMipmaps]; + pMipmapImages[0].uiExtendedWidth = Image::CalcExtendedDimension((unsigned short)m_uiSourceWidth); + pMipmapImages[0].uiExtendedHeight = Image::CalcExtendedDimension((unsigned short)m_uiSourceHeight); + pMipmapImages[0].uiEncodingBitsBytes = 0; + pMipmapImages[0].paucEncodingBits = std::shared_ptr<unsigned char>(new unsigned char[uiEncodingBitsBytesPerBlock], [](unsigned char *p) { delete[] p; }); + + //block position in pixels + // remove the bottom 2 bits to get the block coordinates + unsigned int iBlockPosX = (a_iPixelX & 0xFFFFFFFC); + unsigned int iBlockPosY = (a_iPixelY & 0xFFFFFFFC); + + int numXBlocks = (origWidth / 4); + int numYBlocks = (origHeight / 4); + + + // block location + //int iBlockX = (a_iPixelX % 4) == 0 ? a_iPixelX / 4.0f : (a_iPixelX / 4) + 1; + //int iBlockY = (a_iPixelY % 4) == 0 ? 
a_iPixelY / 4.0f : (a_iPixelY / 4) + 1; + //m_paucEncodingBits += ((iBlockY * numXBlocks) + iBlockX) * uiEncodingBitsBytesPerBlock; + + + unsigned int num = numXBlocks*numYBlocks; + unsigned int uiH = 0, uiV = 0; + unsigned char* pEncodingBits = m_pMipmapImages[0].paucEncodingBits.get(); + for (unsigned int uiBlock = 0; uiBlock < num; uiBlock++) + { + if (uiH == iBlockPosX && uiV == iBlockPosY) + { + memcpy(pMipmapImages[0].paucEncodingBits.get(),pEncodingBits, uiEncodingBitsBytesPerBlock); + break; + } + pEncodingBits += uiEncodingBitsBytesPerBlock; + uiH += 4; + + if (uiH >= origWidth) + { + uiH = 0; + uiV += 4; + } + } + + delete [] m_pMipmapImages; + m_pMipmapImages = pMipmapImages; +} +// ---------------------------------------------------------------------------------------------------- +// +void File::Write() +{ + + FILE *pfile = fopen(m_pstrFilename, "wb"); + if (pfile == nullptr) + { + printf("Error: couldn't open Etc file (%s)\n", m_pstrFilename); + exit(1); + } + + m_pheader->Write(pfile); + + for(unsigned int mip = 0; mip < m_uiNumMipmaps; mip++) + { + if(m_fileformat == Format::KTX) + { + // Write u32 image size + uint32_t u32ImageSize = m_pMipmapImages[mip].uiEncodingBitsBytes; + uint32_t szBytesWritten = fwrite(&u32ImageSize, 1, sizeof(u32ImageSize), pfile); + assert(szBytesWritten == sizeof(u32ImageSize)); + } + + unsigned int iResult = (int)fwrite(m_pMipmapImages[mip].paucEncodingBits.get(), 1, m_pMipmapImages[mip].uiEncodingBitsBytes, pfile); + if (iResult != m_pMipmapImages[mip].uiEncodingBitsBytes) + { + printf("Error: couldn't write Etc file (%s)\n", m_pstrFilename); + exit(1); + } + } + + fclose(pfile); + +} + +// ---------------------------------------------------------------------------------------------------- +// + diff --git a/thirdparty/etc2comp/EtcFile.h b/thirdparty/etc2comp/EtcFile.h new file mode 100644 index 0000000000..69bf3b2d3a --- /dev/null +++ b/thirdparty/etc2comp/EtcFile.h @@ -0,0 +1,136 @@ +/* + * Copyright 2015 The Etc2Comp 
Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "EtcColorFloatRGBA.h" +#include "EtcImage.h" +#include "Etc.h" + +namespace Etc +{ + class FileHeader; + class SourceImage; + + class File + { + public: + + enum class Format + { + INFER_FROM_FILE_EXTENSION, + PKM, + KTX, + }; + + File(const char *a_pstrFilename, Format a_fileformat, Image::Format a_imageformat, + unsigned char *a_paucEncodingBits, unsigned int a_uiEncodingBitsBytes, + unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight, + unsigned int a_uiExtendedWidth, unsigned int a_uiExtendedHeight); + + File(const char *a_pstrFilename, Format a_fileformat, Image::Format a_imageformat, + unsigned int a_uiNumMipmaps, RawImage *pMipmapImages, + unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight ); + + File(const char *a_pstrFilename, Format a_fileformat); + ~File(); + const char *GetFilename(void) { return m_pstrFilename; } + + void Read(const char *a_pstrFilename); + void Write(void); + + inline unsigned int GetSourceWidth(void) + { + return m_uiSourceWidth; + } + + inline unsigned int GetSourceHeight(void) + { + return m_uiSourceHeight; + } + + inline unsigned int GetExtendedWidth(unsigned int mipmapIndex = 0) + { + if (mipmapIndex < m_uiNumMipmaps) + { + return m_pMipmapImages[mipmapIndex].uiExtendedWidth; + } + else + { + return 0; + } + } + + inline unsigned int GetExtendedHeight(unsigned int mipmapIndex = 0) + { + if (mipmapIndex < 
m_uiNumMipmaps) + { + return m_pMipmapImages[mipmapIndex].uiExtendedHeight; + } + else + { + return 0; + } + } + + inline Image::Format GetImageFormat() + { + return m_imageformat; + } + + inline unsigned int GetEncodingBitsBytes(unsigned int mipmapIndex = 0) + { + if (mipmapIndex < m_uiNumMipmaps) + { + return m_pMipmapImages[mipmapIndex].uiEncodingBitsBytes; + } + else + { + return 0; + } + } + + inline unsigned char* GetEncodingBits(unsigned int mipmapIndex = 0) + { + if( mipmapIndex < m_uiNumMipmaps) + { + return m_pMipmapImages[mipmapIndex].paucEncodingBits.get(); + } + else + { + return nullptr; + } + } + + inline unsigned int GetNumMipmaps() + { + return m_uiNumMipmaps; + } + + void UseSingleBlock(int a_iPixelX = -1, int a_iPixelY = -1); + private: + + char *m_pstrFilename; // includes directory path and file extension + Format m_fileformat; + Image::Format m_imageformat; + FileHeader *m_pheader; + unsigned int m_uiNumMipmaps; + RawImage* m_pMipmapImages; + unsigned int m_uiSourceWidth; + unsigned int m_uiSourceHeight; + }; + +} diff --git a/thirdparty/etc2comp/EtcFileHeader.cpp b/thirdparty/etc2comp/EtcFileHeader.cpp new file mode 100644 index 0000000000..f02fcab011 --- /dev/null +++ b/thirdparty/etc2comp/EtcFileHeader.cpp @@ -0,0 +1,185 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "EtcFileHeader.h" + +#include "EtcBlock4x4EncodingBits.h" + +#include <assert.h> + +namespace Etc +{ + + // ---------------------------------------------------------------------------------------------------- + // + FileHeader_Pkm::FileHeader_Pkm(File *a_pfile) + { + m_pfile = a_pfile; + + static const char s_acMagicNumberData[4] = { 'P', 'K', 'M', ' ' }; + static const char s_acVersionData[2] = { '1', '0' }; + + for (unsigned int ui = 0; ui < sizeof(s_acMagicNumberData); ui++) + { + m_data.m_acMagicNumber[ui] = s_acMagicNumberData[ui]; + } + + for (unsigned int ui = 0; ui < sizeof(s_acVersionData); ui++) + { + m_data.m_acVersion[ui] = s_acVersionData[ui]; + } + + m_data.m_ucDataType_msb = 0; // ETC1_RGB_NO_MIPMAPS + m_data.m_ucDataType_lsb = 0; + + m_data.m_ucOriginalWidth_msb = (unsigned char)(m_pfile->GetSourceWidth() >> 8); + m_data.m_ucOriginalWidth_lsb = m_pfile->GetSourceWidth() & 0xFF; + m_data.m_ucOriginalHeight_msb = (unsigned char)(m_pfile->GetSourceHeight() >> 8); + m_data.m_ucOriginalHeight_lsb = m_pfile->GetSourceHeight() & 0xFF; + + m_data.m_ucExtendedWidth_msb = (unsigned char)(m_pfile->GetExtendedWidth() >> 8); + m_data.m_ucExtendedWidth_lsb = m_pfile->GetExtendedWidth() & 0xFF; + m_data.m_ucExtendedHeight_msb = (unsigned char)(m_pfile->GetExtendedHeight() >> 8); + m_data.m_ucExtendedHeight_lsb = m_pfile->GetExtendedHeight() & 0xFF; + + } + + // ---------------------------------------------------------------------------------------------------- + // + void FileHeader_Pkm::Write(FILE *a_pfile) + { + + fwrite(&m_data, sizeof(Data), 1, a_pfile); + + } + + // ---------------------------------------------------------------------------------------------------- + // + FileHeader_Ktx::FileHeader_Ktx(File *a_pfile) + { + m_pfile = a_pfile; + + static const uint8_t s_au8Itentfier[12] = + { + 0xAB, 0x4B, 0x54, 0x58, // first four bytes of Byte[12] identifier + 0x20, 0x31, 0x31, 0xBB, // next four bytes of Byte[12] identifier + 0x0D, 0x0A, 
0x1A, 0x0A // final four bytes of Byte[12] identifier + }; + + for (unsigned int ui = 0; ui < sizeof(s_au8Itentfier); ui++) + { + m_data.m_au8Identifier[ui] = s_au8Itentfier[ui]; + } + + m_data.m_u32Endianness = 0x04030201; + m_data.m_u32GlType = 0; + m_data.m_u32GlTypeSize = 1; + m_data.m_u32GlFormat = 0; + + switch (m_pfile->GetImageFormat()) + { + case Image::Format::RGB8: + case Image::Format::SRGB8: + m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_RGB8; + m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RGB8; + break; + + case Image::Format::RGBA8: + case Image::Format::SRGBA8: + m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_RGBA8; + m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RGBA8; + break; + + case Image::Format::RGB8A1: + case Image::Format::SRGB8A1: + m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_RGB8A1; + m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RGB8A1; + break; + + case Image::Format::R11: + m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_R11; + m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_R11; + break; + + case Image::Format::SIGNED_R11: + m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_SIGNED_R11; + m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_R11; + break; + + case Image::Format::RG11: + m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_RG11; + m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RG11; + break; + + case Image::Format::SIGNED_RG11: + m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC2_SIGNED_RG11; + m_data.m_u32GlBaseInternalFormat = (unsigned int)BaseInternalFormat::ETC2_RG11; + break; + + default: + m_data.m_u32GlInternalFormat = (unsigned int)InternalFormat::ETC1_RGB8; + m_data.m_u32GlBaseInternalFormat = (unsigned 
int)BaseInternalFormat::ETC1_RGB8; + break; + } + + m_data.m_u32PixelWidth = 0; + m_data.m_u32PixelHeight = 0; + m_data.m_u32PixelDepth = 0; + m_data.m_u32NumberOfArrayElements = 0; + m_data.m_u32NumberOfFaces = 0; + m_data.m_u32BytesOfKeyValueData = 0; + + m_pkeyvaluepair = nullptr; + + m_u32Images = 0; + m_u32KeyValuePairs = 0; + + m_data.m_u32PixelWidth = m_pfile->GetSourceWidth(); + m_data.m_u32PixelHeight = m_pfile->GetSourceHeight(); + m_data.m_u32PixelDepth = 0; + m_data.m_u32NumberOfArrayElements = 0; + m_data.m_u32NumberOfFaces = 1; + m_data.m_u32NumberOfMipmapLevels = m_pfile->GetNumMipmaps(); + + } + + // ---------------------------------------------------------------------------------------------------- + // + void FileHeader_Ktx::Write(FILE *a_pfile) + { + size_t szBytesWritten; + + // Write header + szBytesWritten = fwrite(&m_data, 1, sizeof(Data), a_pfile); + assert(szBytesWritten == sizeof(Data)); + + // Write KeyAndValuePairs + if (m_u32KeyValuePairs) + { + fwrite(m_pkeyvaluepair, m_pkeyvaluepair->u32KeyAndValueByteSize, 1, a_pfile); + } + } + + // ---------------------------------------------------------------------------------------------------- + // + FileHeader_Ktx::Data *FileHeader_Ktx::GetData() + { + return &m_data; + } + + // ---------------------------------------------------------------------------------------------------- + // +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcFileHeader.h b/thirdparty/etc2comp/EtcFileHeader.h new file mode 100644 index 0000000000..55a9cb5d9d --- /dev/null +++ b/thirdparty/etc2comp/EtcFileHeader.h @@ -0,0 +1,146 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "EtcFile.h" +#include <stdio.h> +#include <inttypes.h> + +namespace Etc +{ + + class Image; + + class FileHeader + { + public: + + virtual void Write(FILE *a_pfile) = 0; + File GetFile(); + virtual ~FileHeader(void) {} + protected: + + File *m_pfile; + }; + + // ---------------------------------------------------------------------------------------------------- + // + class FileHeader_Pkm : public FileHeader + { + public: + + FileHeader_Pkm(File *a_pfile); + + virtual void Write(FILE *a_pfile); + virtual ~FileHeader_Pkm(void) {} + private: + + typedef struct + { + char m_acMagicNumber[4]; + char m_acVersion[2]; + unsigned char m_ucDataType_msb; // e.g. 
ETC1_RGB_NO_MIPMAPS + unsigned char m_ucDataType_lsb; + unsigned char m_ucExtendedWidth_msb; // padded to 4x4 blocks + unsigned char m_ucExtendedWidth_lsb; + unsigned char m_ucExtendedHeight_msb; // padded to 4x4 blocks + unsigned char m_ucExtendedHeight_lsb; + unsigned char m_ucOriginalWidth_msb; + unsigned char m_ucOriginalWidth_lsb; + unsigned char m_ucOriginalHeight_msb; + unsigned char m_ucOriginalHeight_lsb; + } Data; + + Data m_data; + }; + + // ---------------------------------------------------------------------------------------------------- + // + class FileHeader_Ktx : public FileHeader + { + public: + + typedef struct + { + uint32_t u32KeyAndValueByteSize; + } KeyValuePair; + + typedef struct + { + uint8_t m_au8Identifier[12]; + uint32_t m_u32Endianness; + uint32_t m_u32GlType; + uint32_t m_u32GlTypeSize; + uint32_t m_u32GlFormat; + uint32_t m_u32GlInternalFormat; + uint32_t m_u32GlBaseInternalFormat; + uint32_t m_u32PixelWidth; + uint32_t m_u32PixelHeight; + uint32_t m_u32PixelDepth; + uint32_t m_u32NumberOfArrayElements; + uint32_t m_u32NumberOfFaces; + uint32_t m_u32NumberOfMipmapLevels; + uint32_t m_u32BytesOfKeyValueData; + } Data; + + enum class InternalFormat + { + ETC1_RGB8 = 0x8D64, + ETC1_ALPHA8 = ETC1_RGB8, + // + ETC2_R11 = 0x9270, + ETC2_SIGNED_R11 = 0x9271, + ETC2_RG11 = 0x9272, + ETC2_SIGNED_RG11 = 0x9273, + ETC2_RGB8 = 0x9274, + ETC2_SRGB8 = 0x9275, + ETC2_RGB8A1 = 0x9276, + ETC2_SRGB8_PUNCHTHROUGH_ALPHA1 = 0x9277, + ETC2_RGBA8 = 0x9278 + }; + + enum class BaseInternalFormat + { + ETC2_R11 = 0x1903, + ETC2_RG11 = 0x8227, + ETC1_RGB8 = 0x1907, + ETC1_ALPHA8 = ETC1_RGB8, + // + ETC2_RGB8 = 0x1907, + ETC2_RGB8A1 = 0x1908, + ETC2_RGBA8 = 0x1908, + }; + + FileHeader_Ktx(File *a_pfile); + + virtual void Write(FILE *a_pfile); + virtual ~FileHeader_Ktx(void) {} + + void AddKeyAndValue(KeyValuePair *a_pkeyvaluepair); + + Data* GetData(); + + private: + + Data m_data; + KeyValuePair *m_pkeyvaluepair; + + uint32_t m_u32Images; + uint32_t 
m_u32KeyValuePairs; + }; + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcFilter.cpp b/thirdparty/etc2comp/EtcFilter.cpp new file mode 100644 index 0000000000..bc899a533e --- /dev/null +++ b/thirdparty/etc2comp/EtcFilter.cpp @@ -0,0 +1,401 @@ +#include <stdlib.h> +#include <math.h> +#include "EtcFilter.h" + + +namespace Etc +{ + +static const double PiConst = 3.14159265358979323846; + +inline double sinc(double x) +{ + if ( x == 0.0 ) + { + return 1.0; + } + + return sin(PiConst * x) / (PiConst * x); +} + +//inline float sincf( float x ) +//{ +// x *= F_PI; +// if (x < 0.01f && x > -0.01f) +// { +// return 1.0f + x*x*(-1.0f/6.0f + x*x*1.0f/120.0f); +// } +// +// return sinf(x)/x; +//} +// +//double bessel0(double x) +//{ +// const double EPSILON_RATIO = 1E-16; +// double xh, sum, pow, ds; +// int k; +// +// xh = 0.5 * x; +// sum = 1.0; +// pow = 1.0; +// k = 0; +// ds = 1.0; +// while (ds > sum * EPSILON_RATIO) +// { +// ++k; +// pow = pow * (xh / k); +// ds = pow * pow; +// sum = sum + ds; +// } +// +// return sum; +//} + +//**-------------------------------------------------------------------------- +//** Name: kaiser(double alpha, double half_width, double x) +//** Returns: +//** Description: Alpha controls shape of filter. We are using 4. 
+//**-------------------------------------------------------------------------- +//inline double kaiser(double alpha, double half_width, double x) +//{ +// double ratio = (x / half_width); +// return bessel0(alpha * sqrt(1 - ratio * ratio)) / bessel0(alpha); +//} +// +//float Filter_Lanczos4Sinc(float x) +//{ +// if (x <= -4.0f || x >= 4.0f) // half-width of 4 +// { +// return 0.0; +// } +// +// return sinc(0.875f * x) * sinc(0.25f * x); +//} +// +//double Filter_Kaiser4( double t ) +//{ +// return kaiser( 4.0, 3.0, t); +//} +// +//double Filter_KaiserOptimal( double t ) +//{ +// return kaiser( 8.93, 3.0f, t); +//} + +double FilterLanczos3( double t ) +{ + if ( t <= -3.0 || t >= 3.0 ) + { + return 0.0; + } + + return sinc( t ) * sinc( t / 3.0 ); +} + +double FilterBox( double t ) +{ + return ( t > -0.5 && t < 0.5) ? 1.0 : 0.0; +} + +double FilterLinear( double t ) +{ + if (t < 0.0) t = -t; + + return (t < 1.0) ? (1.0 - t) : 0.0; +} + + +//**-------------------------------------------------------------------------- +//** Name: CalcContributions( int srcSize, +//** int destSize, +//** double filterSize, +//** bool wrap, +//** double (*FilterProc)(double), +//** FilterWeights contrib[] ) +//** Returns: void +//** Description: +//**-------------------------------------------------------------------------- +void CalcContributions( int srcSize, int destSize, double filterSize, bool wrap, double (*FilterProc)(double), FilterWeights contrib[] ) +{ + double scale; + double filterScale; + double center; + double totalWeight; + double weight; + int iRight; + int iLeft; + int iDest; + + scale = (double)destSize / srcSize; + if ( scale < 1.0 ) + { + filterSize = filterSize / scale; + filterScale = scale; + } + else + { + filterScale = 1.0; + } + + if ( filterSize > (double)MaxFilterSize ) + { + filterSize = (double)MaxFilterSize; + } + + for ( iDest = 0; iDest < destSize; ++iDest ) + { + center = (double)iDest / scale; + + iLeft = (int)ceil(center - filterSize); + iRight = 
(int)floor(center + filterSize); + + if ( !wrap ) + { + if ( iLeft < 0 ) + { + iLeft = 0; + } + + if ( iRight >= srcSize ) + { + iRight = srcSize - 1; + } + } + + int numWeights = iRight - iLeft + 1; + + contrib[iDest].first = iLeft; + contrib[iDest].numWeights = numWeights; + + totalWeight = 0; + double t = ((double)iLeft - center) * filterScale; + for (int i = 0; i < numWeights; i++) + { + weight = (*FilterProc)(t) * filterScale; + totalWeight += weight; + contrib[iDest].weight[i] = weight; + t += filterScale; + } + + //**-------------------------------------------------------- + //** Normalize weights by dividing by the sum of the weights + //**-------------------------------------------------------- + if ( totalWeight > 0.0 ) + { + for ( int i = 0; i < numWeights; i++) + { + contrib[iDest].weight[i] /= totalWeight; + } + } + } +} + +//**------------------------------------------------------------------------- +//** Name: Filter_TwoPass( RGBCOLOR *pSrcImage, +//** int srcWidth, int srcHeight, +//** RGBCOLOR *pDestImage, +//** int destWidth, int destHeight, +//** double (*FilterProc)(double) ) +//** Returns: 0 on failure and 1 on success +//** Description: Filters a 2d image with a two pass filter by averaging the +//** weighted contributions of the pixels within the filter region. The +//** contributions are determined by a weighting function parameter. 
+//**------------------------------------------------------------------------- +int FilterTwoPass( RGBCOLOR *pSrcImage, int srcWidth, int srcHeight, + RGBCOLOR *pDestImage, int destWidth, int destHeight, unsigned int wrapFlags, double (*FilterProc)(double) ) +{ + FilterWeights *contrib; + RGBCOLOR *pPixel; + RGBCOLOR *pSrcPixel; + RGBCOLOR *pTempImage; + int iRow; + int iCol; + int iSrcCol; + int iSrcRow; + int iWeight; + double dRed; + double dGreen; + double dBlue; + double dAlpha; + double filterSize = 3.0; + + int maxDim = (srcWidth>srcHeight)?srcWidth:srcHeight; + contrib = (FilterWeights*)malloc(maxDim * sizeof(FilterWeights)); + + //**------------------------------------------------------------------------ + //** Need to create a temporary image to stuff the horizontally scaled image + //**------------------------------------------------------------------------ + pTempImage = (RGBCOLOR *)malloc( destWidth * srcHeight * sizeof(RGBCOLOR) ); + if ( pTempImage == NULL ) + { + return 0; + } + + //**------------------------------------------------------- + //** Horizontally filter the image into the temporary image + //**------------------------------------------------------- + bool bWrapHorizontal = !!(wrapFlags&FILTER_WRAP_X); + CalcContributions( srcWidth, destWidth, filterSize, bWrapHorizontal, FilterProc, contrib ); + for ( iRow = 0; iRow < srcHeight; iRow++ ) + { + for ( iCol = 0; iCol < destWidth; iCol++ ) + { + dRed = 0; + dGreen = 0; + dBlue = 0; + dAlpha = 0; + + for ( iWeight = 0; iWeight < contrib[iCol].numWeights; iWeight++ ) + { + iSrcCol = iWeight + contrib[iCol].first; + if (bWrapHorizontal) + { + iSrcCol = (iSrcCol < 0) ? (srcWidth + iSrcCol) : (iSrcCol >= srcWidth) ? 
(iSrcCol - srcWidth) : iSrcCol; + } + pSrcPixel = pSrcImage + (iRow * srcWidth) + iSrcCol; + dRed += contrib[iCol].weight[iWeight] * pSrcPixel->rgba[0]; + dGreen += contrib[iCol].weight[iWeight] * pSrcPixel->rgba[1]; + dBlue += contrib[iCol].weight[iWeight] * pSrcPixel->rgba[2]; + dAlpha += contrib[iCol].weight[iWeight] * pSrcPixel->rgba[3]; + } + + pPixel = pTempImage + (iRow * destWidth) + iCol; + pPixel->rgba[0] = static_cast<unsigned char>(std::max(0.0, std::min(255.0, dRed))); + pPixel->rgba[1] = static_cast<unsigned char>(std::max(0.0, std::min(255.0, dGreen))); + pPixel->rgba[2] = static_cast<unsigned char>(std::max(0.0, std::min(255.0, dBlue))); + pPixel->rgba[3] = static_cast<unsigned char>(std::max(0.0, std::min(255.0, dAlpha))); + } + } + + //**------------------------------------------------------- + //** Vertically filter the image into the destination image + //**------------------------------------------------------- + bool bWrapVertical = !!(wrapFlags&FILTER_WRAP_Y); + CalcContributions(srcHeight, destHeight, filterSize, bWrapVertical, FilterProc, contrib); + for ( iCol = 0; iCol < destWidth; iCol++ ) + { + for ( iRow = 0; iRow < destHeight; iRow++ ) + { + dRed = 0; + dGreen = 0; + dBlue = 0; + dAlpha = 0; + + for ( iWeight = 0; iWeight < contrib[iRow].numWeights; iWeight++ ) + { + iSrcRow = iWeight + contrib[iRow].first; + if (bWrapVertical) + { + iSrcRow = (iSrcRow < 0) ? (srcHeight + iSrcRow) : (iSrcRow >= srcHeight) ? 
(iSrcRow - srcHeight) : iSrcRow; + } + pSrcPixel = pTempImage + (iSrcRow * destWidth) + iCol; + dRed += contrib[iRow].weight[iWeight] * pSrcPixel->rgba[0]; + dGreen += contrib[iRow].weight[iWeight] * pSrcPixel->rgba[1]; + dBlue += contrib[iRow].weight[iWeight] * pSrcPixel->rgba[2]; + dAlpha += contrib[iRow].weight[iWeight] * pSrcPixel->rgba[3]; + } + + pPixel = pDestImage + (iRow * destWidth) + iCol; + pPixel->rgba[0] = (unsigned char)(std::max( 0.0, std::min( 255.0, dRed))); + pPixel->rgba[1] = (unsigned char)(std::max( 0.0, std::min( 255.0, dGreen))); + pPixel->rgba[2] = (unsigned char)(std::max( 0.0, std::min( 255.0, dBlue))); + pPixel->rgba[3] = (unsigned char)(std::max( 0.0, std::min( 255.0, dAlpha))); + } + } + + free( pTempImage ); + free( contrib ); + + return 1; +} + +//**------------------------------------------------------------------------- +//** Name: FilterResample(RGBCOLOR *pSrcImage, int srcWidth, int srcHeight, +//** RGBCOLOR *pDstImage, int dstWidth, int dstHeight) +//** Returns: 1 +//** Description: This function runs a 2d box filter over the srouce image +//** to produce the destination image. 
+//**------------------------------------------------------------------------- +void FilterResample( RGBCOLOR *pSrcImage, int srcWidth, int srcHeight, + RGBCOLOR *pDstImage, int dstWidth, int dstHeight ) +{ + int iRow; + int iCol; + int iSampleRow; + int iSampleCol; + int iFirstSampleRow; + int iFirstSampleCol; + int iLastSampleRow; + int iLastSampleCol; + int red; + int green; + int blue; + int alpha; + int samples; + float xScale; + float yScale; + + RGBCOLOR *pSrcPixel; + RGBCOLOR *pDstPixel; + + xScale = (float)srcWidth / dstWidth; + yScale = (float)srcHeight / dstHeight; + + for ( iRow = 0; iRow < dstHeight; iRow++ ) + { + for ( iCol = 0; iCol < dstWidth; iCol++ ) + { + iFirstSampleRow = (int)(iRow * yScale); + iLastSampleRow = (int)ceil(iFirstSampleRow + yScale - 1); + if ( iLastSampleRow >= srcHeight ) + { + iLastSampleRow = srcHeight - 1; + } + + iFirstSampleCol = (int)(iCol * xScale); + iLastSampleCol = (int)ceil(iFirstSampleCol + xScale - 1); + if ( iLastSampleCol >= srcWidth ) + { + iLastSampleCol = srcWidth - 1; + } + + samples = 0; + red = 0; + green = 0; + blue = 0; + alpha = 0; + for ( iSampleRow = iFirstSampleRow; iSampleRow <= iLastSampleRow; iSampleRow++ ) + { + for ( iSampleCol = iFirstSampleCol; iSampleCol <= iLastSampleCol; iSampleCol++ ) + { + pSrcPixel = pSrcImage + iSampleRow * srcWidth + iSampleCol; + red += pSrcPixel->rgba[0]; + green += pSrcPixel->rgba[1]; + blue += pSrcPixel->rgba[2]; + alpha += pSrcPixel->rgba[3]; + + samples++; + } + } + + pDstPixel = pDstImage + iRow * dstWidth + iCol; + if ( samples > 0 ) + { + pDstPixel->rgba[0] = static_cast<uint8_t>(red / samples); + pDstPixel->rgba[1] = static_cast<uint8_t>(green / samples); + pDstPixel->rgba[2] = static_cast<uint8_t>(blue / samples); + pDstPixel->rgba[3] = static_cast<uint8_t>(alpha / samples); + } + else + { + pDstPixel->rgba[0] = static_cast<uint8_t>(red); + pDstPixel->rgba[1] = static_cast<uint8_t>(green); + pDstPixel->rgba[2] = static_cast<uint8_t>(blue); + 
pDstPixel->rgba[3] = static_cast<uint8_t>(alpha); + } + } + } +} + + +}
// ---- thirdparty/etc2comp/EtcFilter.h (new file mode 100644, index 0000000000..fcf125c6df) ----
#pragma once
#include <math.h>       // ceil() is used by the templates below
#include <stddef.h>
#include <stdint.h>
#include <algorithm>
#include <new>          // std::nothrow

namespace Etc
{

enum FilterEnums
{
    MaxFilterSize = 32
};

// Bit flags selecting the axes on which sampling wraps around the image.
enum WrapFlags
{
    FILTER_WRAP_NONE = 0,
    FILTER_WRAP_X = 0x1,
    FILTER_WRAP_Y = 0x2
};

// Contributions to one destination sample: index of the first source sample,
// number of source samples and one weight per source sample.
typedef struct tagFilterWeights
{
    int first;
    int numWeights;
    double weight[MaxFilterSize * 2 + 1];
} FilterWeights;

// Four 8-bit channels, also addressable as one 32-bit value.
typedef struct tagRGBCOLOR
{
    union
    {
        uint32_t ulColor;
        uint8_t rgba[4];
    };
} RGBCOLOR;


double FilterBox( double t );
double FilterLinear( double t );
double FilterLanczos3( double t );

int FilterTwoPass( RGBCOLOR *pSrcImage, int srcWidth, int srcHeight,
                   RGBCOLOR *pDestImage, int destWidth, int destHeight, unsigned int wrapFlags, double (*FilterProc)(double) );
void FilterResample( RGBCOLOR *pSrcImage, int srcWidth, int srcHeight,
                     RGBCOLOR *pDstImage, int dstWidth, int dstHeight );


void CalcContributions(int srcSize, int destSize, double filterSize, bool wrap, double(*FilterProc)(double), FilterWeights contrib[]);

// Maps any index into [0, size) for wrap-around sampling; size must be > 0.
inline int FilterWrapIndex(int index, int size)
{
    index %= size;
    return (index < 0) ? (index + size) : index;
}

//**-------------------------------------------------------------------------
//** Name: FilterResample(T *pSrcImage, int srcWidth, int srcHeight,
//**                      T *pDstImage, int dstWidth, int dstHeight)
//** Returns: void
//** Description: Box-filters an interleaved 4-component image. Each
//**  destination component is the average of the source components in its
//**  sample window; the window is clamped to the source image.
//**-------------------------------------------------------------------------
template <typename T>
void FilterResample(T *pSrcImage, int srcWidth, int srcHeight, T *pDstImage, int dstWidth, int dstHeight)
{
    const float xScale = (float)srcWidth / dstWidth;
    const float yScale = (float)srcHeight / dstHeight;

    for (int iRow = 0; iRow < dstHeight; iRow++)
    {
        for (int iCol = 0; iCol < dstWidth; iCol++)
        {
            const int iFirstSampleRow = (int)(iRow * yScale);
            int iLastSampleRow = (int)ceil(iFirstSampleRow + yScale - 1);
            if (iLastSampleRow >= srcHeight)
            {
                iLastSampleRow = srcHeight - 1;
            }

            const int iFirstSampleCol = (int)(iCol * xScale);
            int iLastSampleCol = (int)ceil(iFirstSampleCol + xScale - 1);
            if (iLastSampleCol >= srcWidth)
            {
                iLastSampleCol = srcWidth - 1;
            }

            int samples = 0;
            float red = 0.f;
            float green = 0.f;
            float blue = 0.f;
            float alpha = 0.f;
            for (int iSampleRow = iFirstSampleRow; iSampleRow <= iLastSampleRow; iSampleRow++)
            {
                for (int iSampleCol = iFirstSampleCol; iSampleCol <= iLastSampleCol; iSampleCol++)
                {
                    const T *pSrcPixel = pSrcImage + (iSampleRow * srcWidth + iSampleCol) * 4;
                    red += static_cast<float>(pSrcPixel[0]);
                    green += static_cast<float>(pSrcPixel[1]);
                    blue += static_cast<float>(pSrcPixel[2]);
                    alpha += static_cast<float>(pSrcPixel[3]);

                    samples++;
                }
            }

            T *pDstPixel = pDstImage + (iRow * dstWidth + iCol) * 4;
            if (samples > 0)
            {
                pDstPixel[0] = static_cast<T>(red / samples);
                pDstPixel[1] = static_cast<T>(green / samples);
                pDstPixel[2] = static_cast<T>(blue / samples);
                pDstPixel[3] = static_cast<T>(alpha / samples);
            }
            else
            {
                pDstPixel[0] = static_cast<T>(red);
                pDstPixel[1] = static_cast<T>(green);
                pDstPixel[2] = static_cast<T>(blue);
                pDstPixel[3] = static_cast<T>(alpha);
            }
        }
    }

}

//**-------------------------------------------------------------------------
//** Name: FilterTwoPass( T *pSrcImage,
//**                      int srcWidth, int srcHeight,
//**                      T *pDestImage,
//**                      int destWidth, int destHeight,
//**                      unsigned int wrapFlags,
//**                      double (*FilterProc)(double) )
//** Returns: 0 on failure and 1 on success
//** Description: Filters an interleaved 4-component 2d image with a two pass
//**  filter by averaging the weighted contributions of the pixels within the
//**  filter region.  The contributions are determined by a weighting function
//**  parameter. Results are clamped to [0, 255] before conversion to T.
//**  Failure covers invalid pointers, non-positive dimensions and allocation
//**  failure. The weight table is sized by the larger destination dimension
//**  because CalcContributions() writes one entry per destination column/row.
//**-------------------------------------------------------------------------
template <typename T>
int FilterTwoPass(T *pSrcImage, int srcWidth, int srcHeight,
                  T *pDestImage, int destWidth, int destHeight, unsigned int wrapFlags, double(*FilterProc)(double))
{
    const int numComponents = 4;
    const double filterSize = 3.0;

    if (pSrcImage == nullptr || pDestImage == nullptr || FilterProc == nullptr ||
        srcWidth <= 0 || srcHeight <= 0 || destWidth <= 0 || destHeight <= 0)
    {
        return 0;
    }

    // nothrow allocations keep the documented "0 on failure" contract
    // meaningful and work in builds that disable exceptions
    const int maxDestDim = (destWidth > destHeight) ? destWidth : destHeight;
    FilterWeights *contrib = new (std::nothrow) FilterWeights[maxDestDim];
    if (contrib == nullptr)
    {
        return 0;
    }

    //**------------------------------------------------------------------------
    //** Need to create a temporary image to stuff the horizontally scaled image
    //**------------------------------------------------------------------------
    const size_t tempCount = (size_t)destWidth * (size_t)srcHeight * (size_t)numComponents;
    T *pTempImage = new (std::nothrow) T[tempCount];
    if (pTempImage == nullptr)
    {
        delete[] contrib;
        return 0;
    }

    //**-------------------------------------------------------
    //** Horizontally filter the image into the temporary image
    //**-------------------------------------------------------
    const bool bWrapHorizontal = !!(wrapFlags&FILTER_WRAP_X);
    CalcContributions(srcWidth, destWidth, filterSize, bWrapHorizontal, FilterProc, contrib);
    for (int iRow = 0; iRow < srcHeight; iRow++)
    {
        for (int iCol = 0; iCol < destWidth; iCol++)
        {
            const FilterWeights &weights = contrib[iCol];
            double dRed = 0;
            double dGreen = 0;
            double dBlue = 0;
            double dAlpha = 0;

            for (int iWeight = 0; iWeight < weights.numWeights; iWeight++)
            {
                int iSrcCol = iWeight + weights.first;
                if (bWrapHorizontal)
                {
                    iSrcCol = FilterWrapIndex(iSrcCol, srcWidth);
                }
                const T *pSrcPixel = pSrcImage + ((size_t)iRow * (size_t)srcWidth + (size_t)iSrcCol) * numComponents;
                dRed += weights.weight[iWeight] * pSrcPixel[0];
                dGreen += weights.weight[iWeight] * pSrcPixel[1];
                dBlue += weights.weight[iWeight] * pSrcPixel[2];
                dAlpha += weights.weight[iWeight] * pSrcPixel[3];
            }

            T *pPixel = pTempImage + ((size_t)iRow * (size_t)destWidth + (size_t)iCol) * numComponents;
            pPixel[0] = static_cast<T>(std::max(0.0, std::min(255.0, dRed)));
            pPixel[1] = static_cast<T>(std::max(0.0, std::min(255.0, dGreen)));
            pPixel[2] = static_cast<T>(std::max(0.0, std::min(255.0, dBlue)));
            pPixel[3] = static_cast<T>(std::max(0.0, std::min(255.0, dAlpha)));
        }
    }

    //**-------------------------------------------------------
    //** Vertically filter the image into the destination image
    //**-------------------------------------------------------
    const bool bWrapVertical = !!(wrapFlags&FILTER_WRAP_Y);
    CalcContributions(srcHeight, destHeight, filterSize, bWrapVertical, FilterProc, contrib);
    for (int iCol = 0; iCol < destWidth; iCol++)
    {
        for (int iRow = 0; iRow < destHeight; iRow++)
        {
            const FilterWeights &weights = contrib[iRow];
            double dRed = 0;
            double dGreen = 0;
            double dBlue = 0;
            double dAlpha = 0;

            for (int iWeight = 0; iWeight < weights.numWeights; iWeight++)
            {
                int iSrcRow = iWeight + weights.first;
                if (bWrapVertical)
                {
                    iSrcRow = FilterWrapIndex(iSrcRow, srcHeight);
                }
                const T *pSrcPixel = pTempImage + ((size_t)iSrcRow * (size_t)destWidth + (size_t)iCol) * numComponents;
                dRed += weights.weight[iWeight] * pSrcPixel[0];
                dGreen += weights.weight[iWeight] * pSrcPixel[1];
                dBlue += weights.weight[iWeight] * pSrcPixel[2];
                dAlpha += weights.weight[iWeight] * pSrcPixel[3];
            }

            T *pPixel = pDestImage + ((size_t)iRow * (size_t)destWidth + (size_t)iCol) * numComponents;
            pPixel[0] = static_cast<T>(std::max(0.0, std::min(255.0, dRed)));
            pPixel[1] = static_cast<T>(std::max(0.0, std::min(255.0, dGreen)));
            pPixel[2] = static_cast<T>(std::max(0.0, std::min(255.0, dBlue)));
            pPixel[3] = static_cast<T>(std::max(0.0, std::min(255.0, dAlpha)));
        }
    }

    delete[] pTempImage;
    delete[] contrib;

    return 1;
}


}
\ No newline at end of file diff --git a/thirdparty/etc2comp/EtcImage.cpp b/thirdparty/etc2comp/EtcImage.cpp new file mode 100644 index 0000000000..7a1058844d --- /dev/null +++ b/thirdparty/etc2comp/EtcImage.cpp @@ -0,0 +1,685 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcImage.cpp + +Image is an array of 4x4 blocks that represent the encoding of the source image + +*/ + +#include "EtcConfig.h" + +#include <stdlib.h> + +#include "EtcImage.h" + +#include "Etc.h" +#include "EtcBlock4x4.h" +#include "EtcBlock4x4EncodingBits.h" +#include "EtcSortedBlockList.h" + +#if ETC_WINDOWS +#include <windows.h> +#endif +#include <ctime> +#include <chrono> +#include <future> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +// fix conflict with Block4x4::AlphaMix +#ifdef OPAQUE +#undef OPAQUE +#endif +#ifdef TRANSPARENT +#undef TRANSPARENT +#endif + +namespace Etc +{ + + // ---------------------------------------------------------------------------------------------------- + // + Image::Image(void) + { + m_encodingStatus = EncodingStatus::SUCCESS; + m_warningsToCapture = EncodingStatus::SUCCESS; + m_pafrgbaSource = nullptr; + + m_pablock = nullptr; + + m_encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN; + m_uiEncodingBitsBytes = 0; + m_paucEncodingBits = nullptr; + + m_format = Format::UNKNOWN; + m_iNumOpaquePixels = 0; + m_iNumTranslucentPixels = 0; + m_iNumTransparentPixels = 0; + } 
+ + // ---------------------------------------------------------------------------------------------------- + // constructor using source image + // used to set state before Encode() is called + // + Image::Image(float *a_pafSourceRGBA, unsigned int a_uiSourceWidth, + unsigned int a_uiSourceHeight, + ErrorMetric a_errormetric) + { + m_encodingStatus = EncodingStatus::SUCCESS; + m_warningsToCapture = EncodingStatus::SUCCESS; + m_pafrgbaSource = (ColorFloatRGBA *) a_pafSourceRGBA; + m_uiSourceWidth = a_uiSourceWidth; + m_uiSourceHeight = a_uiSourceHeight; + + m_uiExtendedWidth = CalcExtendedDimension((unsigned short)m_uiSourceWidth); + m_uiExtendedHeight = CalcExtendedDimension((unsigned short)m_uiSourceHeight); + + m_uiBlockColumns = m_uiExtendedWidth >> 2; + m_uiBlockRows = m_uiExtendedHeight >> 2; + + m_pablock = new Block4x4[GetNumberOfBlocks()]; + assert(m_pablock); + + m_format = Format::UNKNOWN; + + m_encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN; + m_uiEncodingBitsBytes = 0; + m_paucEncodingBits = nullptr; + + m_errormetric = a_errormetric; + m_fEffort = 0.0f; + + m_iEncodeTime_ms = -1; + + m_iNumOpaquePixels = 0; + m_iNumTranslucentPixels = 0; + m_iNumTransparentPixels = 0; + m_bVerboseOutput = false; + + } + + // ---------------------------------------------------------------------------------------------------- + // constructor using encoding bits + // recreates encoding state using a previously encoded image + // + Image::Image(Format a_format, + unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight, + unsigned char *a_paucEncidingBits, unsigned int a_uiEncodingBitsBytes, + Image *a_pimageSource, ErrorMetric a_errormetric) + { + m_encodingStatus = EncodingStatus::SUCCESS; + m_pafrgbaSource = nullptr; + m_uiSourceWidth = a_uiSourceWidth; + m_uiSourceHeight = a_uiSourceHeight; + + m_uiExtendedWidth = CalcExtendedDimension((unsigned short)m_uiSourceWidth); + m_uiExtendedHeight = CalcExtendedDimension((unsigned short)m_uiSourceHeight); + + 
m_uiBlockColumns = m_uiExtendedWidth >> 2; + m_uiBlockRows = m_uiExtendedHeight >> 2; + + unsigned int uiBlocks = GetNumberOfBlocks(); + + m_pablock = new Block4x4[uiBlocks]; + assert(m_pablock); + + m_format = a_format; + + m_iNumOpaquePixels = 0; + m_iNumTranslucentPixels = 0; + m_iNumTransparentPixels = 0; + + m_encodingbitsformat = DetermineEncodingBitsFormat(m_format); + if (m_encodingbitsformat == Block4x4EncodingBits::Format::UNKNOWN) + { + AddToEncodingStatus(ERROR_UNKNOWN_FORMAT); + return; + } + m_uiEncodingBitsBytes = a_uiEncodingBitsBytes; + m_paucEncodingBits = a_paucEncidingBits; + + m_errormetric = a_errormetric; + m_fEffort = 0.0f; + m_bVerboseOutput = false; + m_iEncodeTime_ms = -1; + + unsigned char *paucEncodingBits = m_paucEncodingBits; + unsigned int uiEncodingBitsBytesPerBlock = Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat); + + unsigned int uiH = 0; + unsigned int uiV = 0; + for (unsigned int uiBlock = 0; uiBlock < uiBlocks; uiBlock++) + { + m_pablock[uiBlock].InitFromEtcEncodingBits(a_format, uiH, uiV, paucEncodingBits, + a_pimageSource, a_errormetric); + paucEncodingBits += uiEncodingBitsBytesPerBlock; + uiH += 4; + if (uiH >= m_uiSourceWidth) + { + uiH = 0; + uiV += 4; + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // + Image::~Image(void) + { + if (m_pablock != nullptr) + { + delete[] m_pablock; + m_pablock = nullptr; + } + + /*if (m_paucEncodingBits != nullptr) + { + delete[] m_paucEncodingBits; + m_paucEncodingBits = nullptr; + }*/ + } + + // ---------------------------------------------------------------------------------------------------- + // encode an image + // create a set of encoding bits that conforms to a_format + // find best fit using a_errormetric + // explore a range of possible encodings based on a_fEffort (range = [0:100]) + // speed up process using a_uiJobs as the number of process threads (a_uiJobs must not excede a_uiMaxJobs) 
+ // + Image::EncodingStatus Image::Encode(Format a_format, ErrorMetric a_errormetric, float a_fEffort, unsigned int a_uiJobs, unsigned int a_uiMaxJobs) + { + + auto start = std::chrono::steady_clock::now(); + + m_encodingStatus = EncodingStatus::SUCCESS; + + m_format = a_format; + m_errormetric = a_errormetric; + m_fEffort = a_fEffort; + + if (m_errormetric < 0 || m_errormetric > ERROR_METRICS) + { + AddToEncodingStatus(ERROR_UNKNOWN_ERROR_METRIC); + return m_encodingStatus; + } + + if (m_fEffort < ETCCOMP_MIN_EFFORT_LEVEL) + { + AddToEncodingStatus(WARNING_EFFORT_OUT_OF_RANGE); + m_fEffort = ETCCOMP_MIN_EFFORT_LEVEL; + } + else if (m_fEffort > ETCCOMP_MAX_EFFORT_LEVEL) + { + AddToEncodingStatus(WARNING_EFFORT_OUT_OF_RANGE); + m_fEffort = ETCCOMP_MAX_EFFORT_LEVEL; + } + if (a_uiJobs < 1) + { + a_uiJobs = 1; + AddToEncodingStatus(WARNING_JOBS_OUT_OF_RANGE); + } + else if (a_uiJobs > a_uiMaxJobs) + { + a_uiJobs = a_uiMaxJobs; + AddToEncodingStatus(WARNING_JOBS_OUT_OF_RANGE); + } + + m_encodingbitsformat = DetermineEncodingBitsFormat(m_format); + + if (m_encodingbitsformat == Block4x4EncodingBits::Format::UNKNOWN) + { + AddToEncodingStatus(ERROR_UNKNOWN_FORMAT); + return m_encodingStatus; + } + + assert(m_paucEncodingBits == nullptr); + m_uiEncodingBitsBytes = GetNumberOfBlocks() * Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat); + m_paucEncodingBits = new unsigned char[m_uiEncodingBitsBytes]; + + InitBlocksAndBlockSorter(); + + + std::future<void> *handle = new std::future<void>[a_uiMaxJobs]; + + unsigned int uiNumThreadsNeeded = 0; + unsigned int uiUnfinishedBlocks = GetNumberOfBlocks(); + + uiNumThreadsNeeded = (uiUnfinishedBlocks < a_uiJobs) ? 
uiUnfinishedBlocks : a_uiJobs; + + for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++) + { + handle[i] = async(std::launch::async, &Image::RunFirstPass, this, i, uiNumThreadsNeeded); + } + + RunFirstPass(uiNumThreadsNeeded - 1, uiNumThreadsNeeded); + + for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++) + { + handle[i].get(); + } + + // perform effort-based encoding + if (m_fEffort > ETCCOMP_MIN_EFFORT_LEVEL) + { + unsigned int uiFinishedBlocks = 0; + unsigned int uiTotalEffortBlocks = static_cast<unsigned int>(roundf(0.01f * m_fEffort * GetNumberOfBlocks())); + + if (m_bVerboseOutput) + { + printf("effortblocks = %d\n", uiTotalEffortBlocks); + } + unsigned int uiPass = 0; + while (1) + { + if (m_bVerboseOutput) + { + uiPass++; + printf("pass %u\n", uiPass); + } + m_psortedblocklist->Sort(); + uiUnfinishedBlocks = m_psortedblocklist->GetNumberOfSortedBlocks(); + uiFinishedBlocks = GetNumberOfBlocks() - uiUnfinishedBlocks; + if (m_bVerboseOutput) + { + printf(" %u unfinished blocks\n", uiUnfinishedBlocks); + // m_psortedblocklist->Print(); + } + + + + //stop enocding when we did enough to satify the effort percentage + if (uiFinishedBlocks >= uiTotalEffortBlocks) + { + if (m_bVerboseOutput) + { + printf("Finished %d Blocks out of %d\n", uiFinishedBlocks, uiTotalEffortBlocks); + } + break; + } + + unsigned int uiIteratedBlocks = 0; + unsigned int blocksToIterateThisPass = (uiTotalEffortBlocks - uiFinishedBlocks); + uiNumThreadsNeeded = (uiUnfinishedBlocks < a_uiJobs) ? 
uiUnfinishedBlocks : a_uiJobs; + + if (uiNumThreadsNeeded <= 1) + { + //since we already how many blocks each thread will process + //cap the thread limit to do the proper amount of work, and not more + uiIteratedBlocks = IterateThroughWorstBlocks(blocksToIterateThisPass, 0, 1); + } + else + { + //we have a lot of work to do, so lets multi thread it + std::future<unsigned int> *handleToBlockEncoders = new std::future<unsigned int>[uiNumThreadsNeeded-1]; + + for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++) + { + handleToBlockEncoders[i] = async(std::launch::async, &Image::IterateThroughWorstBlocks, this, blocksToIterateThisPass, i, uiNumThreadsNeeded); + } + uiIteratedBlocks = IterateThroughWorstBlocks(blocksToIterateThisPass, uiNumThreadsNeeded - 1, uiNumThreadsNeeded); + + for (int i = 0; i < (int)uiNumThreadsNeeded - 1; i++) + { + uiIteratedBlocks += handleToBlockEncoders[i].get(); + } + + delete[] handleToBlockEncoders; + } + + if (m_bVerboseOutput) + { + printf(" %u iterated blocks\n", uiIteratedBlocks); + } + } + } + + // generate Etc2-compatible bit-format 4x4 blocks + for (int i = 0; i < (int)a_uiJobs - 1; i++) + { + handle[i] = async(std::launch::async, &Image::SetEncodingBits, this, i, a_uiJobs); + } + SetEncodingBits(a_uiJobs - 1, a_uiJobs); + + for (int i = 0; i < (int)a_uiJobs - 1; i++) + { + handle[i].get(); + } + + auto end = std::chrono::steady_clock::now(); + std::chrono::milliseconds elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start); + m_iEncodeTime_ms = (int)elapsed.count(); + + delete[] handle; + delete m_psortedblocklist; + return m_encodingStatus; + } + + // ---------------------------------------------------------------------------------------------------- + // iterate the encoding thru the blocks with the worst error + // stop when a_uiMaxBlocks blocks have been iterated + // split the blocks between the process threads using a_uiMultithreadingOffset and a_uiMultithreadingStride + // + unsigned int 
Image::IterateThroughWorstBlocks(unsigned int a_uiMaxBlocks, + unsigned int a_uiMultithreadingOffset, + unsigned int a_uiMultithreadingStride) + { + assert(a_uiMultithreadingStride > 0); + unsigned int uiIteratedBlocks = a_uiMultithreadingOffset; + + SortedBlockList::Link *plink = m_psortedblocklist->GetLinkToFirstBlock(); + for (plink = plink->Advance(a_uiMultithreadingOffset); + plink != nullptr; + plink = plink->Advance(a_uiMultithreadingStride) ) + { + if (uiIteratedBlocks >= a_uiMaxBlocks) + { + break; + } + + plink->GetBlock()->PerformEncodingIteration(m_fEffort); + + uiIteratedBlocks += a_uiMultithreadingStride; + } + + return uiIteratedBlocks; + } + + // ---------------------------------------------------------------------------------------------------- + // determine which warnings to check for during Encode() based on encoding format + // + void Image::FindEncodingWarningTypesForCurFormat() + { + TrackEncodingWarning(WARNING_ALL_TRANSPARENT_PIXELS); + TrackEncodingWarning(WARNING_SOME_RGBA_NOT_0_TO_1); + switch (m_format) + { + case Image::Format::ETC1: + case Image::Format::RGB8: + case Image::Format::SRGB8: + TrackEncodingWarning(WARNING_SOME_NON_OPAQUE_PIXELS); + TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS); + break; + + case Image::Format::RGB8A1: + case Image::Format::SRGB8A1: + TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS); + TrackEncodingWarning(WARNING_ALL_OPAQUE_PIXELS); + break; + case Image::Format::RGBA8: + case Image::Format::SRGBA8: + TrackEncodingWarning(WARNING_ALL_OPAQUE_PIXELS); + break; + + case Image::Format::R11: + case Image::Format::SIGNED_R11: + TrackEncodingWarning(WARNING_SOME_NON_OPAQUE_PIXELS); + TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS); + TrackEncodingWarning(WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO); + TrackEncodingWarning(WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO); + break; + + case Image::Format::RG11: + case Image::Format::SIGNED_RG11: + TrackEncodingWarning(WARNING_SOME_NON_OPAQUE_PIXELS); + 
TrackEncodingWarning(WARNING_SOME_TRANSLUCENT_PIXELS); + TrackEncodingWarning(WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO); + break; + case Image::Format::FORMATS: + case Image::Format::UNKNOWN: + default: + assert(0); + break; + } + } + + // ---------------------------------------------------------------------------------------------------- + // examine source pixels to check for warnings + // + void Image::FindAndSetEncodingWarnings() + { + int numPixels = (m_uiBlockRows * 4) * (m_uiBlockColumns * 4); + if (m_iNumOpaquePixels == numPixels) + { + AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_ALL_OPAQUE_PIXELS); + } + if (m_iNumOpaquePixels < numPixels) + { + AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_NON_OPAQUE_PIXELS); + } + if (m_iNumTranslucentPixels > 0) + { + AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_TRANSLUCENT_PIXELS); + } + if (m_iNumTransparentPixels == numPixels) + { + AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_ALL_TRANSPARENT_PIXELS); + } + if (m_numColorValues.fB > 0.0f) + { + AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO); + } + if (m_numColorValues.fG > 0.0f) + { + AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO); + } + + if (m_numOutOfRangeValues.fR > 0.0f || m_numOutOfRangeValues.fG > 0.0f) + { + AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_RGBA_NOT_0_TO_1); + } + if (m_numOutOfRangeValues.fB > 0.0f || m_numOutOfRangeValues.fA > 0.0f) + { + AddToEncodingStatusIfSignfigant(Image::EncodingStatus::WARNING_SOME_RGBA_NOT_0_TO_1); + } + } + + // ---------------------------------------------------------------------------------------------------- + // return a string name for a given image format + // + const char * Image::EncodingFormatToString(Image::Format a_format) + { + switch (a_format) + { + case Image::Format::ETC1: + return "ETC1"; + case 
Image::Format::RGB8: + return "RGB8"; + case Image::Format::SRGB8: + return "SRGB8"; + + case Image::Format::RGB8A1: + return "RGB8A1"; + case Image::Format::SRGB8A1: + return "SRGB8A1"; + case Image::Format::RGBA8: + return "RGBA8"; + case Image::Format::SRGBA8: + return "SRGBA8"; + + case Image::Format::R11: + return "R11"; + case Image::Format::SIGNED_R11: + return "SIGNED_R11"; + + case Image::Format::RG11: + return "RG11"; + case Image::Format::SIGNED_RG11: + return "SIGNED_RG11"; + case Image::Format::FORMATS: + case Image::Format::UNKNOWN: + default: + return "UNKNOWN"; + } + } + + // ---------------------------------------------------------------------------------------------------- + // return a string name for the image's format + // + const char * Image::EncodingFormatToString(void) + { + return EncodingFormatToString(m_format); + } + + // ---------------------------------------------------------------------------------------------------- + // init image blocks prior to encoding + // init block sorter for subsequent sortings + // check for encoding warnings + // + void Image::InitBlocksAndBlockSorter(void) + { + + FindEncodingWarningTypesForCurFormat(); + + // init each block + Block4x4 *pblock = m_pablock; + unsigned char *paucEncodingBits = m_paucEncodingBits; + for (unsigned int uiBlockRow = 0; uiBlockRow < m_uiBlockRows; uiBlockRow++) + { + unsigned int uiBlockV = uiBlockRow * 4; + + for (unsigned int uiBlockColumn = 0; uiBlockColumn < m_uiBlockColumns; uiBlockColumn++) + { + unsigned int uiBlockH = uiBlockColumn * 4; + + pblock->InitFromSource(this, uiBlockH, uiBlockV, paucEncodingBits, m_errormetric); + + paucEncodingBits += Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat); + + pblock++; + } + } + + FindAndSetEncodingWarnings(); + + // init block sorter + { + m_psortedblocklist = new SortedBlockList(GetNumberOfBlocks(), 100); + + for (unsigned int uiBlock = 0; uiBlock < GetNumberOfBlocks(); uiBlock++) + { + pblock = &m_pablock[uiBlock]; 
+ m_psortedblocklist->AddBlock(pblock); + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // run the first pass of the encoder + // the encoder generally finds a reasonable, fast encoding + // this is run on all blocks regardless of effort to ensure that all blocks have a valid encoding + // + void Image::RunFirstPass(unsigned int a_uiMultithreadingOffset, unsigned int a_uiMultithreadingStride) + { + assert(a_uiMultithreadingStride > 0); + + for (unsigned int uiBlock = a_uiMultithreadingOffset; + uiBlock < GetNumberOfBlocks(); + uiBlock += a_uiMultithreadingStride) + { + Block4x4 *pblock = &m_pablock[uiBlock]; + pblock->PerformEncodingIteration(m_fEffort); + } + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits (for the output file) based on the best encoding for each block + // + void Image::SetEncodingBits(unsigned int a_uiMultithreadingOffset, + unsigned int a_uiMultithreadingStride) + { + assert(a_uiMultithreadingStride > 0); + + for (unsigned int uiBlock = a_uiMultithreadingOffset; + uiBlock < GetNumberOfBlocks(); + uiBlock += a_uiMultithreadingStride) + { + Block4x4 *pblock = &m_pablock[uiBlock]; + pblock->SetEncodingBitsFromEncoding(); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // return the image error + // image error is the sum of all block errors + // + float Image::GetError(void) + { + float fError = 0.0f; + + for (unsigned int uiBlock = 0; uiBlock < GetNumberOfBlocks(); uiBlock++) + { + Block4x4 *pblock = &m_pablock[uiBlock]; + fError += pblock->GetError(); + } + + return fError; + } + + // ---------------------------------------------------------------------------------------------------- + // determine the encoding bits format based on the encoding format + // the encoding bits format is a family of bit encodings 
that are shared across various encoding formats + // + Block4x4EncodingBits::Format Image::DetermineEncodingBitsFormat(Format a_format) + { + Block4x4EncodingBits::Format encodingbitsformat; + + // determine encoding bits format from image format + switch (a_format) + { + case Format::ETC1: + case Format::RGB8: + case Format::SRGB8: + encodingbitsformat = Block4x4EncodingBits::Format::RGB8; + break; + + case Format::RGBA8: + case Format::SRGBA8: + encodingbitsformat = Block4x4EncodingBits::Format::RGBA8; + break; + + case Format::R11: + case Format::SIGNED_R11: + encodingbitsformat = Block4x4EncodingBits::Format::R11; + break; + + case Format::RG11: + case Format::SIGNED_RG11: + encodingbitsformat = Block4x4EncodingBits::Format::RG11; + break; + + case Format::RGB8A1: + case Format::SRGB8A1: + encodingbitsformat = Block4x4EncodingBits::Format::RGB8A1; + break; + + default: + encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN; + break; + } + + return encodingbitsformat; + } + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcImage.h b/thirdparty/etc2comp/EtcImage.h new file mode 100644 index 0000000000..bd807ac32e --- /dev/null +++ b/thirdparty/etc2comp/EtcImage.h @@ -0,0 +1,249 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +//#include "Etc.h" +#include "EtcColorFloatRGBA.h" +#include "EtcBlock4x4EncodingBits.h" +#include "EtcErrorMetric.h" + + +namespace Etc +{ + class Block4x4; + class EncoderSpec; + class SortedBlockList; + + class Image + { + public: + + //the differnt warning and errors that can come up during encoding + enum EncodingStatus + { + SUCCESS = 0, + // + WARNING_THRESHOLD = 1 << 0, + // + WARNING_EFFORT_OUT_OF_RANGE = 1 << 1, + WARNING_JOBS_OUT_OF_RANGE = 1 << 2, + WARNING_SOME_NON_OPAQUE_PIXELS = 1 << 3,//just for opaque formats, etc1, rgb8, r11, rg11 + WARNING_ALL_OPAQUE_PIXELS = 1 << 4, + WARNING_ALL_TRANSPARENT_PIXELS = 1 << 5, + WARNING_SOME_TRANSLUCENT_PIXELS = 1 << 6,//just for rgb8A1 + WARNING_SOME_RGBA_NOT_0_TO_1 = 1 << 7, + WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO = 1 << 8, + WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO = 1 << 9, + // + ERROR_THRESHOLD = 1 << 16, + // + ERROR_UNKNOWN_FORMAT = 1 << 17, + ERROR_UNKNOWN_ERROR_METRIC = 1 << 18, + ERROR_ZERO_WIDTH_OR_HEIGHT = 1 << 19, + // + }; + + enum class Format + { + UNKNOWN, + // + ETC1, + // + // ETC2 formats + RGB8, + SRGB8, + RGBA8, + SRGBA8, + R11, + SIGNED_R11, + RG11, + SIGNED_RG11, + RGB8A1, + SRGB8A1, + // + FORMATS, + // + DEFAULT = SRGB8 + }; + + // constructor using source image + Image(float *a_pafSourceRGBA, unsigned int a_uiSourceWidth, + unsigned int a_uiSourceHeight, + ErrorMetric a_errormetric); + + // constructor using encoding bits + Image(Format a_format, + unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight, + unsigned char *a_paucEncidingBits, unsigned int a_uiEncodingBitsBytes, + Image *a_pimageSource, + ErrorMetric a_errormetric); + + ~Image(void); + + EncodingStatus Encode(Format a_format, ErrorMetric a_errormetric, float a_fEffort, + unsigned int a_uiJobs, unsigned int a_uiMaxJobs); + + inline void AddToEncodingStatus(EncodingStatus a_encStatus) + { + m_encodingStatus = (EncodingStatus)((unsigned int)m_encodingStatus | (unsigned int)a_encStatus); + } + + 
inline unsigned int GetSourceWidth(void) + { + return m_uiSourceWidth; + } + + inline unsigned int GetSourceHeight(void) + { + return m_uiSourceHeight; + } + + inline unsigned int GetExtendedWidth(void) + { + return m_uiExtendedWidth; + } + + inline unsigned int GetExtendedHeight(void) + { + return m_uiExtendedHeight; + } + + inline unsigned int GetNumberOfBlocks() + { + return m_uiBlockColumns * m_uiBlockRows; + } + + inline Block4x4 * GetBlocks() + { + return m_pablock; + } + + inline unsigned char * GetEncodingBits(void) + { + return m_paucEncodingBits; + } + + inline unsigned int GetEncodingBitsBytes(void) + { + return m_uiEncodingBitsBytes; + } + + inline int GetEncodingTimeMs(void) + { + return m_iEncodeTime_ms; + } + + float GetError(void); + + inline ColorFloatRGBA * GetSourcePixel(unsigned int a_uiH, unsigned int a_uiV) + { + if (a_uiH >= m_uiSourceWidth || a_uiV >= m_uiSourceHeight) + { + return nullptr; + } + + return &m_pafrgbaSource[a_uiV*m_uiSourceWidth + a_uiH]; + } + + inline Format GetFormat(void) + { + return m_format; + } + + static Block4x4EncodingBits::Format DetermineEncodingBitsFormat(Format a_format); + + inline static unsigned short CalcExtendedDimension(unsigned short a_ushOriginalDimension) + { + return (unsigned short)((a_ushOriginalDimension + 3) & ~3); + } + + inline ErrorMetric GetErrorMetric(void) + { + return m_errormetric; + } + + static const char * EncodingFormatToString(Image::Format a_format); + const char * EncodingFormatToString(void); + //used to get basic information about the image data + int m_iNumOpaquePixels; + int m_iNumTranslucentPixels; + int m_iNumTransparentPixels; + + ColorFloatRGBA m_numColorValues; + ColorFloatRGBA m_numOutOfRangeValues; + + bool m_bVerboseOutput; + private: + //add a warning or error to check for while encoding + inline void TrackEncodingWarning(EncodingStatus a_encStatus) + { + m_warningsToCapture = (EncodingStatus)((unsigned int)m_warningsToCapture | (unsigned int)a_encStatus); + } + + 
//report the warning if it is something we care about for this encoding + inline void AddToEncodingStatusIfSignfigant(EncodingStatus a_encStatus) + { + if ((EncodingStatus)((unsigned int)m_warningsToCapture & (unsigned int)a_encStatus) == a_encStatus) + { + AddToEncodingStatus(a_encStatus); + } + } + + Image(void); + void FindEncodingWarningTypesForCurFormat(); + void FindAndSetEncodingWarnings(); + + void InitBlocksAndBlockSorter(void); + + void RunFirstPass(unsigned int a_uiMultithreadingOffset, + unsigned int a_uiMultithreadingStride); + + void SetEncodingBits(unsigned int a_uiMultithreadingOffset, + unsigned int a_uiMultithreadingStride); + + unsigned int IterateThroughWorstBlocks(unsigned int a_uiMaxBlocks, + unsigned int a_uiMultithreadingOffset, + unsigned int a_uiMultithreadingStride); + + // inputs + ColorFloatRGBA *m_pafrgbaSource; + unsigned int m_uiSourceWidth; + unsigned int m_uiSourceHeight; + unsigned int m_uiExtendedWidth; + unsigned int m_uiExtendedHeight; + unsigned int m_uiBlockColumns; + unsigned int m_uiBlockRows; + // intermediate data + Block4x4 *m_pablock; + // encoding + Format m_format; + Block4x4EncodingBits::Format m_encodingbitsformat; + unsigned int m_uiEncodingBitsBytes; // for entire image + unsigned char *m_paucEncodingBits; + ErrorMetric m_errormetric; + float m_fEffort; + // stats + int m_iEncodeTime_ms; + + SortedBlockList *m_psortedblocklist; + //this will hold any warning or errors that happen during encoding + EncodingStatus m_encodingStatus; + //these will be the warnings we are tracking + EncodingStatus m_warningsToCapture; + }; + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcIndividualTrys.cpp b/thirdparty/etc2comp/EtcIndividualTrys.cpp new file mode 100644 index 0000000000..56ff4c65ec --- /dev/null +++ b/thirdparty/etc2comp/EtcIndividualTrys.cpp @@ -0,0 +1,85 @@ +/* + * Copyright 2015 The Etc2Comp Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcIndividualTrys.cpp + +Gathers the results of the various encoding trys for both halves of a 4x4 block for Individual mode + +*/ + +#include "EtcConfig.h" +#include "EtcIndividualTrys.h" + +#include <assert.h> + +namespace Etc +{ + + // ---------------------------------------------------------------------------------------------------- + // construct a list of trys (encoding attempts) + // + // a_frgbaColor1 is the basecolor for the first half + // a_frgbaColor2 is the basecolor for the second half + // a_pauiPixelMapping1 is the pixel order for the first half + // a_pauiPixelMapping2 is the pixel order for the second half + // a_uiRadius is the amount to vary the base colors + // + IndividualTrys::IndividualTrys(ColorFloatRGBA a_frgbaColor1, ColorFloatRGBA a_frgbaColor2, + const unsigned int *a_pauiPixelMapping1, + const unsigned int *a_pauiPixelMapping2, + unsigned int a_uiRadius) + { + assert(a_uiRadius <= MAX_RADIUS); + + ColorFloatRGBA frgbaQuantizedColor1 = a_frgbaColor1.QuantizeR4G4B4(); + ColorFloatRGBA frgbaQuantizedColor2 = a_frgbaColor2.QuantizeR4G4B4(); + + // quantize base colors + // ensure that trys with a_uiRadius don't overflow + int iRed1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntRed(15.0f), a_uiRadius); + int iGreen1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntGreen(15.0f), a_uiRadius); + int iBlue1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntBlue(15.0f), a_uiRadius); + int 
iRed2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntRed(15.0f), a_uiRadius); + int iGreen2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntGreen(15.0f), a_uiRadius); + int iBlue2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntBlue(15.0f), a_uiRadius); + + m_half1.Init(iRed1, iGreen1, iBlue1, a_pauiPixelMapping1, a_uiRadius); + m_half2.Init(iRed2, iGreen2, iBlue2, a_pauiPixelMapping2, a_uiRadius); + + } + + // ---------------------------------------------------------------------------------------------------- + // + void IndividualTrys::Half::Init(int a_iRed, int a_iGreen, int a_iBlue, + const unsigned int *a_pauiPixelMapping, unsigned int a_uiRadius) + { + + m_iRed = a_iRed; + m_iGreen = a_iGreen; + m_iBlue = a_iBlue; + + m_pauiPixelMapping = a_pauiPixelMapping; + m_uiRadius = a_uiRadius; + + m_uiTrys = 0; + + } + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcIndividualTrys.h b/thirdparty/etc2comp/EtcIndividualTrys.h new file mode 100644 index 0000000000..5fb12fbcf4 --- /dev/null +++ b/thirdparty/etc2comp/EtcIndividualTrys.h @@ -0,0 +1,95 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "EtcColorFloatRGBA.h" + +namespace Etc +{ + + class IndividualTrys + { + public: + + static const unsigned int MAX_RADIUS = 1; + + IndividualTrys(ColorFloatRGBA a_frgbaColor1, + ColorFloatRGBA a_frgbaColor2, + const unsigned int *a_pauiPixelMapping1, + const unsigned int *a_pauiPixelMapping2, + unsigned int a_uiRadius); + + inline static int MoveAwayFromEdge(int a_i, int a_iDistance) + { + if (a_i < (0+ a_iDistance)) + { + return (0 + a_iDistance); + } + else if (a_i > (15- a_iDistance)) + { + return (15 - a_iDistance); + } + + return a_i; + } + + class Try + { + public : + static const unsigned int SELECTORS = 8; // per half + + int m_iRed; + int m_iGreen; + int m_iBlue; + unsigned int m_uiCW; + unsigned int m_auiSelectors[SELECTORS]; + float m_fError; + }; + + class Half + { + public: + + static const unsigned int MAX_TRYS = 27; + + void Init(int a_iRed, int a_iGreen, int a_iBlue, + const unsigned int *a_pauiPixelMapping, + unsigned int a_uiRadius); + + // center of trys + int m_iRed; + int m_iGreen; + int m_iBlue; + + const unsigned int *m_pauiPixelMapping; + unsigned int m_uiRadius; + + unsigned int m_uiTrys; + Try m_atry[MAX_TRYS]; + + Try *m_ptryBest; + }; + + Half m_half1; + Half m_half2; + + }; + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcMath.cpp b/thirdparty/etc2comp/EtcMath.cpp new file mode 100644 index 0000000000..096d5f7ab9 --- /dev/null +++ b/thirdparty/etc2comp/EtcMath.cpp @@ -0,0 +1,64 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "EtcConfig.h" +#include "EtcMath.h" + +namespace Etc +{ + + // ---------------------------------------------------------------------------------------------------- + // calculate the line that best fits the set of XY points contained in a_afX[] and a_afY[] + // use a_fSlope and a_fOffset to define that line + // + bool Regression(float a_afX[], float a_afY[], unsigned int a_Points, + float *a_fSlope, float *a_fOffset) + { + float fPoints = (float)a_Points; + + float fSumX = 0.0f; + float fSumY = 0.0f; + float fSumXY = 0.0f; + float fSumX2 = 0.0f; + + for (unsigned int uiPoint = 0; uiPoint < a_Points; uiPoint++) + { + fSumX += a_afX[uiPoint]; + fSumY += a_afY[uiPoint]; + fSumXY += a_afX[uiPoint] * a_afY[uiPoint]; + fSumX2 += a_afX[uiPoint] * a_afX[uiPoint]; + } + + float fDivisor = fPoints*fSumX2 - fSumX*fSumX; + + // if vertical line + if (fDivisor == 0.0f) + { + *a_fSlope = 0.0f; + *a_fOffset = 0.0f; + return true; + } + + *a_fSlope = (fPoints*fSumXY - fSumX*fSumY) / fDivisor; + *a_fOffset = (fSumY - (*a_fSlope)*fSumX) / fPoints; + + return false; + } + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcMath.h b/thirdparty/etc2comp/EtcMath.h new file mode 100644 index 0000000000..c58c9a91bc --- /dev/null +++ b/thirdparty/etc2comp/EtcMath.h @@ -0,0 +1,40 @@ +/* + * Copyright 2015 The Etc2Comp Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <math.h> + +namespace Etc +{ + + // ---------------------------------------------------------------------------------------------------- + // return true if vertical line + bool Regression(float a_afX[], float a_afY[], unsigned int a_Points, + float *a_fSlope, float *a_fOffset); + + inline float ConvertMSEToPSNR(float a_fMSE) + { + if (a_fMSE == 0.0f) + { + return INFINITY; + } + + return 10.0f * log10f(1.0f / a_fMSE); + } + + +} diff --git a/thirdparty/etc2comp/EtcSortedBlockList.cpp b/thirdparty/etc2comp/EtcSortedBlockList.cpp new file mode 100644 index 0000000000..bfa6b7b3fa --- /dev/null +++ b/thirdparty/etc2comp/EtcSortedBlockList.cpp @@ -0,0 +1,228 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* +EtcSortedBlockList.cpp + +SortedBlockList is a list of 4x4 blocks that can be used by the "effort" system to prioritize +the encoding of the 4x4 blocks. + +The sorting is done with buckets, where each bucket is an indication of how much error each 4x4 block has + +*/ + +#include "EtcConfig.h" +#include "EtcSortedBlockList.h" + +#include "EtcBlock4x4.h" + +#include <stdio.h> +#include <string.h> +#include <assert.h> + +namespace Etc +{ + + // ---------------------------------------------------------------------------------------------------- + // construct an empty list + // + // allocate enough memory to add all of the image's 4x4 blocks later + // allocate enough buckets to sort the blocks + // + SortedBlockList::SortedBlockList(unsigned int a_uiImageBlocks, unsigned int a_uiBuckets) + { + m_uiImageBlocks = a_uiImageBlocks; + m_iBuckets = (int)a_uiBuckets; + + m_uiAddedBlocks = 0; + m_uiSortedBlocks = 0; + m_palinkPool = new Link[m_uiImageBlocks]; + m_pabucket = new Bucket[m_iBuckets]; + m_fMaxError = 0.0f; + + InitBuckets(); + + } + + // ---------------------------------------------------------------------------------------------------- + // + SortedBlockList::~SortedBlockList(void) + { + delete[] m_palinkPool; + delete[] m_pabucket; + } + + // ---------------------------------------------------------------------------------------------------- + // add a 4x4 block to the list + // the 4x4 block will be sorted later + // + void SortedBlockList::AddBlock(Block4x4 *a_pblock) + { + assert(m_uiAddedBlocks < m_uiImageBlocks); + Link *plink = &m_palinkPool[m_uiAddedBlocks++]; + plink->Init(a_pblock); + } + + // ---------------------------------------------------------------------------------------------------- + // sort all of the 4x4 blocks that have been added to the list + // + // first, determine the maximum error, then assign an error range to each bucket + // next, determine which bucket each 4x4 block belongs to based on the 4x4 block's error + // add 
the 4x4 block to the appropriate bucket + // lastly, walk thru the buckets and add each bucket to a sorted linked list + // + // the resultant sorting is an approximate sorting from most to least error + // + void SortedBlockList::Sort(void) + { + assert(m_uiAddedBlocks == m_uiImageBlocks); + InitBuckets(); + + // find max block error + m_fMaxError = -1.0f; + + for (unsigned int uiLink = 0; uiLink < m_uiAddedBlocks; uiLink++) + { + Link *plinkBlock = &m_palinkPool[uiLink]; + + float fBlockError = plinkBlock->GetBlock()->GetError(); + if (fBlockError > m_fMaxError) + { + m_fMaxError = fBlockError; + } + } + // prevent divide by zero or divide by negative + if (m_fMaxError <= 0.0f) + { + m_fMaxError = 1.0f; + } + //used for debugging + //int numDone = 0; + // put all of the blocks with unfinished encodings into the appropriate bucket + m_uiSortedBlocks = 0; + for (unsigned int uiLink = 0; uiLink < m_uiAddedBlocks; uiLink++) + { + Link *plinkBlock = &m_palinkPool[uiLink]; + + // if the encoding is done, don't add it to the list + if (plinkBlock->GetBlock()->GetEncoding()->IsDone()) + { + //numDone++; + continue; + } + + // calculate the appropriate sort bucket + float fBlockError = plinkBlock->GetBlock()->GetError(); + int iBucket = (int) floorf(m_iBuckets * fBlockError / m_fMaxError); + // clamp to bucket index + iBucket = iBucket < 0 ? 0 : iBucket >= m_iBuckets ? 
m_iBuckets - 1 : iBucket; + + // add block to bucket + { + Bucket *pbucket = &m_pabucket[iBucket]; + if (pbucket->plinkLast) + { + pbucket->plinkLast->SetNext(plinkBlock); + pbucket->plinkLast = plinkBlock; + } + else + { + pbucket->plinkFirst = pbucket->plinkLast = plinkBlock; + } + plinkBlock->SetNext(nullptr); + } + + m_uiSortedBlocks++; + + if (0) + { + printf("%u: e=%.3f\n", uiLink, fBlockError); + Print(); + printf("\n\n\n"); + } + } + //printf("num blocks already done: %d\n",numDone); + //link the blocks together across buckets + m_plinkFirst = nullptr; + m_plinkLast = nullptr; + for (int iBucket = m_iBuckets - 1; iBucket >= 0; iBucket--) + { + Bucket *pbucket = &m_pabucket[iBucket]; + + if (pbucket->plinkFirst) + { + if (m_plinkFirst == nullptr) + { + m_plinkFirst = pbucket->plinkFirst; + } + else + { + assert(pbucket->plinkLast->GetNext() == nullptr); + m_plinkLast->SetNext(pbucket->plinkFirst); + } + + m_plinkLast = pbucket->plinkLast; + } + } + + + } + + // ---------------------------------------------------------------------------------------------------- + // clear all of the buckets. 
normally done in preparation for a sort + // + void SortedBlockList::InitBuckets(void) + { + for (int iBucket = 0; iBucket < m_iBuckets; iBucket++) + { + Bucket *pbucket = &m_pabucket[iBucket]; + + pbucket->plinkFirst = 0; + pbucket->plinkLast = 0; + } + } + + // ---------------------------------------------------------------------------------------------------- + // print out the list of sorted 4x4 blocks + // normally used for debugging + // + void SortedBlockList::Print(void) + { + for (int iBucket = m_iBuckets-1; iBucket >= 0; iBucket--) + { + Bucket *pbucket = &m_pabucket[iBucket]; + + unsigned int uiBlocks = 0; + for (Link *plink = pbucket->plinkFirst; plink != nullptr; plink = plink->GetNext() ) + { + uiBlocks++; + + if (plink == pbucket->plinkLast) + { + break; + } + } + + float fBucketError = m_fMaxError * iBucket / m_iBuckets; + float fBucketRMS = sqrtf(fBucketError / (4.0f*16.0f) ); + printf("%3d: e=%.3f rms=%.6f %u\n", iBucket, fBucketError, fBucketRMS, uiBlocks); + } + } + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/thirdparty/etc2comp/EtcSortedBlockList.h b/thirdparty/etc2comp/EtcSortedBlockList.h new file mode 100644 index 0000000000..960e8adc34 --- /dev/null +++ b/thirdparty/etc2comp/EtcSortedBlockList.h @@ -0,0 +1,124 @@ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +namespace Etc +{ + class Block4x4; + + class SortedBlockList + { + public: + + class Link + { + public: + + inline void Init(Block4x4 *a_pblock) + { + m_pblock = a_pblock; + m_plinkNext = nullptr; + } + + inline Block4x4 * GetBlock(void) + { + return m_pblock; + } + + inline void SetNext(Link *a_plinkNext) + { + m_plinkNext = a_plinkNext; + } + + inline Link * GetNext(void) + { + return m_plinkNext; + } + + inline Link * Advance(unsigned int a_uiSteps = 1) + { + Link *plink = this; + + for (unsigned int uiStep = 0; uiStep < a_uiSteps; uiStep++) + { + if (plink == nullptr) + { + break; + } + + plink = plink->m_plinkNext; + } + + return plink; + } + + private: + + Block4x4 *m_pblock; + Link *m_plinkNext; + }; + + SortedBlockList(unsigned int a_uiImageBlocks, unsigned int a_uiBuckets); + ~SortedBlockList(void); + + void AddBlock(Block4x4 *a_pblock); + + void Sort(void); + + inline Link * GetLinkToFirstBlock(void) + { + return m_plinkFirst; + } + + inline unsigned int GetNumberOfAddedBlocks(void) + { + return m_uiAddedBlocks; + } + + inline unsigned int GetNumberOfSortedBlocks(void) + { + return m_uiSortedBlocks; + } + + void Print(void); + + private: + + void InitBuckets(void); + + class Bucket + { + public: + Link *plinkFirst; + Link *plinkLast; + }; + + unsigned int m_uiImageBlocks; + int m_iBuckets; + + unsigned int m_uiAddedBlocks; + unsigned int m_uiSortedBlocks; + Link *m_palinkPool; + Bucket *m_pabucket; + float m_fMaxError; + + Link *m_plinkFirst; + Link *m_plinkLast; + + }; + +} // namespace Etc diff --git a/thirdparty/etc2comp/LICENSE b/thirdparty/etc2comp/LICENSE new file mode 100644 index 0000000000..75b52484ea --- /dev/null +++ b/thirdparty/etc2comp/LICENSE @@ -0,0 +1,202 @@ +
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/thirdparty/etc2comp/README.md b/thirdparty/etc2comp/README.md new file mode 100644 index 0000000000..1c70ae9f4e --- /dev/null +++ b/thirdparty/etc2comp/README.md @@ -0,0 +1,197 @@ +# Etc2Comp - Texture to ETC2 compressor
+
+Etc2Comp is a command line tool that converts textures (e.g. bitmaps)
+into the [ETC2](https://en.wikipedia.org/wiki/Ericsson_Texture_Compression)
+format. The tool is built with a focus on encoding performance
+to reduce the amount of time required to compile asset heavy applications as
+well as reduce overall application size.
+
+This repo provides source code that can be compiled into a binary. The
+binary can then be used to convert textures to the ETC2 format.
+
+Important: This is not an official Google product. It is an experimental
+library published as-is. Please see the CONTRIBUTORS.md file for information
+about questions or issues.
+
+## Setup
+This project uses [CMake](https://cmake.org/) to generate platform-specific
+build files:
+ - Linux: make files
+ - OS X: Xcode workspace files
+ - Microsoft Windows: Visual Studio solution files
+ - Note: CMake supports other formats, but this doc only provides steps for
+ one of each platform for brevity.
+
+Refer to each platform's setup section to set up your environment and build
+an Etc2Comp binary. Then skip to the usage section of this page for examples
+of how to use the library.
+
+### Setup for OS X
+ build tested on this config:
+ OS X 10.9.5 i7 16GB RAM
+ Xcode 5.1.1
+ cmake 3.2.3
+
+Start by downloading and installing the following components if they are not
+already installed on your development machine.
+ - *Xcode* version 5.1.1, or greater
+ - [CMake](https://cmake.org/download/) version 3.2.3, or greater
+
+To build the Etc2Comp binary:
+ 1. Open a *Terminal* window and navigate to the project directory.
+ 1. Run `mkdir build_xcode`
+ 1. Run `cd build_xcode`
+ 1. Run `cmake -G Xcode ../`
+ 1. Open *Xcode* and import the `build_xcode/EtcTest.xcodeproj` file.
+ 1. Open the Product menu and choose Build For -> Running.
+ 1. Once the build succeeds the binary located at `build_xcode/EtcTool/Debug/EtcTool`
+can be executed.
+
+Optional
+Xcode EtcTool ‘Run’ preferences
+note: if the build_xcode/EtcTest.xcodeproj is manually deleted then some Xcode preferences
+will need to be set by hand after cmake is run (these prefs are retained across
+cmake updates if the .xcodeproj is not deleted/removed)
+
+1. Set the active scheme to ‘EtcTool’
+1. Edit the scheme
+1. Select option ‘Run EtcTool’, then tab ‘Arguments’.
+Add this launch argument: ‘-argfile ../../EtcTool/args.txt’
+1. Select tab ‘Options’ and set a custom working directory to: ‘$(SRCROOT)/build_xcode/EtcTool’
+
+### Setup for Windows
+
+1. Open a *Terminal* window and navigate to the project directory.
+1. Run `mkdir build_vs`
+1. Run `cd build_vs`
+1. Run CMAKE, noting what build version you need, and pointing to the parent directory as the source root;
+ For VS 2013 : `cmake -G "Visual Studio 12 2013 Win64" ../`
+ For VS 2015 : `cmake -G "Visual Studio 14 2015 Win64" ../`
+ NOTE: To see what supported Visual Studio outputs there are, run `cmake -G`
+1. open the 'EtcTest' solution
+1. make the 'EtcTool' project the start up project
+1. (optional) in the project properties, under 'Debugging ->command arguments'
+add the argfile text file that's included in the EtcTool directory.
+example: -argfile C:\etc2\EtcTool\Args.txt
+
+### Setup for Linux
+The Linux build was tested on this config:
+ Ubuntu desktop 14.04
+ gcc/g++ 4.8
+ cmake 2.8.12.2
+
+1. Verify that Linux has cmake and a C++11-capable g++ installed
+1. Open shell
+1. Run `mkdir build_linux`
+1. Run `cd build_linux`
+1. Run `cmake ../`
+1. Run `make`
+1. navigate to the newly created EtcTool directory `cd EtcTool`
+1. run the executable: `./EtcTool -argfile ../../EtcTool/args.txt`
+
+Skip to the <a href="#usage">Usage</a> section for more information about using the
+tool.
+
+## Usage
+
+### Command Line Usage
+EtcTool can be run from the command line with the following usage:
+ etctool.exe source_image [options ...] -output encoded_image
+
+The encoder will use an array of RGBA floats read from the source_image to create
+an ETC1 or ETC2 encoded image in encoded_image. The RGBA floats should be in the
+range [0:1].
+
+Options:
+
+ -analyze <analysis_folder>
+ -argfile <arg_file> additional command line arguments read from a file
+ -blockAtHV <H V> encodes a single block that contains the
+ pixel specified by the H V coordinates
+ -compare <comparison_image> compares source_image to comparison_image
+ -effort <amount> number between 0 and 100 to specify the encoding quality
+ (100 is the highest quality)
+ -errormetric <error_metric> specify the error metric, the options are
+ rgba, rgbx, rec709, numeric and normalxyz
+ -format <etc_format> ETC1, RGB8, SRGB8, RGBA8, SRGBA8, RGB8A1,
+ SRGB8A1 or R11
+ -help prints this message
+ -jobs or -j <thread_count> specifies the number of threads (default=1)
+ -normalizexyz normalize RGB to have a length of 1
+ -verbose or -v shows status information during the encoding
+ process
+ -mipmaps or -m <mip_count> sets the maximum number of mipmaps to generate (default=1)
+ -mipwrap or -w <x|y|xy> sets the mipmap filter wrap mode (default=clamp)
+
+* -analyze will run an analysis of the encoding and place it in folder
+"analysis_folder" (e.g. ../analysis/kodim05). Within the analysis_folder, a folder
+will be created with a name of the current date/time (e.g. 20151204_153306). This
+date/time folder is used to compare encodings of the same texture over time.
+Within the date/time folder is a text file with several encoding stats and a 2x png
+image showing the encoding mode for each 4x4 block.
+
+* -argfile allows additional command line arguments to be placed in a text file
+
+* -blockAtHV selects the 4x4 pixel subset of the source image at position (H,V).
+This is mainly used for debugging.
+
+* -compare compares the source image to the created encoded image. The encoding
+will dictate what error analysis is used in the comparison.
+
+* -effort uses an "amount" between 0 and 100 to determine how much additional effort
+to apply during the encoding.
+
+* -errormetric selects the fitting algorithm used by the encoder. "rgba" calculates
+RMS error using RGB components that are weighted by A. "rgbx" calculates RMS error
+using RGBA components, where A is treated as an additional data channel, instead of
+as alpha. "rec709" is similar to "rgba", except the RGB components are also weighted
+according to Rec709. "numeric" calculates RMS error using unweighted RGBA components.
+"normalxyz" calculates error based on dot product and vector length for RGB and RMS
+error for A.
+
+* -help prints out the usage message
+
+* -jobs enables multi-threading to speed up image encoding
+
+* -normalizexyz normalizes the source RGB to have a length of 1.
+
+* -verbose shows information on the current encoding process. It will then display the
+PSNR and the time it took to encode the image.
+
+* -mipmaps takes an argument that specifies how many mipmaps to generate from the
+source image. The mipmaps are generated with a lanczos3 filter using edge clamping.
+If the mipmaps option is not specified no mipmaps are created.
+
+* -mipwrap takes an argument that specifies the mipmap filter wrap mode. The options
+are "x", "y" and "xy" which specify wrapping in x only, y only or x and y respectively.
+The default options are clamping in both x and y.
+
+Note: Path names can use slashes or backslashes. The tool will convert the
+slashes to the appropriate polarity for the current platform.
+
+
+## API
+
+The library supports two different APIs - a C-like API that is not heavily
+class-based and a class-based API.
+
+main() in EtcTool.cpp contains an example of both APIs.
+
+The Encode() method now returns an EncodingStatus that contains bit flags for
+reporting various warnings and flags encountered when encoding.
+
+
+## Copyright
+Copyright 2015 Etc2Comp Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/thirdparty/glad/glad.c b/thirdparty/glad/glad.c index 70a93f8d25..2d756ec3f6 100644 --- a/thirdparty/glad/glad.c +++ b/thirdparty/glad/glad.c @@ -1,6 +1,6 @@ /* - OpenGL loader generated by glad 0.1.13a0 on Fri Jan 6 19:27:07 2017. + OpenGL loader generated by glad 0.1.14a0 on Wed Jun 14 20:12:45 2017. Language/Generator: C/C++ Specification: gl @@ -30,7 +30,7 @@ static void* get_proc(const char *namez); static HMODULE libGL; typedef void* (APIENTRYP PFNWGLGETPROCADDRESSPROC_PRIVATE)(const char*); -PFNWGLGETPROCADDRESSPROC_PRIVATE gladGetProcAddressPtr; +static PFNWGLGETPROCADDRESSPROC_PRIVATE gladGetProcAddressPtr; static int open_gl(void) { @@ -57,7 +57,7 @@ static void* libGL; #ifndef __APPLE__ typedef void* (APIENTRYP PFNGLXGETPROCADDRESSPROC_PRIVATE)(const char*); -PFNGLXGETPROCADDRESSPROC_PRIVATE gladGetProcAddressPtr; +static PFNGLXGETPROCADDRESSPROC_PRIVATE gladGetProcAddressPtr; #endif static @@ -152,19 +152,19 @@ static int get_exts(void) { exts = (const char *)glGetString(GL_EXTENSIONS); #ifdef _GLAD_IS_SOME_NEW_VERSION } else { - int index; + unsigned int index; num_exts_i = 0; glGetIntegerv(GL_NUM_EXTENSIONS, &num_exts_i); if (num_exts_i > 0) { - exts_i = (const char **)realloc((void *)exts_i, num_exts_i * sizeof *exts_i); + exts_i = (const char **)realloc((void *)exts_i, (size_t)num_exts_i * (sizeof *exts_i)); } if (exts_i == NULL) { return 0; } - for(index = 0; index < num_exts_i; index++) { + for(index = 0; index < (unsigned)num_exts_i; index++) { exts_i[index] = (const char*)glGetStringi(GL_EXTENSIONS, index); } } @@ -174,7 +174,7 @@ static int get_exts(void) { static void free_exts(void) { if (exts_i != NULL) { - free((char **)exts_i); + free((void *)exts_i); exts_i = NULL; } } diff --git a/thirdparty/glad/glad/glad.h b/thirdparty/glad/glad/glad.h index e5eb22e297..cb78df071e 100644 --- a/thirdparty/glad/glad/glad.h +++ b/thirdparty/glad/glad/glad.h @@ -1,6 +1,6 @@ /* - OpenGL loader generated by glad 0.1.13a0 on Fri Jan 6 19:27:07 
2017. + OpenGL loader generated by glad 0.1.14a0 on Wed Jun 14 20:12:45 2017. Language/Generator: C/C++ Specification: gl @@ -54,7 +54,7 @@ typedef void* (* GLADloadproc)(const char *name); #ifndef GLAPI # if defined(GLAD_GLAPI_EXPORT) -# if defined(WIN32) || defined(__CYGWIN__) +# if defined(_WIN32) || defined(__CYGWIN__) # if defined(GLAD_GLAPI_EXPORT_BUILD) # if defined(__GNUC__) # define GLAPI __attribute__ ((dllexport)) extern @@ -183,6 +183,7 @@ typedef void (APIENTRY *GLDEBUGPROCKHR)(GLenum source,GLenum type,GLuint id,GLen typedef void (APIENTRY *GLDEBUGPROCAMD)(GLuint id,GLenum category,GLenum severity,GLsizei length,const GLchar *message,void *userParam); typedef unsigned short GLhalfNV; typedef GLintptr GLvdpauSurfaceNV; +typedef void (APIENTRY *GLVULKANPROCNV)(void); #define GL_DEPTH_BUFFER_BIT 0x00000100 #define GL_STENCIL_BUFFER_BIT 0x00000400 #define GL_COLOR_BUFFER_BIT 0x00004000 @@ -264,7 +265,6 @@ typedef GLintptr GLvdpauSurfaceNV; #define GL_BLEND_SRC 0x0BE1 #define GL_BLEND 0x0BE2 #define GL_LOGIC_OP_MODE 0x0BF0 -#define GL_COLOR_LOGIC_OP 0x0BF2 #define GL_DRAW_BUFFER 0x0C01 #define GL_READ_BUFFER 0x0C02 #define GL_SCISSOR_BOX 0x0C10 @@ -292,21 +292,9 @@ typedef GLintptr GLvdpauSurfaceNV; #define GL_SUBPIXEL_BITS 0x0D50 #define GL_TEXTURE_1D 0x0DE0 #define GL_TEXTURE_2D 0x0DE1 -#define GL_POLYGON_OFFSET_UNITS 0x2A00 -#define GL_POLYGON_OFFSET_POINT 0x2A01 -#define GL_POLYGON_OFFSET_LINE 0x2A02 -#define GL_POLYGON_OFFSET_FILL 0x8037 -#define GL_POLYGON_OFFSET_FACTOR 0x8038 -#define GL_TEXTURE_BINDING_1D 0x8068 -#define GL_TEXTURE_BINDING_2D 0x8069 #define GL_TEXTURE_WIDTH 0x1000 #define GL_TEXTURE_HEIGHT 0x1001 -#define GL_TEXTURE_INTERNAL_FORMAT 0x1003 #define GL_TEXTURE_BORDER_COLOR 0x1004 -#define GL_TEXTURE_RED_SIZE 0x805C -#define GL_TEXTURE_GREEN_SIZE 0x805D -#define GL_TEXTURE_BLUE_SIZE 0x805E -#define GL_TEXTURE_ALPHA_SIZE 0x805F #define GL_DONT_CARE 0x1100 #define GL_FASTEST 0x1101 #define GL_NICEST 0x1102 @@ -317,7 +305,6 @@ typedef 
GLintptr GLvdpauSurfaceNV; #define GL_INT 0x1404 #define GL_UNSIGNED_INT 0x1405 #define GL_FLOAT 0x1406 -#define GL_DOUBLE 0x140A #define GL_STACK_OVERFLOW 0x0503 #define GL_STACK_UNDERFLOW 0x0504 #define GL_CLEAR 0x1500 @@ -369,23 +356,7 @@ typedef GLintptr GLvdpauSurfaceNV; #define GL_TEXTURE_MIN_FILTER 0x2801 #define GL_TEXTURE_WRAP_S 0x2802 #define GL_TEXTURE_WRAP_T 0x2803 -#define GL_PROXY_TEXTURE_1D 0x8063 -#define GL_PROXY_TEXTURE_2D 0x8064 #define GL_REPEAT 0x2901 -#define GL_R3_G3_B2 0x2A10 -#define GL_RGB4 0x804F -#define GL_RGB5 0x8050 -#define GL_RGB8 0x8051 -#define GL_RGB10 0x8052 -#define GL_RGB12 0x8053 -#define GL_RGB16 0x8054 -#define GL_RGBA2 0x8055 -#define GL_RGBA4 0x8056 -#define GL_RGB5_A1 0x8057 -#define GL_RGBA8 0x8058 -#define GL_RGB10_A2 0x8059 -#define GL_RGBA12 0x805A -#define GL_RGBA16 0x805B #define GL_CURRENT_BIT 0x00000001 #define GL_POINT_BIT 0x00000002 #define GL_LINE_BIT 0x00000004 @@ -404,9 +375,6 @@ typedef GLintptr GLvdpauSurfaceNV; #define GL_TEXTURE_BIT 0x00040000 #define GL_SCISSOR_BIT 0x00080000 #define GL_ALL_ATTRIB_BITS 0xFFFFFFFF -#define GL_CLIENT_PIXEL_STORE_BIT 0x00000001 -#define GL_CLIENT_VERTEX_ARRAY_BIT 0x00000002 -#define GL_CLIENT_ALL_ATTRIB_BITS 0xFFFFFFFF #define GL_QUAD_STRIP 0x0008 #define GL_POLYGON 0x0009 #define GL_ACCUM 0x0100 @@ -446,14 +414,6 @@ typedef GLintptr GLvdpauSurfaceNV; #define GL_PIXEL_MAP_G_TO_G 0x0C77 #define GL_PIXEL_MAP_B_TO_B 0x0C78 #define GL_PIXEL_MAP_A_TO_A 0x0C79 -#define GL_VERTEX_ARRAY_POINTER 0x808E -#define GL_NORMAL_ARRAY_POINTER 0x808F -#define GL_COLOR_ARRAY_POINTER 0x8090 -#define GL_INDEX_ARRAY_POINTER 0x8091 -#define GL_TEXTURE_COORD_ARRAY_POINTER 0x8092 -#define GL_EDGE_FLAG_ARRAY_POINTER 0x8093 -#define GL_FEEDBACK_BUFFER_POINTER 0x0DF0 -#define GL_SELECTION_BUFFER_POINTER 0x0DF3 #define GL_CURRENT_COLOR 0x0B00 #define GL_CURRENT_INDEX 0x0B01 #define GL_CURRENT_NORMAL 0x0B02 @@ -499,11 +459,9 @@ typedef GLintptr GLvdpauSurfaceNV; #define GL_PROJECTION_MATRIX 0x0BA7 
#define GL_TEXTURE_MATRIX 0x0BA8 #define GL_ATTRIB_STACK_DEPTH 0x0BB0 -#define GL_CLIENT_ATTRIB_STACK_DEPTH 0x0BB1 #define GL_ALPHA_TEST 0x0BC0 #define GL_ALPHA_TEST_FUNC 0x0BC1 #define GL_ALPHA_TEST_REF 0x0BC2 -#define GL_INDEX_LOGIC_OP 0x0BF1 #define GL_LOGIC_OP 0x0BF1 #define GL_AUX_BUFFERS 0x0C00 #define GL_INDEX_CLEAR_VALUE 0x0C20 @@ -553,7 +511,6 @@ typedef GLintptr GLvdpauSurfaceNV; #define GL_MAX_NAME_STACK_DEPTH 0x0D37 #define GL_MAX_PROJECTION_STACK_DEPTH 0x0D38 #define GL_MAX_TEXTURE_STACK_DEPTH 0x0D39 -#define GL_MAX_CLIENT_ATTRIB_STACK_DEPTH 0x0D3B #define GL_INDEX_BITS 0x0D51 #define GL_RED_BITS 0x0D52 #define GL_GREEN_BITS 0x0D53 @@ -589,35 +546,8 @@ typedef GLintptr GLvdpauSurfaceNV; #define GL_MAP1_GRID_SEGMENTS 0x0DD1 #define GL_MAP2_GRID_DOMAIN 0x0DD2 #define GL_MAP2_GRID_SEGMENTS 0x0DD3 -#define GL_FEEDBACK_BUFFER_SIZE 0x0DF1 -#define GL_FEEDBACK_BUFFER_TYPE 0x0DF2 -#define GL_SELECTION_BUFFER_SIZE 0x0DF4 -#define GL_VERTEX_ARRAY 0x8074 -#define GL_NORMAL_ARRAY 0x8075 -#define GL_COLOR_ARRAY 0x8076 -#define GL_INDEX_ARRAY 0x8077 -#define GL_TEXTURE_COORD_ARRAY 0x8078 -#define GL_EDGE_FLAG_ARRAY 0x8079 -#define GL_VERTEX_ARRAY_SIZE 0x807A -#define GL_VERTEX_ARRAY_TYPE 0x807B -#define GL_VERTEX_ARRAY_STRIDE 0x807C -#define GL_NORMAL_ARRAY_TYPE 0x807E -#define GL_NORMAL_ARRAY_STRIDE 0x807F -#define GL_COLOR_ARRAY_SIZE 0x8081 -#define GL_COLOR_ARRAY_TYPE 0x8082 -#define GL_COLOR_ARRAY_STRIDE 0x8083 -#define GL_INDEX_ARRAY_TYPE 0x8085 -#define GL_INDEX_ARRAY_STRIDE 0x8086 -#define GL_TEXTURE_COORD_ARRAY_SIZE 0x8088 -#define GL_TEXTURE_COORD_ARRAY_TYPE 0x8089 -#define GL_TEXTURE_COORD_ARRAY_STRIDE 0x808A -#define GL_EDGE_FLAG_ARRAY_STRIDE 0x808C #define GL_TEXTURE_COMPONENTS 0x1003 #define GL_TEXTURE_BORDER 0x1005 -#define GL_TEXTURE_LUMINANCE_SIZE 0x8060 -#define GL_TEXTURE_INTENSITY_SIZE 0x8061 -#define GL_TEXTURE_PRIORITY 0x8066 -#define GL_TEXTURE_RESIDENT 0x8067 #define GL_AMBIENT 0x1200 #define GL_DIFFUSE 0x1201 #define GL_SPECULAR 0x1202 @@ 
-664,6 +594,91 @@ typedef GLintptr GLvdpauSurfaceNV; #define GL_OBJECT_PLANE 0x2501 #define GL_EYE_PLANE 0x2502 #define GL_CLAMP 0x2900 +#define GL_CLIP_PLANE0 0x3000 +#define GL_CLIP_PLANE1 0x3001 +#define GL_CLIP_PLANE2 0x3002 +#define GL_CLIP_PLANE3 0x3003 +#define GL_CLIP_PLANE4 0x3004 +#define GL_CLIP_PLANE5 0x3005 +#define GL_LIGHT0 0x4000 +#define GL_LIGHT1 0x4001 +#define GL_LIGHT2 0x4002 +#define GL_LIGHT3 0x4003 +#define GL_LIGHT4 0x4004 +#define GL_LIGHT5 0x4005 +#define GL_LIGHT6 0x4006 +#define GL_LIGHT7 0x4007 +#define GL_COLOR_LOGIC_OP 0x0BF2 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +#define GL_POLYGON_OFFSET_POINT 0x2A01 +#define GL_POLYGON_OFFSET_LINE 0x2A02 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_TEXTURE_BINDING_1D 0x8068 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_TEXTURE_INTERNAL_FORMAT 0x1003 +#define GL_TEXTURE_RED_SIZE 0x805C +#define GL_TEXTURE_GREEN_SIZE 0x805D +#define GL_TEXTURE_BLUE_SIZE 0x805E +#define GL_TEXTURE_ALPHA_SIZE 0x805F +#define GL_DOUBLE 0x140A +#define GL_PROXY_TEXTURE_1D 0x8063 +#define GL_PROXY_TEXTURE_2D 0x8064 +#define GL_R3_G3_B2 0x2A10 +#define GL_RGB4 0x804F +#define GL_RGB5 0x8050 +#define GL_RGB8 0x8051 +#define GL_RGB10 0x8052 +#define GL_RGB12 0x8053 +#define GL_RGB16 0x8054 +#define GL_RGBA2 0x8055 +#define GL_RGBA4 0x8056 +#define GL_RGB5_A1 0x8057 +#define GL_RGBA8 0x8058 +#define GL_RGB10_A2 0x8059 +#define GL_RGBA12 0x805A +#define GL_RGBA16 0x805B +#define GL_CLIENT_PIXEL_STORE_BIT 0x00000001 +#define GL_CLIENT_VERTEX_ARRAY_BIT 0x00000002 +#define GL_CLIENT_ALL_ATTRIB_BITS 0xFFFFFFFF +#define GL_VERTEX_ARRAY_POINTER 0x808E +#define GL_NORMAL_ARRAY_POINTER 0x808F +#define GL_COLOR_ARRAY_POINTER 0x8090 +#define GL_INDEX_ARRAY_POINTER 0x8091 +#define GL_TEXTURE_COORD_ARRAY_POINTER 0x8092 +#define GL_EDGE_FLAG_ARRAY_POINTER 0x8093 +#define GL_FEEDBACK_BUFFER_POINTER 0x0DF0 +#define GL_SELECTION_BUFFER_POINTER 0x0DF3 +#define GL_CLIENT_ATTRIB_STACK_DEPTH 
0x0BB1 +#define GL_INDEX_LOGIC_OP 0x0BF1 +#define GL_MAX_CLIENT_ATTRIB_STACK_DEPTH 0x0D3B +#define GL_FEEDBACK_BUFFER_SIZE 0x0DF1 +#define GL_FEEDBACK_BUFFER_TYPE 0x0DF2 +#define GL_SELECTION_BUFFER_SIZE 0x0DF4 +#define GL_VERTEX_ARRAY 0x8074 +#define GL_NORMAL_ARRAY 0x8075 +#define GL_COLOR_ARRAY 0x8076 +#define GL_INDEX_ARRAY 0x8077 +#define GL_TEXTURE_COORD_ARRAY 0x8078 +#define GL_EDGE_FLAG_ARRAY 0x8079 +#define GL_VERTEX_ARRAY_SIZE 0x807A +#define GL_VERTEX_ARRAY_TYPE 0x807B +#define GL_VERTEX_ARRAY_STRIDE 0x807C +#define GL_NORMAL_ARRAY_TYPE 0x807E +#define GL_NORMAL_ARRAY_STRIDE 0x807F +#define GL_COLOR_ARRAY_SIZE 0x8081 +#define GL_COLOR_ARRAY_TYPE 0x8082 +#define GL_COLOR_ARRAY_STRIDE 0x8083 +#define GL_INDEX_ARRAY_TYPE 0x8085 +#define GL_INDEX_ARRAY_STRIDE 0x8086 +#define GL_TEXTURE_COORD_ARRAY_SIZE 0x8088 +#define GL_TEXTURE_COORD_ARRAY_TYPE 0x8089 +#define GL_TEXTURE_COORD_ARRAY_STRIDE 0x808A +#define GL_EDGE_FLAG_ARRAY_STRIDE 0x808C +#define GL_TEXTURE_LUMINANCE_SIZE 0x8060 +#define GL_TEXTURE_INTENSITY_SIZE 0x8061 +#define GL_TEXTURE_PRIORITY 0x8066 +#define GL_TEXTURE_RESIDENT 0x8067 #define GL_ALPHA4 0x803B #define GL_ALPHA8 0x803C #define GL_ALPHA12 0x803D @@ -697,20 +712,6 @@ typedef GLintptr GLvdpauSurfaceNV; #define GL_T2F_N3F_V3F 0x2A2B #define GL_T2F_C4F_N3F_V3F 0x2A2C #define GL_T4F_C4F_N3F_V4F 0x2A2D -#define GL_CLIP_PLANE0 0x3000 -#define GL_CLIP_PLANE1 0x3001 -#define GL_CLIP_PLANE2 0x3002 -#define GL_CLIP_PLANE3 0x3003 -#define GL_CLIP_PLANE4 0x3004 -#define GL_CLIP_PLANE5 0x3005 -#define GL_LIGHT0 0x4000 -#define GL_LIGHT1 0x4001 -#define GL_LIGHT2 0x4002 -#define GL_LIGHT3 0x4003 -#define GL_LIGHT4 0x4004 -#define GL_LIGHT5 0x4005 -#define GL_LIGHT6 0x4006 -#define GL_LIGHT7 0x4007 #define GL_UNSIGNED_BYTE_3_3_2 0x8032 #define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 #define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 diff --git a/thirdparty/misc/yuv2rgb.h b/thirdparty/misc/yuv2rgb.h index a9bef76da8..d0c2813a75 100644 --- a/thirdparty/misc/yuv2rgb.h +++ 
b/thirdparty/misc/yuv2rgb.h @@ -1,5 +1,28 @@ /* Thirdparty code presumably from http://wss.co.uk/pinknoise/yuv2rgb/ */ -/* FIXME: Move to thirdparty dir */ + +/* +This YUV2RGB code is Copyright (C) 2008-11 Robin Watts +<theorarm@wss.co.uk>. + +The software is released under the BSD license. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +In particular, I warrant absolutely nothing about how patent free +this method is. It is your responsibility to ensure that this code +does not infringe any patents that apply in your area before you +ship it. +*/ #ifndef YUV2RGB_H #define YUV2RGB_H diff --git a/thirdparty/openssl/crypto/LPdir_nyi.c b/thirdparty/openssl/crypto/LPdir_nyi.c index 283d5b0636..b16e84957b 100644 --- a/thirdparty/openssl/crypto/LPdir_nyi.c +++ b/thirdparty/openssl/crypto/LPdir_nyi.c @@ -1,7 +1,4 @@ /* - * $LP: LPlib/source/LPdir_win.c,v 1.1 2004/06/14 10:07:56 _cvs_levitte Exp $ - */ -/* * Copyright (c) 2004, Richard Levitte <richard@levitte.org> * All rights reserved. 
* diff --git a/thirdparty/openssl/crypto/LPdir_unix.c b/thirdparty/openssl/crypto/LPdir_unix.c index bead6abd71..c97e260492 100644 --- a/thirdparty/openssl/crypto/LPdir_unix.c +++ b/thirdparty/openssl/crypto/LPdir_unix.c @@ -1,8 +1,4 @@ /* - * $LP: LPlib/source/LPdir_unix.c,v 1.11 2004/09/23 22:07:22 _cvs_levitte Exp - * $ - */ -/* * Copyright (c) 2004, Richard Levitte <richard@levitte.org> * All rights reserved. * diff --git a/thirdparty/openssl/crypto/LPdir_win32.c b/thirdparty/openssl/crypto/LPdir_win32.c index b1c983d87f..84f61117b8 100644 --- a/thirdparty/openssl/crypto/LPdir_win32.c +++ b/thirdparty/openssl/crypto/LPdir_win32.c @@ -1,8 +1,4 @@ /* - * $LP: LPlib/source/LPdir_win32.c,v 1.3 2004/08/26 13:36:05 _cvs_levitte Exp - * $ - */ -/* * Copyright (c) 2004, Richard Levitte <richard@levitte.org> * All rights reserved. * diff --git a/thirdparty/openssl/crypto/LPdir_wince.c b/thirdparty/openssl/crypto/LPdir_wince.c index ae8a56f4be..a8377f30f4 100644 --- a/thirdparty/openssl/crypto/LPdir_wince.c +++ b/thirdparty/openssl/crypto/LPdir_wince.c @@ -1,8 +1,4 @@ /* - * $LP: LPlib/source/LPdir_wince.c,v 1.3 2004/08/26 13:36:05 _cvs_levitte Exp - * $ - */ -/* * Copyright (c) 2004, Richard Levitte <richard@levitte.org> * All rights reserved. 
* diff --git a/thirdparty/openssl/crypto/asn1/a_bitstr.c b/thirdparty/openssl/crypto/asn1/a_bitstr.c index f906188b11..c429342e03 100644 --- a/thirdparty/openssl/crypto/asn1/a_bitstr.c +++ b/thirdparty/openssl/crypto/asn1/a_bitstr.c @@ -114,10 +114,11 @@ int i2c_ASN1_BIT_STRING(ASN1_BIT_STRING *a, unsigned char **pp) *(p++) = (unsigned char)bits; d = a->data; - memcpy(p, d, len); - p += len; - if (len > 0) + if (len > 0) { + memcpy(p, d, len); + p += len; p[-1] &= (0xff << bits); + } *pp = p; return (ret); } diff --git a/thirdparty/openssl/crypto/asn1/a_bytes.c b/thirdparty/openssl/crypto/asn1/a_bytes.c index 385b53986a..65e5394664 100644 --- a/thirdparty/openssl/crypto/asn1/a_bytes.c +++ b/thirdparty/openssl/crypto/asn1/a_bytes.c @@ -60,7 +60,12 @@ #include "cryptlib.h" #include <openssl/asn1.h> -static int asn1_collate_primitive(ASN1_STRING *a, ASN1_const_CTX *c); +static int asn1_collate_primitive(ASN1_STRING *a, ASN1_const_CTX *c, + int depth); +static ASN1_STRING *int_d2i_ASN1_bytes(ASN1_STRING **a, + const unsigned char **pp, long length, + int Ptag, int Pclass, int depth, + int *perr); /* * type is a 'bitmap' of acceptable string types. */ @@ -99,7 +104,7 @@ ASN1_STRING *d2i_ASN1_type_bytes(ASN1_STRING **a, const unsigned char **pp, ret = (*a); if (len != 0) { - s = (unsigned char *)OPENSSL_malloc((int)len + 1); + s = OPENSSL_malloc((int)len + 1); if (s == NULL) { i = ERR_R_MALLOC_FAILURE; goto err; @@ -154,15 +159,38 @@ int i2d_ASN1_bytes(ASN1_STRING *a, unsigned char **pp, int tag, int xclass) return (r); } +/* + * Maximum recursion depth of d2i_ASN1_bytes(): much more than should be + * encountered in pratice. 
+ */ + +#define ASN1_BYTES_MAXDEPTH 20 + ASN1_STRING *d2i_ASN1_bytes(ASN1_STRING **a, const unsigned char **pp, long length, int Ptag, int Pclass) { + int err = 0; + ASN1_STRING *s = int_d2i_ASN1_bytes(a, pp, length, Ptag, Pclass, 0, &err); + if (err != 0) + ASN1err(ASN1_F_D2I_ASN1_BYTES, err); + return s; +} + +static ASN1_STRING *int_d2i_ASN1_bytes(ASN1_STRING **a, + const unsigned char **pp, long length, + int Ptag, int Pclass, + int depth, int *perr) +{ ASN1_STRING *ret = NULL; const unsigned char *p; unsigned char *s; long len; int inf, tag, xclass; - int i = 0; + + if (depth > ASN1_BYTES_MAXDEPTH) { + *perr = ASN1_R_NESTED_ASN1_STRING; + return NULL; + } if ((a == NULL) || ((*a) == NULL)) { if ((ret = ASN1_STRING_new()) == NULL) @@ -173,18 +201,19 @@ ASN1_STRING *d2i_ASN1_bytes(ASN1_STRING **a, const unsigned char **pp, p = *pp; inf = ASN1_get_object(&p, &len, &tag, &xclass, length); if (inf & 0x80) { - i = ASN1_R_BAD_OBJECT_HEADER; + *perr = ASN1_R_BAD_OBJECT_HEADER; goto err; } if (tag != Ptag) { - i = ASN1_R_WRONG_TAG; + *perr = ASN1_R_WRONG_TAG; goto err; } if (inf & V_ASN1_CONSTRUCTED) { ASN1_const_CTX c; + c.error = 0; c.pp = pp; c.p = p; c.inf = inf; @@ -192,17 +221,18 @@ ASN1_STRING *d2i_ASN1_bytes(ASN1_STRING **a, const unsigned char **pp, c.tag = Ptag; c.xclass = Pclass; c.max = (length == 0) ? 
0 : (p + length); - if (!asn1_collate_primitive(ret, &c)) + if (!asn1_collate_primitive(ret, &c, depth)) { + *perr = c.error; goto err; - else { + } else { p = c.p; } } else { if (len != 0) { if ((ret->length < len) || (ret->data == NULL)) { - s = (unsigned char *)OPENSSL_malloc((int)len + 1); + s = OPENSSL_malloc((int)len + 1); if (s == NULL) { - i = ERR_R_MALLOC_FAILURE; + *perr = ERR_R_MALLOC_FAILURE; goto err; } if (ret->data != NULL) @@ -230,7 +260,6 @@ ASN1_STRING *d2i_ASN1_bytes(ASN1_STRING **a, const unsigned char **pp, err: if ((ret != NULL) && ((a == NULL) || (*a != ret))) ASN1_STRING_free(ret); - ASN1err(ASN1_F_D2I_ASN1_BYTES, i); return (NULL); } @@ -242,7 +271,8 @@ ASN1_STRING *d2i_ASN1_bytes(ASN1_STRING **a, const unsigned char **pp, * There have been a few bug fixes for this function from Paul Keogh * <paul.keogh@sse.ie>, many thanks to him */ -static int asn1_collate_primitive(ASN1_STRING *a, ASN1_const_CTX *c) +static int asn1_collate_primitive(ASN1_STRING *a, ASN1_const_CTX *c, + int depth) { ASN1_STRING *os = NULL; BUF_MEM b; @@ -270,9 +300,8 @@ static int asn1_collate_primitive(ASN1_STRING *a, ASN1_const_CTX *c) } c->q = c->p; - if (d2i_ASN1_bytes(&os, &c->p, c->max - c->p, c->tag, c->xclass) - == NULL) { - c->error = ERR_R_ASN1_LIB; + if (int_d2i_ASN1_bytes(&os, &c->p, c->max - c->p, c->tag, c->xclass, + depth + 1, &c->error) == NULL) { goto err; } @@ -297,7 +326,6 @@ static int asn1_collate_primitive(ASN1_STRING *a, ASN1_const_CTX *c) ASN1_STRING_free(os); return (1); err: - ASN1err(ASN1_F_ASN1_COLLATE_PRIMITIVE, c->error); if (os != NULL) ASN1_STRING_free(os); if (b.data != NULL) diff --git a/thirdparty/openssl/crypto/asn1/a_digest.c b/thirdparty/openssl/crypto/asn1/a_digest.c index 7cbc4751cd..57a04f768c 100644 --- a/thirdparty/openssl/crypto/asn1/a_digest.c +++ b/thirdparty/openssl/crypto/asn1/a_digest.c @@ -86,8 +86,10 @@ int ASN1_digest(i2d_of_void *i2d, const EVP_MD *type, char *data, p = str; i2d(data, &p); - if (!EVP_Digest(str, i, md, 
len, type, NULL)) + if (!EVP_Digest(str, i, md, len, type, NULL)) { + OPENSSL_free(str); return 0; + } OPENSSL_free(str); return (1); } @@ -104,8 +106,10 @@ int ASN1_item_digest(const ASN1_ITEM *it, const EVP_MD *type, void *asn, if (!str) return (0); - if (!EVP_Digest(str, i, md, len, type, NULL)) + if (!EVP_Digest(str, i, md, len, type, NULL)) { + OPENSSL_free(str); return 0; + } OPENSSL_free(str); return (1); } diff --git a/thirdparty/openssl/crypto/asn1/a_gentm.c b/thirdparty/openssl/crypto/asn1/a_gentm.c index fa76dcac91..8511813785 100644 --- a/thirdparty/openssl/crypto/asn1/a_gentm.c +++ b/thirdparty/openssl/crypto/asn1/a_gentm.c @@ -202,7 +202,7 @@ int asn1_generalizedtime_to_tm(struct tm *tm, const ASN1_GENERALIZEDTIME *d) if (a[o] == 'Z') o++; else if ((a[o] == '+') || (a[o] == '-')) { - int offsign = a[o] == '-' ? -1 : 1, offset = 0; + int offsign = a[o] == '-' ? 1 : -1, offset = 0; o++; if (o + 4 > l) goto err; diff --git a/thirdparty/openssl/crypto/asn1/a_object.c b/thirdparty/openssl/crypto/asn1/a_object.c index 27f9c16914..229a40ffa3 100644 --- a/thirdparty/openssl/crypto/asn1/a_object.c +++ b/thirdparty/openssl/crypto/asn1/a_object.c @@ -73,7 +73,7 @@ int i2d_ASN1_OBJECT(ASN1_OBJECT *a, unsigned char **pp) return (0); objsize = ASN1_object_size(0, a->length, V_ASN1_OBJECT); - if (pp == NULL) + if (pp == NULL || objsize == -1) return objsize; p = *pp; @@ -174,8 +174,12 @@ int a2d_ASN1_OBJECT(unsigned char *out, int olen, const char *buf, int num) if (!tmp) goto err; } - while (blsize--) - tmp[i++] = (unsigned char)BN_div_word(bl, 0x80L); + while (blsize--) { + BN_ULONG t = BN_div_word(bl, 0x80L); + if (t == (BN_ULONG)-1) + goto err; + tmp[i++] = (unsigned char)t; + } } else { for (;;) { diff --git a/thirdparty/openssl/crypto/asn1/a_set.c b/thirdparty/openssl/crypto/asn1/a_set.c index bf3f971889..5fb5865575 100644 --- a/thirdparty/openssl/crypto/asn1/a_set.c +++ b/thirdparty/openssl/crypto/asn1/a_set.c @@ -57,6 +57,7 @@ */ #include <stdio.h> +#include 
<limits.h> #include "cryptlib.h" #include <openssl/asn1_mac.h> @@ -98,10 +99,14 @@ int i2d_ASN1_SET(STACK_OF(OPENSSL_BLOCK) *a, unsigned char **pp, if (a == NULL) return (0); - for (i = sk_OPENSSL_BLOCK_num(a) - 1; i >= 0; i--) + for (i = sk_OPENSSL_BLOCK_num(a) - 1; i >= 0; i--) { + int tmplen = i2d(sk_OPENSSL_BLOCK_value(a, i), NULL); + if (tmplen > INT_MAX - ret) + return -1; ret += i2d(sk_OPENSSL_BLOCK_value(a, i), NULL); + } r = ASN1_object_size(1, ret, ex_tag); - if (pp == NULL) + if (pp == NULL || r == -1) return (r); p = *pp; diff --git a/thirdparty/openssl/crypto/asn1/a_strex.c b/thirdparty/openssl/crypto/asn1/a_strex.c index 35fd44cd22..2d562f9345 100644 --- a/thirdparty/openssl/crypto/asn1/a_strex.c +++ b/thirdparty/openssl/crypto/asn1/a_strex.c @@ -337,7 +337,7 @@ static const signed char tag2nbyte[] = { -1, -1, -1, -1, -1, /* 5-9 */ -1, -1, 0, -1, /* 10-13 */ -1, -1, -1, -1, /* 15-17 */ - -1, 1, 1, /* 18-20 */ + 1, 1, 1, /* 18-20 */ -1, 1, 1, 1, /* 21-24 */ -1, 1, -1, /* 25-27 */ 4, -1, 2 /* 28-30 */ diff --git a/thirdparty/openssl/crypto/asn1/a_strnid.c b/thirdparty/openssl/crypto/asn1/a_strnid.c index 5224345368..99ffe73787 100644 --- a/thirdparty/openssl/crypto/asn1/a_strnid.c +++ b/thirdparty/openssl/crypto/asn1/a_strnid.c @@ -192,7 +192,8 @@ static const ASN1_STRING_TABLE tbl_standard[] = { {NID_name, 1, ub_name, DIRSTRING_TYPE, 0}, {NID_dnQualifier, -1, -1, B_ASN1_PRINTABLESTRING, STABLE_NO_MASK}, {NID_domainComponent, 1, -1, B_ASN1_IA5STRING, STABLE_NO_MASK}, - {NID_ms_csp_name, -1, -1, B_ASN1_BMPSTRING, STABLE_NO_MASK} + {NID_ms_csp_name, -1, -1, B_ASN1_BMPSTRING, STABLE_NO_MASK}, + {NID_jurisdictionCountryName, 2, 2, B_ASN1_PRINTABLESTRING, STABLE_NO_MASK} }; static int sk_table_cmp(const ASN1_STRING_TABLE *const *a, @@ -250,6 +251,7 @@ int ASN1_STRING_TABLE_add(int nid, } tmp->flags = flags | STABLE_FLAGS_MALLOC; tmp->nid = nid; + tmp->minsize = tmp->maxsize = -1; new_nid = 1; } else tmp->flags = (tmp->flags & STABLE_FLAGS_MALLOC) | flags; 
diff --git a/thirdparty/openssl/crypto/asn1/a_time.c b/thirdparty/openssl/crypto/asn1/a_time.c index fcb2d565cd..0eeb79cd42 100644 --- a/thirdparty/openssl/crypto/asn1/a_time.c +++ b/thirdparty/openssl/crypto/asn1/a_time.c @@ -137,7 +137,7 @@ int ASN1_TIME_check(ASN1_TIME *t) ASN1_GENERALIZEDTIME *ASN1_TIME_to_generalizedtime(ASN1_TIME *t, ASN1_GENERALIZEDTIME **out) { - ASN1_GENERALIZEDTIME *ret; + ASN1_GENERALIZEDTIME *ret = NULL; char *str; int newlen; @@ -146,22 +146,21 @@ ASN1_GENERALIZEDTIME *ASN1_TIME_to_generalizedtime(ASN1_TIME *t, if (!out || !*out) { if (!(ret = ASN1_GENERALIZEDTIME_new())) - return NULL; - if (out) - *out = ret; - } else + goto err; + } else { ret = *out; + } /* If already GeneralizedTime just copy across */ if (t->type == V_ASN1_GENERALIZEDTIME) { if (!ASN1_STRING_set(ret, t->data, t->length)) - return NULL; - return ret; + goto err; + goto done; } /* grow the string */ if (!ASN1_STRING_set(ret, NULL, t->length + 2)) - return NULL; + goto err; /* ASN1_STRING_set() allocated 'len + 1' bytes. */ newlen = t->length + 2 + 1; str = (char *)ret->data; @@ -173,9 +172,18 @@ ASN1_GENERALIZEDTIME *ASN1_TIME_to_generalizedtime(ASN1_TIME *t, BUF_strlcat(str, (char *)t->data, newlen); - return ret; + done: + if (out != NULL && *out == NULL) + *out = ret; + return ret; + + err: + if (out == NULL || *out != ret) + ASN1_GENERALIZEDTIME_free(ret); + return NULL; } + int ASN1_TIME_set_string(ASN1_TIME *s, const char *str) { ASN1_TIME t; diff --git a/thirdparty/openssl/crypto/asn1/a_utctm.c b/thirdparty/openssl/crypto/asn1/a_utctm.c index 724a10be4e..0344482cc2 100644 --- a/thirdparty/openssl/crypto/asn1/a_utctm.c +++ b/thirdparty/openssl/crypto/asn1/a_utctm.c @@ -172,7 +172,7 @@ int asn1_utctime_to_tm(struct tm *tm, const ASN1_UTCTIME *d) if (a[o] == 'Z') o++; else if ((a[o] == '+') || (a[o] == '-')) { - int offsign = a[o] == '-' ? -1 : 1, offset = 0; + int offsign = a[o] == '-' ? 
1 : -1, offset = 0; o++; if (o + 4 > l) goto err; diff --git a/thirdparty/openssl/crypto/asn1/ameth_lib.c b/thirdparty/openssl/crypto/asn1/ameth_lib.c index 5389c04347..43ddebba33 100644 --- a/thirdparty/openssl/crypto/asn1/ameth_lib.c +++ b/thirdparty/openssl/crypto/asn1/ameth_lib.c @@ -93,7 +93,9 @@ static const EVP_PKEY_ASN1_METHOD *standard_methods[] = { &eckey_asn1_meth, #endif &hmac_asn1_meth, +#ifndef OPENSSL_NO_CMAC &cmac_asn1_meth, +#endif #ifndef OPENSSL_NO_DH &dhx_asn1_meth #endif diff --git a/thirdparty/openssl/crypto/asn1/asn1_lib.c b/thirdparty/openssl/crypto/asn1/asn1_lib.c index 874b1af8b0..e63e82a8b4 100644 --- a/thirdparty/openssl/crypto/asn1/asn1_lib.c +++ b/thirdparty/openssl/crypto/asn1/asn1_lib.c @@ -256,26 +256,30 @@ static void asn1_put_length(unsigned char **pp, int length) int ASN1_object_size(int constructed, int length, int tag) { - int ret; - - ret = length; - ret++; + int ret = 1; + if (length < 0) + return -1; if (tag >= 31) { while (tag > 0) { tag >>= 7; ret++; } } - if (constructed == 2) - return ret + 3; - ret++; - if (length > 127) { - while (length > 0) { - length >>= 8; - ret++; + if (constructed == 2) { + ret += 3; + } else { + ret++; + if (length > 127) { + int tmplen = length; + while (tmplen > 0) { + tmplen >>= 8; + ret++; + } } } - return (ret); + if (ret >= INT_MAX - length) + return -1; + return ret + length; } static int _asn1_Finish(ASN1_const_CTX *c) @@ -324,7 +328,7 @@ int asn1_GetSequence(ASN1_const_CTX *c, long *length) return (0); } if (c->inf == (1 | V_ASN1_CONSTRUCTED)) - c->slen = *length + *(c->pp) - c->p; + c->slen = *length; c->eos = 0; return (1); } @@ -366,7 +370,7 @@ int ASN1_STRING_set(ASN1_STRING *str, const void *_data, int len) else len = strlen(data); } - if ((str->length < len) || (str->data == NULL)) { + if ((str->length <= len) || (str->data == NULL)) { c = str->data; if (c == NULL) str->data = OPENSSL_malloc(len + 1); diff --git a/thirdparty/openssl/crypto/asn1/asn_mime.c 
b/thirdparty/openssl/crypto/asn1/asn_mime.c index 96110c540f..5170906c62 100644 --- a/thirdparty/openssl/crypto/asn1/asn_mime.c +++ b/thirdparty/openssl/crypto/asn1/asn_mime.c @@ -289,7 +289,7 @@ int SMIME_write_ASN1(BIO *bio, ASN1_VALUE *val, BIO *data, int flags, if ((flags & SMIME_DETACHED) && data) { /* We want multipart/signed */ /* Generate a random boundary */ - if (RAND_pseudo_bytes((unsigned char *)bound, 32) < 0) + if (RAND_bytes((unsigned char *)bound, 32) <= 0) return 0; for (i = 0; i < 32; i++) { c = bound[i] & 0xf; @@ -623,6 +623,8 @@ static int multi_split(BIO *bio, char *bound, STACK_OF(BIO) **ret) if (bpart) sk_BIO_push(parts, bpart); bpart = BIO_new(BIO_s_mem()); + if (bpart == NULL) + return 1; BIO_set_mem_eof_return(bpart, 0); } else if (eol) BIO_write(bpart, "\r\n", 2); diff --git a/thirdparty/openssl/crypto/asn1/bio_asn1.c b/thirdparty/openssl/crypto/asn1/bio_asn1.c index 60189b3b2c..c3afff69dc 100644 --- a/thirdparty/openssl/crypto/asn1/bio_asn1.c +++ b/thirdparty/openssl/crypto/asn1/bio_asn1.c @@ -170,10 +170,12 @@ static int asn1_bio_init(BIO_ASN1_BUF_CTX *ctx, int size) ctx->copylen = 0; ctx->asn1_class = V_ASN1_UNIVERSAL; ctx->asn1_tag = V_ASN1_OCTET_STRING; - ctx->ex_buf = 0; - ctx->ex_pos = 0; + ctx->ex_buf = NULL; ctx->ex_len = 0; + ctx->ex_pos = 0; ctx->state = ASN1_STATE_START; + ctx->prefix = ctx->prefix_free = ctx->suffix = ctx->suffix_free = NULL; + ctx->ex_arg = NULL; return 1; } diff --git a/thirdparty/openssl/crypto/asn1/bio_ndef.c b/thirdparty/openssl/crypto/asn1/bio_ndef.c index 31949b8794..8d7046633c 100644 --- a/thirdparty/openssl/crypto/asn1/bio_ndef.c +++ b/thirdparty/openssl/crypto/asn1/bio_ndef.c @@ -136,6 +136,7 @@ BIO *BIO_new_NDEF(BIO *out, ASN1_VALUE *val, const ASN1_ITEM *it) ndef_aux->ndef_bio = sarg.ndef_bio; ndef_aux->boundary = sarg.boundary; ndef_aux->out = out; + ndef_aux->derbuf = NULL; BIO_ctrl(asn_bio, BIO_C_SET_EX_ARG, 0, ndef_aux); diff --git a/thirdparty/openssl/crypto/asn1/d2i_pr.c 
b/thirdparty/openssl/crypto/asn1/d2i_pr.c index d21829af19..86dcf5fba9 100644 --- a/thirdparty/openssl/crypto/asn1/d2i_pr.c +++ b/thirdparty/openssl/crypto/asn1/d2i_pr.c @@ -97,15 +97,17 @@ EVP_PKEY *d2i_PrivateKey(int type, EVP_PKEY **a, const unsigned char **pp, if (!ret->ameth->old_priv_decode || !ret->ameth->old_priv_decode(ret, &p, length)) { if (ret->ameth->priv_decode) { + EVP_PKEY *tmp; PKCS8_PRIV_KEY_INFO *p8 = NULL; p8 = d2i_PKCS8_PRIV_KEY_INFO(NULL, &p, length); if (!p8) goto err; - EVP_PKEY_free(ret); - ret = EVP_PKCS82PKEY(p8); + tmp = EVP_PKCS82PKEY(p8); PKCS8_PRIV_KEY_INFO_free(p8); - if (ret == NULL) + if (tmp == NULL) goto err; + EVP_PKEY_free(ret); + ret = tmp; } else { ASN1err(ASN1_F_D2I_PRIVATEKEY, ERR_R_ASN1_LIB); goto err; diff --git a/thirdparty/openssl/crypto/asn1/f_enum.c b/thirdparty/openssl/crypto/asn1/f_enum.c index 591c3b5781..527f1d8f87 100644 --- a/thirdparty/openssl/crypto/asn1/f_enum.c +++ b/thirdparty/openssl/crypto/asn1/f_enum.c @@ -138,7 +138,7 @@ int a2i_ASN1_ENUMERATED(BIO *bp, ASN1_ENUMERATED *bs, char *buf, int size) bufp = (unsigned char *)buf; if (first) { first = 0; - if ((bufp[0] == '0') && (buf[1] == '0')) { + if ((bufp[0] == '0') && (bufp[1] == '0')) { bufp += 2; i -= 2; } @@ -160,8 +160,6 @@ int a2i_ASN1_ENUMERATED(BIO *bp, ASN1_ENUMERATED *bs, char *buf, int size) i * 2); if (sp == NULL) { ASN1err(ASN1_F_A2I_ASN1_ENUMERATED, ERR_R_MALLOC_FAILURE); - if (s != NULL) - OPENSSL_free(s); goto err; } s = sp; @@ -199,5 +197,7 @@ int a2i_ASN1_ENUMERATED(BIO *bp, ASN1_ENUMERATED *bs, char *buf, int size) err_sl: ASN1err(ASN1_F_A2I_ASN1_ENUMERATED, ASN1_R_SHORT_LINE); } + if (ret != 1) + OPENSSL_free(s); return (ret); } diff --git a/thirdparty/openssl/crypto/asn1/f_int.c b/thirdparty/openssl/crypto/asn1/f_int.c index 4a81f81c88..e6ed7f1e77 100644 --- a/thirdparty/openssl/crypto/asn1/f_int.c +++ b/thirdparty/openssl/crypto/asn1/f_int.c @@ -152,7 +152,7 @@ int a2i_ASN1_INTEGER(BIO *bp, ASN1_INTEGER *bs, char *buf, int size) bufp 
= (unsigned char *)buf; if (first) { first = 0; - if ((bufp[0] == '0') && (buf[1] == '0')) { + if ((bufp[0] == '0') && (bufp[1] == '0')) { bufp += 2; i -= 2; } @@ -172,8 +172,6 @@ int a2i_ASN1_INTEGER(BIO *bp, ASN1_INTEGER *bs, char *buf, int size) sp = OPENSSL_realloc_clean(s, slen, num + i * 2); if (sp == NULL) { ASN1err(ASN1_F_A2I_ASN1_INTEGER, ERR_R_MALLOC_FAILURE); - if (s != NULL) - OPENSSL_free(s); goto err; } s = sp; @@ -211,5 +209,7 @@ int a2i_ASN1_INTEGER(BIO *bp, ASN1_INTEGER *bs, char *buf, int size) err_sl: ASN1err(ASN1_F_A2I_ASN1_INTEGER, ASN1_R_SHORT_LINE); } + if (ret != 1) + OPENSSL_free(s); return (ret); } diff --git a/thirdparty/openssl/crypto/asn1/f_string.c b/thirdparty/openssl/crypto/asn1/f_string.c index 6a6cf34714..0f7b9cfb11 100644 --- a/thirdparty/openssl/crypto/asn1/f_string.c +++ b/thirdparty/openssl/crypto/asn1/f_string.c @@ -166,8 +166,6 @@ int a2i_ASN1_STRING(BIO *bp, ASN1_STRING *bs, char *buf, int size) i * 2); if (sp == NULL) { ASN1err(ASN1_F_A2I_ASN1_STRING, ERR_R_MALLOC_FAILURE); - if (s != NULL) - OPENSSL_free(s); goto err; } s = sp; @@ -205,5 +203,7 @@ int a2i_ASN1_STRING(BIO *bp, ASN1_STRING *bs, char *buf, int size) err_sl: ASN1err(ASN1_F_A2I_ASN1_STRING, ASN1_R_SHORT_LINE); } + if (ret != 1) + OPENSSL_free(s); return (ret); } diff --git a/thirdparty/openssl/crypto/asn1/i2d_pr.c b/thirdparty/openssl/crypto/asn1/i2d_pr.c index 4d338ac55a..12966ec536 100644 --- a/thirdparty/openssl/crypto/asn1/i2d_pr.c +++ b/thirdparty/openssl/crypto/asn1/i2d_pr.c @@ -69,10 +69,13 @@ int i2d_PrivateKey(EVP_PKEY *a, unsigned char **pp) } if (a->ameth && a->ameth->priv_encode) { PKCS8_PRIV_KEY_INFO *p8 = EVP_PKEY2PKCS8(a); - int ret = i2d_PKCS8_PRIV_KEY_INFO(p8, pp); - PKCS8_PRIV_KEY_INFO_free(p8); + int ret = 0; + if (p8 != NULL) { + ret = i2d_PKCS8_PRIV_KEY_INFO(p8, pp); + PKCS8_PRIV_KEY_INFO_free(p8); + } return ret; } ASN1err(ASN1_F_I2D_PRIVATEKEY, ASN1_R_UNSUPPORTED_PUBLIC_KEY_TYPE); - return (-1); + return -1; } diff --git 
a/thirdparty/openssl/crypto/asn1/p5_pbe.c b/thirdparty/openssl/crypto/asn1/p5_pbe.c index bdbfdcd67c..e2a1def53f 100644 --- a/thirdparty/openssl/crypto/asn1/p5_pbe.c +++ b/thirdparty/openssl/crypto/asn1/p5_pbe.c @@ -101,7 +101,7 @@ int PKCS5_pbe_set0_algor(X509_ALGOR *algor, int alg, int iter, sstr = ASN1_STRING_data(pbe->salt); if (salt) memcpy(sstr, salt, saltlen); - else if (RAND_pseudo_bytes(sstr, saltlen) < 0) + else if (RAND_bytes(sstr, saltlen) <= 0) goto err; if (!ASN1_item_pack(pbe, ASN1_ITEM_rptr(PBEPARAM), &pbe_str)) { diff --git a/thirdparty/openssl/crypto/asn1/p5_pbev2.c b/thirdparty/openssl/crypto/asn1/p5_pbev2.c index 73ba4a3d67..4c037d3d2c 100644 --- a/thirdparty/openssl/crypto/asn1/p5_pbev2.c +++ b/thirdparty/openssl/crypto/asn1/p5_pbev2.c @@ -91,12 +91,11 @@ X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter, unsigned char *salt, int saltlen, unsigned char *aiv, int prf_nid) { - X509_ALGOR *scheme = NULL, *kalg = NULL, *ret = NULL; + X509_ALGOR *scheme = NULL, *ret = NULL; int alg_nid, keylen; EVP_CIPHER_CTX ctx; unsigned char iv[EVP_MAX_IV_LENGTH]; PBE2PARAM *pbe2 = NULL; - ASN1_OBJECT *obj; alg_nid = EVP_CIPHER_type(cipher); if (alg_nid == NID_undef) { @@ -104,7 +103,6 @@ X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter, ASN1_R_CIPHER_HAS_NO_OBJECT_IDENTIFIER); goto err; } - obj = OBJ_nid2obj(alg_nid); if (!(pbe2 = PBE2PARAM_new())) goto merr; @@ -112,7 +110,7 @@ X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter, /* Setup the AlgorithmIdentifier for the encryption scheme */ scheme = pbe2->encryption; - scheme->algorithm = obj; + scheme->algorithm = OBJ_nid2obj(alg_nid); if (!(scheme->parameter = ASN1_TYPE_new())) goto merr; @@ -120,7 +118,7 @@ X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter, if (EVP_CIPHER_iv_length(cipher)) { if (aiv) memcpy(iv, aiv, EVP_CIPHER_iv_length(cipher)); - else if (RAND_pseudo_bytes(iv, EVP_CIPHER_iv_length(cipher)) < 0) + else if (RAND_bytes(iv, 
EVP_CIPHER_iv_length(cipher)) <= 0) goto err; } @@ -188,11 +186,9 @@ X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter, err: PBE2PARAM_free(pbe2); /* Note 'scheme' is freed as part of pbe2 */ - X509_ALGOR_free(kalg); X509_ALGOR_free(ret); return NULL; - } X509_ALGOR *PKCS5_pbe2_set(const EVP_CIPHER *cipher, int iter, @@ -225,7 +221,7 @@ X509_ALGOR *PKCS5_pbkdf2_set(int iter, unsigned char *salt, int saltlen, if (salt) memcpy(osalt->data, salt, saltlen); - else if (RAND_pseudo_bytes(osalt->data, saltlen) < 0) + else if (RAND_bytes(osalt->data, saltlen) <= 0) goto merr; if (iter <= 0) diff --git a/thirdparty/openssl/crypto/asn1/t_req.c b/thirdparty/openssl/crypto/asn1/t_req.c index 024553ab19..70aba4cc3b 100644 --- a/thirdparty/openssl/crypto/asn1/t_req.c +++ b/thirdparty/openssl/crypto/asn1/t_req.c @@ -196,6 +196,7 @@ int X509_REQ_print_ex(BIO *bp, X509_REQ *x, unsigned long nmflags, if (BIO_puts(bp, ":") <= 0) goto err; if ((type == V_ASN1_PRINTABLESTRING) || + (type == V_ASN1_UTF8STRING) || (type == V_ASN1_T61STRING) || (type == V_ASN1_IA5STRING)) { if (BIO_write(bp, (char *)bs->data, bs->length) diff --git a/thirdparty/openssl/crypto/asn1/tasn_dec.c b/thirdparty/openssl/crypto/asn1/tasn_dec.c index 6bdcd5c542..d49a5d5792 100644 --- a/thirdparty/openssl/crypto/asn1/tasn_dec.c +++ b/thirdparty/openssl/crypto/asn1/tasn_dec.c @@ -400,7 +400,9 @@ int ASN1_item_ex_d2i(ASN1_VALUE **pval, const unsigned char **in, long len, if (tt->flags & ASN1_TFLG_ADB_MASK) { const ASN1_TEMPLATE *seqtt; ASN1_VALUE **pseqval; - seqtt = asn1_do_adb(pval, tt, 1); + seqtt = asn1_do_adb(pval, tt, 0); + if (seqtt == NULL) + continue; pseqval = asn1_get_field_ptr(pval, seqtt); ASN1_template_free(pseqval, seqtt); } @@ -411,7 +413,7 @@ int ASN1_item_ex_d2i(ASN1_VALUE **pval, const unsigned char **in, long len, const ASN1_TEMPLATE *seqtt; ASN1_VALUE **pseqval; seqtt = asn1_do_adb(pval, tt, 1); - if (!seqtt) + if (seqtt == NULL) goto err; pseqval = asn1_get_field_ptr(pval, seqtt); 
/* Have we ran out of data? */ @@ -476,7 +478,7 @@ int ASN1_item_ex_d2i(ASN1_VALUE **pval, const unsigned char **in, long len, for (; i < it->tcount; tt++, i++) { const ASN1_TEMPLATE *seqtt; seqtt = asn1_do_adb(pval, tt, 1); - if (!seqtt) + if (seqtt == NULL) goto err; if (seqtt->flags & ASN1_TFLG_OPTIONAL) { ASN1_VALUE **pseqval; @@ -671,6 +673,7 @@ static int asn1_template_noexp_d2i(ASN1_VALUE **val, } len -= p - q; if (!sk_ASN1_VALUE_push((STACK_OF(ASN1_VALUE) *)*val, skfield)) { + ASN1_item_ex_free(&skfield, ASN1_ITEM_ptr(tt->item)); ASN1err(ASN1_F_ASN1_TEMPLATE_NOEXP_D2I, ERR_R_MALLOC_FAILURE); goto err; } diff --git a/thirdparty/openssl/crypto/asn1/tasn_enc.c b/thirdparty/openssl/crypto/asn1/tasn_enc.c index f7f83e56a9..081a9d534f 100644 --- a/thirdparty/openssl/crypto/asn1/tasn_enc.c +++ b/thirdparty/openssl/crypto/asn1/tasn_enc.c @@ -59,6 +59,7 @@ #include <stddef.h> #include <string.h> +#include <limits.h> #include "cryptlib.h" #include <openssl/asn1.h> #include <openssl/asn1t.h> @@ -216,17 +217,19 @@ int ASN1_item_ex_i2d(ASN1_VALUE **pval, unsigned char **out, for (i = 0, tt = it->templates; i < it->tcount; tt++, i++) { const ASN1_TEMPLATE *seqtt; ASN1_VALUE **pseqval; + int tmplen; seqtt = asn1_do_adb(pval, tt, 1); if (!seqtt) return 0; pseqval = asn1_get_field_ptr(pval, seqtt); - /* FIXME: check for errors in enhanced version */ - seqcontlen += asn1_template_ex_i2d(pseqval, NULL, seqtt, - -1, aclass); + tmplen = asn1_template_ex_i2d(pseqval, NULL, seqtt, -1, aclass); + if (tmplen == -1 || (tmplen > INT_MAX - seqcontlen)) + return -1; + seqcontlen += tmplen; } seqlen = ASN1_object_size(ndef, seqcontlen, tag); - if (!out) + if (!out || seqlen == -1) return seqlen; /* Output SEQUENCE header */ ASN1_put_object(out, ndef, seqcontlen, tag, aclass); @@ -339,19 +342,24 @@ static int asn1_template_ex_i2d(ASN1_VALUE **pval, unsigned char **out, /* Determine total length of items */ skcontlen = 0; for (i = 0; i < sk_ASN1_VALUE_num(sk); i++) { + int tmplen; skitem 
= sk_ASN1_VALUE_value(sk, i); - skcontlen += ASN1_item_ex_i2d(&skitem, NULL, - ASN1_ITEM_ptr(tt->item), - -1, iclass); + tmplen = ASN1_item_ex_i2d(&skitem, NULL, ASN1_ITEM_ptr(tt->item), + -1, iclass); + if (tmplen == -1 || (skcontlen > INT_MAX - tmplen)) + return -1; + skcontlen += tmplen; } sklen = ASN1_object_size(ndef, skcontlen, sktag); + if (sklen == -1) + return -1; /* If EXPLICIT need length of surrounding tag */ if (flags & ASN1_TFLG_EXPTAG) ret = ASN1_object_size(ndef, sklen, ttag); else ret = sklen; - if (!out) + if (!out || ret == -1) return ret; /* Now encode this lot... */ @@ -380,7 +388,7 @@ static int asn1_template_ex_i2d(ASN1_VALUE **pval, unsigned char **out, return 0; /* Find length of EXPLICIT tag */ ret = ASN1_object_size(ndef, i, ttag); - if (out) { + if (out && ret != -1) { /* Output tag and item */ ASN1_put_object(out, ndef, i, ttag, tclass); ASN1_item_ex_i2d(pval, out, ASN1_ITEM_ptr(tt->item), -1, iclass); diff --git a/thirdparty/openssl/crypto/asn1/tasn_new.c b/thirdparty/openssl/crypto/asn1/tasn_new.c index b0c73beeb5..54f459d1ed 100644 --- a/thirdparty/openssl/crypto/asn1/tasn_new.c +++ b/thirdparty/openssl/crypto/asn1/tasn_new.c @@ -158,7 +158,7 @@ static int asn1_item_ex_combine_new(ASN1_VALUE **pval, const ASN1_ITEM *it, } asn1_set_choice_selector(pval, -1, it); if (asn1_cb && !asn1_cb(ASN1_OP_NEW_POST, pval, it, NULL)) - goto auxerr; + goto auxerr2; break; case ASN1_ITYPE_NDEF_SEQUENCE: @@ -186,10 +186,10 @@ static int asn1_item_ex_combine_new(ASN1_VALUE **pval, const ASN1_ITEM *it, for (i = 0, tt = it->templates; i < it->tcount; tt++, i++) { pseqval = asn1_get_field_ptr(pval, tt); if (!ASN1_template_new(pseqval, tt)) - goto memerr; + goto memerr2; } if (asn1_cb && !asn1_cb(ASN1_OP_NEW_POST, pval, it, NULL)) - goto auxerr; + goto auxerr2; break; } #ifdef CRYPTO_MDEBUG @@ -198,6 +198,8 @@ static int asn1_item_ex_combine_new(ASN1_VALUE **pval, const ASN1_ITEM *it, #endif return 1; + memerr2: + ASN1_item_ex_free(pval, it); memerr: 
ASN1err(ASN1_F_ASN1_ITEM_EX_COMBINE_NEW, ERR_R_MALLOC_FAILURE); #ifdef CRYPTO_MDEBUG @@ -206,9 +208,10 @@ static int asn1_item_ex_combine_new(ASN1_VALUE **pval, const ASN1_ITEM *it, #endif return 0; + auxerr2: + ASN1_item_ex_free(pval, it); auxerr: ASN1err(ASN1_F_ASN1_ITEM_EX_COMBINE_NEW, ASN1_R_AUX_ERROR); - ASN1_item_ex_free(pval, it); #ifdef CRYPTO_MDEBUG if (it->sname) CRYPTO_pop_info(); diff --git a/thirdparty/openssl/crypto/asn1/tasn_prn.c b/thirdparty/openssl/crypto/asn1/tasn_prn.c index 5e7d53e985..f628caddbd 100644 --- a/thirdparty/openssl/crypto/asn1/tasn_prn.c +++ b/thirdparty/openssl/crypto/asn1/tasn_prn.c @@ -204,7 +204,8 @@ static int asn1_item_print_ctx(BIO *out, ASN1_VALUE **fld, int indent, } else asn1_cb = 0; - if (*fld == NULL) { + if (((it->itype != ASN1_ITYPE_PRIMITIVE) + || (it->utype != V_ASN1_BOOLEAN)) && *fld == NULL) { if (pctx->flags & ASN1_PCTX_FLAGS_SHOW_ABSENT) { if (!nohdr && !asn1_print_fsname(out, indent, fname, sname, pctx)) return 0; @@ -446,6 +447,8 @@ static int asn1_print_integer_ctx(BIO *out, ASN1_INTEGER *str, char *s; int ret = 1; s = i2s_ASN1_INTEGER(NULL, str); + if (s == NULL) + return 0; if (BIO_puts(out, s) <= 0) ret = 0; OPENSSL_free(s); @@ -496,11 +499,16 @@ static int asn1_primitive_print(BIO *out, ASN1_VALUE **fld, return 0; if (pf && pf->prim_print) return pf->prim_print(out, fld, it, indent, pctx); - str = (ASN1_STRING *)*fld; - if (it->itype == ASN1_ITYPE_MSTRING) + if (it->itype == ASN1_ITYPE_MSTRING) { + str = (ASN1_STRING *)*fld; utype = str->type & ~V_ASN1_NEG; - else + } else { utype = it->utype; + if (utype == V_ASN1_BOOLEAN) + str = NULL; + else + str = (ASN1_STRING *)*fld; + } if (utype == V_ASN1_ANY) { ASN1_TYPE *atype = (ASN1_TYPE *)*fld; utype = atype->type; diff --git a/thirdparty/openssl/crypto/asn1/tasn_utl.c b/thirdparty/openssl/crypto/asn1/tasn_utl.c index 41726d8feb..e14889feb1 100644 --- a/thirdparty/openssl/crypto/asn1/tasn_utl.c +++ b/thirdparty/openssl/crypto/asn1/tasn_utl.c @@ -234,7 +234,7 
@@ const ASN1_TEMPLATE *asn1_do_adb(ASN1_VALUE **pval, const ASN1_TEMPLATE *tt, sfld = offset2ptr(*pval, adb->offset); /* Check if NULL */ - if (!sfld) { + if (*sfld == NULL) { if (!adb->null_tt) goto err; return adb->null_tt; diff --git a/thirdparty/openssl/crypto/asn1/x_bignum.c b/thirdparty/openssl/crypto/asn1/x_bignum.c index eaf046639d..c644199c9f 100644 --- a/thirdparty/openssl/crypto/asn1/x_bignum.c +++ b/thirdparty/openssl/crypto/asn1/x_bignum.c @@ -78,6 +78,8 @@ static int bn_i2c(ASN1_VALUE **pval, unsigned char *cont, int *putype, const ASN1_ITEM *it); static int bn_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len, int utype, char *free_cont, const ASN1_ITEM *it); +static int bn_print(BIO *out, ASN1_VALUE **pval, const ASN1_ITEM *it, + int indent, const ASN1_PCTX *pctx); static ASN1_PRIMITIVE_FUNCS bignum_pf = { NULL, 0, @@ -85,7 +87,8 @@ static ASN1_PRIMITIVE_FUNCS bignum_pf = { bn_free, 0, bn_c2i, - bn_i2c + bn_i2c, + bn_print }; ASN1_ITEM_start(BIGNUM) @@ -151,3 +154,13 @@ static int bn_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len, } return 1; } + +static int bn_print(BIO *out, ASN1_VALUE **pval, const ASN1_ITEM *it, + int indent, const ASN1_PCTX *pctx) +{ + if (!BN_print(out, *(BIGNUM **)pval)) + return 0; + if (BIO_puts(out, "\n") <= 0) + return 0; + return 1; +} diff --git a/thirdparty/openssl/crypto/asn1/x_crl.c b/thirdparty/openssl/crypto/asn1/x_crl.c index 027950330d..c78ded89ef 100644 --- a/thirdparty/openssl/crypto/asn1/x_crl.c +++ b/thirdparty/openssl/crypto/asn1/x_crl.c @@ -254,6 +254,7 @@ static int crl_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, for (idx = 0; idx < sk_X509_EXTENSION_num(exts); idx++) { int nid; + ext = sk_X509_EXTENSION_value(exts, idx); nid = OBJ_obj2nid(ext->object); if (nid == NID_freshest_crl) @@ -263,7 +264,7 @@ static int crl_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, if ((nid == NID_issuing_distribution_point) || (nid == NID_authority_key_identifier) || (nid == 
NID_delta_crl)) - break;; + continue; crl->flags |= EXFLAG_CRITICAL; break; } diff --git a/thirdparty/openssl/crypto/asn1/x_long.c b/thirdparty/openssl/crypto/asn1/x_long.c index 3aed44a3dd..aecb95069d 100644 --- a/thirdparty/openssl/crypto/asn1/x_long.c +++ b/thirdparty/openssl/crypto/asn1/x_long.c @@ -126,7 +126,7 @@ static int long_i2c(ASN1_VALUE **pval, unsigned char *cont, int *putype, * set. */ if (ltmp < 0) - utmp = -ltmp - 1; + utmp = 0 - (unsigned long)ltmp - 1; else utmp = ltmp; clen = BN_num_bits_word(utmp); @@ -155,19 +155,41 @@ static int long_i2c(ASN1_VALUE **pval, unsigned char *cont, int *putype, static int long_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len, int utype, char *free_cont, const ASN1_ITEM *it) { - int neg, i; + int neg = -1, i; long ltmp; unsigned long utmp = 0; char *cp = (char *)pval; + + if (len) { + /* + * Check possible pad byte. Worst case, we're skipping past actual + * content, but since that's only with 0x00 and 0xff and we set neg + * accordingly, the result will be correct in the end anyway. + */ + switch (cont[0]) { + case 0xff: + cont++; + len--; + neg = 1; + break; + case 0: + cont++; + len--; + neg = 0; + break; + } + } if (len > (int)sizeof(long)) { ASN1err(ASN1_F_LONG_C2I, ASN1_R_INTEGER_TOO_LARGE_FOR_LONG); return 0; } - /* Is it negative? */ - if (len && (cont[0] & 0x80)) - neg = 1; - else - neg = 0; + if (neg == -1) { + /* Is it negative? 
*/ + if (len && (cont[0] & 0x80)) + neg = 1; + else + neg = 0; + } utmp = 0; for (i = 0; i < len; i++) { utmp <<= 8; @@ -178,8 +200,8 @@ static int long_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len, } ltmp = (long)utmp; if (neg) { - ltmp++; ltmp = -ltmp; + ltmp--; } if (ltmp == it->size) { ASN1err(ASN1_F_LONG_C2I, ASN1_R_INTEGER_TOO_LARGE_FOR_LONG); diff --git a/thirdparty/openssl/crypto/asn1/x_name.c b/thirdparty/openssl/crypto/asn1/x_name.c index a858c2993b..1fb7ad1cbf 100644 --- a/thirdparty/openssl/crypto/asn1/x_name.c +++ b/thirdparty/openssl/crypto/asn1/x_name.c @@ -178,6 +178,16 @@ static void x509_name_ex_free(ASN1_VALUE **pval, const ASN1_ITEM *it) *pval = NULL; } +static void local_sk_X509_NAME_ENTRY_free(STACK_OF(X509_NAME_ENTRY) *ne) +{ + sk_X509_NAME_ENTRY_free(ne); +} + +static void local_sk_X509_NAME_ENTRY_pop_free(STACK_OF(X509_NAME_ENTRY) *ne) +{ + sk_X509_NAME_ENTRY_pop_free(ne, X509_NAME_ENTRY_free); +} + static int x509_name_ex_d2i(ASN1_VALUE **val, const unsigned char **in, long len, const ASN1_ITEM *it, int tag, int aclass, @@ -199,10 +209,8 @@ static int x509_name_ex_d2i(ASN1_VALUE **val, int i, j, ret; STACK_OF(X509_NAME_ENTRY) *entries; X509_NAME_ENTRY *entry; - if (len > X509_NAME_MAX) { - ASN1err(ASN1_F_X509_NAME_EX_D2I, ASN1_R_TOO_LONG); - return 0; - } + if (len > X509_NAME_MAX) + len = X509_NAME_MAX; q = p; /* Get internal representation of Name */ @@ -230,13 +238,14 @@ static int x509_name_ex_d2i(ASN1_VALUE **val, entry->set = i; if (!sk_X509_NAME_ENTRY_push(nm.x->entries, entry)) goto err; + sk_X509_NAME_ENTRY_set(entries, j, NULL); } - sk_X509_NAME_ENTRY_free(entries); } - sk_STACK_OF_X509_NAME_ENTRY_free(intname.s); ret = x509_name_canon(nm.x); if (!ret) goto err; + sk_STACK_OF_X509_NAME_ENTRY_pop_free(intname.s, + local_sk_X509_NAME_ENTRY_free); nm.x->modified = 0; *val = nm.a; *in = p; @@ -244,6 +253,8 @@ static int x509_name_ex_d2i(ASN1_VALUE **val, err: if (nm.x != NULL) X509_NAME_free(nm.x); + 
sk_STACK_OF_X509_NAME_ENTRY_pop_free(intname.s, + local_sk_X509_NAME_ENTRY_pop_free); ASN1err(ASN1_F_X509_NAME_EX_D2I, ERR_R_NESTED_ASN1_ERROR); return 0; } @@ -269,16 +280,6 @@ static int x509_name_ex_i2d(ASN1_VALUE **val, unsigned char **out, return ret; } -static void local_sk_X509_NAME_ENTRY_free(STACK_OF(X509_NAME_ENTRY) *ne) -{ - sk_X509_NAME_ENTRY_free(ne); -} - -static void local_sk_X509_NAME_ENTRY_pop_free(STACK_OF(X509_NAME_ENTRY) *ne) -{ - sk_X509_NAME_ENTRY_pop_free(ne, X509_NAME_ENTRY_free); -} - static int x509_name_encode(X509_NAME *a) { union { @@ -301,8 +302,10 @@ static int x509_name_encode(X509_NAME *a) entries = sk_X509_NAME_ENTRY_new_null(); if (!entries) goto memerr; - if (!sk_STACK_OF_X509_NAME_ENTRY_push(intname.s, entries)) + if (!sk_STACK_OF_X509_NAME_ENTRY_push(intname.s, entries)) { + sk_X509_NAME_ENTRY_free(entries); goto memerr; + } set = entry->set; } if (!sk_X509_NAME_ENTRY_push(entries, entry)) @@ -372,8 +375,10 @@ static int x509_name_canon(X509_NAME *a) entries = sk_X509_NAME_ENTRY_new_null(); if (!entries) goto err; - if (!sk_STACK_OF_X509_NAME_ENTRY_push(intname, entries)) + if (!sk_STACK_OF_X509_NAME_ENTRY_push(intname, entries)) { + sk_X509_NAME_ENTRY_free(entries); goto err; + } set = entry->set; } tmpentry = X509_NAME_ENTRY_new(); diff --git a/thirdparty/openssl/crypto/asn1/x_x509.c b/thirdparty/openssl/crypto/asn1/x_x509.c index e31e1e750d..aada4a8413 100644 --- a/thirdparty/openssl/crypto/asn1/x_x509.c +++ b/thirdparty/openssl/crypto/asn1/x_x509.c @@ -199,12 +199,26 @@ X509 *d2i_X509_AUX(X509 **a, const unsigned char **pp, long length) return NULL; } -int i2d_X509_AUX(X509 *a, unsigned char **pp) +/* + * Serialize trusted certificate to *pp or just return the required buffer + * length if pp == NULL. We ultimately want to avoid modifying *pp in the + * error path, but that depends on similar hygiene in lower-level functions. + * Here we avoid compounding the problem. 
+ */ +static int i2d_x509_aux_internal(X509 *a, unsigned char **pp) { int length, tmplen; unsigned char *start = pp != NULL ? *pp : NULL; + + OPENSSL_assert(pp == NULL || *pp != NULL); + + /* + * This might perturb *pp on error, but fixing that belongs in i2d_X509() + * not here. It should be that if a == NULL length is zero, but we check + * both just in case. + */ length = i2d_X509(a, pp); - if (length < 0 || a == NULL) + if (length <= 0 || a == NULL) return length; tmplen = i2d_X509_CERT_AUX(a->aux, pp); @@ -218,6 +232,42 @@ int i2d_X509_AUX(X509 *a, unsigned char **pp) return length; } +/* + * Serialize trusted certificate to *pp, or just return the required buffer + * length if pp == NULL. + * + * When pp is not NULL, but *pp == NULL, we allocate the buffer, but since + * we're writing two ASN.1 objects back to back, we can't have i2d_X509() do + * the allocation, nor can we allow i2d_X509_CERT_AUX() to increment the + * allocated buffer. + */ +int i2d_X509_AUX(X509 *a, unsigned char **pp) +{ + int length; + unsigned char *tmp; + + /* Buffer provided by caller */ + if (pp == NULL || *pp != NULL) + return i2d_x509_aux_internal(a, pp); + + /* Obtain the combined length */ + if ((length = i2d_x509_aux_internal(a, NULL)) <= 0) + return length; + + /* Allocate requisite combined storage */ + *pp = tmp = OPENSSL_malloc(length); + if (tmp == NULL) + return -1; /* Push error onto error stack? 
*/ + + /* Encode, but keep *pp at the originally malloced pointer */ + length = i2d_x509_aux_internal(a, &tmp); + if (length <= 0) { + OPENSSL_free(*pp); + *pp = NULL; + } + return length; +} + int i2d_re_X509_tbs(X509 *x, unsigned char **pp) { x->cert_info->enc.modified = 1; diff --git a/thirdparty/openssl/crypto/bio/b_print.c b/thirdparty/openssl/crypto/bio/b_print.c index 90248fa2aa..eb3ab75934 100644 --- a/thirdparty/openssl/crypto/bio/b_print.c +++ b/thirdparty/openssl/crypto/bio/b_print.c @@ -423,9 +423,15 @@ _dopr(char **sbuffer, break; } } - *truncated = (currlen > *maxlen - 1); - if (*truncated) - currlen = *maxlen - 1; + /* + * We have to truncate if there is no dynamic buffer and we have filled the + * static buffer. + */ + if (buffer == NULL) { + *truncated = (currlen > *maxlen - 1); + if (*truncated) + currlen = *maxlen - 1; + } if(!doapr_outch(sbuffer, buffer, &currlen, maxlen, '\0')) return 0; *retlen = currlen - 1; @@ -496,7 +502,7 @@ fmtint(char **sbuffer, if (!(flags & DP_F_UNSIGNED)) { if (value < 0) { signvalue = '-'; - uvalue = -value; + uvalue = -(unsigned LLONG)value; } else if (flags & DP_F_PLUS) signvalue = '+'; else if (flags & DP_F_SPACE) diff --git a/thirdparty/openssl/crypto/bio/bf_nbio.c b/thirdparty/openssl/crypto/bio/bf_nbio.c index a04f32a008..4842bb4c82 100644 --- a/thirdparty/openssl/crypto/bio/bf_nbio.c +++ b/thirdparty/openssl/crypto/bio/bf_nbio.c @@ -139,7 +139,7 @@ static int nbiof_read(BIO *b, char *out, int outl) BIO_clear_retry_flags(b); #if 1 - if (RAND_pseudo_bytes(&n, 1) < 0) + if (RAND_bytes(&n, 1) <= 0) return -1; num = (n & 0x07); @@ -179,7 +179,7 @@ static int nbiof_write(BIO *b, const char *in, int inl) num = nt->lwn; nt->lwn = 0; } else { - if (RAND_pseudo_bytes(&n, 1) < 0) + if (RAND_bytes(&n, 1) <= 0) return -1; num = (n & 7); } diff --git a/thirdparty/openssl/crypto/bio/bio_cb.c b/thirdparty/openssl/crypto/bio/bio_cb.c index d3e860686c..f96294bb43 100644 --- a/thirdparty/openssl/crypto/bio/bio_cb.c +++ 
b/thirdparty/openssl/crypto/bio/bio_cb.c @@ -78,6 +78,9 @@ long MS_CALLBACK BIO_debug_callback(BIO *bio, int cmd, const char *argp, len = BIO_snprintf(buf,sizeof buf,"BIO[%p]: ",(void *)bio); + /* Ignore errors and continue printing the other information. */ + if (len < 0) + len = 0; p = buf + len; p_maxlen = sizeof(buf) - len; diff --git a/thirdparty/openssl/crypto/bio/bss_bio.c b/thirdparty/openssl/crypto/bio/bss_bio.c index 4d8727f8f8..3dd8187729 100644 --- a/thirdparty/openssl/crypto/bio/bss_bio.c +++ b/thirdparty/openssl/crypto/bio/bss_bio.c @@ -149,9 +149,13 @@ static int bio_new(BIO *bio) return 0; b->peer = NULL; + b->closed = 0; + b->len = 0; + b->offset = 0; /* enough for one TLS record (just a default) */ b->size = 17 * 1024; b->buf = NULL; + b->request = 0; bio->ptr = b; return 1; @@ -655,16 +659,15 @@ static long bio_ctrl(BIO *bio, int cmd, long num, void *ptr) break; case BIO_CTRL_EOF: - { - BIO *other_bio = ptr; - - if (other_bio) { - struct bio_bio_st *other_b = other_bio->ptr; + if (b->peer != NULL) { + struct bio_bio_st *peer_b = b->peer->ptr; - assert(other_b != NULL); - ret = other_b->len == 0 && other_b->closed; - } else + if (peer_b->len == 0 && peer_b->closed) ret = 1; + else + ret = 0; + } else { + ret = 1; } break; diff --git a/thirdparty/openssl/crypto/bio/bss_file.c b/thirdparty/openssl/crypto/bio/bss_file.c index bfba93e62b..0cf67e5b77 100644 --- a/thirdparty/openssl/crypto/bio/bss_file.c +++ b/thirdparty/openssl/crypto/bio/bss_file.c @@ -174,7 +174,11 @@ BIO *BIO_new_file(const char *filename, const char *mode) if (file == NULL) { SYSerr(SYS_F_FOPEN, get_last_sys_error()); ERR_add_error_data(5, "fopen('", filename, "','", mode, "')"); - if (errno == ENOENT) + if (errno == ENOENT +# ifdef ENXIO + || errno == ENXIO +# endif + ) BIOerr(BIO_F_BIO_NEW_FILE, BIO_R_NO_SUCH_FILE); else BIOerr(BIO_F_BIO_NEW_FILE, ERR_R_SYS_LIB); @@ -247,7 +251,7 @@ static int MS_CALLBACK file_read(BIO *b, char *out, int outl) ret = fread(out, 1, (int)outl, (FILE 
*)b->ptr); if (ret == 0 && (b->flags & BIO_FLAGS_UPLINK) ? UP_ferror((FILE *)b->ptr) : - ferror((FILE *)b->ptr)) { + ferror((FILE *)b->ptr)) { SYSerr(SYS_F_FREAD, get_last_sys_error()); BIOerr(BIO_F_FILE_READ, ERR_R_SYS_LIB); ret = -1; @@ -283,6 +287,7 @@ static long MS_CALLBACK file_ctrl(BIO *b, int cmd, long num, void *ptr) FILE *fp = (FILE *)b->ptr; FILE **fpp; char p[4]; + int st; switch (cmd) { case BIO_C_FILE_SEEK: @@ -314,8 +319,11 @@ static long MS_CALLBACK file_ctrl(BIO *b, int cmd, long num, void *ptr) # if defined(__MINGW32__) && defined(__MSVCRT__) && !defined(_IOB_ENTRIES) # define _IOB_ENTRIES 20 # endif -# if defined(_IOB_ENTRIES) /* Safety net to catch purely internal BIO_set_fp calls */ +# if defined(_MSC_VER) && _MSC_VER>=1900 + if (ptr == stdin || ptr == stdout || ptr == stderr) + BIO_clear_flags(b, BIO_FLAGS_UPLINK); +# elif defined(_IOB_ENTRIES) if ((size_t)ptr >= (size_t)stdin && (size_t)ptr < (size_t)(stdin + _IOB_ENTRIES)) BIO_clear_flags(b, BIO_FLAGS_UPLINK); @@ -420,10 +428,14 @@ static long MS_CALLBACK file_ctrl(BIO *b, int cmd, long num, void *ptr) b->shutdown = (int)num; break; case BIO_CTRL_FLUSH: - if (b->flags & BIO_FLAGS_UPLINK) - UP_fflush(b->ptr); - else - fflush((FILE *)b->ptr); + st = b->flags & BIO_FLAGS_UPLINK + ? 
UP_fflush(b->ptr) : fflush((FILE *)b->ptr); + if (st == EOF) { + SYSerr(SYS_F_FFLUSH, get_last_sys_error()); + ERR_add_error_data(1, "fflush()"); + BIOerr(BIO_F_FILE_CTRL, ERR_R_SYS_LIB); + ret = 0; + } break; case BIO_CTRL_DUP: ret = 1; diff --git a/thirdparty/openssl/crypto/bio/bss_rtcp.c b/thirdparty/openssl/crypto/bio/bss_rtcp.c index 09f14f48dc..5c98a8234d 100644 --- a/thirdparty/openssl/crypto/bio/bss_rtcp.c +++ b/thirdparty/openssl/crypto/bio/bss_rtcp.c @@ -170,6 +170,8 @@ static int rtcp_new(BIO *bi) bi->num = 0; bi->flags = 0; bi->ptr = OPENSSL_malloc(sizeof(struct rpc_ctx)); + if (bi->ptr == NULL) + return (0); ctx = (struct rpc_ctx *)bi->ptr; ctx->filled = 0; ctx->pos = 0; diff --git a/thirdparty/openssl/crypto/bn/asm/x86_64-gcc.c b/thirdparty/openssl/crypto/bn/asm/x86_64-gcc.c index d77dc433d4..1729b479d4 100644 --- a/thirdparty/openssl/crypto/bn/asm/x86_64-gcc.c +++ b/thirdparty/openssl/crypto/bn/asm/x86_64-gcc.c @@ -194,7 +194,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) BN_ULONG ret, waste; asm("divq %4":"=a"(ret), "=d"(waste) - : "a"(l), "d"(h), "g"(d) + : "a"(l), "d"(h), "r"(d) : "cc"); return ret; diff --git a/thirdparty/openssl/crypto/bn/bn_div.c b/thirdparty/openssl/crypto/bn/bn_div.c index 72e6ce3f74..bc37671cf1 100644 --- a/thirdparty/openssl/crypto/bn/bn_div.c +++ b/thirdparty/openssl/crypto/bn/bn_div.c @@ -155,7 +155,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, ({ asm volatile ( \ "divl %4" \ : "=a"(q), "=d"(rem) \ - : "a"(n1), "d"(n0), "g"(d0) \ + : "a"(n1), "d"(n0), "r"(d0) \ : "cc"); \ q; \ }) @@ -170,7 +170,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, ({ asm volatile ( \ "divq %4" \ : "=a"(q), "=d"(rem) \ - : "a"(n1), "d"(n0), "g"(d0) \ + : "a"(n1), "d"(n0), "r"(d0) \ : "cc"); \ q; \ }) diff --git a/thirdparty/openssl/crypto/bn/bn_exp.c b/thirdparty/openssl/crypto/bn/bn_exp.c index 1670f01d1d..195a7867a4 100644 --- a/thirdparty/openssl/crypto/bn/bn_exp.c +++ 
b/thirdparty/openssl/crypto/bn/bn_exp.c @@ -180,8 +180,9 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) goto err; } } - if (r != rr) - BN_copy(r, rr); + if (r != rr && BN_copy(r, rr) == NULL) + goto err; + ret = 1; err: BN_CTX_end(ctx); diff --git a/thirdparty/openssl/crypto/bn/bn_lib.c b/thirdparty/openssl/crypto/bn/bn_lib.c index 80105fff41..10b78f5126 100644 --- a/thirdparty/openssl/crypto/bn/bn_lib.c +++ b/thirdparty/openssl/crypto/bn/bn_lib.c @@ -569,7 +569,7 @@ void BN_clear(BIGNUM *a) { bn_check_top(a); if (a->d != NULL) - memset(a->d, 0, a->dmax * sizeof(a->d[0])); + OPENSSL_cleanse(a->d, a->dmax * sizeof(a->d[0])); a->top = 0; a->neg = 0; } diff --git a/thirdparty/openssl/crypto/bn/bn_mul.c b/thirdparty/openssl/crypto/bn/bn_mul.c index b174850b6b..3c618dc307 100644 --- a/thirdparty/openssl/crypto/bn/bn_mul.c +++ b/thirdparty/openssl/crypto/bn/bn_mul.c @@ -1083,8 +1083,9 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) end: #endif bn_correct_top(rr); - if (r != rr) - BN_copy(r, rr); + if (r != rr && BN_copy(r, rr) == NULL) + goto err; + ret = 1; err: bn_check_top(r); diff --git a/thirdparty/openssl/crypto/bn/bn_prime.c b/thirdparty/openssl/crypto/bn/bn_prime.c index 1d256874c9..e911e15785 100644 --- a/thirdparty/openssl/crypto/bn/bn_prime.c +++ b/thirdparty/openssl/crypto/bn/bn_prime.c @@ -252,7 +252,6 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, BN_CTX *ctx = NULL; BIGNUM *A1, *A1_odd, *check; /* taken from ctx */ BN_MONT_CTX *mont = NULL; - const BIGNUM *A = NULL; if (BN_cmp(a, BN_value_one()) <= 0) return 0; @@ -278,24 +277,14 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, goto err; BN_CTX_start(ctx); - /* A := abs(a) */ - if (a->neg) { - BIGNUM *t; - if ((t = BN_CTX_get(ctx)) == NULL) - goto err; - BN_copy(t, a); - t->neg = 0; - A = t; - } else - A = a; A1 = BN_CTX_get(ctx); A1_odd = BN_CTX_get(ctx); check = BN_CTX_get(ctx); if (check == 
NULL) goto err; - /* compute A1 := A - 1 */ - if (!BN_copy(A1, A)) + /* compute A1 := a - 1 */ + if (!BN_copy(A1, a)) goto err; if (!BN_sub_word(A1, 1)) goto err; @@ -311,11 +300,11 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, if (!BN_rshift(A1_odd, A1, k)) goto err; - /* Montgomery setup for computations mod A */ + /* Montgomery setup for computations mod a */ mont = BN_MONT_CTX_new(); if (mont == NULL) goto err; - if (!BN_MONT_CTX_set(mont, A, ctx)) + if (!BN_MONT_CTX_set(mont, a, ctx)) goto err; for (i = 0; i < checks; i++) { @@ -323,9 +312,9 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, goto err; if (!BN_add_word(check, 1)) goto err; - /* now 1 <= check < A */ + /* now 1 <= check < a */ - j = witness(check, A, A1, A1_odd, k, ctx, mont); + j = witness(check, a, A1, A1_odd, k, ctx, mont); if (j == -1) goto err; if (j) { diff --git a/thirdparty/openssl/crypto/bn/bn_prime.h b/thirdparty/openssl/crypto/bn/bn_prime.h index 5cf0de169e..489af8b424 100644 --- a/thirdparty/openssl/crypto/bn/bn_prime.h +++ b/thirdparty/openssl/crypto/bn/bn_prime.h @@ -64,263 +64,263 @@ typedef unsigned short prime_t; typedef unsigned char prime_t; #endif static const prime_t primes[NUMPRIMES] = { - 2, 3, 5, 7, 11, 13, 17, 19, - 23, 29, 31, 37, 41, 43, 47, 53, - 59, 61, 67, 71, 73, 79, 83, 89, - 97, 101, 103, 107, 109, 113, 127, 131, - 137, 139, 149, 151, 157, 163, 167, 173, - 179, 181, 191, 193, 197, 199, 211, 223, - 227, 229, 233, 239, 241, 251, + 2, 3, 5, 7, 11, 13, 17, 19, + 23, 29, 31, 37, 41, 43, 47, 53, + 59, 61, 67, 71, 73, 79, 83, 89, + 97, 101, 103, 107, 109, 113, 127, 131, + 137, 139, 149, 151, 157, 163, 167, 173, + 179, 181, 191, 193, 197, 199, 211, 223, + 227, 229, 233, 239, 241, 251, #ifndef EIGHT_BIT - 257, 263, - 269, 271, 277, 281, 283, 293, 307, 311, - 313, 317, 331, 337, 347, 349, 353, 359, - 367, 373, 379, 383, 389, 397, 401, 409, - 419, 421, 431, 433, 439, 443, 449, 457, - 461, 463, 467, 479, 487, 
491, 499, 503, - 509, 521, 523, 541, 547, 557, 563, 569, - 571, 577, 587, 593, 599, 601, 607, 613, - 617, 619, 631, 641, 643, 647, 653, 659, - 661, 673, 677, 683, 691, 701, 709, 719, - 727, 733, 739, 743, 751, 757, 761, 769, - 773, 787, 797, 809, 811, 821, 823, 827, - 829, 839, 853, 857, 859, 863, 877, 881, - 883, 887, 907, 911, 919, 929, 937, 941, - 947, 953, 967, 971, 977, 983, 991, 997, - 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, - 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, - 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163, - 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223, - 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, - 1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, - 1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423, - 1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459, - 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511, - 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, - 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619, - 1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693, - 1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747, - 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811, - 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, - 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, - 1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003, - 2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069, - 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129, - 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, - 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267, - 2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311, - 2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377, - 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423, - 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, - 2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579, - 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657, - 2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693, - 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741, - 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, - 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861, - 2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939, - 
2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011, - 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, - 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, - 3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221, - 3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301, - 3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347, - 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413, - 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, - 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541, - 3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607, - 3613, 3617, 3623, 3631, 3637, 3643, 3659, 3671, - 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, - 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, - 3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863, - 3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923, - 3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003, - 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057, - 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, - 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211, - 4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259, - 4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337, - 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409, - 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, - 4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547, - 4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621, - 4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673, - 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751, - 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, - 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909, - 4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967, - 4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011, - 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087, - 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, - 5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233, - 5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309, - 5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399, - 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443, - 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, - 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573, - 5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653, - 
5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711, - 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791, - 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, - 5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897, - 5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007, - 6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073, - 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133, - 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, - 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271, - 6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329, - 6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379, - 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473, - 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, - 6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637, - 6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701, - 6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779, - 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833, - 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, - 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971, - 6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027, - 7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121, - 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207, - 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, - 7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349, - 7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457, - 7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517, - 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561, - 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, - 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691, - 7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757, - 7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853, - 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919, - 7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, - 8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087, - 8089, 8093, 8101, 8111, 8117, 8123, 8147, 8161, - 8167, 8171, 8179, 8191, 8209, 8219, 8221, 8231, - 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291, - 8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, - 8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443, - 8447, 8461, 8467, 8501, 8513, 8521, 8527, 8537, - 
8539, 8543, 8563, 8573, 8581, 8597, 8599, 8609, - 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677, - 8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, - 8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803, - 8807, 8819, 8821, 8831, 8837, 8839, 8849, 8861, - 8863, 8867, 8887, 8893, 8923, 8929, 8933, 8941, - 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011, - 9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, - 9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161, - 9173, 9181, 9187, 9199, 9203, 9209, 9221, 9227, - 9239, 9241, 9257, 9277, 9281, 9283, 9293, 9311, - 9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377, - 9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, - 9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491, - 9497, 9511, 9521, 9533, 9539, 9547, 9551, 9587, - 9601, 9613, 9619, 9623, 9629, 9631, 9643, 9649, - 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733, - 9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, - 9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857, - 9859, 9871, 9883, 9887, 9901, 9907, 9923, 9929, - 9931, 9941, 9949, 9967, 9973, 10007, 10009, 10037, - 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099, - 10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163, - 10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247, - 10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303, - 10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369, - 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459, - 10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, - 10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627, - 10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691, - 10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771, - 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859, - 10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937, - 10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003, - 11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087, - 11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161, - 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251, - 11257, 11261, 
11273, 11279, 11287, 11299, 11311, 11317, - 11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399, - 11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483, - 11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551, - 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657, - 11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731, - 11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813, - 11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887, - 11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941, - 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011, - 12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101, - 12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161, - 12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251, - 12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323, - 12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401, - 12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473, - 12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527, - 12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589, - 12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653, - 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739, - 12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, - 12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907, - 12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967, - 12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033, - 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109, - 13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177, - 13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259, - 13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337, - 13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421, - 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499, - 13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597, - 13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681, - 13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723, - 13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799, - 13807, 13829, 13831, 13841, 13859, 13873, 
13877, 13879, - 13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, - 13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033, - 14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143, - 14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221, - 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323, - 14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407, - 14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461, - 14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549, - 14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627, - 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699, - 14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753, - 14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821, - 14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887, - 14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957, - 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073, - 15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, - 15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217, - 15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277, - 15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331, - 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401, - 15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473, - 15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569, - 15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643, - 15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727, - 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773, - 15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, - 15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919, - 15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007, - 16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087, - 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183, - 16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249, - 16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349, - 16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427, - 16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493, - 16519, 
16529, 16547, 16553, 16561, 16567, 16573, 16603, - 16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661, - 16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747, - 16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843, - 16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927, - 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993, - 17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, - 17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159, - 17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231, - 17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327, - 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389, - 17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467, - 17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519, - 17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599, - 17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683, - 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783, - 17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863, + 257, 263, + 269, 271, 277, 281, 283, 293, 307, 311, + 313, 317, 331, 337, 347, 349, 353, 359, + 367, 373, 379, 383, 389, 397, 401, 409, + 419, 421, 431, 433, 439, 443, 449, 457, + 461, 463, 467, 479, 487, 491, 499, 503, + 509, 521, 523, 541, 547, 557, 563, 569, + 571, 577, 587, 593, 599, 601, 607, 613, + 617, 619, 631, 641, 643, 647, 653, 659, + 661, 673, 677, 683, 691, 701, 709, 719, + 727, 733, 739, 743, 751, 757, 761, 769, + 773, 787, 797, 809, 811, 821, 823, 827, + 829, 839, 853, 857, 859, 863, 877, 881, + 883, 887, 907, 911, 919, 929, 937, 941, + 947, 953, 967, 971, 977, 983, 991, 997, + 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, + 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, + 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163, + 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223, + 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, + 1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, + 1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423, + 1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459, + 1471, 1481, 1483, 
1487, 1489, 1493, 1499, 1511, + 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, + 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619, + 1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693, + 1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747, + 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811, + 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, + 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, + 1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003, + 2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069, + 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129, + 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, + 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267, + 2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311, + 2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377, + 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423, + 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, + 2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579, + 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657, + 2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693, + 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741, + 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, + 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861, + 2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939, + 2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011, + 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, + 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, + 3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221, + 3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301, + 3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347, + 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413, + 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, + 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541, + 3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607, + 3613, 3617, 3623, 3631, 3637, 3643, 3659, 3671, + 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, + 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, + 3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863, + 3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923, + 3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003, + 4007, 4013, 4019, 
4021, 4027, 4049, 4051, 4057, + 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, + 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211, + 4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259, + 4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337, + 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409, + 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, + 4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547, + 4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621, + 4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673, + 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751, + 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, + 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909, + 4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967, + 4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011, + 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087, + 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, + 5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233, + 5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309, + 5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399, + 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443, + 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, + 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573, + 5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653, + 5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711, + 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791, + 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, + 5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897, + 5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007, + 6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073, + 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133, + 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, + 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271, + 6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329, + 6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379, + 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473, + 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, + 6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637, + 6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701, + 6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779, + 6781, 6791, 6793, 
6803, 6823, 6827, 6829, 6833, + 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, + 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971, + 6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027, + 7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121, + 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207, + 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, + 7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349, + 7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457, + 7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517, + 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561, + 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, + 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691, + 7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757, + 7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853, + 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919, + 7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, + 8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087, + 8089, 8093, 8101, 8111, 8117, 8123, 8147, 8161, + 8167, 8171, 8179, 8191, 8209, 8219, 8221, 8231, + 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291, + 8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, + 8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443, + 8447, 8461, 8467, 8501, 8513, 8521, 8527, 8537, + 8539, 8543, 8563, 8573, 8581, 8597, 8599, 8609, + 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677, + 8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, + 8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803, + 8807, 8819, 8821, 8831, 8837, 8839, 8849, 8861, + 8863, 8867, 8887, 8893, 8923, 8929, 8933, 8941, + 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011, + 9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, + 9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161, + 9173, 9181, 9187, 9199, 9203, 9209, 9221, 9227, + 9239, 9241, 9257, 9277, 9281, 9283, 9293, 9311, + 9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377, + 9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, + 9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491, + 9497, 9511, 9521, 9533, 9539, 9547, 9551, 9587, + 9601, 9613, 9619, 9623, 9629, 9631, 9643, 9649, + 9661, 9677, 9679, 
9689, 9697, 9719, 9721, 9733, + 9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, + 9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857, + 9859, 9871, 9883, 9887, 9901, 9907, 9923, 9929, + 9931, 9941, 9949, 9967, 9973, 10007, 10009, 10037, + 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099, + 10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163, + 10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247, + 10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303, + 10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369, + 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459, + 10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, + 10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627, + 10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691, + 10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771, + 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859, + 10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937, + 10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003, + 11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087, + 11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161, + 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251, + 11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317, + 11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399, + 11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483, + 11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551, + 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657, + 11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731, + 11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813, + 11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887, + 11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941, + 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011, + 12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101, + 12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161, + 12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251, + 12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323, + 12329, 12343, 12347, 
12373, 12377, 12379, 12391, 12401, + 12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473, + 12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527, + 12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589, + 12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653, + 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739, + 12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, + 12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907, + 12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967, + 12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033, + 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109, + 13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177, + 13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259, + 13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337, + 13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421, + 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499, + 13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597, + 13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681, + 13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723, + 13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799, + 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879, + 13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, + 13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033, + 14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143, + 14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221, + 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323, + 14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407, + 14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461, + 14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549, + 14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627, + 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699, + 14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753, + 14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821, + 14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887, + 14891, 14897, 14923, 14929, 14939, 14947, 14951, 
14957, + 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073, + 15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, + 15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217, + 15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277, + 15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331, + 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401, + 15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473, + 15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569, + 15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643, + 15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727, + 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773, + 15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, + 15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919, + 15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007, + 16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087, + 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183, + 16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249, + 16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349, + 16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427, + 16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493, + 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603, + 16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661, + 16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747, + 16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843, + 16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927, + 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993, + 17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, + 17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159, + 17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231, + 17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327, + 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389, + 17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467, + 17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519, + 17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599, + 17609, 17623, 
17627, 17657, 17659, 17669, 17681, 17683, + 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783, + 17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863, #endif }; diff --git a/thirdparty/openssl/crypto/bn/bn_print.c b/thirdparty/openssl/crypto/bn/bn_print.c index bfa31efc56..f85a6550a5 100644 --- a/thirdparty/openssl/crypto/bn/bn_print.c +++ b/thirdparty/openssl/crypto/bn/bn_print.c @@ -72,12 +72,9 @@ char *BN_bn2hex(const BIGNUM *a) char *buf; char *p; - if (a->neg && BN_is_zero(a)) { - /* "-0" == 3 bytes including NULL terminator */ - buf = OPENSSL_malloc(3); - } else { - buf = OPENSSL_malloc(a->top * BN_BYTES * 2 + 2); - } + if (BN_is_zero(a)) + return OPENSSL_strdup("0"); + buf = OPENSSL_malloc(a->top * BN_BYTES * 2 + 2); if (buf == NULL) { BNerr(BN_F_BN_BN2HEX, ERR_R_MALLOC_FAILURE); goto err; @@ -85,8 +82,6 @@ char *BN_bn2hex(const BIGNUM *a) p = buf; if (a->neg) *(p++) = '-'; - if (BN_is_zero(a)) - *(p++) = '0'; for (i = a->top - 1; i >= 0; i--) { for (j = BN_BITS2 - 8; j >= 0; j -= 8) { /* strip leading zeros */ @@ -111,6 +106,7 @@ char *BN_bn2dec(const BIGNUM *a) char *p; BIGNUM *t = NULL; BN_ULONG *bn_data = NULL, *lp; + int bn_data_num; /*- * get an upper bound for the length of the decimal integer @@ -120,9 +116,9 @@ char *BN_bn2dec(const BIGNUM *a) */ i = BN_num_bits(a) * 3; num = (i / 10 + i / 1000 + 1) + 1; - bn_data = - (BN_ULONG *)OPENSSL_malloc((num / BN_DEC_NUM + 1) * sizeof(BN_ULONG)); - buf = (char *)OPENSSL_malloc(num + 3); + bn_data_num = num / BN_DEC_NUM + 1; + bn_data = OPENSSL_malloc(bn_data_num * sizeof(BN_ULONG)); + buf = OPENSSL_malloc(num + 3); if ((buf == NULL) || (bn_data == NULL)) { BNerr(BN_F_BN_BN2DEC, ERR_R_MALLOC_FAILURE); goto err; @@ -140,9 +136,12 @@ char *BN_bn2dec(const BIGNUM *a) if (BN_is_negative(t)) *p++ = '-'; - i = 0; while (!BN_is_zero(t)) { + if (lp - bn_data >= bn_data_num) + goto err; *lp = BN_div_word(t, BN_DEC_CONV); + if (*lp == (BN_ULONG)-1) + goto err; lp++; } lp--; @@ -240,10 +239,12 @@ int 
BN_hex2bn(BIGNUM **bn, const char *a) } ret->top = h; bn_correct_top(ret); - ret->neg = neg; *bn = ret; bn_check_top(ret); + /* Don't set the negative flag if it's zero. */ + if (ret->top != 0) + ret->neg = neg; return (num); err: if (*bn == NULL) @@ -295,7 +296,7 @@ int BN_dec2bn(BIGNUM **bn, const char *a) if (j == BN_DEC_NUM) j = 0; l = 0; - while (*a) { + while (--i >= 0) { l *= 10; l += *a - '0'; a++; @@ -306,11 +307,13 @@ int BN_dec2bn(BIGNUM **bn, const char *a) j = 0; } } - ret->neg = neg; bn_correct_top(ret); *bn = ret; bn_check_top(ret); + /* Don't set the negative flag if it's zero. */ + if (ret->top != 0) + ret->neg = neg; return (num); err: if (*bn == NULL) @@ -321,6 +324,7 @@ int BN_dec2bn(BIGNUM **bn, const char *a) int BN_asc2bn(BIGNUM **bn, const char *a) { const char *p = a; + if (*p == '-') p++; @@ -331,7 +335,8 @@ int BN_asc2bn(BIGNUM **bn, const char *a) if (!BN_dec2bn(bn, p)) return 0; } - if (*a == '-') + /* Don't set the negative flag if it's zero. */ + if (*a == '-' && (*bn)->top != 0) (*bn)->neg = 1; return 1; } diff --git a/thirdparty/openssl/crypto/bn/bn_rand.c b/thirdparty/openssl/crypto/bn/bn_rand.c index f9fb2e9e45..60d3f2260b 100644 --- a/thirdparty/openssl/crypto/bn/bn_rand.c +++ b/thirdparty/openssl/crypto/bn/bn_rand.c @@ -121,15 +121,14 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) int ret = 0, bit, bytes, mask; time_t tim; - if (bits < 0 || (bits == 1 && top > 0)) { - BNerr(BN_F_BNRAND, BN_R_BITS_TOO_SMALL); - return 0; - } - if (bits == 0) { + if (top != -1 || bottom != 0) + goto toosmall; BN_zero(rnd); return 1; } + if (bits < 0 || (bits == 1 && top > 0)) + goto toosmall; bytes = (bits + 7) / 8; bit = (bits - 1) % 8; @@ -145,13 +144,9 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) time(&tim); RAND_add(&tim, sizeof(tim), 0.0); - if (pseudorand) { - if (RAND_pseudo_bytes(buf, bytes) == -1) - goto err; - } else { - if (RAND_bytes(buf, bytes) <= 0) - goto err; - } 
+ /* We ignore the value of pseudorand and always call RAND_bytes */ + if (RAND_bytes(buf, bytes) <= 0) + goto err; #if 1 if (pseudorand == 2) { @@ -199,6 +194,10 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) } bn_check_top(rnd); return (ret); + +toosmall: + BNerr(BN_F_BNRAND, BN_R_BITS_TOO_SMALL); + return 0; } int BN_rand(BIGNUM *rnd, int bits, int top, int bottom) diff --git a/thirdparty/openssl/crypto/bn/bn_sqr.c b/thirdparty/openssl/crypto/bn/bn_sqr.c index 3ca69879ee..256d26e8db 100644 --- a/thirdparty/openssl/crypto/bn/bn_sqr.c +++ b/thirdparty/openssl/crypto/bn/bn_sqr.c @@ -143,8 +143,9 @@ int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) rr->top = max - 1; else rr->top = max; - if (rr != r) - BN_copy(r, rr); + if (r != rr && BN_copy(r, rr) == NULL) + goto err; + ret = 1; err: bn_check_top(rr); diff --git a/thirdparty/openssl/crypto/bn/bn_word.c b/thirdparty/openssl/crypto/bn/bn_word.c index b031a60b5b..9b5f9cb98c 100644 --- a/thirdparty/openssl/crypto/bn/bn_word.c +++ b/thirdparty/openssl/crypto/bn/bn_word.c @@ -72,10 +72,32 @@ BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w) if (w == 0) return (BN_ULONG)-1; +#ifndef BN_LLONG + /* + * If |w| is too long and we don't have BN_ULLONG then we need to fall + * back to using BN_div_word + */ + if (w > ((BN_ULONG)1 << BN_BITS4)) { + BIGNUM *tmp = BN_dup(a); + if (tmp == NULL) + return (BN_ULONG)-1; + + ret = BN_div_word(tmp, w); + BN_free(tmp); + + return ret; + } +#endif + bn_check_top(a); w &= BN_MASK2; for (i = a->top - 1; i >= 0; i--) { #ifndef BN_LLONG + /* + * We can assume here that | w <= ((BN_ULONG)1 << BN_BITS4) | and so + * | ret < ((BN_ULONG)1 << BN_BITS4) | and therefore the shifts here are + * safe and will not overflow + */ ret = ((ret << BN_BITS4) | ((a->d[i] >> BN_BITS4) & BN_MASK2l)) % w; ret = ((ret << BN_BITS4) | (a->d[i] & BN_MASK2l)) % w; #else diff --git a/thirdparty/openssl/crypto/cms/cms_enc.c b/thirdparty/openssl/crypto/cms/cms_enc.c index 
b14b4b68b5..90b1fcc750 100644 --- a/thirdparty/openssl/crypto/cms/cms_enc.c +++ b/thirdparty/openssl/crypto/cms/cms_enc.c @@ -119,7 +119,7 @@ BIO *cms_EncryptedContent_init_bio(CMS_EncryptedContentInfo *ec) /* Generate a random IV if we need one */ ivlen = EVP_CIPHER_CTX_iv_length(ctx); if (ivlen > 0) { - if (RAND_pseudo_bytes(iv, ivlen) <= 0) + if (RAND_bytes(iv, ivlen) <= 0) goto err; piv = iv; } @@ -179,10 +179,9 @@ BIO *cms_EncryptedContent_init_bio(CMS_EncryptedContentInfo *ec) CMS_R_CIPHER_INITIALISATION_ERROR); goto err; } - - if (piv) { + if (enc) { calg->parameter = ASN1_TYPE_new(); - if (!calg->parameter) { + if (calg->parameter == NULL) { CMSerr(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO, ERR_R_MALLOC_FAILURE); goto err; } @@ -191,6 +190,11 @@ BIO *cms_EncryptedContent_init_bio(CMS_EncryptedContentInfo *ec) CMS_R_CIPHER_PARAMETER_INITIALISATION_ERROR); goto err; } + /* If parameter type not set omit parameter */ + if (calg->parameter->type == V_ASN1_UNDEF) { + ASN1_TYPE_free(calg->parameter); + calg->parameter = NULL; + } } ok = 1; diff --git a/thirdparty/openssl/crypto/cms/cms_ess.c b/thirdparty/openssl/crypto/cms/cms_ess.c index 8631a2eb2b..8212560628 100644 --- a/thirdparty/openssl/crypto/cms/cms_ess.c +++ b/thirdparty/openssl/crypto/cms/cms_ess.c @@ -107,8 +107,7 @@ CMS_ReceiptRequest *CMS_ReceiptRequest_create0(unsigned char *id, int idlen, else { if (!ASN1_STRING_set(rr->signedContentIdentifier, NULL, 32)) goto merr; - if (RAND_pseudo_bytes(rr->signedContentIdentifier->data, 32) - <= 0) + if (RAND_bytes(rr->signedContentIdentifier->data, 32) <= 0) goto err; } diff --git a/thirdparty/openssl/crypto/cms/cms_kari.c b/thirdparty/openssl/crypto/cms/cms_kari.c index 2cfcdb29cd..ee283172d3 100644 --- a/thirdparty/openssl/crypto/cms/cms_kari.c +++ b/thirdparty/openssl/crypto/cms/cms_kari.c @@ -401,9 +401,12 @@ static int cms_wrap_init(CMS_KeyAgreeRecipientInfo *kari, * Pick a cipher based on content encryption cipher. 
If it is DES3 use * DES3 wrap otherwise use AES wrap similar to key size. */ +#ifndef OPENSSL_NO_DES if (EVP_CIPHER_type(cipher) == NID_des_ede3_cbc) kekcipher = EVP_des_ede3_wrap(); - else if (keylen <= 16) + else +#endif + if (keylen <= 16) kekcipher = EVP_aes_128_wrap(); else if (keylen <= 24) kekcipher = EVP_aes_192_wrap(); diff --git a/thirdparty/openssl/crypto/cms/cms_lib.c b/thirdparty/openssl/crypto/cms/cms_lib.c index d6cb60d02d..6d27c4969b 100644 --- a/thirdparty/openssl/crypto/cms/cms_lib.c +++ b/thirdparty/openssl/crypto/cms/cms_lib.c @@ -413,6 +413,8 @@ static STACK_OF(CMS_CertificateChoices) return &cms->d.signedData->certificates; case NID_pkcs7_enveloped: + if (cms->d.envelopedData->originatorInfo == NULL) + return NULL; return &cms->d.envelopedData->originatorInfo->certificates; default: @@ -488,6 +490,8 @@ static STACK_OF(CMS_RevocationInfoChoice) return &cms->d.signedData->crls; case NID_pkcs7_enveloped: + if (cms->d.envelopedData->originatorInfo == NULL) + return NULL; return &cms->d.envelopedData->originatorInfo->crls; default: diff --git a/thirdparty/openssl/crypto/cms/cms_pwri.c b/thirdparty/openssl/crypto/cms/cms_pwri.c index b91c01691f..5c817caf2f 100644 --- a/thirdparty/openssl/crypto/cms/cms_pwri.c +++ b/thirdparty/openssl/crypto/cms/cms_pwri.c @@ -134,7 +134,7 @@ CMS_RecipientInfo *CMS_add0_recipient_password(CMS_ContentInfo *cms, ivlen = EVP_CIPHER_CTX_iv_length(&ctx); if (ivlen > 0) { - if (RAND_pseudo_bytes(iv, ivlen) <= 0) + if (RAND_bytes(iv, ivlen) <= 0) goto err; if (EVP_EncryptInit_ex(&ctx, NULL, NULL, NULL, iv) <= 0) { CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD, ERR_R_EVP_LIB); @@ -301,7 +301,7 @@ static int kek_wrap_key(unsigned char *out, size_t *outlen, memcpy(out + 4, in, inlen); /* Add random padding to end */ if (olen > inlen + 4 - && RAND_pseudo_bytes(out + 4 + inlen, olen - 4 - inlen) < 0) + && RAND_bytes(out + 4 + inlen, olen - 4 - inlen) <= 0) return 0; /* Encrypt twice */ EVP_EncryptUpdate(ctx, out, &dummy, out, olen); 
diff --git a/thirdparty/openssl/crypto/comp/c_rle.c b/thirdparty/openssl/crypto/comp/c_rle.c index e9aabbd166..41919613ee 100644 --- a/thirdparty/openssl/crypto/comp/c_rle.c +++ b/thirdparty/openssl/crypto/comp/c_rle.c @@ -31,12 +31,11 @@ static int rle_compress_block(COMP_CTX *ctx, unsigned char *out, unsigned int olen, unsigned char *in, unsigned int ilen) { - /* int i; */ + if (ilen == 0) + return 0; - if (ilen == 0 || olen < (ilen - 1)) { - /* ZZZZZZZZZZZZZZZZZZZZZZ */ - return (-1); - } + if (olen <= ilen) + return -1; *(out++) = 0; memcpy(out, in, ilen); @@ -49,14 +48,16 @@ static int rle_expand_block(COMP_CTX *ctx, unsigned char *out, { int i; - if (olen < (ilen - 1)) { - /* ZZZZZZZZZZZZZZZZZZZZZZ */ - return (-1); - } + if (ilen == 0) + return 0; + + if (olen < (ilen - 1)) + return -1; i = *(in++); - if (i == 0) { - memcpy(out, in, ilen - 1); - } + if (i != 0) + return -1; + + memcpy(out, in, ilen - 1); return (ilen - 1); } diff --git a/thirdparty/openssl/crypto/conf/conf_def.c b/thirdparty/openssl/crypto/conf/conf_def.c index 68c77cec7d..75e309aaca 100644 --- a/thirdparty/openssl/crypto/conf/conf_def.c +++ b/thirdparty/openssl/crypto/conf/conf_def.c @@ -69,6 +69,12 @@ #include <openssl/buffer.h> #include <openssl/err.h> +/* + * The maximum length we can grow a value to after variable expansion. 64k + * should be more than enough for all reasonable uses. 
+ */ +#define MAX_CONF_VALUE_LENGTH 65536 + static char *eat_ws(CONF *conf, char *p); static char *eat_alpha_numeric(CONF *conf, char *p); static void clear_comments(CONF *conf, char *p); @@ -530,6 +536,8 @@ static int str_copy(CONF *conf, char *section, char **pto, char *from) } else if (IS_EOF(conf, *from)) break; else if (*from == '$') { + size_t newsize; + /* try to expand it */ rrp = NULL; s = &(from[1]); @@ -584,8 +592,12 @@ static int str_copy(CONF *conf, char *section, char **pto, char *from) CONFerr(CONF_F_STR_COPY, CONF_R_VARIABLE_HAS_NO_VALUE); goto err; } - if (!BUF_MEM_grow_clean(buf, - (strlen(p) + buf->length - (e - from)))) { + newsize = strlen(p) + buf->length - (e - from); + if (newsize > MAX_CONF_VALUE_LENGTH) { + CONFerr(CONF_F_STR_COPY, CONF_R_VARIABLE_EXPANSION_TOO_LONG); + goto err; + } + if (!BUF_MEM_grow_clean(buf, newsize)) { CONFerr(CONF_F_STR_COPY, ERR_R_MALLOC_FAILURE); goto err; } diff --git a/thirdparty/openssl/crypto/conf/conf_def.h b/thirdparty/openssl/crypto/conf/conf_def.h index 7d897b89f1..48b3442181 100644 --- a/thirdparty/openssl/crypto/conf/conf_def.h +++ b/thirdparty/openssl/crypto/conf/conf_def.h @@ -81,34 +81,34 @@ #define KEYTYPES(c) ((unsigned short *)((c)->meth_data)) #ifndef CHARSET_EBCDIC -# define IS_COMMENT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_COMMENT) -# define IS_FCOMMENT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_FCOMMENT) -# define IS_EOF(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_EOF) -# define IS_ESC(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_ESC) -# define IS_NUMBER(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_NUMBER) -# define IS_WS(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_WS) -# define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC) +# define IS_COMMENT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_COMMENT) +# define IS_FCOMMENT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_FCOMMENT) +# define IS_EOF(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_EOF) +# define IS_ESC(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_ESC) +# define IS_NUMBER(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_NUMBER) +# define 
IS_WS(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_WS) +# define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC) # define IS_ALPHA_NUMERIC_PUNCT(c,a) \ (KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC_PUNCT) -# define IS_QUOTE(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_QUOTE) -# define IS_DQUOTE(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_DQUOTE) -# define IS_HIGHBIT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_HIGHBIT) +# define IS_QUOTE(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_QUOTE) +# define IS_DQUOTE(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_DQUOTE) +# define IS_HIGHBIT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_HIGHBIT) -#else /* CHARSET_EBCDIC */ +#else /*CHARSET_EBCDIC*/ -# define IS_COMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_COMMENT) -# define IS_FCOMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_FCOMMENT) -# define IS_EOF(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_EOF) -# define IS_ESC(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ESC) -# define IS_NUMBER(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_NUMBER) -# define IS_WS(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_WS) -# define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC) +# define IS_COMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_COMMENT) +# define IS_FCOMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_FCOMMENT) +# define IS_EOF(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_EOF) +# define IS_ESC(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ESC) +# define IS_NUMBER(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_NUMBER) +# define IS_WS(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_WS) +# define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC) # define IS_ALPHA_NUMERIC_PUNCT(c,a) \ (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC_PUNCT) -# define IS_QUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_QUOTE) -# define IS_DQUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_DQUOTE) -# define IS_HIGHBIT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_HIGHBIT) -#endif /* CHARSET_EBCDIC */ +# define IS_QUOTE(c,a) 
(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_QUOTE) +# define IS_DQUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_DQUOTE) +# define IS_HIGHBIT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_HIGHBIT) +#endif /*CHARSET_EBCDIC*/ static unsigned short CONF_type_default[256] = { 0x0008, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, diff --git a/thirdparty/openssl/crypto/conf/conf_err.c b/thirdparty/openssl/crypto/conf/conf_err.c index bb5e2fe252..b0b6896f83 100644 --- a/thirdparty/openssl/crypto/conf/conf_err.c +++ b/thirdparty/openssl/crypto/conf/conf_err.c @@ -115,6 +115,8 @@ static ERR_STRING_DATA CONF_str_reasons[] = { {ERR_REASON(CONF_R_UNABLE_TO_CREATE_NEW_SECTION), "unable to create new section"}, {ERR_REASON(CONF_R_UNKNOWN_MODULE_NAME), "unknown module name"}, + {ERR_REASON(CONF_R_VARIABLE_EXPANSION_TOO_LONG), + "variable expansion too long"}, {ERR_REASON(CONF_R_VARIABLE_HAS_NO_VALUE), "variable has no value"}, {0, NULL} }; diff --git a/thirdparty/openssl/crypto/conf/conf_mod.c b/thirdparty/openssl/crypto/conf/conf_mod.c index 9acfca4f71..e0c9a67ff6 100644 --- a/thirdparty/openssl/crypto/conf/conf_mod.c +++ b/thirdparty/openssl/crypto/conf/conf_mod.c @@ -288,6 +288,10 @@ static CONF_MODULE *module_add(DSO *dso, const char *name, tmod->dso = dso; tmod->name = BUF_strdup(name); + if (tmod->name == NULL) { + OPENSSL_free(tmod); + return NULL; + } tmod->init = ifunc; tmod->finish = ffunc; tmod->links = 0; diff --git a/thirdparty/openssl/crypto/conf/ssleay.cnf b/thirdparty/openssl/crypto/conf/ssleay.cnf deleted file mode 100644 index ed33af601e..0000000000 --- a/thirdparty/openssl/crypto/conf/ssleay.cnf +++ /dev/null @@ -1,78 +0,0 @@ -# -# This is a test configuration file for use in SSLeay etc... 
-# - -init = 5 -in\#it1 =10 -init2='10' -init3='10\'' -init4="10'" -init5='='10\'' again' - -SSLeay::version = 0.5.0 - -[genrsa] -default_bits = 512 -SSLEAY::version = 0.5.0 - -[gendh] -default_bits = 512 -def_generator = 2 - -[s_client] -cipher1 = DES_CBC_MD5:DES_CBC_SHA:DES_EDE_SHA:RC4_MD5\ -cipher2 = 'DES_CBC_MD5 DES_CBC_SHA DES_EDE_SHA RC4_MD5' -cipher3 = "DES_CBC_MD5 DES_CBC_SHA DES_EDE_SHA RC4_MD5" -cipher4 = DES_CBC_MD5 DES_CBC_SHA DES_EDE_SHA RC4_MD5 - -[ default ] -cert_dir = $ENV::HOME/.ca_certs - -HOME = /tmp/eay - -tmp_cert_dir = $HOME/.ca_certs -tmp2_cert_dir = thisis$(HOME)stuff - -LOGNAME = Eric Young (home=$HOME) - -[ special ] - -H=$HOME -H=$default::HOME -H=$ENV::HOME -# -# SSLeay example configuration file. -# This is mostly being used for generation of certificate requests. -# - -RANDFILE = $HOME/.rand - -[ req ] -default_bits = 512 -default_keyfile = privkey.pem - -Attribute_type_1 = countryName -Attribute_text_1 = Country Name (2 letter code) -Attribute_default_1 = AU - -Attribute_type_2 = stateOrProvinceName -Attribute_text_2 = State or Province Name (full name) -Attribute_default_2 = Queensland - -Attribute_type_3 = localityName -Attribute_text_3 = Locality Name (eg, city) - -Attribute_type_4 = organizationName -Attribute_text_4 = Organization Name (eg, company) -Attribute_default_4 = Mincom Pty Ltd - -Attribute_type_5 = organizationalUnitName -Attribute_text_5 = Organizational Unit Name (eg, section) -Attribute_default_5 = TR - -Attribute_type_6 = commonName -Attribute_text_6 = Common Name (eg, YOUR name) - -Attribute_type_7 = emailAddress -Attribute_text_7 = Email Address - diff --git a/thirdparty/openssl/crypto/crypto-lib.com b/thirdparty/openssl/crypto/crypto-lib.com deleted file mode 100644 index 1423cac288..0000000000 --- a/thirdparty/openssl/crypto/crypto-lib.com +++ /dev/null @@ -1,1537 +0,0 @@ -$! -$! CRYPTO-LIB.COM -$! Written By: Robert Byer -$! Vice-President -$! A-Com Computing, Inc. -$! byer@mail.all-net.net -$! -$! 
Changes by Richard Levitte <richard@levitte.org> -$! Zoltan Arpadffy <arpadffy@polarhome.com> -$! -$! This command files compiles and creates the "[.xxx.EXE.CRYPTO]LIBCRYPTO.OLB" -$! library for OpenSSL. The "xxx" denotes the machine architecture, ALPHA, -$! IA64 or VAX. -$! -$! It was re-written so it would try to determine what "C" compiler to use -$! or you can specify which "C" compiler to use. -$! -$! Specify the following as P1 to build just that part or ALL to just -$! build everything. -$! -$! LIBRARY To just compile the [.xxx.EXE.CRYPTO]LIBCRYPTO.OLB Library. -$! APPS To just compile the [.xxx.EXE.CRYPTO]*.EXE -$! ALL To do both LIBRARY and APPS -$! -$! Specify DEBUG or NODEBUG as P2 to compile with or without debugger -$! information. -$! -$! Specify which compiler at P3 to try to compile under. -$! -$! VAXC For VAX C. -$! DECC For DEC C. -$! GNUC For GNU C. -$! -$! If you don't specify a compiler, it will try to determine which -$! "C" compiler to use. -$! -$! P4, if defined, sets a TCP/IP library to use, through one of the following -$! keywords: -$! -$! UCX For UCX -$! TCPIP For TCPIP (post UCX) -$! SOCKETSHR For SOCKETSHR+NETLIB -$! -$! P5, if defined, sets a compiler thread NOT needed on OpenVMS 7.1 (and up) -$! -$! P6, if defined, sets a choice of crypto methods to compile. -$! WARNING: this should only be done to recompile some part of an already -$! fully compiled library. -$! -$! P7, if defined, specifies the C pointer size. Ignored on VAX. -$! ("64=ARGV" gives more efficient code with HP C V7.3 or newer.) -$! Supported values are: -$! -$! "" Compile with default (/NOPOINTER_SIZE) -$! 32 Compile with /POINTER_SIZE=32 (SHORT) -$! 64 Compile with /POINTER_SIZE=64[=ARGV] (LONG[=ARGV]). -$! (Automatically select ARGV if compiler supports it.) -$! 64= Compile with /POINTER_SIZE=64 (LONG). -$! 64=ARGV Compile with /POINTER_SIZE=64=ARGV (LONG=ARGV). -$! -$! P8, if defined, specifies a directory where ZLIB files (zlib.h, -$! libz.olb) may be found. 
Optionally, a non-default object library -$! name may be included ("dev:[dir]libz_64.olb", for example). -$! -$! -$! Announce/identify. -$! -$ proc = f$environment( "procedure") -$ write sys$output "@@@ "+ - - f$parse( proc, , , "name")+ f$parse( proc, , , "type") -$! -$! Define A TCP/IP Library That We Will Need To Link To. -$! (That Is, If We Need To Link To One.) -$! -$ TCPIP_LIB = "" -$ ZLIB_LIB = "" -$! -$! Check Which Architecture We Are Using. -$! -$ IF (F$GETSYI("CPU").LT.128) -$ THEN -$! -$! The Architecture Is VAX -$! -$ ARCH = "VAX" -$! -$! Else... -$! -$ ELSE -$! -$! The Architecture Is Alpha, IA64 or whatever comes in the future. -$! -$ ARCH = F$EDIT( F$GETSYI( "ARCH_NAME"), "UPCASE") -$ IF (ARCH .EQS. "") THEN ARCH = "UNK" -$! -$! End The Architecture Check. -$! -$ ENDIF -$! -$ ARCHD = ARCH -$ LIB32 = "32" -$ OPT_FILE = "" -$ POINTER_SIZE = "" -$! -$! Define The Different Encryption Types. -$! NOTE: Some might think this list ugly. However, it's made this way to -$! reflect the SDIRS variable in [-]Makefile.org as closely as possible, -$! thereby making it fairly easy to verify that the lists are the same. -$! -$ ET_WHIRLPOOL = "WHRLPOOL" -$ IF ARCH .EQS. "VAX" THEN ET_WHIRLPOOL = "" -$ ENCRYPT_TYPES = "Basic,"+ - - "OBJECTS,"+ - - "MD4,MD5,SHA,MDC2,HMAC,RIPEMD,"+ET_WHIRLPOOL+","+ - - "DES,AES,RC2,RC4,IDEA,BF,CAST,CAMELLIA,SEED,MODES,"+ - - "BN,EC,RSA,DSA,ECDSA,DH,ECDH,DSO,ENGINE,"+ - - "BUFFER,BIO,STACK,LHASH,RAND,ERR,"+ - - "EVP,EVP_2,EVP_3,ASN1,ASN1_2,PEM,X509,X509V3,"+ - - "CONF,TXT_DB,PKCS7,PKCS12,COMP,OCSP,UI,KRB5,"+ - - "CMS,PQUEUE,TS,JPAKE,SRP,STORE,CMAC" -$! -$! Check To Make Sure We Have Valid Command Line Parameters. -$! -$ GOSUB CHECK_OPTIONS -$! -$! Define The OBJ and EXE Directories. -$! -$ OBJ_DIR := SYS$DISK:[-.'ARCHD'.OBJ.CRYPTO] -$ EXE_DIR := SYS$DISK:[-.'ARCHD'.EXE.CRYPTO] -$! -$! Specify the destination directory in any /MAP option. -$! -$ if (LINKMAP .eqs. "MAP") -$ then -$ LINKMAP = LINKMAP+ "=''EXE_DIR'" -$ endif -$! -$! 
Add the location prefix to the linker options file name. -$! -$ if (OPT_FILE .nes. "") -$ then -$ OPT_FILE = EXE_DIR+ OPT_FILE -$ endif -$! -$! Initialise logical names and such -$! -$ GOSUB INITIALISE -$! -$! Tell The User What Kind of Machine We Run On. -$! -$ WRITE SYS$OUTPUT "Host system architecture: ''ARCHD'" -$! -$! -$! Check To See If The Architecture Specific OBJ Directory Exists. -$! -$ IF (F$PARSE(OBJ_DIR).EQS."") -$ THEN -$! -$! It Dosen't Exist, So Create It. -$! -$ CREATE/DIR 'OBJ_DIR' -$! -$! End The Architecture Specific OBJ Directory Check. -$! -$ ENDIF -$! -$! Check To See If The Architecture Specific Directory Exists. -$! -$ IF (F$PARSE(EXE_DIR).EQS."") -$ THEN -$! -$! It Dosen't Exist, So Create It. -$! -$ CREATE/DIRECTORY 'EXE_DIR' -$! -$! End The Architecture Specific Directory Check. -$! -$ ENDIF -$! -$! Define The Library Name. -$! -$ LIB_NAME := 'EXE_DIR'SSL_LIBCRYPTO'LIB32'.OLB -$! -$! Define The CRYPTO-LIB We Are To Use. -$! -$ CRYPTO_LIB := 'EXE_DIR'SSL_LIBCRYPTO'LIB32'.OLB -$! -$! Check To See If We Already Have A "[.xxx.EXE.CRYPTO]LIBCRYPTO.OLB" Library... -$! -$ IF (F$SEARCH(LIB_NAME).EQS."") -$ THEN -$! -$! Guess Not, Create The Library. -$! -$ LIBRARY/CREATE/OBJECT 'LIB_NAME' -$! -$! End The Library Check. -$! -$ ENDIF -$! -$! Build our options file for the application -$! -$ GOSUB CHECK_OPT_FILE -$! -$! Define The Different Encryption "library" Strings. -$! -$!!! Test apps disabled, as they aren't supported at all, -$!!! not even in the unix build -$!!! APPS_DES = "DES/DES,CBC3_ENC" -$!!! APPS_PKCS7 = "ENC/ENC;DEC/DEC;SIGN/SIGN;VERIFY/VERIFY,EXAMPLE" -$ -$! These variables are ordered as the SDIRS variable from the top Makefile.org -$! The contents of these variables are copied from the LIBOBJ variable in the -$! corresponding Makefile from each corresponding subdirectory, with .o stripped -$! and spaces replaced with commas. 
-$ LIB_ = "cryptlib,mem,mem_dbg,cversion,ex_data,cpt_err,ebcdic,"+ - - "uid,o_time,o_str,o_dir,o_fips,o_init,fips_ers,mem_clr" -$ LIB_OBJECTS = "o_names,obj_dat,obj_lib,obj_err,obj_xref" -$ LIB_MD2 = "md2_dgst,md2_one" -$ LIB_MD4 = "md4_dgst,md4_one" -$ LIB_MD5 = "md5_dgst,md5_one" -$ LIB_SHA = "sha_dgst,sha1dgst,sha_one,sha1_one,sha256,sha512" -$ LIB_MDC2 = "mdc2dgst,mdc2_one" -$ LIB_HMAC = "hmac,hm_ameth,hm_pmeth" -$ LIB_RIPEMD = "rmd_dgst,rmd_one" -$ LIB_WHRLPOOL = "wp_dgst,wp_block" -$ LIB_DES = "set_key,ecb_enc,cbc_enc,"+ - - "ecb3_enc,cfb64enc,cfb64ede,cfb_enc,ofb64ede,"+ - - "enc_read,enc_writ,ofb64enc,"+ - - "ofb_enc,str2key,pcbc_enc,qud_cksm,rand_key,"+ - - "des_enc,fcrypt_b,"+ - - "fcrypt,xcbc_enc,rpc_enc,cbc_cksm,"+ - - "ede_cbcm_enc,des_old,des_old2,read2pwd" -$ LIB_AES = "aes_misc,aes_ecb,aes_cfb,aes_ofb,aes_ctr,aes_ige,aes_wrap,"+ - - "aes_core,aes_cbc" -$ LIB_RC2 = "rc2_ecb,rc2_skey,rc2_cbc,rc2cfb64,rc2ofb64" -$ LIB_RC4 = "rc4_enc,rc4_skey,rc4_utl" -$ LIB_RC5 = "rc5_skey,rc5_ecb,rc5_enc,rc5cfb64,rc5ofb64" -$ LIB_IDEA = "i_cbc,i_cfb64,i_ofb64,i_ecb,i_skey" -$ LIB_BF = "bf_skey,bf_ecb,bf_enc,bf_cfb64,bf_ofb64" -$ LIB_CAST = "c_skey,c_ecb,c_enc,c_cfb64,c_ofb64" -$ LIB_CAMELLIA = "cmll_ecb,cmll_ofb,cmll_cfb,cmll_ctr,"+ - - "cmll_utl,camellia,cmll_misc,cmll_cbc" -$ LIB_SEED = "seed,seed_ecb,seed_cbc,seed_cfb,seed_ofb" -$ LIB_MODES = "cbc128,ctr128,cts128,cfb128,ofb128,gcm128,"+ - - "ccm128,xts128,wrap128" -$ LIB_BN_ASM = "[.asm]vms.mar,vms-helper" -$ IF F$TRNLNM("OPENSSL_NO_ASM") .OR. ARCH .NES. 
"VAX" THEN - - LIB_BN_ASM = "bn_asm" -$ LIB_BN = "bn_add,bn_div,bn_exp,bn_lib,bn_ctx,bn_mul,bn_mod,"+ - - "bn_print,bn_rand,bn_shift,bn_word,bn_blind,"+ - - "bn_kron,bn_sqrt,bn_gcd,bn_prime,bn_err,bn_sqr,"+LIB_BN_ASM+","+ - - "bn_recp,bn_mont,bn_mpi,bn_exp2,bn_gf2m,bn_nist,"+ - - "bn_depr,bn_const,bn_x931p" -$ LIB_EC = "ec_lib,ecp_smpl,ecp_mont,ecp_nist,ec_cvt,ec_mult,"+ - - "ec_err,ec_curve,ec_check,ec_print,ec_asn1,ec_key,"+ - - "ec2_smpl,ec2_mult,ec_ameth,ec_pmeth,eck_prn,"+ - - "ecp_nistp224,ecp_nistp256,ecp_nistp521,ecp_nistputil,"+ - - "ecp_oct,ec2_oct,ec_oct" -$ LIB_RSA = "rsa_eay,rsa_gen,rsa_lib,rsa_sign,rsa_saos,rsa_err,"+ - - "rsa_pk1,rsa_ssl,rsa_none,rsa_oaep,rsa_chk,rsa_null,"+ - - "rsa_pss,rsa_x931,rsa_asn1,rsa_depr,rsa_ameth,rsa_prn,"+ - - "rsa_pmeth,rsa_crpt" -$ LIB_DSA = "dsa_gen,dsa_key,dsa_lib,dsa_asn1,dsa_vrf,dsa_sign,"+ - - "dsa_err,dsa_ossl,dsa_depr,dsa_ameth,dsa_pmeth,dsa_prn" -$ LIB_ECDSA = "ecs_lib,ecs_asn1,ecs_ossl,ecs_sign,ecs_vrf,ecs_err" -$ LIB_DH = "dh_asn1,dh_gen,dh_key,dh_lib,dh_check,dh_err,dh_depr,"+ - - "dh_ameth,dh_pmeth,dh_prn,dh_rfc5114,dh_kdf" -$ LIB_ECDH = "ech_lib,ech_ossl,ech_key,ech_err,ech_kdf" -$ LIB_DSO = "dso_dl,dso_dlfcn,dso_err,dso_lib,dso_null,"+ - - "dso_openssl,dso_win32,dso_vms,dso_beos" -$ LIB_ENGINE = "eng_err,eng_lib,eng_list,eng_init,eng_ctrl,"+ - - "eng_table,eng_pkey,eng_fat,eng_all,"+ - - "tb_rsa,tb_dsa,tb_ecdsa,tb_dh,tb_ecdh,tb_rand,tb_store,"+ - - "tb_cipher,tb_digest,tb_pkmeth,tb_asnmth,"+ - - "eng_openssl,eng_cnf,eng_dyn,eng_cryptodev,"+ - - "eng_rdrand" -$ LIB_BUFFER = "buffer,buf_str,buf_err" -$ LIB_BIO = "bio_lib,bio_cb,bio_err,"+ - - "bss_mem,bss_null,bss_fd,"+ - - "bss_file,bss_sock,bss_conn,"+ - - "bf_null,bf_buff,b_print,b_dump,"+ - - "b_sock,bss_acpt,bf_nbio,bss_log,bss_bio,"+ - - "bss_dgram,"+ - - "bf_lbuf,bss_rtcp" ! The last two are VMS specific -$ LIB_STACK = "stack" -$ LIB_LHASH = "lhash,lh_stats" -$ LIB_RAND = "md_rand,randfile,rand_lib,rand_err,rand_egd,"+ - - "rand_vms" ! 
The last one is VMS specific -$ LIB_ERR = "err,err_all,err_prn" -$ LIB_EVP = "encode,digest,evp_enc,evp_key,evp_acnf,evp_cnf,"+ - - "e_des,e_bf,e_idea,e_des3,e_camellia,"+ - - "e_rc4,e_aes,names,e_seed,"+ - - "e_xcbc_d,e_rc2,e_cast,e_rc5" -$ LIB_EVP_2 = "m_null,m_md2,m_md4,m_md5,m_sha,m_sha1,m_wp," + - - "m_dss,m_dss1,m_mdc2,m_ripemd,m_ecdsa,"+ - - "p_open,p_seal,p_sign,p_verify,p_lib,p_enc,p_dec,"+ - - "bio_md,bio_b64,bio_enc,evp_err,e_null,"+ - - "c_all,c_allc,c_alld,evp_lib,bio_ok,"+- - "evp_pkey,evp_pbe,p5_crpt,p5_crpt2" -$ LIB_EVP_3 = "e_old,pmeth_lib,pmeth_fn,pmeth_gn,m_sigver,"+ - - "e_aes_cbc_hmac_sha1,e_aes_cbc_hmac_sha256,e_rc4_hmac_md5" -$ LIB_ASN1 = "a_object,a_bitstr,a_utctm,a_gentm,a_time,a_int,a_octet,"+ - - "a_print,a_type,a_set,a_dup,a_d2i_fp,a_i2d_fp,"+ - - "a_enum,a_utf8,a_sign,a_digest,a_verify,a_mbstr,a_strex,"+ - - "x_algor,x_val,x_pubkey,x_sig,x_req,x_attrib,x_bignum,"+ - - "x_long,x_name,x_x509,x_x509a,x_crl,x_info,x_spki,nsseq,"+ - - "x_nx509,d2i_pu,d2i_pr,i2d_pu,i2d_pr" -$ LIB_ASN1_2 = "t_req,t_x509,t_x509a,t_crl,t_pkey,t_spki,t_bitst,"+ - - "tasn_new,tasn_fre,tasn_enc,tasn_dec,tasn_utl,tasn_typ,"+ - - "tasn_prn,ameth_lib,"+ - - "f_int,f_string,n_pkey,"+ - - "f_enum,x_pkey,a_bool,x_exten,bio_asn1,bio_ndef,asn_mime,"+ - - "asn1_gen,asn1_par,asn1_lib,asn1_err,a_bytes,a_strnid,"+ - - "evp_asn1,asn_pack,p5_pbe,p5_pbev2,p8_pkey,asn_moid" -$ LIB_PEM = "pem_sign,pem_seal,pem_info,pem_lib,pem_all,pem_err,"+ - - "pem_x509,pem_xaux,pem_oth,pem_pk8,pem_pkey,pvkfmt" -$ LIB_X509 = "x509_def,x509_d2,x509_r2x,x509_cmp,"+ - - "x509_obj,x509_req,x509spki,x509_vfy,"+ - - "x509_set,x509cset,x509rset,x509_err,"+ - - "x509name,x509_v3,x509_ext,x509_att,"+ - - "x509type,x509_lu,x_all,x509_txt,"+ - - "x509_trs,by_file,by_dir,x509_vpm" -$ LIB_X509V3 = "v3_bcons,v3_bitst,v3_conf,v3_extku,v3_ia5,v3_lib,"+ - - "v3_prn,v3_utl,v3err,v3_genn,v3_alt,v3_skey,v3_akey,v3_pku,"+ - - "v3_int,v3_enum,v3_sxnet,v3_cpols,v3_crld,v3_purp,v3_info,"+ - - 
"v3_ocsp,v3_akeya,v3_pmaps,v3_pcons,v3_ncons,v3_pcia,v3_pci,"+ - - "pcy_cache,pcy_node,pcy_data,pcy_map,pcy_tree,pcy_lib,"+ - - "v3_asid,v3_addr,v3_scts" -$ LIB_CONF = "conf_err,conf_lib,conf_api,conf_def,conf_mod,conf_mall,conf_sap" -$ LIB_TXT_DB = "txt_db" -$ LIB_PKCS7 = "pk7_asn1,pk7_lib,pkcs7err,pk7_doit,pk7_smime,pk7_attr,"+ - - "pk7_mime,bio_pk7" -$ LIB_PKCS12 = "p12_add,p12_asn,p12_attr,p12_crpt,p12_crt,p12_decr,"+ - - "p12_init,p12_key,p12_kiss,p12_mutl,"+ - - "p12_utl,p12_npas,pk12err,p12_p8d,p12_p8e" -$ LIB_COMP = "comp_lib,comp_err,"+ - - "c_rle,c_zlib" -$ LIB_OCSP = "ocsp_asn,ocsp_ext,ocsp_ht,ocsp_lib,ocsp_cl,"+ - - "ocsp_srv,ocsp_prn,ocsp_vfy,ocsp_err" -$ LIB_UI_COMPAT = ",ui_compat" -$ LIB_UI = "ui_err,ui_lib,ui_openssl,ui_util"+LIB_UI_COMPAT -$ LIB_KRB5 = "krb5_asn" -$ LIB_CMS = "cms_lib,cms_asn1,cms_att,cms_io,cms_smime,cms_err,"+ - - "cms_sd,cms_dd,cms_cd,cms_env,cms_enc,cms_ess,"+ - - "cms_pwri,cms_kari" -$ LIB_PQUEUE = "pqueue" -$ LIB_TS = "ts_err,ts_req_utils,ts_req_print,ts_rsp_utils,ts_rsp_print,"+ - - "ts_rsp_sign,ts_rsp_verify,ts_verify_ctx,ts_lib,ts_conf,"+ - - "ts_asn1" -$ LIB_JPAKE = "jpake,jpake_err" -$ LIB_SRP = "srp_lib,srp_vfy" -$ LIB_STORE = "str_err,str_lib,str_meth,str_mem" -$ LIB_CMAC = "cmac,cm_ameth,cm_pmeth" -$! -$! Setup exceptional compilations -$! -$ CC3_SHOWN = 0 -$ CC4_SHOWN = 0 -$ CC5_SHOWN = 0 -$ CC6_SHOWN = 0 -$! -$! The following lists must have leading and trailing commas, and no -$! embedded spaces. (They are scanned for ",name,".) -$! -$ ! Add definitions for no threads on OpenVMS 7.1 and higher. -$ COMPILEWITH_CC3 = ",bss_rtcp," -$ ! Disable the DOLLARID warning. Not needed with /STANDARD=RELAXED. -$ COMPILEWITH_CC4 = "" !!! ",a_utctm,bss_log,o_time,o_dir," -$ ! Disable disjoint optimization on VAX with DECC. -$ COMPILEWITH_CC5 = ",md2_dgst,md4_dgst,md5_dgst,mdc2dgst," + - - "seed,sha_dgst,sha1dgst,rmd_dgst,bf_enc," -$ ! Disable the MIXLINKAGE warning. -$ COMPILEWITH_CC6 = "" !!! ",enc_read,set_key," -$! -$! 
Figure Out What Other Modules We Are To Build. -$! -$ BUILD_SET: -$! -$! Define A Module Counter. -$! -$ MODULE_COUNTER = 0 -$! -$! Top Of The Loop. -$! -$ MODULE_NEXT: -$! -$! Extract The Module Name From The Encryption List. -$! -$ MODULE_NAME = F$EDIT(F$ELEMENT(MODULE_COUNTER,",",ENCRYPT_TYPES),"COLLAPSE") -$ IF MODULE_NAME.EQS."Basic" THEN MODULE_NAME = "" -$ MODULE_NAME1 = MODULE_NAME -$! -$! Check To See If We Are At The End Of The Module List. -$! -$ IF (MODULE_NAME.EQS.",") -$ THEN -$! -$! We Are At The End Of The Module List, Go To MODULE_DONE. -$! -$ GOTO MODULE_DONE -$! -$! End The Module List Check. -$! -$ ENDIF -$! -$! Increment The Moudle Counter. -$! -$ MODULE_COUNTER = MODULE_COUNTER + 1 -$! -$! Create The Library and Apps Module Names. -$! -$ LIB_MODULE = "LIB_" + MODULE_NAME -$ APPS_MODULE = "APPS_" + MODULE_NAME -$ IF (F$EXTRACT(0,5,MODULE_NAME).EQS."ASN1_") -$ THEN -$ MODULE_NAME = "ASN1" -$ ENDIF -$ IF (F$EXTRACT(0,4,MODULE_NAME).EQS."EVP_") -$ THEN -$ MODULE_NAME = "EVP" -$ ENDIF -$! -$! Set state (can be LIB and APPS) -$! -$ STATE = "LIB" -$ IF BUILDALL .EQS. "APPS" THEN STATE = "APPS" -$! -$! Check if the library module name actually is defined -$! -$ IF F$TYPE('LIB_MODULE') .EQS. "" -$ THEN -$ WRITE SYS$ERROR "" -$ WRITE SYS$ERROR "The module ",MODULE_NAME1," does not exist. Continuing..." -$ WRITE SYS$ERROR "" -$ GOTO MODULE_NEXT -$ ENDIF -$! -$! Top Of The Module Loop. -$! -$ MODULE_AGAIN: -$! -$! Tell The User What Module We Are Building. -$! -$ IF (MODULE_NAME1.NES."") -$ THEN -$ IF STATE .EQS. "LIB" -$ THEN -$ WRITE SYS$OUTPUT "Compiling The ",MODULE_NAME1," Library Files. (",BUILDALL,",",STATE,")" -$ ELSE IF F$TYPE('APPS_MODULE') .NES. "" -$ THEN -$ WRITE SYS$OUTPUT "Compiling The ",MODULE_NAME1," Applications. (",BUILDALL,",",STATE,")" -$ ENDIF -$ ENDIF -$ ENDIF -$! -$! Define A File Counter And Set It To "0". -$! -$ FILE_COUNTER = 0 -$ APPLICATION = "" -$ APPLICATION_COUNTER = 0 -$! -$! Top Of The File Loop. -$! -$ NEXT_FILE: -$! 
-$! Look in the LIB_MODULE is we're in state LIB -$! -$ IF STATE .EQS. "LIB" -$ THEN -$! -$! O.K, Extract The File Name From The File List. -$! -$ FILE_NAME = F$EDIT(F$ELEMENT(FILE_COUNTER,",",'LIB_MODULE'),"COLLAPSE") -$! -$! else -$! -$ ELSE -$ FILE_NAME = "," -$! -$ IF F$TYPE('APPS_MODULE') .NES. "" -$ THEN -$! -$! Extract The File Name From The File List. -$! This part is a bit more complicated. -$! -$ IF APPLICATION .EQS. "" -$ THEN -$ APPLICATION = F$ELEMENT(APPLICATION_COUNTER,";",'APPS_MODULE') -$ APPLICATION_COUNTER = APPLICATION_COUNTER + 1 -$ APPLICATION_OBJECTS = F$ELEMENT(1,"/",APPLICATION) -$ APPLICATION = F$ELEMENT(0,"/",APPLICATION) -$ FILE_COUNTER = 0 -$ ENDIF -$ -$! WRITE SYS$OUTPUT "DEBUG: SHOW SYMBOL APPLICATION*" -$! SHOW SYMBOL APPLICATION* -$! -$ IF APPLICATION .NES. ";" -$ THEN -$ FILE_NAME = F$EDIT(F$ELEMENT(FILE_COUNTER,",",APPLICATION_OBJECTS),"COLLAPSE") -$ IF FILE_NAME .EQS. "," -$ THEN -$ APPLICATION = "" -$ GOTO NEXT_FILE -$ ENDIF -$ ENDIF -$ ENDIF -$ ENDIF -$! -$! Check To See If We Are At The End Of The File List. -$! -$ IF (FILE_NAME.EQS.",") -$ THEN -$! -$! We Are At The End Of The File List, Change State Or Goto FILE_DONE. -$! -$ IF STATE .EQS. "LIB" .AND. BUILDALL .NES. "LIBRARY" -$ THEN -$ STATE = "APPS" -$ GOTO MODULE_AGAIN -$ ELSE -$ GOTO FILE_DONE -$ ENDIF -$! -$! End The File List Check. -$! -$ ENDIF -$! -$! Increment The Counter. -$! -$ FILE_COUNTER = FILE_COUNTER + 1 -$! -$! Create The Source File Name. -$! -$ TMP_FILE_NAME = F$ELEMENT(1,"]",FILE_NAME) -$ IF TMP_FILE_NAME .EQS. "]" THEN TMP_FILE_NAME = FILE_NAME -$ IF F$ELEMENT(0,".",TMP_FILE_NAME) .EQS. TMP_FILE_NAME THEN - - FILE_NAME = FILE_NAME + ".c" -$ IF (MODULE_NAME.NES."") -$ THEN -$ SOURCE_FILE = "SYS$DISK:[." + MODULE_NAME+ "]" + FILE_NAME -$ ELSE -$ SOURCE_FILE = "SYS$DISK:[]" + FILE_NAME -$ ENDIF -$ SOURCE_FILE = SOURCE_FILE - "][" -$! -$! Create The Object File Name. -$! 
-$ OBJECT_FILE = OBJ_DIR + F$PARSE(FILE_NAME,,,"NAME","SYNTAX_ONLY") + ".OBJ" -$ ON WARNING THEN GOTO NEXT_FILE -$! -$! Check To See If The File We Want To Compile Is Actually There. -$! -$ IF (F$SEARCH(SOURCE_FILE).EQS."") -$ THEN -$! -$! Tell The User That The File Doesn't Exist. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The File ",SOURCE_FILE," Doesn't Exist." -$ WRITE SYS$OUTPUT "" -$! -$! Exit The Build. -$! -$ GOTO EXIT -$! -$! End The File Exist Check. -$! -$ ENDIF -$! -$! Tell The User We Are Compiling The File. -$! -$ IF (MODULE_NAME.EQS."") -$ THEN -$ WRITE SYS$OUTPUT "Compiling The ",FILE_NAME," File. (",BUILDALL,",",STATE,")" -$ ENDIF -$ IF (MODULE_NAME.NES."") -$ THEN -$ WRITE SYS$OUTPUT " ",FILE_NAME,"" -$ ENDIF -$! -$! Compile The File. -$! -$ ON ERROR THEN GOTO NEXT_FILE -$ FILE_NAME0 = ","+ F$ELEMENT(0,".",FILE_NAME)+ "," -$ IF FILE_NAME - ".mar" .NES. FILE_NAME -$ THEN -$ MACRO/OBJECT='OBJECT_FILE' 'SOURCE_FILE' -$ ELSE -$ IF COMPILEWITH_CC3 - FILE_NAME0 .NES. COMPILEWITH_CC3 -$ THEN -$ write sys$output " \Using special rule (3)" -$ if (.not. CC3_SHOWN) -$ then -$ CC3_SHOWN = 1 -$ x = " "+ CC3 -$ write /symbol sys$output x -$ endif -$ CC3/OBJECT='OBJECT_FILE' 'SOURCE_FILE' -$ ELSE -$ IF COMPILEWITH_CC4 - FILE_NAME0 .NES. COMPILEWITH_CC4 -$ THEN -$ write /symbol sys$output " \Using special rule (4)" -$ if (.not. CC4_SHOWN) -$ then -$ CC4_SHOWN = 1 -$ x = " "+ CC4 -$ write /symbol sys$output x -$ endif -$ CC4/OBJECT='OBJECT_FILE' 'SOURCE_FILE' -$ ELSE -$ IF CC5_DIFFERENT .AND. - - (COMPILEWITH_CC5 - FILE_NAME0 .NES. COMPILEWITH_CC5) -$ THEN -$ write sys$output " \Using special rule (5)" -$ if (.not. CC5_SHOWN) -$ then -$ CC5_SHOWN = 1 -$ x = " "+ CC5 -$ write /symbol sys$output x -$ endif -$ CC5/OBJECT='OBJECT_FILE' 'SOURCE_FILE' -$ ELSE -$ IF COMPILEWITH_CC6 - FILE_NAME0 .NES. COMPILEWITH_CC6 -$ THEN -$ write sys$output " \Using special rule (6)" -$ if (.not. 
CC6_SHOWN) -$ then -$ CC6_SHOWN = 1 -$ x = " "+ CC6 -$ write /symbol sys$output x -$ endif -$ CC6/OBJECT='OBJECT_FILE' 'SOURCE_FILE' -$ ELSE -$ CC/OBJECT='OBJECT_FILE' 'SOURCE_FILE' -$ ENDIF -$ ENDIF -$ ENDIF -$ ENDIF -$ ENDIF -$ IF STATE .EQS. "LIB" -$ THEN -$! -$! Add It To The Library. -$! -$ LIBRARY/REPLACE 'LIB_NAME' 'OBJECT_FILE' -$! -$! Time To Clean Up The Object File. -$! -$ DELETE 'OBJECT_FILE';* -$ ENDIF -$! -$! Go Back And Do It Again. -$! -$ GOTO NEXT_FILE -$! -$! All Done With This Library Part. -$! -$ FILE_DONE: -$! -$! Time To Build Some Applications -$! -$ IF F$TYPE('APPS_MODULE') .NES. "" .AND. BUILDALL .NES. "LIBRARY" -$ THEN -$ APPLICATION_COUNTER = 0 -$ NEXT_APPLICATION: -$ APPLICATION = F$ELEMENT(APPLICATION_COUNTER,";",'APPS_MODULE') -$ IF APPLICATION .EQS. ";" THEN GOTO APPLICATION_DONE -$ -$ APPLICATION_COUNTER = APPLICATION_COUNTER + 1 -$ APPLICATION_OBJECTS = F$ELEMENT(1,"/",APPLICATION) -$ APPLICATION = F$ELEMENT(0,"/",APPLICATION) -$ -$! WRITE SYS$OUTPUT "DEBUG: SHOW SYMBOL APPLICATION*" -$! SHOW SYMBOL APPLICATION* -$! -$! Tell the user what happens -$! -$ WRITE SYS$OUTPUT " ",APPLICATION,".exe" -$! -$! Link The Program. -$! -$ ON ERROR THEN GOTO NEXT_APPLICATION -$! -$! Link With A TCP/IP Library. -$! -$ LINK /'DEBUGGER' /'LINKMAP' /'TRACEBACK' - - /EXE='EXE_DIR''APPLICATION'.EXE - - 'OBJ_DIR''APPLICATION_OBJECTS', - - 'CRYPTO_LIB'/LIBRARY - - 'TCPIP_LIB' - - 'ZLIB_LIB' - - ,'OPT_FILE' /OPTIONS -$! -$ GOTO NEXT_APPLICATION -$ APPLICATION_DONE: -$ ENDIF -$! -$! Go Back And Get The Next Module. -$! -$ GOTO MODULE_NEXT -$! -$! All Done With This Module. -$! -$ MODULE_DONE: -$! -$! Tell The User That We Are All Done. -$! -$ WRITE SYS$OUTPUT "All Done..." -$ EXIT: -$ GOSUB CLEANUP -$ EXIT -$! -$! Check For The Link Option FIle. -$! -$ CHECK_OPT_FILE: -$! -$! Check To See If We Need To Make A VAX C Option File. -$! -$ IF (COMPILER.EQS."VAXC") -$ THEN -$! -$! Check To See If We Already Have A VAX C Linker Option File. -$! 
-$ IF (F$SEARCH(OPT_FILE).EQS."") -$ THEN -$! -$! We Need A VAX C Linker Option File. -$! -$ CREATE 'OPT_FILE' -$DECK -! -! Default System Options File To Link Against -! The Sharable VAX C Runtime Library. -! -SYS$SHARE:VAXCRTL.EXE/SHARE -$EOD -$! -$! End The Option File Check. -$! -$ ENDIF -$! -$! End The VAXC Check. -$! -$ ENDIF -$! -$! Check To See If We Need A GNU C Option File. -$! -$ IF (COMPILER.EQS."GNUC") -$ THEN -$! -$! Check To See If We Already Have A GNU C Linker Option File. -$! -$ IF (F$SEARCH(OPT_FILE).EQS."") -$ THEN -$! -$! We Need A GNU C Linker Option File. -$! -$ CREATE 'OPT_FILE' -$DECK -! -! Default System Options File To Link Against -! The Sharable C Runtime Library. -! -GNU_CC:[000000]GCCLIB/LIBRARY -SYS$SHARE:VAXCRTL/SHARE -$EOD -$! -$! End The Option File Check. -$! -$ ENDIF -$! -$! End The GNU C Check. -$! -$ ENDIF -$! -$! Check To See If We Need A DEC C Option File. -$! -$ IF (COMPILER.EQS."DECC") -$ THEN -$! -$! Check To See If We Already Have A DEC C Linker Option File. -$! -$ IF (F$SEARCH(OPT_FILE).EQS."") -$ THEN -$! -$! Figure Out If We Need A non-VAX Or A VAX Linker Option File. -$! -$ IF ARCH .EQS. "VAX" -$ THEN -$! -$! We Need A DEC C Linker Option File For VAX. -$! -$ CREATE 'OPT_FILE' -$DECK -! -! Default System Options File To Link Against -! The Sharable DEC C Runtime Library. -! -SYS$SHARE:DECC$SHR.EXE/SHARE -$EOD -$! -$! Else... -$! -$ ELSE -$! -$! Create The non-VAX Linker Option File. -$! -$ CREATE 'OPT_FILE' -$DECK -! -! Default System Options File For non-VAX To Link Against -! The Sharable C Runtime Library. -! -SYS$SHARE:CMA$OPEN_LIB_SHR/SHARE -SYS$SHARE:CMA$OPEN_RTL/SHARE -$EOD -$! -$! End The DEC C Option File Check. -$! -$ ENDIF -$! -$! End The Option File Search. -$! -$ ENDIF -$! -$! End The DEC C Check. -$! -$ ENDIF -$! -$! Tell The User What Linker Option File We Are Using. -$! -$ WRITE SYS$OUTPUT "Using Linker Option File ",OPT_FILE,"." -$! -$! Time To RETURN. -$! -$ RETURN -$! -$! Check The User's Options. 
-$! -$ CHECK_OPTIONS: -$! -$! Check To See If P1 Is Blank. -$! -$ IF (P1.EQS."ALL") -$ THEN -$! -$! P1 Is Blank, So Build Everything. -$! -$ BUILDALL = "TRUE" -$! -$! Else... -$! -$ ELSE -$! -$! Else, Check To See If P1 Has A Valid Argument. -$! -$ IF (P1.EQS."LIBRARY").OR.(P1.EQS."APPS") -$ THEN -$! -$! A Valid Argument. -$! -$ BUILDALL = P1 -$! -$! Else... -$! -$ ELSE -$! -$! Tell The User We Don't Know What They Want. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ",P1," Is Invalid. The Valid Options Are:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " ALL : Just Build Everything." -$ WRITE SYS$OUTPUT " LIBRARY : To Compile Just The [.xxx.EXE.CRYPTO]LIBCRYPTO.OLB Library." -$ WRITE SYS$OUTPUT " APPS : To Compile Just The [.xxx.EXE.CRYPTO]*.EXE Programs." -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " Where 'xxx' Stands For:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " ALPHA[64]: Alpha Architecture." -$ WRITE SYS$OUTPUT " IA64[64] : IA64 Architecture." -$ WRITE SYS$OUTPUT " VAX : VAX Architecture." -$ WRITE SYS$OUTPUT "" -$! -$! Time To EXIT. -$! -$ EXIT -$! -$! End The Valid Argument Check. -$! -$ ENDIF -$! -$! End The P1 Check. -$! -$ ENDIF -$! -$! Check To See If P2 Is Blank. -$! -$ IF (P2.EQS."NODEBUG") -$ THEN -$! -$! P2 Is NODEBUG, So Compile Without The Debugger Information. -$! -$ DEBUGGER = "NODEBUG" -$ LINKMAP = "NOMAP" -$ TRACEBACK = "NOTRACEBACK" -$ GCC_OPTIMIZE = "OPTIMIZE" -$ CC_OPTIMIZE = "OPTIMIZE" -$ MACRO_OPTIMIZE = "OPTIMIZE" -$ WRITE SYS$OUTPUT "No Debugger Information Will Be Produced During Compile." -$ WRITE SYS$OUTPUT "Compiling With Compiler Optimization." -$ ELSE -$! -$! Check To See If We Are To Compile With Debugger Information. -$! -$ IF (P2.EQS."DEBUG") -$ THEN -$! -$! Compile With Debugger Information. -$! 
-$ DEBUGGER = "DEBUG" -$ LINKMAP = "MAP" -$ TRACEBACK = "TRACEBACK" -$ GCC_OPTIMIZE = "NOOPTIMIZE" -$ CC_OPTIMIZE = "NOOPTIMIZE" -$ MACRO_OPTIMIZE = "NOOPTIMIZE" -$ WRITE SYS$OUTPUT "Debugger Information Will Be Produced During Compile." -$ WRITE SYS$OUTPUT "Compiling Without Compiler Optimization." -$ ELSE -$! -$! They Entered An Invalid Option. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ",P2," Is Invalid. The Valid Options Are:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " DEBUG : Compile With The Debugger Information." -$ WRITE SYS$OUTPUT " NODEBUG : Compile Without The Debugger Information." -$ WRITE SYS$OUTPUT "" -$! -$! Time To EXIT. -$! -$ EXIT -$! -$! End The Valid Argument Check. -$! -$ ENDIF -$! -$! End The P2 Check. -$! -$ ENDIF -$! -$! Special Threads For OpenVMS v7.1 Or Later -$! -$! Written By: Richard Levitte -$! richard@levitte.org -$! -$! -$! Check To See If We Have A Option For P5. -$! -$ IF (P5.EQS."") -$ THEN -$! -$! Get The Version Of VMS We Are Using. -$! -$ ISSEVEN := -$ TMP = F$ELEMENT(0,"-",F$EXTRACT(1,4,F$GETSYI("VERSION"))) -$ TMP = F$INTEGER(F$ELEMENT(0,".",TMP)+F$ELEMENT(1,".",TMP)) -$! -$! Check To See If The VMS Version Is v7.1 Or Later. -$! -$ IF (TMP.GE.71) -$ THEN -$! -$! We Have OpenVMS v7.1 Or Later, So Use The Special Threads. -$! -$ ISSEVEN := ,PTHREAD_USE_D4 -$! -$! End The VMS Version Check. -$! -$ ENDIF -$! -$! End The P5 Check. -$! -$ ENDIF -$! -$! Check P7 (POINTER_SIZE). -$! -$ IF (P7 .NES. "") .AND. (ARCH .NES. "VAX") -$ THEN -$! -$ IF (P7 .EQS. "32") -$ THEN -$ POINTER_SIZE = " /POINTER_SIZE=32" -$ ELSE -$ POINTER_SIZE = F$EDIT( P7, "COLLAPSE, UPCASE") -$ IF ((POINTER_SIZE .EQS. "64") .OR. - - (POINTER_SIZE .EQS. "64=") .OR. - - (POINTER_SIZE .EQS. "64=ARGV")) -$ THEN -$ ARCHD = ARCH+ "_64" -$ LIB32 = "" -$ POINTER_SIZE = " /POINTER_SIZE=64" -$ ELSE -$! -$! Tell The User Entered An Invalid Option. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ", P7, - - " Is Invalid. 
The Valid Options Are:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT - - " """" : Compile with default (short) pointers." -$ WRITE SYS$OUTPUT - - " 32 : Compile with 32-bit (short) pointers." -$ WRITE SYS$OUTPUT - - " 64 : Compile with 64-bit (long) pointers (auto ARGV)." -$ WRITE SYS$OUTPUT - - " 64= : Compile with 64-bit (long) pointers (no ARGV)." -$ WRITE SYS$OUTPUT - - " 64=ARGV : Compile with 64-bit (long) pointers (ARGV)." -$ WRITE SYS$OUTPUT "" -$! -$! Time To EXIT. -$! -$ EXIT -$! -$ ENDIF -$! -$ ENDIF -$! -$! End The P7 (POINTER_SIZE) Check. -$! -$ ENDIF -$! -$! Set basic C compiler /INCLUDE directories. -$! -$ CC_INCLUDES = "SYS$DISK:[.''ARCHD'],SYS$DISK:[],SYS$DISK:[-],"+ - - "SYS$DISK:[.ENGINE.VENDOR_DEFNS],SYS$DISK:[.MODES],SYS$DISK:[.ASN1],SYS$DISK:[.EVP]" -$! -$! Check To See If P3 Is Blank. -$! -$ IF (P3.EQS."") -$ THEN -$! -$! O.K., The User Didn't Specify A Compiler, Let's Try To -$! Find Out Which One To Use. -$! -$! Check To See If We Have GNU C. -$! -$ IF (F$TRNLNM("GNU_CC").NES."") -$ THEN -$! -$! Looks Like GNUC, Set To Use GNUC. -$! -$ P3 = "GNUC" -$! -$! Else... -$! -$ ELSE -$! -$! Check To See If We Have VAXC Or DECC. -$! -$ IF (ARCH.NES."VAX").OR.(F$TRNLNM("DECC$CC_DEFAULT").NES."") -$ THEN -$! -$! Looks Like DECC, Set To Use DECC. -$! -$ P3 = "DECC" -$! -$! Else... -$! -$ ELSE -$! -$! Looks Like VAXC, Set To Use VAXC. -$! -$ P3 = "VAXC" -$! -$! End The VAXC Compiler Check. -$! -$ ENDIF -$! -$! End The DECC & VAXC Compiler Check. -$! -$ ENDIF -$! -$! End The Compiler Check. -$! -$ ENDIF -$! -$! Check To See If We Have A Option For P4. -$! -$ IF (P4.EQS."") -$ THEN -$! -$! Find out what socket library we have available -$! -$ IF F$PARSE("SOCKETSHR:") .NES. "" -$ THEN -$! -$! We have SOCKETSHR, and it is my opinion that it's the best to use. -$! -$ P4 = "SOCKETSHR" -$! -$! Tell the user -$! -$ WRITE SYS$OUTPUT "Using SOCKETSHR for TCP/IP" -$! -$! Else, let's look for something else -$! -$ ELSE -$! -$! 
Like UCX (the reason to do this before Multinet is that the UCX -$! emulation is easier to use...) -$! -$ IF F$TRNLNM("UCX$IPC_SHR") .NES. "" - - .OR. F$PARSE("SYS$SHARE:UCX$IPC_SHR.EXE") .NES. "" - - .OR. F$PARSE("SYS$LIBRARY:UCX$IPC.OLB") .NES. "" -$ THEN -$! -$! Last resort: a UCX or UCX-compatible library -$! -$ P4 = "UCX" -$! -$! Tell the user -$! -$ WRITE SYS$OUTPUT "Using UCX or an emulation thereof for TCP/IP" -$! -$! That was all... -$! -$ ENDIF -$ ENDIF -$ ENDIF -$! -$! Set Up Initial CC Definitions, Possibly With User Ones -$! -$ CCDEFS = "TCPIP_TYPE_''P4',DSO_VMS" -$ IF F$TYPE(USER_CCDEFS) .NES. "" THEN CCDEFS = CCDEFS + "," + USER_CCDEFS -$ CCEXTRAFLAGS = "" -$ IF F$TYPE(USER_CCFLAGS) .NES. "" THEN CCEXTRAFLAGS = USER_CCFLAGS -$ CCDISABLEWARNINGS = "" !!! "MAYLOSEDATA3" !!! "LONGLONGTYPE,LONGLONGSUFX,FOUNDCR" -$ IF F$TYPE(USER_CCDISABLEWARNINGS) .NES. "" -$ THEN -$ IF CCDISABLEWARNINGS .NES. THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + "," -$ CCDISABLEWARNINGS = CCDISABLEWARNINGS + USER_CCDISABLEWARNINGS -$ ENDIF -$! -$! Check To See If We Have A ZLIB Option. -$! -$ ZLIB = P8 -$ IF (ZLIB .NES. "") -$ THEN -$! -$! Check for expected ZLIB files. -$! -$ err = 0 -$ file1 = f$parse( "zlib.h", ZLIB, , , "SYNTAX_ONLY") -$ if (f$search( file1) .eqs. "") -$ then -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ", ZLIB, " Is Invalid." -$ WRITE SYS$OUTPUT " Can't find header: ''file1'" -$ err = 1 -$ endif -$ file1 = f$parse( "A.;", ZLIB)- "A.;" -$! -$ file2 = f$parse( ZLIB, "libz.olb", , , "SYNTAX_ONLY") -$ if (f$search( file2) .eqs. "") -$ then -$ if (err .eq. 0) -$ then -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ", ZLIB, " Is Invalid." -$ endif -$ WRITE SYS$OUTPUT " Can't find library: ''file2'" -$ WRITE SYS$OUTPUT "" -$ err = err+ 2 -$ endif -$ if (err .eq. 1) -$ then -$ WRITE SYS$OUTPUT "" -$ endif -$! -$ if (err .ne. 0) -$ then -$ EXIT -$ endif -$! 
-$ CCDEFS = """ZLIB=1"", "+ CCDEFS -$ CC_INCLUDES = CC_INCLUDES+ ", "+ file1 -$ ZLIB_LIB = ", ''file2' /library" -$! -$! Print info -$! -$ WRITE SYS$OUTPUT "ZLIB library spec: ", file2 -$! -$! End The ZLIB Check. -$! -$ ENDIF -$! -$! Check To See If The User Entered A Valid Parameter. -$! -$ IF (P3.EQS."VAXC").OR.(P3.EQS."DECC").OR.(P3.EQS."GNUC") -$ THEN -$! -$! Check To See If The User Wanted DECC. -$! -$ IF (P3.EQS."DECC") -$ THEN -$! -$! Looks Like DECC, Set To Use DECC. -$! -$ COMPILER = "DECC" -$! -$! Tell The User We Are Using DECC. -$! -$ WRITE SYS$OUTPUT "Using DECC 'C' Compiler." -$! -$! Use DECC... -$! -$ CC = "CC" -$ IF ARCH.EQS."VAX" .AND. F$TRNLNM("DECC$CC_DEFAULT").NES."/DECC" - - THEN CC = "CC/DECC" -$ CC = CC + " /''CC_OPTIMIZE' /''DEBUGGER' /STANDARD=RELAXED"+ - - "''POINTER_SIZE' /NOLIST /PREFIX=ALL" + - - " /INCLUDE=(''CC_INCLUDES')"+ - - CCEXTRAFLAGS -$! -$! Define The Linker Options File Name. -$! -$ OPT_FILE = "VAX_DECC_OPTIONS.OPT" -$! -$! End DECC Check. -$! -$ ENDIF -$! -$! Check To See If We Are To Use VAXC. -$! -$ IF (P3.EQS."VAXC") -$ THEN -$! -$! Looks Like VAXC, Set To Use VAXC. -$! -$ COMPILER = "VAXC" -$! -$! Tell The User We Are Using VAX C. -$! -$ WRITE SYS$OUTPUT "Using VAXC 'C' Compiler." -$! -$! Compile Using VAXC. -$! -$ CC = "CC" -$ IF ARCH.NES."VAX" -$ THEN -$ WRITE SYS$OUTPUT "There is no VAX C on ''ARCH'!" -$ EXIT -$ ENDIF -$ IF F$TRNLNM("DECC$CC_DEFAULT").EQS."/DECC" THEN CC = "CC/VAXC" -$ CC = CC + "/''CC_OPTIMIZE'/''DEBUGGER'/NOLIST" + - - "/INCLUDE=(''CC_INCLUDES')"+ - - CCEXTRAFLAGS -$ CCDEFS = """VAXC""," + CCDEFS -$! -$! Define <sys> As SYS$COMMON:[SYSLIB] -$! -$ DEFINE/NOLOG SYS SYS$COMMON:[SYSLIB] -$! -$! Define The Linker Options File Name. -$! -$ OPT_FILE = "VAX_VAXC_OPTIONS.OPT" -$! -$! End VAXC Check -$! -$ ENDIF -$! -$! Check To See If We Are To Use GNU C. -$! -$ IF (P3.EQS."GNUC") -$ THEN -$! -$! Looks Like GNUC, Set To Use GNUC. -$! -$ COMPILER = "GNUC" -$! -$! Tell The User We Are Using GNUC. -$! 
-$ WRITE SYS$OUTPUT "Using GNU 'C' Compiler." -$! -$! Use GNU C... -$! -$ CC = "GCC/NOCASE_HACK/''GCC_OPTIMIZE'/''DEBUGGER'/NOLIST" + - - "/INCLUDE=(''CC_INCLUDES')"+ - - CCEXTRAFLAGS -$! -$! Define The Linker Options File Name. -$! -$ OPT_FILE = "VAX_GNUC_OPTIONS.OPT" -$! -$! End The GNU C Check. -$! -$ ENDIF -$! -$! Set up default defines -$! -$ CCDEFS = """FLAT_INC=1""," + CCDEFS -$! -$! Finish up the definition of CC. -$! -$ IF COMPILER .EQS. "DECC" -$ THEN -$! Not all compiler versions support MAYLOSEDATA3. -$ OPT_TEST = "MAYLOSEDATA3" -$ DEFINE /USER_MODE SYS$ERROR NL: -$ DEFINE /USER_MODE SYS$OUTPUT NL: -$ 'CC' /NOCROSS_REFERENCE /NOLIST /NOOBJECT - - /WARNINGS = DISABLE = ('OPT_TEST', EMPTYFILE) NL: -$ IF ($SEVERITY) -$ THEN -$ IF CCDISABLEWARNINGS .NES. "" THEN - - CCDISABLEWARNINGS = CCDISABLEWARNINGS+ "," -$ CCDISABLEWARNINGS = CCDISABLEWARNINGS+ OPT_TEST -$ ENDIF -$ IF CCDISABLEWARNINGS .EQS. "" -$ THEN -$ CC4DISABLEWARNINGS = "DOLLARID" -$ CC6DISABLEWARNINGS = "MIXLINKAGE" -$ ELSE -$ CC4DISABLEWARNINGS = CCDISABLEWARNINGS + ",DOLLARID" -$ CC6DISABLEWARNINGS = CCDISABLEWARNINGS + ",MIXLINKAGE" -$ CCDISABLEWARNINGS = " /WARNING=(DISABLE=(" + CCDISABLEWARNINGS + "))" -$ ENDIF -$ CC4DISABLEWARNINGS = " /WARNING=(DISABLE=(" + CC4DISABLEWARNINGS + "))" -$ CC6DISABLEWARNINGS = " /WARNING=(DISABLE=(" + CC6DISABLEWARNINGS + "))" -$ ELSE -$ CCDISABLEWARNINGS = "" -$ CC4DISABLEWARNINGS = "" -$ CC6DISABLEWARNINGS = "" -$ ENDIF -$ CC3 = CC + " /DEFINE=(" + CCDEFS + ISSEVEN + ")" + CCDISABLEWARNINGS -$ CC = CC + " /DEFINE=(" + CCDEFS + ")" + CCDISABLEWARNINGS -$ IF ARCH .EQS. "VAX" .AND. COMPILER .EQS. "DECC" .AND. P2 .NES. "DEBUG" -$ THEN -$ CC5 = CC + " /OPTIMIZE=NODISJOINT" -$ CC5_DIFFERENT = 1 -$ ELSE -$ CC5 = CC -$ CC5_DIFFERENT = 0 -$ ENDIF -$ CC4 = CC - CCDISABLEWARNINGS + CC4DISABLEWARNINGS -$ CC6 = CC - CCDISABLEWARNINGS + CC6DISABLEWARNINGS -$! -$! Show user the result -$! -$ WRITE/SYMBOL SYS$OUTPUT "Main C Compiling Command: ",CC -$! -$! 
Else The User Entered An Invalid Argument. -$! -$ ELSE -$! -$! Tell The User We Don't Know What They Want. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ",P3," Is Invalid. The Valid Options Are:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " VAXC : To Compile With VAX C." -$ WRITE SYS$OUTPUT " DECC : To Compile With DEC C." -$ WRITE SYS$OUTPUT " GNUC : To Compile With GNU C." -$ WRITE SYS$OUTPUT "" -$! -$! Time To EXIT. -$! -$ EXIT -$! -$! End The Valid Argument Check. -$! -$ ENDIF -$! -$! Build a MACRO command for the architecture at hand -$! -$ IF ARCH .EQS. "VAX" THEN MACRO = "MACRO/''DEBUGGER'" -$ IF ARCH .NES. "VAX" THEN MACRO = "MACRO/MIGRATION/''DEBUGGER'/''MACRO_OPTIMIZE'" -$! -$! Show user the result -$! -$ WRITE/SYMBOL SYS$OUTPUT "Main MACRO Compiling Command: ",MACRO -$! -$! Time to check the contents, and to make sure we get the correct library. -$! -$ IF P4.EQS."SOCKETSHR" .OR. P4.EQS."MULTINET" .OR. P4.EQS."UCX" - - .OR. P4.EQS."TCPIP" .OR. P4.EQS."NONE" -$ THEN -$! -$! Check to see if SOCKETSHR was chosen -$! -$ IF P4.EQS."SOCKETSHR" -$ THEN -$! -$! Set the library to use SOCKETSHR -$! -$ TCPIP_LIB = ",SYS$DISK:[-.VMS]SOCKETSHR_SHR.OPT /OPTIONS" -$! -$! Done with SOCKETSHR -$! -$ ENDIF -$! -$! Check to see if MULTINET was chosen -$! -$ IF P4.EQS."MULTINET" -$ THEN -$! -$! Set the library to use UCX emulation. -$! -$ P4 = "UCX" -$! -$! Done with MULTINET -$! -$ ENDIF -$! -$! Check to see if UCX was chosen -$! -$ IF P4.EQS."UCX" -$ THEN -$! -$! Set the library to use UCX. -$! -$ TCPIP_LIB = ",SYS$DISK:[-.VMS]UCX_SHR_DECC.OPT /OPTIONS" -$ IF F$TRNLNM("UCX$IPC_SHR") .NES. "" -$ THEN -$ TCPIP_LIB = ",SYS$DISK:[-.VMS]UCX_SHR_DECC_LOG.OPT /OPTIONS" -$ ELSE -$ IF COMPILER .NES. "DECC" .AND. ARCH .EQS. "VAX" THEN - - TCPIP_LIB = ",SYS$DISK:[-.VMS]UCX_SHR_VAXC.OPT /OPTIONS" -$ ENDIF -$! -$! Done with UCX -$! -$ ENDIF -$! -$! Check to see if TCPIP was chosen -$! -$ IF P4.EQS."TCPIP" -$ THEN -$! -$! Set the library to use TCPIP (post UCX). -$! 
-$ TCPIP_LIB = ",SYS$DISK:[-.VMS]TCPIP_SHR_DECC.OPT /OPTIONS" -$! -$! Done with TCPIP -$! -$ ENDIF -$! -$! Check to see if NONE was chosen -$! -$ IF P4.EQS."NONE" -$ THEN -$! -$! Do not use a TCPIP library. -$! -$ TCPIP_LIB = "" -$! -$! Done with TCPIP -$! -$ ENDIF -$! -$! Print info -$! -$ WRITE SYS$OUTPUT "TCP/IP library spec: ", TCPIP_LIB- "," -$! -$! Else The User Entered An Invalid Argument. -$! -$ ELSE -$! -$! Tell The User We Don't Know What They Want. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ",P4," Is Invalid. The Valid Options Are:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " SOCKETSHR : To link with SOCKETSHR TCP/IP library." -$ WRITE SYS$OUTPUT " UCX : To link with UCX TCP/IP library." -$ WRITE SYS$OUTPUT " TCPIP : To link with TCPIP (post UCX) TCP/IP library." -$ WRITE SYS$OUTPUT "" -$! -$! Time To EXIT. -$! -$ EXIT -$! -$! Done with TCP/IP libraries -$! -$ ENDIF -$! -$! Check if the user wanted to compile just a subset of all the encryption -$! methods. -$! -$ IF P6 .NES. "" -$ THEN -$ ENCRYPT_TYPES = P6 -$ ENDIF -$! -$! Time To RETURN... -$! -$ RETURN -$! -$ INITIALISE: -$! -$! Save old value of the logical name OPENSSL -$! -$ __SAVE_OPENSSL = F$TRNLNM("OPENSSL","LNM$PROCESS_TABLE") -$! -$! Save directory information -$! -$ __HERE = F$PARSE(F$PARSE("A.;",F$ENVIRONMENT("PROCEDURE"))-"A.;","[]A.;") - "A.;" -$ __HERE = F$EDIT(__HERE,"UPCASE") -$ __TOP = __HERE - "CRYPTO]" -$ __INCLUDE = __TOP + "INCLUDE.OPENSSL]" -$! -$! Set up the logical name OPENSSL to point at the include directory -$! -$ DEFINE OPENSSL/NOLOG '__INCLUDE' -$! -$! Done -$! -$ RETURN -$! -$ CLEANUP: -$! -$! Restore the logical name OPENSSL if it had a value -$! -$ IF __SAVE_OPENSSL .EQS. "" -$ THEN -$ DEASSIGN OPENSSL -$ ELSE -$ DEFINE/NOLOG OPENSSL '__SAVE_OPENSSL' -$ ENDIF -$! -$! Done -$! 
-$ RETURN diff --git a/thirdparty/openssl/crypto/des/asm/des_enc.m4 b/thirdparty/openssl/crypto/des/asm/des_enc.m4 deleted file mode 100644 index dda08e126d..0000000000 --- a/thirdparty/openssl/crypto/des/asm/des_enc.m4 +++ /dev/null @@ -1,2101 +0,0 @@ -! des_enc.m4 -! des_enc.S (generated from des_enc.m4) -! -! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file. -! -! Version 1.0. 32-bit version. -! -! June 8, 2000. -! -! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation -! by Andy Polyakov. -! -! January 1, 2003. -! -! Assembler version: Copyright Svend Olaf Mikkelsen. -! -! Original C code: Copyright Eric A. Young. -! -! This code can be freely used by LibDES/SSLeay/OpenSSL users. -! -! The LibDES/SSLeay/OpenSSL copyright notices must be respected. -! -! This version can be redistributed. -! -! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S -! -! Global registers 1 to 5 are used. This is the same as done by the -! cc compiler. The UltraSPARC load/store little endian feature is used. -! -! Instruction grouping often refers to one CPU cycle. -! -! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S -! -! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S -! -! Performance improvement according to './apps/openssl speed des' -! -! 32-bit build: -! 23% faster than cc-5.2 -xarch=v8plus -xO5 -! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5 -! 64-bit build: -! 50% faster than cc-5.2 -xarch=v9 -xO5 -! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5 -! 
- -.ident "des_enc.m4 2.1" -.file "des_enc-sparc.S" - -#include <openssl/opensslconf.h> - -#if defined(__SUNPRO_C) && defined(__sparcv9) -# define ABI64 /* They've said -xarch=v9 at command line */ -#elif defined(__GNUC__) && defined(__arch64__) -# define ABI64 /* They've said -m64 at command line */ -#endif - -#ifdef ABI64 - .register %g2,#scratch - .register %g3,#scratch -# define FRAME -192 -# define BIAS 2047 -# define LDPTR ldx -# define STPTR stx -# define ARG0 128 -# define ARGSZ 8 -# ifndef OPENSSL_SYSNAME_ULTRASPARC -# define OPENSSL_SYSNAME_ULTRASPARC -# endif -#else -# define FRAME -96 -# define BIAS 0 -# define LDPTR ld -# define STPTR st -# define ARG0 68 -# define ARGSZ 4 -#endif - -#define LOOPS 7 - -#define global0 %g0 -#define global1 %g1 -#define global2 %g2 -#define global3 %g3 -#define global4 %g4 -#define global5 %g5 - -#define local0 %l0 -#define local1 %l1 -#define local2 %l2 -#define local3 %l3 -#define local4 %l4 -#define local5 %l5 -#define local7 %l6 -#define local6 %l7 - -#define in0 %i0 -#define in1 %i1 -#define in2 %i2 -#define in3 %i3 -#define in4 %i4 -#define in5 %i5 -#define in6 %i6 -#define in7 %i7 - -#define out0 %o0 -#define out1 %o1 -#define out2 %o2 -#define out3 %o3 -#define out4 %o4 -#define out5 %o5 -#define out6 %o6 -#define out7 %o7 - -#define stub stb - -changequote({,}) - - -! Macro definitions: - - -! {ip_macro} -! -! The logic used in initial and final permutations is the same as in -! the C code. The permutations are done with a clever shift, xor, and -! technique. -! -! The macro also loads address sbox 1 to 5 to global 1 to 5, address -! sbox 6 to local6, and addres sbox 8 to out3. -! -! Rotates the halfs 3 left to bring the sbox bits in convenient positions. -! -! Loads key first round from address in parameter 5 to out0, out1. -! -! After the the original LibDES initial permutation, the resulting left -! is in the variable initially used for right and vice versa. The macro -! 
implements the possibility to keep the halfs in the original registers. -! -! parameter 1 left -! parameter 2 right -! parameter 3 result left (modify in first round) -! parameter 4 result right (use in first round) -! parameter 5 key address -! parameter 6 1/2 for include encryption/decryption -! parameter 7 1 for move in1 to in3 -! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 -! parameter 9 1 for load ks3 and ks2 to in4 and in3 - -define(ip_macro, { - -! {ip_macro} -! $1 $2 $4 $3 $5 $6 $7 $8 $9 - - ld [out2+256], local1 - srl $2, 4, local4 - - xor local4, $1, local4 - ifelse($7,1,{mov in1, in3},{nop}) - - ld [out2+260], local2 - and local4, local1, local4 - ifelse($8,1,{mov in3, in4},{}) - ifelse($8,2,{mov in4, in3},{}) - - ld [out2+280], out4 ! loop counter - sll local4, 4, local1 - xor $1, local4, $1 - - ld [out2+264], local3 - srl $1, 16, local4 - xor $2, local1, $2 - - ifelse($9,1,{LDPTR KS3, in4},{}) - xor local4, $2, local4 - nop !sethi %hi(DES_SPtrans), global1 ! sbox addr - - ifelse($9,1,{LDPTR KS2, in3},{}) - and local4, local2, local4 - nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr - - sll local4, 16, local1 - xor $2, local4, $2 - - srl $2, 2, local4 - xor $1, local1, $1 - - sethi %hi(16711680), local5 - xor local4, $1, local4 - - and local4, local3, local4 - or local5, 255, local5 - - sll local4, 2, local2 - xor $1, local4, $1 - - srl $1, 8, local4 - xor $2, local2, $2 - - xor local4, $2, local4 - add global1, 768, global4 - - and local4, local5, local4 - add global1, 1024, global5 - - ld [out2+272], local7 - sll local4, 8, local1 - xor $2, local4, $2 - - srl $2, 1, local4 - xor $1, local1, $1 - - ld [$5], out0 ! key 7531 - xor local4, $1, local4 - add global1, 256, global2 - - ld [$5+4], out1 ! key 8642 - and local4, local7, local4 - add global1, 512, global3 - - sll local4, 1, local1 - xor $1, local4, $1 - - sll $1, 3, local3 - xor $2, local1, $2 - - sll $2, 3, local2 - add global1, 1280, local6 ! 
address sbox 8 - - srl $1, 29, local4 - add global1, 1792, out3 ! address sbox 8 - - srl $2, 29, local1 - or local4, local3, $4 - - or local2, local1, $3 - - ifelse($6, 1, { - - ld [out2+284], local5 ! 0x0000FC00 used in the rounds - or local2, local1, $3 - xor $4, out0, local1 - - call .des_enc.1 - and local1, 252, local1 - - },{}) - - ifelse($6, 2, { - - ld [out2+284], local5 ! 0x0000FC00 used in the rounds - or local2, local1, $3 - xor $4, out0, local1 - - call .des_dec.1 - and local1, 252, local1 - - },{}) -}) - - -! {rounds_macro} -! -! The logic used in the DES rounds is the same as in the C code, -! except that calculations for sbox 1 and sbox 5 begin before -! the previous round is finished. -! -! In each round one half (work) is modified based on key and the -! other half (use). -! -! In this version we do two rounds in a loop repeated 7 times -! and two rounds seperately. -! -! One half has the bits for the sboxes in the following positions: -! -! 777777xx555555xx333333xx111111xx -! -! 88xx666666xx444444xx222222xx8888 -! -! The bits for each sbox are xor-ed with the key bits for that box. -! The above xx bits are cleared, and the result used for lookup in -! the sbox table. Each sbox entry contains the 4 output bits permuted -! into 32 bits according to the P permutation. -! -! In the description of DES, left and right are switched after -! each round, except after last round. In this code the original -! left and right are kept in the same register in all rounds, meaning -! that after the 16 rounds the result for right is in the register -! originally used for left. -! -! parameter 1 first work (left in first round) -! parameter 2 first use (right in first round) -! parameter 3 enc/dec 1/-1 -! parameter 4 loop label -! parameter 5 key address register -! parameter 6 optional address for key next encryption/decryption -! parameter 7 not empty for include retl -! -! also compares in2 to 8 - -define(rounds_macro, { - -! {rounds_macro} -! 
$1 $2 $3 $4 $5 $6 $7 $8 $9 - - xor $2, out0, local1 - - ld [out2+284], local5 ! 0x0000FC00 - ba $4 - and local1, 252, local1 - - .align 32 - -$4: - ! local6 is address sbox 6 - ! out3 is address sbox 8 - ! out4 is loop counter - - ld [global1+local1], local1 - xor $2, out1, out1 ! 8642 - xor $2, out0, out0 ! 7531 - ! fmovs %f0, %f0 ! fxor used for alignment - - srl out1, 4, local0 ! rotate 4 right - and out0, local5, local3 ! 3 - ! fmovs %f0, %f0 - - ld [$5+$3*8], local7 ! key 7531 next round - srl local3, 8, local3 ! 3 - and local0, 252, local2 ! 2 - ! fmovs %f0, %f0 - - ld [global3+local3],local3 ! 3 - sll out1, 28, out1 ! rotate - xor $1, local1, $1 ! 1 finished, local1 now sbox 7 - - ld [global2+local2], local2 ! 2 - srl out0, 24, local1 ! 7 - or out1, local0, out1 ! rotate - - ldub [out2+local1], local1 ! 7 (and 0xFC) - srl out1, 24, local0 ! 8 - and out1, local5, local4 ! 4 - - ldub [out2+local0], local0 ! 8 (and 0xFC) - srl local4, 8, local4 ! 4 - xor $1, local2, $1 ! 2 finished local2 now sbox 6 - - ld [global4+local4],local4 ! 4 - srl out1, 16, local2 ! 6 - xor $1, local3, $1 ! 3 finished local3 now sbox 5 - - ld [out3+local0],local0 ! 8 - and local2, 252, local2 ! 6 - add global1, 1536, local5 ! address sbox 7 - - ld [local6+local2], local2 ! 6 - srl out0, 16, local3 ! 5 - xor $1, local4, $1 ! 4 finished - - ld [local5+local1],local1 ! 7 - and local3, 252, local3 ! 5 - xor $1, local0, $1 ! 8 finished - - ld [global5+local3],local3 ! 5 - xor $1, local2, $1 ! 6 finished - subcc out4, 1, out4 - - ld [$5+$3*8+4], out0 ! key 8642 next round - xor $1, local7, local2 ! sbox 5 next round - xor $1, local1, $1 ! 7 finished - - srl local2, 16, local2 ! sbox 5 next round - xor $1, local3, $1 ! 5 finished - - ld [$5+$3*16+4], out1 ! key 8642 next round again - and local2, 252, local2 ! sbox5 next round -! next round - xor $1, local7, local7 ! 7531 - - ld [global5+local2], local2 ! 5 - srl local7, 24, local3 ! 7 - xor $1, out0, out0 ! 
8642 - - ldub [out2+local3], local3 ! 7 (and 0xFC) - srl out0, 4, local0 ! rotate 4 right - and local7, 252, local1 ! 1 - - sll out0, 28, out0 ! rotate - xor $2, local2, $2 ! 5 finished local2 used - - srl local0, 8, local4 ! 4 - and local0, 252, local2 ! 2 - ld [local5+local3], local3 ! 7 - - srl local0, 16, local5 ! 6 - or out0, local0, out0 ! rotate - ld [global2+local2], local2 ! 2 - - srl out0, 24, local0 - ld [$5+$3*16], out0 ! key 7531 next round - and local4, 252, local4 ! 4 - - and local5, 252, local5 ! 6 - ld [global4+local4], local4 ! 4 - xor $2, local3, $2 ! 7 finished local3 used - - and local0, 252, local0 ! 8 - ld [local6+local5], local5 ! 6 - xor $2, local2, $2 ! 2 finished local2 now sbox 3 - - srl local7, 8, local2 ! 3 start - ld [out3+local0], local0 ! 8 - xor $2, local4, $2 ! 4 finished - - and local2, 252, local2 ! 3 - ld [global1+local1], local1 ! 1 - xor $2, local5, $2 ! 6 finished local5 used - - ld [global3+local2], local2 ! 3 - xor $2, local0, $2 ! 8 finished - add $5, $3*16, $5 ! enc add 8, dec add -8 to key pointer - - ld [out2+284], local5 ! 0x0000FC00 - xor $2, out0, local4 ! sbox 1 next round - xor $2, local1, $2 ! 1 finished - - xor $2, local2, $2 ! 3 finished -#ifdef OPENSSL_SYSNAME_ULTRASPARC - bne,pt %icc, $4 -#else - bne $4 -#endif - and local4, 252, local1 ! sbox 1 next round - -! two rounds more: - - ld [global1+local1], local1 - xor $2, out1, out1 - xor $2, out0, out0 - - srl out1, 4, local0 ! rotate - and out0, local5, local3 - - ld [$5+$3*8], local7 ! key 7531 - srl local3, 8, local3 - and local0, 252, local2 - - ld [global3+local3],local3 - sll out1, 28, out1 ! rotate - xor $1, local1, $1 ! 1 finished, local1 now sbox 7 - - ld [global2+local2], local2 - srl out0, 24, local1 - or out1, local0, out1 ! rotate - - ldub [out2+local1], local1 - srl out1, 24, local0 - and out1, local5, local4 - - ldub [out2+local0], local0 - srl local4, 8, local4 - xor $1, local2, $1 ! 
2 finished local2 now sbox 6 - - ld [global4+local4],local4 - srl out1, 16, local2 - xor $1, local3, $1 ! 3 finished local3 now sbox 5 - - ld [out3+local0],local0 - and local2, 252, local2 - add global1, 1536, local5 ! address sbox 7 - - ld [local6+local2], local2 - srl out0, 16, local3 - xor $1, local4, $1 ! 4 finished - - ld [local5+local1],local1 - and local3, 252, local3 - xor $1, local0, $1 - - ld [global5+local3],local3 - xor $1, local2, $1 ! 6 finished - cmp in2, 8 - - ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter - xor $1, local7, local2 ! sbox 5 next round - xor $1, local1, $1 ! 7 finished - - ld [$5+$3*8+4], out0 - srl local2, 16, local2 ! sbox 5 next round - xor $1, local3, $1 ! 5 finished - - and local2, 252, local2 -! next round (two rounds more) - xor $1, local7, local7 ! 7531 - - ld [global5+local2], local2 - srl local7, 24, local3 - xor $1, out0, out0 ! 8642 - - ldub [out2+local3], local3 - srl out0, 4, local0 ! rotate - and local7, 252, local1 - - sll out0, 28, out0 ! rotate - xor $2, local2, $2 ! 5 finished local2 used - - srl local0, 8, local4 - and local0, 252, local2 - ld [local5+local3], local3 - - srl local0, 16, local5 - or out0, local0, out0 ! rotate - ld [global2+local2], local2 - - srl out0, 24, local0 - ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption - and local4, 252, local4 - - and local5, 252, local5 - ld [global4+local4], local4 - xor $2, local3, $2 ! 7 finished local3 used - - and local0, 252, local0 - ld [local6+local5], local5 - xor $2, local2, $2 ! 2 finished local2 now sbox 3 - - srl local7, 8, local2 ! 3 start - ld [out3+local0], local0 - xor $2, local4, $2 - - and local2, 252, local2 - ld [global1+local1], local1 - xor $2, local5, $2 ! 6 finished local5 used - - ld [global3+local2], local2 - srl $1, 3, local3 - xor $2, local0, $2 - - ifelse($6,{}, {}, {ld [$6+4], out1}) ! 
key next encryption/decryption - sll $1, 29, local4 - xor $2, local1, $2 - - ifelse($7,{}, {}, {retl}) - xor $2, local2, $2 -}) - - -! {fp_macro} -! -! parameter 1 right (original left) -! parameter 2 left (original right) -! parameter 3 1 for optional store to [in0] -! parameter 4 1 for load input/output address to local5/7 -! -! The final permutation logic switches the halfes, meaning that -! left and right ends up the the registers originally used. - -define(fp_macro, { - -! {fp_macro} -! $1 $2 $3 $4 $5 $6 $7 $8 $9 - - ! initially undo the rotate 3 left done after initial permutation - ! original left is received shifted 3 right and 29 left in local3/4 - - sll $2, 29, local1 - or local3, local4, $1 - - srl $2, 3, $2 - sethi %hi(0x55555555), local2 - - or $2, local1, $2 - or local2, %lo(0x55555555), local2 - - srl $2, 1, local3 - sethi %hi(0x00ff00ff), local1 - xor local3, $1, local3 - or local1, %lo(0x00ff00ff), local1 - and local3, local2, local3 - sethi %hi(0x33333333), local4 - sll local3, 1, local2 - - xor $1, local3, $1 - - srl $1, 8, local3 - xor $2, local2, $2 - xor local3, $2, local3 - or local4, %lo(0x33333333), local4 - and local3, local1, local3 - sethi %hi(0x0000ffff), local1 - sll local3, 8, local2 - - xor $2, local3, $2 - - srl $2, 2, local3 - xor $1, local2, $1 - xor local3, $1, local3 - or local1, %lo(0x0000ffff), local1 - and local3, local4, local3 - sethi %hi(0x0f0f0f0f), local4 - sll local3, 2, local2 - - ifelse($4,1, {LDPTR INPUT, local5}) - xor $1, local3, $1 - - ifelse($4,1, {LDPTR OUTPUT, local7}) - srl $1, 16, local3 - xor $2, local2, $2 - xor local3, $2, local3 - or local4, %lo(0x0f0f0f0f), local4 - and local3, local1, local3 - sll local3, 16, local2 - - xor $2, local3, local1 - - srl local1, 4, local3 - xor $1, local2, $1 - xor local3, $1, local3 - and local3, local4, local3 - sll local3, 4, local2 - - xor $1, local3, $1 - - ! 
optional store: - - ifelse($3,1, {st $1, [in0]}) - - xor local1, local2, $2 - - ifelse($3,1, {st $2, [in0+4]}) - -}) - - -! {fp_ip_macro} -! -! Does initial permutation for next block mixed with -! final permutation for current block. -! -! parameter 1 original left -! parameter 2 original right -! parameter 3 left ip -! parameter 4 right ip -! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 -! 2: mov in4 to in3 -! -! also adds -8 to length in2 and loads loop counter to out4 - -define(fp_ip_macro, { - -! {fp_ip_macro} -! $1 $2 $3 $4 $5 $6 $7 $8 $9 - - define({temp1},{out4}) - define({temp2},{local3}) - - define({ip1},{local1}) - define({ip2},{local2}) - define({ip4},{local4}) - define({ip5},{local5}) - - ! $1 in local3, local4 - - ld [out2+256], ip1 - sll out5, 29, temp1 - or local3, local4, $1 - - srl out5, 3, $2 - ifelse($5,2,{mov in4, in3}) - - ld [out2+272], ip5 - srl $4, 4, local0 - or $2, temp1, $2 - - srl $2, 1, temp1 - xor temp1, $1, temp1 - - and temp1, ip5, temp1 - xor local0, $3, local0 - - sll temp1, 1, temp2 - xor $1, temp1, $1 - - and local0, ip1, local0 - add in2, -8, in2 - - sll local0, 4, local7 - xor $3, local0, $3 - - ld [out2+268], ip4 - srl $1, 8, temp1 - xor $2, temp2, $2 - ld [out2+260], ip2 - srl $3, 16, local0 - xor $4, local7, $4 - xor temp1, $2, temp1 - xor local0, $4, local0 - and temp1, ip4, temp1 - and local0, ip2, local0 - sll temp1, 8, temp2 - xor $2, temp1, $2 - sll local0, 16, local7 - xor $4, local0, $4 - - srl $2, 2, temp1 - xor $1, temp2, $1 - - ld [out2+264], temp2 ! 
ip3 - srl $4, 2, local0 - xor $3, local7, $3 - xor temp1, $1, temp1 - xor local0, $3, local0 - and temp1, temp2, temp1 - and local0, temp2, local0 - sll temp1, 2, temp2 - xor $1, temp1, $1 - sll local0, 2, local7 - xor $3, local0, $3 - - srl $1, 16, temp1 - xor $2, temp2, $2 - srl $3, 8, local0 - xor $4, local7, $4 - xor temp1, $2, temp1 - xor local0, $4, local0 - and temp1, ip2, temp1 - and local0, ip4, local0 - sll temp1, 16, temp2 - xor $2, temp1, local4 - sll local0, 8, local7 - xor $4, local0, $4 - - srl $4, 1, local0 - xor $3, local7, $3 - - srl local4, 4, temp1 - xor local0, $3, local0 - - xor $1, temp2, $1 - and local0, ip5, local0 - - sll local0, 1, local7 - xor temp1, $1, temp1 - - xor $3, local0, $3 - xor $4, local7, $4 - - sll $3, 3, local5 - and temp1, ip1, temp1 - - sll temp1, 4, temp2 - xor $1, temp1, $1 - - ifelse($5,1,{LDPTR KS2, in4}) - sll $4, 3, local2 - xor local4, temp2, $2 - - ! reload since used as temporar: - - ld [out2+280], out4 ! loop counter - - srl $3, 29, local0 - ifelse($5,1,{add in4, 120, in4}) - - ifelse($5,1,{LDPTR KS1, in3}) - srl $4, 29, local7 - - or local0, local5, $4 - or local2, local7, $3 - -}) - - - -! {load_little_endian} -! -! parameter 1 address -! parameter 2 destination left -! parameter 3 destination right -! parameter 4 temporar -! parameter 5 label - -define(load_little_endian, { - -! {load_little_endian} -! $1 $2 $3 $4 $5 $6 $7 $8 $9 - - ! 
first in memory to rightmost in register - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - andcc $1, 3, global0 - bne,pn %icc, $5 - nop - - lda [$1] 0x88, $2 - add $1, 4, $4 - - ba,pt %icc, $5a - lda [$4] 0x88, $3 -#endif - -$5: - ldub [$1+3], $2 - - ldub [$1+2], $4 - sll $2, 8, $2 - or $2, $4, $2 - - ldub [$1+1], $4 - sll $2, 8, $2 - or $2, $4, $2 - - ldub [$1+0], $4 - sll $2, 8, $2 - or $2, $4, $2 - - - ldub [$1+3+4], $3 - - ldub [$1+2+4], $4 - sll $3, 8, $3 - or $3, $4, $3 - - ldub [$1+1+4], $4 - sll $3, 8, $3 - or $3, $4, $3 - - ldub [$1+0+4], $4 - sll $3, 8, $3 - or $3, $4, $3 -$5a: - -}) - - -! {load_little_endian_inc} -! -! parameter 1 address -! parameter 2 destination left -! parameter 3 destination right -! parameter 4 temporar -! parameter 4 label -! -! adds 8 to address - -define(load_little_endian_inc, { - -! {load_little_endian_inc} -! $1 $2 $3 $4 $5 $6 $7 $8 $9 - - ! first in memory to rightmost in register - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - andcc $1, 3, global0 - bne,pn %icc, $5 - nop - - lda [$1] 0x88, $2 - add $1, 4, $1 - - lda [$1] 0x88, $3 - ba,pt %icc, $5a - add $1, 4, $1 -#endif - -$5: - ldub [$1+3], $2 - - ldub [$1+2], $4 - sll $2, 8, $2 - or $2, $4, $2 - - ldub [$1+1], $4 - sll $2, 8, $2 - or $2, $4, $2 - - ldub [$1+0], $4 - sll $2, 8, $2 - or $2, $4, $2 - - ldub [$1+3+4], $3 - add $1, 8, $1 - - ldub [$1+2+4-8], $4 - sll $3, 8, $3 - or $3, $4, $3 - - ldub [$1+1+4-8], $4 - sll $3, 8, $3 - or $3, $4, $3 - - ldub [$1+0+4-8], $4 - sll $3, 8, $3 - or $3, $4, $3 -$5a: - -}) - - -! {load_n_bytes} -! -! Loads 1 to 7 bytes little endian -! Remaining bytes are zeroed. -! -! parameter 1 address -! parameter 2 length -! parameter 3 destination register left -! parameter 4 destination register right -! parameter 5 temp -! parameter 6 temp2 -! parameter 7 label -! parameter 8 return label - -define(load_n_bytes, { - -! {load_n_bytes} -! 
$1 $2 $5 $6 $7 $8 $7 $8 $9 - -$7.0: call .+8 - sll $2, 2, $6 - - add %o7,$7.jmp.table-$7.0,$5 - - add $5, $6, $5 - mov 0, $4 - - ld [$5], $5 - - jmp %o7+$5 - mov 0, $3 - -$7.7: - ldub [$1+6], $5 - sll $5, 16, $5 - or $3, $5, $3 -$7.6: - ldub [$1+5], $5 - sll $5, 8, $5 - or $3, $5, $3 -$7.5: - ldub [$1+4], $5 - or $3, $5, $3 -$7.4: - ldub [$1+3], $5 - sll $5, 24, $5 - or $4, $5, $4 -$7.3: - ldub [$1+2], $5 - sll $5, 16, $5 - or $4, $5, $4 -$7.2: - ldub [$1+1], $5 - sll $5, 8, $5 - or $4, $5, $4 -$7.1: - ldub [$1+0], $5 - ba $8 - or $4, $5, $4 - - .align 4 - -$7.jmp.table: - .word 0 - .word $7.1-$7.0 - .word $7.2-$7.0 - .word $7.3-$7.0 - .word $7.4-$7.0 - .word $7.5-$7.0 - .word $7.6-$7.0 - .word $7.7-$7.0 -}) - - -! {store_little_endian} -! -! parameter 1 address -! parameter 2 source left -! parameter 3 source right -! parameter 4 temporar - -define(store_little_endian, { - -! {store_little_endian} -! $1 $2 $3 $4 $5 $6 $7 $8 $9 - - ! rightmost in register to first in memory - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - andcc $1, 3, global0 - bne,pn %icc, $5 - nop - - sta $2, [$1] 0x88 - add $1, 4, $4 - - ba,pt %icc, $5a - sta $3, [$4] 0x88 -#endif - -$5: - and $2, 255, $4 - stub $4, [$1+0] - - srl $2, 8, $4 - and $4, 255, $4 - stub $4, [$1+1] - - srl $2, 16, $4 - and $4, 255, $4 - stub $4, [$1+2] - - srl $2, 24, $4 - stub $4, [$1+3] - - - and $3, 255, $4 - stub $4, [$1+0+4] - - srl $3, 8, $4 - and $4, 255, $4 - stub $4, [$1+1+4] - - srl $3, 16, $4 - and $4, 255, $4 - stub $4, [$1+2+4] - - srl $3, 24, $4 - stub $4, [$1+3+4] - -$5a: - -}) - - -! {store_n_bytes} -! -! Stores 1 to 7 bytes little endian -! -! parameter 1 address -! parameter 2 length -! parameter 3 source register left -! parameter 4 source register right -! parameter 5 temp -! parameter 6 temp2 -! parameter 7 label -! parameter 8 return label - -define(store_n_bytes, { - -! {store_n_bytes} -! 
$1 $2 $5 $6 $7 $8 $7 $8 $9 - -$7.0: call .+8 - sll $2, 2, $6 - - add %o7,$7.jmp.table-$7.0,$5 - - add $5, $6, $5 - - ld [$5], $5 - - jmp %o7+$5 - nop - -$7.7: - srl $3, 16, $5 - and $5, 0xff, $5 - stub $5, [$1+6] -$7.6: - srl $3, 8, $5 - and $5, 0xff, $5 - stub $5, [$1+5] -$7.5: - and $3, 0xff, $5 - stub $5, [$1+4] -$7.4: - srl $4, 24, $5 - stub $5, [$1+3] -$7.3: - srl $4, 16, $5 - and $5, 0xff, $5 - stub $5, [$1+2] -$7.2: - srl $4, 8, $5 - and $5, 0xff, $5 - stub $5, [$1+1] -$7.1: - and $4, 0xff, $5 - - - ba $8 - stub $5, [$1] - - .align 4 - -$7.jmp.table: - - .word 0 - .word $7.1-$7.0 - .word $7.2-$7.0 - .word $7.3-$7.0 - .word $7.4-$7.0 - .word $7.5-$7.0 - .word $7.6-$7.0 - .word $7.7-$7.0 -}) - - -define(testvalue,{1}) - -define(register_init, { - -! For test purposes: - - sethi %hi(testvalue), local0 - or local0, %lo(testvalue), local0 - - ifelse($1,{},{}, {mov local0, $1}) - ifelse($2,{},{}, {mov local0, $2}) - ifelse($3,{},{}, {mov local0, $3}) - ifelse($4,{},{}, {mov local0, $4}) - ifelse($5,{},{}, {mov local0, $5}) - ifelse($6,{},{}, {mov local0, $6}) - ifelse($7,{},{}, {mov local0, $7}) - ifelse($8,{},{}, {mov local0, $8}) - - mov local0, local1 - mov local0, local2 - mov local0, local3 - mov local0, local4 - mov local0, local5 - mov local0, local7 - mov local0, local6 - mov local0, out0 - mov local0, out1 - mov local0, out2 - mov local0, out3 - mov local0, out4 - mov local0, out5 - mov local0, global1 - mov local0, global2 - mov local0, global3 - mov local0, global4 - mov local0, global5 - -}) - -.section ".text" - - .align 32 - -.des_enc: - - ! key address in3 - ! loads key next encryption/decryption first round from [in4] - - rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl) - - - .align 32 - -.des_dec: - - ! implemented with out5 as first parameter to avoid - ! register exchange in ede modes - - ! key address in4 - ! loads key next encryption/decryption first round from [in3] - - rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl) - - - -! 
void DES_encrypt1(data, ks, enc) -! ******************************* - - .align 32 - .global DES_encrypt1 - .type DES_encrypt1,#function - -DES_encrypt1: - - save %sp, FRAME, %sp - - sethi %hi(.PIC.DES_SPtrans-1f),global1 - or global1,%lo(.PIC.DES_SPtrans-1f),global1 -1: call .+8 - add %o7,global1,global1 - sub global1,.PIC.DES_SPtrans-.des_and,out2 - - ld [in0], in5 ! left - cmp in2, 0 ! enc - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - be,pn %icc, .encrypt.dec ! enc/dec -#else - be .encrypt.dec -#endif - ld [in0+4], out5 ! right - - ! parameter 6 1/2 for include encryption/decryption - ! parameter 7 1 for move in1 to in3 - ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 - - ip_macro(in5, out5, in5, out5, in3, 0, 1, 1) - - rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used - - fp_macro(in5, out5, 1) ! 1 for store to [in0] - - ret - restore - -.encrypt.dec: - - add in1, 120, in3 ! use last subkey for first round - - ! parameter 6 1/2 for include encryption/decryption - ! parameter 7 1 for move in1 to in3 - ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 - - ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4 - - fp_macro(out5, in5, 1) ! 1 for store to [in0] - - ret - restore - -.DES_encrypt1.end: - .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1 - - -! void DES_encrypt2(data, ks, enc) -!********************************* - - ! encrypts/decrypts without initial/final permutation - - .align 32 - .global DES_encrypt2 - .type DES_encrypt2,#function - -DES_encrypt2: - - save %sp, FRAME, %sp - - sethi %hi(.PIC.DES_SPtrans-1f),global1 - or global1,%lo(.PIC.DES_SPtrans-1f),global1 -1: call .+8 - add %o7,global1,global1 - sub global1,.PIC.DES_SPtrans-.des_and,out2 - - ! Set sbox address 1 to 6 and rotate halfs 3 left - ! Errors caught by destest? Yes. Still? *NO* - - !sethi %hi(DES_SPtrans), global1 ! address sbox 1 - - !or global1, %lo(DES_SPtrans), global1 ! sbox 1 - - add global1, 256, global2 ! 
sbox 2 - add global1, 512, global3 ! sbox 3 - - ld [in0], out5 ! right - add global1, 768, global4 ! sbox 4 - add global1, 1024, global5 ! sbox 5 - - ld [in0+4], in5 ! left - add global1, 1280, local6 ! sbox 6 - add global1, 1792, out3 ! sbox 8 - - ! rotate - - sll in5, 3, local5 - mov in1, in3 ! key address to in3 - - sll out5, 3, local7 - srl in5, 29, in5 - - srl out5, 29, out5 - add in5, local5, in5 - - add out5, local7, out5 - cmp in2, 0 - - ! we use our own stackframe - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - be,pn %icc, .encrypt2.dec ! decryption -#else - be .encrypt2.dec -#endif - STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] - - ld [in3], out0 ! key 7531 first round - mov LOOPS, out4 ! loop counter - - ld [in3+4], out1 ! key 8642 first round - sethi %hi(0x0000FC00), local5 - - call .des_enc - mov in3, in4 - - ! rotate - sll in5, 29, in0 - srl in5, 3, in5 - sll out5, 29, in1 - add in5, in0, in5 - srl out5, 3, out5 - LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 - add out5, in1, out5 - st in5, [in0] - st out5, [in0+4] - - ret - restore - - -.encrypt2.dec: - - add in3, 120, in4 - - ld [in4], out0 ! key 7531 first round - mov LOOPS, out4 ! loop counter - - ld [in4+4], out1 ! key 8642 first round - sethi %hi(0x0000FC00), local5 - - mov in5, local1 ! left expected in out5 - mov out5, in5 - - call .des_dec - mov local1, out5 - -.encrypt2.finish: - - ! rotate - sll in5, 29, in0 - srl in5, 3, in5 - sll out5, 29, in1 - add in5, in0, in5 - srl out5, 3, out5 - LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 - add out5, in1, out5 - st out5, [in0] - st in5, [in0+4] - - ret - restore - -.DES_encrypt2.end: - .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2 - - -! void DES_encrypt3(data, ks1, ks2, ks3) -! 
************************************** - - .align 32 - .global DES_encrypt3 - .type DES_encrypt3,#function - -DES_encrypt3: - - save %sp, FRAME, %sp - - sethi %hi(.PIC.DES_SPtrans-1f),global1 - or global1,%lo(.PIC.DES_SPtrans-1f),global1 -1: call .+8 - add %o7,global1,global1 - sub global1,.PIC.DES_SPtrans-.des_and,out2 - - ld [in0], in5 ! left - add in2, 120, in4 ! ks2 - - ld [in0+4], out5 ! right - mov in3, in2 ! save ks3 - - ! parameter 6 1/2 for include encryption/decryption - ! parameter 7 1 for mov in1 to in3 - ! parameter 8 1 for mov in3 to in4 - ! parameter 9 1 for load ks3 and ks2 to in4 and in3 - - ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0) - - call .des_dec - mov in2, in3 ! preload ks3 - - call .des_enc - nop - - fp_macro(in5, out5, 1) - - ret - restore - -.DES_encrypt3.end: - .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3 - - -! void DES_decrypt3(data, ks1, ks2, ks3) -! ************************************** - - .align 32 - .global DES_decrypt3 - .type DES_decrypt3,#function - -DES_decrypt3: - - save %sp, FRAME, %sp - - sethi %hi(.PIC.DES_SPtrans-1f),global1 - or global1,%lo(.PIC.DES_SPtrans-1f),global1 -1: call .+8 - add %o7,global1,global1 - sub global1,.PIC.DES_SPtrans-.des_and,out2 - - ld [in0], in5 ! left - add in3, 120, in4 ! ks3 - - ld [in0+4], out5 ! right - mov in2, in3 ! ks2 - - ! parameter 6 1/2 for include encryption/decryption - ! parameter 7 1 for mov in1 to in3 - ! parameter 8 1 for mov in3 to in4 - ! parameter 9 1 for load ks3 and ks2 to in4 and in3 - - ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0) - - call .des_enc - add in1, 120, in4 ! preload ks1 - - call .des_dec - nop - - fp_macro(out5, in5, 1) - - ret - restore - -.DES_decrypt3.end: - .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3 - -! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc) -! 
***************************************************************** - - - .align 32 - .global DES_ncbc_encrypt - .type DES_ncbc_encrypt,#function - -DES_ncbc_encrypt: - - save %sp, FRAME, %sp - - define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] }) - define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] }) - define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] }) - - sethi %hi(.PIC.DES_SPtrans-1f),global1 - or global1,%lo(.PIC.DES_SPtrans-1f),global1 -1: call .+8 - add %o7,global1,global1 - sub global1,.PIC.DES_SPtrans-.des_and,out2 - - cmp in5, 0 ! enc - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - be,pn %icc, .ncbc.dec -#else - be .ncbc.dec -#endif - STPTR in4, IVEC - - ! addr left right temp label - load_little_endian(in4, in5, out5, local3, .LLE1) ! iv - - addcc in2, -8, in2 ! bytes missing when first block done - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - bl,pn %icc, .ncbc.enc.seven.or.less -#else - bl .ncbc.enc.seven.or.less -#endif - mov in3, in4 ! schedule - -.ncbc.enc.next.block: - - load_little_endian(in0, out4, global4, local3, .LLE2) ! block - -.ncbc.enc.next.block_1: - - xor in5, out4, in5 ! iv xor - xor out5, global4, out5 ! iv xor - - ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 - ip_macro(in5, out5, in5, out5, in3, 0, 0, 2) - -.ncbc.enc.next.block_2: - -!// call .des_enc ! compares in2 to 8 -! rounds inlined for alignment purposes - - add global1, 768, global4 ! address sbox 4 since register used below - - rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3 - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - bl,pn %icc, .ncbc.enc.next.block_fp -#else - bl .ncbc.enc.next.block_fp -#endif - add in0, 8, in0 ! input address - - ! If 8 or more bytes are to be encrypted after this block, - ! we combine final permutation for this block with initial - ! permutation for next block. Load next block: - - load_little_endian(in0, global3, global4, local5, .LLE12) - - ! parameter 1 original left - ! parameter 2 original right - ! parameter 3 left ip - ! parameter 4 right ip - ! 
parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 - ! 2: mov in4 to in3 - ! - ! also adds -8 to length in2 and loads loop counter to out4 - - fp_ip_macro(out0, out1, global3, global4, 2) - - store_little_endian(in1, out0, out1, local3, .SLE10) ! block - - ld [in3], out0 ! key 7531 first round next block - mov in5, local1 - xor global3, out5, in5 ! iv xor next block - - ld [in3+4], out1 ! key 8642 - add global1, 512, global3 ! address sbox 3 since register used - xor global4, local1, out5 ! iv xor next block - - ba .ncbc.enc.next.block_2 - add in1, 8, in1 ! output adress - -.ncbc.enc.next.block_fp: - - fp_macro(in5, out5) - - store_little_endian(in1, in5, out5, local3, .SLE1) ! block - - addcc in2, -8, in2 ! bytes missing when next block done - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0 -#else - bpos .ncbc.enc.next.block -#endif - add in1, 8, in1 - -.ncbc.enc.seven.or.less: - - cmp in2, -8 - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - ble,pt %icc, .ncbc.enc.finish -#else - ble .ncbc.enc.finish -#endif - nop - - add in2, 8, local1 ! bytes to load - - ! addr, length, dest left, dest right, temp, temp2, label, ret label - load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1) - - ! Loads 1 to 7 bytes little endian to global4, out4 - - -.ncbc.enc.finish: - - LDPTR IVEC, local4 - store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec - - ret - restore - - -.ncbc.dec: - - STPTR in0, INPUT - cmp in2, 0 ! length - add in3, 120, in3 - - LDPTR IVEC, local7 ! ivec -#ifdef OPENSSL_SYSNAME_ULTRASPARC - ble,pn %icc, .ncbc.dec.finish -#else - ble .ncbc.dec.finish -#endif - mov in3, in4 ! schedule - - STPTR in1, OUTPUT - mov in0, local5 ! input - - load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec - -.ncbc.dec.next.block: - - load_little_endian(local5, in5, out5, local3, .LLE4) ! block - - ! parameter 6 1/2 for include encryption/decryption - ! parameter 7 1 for mov in1 to in3 - ! 
parameter 8 1 for mov in3 to in4 - - ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryprion ks in4 - - fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7 - - ! in2 is bytes left to be stored - ! in2 is compared to 8 in the rounds - - xor out5, in0, out4 ! iv xor -#ifdef OPENSSL_SYSNAME_ULTRASPARC - bl,pn %icc, .ncbc.dec.seven.or.less -#else - bl .ncbc.dec.seven.or.less -#endif - xor in5, in1, global4 ! iv xor - - ! Load ivec next block now, since input and output address might be the same. - - load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv - - store_little_endian(local7, out4, global4, local3, .SLE3) - - STPTR local5, INPUT - add local7, 8, local7 - addcc in2, -8, in2 - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - bg,pt %icc, .ncbc.dec.next.block -#else - bg .ncbc.dec.next.block -#endif - STPTR local7, OUTPUT - - -.ncbc.dec.store.iv: - - LDPTR IVEC, local4 ! ivec - store_little_endian(local4, in0, in1, local5, .SLE4) - -.ncbc.dec.finish: - - ret - restore - -.ncbc.dec.seven.or.less: - - load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec - - store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv) - - -.DES_ncbc_encrypt.end: - .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt - - -! void DES_ede3_cbc_encrypt(input, output, lenght, ks1, ks2, ks3, ivec, enc) -! ************************************************************************** - - - .align 32 - .global DES_ede3_cbc_encrypt - .type DES_ede3_cbc_encrypt,#function - -DES_ede3_cbc_encrypt: - - save %sp, FRAME, %sp - - define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] }) - define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] }) - define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] }) - - sethi %hi(.PIC.DES_SPtrans-1f),global1 - or global1,%lo(.PIC.DES_SPtrans-1f),global1 -1: call .+8 - add %o7,global1,global1 - sub global1,.PIC.DES_SPtrans-.des_and,out2 - - LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc - LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! 
ivec - cmp local3, 0 ! enc - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - be,pn %icc, .ede3.dec -#else - be .ede3.dec -#endif - STPTR in4, KS2 - - STPTR in5, KS3 - - load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec - - addcc in2, -8, in2 ! bytes missing after next block - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - bl,pn %icc, .ede3.enc.seven.or.less -#else - bl .ede3.enc.seven.or.less -#endif - STPTR in3, KS1 - -.ede3.enc.next.block: - - load_little_endian(in0, out4, global4, local3, .LLE7) - -.ede3.enc.next.block_1: - - LDPTR KS2, in4 - xor in5, out4, in5 ! iv xor - xor out5, global4, out5 ! iv xor - - LDPTR KS1, in3 - add in4, 120, in4 ! for decryption we use last subkey first - nop - - ip_macro(in5, out5, in5, out5, in3) - -.ede3.enc.next.block_2: - - call .des_enc ! ks1 in3 - nop - - call .des_dec ! ks2 in4 - LDPTR KS3, in3 - - call .des_enc ! ks3 in3 compares in2 to 8 - nop - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - bl,pn %icc, .ede3.enc.next.block_fp -#else - bl .ede3.enc.next.block_fp -#endif - add in0, 8, in0 - - ! If 8 or more bytes are to be encrypted after this block, - ! we combine final permutation for this block with initial - ! permutation for next block. Load next block: - - load_little_endian(in0, global3, global4, local5, .LLE11) - - ! parameter 1 original left - ! parameter 2 original right - ! parameter 3 left ip - ! parameter 4 right ip - ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 - ! 2: mov in4 to in3 - ! - ! also adds -8 to length in2 and loads loop counter to out4 - - fp_ip_macro(out0, out1, global3, global4, 1) - - store_little_endian(in1, out0, out1, local3, .SLE9) ! block - - mov in5, local1 - xor global3, out5, in5 ! iv xor next block - - ld [in3], out0 ! key 7531 - add global1, 512, global3 ! address sbox 3 - xor global4, local1, out5 ! iv xor next block - - ld [in3+4], out1 ! key 8642 - add global1, 768, global4 ! 
address sbox 4 - ba .ede3.enc.next.block_2 - add in1, 8, in1 - -.ede3.enc.next.block_fp: - - fp_macro(in5, out5) - - store_little_endian(in1, in5, out5, local3, .SLE5) ! block - - addcc in2, -8, in2 ! bytes missing when next block done - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - bpos,pt %icc, .ede3.enc.next.block -#else - bpos .ede3.enc.next.block -#endif - add in1, 8, in1 - -.ede3.enc.seven.or.less: - - cmp in2, -8 - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - ble,pt %icc, .ede3.enc.finish -#else - ble .ede3.enc.finish -#endif - nop - - add in2, 8, local1 ! bytes to load - - ! addr, length, dest left, dest right, temp, temp2, label, ret label - load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1) - -.ede3.enc.finish: - - LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec - store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec - - ret - restore - -.ede3.dec: - - STPTR in0, INPUT - add in5, 120, in5 - - STPTR in1, OUTPUT - mov in0, local5 - add in3, 120, in3 - - STPTR in3, KS1 - cmp in2, 0 - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - ble %icc, .ede3.dec.finish -#else - ble .ede3.dec.finish -#endif - STPTR in5, KS3 - - LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv - load_little_endian(local7, in0, in1, local3, .LLE8) - -.ede3.dec.next.block: - - load_little_endian(local5, in5, out5, local3, .LLE9) - - ! parameter 6 1/2 for include encryption/decryption - ! parameter 7 1 for mov in1 to in3 - ! parameter 8 1 for mov in3 to in4 - ! parameter 9 1 for load ks3 and ks2 to in4 and in3 - - ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4 - - call .des_enc ! ks2 in3 - LDPTR KS1, in4 - - call .des_dec ! ks1 in4 - nop - - fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7 - - ! in2 is bytes left to be stored - ! 
in2 is compared to 8 in the rounds - - xor out5, in0, out4 -#ifdef OPENSSL_SYSNAME_ULTRASPARC - bl,pn %icc, .ede3.dec.seven.or.less -#else - bl .ede3.dec.seven.or.less -#endif - xor in5, in1, global4 - - load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block - - store_little_endian(local7, out4, global4, local3, .SLE7) ! block - - STPTR local5, INPUT - addcc in2, -8, in2 - add local7, 8, local7 - -#ifdef OPENSSL_SYSNAME_ULTRASPARC - bg,pt %icc, .ede3.dec.next.block -#else - bg .ede3.dec.next.block -#endif - STPTR local7, OUTPUT - -.ede3.dec.store.iv: - - LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec - store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec - -.ede3.dec.finish: - - ret - restore - -.ede3.dec.seven.or.less: - - load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv - - store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv) - - -.DES_ede3_cbc_encrypt.end: - .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt - - .align 256 - .type .des_and,#object - .size .des_and,284 - -.des_and: - -! This table is used for AND 0xFC when it is known that register -! bits 8-31 are zero. Makes it possible to do three arithmetic -! operations in one cycle. 
- - .byte 0, 0, 0, 0, 4, 4, 4, 4 - .byte 8, 8, 8, 8, 12, 12, 12, 12 - .byte 16, 16, 16, 16, 20, 20, 20, 20 - .byte 24, 24, 24, 24, 28, 28, 28, 28 - .byte 32, 32, 32, 32, 36, 36, 36, 36 - .byte 40, 40, 40, 40, 44, 44, 44, 44 - .byte 48, 48, 48, 48, 52, 52, 52, 52 - .byte 56, 56, 56, 56, 60, 60, 60, 60 - .byte 64, 64, 64, 64, 68, 68, 68, 68 - .byte 72, 72, 72, 72, 76, 76, 76, 76 - .byte 80, 80, 80, 80, 84, 84, 84, 84 - .byte 88, 88, 88, 88, 92, 92, 92, 92 - .byte 96, 96, 96, 96, 100, 100, 100, 100 - .byte 104, 104, 104, 104, 108, 108, 108, 108 - .byte 112, 112, 112, 112, 116, 116, 116, 116 - .byte 120, 120, 120, 120, 124, 124, 124, 124 - .byte 128, 128, 128, 128, 132, 132, 132, 132 - .byte 136, 136, 136, 136, 140, 140, 140, 140 - .byte 144, 144, 144, 144, 148, 148, 148, 148 - .byte 152, 152, 152, 152, 156, 156, 156, 156 - .byte 160, 160, 160, 160, 164, 164, 164, 164 - .byte 168, 168, 168, 168, 172, 172, 172, 172 - .byte 176, 176, 176, 176, 180, 180, 180, 180 - .byte 184, 184, 184, 184, 188, 188, 188, 188 - .byte 192, 192, 192, 192, 196, 196, 196, 196 - .byte 200, 200, 200, 200, 204, 204, 204, 204 - .byte 208, 208, 208, 208, 212, 212, 212, 212 - .byte 216, 216, 216, 216, 220, 220, 220, 220 - .byte 224, 224, 224, 224, 228, 228, 228, 228 - .byte 232, 232, 232, 232, 236, 236, 236, 236 - .byte 240, 240, 240, 240, 244, 244, 244, 244 - .byte 248, 248, 248, 248, 252, 252, 252, 252 - - ! 5 numbers for initil/final permutation - - .word 0x0f0f0f0f ! offset 256 - .word 0x0000ffff ! 260 - .word 0x33333333 ! 264 - .word 0x00ff00ff ! 268 - .word 0x55555555 ! 272 - - .word 0 ! 276 - .word LOOPS ! 280 - .word 0x0000FC00 ! 284 - - .global DES_SPtrans - .type DES_SPtrans,#object - .size DES_SPtrans,2048 -.align 64 -DES_SPtrans: -.PIC.DES_SPtrans: - ! 
nibble 0 - .word 0x02080800, 0x00080000, 0x02000002, 0x02080802 - .word 0x02000000, 0x00080802, 0x00080002, 0x02000002 - .word 0x00080802, 0x02080800, 0x02080000, 0x00000802 - .word 0x02000802, 0x02000000, 0x00000000, 0x00080002 - .word 0x00080000, 0x00000002, 0x02000800, 0x00080800 - .word 0x02080802, 0x02080000, 0x00000802, 0x02000800 - .word 0x00000002, 0x00000800, 0x00080800, 0x02080002 - .word 0x00000800, 0x02000802, 0x02080002, 0x00000000 - .word 0x00000000, 0x02080802, 0x02000800, 0x00080002 - .word 0x02080800, 0x00080000, 0x00000802, 0x02000800 - .word 0x02080002, 0x00000800, 0x00080800, 0x02000002 - .word 0x00080802, 0x00000002, 0x02000002, 0x02080000 - .word 0x02080802, 0x00080800, 0x02080000, 0x02000802 - .word 0x02000000, 0x00000802, 0x00080002, 0x00000000 - .word 0x00080000, 0x02000000, 0x02000802, 0x02080800 - .word 0x00000002, 0x02080002, 0x00000800, 0x00080802 - ! nibble 1 - .word 0x40108010, 0x00000000, 0x00108000, 0x40100000 - .word 0x40000010, 0x00008010, 0x40008000, 0x00108000 - .word 0x00008000, 0x40100010, 0x00000010, 0x40008000 - .word 0x00100010, 0x40108000, 0x40100000, 0x00000010 - .word 0x00100000, 0x40008010, 0x40100010, 0x00008000 - .word 0x00108010, 0x40000000, 0x00000000, 0x00100010 - .word 0x40008010, 0x00108010, 0x40108000, 0x40000010 - .word 0x40000000, 0x00100000, 0x00008010, 0x40108010 - .word 0x00100010, 0x40108000, 0x40008000, 0x00108010 - .word 0x40108010, 0x00100010, 0x40000010, 0x00000000 - .word 0x40000000, 0x00008010, 0x00100000, 0x40100010 - .word 0x00008000, 0x40000000, 0x00108010, 0x40008010 - .word 0x40108000, 0x00008000, 0x00000000, 0x40000010 - .word 0x00000010, 0x40108010, 0x00108000, 0x40100000 - .word 0x40100010, 0x00100000, 0x00008010, 0x40008000 - .word 0x40008010, 0x00000010, 0x40100000, 0x00108000 - ! 
nibble 2 - .word 0x04000001, 0x04040100, 0x00000100, 0x04000101 - .word 0x00040001, 0x04000000, 0x04000101, 0x00040100 - .word 0x04000100, 0x00040000, 0x04040000, 0x00000001 - .word 0x04040101, 0x00000101, 0x00000001, 0x04040001 - .word 0x00000000, 0x00040001, 0x04040100, 0x00000100 - .word 0x00000101, 0x04040101, 0x00040000, 0x04000001 - .word 0x04040001, 0x04000100, 0x00040101, 0x04040000 - .word 0x00040100, 0x00000000, 0x04000000, 0x00040101 - .word 0x04040100, 0x00000100, 0x00000001, 0x00040000 - .word 0x00000101, 0x00040001, 0x04040000, 0x04000101 - .word 0x00000000, 0x04040100, 0x00040100, 0x04040001 - .word 0x00040001, 0x04000000, 0x04040101, 0x00000001 - .word 0x00040101, 0x04000001, 0x04000000, 0x04040101 - .word 0x00040000, 0x04000100, 0x04000101, 0x00040100 - .word 0x04000100, 0x00000000, 0x04040001, 0x00000101 - .word 0x04000001, 0x00040101, 0x00000100, 0x04040000 - ! nibble 3 - .word 0x00401008, 0x10001000, 0x00000008, 0x10401008 - .word 0x00000000, 0x10400000, 0x10001008, 0x00400008 - .word 0x10401000, 0x10000008, 0x10000000, 0x00001008 - .word 0x10000008, 0x00401008, 0x00400000, 0x10000000 - .word 0x10400008, 0x00401000, 0x00001000, 0x00000008 - .word 0x00401000, 0x10001008, 0x10400000, 0x00001000 - .word 0x00001008, 0x00000000, 0x00400008, 0x10401000 - .word 0x10001000, 0x10400008, 0x10401008, 0x00400000 - .word 0x10400008, 0x00001008, 0x00400000, 0x10000008 - .word 0x00401000, 0x10001000, 0x00000008, 0x10400000 - .word 0x10001008, 0x00000000, 0x00001000, 0x00400008 - .word 0x00000000, 0x10400008, 0x10401000, 0x00001000 - .word 0x10000000, 0x10401008, 0x00401008, 0x00400000 - .word 0x10401008, 0x00000008, 0x10001000, 0x00401008 - .word 0x00400008, 0x00401000, 0x10400000, 0x10001008 - .word 0x00001008, 0x10000000, 0x10000008, 0x10401000 - ! 
nibble 4 - .word 0x08000000, 0x00010000, 0x00000400, 0x08010420 - .word 0x08010020, 0x08000400, 0x00010420, 0x08010000 - .word 0x00010000, 0x00000020, 0x08000020, 0x00010400 - .word 0x08000420, 0x08010020, 0x08010400, 0x00000000 - .word 0x00010400, 0x08000000, 0x00010020, 0x00000420 - .word 0x08000400, 0x00010420, 0x00000000, 0x08000020 - .word 0x00000020, 0x08000420, 0x08010420, 0x00010020 - .word 0x08010000, 0x00000400, 0x00000420, 0x08010400 - .word 0x08010400, 0x08000420, 0x00010020, 0x08010000 - .word 0x00010000, 0x00000020, 0x08000020, 0x08000400 - .word 0x08000000, 0x00010400, 0x08010420, 0x00000000 - .word 0x00010420, 0x08000000, 0x00000400, 0x00010020 - .word 0x08000420, 0x00000400, 0x00000000, 0x08010420 - .word 0x08010020, 0x08010400, 0x00000420, 0x00010000 - .word 0x00010400, 0x08010020, 0x08000400, 0x00000420 - .word 0x00000020, 0x00010420, 0x08010000, 0x08000020 - ! nibble 5 - .word 0x80000040, 0x00200040, 0x00000000, 0x80202000 - .word 0x00200040, 0x00002000, 0x80002040, 0x00200000 - .word 0x00002040, 0x80202040, 0x00202000, 0x80000000 - .word 0x80002000, 0x80000040, 0x80200000, 0x00202040 - .word 0x00200000, 0x80002040, 0x80200040, 0x00000000 - .word 0x00002000, 0x00000040, 0x80202000, 0x80200040 - .word 0x80202040, 0x80200000, 0x80000000, 0x00002040 - .word 0x00000040, 0x00202000, 0x00202040, 0x80002000 - .word 0x00002040, 0x80000000, 0x80002000, 0x00202040 - .word 0x80202000, 0x00200040, 0x00000000, 0x80002000 - .word 0x80000000, 0x00002000, 0x80200040, 0x00200000 - .word 0x00200040, 0x80202040, 0x00202000, 0x00000040 - .word 0x80202040, 0x00202000, 0x00200000, 0x80002040 - .word 0x80000040, 0x80200000, 0x00202040, 0x00000000 - .word 0x00002000, 0x80000040, 0x80002040, 0x80202000 - .word 0x80200000, 0x00002040, 0x00000040, 0x80200040 - ! 
nibble 6 - .word 0x00004000, 0x00000200, 0x01000200, 0x01000004 - .word 0x01004204, 0x00004004, 0x00004200, 0x00000000 - .word 0x01000000, 0x01000204, 0x00000204, 0x01004000 - .word 0x00000004, 0x01004200, 0x01004000, 0x00000204 - .word 0x01000204, 0x00004000, 0x00004004, 0x01004204 - .word 0x00000000, 0x01000200, 0x01000004, 0x00004200 - .word 0x01004004, 0x00004204, 0x01004200, 0x00000004 - .word 0x00004204, 0x01004004, 0x00000200, 0x01000000 - .word 0x00004204, 0x01004000, 0x01004004, 0x00000204 - .word 0x00004000, 0x00000200, 0x01000000, 0x01004004 - .word 0x01000204, 0x00004204, 0x00004200, 0x00000000 - .word 0x00000200, 0x01000004, 0x00000004, 0x01000200 - .word 0x00000000, 0x01000204, 0x01000200, 0x00004200 - .word 0x00000204, 0x00004000, 0x01004204, 0x01000000 - .word 0x01004200, 0x00000004, 0x00004004, 0x01004204 - .word 0x01000004, 0x01004200, 0x01004000, 0x00004004 - ! nibble 7 - .word 0x20800080, 0x20820000, 0x00020080, 0x00000000 - .word 0x20020000, 0x00800080, 0x20800000, 0x20820080 - .word 0x00000080, 0x20000000, 0x00820000, 0x00020080 - .word 0x00820080, 0x20020080, 0x20000080, 0x20800000 - .word 0x00020000, 0x00820080, 0x00800080, 0x20020000 - .word 0x20820080, 0x20000080, 0x00000000, 0x00820000 - .word 0x20000000, 0x00800000, 0x20020080, 0x20800080 - .word 0x00800000, 0x00020000, 0x20820000, 0x00000080 - .word 0x00800000, 0x00020000, 0x20000080, 0x20820080 - .word 0x00020080, 0x20000000, 0x00000000, 0x00820000 - .word 0x20800080, 0x20020080, 0x20020000, 0x00800080 - .word 0x20820000, 0x00000080, 0x00800080, 0x20020000 - .word 0x20820080, 0x00800000, 0x20800000, 0x20000080 - .word 0x00820000, 0x00020080, 0x20020080, 0x20800000 - .word 0x00000080, 0x20820000, 0x00820080, 0x00000000 - .word 0x20000000, 0x20800080, 0x00020000, 0x00820080 - diff --git a/thirdparty/openssl/crypto/des/des-lib.com b/thirdparty/openssl/crypto/des/des-lib.com deleted file mode 100644 index 348f1c0470..0000000000 --- a/thirdparty/openssl/crypto/des/des-lib.com +++ /dev/null 
@@ -1,1005 +0,0 @@ -$! -$! DES-LIB.COM -$! Written By: Robert Byer -$! Vice-President -$! A-Com Computing, Inc. -$! byer@mail.all-net.net -$! -$! Changes by Richard Levitte <richard@levitte.org> -$! -$! This command files compiles and creates the -$! "[.xxx.EXE.CRYPTO.DES]LIBDES.OLB" library. The "xxx" denotes the machine -$! architecture of ALPHA, IA64 or VAX. -$! -$! It was re-written to try to determine which "C" compiler to try to use -$! or the user can specify a compiler in P3. -$! -$! Specify one of the following to build just that part, specify "ALL" to -$! just build everything. -$! -$! ALL To Just Build "Everything". -$! LIBRARY To Just Build The [.xxx.EXE.CRYPTO.DES]LIBDES.OLB Library. -$! DESTEST To Just Build The [.xxx.EXE.CRYPTO.DES]DESTEST.EXE Program. -$! SPEED To Just Build The [.xxx.EXE.CRYPTO.DES]SPEED.EXE Program. -$! RPW To Just Build The [.xxx.EXE.CRYPTO.DES]RPW.EXE Program. -$! DES To Just Build The [.xxx.EXE.CRYPTO.DES]DES.EXE Program. -$! DES_OPTS To Just Build The [.xxx.EXE.CRYPTO.DES]DES_OPTS.EXE Program. -$! -$! Specify either DEBUG or NODEBUG as P2 to compile with or without -$! debugging information. -$! -$! Specify which compiler at P3 to try to compile under. -$! -$! VAXC For VAX C. -$! DECC For DEC C. -$! GNUC For GNU C. -$! -$! If you don't speficy a compiler, it will try to determine which -$! "C" compiler to try to use. -$! -$! P4, if defined, sets a compiler thread NOT needed on OpenVMS 7.1 (and up) -$! -$! -$! Make sure we know what architecture we run on. -$! -$! -$! Check Which Architecture We Are Using. -$! -$ IF (F$GETSYI("CPU").LT.128) -$ THEN -$! -$! The Architecture Is VAX -$! -$ ARCH := VAX -$! -$! Else... -$! -$ ELSE -$! -$! The Architecture Is Alpha, IA64 or whatever comes in the future. -$! -$ ARCH = F$EDIT( F$GETSYI( "ARCH_NAME"), "UPCASE") -$ IF (ARCH .EQS. "") THEN ARCH = "UNK" -$! -$! End The Architecture Check. -$! -$ ENDIF -$! -$! Define The OBJ Directory Name. -$! 
-$ OBJ_DIR := SYS$DISK:[--.'ARCH'.OBJ.CRYPTO.DES] -$! -$! Define The EXE Directory Name. -$! -$ EXE_DIR :== SYS$DISK:[--.'ARCH'.EXE.CRYPTO.DES] -$! -$! Check To Make Sure We Have Valid Command Line Parameters. -$! -$ GOSUB CHECK_OPTIONS -$! -$! Tell The User What Kind of Machine We Run On. -$! -$ WRITE SYS$OUTPUT "Compiling On A ",ARCH," Machine." -$! -$! Check To See If The Architecture Specific OBJ Directory Exists. -$! -$ IF (F$PARSE(OBJ_DIR).EQS."") -$ THEN -$! -$! It Dosen't Exist, So Create It. -$! -$ CREATE/DIR 'OBJ_DIR' -$! -$! End The Architecture Specific OBJ Directory Check. -$! -$ ENDIF -$! -$! Check To See If The Architecture Specific Directory Exists. -$! -$ IF (F$PARSE(EXE_DIR).EQS."") -$ THEN -$! -$! It Dosen't Exist, So Create It. -$! -$ CREATE/DIR 'EXE_DIR' -$! -$! End The Architecture Specific Directory Check. -$! -$ ENDIF -$! -$! Define The Library Name. -$! -$ LIB_NAME := 'EXE_DIR'LIBDES.OLB -$! -$! Check To See What We Are To Do. -$! -$ IF (BUILDALL.EQS."TRUE") -$ THEN -$! -$! Since Nothing Special Was Specified, Do Everything. -$! -$ GOSUB LIBRARY -$ GOSUB DESTEST -$ GOSUB SPEED -$ GOSUB RPW -$ GOSUB DES -$ GOSUB DES_OPTS -$! -$! Else... -$! -$ ELSE -$! -$! Build Just What The User Wants Us To Build. -$! -$ GOSUB 'BUILDALL' -$! -$! End The BUILDALL Check. -$! -$ ENDIF -$! -$! Time To EXIT. -$! -$ EXIT -$ LIBRARY: -$! -$! Tell The User That We Are Compiling. -$! -$ WRITE SYS$OUTPUT "Compiling The ",LIB_NAME," Files." -$! -$! Check To See If We Already Have A "[.xxx.EXE.CRYPTO.DES]LIBDES.OLB" Library... -$! -$ IF (F$SEARCH(LIB_NAME).EQS."") -$ THEN -$! -$! Guess Not, Create The Library. -$! -$ LIBRARY/CREATE/OBJECT 'LIB_NAME' -$! -$! End The Library Exist Check. -$! -$ ENDIF -$! -$! Define The DES Library Files. -$! 
-$ LIB_DES = "set_key,ecb_enc,cbc_enc,"+ - - "ecb3_enc,cfb64enc,cfb64ede,cfb_enc,ofb64ede,"+ - - "enc_read,enc_writ,ofb64enc,"+ - - "ofb_enc,str2key,pcbc_enc,qud_cksm,rand_key,"+ - - "des_enc,fcrypt_b,read2pwd,"+ - - "fcrypt,xcbc_enc,read_pwd,rpc_enc,cbc_cksm,supp" -$! -$! Define A File Counter And Set It To "0". -$! -$ FILE_COUNTER = 0 -$! -$! Top Of The File Loop. -$! -$ NEXT_FILE: -$! -$! O.K, Extract The File Name From The File List. -$! -$ FILE_NAME = F$ELEMENT(FILE_COUNTER,",",LIB_DES) -$! -$! Check To See If We Are At The End Of The File List. -$! -$ IF (FILE_NAME.EQS.",") THEN GOTO FILE_DONE -$! -$! Increment The Counter. -$! -$ FILE_COUNTER = FILE_COUNTER + 1 -$! -$! Create The Source File Name. -$! -$ SOURCE_FILE = "SYS$DISK:[]" + FILE_NAME + ".C" -$! -$! Tell The User We Are Compiling The Source File. -$! -$ WRITE SYS$OUTPUT " ",FILE_NAME,".C" -$! -$! Create The Object File Name. -$! -$ OBJECT_FILE = OBJ_DIR + FILE_NAME + "." + ARCH + "OBJ" -$ ON WARNING THEN GOTO NEXT_FILE -$! -$! Check To See If The File We Want To Compile Actually Exists. -$! -$ IF (F$SEARCH(SOURCE_FILE).EQS."") -$ THEN -$! -$! Tell The User That The File Dosen't Exist. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The File ",SOURCE_FILE," Dosen't Exist." -$ WRITE SYS$OUTPUT "" -$! -$! Exit The Build. -$! -$ EXIT -$! -$! End The File Exists Check. -$! -$ ENDIF -$! -$! Compile The File. -$! -$ ON ERROR THEN GOTO NEXT_FILE -$ CC/OBJECT='OBJECT_FILE' 'SOURCE_FILE' -$! -$! Add It To The Library. -$! -$ LIBRARY/REPLACE/OBJECT 'LIB_NAME' 'OBJECT_FILE' -$! -$! Time To Clean Up The Object File. -$! -$ DELETE 'OBJECT_FILE';* -$! -$! Go Back And Do It Again. -$! -$ GOTO NEXT_FILE -$! -$! All Done With This Library Part. -$! -$ FILE_DONE: -$! -$! Tell The User That We Are All Done. -$! -$ WRITE SYS$OUTPUT "Library ",LIB_NAME," Built." -$! -$! All Done, Time To Return. -$! -$ RETURN -$! -$! Compile The DESTEST Program. -$! -$ DESTEST: -$! -$! Check To See If We Have The Proper Libraries. -$! 
-$ GOSUB LIB_CHECK -$! -$! Check To See If We Have A Linker Option File. -$! -$ GOSUB CHECK_OPT_FILE -$! -$! Check To See If The File We Want To Compile Actually Exists. -$! -$ IF (F$SEARCH("SYS$DISK:[]DESTEST.C").EQS."") -$ THEN -$! -$! Tell The User That The File Dosen't Exist. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The File DESTEST.C Dosen't Exist." -$ WRITE SYS$OUTPUT "" -$! -$! Exit The Build. -$! -$ EXIT -$! -$! End The DESTEST.C File Check. -$! -$ ENDIF -$! -$! Tell The User What We Are Building. -$! -$ WRITE SYS$OUTPUT "Building ",EXE_DIR,"DESTEST.EXE" -$! -$! Compile The DESTEST Program. -$! -$ CC/OBJECT='OBJ_DIR'DESTEST.OBJ SYS$DISK:[]DESTEST.C -$! -$! Link The DESTEST Program. -$! -$ LINK/'DEBUGGER'/'TRACEBACK'/CONTIGUOUS/EXE='EXE_DIR'DESTEST.EXE - - 'OBJ_DIR'DESTEST.OBJ,'LIB_NAME'/LIBRARY,'OPT_FILE'/OPTION -$! -$! All Done, Time To Return. -$! -$ RETURN -$! -$! Compile The SPEED Program. -$! -$ SPEED: -$! -$! Check To See If We Have The Proper Libraries. -$! -$ GOSUB LIB_CHECK -$! -$! Check To See If We Have A Linker Option File. -$! -$ GOSUB CHECK_OPT_FILE -$! -$! Check To See If The File We Want To Compile Actually Exists. -$! -$ IF (F$SEARCH("SYS$DISK:[]SPEED.C").EQS."") -$ THEN -$! -$! Tell The User That The File Dosen't Exist. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The File SPEED.C Dosen't Exist." -$ WRITE SYS$OUTPUT "" -$! -$! Exit The Build. -$! -$ EXIT -$! -$! End The SPEED.C File Check. -$! -$ ENDIF -$! -$! Tell The User What We Are Building. -$! -$ WRITE SYS$OUTPUT "Building ",EXE_DIR,"SPEED.EXE" -$! -$! Compile The SPEED Program. -$! -$ CC/OBJECT='OBJ_DIR'SPEED.OBJ SYS$DISK:[]SPEED.C -$! -$! Link The SPEED Program. -$! -$ LINK/'DEBUGGER'/'TRACEBACK'/CONTIGUOUS/EXE='EXE_DIR'SPEED.EXE - - 'OBJ_DIR'SPEED.OBJ,'LIB_NAME'/LIBRARY,'OPT_FILE'/OPTION -$! -$! All Done, Time To Return. -$! -$ RETURN -$! -$! Compile The RPW Program. -$! -$ RPW: -$! -$! Check To See If We Have The Proper Libraries. -$! -$ GOSUB LIB_CHECK -$! -$! 
Check To See If We Have A Linker Option File. -$! -$ GOSUB CHECK_OPT_FILE -$! -$! Check To See If The File We Want To Compile Actually Exists. -$! -$ IF (F$SEARCH("SYS$DISK:[]RPW.C").EQS."") -$ THEN -$! -$! Tell The User That The File Dosen't Exist. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The File RPW.C Dosen't Exist." -$ WRITE SYS$OUTPUT "" -$! -$! Exit The Build. -$! -$ EXIT -$! -$! End The RPW.C File Check. -$! -$ ENDIF -$! -$! Tell The User What We Are Building. -$! -$ WRITE SYS$OUTPUT "Building ",EXE_DIR,"RPW.EXE" -$! -$! Compile The RPW Program. -$! -$ CC/OBJECT='OBJ_DIR'RPW.OBJ SYS$DISK:[]RPW.C -$! -$! Link The RPW Program. -$! -$ LINK/'DEBUGGER'/'TRACEBACK'/CONTIGUOUS/EXE='EXE_DIR'RPW.EXE - - 'OBJ_DIR'RPW.OBJ,'LIB_NAME'/LIBRARY,'OPT_FILE'/OPTION -$! -$! All Done, Time To Return. -$! -$ RETURN -$! -$! Compile The DES Program. -$! -$ DES: -$! -$! Check To See If We Have The Proper Libraries. -$! -$ GOSUB LIB_CHECK -$! -$! Check To See If We Have A Linker Option File. -$! -$ GOSUB CHECK_OPT_FILE -$! -$! Check To See If The File We Want To Compile Actually Exists. -$! -$ IF (F$SEARCH("SYS$DISK:[]DES.C").EQS."") -$ THEN -$! -$! Tell The User That The File Dosen't Exist. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The File DES.C Dosen't Exist." -$ WRITE SYS$OUTPUT "" -$! -$! Exit The Build. -$! -$ EXIT -$! -$! End The DES.C File Check. -$! -$ ENDIF -$! -$! Tell The User What We Are Building. -$! -$ WRITE SYS$OUTPUT "Building ",EXE_DIR,"DES.EXE" -$! -$! Compile The DES Program. -$! -$ CC/OBJECT='OBJ_DIR'DES.OBJ SYS$DISK:[]DES.C -$ CC/OBJECT='OBJ_DIR'DES.OBJ SYS$DISK:[]CBC3_ENC.C -$! -$! Link The DES Program. -$! -$ LINK/'DEBUGGER'/'TRACEBACK'/CONTIGUOUS/EXE='EXE_DIR'DES.EXE - - 'OBJ_DIR'DES.OBJ,'OBJ_DIR'CBC3_ENC.OBJ,- - 'LIB_NAME'/LIBRARY,'OPT_FILE'/OPTION -$! -$! All Done, Time To Return. -$! -$ RETURN -$! -$! Compile The DES_OPTS Program. -$! -$ DES_OPTS: -$! -$! Check To See If We Have The Proper Libraries. -$! -$ GOSUB LIB_CHECK -$! -$! 
Check To See If We Have A Linker Option File. -$! -$ GOSUB CHECK_OPT_FILE -$! -$! Check To See If The File We Want To Compile Actually Exists. -$! -$ IF (F$SEARCH("SYS$DISK:[]DES_OPTS.C").EQS."") -$ THEN -$! -$! Tell The User That The File Dosen't Exist. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The File DES_OPTS.C Dosen't Exist." -$ WRITE SYS$OUTPUT "" -$! -$! Exit The Build. -$! -$ EXIT -$! -$! End The DES_OPTS.C File Check. -$! -$ ENDIF -$! -$! Tell The User What We Are Building. -$! -$ WRITE SYS$OUTPUT "Building ",EXE_DIR,"DES_OPTS.EXE" -$! -$! Compile The DES_OPTS Program. -$! -$ CC/OBJECT='OBJ_DIR'DES_OPTS.OBJ SYS$DISK:[]DES_OPTS.C -$! -$! Link The DES_OPTS Program. -$! -$ LINK/'DEBUGGER'/'TRACEBACK'/CONTIGUOUS/EXE='EXE_DIR'DES_OPTS.EXE - - 'OBJ_DIR'DES_OPTS.OBJ,'LIB_NAME'/LIBRARY,'OPT_FILE'/OPTION -$! -$! All Done, Time To Return. -$! -$ RETURN -$ EXIT -$! -$! Check For The Link Option FIle. -$! -$ CHECK_OPT_FILE: -$! -$! Check To See If We Need To Make A VAX C Option File. -$! -$ IF (COMPILER.EQS."VAXC") -$ THEN -$! -$! Check To See If We Already Have A VAX C Linker Option File. -$! -$ IF (F$SEARCH(OPT_FILE).EQS."") -$ THEN -$! -$! We Need A VAX C Linker Option File. -$! -$ CREATE 'OPT_FILE' -$DECK -! -! Default System Options File To Link Agianst -! The Sharable VAX C Runtime Library. -! -SYS$SHARE:VAXCRTL.EXE/SHARE -$EOD -$! -$! End The Option File Check. -$! -$ ENDIF -$! -$! End The VAXC Check. -$! -$ ENDIF -$! -$! Check To See If We Need A GNU C Option File. -$! -$ IF (COMPILER.EQS."GNUC") -$ THEN -$! -$! Check To See If We Already Have A GNU C Linker Option File. -$! -$ IF (F$SEARCH(OPT_FILE).EQS."") -$ THEN -$! -$! We Need A GNU C Linker Option File. -$! -$ CREATE 'OPT_FILE' -$DECK -! -! Default System Options File To Link Agianst -! The Sharable C Runtime Library. -! -GNU_CC:[000000]GCCLIB/LIBRARY -SYS$SHARE:VAXCRTL/SHARE -$EOD -$! -$! End The Option File Check. -$! -$ ENDIF -$! -$! End The GNU C Check. -$! -$ ENDIF -$! -$! 
Check To See If We Need A DEC C Option File. -$! -$ IF (COMPILER.EQS."DECC") -$ THEN -$! -$! Check To See If We Already Have A DEC C Linker Option File. -$! -$ IF (F$SEARCH(OPT_FILE).EQS."") -$ THEN -$! -$! Figure Out If We Need An non-VAX Or A VAX Linker Option File. -$! -$ IF (F$GETSYI("CPU").LT.128) -$ THEN -$! -$! We Need A DEC C Linker Option File For VAX. -$! -$ CREATE 'OPT_FILE' -$DECK -! -! Default System Options File To Link Agianst -! The Sharable DEC C Runtime Library. -! -SYS$SHARE:DECC$SHR.EXE/SHARE -$EOD -$! -$! Else... -$! -$ ELSE -$! -$! Create The non-VAX Linker Option File. -$! -$ CREATE 'OPT_FILE' -$DECK -! -! Default System Options File For non-VAX To Link Agianst -! The Sharable C Runtime Library. -! -SYS$SHARE:CMA$OPEN_LIB_SHR/SHARE -SYS$SHARE:CMA$OPEN_RTL/SHARE -$EOD -$! -$! End The DEC C Option File Check. -$! -$ ENDIF -$! -$! End The Option File Search. -$! -$ ENDIF -$! -$! End The DEC C Check. -$! -$ ENDIF -$! -$! Tell The User What Linker Option File We Are Using. -$! -$ WRITE SYS$OUTPUT "Using Linker Option File ",OPT_FILE,"." -$! -$! Time To RETURN. -$! -$ RETURN -$! -$! Library Check. -$! -$ LIB_CHECK: -$! -$! Look For The Library LIBDES.OLB. -$! -$ IF (F$SEARCH(LIB_NAME).EQS."") -$ THEN -$! -$! Tell The User We Can't Find The [.xxx.CRYPTO.DES]LIBDES.OLB Library. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "Can't Find The Library ",LIB_NAME,"." -$ WRITE SYS$OUTPUT "We Can't Link Without It." -$ WRITE SYS$OUTPUT "" -$! -$! Since We Can't Link Without It, Exit. -$! -$ EXIT -$ ENDIF -$! -$! Time To Return. -$! -$ RETURN -$! -$! Check The User's Options. -$! -$ CHECK_OPTIONS: -$! -$! Check To See If We Are To "Just Build Everything". -$! -$ IF (P1.EQS."ALL") -$ THEN -$! -$! P1 Is "ALL", So Build Everything. -$! -$ BUILDALL = "TRUE" -$! -$! Else... -$! -$ ELSE -$! -$! Else, Check To See If P1 Has A Valid Argument. -$! 
-$ IF (P1.EQS."LIBRARY").OR.(P1.EQS."DESTEST").OR.(P1.EQS."SPEED") - - .OR.(P1.EQS."RPW").OR.(P1.EQS."DES").OR.(P1.EQS."DES_OPTS") -$ THEN -$! -$! A Valid Argument. -$! -$ BUILDALL = P1 -$! -$! Else... -$! -$ ELSE -$! -$! Tell The User We Don't Know What They Want. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ",P1," Is Invalid. The Valid Options Are:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " ALL : Just Build Everything." -$ WRITE SYS$OUTPUT " LIBRARY : To Compile Just The [.xxx.EXE.CRYPTO.DES]LIBDES.OLB Library." -$ WRITE SYS$OUTPUT " DESTEST : To Compile Just The [.xxx.EXE.CRYPTO.DES]DESTEST.EXE Program." -$ WRITE SYS$OUTPUT " SPEED : To Compile Just The [.xxx.EXE.CRYPTO.DES]SPEED.EXE Program." -$ WRITE SYS$OUTPUT " RPW : To Compile Just The [.xxx.EXE.CRYPTO.DES]RPW.EXE Program." -$ WRITE SYS$OUTPUT " DES : To Compile Just The [.xxx.EXE.CRYPTO.DES]DES.EXE Program." -$ WRITE SYS$OUTPUT " DES_OPTS : To Compile Just The [.xxx.EXE.CRYTPO.DES]DES_OPTS.EXE Program." -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " Where 'xxx' Stands For: " -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " ALPHA : Alpha Architecture." -$ WRITE SYS$OUTPUT " IA64 : IA64 Architecture." -$ WRITE SYS$OUTPUT " VAX : VAX Architecture." -$ WRITE SYS$OUTPUT "" -$! -$! Time To EXIT. -$! -$ EXIT -$! -$! End The Valid Argument Check. -$! -$ ENDIF -$! -$! End The P1 Check. -$! -$ ENDIF -$! -$! Check To See If We Are To Compile Without Debugger Information. -$! -$ IF (P2.EQS."NODEBUG") -$ THEN -$! -$! P2 Is Blank, So Compile Without Debugger Information. -$! -$ DEBUGGER = "NODEBUG" -$ TRACEBACK = "NOTRACEBACK" -$ GCC_OPTIMIZE = "OPTIMIZE" -$ CC_OPTIMIZE = "OPTIMIZE" -$ WRITE SYS$OUTPUT "No Debugger Information Will Be Produced During Compile." -$ WRITE SYS$OUTPUT "Compiling With Compiler Optimization." -$! -$! Else... -$! -$ ELSE -$! -$! Check To See If We Are To Compile With Debugger Information. -$! -$ IF (P2.EQS."DEBUG") -$ THEN -$! -$! Compile With Debugger Information. -$! 
-$ DEBUGGER = "DEBUG" -$ TRACEBACK = "TRACEBACK" -$ GCC_OPTIMIZE = "NOOPTIMIZE" -$ CC_OPTIMIZE = "NOOPTIMIZE" -$ WRITE SYS$OUTPUT "Debugger Information Will Be Produced During Compile." -$ WRITE SYS$OUTPUT "Compiling Without Compiler Optimization." -$! -$! Else... -$! -$ ELSE -$! -$! Tell The User Entered An Invalid Option.. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ",P2," Is Invalid. The Valid Options Are:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " DEBUG : Compile With The Debugger Information." -$ WRITE SYS$OUTPUT " NODEBUG : Compile Without The Debugger Information." -$ WRITE SYS$OUTPUT "" -$! -$! Time To EXIT. -$! -$ EXIT -$! -$! End The Valid Argument Check. -$! -$ ENDIF -$! -$! End The P2 Check. -$! -$ ENDIF -$! -$! Special Threads For OpenVMS v7.1 Or Later. -$! -$! Written By: Richard Levitte -$! richard@levitte.org -$! -$! -$! Check To See If We Have A Option For P4. -$! -$ IF (P4.EQS."") -$ THEN -$! -$! Get The Version Of VMS We Are Using. -$! -$ ISSEVEN := "" -$ TMP = F$ELEMENT(0,"-",F$EXTRACT(1,4,F$GETSYI("VERSION"))) -$ TMP = F$INTEGER(F$ELEMENT(0,".",TMP)+F$ELEMENT(1,".",TMP)) -$! -$! Check To See If The VMS Version Is v7.1 Or Later. -$! -$ IF (TMP.GE.71) -$ THEN -$! -$! We Have OpenVMS v7.1 Or Later, So Use The Special Threads. -$! -$ ISSEVEN := ,PTHREAD_USE_D4 -$! -$! End The VMS Version Check. -$! -$ ENDIF -$! -$! End The P4 Check. -$! -$ ENDIF -$! -$! Check To See If P3 Is Blank. -$! -$ IF (P3.EQS."") -$ THEN -$! -$! O.K., The User Didn't Specify A Compiler, Let's Try To -$! Find Out Which One To Use. -$! -$! Check To See If We Have GNU C. -$! -$ IF (F$TRNLNM("GNU_CC").NES."") -$ THEN -$! -$! Looks Like GNUC, Set To Use GNUC. -$! -$ P3 = "GNUC" -$! -$! Else... -$! -$ ELSE -$! -$! Check To See If We Have VAXC Or DECC. -$! -$ IF (ARCH.NES."VAX").OR.(F$TRNLNM("DECC$CC_DEFAULT").NES."") -$ THEN -$! -$! Looks Like DECC, Set To Use DECC. -$! -$ P3 = "DECC" -$! -$! Else... -$! -$ ELSE -$! -$! Looks Like VAXC, Set To Use VAXC. -$! 
-$ P3 = "VAXC" -$! -$! End The VAXC Compiler Check. -$! -$ ENDIF -$! -$! End The DECC & VAXC Compiler Check. -$! -$ ENDIF -$! -$! End The Compiler Check. -$! -$ ENDIF -$! -$! Set Up Initial CC Definitions, Possibly With User Ones -$! -$ CCDEFS = "" -$ IF F$TYPE(USER_CCDEFS) .NES. "" THEN CCDEFS = USER_CCDEFS -$ CCEXTRAFLAGS = "" -$ IF F$TYPE(USER_CCFLAGS) .NES. "" THEN CCEXTRAFLAGS = USER_CCFLAGS -$ CCDISABLEWARNINGS = "" -$ IF F$TYPE(USER_CCDISABLEWARNINGS) .NES. "" THEN - - CCDISABLEWARNINGS = USER_CCDISABLEWARNINGS -$! -$! Check To See If The User Entered A Valid Paramter. -$! -$ IF (P3.EQS."VAXC").OR.(P3.EQS."DECC").OR.(P3.EQS."GNUC") -$ THEN -$! -$! Check To See If The User Wanted DECC. -$! -$ IF (P3.EQS."DECC") -$ THEN -$! -$! Looks Like DECC, Set To Use DECC. -$! -$ COMPILER = "DECC" -$! -$! Tell The User We Are Using DECC. -$! -$ WRITE SYS$OUTPUT "Using DECC 'C' Compiler." -$! -$! Use DECC... -$! -$ CC = "CC" -$ IF ARCH.EQS."VAX" .AND. F$TRNLNM("DECC$CC_DEFAULT").NES."/DECC" - - THEN CC = "CC/DECC" -$ CC = CC + "/''CC_OPTIMIZE'/''DEBUGGER'/STANDARD=ANSI89" + - - "/NOLIST/PREFIX=ALL" + CCEXTRAFLAGS -$! -$! Define The Linker Options File Name. -$! -$ OPT_FILE = "''EXE_DIR'VAX_DECC_OPTIONS.OPT" -$! -$! End DECC Check. -$! -$ ENDIF -$! -$! Check To See If We Are To Use VAXC. -$! -$ IF (P3.EQS."VAXC") -$ THEN -$! -$! Looks Like VAXC, Set To Use VAXC. -$! -$ COMPILER = "VAXC" -$! -$! Tell The User We Are Using VAX C. -$! -$ WRITE SYS$OUTPUT "Using VAXC 'C' Compiler." -$! -$! Compile Using VAXC. -$! -$ CC = "CC" -$ IF ARCH.NES."VAX" -$ THEN -$ WRITE SYS$OUTPUT "There is no VAX C on ''ARCH'!" -$ EXIT -$ ENDIF -$ IF F$TRNLNM("DECC$CC_DEFAULT").EQS."/DECC" THEN CC = "CC/VAXC" -$ CC = CC + "/''CC_OPTIMIZE'/''DEBUGGER'/NOLIST" + CCEXTRAFLAGS -$ CCDEFS = """VAXC""," + CCDEFS -$! -$! Define <sys> As SYS$COMMON:[SYSLIB] -$! -$ DEFINE/NOLOG SYS SYS$COMMON:[SYSLIB] -$! -$! Define The Linker Options File Name. -$! -$ OPT_FILE = "''EXE_DIR'VAX_VAXC_OPTIONS.OPT" -$! -$! 
End VAXC Check -$! -$ ENDIF -$! -$! Check To See If We Are To Use GNU C. -$! -$ IF (P3.EQS."GNUC") -$ THEN -$! -$! Looks Like GNUC, Set To Use GNUC. -$! -$ COMPILER = "GNUC" -$! -$! Tell The User We Are Using GNUC. -$! -$ WRITE SYS$OUTPUT "Using GNU 'C' Compiler." -$! -$! Use GNU C... -$! -$ CC = "GCC/NOCASE_HACK/''GCC_OPTIMIZE'/''DEBUGGER'/NOLIST" + CCEXTRAFLAGS -$! -$! Define The Linker Options File Name. -$! -$ OPT_FILE = "''EXE_DIR'VAX_GNUC_OPTIONS.OPT" -$! -$! End The GNU C Check. -$! -$ ENDIF -$! -$! Set up default defines -$! -$ CCDEFS = """FLAT_INC=1""," + CCDEFS -$! -$! Finish up the definition of CC. -$! -$ IF COMPILER .EQS. "DECC" -$ THEN -$ IF CCDISABLEWARNINGS .EQS. "" -$ THEN -$ CC4DISABLEWARNINGS = "DOLLARID" -$ ELSE -$ CC4DISABLEWARNINGS = CCDISABLEWARNINGS + ",DOLLARID" -$ CCDISABLEWARNINGS = "/WARNING=(DISABLE=(" + CCDISABLEWARNINGS + "))" -$ ENDIF -$ CC4DISABLEWARNINGS = "/WARNING=(DISABLE=(" + CC4DISABLEWARNINGS + "))" -$ ELSE -$ CCDISABLEWARNINGS = "" -$ CC4DISABLEWARNINGS = "" -$ ENDIF -$ CC = CC + "/DEFINE=(" + CCDEFS + ")" + CCDISABLEWARNINGS -$! -$! Show user the result -$! -$ WRITE SYS$OUTPUT "Main Compiling Command: ",CC -$! -$! Else The User Entered An Invalid Argument. -$! -$ ELSE -$! -$! Tell The User We Don't Know What They Want. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ",P3," Is Invalid. The Valid Options Are:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " VAXC : To Compile With VAX C." -$ WRITE SYS$OUTPUT " DECC : To Compile With DEC C." -$ WRITE SYS$OUTPUT " GNUC : To Compile With GNU C." -$ WRITE SYS$OUTPUT "" -$! -$! Time To EXIT. -$! -$ EXIT -$! -$! End The P3 Check. -$! -$ ENDIF -$! -$! Time To RETURN... -$! 
-$ RETURN diff --git a/thirdparty/openssl/crypto/des/des.c b/thirdparty/openssl/crypto/des/des.c index 586aed7237..d7374382d8 100644 --- a/thirdparty/openssl/crypto/des/des.c +++ b/thirdparty/openssl/crypto/des/des.c @@ -456,7 +456,7 @@ void doencryption(void) len = l - rem; if (feof(DES_IN)) { for (i = 7 - rem; i > 0; i--) { - if (RAND_pseudo_bytes(buf + l++, 1) < 0) + if (RAND_bytes(buf + l++, 1) <= 0) goto problems; } buf[l++] = rem; diff --git a/thirdparty/openssl/crypto/des/enc_writ.c b/thirdparty/openssl/crypto/des/enc_writ.c index bfaabde516..c2aaa8e98c 100644 --- a/thirdparty/openssl/crypto/des/enc_writ.c +++ b/thirdparty/openssl/crypto/des/enc_writ.c @@ -135,7 +135,7 @@ int DES_enc_write(int fd, const void *_buf, int len, if (len < 8) { cp = shortbuf; memcpy(shortbuf, buf, len); - if (RAND_pseudo_bytes(shortbuf + len, 8 - len) < 0) { + if (RAND_bytes(shortbuf + len, 8 - len) <= 0) { return -1; } rnum = 8; diff --git a/thirdparty/openssl/crypto/des/makefile.bc b/thirdparty/openssl/crypto/des/makefile.bc deleted file mode 100644 index 1fe6d4915a..0000000000 --- a/thirdparty/openssl/crypto/des/makefile.bc +++ /dev/null @@ -1,50 +0,0 @@ -# -# Origional BC Makefile from Teun <Teun.Nijssen@kub.nl> -# -# -CC = bcc -TLIB = tlib /0 /C -# note: the -3 flag produces code for 386, 486, Pentium etc; omit it for 286s -OPTIMIZE= -3 -O2 -#WINDOWS= -W -CFLAGS = -c -ml -d $(OPTIMIZE) $(WINDOWS) -DMSDOS -LFLAGS = -ml $(WINDOWS) - -.c.obj: - $(CC) $(CFLAGS) $*.c - -.obj.exe: - $(CC) $(LFLAGS) -e$*.exe $*.obj libdes.lib - -all: $(LIB) destest.exe rpw.exe des.exe speed.exe - -# "make clean": use a directory containing only libdes .exe and .obj files... 
-clean: - del *.exe - del *.obj - del libdes.lib - del libdes.rsp - -OBJS= cbc_cksm.obj cbc_enc.obj ecb_enc.obj pcbc_enc.obj \ - qud_cksm.obj rand_key.obj set_key.obj str2key.obj \ - enc_read.obj enc_writ.obj fcrypt.obj cfb_enc.obj \ - ecb3_enc.obj ofb_enc.obj cbc3_enc.obj read_pwd.obj\ - cfb64enc.obj ofb64enc.obj ede_enc.obj cfb64ede.obj\ - ofb64ede.obj supp.obj - -LIB= libdes.lib - -$(LIB): $(OBJS) - del $(LIB) - makersp "+%s &\n" &&| - $(OBJS) -| >libdes.rsp - $(TLIB) libdes.lib @libdes.rsp,nul - del libdes.rsp - -destest.exe: destest.obj libdes.lib -rpw.exe: rpw.obj libdes.lib -speed.exe: speed.obj libdes.lib -des.exe: des.obj libdes.lib - - diff --git a/thirdparty/openssl/crypto/des/set_key.c b/thirdparty/openssl/crypto/des/set_key.c index 8fd8fe14bb..d9c5e7fcb3 100644 --- a/thirdparty/openssl/crypto/des/set_key.c +++ b/thirdparty/openssl/crypto/des/set_key.c @@ -120,7 +120,7 @@ int DES_check_key_parity(const_DES_cblock *key) } /*- - * Weak and semi week keys as take from + * Weak and semi weak keys as taken from * %A D.W. Davies * %A W.L. 
Price * %T Security for Computer Networks diff --git a/thirdparty/openssl/crypto/des/t/test b/thirdparty/openssl/crypto/des/t/test deleted file mode 100644 index 97acd0552e..0000000000 --- a/thirdparty/openssl/crypto/des/t/test +++ /dev/null @@ -1,27 +0,0 @@ -#!./perl - -BEGIN { push(@INC, qw(../../../lib ../../lib ../lib lib)); } - -use DES; - -$key='00000000'; -$ks=DES::set_key($key); -@a=split(//,$ks); -foreach (@a) { printf "%02x-",ord($_); } -print "\n"; - - -$key=DES::random_key(); -print "($_)\n"; -@a=split(//,$key); -foreach (@a) { printf "%02x-",ord($_); } -print "\n"; -$str="this is and again into the breach"; -($k1,$k2)=DES::string_to_2keys($str); -@a=split(//,$k1); -foreach (@a) { printf "%02x-",ord($_); } -print "\n"; -@a=split(//,$k2); -foreach (@a) { printf "%02x-",ord($_); } -print "\n"; - diff --git a/thirdparty/openssl/crypto/des/times/486-50.sol b/thirdparty/openssl/crypto/des/times/486-50.sol deleted file mode 100644 index 0de62d6db3..0000000000 --- a/thirdparty/openssl/crypto/des/times/486-50.sol +++ /dev/null @@ -1,16 +0,0 @@ -Solaris 2.4, 486 50mhz, gcc 2.6.3 -options des ecb/s -16 r2 i 43552.51 100.0% -16 r1 i 43487.45 99.9% -16 c p 43003.23 98.7% -16 r2 p 42339.00 97.2% -16 c i 41900.91 96.2% -16 r1 p 41360.64 95.0% - 4 c i 38728.48 88.9% - 4 c p 38225.63 87.8% - 4 r1 i 38085.79 87.4% - 4 r2 i 37825.64 86.9% - 4 r2 p 34611.00 79.5% - 4 r1 p 31802.00 73.0% --DDES_UNROLL -DDES_RISC2 - diff --git a/thirdparty/openssl/crypto/des/times/586-100.lnx b/thirdparty/openssl/crypto/des/times/586-100.lnx deleted file mode 100644 index 4323914a11..0000000000 --- a/thirdparty/openssl/crypto/des/times/586-100.lnx +++ /dev/null @@ -1,20 +0,0 @@ -Pentium 100 -Linux 2 kernel -gcc 2.7.0 -O3 -fomit-frame-pointer -No X server running, just a console, it makes the top speed jump from 151,000 -to 158,000 :-). 
-options des ecb/s -assember 281000.00 177.1% -16 r1 p 158667.40 100.0% -16 r1 i 148471.70 93.6% -16 r2 p 143961.80 90.7% -16 r2 i 141689.20 89.3% - 4 r1 i 140100.00 88.3% - 4 r2 i 134049.40 84.5% -16 c i 124145.20 78.2% -16 c p 121584.20 76.6% - 4 c i 118116.00 74.4% - 4 r2 p 117977.90 74.4% - 4 c p 114971.40 72.5% - 4 r1 p 114578.40 72.2% --DDES_UNROLL -DDES_RISC1 -DDES_PTR diff --git a/thirdparty/openssl/crypto/des/times/686-200.fre b/thirdparty/openssl/crypto/des/times/686-200.fre deleted file mode 100644 index 7d83f6adee..0000000000 --- a/thirdparty/openssl/crypto/des/times/686-200.fre +++ /dev/null @@ -1,18 +0,0 @@ -Pentium 100 -Free BSD 2.1.5 kernel -gcc 2.7.2.2 -O3 -fomit-frame-pointer -options des ecb/s -assember 578000.00 133.1% -16 r2 i 434454.80 100.0% -16 r1 i 433621.43 99.8% -16 r2 p 431375.69 99.3% - 4 r1 i 423722.30 97.5% - 4 r2 i 422399.40 97.2% -16 r1 p 421739.40 97.1% -16 c i 399027.94 91.8% -16 c p 372251.70 85.7% - 4 c i 365118.35 84.0% - 4 c p 352880.51 81.2% - 4 r2 p 255104.90 58.7% - 4 r1 p 251289.18 57.8% --DDES_UNROLL -DDES_RISC2 diff --git a/thirdparty/openssl/crypto/des/times/sparc.gcc b/thirdparty/openssl/crypto/des/times/sparc.gcc deleted file mode 100644 index 8eaa042104..0000000000 --- a/thirdparty/openssl/crypto/des/times/sparc.gcc +++ /dev/null @@ -1,17 +0,0 @@ -solaris 2.5.1 - sparc 10 50mhz - gcc 2.7.2 - -options des ecb/s -16 c i 124382.70 100.0% - 4 c i 118884.68 95.6% -16 c p 112261.20 90.3% -16 r2 i 111777.10 89.9% -16 r2 p 108896.30 87.5% -16 r1 p 108791.59 87.5% - 4 c p 107290.10 86.3% - 4 r1 p 104583.80 84.1% -16 r1 i 104206.20 83.8% - 4 r2 p 103709.80 83.4% - 4 r2 i 98306.43 79.0% - 4 r1 i 91525.80 73.6% --DDES_UNROLL - diff --git a/thirdparty/openssl/crypto/dh/dh_ameth.c b/thirdparty/openssl/crypto/dh/dh_ameth.c index ac72468bd1..4558283576 100644 --- a/thirdparty/openssl/crypto/dh/dh_ameth.c +++ b/thirdparty/openssl/crypto/dh/dh_ameth.c @@ -519,7 +519,7 @@ static int dh_copy_parameters(EVP_PKEY *to, const EVP_PKEY 
*from) static int dh_missing_parameters(const EVP_PKEY *a) { - if (!a->pkey.dh->p || !a->pkey.dh->g) + if (a->pkey.dh == NULL || a->pkey.dh->p == NULL || a->pkey.dh->g == NULL) return 1; return 0; } diff --git a/thirdparty/openssl/crypto/dh/dh_key.c b/thirdparty/openssl/crypto/dh/dh_key.c index 1d80fb2c5f..387558f146 100644 --- a/thirdparty/openssl/crypto/dh/dh_key.c +++ b/thirdparty/openssl/crypto/dh/dh_key.c @@ -223,6 +223,8 @@ static int compute_key(unsigned char *key, const BIGNUM *pub_key, DH *dh) goto err; BN_CTX_start(ctx); tmp = BN_CTX_get(ctx); + if (tmp == NULL) + goto err; if (dh->priv_key == NULL) { DHerr(DH_F_COMPUTE_KEY, DH_R_NO_PRIVATE_VALUE); diff --git a/thirdparty/openssl/crypto/dsa/dsa_ameth.c b/thirdparty/openssl/crypto/dsa/dsa_ameth.c index cc83d6e6ad..c4fa105747 100644 --- a/thirdparty/openssl/crypto/dsa/dsa_ameth.c +++ b/thirdparty/openssl/crypto/dsa/dsa_ameth.c @@ -350,7 +350,7 @@ static int dsa_missing_parameters(const EVP_PKEY *pkey) { DSA *dsa; dsa = pkey->pkey.dsa; - if ((dsa->p == NULL) || (dsa->q == NULL) || (dsa->g == NULL)) + if (dsa == NULL || dsa->p == NULL || dsa->q == NULL || dsa->g == NULL) return 1; return 0; } diff --git a/thirdparty/openssl/crypto/dsa/dsa_gen.c b/thirdparty/openssl/crypto/dsa/dsa_gen.c index 15f3bb4f3f..1fce0f81c2 100644 --- a/thirdparty/openssl/crypto/dsa/dsa_gen.c +++ b/thirdparty/openssl/crypto/dsa/dsa_gen.c @@ -185,6 +185,9 @@ int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, p = BN_CTX_get(ctx); test = BN_CTX_get(ctx); + if (test == NULL) + goto err; + if (!BN_lshift(test, BN_value_one(), bits - 1)) goto err; @@ -197,7 +200,7 @@ int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, goto err; if (!seed_len || !seed_in) { - if (RAND_pseudo_bytes(seed, qsize) < 0) + if (RAND_bytes(seed, qsize) <= 0) goto err; seed_is_random = 1; } else { @@ -491,7 +494,7 @@ int dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N, goto err; if (!seed_in) { - if (RAND_pseudo_bytes(seed, seed_len) < 0) + if 
(RAND_bytes(seed, seed_len) <= 0) goto err; } /* step 2 */ diff --git a/thirdparty/openssl/crypto/dsa/dsa_ossl.c b/thirdparty/openssl/crypto/dsa/dsa_ossl.c index efc4f1b6ae..58013a4a13 100644 --- a/thirdparty/openssl/crypto/dsa/dsa_ossl.c +++ b/thirdparty/openssl/crypto/dsa/dsa_ossl.c @@ -247,11 +247,13 @@ static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, do if (!BN_rand_range(&k, dsa->q)) goto err; - while (BN_is_zero(&k)) ; + while (BN_is_zero(&k)); + if ((dsa->flags & DSA_FLAG_NO_EXP_CONSTTIME) == 0) { BN_set_flags(&k, BN_FLG_CONSTTIME); } + if (dsa->flags & DSA_FLAG_CACHE_MONT_P) { if (!BN_MONT_CTX_set_locked(&dsa->method_mont_p, CRYPTO_LOCK_DSA, dsa->p, ctx)) @@ -264,6 +266,8 @@ static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, if (!BN_copy(&kq, &k)) goto err; + BN_set_flags(&kq, BN_FLG_CONSTTIME); + /* * We do not want timing information to leak the length of k, so we * compute g^k using an equivalent exponent of fixed length. (This @@ -282,6 +286,7 @@ static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, } else { K = &k; } + DSA_BN_MOD_EXP(goto err, dsa, r, dsa->g, K, dsa->p, ctx, dsa->method_mont_p); if (!BN_mod(r, r, dsa->q, ctx)) diff --git a/thirdparty/openssl/crypto/dsa/dsa_pmeth.c b/thirdparty/openssl/crypto/dsa/dsa_pmeth.c index 42b8bb0862..78724839b5 100644 --- a/thirdparty/openssl/crypto/dsa/dsa_pmeth.c +++ b/thirdparty/openssl/crypto/dsa/dsa_pmeth.c @@ -180,7 +180,7 @@ static int pkey_dsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) DSAerr(DSA_F_PKEY_DSA_CTRL, DSA_R_INVALID_DIGEST_TYPE); return 0; } - dctx->md = p2; + dctx->pmd = p2; return 1; case EVP_PKEY_CTRL_MD: diff --git a/thirdparty/openssl/crypto/ec/ec2_mult.c b/thirdparty/openssl/crypto/ec/ec2_mult.c index 68cc8771d5..1f9cc00aea 100644 --- a/thirdparty/openssl/crypto/ec/ec2_mult.c +++ b/thirdparty/openssl/crypto/ec/ec2_mult.c @@ -267,7 +267,7 @@ static int ec_GF2m_montgomery_point_multiply(const EC_GROUP *group, BN_CTX *ctx) { BIGNUM 
*x1, *x2, *z1, *z2; - int ret = 0, i; + int ret = 0, i, group_top; BN_ULONG mask, word; if (r == point) { @@ -297,10 +297,12 @@ static int ec_GF2m_montgomery_point_multiply(const EC_GROUP *group, x2 = &r->X; z2 = &r->Y; - bn_wexpand(x1, group->field.top); - bn_wexpand(z1, group->field.top); - bn_wexpand(x2, group->field.top); - bn_wexpand(z2, group->field.top); + group_top = group->field.top; + if (bn_wexpand(x1, group_top) == NULL + || bn_wexpand(z1, group_top) == NULL + || bn_wexpand(x2, group_top) == NULL + || bn_wexpand(z2, group_top) == NULL) + goto err; if (!BN_GF2m_mod_arr(x1, &point->X, group->poly)) goto err; /* x1 = x */ @@ -329,14 +331,14 @@ static int ec_GF2m_montgomery_point_multiply(const EC_GROUP *group, for (; i >= 0; i--) { word = scalar->d[i]; while (mask) { - BN_consttime_swap(word & mask, x1, x2, group->field.top); - BN_consttime_swap(word & mask, z1, z2, group->field.top); + BN_consttime_swap(word & mask, x1, x2, group_top); + BN_consttime_swap(word & mask, z1, z2, group_top); if (!gf2m_Madd(group, &point->X, x2, z2, x1, z1, ctx)) goto err; if (!gf2m_Mdouble(group, x1, z1, ctx)) goto err; - BN_consttime_swap(word & mask, x1, x2, group->field.top); - BN_consttime_swap(word & mask, z1, z2, group->field.top); + BN_consttime_swap(word & mask, x1, x2, group_top); + BN_consttime_swap(word & mask, z1, z2, group_top); mask >>= 1; } mask = BN_TBIT; diff --git a/thirdparty/openssl/crypto/ec/ec_ameth.c b/thirdparty/openssl/crypto/ec/ec_ameth.c index 83e208cfe4..2c41c6e7a9 100644 --- a/thirdparty/openssl/crypto/ec/ec_ameth.c +++ b/thirdparty/openssl/crypto/ec/ec_ameth.c @@ -66,9 +66,12 @@ #endif #include <openssl/asn1t.h> #include "asn1_locl.h" +#include "ec_lcl.h" +#ifndef OPENSSL_NO_CMS static int ecdh_cms_decrypt(CMS_RecipientInfo *ri); static int ecdh_cms_encrypt(CMS_RecipientInfo *ri); +#endif static int eckey_param2type(int *pptype, void **ppval, EC_KEY *ec_key) { @@ -221,6 +224,8 @@ static int eckey_pub_cmp(const EVP_PKEY *a, const EVP_PKEY *b) 
const EC_GROUP *group = EC_KEY_get0_group(b->pkey.ec); const EC_POINT *pa = EC_KEY_get0_public_key(a->pkey.ec), *pb = EC_KEY_get0_public_key(b->pkey.ec); + if (group == NULL || pa == NULL || pb == NULL) + return -2; r = EC_POINT_cmp(group, pa, pb, NULL); if (r == 0) return 1; @@ -299,15 +304,13 @@ static int eckey_priv_decode(EVP_PKEY *pkey, PKCS8_PRIV_KEY_INFO *p8) static int eckey_priv_encode(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pkey) { - EC_KEY *ec_key; + EC_KEY ec_key = *(pkey->pkey.ec); unsigned char *ep, *p; int eplen, ptype; void *pval; - unsigned int tmp_flags, old_flags; + unsigned int old_flags; - ec_key = pkey->pkey.ec; - - if (!eckey_param2type(&ptype, &pval, ec_key)) { + if (!eckey_param2type(&ptype, &pval, &ec_key)) { ECerr(EC_F_ECKEY_PRIV_ENCODE, EC_R_DECODE_ERROR); return 0; } @@ -318,34 +321,31 @@ static int eckey_priv_encode(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pkey) * do not include the parameters in the SEC1 private key see PKCS#11 * 12.11 */ - old_flags = EC_KEY_get_enc_flags(ec_key); - tmp_flags = old_flags | EC_PKEY_NO_PARAMETERS; - EC_KEY_set_enc_flags(ec_key, tmp_flags); - eplen = i2d_ECPrivateKey(ec_key, NULL); + old_flags = EC_KEY_get_enc_flags(&ec_key); + EC_KEY_set_enc_flags(&ec_key, old_flags | EC_PKEY_NO_PARAMETERS); + + eplen = i2d_ECPrivateKey(&ec_key, NULL); if (!eplen) { - EC_KEY_set_enc_flags(ec_key, old_flags); ECerr(EC_F_ECKEY_PRIV_ENCODE, ERR_R_EC_LIB); return 0; } ep = (unsigned char *)OPENSSL_malloc(eplen); if (!ep) { - EC_KEY_set_enc_flags(ec_key, old_flags); ECerr(EC_F_ECKEY_PRIV_ENCODE, ERR_R_MALLOC_FAILURE); return 0; } p = ep; - if (!i2d_ECPrivateKey(ec_key, &p)) { - EC_KEY_set_enc_flags(ec_key, old_flags); + if (!i2d_ECPrivateKey(&ec_key, &p)) { OPENSSL_free(ep); ECerr(EC_F_ECKEY_PRIV_ENCODE, ERR_R_EC_LIB); return 0; } - /* restore old encoding flags */ - EC_KEY_set_enc_flags(ec_key, old_flags); if (!PKCS8_pkey_set0(p8, OBJ_nid2obj(NID_X9_62_id_ecPublicKey), 0, - ptype, pval, ep, eplen)) + ptype, pval, ep, 
eplen)) { + OPENSSL_free(ep); return 0; + } return 1; } @@ -378,7 +378,7 @@ static int ec_bits(const EVP_PKEY *pkey) static int ec_missing_parameters(const EVP_PKEY *pkey) { - if (EC_KEY_get0_group(pkey->pkey.ec) == NULL) + if (pkey->pkey.ec == NULL || EC_KEY_get0_group(pkey->pkey.ec) == NULL) return 1; return 0; } @@ -398,6 +398,8 @@ static int ec_cmp_parameters(const EVP_PKEY *a, const EVP_PKEY *b) { const EC_GROUP *group_a = EC_KEY_get0_group(a->pkey.ec), *group_b = EC_KEY_get0_group(b->pkey.ec); + if (group_a == NULL || group_b == NULL) + return -2; if (EC_GROUP_cmp(group_a, group_b, NULL)) return 0; else diff --git a/thirdparty/openssl/crypto/ec/ec_asn1.c b/thirdparty/openssl/crypto/ec/ec_asn1.c index 33abf61f44..b0cd3e1788 100644 --- a/thirdparty/openssl/crypto/ec/ec_asn1.c +++ b/thirdparty/openssl/crypto/ec/ec_asn1.c @@ -62,17 +62,22 @@ #include <openssl/asn1t.h> #include <openssl/objects.h> +#define OSSL_NELEM(x) (sizeof(x)/sizeof(x[0])) + int EC_GROUP_get_basis_type(const EC_GROUP *group) { - int i = 0; + int i; if (EC_METHOD_get_field_type(EC_GROUP_method_of(group)) != NID_X9_62_characteristic_two_field) /* everything else is currently not supported */ return 0; - while (group->poly[i] != 0) - i++; + /* Find the last non-zero element of group->poly[] */ + for (i = 0; + i < (int)OSSL_NELEM(group->poly) && group->poly[i] != 0; + i++) + continue; if (i == 4) return NID_X9_62_ppBasis; diff --git a/thirdparty/openssl/crypto/ec/ec_key.c b/thirdparty/openssl/crypto/ec/ec_key.c index bc94ab5661..456080ecfe 100644 --- a/thirdparty/openssl/crypto/ec/ec_key.c +++ b/thirdparty/openssl/crypto/ec/ec_key.c @@ -377,9 +377,9 @@ int EC_KEY_set_public_key_affine_coordinates(EC_KEY *key, BIGNUM *x, return 0; } ctx = BN_CTX_new(); - if (!ctx) - goto err; - + if (ctx == NULL) + return 0; + BN_CTX_start(ctx); point = EC_POINT_new(key->group); if (!point) @@ -432,10 +432,9 @@ int EC_KEY_set_public_key_affine_coordinates(EC_KEY *key, BIGNUM *x, ok = 1; err: - if (ctx) - 
BN_CTX_free(ctx); - if (point) - EC_POINT_free(point); + BN_CTX_end(ctx); + BN_CTX_free(ctx); + EC_POINT_free(point); return ok; } diff --git a/thirdparty/openssl/crypto/ec/ec_mult.c b/thirdparty/openssl/crypto/ec/ec_mult.c index 23b8c3089b..24ca67a6ef 100644 --- a/thirdparty/openssl/crypto/ec/ec_mult.c +++ b/thirdparty/openssl/crypto/ec/ec_mult.c @@ -68,10 +68,14 @@ #include "ec_lcl.h" /* - * This file implements the wNAF-based interleaving multi-exponentation method - * (<URL:http://www.informatik.tu-darmstadt.de/TI/Mitarbeiter/moeller.html#multiexp>); - * for multiplication with precomputation, we use wNAF splitting - * (<URL:http://www.informatik.tu-darmstadt.de/TI/Mitarbeiter/moeller.html#fastexp>). + * This file implements the wNAF-based interleaving multi-exponentiation method + * Formerly at: + * http://www.informatik.tu-darmstadt.de/TI/Mitarbeiter/moeller.html#multiexp + * You might now find it here: + * http://link.springer.com/chapter/10.1007%2F3-540-45537-X_13 + * http://www.bmoeller.de/pdf/TI-01-08.multiexp.pdf + * For multiplication with precomputation, we use wNAF splitting, formerly at: + * http://www.informatik.tu-darmstadt.de/TI/Mitarbeiter/moeller.html#fastexp */ /* structure for precomputed multiples of the generator */ diff --git a/thirdparty/openssl/crypto/ec/eck_prn.c b/thirdparty/openssl/crypto/ec/eck_prn.c index df9b37a750..176ec1f173 100644 --- a/thirdparty/openssl/crypto/ec/eck_prn.c +++ b/thirdparty/openssl/crypto/ec/eck_prn.c @@ -342,7 +342,7 @@ static int print_bin(BIO *fp, const char *name, const unsigned char *buf, size_t len, int off) { size_t i; - char str[128]; + char str[128 + 1 + 4]; if (buf == NULL) return 1; diff --git a/thirdparty/openssl/crypto/ec/ecp_nistz256.c b/thirdparty/openssl/crypto/ec/ecp_nistz256.c index ca44d0aaee..99b8d613c8 100644 --- a/thirdparty/openssl/crypto/ec/ecp_nistz256.c +++ b/thirdparty/openssl/crypto/ec/ecp_nistz256.c @@ -82,19 +82,36 @@ typedef struct ec_pre_comp_st { } EC_PRE_COMP; /* Functions 
implemented in assembly */ +/* + * Most of below mentioned functions *preserve* the property of inputs + * being fully reduced, i.e. being in [0, modulus) range. Simply put if + * inputs are fully reduced, then output is too. Note that reverse is + * not true, in sense that given partially reduced inputs output can be + * either, not unlikely reduced. And "most" in first sentence refers to + * the fact that given the calculations flow one can tolerate that + * addition, 1st function below, produces partially reduced result *if* + * multiplications by 2 and 3, which customarily use addition, fully + * reduce it. This effectively gives two options: a) addition produces + * fully reduced result [as long as inputs are, just like remaining + * functions]; b) addition is allowed to produce partially reduced + * result, but multiplications by 2 and 3 perform additional reduction + * step. Choice between the two can be platform-specific, but it was a) + * in all cases so far... + */ +/* Modular add: res = a+b mod P */ +void ecp_nistz256_add(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS], + const BN_ULONG b[P256_LIMBS]); /* Modular mul by 2: res = 2*a mod P */ void ecp_nistz256_mul_by_2(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]); -/* Modular div by 2: res = a/2 mod P */ -void ecp_nistz256_div_by_2(BN_ULONG res[P256_LIMBS], - const BN_ULONG a[P256_LIMBS]); /* Modular mul by 3: res = 3*a mod P */ void ecp_nistz256_mul_by_3(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]); -/* Modular add: res = a+b mod P */ -void ecp_nistz256_add(BN_ULONG res[P256_LIMBS], - const BN_ULONG a[P256_LIMBS], - const BN_ULONG b[P256_LIMBS]); + +/* Modular div by 2: res = a/2 mod P */ +void ecp_nistz256_div_by_2(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS]); /* Modular sub: res = a-b mod P */ void ecp_nistz256_sub(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS], @@ -205,21 +222,29 @@ static BN_ULONG is_equal(const BN_ULONG a[P256_LIMBS], return 
is_zero(res); } -static BN_ULONG is_one(const BN_ULONG a[P256_LIMBS]) +static BN_ULONG is_one(const BIGNUM *z) { - BN_ULONG res; - - res = a[0] ^ ONE[0]; - res |= a[1] ^ ONE[1]; - res |= a[2] ^ ONE[2]; - res |= a[3] ^ ONE[3]; - if (P256_LIMBS == 8) { - res |= a[4] ^ ONE[4]; - res |= a[5] ^ ONE[5]; - res |= a[6] ^ ONE[6]; + BN_ULONG res = 0; + BN_ULONG *a = z->d; + + if (z->top == (P256_LIMBS - P256_LIMBS / 8)) { + res = a[0] ^ ONE[0]; + res |= a[1] ^ ONE[1]; + res |= a[2] ^ ONE[2]; + res |= a[3] ^ ONE[3]; + if (P256_LIMBS == 8) { + res |= a[4] ^ ONE[4]; + res |= a[5] ^ ONE[5]; + res |= a[6] ^ ONE[6]; + /* + * no check for a[7] (being zero) on 32-bit platforms, + * because value of "one" takes only 7 limbs. + */ + } + res = is_zero(res); } - return is_zero(res); + return res; } static int ecp_nistz256_set_words(BIGNUM *a, BN_ULONG words[P256_LIMBS]) @@ -315,19 +340,16 @@ static void ecp_nistz256_point_add(P256_POINT *r, const BN_ULONG *in2_y = b->Y; const BN_ULONG *in2_z = b->Z; - /* We encode infinity as (0,0), which is not on the curve, - * so it is OK. 
*/ - in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] | - in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]); + /* + * Infinity in encoded as (,,0) + */ + in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]); if (P256_LIMBS == 8) - in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] | - in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]); + in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]); - in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] | - in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]); + in2infty = (in2_z[0] | in2_z[1] | in2_z[2] | in2_z[3]); if (P256_LIMBS == 8) - in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] | - in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]); + in2infty |= (in2_z[4] | in2_z[5] | in2_z[6] | in2_z[7]); in1infty = is_zero(in1infty); in2infty = is_zero(in2infty); @@ -416,15 +438,16 @@ static void ecp_nistz256_point_add_affine(P256_POINT *r, const BN_ULONG *in2_y = b->Y; /* - * In affine representation we encode infty as (0,0), which is not on the - * curve, so it is OK + * Infinity in encoded as (,,0) */ - in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] | - in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]); + in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]); if (P256_LIMBS == 8) - in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] | - in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]); + in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]); + /* + * In affine representation we encode infinity as (0,0), which is + * not on the curve, so it is OK + */ in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] | in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]); if (P256_LIMBS == 8) @@ -741,9 +764,8 @@ static int ecp_nistz256_is_affine_G(const EC_POINT *generator) { return (generator->X.top == P256_LIMBS) && (generator->Y.top == P256_LIMBS) && - (generator->Z.top == (P256_LIMBS - P256_LIMBS / 8)) && is_equal(generator->X.d, def_xG) && - is_equal(generator->Y.d, def_yG) && is_one(generator->Z.d); + is_equal(generator->Y.d, def_yG) && 
is_one(&generator->Z); } static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx) @@ -1249,6 +1271,8 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group, } else #endif { + BN_ULONG infty; + /* First window */ wvalue = (p_str[0] << 1) & mask; index += window_size; @@ -1260,7 +1284,30 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group, ecp_nistz256_neg(p.p.Z, p.p.Y); copy_conditional(p.p.Y, p.p.Z, wvalue & 1); - memcpy(p.p.Z, ONE, sizeof(ONE)); + /* + * Since affine infinity is encoded as (0,0) and + * Jacobian ias (,,0), we need to harmonize them + * by assigning "one" or zero to Z. + */ + infty = (p.p.X[0] | p.p.X[1] | p.p.X[2] | p.p.X[3] | + p.p.Y[0] | p.p.Y[1] | p.p.Y[2] | p.p.Y[3]); + if (P256_LIMBS == 8) + infty |= (p.p.X[4] | p.p.X[5] | p.p.X[6] | p.p.X[7] | + p.p.Y[4] | p.p.Y[5] | p.p.Y[6] | p.p.Y[7]); + + infty = 0 - is_zero(infty); + infty = ~infty; + + p.p.Z[0] = ONE[0] & infty; + p.p.Z[1] = ONE[1] & infty; + p.p.Z[2] = ONE[2] & infty; + p.p.Z[3] = ONE[3] & infty; + if (P256_LIMBS == 8) { + p.p.Z[4] = ONE[4] & infty; + p.p.Z[5] = ONE[5] & infty; + p.p.Z[6] = ONE[6] & infty; + p.p.Z[7] = ONE[7] & infty; + } for (i = 1; i < 37; i++) { unsigned int off = (index - 1) / 8; @@ -1331,7 +1378,7 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group, !ecp_nistz256_set_words(&r->Z, p.p.Z)) { goto err; } - r->Z_is_one = is_one(p.p.Z) & 1; + r->Z_is_one = is_one(&r->Z) & 1; ret = 1; diff --git a/thirdparty/openssl/crypto/ecdh/ech_ossl.c b/thirdparty/openssl/crypto/ecdh/ech_ossl.c index df115cc262..d3b05247fe 100644 --- a/thirdparty/openssl/crypto/ecdh/ech_ossl.c +++ b/thirdparty/openssl/crypto/ecdh/ech_ossl.c @@ -212,7 +212,9 @@ static int ecdh_compute_key(void *out, size_t outlen, const EC_POINT *pub_key, BN_CTX_end(ctx); if (ctx) BN_CTX_free(ctx); - if (buf) + if (buf) { + OPENSSL_cleanse(buf, buflen); OPENSSL_free(buf); + } return (ret); } diff --git a/thirdparty/openssl/crypto/engine/eng_cryptodev.c 
b/thirdparty/openssl/crypto/engine/eng_cryptodev.c index 8fb9c3373d..af59471c47 100644 --- a/thirdparty/openssl/crypto/engine/eng_cryptodev.c +++ b/thirdparty/openssl/crypto/engine/eng_cryptodev.c @@ -26,6 +26,7 @@ * */ +#include <string.h> #include <openssl/objects.h> #include <openssl/engine.h> #include <openssl/evp.h> @@ -809,14 +810,15 @@ static int cryptodev_digest_update(EVP_MD_CTX *ctx, const void *data, if (!(ctx->flags & EVP_MD_CTX_FLAG_ONESHOT)) { /* if application doesn't support one buffer */ - state->mac_data = + char *mac_data = OPENSSL_realloc(state->mac_data, state->mac_len + count); - if (!state->mac_data) { + if (mac_data == NULL) { printf("cryptodev_digest_update: realloc failed\n"); return (0); } + state->mac_data = mac_data; memcpy(state->mac_data + state->mac_len, data, count); state->mac_len += count; @@ -934,11 +936,15 @@ static int cryptodev_digest_copy(EVP_MD_CTX *to, const EVP_MD_CTX *from) return (0); } + dstate->mac_len = fstate->mac_len; if (fstate->mac_len != 0) { if (fstate->mac_data != NULL) { dstate->mac_data = OPENSSL_malloc(fstate->mac_len); + if (dstate->mac_data == NULL) { + printf("cryptodev_digest_init: malloc failed\n"); + return 0; + } memcpy(dstate->mac_data, fstate->mac_data, fstate->mac_len); - dstate->mac_len = fstate->mac_len; } } @@ -1064,8 +1070,7 @@ static void zapparams(struct crypt_kop *kop) int i; for (i = 0; i < kop->crk_iparams + kop->crk_oparams; i++) { - if (kop->crk_param[i].crp_p) - free(kop->crk_param[i].crp_p); + OPENSSL_free(kop->crk_param[i].crp_p); kop->crk_param[i].crp_p = NULL; kop->crk_param[i].crp_nbits = 0; } @@ -1078,16 +1083,25 @@ cryptodev_asym(struct crypt_kop *kop, int rlen, BIGNUM *r, int slen, int fd, ret = -1; if ((fd = get_asym_dev_crypto()) < 0) - return (ret); + return ret; if (r) { - kop->crk_param[kop->crk_iparams].crp_p = calloc(rlen, sizeof(char)); + kop->crk_param[kop->crk_iparams].crp_p = OPENSSL_malloc(rlen); + if (kop->crk_param[kop->crk_iparams].crp_p == NULL) + return ret; + 
memset(kop->crk_param[kop->crk_iparams].crp_p, 0, (size_t)rlen); kop->crk_param[kop->crk_iparams].crp_nbits = rlen * 8; kop->crk_oparams++; } if (s) { - kop->crk_param[kop->crk_iparams + 1].crp_p = - calloc(slen, sizeof(char)); + kop->crk_param[kop->crk_iparams + 1].crp_p = OPENSSL_malloc(slen); + /* No need to free the kop->crk_iparams parameter if it was allocated, + * callers of this routine have to free allocated parameters through + * zapparams both in case of success and failure + */ + if (kop->crk_param[kop->crk_iparams+1].crp_p == NULL) + return ret; + memset(kop->crk_param[kop->crk_iparams + 1].crp_p, 0, (size_t)slen); kop->crk_param[kop->crk_iparams + 1].crp_nbits = slen * 8; kop->crk_oparams++; } @@ -1100,7 +1114,7 @@ cryptodev_asym(struct crypt_kop *kop, int rlen, BIGNUM *r, int slen, ret = 0; } - return (ret); + return ret; } static int diff --git a/thirdparty/openssl/crypto/err/err.c b/thirdparty/openssl/crypto/err/err.c index e77d963b6b..0b1fcfc1f1 100644 --- a/thirdparty/openssl/crypto/err/err.c +++ b/thirdparty/openssl/crypto/err/err.c @@ -172,6 +172,7 @@ static ERR_STRING_DATA ERR_str_functs[] = { # endif {ERR_PACK(0, SYS_F_OPENDIR, 0), "opendir"}, {ERR_PACK(0, SYS_F_FREAD, 0), "fread"}, + {ERR_PACK(0, SYS_F_FFLUSH, 0), "fflush"}, {0, NULL}, }; @@ -868,6 +869,9 @@ void ERR_error_string_n(unsigned long e, char *buf, size_t len) const char *ls, *fs, *rs; unsigned long l, f, r; + if (len == 0) + return; + l = ERR_GET_LIB(e); f = ERR_GET_FUNC(e); r = ERR_GET_REASON(e); diff --git a/thirdparty/openssl/crypto/err/openssl.ec b/thirdparty/openssl/crypto/err/openssl.ec deleted file mode 100644 index 139afe3234..0000000000 --- a/thirdparty/openssl/crypto/err/openssl.ec +++ /dev/null @@ -1,98 +0,0 @@ -# crypto/err/openssl.ec - -# configuration file for util/mkerr.pl - -# files that may have to be rewritten by util/mkerr.pl -L ERR NONE NONE -L BN crypto/bn/bn.h crypto/bn/bn_err.c -L RSA crypto/rsa/rsa.h crypto/rsa/rsa_err.c -L DH crypto/dh/dh.h 
crypto/dh/dh_err.c -L EVP crypto/evp/evp.h crypto/evp/evp_err.c -L BUF crypto/buffer/buffer.h crypto/buffer/buf_err.c -L OBJ crypto/objects/objects.h crypto/objects/obj_err.c -L PEM crypto/pem/pem.h crypto/pem/pem_err.c -L DSA crypto/dsa/dsa.h crypto/dsa/dsa_err.c -L X509 crypto/x509/x509.h crypto/x509/x509_err.c -L ASN1 crypto/asn1/asn1.h crypto/asn1/asn1_err.c -L CONF crypto/conf/conf.h crypto/conf/conf_err.c -L CRYPTO crypto/crypto.h crypto/cpt_err.c -L EC crypto/ec/ec.h crypto/ec/ec_err.c -L SSL ssl/ssl.h ssl/ssl_err.c -L BIO crypto/bio/bio.h crypto/bio/bio_err.c -L PKCS7 crypto/pkcs7/pkcs7.h crypto/pkcs7/pkcs7err.c -L X509V3 crypto/x509v3/x509v3.h crypto/x509v3/v3err.c -L PKCS12 crypto/pkcs12/pkcs12.h crypto/pkcs12/pk12err.c -L RAND crypto/rand/rand.h crypto/rand/rand_err.c -L DSO crypto/dso/dso.h crypto/dso/dso_err.c -L ENGINE crypto/engine/engine.h crypto/engine/eng_err.c -L OCSP crypto/ocsp/ocsp.h crypto/ocsp/ocsp_err.c -L UI crypto/ui/ui.h crypto/ui/ui_err.c -L COMP crypto/comp/comp.h crypto/comp/comp_err.c -L ECDSA crypto/ecdsa/ecdsa.h crypto/ecdsa/ecs_err.c -L ECDH crypto/ecdh/ecdh.h crypto/ecdh/ech_err.c -L STORE crypto/store/store.h crypto/store/str_err.c -L TS crypto/ts/ts.h crypto/ts/ts_err.c -L HMAC crypto/hmac/hmac.h crypto/hmac/hmac_err.c -L CMS crypto/cms/cms.h crypto/cms/cms_err.c -L JPAKE crypto/jpake/jpake.h crypto/jpake/jpake_err.c - -# additional header files to be scanned for function names -L NONE crypto/x509/x509_vfy.h NONE -L NONE crypto/ec/ec_lcl.h NONE -L NONE crypto/asn1/asn_lcl.h NONE -L NONE crypto/cms/cms_lcl.h NONE -L NONE ssl/ssl_locl.h NONE - - -F RSAREF_F_RSA_BN2BIN -F RSAREF_F_RSA_PRIVATE_DECRYPT -F RSAREF_F_RSA_PRIVATE_ENCRYPT -F RSAREF_F_RSA_PUBLIC_DECRYPT -F RSAREF_F_RSA_PUBLIC_ENCRYPT -#F SSL_F_CLIENT_CERTIFICATE - -R SSL_R_SSLV3_ALERT_UNEXPECTED_MESSAGE 1010 -R SSL_R_SSLV3_ALERT_BAD_RECORD_MAC 1020 -R SSL_R_TLSV1_ALERT_DECRYPTION_FAILED 1021 -R SSL_R_TLSV1_ALERT_RECORD_OVERFLOW 1022 -R 
SSL_R_SSLV3_ALERT_DECOMPRESSION_FAILURE 1030 -R SSL_R_SSLV3_ALERT_HANDSHAKE_FAILURE 1040 -R SSL_R_SSLV3_ALERT_NO_CERTIFICATE 1041 -R SSL_R_SSLV3_ALERT_BAD_CERTIFICATE 1042 -R SSL_R_SSLV3_ALERT_UNSUPPORTED_CERTIFICATE 1043 -R SSL_R_SSLV3_ALERT_CERTIFICATE_REVOKED 1044 -R SSL_R_SSLV3_ALERT_CERTIFICATE_EXPIRED 1045 -R SSL_R_SSLV3_ALERT_CERTIFICATE_UNKNOWN 1046 -R SSL_R_SSLV3_ALERT_ILLEGAL_PARAMETER 1047 -R SSL_R_TLSV1_ALERT_UNKNOWN_CA 1048 -R SSL_R_TLSV1_ALERT_ACCESS_DENIED 1049 -R SSL_R_TLSV1_ALERT_DECODE_ERROR 1050 -R SSL_R_TLSV1_ALERT_DECRYPT_ERROR 1051 -R SSL_R_TLSV1_ALERT_EXPORT_RESTRICTION 1060 -R SSL_R_TLSV1_ALERT_PROTOCOL_VERSION 1070 -R SSL_R_TLSV1_ALERT_INSUFFICIENT_SECURITY 1071 -R SSL_R_TLSV1_ALERT_INTERNAL_ERROR 1080 -R SSL_R_TLSV1_ALERT_INAPPROPRIATE_FALLBACK 1086 -R SSL_R_TLSV1_ALERT_USER_CANCELLED 1090 -R SSL_R_TLSV1_ALERT_NO_RENEGOTIATION 1100 -R SSL_R_TLSV1_UNSUPPORTED_EXTENSION 1110 -R SSL_R_TLSV1_CERTIFICATE_UNOBTAINABLE 1111 -R SSL_R_TLSV1_UNRECOGNIZED_NAME 1112 -R SSL_R_TLSV1_BAD_CERTIFICATE_STATUS_RESPONSE 1113 -R SSL_R_TLSV1_BAD_CERTIFICATE_HASH_VALUE 1114 - -R RSAREF_R_CONTENT_ENCODING 0x0400 -R RSAREF_R_DATA 0x0401 -R RSAREF_R_DIGEST_ALGORITHM 0x0402 -R RSAREF_R_ENCODING 0x0403 -R RSAREF_R_KEY 0x0404 -R RSAREF_R_KEY_ENCODING 0x0405 -R RSAREF_R_LEN 0x0406 -R RSAREF_R_MODULUS_LEN 0x0407 -R RSAREF_R_NEED_RANDOM 0x0408 -R RSAREF_R_PRIVATE_KEY 0x0409 -R RSAREF_R_PUBLIC_KEY 0x040a -R RSAREF_R_SIGNATURE 0x040b -R RSAREF_R_SIGNATURE_ENCODING 0x040c -R RSAREF_R_ENCRYPTION_ALGORITHM 0x040d - diff --git a/thirdparty/openssl/crypto/evp/bio_enc.c b/thirdparty/openssl/crypto/evp/bio_enc.c index 363e0246ae..0806f233b6 100644 --- a/thirdparty/openssl/crypto/evp/bio_enc.c +++ b/thirdparty/openssl/crypto/evp/bio_enc.c @@ -201,9 +201,14 @@ static int enc_read(BIO *b, char *out, int outl) break; } } else { - EVP_CipherUpdate(&(ctx->cipher), - (unsigned char *)ctx->buf, &ctx->buf_len, - (unsigned char *)&(ctx->buf[BUF_OFFSET]), i); + if 
(!EVP_CipherUpdate(&ctx->cipher, + (unsigned char *)ctx->buf, &ctx->buf_len, + (unsigned char *)&(ctx->buf[BUF_OFFSET]), + i)) { + BIO_clear_retry_flags(b); + ctx->ok = 0; + return 0; + } ctx->cont = 1; /* * Note: it is possible for EVP_CipherUpdate to decrypt zero @@ -260,9 +265,13 @@ static int enc_write(BIO *b, const char *in, int inl) ctx->buf_off = 0; while (inl > 0) { n = (inl > ENC_BLOCK_SIZE) ? ENC_BLOCK_SIZE : inl; - EVP_CipherUpdate(&(ctx->cipher), - (unsigned char *)ctx->buf, &ctx->buf_len, - (unsigned char *)in, n); + if (!EVP_CipherUpdate(&ctx->cipher, + (unsigned char *)ctx->buf, &ctx->buf_len, + (unsigned char *)in, n)) { + BIO_clear_retry_flags(b); + ctx->ok = 0; + return 0; + } inl -= n; in += n; diff --git a/thirdparty/openssl/crypto/evp/bio_ok.c b/thirdparty/openssl/crypto/evp/bio_ok.c index 5c32e35e17..16e151f110 100644 --- a/thirdparty/openssl/crypto/evp/bio_ok.c +++ b/thirdparty/openssl/crypto/evp/bio_ok.c @@ -491,7 +491,7 @@ static int sig_out(BIO *b) * FIXME: there's absolutely no guarantee this makes any sense at all, * particularly now EVP_MD_CTX has been restructured. 
*/ - if (RAND_pseudo_bytes(md->md_data, md->digest->md_size) < 0) + if (RAND_bytes(md->md_data, md->digest->md_size) <= 0) goto berr; memcpy(&(ctx->buf[ctx->buf_len]), md->md_data, md->digest->md_size); longswap(&(ctx->buf[ctx->buf_len]), md->digest->md_size); diff --git a/thirdparty/openssl/crypto/evp/c_all.c b/thirdparty/openssl/crypto/evp/c_all.c index a3ed00d4c1..719e34d22f 100644 --- a/thirdparty/openssl/crypto/evp/c_all.c +++ b/thirdparty/openssl/crypto/evp/c_all.c @@ -82,9 +82,4 @@ void OPENSSL_add_all_algorithms_noconf(void) OPENSSL_cpuid_setup(); OpenSSL_add_all_ciphers(); OpenSSL_add_all_digests(); -#ifndef OPENSSL_NO_ENGINE -# if defined(__OpenBSD__) || defined(__FreeBSD__) || defined(HAVE_CRYPTODEV) - ENGINE_setup_bsd_cryptodev(); -# endif -#endif } diff --git a/thirdparty/openssl/crypto/evp/digest.c b/thirdparty/openssl/crypto/evp/digest.c index 5b642b23fc..4db179629d 100644 --- a/thirdparty/openssl/crypto/evp/digest.c +++ b/thirdparty/openssl/crypto/evp/digest.c @@ -253,10 +253,10 @@ int EVP_DigestInit_ex(EVP_MD_CTX *ctx, const EVP_MD *type, ENGINE *impl) int EVP_DigestUpdate(EVP_MD_CTX *ctx, const void *data, size_t count) { #ifdef OPENSSL_FIPS - return FIPS_digestupdate(ctx, data, count); -#else - return ctx->update(ctx, data, count); + if (FIPS_mode()) + return FIPS_digestupdate(ctx, data, count); #endif + return ctx->update(ctx, data, count); } /* The caller can assume that this removes any secret data from the context */ @@ -271,10 +271,11 @@ int EVP_DigestFinal(EVP_MD_CTX *ctx, unsigned char *md, unsigned int *size) /* The caller can assume that this removes any secret data from the context */ int EVP_DigestFinal_ex(EVP_MD_CTX *ctx, unsigned char *md, unsigned int *size) { -#ifdef OPENSSL_FIPS - return FIPS_digestfinal(ctx, md, size); -#else int ret; +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_digestfinal(ctx, md, size); +#endif OPENSSL_assert(ctx->digest->md_size <= EVP_MAX_MD_SIZE); ret = ctx->digest->final(ctx, md); @@ -284,9 +285,8 
@@ int EVP_DigestFinal_ex(EVP_MD_CTX *ctx, unsigned char *md, unsigned int *size) ctx->digest->cleanup(ctx); EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_CLEANED); } - memset(ctx->md_data, 0, ctx->digest->ctx_size); + OPENSSL_cleanse(ctx->md_data, ctx->digest->ctx_size); return ret; -#endif } int EVP_MD_CTX_copy(EVP_MD_CTX *out, const EVP_MD_CTX *in) diff --git a/thirdparty/openssl/crypto/evp/e_aes.c b/thirdparty/openssl/crypto/evp/e_aes.c index 1734a823c1..b45b364466 100644 --- a/thirdparty/openssl/crypto/evp/e_aes.c +++ b/thirdparty/openssl/crypto/evp/e_aes.c @@ -155,10 +155,10 @@ void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out, const unsigned char ivec[AES_BLOCK_SIZE]); # endif # ifdef AES_XTS_ASM -void AES_xts_encrypt(const char *inp, char *out, size_t len, +void AES_xts_encrypt(const unsigned char *inp, unsigned char *out, size_t len, const AES_KEY *key1, const AES_KEY *key2, const unsigned char iv[16]); -void AES_xts_decrypt(const char *inp, char *out, size_t len, +void AES_xts_decrypt(const unsigned char *inp, unsigned char *out, size_t len, const AES_KEY *key1, const AES_KEY *key2, const unsigned char iv[16]); # endif @@ -1120,6 +1120,8 @@ BLOCK_CIPHER_generic_pack(NID_aes, 128, EVP_CIPH_FLAG_FIPS) static int aes_gcm_cleanup(EVP_CIPHER_CTX *c) { EVP_AES_GCM_CTX *gctx = c->cipher_data; + if (gctx == NULL) + return 0; OPENSSL_cleanse(&gctx->gcm, sizeof(gctx->gcm)); if (gctx->iv != c->iv) OPENSSL_free(gctx->iv); @@ -1235,10 +1237,15 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) { unsigned int len = c->buf[arg - 2] << 8 | c->buf[arg - 1]; /* Correct length for explicit IV */ + if (len < EVP_GCM_TLS_EXPLICIT_IV_LEN) + return 0; len -= EVP_GCM_TLS_EXPLICIT_IV_LEN; /* If decrypting correct for tag too */ - if (!c->encrypt) + if (!c->encrypt) { + if (len < EVP_GCM_TLS_TAG_LEN) + return 0; len -= EVP_GCM_TLS_TAG_LEN; + } c->buf[arg - 2] = len >> 8; c->buf[arg - 1] = len & 0xff; } diff --git 
a/thirdparty/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c b/thirdparty/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c index 6dfd590a4a..d114710e98 100644 --- a/thirdparty/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c +++ b/thirdparty/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c @@ -859,6 +859,8 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, key->payload_length = len; if ((key->aux.tls_ver = p[arg - 4] << 8 | p[arg - 3]) >= TLS1_1_VERSION) { + if (len < AES_BLOCK_SIZE) + return 0; len -= AES_BLOCK_SIZE; p[arg - 2] = len >> 8; p[arg - 1] = len; diff --git a/thirdparty/openssl/crypto/evp/e_aes_cbc_hmac_sha256.c b/thirdparty/openssl/crypto/evp/e_aes_cbc_hmac_sha256.c index 46c9d03389..917ae0751d 100644 --- a/thirdparty/openssl/crypto/evp/e_aes_cbc_hmac_sha256.c +++ b/thirdparty/openssl/crypto/evp/e_aes_cbc_hmac_sha256.c @@ -825,15 +825,19 @@ static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, case EVP_CTRL_AEAD_TLS1_AAD: { unsigned char *p = ptr; - unsigned int len = p[arg - 2] << 8 | p[arg - 1]; + unsigned int len; if (arg != EVP_AEAD_TLS1_AAD_LEN) return -1; + len = p[arg - 2] << 8 | p[arg - 1]; + if (ctx->encrypt) { key->payload_length = len; if ((key->aux.tls_ver = p[arg - 4] << 8 | p[arg - 3]) >= TLS1_1_VERSION) { + if (len < AES_BLOCK_SIZE) + return 0; len -= AES_BLOCK_SIZE; p[arg - 2] = len >> 8; p[arg - 1] = len; diff --git a/thirdparty/openssl/crypto/evp/e_des3.c b/thirdparty/openssl/crypto/evp/e_des3.c index 0e910d6d80..ab8126e5c9 100644 --- a/thirdparty/openssl/crypto/evp/e_des3.c +++ b/thirdparty/openssl/crypto/evp/e_des3.c @@ -212,6 +212,8 @@ static int des_ede3_cfb1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, size_t n; unsigned char c[1], d[1]; + if (!EVP_CIPHER_CTX_test_flags(ctx, EVP_CIPH_FLAG_LENGTH_BITS)) + inl *= 8; for (n = 0; n < inl; ++n) { c[0] = (in[n / 8] & (1 << (7 - n % 8))) ? 
0x80 : 0; DES_ede3_cfb_encrypt(c, d, 1, 1, diff --git a/thirdparty/openssl/crypto/evp/e_rc4_hmac_md5.c b/thirdparty/openssl/crypto/evp/e_rc4_hmac_md5.c index 2da1117829..93cfe3f107 100644 --- a/thirdparty/openssl/crypto/evp/e_rc4_hmac_md5.c +++ b/thirdparty/openssl/crypto/evp/e_rc4_hmac_md5.c @@ -99,7 +99,7 @@ static int rc4_hmac_md5_init_key(EVP_CIPHER_CTX *ctx, return 1; } -# if !defined(OPENSSL_NO_ASM) && ( \ +# if defined(RC4_ASM) && defined(MD5_ASM) && ( \ defined(__x86_64) || defined(__x86_64__) || \ defined(_M_AMD64) || defined(_M_X64) || \ defined(__INTEL__) ) && \ @@ -254,6 +254,8 @@ static int rc4_hmac_md5_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, MD5_Init(&key->tail); MD5_Update(&key->tail, hmac_key, sizeof(hmac_key)); + OPENSSL_cleanse(hmac_key, sizeof(hmac_key)); + return 1; } case EVP_CTRL_AEAD_TLS1_AAD: @@ -267,6 +269,8 @@ static int rc4_hmac_md5_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, len = p[arg - 2] << 8 | p[arg - 1]; if (!ctx->encrypt) { + if (len < MD5_DIGEST_LENGTH) + return -1; len -= MD5_DIGEST_LENGTH; p[arg - 2] = len >> 8; p[arg - 1] = len; diff --git a/thirdparty/openssl/crypto/evp/e_seed.c b/thirdparty/openssl/crypto/evp/e_seed.c index 7249d1b1ee..3d01eacac0 100644 --- a/thirdparty/openssl/crypto/evp/e_seed.c +++ b/thirdparty/openssl/crypto/evp/e_seed.c @@ -70,7 +70,8 @@ typedef struct { } EVP_SEED_KEY; IMPLEMENT_BLOCK_CIPHER(seed, ks, SEED, EVP_SEED_KEY, NID_seed, - 16, 16, 16, 128, 0, seed_init_key, 0, 0, 0, 0) + 16, 16, 16, 128, EVP_CIPH_FLAG_DEFAULT_ASN1, + seed_init_key, 0, 0, 0, 0) static int seed_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) diff --git a/thirdparty/openssl/crypto/evp/evp_enc.c b/thirdparty/openssl/crypto/evp/evp_enc.c index 7d7be245b0..be577bac76 100644 --- a/thirdparty/openssl/crypto/evp/evp_enc.c +++ b/thirdparty/openssl/crypto/evp/evp_enc.c @@ -170,7 +170,7 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, #ifdef OPENSSL_FIPS if 
(FIPS_mode()) { - const EVP_CIPHER *fcipher; + const EVP_CIPHER *fcipher = NULL; if (cipher) fcipher = evp_get_fips_cipher(cipher); if (fcipher) @@ -182,6 +182,7 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, if (ctx->cipher->ctx_size) { ctx->cipher_data = OPENSSL_malloc(ctx->cipher->ctx_size); if (!ctx->cipher_data) { + ctx->cipher = NULL; EVPerr(EVP_F_EVP_CIPHERINIT_EX, ERR_R_MALLOC_FAILURE); return 0; } @@ -193,6 +194,7 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ctx->flags &= EVP_CIPHER_CTX_FLAG_WRAP_ALLOW; if (ctx->cipher->flags & EVP_CIPH_CTRL_INIT) { if (!EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_INIT, 0, NULL)) { + ctx->cipher = NULL; EVPerr(EVP_F_EVP_CIPHERINIT_EX, EVP_R_INITIALIZATION_ERROR); return 0; } @@ -654,6 +656,7 @@ int EVP_CIPHER_CTX_copy(EVP_CIPHER_CTX *out, const EVP_CIPHER_CTX *in) if (in->cipher_data && in->cipher->ctx_size) { out->cipher_data = OPENSSL_malloc(in->cipher->ctx_size); if (!out->cipher_data) { + out->cipher = NULL; EVPerr(EVP_F_EVP_CIPHER_CTX_COPY, ERR_R_MALLOC_FAILURE); return 0; } @@ -661,6 +664,10 @@ int EVP_CIPHER_CTX_copy(EVP_CIPHER_CTX *out, const EVP_CIPHER_CTX *in) } if (in->cipher->flags & EVP_CIPH_CUSTOM_COPY) - return in->cipher->ctrl((EVP_CIPHER_CTX *)in, EVP_CTRL_COPY, 0, out); + if (!in->cipher->ctrl((EVP_CIPHER_CTX *)in, EVP_CTRL_COPY, 0, out)) { + out->cipher = NULL; + EVPerr(EVP_F_EVP_CIPHER_CTX_COPY, EVP_R_INITIALIZATION_ERROR); + return 0; + } return 1; } diff --git a/thirdparty/openssl/crypto/evp/evp_err.c b/thirdparty/openssl/crypto/evp/evp_err.c index 15cf5532b3..bcd841eb77 100644 --- a/thirdparty/openssl/crypto/evp/evp_err.c +++ b/thirdparty/openssl/crypto/evp/evp_err.c @@ -1,6 +1,6 @@ /* crypto/evp/evp_err.c */ /* ==================================================================== - * Copyright (c) 1999-2013 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2016 The OpenSSL Project. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -192,6 +192,7 @@ static ERR_STRING_DATA EVP_str_reasons[] = { {ERR_REASON(EVP_R_INPUT_NOT_INITIALIZED), "input not initialized"}, {ERR_REASON(EVP_R_INVALID_DIGEST), "invalid digest"}, {ERR_REASON(EVP_R_INVALID_FIPS_MODE), "invalid fips mode"}, + {ERR_REASON(EVP_R_INVALID_KEY), "invalid key"}, {ERR_REASON(EVP_R_INVALID_KEY_LENGTH), "invalid key length"}, {ERR_REASON(EVP_R_INVALID_OPERATION), "invalid operation"}, {ERR_REASON(EVP_R_IV_TOO_LARGE), "iv too large"}, diff --git a/thirdparty/openssl/crypto/evp/evptests.txt b/thirdparty/openssl/crypto/evp/evptests.txt deleted file mode 100644 index 4e9958b3b5..0000000000 --- a/thirdparty/openssl/crypto/evp/evptests.txt +++ /dev/null @@ -1,401 +0,0 @@ -#cipher:key:iv:plaintext:ciphertext:0/1(decrypt/encrypt) -#aadcipher:key:iv:plaintext:ciphertext:aad:tag:0/1(decrypt/encrypt) -#digest:::input:output - -# SHA(1) tests (from shatest.c) -SHA1:::616263:a9993e364706816aba3e25717850c26c9cd0d89d - -# MD5 tests (from md5test.c) -MD5::::d41d8cd98f00b204e9800998ecf8427e -MD5:::61:0cc175b9c0f1b6a831c399e269772661 -MD5:::616263:900150983cd24fb0d6963f7d28e17f72 -MD5:::6d65737361676520646967657374:f96b697d7cb7938d525a2f31aaf161d0 -MD5:::6162636465666768696a6b6c6d6e6f707172737475767778797a:c3fcd3d76192e4007dfb496cca67e13b -MD5:::4142434445464748494a4b4c4d4e4f505152535455565758595a6162636465666768696a6b6c6d6e6f707172737475767778797a30313233343536373839:d174ab98d277d9f5a5611c2c9f419d9f -MD5:::3132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353637383930:57edf4a22be3c955ac49da2e2107b67a - -# AES 128 ECB tests (from FIPS-197 test vectors, encrypt) - -AES-128-ECB:000102030405060708090A0B0C0D0E0F::00112233445566778899AABBCCDDEEFF:69C4E0D86A7B0430D8CDB78070B4C55A:1 - -# AES 192 ECB tests (from FIPS-197 test 
vectors, encrypt) - -AES-192-ECB:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF:DDA97CA4864CDFE06EAF70A0EC0D7191:1 - -# AES 256 ECB tests (from FIPS-197 test vectors, encrypt) - -AES-256-ECB:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF:8EA2B7CA516745BFEAFC49904B496089:1 - -# AES 128 ECB tests (from NIST test vectors, encrypt) - -#AES-128-ECB:00000000000000000000000000000000::00000000000000000000000000000000:C34C052CC0DA8D73451AFE5F03BE297F:1 - -# AES 128 ECB tests (from NIST test vectors, decrypt) - -#AES-128-ECB:00000000000000000000000000000000::44416AC2D1F53C583303917E6BE9EBE0:00000000000000000000000000000000:0 - -# AES 192 ECB tests (from NIST test vectors, decrypt) - -#AES-192-ECB:000000000000000000000000000000000000000000000000::48E31E9E256718F29229319C19F15BA4:00000000000000000000000000000000:0 - -# AES 256 ECB tests (from NIST test vectors, decrypt) - -#AES-256-ECB:0000000000000000000000000000000000000000000000000000000000000000::058CCFFDBBCB382D1F6F56585D8A4ADE:00000000000000000000000000000000:0 - -# AES 128 CBC tests (from NIST test vectors, encrypt) - -#AES-128-CBC:00000000000000000000000000000000:00000000000000000000000000000000:00000000000000000000000000000000:8A05FC5E095AF4848A08D328D3688E3D:1 - -# AES 192 CBC tests (from NIST test vectors, encrypt) - -#AES-192-CBC:000000000000000000000000000000000000000000000000:00000000000000000000000000000000:00000000000000000000000000000000:7BD966D53AD8C1BB85D2ADFAE87BB104:1 - -# AES 256 CBC tests (from NIST test vectors, encrypt) - -#AES-256-CBC:0000000000000000000000000000000000000000000000000000000000000000:00000000000000000000000000000000:00000000000000000000000000000000:FE3C53653E2F45B56FCD88B2CC898FF0:1 - -# AES 128 CBC tests (from NIST test vectors, decrypt) - -#AES-128-CBC:00000000000000000000000000000000:00000000000000000000000000000000:FACA37E0B0C85373DF706E73F7C9AF86:00000000000000000000000000000000:0 - -# AES tests 
from NIST document SP800-38A -# For all ECB encrypts and decrypts, the transformed sequence is -# AES-bits-ECB:key::plaintext:ciphertext:encdec -# ECB-AES128.Encrypt and ECB-AES128.Decrypt -AES-128-ECB:2B7E151628AED2A6ABF7158809CF4F3C::6BC1BEE22E409F96E93D7E117393172A:3AD77BB40D7A3660A89ECAF32466EF97 -AES-128-ECB:2B7E151628AED2A6ABF7158809CF4F3C::AE2D8A571E03AC9C9EB76FAC45AF8E51:F5D3D58503B9699DE785895A96FDBAAF -AES-128-ECB:2B7E151628AED2A6ABF7158809CF4F3C::30C81C46A35CE411E5FBC1191A0A52EF:43B1CD7F598ECE23881B00E3ED030688 -AES-128-ECB:2B7E151628AED2A6ABF7158809CF4F3C::F69F2445DF4F9B17AD2B417BE66C3710:7B0C785E27E8AD3F8223207104725DD4 -# ECB-AES192.Encrypt and ECB-AES192.Decrypt -AES-192-ECB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B::6BC1BEE22E409F96E93D7E117393172A:BD334F1D6E45F25FF712A214571FA5CC -AES-192-ECB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B::AE2D8A571E03AC9C9EB76FAC45AF8E51:974104846D0AD3AD7734ECB3ECEE4EEF -AES-192-ECB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B::30C81C46A35CE411E5FBC1191A0A52EF:EF7AFD2270E2E60ADCE0BA2FACE6444E -AES-192-ECB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B::F69F2445DF4F9B17AD2B417BE66C3710:9A4B41BA738D6C72FB16691603C18E0E -# ECB-AES256.Encrypt and ECB-AES256.Decrypt -AES-256-ECB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4::6BC1BEE22E409F96E93D7E117393172A:F3EED1BDB5D2A03C064B5A7E3DB181F8 -AES-256-ECB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4::AE2D8A571E03AC9C9EB76FAC45AF8E51:591CCB10D410ED26DC5BA74A31362870 -AES-256-ECB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4::30C81C46A35CE411E5FBC1191A0A52EF:B6ED21B99CA6F4F9F153E7B1BEAFED1D -AES-256-ECB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4::F69F2445DF4F9B17AD2B417BE66C3710:23304B7A39F9F3FF067D8D8F9E24ECC7 -# For all CBC encrypts and decrypts, the transformed sequence is -# AES-bits-CBC:key:IV/ciphertext':plaintext:ciphertext:encdec -# CBC-AES128.Encrypt and 
CBC-AES128.Decrypt -AES-128-CBC:2B7E151628AED2A6ABF7158809CF4F3C:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:7649ABAC8119B246CEE98E9B12E9197D -AES-128-CBC:2B7E151628AED2A6ABF7158809CF4F3C:7649ABAC8119B246CEE98E9B12E9197D:AE2D8A571E03AC9C9EB76FAC45AF8E51:5086CB9B507219EE95DB113A917678B2 -AES-128-CBC:2B7E151628AED2A6ABF7158809CF4F3C:5086CB9B507219EE95DB113A917678B2:30C81C46A35CE411E5FBC1191A0A52EF:73BED6B8E3C1743B7116E69E22229516 -AES-128-CBC:2B7E151628AED2A6ABF7158809CF4F3C:73BED6B8E3C1743B7116E69E22229516:F69F2445DF4F9B17AD2B417BE66C3710:3FF1CAA1681FAC09120ECA307586E1A7 -# CBC-AES192.Encrypt and CBC-AES192.Decrypt -AES-192-CBC:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:4F021DB243BC633D7178183A9FA071E8 -AES-192-CBC:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:4F021DB243BC633D7178183A9FA071E8:AE2D8A571E03AC9C9EB76FAC45AF8E51:B4D9ADA9AD7DEDF4E5E738763F69145A -AES-192-CBC:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:B4D9ADA9AD7DEDF4E5E738763F69145A:30C81C46A35CE411E5FBC1191A0A52EF:571B242012FB7AE07FA9BAAC3DF102E0 -AES-192-CBC:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:571B242012FB7AE07FA9BAAC3DF102E0:F69F2445DF4F9B17AD2B417BE66C3710:08B0E27988598881D920A9E64F5615CD -# CBC-AES256.Encrypt and CBC-AES256.Decrypt -AES-256-CBC:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:F58C4C04D6E5F1BA779EABFB5F7BFBD6 -AES-256-CBC:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:F58C4C04D6E5F1BA779EABFB5F7BFBD6:AE2D8A571E03AC9C9EB76FAC45AF8E51:9CFC4E967EDB808D679F777BC6702C7D -AES-256-CBC:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:9CFC4E967EDB808D679F777BC6702C7D:30C81C46A35CE411E5FBC1191A0A52EF:39F23369A9D9BACFA530E26304231461 
-AES-256-CBC:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:39F23369A9D9BACFA530E26304231461:F69F2445DF4F9B17AD2B417BE66C3710:B2EB05E2C39BE9FCDA6C19078C6A9D1B -# We don't support CFB{1,8}-AESxxx.{En,De}crypt -# For all CFB128 encrypts and decrypts, the transformed sequence is -# AES-bits-CFB:key:IV/ciphertext':plaintext:ciphertext:encdec -# CFB128-AES128.Encrypt -AES-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:3B3FD92EB72DAD20333449F8E83CFB4A:1 -AES-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:3B3FD92EB72DAD20333449F8E83CFB4A:AE2D8A571E03AC9C9EB76FAC45AF8E51:C8A64537A0B3A93FCDE3CDAD9F1CE58B:1 -AES-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:C8A64537A0B3A93FCDE3CDAD9F1CE58B:30C81C46A35CE411E5FBC1191A0A52EF:26751F67A3CBB140B1808CF187A4F4DF:1 -AES-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:26751F67A3CBB140B1808CF187A4F4DF:F69F2445DF4F9B17AD2B417BE66C3710:C04B05357C5D1C0EEAC4C66F9FF7F2E6:1 -# CFB128-AES128.Decrypt -AES-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:3B3FD92EB72DAD20333449F8E83CFB4A:0 -AES-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:3B3FD92EB72DAD20333449F8E83CFB4A:AE2D8A571E03AC9C9EB76FAC45AF8E51:C8A64537A0B3A93FCDE3CDAD9F1CE58B:0 -AES-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:C8A64537A0B3A93FCDE3CDAD9F1CE58B:30C81C46A35CE411E5FBC1191A0A52EF:26751F67A3CBB140B1808CF187A4F4DF:0 -AES-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:26751F67A3CBB140B1808CF187A4F4DF:F69F2445DF4F9B17AD2B417BE66C3710:C04B05357C5D1C0EEAC4C66F9FF7F2E6:0 -# CFB128-AES192.Encrypt -AES-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:CDC80D6FDDF18CAB34C25909C99A4174:1 -AES-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:CDC80D6FDDF18CAB34C25909C99A4174:AE2D8A571E03AC9C9EB76FAC45AF8E51:67CE7F7F81173621961A2B70171D3D7A:1 
-AES-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:67CE7F7F81173621961A2B70171D3D7A:30C81C46A35CE411E5FBC1191A0A52EF:2E1E8A1DD59B88B1C8E60FED1EFAC4C9:1 -AES-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:2E1E8A1DD59B88B1C8E60FED1EFAC4C9:F69F2445DF4F9B17AD2B417BE66C3710:C05F9F9CA9834FA042AE8FBA584B09FF:1 -# CFB128-AES192.Decrypt -AES-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:CDC80D6FDDF18CAB34C25909C99A4174:0 -AES-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:CDC80D6FDDF18CAB34C25909C99A4174:AE2D8A571E03AC9C9EB76FAC45AF8E51:67CE7F7F81173621961A2B70171D3D7A:0 -AES-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:67CE7F7F81173621961A2B70171D3D7A:30C81C46A35CE411E5FBC1191A0A52EF:2E1E8A1DD59B88B1C8E60FED1EFAC4C9:0 -AES-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:2E1E8A1DD59B88B1C8E60FED1EFAC4C9:F69F2445DF4F9B17AD2B417BE66C3710:C05F9F9CA9834FA042AE8FBA584B09FF:0 -# CFB128-AES256.Encrypt -AES-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:DC7E84BFDA79164B7ECD8486985D3860:1 -AES-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:DC7E84BFDA79164B7ECD8486985D3860:AE2D8A571E03AC9C9EB76FAC45AF8E51:39FFED143B28B1C832113C6331E5407B:1 -AES-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:39FFED143B28B1C832113C6331E5407B:30C81C46A35CE411E5FBC1191A0A52EF:DF10132415E54B92A13ED0A8267AE2F9:1 -AES-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:DF10132415E54B92A13ED0A8267AE2F9:F69F2445DF4F9B17AD2B417BE66C3710:75A385741AB9CEF82031623D55B1E471:1 -# CFB128-AES256.Decrypt -AES-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:DC7E84BFDA79164B7ECD8486985D3860:0 
-AES-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:DC7E84BFDA79164B7ECD8486985D3860:AE2D8A571E03AC9C9EB76FAC45AF8E51:39FFED143B28B1C832113C6331E5407B:0 -AES-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:39FFED143B28B1C832113C6331E5407B:30C81C46A35CE411E5FBC1191A0A52EF:DF10132415E54B92A13ED0A8267AE2F9:0 -AES-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:DF10132415E54B92A13ED0A8267AE2F9:F69F2445DF4F9B17AD2B417BE66C3710:75A385741AB9CEF82031623D55B1E471:0 -# For all OFB encrypts and decrypts, the transformed sequence is -# AES-bits-CFB:key:IV/output':plaintext:ciphertext:encdec -# OFB-AES128.Encrypt -AES-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:3B3FD92EB72DAD20333449F8E83CFB4A:1 -AES-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:50FE67CC996D32B6DA0937E99BAFEC60:AE2D8A571E03AC9C9EB76FAC45AF8E51:7789508D16918F03F53C52DAC54ED825:1 -AES-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:D9A4DADA0892239F6B8B3D7680E15674:30C81C46A35CE411E5FBC1191A0A52EF:9740051E9C5FECF64344F7A82260EDCC:1 -AES-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:A78819583F0308E7A6BF36B1386ABF23:F69F2445DF4F9B17AD2B417BE66C3710:304C6528F659C77866A510D9C1D6AE5E:1 -# OFB-AES128.Decrypt -AES-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:3B3FD92EB72DAD20333449F8E83CFB4A:0 -AES-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:50FE67CC996D32B6DA0937E99BAFEC60:AE2D8A571E03AC9C9EB76FAC45AF8E51:7789508D16918F03F53C52DAC54ED825:0 -AES-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:D9A4DADA0892239F6B8B3D7680E15674:30C81C46A35CE411E5FBC1191A0A52EF:9740051E9C5FECF64344F7A82260EDCC:0 -AES-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:A78819583F0308E7A6BF36B1386ABF23:F69F2445DF4F9B17AD2B417BE66C3710:304C6528F659C77866A510D9C1D6AE5E:0 -# OFB-AES192.Encrypt 
-AES-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:CDC80D6FDDF18CAB34C25909C99A4174:1 -AES-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:A609B38DF3B1133DDDFF2718BA09565E:AE2D8A571E03AC9C9EB76FAC45AF8E51:FCC28B8D4C63837C09E81700C1100401:1 -AES-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:52EF01DA52602FE0975F78AC84BF8A50:30C81C46A35CE411E5FBC1191A0A52EF:8D9A9AEAC0F6596F559C6D4DAF59A5F2:1 -AES-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:BD5286AC63AABD7EB067AC54B553F71D:F69F2445DF4F9B17AD2B417BE66C3710:6D9F200857CA6C3E9CAC524BD9ACC92A:1 -# OFB-AES192.Decrypt -AES-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:CDC80D6FDDF18CAB34C25909C99A4174:0 -AES-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:A609B38DF3B1133DDDFF2718BA09565E:AE2D8A571E03AC9C9EB76FAC45AF8E51:FCC28B8D4C63837C09E81700C1100401:0 -AES-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:52EF01DA52602FE0975F78AC84BF8A50:30C81C46A35CE411E5FBC1191A0A52EF:8D9A9AEAC0F6596F559C6D4DAF59A5F2:0 -AES-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:BD5286AC63AABD7EB067AC54B553F71D:F69F2445DF4F9B17AD2B417BE66C3710:6D9F200857CA6C3E9CAC524BD9ACC92A:0 -# OFB-AES256.Encrypt -AES-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:DC7E84BFDA79164B7ECD8486985D3860:1 -AES-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:B7BF3A5DF43989DD97F0FA97EBCE2F4A:AE2D8A571E03AC9C9EB76FAC45AF8E51:4FEBDC6740D20B3AC88F6AD82A4FB08D:1 -AES-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:E1C656305ED1A7A6563805746FE03EDC:30C81C46A35CE411E5FBC1191A0A52EF:71AB47A086E86EEDF39D1C5BBA97C408:1 
-AES-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:41635BE625B48AFC1666DD42A09D96E7:F69F2445DF4F9B17AD2B417BE66C3710:0126141D67F37BE8538F5A8BE740E484:1 -# OFB-AES256.Decrypt -AES-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:DC7E84BFDA79164B7ECD8486985D3860:0 -AES-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:B7BF3A5DF43989DD97F0FA97EBCE2F4A:AE2D8A571E03AC9C9EB76FAC45AF8E51:4FEBDC6740D20B3AC88F6AD82A4FB08D:0 -AES-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:E1C656305ED1A7A6563805746FE03EDC:30C81C46A35CE411E5FBC1191A0A52EF:71AB47A086E86EEDF39D1C5BBA97C408:0 -AES-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:41635BE625B48AFC1666DD42A09D96E7:F69F2445DF4F9B17AD2B417BE66C3710:0126141D67F37BE8538F5A8BE740E484:0 - -# AES Counter test vectors from RFC3686 -aes-128-ctr:AE6852F8121067CC4BF7A5765577F39E:00000030000000000000000000000001:53696E676C6520626C6F636B206D7367:E4095D4FB7A7B3792D6175A3261311B8:1 -aes-128-ctr:7E24067817FAE0D743D6CE1F32539163:006CB6DBC0543B59DA48D90B00000001:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:5104A106168A72D9790D41EE8EDAD388EB2E1EFC46DA57C8FCE630DF9141BE28:1 -aes-128-ctr:7691BE035E5020A8AC6E618529F9A0DC:00E0017B27777F3F4A1786F000000001:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F20212223:C1CF48A89F2FFDD9CF4652E9EFDB72D74540A42BDE6D7836D59A5CEAAEF3105325B2072F:1 - -aes-192-ctr:16AF5B145FC9F579C175F93E3BFB0EED863D06CCFDB78515:0000004836733C147D6D93CB00000001:53696E676C6520626C6F636B206D7367:4B55384FE259C9C84E7935A003CBE928:1 -aes-192-ctr:7C5CB2401B3DC33C19E7340819E0F69C678C3DB8E6F6A91A:0096B03B020C6EADC2CB500D00000001:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:453243FC609B23327EDFAAFA7131CD9F8490701C5AD4A79CFC1FE0FF42F4FB00:1 
-aes-192-ctr:02BF391EE8ECB159B959617B0965279BF59B60A786D3E0FE:0007BDFD5CBD60278DCC091200000001:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F20212223:96893FC55E5C722F540B7DD1DDF7E758D288BC95C69165884536C811662F2188ABEE0935:1 - -aes-256-ctr:776BEFF2851DB06F4C8A0542C8696F6C6A81AF1EEC96B4D37FC1D689E6C1C104:00000060DB5672C97AA8F0B200000001:53696E676C6520626C6F636B206D7367:145AD01DBF824EC7560863DC71E3E0C0:1 -aes-256-ctr:F6D66D6BD52D59BB0796365879EFF886C66DD51A5B6A99744B50590C87A23884:00FAAC24C1585EF15A43D87500000001:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:F05E231B3894612C49EE000B804EB2A9B8306B508F839D6A5530831D9344AF1C:1 -aes-256-ctr:FF7A617CE69148E4F1726E2F43581DE2AA62D9F805532EDFF1EED687FB54153D:001CC5B751A51D70A1C1114800000001:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F20212223:EB6C52821D0BBBF7CE7594462ACA4FAAB407DF866569FD07F48CC0B583D6071F1EC0E6B8:1 - -# DES ECB tests (from destest) - -DES-ECB:0000000000000000::0000000000000000:8CA64DE9C1B123A7 -DES-ECB:FFFFFFFFFFFFFFFF::FFFFFFFFFFFFFFFF:7359B2163E4EDC58 -DES-ECB:3000000000000000::1000000000000001:958E6E627A05557B -DES-ECB:1111111111111111::1111111111111111:F40379AB9E0EC533 -DES-ECB:0123456789ABCDEF::1111111111111111:17668DFC7292532D -DES-ECB:1111111111111111::0123456789ABCDEF:8A5AE1F81AB8F2DD -DES-ECB:FEDCBA9876543210::0123456789ABCDEF:ED39D950FA74BCC4 - -# DESX-CBC tests (from destest) -DESX-CBC:0123456789abcdeff1e0d3c2b5a49786fedcba9876543210:fedcba9876543210:37363534333231204E6F77206973207468652074696D6520666F722000000000:846B2914851E9A2954732F8AA0A611C115CDC2D7951B1053A63C5E03B21AA3C4 - -# DES EDE3 CBC tests (from destest) -DES-EDE3-CBC:0123456789abcdeff1e0d3c2b5a49786fedcba9876543210:fedcba9876543210:37363534333231204E6F77206973207468652074696D6520666F722000000000:3FE301C962AC01D02213763C1CBD4CDC799657C064ECF5D41C673812CFDE9675 - -# RC4 tests (from rc4test) -RC4:0123456789abcdef0123456789abcdef::0123456789abcdef:75b7878099e0c596 
-RC4:0123456789abcdef0123456789abcdef::0000000000000000:7494c2e7104b0879 -RC4:00000000000000000000000000000000::0000000000000000:de188941a3375d3a -RC4:ef012345ef012345ef012345ef012345::0000000000000000000000000000000000000000:d6a141a7ec3c38dfbd615a1162e1c7ba36b67858 -RC4:0123456789abcdef0123456789abcdef::123456789ABCDEF0123456789ABCDEF0123456789ABCDEF012345678:66a0949f8af7d6891f7f832ba833c00c892ebe30143ce28740011ecf -RC4:ef012345ef012345ef012345ef012345::00000000000000000000:d6a141a7ec3c38dfbd61 - - -# Camellia tests from RFC3713 -# For all ECB encrypts and decrypts, the transformed sequence is -# CAMELLIA-bits-ECB:key::plaintext:ciphertext:encdec -CAMELLIA-128-ECB:0123456789abcdeffedcba9876543210::0123456789abcdeffedcba9876543210:67673138549669730857065648eabe43 -CAMELLIA-192-ECB:0123456789abcdeffedcba98765432100011223344556677::0123456789abcdeffedcba9876543210:b4993401b3e996f84ee5cee7d79b09b9 -CAMELLIA-256-ECB:0123456789abcdeffedcba987654321000112233445566778899aabbccddeeff::0123456789abcdeffedcba9876543210:9acc237dff16d76c20ef7c919e3a7509 - -# ECB-CAMELLIA128.Encrypt -CAMELLIA-128-ECB:000102030405060708090A0B0C0D0E0F::00112233445566778899AABBCCDDEEFF:77CF412067AF8270613529149919546F:1 -CAMELLIA-192-ECB:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF:B22F3C36B72D31329EEE8ADDC2906C68:1 -CAMELLIA-256-ECB:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF:2EDF1F3418D53B88841FC8985FB1ECF2:1 - -# ECB-CAMELLIA128.Encrypt and ECB-CAMELLIA128.Decrypt -CAMELLIA-128-ECB:2B7E151628AED2A6ABF7158809CF4F3C::6BC1BEE22E409F96E93D7E117393172A:432FC5DCD628115B7C388D770B270C96 -CAMELLIA-128-ECB:2B7E151628AED2A6ABF7158809CF4F3C::AE2D8A571E03AC9C9EB76FAC45AF8E51:0BE1F14023782A22E8384C5ABB7FAB2B -CAMELLIA-128-ECB:2B7E151628AED2A6ABF7158809CF4F3C::30C81C46A35CE411E5FBC1191A0A52EF:A0A1ABCD1893AB6FE0FE5B65DF5F8636 
-CAMELLIA-128-ECB:2B7E151628AED2A6ABF7158809CF4F3C::F69F2445DF4F9B17AD2B417BE66C3710:E61925E0D5DFAA9BB29F815B3076E51A - -# ECB-CAMELLIA192.Encrypt and ECB-CAMELLIA192.Decrypt -CAMELLIA-192-ECB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B::6BC1BEE22E409F96E93D7E117393172A:CCCC6C4E138B45848514D48D0D3439D3 -CAMELLIA-192-ECB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B::AE2D8A571E03AC9C9EB76FAC45AF8E51:5713C62C14B2EC0F8393B6AFD6F5785A -CAMELLIA-192-ECB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B::30C81C46A35CE411E5FBC1191A0A52EF:B40ED2B60EB54D09D030CF511FEEF366 -CAMELLIA-192-ECB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B::F69F2445DF4F9B17AD2B417BE66C3710:909DBD95799096748CB27357E73E1D26 - -# ECB-CAMELLIA256.Encrypt and ECB-CAMELLIA256.Decrypt -CAMELLIA-256-ECB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4::6BC1BEE22E409F96E93D7E117393172A:BEFD219B112FA00098919CD101C9CCFA -CAMELLIA-256-ECB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4::AE2D8A571E03AC9C9EB76FAC45AF8E51:C91D3A8F1AEA08A9386CF4B66C0169EA -CAMELLIA-256-ECB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4::30C81C46A35CE411E5FBC1191A0A52EF:A623D711DC5F25A51BB8A80D56397D28 -CAMELLIA-256-ECB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4::F69F2445DF4F9B17AD2B417BE66C3710:7960109FB6DC42947FCFE59EA3C5EB6B - -# For all CBC encrypts and decrypts, the transformed sequence is -# CAMELLIA-bits-CBC:key:IV/ciphertext':plaintext:ciphertext:encdec -# CBC-CAMELLIA128.Encrypt and CBC-CAMELLIA128.Decrypt -CAMELLIA-128-CBC:2B7E151628AED2A6ABF7158809CF4F3C:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:1607CF494B36BBF00DAEB0B503C831AB -CAMELLIA-128-CBC:2B7E151628AED2A6ABF7158809CF4F3C:1607CF494B36BBF00DAEB0B503C831AB:AE2D8A571E03AC9C9EB76FAC45AF8E51:A2F2CF671629EF7840C5A5DFB5074887 
-CAMELLIA-128-CBC:2B7E151628AED2A6ABF7158809CF4F3C:A2F2CF671629EF7840C5A5DFB5074887:30C81C46A35CE411E5FBC1191A0A52EF:0F06165008CF8B8B5A63586362543E54 -CAMELLIA-128-CBC:2B7E151628AED2A6ABF7158809CF4F3C:36A84CDAFD5F9A85ADA0F0A993D6D577:F69F2445DF4F9B17AD2B417BE66C3710:74C64268CDB8B8FAF5B34E8AF3732980 - -# CBC-CAMELLIA192.Encrypt and CBC-CAMELLIA192.Decrypt -CAMELLIA-192-CBC:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:2A4830AB5AC4A1A2405955FD2195CF93 -CAMELLIA-192-CBC:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:2A4830AB5AC4A1A2405955FD2195CF93:AE2D8A571E03AC9C9EB76FAC45AF8E51:5D5A869BD14CE54264F892A6DD2EC3D5 -CAMELLIA-192-CBC:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:5D5A869BD14CE54264F892A6DD2EC3D5:30C81C46A35CE411E5FBC1191A0A52EF:37D359C3349836D884E310ADDF68C449 -CAMELLIA-192-CBC:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:37D359C3349836D884E310ADDF68C449:F69F2445DF4F9B17AD2B417BE66C3710:01FAAA930B4AB9916E9668E1428C6B08 - -# CBC-CAMELLIA256.Encrypt and CBC-CAMELLIA256.Decrypt -CAMELLIA-256-CBC:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:E6CFA35FC02B134A4D2C0B6737AC3EDA -CAMELLIA-256-CBC:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:E6CFA35FC02B134A4D2C0B6737AC3EDA:AE2D8A571E03AC9C9EB76FAC45AF8E51:36CBEB73BD504B4070B1B7DE2B21EB50 -CAMELLIA-256-CBC:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:36CBEB73BD504B4070B1B7DE2B21EB50:30C81C46A35CE411E5FBC1191A0A52EF:E31A6055297D96CA3330CDF1B1860A83 -CAMELLIA-256-CBC:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:E31A6055297D96CA3330CDF1B1860A83:F69F2445DF4F9B17AD2B417BE66C3710:5D563F6D1CCCF236051C0C5C1C58F28F - -# We don't support CFB{1,8}-CAMELLIAxxx.{En,De}crypt -# For all CFB128 encrypts and decrypts, the transformed sequence is -# CAMELLIA-bits-CFB:key:IV/ciphertext':plaintext:ciphertext:encdec 
-# CFB128-CAMELLIA128.Encrypt -CAMELLIA-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:14F7646187817EB586599146B82BD719:1 -CAMELLIA-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:14F7646187817EB586599146B82BD719:AE2D8A571E03AC9C9EB76FAC45AF8E51:A53D28BB82DF741103EA4F921A44880B:1 -CAMELLIA-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:A53D28BB82DF741103EA4F921A44880B:30C81C46A35CE411E5FBC1191A0A52EF:9C2157A664626D1DEF9EA420FDE69B96:1 -CAMELLIA-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:9C2157A664626D1DEF9EA420FDE69B96:F69F2445DF4F9B17AD2B417BE66C3710:742A25F0542340C7BAEF24CA8482BB09:1 - -# CFB128-CAMELLIA128.Decrypt -CAMELLIA-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:14F7646187817EB586599146B82BD719:0 -CAMELLIA-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:14F7646187817EB586599146B82BD719:AE2D8A571E03AC9C9EB76FAC45AF8E51:A53D28BB82DF741103EA4F921A44880B:0 -CAMELLIA-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:A53D28BB82DF741103EA4F921A44880B:30C81C46A35CE411E5FBC1191A0A52EF:9C2157A664626D1DEF9EA420FDE69B96:0 -CAMELLIA-128-CFB:2B7E151628AED2A6ABF7158809CF4F3C:9C2157A664626D1DEF9EA420FDE69B96:F69F2445DF4F9B17AD2B417BE66C3710:742A25F0542340C7BAEF24CA8482BB09:0 - -# CFB128-CAMELLIA192.Encrypt -CAMELLIA-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:C832BB9780677DAA82D9B6860DCD565E:1 -CAMELLIA-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:C832BB9780677DAA82D9B6860DCD565E:AE2D8A571E03AC9C9EB76FAC45AF8E51:86F8491627906D780C7A6D46EA331F98:1 -CAMELLIA-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:86F8491627906D780C7A6D46EA331F98:30C81C46A35CE411E5FBC1191A0A52EF:69511CCE594CF710CB98BB63D7221F01:1 -CAMELLIA-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:69511CCE594CF710CB98BB63D7221F01:F69F2445DF4F9B17AD2B417BE66C3710:D5B5378A3ABED55803F25565D8907B84:1 - -# 
CFB128-CAMELLIA192.Decrypt -CAMELLIA-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:C832BB9780677DAA82D9B6860DCD565E:0 -CAMELLIA-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:C832BB9780677DAA82D9B6860DCD565E:AE2D8A571E03AC9C9EB76FAC45AF8E51:86F8491627906D780C7A6D46EA331F98:0 -CAMELLIA-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:86F8491627906D780C7A6D46EA331F98:30C81C46A35CE411E5FBC1191A0A52EF:69511CCE594CF710CB98BB63D7221F01:0 -CAMELLIA-192-CFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:69511CCE594CF710CB98BB63D7221F01:F69F2445DF4F9B17AD2B417BE66C3710:D5B5378A3ABED55803F25565D8907B84:0 - -# CFB128-CAMELLIA256.Encrypt -CAMELLIA-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:CF6107BB0CEA7D7FB1BD31F5E7B06C93:1 -CAMELLIA-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:CF6107BB0CEA7D7FB1BD31F5E7B06C93:AE2D8A571E03AC9C9EB76FAC45AF8E51:89BEDB4CCDD864EA11BA4CBE849B5E2B:1 -CAMELLIA-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:89BEDB4CCDD864EA11BA4CBE849B5E2B:30C81C46A35CE411E5FBC1191A0A52EF:555FC3F34BDD2D54C62D9E3BF338C1C4:1 -CAMELLIA-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:555FC3F34BDD2D54C62D9E3BF338C1C4:F69F2445DF4F9B17AD2B417BE66C3710:5953ADCE14DB8C7F39F1BD39F359BFFA:1 - -# CFB128-CAMELLIA256.Decrypt -CAMELLIA-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:CF6107BB0CEA7D7FB1BD31F5E7B06C93:0 -CAMELLIA-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:CF6107BB0CEA7D7FB1BD31F5E7B06C93:AE2D8A571E03AC9C9EB76FAC45AF8E51:89BEDB4CCDD864EA11BA4CBE849B5E2B:0 
-CAMELLIA-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:89BEDB4CCDD864EA11BA4CBE849B5E2B:30C81C46A35CE411E5FBC1191A0A52EF:555FC3F34BDD2D54C62D9E3BF338C1C4:0 -CAMELLIA-256-CFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:555FC3F34BDD2D54C62D9E3BF338C1C4:F69F2445DF4F9B17AD2B417BE66C3710:5953ADCE14DB8C7F39F1BD39F359BFFA:0 - -# For all OFB encrypts and decrypts, the transformed sequence is -# CAMELLIA-bits-OFB:key:IV/output':plaintext:ciphertext:encdec -# OFB-CAMELLIA128.Encrypt -CAMELLIA-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:14F7646187817EB586599146B82BD719:1 -CAMELLIA-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:50FE67CC996D32B6DA0937E99BAFEC60:AE2D8A571E03AC9C9EB76FAC45AF8E51:25623DB569CA51E01482649977E28D84:1 -CAMELLIA-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:D9A4DADA0892239F6B8B3D7680E15674:30C81C46A35CE411E5FBC1191A0A52EF:C776634A60729DC657D12B9FCA801E98:1 -CAMELLIA-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:A78819583F0308E7A6BF36B1386ABF23:F69F2445DF4F9B17AD2B417BE66C3710:D776379BE0E50825E681DA1A4C980E8E:1 - -# OFB-CAMELLIA128.Decrypt -CAMELLIA-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:14F7646187817EB586599146B82BD719:0 -CAMELLIA-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:50FE67CC996D32B6DA0937E99BAFEC60:AE2D8A571E03AC9C9EB76FAC45AF8E51:25623DB569CA51E01482649977E28D84:0 -CAMELLIA-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:D9A4DADA0892239F6B8B3D7680E15674:30C81C46A35CE411E5FBC1191A0A52EF:C776634A60729DC657D12B9FCA801E98:0 -CAMELLIA-128-OFB:2B7E151628AED2A6ABF7158809CF4F3C:A78819583F0308E7A6BF36B1386ABF23:F69F2445DF4F9B17AD2B417BE66C3710:D776379BE0E50825E681DA1A4C980E8E:0 - -# OFB-CAMELLIA192.Encrypt -CAMELLIA-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:C832BB9780677DAA82D9B6860DCD565E:1 
-CAMELLIA-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:A609B38DF3B1133DDDFF2718BA09565E:AE2D8A571E03AC9C9EB76FAC45AF8E51:8ECEB7D0350D72C7F78562AEBDF99339:1 -CAMELLIA-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:52EF01DA52602FE0975F78AC84BF8A50:30C81C46A35CE411E5FBC1191A0A52EF:BDD62DBBB9700846C53B507F544696F0:1 -CAMELLIA-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:BD5286AC63AABD7EB067AC54B553F71D:F69F2445DF4F9B17AD2B417BE66C3710:E28014E046B802F385C4C2E13EAD4A72:1 - -# OFB-CAMELLIA192.Decrypt -CAMELLIA-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:C832BB9780677DAA82D9B6860DCD565E:0 -CAMELLIA-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:A609B38DF3B1133DDDFF2718BA09565E:AE2D8A571E03AC9C9EB76FAC45AF8E51:8ECEB7D0350D72C7F78562AEBDF99339:0 -CAMELLIA-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:52EF01DA52602FE0975F78AC84BF8A50:30C81C46A35CE411E5FBC1191A0A52EF:BDD62DBBB9700846C53B507F544696F0:0 -CAMELLIA-192-OFB:8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B:BD5286AC63AABD7EB067AC54B553F71D:F69F2445DF4F9B17AD2B417BE66C3710:E28014E046B802F385C4C2E13EAD4A72:0 - -# OFB-CAMELLIA256.Encrypt -CAMELLIA-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:CF6107BB0CEA7D7FB1BD31F5E7B06C93:1 -CAMELLIA-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:B7BF3A5DF43989DD97F0FA97EBCE2F4A:AE2D8A571E03AC9C9EB76FAC45AF8E51:127AD97E8E3994E4820027D7BA109368:1 -CAMELLIA-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:E1C656305ED1A7A6563805746FE03EDC:30C81C46A35CE411E5FBC1191A0A52EF:6BFF6265A6A6B7A535BC65A80B17214E:1 -CAMELLIA-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:41635BE625B48AFC1666DD42A09D96E7:F69F2445DF4F9B17AD2B417BE66C3710:0A4A0404E26AA78A27CB271E8BF3CF20:1 - -# OFB-CAMELLIA256.Decrypt 
-CAMELLIA-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:000102030405060708090A0B0C0D0E0F:6BC1BEE22E409F96E93D7E117393172A:CF6107BB0CEA7D7FB1BD31F5E7B06C93:0 -CAMELLIA-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:B7BF3A5DF43989DD97F0FA97EBCE2F4A:AE2D8A571E03AC9C9EB76FAC45AF8E51:127AD97E8E3994E4820027D7BA109368:0 -CAMELLIA-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:E1C656305ED1A7A6563805746FE03EDC:30C81C46A35CE411E5FBC1191A0A52EF:6BFF6265A6A6B7A535BC65A80B17214E:0 -CAMELLIA-256-OFB:603DEB1015CA71BE2B73AEF0857D77811F352C073B6108D72D9810A30914DFF4:41635BE625B48AFC1666DD42A09D96E7:F69F2445DF4F9B17AD2B417BE66C3710:0A4A0404E26AA78A27CB271E8BF3CF20:0 - -# SEED test vectors from RFC4269 -SEED-ECB:00000000000000000000000000000000::000102030405060708090A0B0C0D0E0F:5EBAC6E0054E166819AFF1CC6D346CDB:0 -SEED-ECB:000102030405060708090A0B0C0D0E0F::00000000000000000000000000000000:C11F22F20140505084483597E4370F43:0 -SEED-ECB:4706480851E61BE85D74BFB3FD956185::83A2F8A288641FB9A4E9A5CC2F131C7D:EE54D13EBCAE706D226BC3142CD40D4A:0 -SEED-ECB:28DBC3BC49FFD87DCFA509B11D422BE7::B41E6BE2EBA84A148E2EED84593C5EC7:9B9B7BFCD1813CB95D0B3618F40F5122:0 -SEED-ECB:00000000000000000000000000000000::000102030405060708090A0B0C0D0E0F:5EBAC6E0054E166819AFF1CC6D346CDB:1 -SEED-ECB:000102030405060708090A0B0C0D0E0F::00000000000000000000000000000000:C11F22F20140505084483597E4370F43:1 -SEED-ECB:4706480851E61BE85D74BFB3FD956185::83A2F8A288641FB9A4E9A5CC2F131C7D:EE54D13EBCAE706D226BC3142CD40D4A:1 -SEED-ECB:28DBC3BC49FFD87DCFA509B11D422BE7::B41E6BE2EBA84A148E2EED84593C5EC7:9B9B7BFCD1813CB95D0B3618F40F5122:1 - -# AES CCM 256 bit key 
-aes-256-ccm:1bde3251d41a8b5ea013c195ae128b218b3e0306376357077ef1c1c78548b92e:5b8e40746f6b98e00f1d13ff41:53bd72a97089e312422bf72e242377b3c6ee3e2075389b999c4ef7f28bd2b80a:9a5fcccdb4cf04e7293d2775cc76a488f042382d949b43b7d6bb2b9864786726:c17a32514eb6103f3249e076d4c871dc97e04b286699e54491dc18f6d734d4c0:2024931d73bca480c24a24ece6b6c2bf - -# AES GCM test vectors from http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-spec.pdf -aes-128-gcm:00000000000000000000000000000000:000000000000000000000000::::58e2fccefa7e3061367f1d57a4e7455a -aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78::ab6e47d42cec13bdf53a67b21257bddf -aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091473f5985::4d5c2af327cd64a62cf35abd2ba6fab4 -aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091:feedfacedeadbeeffeedfacedeadbeefabaddad2:5bc94fbc3221a5db94fae95ae7121a47 -aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:61353b4c2806934a777ff51fa22a4755699b2a714fcdc6f83766e5f97b6c742373806900e49f24b22b097544d4896b424989b5e1ebac0f07c23f4598:feedfacedeadbeeffeedfacedeadbeefabaddad2:3612d2e79e3b0785561be14aaca2fccb 
-aes-128-gcm:feffe9928665731c6d6a8f9467308308:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:8ce24998625615b603a033aca13fb894be9112a5c3a211a8ba262a3cca7e2ca701e4a9a4fba43c90ccdcb281d48c7c6fd62875d2aca417034c34aee5:feedfacedeadbeeffeedfacedeadbeefabaddad2:619cc5aefffe0bfa462af43c1699d050 -aes-192-gcm:000000000000000000000000000000000000000000000000:000000000000000000000000::::cd33b28ac773f74ba00ed1f312572435 -aes-192-gcm:000000000000000000000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:98e7247c07f0fe411c267e4384b0f600::2ff58d80033927ab8ef4d4587514f0fb -aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710acade256::9924a7c8587336bfb118024db8674a14 -aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710:feedfacedeadbeeffeedfacedeadbeefabaddad2:2519498e80f1478f37ba55bd6d27618c -aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:0f10f599ae14a154ed24b36e25324db8c566632ef2bbb34f8347280fc4507057fddc29df9a471f75c66541d4d4dad1c9e93a19a58e8b473fa0f062f7:feedfacedeadbeeffeedfacedeadbeefabaddad2:65dcc57fcf623a24094fcca40d3533f8 
-aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:d27e88681ce3243c4830165a8fdcf9ff1de9a1d8e6b447ef6ef7b79828666e4581e79012af34ddd9e2f037589b292db3e67c036745fa22e7e9b7373b:feedfacedeadbeeffeedfacedeadbeefabaddad2:dcf566ff291c25bbb8568fc3d376a6d9 -aes-256-gcm:0000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000::::530f8afbc74536b9a963b4f1c4cb738b -aes-256-gcm:0000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:cea7403d4d606b6e074ec5d3baf39d18::d0d1c8a799996bf0265b98b5d48ab919 -aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662898015ad::b094dac5d93471bdec1a502270e3cc6c -aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662:feedfacedeadbeeffeedfacedeadbeefabaddad2:76fc6ece0f4e1768cddf8853bb2d551b 
-aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:c3762df1ca787d32ae47c13bf19844cbaf1ae14d0b976afac52ff7d79bba9de0feb582d33934a4f0954cc2363bc73f7862ac430e64abe499f47c9b1f:feedfacedeadbeeffeedfacedeadbeefabaddad2:3a337dbf46a792c45e454913fe2ea8f2 -aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:5a8def2f0c9e53f1f75d7853659e2a20eeb2b22aafde6419a058ab4f6f746bf40fc0c3b780f244452da3ebf1c5d82cdea2418997200ef82e44ae7e3f:feedfacedeadbeeffeedfacedeadbeefabaddad2:a44a8266ee1c8eb0c8b5d4cf5ae9f19a -# local add-ons, primarily streaming ghash tests -# 128 bytes aad -aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:::d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662898015ad:5fea793a2d6f974d37e68e0cb8ff9492 -# 48 bytes plaintext -aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0::9dd0a376b08e40eb00c35f29f9ea61a4 -# 80 bytes plaintext 
-aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0c94da219118e297d7b7ebcbcc9c388f28ade7d85a8ee35616f7124a9d5270291::98885a3a22bd4742fe7b72172193b163 -# 128 bytes plaintext -aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0c94da219118e297d7b7ebcbcc9c388f28ade7d85a8ee35616f7124a9d527029195b84d1b96c690ff2f2de30bf2ec89e00253786e126504f0dab90c48a30321de3345e6b0461e7c9e6c6b7afedde83f40::cac45f60e31efd3b5a43b98a22ce1aa1 -# 192 bytes plaintext, iv is chosen so that initial counter LSB is 0xFF 
-aes-128-gcm:00000000000000000000000000000000:ffffffff000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:56b3373ca9ef6e4a2b64fe1e9a17b61425f10d47a75a5fce13efc6bc784af24f4141bdd48cf7c770887afd573cca5418a9aeffcd7c5ceddfc6a78397b9a85b499da558257267caab2ad0b23ca476a53cb17fb41c4b8b475cb4f3f7165094c229c9e8c4dc0a2a5ff1903e501511221376a1cdb8364c5061a20cae74bc4acd76ceb0abc9fd3217ef9f8c90be402ddf6d8697f4f880dff15bfb7a6b28241ec8fe183c2d59e3f9dfff653c7126f0acb9e64211f42bae12af462b1070bef1ab5e3606::566f8ef683078bfdeeffa869d751a017 -# 80 bytes plaintext, submitted by Intel -aes-128-gcm:843ffcf5d2b72694d19ed01d01249412:dbcca32ebf9b804617c3aa9e:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f:6268c6fa2a80b2d137467f092f657ac04d89be2beaa623d61b5a868c8f03ff95d3dcee23ad2f1ab3a6c80eaf4b140eb05de3457f0fbc111a6b43d0763aa422a3013cf1dc37fe417d1fbfc449b75d4cc5:00000000000000000000000000000000101112131415161718191a1b1c1d1e1f:3b629ccfbc1119b7319e1dce2cd6fd6d - -# AES XTS test vectors from IEEE Std 1619-2007 -aes-128-xts:0000000000000000000000000000000000000000000000000000000000000000:00000000000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000:917cf69ebd68b2ec9b9fe9a3eadda692cd43d2f59598ed858c02c2652fbf922e 
-aes-128-xts:1111111111111111111111111111111122222222222222222222222222222222:33333333330000000000000000000000:4444444444444444444444444444444444444444444444444444444444444444:c454185e6a16936e39334038acef838bfb186fff7480adc4289382ecd6d394f0 -aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f022222222222222222222222222222222:33333333330000000000000000000000:4444444444444444444444444444444444444444444444444444444444444444:af85336b597afc1a900b2eb21ec949d292df4c047e0b21532186a5971a227a89 -aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:00000000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:27a7479befa1d476489f308cd4cfa6e2a96e4bbe3208ff25287dd3819616e89cc78cf7f5e543445f8333d8fa7f56000005279fa5d8b5e4ad40e736ddb4d35412328063fd2aab53e5ea1e0a9f332500a5df9487d07a5c92cc512c8866c7e860ce93fdf166a24912b422976146ae20ce846bb7dc9ba94a767aaef20c0d61ad02655ea92dc4c4e41a8952c651d33174be51a10c421110e6d81588ede82103a252d8a750e8768defffed9122810aaeb99f9172af82b604dc4b8e51bcb08235a6f4
341332e4ca60482a4ba1a03b3e65008fc5da76b70bf1690db4eae29c5f1badd03c5ccf2a55d705ddcd86d449511ceb7ec30bf12b1fa35b913f9f747a8afd1b130e94bff94effd01a91735ca1726acd0b197c4e5b03393697e126826fb6bbde8ecc1e08298516e2c9ed03ff3c1b7860f6de76d4cecd94c8119855ef5297ca67e9f3e7ff72b1e99785ca0a7e7720c5b36dc6d72cac9574c8cbbc2f801e23e56fd344b07f22154beba0f08ce8891e643ed995c94d9a69c9f1b5f499027a78572aeebd74d20cc39881c213ee770b1010e4bea718846977ae119f7a023ab58cca0ad752afe656bb3c17256a9f6e9bf19fdd5a38fc82bbe872c5539edb609ef4f79c203ebb140f2e583cb2ad15b4aa5b655016a8449277dbd477ef2c8d6c017db738b18deb4a427d1923ce3ff262735779a418f20a282df920147beabe421ee5319d0568 -aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:01000000000000000000000000000000:27a7479befa1d476489f308cd4cfa6e2a96e4bbe3208ff25287dd3819616e89cc78cf7f5e543445f8333d8fa7f56000005279fa5d8b5e4ad40e736ddb4d35412328063fd2aab53e5ea1e0a9f332500a5df9487d07a5c92cc512c8866c7e860ce93fdf166a24912b422976146ae20ce846bb7dc9ba94a767aaef20c0d61ad02655ea92dc4c4e41a8952c651d33174be51a10c421110e6d81588ede82103a252d8a750e8768defffed9122810aaeb99f9172af82b604dc4b8e51bcb08235a6f4341332e4ca60482a4ba1a03b3e65008fc5da76b70bf1690db4eae29c5f1badd03c5ccf2a55d705ddcd86d449511ceb7ec30bf12b1fa35b913f9f747a8afd1b130e94bff94effd01a91735ca1726acd0b197c4e5b03393697e126826fb6bbde8ecc1e08298516e2c9ed03ff3c1b7860f6de76d4cecd94c8119855ef5297ca67e9f3e7ff72b1e99785ca0a7e7720c5b36dc6d72cac9574c8cbbc2f801e23e56fd344b07f22154beba0f08ce8891e643ed995c94d9a69c9f1b5f499027a78572aeebd74d20cc39881c213ee770b1010e4bea718846977ae119f7a023ab58cca0ad752afe656bb3c17256a9f6e9bf19fdd5a38fc82bbe872c5539edb609ef4f79c203ebb140f2e583cb2ad15b4aa5b655016a8449277dbd477ef2c8d6c017db738b18deb4a427d1923ce3ff262735779a418f20a282df920147beabe421ee5319d0568:264d3ca8512194fec312c8c9891f279fefdd608d0c027b60483a3fa811d65ee59d52d9e40ec5672d81532b38b6b089ce951f0f9c35590b8b978d175213f329bb1c2fd30f2f7f30492a61a532a79f51d36f5e31a7c9a12c286082ff7d2394d18f783e1a8e72c722caaaa52d8f06565
7d2631fd25bfd8e5baad6e527d763517501c68c5edc3cdd55435c532d7125c8614deed9adaa3acade5888b87bef641c4c994c8091b5bcd387f3963fb5bc37aa922fbfe3df4e5b915e6eb514717bdd2a74079a5073f5c4bfd46adf7d282e7a393a52579d11a028da4d9cd9c77124f9648ee383b1ac763930e7162a8d37f350b2f74b8472cf09902063c6b32e8c2d9290cefbd7346d1c779a0df50edcde4531da07b099c638e83a755944df2aef1aa31752fd323dcb710fb4bfbb9d22b925bc3577e1b8949e729a90bbafeacf7f7879e7b1147e28ba0bae940db795a61b15ecf4df8db07b824bb062802cc98a9545bb2aaeed77cb3fc6db15dcd7d80d7d5bc406c4970a3478ada8899b329198eb61c193fb6275aa8ca340344a75a862aebe92eee1ce032fd950b47d7704a3876923b4ad62844bf4a09c4dbe8b4397184b7471360c9564880aedddb9baa4af2e75394b08cd32ff479c57a07d3eab5d54de5f9738b8d27f27a9f0ab11799d7b7ffefb2704c95c6ad12c39f1e867a4b7b1d7818a4b753dfd2a89ccb45e001a03a867b187f225dd -aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:02000000000000000000000000000000:264d3ca8512194fec312c8c9891f279fefdd608d0c027b60483a3fa811d65ee59d52d9e40ec5672d81532b38b6b089ce951f0f9c35590b8b978d175213f329bb1c2fd30f2f7f30492a61a532a79f51d36f5e31a7c9a12c286082ff7d2394d18f783e1a8e72c722caaaa52d8f065657d2631fd25bfd8e5baad6e527d763517501c68c5edc3cdd55435c532d7125c8614deed9adaa3acade5888b87bef641c4c994c8091b5bcd387f3963fb5bc37aa922fbfe3df4e5b915e6eb514717bdd2a74079a5073f5c4bfd46adf7d282e7a393a52579d11a028da4d9cd9c77124f9648ee383b1ac763930e7162a8d37f350b2f74b8472cf09902063c6b32e8c2d9290cefbd7346d1c779a0df50edcde4531da07b099c638e83a755944df2aef1aa31752fd323dcb710fb4bfbb9d22b925bc3577e1b8949e729a90bbafeacf7f7879e7b1147e28ba0bae940db795a61b15ecf4df8db07b824bb062802cc98a9545bb2aaeed77cb3fc6db15dcd7d80d7d5bc406c4970a3478ada8899b329198eb61c193fb6275aa8ca340344a75a862aebe92eee1ce032fd950b47d7704a3876923b4ad62844bf4a09c4dbe8b4397184b7471360c9564880aedddb9baa4af2e75394b08cd32ff479c57a07d3eab5d54de5f9738b8d27f27a9f0ab11799d7b7ffefb2704c95c6ad12c39f1e867a4b7b1d7818a4b753dfd2a89ccb45e001a03a867b187f225dd:fa762a3680b76007928ed4a4f49a9456031b704782e65e16cecb54ed7d01
7b5e18abd67b338e81078f21edb7868d901ebe9c731a7c18b5e6dec1d6a72e078ac9a4262f860beefa14f4e821018272e411a951502b6e79066e84252c3346f3aa62344351a291d4bedc7a07618bdea2af63145cc7a4b8d4070691ae890cd65733e7946e9021a1dffc4c59f159425ee6d50ca9b135fa6162cea18a939838dc000fb386fad086acce5ac07cb2ece7fd580b00cfa5e98589631dc25e8e2a3daf2ffdec26531659912c9d8f7a15e5865ea8fb5816d6207052bd7128cd743c12c8118791a4736811935eb982a532349e31dd401e0b660a568cb1a4711f552f55ded59f1f15bf7196b3ca12a91e488ef59d64f3a02bf45239499ac6176ae321c4a211ec545365971c5d3f4f09d4eb139bfdf2073d33180b21002b65cc9865e76cb24cd92c874c24c18350399a936ab3637079295d76c417776b94efce3a0ef7206b15110519655c956cbd8b2489405ee2b09a6b6eebe0c53790a12a8998378b33a5b71159625f4ba49d2a2fdba59fbf0897bc7aabd8d707dc140a80f0f309f835d3da54ab584e501dfa0ee977fec543f74186a802b9a37adb3e8291eca04d66520d229e60401e7282bef486ae059aa70696e0e305d777140a7a883ecdcb69b9ff938e8a4231864c69ca2c2043bed007ff3e605e014bcf518138dc3a25c5e236171a2d01d6 -aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:fd000000000000000000000000000000:8e41b78c390b5af9d758bb214a67e9f6bf7727b09ac6124084c37611398fa45daad94868600ed391fb1acd4857a95b466e62ef9f4b377244d1c152e7b30d731aad30c716d214b707aed99eb5b5e580b3e887cf7497465651d4b60e6042051da3693c3b78c14489543be8b6ad0ba629565bba202313ba7b0d0c94a3252b676f46cc02ce0f8a7d34c0ed229129673c1f61aed579d08a9203a25aac3a77e9db60267996db38df637356d9dcd1632e369939f2a29d89345c66e05066f1a3677aef18dea4113faeb629e46721a66d0a7e785d3e29af2594eb67dfa982affe0aac058f6e15864269b135418261fc3afb089472cf68c45dd7f231c6249ba0255e1e033833fc4d00a3fe02132d7bc3873614b8aee34273581ea0325c81f0270affa13641d052d36f0757d484014354d02d6883ca15c24d8c3956b1bd027bcf41f151fd8023c5340e5606f37e90fdb87c86fb4fa634b3718a30bace06a66eaf8f63c4aa3b637826a87fe8cfa44282e92cb1615af3a28e53bc74c7cba1a0977be9065d0c1a5dec6c54ae38d37f37aa35283e048e5530a85c4e7a29d7b92ec0c3169cdf2a805c7604bce60049b9fb7b8eaac10f51ae23794ceba68bb58112e293b9b692ca721b37c662f8574ed4dba6f88e1708
81c82cddc1034a0ca7e284bf0962b6b26292d836fa9f73c1ac770eef0f2d3a1eaf61d3e03555fd424eedd67e18a18094f888:d55f684f81f4426e9fde92a5ff02df2ac896af63962888a97910c1379e20b0a3b1db613fb7fe2e07004329ea5c22bfd33e3dbe4cf58cc608c2c26c19a2e2fe22f98732c2b5cb844cc6c0702d91e1d50fc4382a7eba5635cd602432a2306ac4ce82f8d70c8d9bc15f918fe71e74c622d5cf71178bf6e0b9cc9f2b41dd8dbe441c41cd0c73a6dc47a348f6702f9d0e9b1b1431e948e299b9ec2272ab2c5f0c7be86affa5dec87a0bee81d3d50007edaa2bcfccb35605155ff36ed8edd4a40dcd4b243acd11b2b987bdbfaf91a7cac27e9c5aea525ee53de7b2d3332c8644402b823e94a7db26276d2d23aa07180f76b4fd29b9c0823099c9d62c519880aee7e9697617c1497d47bf3e571950311421b6b734d38b0db91eb85331b91ea9f61530f54512a5a52a4bad589eb69781d537f23297bb459bdad2948a29e1550bf4787e0be95bb173cf5fab17dab7a13a052a63453d97ccec1a321954886b7a1299faaeecae35c6eaaca753b041b5e5f093bf83397fd21dd6b3012066fcc058cc32c3b09d7562dee29509b5839392c9ff05f51f3166aaac4ac5f238038a3045e6f72e48ef0fe8bc675e82c318a268e43970271bf119b81bf6a982746554f84e72b9f00280a320a08142923c23c883423ff949827f29bbacdc1ccdb04938ce6098c95ba6b32528f4ef78eed778b2e122ddfd1cbdd11d1c0a6783e011fc536d63d053260637 
-aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:fe000000000000000000000000000000:d55f684f81f4426e9fde92a5ff02df2ac896af63962888a97910c1379e20b0a3b1db613fb7fe2e07004329ea5c22bfd33e3dbe4cf58cc608c2c26c19a2e2fe22f98732c2b5cb844cc6c0702d91e1d50fc4382a7eba5635cd602432a2306ac4ce82f8d70c8d9bc15f918fe71e74c622d5cf71178bf6e0b9cc9f2b41dd8dbe441c41cd0c73a6dc47a348f6702f9d0e9b1b1431e948e299b9ec2272ab2c5f0c7be86affa5dec87a0bee81d3d50007edaa2bcfccb35605155ff36ed8edd4a40dcd4b243acd11b2b987bdbfaf91a7cac27e9c5aea525ee53de7b2d3332c8644402b823e94a7db26276d2d23aa07180f76b4fd29b9c0823099c9d62c519880aee7e9697617c1497d47bf3e571950311421b6b734d38b0db91eb85331b91ea9f61530f54512a5a52a4bad589eb69781d537f23297bb459bdad2948a29e1550bf4787e0be95bb173cf5fab17dab7a13a052a63453d97ccec1a321954886b7a1299faaeecae35c6eaaca753b041b5e5f093bf83397fd21dd6b3012066fcc058cc32c3b09d7562dee29509b5839392c9ff05f51f3166aaac4ac5f238038a3045e6f72e48ef0fe8bc675e82c318a268e43970271bf119b81bf6a982746554f84e72b9f00280a320a08142923c23c883423ff949827f29bbacdc1ccdb04938ce6098c95ba6b32528f4ef78eed778b2e122ddfd1cbdd11d1c0a6783e011fc536d63d053260637:72efc1ebfe1ee25975a6eb3aa8589dda2b261f1c85bdab442a9e5b2dd1d7c3957a16fc08e526d4b1223f1b1232a11af274c3d70dac57f83e0983c498f1a6f1aecb021c3e70085a1e527f1ce41ee5911a82020161529cd82773762daf5459de94a0a82adae7e1703c808543c29ed6fb32d9e004327c1355180c995a07741493a09c21ba01a387882da4f62534b87bb15d60d197201c0fd3bf30c1500a3ecfecdd66d8721f90bcc4c17ee925c61b0a03727a9c0d5f5ca462fbfa0af1c2513a9d9d4b5345bd27a5f6e653f751693e6b6a2b8ead57d511e00e58c45b7b8d005af79288f5c7c22fd4f1bf7a898b03a5634c6a1ae3f9fae5de4f296a2896b23e7ed43ed14fa5a2803f4d28f0d3ffcf24757677aebdb47bb388378708948a8d4126ed1839e0da29a537a8c198b3c66ab00712dd261674bf45a73d67f76914f830ca014b65596f27e4cf62de66125a5566df9975155628b400fbfb3a29040ed50faffdbb18aece7c5c44693260aab386c0a37b11b114f1c415aebb653be468179428d43a4d8bc3ec38813eca30a13cf1bb18d524f1992d44d8b1a42ea30b22e6c95b199d8d182f8840b09d059585c31ad691fa06
19ff038aca2c39a943421157361717c49d322028a74648113bd8c9d7ec77cf3c89c1ec8718ceff8516d96b34c3c614f10699c9abc4ed0411506223bea16af35c883accdbe1104eef0cfdb54e12fb230a -aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:ff000000000000000000000000000000:72efc1ebfe1ee25975a6eb3aa8589dda2b261f1c85bdab442a9e5b2dd1d7c3957a16fc08e526d4b1223f1b1232a11af274c3d70dac57f83e0983c498f1a6f1aecb021c3e70085a1e527f1ce41ee5911a82020161529cd82773762daf5459de94a0a82adae7e1703c808543c29ed6fb32d9e004327c1355180c995a07741493a09c21ba01a387882da4f62534b87bb15d60d197201c0fd3bf30c1500a3ecfecdd66d8721f90bcc4c17ee925c61b0a03727a9c0d5f5ca462fbfa0af1c2513a9d9d4b5345bd27a5f6e653f751693e6b6a2b8ead57d511e00e58c45b7b8d005af79288f5c7c22fd4f1bf7a898b03a5634c6a1ae3f9fae5de4f296a2896b23e7ed43ed14fa5a2803f4d28f0d3ffcf24757677aebdb47bb388378708948a8d4126ed1839e0da29a537a8c198b3c66ab00712dd261674bf45a73d67f76914f830ca014b65596f27e4cf62de66125a5566df9975155628b400fbfb3a29040ed50faffdbb18aece7c5c44693260aab386c0a37b11b114f1c415aebb653be468179428d43a4d8bc3ec38813eca30a13cf1bb18d524f1992d44d8b1a42ea30b22e6c95b199d8d182f8840b09d059585c31ad691fa0619ff038aca2c39a943421157361717c49d322028a74648113bd8c9d7ec77cf3c89c1ec8718ceff8516d96b34c3c614f10699c9abc4ed0411506223bea16af35c883accdbe1104eef0cfdb54e12fb230a:3260ae8dad1f4a32c5cafe3ab0eb95549d461a67ceb9e5aa2d3afb62dece0553193ba50c75be251e08d1d08f1088576c7efdfaaf3f459559571e12511753b07af073f35da06af0ce0bbf6b8f5ccc5cea500ec1b211bd51f63b606bf6528796ca12173ba39b8935ee44ccce646f90a45bf9ccc567f0ace13dc2d53ebeedc81f58b2e41179dddf0d5a5c42f5d8506c1a5d2f8f59f3ea873cbcd0eec19acbf325423bd3dcb8c2b1bf1d1eaed0eba7f0698e4314fbeb2f1566d1b9253008cbccf45a2b0d9c5c9c21474f4076e02be26050b99dee4fd68a4cf890e496e4fcae7b70f94ea5a9062da0daeba1993d2ccd1dd3c244b8428801495a58b216547e7e847c46d1d756377b6242d2e5fb83bf752b54e0df71e889f3a2bb0f4c10805bf3c590376e3c24e22ff57f7fa965577375325cea5d920db94b9c336b455f6e894c01866fe9fbb8c8d3f70a2957285f6dfb5dcd8cbf54782f8fe7766d4723819913ac773
421e3a31095866bad22c86a6036b2518b2059b4229d18c8c2ccbdf906c6cc6e82464ee57bddb0bebcb1dc645325bfb3e665ef7251082c88ebb1cf203bd779fdd38675713c8daadd17e1cabee432b09787b6ddf3304e38b731b45df5df51b78fcfb3d32466028d0ba36555e7e11ab0ee0666061d1645d962444bc47a38188930a84b4d561395c73c087021927ca638b7afc8a8679ccb84c26555440ec7f10445cd - -aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ff000000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:1c3b3a102f770386e4836c99e370cf9bea00803f5e482357a4ae12d414a3e63b5d31e276f8fe4a8d66b317f9ac683f44680a86ac35adfc3345befecb4bb188fd5776926c49a3095eb108fd1098baec70aaa66999a72a82f27d848b21d4a741b0c5cd4d5fff9dac89aeba122961d03a757123e9870f8acf1000020887891429ca2a3e7a7d7df7b10355165c8b9a6d0a7de8b062c4500dc4cd120c0f7418dae3d0b5781c34803fa75421c790dfe1de1834f280d7667b327f6c8cd7557e12ac3a0f93ec05c52e0493ef31a12d3d9260f79a289d6a379bc70c50841473d1a8cc81ec583e9645e07b8d9670655ba5bbcf
ecc6dc3966380ad8fecb17b6ba02469a020a84e18e8f84252070c13e9f1f289be54fbc481457778f616015e1327a02b140f1505eb309326d68378f8374595c849d84f4c333ec4423885143cb47bd71c5edae9be69a2ffeceb1bec9de244fbe15992b11b77c040f12bd8f6a975a44a0f90c29a9abc3d4d893927284c58754cce294529f8614dcd2aba991925fedc4ae74ffac6e333b93eb4aff0479da9a410e4450e0dd7ae4c6e2910900575da401fc07059f645e8b7e9bfdef33943054ff84011493c27b3429eaedb4ed5376441a77ed43851ad77f16f541dfd269d50d6a5f14fb0aab1cbb4c1550be97f7ab4066193c4caa773dad38014bd2092fa755c824bb5e54c4f36ffda9fcea70b9c6e693e148c151 -aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffff0000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:77a31251618a15e6b92d1d66dffe7b50b50bad552305ba0217a610688eff7e11e1d0225438e093242d6db274fde801d4cae06f2092c728b2478559df58e837c2469ee4a4fa794e4bbc7f39bc026e3cb72c33b0888f25b4acf56a2a9804f1ce6d3d6e1dc6ca181d4b546179d55544aa7760c40d06741539c7e3cd9d2f665
0b2013fd0eeb8c2b8e3d8d240ccae2d4c98320a7442e1c8d75a42d6e6cfa4c2eca1798d158c7aecdf82490f24bb9b38e108bcda12c3faf9a21141c3613b58367f922aaa26cd22f23d708dae699ad7cb40a8ad0b6e2784973dcb605684c08b8d6998c69aac049921871ebb65301a4619ca80ecb485a31d744223ce8ddc2394828d6a80470c092f5ba413c3378fa6054255c6f9df4495862bbb3287681f931b687c888abf844dfc8fc28331e579928cd12bd2390ae123cf03818d14dedde5c0c24c8ab018bfca75ca096f2d531f3d1619e785f1ada437cab92e980558b3dce1474afb75bfedbf8ff54cb2618e0244c9ac0d3c66fb51598cd2db11f9be39791abe447c63094f7c453b7ff87cb5bb36b7c79efb0872d17058b83b15ab0866ad8a58656c5a7e20dbdf308b2461d97c0ec0024a2715055249cf3b478ddd4740de654f75ca686e0d7345c69ed50cdc2a8b332b1f8824108ac937eb050585608ee734097fc09054fbff89eeaeea791f4a7ab1f9868294a4f9e27b42af8100cb9d59cef9645803 -aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffff00000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:e387aaa58ba483afa7e8eb4697
78317ecf4cf573aa9d4eac23f2cdf914e4e200a8b490e42ee646802dc6ee2b471b278195d60918ececb44bf79966f83faba0499298ebc699c0c8634715a320bb4f075d622e74c8c932004f25b41e361025b5a87815391f6108fc4afa6a05d9303c6ba68a128a55705d415985832fdeaae6c8e19110e84d1b1f199a2692119edc96132658f09da7c623efcec712537a3d94c0bf5d7e352ec94ae5797fdb377dc1551150721adf15bd26a8efc2fcaad56881fa9e62462c28f30ae1ceaca93c345cf243b73f542e2074a705bd2643bb9f7cc79bb6e7091ea6e232df0f9ad0d6cf502327876d82207abf2115cdacf6d5a48f6c1879a65b115f0f8b3cb3c59d15dd8c769bc014795a1837f3901b5845eb491adfefe097b1fa30a12fc1f65ba22905031539971a10f2f36c321bb51331cdefb39e3964c7ef079994f5b69b2edd83a71ef549971ee93f44eac3938fcdd61d01fa71799da3a8091c4c48aa9ed263ff0749df95d44fef6a0bb578ec69456aa5408ae32c7af08ad7ba8921287e3bbee31b767be06a0e705c864a769137df28292283ea81a2480241b44d9921cdbec1bc28dc1fda114bd8e5217ac9d8ebafa720e9da4f9ace231cc949e5b96fe76ffc21063fddc83a6b8679c00d35e09576a875305bed5f36ed242c8900dd1fa965bc950dfce09b132263a1eef52dd6888c309f5a7d712826 -aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffffff000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c
9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:bf53d2dade78e822a4d949a9bc6766b01b06a8ef70d26748c6a7fc36d80ae4c5520f7c4ab0ac8544424fa405162fef5a6b7f229498063618d39f0003cb5fb8d1c86b643497da1ff945c8d3bedeca4f479702a7a735f043ddb1d6aaade3c4a0ac7ca7f3fa5279bef56f82cd7a2f38672e824814e10700300a055e1630b8f1cb0e919f5e942010a416e2bf48cb46993d3cb6a51c19bacf864785a00bc2ecff15d350875b246ed53e68be6f55bd7e05cfc2b2ed6432198a6444b6d8c247fab941f569768b5c429366f1d3f00f0345b96123d56204c01c63b22ce78baf116e525ed90fdea39fa469494d3866c31e05f295ff21fea8d4e6e13d67e47ce722e9698a1c1048d68ebcde76b86fcf976eab8aa9790268b7068e017a8b9b749409514f1053027fd16c3786ea1bac5f15cb79711ee2abe82f5cf8b13ae73030ef5b9e4457e75d1304f988d62dd6fc4b94ed38ba831da4b7634971b6cd8ec325d9c61c00f1df73627ed3745a5e8489f3a95c69639c32cd6e1d537a85f75cc844726e8a72fc0077ad22000f1d5078f6b866318c668f1ad03d5a5fced5219f2eabbd0aa5c0f460d183f04404a0d6f469558e81fab24a167905ab4c7878502ad3e38fdbe62a41556cec37325759533ce8f25f367c87bb5578d667ae93f9e2fd99bcbc5f2fbba88cf6516139420fcff3b7361d86322c4bd84c82f335abb152c4a93411373aaa8220 
-aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffffffff0000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:64497e5a831e4a932c09be3e5393376daa599548b816031d224bbf50a818ed2350eae7e96087c8a0db51ad290bd00c1ac1620857635bf246c176ab463be30b808da548081ac847b158e1264be25bb0910bbc92647108089415d45fab1b3d2604e8a8eff1ae4020cfa39936b66827b23f371b92200be90251e6d73c5f86de5fd4a950781933d79a28272b782a2ec313efdfcc0628f43d744c2dc2ff3dcb66999b50c7ca895b0c64791eeaa5f29499fb1c026f84ce5b5c72ba1083cddb5ce45434631665c333b60b11593fb253c5179a2c8db813782a004856a1653011e93fb6d876c18366dd8683f53412c0c180f9c848592d593f8609ca736317d356e13e2bff3a9f59cd9aeb19cd482593d8c46128bb32423b37a9adfb482b99453fbe25a41bf6feb4aa0bef5ed24bf73c762978025482c13115e4015aac992e5613a3b5c2f685b84795cb6e9b2656d8c88157e52c42f978d8634c43d06fea928f2822e465aa6576e9bf419384506cc3ce3c54ac1a6f67dc66f3b30191e698380bc999b05abce19dc0c6dcc2dd001ec535ba18deb2df
1a101023108318c75dc98611a09dc48a0acdec676fabdf222f07e026f059b672b56e5cbc8e1d21bbd867dd927212054681d70ea737134cdfce93b6f82ae22423274e58a0821cc5502e2d0ab4585e94de6975be5e0b4efce51cd3e70c25a1fbbbd609d273ad5b0d59631c531f6a0a57b9 - -aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f10:6c1625db4671522d3d7599601de7ca09ed -aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f1011:d069444b7a7e0cab09e24447d24deb1fedbf -aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f101112:e5df1351c0544ba1350b3363cd8ef4beedbf9d -aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f10111213:9d84c813f719aa2c7be3f66171c7c5c2edbf9dac -aes-128-xts:e0e1e2e3e4e5e6e7e8e9eaebecedeeefc0c1c2c3c4c5c6c7c8c9cacbcccdcecf:21436587a90000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8
c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:38b45812ef43a05bd957e545907e223b954ab4aaf088303ad910eadf14b42be68b2461149d8c8ba85f992be970bc621f1b06573f63e867bf5875acafa04e42ccbd7bd3c2a0fb1fff791ec5ec36c66ae4ac1e806d81fbf709dbe29e471fad38549c8e66f5345d7c1eb94f405d1ec785cc6f6a68f6254dd8339f9d84057e01a17741990482999516b5611a38f41bb6478e6f173f320805dd71b1932fc333cb9ee39936beea9ad96fa10fb4112b901734ddad40bc1878995f8e11aee7d141a2f5d48b7a4e1e7f0b2c04830e69a4fd1378411c2f287edf48c6c4e5c247a19680f7fe41cefbd49b582106e3616cbbe4dfb2344b2ae9519391f3e0fb4922254b1d6d2d19c6d4d537b3a26f3bcc51588b32f3eca0829b6a5ac72578fb814fb43cf80d64a233e3f997a3f02683342f2b33d25b492536b93becb2f5e1a8b82f5b883342729e8ae09d16938841a21a97fb543eea3bbff59f13c1a18449e398701c1ad51648346cbc04c27bb2da3b93a1372ccae548fb53bee476f9e9c91773b1bb19828394d55d3e1a20ed69113a860b6829ffa847224604435070221b257e8dff783615d2cae4803a93aa4334ab482a0afac9c0aeda70b45a481df5dec5df8cc0f423c77a5fd46cd312021d4b438862419a791be03bb4d97c0e59578542531ba466a83baf92cefc151b5cc1611a167893819b63fb8a6b18e86de60290fa72b797b0ce59f3 -# AES wrap tests from RFC3394 -id-aes128-wrap:000102030405060708090A0B0C0D0E0F::00112233445566778899AABBCCDDEEFF:1FA68B0A8112B447AEF34BD8FB5A7B829D3E862371D2CFE5 -id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF:96778B25AE6CA435F92B5B97C050AED2468AB8A17AD84E5D -id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF:64E8C3F9CE0F5BA263E9777905818A2A93C8191E7D6E8AE7 -id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF0001020304050607:031D33264E15D33268F24EC260743EDCE1C6C7DDEE725A936BA814915C6762D2 -id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF0001020304050607:A8F9BC1612C68B3FF6E6F4FBE30E71E4769C8B80A32CB8958CD5D17D6B254DA1 
-id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF000102030405060708090A0B0C0D0E0F:28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD21 diff --git a/thirdparty/openssl/crypto/evp/openbsd_hw.c b/thirdparty/openssl/crypto/evp/openbsd_hw.c index 75d12e2330..07decf2674 100644 --- a/thirdparty/openssl/crypto/evp/openbsd_hw.c +++ b/thirdparty/openssl/crypto/evp/openbsd_hw.c @@ -133,6 +133,10 @@ static int dev_crypto_init_key(EVP_CIPHER_CTX *ctx, int cipher, return 0; CDATA(ctx)->key = OPENSSL_malloc(MAX_HW_KEY); + if (CDATA(ctx)->key == NULL { + err("CDATA(ctx)->key memory allocation failed"); + return 0; + } assert(ctx->cipher->iv_len <= MAX_HW_IV); @@ -186,6 +190,11 @@ static int dev_crypto_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, if (((unsigned long)in & 3) || cinl != inl) { cin = OPENSSL_malloc(cinl); + if (cin == NULL) { + err("cin - memory allocation failed"); + abort(); + return 0; + } memcpy(cin, in, inl); cryp.src = cin; } @@ -334,6 +343,11 @@ static int do_digest(int ses, unsigned char *md, const void *data, int len) char *dcopy; dcopy = OPENSSL_malloc(len); + if (dcopy == NULL) { + err("dcopy - memory allocation failed"); + abort(); + return 0; + } memcpy(dcopy, data, len); cryp.src = dcopy; cryp.dst = cryp.src; // FIXME!!! 
@@ -364,6 +378,10 @@ static int dev_crypto_md5_update(EVP_MD_CTX *ctx, const void *data, return do_digest(md_data->sess.ses, md_data->md, data, len); md_data->data = OPENSSL_realloc(md_data->data, md_data->len + len); + if (md_data->data == NULL) { + err("DEV_CRYPTO_MD5_UPDATE: unable to allocate memory"); + abort(); + } memcpy(md_data->data + md_data->len, data, len); md_data->len += len; @@ -397,6 +415,10 @@ static int dev_crypto_md5_copy(EVP_MD_CTX *to, const EVP_MD_CTX *from) assert(from->digest->flags & EVP_MD_FLAG_ONESHOT); to_md->data = OPENSSL_malloc(from_md->len); + if (to_md->data == NULL) { + err("DEV_CRYPTO_MD5_COPY: unable to allocate memory"); + abort(); + } memcpy(to_md->data, from_md->data, from_md->len); return 1; diff --git a/thirdparty/openssl/crypto/evp/p_lib.c b/thirdparty/openssl/crypto/evp/p_lib.c index c0171244d5..545d04fd77 100644 --- a/thirdparty/openssl/crypto/evp/p_lib.c +++ b/thirdparty/openssl/crypto/evp/p_lib.c @@ -130,6 +130,14 @@ int EVP_PKEY_copy_parameters(EVP_PKEY *to, const EVP_PKEY *from) EVPerr(EVP_F_EVP_PKEY_COPY_PARAMETERS, EVP_R_MISSING_PARAMETERS); goto err; } + + if (!EVP_PKEY_missing_parameters(to)) { + if (EVP_PKEY_cmp_parameters(to, from) == 1) + return 1; + EVPerr(EVP_F_EVP_PKEY_COPY_PARAMETERS, EVP_R_DIFFERENT_PARAMETERS); + return 0; + } + if (from->ameth && from->ameth->param_copy) return from->ameth->param_copy(to, from); err: diff --git a/thirdparty/openssl/crypto/evp/pmeth_fn.c b/thirdparty/openssl/crypto/evp/pmeth_fn.c index a8b7f2f6d5..727869e3ee 100644 --- a/thirdparty/openssl/crypto/evp/pmeth_fn.c +++ b/thirdparty/openssl/crypto/evp/pmeth_fn.c @@ -65,20 +65,22 @@ #include "evp_locl.h" #define M_check_autoarg(ctx, arg, arglen, err) \ - if (ctx->pmeth->flags & EVP_PKEY_FLAG_AUTOARGLEN) \ - { \ - size_t pksize = (size_t)EVP_PKEY_size(ctx->pkey); \ - if (!arg) \ - { \ - *arglen = pksize; \ - return 1; \ - } \ - else if (*arglen < pksize) \ - { \ - EVPerr(err, EVP_R_BUFFER_TOO_SMALL); /*ckerr_ignore*/\ - return 
0; \ - } \ - } + if (ctx->pmeth->flags & EVP_PKEY_FLAG_AUTOARGLEN) { \ + size_t pksize = (size_t)EVP_PKEY_size(ctx->pkey); \ + \ + if (pksize == 0) { \ + EVPerr(err, EVP_R_INVALID_KEY); /*ckerr_ignore*/ \ + return 0; \ + } \ + if (!arg) { \ + *arglen = pksize; \ + return 1; \ + } \ + if (*arglen < pksize) { \ + EVPerr(err, EVP_R_BUFFER_TOO_SMALL); /*ckerr_ignore*/ \ + return 0; \ + } \ + } int EVP_PKEY_sign_init(EVP_PKEY_CTX *ctx) { diff --git a/thirdparty/openssl/crypto/evp/pmeth_gn.c b/thirdparty/openssl/crypto/evp/pmeth_gn.c index 6435f1b632..6a4d3573ff 100644 --- a/thirdparty/openssl/crypto/evp/pmeth_gn.c +++ b/thirdparty/openssl/crypto/evp/pmeth_gn.c @@ -149,8 +149,10 @@ int EVP_PKEY_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY **ppkey) if (!ppkey) return -1; - if (!*ppkey) + if (*ppkey == NULL) *ppkey = EVP_PKEY_new(); + if (*ppkey == NULL) + return -1; ret = ctx->pmeth->keygen(ctx, *ppkey); if (ret <= 0) { diff --git a/thirdparty/openssl/crypto/evp/pmeth_lib.c b/thirdparty/openssl/crypto/evp/pmeth_lib.c index 9f81d10021..b7b7bdcd02 100644 --- a/thirdparty/openssl/crypto/evp/pmeth_lib.c +++ b/thirdparty/openssl/crypto/evp/pmeth_lib.c @@ -91,7 +91,9 @@ static const EVP_PKEY_METHOD *standard_methods[] = { &ec_pkey_meth, #endif &hmac_pkey_meth, +#ifndef OPENSSL_NO_CMAC &cmac_pkey_meth, +#endif #ifndef OPENSSL_NO_DH &dhx_pkey_meth #endif @@ -186,6 +188,7 @@ static EVP_PKEY_CTX *int_ctx_new(EVP_PKEY *pkey, ENGINE *e, int id) if (pmeth->init) { if (pmeth->init(ret) <= 0) { + ret->pmeth = NULL; EVP_PKEY_CTX_free(ret); return NULL; } @@ -197,6 +200,7 @@ static EVP_PKEY_CTX *int_ctx_new(EVP_PKEY *pkey, ENGINE *e, int id) EVP_PKEY_METHOD *EVP_PKEY_meth_new(int id, int flags) { EVP_PKEY_METHOD *pmeth; + pmeth = OPENSSL_malloc(sizeof(EVP_PKEY_METHOD)); if (!pmeth) return NULL; @@ -205,33 +209,6 @@ EVP_PKEY_METHOD *EVP_PKEY_meth_new(int id, int flags) pmeth->pkey_id = id; pmeth->flags = flags | EVP_PKEY_FLAG_DYNAMIC; - - pmeth->init = 0; - pmeth->copy = 0; - pmeth->cleanup = 0; - 
pmeth->paramgen_init = 0; - pmeth->paramgen = 0; - pmeth->keygen_init = 0; - pmeth->keygen = 0; - pmeth->sign_init = 0; - pmeth->sign = 0; - pmeth->verify_init = 0; - pmeth->verify = 0; - pmeth->verify_recover_init = 0; - pmeth->verify_recover = 0; - pmeth->signctx_init = 0; - pmeth->signctx = 0; - pmeth->verifyctx_init = 0; - pmeth->verifyctx = 0; - pmeth->encrypt_init = 0; - pmeth->encrypt = 0; - pmeth->decrypt_init = 0; - pmeth->decrypt = 0; - pmeth->derive_init = 0; - pmeth->derive = 0; - pmeth->ctrl = 0; - pmeth->ctrl_str = 0; - return pmeth; } @@ -339,6 +316,7 @@ EVP_PKEY_CTX *EVP_PKEY_CTX_dup(EVP_PKEY_CTX *pctx) if (pctx->pmeth->copy(rctx, pctx) > 0) return rctx; + rctx->pmeth = NULL; EVP_PKEY_CTX_free(rctx); return NULL; diff --git a/thirdparty/openssl/crypto/ex_data.c b/thirdparty/openssl/crypto/ex_data.c index f96a51781a..108a1959ea 100644 --- a/thirdparty/openssl/crypto/ex_data.c +++ b/thirdparty/openssl/crypto/ex_data.c @@ -331,7 +331,11 @@ static EX_CLASS_ITEM *def_get_class(int class_index) * from the insert will be NULL */ (void)lh_EX_CLASS_ITEM_insert(ex_data, gen); - p = gen; + p = lh_EX_CLASS_ITEM_retrieve(ex_data, &d); + if (p != gen) { + sk_CRYPTO_EX_DATA_FUNCS_free(gen->meth); + OPENSSL_free(gen); + } } } } @@ -455,7 +459,7 @@ static int int_dup_ex_data(int class_index, CRYPTO_EX_DATA *to, CRYPTO_EX_DATA *from) { int mx, j, i; - char *ptr; + void *ptr; CRYPTO_EX_DATA_FUNCS **storage = NULL; EX_CLASS_ITEM *item; if (!from->sk) @@ -469,6 +473,8 @@ static int int_dup_ex_data(int class_index, CRYPTO_EX_DATA *to, if (j < mx) mx = j; if (mx > 0) { + if (!CRYPTO_set_ex_data(to, mx - 1, NULL)) + goto skip; storage = OPENSSL_malloc(mx * sizeof(CRYPTO_EX_DATA_FUNCS *)); if (!storage) goto skip; @@ -499,11 +505,12 @@ static void int_free_ex_data(int class_index, void *obj, CRYPTO_EX_DATA *ad) int mx, i; EX_CLASS_ITEM *item; void *ptr; + CRYPTO_EX_DATA_FUNCS *f; CRYPTO_EX_DATA_FUNCS **storage = NULL; if (ex_data == NULL) - return; + goto err; if ((item = 
def_get_class(class_index)) == NULL) - return; + goto err; CRYPTO_r_lock(CRYPTO_LOCK_EX_DATA); mx = sk_CRYPTO_EX_DATA_FUNCS_num(item->meth); if (mx > 0) { @@ -515,23 +522,23 @@ static void int_free_ex_data(int class_index, void *obj, CRYPTO_EX_DATA *ad) } skip: CRYPTO_r_unlock(CRYPTO_LOCK_EX_DATA); - if ((mx > 0) && !storage) { - CRYPTOerr(CRYPTO_F_INT_FREE_EX_DATA, ERR_R_MALLOC_FAILURE); - return; - } for (i = 0; i < mx; i++) { - if (storage[i] && storage[i]->free_func) { + if (storage != NULL) + f = storage[i]; + else { + CRYPTO_r_lock(CRYPTO_LOCK_EX_DATA); + f = sk_CRYPTO_EX_DATA_FUNCS_value(item->meth, i); + CRYPTO_r_unlock(CRYPTO_LOCK_EX_DATA); + } + if (f != NULL && f->free_func != NULL) { ptr = CRYPTO_get_ex_data(ad, i); - storage[i]->free_func(obj, ptr, ad, i, - storage[i]->argl, storage[i]->argp); + f->free_func(obj, ptr, ad, i, f->argl, f->argp); } } - if (storage) - OPENSSL_free(storage); - if (ad->sk) { - sk_void_free(ad->sk); - ad->sk = NULL; - } + OPENSSL_free(storage); + err: + sk_void_free(ad->sk); + ad->sk = NULL; } /********************************************************************/ diff --git a/thirdparty/openssl/crypto/hmac/hm_pmeth.c b/thirdparty/openssl/crypto/hmac/hm_pmeth.c index 0ffff79cc4..0a59a01cf0 100644 --- a/thirdparty/openssl/crypto/hmac/hm_pmeth.c +++ b/thirdparty/openssl/crypto/hmac/hm_pmeth.c @@ -99,15 +99,18 @@ static int pkey_hmac_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src) sctx = src->data; dctx = dst->data; dctx->md = sctx->md; - HMAC_CTX_init(&dctx->ctx); if (!HMAC_CTX_copy(&dctx->ctx, &sctx->ctx)) - return 0; - if (sctx->ktmp.data) { + goto err; + if (sctx->ktmp.data != NULL) { if (!ASN1_OCTET_STRING_set(&dctx->ktmp, sctx->ktmp.data, sctx->ktmp.length)) - return 0; + goto err; } return 1; + err: + HMAC_CTX_cleanup(&dctx->ctx); + OPENSSL_free(dctx); + return 0; } static void pkey_hmac_cleanup(EVP_PKEY_CTX *ctx) diff --git a/thirdparty/openssl/crypto/hmac/hmac.c b/thirdparty/openssl/crypto/hmac/hmac.c index 
51a0a3efcd..213504e85f 100644 --- a/thirdparty/openssl/crypto/hmac/hmac.c +++ b/thirdparty/openssl/crypto/hmac/hmac.c @@ -234,7 +234,7 @@ void HMAC_CTX_cleanup(HMAC_CTX *ctx) EVP_MD_CTX_cleanup(&ctx->i_ctx); EVP_MD_CTX_cleanup(&ctx->o_ctx); EVP_MD_CTX_cleanup(&ctx->md_ctx); - memset(ctx, 0, sizeof *ctx); + OPENSSL_cleanse(ctx, sizeof *ctx); } unsigned char *HMAC(const EVP_MD *evp_md, const void *key, int key_len, diff --git a/thirdparty/openssl/crypto/include/internal/bn_conf.h b/thirdparty/openssl/crypto/include/internal/bn_conf.h new file mode 100644 index 0000000000..34bd8b78b4 --- /dev/null +++ b/thirdparty/openssl/crypto/include/internal/bn_conf.h @@ -0,0 +1,28 @@ +/* WARNING: do not edit! */ +/* Generated by Makefile from crypto/include/internal/bn_conf.h.in */ +/* + * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef HEADER_BN_CONF_H +# define HEADER_BN_CONF_H + +/* + * The contents of this file are not used in the UEFI build, as + * both 32-bit and 64-bit builds are supported from a single run + * of the Configure script. + */ + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#define SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#undef THIRTY_TWO_BIT + +#endif diff --git a/thirdparty/openssl/crypto/include/internal/dso_conf.h b/thirdparty/openssl/crypto/include/internal/dso_conf.h new file mode 100644 index 0000000000..7a52dd1f1a --- /dev/null +++ b/thirdparty/openssl/crypto/include/internal/dso_conf.h @@ -0,0 +1,16 @@ +/* WARNING: do not edit! */ +/* Generated by Makefile from crypto/include/internal/dso_conf.h.in */ +/* + * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. 
+ * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef HEADER_DSO_CONF_H +# define HEADER_DSO_CONF_H + +# define DSO_EXTENSION ".so" +#endif diff --git a/thirdparty/openssl/crypto/install-crypto.com b/thirdparty/openssl/crypto/install-crypto.com deleted file mode 100755 index af1d75b526..0000000000 --- a/thirdparty/openssl/crypto/install-crypto.com +++ /dev/null @@ -1,196 +0,0 @@ -$! INSTALL.COM -- Installs the files in a given directory tree -$! -$! Author: Richard Levitte <richard@levitte.org> -$! Time of creation: 22-MAY-1998 10:13 -$! -$! Changes by Zoltan Arpadffy <zoli@polarhome.com> -$! -$! P1 root of the directory tree -$! P2 "64" for 64-bit pointers. -$! -$! -$! Announce/identify. -$! -$ proc = f$environment( "procedure") -$ write sys$output "@@@ "+ - - f$parse( proc, , , "name")+ f$parse( proc, , , "type") -$! -$ on error then goto tidy -$ on control_c then goto tidy -$! -$ if (p1 .eqs. "") -$ then -$ write sys$output "First argument missing." -$ write sys$output - - "It should be the directory where you want things installed." -$ exit -$ endif -$! -$ if (f$getsyi( "cpu") .lt. 128) -$ then -$ arch = "VAX" -$ else -$ arch = f$edit( f$getsyi( "arch_name"), "upcase") -$ if (arch .eqs. "") then arch = "UNK" -$ endif -$! -$ archd = arch -$ lib32 = "32" -$ shr = "_SHR32" -$! -$ if (p2 .nes. "") -$ then -$ if (p2 .eqs. "64") -$ then -$ archd = arch+ "_64" -$ lib32 = "" -$ shr = "_SHR" -$ else -$ if (p2 .nes. "32") -$ then -$ write sys$output "Second argument invalid." -$ write sys$output "It should be "32", "64", or nothing." -$ exit -$ endif -$ endif -$ endif -$! 
-$ root = f$parse( p1, "[]A.;0", , , "syntax_only, no_conceal") - "A.;0" -$ root_dev = f$parse( root, , , "device", "syntax_only") -$ root_dir = f$parse( root, , , "directory", "syntax_only") - - - "[000000." - "][" - "[" - "]" -$ root = root_dev + "[" + root_dir -$! -$ define /nolog wrk_sslroot 'root'.] /trans=conc -$ define /nolog wrk_sslinclude wrk_sslroot:[include] -$ define /nolog wrk_sslxlib wrk_sslroot:['arch'_lib] -$! -$ if f$parse("wrk_sslroot:[000000]") .eqs. "" then - - create /directory /log wrk_sslroot:[000000] -$ if f$parse("wrk_sslinclude:") .eqs. "" then - - create /directory /log wrk_sslinclude: -$ if f$parse("wrk_sslxlib:") .eqs. "" then - - create /directory /log wrk_sslxlib: -$! -$ sdirs := , - - 'archd', - - objects, - - md4, md5, sha, mdc2, hmac, ripemd, whrlpool, - - des, aes, rc2, rc4, idea, bf, cast, camellia, seed, - - bn, ec, rsa, dsa, ecdsa, dh, ecdh, dso, engine, - - buffer, bio, stack, lhash, rand, err, - - evp, asn1, pem, x509, x509v3, conf, txt_db, pkcs7, pkcs12, comp, ocsp, - - ui, krb5, - - store, cms, pqueue, ts, jpake -$! 
-$ exheader_ := crypto.h, opensslv.h, ebcdic.h, symhacks.h, ossl_typ.h -$ exheader_'archd' := opensslconf.h -$ exheader_objects := objects.h, obj_mac.h -$ exheader_md2 := md2.h -$ exheader_md4 := md4.h -$ exheader_md5 := md5.h -$ exheader_sha := sha.h -$ exheader_mdc2 := mdc2.h -$ exheader_hmac := hmac.h -$ exheader_ripemd := ripemd.h -$ exheader_whrlpool := whrlpool.h -$ exheader_des := des.h, des_old.h -$ exheader_aes := aes.h -$ exheader_rc2 := rc2.h -$ exheader_rc4 := rc4.h -$ exheader_rc5 := rc5.h -$ exheader_idea := idea.h -$ exheader_bf := blowfish.h -$ exheader_cast := cast.h -$ exheader_camellia := camellia.h -$ exheader_seed := seed.h -$ exheader_modes := modes.h -$ exheader_bn := bn.h -$ exheader_ec := ec.h -$ exheader_rsa := rsa.h -$ exheader_dsa := dsa.h -$ exheader_ecdsa := ecdsa.h -$ exheader_dh := dh.h -$ exheader_ecdh := ecdh.h -$ exheader_dso := dso.h -$ exheader_engine := engine.h -$ exheader_buffer := buffer.h -$ exheader_bio := bio.h -$ exheader_stack := stack.h, safestack.h -$ exheader_lhash := lhash.h -$ exheader_rand := rand.h -$ exheader_err := err.h -$ exheader_evp := evp.h -$ exheader_asn1 := asn1.h, asn1_mac.h, asn1t.h -$ exheader_pem := pem.h, pem2.h -$ exheader_x509 := x509.h, x509_vfy.h -$ exheader_x509v3 := x509v3.h -$ exheader_conf := conf.h, conf_api.h -$ exheader_txt_db := txt_db.h -$ exheader_pkcs7 := pkcs7.h -$ exheader_pkcs12 := pkcs12.h -$ exheader_comp := comp.h -$ exheader_ocsp := ocsp.h -$ exheader_ui := ui.h, ui_compat.h -$ exheader_krb5 := krb5_asn.h -$! exheader_store := store.h, str_compat.h -$ exheader_store := store.h -$ exheader_cms := cms.h -$ exheader_pqueue := pqueue.h -$ exheader_ts := ts.h -$ exheader_jpake := jpake.h -$ libs := ssl_libcrypto -$! -$ exe_dir := [-.'archd'.exe.crypto] -$! -$! Header files. -$! -$ i = 0 -$ loop_sdirs: -$ d = f$edit( f$element( i, ",", sdirs), "trim") -$ i = i + 1 -$ if d .eqs. "," then goto loop_sdirs_end -$ tmp = exheader_'d' -$ if (d .nes. 
"") then d = "."+ d -$ copy /protection = w:re ['d']'tmp' wrk_sslinclude: /log -$ goto loop_sdirs -$ loop_sdirs_end: -$! -$! Object libraries, shareable images. -$! -$ i = 0 -$ loop_lib: -$ e = f$edit( f$element( i, ",", libs), "trim") -$ i = i + 1 -$ if e .eqs. "," then goto loop_lib_end -$ set noon -$ file = exe_dir+ e+ lib32+ ".olb" -$ if f$search( file) .nes. "" -$ then -$ copy /protection = w:re 'file' wrk_sslxlib: /log -$ endif -$! -$ file = exe_dir+ e+ shr+ ".exe" -$ if f$search( file) .nes. "" -$ then -$ copy /protection = w:re 'file' wrk_sslxlib: /log -$ endif -$ set on -$ goto loop_lib -$ loop_lib_end: -$! -$ tidy: -$! -$ call deass wrk_sslroot -$ call deass wrk_sslinclude -$ call deass wrk_sslxlib -$! -$ exit -$! -$ deass: subroutine -$ if (f$trnlnm( p1, "LNM$PROCESS") .nes. "") -$ then -$ deassign /process 'p1' -$ endif -$ endsubroutine -$! diff --git a/thirdparty/openssl/crypto/jpake/jpake.c b/thirdparty/openssl/crypto/jpake/jpake.c index ebc0975575..2ba75f0172 100644 --- a/thirdparty/openssl/crypto/jpake/jpake.c +++ b/thirdparty/openssl/crypto/jpake/jpake.c @@ -116,6 +116,8 @@ JPAKE_CTX *JPAKE_CTX_new(const char *name, const char *peer_name, const BIGNUM *secret) { JPAKE_CTX *ctx = OPENSSL_malloc(sizeof *ctx); + if (ctx == NULL) + return NULL; JPAKE_CTX_init(ctx, name, peer_name, p, g, q, secret); @@ -151,6 +153,8 @@ static void hashbn(SHA_CTX *sha, const BIGNUM *bn) size_t l = BN_num_bytes(bn); unsigned char *bin = OPENSSL_malloc(l); + if (bin == NULL) + return; hashlength(sha, l); BN_bn2bin(bn, bin); SHA1_Update(sha, bin, l); diff --git a/thirdparty/openssl/crypto/lhash/lhash.c b/thirdparty/openssl/crypto/lhash/lhash.c index 53c5c138bb..f20353aea3 100644 --- a/thirdparty/openssl/crypto/lhash/lhash.c +++ b/thirdparty/openssl/crypto/lhash/lhash.c @@ -335,8 +335,8 @@ static void expand(_LHASH *lh) n = (LHASH_NODE **)OPENSSL_realloc(lh->b, (int)(sizeof(LHASH_NODE *) * j)); if (n == NULL) { -/* fputs("realloc error in lhash",stderr); */ lh->error++; + 
lh->num_nodes--; lh->p = 0; return; } diff --git a/thirdparty/openssl/crypto/md2/md2_dgst.c b/thirdparty/openssl/crypto/md2/md2_dgst.c index 9cd79f8d70..7f5d9ba69b 100644 --- a/thirdparty/openssl/crypto/md2/md2_dgst.c +++ b/thirdparty/openssl/crypto/md2/md2_dgst.c @@ -219,6 +219,6 @@ int MD2_Final(unsigned char *md, MD2_CTX *c) for (i = 0; i < 16; i++) md[i] = (UCHAR) (p1[i] & 0xff); - memset((char *)&c, 0, sizeof(c)); + OPENSSL_cleanse(c, sizeof(*c)); return 1; } diff --git a/thirdparty/openssl/crypto/md32_common.h b/thirdparty/openssl/crypto/md32_common.h index 96828d2693..b5a04bf133 100644 --- a/thirdparty/openssl/crypto/md32_common.h +++ b/thirdparty/openssl/crypto/md32_common.h @@ -109,6 +109,8 @@ * <appro@fy.chalmers.se> */ +#include <openssl/crypto.h> + #if !defined(DATA_ORDER_IS_BIG_ENDIAN) && !defined(DATA_ORDER_IS_LITTLE_ENDIAN) # error "DATA_ORDER must be defined!" #endif @@ -329,6 +331,12 @@ int HASH_UPDATE(HASH_CTX *c, const void *data_, size_t len) data += n; len -= n; c->num = 0; + /* + * We use memset rather than OPENSSL_cleanse() here deliberately. + * Using OPENSSL_cleanse() here could be a performance issue. It + * will get properly cleansed on finalisation so this isn't a + * security problem. + */ memset(p, 0, HASH_CBLOCK); /* keep it zeroed */ } else { memcpy(p + n, data, len); @@ -384,7 +392,7 @@ int HASH_FINAL(unsigned char *md, HASH_CTX *c) p -= HASH_CBLOCK; HASH_BLOCK_DATA_ORDER(c, p, 1); c->num = 0; - memset(p, 0, HASH_CBLOCK); + OPENSSL_cleanse(p, HASH_CBLOCK); #ifndef HASH_MAKE_STRING # error "HASH_MAKE_STRING must be defined!" 
diff --git a/thirdparty/openssl/crypto/mdc2/mdc2dgst.c b/thirdparty/openssl/crypto/mdc2/mdc2dgst.c index 6615cf84d7..2dce493633 100644 --- a/thirdparty/openssl/crypto/mdc2/mdc2dgst.c +++ b/thirdparty/openssl/crypto/mdc2/mdc2dgst.c @@ -91,7 +91,7 @@ int MDC2_Update(MDC2_CTX *c, const unsigned char *in, size_t len) i = c->num; if (i != 0) { - if (i + len < MDC2_BLOCK) { + if (len < MDC2_BLOCK - i) { /* partial block */ memcpy(&(c->data[i]), in, len); c->num += (int)len; diff --git a/thirdparty/openssl/crypto/mem.c b/thirdparty/openssl/crypto/mem.c index fdad49b76e..dd4c9ce9e0 100644 --- a/thirdparty/openssl/crypto/mem.c +++ b/thirdparty/openssl/crypto/mem.c @@ -82,6 +82,14 @@ static void *default_malloc_ex(size_t num, const char *file, int line) static void *(*malloc_ex_func) (size_t, const char *file, int line) = default_malloc_ex; +#ifdef OPENSSL_SYS_VMS +# if __INITIAL_POINTER_SIZE == 64 +# define realloc _realloc64 +# elif __INITIAL_POINTER_SIZE == 32 +# define realloc _realloc32 +# endif +#endif + static void *(*realloc_func) (void *, size_t) = realloc; static void *default_realloc_ex(void *str, size_t num, const char *file, int line) @@ -92,7 +100,11 @@ static void *default_realloc_ex(void *str, size_t num, static void *(*realloc_ex_func) (void *, size_t, const char *file, int line) = default_realloc_ex; -static void (*free_func) (void *) = free; +#ifdef OPENSSL_SYS_VMS + static void (*free_func) (__void_ptr64) = free; +#else + static void (*free_func) (void *) = free; +#endif static void *(*malloc_locked_func) (size_t) = malloc; static void *default_malloc_locked_ex(size_t num, const char *file, int line) @@ -103,7 +115,11 @@ static void *default_malloc_locked_ex(size_t num, const char *file, int line) static void *(*malloc_locked_ex_func) (size_t, const char *file, int line) = default_malloc_locked_ex; -static void (*free_locked_func) (void *) = free; +#ifdef OPENSSL_SYS_VMS + static void (*free_locked_func) (__void_ptr64) = free; +#else + static void 
(*free_locked_func) (void *) = free; +#endif /* may be changed as long as 'allow_customize_debug' is set */ /* XXX use correct function pointer types */ @@ -134,12 +150,12 @@ static long (*get_debug_options_func) (void) = NULL; int CRYPTO_set_mem_functions(void *(*m) (size_t), void *(*r) (void *, size_t), void (*f) (void *)) { - /* Dummy call just to ensure OPENSSL_init() gets linked in */ - OPENSSL_init(); if (!allow_customize) return 0; if ((m == 0) || (r == 0) || (f == 0)) return 0; + /* Dummy call just to ensure OPENSSL_init() gets linked in */ + OPENSSL_init(); malloc_func = m; malloc_ex_func = default_malloc_ex; realloc_func = r; @@ -298,18 +314,6 @@ void *CRYPTO_malloc_locked(int num, const char *file, int line) if (malloc_debug_func != NULL) malloc_debug_func(ret, num, file, line, 1); -#ifndef OPENSSL_CPUID_OBJ - /* - * Create a dependency on the value of 'cleanse_ctr' so our memory - * sanitisation function can't be optimised out. NB: We only do this for - * >2Kb so the overhead doesn't bother us. - */ - if (ret && (num > 2048)) { - extern unsigned char cleanse_ctr; - ((unsigned char *)ret)[0] = cleanse_ctr; - } -#endif - return ret; } @@ -346,18 +350,6 @@ void *CRYPTO_malloc(int num, const char *file, int line) if (malloc_debug_func != NULL) malloc_debug_func(ret, num, file, line, 1); -#ifndef OPENSSL_CPUID_OBJ - /* - * Create a dependency on the value of 'cleanse_ctr' so our memory - * sanitisation function can't be optimised out. NB: We only do this for - * >2Kb so the overhead doesn't bother us. 
- */ - if (ret && (num > 2048)) { - extern unsigned char cleanse_ctr; - ((unsigned char *)ret)[0] = cleanse_ctr; - } -#endif - return ret; } diff --git a/thirdparty/openssl/crypto/mem_clr.c b/thirdparty/openssl/crypto/mem_clr.c index ab85344eef..579e9d1825 100644 --- a/thirdparty/openssl/crypto/mem_clr.c +++ b/thirdparty/openssl/crypto/mem_clr.c @@ -60,22 +60,16 @@ #include <string.h> #include <openssl/crypto.h> -unsigned char cleanse_ctr = 0; +/* + * Pointer to memset is volatile so that compiler must de-reference + * the pointer and can't assume that it points to any function in + * particular (such as memset, which it then might further "optimize") + */ +typedef void *(*memset_t)(void *,int,size_t); + +static volatile memset_t memset_func = memset; void OPENSSL_cleanse(void *ptr, size_t len) { - unsigned char *p = ptr; - size_t loop = len, ctr = cleanse_ctr; - - if (ptr == NULL) - return; - - while (loop--) { - *(p++) = (unsigned char)ctr; - ctr += (17 + ((size_t)p & 0xF)); - } - p = memchr(ptr, (unsigned char)ctr, len); - if (p) - ctr += (63 + (size_t)p); - cleanse_ctr = (unsigned char)ctr; + memset_func(ptr, 0, len); } diff --git a/thirdparty/openssl/crypto/modes/ctr128.c b/thirdparty/openssl/crypto/modes/ctr128.c index bcafd6b6bf..d4b22728e6 100644 --- a/thirdparty/openssl/crypto/modes/ctr128.c +++ b/thirdparty/openssl/crypto/modes/ctr128.c @@ -100,7 +100,7 @@ static void ctr128_inc_aligned(unsigned char *counter) --n; d = data[n] += c; /* did addition carry? 
*/ - c = ((d - c) ^ d) >> (sizeof(size_t) * 8 - 1); + c = ((d - c) & ~d) >> (sizeof(size_t) * 8 - 1); } while (n); } #endif diff --git a/thirdparty/openssl/crypto/o_dir.c b/thirdparty/openssl/crypto/o_dir.c index f9dbed8711..fb3b2fd8e4 100644 --- a/thirdparty/openssl/crypto/o_dir.c +++ b/thirdparty/openssl/crypto/o_dir.c @@ -73,7 +73,8 @@ #include "o_dir.h" #define LPDIR_H -#if defined OPENSSL_SYS_UNIX || defined DJGPP +#if defined OPENSSL_SYS_UNIX || defined DJGPP \ + || (defined __VMS_VER && __VMS_VER >= 70000000) # include "LPdir_unix.c" #elif defined OPENSSL_SYS_VMS # include "LPdir_vms.c" diff --git a/thirdparty/openssl/crypto/o_init.c b/thirdparty/openssl/crypto/o_init.c index 2088388128..185841ea04 100644 --- a/thirdparty/openssl/crypto/o_init.c +++ b/thirdparty/openssl/crypto/o_init.c @@ -73,6 +73,9 @@ void OPENSSL_init(void) done = 1; #ifdef OPENSSL_FIPS FIPS_set_locking_callbacks(CRYPTO_lock, CRYPTO_add_lock); +# ifndef OPENSSL_NO_DEPRECATED + FIPS_crypto_set_id_callback(CRYPTO_thread_id); +# endif FIPS_set_error_callbacks(ERR_put_error, ERR_add_error_vdata); FIPS_set_malloc_callbacks(CRYPTO_malloc, CRYPTO_free); RAND_init_fips(); diff --git a/thirdparty/openssl/crypto/o_str.c b/thirdparty/openssl/crypto/o_str.c index c10842300d..1854798e2c 100644 --- a/thirdparty/openssl/crypto/o_str.c +++ b/thirdparty/openssl/crypto/o_str.c @@ -59,16 +59,15 @@ #include <ctype.h> #include <e_os.h> +// -- GODOT start -- +#include <openssl/opensslconf.h> +// -- GODOT end -- #include "o_str.h" #if !defined(OPENSSL_IMPLEMENTS_strncasecmp) && \ !defined(OPENSSL_SYSNAME_WIN32) && !defined(OPENSSL_SYSNAME_WINCE) && \ !defined(NETWARE_CLIB) -#ifdef _WIN32 -#include <string.h> -#else -#include <strings.h> -#endif +# include <strings.h> #endif int OPENSSL_strncasecmp(const char *str1, const char *str2, size_t n) diff --git a/thirdparty/openssl/crypto/o_time.c b/thirdparty/openssl/crypto/o_time.c index 635dae184d..04d805d9a9 100644..100755 --- a/thirdparty/openssl/crypto/o_time.c 
+++ b/thirdparty/openssl/crypto/o_time.c @@ -78,18 +78,36 @@ # include <descrip.h> # include <stdlib.h> # endif /* ndef VMS_GMTIME_OK */ -#endif + + +/* + * Needed to pick up the correct definitions and declarations in some of the + * DEC C Header Files (*.H). + */ +# define __NEW_STARLET 1 + +# if (defined(__alpha) || defined(__ia64)) +# include <iledef.h> +# else + +/* VAX */ +typedef struct _ile3 { /* Copied from ILEDEF.H for Alpha */ +# pragma __nomember_alignment + unsigned short int ile3$w_length; /* Length of buffer in bytes */ + unsigned short int ile3$w_code; /* Item code value */ + void *ile3$ps_bufaddr; /* Buffer address */ + unsigned short int *ile3$ps_retlen_addr; /* Address of word for returned length */ +} ILE3; +# endif /* alpha || ia64 */ +#endif /* OPENSSL_SYS_VMS */ struct tm *OPENSSL_gmtime(const time_t *timer, struct tm *result) { struct tm *ts = NULL; #if defined(OPENSSL_THREADS) && !defined(OPENSSL_SYS_WIN32) && !defined(OPENSSL_SYS_OS2) && (!defined(OPENSSL_SYS_VMS) || defined(gmtime_r)) && !defined(OPENSSL_SYS_MACOSX) && !defined(OPENSSL_SYS_SUNOS) - /* - * should return &data, but doesn't on some systems, so we don't even - * look at the return value - */ - gmtime_r(timer, result); + if (gmtime_r(timer, result) == NULL) + return NULL; ts = result; #elif !defined(OPENSSL_SYS_VMS) || defined(VMS_GMTIME_OK) ts = gmtime(timer); @@ -105,26 +123,42 @@ struct tm *OPENSSL_gmtime(const time_t *timer, struct tm *result) static $DESCRIPTOR(lognam, "SYS$TIMEZONE_DIFFERENTIAL"); char logvalue[256]; unsigned int reslen = 0; - struct { - short buflen; - short code; - void *bufaddr; - unsigned int *reslen; - } itemlist[] = { - { - 0, LNM$_STRING, 0, 0 - }, - { - 0, 0, 0, 0 - }, - }; +# if __INITIAL_POINTER_SIZE == 64 + ILEB_64 itemlist[2], *pitem; +# else + ILE3 itemlist[2], *pitem; +# endif int status; time_t t; + + /* + * Setup an itemlist for the call to $TRNLNM - Translate Logical Name. 
+ */ + pitem = itemlist; + +# if __INITIAL_POINTER_SIZE == 64 + pitem->ileb_64$w_mbo = 1; + pitem->ileb_64$w_code = LNM$_STRING; + pitem->ileb_64$l_mbmo = -1; + pitem->ileb_64$q_length = sizeof (logvalue); + pitem->ileb_64$pq_bufaddr = logvalue; + pitem->ileb_64$pq_retlen_addr = (unsigned __int64 *) &reslen; + pitem++; + /* Last item of the item list is null terminated */ + pitem->ileb_64$q_length = pitem->ileb_64$w_code = 0; +# else + pitem->ile3$w_length = sizeof (logvalue); + pitem->ile3$w_code = LNM$_STRING; + pitem->ile3$ps_bufaddr = logvalue; + pitem->ile3$ps_retlen_addr = (unsigned short int *) &reslen; + pitem++; + /* Last item of the item list is null terminated */ + pitem->ile3$w_length = pitem->ile3$w_code = 0; +# endif + + /* Get the value for SYS$TIMEZONE_DIFFERENTIAL */ - itemlist[0].buflen = sizeof(logvalue); - itemlist[0].bufaddr = logvalue; - itemlist[0].reslen = &reslen; status = sys$trnlnm(0, &tabnam, &lognam, 0, itemlist); if (!(status & 1)) return NULL; @@ -132,7 +166,7 @@ struct tm *OPENSSL_gmtime(const time_t *timer, struct tm *result) t = *timer; -/* The following is extracted from the DEC C header time.h */ + /* The following is extracted from the DEC C header time.h */ /* ** Beginning in OpenVMS Version 7.0 mktime, time, ctime, strftime ** have two implementations. 
One implementation is provided diff --git a/thirdparty/openssl/crypto/objects/o_names.c b/thirdparty/openssl/crypto/objects/o_names.c index 24859926ac..f106905ffa 100644 --- a/thirdparty/openssl/crypto/objects/o_names.c +++ b/thirdparty/openssl/crypto/objects/o_names.c @@ -191,7 +191,7 @@ int OBJ_NAME_add(const char *name, int type, const char *data) onp = (OBJ_NAME *)OPENSSL_malloc(sizeof(OBJ_NAME)); if (onp == NULL) { /* ERROR */ - return (0); + return 0; } onp->name = name; @@ -216,10 +216,11 @@ int OBJ_NAME_add(const char *name, int type, const char *data) } else { if (lh_OBJ_NAME_error(names_lh)) { /* ERROR */ - return (0); + OPENSSL_free(onp); + return 0; } } - return (1); + return 1; } int OBJ_NAME_remove(const char *name, int type) diff --git a/thirdparty/openssl/crypto/ocsp/ocsp_cl.c b/thirdparty/openssl/crypto/ocsp/ocsp_cl.c index b3612c8dfc..fca7db0b71 100644 --- a/thirdparty/openssl/crypto/ocsp/ocsp_cl.c +++ b/thirdparty/openssl/crypto/ocsp/ocsp_cl.c @@ -93,8 +93,10 @@ OCSP_ONEREQ *OCSP_request_add0_id(OCSP_REQUEST *req, OCSP_CERTID *cid) if (one->reqCert) OCSP_CERTID_free(one->reqCert); one->reqCert = cid; - if (req && !sk_OCSP_ONEREQ_push(req->tbsRequest->requestList, one)) + if (req && !sk_OCSP_ONEREQ_push(req->tbsRequest->requestList, one)) { + one->reqCert = NULL; /* do not free on error */ goto err; + } return one; err: OCSP_ONEREQ_free(one); diff --git a/thirdparty/openssl/crypto/ocsp/ocsp_ext.c b/thirdparty/openssl/crypto/ocsp/ocsp_ext.c index c19648c732..55af31b573 100644 --- a/thirdparty/openssl/crypto/ocsp/ocsp_ext.c +++ b/thirdparty/openssl/crypto/ocsp/ocsp_ext.c @@ -361,7 +361,7 @@ static int ocsp_add1_nonce(STACK_OF(X509_EXTENSION) **exts, ASN1_put_object(&tmpval, 0, len, V_ASN1_OCTET_STRING, V_ASN1_UNIVERSAL); if (val) memcpy(tmpval, val, len); - else if (RAND_pseudo_bytes(tmpval, len) < 0) + else if (RAND_bytes(tmpval, len) <= 0) goto err; if (!X509V3_add1_i2d(exts, NID_id_pkix_OCSP_Nonce, &os, 0, X509V3_ADD_REPLACE)) diff --git 
a/thirdparty/openssl/crypto/ocsp/ocsp_lib.c b/thirdparty/openssl/crypto/ocsp/ocsp_lib.c index cabf53933a..ff781e56e7 100644 --- a/thirdparty/openssl/crypto/ocsp/ocsp_lib.c +++ b/thirdparty/openssl/crypto/ocsp/ocsp_lib.c @@ -271,12 +271,18 @@ int OCSP_parse_url(const char *url, char **phost, char **pport, char **ppath, err: if (buf) OPENSSL_free(buf); - if (*ppath) + if (*ppath) { OPENSSL_free(*ppath); - if (*pport) + *ppath = NULL; + } + if (*pport) { OPENSSL_free(*pport); - if (*phost) + *pport = NULL; + } + if (*phost) { OPENSSL_free(*phost); + *phost = NULL; + } return 0; } diff --git a/thirdparty/openssl/crypto/opensslconf.h.in b/thirdparty/openssl/crypto/opensslconf.h.in deleted file mode 100644 index 7a1c85d6ec..0000000000 --- a/thirdparty/openssl/crypto/opensslconf.h.in +++ /dev/null @@ -1,154 +0,0 @@ -/* crypto/opensslconf.h.in */ - -/* Generate 80386 code? */ -#undef I386_ONLY - -#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ -#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) -#define ENGINESDIR "/usr/local/lib/engines" -#define OPENSSLDIR "/usr/local/ssl" -#endif -#endif - -#undef OPENSSL_UNISTD -#define OPENSSL_UNISTD <unistd.h> - -#undef OPENSSL_EXPORT_VAR_AS_FUNCTION - -#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) -#define IDEA_INT unsigned int -#endif - -#if defined(HEADER_MD2_H) && !defined(MD2_INT) -#define MD2_INT unsigned int -#endif - -#if defined(HEADER_RC2_H) && !defined(RC2_INT) -/* I need to put in a mod for the alpha - eay */ -#define RC2_INT unsigned int -#endif - -#if defined(HEADER_RC4_H) -#if !defined(RC4_INT) -/* using int types make the structure larger but make the code faster - * on most boxes I have tested - up to %20 faster. 
*/ -/* - * I don't know what does "most" mean, but declaring "int" is a must on: - * - Intel P6 because partial register stalls are very expensive; - * - elder Alpha because it lacks byte load/store instructions; - */ -#define RC4_INT unsigned int -#endif -#if !defined(RC4_CHUNK) -/* - * This enables code handling data aligned at natural CPU word - * boundary. See crypto/rc4/rc4_enc.c for further details. - */ -#undef RC4_CHUNK -#endif -#endif - -#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) -/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a - * %20 speed up (longs are 8 bytes, int's are 4). */ -#ifndef DES_LONG -#define DES_LONG unsigned long -#endif -#endif - -#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) -#define CONFIG_HEADER_BN_H -#undef BN_LLONG - -/* Should we define BN_DIV2W here? */ - -/* Only one for the following should be defined */ -#undef SIXTY_FOUR_BIT_LONG -#undef SIXTY_FOUR_BIT -#define THIRTY_TWO_BIT -#endif - -#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) -#define CONFIG_HEADER_RC4_LOCL_H -/* if this is defined data[i] is used instead of *data, this is a %20 - * speedup on x86 */ -#undef RC4_INDEX -#endif - -#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) -#define CONFIG_HEADER_BF_LOCL_H -#undef BF_PTR -#endif /* HEADER_BF_LOCL_H */ - -#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) -#define CONFIG_HEADER_DES_LOCL_H -#ifndef DES_DEFAULT_OPTIONS -/* the following is tweaked from a config script, that is why it is a - * protected undef/define */ -#ifndef DES_PTR -#undef DES_PTR -#endif - -/* This helps C compiler generate the correct code for multiple functional - * units. 
It reduces register dependancies at the expense of 2 more - * registers */ -#ifndef DES_RISC1 -#undef DES_RISC1 -#endif - -#ifndef DES_RISC2 -#undef DES_RISC2 -#endif - -#if defined(DES_RISC1) && defined(DES_RISC2) -#error YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! -#endif - -/* Unroll the inner loop, this sometimes helps, sometimes hinders. - * Very mucy CPU dependant */ -#ifndef DES_UNROLL -#undef DES_UNROLL -#endif - -/* These default values were supplied by - * Peter Gutman <pgut001@cs.auckland.ac.nz> - * They are only used if nothing else has been defined */ -#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) -/* Special defines which change the way the code is built depending on the - CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find - even newer MIPS CPU's, but at the moment one size fits all for - optimization options. Older Sparc's work better with only UNROLL, but - there's no way to tell at compile time what it is you're running on */ - -#if defined( __sun ) || defined ( sun ) /* Newer Sparc's */ -# define DES_PTR -# define DES_RISC1 -# define DES_UNROLL -#elif defined( __ultrix ) /* Older MIPS */ -# define DES_PTR -# define DES_RISC2 -# define DES_UNROLL -#elif defined( __osf1__ ) /* Alpha */ -# define DES_PTR -# define DES_RISC2 -#elif defined ( _AIX ) /* RS6000 */ - /* Unknown */ -#elif defined( __hpux ) /* HP-PA */ - /* Unknown */ -#elif defined( __aux ) /* 68K */ - /* Unknown */ -#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ -# define DES_UNROLL -#elif defined( __sgi ) /* Newer MIPS */ -# define DES_PTR -# define DES_RISC2 -# define DES_UNROLL -#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ -# define DES_PTR -# define DES_RISC1 -# define DES_UNROLL -#endif /* Systems-specific speed defines */ -#endif - -#endif /* DES_DEFAULT_OPTIONS */ -#endif /* HEADER_DES_LOCL_H */ diff --git a/thirdparty/openssl/crypto/pem/pem_err.c 
b/thirdparty/openssl/crypto/pem/pem_err.c index e1f4fdb432..4e5f8e936c 100644 --- a/thirdparty/openssl/crypto/pem/pem_err.c +++ b/thirdparty/openssl/crypto/pem/pem_err.c @@ -1,6 +1,6 @@ /* crypto/pem/pem_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2016 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -132,6 +132,7 @@ static ERR_STRING_DATA PEM_str_reasons[] = { "expecting private key blob"}, {ERR_REASON(PEM_R_EXPECTING_PUBLIC_KEY_BLOB), "expecting public key blob"}, + {ERR_REASON(PEM_R_HEADER_TOO_LONG), "header too long"}, {ERR_REASON(PEM_R_INCONSISTENT_HEADER), "inconsistent header"}, {ERR_REASON(PEM_R_KEYBLOB_HEADER_PARSE_ERROR), "keyblob header parse error"}, diff --git a/thirdparty/openssl/crypto/pem/pem_lib.c b/thirdparty/openssl/crypto/pem/pem_lib.c index fe881d6641..c82b3c0ae2 100644 --- a/thirdparty/openssl/crypto/pem/pem_lib.c +++ b/thirdparty/openssl/crypto/pem/pem_lib.c @@ -105,17 +105,23 @@ int PEM_def_callback(char *buf, int num, int w, void *key) prompt = "Enter PEM pass phrase:"; for (;;) { - i = EVP_read_pw_string_min(buf, MIN_LENGTH, num, prompt, w); + /* + * We assume that w == 0 means decryption, + * while w == 1 means encryption + */ + int min_len = w ? MIN_LENGTH : 0; + + i = EVP_read_pw_string_min(buf, min_len, num, prompt, w); if (i != 0) { PEMerr(PEM_F_PEM_DEF_CALLBACK, PEM_R_PROBLEMS_GETTING_PASSWORD); memset(buf, 0, (unsigned int)num); return (-1); } j = strlen(buf); - if (j < MIN_LENGTH) { + if (min_len && j < min_len) { fprintf(stderr, "phrase is too short, needs to be at least %d chars\n", - MIN_LENGTH); + min_len); } else break; } @@ -387,7 +393,7 @@ int PEM_ASN1_write_bio(i2d_of_void *i2d, const char *name, BIO *bp, } RAND_add(data, i, 0); /* put in the RSA key. 
*/ OPENSSL_assert(enc->iv_len <= (int)sizeof(iv)); - if (RAND_pseudo_bytes(iv, enc->iv_len) < 0) /* Generate a salt */ + if (RAND_bytes(iv, enc->iv_len) <= 0) /* Generate a salt */ goto err; /* * The 'iv' is used as the iv and as a salt. It is NOT taken from diff --git a/thirdparty/openssl/crypto/pem/pvkfmt.c b/thirdparty/openssl/crypto/pem/pvkfmt.c index 61864468f6..1ce5a1e319 100644 --- a/thirdparty/openssl/crypto/pem/pvkfmt.c +++ b/thirdparty/openssl/crypto/pem/pvkfmt.c @@ -127,6 +127,9 @@ static int read_lebn(const unsigned char **in, unsigned int nbyte, BIGNUM **r) # define MS_KEYTYPE_KEYX 0x1 # define MS_KEYTYPE_SIGN 0x2 +/* Maximum length of a blob after header */ +# define BLOB_MAX_LENGTH 102400 + /* The PVK file magic number: seems to spell out "bobsfile", who is Bob? */ # define MS_PVKMAGIC 0xb0b5f11eL /* Salt length for PVK files */ @@ -272,6 +275,10 @@ static EVP_PKEY *do_b2i_bio(BIO *in, int ispub) return NULL; length = blob_length(bitlen, isdss, ispub); + if (length > BLOB_MAX_LENGTH) { + PEMerr(PEM_F_DO_B2I_BIO, PEM_R_HEADER_TOO_LONG); + return NULL; + } buf = OPENSSL_malloc(length); if (!buf) { PEMerr(PEM_F_DO_B2I_BIO, ERR_R_MALLOC_FAILURE); diff --git a/thirdparty/openssl/crypto/pkcs12/p12_mutl.c b/thirdparty/openssl/crypto/pkcs12/p12_mutl.c index a9277827ff..b1f7381a6f 100644 --- a/thirdparty/openssl/crypto/pkcs12/p12_mutl.c +++ b/thirdparty/openssl/crypto/pkcs12/p12_mutl.c @@ -159,7 +159,10 @@ int PKCS12_set_mac(PKCS12 *p12, const char *pass, int passlen, int PKCS12_setup_mac(PKCS12 *p12, int iter, unsigned char *salt, int saltlen, const EVP_MD *md_type) { - if (!(p12->mac = PKCS12_MAC_DATA_new())) + PKCS12_MAC_DATA_free(p12->mac); + p12->mac = NULL; + + if ((p12->mac = PKCS12_MAC_DATA_new()) == NULL) return PKCS12_ERROR; if (iter > 1) { if (!(p12->mac->iter = M_ASN1_INTEGER_new())) { @@ -179,7 +182,7 @@ int PKCS12_setup_mac(PKCS12 *p12, int iter, unsigned char *salt, int saltlen, } p12->mac->salt->length = saltlen; if (!salt) { - if 
(RAND_pseudo_bytes(p12->mac->salt->data, saltlen) < 0) + if (RAND_bytes(p12->mac->salt->data, saltlen) <= 0) return 0; } else memcpy(p12->mac->salt->data, salt, saltlen); diff --git a/thirdparty/openssl/crypto/pkcs12/p12_npas.c b/thirdparty/openssl/crypto/pkcs12/p12_npas.c index a89b61abab..9e8ebb2a78 100644 --- a/thirdparty/openssl/crypto/pkcs12/p12_npas.c +++ b/thirdparty/openssl/crypto/pkcs12/p12_npas.c @@ -66,17 +66,18 @@ /* PKCS#12 password change routine */ -static int newpass_p12(PKCS12 *p12, char *oldpass, char *newpass); -static int newpass_bags(STACK_OF(PKCS12_SAFEBAG) *bags, char *oldpass, - char *newpass); -static int newpass_bag(PKCS12_SAFEBAG *bag, char *oldpass, char *newpass); +static int newpass_p12(PKCS12 *p12, const char *oldpass, const char *newpass); +static int newpass_bags(STACK_OF(PKCS12_SAFEBAG) *bags, const char *oldpass, + const char *newpass); +static int newpass_bag(PKCS12_SAFEBAG *bag, const char *oldpass, + const char *newpass); static int alg_get(X509_ALGOR *alg, int *pnid, int *piter, int *psaltlen); /* * Change the password on a PKCS#12 structure. 
*/ -int PKCS12_newpass(PKCS12 *p12, char *oldpass, char *newpass) +int PKCS12_newpass(PKCS12 *p12, const char *oldpass, const char *newpass) { /* Check for NULL PKCS12 structure */ @@ -103,20 +104,21 @@ int PKCS12_newpass(PKCS12 *p12, char *oldpass, char *newpass) /* Parse the outer PKCS#12 structure */ -static int newpass_p12(PKCS12 *p12, char *oldpass, char *newpass) +static int newpass_p12(PKCS12 *p12, const char *oldpass, const char *newpass) { - STACK_OF(PKCS7) *asafes, *newsafes; - STACK_OF(PKCS12_SAFEBAG) *bags; + STACK_OF(PKCS7) *asafes = NULL, *newsafes = NULL; + STACK_OF(PKCS12_SAFEBAG) *bags = NULL; int i, bagnid, pbe_nid = 0, pbe_iter = 0, pbe_saltlen = 0; PKCS7 *p7, *p7new; - ASN1_OCTET_STRING *p12_data_tmp = NULL, *macnew = NULL; + ASN1_OCTET_STRING *p12_data_tmp = NULL; unsigned char mac[EVP_MAX_MD_SIZE]; unsigned int maclen; + int rv = 0; - if (!(asafes = PKCS12_unpack_authsafes(p12))) - return 0; - if (!(newsafes = sk_PKCS7_new_null())) - return 0; + if ((asafes = PKCS12_unpack_authsafes(p12)) == NULL) + goto err; + if ((newsafes = sk_PKCS7_new_null()) == NULL) + goto err; for (i = 0; i < sk_PKCS7_num(asafes); i++) { p7 = sk_PKCS7_value(asafes, i); bagnid = OBJ_obj2nid(p7->type); @@ -125,67 +127,57 @@ static int newpass_p12(PKCS12 *p12, char *oldpass, char *newpass) } else if (bagnid == NID_pkcs7_encrypted) { bags = PKCS12_unpack_p7encdata(p7, oldpass, -1); if (!alg_get(p7->d.encrypted->enc_data->algorithm, - &pbe_nid, &pbe_iter, &pbe_saltlen)) { - sk_PKCS12_SAFEBAG_pop_free(bags, PKCS12_SAFEBAG_free); - bags = NULL; - } - } else + &pbe_nid, &pbe_iter, &pbe_saltlen)) + goto err; + } else { continue; - if (!bags) { - sk_PKCS7_pop_free(asafes, PKCS7_free); - return 0; - } - if (!newpass_bags(bags, oldpass, newpass)) { - sk_PKCS12_SAFEBAG_pop_free(bags, PKCS12_SAFEBAG_free); - sk_PKCS7_pop_free(asafes, PKCS7_free); - return 0; } + if (bags == NULL) + goto err; + if (!newpass_bags(bags, oldpass, newpass)) + goto err; /* Repack bag in same form with new 
password */ if (bagnid == NID_pkcs7_data) p7new = PKCS12_pack_p7data(bags); else p7new = PKCS12_pack_p7encdata(pbe_nid, newpass, -1, NULL, pbe_saltlen, pbe_iter, bags); + if (!p7new || !sk_PKCS7_push(newsafes, p7new)) + goto err; sk_PKCS12_SAFEBAG_pop_free(bags, PKCS12_SAFEBAG_free); - if (!p7new) { - sk_PKCS7_pop_free(asafes, PKCS7_free); - return 0; - } - sk_PKCS7_push(newsafes, p7new); + bags = NULL; } - sk_PKCS7_pop_free(asafes, PKCS7_free); /* Repack safe: save old safe in case of error */ p12_data_tmp = p12->authsafes->d.data; - if (!(p12->authsafes->d.data = ASN1_OCTET_STRING_new())) - goto saferr; + if ((p12->authsafes->d.data = ASN1_OCTET_STRING_new()) == NULL) + goto err; if (!PKCS12_pack_authsafes(p12, newsafes)) - goto saferr; - + goto err; if (!PKCS12_gen_mac(p12, newpass, -1, mac, &maclen)) - goto saferr; - if (!(macnew = ASN1_OCTET_STRING_new())) - goto saferr; - if (!ASN1_OCTET_STRING_set(macnew, mac, maclen)) - goto saferr; - ASN1_OCTET_STRING_free(p12->mac->dinfo->digest); - p12->mac->dinfo->digest = macnew; - ASN1_OCTET_STRING_free(p12_data_tmp); - - return 1; - - saferr: - /* Restore old safe */ - ASN1_OCTET_STRING_free(p12->authsafes->d.data); - ASN1_OCTET_STRING_free(macnew); - p12->authsafes->d.data = p12_data_tmp; - return 0; - + goto err; + if (!ASN1_OCTET_STRING_set(p12->mac->dinfo->digest, mac, maclen)) + goto err; + + rv = 1; + +err: + /* Restore old safe if necessary */ + if (rv == 1) { + ASN1_OCTET_STRING_free(p12_data_tmp); + } else if (p12_data_tmp != NULL) { + ASN1_OCTET_STRING_free(p12->authsafes->d.data); + p12->authsafes->d.data = p12_data_tmp; + } + sk_PKCS12_SAFEBAG_pop_free(bags, PKCS12_SAFEBAG_free); + sk_PKCS7_pop_free(asafes, PKCS7_free); + sk_PKCS7_pop_free(newsafes, PKCS7_free); + return rv; } -static int newpass_bags(STACK_OF(PKCS12_SAFEBAG) *bags, char *oldpass, - char *newpass) +static int newpass_bags(STACK_OF(PKCS12_SAFEBAG) *bags, const char *oldpass, + const char *newpass) { int i; for (i = 0; i < 
sk_PKCS12_SAFEBAG_num(bags); i++) { @@ -197,7 +189,8 @@ static int newpass_bags(STACK_OF(PKCS12_SAFEBAG) *bags, char *oldpass, /* Change password of safebag: only needs handle shrouded keybags */ -static int newpass_bag(PKCS12_SAFEBAG *bag, char *oldpass, char *newpass) +static int newpass_bag(PKCS12_SAFEBAG *bag, const char *oldpass, + const char *newpass) { PKCS8_PRIV_KEY_INFO *p8; X509_SIG *p8new; @@ -210,8 +203,10 @@ static int newpass_bag(PKCS12_SAFEBAG *bag, char *oldpass, char *newpass) return 0; if (!alg_get(bag->value.shkeybag->algor, &p8_nid, &p8_iter, &p8_saltlen)) return 0; - if (!(p8new = PKCS8_encrypt(p8_nid, NULL, newpass, -1, NULL, p8_saltlen, - p8_iter, p8))) + p8new = PKCS8_encrypt(p8_nid, NULL, newpass, -1, NULL, p8_saltlen, + p8_iter, p8); + PKCS8_PRIV_KEY_INFO_free(p8); + if (p8new == NULL) return 0; X509_SIG_free(bag->value.shkeybag); bag->value.shkeybag = p8new; diff --git a/thirdparty/openssl/crypto/pkcs12/p12_utl.c b/thirdparty/openssl/crypto/pkcs12/p12_utl.c index a0b992eab6..e466f762ff 100644 --- a/thirdparty/openssl/crypto/pkcs12/p12_utl.c +++ b/thirdparty/openssl/crypto/pkcs12/p12_utl.c @@ -91,6 +91,10 @@ char *OPENSSL_uni2asc(unsigned char *uni, int unilen) { int asclen, i; char *asctmp; + + /* string must contain an even number of bytes */ + if (unilen & 1) + return NULL; asclen = unilen / 2; /* If no terminating zero allow for one */ if (!unilen || uni[unilen - 1]) diff --git a/thirdparty/openssl/crypto/pkcs7/pk7_doit.c b/thirdparty/openssl/crypto/pkcs7/pk7_doit.c index 946aaa6543..6cf8253bc2 100644 --- a/thirdparty/openssl/crypto/pkcs7/pk7_doit.c +++ b/thirdparty/openssl/crypto/pkcs7/pk7_doit.c @@ -340,7 +340,7 @@ BIO *PKCS7_dataInit(PKCS7 *p7, BIO *bio) ivlen = EVP_CIPHER_iv_length(evp_cipher); xalg->algorithm = OBJ_nid2obj(EVP_CIPHER_type(evp_cipher)); if (ivlen > 0) - if (RAND_pseudo_bytes(iv, ivlen) <= 0) + if (RAND_bytes(iv, ivlen) <= 0) goto err; if (EVP_CipherInit_ex(ctx, evp_cipher, NULL, NULL, NULL, 1) <= 0) goto err; @@ 
-642,6 +642,8 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) } else { # if 0 bio = BIO_new(BIO_s_mem()); + if (bio == NULL) + goto err; /* * We need to set this so that when we have read all the data, the * encrypt BIO, if present, will read EOF and encode the last few diff --git a/thirdparty/openssl/crypto/ppccap.c b/thirdparty/openssl/crypto/ppccap.c index 74af4732b5..60566b1a5f 100644 --- a/thirdparty/openssl/crypto/ppccap.c +++ b/thirdparty/openssl/crypto/ppccap.c @@ -7,6 +7,10 @@ #if defined(__linux) || defined(_AIX) # include <sys/utsname.h> #endif +#if defined(__APPLE__) && defined(__MACH__) +# include <sys/types.h> +# include <sys/sysctl.h> +#endif #include <openssl/crypto.h> #include <openssl/bn.h> @@ -123,6 +127,26 @@ void OPENSSL_cpuid_setup(void) } #endif +#if defined(__APPLE__) && defined(__MACH__) + { + int val; + size_t len = sizeof(val); + + if (sysctlbyname("hw.optional.64bitops", &val, &len, NULL, 0) == 0) { + if (val) + OPENSSL_ppccap_P |= PPC_FPU64; + } + + len = sizeof(val); + if (sysctlbyname("hw.optional.altivec", &val, &len, NULL, 0) == 0) { + if (val) + OPENSSL_ppccap_P |= PPC_ALTIVEC; + } + + return; + } +#endif + memset(&ill_act, 0, sizeof(ill_act)); ill_act.sa_handler = ill_handler; ill_act.sa_mask = all_masked; diff --git a/thirdparty/openssl/crypto/rand/md_rand.c b/thirdparty/openssl/crypto/rand/md_rand.c index 5c13d57765..29e465b075 100644 --- a/thirdparty/openssl/crypto/rand/md_rand.c +++ b/thirdparty/openssl/crypto/rand/md_rand.c @@ -136,7 +136,7 @@ /* #define PREDICT 1 */ #define STATE_SIZE 1023 -static int state_num = 0, state_index = 0; +static size_t state_num = 0, state_index = 0; static unsigned char state[STATE_SIZE + MD_DIGEST_LENGTH]; static unsigned char md[MD_DIGEST_LENGTH]; static long md_count[2] = { 0, 0 }; @@ -266,17 +266,21 @@ static void ssleay_rand_add(const void *buf, int num, double add) j = (num - i); j = (j > MD_DIGEST_LENGTH) ? 
MD_DIGEST_LENGTH : j; - MD_Init(&m); - MD_Update(&m, local_md, MD_DIGEST_LENGTH); + if (!MD_Init(&m) || + !MD_Update(&m, local_md, MD_DIGEST_LENGTH)) + goto err; k = (st_idx + j) - STATE_SIZE; if (k > 0) { - MD_Update(&m, &(state[st_idx]), j - k); - MD_Update(&m, &(state[0]), k); + if (!MD_Update(&m, &(state[st_idx]), j - k) || + !MD_Update(&m, &(state[0]), k)) + goto err; } else - MD_Update(&m, &(state[st_idx]), j); + if (!MD_Update(&m, &(state[st_idx]), j)) + goto err; /* DO NOT REMOVE THE FOLLOWING CALL TO MD_Update()! */ - MD_Update(&m, buf, j); + if (!MD_Update(&m, buf, j)) + goto err; /* * We know that line may cause programs such as purify and valgrind * to complain about use of uninitialized data. The problem is not, @@ -285,8 +289,9 @@ static void ssleay_rand_add(const void *buf, int num, double add) * insecure keys. */ - MD_Update(&m, (unsigned char *)&(md_c[0]), sizeof(md_c)); - MD_Final(&m, local_md); + if (!MD_Update(&m, (unsigned char *)&(md_c[0]), sizeof(md_c)) || + !MD_Final(&m, local_md)) + goto err; md_c[1]++; buf = (const char *)buf + j; @@ -305,7 +310,6 @@ static void ssleay_rand_add(const void *buf, int num, double add) st_idx = 0; } } - EVP_MD_CTX_cleanup(&m); if (!do_not_lock) CRYPTO_w_lock(CRYPTO_LOCK_RAND); @@ -326,6 +330,9 @@ static void ssleay_rand_add(const void *buf, int num, double add) #if !defined(OPENSSL_THREADS) && !defined(OPENSSL_SYS_WIN32) assert(md_c[1] == md_count[1]); #endif + + err: + EVP_MD_CTX_cleanup(&m); } static void ssleay_rand_seed(const void *buf, int num) @@ -336,8 +343,8 @@ static void ssleay_rand_seed(const void *buf, int num) int ssleay_rand_bytes(unsigned char *buf, int num, int pseudo, int lock) { static volatile int stirred_pool = 0; - int i, j, k, st_num, st_idx; - int num_ceil; + int i, j, k; + size_t num_ceil, st_idx, st_num; int ok; long md_c[2]; unsigned char local_md[MD_DIGEST_LENGTH]; @@ -469,15 +476,18 @@ int ssleay_rand_bytes(unsigned char *buf, int num, int pseudo, int lock) /* num_ceil -= 
MD_DIGEST_LENGTH/2 */ j = (num >= MD_DIGEST_LENGTH / 2) ? MD_DIGEST_LENGTH / 2 : num; num -= j; - MD_Init(&m); + if (!MD_Init(&m)) + goto err; #ifndef GETPID_IS_MEANINGLESS if (curr_pid) { /* just in the first iteration to save time */ - MD_Update(&m, (unsigned char *)&curr_pid, sizeof curr_pid); + if (!MD_Update(&m, (unsigned char *)&curr_pid, sizeof curr_pid)) + goto err; curr_pid = 0; } #endif - MD_Update(&m, local_md, MD_DIGEST_LENGTH); - MD_Update(&m, (unsigned char *)&(md_c[0]), sizeof(md_c)); + if (!MD_Update(&m, local_md, MD_DIGEST_LENGTH) || + !MD_Update(&m, (unsigned char *)&(md_c[0]), sizeof(md_c))) + goto err; #ifndef PURIFY /* purify complains */ /* @@ -487,16 +497,21 @@ int ssleay_rand_bytes(unsigned char *buf, int num, int pseudo, int lock) * builds it is not used: the removal of such a small source of * entropy has negligible impact on security. */ - MD_Update(&m, buf, j); + if (!MD_Update(&m, buf, j)) + goto err; #endif k = (st_idx + MD_DIGEST_LENGTH / 2) - st_num; if (k > 0) { - MD_Update(&m, &(state[st_idx]), MD_DIGEST_LENGTH / 2 - k); - MD_Update(&m, &(state[0]), k); - } else - MD_Update(&m, &(state[st_idx]), MD_DIGEST_LENGTH / 2); - MD_Final(&m, local_md); + if (!MD_Update(&m, &(state[st_idx]), MD_DIGEST_LENGTH / 2 - k) || + !MD_Update(&m, &(state[0]), k)) + goto err; + } else { + if (!MD_Update(&m, &(state[st_idx]), MD_DIGEST_LENGTH / 2)) + goto err; + } + if (!MD_Final(&m, local_md)) + goto err; for (i = 0; i < MD_DIGEST_LENGTH / 2; i++) { /* may compete with other threads */ @@ -508,13 +523,18 @@ int ssleay_rand_bytes(unsigned char *buf, int num, int pseudo, int lock) } } - MD_Init(&m); - MD_Update(&m, (unsigned char *)&(md_c[0]), sizeof(md_c)); - MD_Update(&m, local_md, MD_DIGEST_LENGTH); + if (!MD_Init(&m) || + !MD_Update(&m, (unsigned char *)&(md_c[0]), sizeof(md_c)) || + !MD_Update(&m, local_md, MD_DIGEST_LENGTH)) + goto err; if (lock) CRYPTO_w_lock(CRYPTO_LOCK_RAND); - MD_Update(&m, md, MD_DIGEST_LENGTH); - MD_Final(&m, md); + if 
(!MD_Update(&m, md, MD_DIGEST_LENGTH) || + !MD_Final(&m, md)) { + if (lock) + CRYPTO_w_unlock(CRYPTO_LOCK_RAND); + goto err; + } if (lock) CRYPTO_w_unlock(CRYPTO_LOCK_RAND); @@ -529,6 +549,10 @@ int ssleay_rand_bytes(unsigned char *buf, int num, int pseudo, int lock) "http://www.openssl.org/support/faq.html"); return (0); } + + err: + EVP_MD_CTX_cleanup(&m); + return (0); } static int ssleay_rand_nopseudo_bytes(unsigned char *buf, int num) diff --git a/thirdparty/openssl/crypto/rand/rand_unix.c b/thirdparty/openssl/crypto/rand/rand_unix.c index 266111edda..6c5b65da00 100644 --- a/thirdparty/openssl/crypto/rand/rand_unix.c +++ b/thirdparty/openssl/crypto/rand/rand_unix.c @@ -235,7 +235,7 @@ int RAND_poll(void) rnd >>= 8; } RAND_add(buf, sizeof(buf), ENTROPY_NEEDED); - memset(buf, 0, sizeof(buf)); + OPENSSL_cleanse(buf, sizeof(buf)); return 1; } diff --git a/thirdparty/openssl/crypto/rand/rand_vms.c b/thirdparty/openssl/crypto/rand/rand_vms.c index 0e10c363e2..be4ff4cc87 100644..100755 --- a/thirdparty/openssl/crypto/rand/rand_vms.c +++ b/thirdparty/openssl/crypto/rand/rand_vms.c @@ -3,6 +3,11 @@ * Written by Richard Levitte <richard@levitte.org> for the OpenSSL project * 2000. */ +/* + * Modified by VMS Software, Inc (2016) + * Eliminate looping through all processes (performance) + * Add additional randomizations using rand() function + */ /* ==================================================================== * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. 
* @@ -61,11 +66,11 @@ #include "rand_lcl.h" #if defined(OPENSSL_SYS_VMS) - # include <descrip.h> # include <jpidef.h> # include <ssdef.h> # include <starlet.h> +# include <efndef> # ifdef __DECC # pragma message disable DOLLARID # endif @@ -83,77 +88,93 @@ # endif /* __INITIAL_POINTER_SIZE == 64 [else] */ static struct items_data_st { - short length, code; /* length is amount of bytes */ + short length, code; /* length is number of bytes */ } items_data[] = { - { - 4, JPI$_BUFIO - }, - { - 4, JPI$_CPUTIM - }, - { - 4, JPI$_DIRIO - }, - { - 8, JPI$_LOGINTIM - }, - { - 4, JPI$_PAGEFLTS - }, - { - 4, JPI$_PID - }, - { - 4, JPI$_WSSIZE - }, - { - 0, 0 - } + {4, JPI$_BUFIO}, + {4, JPI$_CPUTIM}, + {4, JPI$_DIRIO}, + {4, JPI$_IMAGECOUNT}, + {8, JPI$_LAST_LOGIN_I}, + {8, JPI$_LOGINTIM}, + {4, JPI$_PAGEFLTS}, + {4, JPI$_PID}, + {4, JPI$_PPGCNT}, + {4, JPI$_WSPEAK}, + {4, JPI$_FINALEXC}, + {0, 0} /* zero terminated */ }; int RAND_poll(void) { - long pid, iosb[2]; - int status = 0; + + /* determine the number of items in the JPI array */ + + struct items_data_st item_entry; + int item_entry_count = sizeof(items_data)/sizeof(item_entry); + + /* Create the JPI itemlist array to hold item_data content */ + struct { short length, code; - long *buffer; + int *buffer; int *retlen; - } item[32], *pitem; - unsigned char data_buffer[256]; - short total_length = 0; - struct items_data_st *pitems_data; + } item[item_entry_count], *pitem; /* number of entries in items_data */ + struct items_data_st *pitems_data; pitems_data = items_data; pitem = item; + int data_buffer[(item_entry_count*2)+4]; /* 8 bytes per entry max */ + int iosb[2]; + int sys_time[2]; + int *ptr; + int i, j ; + int tmp_length = 0; + int total_length = 0; + + /* Setup itemlist for GETJPI */ - /* Setup */ - while (pitems_data->length && (total_length + pitems_data->length <= 256)) { + while (pitems_data->length) { pitem->length = pitems_data->length; - pitem->code = pitems_data->code; - pitem->buffer = (long 
*)&data_buffer[total_length]; + pitem->code = pitems_data->code; + pitem->buffer = &data_buffer[total_length]; pitem->retlen = 0; - total_length += pitems_data->length; + /* total_length is in longwords */ + total_length += pitems_data->length/4; pitems_data++; pitem ++; } pitem->length = pitem->code = 0; - /* - * Scan through all the processes in the system and add entropy with - * results from the processes that were possible to look at. - * However, view the information as only half trustable. - */ - pid = -1; /* search context */ - while ((status = sys$getjpiw(0, &pid, 0, item, iosb, 0, 0)) - != SS$_NOMOREPROC) { - if (status == SS$_NORMAL) { - RAND_add((PTR_T) data_buffer, total_length, total_length / 2); + /* Fill data_buffer with various info bits from this process */ + /* and twist that data to seed the SSL random number init */ + + if (sys$getjpiw(EFN$C_ENF, NULL, NULL, item, &iosb, 0, 0) == SS$_NORMAL) { + for (i = 0; i < total_length; i++) { + sys$gettim((struct _generic_64 *)&sys_time[0]); + srand(sys_time[0] * data_buffer[0] * data_buffer[1] + i); + + if (i == (total_length - 1)) { /* for JPI$_FINALEXC */ + ptr = &data_buffer[i]; + for (j = 0; j < 4; j++) { + data_buffer[i + j] = ptr[j]; + /* OK to use rand() just to scramble the seed */ + data_buffer[i + j] ^= (sys_time[0] ^ rand()); + tmp_length++; + } + } else { + /* OK to use rand() just to scramble the seed */ + data_buffer[i] ^= (sys_time[0] ^ rand()); + } } + + total_length += (tmp_length - 1); + + /* size of seed is total_length*4 bytes (64bytes) */ + RAND_add((PTR_T) data_buffer, total_length*4, total_length * 2); + } else { + return 0; } - sys$gettim(iosb); - RAND_add((PTR_T) iosb, sizeof(iosb), sizeof(iosb) / 2); + return 1; } - #endif diff --git a/thirdparty/openssl/crypto/rand/rand_win.c b/thirdparty/openssl/crypto/rand/rand_win.c index da4c935a53..cb4093128d 100644 --- a/thirdparty/openssl/crypto/rand/rand_win.c +++ b/thirdparty/openssl/crypto/rand/rand_win.c @@ -118,10 +118,10 @@ # 
ifndef _WIN32_WINNT # define _WIN32_WINNT 0x0400 # endif -#ifndef UWP_ENABLED +#ifndef UWP_ENABLED // -- GODOT -- # include <wincrypt.h> # include <tlhelp32.h> -#endif +#endif // -- GODOT -- /* * Limit the time spent walking through the heap, processes, threads and @@ -163,7 +163,7 @@ typedef struct tagCURSORINFO { # define CURSOR_SHOWING 0x00000001 # endif /* CURSOR_SHOWING */ -# if !defined(OPENSSL_SYS_WINCE) && !defined(UWP_ENABLED) +# if !defined(OPENSSL_SYS_WINCE) && !defined(UWP_ENABLED) // -- GODOT -- typedef BOOL(WINAPI *CRYPTACQUIRECONTEXTW) (HCRYPTPROV *, LPCWSTR, LPCWSTR, DWORD, DWORD); typedef BOOL(WINAPI *CRYPTGENRANDOM) (HCRYPTPROV, DWORD, BYTE *); @@ -198,7 +198,7 @@ typedef NET_API_STATUS(NET_API_FUNCTION *NETFREE) (LPBYTE); # endif /* 1 */ # endif /* !OPENSSL_SYS_WINCE */ -#if !defined(UWP_ENABLED) +#if !defined(UWP_ENABLED) // -- GODOT -- int RAND_poll(void) { MEMORYSTATUS m; @@ -583,7 +583,7 @@ int RAND_poll(void) return (1); } -#endif // UWP_ENABLED +#endif // UWP_ENABLED // -- GODOT -- int RAND_event(UINT iMsg, WPARAM wParam, LPARAM lParam) { @@ -687,7 +687,7 @@ static void readtimer(void) static void readscreen(void) { -# if !defined(OPENSSL_SYS_WINCE) && !defined(OPENSSL_SYS_WIN32_CYGWIN) && !defined(UWP_ENABLED) +# if !defined(OPENSSL_SYS_WINCE) && !defined(OPENSSL_SYS_WIN32_CYGWIN) && !defined(UWP_ENABLED) // -- GODOT -- HDC hScrDC; /* screen DC */ HBITMAP hBitmap; /* handle for our bitmap */ BITMAP bm; /* bitmap properties */ diff --git a/thirdparty/openssl/crypto/rand/randfile.c b/thirdparty/openssl/crypto/rand/randfile.c index 9537c56a78..728fd0a721 100644 --- a/thirdparty/openssl/crypto/rand/randfile.c +++ b/thirdparty/openssl/crypto/rand/randfile.c @@ -56,11 +56,6 @@ * [including the GNU Public Licence.] 
*/ -/* We need to define this to get macros like S_IFBLK and S_IFCHR */ -#if !defined(OPENSSL_SYS_VXWORKS) -# define _XOPEN_SOURCE 500 -#endif - #include <errno.h> #include <stdio.h> #include <stdlib.h> @@ -80,6 +75,29 @@ #ifndef OPENSSL_NO_POSIX_IO # include <sys/stat.h> # include <fcntl.h> +/* + * Following should not be needed, and we could have been stricter + * and demand S_IS*. But some systems just don't comply... Formally + * below macros are "anatomically incorrect", because normally they + * would look like ((m) & MASK == TYPE), but since MASK availability + * is as questionable, we settle for this poor-man fallback... + */ +# if !defined(S_ISBLK) +# if defined(_S_IFBLK) +# define S_ISBLK(m) ((m) & _S_IFBLK) +# elif defined(S_IFBLK) +# define S_ISBLK(m) ((m) & S_IFBLK) +# elif defined(_WIN32) +# define S_ISBLK(m) 0 /* no concept of block devices on Windows */ +# endif +# endif +# if !defined(S_ISCHR) +# if defined(_S_IFCHR) +# define S_ISCHR(m) ((m) & _S_IFCHR) +# elif defined(S_IFCHR) +# define S_ISCHR(m) ((m) & S_IFCHR) +# endif +# endif #endif #ifdef _WIN32 @@ -93,7 +111,7 @@ #define BUFSIZE 1024 #define RAND_DATA 1024 -#ifdef OPENSSL_SYS_VMS +#if (defined(OPENSSL_SYS_VMS) && (defined(__alpha) || defined(__ia64))) /* * This declaration is a nasty hack to get around vms' extension to fopen for * passing in sharing options being disabled by our /STANDARD=ANSI89 @@ -122,7 +140,24 @@ int RAND_load_file(const char *file, long bytes) struct stat sb; #endif int i, ret = 0, n; +/* + * If setvbuf() is to be called, then the FILE pointer + * to it must be 32 bit. 
+*/ + +#if !defined OPENSSL_NO_SETVBUF_IONBF && defined(OPENSSL_SYS_VMS) && defined(__VMS_VER) && (__VMS_VER >= 70000000) + /* For 64-bit-->32 bit API Support*/ +#if __INITIAL_POINTER_SIZE == 64 +#pragma __required_pointer_size __save +#pragma __required_pointer_size 32 +#endif + FILE *in; /* setvbuf() requires 32-bit pointers */ +#if __INITIAL_POINTER_SIZE == 64 +#pragma __required_pointer_size __restore +#endif +#else FILE *in; +#endif /* OPENSSL_SYS_VMS */ if (file == NULL) return (0); @@ -151,8 +186,8 @@ int RAND_load_file(const char *file, long bytes) #endif if (in == NULL) goto err; -#if defined(S_IFBLK) && defined(S_IFCHR) && !defined(OPENSSL_NO_POSIX_IO) - if (sb.st_mode & (S_IFBLK | S_IFCHR)) { +#if defined(S_ISBLK) && defined(S_ISCHR) && !defined(OPENSSL_NO_POSIX_IO) + if (S_ISBLK(sb.st_mode) || S_ISCHR(sb.st_mode)) { /* * this file is a device. we don't want read an infinite number of * bytes from a random device, nor do we want to use buffered I/O @@ -231,7 +266,7 @@ int RAND_write_file(const char *file) } #endif -#ifdef OPENSSL_SYS_VMS +#if (defined(OPENSSL_SYS_VMS) && (defined(__alpha) || defined(__ia64))) /* * VMS NOTE: Prior versions of this routine created a _new_ version of * the rand file for each call into this routine, then deleted all diff --git a/thirdparty/openssl/crypto/rsa/rsa_ameth.c b/thirdparty/openssl/crypto/rsa/rsa_ameth.c index 4e0621827c..951e1d5ca3 100644 --- a/thirdparty/openssl/crypto/rsa/rsa_ameth.c +++ b/thirdparty/openssl/crypto/rsa/rsa_ameth.c @@ -68,10 +68,12 @@ #endif #include "asn1_locl.h" +#ifndef OPENSSL_NO_CMS static int rsa_cms_sign(CMS_SignerInfo *si); static int rsa_cms_verify(CMS_SignerInfo *si); static int rsa_cms_decrypt(CMS_RecipientInfo *ri); static int rsa_cms_encrypt(CMS_RecipientInfo *ri); +#endif static int rsa_pub_encode(X509_PUBKEY *pk, const EVP_PKEY *pkey) { @@ -665,6 +667,7 @@ static int rsa_pss_to_ctx(EVP_MD_CTX *ctx, EVP_PKEY_CTX *pkctx, return rv; } +#ifndef OPENSSL_NO_CMS static int 
rsa_cms_verify(CMS_SignerInfo *si) { int nid, nid2; @@ -683,6 +686,7 @@ static int rsa_cms_verify(CMS_SignerInfo *si) } return 0; } +#endif /* * Customised RSA item verification routine. This is called when a signature @@ -705,6 +709,7 @@ static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, return -1; } +#ifndef OPENSSL_NO_CMS static int rsa_cms_sign(CMS_SignerInfo *si) { int pad_mode = RSA_PKCS1_PADDING; @@ -729,6 +734,7 @@ static int rsa_cms_sign(CMS_SignerInfo *si) X509_ALGOR_set0(alg, OBJ_nid2obj(NID_rsassaPss), V_ASN1_SEQUENCE, os); return 1; } +#endif static int rsa_item_sign(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, X509_ALGOR *alg1, X509_ALGOR *alg2, @@ -785,6 +791,7 @@ static RSA_OAEP_PARAMS *rsa_oaep_decode(const X509_ALGOR *alg, return pss; } +#ifndef OPENSSL_NO_CMS static int rsa_cms_decrypt(CMS_RecipientInfo *ri) { EVP_PKEY_CTX *pkctx; @@ -920,6 +927,7 @@ static int rsa_cms_encrypt(CMS_RecipientInfo *ri) ASN1_STRING_free(os); return rv; } +#endif const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[] = { { diff --git a/thirdparty/openssl/crypto/rsa/rsa_chk.c b/thirdparty/openssl/crypto/rsa/rsa_chk.c index 607faa0017..475dfc5628 100644 --- a/thirdparty/openssl/crypto/rsa/rsa_chk.c +++ b/thirdparty/openssl/crypto/rsa/rsa_chk.c @@ -56,7 +56,6 @@ int RSA_check_key(const RSA *key) { BIGNUM *i, *j, *k, *l, *m; BN_CTX *ctx; - int r; int ret = 1; if (!key->p || !key->q || !key->n || !key->e || !key->d) { @@ -70,75 +69,68 @@ int RSA_check_key(const RSA *key) l = BN_new(); m = BN_new(); ctx = BN_CTX_new(); - if (i == NULL || j == NULL || k == NULL || l == NULL || - m == NULL || ctx == NULL) { + if (i == NULL || j == NULL || k == NULL || l == NULL + || m == NULL || ctx == NULL) { ret = -1; RSAerr(RSA_F_RSA_CHECK_KEY, ERR_R_MALLOC_FAILURE); goto err; } + if (BN_is_one(key->e)) { + ret = 0; + RSAerr(RSA_F_RSA_CHECK_KEY, RSA_R_BAD_E_VALUE); + } + if (!BN_is_odd(key->e)) { + ret = 0; + RSAerr(RSA_F_RSA_CHECK_KEY, RSA_R_BAD_E_VALUE); + } + /* p 
prime? */ - r = BN_is_prime_ex(key->p, BN_prime_checks, NULL, NULL); - if (r != 1) { - ret = r; - if (r != 0) - goto err; + if (BN_is_prime_ex(key->p, BN_prime_checks, NULL, NULL) != 1) { + ret = 0; RSAerr(RSA_F_RSA_CHECK_KEY, RSA_R_P_NOT_PRIME); } /* q prime? */ - r = BN_is_prime_ex(key->q, BN_prime_checks, NULL, NULL); - if (r != 1) { - ret = r; - if (r != 0) - goto err; + if (BN_is_prime_ex(key->q, BN_prime_checks, NULL, NULL) != 1) { + ret = 0; RSAerr(RSA_F_RSA_CHECK_KEY, RSA_R_Q_NOT_PRIME); } /* n = p*q? */ - r = BN_mul(i, key->p, key->q, ctx); - if (!r) { + if (!BN_mul(i, key->p, key->q, ctx)) { ret = -1; goto err; } - if (BN_cmp(i, key->n) != 0) { ret = 0; RSAerr(RSA_F_RSA_CHECK_KEY, RSA_R_N_DOES_NOT_EQUAL_P_Q); } /* d*e = 1 mod lcm(p-1,q-1)? */ - - r = BN_sub(i, key->p, BN_value_one()); - if (!r) { + if (!BN_sub(i, key->p, BN_value_one())) { ret = -1; goto err; } - r = BN_sub(j, key->q, BN_value_one()); - if (!r) { + if (!BN_sub(j, key->q, BN_value_one())) { ret = -1; goto err; } /* now compute k = lcm(i,j) */ - r = BN_mul(l, i, j, ctx); - if (!r) { + if (!BN_mul(l, i, j, ctx)) { ret = -1; goto err; } - r = BN_gcd(m, i, j, ctx); - if (!r) { + if (!BN_gcd(m, i, j, ctx)) { ret = -1; goto err; } - r = BN_div(k, NULL, l, m, ctx); /* remainder is 0 */ - if (!r) { + if (!BN_div(k, NULL, l, m, ctx)) { /* remainder is 0 */ ret = -1; goto err; } - - r = BN_mod_mul(i, key->d, key->e, k, ctx); - if (!r) { + if (!BN_mod_mul(i, key->d, key->e, k, ctx)) { ret = -1; goto err; } @@ -150,36 +142,28 @@ int RSA_check_key(const RSA *key) if (key->dmp1 != NULL && key->dmq1 != NULL && key->iqmp != NULL) { /* dmp1 = d mod (p-1)? */ - r = BN_sub(i, key->p, BN_value_one()); - if (!r) { + if (!BN_sub(i, key->p, BN_value_one())) { ret = -1; goto err; } - - r = BN_mod(j, key->d, i, ctx); - if (!r) { + if (!BN_mod(j, key->d, i, ctx)) { ret = -1; goto err; } - if (BN_cmp(j, key->dmp1) != 0) { ret = 0; RSAerr(RSA_F_RSA_CHECK_KEY, RSA_R_DMP1_NOT_CONGRUENT_TO_D); } /* dmq1 = d mod (q-1)? 
*/ - r = BN_sub(i, key->q, BN_value_one()); - if (!r) { + if (!BN_sub(i, key->q, BN_value_one())) { ret = -1; goto err; } - - r = BN_mod(j, key->d, i, ctx); - if (!r) { + if (!BN_mod(j, key->d, i, ctx)) { ret = -1; goto err; } - if (BN_cmp(j, key->dmq1) != 0) { ret = 0; RSAerr(RSA_F_RSA_CHECK_KEY, RSA_R_DMQ1_NOT_CONGRUENT_TO_D); @@ -190,7 +174,6 @@ int RSA_check_key(const RSA *key) ret = -1; goto err; } - if (BN_cmp(i, key->iqmp) != 0) { ret = 0; RSAerr(RSA_F_RSA_CHECK_KEY, RSA_R_IQMP_NOT_INVERSE_OF_Q); @@ -198,17 +181,11 @@ int RSA_check_key(const RSA *key) } err: - if (i != NULL) - BN_free(i); - if (j != NULL) - BN_free(j); - if (k != NULL) - BN_free(k); - if (l != NULL) - BN_free(l); - if (m != NULL) - BN_free(m); - if (ctx != NULL) - BN_CTX_free(ctx); - return (ret); + BN_free(i); + BN_free(j); + BN_free(k); + BN_free(l); + BN_free(m); + BN_CTX_free(ctx); + return ret; } diff --git a/thirdparty/openssl/crypto/rsa/rsa_gen.c b/thirdparty/openssl/crypto/rsa/rsa_gen.c index 7f7dca39fd..082c8da2ef 100644 --- a/thirdparty/openssl/crypto/rsa/rsa_gen.c +++ b/thirdparty/openssl/crypto/rsa/rsa_gen.c @@ -142,7 +142,8 @@ static int rsa_builtin_keygen(RSA *rsa, int bits, BIGNUM *e_value, if (!rsa->iqmp && ((rsa->iqmp = BN_new()) == NULL)) goto err; - BN_copy(rsa->e, e_value); + if (BN_copy(rsa->e, e_value) == NULL) + goto err; /* generate p and q */ for (;;) { diff --git a/thirdparty/openssl/crypto/rsa/rsa_lib.c b/thirdparty/openssl/crypto/rsa/rsa_lib.c index a6805debc8..6ea6b40dc6 100644 --- a/thirdparty/openssl/crypto/rsa/rsa_lib.c +++ b/thirdparty/openssl/crypto/rsa/rsa_lib.c @@ -143,6 +143,7 @@ RSA *RSA_new_method(ENGINE *engine) RSAerr(RSA_F_RSA_NEW_METHOD, ERR_R_MALLOC_FAILURE); return NULL; } + memset(ret,0,sizeof(RSA)); ret->meth = RSA_get_default_method(); #ifndef OPENSSL_NO_ENGINE diff --git a/thirdparty/openssl/crypto/rsa/rsa_oaep.c b/thirdparty/openssl/crypto/rsa/rsa_oaep.c index 9c2a943cf7..19d28c6f0e 100644 --- a/thirdparty/openssl/crypto/rsa/rsa_oaep.c +++ 
b/thirdparty/openssl/crypto/rsa/rsa_oaep.c @@ -89,17 +89,21 @@ int RSA_padding_add_PKCS1_OAEP_mgf1(unsigned char *to, int tlen, } if (PKCS1_MGF1(dbmask, emlen - mdlen, seed, mdlen, mgf1md) < 0) - return 0; + goto err; for (i = 0; i < emlen - mdlen; i++) db[i] ^= dbmask[i]; if (PKCS1_MGF1(seedmask, mdlen, db, emlen - mdlen, mgf1md) < 0) - return 0; + goto err; for (i = 0; i < mdlen; i++) seed[i] ^= seedmask[i]; OPENSSL_free(dbmask); return 1; + + err: + OPENSSL_free(dbmask); + return 0; } int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen, diff --git a/thirdparty/openssl/crypto/rsa/rsa_pmeth.c b/thirdparty/openssl/crypto/rsa/rsa_pmeth.c index 203635595f..8896e2e977 100644 --- a/thirdparty/openssl/crypto/rsa/rsa_pmeth.c +++ b/thirdparty/openssl/crypto/rsa/rsa_pmeth.c @@ -373,6 +373,10 @@ static int pkey_rsa_verify(EVP_PKEY_CTX *ctx, if (rctx->pad_mode == RSA_PKCS1_PADDING) return RSA_verify(EVP_MD_type(rctx->md), tbs, tbslen, sig, siglen, rsa); + if (tbslen != (size_t)EVP_MD_size(rctx->md)) { + RSAerr(RSA_F_PKEY_RSA_VERIFY, RSA_R_INVALID_DIGEST_LENGTH); + return -1; + } if (rctx->pad_mode == RSA_X931_PADDING) { if (pkey_rsa_verifyrecover(ctx, NULL, &rslen, sig, siglen) <= 0) return 0; @@ -442,19 +446,14 @@ static int pkey_rsa_decrypt(EVP_PKEY_CTX *ctx, int ret; RSA_PKEY_CTX *rctx = ctx->data; if (rctx->pad_mode == RSA_PKCS1_OAEP_PADDING) { - int i; if (!setup_tbuf(rctx, ctx)) return -1; ret = RSA_private_decrypt(inlen, in, rctx->tbuf, ctx->pkey->pkey.rsa, RSA_NO_PADDING); if (ret <= 0) return ret; - for (i = 0; i < ret; i++) { - if (rctx->tbuf[i]) - break; - } - ret = RSA_padding_check_PKCS1_OAEP_mgf1(out, ret, rctx->tbuf + i, - ret - i, ret, + ret = RSA_padding_check_PKCS1_OAEP_mgf1(out, ret, rctx->tbuf, + ret, ret, rctx->oaep_label, rctx->oaep_labellen, rctx->md, rctx->mgf1md); @@ -545,8 +544,10 @@ static int pkey_rsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) return 1; case EVP_PKEY_CTRL_RSA_KEYGEN_PUBEXP: - if (!p2) + if (p2 == NULL || 
!BN_is_odd((BIGNUM *)p2) || BN_is_one((BIGNUM *)p2)) { + RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_BAD_E_VALUE); return -2; + } BN_free(rctx->pub_exp); rctx->pub_exp = p2; return 1; diff --git a/thirdparty/openssl/crypto/rsa/rsa_pss.c b/thirdparty/openssl/crypto/rsa/rsa_pss.c index 41bc0844e4..2c3fd73b09 100644 --- a/thirdparty/openssl/crypto/rsa/rsa_pss.c +++ b/thirdparty/openssl/crypto/rsa/rsa_pss.c @@ -122,7 +122,11 @@ int RSA_verify_PKCS1_PSS_mgf1(RSA *rsa, const unsigned char *mHash, EM++; emLen--; } - if (emLen < (hLen + sLen + 2)) { /* sLen can be small negative */ + if (emLen < hLen + 2) { + RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_DATA_TOO_LARGE); + goto err; + } + if (sLen > emLen - hLen - 2) { /* sLen can be small negative */ RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_DATA_TOO_LARGE); goto err; } @@ -222,9 +226,14 @@ int RSA_padding_add_PKCS1_PSS_mgf1(RSA *rsa, unsigned char *EM, *EM++ = 0; emLen--; } + if (emLen < hLen + 2) { + RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1, + RSA_R_DATA_TOO_LARGE_FOR_KEY_SIZE); + goto err; + } if (sLen == -2) { sLen = emLen - hLen - 2; - } else if (emLen < (hLen + sLen + 2)) { + } else if (sLen > emLen - hLen - 2) { RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1, RSA_R_DATA_TOO_LARGE_FOR_KEY_SIZE); goto err; diff --git a/thirdparty/openssl/crypto/s390xcap.c b/thirdparty/openssl/crypto/s390xcap.c index 47d6b6ff51..cf8c372c05 100644 --- a/thirdparty/openssl/crypto/s390xcap.c +++ b/thirdparty/openssl/crypto/s390xcap.c @@ -3,6 +3,7 @@ #include <string.h> #include <setjmp.h> #include <signal.h> +#include "cryptlib.h" extern unsigned long OPENSSL_s390xcap_P[]; diff --git a/thirdparty/openssl/crypto/srp/srp_lib.c b/thirdparty/openssl/crypto/srp/srp_lib.c index e9a2e058f6..6df3b1cee7 100644 --- a/thirdparty/openssl/crypto/srp/srp_lib.c +++ b/thirdparty/openssl/crypto/srp/srp_lib.c @@ -159,8 +159,7 @@ BIGNUM *SRP_Calc_server_key(BIGNUM *A, BIGNUM *v, BIGNUM *u, BIGNUM *b, if (u == NULL || A == NULL || v == NULL || b == NULL || N == 
NULL) return NULL; - if ((bn_ctx = BN_CTX_new()) == NULL || - (tmp = BN_new()) == NULL || (S = BN_new()) == NULL) + if ((bn_ctx = BN_CTX_new()) == NULL || (tmp = BN_new()) == NULL) goto err; /* S = (A*v**u) ** b */ @@ -169,8 +168,12 @@ BIGNUM *SRP_Calc_server_key(BIGNUM *A, BIGNUM *v, BIGNUM *u, BIGNUM *b, goto err; if (!BN_mod_mul(tmp, A, tmp, N, bn_ctx)) goto err; - if (!BN_mod_exp(S, tmp, b, N, bn_ctx)) - goto err; + + S = BN_new(); + if (S != NULL && !BN_mod_exp(S, tmp, b, N, bn_ctx)) { + BN_free(S); + S = NULL; + } err: BN_CTX_free(bn_ctx); BN_clear_free(tmp); @@ -267,7 +270,7 @@ BIGNUM *SRP_Calc_client_key(BIGNUM *N, BIGNUM *B, BIGNUM *g, BIGNUM *x, if ((tmp = BN_new()) == NULL || (tmp2 = BN_new()) == NULL || - (tmp3 = BN_new()) == NULL || (K = BN_new()) == NULL) + (tmp3 = BN_new()) == NULL) goto err; if (!BN_mod_exp(tmp, g, x, N, bn_ctx)) @@ -279,12 +282,15 @@ BIGNUM *SRP_Calc_client_key(BIGNUM *N, BIGNUM *B, BIGNUM *g, BIGNUM *x, if (!BN_mod_sub(tmp, B, tmp2, N, bn_ctx)) goto err; - if (!BN_mod_mul(tmp3, u, x, N, bn_ctx)) + if (!BN_mul(tmp3, u, x, bn_ctx)) goto err; - if (!BN_mod_add(tmp2, a, tmp3, N, bn_ctx)) - goto err; - if (!BN_mod_exp(K, tmp, tmp2, N, bn_ctx)) + if (!BN_add(tmp2, a, tmp3)) goto err; + K = BN_new(); + if (K != NULL && !BN_mod_exp(K, tmp, tmp2, N, bn_ctx)) { + BN_free(K); + K = NULL; + } err: BN_CTX_free(bn_ctx); diff --git a/thirdparty/openssl/crypto/srp/srp_vfy.c b/thirdparty/openssl/crypto/srp/srp_vfy.c index 26ad3e07b4..c8bc7a94b2 100644 --- a/thirdparty/openssl/crypto/srp/srp_vfy.c +++ b/thirdparty/openssl/crypto/srp/srp_vfy.c @@ -80,15 +80,21 @@ static char b64table[] = /* * Convert a base64 string into raw byte array representation. 
*/ -static int t_fromb64(unsigned char *a, const char *src) +static int t_fromb64(unsigned char *a, size_t alen, const char *src) { char *loc; int i, j; int size; + if (alen == 0 || alen > INT_MAX) + return -1; + while (*src && (*src == ' ' || *src == '\t' || *src == '\n')) ++src; size = strlen(src); + if (size < 0 || size >= (int)alen) + return -1; + i = 0; while (i < size) { loc = strchr(b64table, src[i]); @@ -124,7 +130,7 @@ static int t_fromb64(unsigned char *a, const char *src) if (--i < 0) break; } - while (a[j] == 0 && j <= size) + while (j <= size && a[j] == 0) ++j; i = 0; while (j <= size) @@ -231,13 +237,25 @@ static int SRP_user_pwd_set_sv(SRP_user_pwd *vinfo, const char *s, unsigned char tmp[MAX_LEN]; int len; - if (strlen(s) > MAX_LEN || strlen(v) > MAX_LEN) + vinfo->v = NULL; + vinfo->s = NULL; + + len = t_fromb64(tmp, sizeof(tmp), v); + if (len < 0) return 0; - len = t_fromb64(tmp, v); if (NULL == (vinfo->v = BN_bin2bn(tmp, len, NULL))) return 0; - len = t_fromb64(tmp, s); - return ((vinfo->s = BN_bin2bn(tmp, len, NULL)) != NULL); + len = t_fromb64(tmp, sizeof(tmp), s); + if (len < 0) + goto err; + vinfo->s = BN_bin2bn(tmp, len, NULL); + if (vinfo->s == NULL) + goto err; + return 1; + err: + BN_free(vinfo->v); + vinfo->v = NULL; + return 0; } static int SRP_user_pwd_set_sv_BN(SRP_user_pwd *vinfo, BIGNUM *s, BIGNUM *v) @@ -307,10 +325,13 @@ static SRP_gN_cache *SRP_gN_new_init(const char *ch) if (newgN == NULL) return NULL; + len = t_fromb64(tmp, sizeof(tmp), ch); + if (len < 0) + goto err; + if ((newgN->b64_bn = BUF_strdup(ch)) == NULL) goto err; - len = t_fromb64(tmp, ch); if ((newgN->bn = BN_bin2bn(tmp, len, NULL))) return newgN; @@ -544,7 +565,7 @@ SRP_user_pwd *SRP_VBASE_get1_by_user(SRP_VBASE *vb, char *username) if (!SRP_user_pwd_set_ids(user, username, NULL)) goto err; - if (RAND_pseudo_bytes(digv, SHA_DIGEST_LENGTH) < 0) + if (RAND_bytes(digv, SHA_DIGEST_LENGTH) <= 0) goto err; EVP_MD_CTX_init(&ctxt); EVP_DigestInit_ex(&ctxt, EVP_sha1(), 
NULL); @@ -580,10 +601,10 @@ char *SRP_create_verifier(const char *user, const char *pass, char **salt, goto err; if (N) { - if (!(len = t_fromb64(tmp, N))) + if (!(len = t_fromb64(tmp, sizeof(tmp), N))) goto err; N_bn = BN_bin2bn(tmp, len, NULL); - if (!(len = t_fromb64(tmp, g))) + if (!(len = t_fromb64(tmp, sizeof(tmp), g))) goto err; g_bn = BN_bin2bn(tmp, len, NULL); defgNid = "*"; @@ -597,12 +618,12 @@ char *SRP_create_verifier(const char *user, const char *pass, char **salt, } if (*salt == NULL) { - if (RAND_pseudo_bytes(tmp2, SRP_RANDOM_SALT_LEN) < 0) + if (RAND_bytes(tmp2, SRP_RANDOM_SALT_LEN) <= 0) goto err; s = BN_bin2bn(tmp2, SRP_RANDOM_SALT_LEN, NULL); } else { - if (!(len = t_fromb64(tmp2, *salt))) + if (!(len = t_fromb64(tmp2, sizeof(tmp2), *salt))) goto err; s = BN_bin2bn(tmp2, len, NULL); } @@ -635,7 +656,8 @@ char *SRP_create_verifier(const char *user, const char *pass, char **salt, BN_free(N_bn); BN_free(g_bn); } - OPENSSL_cleanse(vf, vfsize); + if (vf != NULL) + OPENSSL_cleanse(vf, vfsize); OPENSSL_free(vf); BN_clear_free(s); BN_clear_free(v); @@ -670,7 +692,7 @@ int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, srp_bn_print(g); if (*salt == NULL) { - if (RAND_pseudo_bytes(tmp2, SRP_RANDOM_SALT_LEN) < 0) + if (RAND_bytes(tmp2, SRP_RANDOM_SALT_LEN) <= 0) goto err; salttmp = BN_bin2bn(tmp2, SRP_RANDOM_SALT_LEN, NULL); diff --git a/thirdparty/openssl/crypto/threads/netware.bat b/thirdparty/openssl/crypto/threads/netware.bat deleted file mode 100644 index 0b3eca3caf..0000000000 --- a/thirdparty/openssl/crypto/threads/netware.bat +++ /dev/null @@ -1,79 +0,0 @@ -@echo off -rem batch file to build multi-thread test ( mttest.nlm ) - -rem command line arguments: -rem debug => build using debug settings - -rem -rem After building, copy mttest.nlm to the server and run it, you'll probably -rem want to redirect stdout and stderr. 
An example command line would be -rem "mttest.nlm -thread 20 -loops 10 -CAfile \openssl\apps\server.pem >mttest.out 2>mttest.err" -rem - -del mttest.nlm - -set BLD_DEBUG= -set CFLAGS= -set LFLAGS= -set LIBS= - -if "%1" == "DEBUG" set BLD_DEBUG=YES -if "%1" == "debug" set BLD_DEBUG=YES - -if "%MWCIncludes%" == "" goto inc_error -if "%PRELUDE%" == "" goto prelude_error -if "%IMPORTS%" == "" goto imports_error - -set CFLAGS=-c -I..\..\outinc_nw -nosyspath -DOPENSSL_SYS_NETWARE -opt off -g -sym internal -maxerrors 20 - -if "%BLD_DEBUG%" == "YES" set LIBS=..\..\out_nw.dbg\ssl.lib ..\..\out_nw.dbg\crypto.lib -if "%BLD_DEBUG%" == "" set LIBS=..\..\out_nw\ssl.lib ..\..\out_nw\crypto.lib - -set LFLAGS=-msgstyle gcc -zerobss -stacksize 32768 -nostdlib -sym internal - -rem generate command file for metrowerks -echo. -echo Generating Metrowerks command file: mttest.def -echo # dynamically generated command file for metrowerks build > mttest.def -echo IMPORT @%IMPORTS%\clib.imp >> mttest.def -echo IMPORT @%IMPORTS%\threads.imp >> mttest.def -echo IMPORT @%IMPORTS%\ws2nlm.imp >> mttest.def -echo IMPORT GetProcessSwitchCount >> mttest.def -echo MODULE clib >> mttest.def - -rem compile -echo. -echo Compiling mttest.c -mwccnlm.exe mttest.c %CFLAGS% -if errorlevel 1 goto end - -rem link -echo. -echo Linking mttest.nlm -mwldnlm.exe %LFLAGS% -screenname mttest -commandfile mttest.def mttest.o "%PRELUDE%" %LIBS% -o mttest.nlm -if errorlevel 1 goto end - -goto end - -:inc_error -echo. -echo Environment variable MWCIncludes is not set - see install.nw -goto end - -:prelude_error -echo. -echo Environment variable PRELUDE is not set - see install.nw -goto end - -:imports_error -echo. 
-echo Environment variable IMPORTS is not set - see install.nw -goto end - - -:end -set BLD_DEBUG= -set CFLAGS= -set LFLAGS= -set LIBS= - diff --git a/thirdparty/openssl/crypto/threads/profile.sh b/thirdparty/openssl/crypto/threads/profile.sh deleted file mode 100644 index 6e3e342fc0..0000000000 --- a/thirdparty/openssl/crypto/threads/profile.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -/bin/rm -f mttest -cc -p -DSOLARIS -I../../include -g mttest.c -o mttest -L/usr/lib/libc -ldl -L../.. -lthread -lssl -lcrypto -lnsl -lsocket - diff --git a/thirdparty/openssl/crypto/threads/ptest.bat b/thirdparty/openssl/crypto/threads/ptest.bat deleted file mode 100755 index 4071b5ffea..0000000000 --- a/thirdparty/openssl/crypto/threads/ptest.bat +++ /dev/null @@ -1,4 +0,0 @@ -del mttest.exe - -purify cl /O2 -DWIN32 /MD -I..\..\out mttest.c /Femttest ..\..\out\ssl32.lib ..\..\out\crypt32.lib - diff --git a/thirdparty/openssl/crypto/threads/pthread.sh b/thirdparty/openssl/crypto/threads/pthread.sh deleted file mode 100644 index f1c49821d2..0000000000 --- a/thirdparty/openssl/crypto/threads/pthread.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh -# -# build using pthreads -# -# http://www.mit.edu:8001/people/proven/pthreads.html -# -/bin/rm -f mttest -pgcc -DPTHREADS -I../../include -g mttest.c -o mttest -L../.. -lssl -lcrypto - diff --git a/thirdparty/openssl/crypto/threads/pthread2.sh b/thirdparty/openssl/crypto/threads/pthread2.sh deleted file mode 100755 index ec945c451b..0000000000 --- a/thirdparty/openssl/crypto/threads/pthread2.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -# -# build using pthreads where it's already built into the system -# -/bin/rm -f mttest -gcc -DPTHREADS -I../../include -g mttest.c -o mttest -L../.. 
-lssl -lcrypto -lpthread -ldl diff --git a/thirdparty/openssl/crypto/threads/pthreads-vms.com b/thirdparty/openssl/crypto/threads/pthreads-vms.com deleted file mode 100644 index 1cf92bdf57..0000000000 --- a/thirdparty/openssl/crypto/threads/pthreads-vms.com +++ /dev/null @@ -1,14 +0,0 @@ -$! To compile mttest on VMS. -$! -$! WARNING: only tested with DEC C so far. -$ -$ if (f$getsyi("cpu").lt.128) -$ then -$ arch := VAX -$ else -$ arch = f$edit( f$getsyi( "ARCH_NAME"), "UPCASE") -$ if (arch .eqs. "") then arch = "UNK" -$ endif -$ define/user openssl [--.include.openssl] -$ cc/def=PTHREADS mttest.c -$ link mttest,[--.'arch'.exe.ssl]libssl/lib,[--.'arch'.exe.crypto]libcrypto/lib diff --git a/thirdparty/openssl/crypto/threads/purify.sh b/thirdparty/openssl/crypto/threads/purify.sh deleted file mode 100644 index 6d44fe26b7..0000000000 --- a/thirdparty/openssl/crypto/threads/purify.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -/bin/rm -f mttest -purify cc -DSOLARIS -I../../include -g mttest.c -o mttest -L../.. -lthread -lssl -lcrypto -lnsl -lsocket - diff --git a/thirdparty/openssl/crypto/threads/solaris.sh b/thirdparty/openssl/crypto/threads/solaris.sh deleted file mode 100644 index bc93094a27..0000000000 --- a/thirdparty/openssl/crypto/threads/solaris.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -/bin/rm -f mttest -cc -DSOLARIS -I../../include -g mttest.c -o mttest -L../.. 
-lthread -lssl -lcrypto -lnsl -lsocket - diff --git a/thirdparty/openssl/crypto/threads/win32.bat b/thirdparty/openssl/crypto/threads/win32.bat deleted file mode 100755 index ee6da80a07..0000000000 --- a/thirdparty/openssl/crypto/threads/win32.bat +++ /dev/null @@ -1,4 +0,0 @@ -del mttest.exe - -cl /O2 -DWIN32 /MD -I..\..\out mttest.c /Femttest ..\..\out\ssleay32.lib ..\..\out\libeay32.lib - diff --git a/thirdparty/openssl/crypto/ts/ts_lib.c b/thirdparty/openssl/crypto/ts/ts_lib.c index c51538a17f..e0f1063537 100644 --- a/thirdparty/openssl/crypto/ts/ts_lib.c +++ b/thirdparty/openssl/crypto/ts/ts_lib.c @@ -90,9 +90,8 @@ int TS_OBJ_print_bio(BIO *bio, const ASN1_OBJECT *obj) { char obj_txt[128]; - int len = OBJ_obj2txt(obj_txt, sizeof(obj_txt), obj, 0); - BIO_write(bio, obj_txt, len); - BIO_write(bio, "\n", 1); + OBJ_obj2txt(obj_txt, sizeof(obj_txt), obj, 0); + BIO_printf(bio, "%s\n", obj_txt); return 1; } diff --git a/thirdparty/openssl/crypto/ts/ts_rsp_verify.c b/thirdparty/openssl/crypto/ts/ts_rsp_verify.c index 29aa5a497e..7918236287 100644 --- a/thirdparty/openssl/crypto/ts/ts_rsp_verify.c +++ b/thirdparty/openssl/crypto/ts/ts_rsp_verify.c @@ -434,51 +434,58 @@ static int int_TS_RESP_verify_token(TS_VERIFY_CTX *ctx, unsigned char *imprint = NULL; unsigned imprint_len = 0; int ret = 0; + int flags = ctx->flags; + + /* Some options require us to also check the signature */ + if (((flags & TS_VFY_SIGNER) && tsa_name != NULL) + || (flags & TS_VFY_TSA_NAME)) { + flags |= TS_VFY_SIGNATURE; + } /* Verify the signature. */ - if ((ctx->flags & TS_VFY_SIGNATURE) + if ((flags & TS_VFY_SIGNATURE) && !TS_RESP_verify_signature(token, ctx->certs, ctx->store, &signer)) goto err; /* Check version number of response. */ - if ((ctx->flags & TS_VFY_VERSION) + if ((flags & TS_VFY_VERSION) && TS_TST_INFO_get_version(tst_info) != 1) { TSerr(TS_F_INT_TS_RESP_VERIFY_TOKEN, TS_R_UNSUPPORTED_VERSION); goto err; } /* Check policies. 
*/ - if ((ctx->flags & TS_VFY_POLICY) + if ((flags & TS_VFY_POLICY) && !TS_check_policy(ctx->policy, tst_info)) goto err; /* Check message imprints. */ - if ((ctx->flags & TS_VFY_IMPRINT) + if ((flags & TS_VFY_IMPRINT) && !TS_check_imprints(ctx->md_alg, ctx->imprint, ctx->imprint_len, tst_info)) goto err; /* Compute and check message imprints. */ - if ((ctx->flags & TS_VFY_DATA) + if ((flags & TS_VFY_DATA) && (!TS_compute_imprint(ctx->data, tst_info, &md_alg, &imprint, &imprint_len) || !TS_check_imprints(md_alg, imprint, imprint_len, tst_info))) goto err; /* Check nonces. */ - if ((ctx->flags & TS_VFY_NONCE) + if ((flags & TS_VFY_NONCE) && !TS_check_nonces(ctx->nonce, tst_info)) goto err; /* Check whether TSA name and signer certificate match. */ - if ((ctx->flags & TS_VFY_SIGNER) + if ((flags & TS_VFY_SIGNER) && tsa_name && !TS_check_signer_name(tsa_name, signer)) { TSerr(TS_F_INT_TS_RESP_VERIFY_TOKEN, TS_R_TSA_NAME_MISMATCH); goto err; } /* Check whether the TSA is the expected one. */ - if ((ctx->flags & TS_VFY_TSA_NAME) + if ((flags & TS_VFY_TSA_NAME) && !TS_check_signer_name(ctx->tsa_name, signer)) { TSerr(TS_F_INT_TS_RESP_VERIFY_TOKEN, TS_R_TSA_UNTRUSTED); goto err; @@ -548,13 +555,15 @@ static int TS_check_status_info(TS_RESP *response) static char *TS_get_status_text(STACK_OF(ASN1_UTF8STRING) *text) { int i; - unsigned int length = 0; + int length = 0; char *result = NULL; char *p; /* Determine length first. 
*/ for (i = 0; i < sk_ASN1_UTF8STRING_num(text); ++i) { ASN1_UTF8STRING *current = sk_ASN1_UTF8STRING_value(text, i); + if (ASN1_STRING_length(current) > TS_MAX_STATUS_LENGTH - length - 1) + return NULL; length += ASN1_STRING_length(current); length += 1; /* separator character */ } diff --git a/thirdparty/openssl/crypto/txt_db/txt_db.c b/thirdparty/openssl/crypto/txt_db/txt_db.c index f9b42ac6e5..ed02efc261 100644 --- a/thirdparty/openssl/crypto/txt_db/txt_db.c +++ b/thirdparty/openssl/crypto/txt_db/txt_db.c @@ -162,6 +162,7 @@ TXT_DB *TXT_DB_read(BIO *in, int num) "wrong number of fields on line %ld (looking for field %d, got %d, '%s' left)\n", ln, num, n, f); #endif + OPENSSL_free(pp); er = 2; goto err; } @@ -171,6 +172,7 @@ TXT_DB *TXT_DB_read(BIO *in, int num) * fix :-( */ fprintf(stderr, "failure in sk_push\n"); #endif + OPENSSL_free(pp); er = 2; goto err; } @@ -222,7 +224,7 @@ int TXT_DB_create_index(TXT_DB *db, int field, int (*qual) (OPENSSL_STRING *), LHASH_HASH_FN_TYPE hash, LHASH_COMP_FN_TYPE cmp) { LHASH_OF(OPENSSL_STRING) *idx; - OPENSSL_STRING *r; + OPENSSL_STRING *r, *k; int i, n; if (field >= db->num_fields) { @@ -239,13 +241,18 @@ int TXT_DB_create_index(TXT_DB *db, int field, int (*qual) (OPENSSL_STRING *), r = sk_OPENSSL_PSTRING_value(db->data, i); if ((qual != NULL) && (qual(r) == 0)) continue; - if ((r = lh_OPENSSL_STRING_insert(idx, r)) != NULL) { + if ((k = lh_OPENSSL_STRING_insert(idx, r)) != NULL) { db->error = DB_ERROR_INDEX_CLASH; - db->arg1 = sk_OPENSSL_PSTRING_find(db->data, r); + db->arg1 = sk_OPENSSL_PSTRING_find(db->data, k); db->arg2 = i; lh_OPENSSL_STRING_free(idx); return (0); } + if (lh_OPENSSL_STRING_retrieve(idx, r) == NULL) { + db->error = DB_ERROR_MALLOC; + lh_OPENSSL_STRING_free(idx); + return (0); + } } if (db->index[field] != NULL) lh_OPENSSL_STRING_free(db->index[field]); @@ -320,20 +327,29 @@ int TXT_DB_insert(TXT_DB *db, OPENSSL_STRING *row) } } } - /* We have passed the index checks, now just append and insert */ - if 
(!sk_OPENSSL_PSTRING_push(db->data, row)) { - db->error = DB_ERROR_MALLOC; - goto err; - } for (i = 0; i < db->num_fields; i++) { if (db->index[i] != NULL) { if ((db->qual[i] != NULL) && (db->qual[i] (row) == 0)) continue; (void)lh_OPENSSL_STRING_insert(db->index[i], row); + if (lh_OPENSSL_STRING_retrieve(db->index[i], row) == NULL) + goto err1; } } + if (!sk_OPENSSL_PSTRING_push(db->data, row)) + goto err1; return (1); + + err1: + db->error = DB_ERROR_MALLOC; + while (i-- > 0) { + if (db->index[i] != NULL) { + if ((db->qual[i] != NULL) && (db->qual[i] (row) == 0)) + continue; + (void)lh_OPENSSL_STRING_delete(db->index[i], row); + } + } err: return (0); } diff --git a/thirdparty/openssl/crypto/ui/ui_lib.c b/thirdparty/openssl/crypto/ui/ui_lib.c index 2f580352ce..643ae59343 100644 --- a/thirdparty/openssl/crypto/ui/ui_lib.c +++ b/thirdparty/openssl/crypto/ui/ui_lib.c @@ -164,7 +164,7 @@ static int general_allocate_string(UI *ui, const char *prompt, UI_STRING *s = general_allocate_prompt(ui, prompt, prompt_freeable, type, input_flags, result_buf); - if (s) { + if (s != NULL) { if (allocate_string_stack(ui) >= 0) { s->_.string_data.result_minsize = minsize; s->_.string_data.result_maxsize = maxsize; @@ -197,8 +197,8 @@ static int general_allocate_boolean(UI *ui, } else if (cancel_chars == NULL) { UIerr(UI_F_GENERAL_ALLOCATE_BOOLEAN, ERR_R_PASSED_NULL_PARAMETER); } else { - for (p = ok_chars; *p; p++) { - if (strchr(cancel_chars, *p)) { + for (p = ok_chars; *p != '\0'; p++) { + if (strchr(cancel_chars, *p) != NULL) { UIerr(UI_F_GENERAL_ALLOCATE_BOOLEAN, UI_R_COMMON_OK_AND_CANCEL_CHARACTERS); } @@ -207,7 +207,7 @@ static int general_allocate_boolean(UI *ui, s = general_allocate_prompt(ui, prompt, prompt_freeable, type, input_flags, result_buf); - if (s) { + if (s != NULL) { if (allocate_string_stack(ui) >= 0) { s->_.boolean_data.action_desc = action_desc; s->_.boolean_data.ok_chars = ok_chars; @@ -243,7 +243,7 @@ int UI_dup_input_string(UI *ui, const char *prompt, int 
flags, { char *prompt_copy = NULL; - if (prompt) { + if (prompt != NULL) { prompt_copy = BUF_strdup(prompt); if (prompt_copy == NULL) { UIerr(UI_F_UI_DUP_INPUT_STRING, ERR_R_MALLOC_FAILURE); @@ -271,7 +271,7 @@ int UI_dup_verify_string(UI *ui, const char *prompt, int flags, { char *prompt_copy = NULL; - if (prompt) { + if (prompt != NULL) { prompt_copy = BUF_strdup(prompt); if (prompt_copy == NULL) { UIerr(UI_F_UI_DUP_VERIFY_STRING, ERR_R_MALLOC_FAILURE); @@ -302,7 +302,7 @@ int UI_dup_input_boolean(UI *ui, const char *prompt, const char *action_desc, char *ok_chars_copy = NULL; char *cancel_chars_copy = NULL; - if (prompt) { + if (prompt != NULL) { prompt_copy = BUF_strdup(prompt); if (prompt_copy == NULL) { UIerr(UI_F_UI_DUP_INPUT_BOOLEAN, ERR_R_MALLOC_FAILURE); @@ -310,7 +310,7 @@ int UI_dup_input_boolean(UI *ui, const char *prompt, const char *action_desc, } } - if (action_desc) { + if (action_desc != NULL) { action_desc_copy = BUF_strdup(action_desc); if (action_desc_copy == NULL) { UIerr(UI_F_UI_DUP_INPUT_BOOLEAN, ERR_R_MALLOC_FAILURE); @@ -318,7 +318,7 @@ int UI_dup_input_boolean(UI *ui, const char *prompt, const char *action_desc, } } - if (ok_chars) { + if (ok_chars != NULL) { ok_chars_copy = BUF_strdup(ok_chars); if (ok_chars_copy == NULL) { UIerr(UI_F_UI_DUP_INPUT_BOOLEAN, ERR_R_MALLOC_FAILURE); @@ -326,7 +326,7 @@ int UI_dup_input_boolean(UI *ui, const char *prompt, const char *action_desc, } } - if (cancel_chars) { + if (cancel_chars != NULL) { cancel_chars_copy = BUF_strdup(cancel_chars); if (cancel_chars_copy == NULL) { UIerr(UI_F_UI_DUP_INPUT_BOOLEAN, ERR_R_MALLOC_FAILURE); @@ -359,7 +359,7 @@ int UI_dup_info_string(UI *ui, const char *text) { char *text_copy = NULL; - if (text) { + if (text != NULL) { text_copy = BUF_strdup(text); if (text_copy == NULL) { UIerr(UI_F_UI_DUP_INFO_STRING, ERR_R_MALLOC_FAILURE); @@ -381,7 +381,7 @@ int UI_dup_error_string(UI *ui, const char *text) { char *text_copy = NULL; - if (text) { + if (text != NULL) { text_copy 
= BUF_strdup(text); if (text_copy == NULL) { UIerr(UI_F_UI_DUP_ERROR_STRING, ERR_R_MALLOC_FAILURE); @@ -397,7 +397,7 @@ char *UI_construct_prompt(UI *ui, const char *object_desc, { char *prompt = NULL; - if (ui->meth->ui_construct_prompt) + if (ui->meth->ui_construct_prompt != NULL) prompt = ui->meth->ui_construct_prompt(ui, object_desc, object_name); else { char prompt1[] = "Enter "; @@ -408,14 +408,16 @@ char *UI_construct_prompt(UI *ui, const char *object_desc, if (object_desc == NULL) return NULL; len = sizeof(prompt1) - 1 + strlen(object_desc); - if (object_name) + if (object_name != NULL) len += sizeof(prompt2) - 1 + strlen(object_name); len += sizeof(prompt3) - 1; prompt = (char *)OPENSSL_malloc(len + 1); + if (prompt == NULL) + return NULL; BUF_strlcpy(prompt, prompt1, len + 1); BUF_strlcat(prompt, object_desc, len + 1); - if (object_name) { + if (object_name != NULL) { BUF_strlcat(prompt, prompt2, len + 1); BUF_strlcat(prompt, object_name, len + 1); } @@ -457,7 +459,8 @@ static int print_error(const char *str, size_t len, UI *ui) uis.type = UIT_ERROR; uis.out_string = str; - if (ui->meth->ui_write_string && !ui->meth->ui_write_string(ui, &uis)) + if (ui->meth->ui_write_string != NULL + && ui->meth->ui_write_string(ui, &uis) <= 0) return -1; return 0; } @@ -466,24 +469,28 @@ int UI_process(UI *ui) { int i, ok = 0; - if (ui->meth->ui_open_session && !ui->meth->ui_open_session(ui)) - return -1; + if (ui->meth->ui_open_session != NULL + && ui->meth->ui_open_session(ui) <= 0) { + ok = -1; + goto err; + } if (ui->flags & UI_FLAG_PRINT_ERRORS) ERR_print_errors_cb((int (*)(const char *, size_t, void *)) print_error, (void *)ui); for (i = 0; i < sk_UI_STRING_num(ui->strings); i++) { - if (ui->meth->ui_write_string - && !ui->meth->ui_write_string(ui, - sk_UI_STRING_value(ui->strings, i))) + if (ui->meth->ui_write_string != NULL + && (ui->meth->ui_write_string(ui, + sk_UI_STRING_value(ui->strings, i)) + <= 0)) { ok = -1; goto err; } } - if (ui->meth->ui_flush) + if 
(ui->meth->ui_flush != NULL) switch (ui->meth->ui_flush(ui)) { case -1: /* Interrupt/Cancel/something... */ ok = -2; @@ -497,7 +504,7 @@ int UI_process(UI *ui) } for (i = 0; i < sk_UI_STRING_num(ui->strings); i++) { - if (ui->meth->ui_read_string) { + if (ui->meth->ui_read_string != NULL) { switch (ui->meth->ui_read_string(ui, sk_UI_STRING_value(ui->strings, i))) { @@ -514,7 +521,8 @@ int UI_process(UI *ui) } } err: - if (ui->meth->ui_close_session && !ui->meth->ui_close_session(ui)) + if (ui->meth->ui_close_session != NULL + && ui->meth->ui_close_session(ui) <= 0) return -1; return ok; } @@ -610,49 +618,49 @@ void UI_destroy_method(UI_METHOD *ui_method) int UI_method_set_opener(UI_METHOD *method, int (*opener) (UI *ui)) { - if (method) { + if (method != NULL) { method->ui_open_session = opener; return 0; - } else - return -1; + } + return -1; } int UI_method_set_writer(UI_METHOD *method, int (*writer) (UI *ui, UI_STRING *uis)) { - if (method) { + if (method != NULL) { method->ui_write_string = writer; return 0; - } else - return -1; + } + return -1; } int UI_method_set_flusher(UI_METHOD *method, int (*flusher) (UI *ui)) { - if (method) { + if (method != NULL) { method->ui_flush = flusher; return 0; - } else - return -1; + } + return -1; } int UI_method_set_reader(UI_METHOD *method, int (*reader) (UI *ui, UI_STRING *uis)) { - if (method) { + if (method != NULL) { method->ui_read_string = reader; return 0; - } else - return -1; + } + return -1; } int UI_method_set_closer(UI_METHOD *method, int (*closer) (UI *ui)) { - if (method) { + if (method != NULL) { method->ui_close_session = closer; return 0; - } else - return -1; + } + return -1; } int UI_method_set_prompt_constructor(UI_METHOD *method, @@ -662,55 +670,55 @@ int UI_method_set_prompt_constructor(UI_METHOD *method, const char *object_name)) { - if (method) { + if (method != NULL) { method->ui_construct_prompt = prompt_constructor; return 0; - } else - return -1; + } + return -1; } -int 
(*UI_method_get_opener(UI_METHOD *method)) (UI *) { - if (method) +int (*UI_method_get_opener(UI_METHOD *method)) (UI *) +{ + if (method != NULL) return method->ui_open_session; - else - return NULL; + return NULL; } -int (*UI_method_get_writer(UI_METHOD *method)) (UI *, UI_STRING *) { - if (method) +int (*UI_method_get_writer(UI_METHOD *method)) (UI *, UI_STRING *) +{ + if (method != NULL) return method->ui_write_string; - else - return NULL; + return NULL; } -int (*UI_method_get_flusher(UI_METHOD *method)) (UI *) { - if (method) +int (*UI_method_get_flusher(UI_METHOD *method)) (UI *) +{ + if (method != NULL) return method->ui_flush; - else - return NULL; + return NULL; } -int (*UI_method_get_reader(UI_METHOD *method)) (UI *, UI_STRING *) { - if (method) +int (*UI_method_get_reader(UI_METHOD *method)) (UI *, UI_STRING *) +{ + if (method != NULL) return method->ui_read_string; - else - return NULL; + return NULL; } -int (*UI_method_get_closer(UI_METHOD *method)) (UI *) { - if (method) +int (*UI_method_get_closer(UI_METHOD *method)) (UI *) +{ + if (method != NULL) return method->ui_close_session; - else - return NULL; + return NULL; } char *(*UI_method_get_prompt_constructor(UI_METHOD *method)) (UI *, const char *, - const char *) { - if (method) + const char *) +{ + if (method != NULL) return method->ui_construct_prompt; - else - return NULL; + return NULL; } enum UI_string_types UI_get_string_type(UI_STRING *uis) @@ -739,7 +747,6 @@ const char *UI_get0_action_string(UI_STRING *uis) if (!uis) return NULL; switch (uis->type) { - case UIT_PROMPT: case UIT_BOOLEAN: return uis->_.boolean_data.action_desc; default: diff --git a/thirdparty/openssl/crypto/ui/ui_openssl.c b/thirdparty/openssl/crypto/ui/ui_openssl.c index 9ab259b8f6..17d14f5842 100644 --- a/thirdparty/openssl/crypto/ui/ui_openssl.c +++ b/thirdparty/openssl/crypto/ui/ui_openssl.c @@ -440,7 +440,7 @@ static int read_string_inner(UI *ui, UI_STRING *uis, int echo, int strip_nl) # else p = fgets(result, maxsize, 
tty_in); # endif - if (!p) + if (p == NULL) goto error; if (feof(tty_in)) goto error; @@ -509,18 +509,31 @@ static int open_console(UI *ui) is_a_tty = 0; else # endif +# ifdef ENODEV + /* + * MacOS X returns ENODEV (Operation not supported by device), + * which seems appropriate. + */ + if (errno == ENODEV) + is_a_tty = 0; + else +# endif return 0; } #endif #ifdef OPENSSL_SYS_VMS status = sys$assign(&terminal, &channel, 0, 0); + + /* if there isn't a TT device, something is very wrong */ if (status != SS$_NORMAL) return 0; - status = - sys$qiow(0, channel, IO$_SENSEMODE, &iosb, 0, 0, tty_orig, 12, 0, 0, - 0, 0); + + status = sys$qiow(0, channel, IO$_SENSEMODE, &iosb, 0, 0, tty_orig, 12, + 0, 0, 0, 0); + + /* If IO$_SENSEMODE doesn't work, this is not a terminal device */ if ((status != SS$_NORMAL) || (iosb.iosb$w_value != SS$_NORMAL)) - return 0; + is_a_tty = 0; #endif return 1; } @@ -537,14 +550,15 @@ static int noecho_console(UI *ui) return 0; #endif #ifdef OPENSSL_SYS_VMS - tty_new[0] = tty_orig[0]; - tty_new[1] = tty_orig[1] | TT$M_NOECHO; - tty_new[2] = tty_orig[2]; - status = - sys$qiow(0, channel, IO$_SETMODE, &iosb, 0, 0, tty_new, 12, 0, 0, 0, - 0); - if ((status != SS$_NORMAL) || (iosb.iosb$w_value != SS$_NORMAL)) - return 0; + if (is_a_tty) { + tty_new[0] = tty_orig[0]; + tty_new[1] = tty_orig[1] | TT$M_NOECHO; + tty_new[2] = tty_orig[2]; + status = sys$qiow(0, channel, IO$_SETMODE, &iosb, 0, 0, tty_new, 12, + 0, 0, 0, 0); + if ((status != SS$_NORMAL) || (iosb.iosb$w_value != SS$_NORMAL)) + return 0; + } #endif return 1; } @@ -561,14 +575,15 @@ static int echo_console(UI *ui) return 0; #endif #ifdef OPENSSL_SYS_VMS - tty_new[0] = tty_orig[0]; - tty_new[1] = tty_orig[1] & ~TT$M_NOECHO; - tty_new[2] = tty_orig[2]; - status = - sys$qiow(0, channel, IO$_SETMODE, &iosb, 0, 0, tty_new, 12, 0, 0, 0, - 0); - if ((status != SS$_NORMAL) || (iosb.iosb$w_value != SS$_NORMAL)) - return 0; + if (is_a_tty) { + tty_new[0] = tty_orig[0]; + tty_new[1] = tty_orig[1] & 
~TT$M_NOECHO; + tty_new[2] = tty_orig[2]; + status = sys$qiow(0, channel, IO$_SETMODE, &iosb, 0, 0, tty_new, 12, + 0, 0, 0, 0); + if ((status != SS$_NORMAL) || (iosb.iosb$w_value != SS$_NORMAL)) + return 0; + } #endif return 1; } @@ -581,6 +596,8 @@ static int close_console(UI *ui) fclose(tty_out); #ifdef OPENSSL_SYS_VMS status = sys$dassgn(channel); + if (status != SS$_NORMAL) + return 0; #endif CRYPTO_w_unlock(CRYPTO_LOCK_UI); diff --git a/thirdparty/openssl/crypto/whrlpool/wp_dgst.c b/thirdparty/openssl/crypto/whrlpool/wp_dgst.c index e33bb4f833..807d1c49b2 100644 --- a/thirdparty/openssl/crypto/whrlpool/wp_dgst.c +++ b/thirdparty/openssl/crypto/whrlpool/wp_dgst.c @@ -51,6 +51,7 @@ * input. This is done for perfomance. */ +#include <openssl/crypto.h> #include "wp_locl.h" #include <openssl/crypto.h> #include <string.h> @@ -237,7 +238,7 @@ int WHIRLPOOL_Final(unsigned char *md, WHIRLPOOL_CTX *c) if (md) { memcpy(md, c->H.c, WHIRLPOOL_DIGEST_LENGTH); - memset(c, 0, sizeof(*c)); + OPENSSL_cleanse(c, sizeof(*c)); return (1); } return (0); diff --git a/thirdparty/openssl/crypto/x509/by_dir.c b/thirdparty/openssl/crypto/x509/by_dir.c index 9ee8f8d859..bbc3189381 100644 --- a/thirdparty/openssl/crypto/x509/by_dir.c +++ b/thirdparty/openssl/crypto/x509/by_dir.c @@ -401,6 +401,10 @@ static int get_cert_by_subject(X509_LOOKUP *xl, int type, X509_NAME *name, } if (!hent) { hent = OPENSSL_malloc(sizeof(BY_DIR_HASH)); + if (hent == NULL) { + X509err(X509_F_GET_CERT_BY_SUBJECT, ERR_R_MALLOC_FAILURE); + goto finish; + } hent->hash = h; hent->suffix = k; if (!sk_BY_DIR_HASH_push(ent->hashes, hent)) { diff --git a/thirdparty/openssl/crypto/x509/x509_att.c b/thirdparty/openssl/crypto/x509/x509_att.c index bd59281f9d..2501075307 100644 --- a/thirdparty/openssl/crypto/x509/x509_att.c +++ b/thirdparty/openssl/crypto/x509/x509_att.c @@ -296,7 +296,7 @@ int X509_ATTRIBUTE_set1_object(X509_ATTRIBUTE *attr, const ASN1_OBJECT *obj) int X509_ATTRIBUTE_set1_data(X509_ATTRIBUTE *attr, int 
attrtype, const void *data, int len) { - ASN1_TYPE *ttmp; + ASN1_TYPE *ttmp = NULL; ASN1_STRING *stmp = NULL; int atype = 0; if (!attr) @@ -324,20 +324,26 @@ int X509_ATTRIBUTE_set1_data(X509_ATTRIBUTE *attr, int attrtype, * least one value but some types use and zero length SET and require * this. */ - if (attrtype == 0) + if (attrtype == 0) { + ASN1_STRING_free(stmp); return 1; + } if (!(ttmp = ASN1_TYPE_new())) goto err; if ((len == -1) && !(attrtype & MBSTRING_FLAG)) { if (!ASN1_TYPE_set1(ttmp, attrtype, data)) goto err; - } else + } else { ASN1_TYPE_set(ttmp, atype, stmp); + stmp = NULL; + } if (!sk_ASN1_TYPE_push(attr->value.set, ttmp)) goto err; return 1; err: X509err(X509_F_X509_ATTRIBUTE_SET1_DATA, ERR_R_MALLOC_FAILURE); + ASN1_TYPE_free(ttmp); + ASN1_STRING_free(stmp); return 0; } diff --git a/thirdparty/openssl/crypto/x509/x509_err.c b/thirdparty/openssl/crypto/x509/x509_err.c index 1e779fefd9..a2a8e1b08b 100644 --- a/thirdparty/openssl/crypto/x509/x509_err.c +++ b/thirdparty/openssl/crypto/x509/x509_err.c @@ -1,6 +1,6 @@ /* crypto/x509/x509_err.c */ /* ==================================================================== - * Copyright (c) 1999-2012 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2016 The OpenSSL Project. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -72,6 +72,7 @@ static ERR_STRING_DATA X509_str_functs[] = { {ERR_FUNC(X509_F_ADD_CERT_DIR), "ADD_CERT_DIR"}, {ERR_FUNC(X509_F_BY_FILE_CTRL), "BY_FILE_CTRL"}, + {ERR_FUNC(X509_F_CHECK_NAME_CONSTRAINTS), "CHECK_NAME_CONSTRAINTS"}, {ERR_FUNC(X509_F_CHECK_POLICY), "CHECK_POLICY"}, {ERR_FUNC(X509_F_DIR_CTRL), "DIR_CTRL"}, {ERR_FUNC(X509_F_GET_CERT_BY_SUBJECT), "GET_CERT_BY_SUBJECT"}, diff --git a/thirdparty/openssl/crypto/x509/x509_lu.c b/thirdparty/openssl/crypto/x509/x509_lu.c index 50120a4d70..b7424809fd 100644 --- a/thirdparty/openssl/crypto/x509/x509_lu.c +++ b/thirdparty/openssl/crypto/x509/x509_lu.c @@ -185,14 +185,16 @@ X509_STORE *X509_STORE_new(void) if ((ret = (X509_STORE *)OPENSSL_malloc(sizeof(X509_STORE))) == NULL) return NULL; - ret->objs = sk_X509_OBJECT_new(x509_object_cmp); + if ((ret->objs = sk_X509_OBJECT_new(x509_object_cmp)) == NULL) + goto err0; ret->cache = 1; - ret->get_cert_methods = sk_X509_LOOKUP_new_null(); + if ((ret->get_cert_methods = sk_X509_LOOKUP_new_null()) == NULL) + goto err1; ret->verify = 0; ret->verify_cb = 0; if ((ret->param = X509_VERIFY_PARAM_new()) == NULL) - return NULL; + goto err2; ret->get_issuer = 0; ret->check_issued = 0; @@ -204,14 +206,21 @@ X509_STORE *X509_STORE_new(void) ret->lookup_crls = 0; ret->cleanup = 0; - if (!CRYPTO_new_ex_data(CRYPTO_EX_INDEX_X509_STORE, ret, &ret->ex_data)) { - sk_X509_OBJECT_free(ret->objs); - OPENSSL_free(ret); - return NULL; - } + if (!CRYPTO_new_ex_data(CRYPTO_EX_INDEX_X509_STORE, ret, &ret->ex_data)) + goto err3; ret->references = 1; return ret; + + err3: + X509_VERIFY_PARAM_free(ret->param); + err2: + sk_X509_LOOKUP_free(ret->get_cert_methods); + err1: + sk_X509_OBJECT_free(ret->objs); + err0: + OPENSSL_free(ret); + return NULL; } static void cleanup(X509_OBJECT *a) @@ -360,8 +369,12 @@ int X509_STORE_add_cert(X509_STORE *ctx, X509 *x) 
X509err(X509_F_X509_STORE_ADD_CERT, X509_R_CERT_ALREADY_IN_HASH_TABLE); ret = 0; - } else - sk_X509_OBJECT_push(ctx->objs, obj); + } else if (!sk_X509_OBJECT_push(ctx->objs, obj)) { + X509_OBJECT_free_contents(obj); + OPENSSL_free(obj); + X509err(X509_F_X509_STORE_ADD_CERT, ERR_R_MALLOC_FAILURE); + ret = 0; + } CRYPTO_w_unlock(CRYPTO_LOCK_X509_STORE); @@ -392,8 +405,12 @@ int X509_STORE_add_crl(X509_STORE *ctx, X509_CRL *x) OPENSSL_free(obj); X509err(X509_F_X509_STORE_ADD_CRL, X509_R_CERT_ALREADY_IN_HASH_TABLE); ret = 0; - } else - sk_X509_OBJECT_push(ctx->objs, obj); + } else if (!sk_X509_OBJECT_push(ctx->objs, obj)) { + X509_OBJECT_free_contents(obj); + OPENSSL_free(obj); + X509err(X509_F_X509_STORE_ADD_CRL, ERR_R_MALLOC_FAILURE); + ret = 0; + } CRYPTO_w_unlock(CRYPTO_LOCK_X509_STORE); diff --git a/thirdparty/openssl/crypto/x509/x509_obj.c b/thirdparty/openssl/crypto/x509/x509_obj.c index 3de3ac7204..0a839f3e54 100644 --- a/thirdparty/openssl/crypto/x509/x509_obj.c +++ b/thirdparty/openssl/crypto/x509/x509_obj.c @@ -129,7 +129,7 @@ char *X509_NAME_oneline(X509_NAME *a, char *buf, int len) type == V_ASN1_VISIBLESTRING || type == V_ASN1_PRINTABLESTRING || type == V_ASN1_TELETEXSTRING || - type == V_ASN1_VISIBLESTRING || type == V_ASN1_IA5STRING) { + type == V_ASN1_IA5STRING) { if (num > (int)sizeof(ebcdic_buf)) num = sizeof(ebcdic_buf); ascii2ebcdic(ebcdic_buf, q, num); diff --git a/thirdparty/openssl/crypto/x509/x509_r2x.c b/thirdparty/openssl/crypto/x509/x509_r2x.c index 0ff439c99f..2879569ead 100644 --- a/thirdparty/openssl/crypto/x509/x509_r2x.c +++ b/thirdparty/openssl/crypto/x509/x509_r2x.c @@ -70,10 +70,12 @@ X509 *X509_REQ_to_X509(X509_REQ *r, int days, EVP_PKEY *pkey) X509 *ret = NULL; X509_CINF *xi = NULL; X509_NAME *xn; + EVP_PKEY *pubkey = NULL; + int res; if ((ret = X509_new()) == NULL) { X509err(X509_F_X509_REQ_TO_X509, ERR_R_MALLOC_FAILURE); - goto err; + return NULL; } /* duplicate the request */ @@ -89,9 +91,9 @@ X509 *X509_REQ_to_X509(X509_REQ *r, 
int days, EVP_PKEY *pkey) } xn = X509_REQ_get_subject_name(r); - if (X509_set_subject_name(ret, X509_NAME_dup(xn)) == 0) + if (X509_set_subject_name(ret, xn) == 0) goto err; - if (X509_set_issuer_name(ret, X509_NAME_dup(xn)) == 0) + if (X509_set_issuer_name(ret, xn) == 0) goto err; if (X509_gmtime_adj(xi->validity->notBefore, 0) == NULL) @@ -100,9 +102,11 @@ X509 *X509_REQ_to_X509(X509_REQ *r, int days, EVP_PKEY *pkey) NULL) goto err; - X509_set_pubkey(ret, X509_REQ_get_pubkey(r)); + pubkey = X509_REQ_get_pubkey(r); + res = X509_set_pubkey(ret, pubkey); + EVP_PKEY_free(pubkey); - if (!X509_sign(ret, pkey, EVP_md5())) + if (!res || !X509_sign(ret, pkey, EVP_md5())) goto err; if (0) { err: diff --git a/thirdparty/openssl/crypto/x509/x509_txt.c b/thirdparty/openssl/crypto/x509/x509_txt.c index 3d46d3ff83..35db095591 100644 --- a/thirdparty/openssl/crypto/x509/x509_txt.c +++ b/thirdparty/openssl/crypto/x509/x509_txt.c @@ -204,6 +204,13 @@ const char *X509_verify_cert_error_string(long n) case X509_V_ERR_IP_ADDRESS_MISMATCH: return ("IP address mismatch"); + case X509_V_ERR_INVALID_CALL: + return ("Invalid certificate verification context"); + case X509_V_ERR_STORE_LOOKUP: + return ("Issuer certificate lookup error"); + case X509_V_ERR_PROXY_SUBJECT_NAME_VIOLATION: + return ("proxy subject name violation"); + default: BIO_snprintf(buf, sizeof buf, "error number %ld", n); return (buf); diff --git a/thirdparty/openssl/crypto/x509/x509_vfy.c b/thirdparty/openssl/crypto/x509/x509_vfy.c index 4d34dbac93..b1472018ba 100644 --- a/thirdparty/openssl/crypto/x509/x509_vfy.c +++ b/thirdparty/openssl/crypto/x509/x509_vfy.c @@ -199,6 +199,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (ctx->cert == NULL) { X509err(X509_F_X509_VERIFY_CERT, X509_R_NO_CERT_SET_FOR_US_TO_VERIFY); + ctx->error = X509_V_ERR_INVALID_CALL; return -1; } if (ctx->chain != NULL) { @@ -207,6 +208,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx) * cannot do another one. 
*/ X509err(X509_F_X509_VERIFY_CERT, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); + ctx->error = X509_V_ERR_INVALID_CALL; return -1; } @@ -219,6 +221,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (((ctx->chain = sk_X509_new_null()) == NULL) || (!sk_X509_push(ctx->chain, ctx->cert))) { X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); + ctx->error = X509_V_ERR_OUT_OF_MEM; ok = -1; goto err; } @@ -229,6 +232,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (ctx->untrusted != NULL && (sktmp = sk_X509_dup(ctx->untrusted)) == NULL) { X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); + ctx->error = X509_V_ERR_OUT_OF_MEM; ok = -1; goto err; } @@ -253,8 +257,10 @@ int X509_verify_cert(X509_STORE_CTX *ctx) */ if (ctx->param->flags & X509_V_FLAG_TRUSTED_FIRST) { ok = ctx->get_issuer(&xtmp, ctx, x); - if (ok < 0) + if (ok < 0) { + ctx->error = X509_V_ERR_STORE_LOOKUP; goto err; + } /* * If successful for now free up cert so it will be picked up * again later. @@ -271,6 +277,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (xtmp != NULL) { if (!sk_X509_push(ctx->chain, xtmp)) { X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); + ctx->error = X509_V_ERR_OUT_OF_MEM; ok = -1; goto err; } @@ -352,14 +359,17 @@ int X509_verify_cert(X509_STORE_CTX *ctx) break; ok = ctx->get_issuer(&xtmp, ctx, x); - if (ok < 0) + if (ok < 0) { + ctx->error = X509_V_ERR_STORE_LOOKUP; goto err; + } if (ok == 0) break; x = xtmp; if (!sk_X509_push(ctx->chain, x)) { X509_free(xtmp); X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); + ctx->error = X509_V_ERR_OUT_OF_MEM; ok = -1; goto err; } @@ -386,8 +396,10 @@ int X509_verify_cert(X509_STORE_CTX *ctx) while (j-- > 1) { xtmp2 = sk_X509_value(ctx->chain, j - 1); ok = ctx->get_issuer(&xtmp, ctx, xtmp2); - if (ok < 0) + if (ok < 0) { + ctx->error = X509_V_ERR_STORE_LOOKUP; goto err; + } /* Check if we found an alternate chain */ if (ok > 0) { /* @@ -515,6 +527,10 @@ int X509_verify_cert(X509_STORE_CTX *ctx) sk_X509_free(sktmp); if 
(chain_ss != NULL) X509_free(chain_ss); + + /* Safety net, error returns must set ctx->error */ + if (ok <= 0 && ctx->error == X509_V_OK) + ctx->error = X509_V_ERR_UNSPECIFIED; return ok; } @@ -697,13 +713,27 @@ static int check_chain_extensions(X509_STORE_CTX *ctx) * the next certificate must be a CA certificate. */ if (x->ex_flags & EXFLAG_PROXY) { - if (x->ex_pcpathlen != -1 && i > x->ex_pcpathlen) { - ctx->error = X509_V_ERR_PROXY_PATH_LENGTH_EXCEEDED; - ctx->error_depth = i; - ctx->current_cert = x; - ok = cb(0, ctx); - if (!ok) - goto end; + /* + * RFC3820, 4.1.3 (b)(1) stipulates that if pCPathLengthConstraint + * is less than max_path_length, the former should be copied to + * the latter, and 4.1.4 (a) stipulates that max_path_length + * should be verified to be larger than zero and decrement it. + * + * Because we're checking the certs in the reverse order, we start + * with verifying that proxy_path_length isn't larger than pcPLC, + * and copy the latter to the former if it is, and finally, + * increment proxy_path_length. + */ + if (x->ex_pcpathlen != -1) { + if (proxy_path_length > x->ex_pcpathlen) { + ctx->error = X509_V_ERR_PROXY_PATH_LENGTH_EXCEEDED; + ctx->error_depth = i; + ctx->current_cert = x; + ok = cb(0, ctx); + if (!ok) + goto end; + } + proxy_path_length = x->ex_pcpathlen; } proxy_path_length++; must_be_ca = 0; @@ -726,6 +756,81 @@ static int check_name_constraints(X509_STORE_CTX *ctx) /* Ignore self issued certs unless last in chain */ if (i && (x->ex_flags & EXFLAG_SI)) continue; + + /* + * Proxy certificates policy has an extra constraint, where the + * certificate subject MUST be the issuer with a single CN entry + * added. 
+ * (RFC 3820: 3.4, 4.1.3 (a)(4)) + */ + if (x->ex_flags & EXFLAG_PROXY) { + X509_NAME *tmpsubject = X509_get_subject_name(x); + X509_NAME *tmpissuer = X509_get_issuer_name(x); + X509_NAME_ENTRY *tmpentry = NULL; + int last_object_nid = 0; + int err = X509_V_OK; + int last_object_loc = X509_NAME_entry_count(tmpsubject) - 1; + + /* Check that there are at least two RDNs */ + if (last_object_loc < 1) { + err = X509_V_ERR_PROXY_SUBJECT_NAME_VIOLATION; + goto proxy_name_done; + } + + /* + * Check that there is exactly one more RDN in subject as + * there is in issuer. + */ + if (X509_NAME_entry_count(tmpsubject) + != X509_NAME_entry_count(tmpissuer) + 1) { + err = X509_V_ERR_PROXY_SUBJECT_NAME_VIOLATION; + goto proxy_name_done; + } + + /* + * Check that the last subject component isn't part of a + * multivalued RDN + */ + if (X509_NAME_get_entry(tmpsubject, last_object_loc)->set + == X509_NAME_get_entry(tmpsubject, last_object_loc - 1)->set) { + err = X509_V_ERR_PROXY_SUBJECT_NAME_VIOLATION; + goto proxy_name_done; + } + + /* + * Check that the last subject RDN is a commonName, and that + * all the previous RDNs match the issuer exactly + */ + tmpsubject = X509_NAME_dup(tmpsubject); + if (tmpsubject == NULL) { + X509err(X509_F_CHECK_NAME_CONSTRAINTS, ERR_R_MALLOC_FAILURE); + ctx->error = X509_V_ERR_OUT_OF_MEM; + return 0; + } + + tmpentry = + X509_NAME_delete_entry(tmpsubject, last_object_loc); + last_object_nid = + OBJ_obj2nid(X509_NAME_ENTRY_get_object(tmpentry)); + + if (last_object_nid != NID_commonName + || X509_NAME_cmp(tmpsubject, tmpissuer) != 0) { + err = X509_V_ERR_PROXY_SUBJECT_NAME_VIOLATION; + } + + X509_NAME_ENTRY_free(tmpentry); + X509_NAME_free(tmpsubject); + + proxy_name_done: + if (err != X509_V_OK) { + ctx->error = err; + ctx->error_depth = i; + ctx->current_cert = x; + if (!ctx->verify_cb(0, ctx)) + return 0; + } + } + /* * Check against constraints for all certificates higher in chain * including trust anchor. 
Trust anchor not strictly speaking needed @@ -736,12 +841,19 @@ static int check_name_constraints(X509_STORE_CTX *ctx) NAME_CONSTRAINTS *nc = sk_X509_value(ctx->chain, j)->nc; if (nc) { rv = NAME_CONSTRAINTS_check(x, nc); - if (rv != X509_V_OK) { + switch (rv) { + case X509_V_OK: + continue; + case X509_V_ERR_OUT_OF_MEM: + ctx->error = rv; + return 0; + default: ctx->error = rv; ctx->error_depth = i; ctx->current_cert = x; if (!ctx->verify_cb(0, ctx)) return 0; + break; } } } @@ -880,6 +992,8 @@ static int check_cert(X509_STORE_CTX *ctx) ctx->current_issuer = NULL; ctx->current_crl_score = 0; ctx->current_reasons = 0; + if (x->ex_flags & EXFLAG_PROXY) + return 1; while (ctx->current_reasons != CRLDP_ALL_REASONS) { last_reasons = ctx->current_reasons; /* Try to retrieve relevant CRL */ @@ -1010,13 +1124,25 @@ static int get_crl_sk(X509_STORE_CTX *ctx, X509_CRL **pcrl, X509_CRL **pdcrl, crl = sk_X509_CRL_value(crls, i); reasons = *preasons; crl_score = get_crl_score(ctx, &crl_issuer, &reasons, crl, x); - - if (crl_score > best_score) { - best_crl = crl; - best_crl_issuer = crl_issuer; - best_score = crl_score; - best_reasons = reasons; + if (crl_score < best_score || crl_score == 0) + continue; + /* If current CRL is equivalent use it if it is newer */ + if (crl_score == best_score && best_crl != NULL) { + int day, sec; + if (ASN1_TIME_diff(&day, &sec, X509_CRL_get_lastUpdate(best_crl), + X509_CRL_get_lastUpdate(crl)) == 0) + continue; + /* + * ASN1_TIME_diff never returns inconsistent signs for |day| + * and |sec|. 
+ */ + if (day <= 0 && sec <= 0) + continue; } + best_crl = crl; + best_crl_issuer = crl_issuer; + best_score = crl_score; + best_reasons = reasons; } if (best_crl) { @@ -1630,6 +1756,7 @@ static int check_policy(X509_STORE_CTX *ctx) ctx->param->policies, ctx->param->flags); if (ret == 0) { X509err(X509_F_CHECK_POLICY, ERR_R_MALLOC_FAILURE); + ctx->error = X509_V_ERR_OUT_OF_MEM; return 0; } /* Invalid or inconsistent extensions */ @@ -1658,7 +1785,12 @@ static int check_policy(X509_STORE_CTX *ctx) if (ctx->param->flags & X509_V_FLAG_NOTIFY_POLICY) { ctx->current_cert = NULL; - ctx->error = X509_V_OK; + /* + * Verification errors need to be "sticky", a callback may have allowed + * an SSL handshake to continue despite an error, and we must then + * remain in an error state. Therefore, we MUST NOT clear earlier + * verification errors by setting the error to X509_V_OK. + */ if (!ctx->verify_cb(2, ctx)) return 0; } diff --git a/thirdparty/openssl/crypto/x509/x509spki.c b/thirdparty/openssl/crypto/x509/x509spki.c index 2df84ead9e..5ae5d30a35 100644 --- a/thirdparty/openssl/crypto/x509/x509spki.c +++ b/thirdparty/openssl/crypto/x509/x509spki.c @@ -112,6 +112,8 @@ char *NETSCAPE_SPKI_b64_encode(NETSCAPE_SPKI *spki) der_spki = OPENSSL_malloc(der_len); b64_str = OPENSSL_malloc(der_len * 2); if (!der_spki || !b64_str) { + OPENSSL_free(der_spki); + OPENSSL_free(b64_str); X509err(X509_F_NETSCAPE_SPKI_B64_ENCODE, ERR_R_MALLOC_FAILURE); return NULL; } diff --git a/thirdparty/openssl/crypto/x509v3/v3_addr.c b/thirdparty/openssl/crypto/x509v3/v3_addr.c index 94cfed0509..1290dec9bb 100644 --- a/thirdparty/openssl/crypto/x509v3/v3_addr.c +++ b/thirdparty/openssl/crypto/x509v3/v3_addr.c @@ -1211,6 +1211,11 @@ int v3_addr_subset(IPAddrBlocks *a, IPAddrBlocks *b) /* * Core code for RFC 3779 2.3 path validation. + * + * Returns 1 for success, 0 on error. + * + * When returning 0, ctx->error MUST be set to an appropriate value other than + * X509_V_OK. 
*/ static int v3_addr_validate_path_internal(X509_STORE_CTX *ctx, STACK_OF(X509) *chain, @@ -1245,6 +1250,7 @@ static int v3_addr_validate_path_internal(X509_STORE_CTX *ctx, if ((child = sk_IPAddressFamily_dup(ext)) == NULL) { X509V3err(X509V3_F_V3_ADDR_VALIDATE_PATH_INTERNAL, ERR_R_MALLOC_FAILURE); + ctx->error = X509_V_ERR_OUT_OF_MEM; ret = 0; goto done; } diff --git a/thirdparty/openssl/crypto/x509v3/v3_alt.c b/thirdparty/openssl/crypto/x509v3/v3_alt.c index 22ec202846..a0351faf11 100644 --- a/thirdparty/openssl/crypto/x509v3/v3_alt.c +++ b/thirdparty/openssl/crypto/x509v3/v3_alt.c @@ -119,32 +119,39 @@ STACK_OF(CONF_VALUE) *i2v_GENERAL_NAME(X509V3_EXT_METHOD *method, int i; switch (gen->type) { case GEN_OTHERNAME: - X509V3_add_value("othername", "<unsupported>", &ret); + if (!X509V3_add_value("othername", "<unsupported>", &ret)) + return NULL; break; case GEN_X400: - X509V3_add_value("X400Name", "<unsupported>", &ret); + if (!X509V3_add_value("X400Name", "<unsupported>", &ret)) + return NULL; break; case GEN_EDIPARTY: - X509V3_add_value("EdiPartyName", "<unsupported>", &ret); + if (!X509V3_add_value("EdiPartyName", "<unsupported>", &ret)) + return NULL; break; case GEN_EMAIL: - X509V3_add_value_uchar("email", gen->d.ia5->data, &ret); + if (!X509V3_add_value_uchar("email", gen->d.ia5->data, &ret)) + return NULL; break; case GEN_DNS: - X509V3_add_value_uchar("DNS", gen->d.ia5->data, &ret); + if (!X509V3_add_value_uchar("DNS", gen->d.ia5->data, &ret)) + return NULL; break; case GEN_URI: - X509V3_add_value_uchar("URI", gen->d.ia5->data, &ret); + if (!X509V3_add_value_uchar("URI", gen->d.ia5->data, &ret)) + return NULL; break; case GEN_DIRNAME: - X509_NAME_oneline(gen->d.dirn, oline, 256); - X509V3_add_value("DirName", oline, &ret); + if (X509_NAME_oneline(gen->d.dirn, oline, 256) == NULL + || !X509V3_add_value("DirName", oline, &ret)) + return NULL; break; case GEN_IPADD: @@ -162,15 +169,18 @@ STACK_OF(CONF_VALUE) *i2v_GENERAL_NAME(X509V3_EXT_METHOD *method, 
strcat(oline, ":"); } } else { - X509V3_add_value("IP Address", "<invalid>", &ret); + if (!X509V3_add_value("IP Address", "<invalid>", &ret)) + return NULL; break; } - X509V3_add_value("IP Address", oline, &ret); + if (!X509V3_add_value("IP Address", oline, &ret)) + return NULL; break; case GEN_RID: i2t_ASN1_OBJECT(oline, 256, gen->d.rid); - X509V3_add_value("Registered ID", oline, &ret); + if (!X509V3_add_value("Registered ID", oline, &ret)) + return NULL; break; } return ret; @@ -573,6 +583,8 @@ static int do_othername(GENERAL_NAME *gen, char *value, X509V3_CTX *ctx) return 0; objlen = p - value; objtmp = OPENSSL_malloc(objlen + 1); + if (objtmp == NULL) + return 0; strncpy(objtmp, value, objlen); objtmp[objlen] = 0; gen->d.otherName->type_id = OBJ_txt2obj(objtmp, 0); diff --git a/thirdparty/openssl/crypto/x509v3/v3_conf.c b/thirdparty/openssl/crypto/x509v3/v3_conf.c index eeff8bd185..c1b4c1a89f 100644 --- a/thirdparty/openssl/crypto/x509v3/v3_conf.c +++ b/thirdparty/openssl/crypto/x509v3/v3_conf.c @@ -135,11 +135,13 @@ static X509_EXTENSION *do_ext_nconf(CONF *conf, X509V3_CTX *ctx, int ext_nid, nval = NCONF_get_section(conf, value + 1); else nval = X509V3_parse_list(value); - if (sk_CONF_VALUE_num(nval) <= 0) { + if (nval == NULL || sk_CONF_VALUE_num(nval) <= 0) { X509V3err(X509V3_F_DO_EXT_NCONF, X509V3_R_INVALID_EXTENSION_STRING); ERR_add_error_data(4, "name=", OBJ_nid2sn(ext_nid), ",section=", value); + if (*value != '@') + sk_CONF_VALUE_free(nval); return NULL; } ext_struc = method->v2i(method, ctx, nval); diff --git a/thirdparty/openssl/crypto/x509v3/v3_cpols.c b/thirdparty/openssl/crypto/x509v3/v3_cpols.c index d97f6226b9..b99269e7f8 100644 --- a/thirdparty/openssl/crypto/x509v3/v3_cpols.c +++ b/thirdparty/openssl/crypto/x509v3/v3_cpols.c @@ -390,10 +390,10 @@ static int nref_nos(STACK_OF(ASN1_INTEGER) *nnums, STACK_OF(CONF_VALUE) *nos) return 1; merr: + ASN1_INTEGER_free(aint); X509V3err(X509V3_F_NREF_NOS, ERR_R_MALLOC_FAILURE); err: - 
sk_ASN1_INTEGER_pop_free(nnums, ASN1_STRING_free); return 0; } @@ -458,9 +458,15 @@ static void print_notice(BIO *out, USERNOTICE *notice, int indent) num = sk_ASN1_INTEGER_value(ref->noticenos, i); if (i) BIO_puts(out, ", "); - tmp = i2s_ASN1_INTEGER(NULL, num); - BIO_puts(out, tmp); - OPENSSL_free(tmp); + if (num == NULL) + BIO_puts(out, "(null)"); + else { + tmp = i2s_ASN1_INTEGER(NULL, num); + if (tmp == NULL) + return; + BIO_puts(out, tmp); + OPENSSL_free(tmp); + } } BIO_puts(out, "\n"); } diff --git a/thirdparty/openssl/crypto/x509v3/v3_info.c b/thirdparty/openssl/crypto/x509v3/v3_info.c index e052a34b94..7064c725d9 100644 --- a/thirdparty/openssl/crypto/x509v3/v3_info.c +++ b/thirdparty/openssl/crypto/x509v3/v3_info.c @@ -107,29 +107,30 @@ ASN1_ITEM_TEMPLATE_END(AUTHORITY_INFO_ACCESS) IMPLEMENT_ASN1_FUNCTIONS(AUTHORITY_INFO_ACCESS) -static STACK_OF(CONF_VALUE) *i2v_AUTHORITY_INFO_ACCESS(X509V3_EXT_METHOD - *method, AUTHORITY_INFO_ACCESS - *ainfo, STACK_OF(CONF_VALUE) - *ret) +static STACK_OF(CONF_VALUE) *i2v_AUTHORITY_INFO_ACCESS( + X509V3_EXT_METHOD *method, AUTHORITY_INFO_ACCESS *ainfo, + STACK_OF(CONF_VALUE) *ret) { ACCESS_DESCRIPTION *desc; int i, nlen; char objtmp[80], *ntmp; CONF_VALUE *vtmp; + STACK_OF(CONF_VALUE) *tret = ret; + for (i = 0; i < sk_ACCESS_DESCRIPTION_num(ainfo); i++) { + STACK_OF(CONF_VALUE) *tmp; + desc = sk_ACCESS_DESCRIPTION_value(ainfo, i); - ret = i2v_GENERAL_NAME(method, desc->location, ret); - if (!ret) - break; - vtmp = sk_CONF_VALUE_value(ret, i); + tmp = i2v_GENERAL_NAME(method, desc->location, tret); + if (tmp == NULL) + goto err; + tret = tmp; + vtmp = sk_CONF_VALUE_value(tret, i); i2t_ASN1_OBJECT(objtmp, sizeof objtmp, desc->method); nlen = strlen(objtmp) + strlen(vtmp->name) + 5; ntmp = OPENSSL_malloc(nlen); - if (!ntmp) { - X509V3err(X509V3_F_I2V_AUTHORITY_INFO_ACCESS, - ERR_R_MALLOC_FAILURE); - return NULL; - } + if (ntmp == NULL) + goto err; BUF_strlcpy(ntmp, objtmp, nlen); BUF_strlcat(ntmp, " - ", nlen); 
BUF_strlcat(ntmp, vtmp->name, nlen); @@ -137,9 +138,15 @@ static STACK_OF(CONF_VALUE) *i2v_AUTHORITY_INFO_ACCESS(X509V3_EXT_METHOD vtmp->name = ntmp; } - if (!ret) + if (ret == NULL && tret == NULL) return sk_CONF_VALUE_new_null(); - return ret; + + return tret; + err: + X509V3err(X509V3_F_I2V_AUTHORITY_INFO_ACCESS, ERR_R_MALLOC_FAILURE); + if (ret == NULL && tret != NULL) + sk_CONF_VALUE_pop_free(tret, X509V3_conf_free); + return NULL; } static AUTHORITY_INFO_ACCESS *v2i_AUTHORITY_INFO_ACCESS(X509V3_EXT_METHOD diff --git a/thirdparty/openssl/crypto/x509v3/v3_purp.c b/thirdparty/openssl/crypto/x509v3/v3_purp.c index 845be673b7..96e629a930 100644 --- a/thirdparty/openssl/crypto/x509v3/v3_purp.c +++ b/thirdparty/openssl/crypto/x509v3/v3_purp.c @@ -321,6 +321,7 @@ int X509_supported_extension(X509_EXTENSION *ex) NID_subject_alt_name, /* 85 */ NID_basic_constraints, /* 87 */ NID_certificate_policies, /* 89 */ + NID_crl_distribution_points, /* 103 */ NID_ext_key_usage, /* 126 */ #ifndef OPENSSL_NO_RFC3779 NID_sbgp_ipAddrBlock, /* 290 */ diff --git a/thirdparty/openssl/openssl/bio.h b/thirdparty/openssl/openssl/bio.h index 6790aed28e..8f2438cdad 100644 --- a/thirdparty/openssl/openssl/bio.h +++ b/thirdparty/openssl/openssl/bio.h @@ -559,11 +559,11 @@ int BIO_read_filename(BIO *b, const char *name); # define BIO_get_ssl(b,sslp) BIO_ctrl(b,BIO_C_GET_SSL,0,(char *)sslp) # define BIO_set_ssl_mode(b,client) BIO_ctrl(b,BIO_C_SSL_MODE,client,NULL) # define BIO_set_ssl_renegotiate_bytes(b,num) \ - BIO_ctrl(b,BIO_C_SET_SSL_RENEGOTIATE_BYTES,num,NULL); + BIO_ctrl(b,BIO_C_SET_SSL_RENEGOTIATE_BYTES,num,NULL) # define BIO_get_num_renegotiates(b) \ - BIO_ctrl(b,BIO_C_GET_SSL_NUM_RENEGOTIATES,0,NULL); + BIO_ctrl(b,BIO_C_GET_SSL_NUM_RENEGOTIATES,0,NULL) # define BIO_set_ssl_renegotiate_timeout(b,seconds) \ - BIO_ctrl(b,BIO_C_SET_SSL_RENEGOTIATE_TIMEOUT,seconds,NULL); + BIO_ctrl(b,BIO_C_SET_SSL_RENEGOTIATE_TIMEOUT,seconds,NULL) /* defined in evp.h */ /* #define BIO_set_md(b,md) 
BIO_ctrl(b,BIO_C_SET_MD,1,(char *)md) */ diff --git a/thirdparty/openssl/openssl/bn.h b/thirdparty/openssl/openssl/bn.h index 86264ae631..633d1b1f60 100644 --- a/thirdparty/openssl/openssl/bn.h +++ b/thirdparty/openssl/openssl/bn.h @@ -842,6 +842,8 @@ int RAND_pseudo_bytes(unsigned char *buf, int num); if (*(ftl--)) break; \ (a)->top = tmp_top; \ } \ + if ((a)->top == 0) \ + (a)->neg = 0; \ bn_pollute(a); \ } diff --git a/thirdparty/openssl/openssl/comp.h b/thirdparty/openssl/openssl/comp.h index 60a073404e..df599ba331 100644 --- a/thirdparty/openssl/openssl/comp.h +++ b/thirdparty/openssl/openssl/comp.h @@ -14,7 +14,7 @@ extern "C" { typedef struct comp_ctx_st COMP_CTX; -typedef struct comp_method_st { +struct comp_method_st { int type; /* NID for compression library */ const char *name; /* A text string to identify the library */ int (*init) (COMP_CTX *ctx); @@ -30,7 +30,7 @@ typedef struct comp_method_st { */ long (*ctrl) (void); long (*callback_ctrl) (void); -} COMP_METHOD; +}; struct comp_ctx_st { COMP_METHOD *meth; diff --git a/thirdparty/openssl/openssl/conf.h b/thirdparty/openssl/openssl/conf.h index 8d926d5d82..fe49113080 100644 --- a/thirdparty/openssl/openssl/conf.h +++ b/thirdparty/openssl/openssl/conf.h @@ -259,6 +259,7 @@ void ERR_load_CONF_strings(void); # define CONF_R_NO_VALUE 108 # define CONF_R_UNABLE_TO_CREATE_NEW_SECTION 103 # define CONF_R_UNKNOWN_MODULE_NAME 113 +# define CONF_R_VARIABLE_EXPANSION_TOO_LONG 116 # define CONF_R_VARIABLE_HAS_NO_VALUE 104 #ifdef __cplusplus diff --git a/thirdparty/openssl/openssl/dh.h b/thirdparty/openssl/openssl/dh.h index a5bd9016aa..a228c7a7a4 100644 --- a/thirdparty/openssl/openssl/dh.h +++ b/thirdparty/openssl/openssl/dh.h @@ -182,12 +182,29 @@ struct dh_st { */ # define DH_CHECK_P_NOT_STRONG_PRIME DH_CHECK_P_NOT_SAFE_PRIME -# define d2i_DHparams_fp(fp,x) (DH *)ASN1_d2i_fp((char *(*)())DH_new, \ - (char *(*)())d2i_DHparams,(fp),(unsigned char **)(x)) -# define i2d_DHparams_fp(fp,x) 
ASN1_i2d_fp(i2d_DHparams,(fp), \ - (unsigned char *)(x)) -# define d2i_DHparams_bio(bp,x) ASN1_d2i_bio_of(DH,DH_new,d2i_DHparams,bp,x) -# define i2d_DHparams_bio(bp,x) ASN1_i2d_bio_of_const(DH,i2d_DHparams,bp,x) +# define d2i_DHparams_fp(fp,x) \ + (DH *)ASN1_d2i_fp((char *(*)())DH_new, \ + (char *(*)())d2i_DHparams, \ + (fp), \ + (unsigned char **)(x)) +# define i2d_DHparams_fp(fp,x) \ + ASN1_i2d_fp(i2d_DHparams,(fp), (unsigned char *)(x)) +# define d2i_DHparams_bio(bp,x) \ + ASN1_d2i_bio_of(DH, DH_new, d2i_DHparams, bp, x) +# define i2d_DHparams_bio(bp,x) \ + ASN1_i2d_bio_of_const(DH,i2d_DHparams,bp,x) + +# define d2i_DHxparams_fp(fp,x) \ + (DH *)ASN1_d2i_fp((char *(*)())DH_new, \ + (char *(*)())d2i_DHxparams, \ + (fp), \ + (unsigned char **)(x)) +# define i2d_DHxparams_fp(fp,x) \ + ASN1_i2d_fp(i2d_DHxparams,(fp), (unsigned char *)(x)) +# define d2i_DHxparams_bio(bp,x) \ + ASN1_d2i_bio_of(DH, DH_new, d2i_DHxparams, bp, x) +# define i2d_DHxparams_bio(bp,x) \ + ASN1_i2d_bio_of_const(DH, i2d_DHxparams, bp, x) DH *DHparams_dup(DH *); diff --git a/thirdparty/openssl/openssl/dtls1.h b/thirdparty/openssl/openssl/dtls1.h index cdd1e4d86e..81d28c29cd 100644 --- a/thirdparty/openssl/openssl/dtls1.h +++ b/thirdparty/openssl/openssl/dtls1.h @@ -69,9 +69,6 @@ # ifdef OPENSSL_SYS_WIN32 /* Needed for struct timeval */ # include <winsock.h> -#ifdef X509_NAME -#undef X509_NAME -#endif # elif defined(OPENSSL_SYS_NETWARE) && !defined(_WINSOCK2API_) # include <sys/timeval.h> # else @@ -81,9 +78,9 @@ # include <sys/time.h> # endif # endif -#ifdef UWP_ENABLED +#ifdef UWP_ENABLED // -- GODOT start -- #include <winsock2.h> -#endif +#endif // -- GODOT end -- #ifdef __cplusplus extern "C" { diff --git a/thirdparty/openssl/openssl/err.h b/thirdparty/openssl/openssl/err.h index 585aa8ba3d..f42365620d 100644 --- a/thirdparty/openssl/openssl/err.h +++ b/thirdparty/openssl/openssl/err.h @@ -258,6 +258,7 @@ typedef struct err_state_st { # define SYS_F_WSASTARTUP 9/* Winsock stuff */ # define 
SYS_F_OPENDIR 10 # define SYS_F_FREAD 11 +# define SYS_F_FFLUSH 18 /* reasons */ # define ERR_R_SYS_LIB ERR_LIB_SYS/* 2 */ diff --git a/thirdparty/openssl/openssl/evp.h b/thirdparty/openssl/openssl/evp.h index 39ab7937d2..d258ef870a 100644 --- a/thirdparty/openssl/openssl/evp.h +++ b/thirdparty/openssl/openssl/evp.h @@ -1370,6 +1370,7 @@ void EVP_add_alg_module(void); * The following lines are auto generated by the script mkerr.pl. Any changes * made after this point may be overwritten when the script is next run. */ + void ERR_load_EVP_strings(void); /* Error codes for the EVP functions. */ @@ -1489,6 +1490,7 @@ void ERR_load_EVP_strings(void); # define EVP_R_INPUT_NOT_INITIALIZED 111 # define EVP_R_INVALID_DIGEST 152 # define EVP_R_INVALID_FIPS_MODE 168 +# define EVP_R_INVALID_KEY 171 # define EVP_R_INVALID_KEY_LENGTH 130 # define EVP_R_INVALID_OPERATION 148 # define EVP_R_IV_TOO_LARGE 102 @@ -1528,7 +1530,7 @@ void ERR_load_EVP_strings(void); # define EVP_R_WRONG_FINAL_BLOCK_LENGTH 109 # define EVP_R_WRONG_PUBLIC_KEY_TYPE 110 -#ifdef __cplusplus +# ifdef __cplusplus } -#endif +# endif #endif diff --git a/thirdparty/openssl/openssl/md5.h b/thirdparty/openssl/openssl/md5.h index 300d3f3463..2659038abd 100644 --- a/thirdparty/openssl/openssl/md5.h +++ b/thirdparty/openssl/openssl/md5.h @@ -107,18 +107,11 @@ typedef struct MD5state_st { # ifdef OPENSSL_FIPS int private_MD5_Init(MD5_CTX *c); # endif - -//#define MD5_Init _SSL_MD5_Init -#define MD5_Final _SSL_MD5_Final -#define MD5_Update _SSL_MD5_Update -#define MD5_Transform _SSL_MD5_Transform -#define MD5_Init private_MD5_Init - -int _SSL_MD5_Init(MD5_CTX *c); -int _SSL_MD5_Update(MD5_CTX *c, const void *data, size_t len); -int _SSL_MD5_Final(unsigned char *md, MD5_CTX *c); +int MD5_Init(MD5_CTX *c); +int MD5_Update(MD5_CTX *c, const void *data, size_t len); +int MD5_Final(unsigned char *md, MD5_CTX *c); unsigned char *MD5(const unsigned char *d, size_t n, unsigned char *md); -void _SSL_MD5_Transform(MD5_CTX *c, 
const unsigned char *b); +void MD5_Transform(MD5_CTX *c, const unsigned char *b); #ifdef __cplusplus } #endif diff --git a/thirdparty/openssl/openssl/opensslconf.h b/thirdparty/openssl/openssl/opensslconf.h index c86bb60b94..19fad23423 100644 --- a/thirdparty/openssl/openssl/opensslconf.h +++ b/thirdparty/openssl/openssl/opensslconf.h @@ -1,37 +1,26 @@ /* opensslconf.h */ /* WARNING: Generated automatically from opensslconf.h.in by Configure. */ -//sorry godot needs a single file for multiple builds - #ifdef __cplusplus extern "C" { #endif +/* OpenSSL was configured with the following options: */ +#ifndef OPENSSL_DOING_MAKEDEPEND -// Check windows - -#ifdef USE_64BITS -//weirder platforms that don't use GCC, LLVM or MSVC must define this -# define OPENSSL_USE_64_BITS -#elif _WIN32 || _WIN64 -# if _WIN64 -# define OPENSSL_USE_64_BITS +// -- GODOT start -- +#if defined(OPENSSL_SYS_WINDOWS) +# define WIN32_LEAN_AND_MEAN +// Seems like we have troubles properly using the logic in e_os2.h +# if defined(_WIN32) +# define OPENSSL_SYS_WIN32 +# define OPENSSL_SYSNAME_WIN32 # endif -// Check GCC -#elif __GNUC__ -# if __x86_64__ || __ppc64__ -# define OPENSSL_USE_64_BITS +# if defined(_WIN64) +# define OPENSSL_SYS_WIN64 +# define OPENSSL_SYSNAME_WIN64 # endif #endif - -#ifndef OPENSSL_USE_64_BITS -//wqerw -#endif - - - -/* OpenSSL was configured with the following options: */ -#ifndef OPENSSL_DOING_MAKEDEPEND - +// -- GODOT end -- #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 # define OPENSSL_NO_EC_NISTP_64_GCC_128 @@ -78,9 +67,6 @@ extern "C" { #endif /* OPENSSL_DOING_MAKEDEPEND */ -#ifndef OPENSSL_THREADS -# define OPENSSL_THREADS -#endif #ifndef OPENSSL_NO_DYNAMIC_ENGINE # define OPENSSL_NO_DYNAMIC_ENGINE #endif @@ -134,8 +120,6 @@ extern "C" { # endif #endif -//#define OPENSSL_CPUID_OBJ - /* crypto/opensslconf.h.in */ /* Generate 80386 code? 
*/ @@ -175,19 +159,14 @@ extern "C" { * - Intel P6 because partial register stalls are very expensive; * - elder Alpha because it lacks byte load/store instructions; */ -#ifdef OPENSSL_USE_64_BITS #define RC4_INT unsigned int -#else -#define RC4_INT unsigned char -#endif - #endif #if !defined(RC4_CHUNK) /* * This enables code handling data aligned at natural CPU word * boundary. See crypto/rc4/rc4_enc.c for further details. */ -#define RC4_CHUNK unsigned long +#undef RC4_CHUNK #endif #endif @@ -195,42 +174,20 @@ extern "C" { /* If this is set to 'unsigned int' on a DEC Alpha, this gives about a * %20 speed up (longs are 8 bytes, int's are 4). */ #ifndef DES_LONG -#define DES_LONG unsigned int +#define DES_LONG unsigned long #endif #endif #if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) #define CONFIG_HEADER_BN_H -#ifdef OPENSSL_USE_64_BITS #undef BN_LLONG -#else -#define BN_LLONG -#endif /* Should we define BN_DIV2W here? */ /* Only one for the following should be defined */ - -#ifdef OPENSSL_USE_64_BITS - -# ifdef _WIN32 -# undef SIXTY_FOUR_BIT_LONG -# define SIXTY_FOUR_BIT -# else -# define SIXTY_FOUR_BIT_LONG -# undef SIXTY_FOUR_BIT -# endif -#undef THIRTY_TWO_BIT - -#else - #undef SIXTY_FOUR_BIT_LONG #undef SIXTY_FOUR_BIT #define THIRTY_TWO_BIT - -#endif - - #endif #if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) @@ -272,7 +229,7 @@ extern "C" { /* Unroll the inner loop, this sometimes helps, sometimes hinders. 
* Very mucy CPU dependant */ #ifndef DES_UNROLL -#define DES_UNROLL +#undef DES_UNROLL #endif /* These default values were supplied by diff --git a/thirdparty/openssl/openssl/opensslv.h b/thirdparty/openssl/openssl/opensslv.h index 13fe440231..825a330abc 100644 --- a/thirdparty/openssl/openssl/opensslv.h +++ b/thirdparty/openssl/openssl/opensslv.h @@ -30,11 +30,11 @@ extern "C" { * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for * major minor fix final patch/beta) */ -# define OPENSSL_VERSION_NUMBER 0x1000208fL +# define OPENSSL_VERSION_NUMBER 0x100020cfL # ifdef OPENSSL_FIPS -# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2h-fips 3 May 2016" +# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2l-fips 25 May 2017" # else -# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2h 3 May 2016" +# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2l 25 May 2017" # endif # define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT diff --git a/thirdparty/openssl/openssl/ossl_typ.h b/thirdparty/openssl/openssl/ossl_typ.h index 9144ea2cf6..364d26238e 100644 --- a/thirdparty/openssl/openssl/ossl_typ.h +++ b/thirdparty/openssl/openssl/ossl_typ.h @@ -178,6 +178,8 @@ typedef struct engine_st ENGINE; typedef struct ssl_st SSL; typedef struct ssl_ctx_st SSL_CTX; +typedef struct comp_method_st COMP_METHOD; + typedef struct X509_POLICY_NODE_st X509_POLICY_NODE; typedef struct X509_POLICY_LEVEL_st X509_POLICY_LEVEL; typedef struct X509_POLICY_TREE_st X509_POLICY_TREE; diff --git a/thirdparty/openssl/openssl/pem.h b/thirdparty/openssl/openssl/pem.h index d3b23fc997..aac72fb21e 100644 --- a/thirdparty/openssl/openssl/pem.h +++ b/thirdparty/openssl/openssl/pem.h @@ -531,6 +531,7 @@ int i2b_PVK_bio(BIO *out, EVP_PKEY *pk, int enclevel, * The following lines are auto generated by the script mkerr.pl. Any changes * made after this point may be overwritten when the script is next run. */ + void ERR_load_PEM_strings(void); /* Error codes for the PEM functions. 
*/ @@ -592,6 +593,7 @@ void ERR_load_PEM_strings(void); # define PEM_R_ERROR_CONVERTING_PRIVATE_KEY 115 # define PEM_R_EXPECTING_PRIVATE_KEY_BLOB 119 # define PEM_R_EXPECTING_PUBLIC_KEY_BLOB 120 +# define PEM_R_HEADER_TOO_LONG 128 # define PEM_R_INCONSISTENT_HEADER 121 # define PEM_R_KEYBLOB_HEADER_PARSE_ERROR 122 # define PEM_R_KEYBLOB_TOO_SHORT 123 @@ -609,7 +611,7 @@ void ERR_load_PEM_strings(void); # define PEM_R_UNSUPPORTED_ENCRYPTION 114 # define PEM_R_UNSUPPORTED_KEY_COMPONENTS 126 -#ifdef __cplusplus +# ifdef __cplusplus } -#endif +# endif #endif diff --git a/thirdparty/openssl/openssl/pkcs12.h b/thirdparty/openssl/openssl/pkcs12.h index a39adf5eb5..21f1f62b36 100644 --- a/thirdparty/openssl/openssl/pkcs12.h +++ b/thirdparty/openssl/openssl/pkcs12.h @@ -270,7 +270,7 @@ int i2d_PKCS12_bio(BIO *bp, PKCS12 *p12); int i2d_PKCS12_fp(FILE *fp, PKCS12 *p12); PKCS12 *d2i_PKCS12_bio(BIO *bp, PKCS12 **p12); PKCS12 *d2i_PKCS12_fp(FILE *fp, PKCS12 **p12); -int PKCS12_newpass(PKCS12 *p12, char *oldpass, char *newpass); +int PKCS12_newpass(PKCS12 *p12, const char *oldpass, const char *newpass); /* BEGIN ERROR CODES */ /* diff --git a/thirdparty/openssl/openssl/rand.h b/thirdparty/openssl/openssl/rand.h index 66068834eb..2553afda20 100644 --- a/thirdparty/openssl/openssl/rand.h +++ b/thirdparty/openssl/openssl/rand.h @@ -64,23 +64,7 @@ # include <openssl/e_os2.h> # if defined(OPENSSL_SYS_WINDOWS) -#define WIN32_LEAN_AND_MEAN -#include <windows.h> -#ifdef OCSP_RESPONSE -#undef OCSP_RESPONSE -#endif -#ifdef OCSP_REQUEST -#undef OCSP_REQUEST -#endif -#ifdef X509_NAME -#undef X509_NAME -#undef X509_NAME -#undef X509_EXTENSIONS -#undef X509_CERT_PAIR -#undef PKCS7_ISSUER_AND_SERIAL -#endif - - +# include <windows.h> # endif #ifdef __cplusplus diff --git a/thirdparty/openssl/openssl/ssl.h b/thirdparty/openssl/openssl/ssl.h index 5ef56faa50..90aeb0ce4e 100644 --- a/thirdparty/openssl/openssl/ssl.h +++ b/thirdparty/openssl/openssl/ssl.h @@ -2532,7 +2532,6 @@ void 
SSL_set_tmp_ecdh_callback(SSL *ssl, int keylength)); # endif -# ifndef OPENSSL_NO_COMP const COMP_METHOD *SSL_get_current_compression(SSL *s); const COMP_METHOD *SSL_get_current_expansion(SSL *s); const char *SSL_COMP_get_name(const COMP_METHOD *comp); @@ -2541,13 +2540,6 @@ STACK_OF(SSL_COMP) *SSL_COMP_set0_compression_methods(STACK_OF(SSL_COMP) *meths); void SSL_COMP_free_compression_methods(void); int SSL_COMP_add_compression_method(int id, COMP_METHOD *cm); -# else -const void *SSL_get_current_compression(SSL *s); -const void *SSL_get_current_expansion(SSL *s); -const char *SSL_COMP_get_name(const void *comp); -void *SSL_COMP_get_compression_methods(void); -int SSL_COMP_add_compression_method(int id, void *cm); -# endif const SSL_CIPHER *SSL_CIPHER_find(SSL *ssl, const unsigned char *ptr); @@ -2623,6 +2615,7 @@ void ERR_load_SSL_strings(void); # define SSL_F_DTLS1_HEARTBEAT 305 # define SSL_F_DTLS1_OUTPUT_CERT_CHAIN 255 # define SSL_F_DTLS1_PREPROCESS_FRAGMENT 288 +# define SSL_F_DTLS1_PROCESS_BUFFERED_RECORDS 424 # define SSL_F_DTLS1_PROCESS_OUT_OF_SEQ_MESSAGE 256 # define SSL_F_DTLS1_PROCESS_RECORD 257 # define SSL_F_DTLS1_READ_BYTES 258 @@ -3114,6 +3107,7 @@ void ERR_load_SSL_strings(void); # define SSL_R_TLS_INVALID_ECPOINTFORMAT_LIST 157 # define SSL_R_TLS_PEER_DID_NOT_RESPOND_WITH_CERTIFICATE_LIST 233 # define SSL_R_TLS_RSA_ENCRYPTED_VALUE_LENGTH_IS_WRONG 234 +# define SSL_R_TOO_MANY_WARN_ALERTS 409 # define SSL_R_TRIED_TO_USE_UNSUPPORTED_CIPHER 235 # define SSL_R_UNABLE_TO_DECODE_DH_CERTS 236 # define SSL_R_UNABLE_TO_DECODE_ECDH_CERTS 313 diff --git a/thirdparty/openssl/openssl/ts.h b/thirdparty/openssl/openssl/ts.h index 16eccbb38d..2daa1b2fb5 100644 --- a/thirdparty/openssl/openssl/ts.h +++ b/thirdparty/openssl/openssl/ts.h @@ -565,6 +565,9 @@ int TS_RESP_CTX_set_clock_precision_digits(TS_RESP_CTX *ctx, /* At most we accept usec precision. 
*/ # define TS_MAX_CLOCK_PRECISION_DIGITS 6 +/* Maximum status message length */ +# define TS_MAX_STATUS_LENGTH (1024 * 1024) + /* No flags are set by default. */ void TS_RESP_CTX_add_flags(TS_RESP_CTX *ctx, int flags); diff --git a/thirdparty/openssl/openssl/x509.h b/thirdparty/openssl/openssl/x509.h index fc613ce635..6fa28ebada 100644 --- a/thirdparty/openssl/openssl/x509.h +++ b/thirdparty/openssl/openssl/x509.h @@ -1234,6 +1234,7 @@ int X509_TRUST_get_trust(X509_TRUST *xp); * The following lines are auto generated by the script mkerr.pl. Any changes * made after this point may be overwritten when the script is next run. */ + void ERR_load_X509_strings(void); /* Error codes for the X509 functions. */ @@ -1241,6 +1242,7 @@ void ERR_load_X509_strings(void); /* Function codes. */ # define X509_F_ADD_CERT_DIR 100 # define X509_F_BY_FILE_CTRL 101 +# define X509_F_CHECK_NAME_CONSTRAINTS 106 # define X509_F_CHECK_POLICY 145 # define X509_F_DIR_CTRL 102 # define X509_F_GET_CERT_BY_SUBJECT 103 @@ -1322,7 +1324,7 @@ void ERR_load_X509_strings(void); # define X509_R_WRONG_LOOKUP_TYPE 112 # define X509_R_WRONG_TYPE 122 -#ifdef __cplusplus +# ifdef __cplusplus } -#endif +# endif #endif diff --git a/thirdparty/openssl/openssl/x509_vfy.h b/thirdparty/openssl/openssl/x509_vfy.h index 2663e1c0a3..50626826e0 100644 --- a/thirdparty/openssl/openssl/x509_vfy.h +++ b/thirdparty/openssl/openssl/x509_vfy.h @@ -368,6 +368,7 @@ void X509_STORE_CTX_set_depth(X509_STORE_CTX *ctx, int depth); # define X509_V_ERR_PERMITTED_VIOLATION 47 # define X509_V_ERR_EXCLUDED_VIOLATION 48 # define X509_V_ERR_SUBTREE_MINMAX 49 +# define X509_V_ERR_APPLICATION_VERIFICATION 50 # define X509_V_ERR_UNSUPPORTED_CONSTRAINT_TYPE 51 # define X509_V_ERR_UNSUPPORTED_CONSTRAINT_SYNTAX 52 # define X509_V_ERR_UNSUPPORTED_NAME_SYNTAX 53 @@ -386,8 +387,12 @@ void X509_STORE_CTX_set_depth(X509_STORE_CTX *ctx, int depth); # define X509_V_ERR_EMAIL_MISMATCH 63 # define X509_V_ERR_IP_ADDRESS_MISMATCH 64 -/* The 
application is not happy */ -# define X509_V_ERR_APPLICATION_VERIFICATION 50 +/* Caller error */ +# define X509_V_ERR_INVALID_CALL 65 +/* Issuer lookup error */ +# define X509_V_ERR_STORE_LOOKUP 66 + +# define X509_V_ERR_PROXY_SUBJECT_NAME_VIOLATION 67 /* Certificate verify flags */ diff --git a/thirdparty/openssl/patches/config_windows.patch b/thirdparty/openssl/patches/config_windows.patch new file mode 100644 index 0000000000..e69ff1356a --- /dev/null +++ b/thirdparty/openssl/patches/config_windows.patch @@ -0,0 +1,49 @@ +commit 4c8ab8b4415d129d0283d7d0d9a5789163ec8d5e +Author: Rémi Verschelde <rverschelde@gmail.com> +Date: Sat May 27 16:38:46 2017 +0200 + + openssl: Define WIN32_LEAN_AND_MEAN on Windows + + This avoids namespace collisions with things such as X509_NAME. + Also force include of necessary definitions in `crypto/o_str.c` + which seem missing on MSVC (but work on MinGW). + +diff --git a/thirdparty/openssl/crypto/o_str.c b/thirdparty/openssl/crypto/o_str.c +index 7e61cde85..1854798e2 100644 +--- a/thirdparty/openssl/crypto/o_str.c ++++ b/thirdparty/openssl/crypto/o_str.c +@@ -59,6 +59,9 @@ + + #include <ctype.h> + #include <e_os.h> ++// -- GODOT start -- ++#include <openssl/opensslconf.h> ++// -- GODOT end -- + #include "o_str.h" + + #if !defined(OPENSSL_IMPLEMENTS_strncasecmp) && \ +diff --git a/thirdparty/openssl/openssl/opensslconf.h b/thirdparty/openssl/openssl/opensslconf.h +index f533508b1..19fad2342 100644 +--- a/thirdparty/openssl/openssl/opensslconf.h ++++ b/thirdparty/openssl/openssl/opensslconf.h +@@ -7,6 +7,20 @@ extern "C" { + /* OpenSSL was configured with the following options: */ + #ifndef OPENSSL_DOING_MAKEDEPEND + ++// -- GODOT start -- ++#if defined(OPENSSL_SYS_WINDOWS) ++# define WIN32_LEAN_AND_MEAN ++// Seems like we have troubles properly using the logic in e_os2.h ++# if defined(_WIN32) ++# define OPENSSL_SYS_WIN32 ++# define OPENSSL_SYSNAME_WIN32 ++# endif ++# if defined(_WIN64) ++# define OPENSSL_SYS_WIN64 ++# define 
OPENSSL_SYSNAME_WIN64 ++# endif ++#endif ++// -- GODOT end -- + + #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 + # define OPENSSL_NO_EC_NISTP_64_GCC_128 diff --git a/thirdparty/openssl/uwp_fix.patch b/thirdparty/openssl/patches/uwp_fix.patch index 00d8b64d00..54aeb1f80d 100644 --- a/thirdparty/openssl/uwp_fix.patch +++ b/thirdparty/openssl/patches/uwp_fix.patch @@ -1,15 +1,15 @@ -diff --git a/drivers/builtin_openssl2/crypto/rand/rand_win.c b/drivers/builtin_openssl2/crypto/rand/rand_win.c -index 06670ae..70fd52a 100644 ---- a/drivers/builtin_openssl2/crypto/rand/rand_win.c -+++ b/drivers/builtin_openssl2/crypto/rand/rand_win.c +diff --git a/thirdparty/openssl/crypto/rand/rand_win.c b/thirdparty/openssl/crypto/rand/rand_win.c +index 06670ae01..cb4093128 100644 +--- a/thirdparty/openssl/crypto/rand/rand_win.c ++++ b/thirdparty/openssl/crypto/rand/rand_win.c @@ -118,8 +118,10 @@ # ifndef _WIN32_WINNT # define _WIN32_WINNT 0x0400 # endif -+#ifndef UWP_ENABLED ++#ifndef UWP_ENABLED // -- GODOT -- # include <wincrypt.h> # include <tlhelp32.h> -+#endif ++#endif // -- GODOT -- /* * Limit the time spent walking through the heap, processes, threads and @@ -18,7 +18,7 @@ index 06670ae..70fd52a 100644 # endif /* CURSOR_SHOWING */ -# if !defined(OPENSSL_SYS_WINCE) -+# if !defined(OPENSSL_SYS_WINCE) && !defined(UWP_ENABLED) ++# if !defined(OPENSSL_SYS_WINCE) && !defined(UWP_ENABLED) // -- GODOT -- typedef BOOL(WINAPI *CRYPTACQUIRECONTEXTW) (HCRYPTPROV *, LPCWSTR, LPCWSTR, DWORD, DWORD); typedef BOOL(WINAPI *CRYPTGENRANDOM) (HCRYPTPROV, DWORD, BYTE *); @@ -26,7 +26,7 @@ index 06670ae..70fd52a 100644 # endif /* 1 */ # endif /* !OPENSSL_SYS_WINCE */ -+#if !defined(UWP_ENABLED) ++#if !defined(UWP_ENABLED) // -- GODOT -- int RAND_poll(void) { MEMORYSTATUS m; @@ -34,7 +34,7 @@ index 06670ae..70fd52a 100644 return (1); } -+#endif // UWP_ENABLED ++#endif // UWP_ENABLED // -- GODOT -- + int RAND_event(UINT iMsg, WPARAM wParam, LPARAM lParam) { @@ -44,21 +44,21 @@ index 06670ae..70fd52a 100644 
static void readscreen(void) { -# if !defined(OPENSSL_SYS_WINCE) && !defined(OPENSSL_SYS_WIN32_CYGWIN) -+# if !defined(OPENSSL_SYS_WINCE) && !defined(OPENSSL_SYS_WIN32_CYGWIN) && !defined(UWP_ENABLED) ++# if !defined(OPENSSL_SYS_WINCE) && !defined(OPENSSL_SYS_WIN32_CYGWIN) && !defined(UWP_ENABLED) // -- GODOT -- HDC hScrDC; /* screen DC */ HBITMAP hBitmap; /* handle for our bitmap */ BITMAP bm; /* bitmap properties */ -diff --git a/drivers/builtin_openssl2/openssl/dtls1.h b/drivers/builtin_openssl2/openssl/dtls1.h -index 64ad3c8..a58aca2 100644 ---- a/drivers/builtin_openssl2/openssl/dtls1.h -+++ b/drivers/builtin_openssl2/openssl/dtls1.h -@@ -81,6 +81,9 @@ +diff --git a/thirdparty/openssl/openssl/dtls1.h b/thirdparty/openssl/openssl/dtls1.h +index 30bbcf278..81d28c29c 100644 +--- a/thirdparty/openssl/openssl/dtls1.h ++++ b/thirdparty/openssl/openssl/dtls1.h +@@ -78,6 +78,9 @@ # include <sys/time.h> # endif # endif -+#ifdef UWP_ENABLED ++#ifdef UWP_ENABLED // -- GODOT start -- +#include <winsock2.h> -+#endif ++#endif // -- GODOT end -- #ifdef __cplusplus extern "C" { diff --git a/thirdparty/openssl/ssl/d1_both.c b/thirdparty/openssl/ssl/d1_both.c index 5d26c94926..e6bc761e8b 100644 --- a/thirdparty/openssl/ssl/d1_both.c +++ b/thirdparty/openssl/ssl/d1_both.c @@ -517,6 +517,17 @@ long dtls1_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok) return i; } + /* + * Don't change the *message* read sequence number while listening. For + * the *record* write sequence we reflect the ClientHello sequence number + * when listening. 
+ */ + if (s->d1->listen) + memcpy(s->s3->write_sequence, s->s3->read_sequence, + sizeof(s->s3->write_sequence)); + else + s->d1->handshake_read_seq++; + if (mt >= 0 && s->s3->tmp.message_type != mt) { al = SSL_AD_UNEXPECTED_MESSAGE; SSLerr(SSL_F_DTLS1_GET_MESSAGE, SSL_R_UNEXPECTED_MESSAGE); @@ -544,10 +555,6 @@ long dtls1_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok) memset(msg_hdr, 0x00, sizeof(struct hm_header_st)); - /* Don't change sequence numbers while listening */ - if (!s->d1->listen) - s->d1->handshake_read_seq++; - s->init_msg = s->init_buf->data + DTLS1_HM_HEADER_LENGTH; return s->init_num; @@ -581,9 +588,12 @@ static int dtls1_preprocess_fragment(SSL *s, struct hm_header_st *msg_hdr, /* * msg_len is limited to 2^24, but is effectively checked against max * above + * + * Make buffer slightly larger than message length as a precaution + * against small OOB reads e.g. CVE-2016-6306 */ if (!BUF_MEM_grow_clean - (s->init_buf, msg_len + DTLS1_HM_HEADER_LENGTH)) { + (s->init_buf, msg_len + DTLS1_HM_HEADER_LENGTH + 16)) { SSLerr(SSL_F_DTLS1_PREPROCESS_FRAGMENT, ERR_R_BUF_LIB); return SSL_AD_INTERNAL_ERROR; } @@ -618,11 +628,23 @@ static int dtls1_retrieve_buffered_fragment(SSL *s, long max, int *ok) int al; *ok = 0; - item = pqueue_peek(s->d1->buffered_messages); - if (item == NULL) - return 0; + do { + item = pqueue_peek(s->d1->buffered_messages); + if (item == NULL) + return 0; + + frag = (hm_fragment *)item->data; + + if (frag->msg_header.seq < s->d1->handshake_read_seq) { + /* This is a stale message that has been buffered so clear it */ + pqueue_pop(s->d1->buffered_messages); + dtls1_hm_fragment_free(frag); + pitem_free(item); + item = NULL; + frag = NULL; + } + } while (item == NULL); - frag = (hm_fragment *)item->data; /* Don't return if reassembly still in progress */ if (frag->reassembly != NULL) @@ -1053,7 +1075,9 @@ int dtls1_send_change_cipher_spec(SSL *s, int a, int b) int dtls1_read_failed(SSL *s, int code) { if (code > 0) { 
+#ifdef TLS_DEBUG fprintf(stderr, "invalid state reached %s:%d", __FILE__, __LINE__); +#endif return 1; } @@ -1125,7 +1149,9 @@ int dtls1_retransmit_buffered_messages(SSL *s) (frag->msg_header.seq, frag->msg_header.is_ccs), 0, &found) <= 0 && found) { +#ifdef TLS_DEBUG fprintf(stderr, "dtls1_retransmit_message() failed\n"); +#endif return -1; } } @@ -1211,7 +1237,7 @@ dtls1_retransmit_message(SSL *s, unsigned short seq, unsigned long frag_off, unsigned long header_length; unsigned char seq64be[8]; struct dtls1_retransmit_state saved_state; - unsigned char save_write_sequence[8]; + unsigned char save_write_sequence[8] = {0, 0, 0, 0, 0, 0, 0, 0}; /*- OPENSSL_assert(s->init_num == 0); @@ -1225,7 +1251,9 @@ dtls1_retransmit_message(SSL *s, unsigned short seq, unsigned long frag_off, item = pqueue_find(s->d1->sent_messages, seq64be); if (item == NULL) { +#ifdef TLS_DEBUG fprintf(stderr, "retransmit: message %d non-existant\n", seq); +#endif *found = 0; return 0; } @@ -1296,18 +1324,6 @@ dtls1_retransmit_message(SSL *s, unsigned short seq, unsigned long frag_off, return ret; } -/* call this function when the buffered messages are no longer needed */ -void dtls1_clear_record_buffer(SSL *s) -{ - pitem *item; - - for (item = pqueue_pop(s->d1->sent_messages); - item != NULL; item = pqueue_pop(s->d1->sent_messages)) { - dtls1_hm_fragment_free((hm_fragment *)item->data); - pitem_free(item); - } -} - unsigned char *dtls1_set_message_header(SSL *s, unsigned char *p, unsigned char mt, unsigned long len, unsigned long frag_off, @@ -1469,7 +1485,7 @@ int dtls1_process_heartbeat(SSL *s) memcpy(bp, pl, payload); bp += payload; /* Random padding */ - if (RAND_pseudo_bytes(bp, padding) < 0) { + if (RAND_bytes(bp, padding) <= 0) { OPENSSL_free(buffer); return -1; } @@ -1546,6 +1562,8 @@ int dtls1_heartbeat(SSL *s) * - Padding */ buf = OPENSSL_malloc(1 + 2 + payload + padding); + if (buf == NULL) + goto err; p = buf; /* Message Type */ *p++ = TLS1_HB_REQUEST; @@ -1554,11 +1572,11 @@ int 
dtls1_heartbeat(SSL *s) /* Sequence number */ s2n(s->tlsext_hb_seq, p); /* 16 random bytes */ - if (RAND_pseudo_bytes(p, 16) < 0) + if (RAND_bytes(p, 16) <= 0) goto err; p += 16; /* Random padding */ - if (RAND_pseudo_bytes(p, padding) < 0) + if (RAND_bytes(p, padding) <= 0) goto err; ret = dtls1_write_bytes(s, TLS1_RT_HEARTBEAT, buf, 3 + payload + padding); diff --git a/thirdparty/openssl/ssl/d1_clnt.c b/thirdparty/openssl/ssl/d1_clnt.c index 3ddfa7bca4..76451a346d 100644 --- a/thirdparty/openssl/ssl/d1_clnt.c +++ b/thirdparty/openssl/ssl/d1_clnt.c @@ -320,8 +320,13 @@ int dtls1_connect(SSL *s) s->shutdown = 0; /* every DTLS ClientHello resets Finished MAC */ - ssl3_init_finished_mac(s); + if (!ssl3_init_finished_mac(s)) { + ret = -1; + s->state = SSL_ST_ERR; + goto end; + } + /* fall thru */ case SSL3_ST_CW_CLNT_HELLO_B: dtls1_start_timer(s); ret = ssl3_client_hello(s); @@ -769,6 +774,7 @@ int dtls1_connect(SSL *s) /* done with handshaking */ s->d1->handshake_read_seq = 0; s->d1->next_handshake_write_seq = 0; + dtls1_clear_received_buffer(s); goto end; /* break; */ diff --git a/thirdparty/openssl/ssl/d1_lib.c b/thirdparty/openssl/ssl/d1_lib.c index ee78921ba8..debd4fd5dc 100644 --- a/thirdparty/openssl/ssl/d1_lib.c +++ b/thirdparty/openssl/ssl/d1_lib.c @@ -170,7 +170,6 @@ int dtls1_new(SSL *s) static void dtls1_clear_queues(SSL *s) { pitem *item = NULL; - hm_fragment *frag = NULL; DTLS1_RECORD_DATA *rdata; while ((item = pqueue_pop(s->d1->unprocessed_rcds.q)) != NULL) { @@ -191,28 +190,44 @@ static void dtls1_clear_queues(SSL *s) pitem_free(item); } + while ((item = pqueue_pop(s->d1->buffered_app_data.q)) != NULL) { + rdata = (DTLS1_RECORD_DATA *)item->data; + if (rdata->rbuf.buf) { + OPENSSL_free(rdata->rbuf.buf); + } + OPENSSL_free(item->data); + pitem_free(item); + } + + dtls1_clear_received_buffer(s); + dtls1_clear_sent_buffer(s); +} + +void dtls1_clear_received_buffer(SSL *s) +{ + pitem *item = NULL; + hm_fragment *frag = NULL; + while ((item = 
pqueue_pop(s->d1->buffered_messages)) != NULL) { frag = (hm_fragment *)item->data; dtls1_hm_fragment_free(frag); pitem_free(item); } +} + +void dtls1_clear_sent_buffer(SSL *s) +{ + pitem *item = NULL; + hm_fragment *frag = NULL; while ((item = pqueue_pop(s->d1->sent_messages)) != NULL) { frag = (hm_fragment *)item->data; dtls1_hm_fragment_free(frag); pitem_free(item); } - - while ((item = pqueue_pop(s->d1->buffered_app_data.q)) != NULL) { - rdata = (DTLS1_RECORD_DATA *)item->data; - if (rdata->rbuf.buf) { - OPENSSL_free(rdata->rbuf.buf); - } - OPENSSL_free(item->data); - pitem_free(item); - } } + void dtls1_free(SSL *s) { ssl3_free(s); @@ -456,7 +471,7 @@ void dtls1_stop_timer(SSL *s) BIO_ctrl(SSL_get_rbio(s), BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT, 0, &(s->d1->next_timeout)); /* Clear retransmission buffer */ - dtls1_clear_record_buffer(s); + dtls1_clear_sent_buffer(s); } int dtls1_check_timeout_num(SSL *s) diff --git a/thirdparty/openssl/ssl/d1_pkt.c b/thirdparty/openssl/ssl/d1_pkt.c index fe30ec7d00..10586fee54 100644 --- a/thirdparty/openssl/ssl/d1_pkt.c +++ b/thirdparty/openssl/ssl/d1_pkt.c @@ -125,7 +125,7 @@ /* mod 128 saturating subtract of two 64-bit values in big-endian order */ static int satsub64be(const unsigned char *v1, const unsigned char *v2) { - int ret, sat, brw, i; + int ret, i; if (sizeof(long) == 8) do { @@ -157,28 +157,51 @@ static int satsub64be(const unsigned char *v1, const unsigned char *v2) return (int)l; } while (0); - ret = (int)v1[7] - (int)v2[7]; - sat = 0; - brw = ret >> 8; /* brw is either 0 or -1 */ - if (ret & 0x80) { - for (i = 6; i >= 0; i--) { - brw += (int)v1[i] - (int)v2[i]; - sat |= ~brw; - brw >>= 8; - } - } else { - for (i = 6; i >= 0; i--) { - brw += (int)v1[i] - (int)v2[i]; - sat |= brw; - brw >>= 8; + ret = 0; + for (i=0; i<7; i++) { + if (v1[i] > v2[i]) { + /* v1 is larger... but by how much? 
*/ + if (v1[i] != v2[i] + 1) + return 128; + while (++i <= 6) { + if (v1[i] != 0x00 || v2[i] != 0xff) + return 128; /* too much */ + } + /* We checked all the way to the penultimate byte, + * so despite higher bytes changing we actually + * know that it only changed from (e.g.) + * ... (xx) ff ff ff ?? + * to ... (xx+1) 00 00 00 ?? + * so we add a 'bias' of 256 for the carry that + * happened, and will eventually return + * 256 + v1[7] - v2[7]. */ + ret = 256; + break; + } else if (v2[i] > v1[i]) { + /* v2 is larger... but by how much? */ + if (v2[i] != v1[i] + 1) + return -128; + while (++i <= 6) { + if (v2[i] != 0x00 || v1[i] != 0xff) + return -128; /* too much */ + } + /* Similar to the case above, we know it changed + * from ... (xx) 00 00 00 ?? + * to ... (xx-1) ff ff ff ?? + * so we add a 'bias' of -256 for the borrow, + * to return -256 + v1[7] - v2[7]. */ + ret = -256; } } - brw <<= 8; /* brw is either 0 or -256 */ - if (sat & 0xff) - return brw | 0x80; + ret += (int)v1[7] - (int)v2[7]; + + if (ret > 128) + return 128; + else if (ret < -128) + return -128; else - return brw + (ret & 0xFF); + return ret; } static int have_handshake_fragment(SSL *s, int type, unsigned char *buf, @@ -194,7 +217,7 @@ static int dtls1_record_needs_buffering(SSL *s, SSL3_RECORD *rr, #endif static int dtls1_buffer_record(SSL *s, record_pqueue *q, unsigned char *priority); -static int dtls1_process_record(SSL *s); +static int dtls1_process_record(SSL *s, DTLS1_BITMAP *bitmap); /* copy buffered record into SSL structure */ static int dtls1_copy_record(SSL *s, pitem *item) @@ -319,21 +342,70 @@ static int dtls1_retrieve_buffered_record(SSL *s, record_pqueue *queue) static int dtls1_process_buffered_records(SSL *s) { pitem *item; + SSL3_BUFFER *rb; + SSL3_RECORD *rr; + DTLS1_BITMAP *bitmap; + unsigned int is_next_epoch; + int replayok = 1; item = pqueue_peek(s->d1->unprocessed_rcds.q); if (item) { /* Check if epoch is current. 
*/ if (s->d1->unprocessed_rcds.epoch != s->d1->r_epoch) - return (1); /* Nothing to do. */ + return 1; /* Nothing to do. */ + + rr = &s->s3->rrec; + rb = &s->s3->rbuf; + + if (rb->left > 0) { + /* + * We've still got data from the current packet to read. There could + * be a record from the new epoch in it - so don't overwrite it + * with the unprocessed records yet (we'll do it when we've + * finished reading the current packet). + */ + return 1; + } + /* Process all the records. */ while (pqueue_peek(s->d1->unprocessed_rcds.q)) { dtls1_get_unprocessed_record(s); - if (!dtls1_process_record(s)) - return (0); + bitmap = dtls1_get_bitmap(s, rr, &is_next_epoch); + if (bitmap == NULL) { + /* + * Should not happen. This will only ever be NULL when the + * current record is from a different epoch. But that cannot + * be the case because we already checked the epoch above + */ + SSLerr(SSL_F_DTLS1_PROCESS_BUFFERED_RECORDS, + ERR_R_INTERNAL_ERROR); + return 0; + } +#ifndef OPENSSL_NO_SCTP + /* Only do replay check if no SCTP bio */ + if (!BIO_dgram_is_sctp(SSL_get_rbio(s))) +#endif + { + /* + * Check whether this is a repeat, or aged record. We did this + * check once already when we first received the record - but + * we might have updated the window since then due to + * records we subsequently processed. 
+ */ + replayok = dtls1_record_replay_check(s, bitmap); + } + + if (!replayok || !dtls1_process_record(s, bitmap)) { + /* dump this record */ + rr->length = 0; + s->packet_length = 0; + continue; + } + if (dtls1_buffer_record(s, &(s->d1->processed_rcds), s->s3->rrec.seq_num) < 0) - return -1; + return 0; } } @@ -344,7 +416,7 @@ static int dtls1_process_buffered_records(SSL *s) s->d1->processed_rcds.epoch = s->d1->r_epoch; s->d1->unprocessed_rcds.epoch = s->d1->r_epoch + 1; - return (1); + return 1; } #if 0 @@ -391,7 +463,7 @@ static int dtls1_get_buffered_record(SSL *s) #endif -static int dtls1_process_record(SSL *s) +static int dtls1_process_record(SSL *s, DTLS1_BITMAP *bitmap) { int i, al; int enc_err; @@ -551,6 +623,10 @@ static int dtls1_process_record(SSL *s) /* we have pulled in a full packet so zero things */ s->packet_length = 0; + + /* Mark receipt of record. */ + dtls1_record_bitmap_update(s, bitmap); + return (1); f_err: @@ -581,11 +657,12 @@ int dtls1_get_record(SSL *s) rr = &(s->s3->rrec); + again: /* * The epoch may have changed. If so, process all the pending records. * This is a non-blocking operation. */ - if (dtls1_process_buffered_records(s) < 0) + if (!dtls1_process_buffered_records(s)) return -1; /* if we're renegotiating, then there may be buffered records */ @@ -593,7 +670,6 @@ int dtls1_get_record(SSL *s) return 1; /* get something from the wire */ - again: /* check if we have the header */ if ((s->rstate != SSL_ST_READ_BODY) || (s->packet_length < DTLS1_RT_HEADER_LENGTH)) { @@ -721,20 +797,17 @@ int dtls1_get_record(SSL *s) if (dtls1_buffer_record (s, &(s->d1->unprocessed_rcds), rr->seq_num) < 0) return -1; - /* Mark receipt of record. 
*/ - dtls1_record_bitmap_update(s, bitmap); } rr->length = 0; s->packet_length = 0; goto again; } - if (!dtls1_process_record(s)) { + if (!dtls1_process_record(s, bitmap)) { rr->length = 0; s->packet_length = 0; /* dump this record */ goto again; /* get another record */ } - dtls1_record_bitmap_update(s, bitmap); /* Mark receipt of record. */ return (1); @@ -878,6 +951,13 @@ int dtls1_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek) goto start; } + /* + * Reset the count of consecutive warning alerts if we've got a non-empty + * record that isn't an alert. + */ + if (rr->type != SSL3_RT_ALERT && rr->length != 0) + s->cert->alert_count = 0; + /* we now have a packet which can be read and processed */ if (s->s3->change_cipher_spec /* set when we receive ChangeCipherSpec, @@ -1144,6 +1224,14 @@ int dtls1_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek) if (alert_level == SSL3_AL_WARNING) { s->s3->warn_alert = alert_descr; + + s->cert->alert_count++; + if (s->cert->alert_count == MAX_WARN_ALERT_COUNT) { + al = SSL_AD_UNEXPECTED_MESSAGE; + SSLerr(SSL_F_DTLS1_READ_BYTES, SSL_R_TOO_MANY_WARN_ALERTS); + goto f_err; + } + if (alert_descr == SSL_AD_CLOSE_NOTIFY) { #ifndef OPENSSL_NO_SCTP /* @@ -1201,7 +1289,7 @@ int dtls1_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek) BIO_snprintf(tmp, sizeof tmp, "%d", alert_descr); ERR_add_error_data(2, "SSL alert number ", tmp); s->shutdown |= SSL_RECEIVED_SHUTDOWN; - SSL_CTX_remove_session(s->ctx, s->session); + SSL_CTX_remove_session(s->session_ctx, s->session); return (0); } else { al = SSL_AD_ILLEGAL_PARAMETER; @@ -1235,9 +1323,9 @@ int dtls1_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek) /* XDTLS: check that epoch is consistent */ if ((rr->length != ccs_hdr_len) || (rr->off != 0) || (rr->data[0] != SSL3_MT_CCS)) { - i = SSL_AD_ILLEGAL_PARAMETER; + al = SSL_AD_ILLEGAL_PARAMETER; SSLerr(SSL_F_DTLS1_READ_BYTES, SSL_R_BAD_CHANGE_CIPHER_SPEC); - goto err; + 
goto f_err; } rr->length = 0; @@ -1830,8 +1918,13 @@ static DTLS1_BITMAP *dtls1_get_bitmap(SSL *s, SSL3_RECORD *rr, if (rr->epoch == s->d1->r_epoch) return &s->d1->bitmap; - /* Only HM and ALERT messages can be from the next epoch */ + /* + * Only HM and ALERT messages can be from the next epoch and only if we + * have already processed all of the unprocessed records from the last + * epoch + */ else if (rr->epoch == (unsigned long)(s->d1->r_epoch + 1) && + s->d1->unprocessed_rcds.epoch != s->d1->r_epoch && (rr->type == SSL3_RT_HANDSHAKE || rr->type == SSL3_RT_ALERT)) { *is_next_epoch = 1; return &s->d1->next_bitmap; @@ -1910,6 +2003,12 @@ void dtls1_reset_seq_numbers(SSL *s, int rw) s->d1->r_epoch++; memcpy(&(s->d1->bitmap), &(s->d1->next_bitmap), sizeof(DTLS1_BITMAP)); memset(&(s->d1->next_bitmap), 0x00, sizeof(DTLS1_BITMAP)); + + /* + * We must not use any buffered messages received from the previous + * epoch + */ + dtls1_clear_received_buffer(s); } else { seq = s->s3->write_sequence; memcpy(s->d1->last_write_sequence, seq, diff --git a/thirdparty/openssl/ssl/d1_srvr.c b/thirdparty/openssl/ssl/d1_srvr.c index e677d880f0..8502b242e5 100644 --- a/thirdparty/openssl/ssl/d1_srvr.c +++ b/thirdparty/openssl/ssl/d1_srvr.c @@ -282,7 +282,12 @@ int dtls1_accept(SSL *s) goto end; } - ssl3_init_finished_mac(s); + if (!ssl3_init_finished_mac(s)) { + ret = -1; + s->state = SSL_ST_ERR; + goto end; + } + s->state = SSL3_ST_SR_CLNT_HELLO_A; s->ctx->stats.sess_accept++; } else if (!s->s3->send_connection_binding && @@ -313,7 +318,7 @@ int dtls1_accept(SSL *s) case SSL3_ST_SW_HELLO_REQ_B: s->shutdown = 0; - dtls1_clear_record_buffer(s); + dtls1_clear_sent_buffer(s); dtls1_start_timer(s); ret = ssl3_send_hello_request(s); if (ret <= 0) @@ -322,7 +327,11 @@ int dtls1_accept(SSL *s) s->state = SSL3_ST_SW_FLUSH; s->init_num = 0; - ssl3_init_finished_mac(s); + if (!ssl3_init_finished_mac(s)) { + ret = -1; + s->state = SSL_ST_ERR; + goto end; + } break; case SSL3_ST_SW_HELLO_REQ_C: @@ 
-346,15 +355,6 @@ int dtls1_accept(SSL *s) s->init_num = 0; - /* - * Reflect ClientHello sequence to remain stateless while - * listening - */ - if (listen) { - memcpy(s->s3->write_sequence, s->s3->read_sequence, - sizeof(s->s3->write_sequence)); - } - /* If we're just listening, stop here */ if (listen && s->state == SSL3_ST_SW_SRVR_HELLO_A) { ret = 2; @@ -381,7 +381,11 @@ int dtls1_accept(SSL *s) /* HelloVerifyRequest resets Finished MAC */ if (s->version != DTLS1_BAD_VER) - ssl3_init_finished_mac(s); + if (!ssl3_init_finished_mac(s)) { + ret = -1; + s->state = SSL_ST_ERR; + goto end; + } break; #ifndef OPENSSL_NO_SCTP @@ -894,6 +898,7 @@ int dtls1_accept(SSL *s) /* next message is server hello */ s->d1->handshake_write_seq = 0; s->d1->next_handshake_write_seq = 0; + dtls1_clear_received_buffer(s); goto end; /* break; */ diff --git a/thirdparty/openssl/ssl/install-ssl.com b/thirdparty/openssl/ssl/install-ssl.com deleted file mode 100755 index afe6967f85..0000000000 --- a/thirdparty/openssl/ssl/install-ssl.com +++ /dev/null @@ -1,136 +0,0 @@ -$! INSTALL-SSL.COM -- Installs the files in a given directory tree -$! -$! Author: Richard Levitte <richard@levitte.org> -$! Time of creation: 22-MAY-1998 10:13 -$! -$! P1 root of the directory tree -$! P2 "64" for 64-bit pointers. -$! -$! -$! Announce/identify. -$! -$ proc = f$environment( "procedure") -$ write sys$output "@@@ "+ - - f$parse( proc, , , "name")+ f$parse( proc, , , "type") -$! -$ on error then goto tidy -$ on control_c then goto tidy -$! -$ if p1 .eqs. "" -$ then -$ write sys$output "First argument missing." -$ write sys$output - - "It should be the directory where you want things installed." -$ exit -$ endif -$! -$ if (f$getsyi( "cpu") .lt. 128) -$ then -$ arch = "VAX" -$ else -$ arch = f$edit( f$getsyi( "arch_name"), "upcase") -$ if (arch .eqs. "") then arch = "UNK" -$ endif -$! -$ archd = arch -$ lib32 = "32" -$ shr = "_SHR32" -$! -$ if (p2 .nes. "") -$ then -$ if (p2 .eqs. 
"64") -$ then -$ archd = arch+ "_64" -$ lib32 = "" -$ shr = "_SHR" -$ else -$ if (p2 .nes. "32") -$ then -$ write sys$output "Second argument invalid." -$ write sys$output "It should be "32", "64", or nothing." -$ exit -$ endif -$ endif -$ endif -$! -$ root = f$parse( p1, "[]A.;0", , , "syntax_only, no_conceal") - "A.;0" -$ root_dev = f$parse(root,,,"device","syntax_only") -$ root_dir = f$parse(root,,,"directory","syntax_only") - - - "[000000." - "][" - "[" - "]" -$ root = root_dev + "[" + root_dir -$! -$ define /nolog wrk_sslroot 'root'.] /trans=conc -$ define /nolog wrk_sslinclude wrk_sslroot:[include] -$ define /nolog wrk_sslxexe wrk_sslroot:['archd'_exe] -$ define /nolog wrk_sslxlib wrk_sslroot:['arch'_lib] -$! -$ if f$parse("wrk_sslroot:[000000]") .eqs. "" then - - create /directory /log wrk_sslroot:[000000] -$ if f$parse("wrk_sslinclude:") .eqs. "" then - - create /directory /log wrk_sslinclude: -$ if f$parse("wrk_sslxexe:") .eqs. "" then - - create /directory /log wrk_sslxexe: -$ if f$parse("wrk_sslxlib:") .eqs. "" then - - create /directory /log wrk_sslxlib: -$! -$ exheader := ssl.h, ssl2.h, ssl3.h, ssl23.h, tls1.h, dtls1.h, kssl.h, srtp.h -$ e_exe := ssl_task -$ libs := ssl_libssl -$! -$ xexe_dir := [-.'archd'.exe.ssl] -$! -$ copy /protection = w:re 'exheader' wrk_sslinclude: /log -$! -$ i = 0 -$ loop_exe: -$ e = f$edit( f$element( i, ",", e_exe), "trim") -$ i = i + 1 -$ if e .eqs. "," then goto loop_exe_end -$ set noon -$ file = xexe_dir+ e+ ".exe" -$ if f$search( file) .nes. "" -$ then -$ copy /protection = w:re 'file' wrk_sslxexe: /log -$ endif -$ set on -$ goto loop_exe -$ loop_exe_end: -$! -$ i = 0 -$ loop_lib: -$ e = f$edit(f$element(i, ",", libs),"trim") -$ i = i + 1 -$ if e .eqs. "," then goto loop_lib_end -$ set noon -$! Object library. -$ file = xexe_dir+ e+ lib32+ ".olb" -$ if f$search( file) .nes. "" -$ then -$ copy /protection = w:re 'file' wrk_sslxlib: /log -$ endif -$! Shareable image. 
-$ file = xexe_dir+ e+ shr+ ".exe" -$ if f$search( file) .nes. "" -$ then -$ copy /protection = w:re 'file' wrk_sslxlib: /log -$ endif -$ set on -$ goto loop_lib -$ loop_lib_end: -$! -$ tidy: -$! -$ call deass wrk_sslroot -$ call deass wrk_sslinclude -$ call deass wrk_sslxexe -$ call deass wrk_sslxlib -$! -$ exit -$! -$ deass: subroutine -$ if (f$trnlnm( p1, "LNM$PROCESS") .nes. "") -$ then -$ deassign /process 'p1' -$ endif -$ endsubroutine -$! diff --git a/thirdparty/openssl/ssl/s23_clnt.c b/thirdparty/openssl/ssl/s23_clnt.c index f782010c47..b80d1fd8ce 100644 --- a/thirdparty/openssl/ssl/s23_clnt.c +++ b/thirdparty/openssl/ssl/s23_clnt.c @@ -204,7 +204,10 @@ int ssl23_connect(SSL *s) goto end; } - ssl3_init_finished_mac(s); + if (!ssl3_init_finished_mac(s)) { + ret = -1; + goto end; + } s->state = SSL23_ST_CW_CLNT_HELLO_A; s->ctx->stats.sess_connect++; @@ -289,9 +292,9 @@ int ssl_fill_hello_random(SSL *s, int server, unsigned char *result, int len) unsigned long Time = (unsigned long)time(NULL); unsigned char *p = result; l2n(Time, p); - return RAND_pseudo_bytes(p, len - 4); + return RAND_bytes(p, len - 4); } else - return RAND_pseudo_bytes(result, len); + return RAND_bytes(result, len); } static int ssl23_client_hello(SSL *s) @@ -466,8 +469,8 @@ static int ssl23_client_hello(SSL *s) i = ch_len; s2n(i, d); memset(&(s->s3->client_random[0]), 0, SSL3_RANDOM_SIZE); - if (RAND_pseudo_bytes - (&(s->s3->client_random[SSL3_RANDOM_SIZE - i]), i) <= 0) + if (RAND_bytes (&(s->s3->client_random[SSL3_RANDOM_SIZE - i]), i) + <= 0) return -1; memcpy(p, &(s->s3->client_random[SSL3_RANDOM_SIZE - i]), i); diff --git a/thirdparty/openssl/ssl/s23_pkt.c b/thirdparty/openssl/ssl/s23_pkt.c index efc8647841..6544180efe 100644 --- a/thirdparty/openssl/ssl/s23_pkt.c +++ b/thirdparty/openssl/ssl/s23_pkt.c @@ -63,6 +63,9 @@ #include <openssl/evp.h> #include <openssl/buffer.h> +/* + * Return values are as per SSL_write() + */ int ssl23_write_bytes(SSL *s) { int i, num, tot; @@ -77,7 +80,7 
@@ int ssl23_write_bytes(SSL *s) if (i <= 0) { s->init_off = tot; s->init_num = num; - return (i); + return i; } s->rwstate = SSL_NOTHING; if (i == num) @@ -88,7 +91,10 @@ int ssl23_write_bytes(SSL *s) } } -/* return regularly only when we have read (at least) 'n' bytes */ +/* return regularly only when we have read (at least) 'n' bytes + * + * Return values are as per SSL_read() + */ int ssl23_read_bytes(SSL *s, int n) { unsigned char *p; @@ -102,7 +108,7 @@ int ssl23_read_bytes(SSL *s, int n) j = BIO_read(s->rbio, (char *)&(p[s->packet_length]), n - s->packet_length); if (j <= 0) - return (j); + return j; s->rwstate = SSL_NOTHING; s->packet_length += j; if (s->packet_length >= (unsigned int)n) diff --git a/thirdparty/openssl/ssl/s23_srvr.c b/thirdparty/openssl/ssl/s23_srvr.c index 470bd3d94f..d3f6db15cc 100644 --- a/thirdparty/openssl/ssl/s23_srvr.c +++ b/thirdparty/openssl/ssl/s23_srvr.c @@ -195,7 +195,10 @@ int ssl23_accept(SSL *s) s->init_buf = buf; } - ssl3_init_finished_mac(s); + if (!ssl3_init_finished_mac(s)) { + ret = -1; + goto end; + } s->state = SSL23_ST_SR_CLNT_HELLO_A; s->ctx->stats.sess_accept++; diff --git a/thirdparty/openssl/ssl/s2_clnt.c b/thirdparty/openssl/ssl/s2_clnt.c index 69da6b1421..20de1a8217 100644 --- a/thirdparty/openssl/ssl/s2_clnt.c +++ b/thirdparty/openssl/ssl/s2_clnt.c @@ -581,7 +581,7 @@ static int client_hello(SSL *s) /* * challenge id data */ - if (RAND_pseudo_bytes(s->s2->challenge, SSL2_CHALLENGE_LENGTH) <= 0) + if (RAND_bytes(s->s2->challenge, SSL2_CHALLENGE_LENGTH) <= 0) return -1; memcpy(d, s->s2->challenge, SSL2_CHALLENGE_LENGTH); d += SSL2_CHALLENGE_LENGTH; @@ -629,7 +629,7 @@ static int client_master_key(SSL *s) return -1; } if (i > 0) - if (RAND_pseudo_bytes(sess->key_arg, i) <= 0) + if (RAND_bytes(sess->key_arg, i) <= 0) return -1; /* make a master key */ diff --git a/thirdparty/openssl/ssl/s2_lib.c b/thirdparty/openssl/ssl/s2_lib.c index 88e67f083a..cc1360307b 100644 --- a/thirdparty/openssl/ssl/s2_lib.c +++ 
b/thirdparty/openssl/ssl/s2_lib.c @@ -254,7 +254,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = { SSL_3DES, SSL_MD5, SSL_SSLV2, - SSL_NOT_DEFAULT | SSL_NOT_EXP | SSL_HIGH, + SSL_NOT_DEFAULT | SSL_NOT_EXP | SSL_MEDIUM, 0, 112, 168, diff --git a/thirdparty/openssl/ssl/s2_pkt.c b/thirdparty/openssl/ssl/s2_pkt.c index 7a61888134..e44bc0335a 100644 --- a/thirdparty/openssl/ssl/s2_pkt.c +++ b/thirdparty/openssl/ssl/s2_pkt.c @@ -307,6 +307,9 @@ int ssl2_peek(SSL *s, void *buf, int len) return ssl2_read_internal(s, buf, len, 1); } +/* + * Return values are as per SSL_read() + */ static int read_n(SSL *s, unsigned int n, unsigned int max, unsigned int extend) { @@ -374,7 +377,7 @@ static int read_n(SSL *s, unsigned int n, unsigned int max, # endif if (i <= 0) { s->s2->rbuf_left += newb; - return (i); + return i; } newb += i; } @@ -441,6 +444,9 @@ int ssl2_write(SSL *s, const void *_buf, int len) } } +/* + * Return values are as per SSL_write() + */ static int write_pending(SSL *s, const unsigned char *buf, unsigned int len) { int i; @@ -477,7 +483,7 @@ static int write_pending(SSL *s, const unsigned char *buf, unsigned int len) s->rwstate = SSL_NOTHING; return (s->s2->wpend_ret); } else if (i <= 0) - return (i); + return i; s->s2->wpend_off += i; s->s2->wpend_len -= i; } diff --git a/thirdparty/openssl/ssl/s2_srvr.c b/thirdparty/openssl/ssl/s2_srvr.c index 07e9df8282..d3b243c27e 100644 --- a/thirdparty/openssl/ssl/s2_srvr.c +++ b/thirdparty/openssl/ssl/s2_srvr.c @@ -526,11 +526,8 @@ static int get_client_master_key(SSL *s) * fails. See https://tools.ietf.org/html/rfc5246#section-7.4.7.1 */ - /* - * should be RAND_bytes, but we cannot work around a failure. 
- */ - if (RAND_pseudo_bytes(rand_premaster_secret, - (int)num_encrypted_key_bytes) <= 0) + if (RAND_bytes(rand_premaster_secret, + (int)num_encrypted_key_bytes) <= 0) return 0; i = ssl_rsa_private_decrypt(s->cert, s->s2->tmp.enc, @@ -822,8 +819,7 @@ static int server_hello(SSL *s) /* make and send conn_id */ s2n(SSL2_CONNECTION_ID_LENGTH, p); /* add conn_id length */ s->s2->conn_id_length = SSL2_CONNECTION_ID_LENGTH; - if (RAND_pseudo_bytes(s->s2->conn_id, (int)s->s2->conn_id_length) <= - 0) + if (RAND_bytes(s->s2->conn_id, (int)s->s2->conn_id_length) <= 0) return -1; memcpy(d, s->s2->conn_id, SSL2_CONNECTION_ID_LENGTH); d += SSL2_CONNECTION_ID_LENGTH; @@ -962,7 +958,7 @@ static int request_certificate(SSL *s) p = (unsigned char *)s->init_buf->data; *(p++) = SSL2_MT_REQUEST_CERTIFICATE; *(p++) = SSL2_AT_MD5_WITH_RSA_ENCRYPTION; - if (RAND_pseudo_bytes(ccd, SSL2_MIN_CERT_CHALLENGE_LENGTH) <= 0) + if (RAND_bytes(ccd, SSL2_MIN_CERT_CHALLENGE_LENGTH) <= 0) return -1; memcpy(p, ccd, SSL2_MIN_CERT_CHALLENGE_LENGTH); diff --git a/thirdparty/openssl/ssl/s3_both.c b/thirdparty/openssl/ssl/s3_both.c index 09d0661e81..054ded1c99 100644 --- a/thirdparty/openssl/ssl/s3_both.c +++ b/thirdparty/openssl/ssl/s3_both.c @@ -356,21 +356,22 @@ long ssl3_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok) } *ok = 1; s->state = stn; - s->init_msg = s->init_buf->data + 4; + s->init_msg = s->init_buf->data + SSL3_HM_HEADER_LENGTH; s->init_num = (int)s->s3->tmp.message_size; return s->init_num; } p = (unsigned char *)s->init_buf->data; - if (s->state == st1) { /* s->init_num < 4 */ + if (s->state == st1) { /* s->init_num < SSL3_HM_HEADER_LENGTH */ int skip_message; do { - while (s->init_num < 4) { + while (s->init_num < SSL3_HM_HEADER_LENGTH) { i = s->method->ssl_read_bytes(s, SSL3_RT_HANDSHAKE, &p[s->init_num], - 4 - s->init_num, 0); + SSL3_HM_HEADER_LENGTH - + s->init_num, 0); if (i <= 0) { s->rwstate = SSL_READING; *ok = 0; @@ -394,12 +395,13 @@ long ssl3_get_message(SSL 
*s, int st1, int stn, int mt, long max, int *ok) if (s->msg_callback) s->msg_callback(0, s->version, SSL3_RT_HANDSHAKE, - p, 4, s, s->msg_callback_arg); + p, SSL3_HM_HEADER_LENGTH, s, + s->msg_callback_arg); } } while (skip_message); - /* s->init_num == 4 */ + /* s->init_num == SSL3_HM_HEADER_LENGTH */ if ((mt >= 0) && (*p != mt)) { al = SSL_AD_UNEXPECTED_MESSAGE; @@ -415,19 +417,20 @@ long ssl3_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok) SSLerr(SSL_F_SSL3_GET_MESSAGE, SSL_R_EXCESSIVE_MESSAGE_SIZE); goto f_err; } - if (l > (INT_MAX - 4)) { /* BUF_MEM_grow takes an 'int' parameter */ - al = SSL_AD_ILLEGAL_PARAMETER; - SSLerr(SSL_F_SSL3_GET_MESSAGE, SSL_R_EXCESSIVE_MESSAGE_SIZE); - goto f_err; - } - if (l && !BUF_MEM_grow_clean(s->init_buf, (int)l + 4)) { + /* + * Make buffer slightly larger than message length as a precaution + * against small OOB reads e.g. CVE-2016-6306 + */ + if (l + && !BUF_MEM_grow_clean(s->init_buf, + (int)l + SSL3_HM_HEADER_LENGTH + 16)) { SSLerr(SSL_F_SSL3_GET_MESSAGE, ERR_R_BUF_LIB); goto err; } s->s3->tmp.message_size = l; s->state = stn; - s->init_msg = s->init_buf->data + 4; + s->init_msg = s->init_buf->data + SSL3_HM_HEADER_LENGTH; s->init_num = 0; } @@ -456,10 +459,12 @@ long ssl3_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok) #endif /* Feed this message into MAC computation. 
*/ - ssl3_finish_mac(s, (unsigned char *)s->init_buf->data, s->init_num + 4); + ssl3_finish_mac(s, (unsigned char *)s->init_buf->data, + s->init_num + SSL3_HM_HEADER_LENGTH); if (s->msg_callback) s->msg_callback(0, s->version, SSL3_RT_HANDSHAKE, s->init_buf->data, - (size_t)s->init_num + 4, s, s->msg_callback_arg); + (size_t)s->init_num + SSL3_HM_HEADER_LENGTH, s, + s->msg_callback_arg); *ok = 1; return s->init_num; f_err: @@ -535,6 +540,9 @@ int ssl_verify_alarm_type(long type) case X509_V_ERR_CRL_NOT_YET_VALID: case X509_V_ERR_CERT_UNTRUSTED: case X509_V_ERR_CERT_REJECTED: + case X509_V_ERR_HOSTNAME_MISMATCH: + case X509_V_ERR_EMAIL_MISMATCH: + case X509_V_ERR_IP_ADDRESS_MISMATCH: al = SSL_AD_BAD_CERTIFICATE; break; case X509_V_ERR_CERT_SIGNATURE_FAILURE: @@ -548,7 +556,10 @@ int ssl_verify_alarm_type(long type) case X509_V_ERR_CERT_REVOKED: al = SSL_AD_CERTIFICATE_REVOKED; break; + case X509_V_ERR_UNSPECIFIED: case X509_V_ERR_OUT_OF_MEM: + case X509_V_ERR_INVALID_CALL: + case X509_V_ERR_STORE_LOOKUP: al = SSL_AD_INTERNAL_ERROR; break; case X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT: diff --git a/thirdparty/openssl/ssl/s3_clnt.c b/thirdparty/openssl/ssl/s3_clnt.c index 19dc8648b9..5b8b2da59f 100644 --- a/thirdparty/openssl/ssl/s3_clnt.c +++ b/thirdparty/openssl/ssl/s3_clnt.c @@ -263,6 +263,7 @@ int ssl3_connect(SSL *s) if (!ssl3_setup_buffers(s)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -275,7 +276,11 @@ int ssl3_connect(SSL *s) /* don't push the buffering BIO quite yet */ - ssl3_init_finished_mac(s); + if (!ssl3_init_finished_mac(s)) { + ret = -1; + s->state = SSL_ST_ERR; + goto end; + } s->state = SSL3_ST_CW_CLNT_HELLO_A; s->ctx->stats.sess_connect++; @@ -1216,6 +1221,12 @@ int ssl3_get_server_certificate(SSL *s) goto f_err; } for (nc = 0; nc < llen;) { + if (nc + 3 > llen) { + al = SSL_AD_DECODE_ERROR; + SSLerr(SSL_F_SSL3_GET_SERVER_CERTIFICATE, + SSL_R_CERT_LENGTH_MISMATCH); + goto f_err; + } n2l3(p, l); if ((l + nc + 3) > llen) { al = 
SSL_AD_DECODE_ERROR; @@ -1704,12 +1715,6 @@ int ssl3_get_key_exchange(SSL *s) } p += i; - if (BN_is_zero(dh->p)) { - SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_BAD_DH_P_VALUE); - goto f_err; - } - - if (2 > n - param_len) { SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_LENGTH_TOO_SHORT); goto f_err; @@ -1730,11 +1735,6 @@ int ssl3_get_key_exchange(SSL *s) } p += i; - if (BN_is_zero(dh->g)) { - SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_BAD_DH_G_VALUE); - goto f_err; - } - if (2 > n - param_len) { SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_LENGTH_TOO_SHORT); goto f_err; @@ -1761,6 +1761,39 @@ int ssl3_get_key_exchange(SSL *s) goto f_err; } + /*- + * Check that p and g are suitable enough + * + * p is odd + * 1 < g < p - 1 + */ + { + BIGNUM *tmp = NULL; + + if (!BN_is_odd(dh->p)) { + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_BAD_DH_P_VALUE); + goto f_err; + } + if (BN_is_negative(dh->g) || BN_is_zero(dh->g) + || BN_is_one(dh->g)) { + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_BAD_DH_G_VALUE); + goto f_err; + } + if ((tmp = BN_new()) == NULL + || BN_copy(tmp, dh->p) == NULL + || !BN_sub_word(tmp, 1)) { + BN_free(tmp); + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_BN_LIB); + goto err; + } + if (BN_cmp(dh->g, tmp) >= 0) { + BN_free(tmp); + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_BAD_DH_G_VALUE); + goto f_err; + } + BN_free(tmp); + } + # ifndef OPENSSL_NO_RSA if (alg_a & SSL_aRSA) pkey = @@ -1836,6 +1869,7 @@ int ssl3_get_key_exchange(SSL *s) goto err; } if (EC_KEY_set_group(ecdh, ngroup) == 0) { + EC_GROUP_free(ngroup); SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_EC_LIB); goto err; } @@ -2111,6 +2145,10 @@ int ssl3_get_certificate_request(SSL *s) if (ctype_num > SSL3_CT_NUMBER) { /* If we exceed static buffer copy all to cert structure */ s->cert->ctypes = OPENSSL_malloc(ctype_num); + if (s->cert->ctypes == NULL) { + SSLerr(SSL_F_SSL3_GET_CERTIFICATE_REQUEST, ERR_R_MALLOC_FAILURE); + goto err; + } memcpy(s->cert->ctypes, p, ctype_num); s->cert->ctype_num = (size_t)ctype_num; 
ctype_num = SSL3_CT_NUMBER; @@ -2167,6 +2205,11 @@ int ssl3_get_certificate_request(SSL *s) } for (nc = 0; nc < llen;) { + if (nc + 2 > llen) { + ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_DECODE_ERROR); + SSLerr(SSL_F_SSL3_GET_CERTIFICATE_REQUEST, SSL_R_CA_DN_TOO_LONG); + goto err; + } n2s(p, l); if ((l + nc + 2) > llen) { if ((s->options & SSL_OP_NETSCAPE_CA_DN_BUG)) @@ -2999,19 +3042,6 @@ int ssl3_send_client_key_exchange(SSL *s) goto err; } /* - * If we have client certificate, use its secret as peer key - */ - if (s->s3->tmp.cert_req && s->cert->key->privatekey) { - if (EVP_PKEY_derive_set_peer - (pkey_ctx, s->cert->key->privatekey) <= 0) { - /* - * If there was an error - just ignore it. Ephemeral key - * * would be used - */ - ERR_clear_error(); - } - } - /* * Compute shared IV and store it in algorithm-specific context * data */ @@ -3057,12 +3087,6 @@ int ssl3_send_client_key_exchange(SSL *s) n = msglen + 2; } memcpy(p, tmp, msglen); - /* Check if pubkey from client certificate was used */ - if (EVP_PKEY_CTX_ctrl - (pkey_ctx, -1, -1, EVP_PKEY_CTRL_PEER_KEY, 2, NULL) > 0) { - /* Set flag "skip certificate verify" */ - s->s3->flags |= TLS1_FLAGS_SKIP_CERT_VERIFY; - } EVP_PKEY_CTX_free(pkey_ctx); s->session->master_key_length = s->method->ssl3_enc->generate_master_secret(s, diff --git a/thirdparty/openssl/ssl/s3_enc.c b/thirdparty/openssl/ssl/s3_enc.c index 47a0ec9fe0..1eee9d9b21 100644 --- a/thirdparty/openssl/ssl/s3_enc.c +++ b/thirdparty/openssl/ssl/s3_enc.c @@ -177,32 +177,34 @@ static int ssl3_generate_key_block(SSL *s, unsigned char *km, int num) EVP_MD_CTX_init(&s1); for (i = 0; (int)i < num; i += MD5_DIGEST_LENGTH) { k++; - if (k > sizeof buf) { + if (k > sizeof(buf)) /* bug: 'buf' is too small for this ciphersuite */ - SSLerr(SSL_F_SSL3_GENERATE_KEY_BLOCK, ERR_R_INTERNAL_ERROR); - return 0; - } + goto err; for (j = 0; j < k; j++) buf[j] = c; c++; - EVP_DigestInit_ex(&s1, EVP_sha1(), NULL); - EVP_DigestUpdate(&s1, buf, k); - EVP_DigestUpdate(&s1, 
s->session->master_key, - s->session->master_key_length); - EVP_DigestUpdate(&s1, s->s3->server_random, SSL3_RANDOM_SIZE); - EVP_DigestUpdate(&s1, s->s3->client_random, SSL3_RANDOM_SIZE); - EVP_DigestFinal_ex(&s1, smd, NULL); - - EVP_DigestInit_ex(&m5, EVP_md5(), NULL); - EVP_DigestUpdate(&m5, s->session->master_key, - s->session->master_key_length); - EVP_DigestUpdate(&m5, smd, SHA_DIGEST_LENGTH); + if (!EVP_DigestInit_ex(&s1, EVP_sha1(), NULL) || + !EVP_DigestUpdate(&s1, buf, k) || + !EVP_DigestUpdate(&s1, s->session->master_key, + s->session->master_key_length) || + !EVP_DigestUpdate(&s1, s->s3->server_random, SSL3_RANDOM_SIZE) || + !EVP_DigestUpdate(&s1, s->s3->client_random, SSL3_RANDOM_SIZE) || + !EVP_DigestFinal_ex(&s1, smd, NULL)) + goto err2; + + if (!EVP_DigestInit_ex(&m5, EVP_md5(), NULL) || + !EVP_DigestUpdate(&m5, s->session->master_key, + s->session->master_key_length) || + !EVP_DigestUpdate(&m5, smd, SHA_DIGEST_LENGTH)) + goto err2; if ((int)(i + MD5_DIGEST_LENGTH) > num) { - EVP_DigestFinal_ex(&m5, smd, NULL); + if (!EVP_DigestFinal_ex(&m5, smd, NULL)) + goto err2; memcpy(km, smd, (num - i)); } else - EVP_DigestFinal_ex(&m5, km, NULL); + if (!EVP_DigestFinal_ex(&m5, km, NULL)) + goto err2; km += MD5_DIGEST_LENGTH; } @@ -210,6 +212,12 @@ static int ssl3_generate_key_block(SSL *s, unsigned char *km, int num) EVP_MD_CTX_cleanup(&m5); EVP_MD_CTX_cleanup(&s1); return 1; + err: + SSLerr(SSL_F_SSL3_GENERATE_KEY_BLOCK, ERR_R_INTERNAL_ERROR); + err2: + EVP_MD_CTX_cleanup(&m5); + EVP_MD_CTX_cleanup(&s1); + return 0; } int ssl3_change_cipher_state(SSL *s, int which) @@ -360,25 +368,33 @@ int ssl3_change_cipher_state(SSL *s, int which) * In here I set both the read and write key/iv to the same value * since only the correct one will be used :-). 
*/ - EVP_DigestInit_ex(&md, EVP_md5(), NULL); - EVP_DigestUpdate(&md, key, j); - EVP_DigestUpdate(&md, er1, SSL3_RANDOM_SIZE); - EVP_DigestUpdate(&md, er2, SSL3_RANDOM_SIZE); - EVP_DigestFinal_ex(&md, &(exp_key[0]), NULL); + if (!EVP_DigestInit_ex(&md, EVP_md5(), NULL) || + !EVP_DigestUpdate(&md, key, j) || + !EVP_DigestUpdate(&md, er1, SSL3_RANDOM_SIZE) || + !EVP_DigestUpdate(&md, er2, SSL3_RANDOM_SIZE) || + !EVP_DigestFinal_ex(&md, &(exp_key[0]), NULL)) { + EVP_MD_CTX_cleanup(&md); + goto err2; + } key = &(exp_key[0]); if (k > 0) { - EVP_DigestInit_ex(&md, EVP_md5(), NULL); - EVP_DigestUpdate(&md, er1, SSL3_RANDOM_SIZE); - EVP_DigestUpdate(&md, er2, SSL3_RANDOM_SIZE); - EVP_DigestFinal_ex(&md, &(exp_iv[0]), NULL); + if (!EVP_DigestInit_ex(&md, EVP_md5(), NULL) || + !EVP_DigestUpdate(&md, er1, SSL3_RANDOM_SIZE) || + !EVP_DigestUpdate(&md, er2, SSL3_RANDOM_SIZE) || + !EVP_DigestFinal_ex(&md, &(exp_iv[0]), NULL)) { + EVP_MD_CTX_cleanup(&md); + goto err2; + } iv = &(exp_iv[0]); } } + EVP_MD_CTX_cleanup(&md); s->session->key_arg_length = 0; - EVP_CipherInit_ex(dd, c, NULL, key, iv, (which & SSL3_CC_WRITE)); + if (!EVP_CipherInit_ex(dd, c, NULL, key, iv, (which & SSL3_CC_WRITE))) + goto err2; #ifdef OPENSSL_SSL_TRACE_CRYPTO if (s->msg_callback) { @@ -399,7 +415,6 @@ int ssl3_change_cipher_state(SSL *s, int which) OPENSSL_cleanse(&(exp_key[0]), sizeof(exp_key)); OPENSSL_cleanse(&(exp_iv[0]), sizeof(exp_iv)); - EVP_MD_CTX_cleanup(&md); return (1); err: SSLerr(SSL_F_SSL3_CHANGE_CIPHER_STATE, ERR_R_MALLOC_FAILURE); @@ -555,17 +570,20 @@ int ssl3_enc(SSL *s, int send) if ((bs != 1) && !send) return ssl3_cbc_remove_padding(s, rec, bs, mac_size); } - return (1); + return 1; } -void ssl3_init_finished_mac(SSL *s) +int ssl3_init_finished_mac(SSL *s) { if (s->s3->handshake_buffer) BIO_free(s->s3->handshake_buffer); if (s->s3->handshake_dgst) ssl3_free_digest_list(s); s->s3->handshake_buffer = BIO_new(BIO_s_mem()); + if (s->s3->handshake_buffer == NULL) + return 0; 
(void)BIO_set_close(s->s3->handshake_buffer, BIO_CLOSE); + return 1; } void ssl3_free_digest_list(SSL *s) @@ -607,6 +625,10 @@ int ssl3_digest_cached_records(SSL *s) ssl3_free_digest_list(s); s->s3->handshake_dgst = OPENSSL_malloc(SSL_MAX_DIGEST * sizeof(EVP_MD_CTX *)); + if (s->s3->handshake_dgst == NULL) { + SSLerr(SSL_F_SSL3_DIGEST_CACHED_RECORDS, ERR_R_MALLOC_FAILURE); + return 0; + } memset(s->s3->handshake_dgst, 0, SSL_MAX_DIGEST * sizeof(EVP_MD_CTX *)); hdatalen = BIO_get_mem_data(s->s3->handshake_buffer, &hdata); if (hdatalen <= 0) { @@ -618,14 +640,22 @@ int ssl3_digest_cached_records(SSL *s) for (i = 0; ssl_get_handshake_digest(i, &mask, &md); i++) { if ((mask & ssl_get_algorithm2(s)) && md) { s->s3->handshake_dgst[i] = EVP_MD_CTX_create(); + if (s->s3->handshake_dgst[i] == NULL) { + SSLerr(SSL_F_SSL3_DIGEST_CACHED_RECORDS, ERR_R_MALLOC_FAILURE); + return 0; + } #ifdef OPENSSL_FIPS if (EVP_MD_nid(md) == NID_md5) { EVP_MD_CTX_set_flags(s->s3->handshake_dgst[i], EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); } #endif - EVP_DigestInit_ex(s->s3->handshake_dgst[i], md, NULL); - EVP_DigestUpdate(s->s3->handshake_dgst[i], hdata, hdatalen); + if (!EVP_DigestInit_ex(s->s3->handshake_dgst[i], md, NULL) + || !EVP_DigestUpdate(s->s3->handshake_dgst[i], hdata, + hdatalen)) { + SSLerr(SSL_F_SSL3_DIGEST_CACHED_RECORDS, ERR_R_INTERNAL_ERROR); + return 0; + } } else { s->s3->handshake_dgst[i] = NULL; } @@ -895,7 +925,7 @@ int ssl3_generate_master_secret(SSL *s, unsigned char *out, unsigned char *p, s, s->msg_callback_arg); } #endif - OPENSSL_cleanse(buf, sizeof buf); + OPENSSL_cleanse(buf, sizeof(buf)); return (ret); } diff --git a/thirdparty/openssl/ssl/s3_lib.c b/thirdparty/openssl/ssl/s3_lib.c index 872e636af9..1014a3fce1 100644 --- a/thirdparty/openssl/ssl/s3_lib.c +++ b/thirdparty/openssl/ssl/s3_lib.c @@ -329,7 +329,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_SSLV3, - SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, + SSL_NOT_EXP | SSL_MEDIUM | SSL_FIPS, 
SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -382,7 +382,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_SSLV3, - SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, + SSL_NOT_EXP | SSL_MEDIUM | SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -434,7 +434,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_SSLV3, - SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, + SSL_NOT_EXP | SSL_MEDIUM | SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -487,7 +487,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_SSLV3, - SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, + SSL_NOT_EXP | SSL_MEDIUM | SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -539,7 +539,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_SSLV3, - SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, + SSL_NOT_EXP | SSL_MEDIUM | SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -625,7 +625,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_SSLV3, - SSL_NOT_DEFAULT | SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, + SSL_NOT_DEFAULT | SSL_NOT_EXP | SSL_MEDIUM | SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -712,7 +712,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_SSLV3, - SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, + SSL_NOT_EXP | SSL_MEDIUM | SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -778,7 +778,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_MD5, SSL_SSLV3, - SSL_NOT_EXP | SSL_HIGH, + SSL_NOT_EXP | SSL_MEDIUM, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -1728,7 +1728,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, + SSL_NOT_EXP | SSL_MEDIUM | SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -2120,7 +2120,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, + SSL_NOT_EXP | SSL_MEDIUM | SSL_FIPS, 
SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -2200,7 +2200,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, + SSL_NOT_EXP | SSL_MEDIUM | SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -2280,7 +2280,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, + SSL_NOT_EXP | SSL_MEDIUM | SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -2360,7 +2360,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, + SSL_NOT_EXP | SSL_MEDIUM | SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -2440,7 +2440,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_TLSV1, - SSL_NOT_DEFAULT | SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, + SSL_NOT_DEFAULT | SSL_NOT_EXP | SSL_MEDIUM | SSL_FIPS, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -2490,7 +2490,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP | SSL_HIGH, + SSL_NOT_EXP | SSL_MEDIUM, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -2506,7 +2506,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP | SSL_HIGH, + SSL_NOT_EXP | SSL_MEDIUM, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -2522,7 +2522,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { SSL_3DES, SSL_SHA1, SSL_TLSV1, - SSL_NOT_EXP | SSL_HIGH, + SSL_NOT_EXP | SSL_MEDIUM, SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF, 112, 168, @@ -4237,7 +4237,7 @@ int ssl3_get_req_cert_type(SSL *s, unsigned char *p) return (int)s->cert->ctype_num; } /* get configured sigalgs */ - siglen = tls12_get_psigalgs(s, &sig); + siglen = tls12_get_psigalgs(s, 1, &sig); if (s->cert->cert_flags & SSL_CERT_FLAGS_CHECK_TLS_STRICT) nostrict = 0; for (i = 0; i < siglen; i += 2, sig += 2) { @@ -4528,7 +4528,10 @@ int ssl3_renegotiate_check(SSL *s) */ long ssl_get_algorithm2(SSL 
*s) { - long alg2 = s->s3->tmp.new_cipher->algorithm2; + long alg2; + if (s->s3 == NULL || s->s3->tmp.new_cipher == NULL) + return -1; + alg2 = s->s3->tmp.new_cipher->algorithm2; if (s->method->ssl3_enc->enc_flags & SSL_ENC_FLAG_SHA256_PRF && alg2 == (SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF)) return SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256; diff --git a/thirdparty/openssl/ssl/s3_pkt.c b/thirdparty/openssl/ssl/s3_pkt.c index 379890237e..0290c991d8 100644 --- a/thirdparty/openssl/ssl/s3_pkt.c +++ b/thirdparty/openssl/ssl/s3_pkt.c @@ -136,6 +136,9 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, unsigned int len, int create_empty_fragment); static int ssl3_get_record(SSL *s); +/* + * Return values are as per SSL_read() + */ int ssl3_read_n(SSL *s, int n, int max, int extend) { /* @@ -667,7 +670,7 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) * promptly send beyond the end of the users buffer ... so we trap and * report the error in a way the user will notice */ - if (len < tot) { + if ((len < tot) || ((wb->left != 0) && (len < (tot + s->s3->wpend_tot)))) { SSLerr(SSL_F_SSL3_WRITE_BYTES, SSL_R_BAD_LENGTH); return (-1); } @@ -696,6 +699,7 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) len >= 4 * (int)(max_send_fragment = s->max_send_fragment) && s->compress == NULL && s->msg_callback == NULL && SSL_USE_EXPLICIT_IV(s) && + s->enc_write_ctx != NULL && EVP_CIPHER_flags(s->enc_write_ctx->cipher) & EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK) { unsigned char aad[13]; @@ -1082,7 +1086,10 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, return -1; } -/* if s->s3->wbuf.left != 0, we need to call this */ +/* if s->s3->wbuf.left != 0, we need to call this + * + * Return values are as per SSL_write(), i.e. 
+ */ int ssl3_write_pending(SSL *s, int type, const unsigned char *buf, unsigned int len) { @@ -1122,7 +1129,7 @@ int ssl3_write_pending(SSL *s, int type, const unsigned char *buf, */ wb->left = 0; } - return (i); + return i; } wb->offset += i; wb->left -= i; @@ -1229,6 +1236,13 @@ int ssl3_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek) return (ret); } + /* + * Reset the count of consecutive warning alerts if we've got a non-empty + * record that isn't an alert. + */ + if (rr->type != SSL3_RT_ALERT && rr->length != 0) + s->cert->alert_count = 0; + /* we now have a packet which can be read and processed */ if (s->s3->change_cipher_spec /* set when we receive ChangeCipherSpec, @@ -1443,6 +1457,14 @@ int ssl3_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek) if (alert_level == SSL3_AL_WARNING) { s->s3->warn_alert = alert_descr; + + s->cert->alert_count++; + if (s->cert->alert_count == MAX_WARN_ALERT_COUNT) { + al = SSL_AD_UNEXPECTED_MESSAGE; + SSLerr(SSL_F_SSL3_READ_BYTES, SSL_R_TOO_MANY_WARN_ALERTS); + goto f_err; + } + if (alert_descr == SSL_AD_CLOSE_NOTIFY) { s->shutdown |= SSL_RECEIVED_SHUTDOWN; return (0); @@ -1473,7 +1495,7 @@ int ssl3_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek) BIO_snprintf(tmp, sizeof tmp, "%d", alert_descr); ERR_add_error_data(2, "SSL alert number ", tmp); s->shutdown |= SSL_RECEIVED_SHUTDOWN; - SSL_CTX_remove_session(s->ctx, s->session); + SSL_CTX_remove_session(s->session_ctx, s->session); return (0); } else { al = SSL_AD_ILLEGAL_PARAMETER; @@ -1578,16 +1600,13 @@ int ssl3_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek) switch (rr->type) { default: -#ifndef OPENSSL_NO_TLS /* - * TLS up to v1.1 just ignores unknown message types: TLS v1.2 give - * an unexpected message alert. + * TLS 1.0 and 1.1 say you SHOULD ignore unrecognised record types, but + * TLS 1.2 says you MUST send an unexpected message alert. 
We use the + * TLS 1.2 behaviour for all protocol versions to prevent issues where + * no progress is being made and the peer continually sends unrecognised + * record types, using up resources processing them. */ - if (s->version >= TLS1_VERSION && s->version <= TLS1_1_VERSION) { - rr->length = 0; - goto start; - } -#endif al = SSL_AD_UNEXPECTED_MESSAGE; SSLerr(SSL_F_SSL3_READ_BYTES, SSL_R_UNEXPECTED_RECORD); goto f_err; @@ -1698,7 +1717,7 @@ int ssl3_send_alert(SSL *s, int level, int desc) return -1; /* If a fatal one, remove from cache */ if ((level == 2) && (s->session != NULL)) - SSL_CTX_remove_session(s->ctx, s->session); + SSL_CTX_remove_session(s->session_ctx, s->session); s->s3->alert_dispatch = 1; s->s3->send_alert[0] = level; diff --git a/thirdparty/openssl/ssl/s3_srvr.c b/thirdparty/openssl/ssl/s3_srvr.c index ab28702ee9..ba17f1b562 100644 --- a/thirdparty/openssl/ssl/s3_srvr.c +++ b/thirdparty/openssl/ssl/s3_srvr.c @@ -311,7 +311,12 @@ int ssl3_accept(SSL *s) goto end; } - ssl3_init_finished_mac(s); + if (!ssl3_init_finished_mac(s)) { + ret = -1; + s->state = SSL_ST_ERR; + goto end; + } + s->state = SSL3_ST_SR_CLNT_HELLO_A; s->ctx->stats.sess_accept++; } else if (!s->s3->send_connection_binding && @@ -348,7 +353,11 @@ int ssl3_accept(SSL *s) s->state = SSL3_ST_SW_FLUSH; s->init_num = 0; - ssl3_init_finished_mac(s); + if (!ssl3_init_finished_mac(s)) { + ret = -1; + s->state = SSL_ST_ERR; + goto end; + } break; case SSL3_ST_SW_HELLO_REQ_C: @@ -506,7 +515,7 @@ int ssl3_accept(SSL *s) * if SSL_VERIFY_CLIENT_ONCE is set, don't request cert * during re-negotiation: */ - ((s->session->peer != NULL) && + (s->s3->tmp.finish_md_len != 0 && (s->verify_mode & SSL_VERIFY_CLIENT_ONCE)) || /* * never request cert in anonymous ciphersuites (see @@ -980,7 +989,8 @@ int ssl3_get_client_hello(SSL *s) session_length = *(p + SSL3_RANDOM_SIZE); - if (p + SSL3_RANDOM_SIZE + session_length + 1 >= d + n) { + if (SSL3_RANDOM_SIZE + session_length + 1 + >= (unsigned int)((d + n) 
- p)) { al = SSL_AD_DECODE_ERROR; SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_TOO_SHORT); goto f_err; @@ -998,7 +1008,7 @@ int ssl3_get_client_hello(SSL *s) /* get the session-id */ j = *(p++); - if (p + j > d + n) { + if ((d + n) - p < j) { al = SSL_AD_DECODE_ERROR; SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_TOO_SHORT); goto f_err; @@ -1054,14 +1064,14 @@ int ssl3_get_client_hello(SSL *s) if (SSL_IS_DTLS(s)) { /* cookie stuff */ - if (p + 1 > d + n) { + if ((d + n) - p < 1) { al = SSL_AD_DECODE_ERROR; SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_TOO_SHORT); goto f_err; } cookie_len = *(p++); - if (p + cookie_len > d + n) { + if ((unsigned int)((d + n ) - p) < cookie_len) { al = SSL_AD_DECODE_ERROR; SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_TOO_SHORT); goto f_err; @@ -1131,7 +1141,7 @@ int ssl3_get_client_hello(SSL *s) } } - if (p + 2 > d + n) { + if ((d + n ) - p < 2) { al = SSL_AD_DECODE_ERROR; SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_TOO_SHORT); goto f_err; @@ -1145,7 +1155,7 @@ int ssl3_get_client_hello(SSL *s) } /* i bytes of cipher data + 1 byte for compression length later */ - if ((p + i + 1) > (d + n)) { + if ((d + n) - p < i + 1) { /* not enough data */ al = SSL_AD_DECODE_ERROR; SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_MISMATCH); @@ -1211,7 +1221,7 @@ int ssl3_get_client_hello(SSL *s) /* compression */ i = *(p++); - if ((p + i) > (d + n)) { + if ((d + n) - p < i) { /* not enough data */ al = SSL_AD_DECODE_ERROR; SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_MISMATCH); @@ -1464,9 +1474,9 @@ int ssl3_get_client_hello(SSL *s) /* Handles TLS extensions that we couldn't check earlier */ if (s->version >= SSL3_VERSION) { - if (ssl_check_clienthello_tlsext_late(s) <= 0) { + if (!ssl_check_clienthello_tlsext_late(s, &al)) { SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_CLIENTHELLO_TLSEXT); - goto err; + goto f_err; } } @@ -1600,6 +1610,9 @@ int ssl3_send_server_key_exchange(SSL *s) unsigned int u; #endif #ifndef OPENSSL_NO_DH 
+# ifdef OPENSSL_NO_RSA + int j; +# endif DH *dh = NULL, *dhp; #endif #ifndef OPENSSL_NO_ECDH @@ -1700,6 +1713,12 @@ int ssl3_send_server_key_exchange(SSL *s) if (type & SSL_kEECDH) { const EC_GROUP *group; + if (s->s3->tmp.ecdh != NULL) { + SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE, + ERR_R_INTERNAL_ERROR); + goto err; + } + ecdhp = cert->ecdh_tmp; if (s->cert->ecdh_tmp_auto) { /* Get NID of appropriate shared curve */ @@ -1720,17 +1739,7 @@ int ssl3_send_server_key_exchange(SSL *s) goto f_err; } - if (s->s3->tmp.ecdh != NULL) { - SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE, - ERR_R_INTERNAL_ERROR); - goto err; - } - /* Duplicate the ECDH structure. */ - if (ecdhp == NULL) { - SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE, ERR_R_ECDH_LIB); - goto err; - } if (s->cert->ecdh_tmp_auto) ecdh = ecdhp; else if ((ecdh = EC_KEY_dup(ecdhp)) == NULL) { @@ -1861,6 +1870,16 @@ int ssl3_send_server_key_exchange(SSL *s) n += 1 + nr[i]; else #endif +#ifndef OPENSSL_NO_DH + /* + * for interoperability with some versions of the Microsoft TLS + * stack, we need to zero pad the DHE pub key to the same length + * as the prime, so use the length of the prime here + */ + if ((i == 2) && (type & (SSL_kEDH))) + n += 2 + nr[0]; + else +#endif n += 2 + nr[i]; } @@ -1872,6 +1891,11 @@ int ssl3_send_server_key_exchange(SSL *s) goto f_err; } kn = EVP_PKEY_size(pkey); + /* Allow space for signature algorithm */ + if (SSL_USE_SIGALGS(s)) + kn += 2; + /* Allow space for signature length */ + kn += 2; } else { pkey = NULL; kn = 0; @@ -1890,6 +1914,20 @@ int ssl3_send_server_key_exchange(SSL *s) p++; } else #endif +#ifndef OPENSSL_NO_DH + /* + * for interoperability with some versions of the Microsoft TLS + * stack, we need to zero pad the DHE pub key to the same length + * as the prime + */ + if ((i == 2) && (type & (SSL_kEDH))) { + s2n(nr[0], p); + for (j = 0; j < (nr[0] - nr[2]); ++j) { + *p = 0; + ++p; + } + } else +#endif s2n(nr[i], p); BN_bn2bin(r[i], p); p += nr[i]; @@ -2051,7 +2089,7 @@ int 
ssl3_send_certificate_request(SSL *s) if (SSL_USE_SIGALGS(s)) { const unsigned char *psigs; - nl = tls12_get_psigalgs(s, &psigs); + nl = tls12_get_psigalgs(s, 1, &psigs); s2n(nl, p); memcpy(p, psigs, nl); p += nl; @@ -2229,11 +2267,8 @@ int ssl3_get_client_key_exchange(SSL *s) * fails. See https://tools.ietf.org/html/rfc5246#section-7.4.7.1 */ - /* - * should be RAND_bytes, but we cannot work around a failure. - */ - if (RAND_pseudo_bytes(rand_premaster_secret, - sizeof(rand_premaster_secret)) <= 0) + if (RAND_bytes(rand_premaster_secret, + sizeof(rand_premaster_secret)) <= 0) goto err; decrypt_len = RSA_private_decrypt((int)n, p, p, rsa, RSA_PKCS1_PADDING); @@ -2323,7 +2358,8 @@ int ssl3_get_client_key_exchange(SSL *s) if (!(s->options & SSL_OP_SSLEAY_080_CLIENT_DH_BUG)) { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, SSL_R_DH_PUBLIC_VALUE_LENGTH_IS_WRONG); - goto err; + al = SSL_AD_HANDSHAKE_FAILURE; + goto f_err; } else { p -= 2; i = (int)n; @@ -2376,9 +2412,10 @@ int ssl3_get_client_key_exchange(SSL *s) i = DH_compute_key(p, pub, dh_srvr); if (i <= 0) { + al = SSL_AD_HANDSHAKE_FAILURE; SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB); BN_clear_free(pub); - goto err; + goto f_err; } DH_free(s->s3->tmp.dh); @@ -2676,12 +2713,14 @@ int ssl3_get_client_key_exchange(SSL *s) i = *p; p += 1; if (n != 1 + i) { - SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, ERR_R_EC_LIB); - goto err; + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, SSL_R_LENGTH_MISMATCH); + al = SSL_AD_DECODE_ERROR; + goto f_err; } if (EC_POINT_oct2point(group, clnt_ecpoint, p, i, bn_ctx) == 0) { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, ERR_R_EC_LIB); - goto err; + al = SSL_AD_HANDSHAKE_FAILURE; + goto f_err; } /* * p is pointing to somewhere in the buffer currently, so set it @@ -2984,6 +3023,11 @@ int ssl3_get_cert_verify(SSL *s) peer = s->session->peer; pkey = X509_get_pubkey(peer); + if (pkey == NULL) { + al = SSL_AD_INTERNAL_ERROR; + goto f_err; + } + type = X509_certificate_type(peer, pkey); if 
(!(type & EVP_PKT_SIGN)) { @@ -3120,7 +3164,9 @@ int ssl3_get_cert_verify(SSL *s) goto f_err; } if (i != 64) { +#ifdef SSL_DEBUG fprintf(stderr, "GOST signature length is %d", i); +#endif } for (idx = 0; idx < 64; idx++) { signature[63 - idx] = p[idx]; @@ -3213,6 +3259,12 @@ int ssl3_get_client_certificate(SSL *s) goto f_err; } for (nc = 0; nc < llen;) { + if (nc + 3 > llen) { + al = SSL_AD_DECODE_ERROR; + SSLerr(SSL_F_SSL3_GET_CLIENT_CERTIFICATE, + SSL_R_CERT_LENGTH_MISMATCH); + goto f_err; + } n2l3(p, l); if ((l + nc + 3) > llen) { al = SSL_AD_DECODE_ERROR; @@ -3423,8 +3475,22 @@ int ssl3_send_newsession_ticket(SSL *s) * all the work otherwise use generated values from parent ctx. */ if (tctx->tlsext_ticket_key_cb) { - if (tctx->tlsext_ticket_key_cb(s, key_name, iv, &ctx, - &hctx, 1) < 0) + /* if 0 is returned, write en empty ticket */ + int ret = tctx->tlsext_ticket_key_cb(s, key_name, iv, &ctx, + &hctx, 1); + + if (ret == 0) { + l2n(0, p); /* timeout */ + s2n(0, p); /* length */ + ssl_set_handshake_header(s, SSL3_MT_NEWSESSION_TICKET, + p - ssl_handshake_start(s)); + s->state = SSL3_ST_SW_SESSION_TICKET_B; + OPENSSL_free(senc); + EVP_CIPHER_CTX_cleanup(&ctx); + HMAC_CTX_cleanup(&hctx); + return ssl_do_write(s); + } + if (ret < 0) goto err; } else { if (RAND_bytes(iv, 16) <= 0) @@ -3497,37 +3563,34 @@ int ssl3_send_cert_status(SSL *s) { if (s->state == SSL3_ST_SW_CERT_STATUS_A) { unsigned char *p; + size_t msglen; + /*- * Grow buffer if need be: the length calculation is as - * follows 1 (message type) + 3 (message length) + + * follows handshake_header_length + * 1 (ocsp response type) + 3 (ocsp response length) * + (ocsp response) */ - if (!BUF_MEM_grow(s->init_buf, 8 + s->tlsext_ocsp_resplen)) { + msglen = 4 + s->tlsext_ocsp_resplen; + if (!BUF_MEM_grow(s->init_buf, SSL_HM_HEADER_LENGTH(s) + msglen)) { s->state = SSL_ST_ERR; return -1; } - p = (unsigned char *)s->init_buf->data; + p = ssl_handshake_start(s); - /* do the header */ - *(p++) = 
SSL3_MT_CERTIFICATE_STATUS; - /* message length */ - l2n3(s->tlsext_ocsp_resplen + 4, p); /* status type */ *(p++) = s->tlsext_status_type; /* length of OCSP response */ l2n3(s->tlsext_ocsp_resplen, p); /* actual response */ memcpy(p, s->tlsext_ocsp_resp, s->tlsext_ocsp_resplen); - /* number of bytes to write */ - s->init_num = 8 + s->tlsext_ocsp_resplen; - s->state = SSL3_ST_SW_CERT_STATUS_B; - s->init_off = 0; + + ssl_set_handshake_header(s, SSL3_MT_CERTIFICATE_STATUS, msglen); } /* SSL3_ST_SW_CERT_STATUS_B */ - return (ssl3_do_write(s, SSL3_RT_HANDSHAKE)); + return (ssl_do_write(s)); } # ifndef OPENSSL_NO_NEXTPROTONEG diff --git a/thirdparty/openssl/ssl/ssl-lib.com b/thirdparty/openssl/ssl/ssl-lib.com deleted file mode 100644 index 43fea17541..0000000000 --- a/thirdparty/openssl/ssl/ssl-lib.com +++ /dev/null @@ -1,1229 +0,0 @@ -$! -$! SSL-LIB.COM -$! Written By: Robert Byer -$! Vice-President -$! A-Com Computing, Inc. -$! byer@mail.all-net.net -$! -$! Changes by Richard Levitte <richard@levitte.org> -$! -$! This command file compiles and creates the "[.xxx.EXE.SSL]LIBSSL.OLB" -$! library for OpenSSL. The "xxx" denotes the machine architecture of -$! ALPHA, IA64 or VAX. -$! -$! It is written to detect what type of machine you are compiling on -$! (i.e. ALPHA or VAX) and which "C" compiler you have (i.e. VAXC, DECC -$! or GNU C) or you can specify which compiler to use. -$! -$! Specify the following as P1 to build just that part or ALL to just -$! build everything. -$! -$! LIBRARY To just compile the [.xxx.EXE.SSL]LIBSSL.OLB Library. -$! SSL_TASK To just compile the [.xxx.EXE.SSL]SSL_TASK.EXE -$! -$! Specify DEBUG or NODEBUG as P2 to compile with or without debugger -$! information. -$! -$! Specify which compiler at P3 to try to compile under. -$! -$! VAXC For VAX C. -$! DECC For DEC C. -$! GNUC For GNU C. -$! -$! If you don't specify a compiler, it will try to determine which -$! "C" compiler to use. -$! -$! 
P4, if defined, sets a TCP/IP library to use, through one of the following -$! keywords: -$! -$! UCX for UCX -$! TCPIP for TCPIP (post UCX) -$! SOCKETSHR for SOCKETSHR+NETLIB -$! -$! P5, if defined, sets a compiler thread NOT needed on OpenVMS 7.1 (and up) -$! -$! P6, if defined, specifies the C pointer size. Ignored on VAX. -$! ("64=ARGV" gives more efficient code with HP C V7.3 or newer.) -$! Supported values are: -$! -$! "" Compile with default (/NOPOINTER_SIZE) -$! 32 Compile with /POINTER_SIZE=32 (SHORT) -$! 64 Compile with /POINTER_SIZE=64[=ARGV] (LONG[=ARGV]) -$! (Automatically select ARGV if compiler supports it.) -$! 64= Compile with /POINTER_SIZE=64 (LONG). -$! 64=ARGV Compile with /POINTER_SIZE=64=ARGV (LONG=ARGV). -$! -$! P7, if defined, specifies a directory where ZLIB files (zlib.h, -$! libz.olb) may be found. Optionally, a non-default object library -$! name may be included ("dev:[dir]libz_64.olb", for example). -$! -$! -$! Announce/identify. -$! -$ proc = f$environment( "procedure") -$ write sys$output "@@@ "+ - - f$parse( proc, , , "name")+ f$parse( proc, , , "type") -$! -$! Define A TCP/IP Library That We Will Need To Link To. -$! (That Is, If We Need To Link To One.) -$! -$ TCPIP_LIB = "" -$ ZLIB_LIB = "" -$! -$! Check What Architecture We Are Using. -$! -$ IF (F$GETSYI("CPU").LT.128) -$ THEN -$! -$! The Architecture Is VAX. -$! -$ ARCH = "VAX" -$! -$! Else... -$! -$ ELSE -$! -$! The Architecture Is Alpha, IA64 or whatever comes in the future. -$! -$ ARCH = F$EDIT( F$GETSYI( "ARCH_NAME"), "UPCASE") -$ IF (ARCH .EQS. "") THEN ARCH = "UNK" -$! -$! End The Architecture Check. -$! -$ ENDIF -$! -$ ARCHD = ARCH -$ LIB32 = "32" -$ OPT_FILE = "" -$ POINTER_SIZE = "" -$! -$! Check To Make Sure We Have Valid Command Line Parameters. -$! -$ GOSUB CHECK_OPTIONS -$! -$! Define The OBJ and EXE Directories. -$! -$ OBJ_DIR := SYS$DISK:[-.'ARCHD'.OBJ.SSL] -$ EXE_DIR := SYS$DISK:[-.'ARCHD'.EXE.SSL] -$! -$! Specify the destination directory in any /MAP option. -$! 
-$ if (LINKMAP .eqs. "MAP") -$ then -$ LINKMAP = LINKMAP+ "=''EXE_DIR'" -$ endif -$! -$! Add the location prefix to the linker options file name. -$! -$ if (OPT_FILE .nes. "") -$ then -$ OPT_FILE = EXE_DIR+ OPT_FILE -$ endif -$! -$! Initialise logical names and such -$! -$ GOSUB INITIALISE -$! -$! Tell The User What Kind of Machine We Run On. -$! -$ WRITE SYS$OUTPUT "Host system architecture: ''ARCHD'" -$! -$! Check To See If The Architecture Specific OBJ Directory Exists. -$! -$ IF (F$PARSE(OBJ_DIR).EQS."") -$ THEN -$! -$! It Dosen't Exist, So Create It. -$! -$ CREATE/DIR 'OBJ_DIR' -$! -$! End The Architecture Specific OBJ Directory Check. -$! -$ ENDIF -$! -$! Check To See If The Architecture Specific Directory Exists. -$! -$ IF (F$PARSE(EXE_DIR).EQS."") -$ THEN -$! -$! It Dosen't Exist, So Create It. -$! -$ CREATE/DIR 'EXE_DIR' -$! -$! End The Architecture Specific Directory Check. -$! -$ ENDIF -$! -$! Define The Library Name. -$! -$ SSL_LIB := 'EXE_DIR'SSL_LIBSSL'LIB32'.OLB -$! -$! Define The CRYPTO-LIB We Are To Use. -$! -$ CRYPTO_LIB := SYS$DISK:[-.'ARCHD'.EXE.CRYPTO]SSL_LIBCRYPTO'LIB32'.OLB -$! -$! Set up exceptional compilations. -$! -$ CC5_SHOWN = 0 -$! -$! Check To See What We Are To Do. -$! -$ IF (BUILDALL.EQS."TRUE") -$ THEN -$! -$! Since Nothing Special Was Specified, Do Everything. -$! -$ GOSUB LIBRARY -$ GOSUB SSL_TASK -$! -$! Else... -$! -$ ELSE -$! -$! Build Just What The User Wants Us To Build. -$! -$ GOSUB 'BUILDALL' -$! -$! End The BUILDALL Check. -$! -$ ENDIF -$! -$! Time To EXIT. -$! -$ EXIT: -$ GOSUB CLEANUP -$ EXIT -$! -$! Compile The Library. -$! -$ LIBRARY: -$! -$! Check To See If We Already Have A "[.xxx.EXE.SSL]SSL_LIBSSL''LIB32'.OLB" Library... -$! -$ IF (F$SEARCH(SSL_LIB).EQS."") -$ THEN -$! -$! Guess Not, Create The Library. -$! -$ LIBRARY/CREATE/OBJECT 'SSL_LIB' -$! -$! End The Library Exist Check. -$! -$ ENDIF -$! -$! Define The Different SSL "library" Files. -$! 
-$ LIB_SSL = "s2_meth, s2_srvr, s2_clnt, s2_lib, s2_enc, s2_pkt,"+ - - "s3_meth, s3_srvr, s3_clnt, s3_lib, s3_enc, s3_pkt, s3_both, s3_cbc,"+ - - "s23_meth,s23_srvr,s23_clnt,s23_lib, s23_pkt,"+ - - "t1_meth, t1_srvr, t1_clnt, t1_lib, t1_enc, t1_ext,"+ - - "d1_meth, d1_srvr, d1_clnt, d1_lib, d1_pkt,"+ - - "d1_both,d1_srtp,"+ - - "ssl_lib,ssl_err2,ssl_cert,ssl_sess,"+ - - "ssl_ciph,ssl_stat,ssl_rsa,"+ - - "ssl_asn1,ssl_txt,ssl_algs,ssl_conf,"+ - - "bio_ssl,ssl_err,kssl,t1_reneg,tls_srp,t1_trce,ssl_utst" -$! -$ COMPILEWITH_CC5 = "" -$! -$! Tell The User That We Are Compiling The Library. -$! -$ WRITE SYS$OUTPUT "Building The ",SSL_LIB," Library." -$! -$! Define A File Counter And Set It To "0" -$! -$ FILE_COUNTER = 0 -$! -$! Top Of The File Loop. -$! -$ NEXT_FILE: -$! -$! O.K, Extract The File Name From The File List. -$! -$ FILE_NAME = F$EDIT(F$ELEMENT(FILE_COUNTER,",",LIB_SSL),"COLLAPSE") -$! -$! Check To See If We Are At The End Of The File List. -$! -$ IF (FILE_NAME.EQS.",") THEN GOTO FILE_DONE -$! -$! Increment The Counter. -$! -$ FILE_COUNTER = FILE_COUNTER + 1 -$! -$! Create The Source File Name. -$! -$ SOURCE_FILE = "SYS$DISK:[]" + FILE_NAME + ".C" -$! -$! Create The Object File Name. -$! -$ OBJECT_FILE = OBJ_DIR + FILE_NAME + ".OBJ" -$ ON WARNING THEN GOTO NEXT_FILE -$! -$! Check To See If The File We Want To Compile Is Actually There. -$! -$ IF (F$SEARCH(SOURCE_FILE).EQS."") -$ THEN -$! -$! Tell The User That The File Dosen't Exist. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The File ",SOURCE_FILE," Dosen't Exist." -$ WRITE SYS$OUTPUT "" -$! -$! Exit The Build. -$! -$ EXIT -$! -$! End The File Exists Check. -$! -$ ENDIF -$! -$! Tell The User What File We Are Compiling. -$! -$ WRITE SYS$OUTPUT " ",FILE_NAME,".c" -$! -$! Compile The File. -$! -$ ON ERROR THEN GOTO NEXT_FILE -$ CC/OBJECT='OBJECT_FILE' 'SOURCE_FILE' -$! -$! Add It To The Library. -$! -$ LIBRARY/REPLACE/OBJECT 'SSL_LIB' 'OBJECT_FILE' -$! -$! Time To Clean Up The Object File. -$! 
-$ DELETE 'OBJECT_FILE';* -$! -$! Go Back And Get The Next File Name. -$! -$ GOTO NEXT_FILE -$! -$! All Done With This Library. -$! -$ FILE_DONE: -$! -$! Tell The User That We Are All Done. -$! -$ WRITE SYS$OUTPUT "Library ",SSL_LIB," Compiled." -$! -$! Time To RETURN. -$! -$ RETURN -$ SSL_TASK: -$! -$! Check To See If We Have The Proper Libraries. -$! -$ GOSUB LIB_CHECK -$! -$! Check To See If We Have A Linker Option File. -$! -$ GOSUB CHECK_OPT_FILE -$! -$! Check To See If The File We Want To Compile Is Actually There. -$! -$ IF (F$SEARCH("SYS$DISK:[]SSL_TASK.C").EQS."") -$ THEN -$! -$! Tell The User That The File Dosen't Exist. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The File SSL_TASK.C Dosen't Exist." -$ WRITE SYS$OUTPUT "" -$! -$! Exit The Build. -$! -$ EXIT -$! -$! End The SSL_TASK.C File Check. -$! -$ ENDIF -$! -$ COMPILEWITH_CC5 = "" !!! ",ssl_task," -$! -$! Tell The User We Are Creating The SSL_TASK. -$! -$! Tell The User We Are Creating The SSL_TASK. -$! -$ WRITE SYS$OUTPUT "Creating SSL_TASK OSU HTTP SSL Engine." -$! -$! Tell The User What File We Are Compiling. -$! -$ FILE_NAME = "ssl_task" -$ WRITE SYS$OUTPUT " ",FILE_NAME,".c" -$! -$! Compile The File. -$! -$ ON ERROR THEN GOTO SSL_TASK_END -$! -$ FILE_NAME0 = ","+ F$ELEMENT(0,".",FILE_NAME)+ "," -$ IF COMPILEWITH_CC5 - FILE_NAME0 .NES. COMPILEWITH_CC5 -$ THEN -$ if (.not. CC5_SHOWN) -$ then -$ CC5_SHOWN = 1 -$ write sys$output " \Using special rule (5)" -$ x = " "+ CC5 -$ write /symbol sys$output x -$ endif -$ CC5 /OBJECT='OBJ_DIR''FILE_NAME'.OBJ SYS$DISK:[]'FILE_NAME'.C -$ ELSE -$ CC /OBJECT='OBJ_DIR''FILE_NAME'.OBJ SYS$DISK:[]'FILE_NAME'.C -$ ENDIF -$! -$! Link The Program. -$! -$ LINK /'DEBUGGER' /'LINKMAP' /'TRACEBACK' /EXE='EXE_DIR'SSL_TASK.EXE - - 'OBJ_DIR'SSL_TASK.OBJ, - - 'SSL_LIB'/LIBRARY, - - 'CRYPTO_LIB'/LIBRARY - - 'TCPIP_LIB' - - 'ZLIB_LIB' - - ,'OPT_FILE' /OPTIONS -$! -$! Time To Return. -$! -$SSL_TASK_END: -$ RETURN -$! -$! Check For The Link Option FIle. -$! 
-$ CHECK_OPT_FILE: -$! -$! Check To See If We Need To Make A VAX C Option File. -$! -$ IF (COMPILER.EQS."VAXC") -$ THEN -$! -$! Check To See If We Already Have A VAX C Linker Option File. -$! -$ IF (F$SEARCH(OPT_FILE).EQS."") -$ THEN -$! -$! We Need A VAX C Linker Option File. -$! -$ CREATE 'OPT_FILE' -$DECK -! -! Default System Options File To Link Against -! The Sharable VAX C Runtime Library. -! -SYS$SHARE:VAXCRTL.EXE/SHARE -$EOD -$! -$! End The Option File Check. -$! -$ ENDIF -$! -$! End The VAXC Check. -$! -$ ENDIF -$! -$! Check To See If We Need A GNU C Option File. -$! -$ IF (COMPILER.EQS."GNUC") -$ THEN -$! -$! Check To See If We Already Have A GNU C Linker Option File. -$! -$ IF (F$SEARCH(OPT_FILE).EQS."") -$ THEN -$! -$! We Need A GNU C Linker Option File. -$! -$ CREATE 'OPT_FILE' -$DECK -! -! Default System Options File To Link Against -! The Sharable C Runtime Library. -! -GNU_CC:[000000]GCCLIB/LIBRARY -SYS$SHARE:VAXCRTL/SHARE -$EOD -$! -$! End The Option File Check. -$! -$ ENDIF -$! -$! End The GNU C Check. -$! -$ ENDIF -$! -$! Check To See If We Need A DEC C Option File. -$! -$ IF (COMPILER.EQS."DECC") -$ THEN -$! -$! Check To See If We Already Have A DEC C Linker Option File. -$! -$ IF (F$SEARCH(OPT_FILE).EQS."") -$ THEN -$! -$! Figure Out If We Need A non-VAX Or A VAX Linker Option File. -$! -$ IF (ARCH.EQS."VAX") -$ THEN -$! -$! We Need A DEC C Linker Option File For VAX. -$! -$ CREATE 'OPT_FILE' -$DECK -! -! Default System Options File To Link Against -! The Sharable DEC C Runtime Library. -! -SYS$SHARE:DECC$SHR.EXE/SHARE -$EOD -$! -$! Else... -$! -$ ELSE -$! -$! Create The non-VAX Linker Option File. -$! -$ CREATE 'OPT_FILE' -$DECK -! -! Default System Options File For non-VAX To Link Against -! The Sharable C Runtime Library. -! -SYS$SHARE:CMA$OPEN_LIB_SHR/SHARE -SYS$SHARE:CMA$OPEN_RTL/SHARE -$EOD -$! -$! End The DEC C Option File Check. -$! -$ ENDIF -$! -$! End The Option File Search. -$! -$ ENDIF -$! -$! End The DEC C Check. -$! -$ ENDIF -$! 
-$! Tell The User What Linker Option File We Are Using. -$! -$ WRITE SYS$OUTPUT "Using Linker Option File ",OPT_FILE,"." -$! -$! Time To RETURN. -$! -$ RETURN -$ LIB_CHECK: -$! -$! Look For The VAX Library LIBSSL.OLB. -$! -$ IF (F$SEARCH(SSL_LIB).EQS."") -$ THEN -$! -$! Tell The User We Can't Find The LIBSSL.OLB Library. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "Can't Find The Library ",SSL_LIB,"." -$ WRITE SYS$OUTPUT "We Can't Link Without It." -$ WRITE SYS$OUTPUT "" -$! -$! Since We Can't Link Without It, Exit. -$! -$ EXIT -$! -$! End The LIBSSL.OLB Library Check. -$! -$ ENDIF -$! -$! Look For The Library LIBCRYPTO.OLB. -$! -$ IF (F$SEARCH(CRYPTO_LIB).EQS."") -$ THEN -$! -$! Tell The User We Can't Find The LIBCRYPTO.OLB Library. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "Can't Find The Library ",CRYPTO_LIB,"." -$ WRITE SYS$OUTPUT "We Can't Link Without It." -$ WRITE SYS$OUTPUT "" -$! -$! Since We Can't Link Without It, Exit. -$! -$ EXIT -$! -$! End The LIBCRYPTO.OLB Library Check. -$! -$ ENDIF -$! -$! Time To Return. -$! -$ RETURN -$! -$! Check The User's Options. -$! -$ CHECK_OPTIONS: -$! -$! Check To See If P1 Is Blank. -$! -$ IF (P1.EQS."ALL") -$ THEN -$! -$! P1 Is Blank, So Build Everything. -$! -$ BUILDALL = "TRUE" -$! -$! Else... -$! -$ ELSE -$! -$! Else, Check To See If P1 Has A Valid Argument. -$! -$ IF (P1.EQS."LIBRARY").OR.(P1.EQS."SSL_TASK") -$ THEN -$! -$! A Valid Argument. -$! -$ BUILDALL = P1 -$! -$! Else... -$! -$ ELSE -$! -$! Tell The User We Don't Know What They Want. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ",P1," Is Invalid. The Valid Options Are:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " ALL : Just Build Everything." -$ WRITE SYS$OUTPUT " LIBRARY : To Compile Just The [.xxx.EXE.SSL]LIBSSL.OLB Library." -$ WRITE SYS$OUTPUT " SSL_TASK : To Compile Just The [.xxx.EXE.SSL]SSL_TASK.EXE Program." 
-$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " Where 'xxx' Stands For:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " ALPHA[64]: Alpha Architecture." -$ WRITE SYS$OUTPUT " IA64[64] : IA64 Architecture." -$ WRITE SYS$OUTPUT " VAX : VAX Architecture." -$ WRITE SYS$OUTPUT "" -$! -$! Time To EXIT. -$! -$ EXIT -$! -$! End The Valid Argument Check. -$! -$ ENDIF -$! -$! End The P1 Check. -$! -$ ENDIF -$! -$! Check To See If P2 Is Blank. -$! -$ IF (P2.EQS."NODEBUG") -$ THEN -$! -$! P2 Is NODEBUG, So Compile Without Debugger Information. -$! -$ DEBUGGER = "NODEBUG" -$ LINKMAP = "NOMAP" -$ TRACEBACK = "NOTRACEBACK" -$ GCC_OPTIMIZE = "OPTIMIZE" -$ CC_OPTIMIZE = "OPTIMIZE" -$ WRITE SYS$OUTPUT "No Debugger Information Will Be Produced During Compile." -$ WRITE SYS$OUTPUT "Compiling With Compiler Optimization." -$! -$! Else... -$! -$ ELSE -$! -$! Check To See If We Are To Compile With Debugger Information. -$! -$ IF (P2.EQS."DEBUG") -$ THEN -$! -$! Compile With Debugger Information. -$! -$ DEBUGGER = "DEBUG" -$ LINKMAP = "MAP" -$ TRACEBACK = "TRACEBACK" -$ GCC_OPTIMIZE = "NOOPTIMIZE" -$ CC_OPTIMIZE = "NOOPTIMIZE" -$ WRITE SYS$OUTPUT "Debugger Information Will Be Produced During Compile." -$ WRITE SYS$OUTPUT "Compiling Without Compiler Optimization." -$ ELSE -$! -$! Tell The User Entered An Invalid Option. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ",P2," Is Invalid. The Valid Options Are:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " DEBUG : Compile With The Debugger Information." -$ WRITE SYS$OUTPUT " NODEBUG : Compile Without The Debugger Information." -$ WRITE SYS$OUTPUT "" -$! -$! Time To EXIT. -$! -$ EXIT -$! -$! End The Valid Argument Check. -$! -$ ENDIF -$! -$! End The P2 Check. -$! -$ ENDIF -$! -$! Special Threads For OpenVMS v7.1 Or Later -$! -$! Written By: Richard Levitte -$! richard@levitte.org -$! -$! -$! Check To See If We Have A Option For P5. -$! -$ IF (P5.EQS."") -$ THEN -$! -$! Get The Version Of VMS We Are Using. -$! 
-$ ISSEVEN := -$ TMP = F$ELEMENT(0,"-",F$EXTRACT(1,4,F$GETSYI("VERSION"))) -$ TMP = F$INTEGER(F$ELEMENT(0,".",TMP)+F$ELEMENT(1,".",TMP)) -$! -$! Check To See If The VMS Version Is v7.1 Or Later. -$! -$ IF (TMP.GE.71) -$ THEN -$! -$! We Have OpenVMS v7.1 Or Later, So Use The Special Threads. -$! -$ ISSEVEN := ,PTHREAD_USE_D4 -$! -$! End The VMS Version Check. -$! -$ ENDIF -$! -$! End The P5 Check. -$! -$ ENDIF -$! -$! Check P6 (POINTER_SIZE). -$! -$ IF (P6 .NES. "") .AND. (ARCH .NES. "VAX") -$ THEN -$! -$ IF (P6 .EQS. "32") -$ THEN -$ POINTER_SIZE = " /POINTER_SIZE=32" -$ ELSE -$ POINTER_SIZE = F$EDIT( P6, "COLLAPSE, UPCASE") -$ IF ((POINTER_SIZE .EQS. "64") .OR. - - (POINTER_SIZE .EQS. "64=") .OR. - - (POINTER_SIZE .EQS. "64=ARGV")) -$ THEN -$ ARCHD = ARCH+ "_64" -$ LIB32 = "" -$ POINTER_SIZE = " /POINTER_SIZE=64" -$ ELSE -$! -$! Tell The User Entered An Invalid Option. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ", P6, - - " Is Invalid. The Valid Options Are:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT - - " """" : Compile with default (short) pointers." -$ WRITE SYS$OUTPUT - - " 32 : Compile with 32-bit (short) pointers." -$ WRITE SYS$OUTPUT - - " 64 : Compile with 64-bit (long) pointers (auto ARGV)." -$ WRITE SYS$OUTPUT - - " 64= : Compile with 64-bit (long) pointers (no ARGV)." -$ WRITE SYS$OUTPUT - - " 64=ARGV : Compile with 64-bit (long) pointers (ARGV)." -$ WRITE SYS$OUTPUT "" -$! -$! Time To EXIT. -$! -$ EXIT -$! -$ ENDIF -$! -$ ENDIF -$! -$! End The P6 (POINTER_SIZE) Check. -$! -$ ENDIF -$! -$! Set basic C compiler /INCLUDE directories. -$! -$ CC_INCLUDES = "SYS$DISK:[-.CRYPTO],SYS$DISK:[-]" -$! -$! Check To See If P3 Is Blank. -$! -$ IF (P3.EQS."") -$ THEN -$! -$! O.K., The User Didn't Specify A Compiler, Let's Try To -$! Find Out Which One To Use. -$! -$! Check To See If We Have GNU C. -$! -$ IF (F$TRNLNM("GNU_CC").NES."") -$ THEN -$! -$! Looks Like GNUC, Set To Use GNUC. -$! -$ P3 = "GNUC" -$! -$! End The GNU C Compiler Check. -$! 
-$ ELSE -$! -$! Check To See If We Have VAXC Or DECC. -$! -$ IF (ARCH.NES."VAX").OR.(F$TRNLNM("DECC$CC_DEFAULT").NES."") -$ THEN -$! -$! Looks Like DECC, Set To Use DECC. -$! -$ P3 = "DECC" -$! -$! Else... -$! -$ ELSE -$! -$! Looks Like VAXC, Set To Use VAXC. -$! -$ P3 = "VAXC" -$! -$! End The VAXC Compiler Check. -$! -$ ENDIF -$! -$! End The DECC & VAXC Compiler Check. -$! -$ ENDIF -$! -$! End The Compiler Check. -$! -$ ENDIF -$! -$! Check To See If We Have A Option For P4. -$! -$ IF (P4.EQS."") -$ THEN -$! -$! Find out what socket library we have available -$! -$ IF F$PARSE("SOCKETSHR:") .NES. "" -$ THEN -$! -$! We have SOCKETSHR, and it is my opinion that it's the best to use. -$! -$ P4 = "SOCKETSHR" -$! -$! Tell the user -$! -$ WRITE SYS$OUTPUT "Using SOCKETSHR for TCP/IP" -$! -$! Else, let's look for something else -$! -$ ELSE -$! -$! Like UCX (the reason to do this before Multinet is that the UCX -$! emulation is easier to use...) -$! -$ IF F$TRNLNM("UCX$IPC_SHR") .NES. "" - - .OR. F$PARSE("SYS$SHARE:UCX$IPC_SHR.EXE") .NES. "" - - .OR. F$PARSE("SYS$LIBRARY:UCX$IPC.OLB") .NES. "" -$ THEN -$! -$! Last resort: a UCX or UCX-compatible library -$! -$ P4 = "UCX" -$! -$! Tell the user -$! -$ WRITE SYS$OUTPUT "Using UCX or an emulation thereof for TCP/IP" -$! -$! That was all... -$! -$ ENDIF -$ ENDIF -$ ENDIF -$! -$! Set Up Initial CC Definitions, Possibly With User Ones -$! -$ CCDEFS = "TCPIP_TYPE_''P4'" -$ IF F$TYPE(USER_CCDEFS) .NES. "" THEN CCDEFS = CCDEFS + "," + USER_CCDEFS -$ CCEXTRAFLAGS = "" -$ IF F$TYPE(USER_CCFLAGS) .NES. "" THEN CCEXTRAFLAGS = USER_CCFLAGS -$ CCDISABLEWARNINGS = "" !!! "MAYLOSEDATA3" !!! "LONGLONGTYPE,LONGLONGSUFX,FOUNDCR" -$ IF F$TYPE(USER_CCDISABLEWARNINGS) .NES. "" -$ THEN -$ IF CCDISABLEWARNINGS .NES. THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + "," -$ CCDISABLEWARNINGS = CCDISABLEWARNINGS + USER_CCDISABLEWARNINGS -$ ENDIF -$! -$! Check To See If We Have A ZLIB Option. -$! -$ ZLIB = P7 -$ IF (ZLIB .NES. "") -$ THEN -$! -$! 
Check for expected ZLIB files. -$! -$ err = 0 -$ file1 = f$parse( "zlib.h", ZLIB, , , "SYNTAX_ONLY") -$ if (f$search( file1) .eqs. "") -$ then -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ", ZLIB, " Is Invalid." -$ WRITE SYS$OUTPUT " Can't find header: ''file1'" -$ err = 1 -$ endif -$ file1 = f$parse( "A.;", ZLIB)- "A.;" -$! -$ file2 = f$parse( ZLIB, "libz.olb", , , "SYNTAX_ONLY") -$ if (f$search( file2) .eqs. "") -$ then -$ if (err .eq. 0) -$ then -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ", ZLIB, " Is Invalid." -$ endif -$ WRITE SYS$OUTPUT " Can't find library: ''file2'" -$ WRITE SYS$OUTPUT "" -$ err = err+ 2 -$ endif -$ if (err .eq. 1) -$ then -$ WRITE SYS$OUTPUT "" -$ endif -$! -$ if (err .ne. 0) -$ then -$ EXIT -$ endif -$! -$ CCDEFS = """ZLIB=1"", "+ CCDEFS -$ CC_INCLUDES = CC_INCLUDES+ ", "+ file1 -$ ZLIB_LIB = ", ''file2' /library" -$! -$! Print info -$! -$ WRITE SYS$OUTPUT "ZLIB library spec: ", file2 -$! -$! End The ZLIB Check. -$! -$ ENDIF -$! -$! Check To See If The User Entered A Valid Parameter. -$! -$ IF (P3.EQS."VAXC").OR.(P3.EQS."DECC").OR.(P3.EQS."GNUC") -$ THEN -$! -$! Check To See If The User Wanted DECC. -$! -$ IF (P3.EQS."DECC") -$ THEN -$! -$! Looks Like DECC, Set To Use DECC. -$! -$ COMPILER = "DECC" -$! -$! Tell The User We Are Using DECC. -$! -$ WRITE SYS$OUTPUT "Using DECC 'C' Compiler." -$! -$! Use DECC... -$! -$ CC = "CC" -$ IF ARCH.EQS."VAX" .AND. F$TRNLNM("DECC$CC_DEFAULT").NES."/DECC" - - THEN CC = "CC/DECC" -$ CC = CC + " /''CC_OPTIMIZE' /''DEBUGGER' /STANDARD=RELAXED"+ - - "''POINTER_SIZE' /NOLIST /PREFIX=ALL" + - - " /INCLUDE=(''CC_INCLUDES') " + CCEXTRAFLAGS -$! -$! Define The Linker Options File Name. -$! -$ OPT_FILE = "VAX_DECC_OPTIONS.OPT" -$! -$! End DECC Check. -$! -$ ENDIF -$! -$! Check To See If We Are To Use VAXC. -$! -$ IF (P3.EQS."VAXC") -$ THEN -$! -$! Looks Like VAXC, Set To Use VAXC. -$! -$ COMPILER = "VAXC" -$! -$! Tell The User We Are Using VAX C. -$! 
-$ WRITE SYS$OUTPUT "Using VAXC 'C' Compiler." -$! -$! Compile Using VAXC. -$! -$ CC = "CC" -$ IF ARCH.NES."VAX" -$ THEN -$ WRITE SYS$OUTPUT "There is no VAX C on ''ARCH'!" -$ EXIT -$ ENDIF -$ IF F$TRNLNM("DECC$CC_DEFAULT").EQS."/DECC" THEN CC = "CC/VAXC" -$ CC = CC + "/''CC_OPTIMIZE'/''DEBUGGER'/NOLIST" + - - "/INCLUDE=(''CC_INCLUDES')" + CCEXTRAFLAGS -$ CCDEFS = CCDEFS + ",""VAXC""" -$! -$! Define <sys> As SYS$COMMON:[SYSLIB] -$! -$ DEFINE/NOLOG SYS SYS$COMMON:[SYSLIB] -$! -$! Define The Linker Options File Name. -$! -$ OPT_FILE = "VAX_VAXC_OPTIONS.OPT" -$! -$! End VAXC Check -$! -$ ENDIF -$! -$! Check To See If We Are To Use GNU C. -$! -$ IF (P3.EQS."GNUC") -$ THEN -$! -$! Looks Like GNUC, Set To Use GNUC. -$! -$ COMPILER = "GNUC" -$! -$! Tell The User We Are Using GNUC. -$! -$ WRITE SYS$OUTPUT "Using GNU 'C' Compiler." -$! -$! Use GNU C... -$! -$ IF F$TYPE(GCC) .EQS. "" THEN GCC := GCC -$ CC = GCC+"/NOCASE_HACK/''GCC_OPTIMIZE'/''DEBUGGER'/NOLIST" + - - "/INCLUDE=(''CC_INCLUDES')" + CCEXTRAFLAGS -$! -$! Define The Linker Options File Name. -$! -$ OPT_FILE = "VAX_GNUC_OPTIONS.OPT" -$! -$! End The GNU C Check. -$! -$ ENDIF -$! -$! Set up default defines -$! -$ CCDEFS = """FLAT_INC=1""," + CCDEFS -$! -$! Finish up the definition of CC. -$! -$ IF COMPILER .EQS. "DECC" -$ THEN -$! Not all compiler versions support MAYLOSEDATA3. -$ OPT_TEST = "MAYLOSEDATA3" -$ DEFINE /USER_MODE SYS$ERROR NL: -$ DEFINE /USER_MODE SYS$OUTPUT NL: -$ 'CC' /NOCROSS_REFERENCE /NOLIST /NOOBJECT - - /WARNINGS = DISABLE = ('OPT_TEST', EMPTYFILE) NL: -$ IF ($SEVERITY) -$ THEN -$ IF CCDISABLEWARNINGS .NES. "" THEN - - CCDISABLEWARNINGS = CCDISABLEWARNINGS+ "," -$ CCDISABLEWARNINGS = CCDISABLEWARNINGS+ OPT_TEST -$ ENDIF -$ IF CCDISABLEWARNINGS .EQS. 
"" -$ THEN -$ CC4DISABLEWARNINGS = "DOLLARID" -$ ELSE -$ CC4DISABLEWARNINGS = CCDISABLEWARNINGS + ",DOLLARID" -$ CCDISABLEWARNINGS = " /WARNING=(DISABLE=(" + CCDISABLEWARNINGS + "))" -$ ENDIF -$ CC4DISABLEWARNINGS = " /WARNING=(DISABLE=(" + CC4DISABLEWARNINGS + "))" -$ ELSE -$ CCDISABLEWARNINGS = "" -$ CC4DISABLEWARNINGS = "" -$ ENDIF -$ CC2 = CC + " /DEFINE=(" + CCDEFS + ",_POSIX_C_SOURCE)" + CCDISABLEWARNINGS -$ CC3 = CC + " /DEFINE=(" + CCDEFS + ISSEVEN + ")" + CCDISABLEWARNINGS -$ CC = CC + " /DEFINE=(" + CCDEFS + ")" + CCDISABLEWARNINGS -$ IF COMPILER .EQS. "DECC" -$ THEN -$ CC4 = CC - CCDISABLEWARNINGS + CC4DISABLEWARNINGS -$ CC5 = CC3 - CCDISABLEWARNINGS + CC4DISABLEWARNINGS -$ ELSE -$ CC4 = CC -$ CC5 = CC3 -$ ENDIF -$! -$! Show user the result -$! -$ WRITE/SYMBOL SYS$OUTPUT "Main Compiling Command: ",CC -$! -$! Else The User Entered An Invalid Argument. -$! -$ ELSE -$! -$! Tell The User We Don't Know What They Want. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ",P3," Is Invalid. The Valid Options Are:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " VAXC : To Compile With VAX C." -$ WRITE SYS$OUTPUT " DECC : To Compile With DEC C." -$ WRITE SYS$OUTPUT " GNUC : To Compile With GNU C." -$ WRITE SYS$OUTPUT "" -$! -$! Time To EXIT. -$! -$ EXIT -$ ENDIF -$! -$! Time to check the contents, and to make sure we get the correct library. -$! -$ IF P4.EQS."SOCKETSHR" .OR. P4.EQS."MULTINET" .OR. P4.EQS."UCX" - - .OR. P4.EQS."TCPIP" .OR. P4.EQS."NONE" -$ THEN -$! -$! Check to see if SOCKETSHR was chosen -$! -$ IF P4.EQS."SOCKETSHR" -$ THEN -$! -$! Set the library to use SOCKETSHR -$! -$ TCPIP_LIB = ",SYS$DISK:[-.VMS]SOCKETSHR_SHR.OPT /OPTIONS" -$! -$! Done with SOCKETSHR -$! -$ ENDIF -$! -$! Check to see if MULTINET was chosen -$! -$ IF P4.EQS."MULTINET" -$ THEN -$! -$! Set the library to use UCX emulation. -$! -$ P4 = "UCX" -$! -$! Done with MULTINET -$! -$ ENDIF -$! -$! Check to see if UCX was chosen -$! -$ IF P4.EQS."UCX" -$ THEN -$! -$! 
Set the library to use UCX. -$! -$ TCPIP_LIB = ",SYS$DISK:[-.VMS]UCX_SHR_DECC.OPT /OPTIONS" -$ IF F$TRNLNM("UCX$IPC_SHR") .NES. "" -$ THEN -$ TCPIP_LIB = ",SYS$DISK:[-.VMS]UCX_SHR_DECC_LOG.OPT /OPTIONS" -$ ELSE -$ IF COMPILER .NES. "DECC" .AND. ARCH .EQS. "VAX" THEN - - TCPIP_LIB = ",SYS$DISK:[-.VMS]UCX_SHR_VAXC.OPT /OPTIONS" -$ ENDIF -$! -$! Done with UCX -$! -$ ENDIF -$! -$! Check to see if TCPIP was chosen -$! -$ IF P4.EQS."TCPIP" -$ THEN -$! -$! Set the library to use TCPIP (post UCX). -$! -$ TCPIP_LIB = ",SYS$DISK:[-.VMS]TCPIP_SHR_DECC.OPT /OPTIONS" -$! -$! Done with TCPIP -$! -$ ENDIF -$! -$! Check to see if NONE was chosen -$! -$ IF P4.EQS."NONE" -$ THEN -$! -$! Do not use a TCPIP library. -$! -$ TCPIP_LIB = "" -$! -$! Done with NONE -$! -$ ENDIF -$! -$! Print info -$! -$ WRITE SYS$OUTPUT "TCP/IP library spec: ", TCPIP_LIB- "," -$! -$! Else The User Entered An Invalid Argument. -$! -$ ELSE -$! -$! Tell The User We Don't Know What They Want. -$! -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "The Option ",P4," Is Invalid. The Valid Options Are:" -$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT " SOCKETSHR : To link with SOCKETSHR TCP/IP library." -$ WRITE SYS$OUTPUT " UCX : To link with UCX TCP/IP library." -$ WRITE SYS$OUTPUT " TCPIP : To link with TCPIP (post UCX) TCP/IP library." -$ WRITE SYS$OUTPUT "" -$! -$! Time To EXIT. -$! -$ EXIT -$! -$! Done with TCP/IP libraries -$! -$ ENDIF -$! -$! Time To RETURN... -$! -$ RETURN -$! -$ INITIALISE: -$! -$! Save old value of the logical name OPENSSL -$! -$ __SAVE_OPENSSL = F$TRNLNM("OPENSSL","LNM$PROCESS_TABLE") -$! -$! Save directory information -$! -$ __HERE = F$PARSE(F$PARSE("A.;",F$ENVIRONMENT("PROCEDURE"))-"A.;","[]A.;") - "A.;" -$ __HERE = F$EDIT(__HERE,"UPCASE") -$ __TOP = __HERE - "SSL]" -$ __INCLUDE = __TOP + "INCLUDE.OPENSSL]" -$! -$! Set up the logical name OPENSSL to point at the include directory -$! -$ DEFINE OPENSSL/NOLOG '__INCLUDE' -$! -$! Done -$! -$ RETURN -$! -$ CLEANUP: -$! -$! 
Restore the logical name OPENSSL if it had a value -$! -$ IF __SAVE_OPENSSL .EQS. "" -$ THEN -$ DEASSIGN OPENSSL -$ ELSE -$ DEFINE/NOLOG OPENSSL '__SAVE_OPENSSL' -$ ENDIF -$! -$! Done -$! -$ RETURN diff --git a/thirdparty/openssl/ssl/ssl_asn1.c b/thirdparty/openssl/ssl/ssl_asn1.c index 35cc27c5e9..499f0e85ad 100644 --- a/thirdparty/openssl/ssl/ssl_asn1.c +++ b/thirdparty/openssl/ssl/ssl_asn1.c @@ -527,6 +527,9 @@ SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, if (os.length > SSL_MAX_SID_CTX_LENGTH) { c.error = SSL_R_BAD_LENGTH; c.line = __LINE__; + OPENSSL_free(os.data); + os.data = NULL; + os.length = 0; goto err; } else { ret->sid_ctx_length = os.length; diff --git a/thirdparty/openssl/ssl/ssl_cert.c b/thirdparty/openssl/ssl/ssl_cert.c index f48ebaecc0..155728d037 100644 --- a/thirdparty/openssl/ssl/ssl_cert.c +++ b/thirdparty/openssl/ssl/ssl_cert.c @@ -315,7 +315,7 @@ CERT *ssl_cert_dup(CERT *cert) OPENSSL_malloc(cert->pkeys[i].serverinfo_length); if (ret->pkeys[i].serverinfo == NULL) { SSLerr(SSL_F_SSL_CERT_DUP, ERR_R_MALLOC_FAILURE); - return NULL; + goto err; } ret->pkeys[i].serverinfo_length = cert->pkeys[i].serverinfo_length; @@ -392,9 +392,7 @@ CERT *ssl_cert_dup(CERT *cert) return (ret); -#if !defined(OPENSSL_NO_DH) || !defined(OPENSSL_NO_ECDH) err: -#endif #ifndef OPENSSL_NO_RSA if (ret->rsa_tmp != NULL) RSA_free(ret->rsa_tmp); @@ -414,6 +412,7 @@ CERT *ssl_cert_dup(CERT *cert) #endif ssl_cert_clear_certs(ret); + OPENSSL_free(ret); return NULL; } diff --git a/thirdparty/openssl/ssl/ssl_ciph.c b/thirdparty/openssl/ssl/ssl_ciph.c index 302464e643..40021329a9 100644 --- a/thirdparty/openssl/ssl/ssl_ciph.c +++ b/thirdparty/openssl/ssl/ssl_ciph.c @@ -1932,17 +1932,27 @@ SSL_COMP *ssl3_comp_find(STACK_OF(SSL_COMP) *sk, int n) } #ifdef OPENSSL_NO_COMP -void *SSL_COMP_get_compression_methods(void) +STACK_OF(SSL_COMP) *SSL_COMP_get_compression_methods(void) +{ + return NULL; +} + +STACK_OF(SSL_COMP) 
*SSL_COMP_set0_compression_methods(STACK_OF(SSL_COMP) + *meths) { return NULL; } -int SSL_COMP_add_compression_method(int id, void *cm) +void SSL_COMP_free_compression_methods(void) +{ +} + +int SSL_COMP_add_compression_method(int id, COMP_METHOD *cm) { return 1; } -const char *SSL_COMP_get_name(const void *comp) +const char *SSL_COMP_get_name(const COMP_METHOD *comp) { return NULL; } @@ -1991,13 +2001,19 @@ int SSL_COMP_add_compression_method(int id, COMP_METHOD *cm) if (id < 193 || id > 255) { SSLerr(SSL_F_SSL_COMP_ADD_COMPRESSION_METHOD, SSL_R_COMPRESSION_ID_NOT_WITHIN_PRIVATE_RANGE); - return 0; + return 1; } MemCheck_off(); comp = (SSL_COMP *)OPENSSL_malloc(sizeof(SSL_COMP)); + if (comp == NULL) { + MemCheck_on(); + SSLerr(SSL_F_SSL_COMP_ADD_COMPRESSION_METHOD, ERR_R_MALLOC_FAILURE); + return 1; + } comp->id = id; comp->method = cm; + comp->name = cm->name; load_builtin_compressions(); if (ssl_comp_methods && sk_SSL_COMP_find(ssl_comp_methods, comp) >= 0) { OPENSSL_free(comp); diff --git a/thirdparty/openssl/ssl/ssl_err.c b/thirdparty/openssl/ssl/ssl_err.c index 704088dc46..a4c17a6bf3 100644 --- a/thirdparty/openssl/ssl/ssl_err.c +++ b/thirdparty/openssl/ssl/ssl_err.c @@ -1,6 +1,6 @@ /* ssl/ssl_err.c */ /* ==================================================================== - * Copyright (c) 1999-2015 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2016 The OpenSSL Project. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -93,6 +93,8 @@ static ERR_STRING_DATA SSL_str_functs[] = { {ERR_FUNC(SSL_F_DTLS1_HEARTBEAT), "dtls1_heartbeat"}, {ERR_FUNC(SSL_F_DTLS1_OUTPUT_CERT_CHAIN), "dtls1_output_cert_chain"}, {ERR_FUNC(SSL_F_DTLS1_PREPROCESS_FRAGMENT), "DTLS1_PREPROCESS_FRAGMENT"}, + {ERR_FUNC(SSL_F_DTLS1_PROCESS_BUFFERED_RECORDS), + "DTLS1_PROCESS_BUFFERED_RECORDS"}, {ERR_FUNC(SSL_F_DTLS1_PROCESS_OUT_OF_SEQ_MESSAGE), "DTLS1_PROCESS_OUT_OF_SEQ_MESSAGE"}, {ERR_FUNC(SSL_F_DTLS1_PROCESS_RECORD), "DTLS1_PROCESS_RECORD"}, @@ -751,6 +753,7 @@ static ERR_STRING_DATA SSL_str_reasons[] = { "tls illegal exporter label"}, {ERR_REASON(SSL_R_TLS_INVALID_ECPOINTFORMAT_LIST), "tls invalid ecpointformat list"}, + {ERR_REASON(SSL_R_TOO_MANY_WARN_ALERTS), "too many warn alerts"}, {ERR_REASON(SSL_R_TLS_PEER_DID_NOT_RESPOND_WITH_CERTIFICATE_LIST), "tls peer did not respond with certificate list"}, {ERR_REASON(SSL_R_TLS_RSA_ENCRYPTED_VALUE_LENGTH_IS_WRONG), diff --git a/thirdparty/openssl/ssl/ssl_lib.c b/thirdparty/openssl/ssl/ssl_lib.c index fd94325bb3..24be376c9f 100644 --- a/thirdparty/openssl/ssl/ssl_lib.c +++ b/thirdparty/openssl/ssl/ssl_lib.c @@ -1828,7 +1828,7 @@ int SSL_export_keying_material(SSL *s, unsigned char *out, size_t olen, const unsigned char *p, size_t plen, int use_context) { - if (s->version < TLS1_VERSION) + if (s->version < TLS1_VERSION && s->version != DTLS1_BAD_VER) return -1; return s->method->ssl3_enc->export_keying_material(s, out, olen, label, @@ -1838,13 +1838,21 @@ int SSL_export_keying_material(SSL *s, unsigned char *out, size_t olen, static unsigned long ssl_session_hash(const SSL_SESSION *a) { + const unsigned char *session_id = a->session_id; unsigned long l; + unsigned char tmp_storage[4]; + + if (a->session_id_length < sizeof(tmp_storage)) { + memset(tmp_storage, 0, sizeof(tmp_storage)); + memcpy(tmp_storage, a->session_id, 
a->session_id_length); + session_id = tmp_storage; + } l = (unsigned long) - ((unsigned int)a->session_id[0]) | - ((unsigned int)a->session_id[1] << 8L) | - ((unsigned long)a->session_id[2] << 16L) | - ((unsigned long)a->session_id[3] << 24L); + ((unsigned long)session_id[0]) | + ((unsigned long)session_id[1] << 8L) | + ((unsigned long)session_id[2] << 16L) | + ((unsigned long)session_id[3] << 24L); return (l); } @@ -2000,7 +2008,7 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *meth) ret->tlsext_servername_callback = 0; ret->tlsext_servername_arg = NULL; /* Setup RFC4507 ticket keys */ - if ((RAND_pseudo_bytes(ret->tlsext_tick_key_name, 16) <= 0) + if ((RAND_bytes(ret->tlsext_tick_key_name, 16) <= 0) || (RAND_bytes(ret->tlsext_tick_hmac_key, 16) <= 0) || (RAND_bytes(ret->tlsext_tick_aes_key, 16) <= 0)) ret->options |= SSL_OP_NO_TICKET; @@ -2030,10 +2038,8 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *meth) ret->rbuf_freelist->len = 0; ret->rbuf_freelist->head = NULL; ret->wbuf_freelist = OPENSSL_malloc(sizeof(SSL3_BUF_FREELIST)); - if (!ret->wbuf_freelist) { - OPENSSL_free(ret->rbuf_freelist); + if (!ret->wbuf_freelist) goto err; - } ret->wbuf_freelist->chunklen = 0; ret->wbuf_freelist->len = 0; ret->wbuf_freelist->head = NULL; @@ -3050,12 +3056,12 @@ const SSL_CIPHER *SSL_get_current_cipher(const SSL *s) } #ifdef OPENSSL_NO_COMP -const void *SSL_get_current_compression(SSL *s) +const COMP_METHOD *SSL_get_current_compression(SSL *s) { return NULL; } -const void *SSL_get_current_expansion(SSL *s) +const COMP_METHOD *SSL_get_current_expansion(SSL *s) { return NULL; } @@ -3188,6 +3194,9 @@ SSL_CTX *SSL_set_SSL_CTX(SSL *ssl, SSL_CTX *ctx) ssl->cert->alpn_proposed_len = ocert->alpn_proposed_len; ocert->alpn_proposed = NULL; ssl->cert->alpn_sent = ocert->alpn_sent; + + if (!custom_exts_copy_flags(&ssl->cert->srv_ext, &ocert->srv_ext)) + return NULL; #endif ssl_cert_free(ocert); } diff --git a/thirdparty/openssl/ssl/ssl_locl.h b/thirdparty/openssl/ssl/ssl_locl.h index 
747e718a52..aeffc00634 100644 --- a/thirdparty/openssl/ssl/ssl_locl.h +++ b/thirdparty/openssl/ssl/ssl_locl.h @@ -491,6 +491,12 @@ # define SSL_CLIENT_USE_TLS1_2_CIPHERS(s) \ ((SSL_IS_DTLS(s) && s->client_version <= DTLS1_2_VERSION) || \ (!SSL_IS_DTLS(s) && s->client_version >= TLS1_2_VERSION)) +/* + * Determine if a client should send signature algorithms extension: + * as with TLS1.2 cipher we can't rely on method flags. + */ +# define SSL_CLIENT_USE_SIGALGS(s) \ + SSL_CLIENT_USE_TLS1_2_CIPHERS(s) /* Mostly for SSLv3 */ # define SSL_PKEY_RSA_ENC 0 @@ -585,6 +591,8 @@ typedef struct { */ # define SSL_EXT_FLAG_SENT 0x2 +# define MAX_WARN_ALERT_COUNT 5 + typedef struct { custom_ext_method *meths; size_t meths_count; @@ -692,6 +700,8 @@ typedef struct cert_st { unsigned char *alpn_proposed; /* server */ unsigned int alpn_proposed_len; int alpn_sent; /* client */ + /* Count of the number of consecutive warning alerts received */ + unsigned int alert_count; } CERT; typedef struct sess_cert_st { @@ -1148,7 +1158,7 @@ long ssl2_default_timeout(void); const SSL_CIPHER *ssl3_get_cipher_by_char(const unsigned char *p); int ssl3_put_cipher_by_char(const SSL_CIPHER *c, unsigned char *p); -void ssl3_init_finished_mac(SSL *s); +int ssl3_init_finished_mac(SSL *s); int ssl3_send_server_certificate(SSL *s); int ssl3_send_newsession_ticket(SSL *s); int ssl3_send_cert_status(SSL *s); @@ -1242,7 +1252,8 @@ int dtls1_retransmit_message(SSL *s, unsigned short seq, unsigned long frag_off, int *found); int dtls1_get_queue_priority(unsigned short seq, int is_ccs); int dtls1_retransmit_buffered_messages(SSL *s); -void dtls1_clear_record_buffer(SSL *s); +void dtls1_clear_received_buffer(SSL *s); +void dtls1_clear_sent_buffer(SSL *s); void dtls1_get_message_header(unsigned char *data, struct hm_header_st *msg_hdr); void dtls1_get_ccs_header(unsigned char *data, struct ccs_header_st *ccs_hdr); @@ -1373,7 +1384,7 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, int 
ssl_parse_clienthello_tlsext(SSL *s, unsigned char **data, unsigned char *limit); int tls1_set_server_sigalgs(SSL *s); -int ssl_check_clienthello_tlsext_late(SSL *s); +int ssl_check_clienthello_tlsext_late(SSL *s, int *al); int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **data, unsigned char *d, int n); int ssl_prepare_clienthello_tlsext(SSL *s); @@ -1419,7 +1430,7 @@ int ssl_parse_clienthello_renegotiate_ext(SSL *s, unsigned char *d, int len, long ssl_get_algorithm2(SSL *s); int tls1_save_sigalgs(SSL *s, const unsigned char *data, int dsize); int tls1_process_sigalgs(SSL *s); -size_t tls12_get_psigalgs(SSL *s, const unsigned char **psigs); +size_t tls12_get_psigalgs(SSL *s, int sent, const unsigned char **psigs); int tls12_check_peer_sigalg(const EVP_MD **pmd, SSL *s, const unsigned char *sig, EVP_PKEY *pkey); void ssl_set_client_disabled(SSL *s); @@ -1471,6 +1482,8 @@ int custom_ext_add(SSL *s, int server, unsigned char **pret, unsigned char *limit, int *al); int custom_exts_copy(custom_ext_methods *dst, const custom_ext_methods *src); +int custom_exts_copy_flags(custom_ext_methods *dst, + const custom_ext_methods *src); void custom_exts_free(custom_ext_methods *exts); # else diff --git a/thirdparty/openssl/ssl/ssl_rsa.c b/thirdparty/openssl/ssl/ssl_rsa.c index 82022470bf..af03d45c2e 100644 --- a/thirdparty/openssl/ssl/ssl_rsa.c +++ b/thirdparty/openssl/ssl/ssl_rsa.c @@ -912,6 +912,8 @@ static int serverinfo_process_buffer(const unsigned char *serverinfo, int SSL_CTX_use_serverinfo(SSL_CTX *ctx, const unsigned char *serverinfo, size_t serverinfo_length) { + unsigned char *new_serverinfo; + if (ctx == NULL || serverinfo == NULL || serverinfo_length == 0) { SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, ERR_R_PASSED_NULL_PARAMETER); return 0; @@ -928,12 +930,13 @@ int SSL_CTX_use_serverinfo(SSL_CTX *ctx, const unsigned char *serverinfo, SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, ERR_R_INTERNAL_ERROR); return 0; } - ctx->cert->key->serverinfo = 
OPENSSL_realloc(ctx->cert->key->serverinfo, - serverinfo_length); - if (ctx->cert->key->serverinfo == NULL) { + new_serverinfo = OPENSSL_realloc(ctx->cert->key->serverinfo, + serverinfo_length); + if (new_serverinfo == NULL) { SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, ERR_R_MALLOC_FAILURE); return 0; } + ctx->cert->key->serverinfo = new_serverinfo; memcpy(ctx->cert->key->serverinfo, serverinfo, serverinfo_length); ctx->cert->key->serverinfo_length = serverinfo_length; @@ -961,6 +964,7 @@ int SSL_CTX_use_serverinfo_file(SSL_CTX *ctx, const char *file) int ret = 0; BIO *bin = NULL; size_t num_extensions = 0; + unsigned char *new_serverinfo; if (ctx == NULL || file == NULL) { SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE, @@ -1011,12 +1015,13 @@ int SSL_CTX_use_serverinfo_file(SSL_CTX *ctx, const char *file) goto end; } /* Append the decoded extension to the serverinfo buffer */ - serverinfo = + new_serverinfo = OPENSSL_realloc(serverinfo, serverinfo_length + extension_length); - if (serverinfo == NULL) { + if (new_serverinfo == NULL) { SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE, ERR_R_MALLOC_FAILURE); goto end; } + serverinfo = new_serverinfo; memcpy(serverinfo + serverinfo_length, extension, extension_length); serverinfo_length += extension_length; diff --git a/thirdparty/openssl/ssl/ssl_sess.c b/thirdparty/openssl/ssl/ssl_sess.c index b182998343..f50f514212 100644 --- a/thirdparty/openssl/ssl/ssl_sess.c +++ b/thirdparty/openssl/ssl/ssl_sess.c @@ -382,7 +382,7 @@ static int def_generate_session_id(const SSL *ssl, unsigned char *id, { unsigned int retry = 0; do - if (RAND_pseudo_bytes(id, *id_len) <= 0) + if (RAND_bytes(id, *id_len) <= 0) return 0; while (SSL_has_matching_session_id(ssl, id, *id_len) && (++retry < MAX_SESS_ID_ATTEMPTS)) ; @@ -573,7 +573,7 @@ int ssl_get_prev_session(SSL *s, unsigned char *session_id, int len, int r; #endif - if (session_id + len > limit) { + if (limit - session_id < len) { fatal = 1; goto err; } @@ -769,6 +769,15 @@ int 
SSL_CTX_add_session(SSL_CTX *ctx, SSL_SESSION *c) * obtain the same session from an external cache) */ s = NULL; + } else if (s == NULL && + lh_SSL_SESSION_retrieve(ctx->sessions, c) == NULL) { + /* s == NULL can also mean OOM error in lh_SSL_SESSION_insert ... */ + + /* + * ... so take back the extra reference and also don't add + * the session to the SSL_SESSION_list at this time + */ + s = c; } /* Put at the head of the queue unless it is already in the cache */ @@ -919,6 +928,10 @@ int SSL_set_session(SSL *s, SSL_SESSION *session) session->krb5_client_princ_len > 0) { s->kssl_ctx->client_princ = (char *)OPENSSL_malloc(session->krb5_client_princ_len + 1); + if (s->kssl_ctx->client_princ == NULL) { + SSLerr(SSL_F_SSL_SET_SESSION, ERR_R_MALLOC_FAILURE); + return 0; + } memcpy(s->kssl_ctx->client_princ, session->krb5_client_princ, session->krb5_client_princ_len); s->kssl_ctx->client_princ[session->krb5_client_princ_len] = '\0'; @@ -993,7 +1006,8 @@ int SSL_SESSION_set1_id_context(SSL_SESSION *s, const unsigned char *sid_ctx, return 0; } s->sid_ctx_length = sid_ctx_len; - memcpy(s->sid_ctx, sid_ctx, sid_ctx_len); + if (s->sid_ctx != sid_ctx) + memcpy(s->sid_ctx, sid_ctx, sid_ctx_len); return 1; } @@ -1123,7 +1137,7 @@ int ssl_clear_bad_session(SSL *s) if ((s->session != NULL) && !(s->shutdown & SSL_SENT_SHUTDOWN) && !(SSL_in_init(s) || SSL_in_before(s))) { - SSL_CTX_remove_session(s->ctx, s->session); + SSL_CTX_remove_session(s->session_ctx, s->session); return (1); } else return (0); diff --git a/thirdparty/openssl/ssl/t1_enc.c b/thirdparty/openssl/ssl/t1_enc.c index 514fcb3e4e..b6d1ee95a5 100644 --- a/thirdparty/openssl/ssl/t1_enc.c +++ b/thirdparty/openssl/ssl/t1_enc.c @@ -673,7 +673,6 @@ int tls1_setup_key_block(SSL *s) if ((p2 = (unsigned char *)OPENSSL_malloc(num)) == NULL) { SSLerr(SSL_F_TLS1_SETUP_KEY_BLOCK, ERR_R_MALLOC_FAILURE); - OPENSSL_free(p1); goto err; } #ifdef TLS_DEBUG diff --git a/thirdparty/openssl/ssl/t1_ext.c b/thirdparty/openssl/ssl/t1_ext.c 
index 724ddf76ac..0f4aba0226 100644 --- a/thirdparty/openssl/ssl/t1_ext.c +++ b/thirdparty/openssl/ssl/t1_ext.c @@ -179,6 +179,25 @@ int custom_ext_add(SSL *s, int server, return 1; } +/* Copy the flags from src to dst for any extensions that exist in both */ +int custom_exts_copy_flags(custom_ext_methods *dst, + const custom_ext_methods *src) +{ + size_t i; + custom_ext_method *methsrc = src->meths; + + for (i = 0; i < src->meths_count; i++, methsrc++) { + custom_ext_method *methdst = custom_ext_find(dst, methsrc->ext_type); + + if (methdst == NULL) + continue; + + methdst->ext_flags = methsrc->ext_flags; + } + + return 1; +} + /* Copy table of custom extensions */ int custom_exts_copy(custom_ext_methods *dst, const custom_ext_methods *src) { @@ -223,16 +242,14 @@ static int custom_ext_meth_add(custom_ext_methods *exts, /* Search for duplicate */ if (custom_ext_find(exts, ext_type)) return 0; - exts->meths = OPENSSL_realloc(exts->meths, - (exts->meths_count + - 1) * sizeof(custom_ext_method)); - - if (!exts->meths) { - exts->meths_count = 0; + meth = OPENSSL_realloc(exts->meths, + (exts->meths_count + 1) + * sizeof(custom_ext_method)); + if (meth == NULL) return 0; - } - meth = exts->meths + exts->meths_count; + exts->meths = meth; + meth += exts->meths_count; memset(meth, 0, sizeof(custom_ext_method)); meth->parse_cb = parse_cb; meth->add_cb = add_cb; @@ -275,7 +292,9 @@ int SSL_extension_supported(unsigned int ext_type) case TLSEXT_TYPE_ec_point_formats: case TLSEXT_TYPE_elliptic_curves: case TLSEXT_TYPE_heartbeat: +# ifndef OPENSSL_NO_NEXTPROTONEG case TLSEXT_TYPE_next_proto_neg: +# endif case TLSEXT_TYPE_padding: case TLSEXT_TYPE_renegotiate: case TLSEXT_TYPE_server_name: diff --git a/thirdparty/openssl/ssl/t1_lib.c b/thirdparty/openssl/ssl/t1_lib.c index dd5bd0050d..6587e8bb68 100644 --- a/thirdparty/openssl/ssl/t1_lib.c +++ b/thirdparty/openssl/ssl/t1_lib.c @@ -132,6 +132,9 @@ static int ssl_check_clienthello_tlsext_early(SSL *s); int 
ssl_check_serverhello_tlsext(SSL *s); #endif +#define CHECKLEN(curr, val, limit) \ + (((curr) >= (limit)) || (size_t)((limit) - (curr)) < (size_t)(val)) + SSL3_ENC_METHOD TLSv1_enc_data = { tls1_enc, tls1_mac, @@ -1032,7 +1035,7 @@ static unsigned char suiteb_sigalgs[] = { tlsext_sigalg_ecdsa(TLSEXT_hash_sha384) }; # endif -size_t tls12_get_psigalgs(SSL *s, const unsigned char **psigs) +size_t tls12_get_psigalgs(SSL *s, int sent, const unsigned char **psigs) { /* * If Suite B mode use Suite B sigalgs only, ignore any other @@ -1054,7 +1057,7 @@ size_t tls12_get_psigalgs(SSL *s, const unsigned char **psigs) } # endif /* If server use client authentication sigalgs if not NULL */ - if (s->server && s->cert->client_sigalgs) { + if (s->server == sent && s->cert->client_sigalgs) { *psigs = s->cert->client_sigalgs; return s->cert->client_sigalgslen; } else if (s->cert->conf_sigalgs) { @@ -1118,7 +1121,7 @@ int tls12_check_peer_sigalg(const EVP_MD **pmd, SSL *s, # endif /* Check signature matches a type we sent */ - sent_sigslen = tls12_get_psigalgs(s, &sent_sigs); + sent_sigslen = tls12_get_psigalgs(s, 1, &sent_sigs); for (i = 0; i < sent_sigslen; i += 2, sent_sigs += 2) { if (sig[0] == sent_sigs[0] && sig[1] == sent_sigs[1]) break; @@ -1166,7 +1169,7 @@ void ssl_set_client_disabled(SSL *s) * Now go through all signature algorithms seeing if we support any for * RSA, DSA, ECDSA. Do this for all versions not just TLS 1.2. */ - sigalgslen = tls12_get_psigalgs(s, &sigalgs); + sigalgslen = tls12_get_psigalgs(s, 1, &sigalgs); for (i = 0; i < sigalgslen; i += 2, sigalgs += 2) { switch (sigalgs[1]) { # ifndef OPENSSL_NO_RSA @@ -1263,8 +1266,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, if (s->tlsext_hostname != NULL) { /* Add TLS extension servername to the Client Hello message */ - unsigned long size_str; - long lenmax; + size_t size_str; /*- * check for enough space. 
@@ -1274,10 +1276,8 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, * 2 for hostname length * + hostname length */ - - if ((lenmax = limit - ret - 9) < 0 - || (size_str = - strlen(s->tlsext_hostname)) > (unsigned long)lenmax) + size_str = strlen(s->tlsext_hostname); + if (CHECKLEN(ret, 9 + size_str, limit)) return NULL; /* extension type and length */ @@ -1321,7 +1321,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, if (s->srp_ctx.login != NULL) { /* Add TLS extension SRP username to the * Client Hello message */ - int login_len = strlen(s->srp_ctx.login); + size_t login_len = strlen(s->srp_ctx.login); if (login_len > 255 || login_len == 0) { SSLerr(SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT, ERR_R_INTERNAL_ERROR); return NULL; @@ -1333,7 +1333,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, * 1 for the srp user identity * + srp user identity length */ - if ((limit - ret - 5 - login_len) < 0) + if (CHECKLEN(ret, 5 + login_len, limit)) return NULL; /* fill in the extension */ @@ -1350,20 +1350,23 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, /* * Add TLS extension ECPointFormats to the ClientHello message */ - long lenmax; const unsigned char *pcurves, *pformats; size_t num_curves, num_formats, curves_list_len; tls1_get_formatlist(s, &pformats, &num_formats); - if ((lenmax = limit - ret - 5) < 0) - return NULL; - if (num_formats > (size_t)lenmax) - return NULL; if (num_formats > 255) { SSLerr(SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT, ERR_R_INTERNAL_ERROR); return NULL; } + /*- + * check for enough space. + * 4 bytes for the ec point formats type and extension length + * 1 byte for the length of the formats + * + formats length + */ + if (CHECKLEN(ret, 5 + num_formats, limit)) + return NULL; s2n(TLSEXT_TYPE_ec_point_formats, ret); /* The point format list has 1-byte length. 
*/ @@ -1379,15 +1382,20 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, if (!tls1_get_curvelist(s, 0, &pcurves, &num_curves)) return NULL; - if ((lenmax = limit - ret - 6) < 0) - return NULL; - if (num_curves > (size_t)lenmax / 2) - return NULL; if (num_curves > 65532 / 2) { SSLerr(SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT, ERR_R_INTERNAL_ERROR); return NULL; } curves_list_len = 2 * num_curves; + /*- + * check for enough space. + * 4 bytes for the ec curves type and extension length + * 2 bytes for the curve list length + * + curve list length + */ + if (CHECKLEN(ret, 6 + curves_list_len, limit)) + return NULL; + s2n(TLSEXT_TYPE_elliptic_curves, ret); s2n(curves_list_len + 2, ret); s2n(curves_list_len, ret); @@ -1397,7 +1405,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, # endif /* OPENSSL_NO_EC */ if (!(SSL_get_options(s) & SSL_OP_NO_TICKET)) { - int ticklen; + size_t ticklen; if (!s->new_session && s->session && s->session->tlsext_tick) ticklen = s->session->tlsext_ticklen; else if (s->session && s->tlsext_session_ticket && @@ -1418,22 +1426,29 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, * Check for enough room 2 for extension type, 2 for len rest for * ticket */ - if ((long)(limit - ret - 4 - ticklen) < 0) + if (CHECKLEN(ret, 4 + ticklen, limit)) return NULL; s2n(TLSEXT_TYPE_session_ticket, ret); s2n(ticklen, ret); - if (ticklen) { + if (ticklen > 0) { memcpy(ret, s->session->tlsext_tick, ticklen); ret += ticklen; } } skip_ext: - if (SSL_USE_SIGALGS(s)) { + if (SSL_CLIENT_USE_SIGALGS(s)) { size_t salglen; const unsigned char *salg; - salglen = tls12_get_psigalgs(s, &salg); - if ((size_t)(limit - ret) < salglen + 6) + salglen = tls12_get_psigalgs(s, 1, &salg); + + /*- + * check for enough space. 
+ * 4 bytes for the sigalgs type and extension length + * 2 bytes for the sigalg list length + * + sigalg list length + */ + if (CHECKLEN(ret, salglen + 6, limit)) return NULL; s2n(TLSEXT_TYPE_signature_algorithms, ret); s2n(salglen + 2, ret); @@ -1460,30 +1475,42 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, if (s->tlsext_status_type == TLSEXT_STATUSTYPE_ocsp) { int i; - long extlen, idlen, itmp; + size_t extlen, idlen; + int lentmp; OCSP_RESPID *id; idlen = 0; for (i = 0; i < sk_OCSP_RESPID_num(s->tlsext_ocsp_ids); i++) { id = sk_OCSP_RESPID_value(s->tlsext_ocsp_ids, i); - itmp = i2d_OCSP_RESPID(id, NULL); - if (itmp <= 0) + lentmp = i2d_OCSP_RESPID(id, NULL); + if (lentmp <= 0) return NULL; - idlen += itmp + 2; + idlen += (size_t)lentmp + 2; } if (s->tlsext_ocsp_exts) { - extlen = i2d_X509_EXTENSIONS(s->tlsext_ocsp_exts, NULL); - if (extlen < 0) + lentmp = i2d_X509_EXTENSIONS(s->tlsext_ocsp_exts, NULL); + if (lentmp < 0) return NULL; + extlen = (size_t)lentmp; } else extlen = 0; - if ((long)(limit - ret - 7 - extlen - idlen) < 0) - return NULL; - s2n(TLSEXT_TYPE_status_request, ret); if (extlen + idlen > 0xFFF0) return NULL; + /* + * 2 bytes for status request type + * 2 bytes for status request len + * 1 byte for OCSP request type + * 2 bytes for length of ids + * 2 bytes for length of extensions + * + length of ids + * + length of extensions + */ + if (CHECKLEN(ret, 9 + idlen + extlen, limit)) + return NULL; + + s2n(TLSEXT_TYPE_status_request, ret); s2n(extlen + idlen + 5, ret); *(ret++) = TLSEXT_STATUSTYPE_ocsp; s2n(idlen, ret); @@ -1493,9 +1520,9 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, id = sk_OCSP_RESPID_value(s->tlsext_ocsp_ids, i); /* skip over id len */ ret += 2; - itmp = i2d_OCSP_RESPID(id, &ret); + lentmp = i2d_OCSP_RESPID(id, &ret); /* write id len */ - s2n(itmp, q); + s2n(lentmp, q); } s2n(extlen, ret); if (extlen > 0) @@ -1503,8 +1530,15 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, 
unsigned char *buf, } # ifndef OPENSSL_NO_HEARTBEATS /* Add Heartbeat extension */ - if ((limit - ret - 4 - 1) < 0) + + /*- + * check for enough space. + * 4 bytes for the heartbeat ext type and extension length + * 1 byte for the mode + */ + if (CHECKLEN(ret, 5, limit)) return NULL; + s2n(TLSEXT_TYPE_heartbeat, ret); s2n(1, ret); /*- @@ -1524,7 +1558,12 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, * The client advertises an emtpy extension to indicate its support * for Next Protocol Negotiation */ - if (limit - ret - 4 < 0) + + /*- + * check for enough space. + * 4 bytes for the NPN ext type and extension length + */ + if (CHECKLEN(ret, 4, limit)) return NULL; s2n(TLSEXT_TYPE_next_proto_neg, ret); s2n(0, ret); @@ -1532,7 +1571,13 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, # endif if (s->alpn_client_proto_list && !s->s3->tmp.finish_md_len) { - if ((size_t)(limit - ret) < 6 + s->alpn_client_proto_list_len) + /*- + * check for enough space. + * 4 bytes for the ALPN type and extension length + * 2 bytes for the ALPN protocol list length + * + ALPN protocol list length + */ + if (CHECKLEN(ret, 6 + s->alpn_client_proto_list_len, limit)) return NULL; s2n(TLSEXT_TYPE_application_layer_protocol_negotiation, ret); s2n(2 + s->alpn_client_proto_list_len, ret); @@ -1547,7 +1592,12 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, ssl_add_clienthello_use_srtp_ext(s, 0, &el, 0); - if ((limit - ret - 4 - el) < 0) + /*- + * check for enough space. + * 4 bytes for the SRTP type and extension length + * + SRTP profiles length + */ + if (CHECKLEN(ret, 4 + el, limit)) return NULL; s2n(TLSEXT_TYPE_use_srtp, ret); @@ -1587,6 +1637,17 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, else hlen = 0; + /*- + * check for enough space. 
Strictly speaking we know we've already + * got enough space because to get here the message size is < 0x200, + * but we know that we've allocated far more than that in the buffer + * - but for consistency and robustness we're going to check anyway. + * + * 4 bytes for the padding type and extension length + * + padding length + */ + if (CHECKLEN(ret, 4 + hlen, limit)) + return NULL; s2n(TLSEXT_TYPE_padding, ret); s2n(hlen, ret); memset(ret, 0, hlen); @@ -1644,7 +1705,12 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, return NULL; } - if ((limit - ret - 4 - el) < 0) + /*- + * check for enough space. + * 4 bytes for the reneg type and extension length + * + reneg data length + */ + if (CHECKLEN(ret, 4 + el, limit)) return NULL; s2n(TLSEXT_TYPE_renegotiate, ret); @@ -1664,19 +1730,23 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, /* * Add TLS extension ECPointFormats to the ServerHello message */ - long lenmax; tls1_get_formatlist(s, &plist, &plistlen); - if ((lenmax = limit - ret - 5) < 0) - return NULL; - if (plistlen > (size_t)lenmax) - return NULL; if (plistlen > 255) { SSLerr(SSL_F_SSL_ADD_SERVERHELLO_TLSEXT, ERR_R_INTERNAL_ERROR); return NULL; } + /*- + * check for enough space. + * 4 bytes for the ec points format type and extension length + * 1 byte for the points format list length + * + length of points format list + */ + if (CHECKLEN(ret, 5 + plistlen, limit)) + return NULL; + s2n(TLSEXT_TYPE_ec_point_formats, ret); s2n(plistlen + 1, ret); *(ret++) = (unsigned char)plistlen; @@ -1691,14 +1761,25 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, # endif /* OPENSSL_NO_EC */ if (s->tlsext_ticket_expected && !(SSL_get_options(s) & SSL_OP_NO_TICKET)) { - if ((long)(limit - ret - 4) < 0) + /*- + * check for enough space. 
+ * 4 bytes for the Ticket type and extension length + */ + if (CHECKLEN(ret, 4, limit)) return NULL; s2n(TLSEXT_TYPE_session_ticket, ret); s2n(0, ret); + } else { + /* if we don't add the above TLSEXT, we can't add a session ticket later */ + s->tlsext_ticket_expected = 0; } if (s->tlsext_status_expected) { - if ((long)(limit - ret - 4) < 0) + /*- + * check for enough space. + * 4 bytes for the Status request type and extension length + */ + if (CHECKLEN(ret, 4, limit)) return NULL; s2n(TLSEXT_TYPE_status_request, ret); s2n(0, ret); @@ -1726,7 +1807,12 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, ssl_add_serverhello_use_srtp_ext(s, 0, &el, 0); - if ((limit - ret - 4 - el) < 0) + /*- + * check for enough space. + * 4 bytes for the SRTP profiles type and extension length + * + length of the SRTP profiles list + */ + if (CHECKLEN(ret, 4 + el, limit)) return NULL; s2n(TLSEXT_TYPE_use_srtp, ret); @@ -1751,16 +1837,23 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, 0x2a, 0x85, 0x03, 0x02, 0x02, 0x16, 0x30, 0x08, 0x06, 0x06, 0x2a, 0x85, 0x03, 0x02, 0x02, 0x17 }; - if (limit - ret < 36) + + /* check for enough space. */ + if (CHECKLEN(ret, sizeof(cryptopro_ext), limit)) return NULL; - memcpy(ret, cryptopro_ext, 36); - ret += 36; + memcpy(ret, cryptopro_ext, sizeof(cryptopro_ext)); + ret += sizeof(cryptopro_ext); } # ifndef OPENSSL_NO_HEARTBEATS /* Add Heartbeat extension if we've received one */ if (s->tlsext_heartbeat & SSL_TLSEXT_HB_ENABLED) { - if ((limit - ret - 4 - 1) < 0) + /*- + * check for enough space. + * 4 bytes for the Heartbeat type and extension length + * 1 byte for the mode + */ + if (CHECKLEN(ret, 5, limit)) return NULL; s2n(TLSEXT_TYPE_heartbeat, ret); s2n(1, ret); @@ -1789,7 +1882,12 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, s-> ctx->next_protos_advertised_cb_arg); if (r == SSL_TLSEXT_ERR_OK) { - if ((long)(limit - ret - 4 - npalen) < 0) + /*- + * check for enough space. 
+ * 4 bytes for the NPN type and extension length + * + length of protocols list + */ + if (CHECKLEN(ret, 4 + npalen, limit)) return NULL; s2n(TLSEXT_TYPE_next_proto_neg, ret); s2n(npalen, ret); @@ -1804,9 +1902,16 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, if (s->s3->alpn_selected) { const unsigned char *selected = s->s3->alpn_selected; - unsigned len = s->s3->alpn_selected_len; + size_t len = s->s3->alpn_selected_len; - if ((long)(limit - ret - 4 - 2 - 1 - len) < 0) + /*- + * check for enough space. + * 4 bytes for the ALPN type and extension length + * 2 bytes for ALPN data length + * 1 byte for selected protocol length + * + length of the selected protocol + */ + if (CHECKLEN(ret, 7 + len, limit)) return NULL; s2n(TLSEXT_TYPE_application_layer_protocol_negotiation, ret); s2n(3 + len, ret); @@ -1867,11 +1972,11 @@ static void ssl_check_for_safari(SSL *s, const unsigned char *data, 0x02, 0x03, /* SHA-1/ECDSA */ }; - if (data >= (limit - 2)) + if (limit - data <= 2) return; data += 2; - if (data > (limit - 4)) + if (limit - data < 4) return; n2s(data, type); n2s(data, size); @@ -1879,7 +1984,7 @@ static void ssl_check_for_safari(SSL *s, const unsigned char *data, if (type != TLSEXT_TYPE_server_name) return; - if (data + size > limit) + if (limit - data < size) return; data += size; @@ -1887,7 +1992,7 @@ static void ssl_check_for_safari(SSL *s, const unsigned char *data, const size_t len1 = sizeof(kSafariExtensionsBlock); const size_t len2 = sizeof(kSafariTLS12ExtensionsBlock); - if (data + len1 + len2 != limit) + if (limit - data != (int)(len1 + len2)) return; if (memcmp(data, kSafariExtensionsBlock, len1) != 0) return; @@ -1896,7 +2001,7 @@ static void ssl_check_for_safari(SSL *s, const unsigned char *data, } else { const size_t len = sizeof(kSafariExtensionsBlock); - if (data + len != limit) + if (limit - data != (int)(len)) return; if (memcmp(data, kSafariExtensionsBlock, len) != 0) return; @@ -1966,11 +2071,10 @@ static int 
tls1_alpn_handle_client_hello(SSL *s, const unsigned char *data, /* * Process the ALPN extension in a ClientHello. - * ret: a pointer to the TLSEXT return value: SSL_TLSEXT_ERR_* * al: a pointer to the alert value to send in the event of a failure. - * returns 1 on success, 0 on failure: al/ret set only on failure + * returns 1 on success, 0 on failure: al set only on failure */ -static int tls1_alpn_handle_client_hello_late(SSL *s, int *ret, int *al) +static int tls1_alpn_handle_client_hello_late(SSL *s, int *al) { const unsigned char *selected = NULL; unsigned char selected_len = 0; @@ -1986,7 +2090,6 @@ static int tls1_alpn_handle_client_hello_late(SSL *s, int *ret, int *al) s->s3->alpn_selected = OPENSSL_malloc(selected_len); if (s->s3->alpn_selected == NULL) { *al = SSL_AD_INTERNAL_ERROR; - *ret = SSL_TLSEXT_ERR_ALERT_FATAL; return 0; } memcpy(s->s3->alpn_selected, selected, selected_len); @@ -2053,19 +2156,19 @@ static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, if (data == limit) goto ri_check; - if (data > (limit - 2)) + if (limit - data < 2) goto err; n2s(data, len); - if (data + len != limit) + if (limit - data != len) goto err; - while (data <= (limit - 4)) { + while (limit - data >= 4) { n2s(data, type); n2s(data, size); - if (data + size > (limit)) + if (limit - data < size) goto err; # if 0 fprintf(stderr, "Received extension type %d size %d\n", type, size); @@ -2316,6 +2419,23 @@ static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, size -= 2; if (dsize > size) goto err; + + /* + * We remove any OCSP_RESPIDs from a previous handshake + * to prevent unbounded memory growth - CVE-2016-6304 + */ + sk_OCSP_RESPID_pop_free(s->tlsext_ocsp_ids, + OCSP_RESPID_free); + if (dsize > 0) { + s->tlsext_ocsp_ids = sk_OCSP_RESPID_new_null(); + if (s->tlsext_ocsp_ids == NULL) { + *al = SSL_AD_INTERNAL_ERROR; + return 0; + } + } else { + s->tlsext_ocsp_ids = NULL; + } + while (dsize > 0) { OCSP_RESPID *id; int idsize; @@ -2335,13 +2455,6 @@ 
static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, OCSP_RESPID_free(id); goto err; } - if (!s->tlsext_ocsp_ids - && !(s->tlsext_ocsp_ids = - sk_OCSP_RESPID_new_null())) { - OCSP_RESPID_free(id); - *al = SSL_AD_INTERNAL_ERROR; - return 0; - } if (!sk_OCSP_RESPID_push(s->tlsext_ocsp_ids, id)) { OCSP_RESPID_free(id); *al = SSL_AD_INTERNAL_ERROR; @@ -2472,18 +2585,18 @@ static int ssl_scan_clienthello_custom_tlsext(SSL *s, if (s->hit || s->cert->srv_ext.meths_count == 0) return 1; - if (data >= limit - 2) + if (limit - data <= 2) return 1; n2s(data, len); - if (data > limit - len) + if (limit - data < len) return 1; - while (data <= limit - 4) { + while (limit - data >= 4) { n2s(data, type); n2s(data, size); - if (data + size > limit) + if (limit - data < size) return 1; if (custom_ext_parse(s, 1 /* server */ , type, data, size, al) <= 0) return 0; @@ -2569,20 +2682,20 @@ static int ssl_scan_serverhello_tlsext(SSL *s, unsigned char **p, SSL_TLSEXT_HB_DONT_SEND_REQUESTS); # endif - if (data >= (d + n - 2)) + if ((d + n) - data <= 2) goto ri_check; n2s(data, length); - if (data + length != d + n) { + if ((d + n) - data != length) { *al = SSL_AD_DECODE_ERROR; return 0; } - while (data <= (d + n - 4)) { + while ((d + n) - data >= 4) { n2s(data, type); n2s(data, size); - if (data + size > (d + n)) + if ((d + n) - data < size) goto ri_check; if (s->tlsext_debug_cb) @@ -2712,6 +2825,11 @@ static int ssl_scan_serverhello_tlsext(SSL *s, unsigned char **p, *al = TLS1_AD_INTERNAL_ERROR; return 0; } + /* + * Could be non-NULL if server has sent multiple NPN extensions in + * a single Serverhello + */ + OPENSSL_free(s->next_proto_negotiated); s->next_proto_negotiated = OPENSSL_malloc(selected_len); if (!s->next_proto_negotiated) { *al = TLS1_AD_INTERNAL_ERROR; @@ -3049,10 +3167,12 @@ int tls1_set_server_sigalgs(SSL *s) return 0; } -int ssl_check_clienthello_tlsext_late(SSL *s) +/* + * Upon success, returns 1. 
+ * Upon failure, returns 0 and sets |al| to the appropriate fatal alert. + */ +int ssl_check_clienthello_tlsext_late(SSL *s, int *al) { - int ret = SSL_TLSEXT_ERR_OK; - int al; /* * If status request then ask callback what to do. Note: this must be @@ -3061,58 +3181,41 @@ int ssl_check_clienthello_tlsext_late(SSL *s) * influence which certificate is sent */ if ((s->tlsext_status_type != -1) && s->ctx && s->ctx->tlsext_status_cb) { - int r; + int ret; CERT_PKEY *certpkey; certpkey = ssl_get_server_send_pkey(s); /* If no certificate can't return certificate status */ - if (certpkey == NULL) { - s->tlsext_status_expected = 0; - return 1; - } - /* - * Set current certificate to one we will use so SSL_get_certificate - * et al can pick it up. - */ - s->cert->key = certpkey; - r = s->ctx->tlsext_status_cb(s, s->ctx->tlsext_status_arg); - switch (r) { - /* We don't want to send a status request response */ - case SSL_TLSEXT_ERR_NOACK: - s->tlsext_status_expected = 0; - break; - /* status request response should be sent */ - case SSL_TLSEXT_ERR_OK: - if (s->tlsext_ocsp_resp) - s->tlsext_status_expected = 1; - else + if (certpkey != NULL) { + /* + * Set current certificate to one we will use so SSL_get_certificate + * et al can pick it up. 
+ */ + s->cert->key = certpkey; + ret = s->ctx->tlsext_status_cb(s, s->ctx->tlsext_status_arg); + switch (ret) { + /* We don't want to send a status request response */ + case SSL_TLSEXT_ERR_NOACK: s->tlsext_status_expected = 0; - break; - /* something bad happened */ - case SSL_TLSEXT_ERR_ALERT_FATAL: - ret = SSL_TLSEXT_ERR_ALERT_FATAL; - al = SSL_AD_INTERNAL_ERROR; - goto err; + break; + /* status request response should be sent */ + case SSL_TLSEXT_ERR_OK: + if (s->tlsext_ocsp_resp) + s->tlsext_status_expected = 1; + break; + /* something bad happened */ + case SSL_TLSEXT_ERR_ALERT_FATAL: + default: + *al = SSL_AD_INTERNAL_ERROR; + return 0; + } } - } else - s->tlsext_status_expected = 0; - - if (!tls1_alpn_handle_client_hello_late(s, &ret, &al)) { - goto err; } - err: - switch (ret) { - case SSL_TLSEXT_ERR_ALERT_FATAL: - ssl3_send_alert(s, SSL3_AL_FATAL, al); - return -1; - - case SSL_TLSEXT_ERR_ALERT_WARNING: - ssl3_send_alert(s, SSL3_AL_WARNING, al); - return 1; - - default: - return 1; + if (!tls1_alpn_handle_client_hello_late(s, al)) { + return 0; } + + return 1; } int ssl_check_serverhello_tlsext(SSL *s) @@ -3307,29 +3410,33 @@ int tls1_process_ticket(SSL *s, unsigned char *session_id, int len, /* Skip past DTLS cookie */ if (SSL_IS_DTLS(s)) { i = *(p++); - p += i; - if (p >= limit) + + if (limit - p <= i) return -1; + + p += i; } /* Skip past cipher list */ n2s(p, i); - p += i; - if (p >= limit) + if (limit - p <= i) return -1; + p += i; + /* Skip past compression algorithm list */ i = *(p++); - p += i; - if (p > limit) + if (limit - p < i) return -1; + p += i; + /* Now at start of extensions */ - if ((p + 2) >= limit) + if (limit - p <= 2) return 0; n2s(p, i); - while ((p + 4) <= limit) { + while (limit - p >= 4) { unsigned short type, size; n2s(p, type); n2s(p, size); - if (p + size > limit) + if (limit - p < size) return 0; if (type == TLSEXT_TYPE_session_ticket) { int r; @@ -3397,9 +3504,7 @@ static int tls_decrypt_ticket(SSL *s, const unsigned char 
*etick, HMAC_CTX hctx; EVP_CIPHER_CTX ctx; SSL_CTX *tctx = s->initial_ctx; - /* Need at least keyname + iv + some encrypted data */ - if (eticklen < 48) - return 2; + /* Initialize session ticket encryption and HMAC contexts */ HMAC_CTX_init(&hctx); EVP_CIPHER_CTX_init(&ctx); @@ -3433,6 +3538,13 @@ static int tls_decrypt_ticket(SSL *s, const unsigned char *etick, if (mlen < 0) { goto err; } + /* Sanity check ticket length: must exceed keyname + IV + HMAC */ + if (eticklen <= 16 + EVP_CIPHER_CTX_iv_length(&ctx) + mlen) { + HMAC_CTX_cleanup(&hctx); + EVP_CIPHER_CTX_cleanup(&ctx); + return 2; + } + eticklen -= mlen; /* Check HMAC of encrypted ticket */ if (HMAC_Update(&hctx, etick, eticklen) <= 0 @@ -3465,8 +3577,14 @@ static int tls_decrypt_ticket(SSL *s, const unsigned char *etick, p = sdec; sess = d2i_SSL_SESSION(NULL, &p, slen); + slen -= p - sdec; OPENSSL_free(sdec); if (sess) { + /* Some additional consistency checks */ + if (slen != 0 || sess->session_id_length != 0) { + SSL_SESSION_free(sess); + return 2; + } /* * The session ID, if non-empty, is used by some clients to detect * that the ticket has been accepted. 
So we copy it to the session @@ -3694,7 +3812,7 @@ static int tls1_set_shared_sigalgs(SSL *s) conf = c->conf_sigalgs; conflen = c->conf_sigalgslen; } else - conflen = tls12_get_psigalgs(s, &conf); + conflen = tls12_get_psigalgs(s, 0, &conf); if (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE || is_suiteb) { pref = conf; preflen = conflen; @@ -3902,7 +4020,7 @@ int tls1_process_heartbeat(SSL *s) memcpy(bp, pl, payload); bp += payload; /* Random padding */ - if (RAND_pseudo_bytes(bp, padding) < 0) { + if (RAND_bytes(bp, padding) <= 0) { OPENSSL_free(buffer); return -1; } @@ -3980,6 +4098,8 @@ int tls1_heartbeat(SSL *s) * - Padding */ buf = OPENSSL_malloc(1 + 2 + payload + padding); + if (buf == NULL) + return -1; p = buf; /* Message Type */ *p++ = TLS1_HB_REQUEST; @@ -3988,13 +4108,13 @@ int tls1_heartbeat(SSL *s) /* Sequence number */ s2n(s->tlsext_hb_seq, p); /* 16 random bytes */ - if (RAND_pseudo_bytes(p, 16) < 0) { + if (RAND_bytes(p, 16) <= 0) { SSLerr(SSL_F_TLS1_HEARTBEAT, ERR_R_INTERNAL_ERROR); goto err; } p += 16; /* Random padding */ - if (RAND_pseudo_bytes(p, padding) < 0) { + if (RAND_bytes(p, padding) <= 0) { SSLerr(SSL_F_TLS1_HEARTBEAT, ERR_R_INTERNAL_ERROR); goto err; } diff --git a/thirdparty/opus/celt/arch.h b/thirdparty/opus/celt/arch.h index 9f74ddd267..8ceab5fe10 100644 --- a/thirdparty/opus/celt/arch.h +++ b/thirdparty/opus/celt/arch.h @@ -78,6 +78,15 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) #define UADD32(a,b) ((a)+(b)) #define USUB32(a,b) ((a)-(b)) +/* Set this if opus_int64 is a native type of the CPU. */ +/* Assume that all LP64 architectures have fast 64-bit types; also x86_64 + (which can be ILP32 for x32) and Win64 (which is LLP64). 
*/ +#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64) +#define OPUS_FAST_INT64 1 +#else +#define OPUS_FAST_INT64 0 +#endif + #define PRINT_MIPS(file) #ifdef FIXED_POINT @@ -118,7 +127,9 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { #include "fixed_generic.h" -#ifdef OPUS_ARM_INLINE_EDSP +#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR +#include "arm/fixed_arm64.h" +#elif OPUS_ARM_INLINE_EDSP #include "arm/fixed_armv5e.h" #elif defined (OPUS_ARM_INLINE_ASM) #include "arm/fixed_armv4.h" diff --git a/thirdparty/opus/celt/arm/arm_celt_map.c b/thirdparty/opus/celt/arm/arm_celt_map.c index ee6c244786..4d4d069a86 100644 --- a/thirdparty/opus/celt/arm/arm_celt_map.c +++ b/thirdparty/opus/celt/arm/arm_celt_map.c @@ -36,6 +36,9 @@ #if defined(OPUS_HAVE_RTCD) # if defined(FIXED_POINT) +# if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ + (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ + (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, const opus_val16 *, opus_val32 *, int , int) = { celt_pitch_xcorr_c, /* ARMv4 */ @@ -43,8 +46,10 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */ MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */ }; + +# endif # else /* !FIXED_POINT */ -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR) void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, const opus_val16 *, opus_val32 *, int, int) = { celt_pitch_xcorr_c, /* ARMv4 */ @@ -55,6 +60,23 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, # endif # endif /* FIXED_POINT */ +#if defined(FIXED_POINT) && defined(OPUS_HAVE_RTCD) && \ + defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR) + +void (*const 
XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( + const opus_val16 *x, + const opus_val16 *y, + opus_val32 sum[4], + int len +) = { + xcorr_kernel_c, /* ARMv4 */ + xcorr_kernel_c, /* EDSP */ + xcorr_kernel_c, /* Media */ + xcorr_kernel_neon_fixed, /* Neon */ +}; + +#endif + # if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) # if defined(HAVE_ARM_NE10) # if defined(CUSTOM_MODES) diff --git a/thirdparty/opus/celt/arm/armcpu.c b/thirdparty/opus/celt/arm/armcpu.c index 5e5d10c344..694a63b78e 100644 --- a/thirdparty/opus/celt/arm/armcpu.c +++ b/thirdparty/opus/celt/arm/armcpu.c @@ -37,11 +37,12 @@ #include "cpu_support.h" #include "os_support.h" #include "opus_types.h" +#include "arch.h" -#define OPUS_CPU_ARM_V4 (1) -#define OPUS_CPU_ARM_EDSP (1<<1) -#define OPUS_CPU_ARM_MEDIA (1<<2) -#define OPUS_CPU_ARM_NEON (1<<3) +#define OPUS_CPU_ARM_V4_FLAG (1<<OPUS_ARCH_ARM_V4) +#define OPUS_CPU_ARM_EDSP_FLAG (1<<OPUS_ARCH_ARM_EDSP) +#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA) +#define OPUS_CPU_ARM_NEON_FLAG (1<<OPUS_ARCH_ARM_NEON) #if defined(_MSC_VER) /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ @@ -55,20 +56,22 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){ /* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit * instructions via their assembled hex code. * All of these instructions should be essentially nops. 
*/ -# if defined(OPUS_ARM_MAY_HAVE_EDSP) +# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \ + || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) __try{ /*PLD [r13]*/ __emit(0xF5DDF000); - flags|=OPUS_CPU_ARM_EDSP; + flags|=OPUS_CPU_ARM_EDSP_FLAG; } __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ /*Ignore exception.*/ } -# if defined(OPUS_ARM_MAY_HAVE_MEDIA) +# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \ + || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) __try{ /*SHADD8 r3,r3,r3*/ __emit(0xE6333F93); - flags|=OPUS_CPU_ARM_MEDIA; + flags|=OPUS_CPU_ARM_MEDIA_FLAG; } __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ /*Ignore exception.*/ @@ -77,7 +80,7 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){ __try{ /*VORR q0,q0,q0*/ __emit(0xF2200150); - flags|=OPUS_CPU_ARM_NEON; + flags|=OPUS_CPU_ARM_NEON_FLAG; } __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ /*Ignore exception.*/ @@ -107,26 +110,26 @@ opus_uint32 opus_cpu_capabilities(void) while(fgets(buf, 512, cpuinfo) != NULL) { -# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \ + || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) /* Search for edsp and neon flag */ if(memcmp(buf, "Features", 8) == 0) { char *p; -# if defined(OPUS_ARM_MAY_HAVE_EDSP) p = strstr(buf, " edsp"); if(p != NULL && (p[5] == ' ' || p[5] == '\n')) - flags |= OPUS_CPU_ARM_EDSP; -# endif + flags |= OPUS_CPU_ARM_EDSP_FLAG; # if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) p = strstr(buf, " neon"); if(p != NULL && (p[5] == ' ' || p[5] == '\n')) - flags |= OPUS_CPU_ARM_NEON; + flags |= OPUS_CPU_ARM_NEON_FLAG; # endif } # endif -# if defined(OPUS_ARM_MAY_HAVE_MEDIA) +# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \ + || defined(OPUS_ARM_MAY_HAVE_NEON) || 
defined(OPUS_ARM_MAY_HAVE_NEON_INTR) /* Search for media capabilities (>= ARMv6) */ if(memcmp(buf, "CPU architecture:", 17) == 0) { @@ -134,7 +137,7 @@ opus_uint32 opus_cpu_capabilities(void) version = atoi(buf+17); if(version >= 6) - flags |= OPUS_CPU_ARM_MEDIA; + flags |= OPUS_CPU_ARM_MEDIA_FLAG; } # endif } @@ -156,18 +159,26 @@ int opus_select_arch(void) opus_uint32 flags = opus_cpu_capabilities(); int arch = 0; - if(!(flags & OPUS_CPU_ARM_EDSP)) + if(!(flags & OPUS_CPU_ARM_EDSP_FLAG)) { + /* Asserts ensure arch values are sequential */ + celt_assert(arch == OPUS_ARCH_ARM_V4); return arch; + } arch++; - if(!(flags & OPUS_CPU_ARM_MEDIA)) + if(!(flags & OPUS_CPU_ARM_MEDIA_FLAG)) { + celt_assert(arch == OPUS_ARCH_ARM_EDSP); return arch; + } arch++; - if(!(flags & OPUS_CPU_ARM_NEON)) + if(!(flags & OPUS_CPU_ARM_NEON_FLAG)) { + celt_assert(arch == OPUS_ARCH_ARM_MEDIA); return arch; + } arch++; + celt_assert(arch == OPUS_ARCH_ARM_NEON); return arch; } diff --git a/thirdparty/opus/celt/arm/armcpu.h b/thirdparty/opus/celt/arm/armcpu.h index ac5744606e..820262ff5f 100644 --- a/thirdparty/opus/celt/arm/armcpu.h +++ b/thirdparty/opus/celt/arm/armcpu.h @@ -66,6 +66,12 @@ # if defined(OPUS_HAVE_RTCD) int opus_select_arch(void); + +#define OPUS_ARCH_ARM_V4 (0) +#define OPUS_ARCH_ARM_EDSP (1) +#define OPUS_ARCH_ARM_MEDIA (2) +#define OPUS_ARCH_ARM_NEON (3) + # endif #endif diff --git a/thirdparty/opus/celt/arm/celt_neon_intr.c b/thirdparty/opus/celt/arm/celt_neon_intr.c index 47dce15ba5..47bbe3dc22 100644 --- a/thirdparty/opus/celt/arm/celt_neon_intr.c +++ b/thirdparty/opus/celt/arm/celt_neon_intr.c @@ -37,7 +37,66 @@ #include <arm_neon.h> #include "../pitch.h" -#if !defined(FIXED_POINT) +#if defined(FIXED_POINT) +void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) +{ + int j; + int32x4_t a = vld1q_s32(sum); + /* Load y[0...3] */ + /* This requires len>0 to always be valid (which we assert in the C code). 
*/ + int16x4_t y0 = vld1_s16(y); + y += 4; + + for (j = 0; j + 8 <= len; j += 8) + { + /* Load x[0...7] */ + int16x8_t xx = vld1q_s16(x); + int16x4_t x0 = vget_low_s16(xx); + int16x4_t x4 = vget_high_s16(xx); + /* Load y[4...11] */ + int16x8_t yy = vld1q_s16(y); + int16x4_t y4 = vget_low_s16(yy); + int16x4_t y8 = vget_high_s16(yy); + int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0); + int32x4_t a1 = vmlal_lane_s16(a0, y4, x4, 0); + + int16x4_t y1 = vext_s16(y0, y4, 1); + int16x4_t y5 = vext_s16(y4, y8, 1); + int32x4_t a2 = vmlal_lane_s16(a1, y1, x0, 1); + int32x4_t a3 = vmlal_lane_s16(a2, y5, x4, 1); + + int16x4_t y2 = vext_s16(y0, y4, 2); + int16x4_t y6 = vext_s16(y4, y8, 2); + int32x4_t a4 = vmlal_lane_s16(a3, y2, x0, 2); + int32x4_t a5 = vmlal_lane_s16(a4, y6, x4, 2); + + int16x4_t y3 = vext_s16(y0, y4, 3); + int16x4_t y7 = vext_s16(y4, y8, 3); + int32x4_t a6 = vmlal_lane_s16(a5, y3, x0, 3); + int32x4_t a7 = vmlal_lane_s16(a6, y7, x4, 3); + + y0 = y8; + a = a7; + x += 8; + y += 8; + } + + for (; j < len; j++) + { + int16x4_t x0 = vld1_dup_s16(x); /* load next x */ + int32x4_t a0 = vmlal_s16(a, y0, x0); + + int16x4_t y4 = vld1_dup_s16(y); /* load next y */ + y0 = vext_s16(y0, y4, 1); + a = a0; + x++; + y++; + } + + vst1q_s32(sum, a); +} + +#else /* * Function: xcorr_kernel_neon_float * --------------------------------- diff --git a/thirdparty/opus/celt/arm/pitch_arm.h b/thirdparty/opus/celt/arm/pitch_arm.h index 8626ed75b9..14331169ee 100644 --- a/thirdparty/opus/celt/arm/pitch_arm.h +++ b/thirdparty/opus/celt/arm/pitch_arm.h @@ -46,10 +46,53 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch); # endif -# if !defined(OPUS_HAVE_RTCD) +# if defined(OPUS_HAVE_RTCD) && \ + ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ + (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ + (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) +extern 
opus_val32 +(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, + const opus_val16 *, opus_val32 *, int, int); +# define OVERRIDE_PITCH_XCORR (1) +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ + xcorr, len, max_pitch)) + +# elif defined(OPUS_ARM_PRESUME_EDSP) || \ + defined(OPUS_ARM_PRESUME_MEDIA) || \ + defined(OPUS_ARM_PRESUME_NEON) # define OVERRIDE_PITCH_XCORR (1) # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) + +# endif + +# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) +void xcorr_kernel_neon_fixed( + const opus_val16 *x, + const opus_val16 *y, + opus_val32 sum[4], + int len); +# endif + +# if defined(OPUS_HAVE_RTCD) && \ + (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) + +extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( + const opus_val16 *x, + const opus_val16 *y, + opus_val32 sum[4], + int len); + +# define OVERRIDE_XCORR_KERNEL (1) +# define xcorr_kernel(x, y, sum, len, arch) \ + ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len)) + +# elif defined(OPUS_ARM_PRESUME_NEON_INTR) +# define OVERRIDE_XCORR_KERNEL (1) +# define xcorr_kernel(x, y, sum, len, arch) \ + ((void)arch, xcorr_kernel_neon_fixed(x, y, sum, len)) + # endif #else /* Start !FIXED_POINT */ @@ -57,12 +100,27 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, #if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch); -#if !defined(OPUS_HAVE_RTCD) || defined(OPUS_ARM_PRESUME_NEON_INTR) -#define OVERRIDE_PITCH_XCORR (1) +#endif + +# if defined(OPUS_HAVE_RTCD) && \ + (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) +extern void +(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, + const 
opus_val16 *, opus_val32 *, int, int); + +# define OVERRIDE_PITCH_XCORR (1) +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ + xcorr, len, max_pitch)) + +# elif defined(OPUS_ARM_PRESUME_NEON_INTR) + +# define OVERRIDE_PITCH_XCORR (1) # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch)) -#endif -#endif + +# endif #endif /* end !FIXED_POINT */ + #endif diff --git a/thirdparty/opus/celt/bands.c b/thirdparty/opus/celt/bands.c index 25f229e267..87eaa6c031 100644 --- a/thirdparty/opus/celt/bands.c +++ b/thirdparty/opus/celt/bands.c @@ -414,7 +414,7 @@ static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT /* Compensating for the mid normalization */ xp = MULT16_32_Q15(mid, xp); /* mid and side are in Q15, not Q14 like X and Y */ - mid2 = SHR32(mid, 1); + mid2 = SHR16(mid, 1); El = MULT16_16(mid2, mid2) + side - 2*xp; Er = MULT16_16(mid2, mid2) + side + 2*xp; if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28)) @@ -714,7 +714,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, if (qn!=1) { if (encode) - itheta = (itheta*qn+8192)>>14; + itheta = (itheta*(opus_int32)qn+8192)>>14; /* Entropy coding of the angle. We use a uniform pdf for the time split, a step for stereo, and a triangular one for the rest. 
*/ diff --git a/thirdparty/opus/celt/celt.h b/thirdparty/opus/celt/celt.h index a423b95046..d1f7eb690d 100644 --- a/thirdparty/opus/celt/celt.h +++ b/thirdparty/opus/celt/celt.h @@ -209,7 +209,7 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, #endif #ifndef OVERRIDE_COMB_FILTER_CONST -# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ +# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12)) #endif diff --git a/thirdparty/opus/celt/celt_decoder.c b/thirdparty/opus/celt/celt_decoder.c index b688f2a4e3..b978bb34d1 100644 --- a/thirdparty/opus/celt/celt_decoder.c +++ b/thirdparty/opus/celt/celt_decoder.c @@ -82,6 +82,7 @@ struct OpusCustomDecoder { int error; int last_pitch_index; int loss_count; + int skip_plc; int postfilter_period; int postfilter_period_old; opus_val16 postfilter_gain; @@ -164,8 +165,6 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod st->signalling = 1; st->arch = opus_select_arch(); - st->loss_count = 0; - opus_custom_decoder_ctl(st, OPUS_RESET_STATE); return OPUS_OK; @@ -447,7 +446,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) loss_count = st->loss_count; start = st->start; - noise_based = loss_count >= 5 || start != 0; + noise_based = loss_count >= 5 || start != 0 || st->skip_plc; if (noise_based) { /* Noise-based PLC/CNG */ @@ -832,6 +831,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat return frame_size/st->downsample; } + /* Check if there are at least two packets received consecutively before + * turning on the pitch-based PLC */ + st->skip_plc = st->loss_count != 0; + if (dec == NULL) { ec_dec_init(&_dec,(unsigned char*)data,len); @@ -1198,6 +1201,7 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...) 
((char*)&st->DECODER_RESET_START - (char*)st)); for (i=0;i<2*st->mode->nbEBands;i++) oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT); + st->skip_plc = 1; } break; case OPUS_GET_PITCH_REQUEST: diff --git a/thirdparty/opus/celt/celt_encoder.c b/thirdparty/opus/celt/celt_encoder.c index 41fbfd49c8..3ee7a4d3f7 100644 --- a/thirdparty/opus/celt/celt_encoder.c +++ b/thirdparty/opus/celt/celt_encoder.c @@ -1175,10 +1175,10 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, if (N>COMBFILTER_MAXPERIOD) { - OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD); + OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD); } else { OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N); - OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N); + OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N); } } while (++c<CC); @@ -1281,12 +1281,15 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000)) { - opus_val16 rate_factor; + opus_val16 rate_factor = Q15ONE; + if (bitrate < 64000) + { #ifdef FIXED_POINT - rate_factor = MAX16(0,(bitrate-32000)); + rate_factor = MAX16(0,(bitrate-32000)); #else - rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000)); + rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000)); #endif + } if (constrained_vbr) rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15)); target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target); diff --git a/thirdparty/opus/celt/celt_lpc.c b/thirdparty/opus/celt/celt_lpc.c index f02145af0d..b410a21c5f 100644 --- a/thirdparty/opus/celt/celt_lpc.c +++ b/thirdparty/opus/celt/celt_lpc.c @@ -49,8 +49,7 @@ int p float *lpc = _lpc; #endif - for (i = 0; i < p; i++) - lpc[i] = 0; + 
OPUS_CLEAR(lpc, p); if (ac[0] != 0) { for (i = 0; i < p; i++) { diff --git a/thirdparty/opus/celt/cwrs.c b/thirdparty/opus/celt/cwrs.c index 2fa9f89cd6..9722f0ac86 100644 --- a/thirdparty/opus/celt/cwrs.c +++ b/thirdparty/opus/celt/cwrs.c @@ -74,7 +74,7 @@ int log2_frac(opus_uint32 val, int frac) /*Although derived separately, the pulse vector coding scheme is equivalent to a Pyramid Vector Quantizer \cite{Fis86}. Some additional notes about an early version appear at - http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering + https://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering and the definitions of some terms have evolved since that was written. The conversion from a pulse vector to an integer index (encoding) and back diff --git a/thirdparty/opus/celt/fixed_generic.h b/thirdparty/opus/celt/fixed_generic.h index ac67d37ce8..1cfd6d6989 100644 --- a/thirdparty/opus/celt/fixed_generic.h +++ b/thirdparty/opus/celt/fixed_generic.h @@ -37,16 +37,32 @@ #define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b)) /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ +#if OPUS_FAST_INT64 +#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),16)) +#else #define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) +#endif /** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */ +#if OPUS_FAST_INT64 +#define MULT16_32_P16(a,b) ((opus_val32)PSHR((opus_int64)((opus_val16)(a))*(b),16)) +#else #define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) +#endif /** 16x32 multiplication, followed by a 15-bit shift right. 
Results fits in 32 bits */ +#if OPUS_FAST_INT64 +#define MULT16_32_Q15(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),15)) +#else #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) +#endif /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ +#if OPUS_FAST_INT64 +#define MULT32_32_Q31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),31)) +#else #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) +#endif /** Compile-time conversion of float constant to 16-bit value */ #define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) diff --git a/thirdparty/opus/celt/kiss_fft.c b/thirdparty/opus/celt/kiss_fft.c index 4ed37d2bb7..1f8fd05321 100644 --- a/thirdparty/opus/celt/kiss_fft.c +++ b/thirdparty/opus/celt/kiss_fft.c @@ -191,7 +191,7 @@ static void kf_bfly3( kiss_fft_cpx * Fout_beg = Fout; #ifdef FIXED_POINT - epi3.r = -16384; + /*epi3.r = -16384;*/ /* Unused */ epi3.i = -28378; #else epi3 = st->twiddles[fstride*m]; diff --git a/thirdparty/opus/celt/mathops.c b/thirdparty/opus/celt/mathops.c index 3f8c5dcc0e..21a01f52e4 100644 --- a/thirdparty/opus/celt/mathops.c +++ b/thirdparty/opus/celt/mathops.c @@ -164,7 +164,7 @@ opus_val16 celt_cos_norm(opus_val32 x) { return _celt_cos_pi_2(EXTRACT16(x)); } else { - return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x))); + return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x))); } } else { if (x&0x0000ffff) diff --git a/thirdparty/opus/celt/pitch.c b/thirdparty/opus/celt/pitch.c index 1d89cb0342..bf46e7d562 100644 --- a/thirdparty/opus/celt/pitch.c +++ b/thirdparty/opus/celt/pitch.c @@ -412,6 +412,41 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR RESTORE_STACK; } +#ifdef FIXED_POINT +static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, 
opus_val32 yy) +{ + opus_val32 x2y2; + int sx, sy, shift; + opus_val32 g; + opus_val16 den; + if (xy == 0 || xx == 0 || yy == 0) + return 0; + sx = celt_ilog2(xx)-14; + sy = celt_ilog2(yy)-14; + shift = sx + sy; + x2y2 = MULT16_16_Q14(VSHR32(xx, sx), VSHR32(yy, sy)); + if (shift & 1) { + if (x2y2 < 32768) + { + x2y2 <<= 1; + shift--; + } else { + x2y2 >>= 1; + shift++; + } + } + den = celt_rsqrt_norm(x2y2); + g = MULT16_32_Q15(den, xy); + g = VSHR32(g, (shift>>1)-1); + return EXTRACT16(MIN32(g, Q15ONE)); +} +#else +static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy) +{ + return xy/celt_sqrt(1+xx*yy); +} +#endif + static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}; opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch) @@ -450,18 +485,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, yy = yy_lookup[T0]; best_xy = xy; best_yy = yy; -#ifdef FIXED_POINT - { - opus_val32 x2y2; - int sh, t; - x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy)); - sh = celt_ilog2(x2y2)>>1; - t = VSHR32(x2y2, 2*(sh-7)); - g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1); - } -#else - g = g0 = xy/celt_sqrt(1+xx*yy); -#endif + g = g0 = compute_pitch_gain(xy, xx, yy); /* Look for any pitch at T/k */ for (k=2;k<=15;k++) { @@ -484,24 +508,13 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, T1b = celt_udiv(2*second_check[k]*T0+k, 2*k); } dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch); - xy += xy2; - yy = yy_lookup[T1] + yy_lookup[T1b]; -#ifdef FIXED_POINT - { - opus_val32 x2y2; - int sh, t; - x2y2 = 1+MULT32_32_Q31(xx,yy); - sh = celt_ilog2(x2y2)>>1; - t = VSHR32(x2y2, 2*(sh-7)); - g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1); - } -#else - g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy); -#endif + xy = HALF32(xy + xy2); + yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]); + g1 = 
compute_pitch_gain(xy, xx, yy); if (abs(T1-prev_period)<=1) cont = prev_gain; else if (abs(T1-prev_period)<=2 && 5*k*k < T0) - cont = HALF32(prev_gain); + cont = HALF16(prev_gain); else cont = 0; thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont); diff --git a/thirdparty/opus/celt/pitch.h b/thirdparty/opus/celt/pitch.h index 65a77a6ecc..d3503532a0 100644 --- a/thirdparty/opus/celt/pitch.h +++ b/thirdparty/opus/celt/pitch.h @@ -187,25 +187,6 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch); #if !defined(OVERRIDE_PITCH_XCORR) -/*Is run-time CPU detection enabled on this platform?*/ -# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \ - || (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \ - && !defined(OPUS_ARM_PRESUME_NEON_INTR))) -extern -# if defined(FIXED_POINT) -opus_val32 -# else -void -# endif -(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, - const opus_val16 *, opus_val32 *, int, int); - -# define OVERRIDE_PITCH_XCORR -# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ - ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ - xcorr, len, max_pitch)) -# else - #ifdef FIXED_POINT opus_val32 #else @@ -214,7 +195,6 @@ void celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch, int arch); -# endif #endif #endif diff --git a/thirdparty/opus/celt/rate.c b/thirdparty/opus/celt/rate.c index b28d8feccd..7dfa5be8a6 100644 --- a/thirdparty/opus/celt/rate.c +++ b/thirdparty/opus/celt/rate.c @@ -296,7 +296,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, done = 0; for (j=end;j-->start;) { - int tmp = bits1[j] + (lo*bits2[j]>>ALLOC_STEPS); + int tmp = bits1[j] + ((opus_int32)lo*bits2[j]>>ALLOC_STEPS); if (tmp < thresh[j] && !done) { if (tmp >= alloc_floor) diff --git a/thirdparty/opus/celt/vq.c b/thirdparty/opus/celt/vq.c index f358396065..d29f38fd8e 100644 --- 
a/thirdparty/opus/celt/vq.c +++ b/thirdparty/opus/celt/vq.c @@ -271,7 +271,7 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc best_id = 0; /* The squared magnitude term gets added anyway, so we might as well add it outside the loop */ - yy = ADD32(yy, 1); + yy = ADD16(yy, 1); j=0; do { opus_val16 Rxy, Ryy; diff --git a/thirdparty/opus/celt/x86/pitch_sse.h b/thirdparty/opus/celt/x86/pitch_sse.h index d4cbeb8b9c..e5f87ab51a 100644 --- a/thirdparty/opus/celt/x86/pitch_sse.h +++ b/thirdparty/opus/celt/x86/pitch_sse.h @@ -102,21 +102,21 @@ opus_val32 celt_inner_prod_sse( #if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT) #define OVERRIDE_CELT_INNER_PROD #define celt_inner_prod(x, y, N, arch) \ - ((void)arch, celt_inner_prod_sse4_1(x, y, N)) + ((void)arch, celt_inner_prod_sse4_1(x, y, N)) #elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) #define OVERRIDE_CELT_INNER_PROD #define celt_inner_prod(x, y, N, arch) \ - ((void)arch, celt_inner_prod_sse2(x, y, N)) + ((void)arch, celt_inner_prod_sse2(x, y, N)) #elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT) #define OVERRIDE_CELT_INNER_PROD #define celt_inner_prod(x, y, N, arch) \ - ((void)arch, celt_inner_prod_sse(x, y, N)) + ((void)arch, celt_inner_prod_sse(x, y, N)) #elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \ - (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) + (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( const opus_val16 *x, @@ -138,19 +138,19 @@ extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( #undef comb_filter_const void dual_inner_prod_sse(const opus_val16 *x, - const opus_val16 *y01, - const opus_val16 *y02, - int N, - opus_val32 *xy1, - opus_val32 *xy2); + const opus_val16 *y01, + const opus_val16 *y02, + int N, + opus_val32 *xy1, + opus_val32 *xy2); 
void comb_filter_const_sse(opus_val32 *y, - opus_val32 *x, - int T, - int N, - opus_val16 g10, - opus_val16 g11, - opus_val16 g12); + opus_val32 *x, + int T, + int N, + opus_val16 g10, + opus_val16 g11, + opus_val16 g12); #if defined(OPUS_X86_PRESUME_SSE) @@ -169,7 +169,7 @@ extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( opus_val32 *xy1, opus_val32 *xy2); -#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ +#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ ((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2)) extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])( @@ -181,7 +181,7 @@ extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])( opus_val16 g11, opus_val16 g12); -#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ +#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ ((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12)) #define NON_STATIC_COMB_FILTER_CONST_C diff --git a/thirdparty/opus/celt/x86/x86_celt_map.c b/thirdparty/opus/celt/x86/x86_celt_map.c index 8e5e449275..47ba41b9ee 100644 --- a/thirdparty/opus/celt/x86/x86_celt_map.c +++ b/thirdparty/opus/celt/x86/x86_celt_map.c @@ -72,7 +72,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( #endif #if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ - (!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) + (!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( const opus_val16 *x, diff --git a/thirdparty/opus/celt/x86/x86cpu.c b/thirdparty/opus/celt/x86/x86cpu.c index 555a576b8a..080eb25e41 100644 --- a/thirdparty/opus/celt/x86/x86cpu.c +++ b/thirdparty/opus/celt/x86/x86cpu.c @@ -46,7 +46,7 @@ #include <intrin.h> static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType) { - 
__cpuid((int*)CPUInfo, InfoType); + __cpuid((int*)CPUInfo, InfoType); } #else diff --git a/thirdparty/opus/opus_multistream_encoder.c b/thirdparty/opus/opus_multistream_encoder.c index e722e31ab8..1698223a16 100644 --- a/thirdparty/opus/opus_multistream_encoder.c +++ b/thirdparty/opus/opus_multistream_encoder.c @@ -277,7 +277,7 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b sum = celt_inner_prod(in, in, frame_size+overlap, 0); /* This should filter out both NaNs and ridiculous signals that could cause NaNs further down. */ - if (!(sum < 1e9f) || celt_isnan(sum)) + if (!(sum < 1e18f) || celt_isnan(sum)) { OPUS_CLEAR(in, frame_size+overlap); preemph_mem[c] = 0; diff --git a/thirdparty/opus/silk/CNG.c b/thirdparty/opus/silk/CNG.c index 61787c2302..8443ad63bb 100644 --- a/thirdparty/opus/silk/CNG.c +++ b/thirdparty/opus/silk/CNG.c @@ -34,9 +34,8 @@ POSSIBILITY OF SUCH DAMAGE. /* Generates excitation for CNG LPC synthesis */ static OPUS_INLINE void silk_CNG_exc( - opus_int32 exc_Q10[], /* O CNG excitation signal Q10 */ + opus_int32 exc_Q14[], /* O CNG excitation signal Q10 */ opus_int32 exc_buf_Q14[], /* I Random samples buffer Q10 */ - opus_int32 Gain_Q16, /* I Gain to apply */ opus_int length, /* I Length */ opus_int32 *rand_seed /* I/O Seed to random index generator */ ) @@ -55,7 +54,7 @@ static OPUS_INLINE void silk_CNG_exc( idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask ); silk_assert( idx >= 0 ); silk_assert( idx <= CNG_BUF_MASK_MAX ); - exc_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) ); + exc_Q14[ i ] = exc_buf_Q14[ idx ]; } *rand_seed = seed; } @@ -85,7 +84,7 @@ void silk_CNG( ) { opus_int i, subfr; - opus_int32 sum_Q6, max_Gain_Q16, gain_Q16; + opus_int32 LPC_pred_Q10, max_Gain_Q16, gain_Q16, gain_Q10; opus_int16 A_Q12[ MAX_LPC_ORDER ]; silk_CNG_struct *psCNG = &psDec->sCNG; SAVE_STACK; @@ -124,8 +123,8 @@ void silk_CNG( /* Add CNG when packet is lost or during DTX */ if( 
psDec->lossCnt ) { - VARDECL( opus_int32, CNG_sig_Q10 ); - ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 ); + VARDECL( opus_int32, CNG_sig_Q14 ); + ALLOC( CNG_sig_Q14, length + MAX_LPC_ORDER, opus_int32 ); /* Generate CNG excitation */ gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] ); @@ -138,42 +137,46 @@ void silk_CNG( gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 ); gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 ); } - silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, gain_Q16, length, &psCNG->rand_seed ); + gain_Q10 = silk_RSHIFT( gain_Q16, 6 ); + + silk_CNG_exc( CNG_sig_Q14 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, length, &psCNG->rand_seed ); /* Convert CNG NLSF to filter representation */ silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order ); /* Generate CNG signal, by synthesis filtering */ - silk_memcpy( CNG_sig_Q10, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) ); + silk_memcpy( CNG_sig_Q14, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) ); for( i = 0; i < length; i++ ) { silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 ); /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - sum_Q6 = silk_RSHIFT( psDec->LPC_order, 1 ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ 
MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] ); + LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] ); if( psDec->LPC_order == 16 ) { - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] ); - sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] ); + LPC_pred_Q10 = silk_SMLAWB( 
LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] ); } /* Update states */ - CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 ); - - frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( CNG_sig_Q10[ MAX_LPC_ORDER + i ], 10 ) ); + CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q14[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 ); + + /* Scale with Gain and add to input signal */ + frame[ i ] = (opus_int16)silk_ADD_SAT16( frame[ i ], silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( CNG_sig_Q14[ MAX_LPC_ORDER + i ], gain_Q10 ), 8 ) ) ); + } - silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); + silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q14[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); } else { silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order * sizeof( opus_int32 ) ); } diff --git a/thirdparty/opus/silk/NLSF_del_dec_quant.c b/thirdparty/opus/silk/NLSF_del_dec_quant.c index c3b9efccfa..de88fee060 100644 --- a/thirdparty/opus/silk/NLSF_del_dec_quant.c +++ b/thirdparty/opus/silk/NLSF_del_dec_quant.c @@ -46,8 +46,9 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns ) { opus_int i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10; - opus_int pred_Q10, diff_Q10, out0_Q10, out1_Q10, rate0_Q5, rate1_Q5; - opus_int32 RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25, pred_coef_Q16; + opus_int pred_Q10, diff_Q10, rate0_Q5, rate1_Q5; + opus_int16 out0_Q10, out1_Q10; + opus_int32 RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25; opus_int ind_sort[ NLSF_QUANT_DEL_DEC_STATES ]; opus_int8 ind[ NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ]; opus_int16 
prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ]; @@ -74,8 +75,8 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); } - out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 ); - out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 ); + out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out0_Q10, quant_step_size_Q16 ), 16 ); + out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out1_Q10, quant_step_size_Q16 ), 16 ); } silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 ); /* must be power of two */ @@ -85,12 +86,11 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns prev_out_Q10[ 0 ] = 0; for( i = order - 1; ; i-- ) { rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ]; - pred_coef_Q16 = silk_LSHIFT( (opus_int32)pred_coef_Q8[ i ], 8 ); in_Q10 = x_Q10[ i ]; for( j = 0; j < nStates; j++ ) { - pred_Q10 = silk_SMULWB( pred_coef_Q16, prev_out_Q10[ j ] ); + pred_Q10 = silk_RSHIFT( silk_SMULBB( (opus_int16)pred_coef_Q8[ i ], prev_out_Q10[ j ] ), 8 ); res_Q10 = silk_SUB16( in_Q10, pred_Q10 ); - ind_tmp = silk_SMULWB( (opus_int32)inv_quant_step_size_Q6, res_Q10 ); + ind_tmp = silk_RSHIFT( silk_SMULBB( inv_quant_step_size_Q6, res_Q10 ), 16 ); ind_tmp = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 ); ind[ j ][ i ] = (opus_int8)ind_tmp; diff --git a/thirdparty/opus/silk/NLSF_encode.c b/thirdparty/opus/silk/NLSF_encode.c index 03a036fda2..f03c3f1c35 100644 --- a/thirdparty/opus/silk/NLSF_encode.c +++ b/thirdparty/opus/silk/NLSF_encode.c @@ -46,7 +46,7 @@ opus_int32 silk_NLSF_encode( /* O Returns ) { opus_int i, s, ind1, bestIndex, prob_Q8, bits_q7; - opus_int32 W_tmp_Q9; + opus_int32 W_tmp_Q9, ret; VARDECL( 
opus_int32, err_Q26 ); VARDECL( opus_int32, RD_Q25 ); VARDECL( opus_int, tempIndices1 ); @@ -131,6 +131,7 @@ opus_int32 silk_NLSF_encode( /* O Returns /* Decode */ silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB ); + ret = RD_Q25[ 0 ]; RESTORE_STACK; - return RD_Q25[ 0 ]; + return ret; } diff --git a/thirdparty/opus/silk/NSQ.c b/thirdparty/opus/silk/NSQ.c index a065884070..43e3fee7e0 100644 --- a/thirdparty/opus/silk/NSQ.c +++ b/thirdparty/opus/silk/NSQ.c @@ -31,6 +31,8 @@ POSSIBILITY OF SUCH DAMAGE. #include "main.h" #include "stack_alloc.h" +#include "NSQ.h" + static OPUS_INLINE void silk_nsq_scale_states( const silk_encoder_state *psEncC, /* I Encoder State */ @@ -66,7 +68,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer( opus_int offset_Q10, /* I */ opus_int length, /* I Input length */ opus_int shapingLPCOrder, /* I Noise shaping AR filter order */ - opus_int predictLPCOrder /* I Prediction filter order */ + opus_int predictLPCOrder, /* I Prediction filter order */ + int arch /* I Architecture */ ); #endif @@ -155,7 +158,7 @@ void silk_NSQ_c silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10, - offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder ); + offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch ); x_Q3 += psEncC->subfr_length; pulses += psEncC->subfr_length; @@ -198,15 +201,19 @@ void silk_noise_shape_quantizer( opus_int offset_Q10, /* I */ opus_int length, /* I Input length */ opus_int shapingLPCOrder, /* I Noise shaping AR filter order */ - opus_int predictLPCOrder /* I Prediction filter order */ + opus_int predictLPCOrder, /* I Prediction filter order */ + int arch /* I Architecture */ ) { - opus_int i, j; + opus_int i; opus_int32 LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13; opus_int32 n_LF_Q12, r_Q10, rr_Q10, 
q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20; opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; opus_int32 *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr; +#ifdef silk_short_prediction_create_arch_coef + opus_int32 a_Q12_arch[MAX_LPC_ORDER]; +#endif shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; @@ -215,32 +222,16 @@ void silk_noise_shape_quantizer( /* Set up short term AR state */ psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ]; +#ifdef silk_short_prediction_create_arch_coef + silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder); +#endif + for( i = 0; i < length; i++ ) { /* Generate dither */ NSQ->rand_seed = silk_RAND( NSQ->rand_seed ); /* Short-term prediction */ - silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LPC_pred_Q10 = silk_RSHIFT( predictLPCOrder, 1 ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ 0 ], a_Q12[ 0 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -1 ], a_Q12[ 1 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -2 ], a_Q12[ 2 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -3 ], a_Q12[ 3 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -4 ], a_Q12[ 4 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -5 ], a_Q12[ 5 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -6 ], a_Q12[ 6 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -7 ], a_Q12[ 7 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -8 ], a_Q12[ 8 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -9 ], a_Q12[ 9 ] ); - if( predictLPCOrder == 16 ) { - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -10 ], a_Q12[ 10 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -11 ], a_Q12[ 11 ] ); - LPC_pred_Q10 = 
silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -12 ], a_Q12[ 12 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -13 ], a_Q12[ 13 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -14 ], a_Q12[ 14 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -15 ], a_Q12[ 15 ] ); - } + LPC_pred_Q10 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch); /* Long-term prediction */ if( signalType == TYPE_VOICED ) { @@ -259,23 +250,8 @@ void silk_noise_shape_quantizer( /* Noise shape feedback */ silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ - tmp2 = psLPC_Q14[ 0 ]; - tmp1 = NSQ->sAR2_Q14[ 0 ]; - NSQ->sAR2_Q14[ 0 ] = tmp2; - n_AR_Q12 = silk_RSHIFT( shapingLPCOrder, 1 ); - n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ 0 ] ); - for( j = 2; j < shapingLPCOrder; j += 2 ) { - tmp2 = NSQ->sAR2_Q14[ j - 1 ]; - NSQ->sAR2_Q14[ j - 1 ] = tmp1; - n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ j - 1 ] ); - tmp1 = NSQ->sAR2_Q14[ j + 0 ]; - NSQ->sAR2_Q14[ j + 0 ] = tmp2; - n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ j ] ); - } - NSQ->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; - n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] ); + n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(psLPC_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch); - n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 ); /* Q11 -> Q12 */ n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 ); n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 ); diff --git a/thirdparty/opus/silk/NSQ_del_dec.c b/thirdparty/opus/silk/NSQ_del_dec.c index aff560c221..ab6feeac98 100644 --- a/thirdparty/opus/silk/NSQ_del_dec.c +++ b/thirdparty/opus/silk/NSQ_del_dec.c @@ -31,6 +31,8 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "main.h" #include "stack_alloc.h" +#include "NSQ.h" + typedef struct { opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; @@ -106,7 +108,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( opus_int warping_Q16, /* I */ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ - opus_int decisionDelay /* I */ + opus_int decisionDelay, /* I */ + int arch /* I */ ); void silk_NSQ_del_dec_c( @@ -260,7 +263,7 @@ void silk_NSQ_del_dec_c( silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, - psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay ); + psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay, psEncC->arch ); x_Q3 += psEncC->subfr_length; pulses += psEncC->subfr_length; @@ -333,7 +336,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( opus_int warping_Q16, /* I */ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ - opus_int decisionDelay /* I */ + opus_int decisionDelay, /* I */ + int arch /* I */ ) { opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; @@ -343,6 +347,10 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; +#ifdef silk_short_prediction_create_arch_coef + opus_int32 a_Q12_arch[MAX_LPC_ORDER]; +#endif + VARDECL( NSQ_sample_pair, psSampleState ); NSQ_del_dec_struct *psDD; 
NSQ_sample_struct *psSS; @@ -355,6 +363,10 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); +#ifdef silk_short_prediction_create_arch_coef + silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder); +#endif + for( i = 0; i < length; i++ ) { /* Perform common calculations used in all states */ @@ -398,27 +410,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( /* Pointer used in short term prediction and shaping */ psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; /* Short-term prediction */ - silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ 0 ], a_Q12[ 0 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] ); - if( predictLPCOrder == 16 ) { - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q12[ 10 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q12[ 11 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q12[ 12 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q12[ 13 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ 
-14 ], a_Q12[ 14 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q12[ 15 ] ); - } + LPC_pred_Q14 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch); LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ /* Noise shape feedback */ diff --git a/thirdparty/opus/silk/PLC.c b/thirdparty/opus/silk/PLC.c index 34a94bc313..fb6ea887b7 100644 --- a/thirdparty/opus/silk/PLC.c +++ b/thirdparty/opus/silk/PLC.c @@ -365,7 +365,8 @@ static OPUS_INLINE void silk_PLC_conceal( } /* Add prediction to LPC excitation */ - sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 ); + sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], + silk_LSHIFT_SAT32( LPC_pred_Q10, 4 )); /* Scale with Gain */ frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) ); diff --git a/thirdparty/opus/silk/decode_core.c b/thirdparty/opus/silk/decode_core.c index b88991e349..e569c0e72b 100644 --- a/thirdparty/opus/silk/decode_core.c +++ b/thirdparty/opus/silk/decode_core.c @@ -219,7 +219,7 @@ void silk_decode_core( } /* Add prediction to LPC excitation */ - sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( pres_Q14[ i ], LPC_pred_Q10, 4 ); + sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( pres_Q14[ i ], silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ) ); /* Scale with gain */ pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) ); diff --git a/thirdparty/opus/silk/fixed/burg_modified_FIX.c b/thirdparty/opus/silk/fixed/burg_modified_FIX.c index 4878553b65..17d0e0993c 100644 --- a/thirdparty/opus/silk/fixed/burg_modified_FIX.c +++ b/thirdparty/opus/silk/fixed/burg_modified_FIX.c @@ -150,8 +150,11 @@ void silk_burg_modified_c( C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) 
*/ C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ - tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ - tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ + /* We sometimes have get overflows in the multiplications (even beyond +/- 2^32), + but they cancel each other and the real result seems to always fit in a 32-bit + signed integer. This was determined experimentally, not theoretically (unfortunately). */ + tmp1 = silk_MLA_ovflw( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ + tmp2 = silk_MLA_ovflw( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ } tmp1 = -tmp1; /* Q17 */ tmp2 = -tmp2; /* Q17 */ @@ -200,12 +203,14 @@ void silk_burg_modified_c( /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */ rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */ - /* Newton-Raphson iteration */ - rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ - rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ - if( num < 0 ) { - /* Ensure adjusted reflection coefficients has the original sign */ - rc_Q31 = -rc_Q31; + if( rc_Q31 > 0 ) { + /* Newton-Raphson iteration */ + rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ + rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ + if( num < 0 ) { + /* Ensure adjusted reflection coefficients has the original sign */ + rc_Q31 = -rc_Q31; + } } invGain_Q30 = minInvGain_Q30; reached_max_gain = 1; diff --git a/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c b/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c index 3756095fbe..3c3583c5fc 100644 --- a/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c +++ b/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c @@ -300,12 +300,14 @@ void 
silk_burg_modified_sse4_1( /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */ rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */ - /* Newton-Raphson iteration */ - rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ - rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ - if( num < 0 ) { - /* Ensure adjusted reflection coefficients has the original sign */ - rc_Q31 = -rc_Q31; + if( rc_Q31 > 0 ) { + /* Newton-Raphson iteration */ + rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ + rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ + if( num < 0 ) { + /* Ensure adjusted reflection coefficients has the original sign */ + rc_Q31 = -rc_Q31; + } } invGain_Q30 = minInvGain_Q30; reached_max_gain = 1; diff --git a/thirdparty/opus/silk/macros.h b/thirdparty/opus/silk/macros.h index bc30303466..d3ca347520 100644 --- a/thirdparty/opus/silk/macros.h +++ b/thirdparty/opus/silk/macros.h @@ -34,6 +34,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "opus_types.h" #include "opus_defines.h" +#include "arch.h" #if OPUS_GNUC_PREREQ(3, 0) #define opus_likely(x) (__builtin_expect(!!(x), 1)) @@ -43,31 +44,32 @@ POSSIBILITY OF SUCH DAMAGE. #define opus_unlikely(x) (!!(x)) #endif -/* Set this if opus_int64 is a native type of the CPU. */ -#define OPUS_FAST_INT64 (defined(__x86_64__) || defined(__LP64__) || defined(_WIN64)) - /* This is an OPUS_INLINE header file for general platform. 
*/ /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ #if OPUS_FAST_INT64 -#define silk_SMULWB(a32, b32) (((a32) * (opus_int64)((opus_int16)(b32))) >> 16) +#define silk_SMULWB(a32, b32) ((opus_int32)(((a32) * (opus_int64)((opus_int16)(b32))) >> 16)) #else #define silk_SMULWB(a32, b32) ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16)) #endif /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ #if OPUS_FAST_INT64 -#define silk_SMLAWB(a32, b32, c32) ((a32) + (((b32) * (opus_int64)((opus_int16)(c32))) >> 16)) +#define silk_SMLAWB(a32, b32, c32) ((opus_int32)((a32) + (((b32) * (opus_int64)((opus_int16)(c32))) >> 16))) #else #define silk_SMLAWB(a32, b32, c32) ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16))) #endif /* (a32 * (b32 >> 16)) >> 16 */ +#if OPUS_FAST_INT64 +#define silk_SMULWT(a32, b32) ((opus_int32)(((a32) * (opus_int64)((b32) >> 16)) >> 16)) +#else #define silk_SMULWT(a32, b32) (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16)) +#endif /* a32 + (b32 * (c32 >> 16)) >> 16 */ #if OPUS_FAST_INT64 -#define silk_SMLAWT(a32, b32, c32) ((a32) + (((b32) * ((opus_int64)(c32) >> 16)) >> 16)) +#define silk_SMLAWT(a32, b32, c32) ((opus_int32)((a32) + (((b32) * ((opus_int64)(c32) >> 16)) >> 16))) #else #define silk_SMLAWT(a32, b32, c32) ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16)) #endif @@ -89,14 +91,14 @@ POSSIBILITY OF SUCH DAMAGE. 
/* (a32 * b32) >> 16 */ #if OPUS_FAST_INT64 -#define silk_SMULWW(a32, b32) (((opus_int64)(a32) * (b32)) >> 16) +#define silk_SMULWW(a32, b32) ((opus_int32)(((opus_int64)(a32) * (b32)) >> 16)) #else #define silk_SMULWW(a32, b32) silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)) #endif /* a32 + ((b32 * c32) >> 16) */ #if OPUS_FAST_INT64 -#define silk_SMLAWW(a32, b32, c32) ((a32) + (((opus_int64)(b32) * (c32)) >> 16)) +#define silk_SMLAWW(a32, b32, c32) ((opus_int32)((a32) + (((opus_int64)(b32) * (c32)) >> 16))) #else #define silk_SMLAWW(a32, b32, c32) silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16)) #endif @@ -149,5 +151,9 @@ static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32) #include "arm/macros_armv5e.h" #endif +#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR +#include "arm/macros_arm64.h" +#endif + #endif /* SILK_MACROS_H */ diff --git a/thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h b/thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h index f6afd923e8..ad1cfe2a9b 100644 --- a/thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h +++ b/thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h @@ -62,7 +62,8 @@ static inline void silk_noise_shape_quantizer_del_dec( opus_int warping_Q16, /* I */ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ - opus_int decisionDelay /* I */ + opus_int decisionDelay, /* I */ + int arch /* I */ ) { opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; @@ -82,6 +83,9 @@ static inline void silk_noise_shape_quantizer_del_dec( opus_int32 cur, prev, next; + /*Unused.*/ + (void)arch; + //Intialize b_Q14 variables b_Q14_0 = b_Q14[ 0 ]; b_Q14_1 = b_Q14[ 1 ]; diff --git a/thirdparty/opus/silk/process_NLSFs.c b/thirdparty/opus/silk/process_NLSFs.c index c27cf03046..0ab71f0163 100644 --- a/thirdparty/opus/silk/process_NLSFs.c +++ b/thirdparty/opus/silk/process_NLSFs.c @@ -41,7 +41,7 @@ void 
silk_process_NLSFs( { opus_int i, doInterpolate; opus_int NLSF_mu_Q20; - opus_int32 i_sqr_Q15; + opus_int16 i_sqr_Q15; opus_int16 pNLSF0_temp_Q15[ MAX_LPC_ORDER ]; opus_int16 pNLSFW_QW[ MAX_LPC_ORDER ]; opus_int16 pNLSFW0_temp_QW[ MAX_LPC_ORDER ]; @@ -79,7 +79,8 @@ void silk_process_NLSFs( /* Update NLSF weights with contribution from first half */ i_sqr_Q15 = silk_LSHIFT( silk_SMULBB( psEncC->indices.NLSFInterpCoef_Q2, psEncC->indices.NLSFInterpCoef_Q2 ), 11 ); for( i = 0; i < psEncC->predictLPCOrder; i++ ) { - pNLSFW_QW[ i ] = silk_SMLAWB( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), (opus_int32)pNLSFW0_temp_QW[ i ], i_sqr_Q15 ); + pNLSFW_QW[ i ] = silk_ADD16( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), silk_RSHIFT( + silk_SMULBB( pNLSFW0_temp_QW[ i ], i_sqr_Q15 ), 16) ); silk_assert( pNLSFW_QW[ i ] >= 1 ); } } @@ -100,6 +101,7 @@ void silk_process_NLSFs( } else { /* Copy LPC coefficients for first half from second half */ + silk_assert( psEncC->predictLPCOrder <= MAX_LPC_ORDER ); silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) ); } } diff --git a/thirdparty/opus/silk/sort.c b/thirdparty/opus/silk/sort.c index 8670dbdd02..7187c9efb1 100644 --- a/thirdparty/opus/silk/sort.c +++ b/thirdparty/opus/silk/sort.c @@ -33,7 +33,7 @@ POSSIBILITY OF SUCH DAMAGE. 
/* Best case: O(n) for an already sorted array */ /* Worst case: O(n^2) for an inversely sorted array */ /* */ -/* Shell short: http://en.wikipedia.org/wiki/Shell_sort */ +/* Shell short: https://en.wikipedia.org/wiki/Shell_sort */ #include "SigProc_FIX.h" diff --git a/thirdparty/opus/silk/stereo_LR_to_MS.c b/thirdparty/opus/silk/stereo_LR_to_MS.c index 42906e6f67..dda0298de2 100644 --- a/thirdparty/opus/silk/stereo_LR_to_MS.c +++ b/thirdparty/opus/silk/stereo_LR_to_MS.c @@ -77,7 +77,7 @@ void silk_stereo_LR_to_MS( ALLOC( LP_mid, frame_length, opus_int16 ); ALLOC( HP_mid, frame_length, opus_int16 ); for( n = 0; n < frame_length; n++ ) { - sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 ); + sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 ); LP_mid[ n ] = sum; HP_mid[ n ] = mid[ n + 1 ] - sum; } @@ -86,7 +86,7 @@ void silk_stereo_LR_to_MS( ALLOC( LP_side, frame_length, opus_int16 ); ALLOC( HP_side, frame_length, opus_int16 ); for( n = 0; n < frame_length; n++ ) { - sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + side[ n + 2 ], side[ n + 1 ], 1 ), 2 ); + sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + (opus_int32)side[ n + 2 ], side[ n + 1 ], 1 ), 2 ); LP_side[ n ] = sum; HP_side[ n ] = side[ n + 1 ] - sum; } @@ -207,7 +207,7 @@ void silk_stereo_LR_to_MS( pred0_Q13 += delta0_Q13; pred1_Q13 += delta1_Q13; w_Q24 += deltaw_Q24; - sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ + sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */ sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); @@ -217,7 +217,7 @@ void silk_stereo_LR_to_MS( pred1_Q13 = -pred_Q13[ 1 ]; w_Q24 = silk_LSHIFT( 
width_Q14, 10 ); for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) { - sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ + sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */ sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); diff --git a/thirdparty/opus/silk/x86/NSQ_sse.c b/thirdparty/opus/silk/x86/NSQ_sse.c index 72f34fd6fc..bb3c5f1955 100644 --- a/thirdparty/opus/silk/x86/NSQ_sse.c +++ b/thirdparty/opus/silk/x86/NSQ_sse.c @@ -221,7 +221,7 @@ void silk_NSQ_sse4_1( { silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10, - offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder ); + offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch ); } x_Q3 += psEncC->subfr_length; diff --git a/thirdparty/opus/silk/x86/main_sse.h b/thirdparty/opus/silk/x86/main_sse.h index afd5ec26e1..d8d61310ed 100644 --- a/thirdparty/opus/silk/x86/main_sse.h +++ b/thirdparty/opus/silk/x86/main_sse.h @@ -207,7 +207,8 @@ void silk_noise_shape_quantizer( opus_int offset_Q10, /* I */ opus_int length, /* I Input length */ opus_int shapingLPCOrder, /* I Noise shaping AR filter order */ - opus_int predictLPCOrder /* I Prediction filter order */ + opus_int predictLPCOrder, /* I Prediction filter order */ + int arch /* I Architecture */ ); /**************************/ diff --git a/thirdparty/rg-etc1/rg_etc1.cpp b/thirdparty/rg-etc1/rg_etc1.cpp deleted file mode 100644 index 8e28b53f9d..0000000000 --- a/thirdparty/rg-etc1/rg_etc1.cpp +++ /dev/null @@ -1,2446 +0,0 @@ -// File: 
rg_etc1.cpp - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com> -// Please see ZLIB license at the end of rg_etc1.h. -// -// For more information Ericsson Texture Compression (ETC/ETC1), see: -// http://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt -// -// v1.04 - 5/15/14 - Fix signed vs. unsigned subtraction problem (noticed when compiled with gcc) in pack_etc1_block_init(). -// This issue would cause an assert when this func. was called in debug. (Note this module was developed/testing with MSVC, -// I still need to test it throughly when compiled with gcc.) -// -// v1.03 - 5/12/13 - Initial public release -#include "rg_etc1.h" - -#include <stdlib.h> -#include <memory.h> -#include <assert.h> -//#include <stdio.h> -#include <math.h> - -#pragma warning (disable: 4201) // nonstandard extension used : nameless struct/union - -#if defined(_DEBUG) || defined(DEBUG) -#define RG_ETC1_BUILD_DEBUG -#endif - -#define RG_ETC1_ASSERT assert - -namespace rg_etc1 -{ - typedef unsigned char uint8; - typedef unsigned short uint16; - typedef unsigned int uint; - typedef unsigned int uint32; - typedef long long int64; - typedef unsigned long long uint64; - - const uint32 cUINT32_MAX = 0xFFFFFFFFU; - const uint64 cUINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64; - - template<typename T> inline T minimum(T a, T b) { return (a < b) ? a : b; } - template<typename T> inline T minimum(T a, T b, T c) { return minimum(minimum(a, b), c); } - template<typename T> inline T maximum(T a, T b) { return (a > b) ? a : b; } - template<typename T> inline T maximum(T a, T b, T c) { return maximum(maximum(a, b), c); } - template<typename T> inline T clamp(T value, T low, T high) { return (value < low) ? low : ((value > high) ? 
high : value); } - template<typename T> inline T square(T value) { return value * value; } - template<typename T> inline void zero_object(T& obj) { memset((void*)&obj, 0, sizeof(obj)); } - template<typename T> inline void zero_this(T* pObj) { memset((void*)pObj, 0, sizeof(*pObj)); } - - template<class T, size_t N> T decay_array_to_subtype(T (&a)[N]); - -#define RG_ETC1_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X))) - - enum eNoClamp { cNoClamp }; - - struct color_quad_u8 - { - static inline int clamp(int v) { if (v & 0xFFFFFF00U) v = (~(static_cast<int>(v) >> 31)) & 0xFF; return v; } - - struct component_traits { enum { cSigned = false, cFloat = false, cMin = 0U, cMax = 255U }; }; - - public: - typedef unsigned char component_t; - typedef int parameter_t; - - enum { cNumComps = 4 }; - - union - { - struct - { - component_t r; - component_t g; - component_t b; - component_t a; - }; - - component_t c[cNumComps]; - - uint32 m_u32; - }; - - inline color_quad_u8() - { - } - - inline color_quad_u8(const color_quad_u8& other) : m_u32(other.m_u32) - { - } - - explicit inline color_quad_u8(parameter_t y, parameter_t alpha = component_traits::cMax) - { - set(y, alpha); - } - - inline color_quad_u8(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) - { - set(red, green, blue, alpha); - } - - explicit inline color_quad_u8(eNoClamp, parameter_t y, parameter_t alpha = component_traits::cMax) - { - set_noclamp_y_alpha(y, alpha); - } - - inline color_quad_u8(eNoClamp, parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) - { - set_noclamp_rgba(red, green, blue, alpha); - } - - inline void clear() - { - m_u32 = 0; - } - - inline color_quad_u8& operator= (const color_quad_u8& other) - { - m_u32 = other.m_u32; - return *this; - } - - inline color_quad_u8& set_rgb(const color_quad_u8& other) - { - r = other.r; - g = other.g; - b = other.b; - return *this; - } - - inline 
color_quad_u8& operator= (parameter_t y) - { - set(y, component_traits::cMax); - return *this; - } - - inline color_quad_u8& set(parameter_t y, parameter_t alpha = component_traits::cMax) - { - y = clamp(y); - alpha = clamp(alpha); - r = static_cast<component_t>(y); - g = static_cast<component_t>(y); - b = static_cast<component_t>(y); - a = static_cast<component_t>(alpha); - return *this; - } - - inline color_quad_u8& set_noclamp_y_alpha(parameter_t y, parameter_t alpha = component_traits::cMax) - { - RG_ETC1_ASSERT( (y >= component_traits::cMin) && (y <= component_traits::cMax) ); - RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) ); - - r = static_cast<component_t>(y); - g = static_cast<component_t>(y); - b = static_cast<component_t>(y); - a = static_cast<component_t>(alpha); - return *this; - } - - inline color_quad_u8& set(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) - { - r = static_cast<component_t>(clamp(red)); - g = static_cast<component_t>(clamp(green)); - b = static_cast<component_t>(clamp(blue)); - a = static_cast<component_t>(clamp(alpha)); - return *this; - } - - inline color_quad_u8& set_noclamp_rgba(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha) - { - RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) ); - RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) ); - RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) ); - RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) ); - - r = static_cast<component_t>(red); - g = static_cast<component_t>(green); - b = static_cast<component_t>(blue); - a = static_cast<component_t>(alpha); - return *this; - } - - inline color_quad_u8& set_noclamp_rgb(parameter_t red, parameter_t green, parameter_t blue) - { - RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= 
component_traits::cMax) ); - RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) ); - RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) ); - - r = static_cast<component_t>(red); - g = static_cast<component_t>(green); - b = static_cast<component_t>(blue); - return *this; - } - - static inline parameter_t get_min_comp() { return component_traits::cMin; } - static inline parameter_t get_max_comp() { return component_traits::cMax; } - static inline bool get_comps_are_signed() { return component_traits::cSigned; } - - inline component_t operator[] (uint i) const { RG_ETC1_ASSERT(i < cNumComps); return c[i]; } - inline component_t& operator[] (uint i) { RG_ETC1_ASSERT(i < cNumComps); return c[i]; } - - inline color_quad_u8& set_component(uint i, parameter_t f) - { - RG_ETC1_ASSERT(i < cNumComps); - - c[i] = static_cast<component_t>(clamp(f)); - - return *this; - } - - inline color_quad_u8& set_grayscale(parameter_t l) - { - component_t x = static_cast<component_t>(clamp(l)); - c[0] = x; - c[1] = x; - c[2] = x; - return *this; - } - - inline color_quad_u8& clamp(const color_quad_u8& l, const color_quad_u8& h) - { - for (uint i = 0; i < cNumComps; i++) - c[i] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[i], l[i], h[i])); - return *this; - } - - inline color_quad_u8& clamp(parameter_t l, parameter_t h) - { - for (uint i = 0; i < cNumComps; i++) - c[i] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[i], l, h)); - return *this; - } - - // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). - inline parameter_t get_luma() const - { - return static_cast<parameter_t>((19595U * r + 38470U * g + 7471U * b + 32768U) >> 16U); - } - - // Returns REC 709 luma. 
- inline parameter_t get_luma_rec709() const - { - return static_cast<parameter_t>((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); - } - - inline uint squared_distance_rgb(const color_quad_u8& c) const - { - return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b); - } - - inline uint squared_distance_rgba(const color_quad_u8& c) const - { - return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b) + rg_etc1::square(a - c.a); - } - - inline bool rgb_equals(const color_quad_u8& rhs) const - { - return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); - } - - inline bool operator== (const color_quad_u8& rhs) const - { - return m_u32 == rhs.m_u32; - } - - color_quad_u8& operator+= (const color_quad_u8& other) - { - for (uint i = 0; i < 4; i++) - c[i] = static_cast<component_t>(clamp(c[i] + other.c[i])); - return *this; - } - - color_quad_u8& operator-= (const color_quad_u8& other) - { - for (uint i = 0; i < 4; i++) - c[i] = static_cast<component_t>(clamp(c[i] - other.c[i])); - return *this; - } - - friend color_quad_u8 operator+ (const color_quad_u8& lhs, const color_quad_u8& rhs) - { - color_quad_u8 result(lhs); - result += rhs; - return result; - } - - friend color_quad_u8 operator- (const color_quad_u8& lhs, const color_quad_u8& rhs) - { - color_quad_u8 result(lhs); - result -= rhs; - return result; - } - }; // class color_quad_u8 - - struct vec3F - { - float m_s[3]; - - inline vec3F() { } - inline vec3F(float s) { m_s[0] = s; m_s[1] = s; m_s[2] = s; } - inline vec3F(float x, float y, float z) { m_s[0] = x; m_s[1] = y; m_s[2] = z; } - - inline float operator[] (uint i) const { RG_ETC1_ASSERT(i < 3); return m_s[i]; } - - inline vec3F& operator += (const vec3F& other) { for (uint i = 0; i < 3; i++) m_s[i] += other.m_s[i]; return *this; } - - inline vec3F& operator *= (float s) { for (uint i = 0; i < 3; i++) m_s[i] *= s; return *this; } - }; - - enum etc_constants - { - cETC1BytesPerBlock = 8U, - - 
cETC1SelectorBits = 2U, - cETC1SelectorValues = 1U << cETC1SelectorBits, - cETC1SelectorMask = cETC1SelectorValues - 1U, - - cETC1BlockShift = 2U, - cETC1BlockSize = 1U << cETC1BlockShift, - - cETC1LSBSelectorIndicesBitOffset = 0, - cETC1MSBSelectorIndicesBitOffset = 16, - - cETC1FlipBitOffset = 32, - cETC1DiffBitOffset = 33, - - cETC1IntenModifierNumBits = 3, - cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, - cETC1RightIntenModifierTableBitOffset = 34, - cETC1LeftIntenModifierTableBitOffset = 37, - - // Base+Delta encoding (5 bit bases, 3 bit delta) - cETC1BaseColorCompNumBits = 5, - cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, - - cETC1DeltaColorCompNumBits = 3, - cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, - cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, - - cETC1BaseColor5RBitOffset = 59, - cETC1BaseColor5GBitOffset = 51, - cETC1BaseColor5BBitOffset = 43, - - cETC1DeltaColor3RBitOffset = 56, - cETC1DeltaColor3GBitOffset = 48, - cETC1DeltaColor3BBitOffset = 40, - - // Absolute (non-delta) encoding (two 4-bit per component bases) - cETC1AbsColorCompNumBits = 4, - cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, - - cETC1AbsColor4R1BitOffset = 60, - cETC1AbsColor4G1BitOffset = 52, - cETC1AbsColor4B1BitOffset = 44, - - cETC1AbsColor4R2BitOffset = 56, - cETC1AbsColor4G2BitOffset = 48, - cETC1AbsColor4B2BitOffset = 40, - - cETC1ColorDeltaMin = -4, - cETC1ColorDeltaMax = 3, - - // Delta3: - // 0 1 2 3 4 5 6 7 - // 000 001 010 011 100 101 110 111 - // 0 1 2 3 -4 -3 -2 -1 - }; - - static uint8 g_quant5_tab[256+16]; - - static const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = - { - { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 }, - { -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 } - }; - - static const uint8 g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; - static const uint8 
g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; - - // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. - static uint16 g_etc1_inverse_lookup[2*8*4][256]; // [diff/inten_table/selector][desired_color] - - // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color. - // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8) - static const uint16 g_color8_to_etc_block_config_0_255[2][33] = - { - { 0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E, - 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF }, - { 0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E, - 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF }, - }; - - // Really only [254][11]. 
- static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] = - { - { 0x021C, 0x0D0D, 0xFFFF }, { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, { 0x0113, 0x0217, 0xFFFF }, { 0x0116, 0x031E, - 0x0B0E, 0x0405, 0xFFFF }, { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, { - 0x0303, 0x0215, 0x0607, 0xFFFF }, { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, { 0x0100, 0x0024, 0x0306, - 0x0025, 0x041B, 0x0E0D, 0xFFFF }, { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, { 0x0213, 0x0317, 0xFFFF }, { 0x0112, - 0x0505, 0xFFFF }, { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, { 0x0211, 0x0909, 0xFFFF }, { 0x0110, 0x0315, 0x0707, - 0x0419, 0x180F, 0xFFFF }, { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B, - 0x0F0D, 0xFFFF }, { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, { 0x0605, - 0x0417, 0xFFFF }, { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF - }, { 0x0519, 0x190F, 0xFFFF }, { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, { 0x0130, 0x0214, - 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, { 0x031A, - 0x0D0B, 0x091F, 0xFFFF }, { 0x0413, 0x0705, 0x0517, 0xFFFF }, { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, { - 0x0126, 0x080C, 0x0B09, 0xFFFF }, { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B, - 0xFFFF }, { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, { 0x0132, 0x0302, 0x0229, 0x110D, - 0xFFFF }, { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, { 0x0220, 0x0513, 0x0617, 0xFFFF }, { 0x0135, 0x0805, - 0x0327, 0xFFFF }, { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F, - 0xFFFF }, { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, { - 
0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, { 0x0300, 0x0224, 0x0506, 0x0521, - 0x0F0B, 0x0B1F, 0xFFFF }, { 0x041A, 0x0613, 0x0717, 0xFFFF }, { 0x0235, 0x0905, 0xFFFF }, { 0x0312, 0x0134, 0x0523, - 0x0427, 0xFFFF }, { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F, - 0xFFFF }, { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B, - 0x130D, 0xFFFF }, { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, { - 0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, { 0x0623, 0x0527, 0xFFFF }, { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F, - 0xFFFF }, { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D, - 0xFFFF }, { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529, - 0x140D, 0xFFFF }, { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, { 0x051A, 0x0813, 0x0B05, 0x0917, - 0xFFFF }, { 0x0723, 0x0435, 0x0627, 0xFFFF }, { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, { 0x0326, 0x0A0C, 0x012E, - 0x0811, 0x0A19, 0x1E0F, 0xFFFF }, { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, { 0x0410, 0x0901, 0x0633, 0x0725, - 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, { 0x0332, 0x0502, 0x0821, 0x0139, - 0x120B, 0x0E1F, 0xFFFF }, { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, { - 0x0823, 0x032F, 0xFFFF }, { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, { 0x0422, 0x0604, 0x090A, - 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, { 0x032A, 0x0825, 0x0437, - 0x0729, 0x0C1B, 0x160D, 0xFFFF }, { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, { 0x0500, - 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, { 0x061A, 0x0635, 
0x0D05, 0xFFFF }, { 0x0923, 0x0827, 0xFFFF }, { - 0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19, - 0x072B, 0xFFFF }, { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D, - 0xFFFF }, { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, { - 0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, { 0x0520, 0x0A23, 0x0927, 0xFFFF }, { 0x0B11, 0x1209, 0x013B, 0x052F, - 0xFFFF }, { 0x0616, 0x081E, 0x0D19, 0xFFFF }, { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D, - 0x0F1D, 0xFFFF }, { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, { - 0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05, - 0x0D17, 0xFFFF }, { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, { 0x1309, 0x023B, 0x062F, 0xFFFF }, { 0x0612, 0x0434, - 0x013A, 0x0C11, 0x0E19, 0xFFFF }, { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, { 0x0D01, - 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, { 0x0610, 0x0A29, 0x190D, 0xFFFF }, { 0x0718, 0x042C, 0x0C21, - 0x0539, 0x160B, 0x121F, 0xFFFF }, { 0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF }, { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27, - 0xFFFF }, { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, { 0x0716, 0x003C, 0x091E, - 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D, - 0xFFFF }, { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, { - 0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, { - 0x081A, 0x0D23, 0x0C27, 0xFFFF }, { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307, - 0x1019, 0x0B2B, 0x013D, 
0xFFFF }, { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, { 0x0C33, - 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, { 0x0818, 0x052C, 0x0F13, 0x180B, - 0x141F, 0xFFFF }, { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, { - 0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, { 0x1119, 0x023D, 0xFFFF }, { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103, - 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B, - 0xFFFF }, { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, { 0x0730, 0x0814, 0x0536, - 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, { 0x091A, - 0x1709, 0x063B, 0x0A2F, 0xFFFF }, { 0x1011, 0x1219, 0x033D, 0xFFFF }, { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115, - 0x1507, 0x0D2B, 0xFFFF }, { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, { - 0x0E29, 0x1D0D, 0xFFFF }, { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF - }, { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, { 0x0820, - 0x1111, 0x1319, 0x1809, 0xFFFF }, { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, { 0x0916, 0x023C, 0x0B1E, 0x1031, - 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, { - 0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, { 0x072A, 0x1213, 0x1317, 0xFFFF }, { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35, - 0x1505, 0xFFFF }, { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F, - 0xFFFF }, { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D, - 0x161D, 0xFFFF }, { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 
0xFFFF }, { 0x1221, 0x0B39, 0x1029, - 0xFFFF }, { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, { 0x0832, - 0x0A02, 0x1223, 0x1127, 0xFFFF }, { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, { 0x0920, 0x1519, 0x063D, - 0xFFFF }, { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133, - 0x1225, 0x0E37, 0x161B, 0xFFFF }, { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, { 0x0C39, 0x1D0B, 0x191F, 0xFFFF - }, { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, { - 0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, { 0x1331, - 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D, - 0x181D, 0xFFFF }, { 0x0926, 0x072E, 0x1229, 0xFFFF }, { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, { 0x0A10, 0x1513, - 0x1617, 0xFFFF }, { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF - }, { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, { - 0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF }, { - 0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, { 0x1613, 0x1717, 0xFFFF }, { 0x092A, 0x1235, 0x1905, - 0xFFFF }, { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09, - 0x0C3B, 0x102F, 0xFFFF }, { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, { 0x1531, 0x1701, 0x1803, 0x122D, - 0x1A1D, 0xFFFF }, { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, { 0x0A26, 0x003E, 0x082E, 0x1621, - 0x0F39, 0x1429, 0x003F, 0xFFFF }, { 0x1713, 0x1C1F, 0xFFFF }, { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, { 0x0C18, - 0x092C, 0x1623, 0x1527, 0xFFFF }, 
{ 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, { 0x0A28, 0x0D1C, 0x1919, - 0x0A3D, 0xFFFF }, { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, { 0x1801, 0x1533, 0x1625, - 0x1237, 0x1A1B, 0xFFFF }, { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, { 0x0B22, 0x0D04, 0x1039, 0x1D1F, - 0xFFFF }, { 0x1813, 0x1B05, 0x1917, 0xFFFF }, { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, { 0x0B30, 0x0C14, 0x0936, - 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, { 0x0D1A, - 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, { - 0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, { 0x1913, - 0x1A17, 0xFFFF }, { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, { - 0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, { 0x0C20, - 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, { 0x0D16, 0x063C, - 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, { 0x1635, 0x1D05, 0xFFFF }, { - 0x0B2A, 0x1923, 0x1827, 0xFFFF }, { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF }, { 0x0D00, 0x0C24, 0x0F06, - 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, { - 0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, { 0x0C26, - 0x023E, 0x0A2E, 0x1B13, 0xFFFF }, { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, { 0x0E18, - 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, { 0x0C28, 0x0F1C, 0x1A31, 0x1D03, - 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 
0xFFFF }, { 0x1B21, 0x1929, - 0x053F, 0xFFFF }, { 0x0E16, 0x073C, 0x1439, 0xFFFF }, { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, { 0x1B23, - 0x1835, 0x1A27, 0xFFFF }, { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF - }, { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B, - 0x182D, 0xFFFF }, { 0x1A29, 0x063F, 0xFFFF }, { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, { 0x0D26, 0x033E, - 0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, { 0x1935, 0x1B27, 0xFFFF }, { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, { 0x0F18, - 0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, { 0x0D28, 0x1C31, 0x1E01, - 0x1B33, 0x192D, 0xFFFF }, { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, { 0x1D21, 0x1639, 0xFFFF }, { 0x0F16, - 0x083C, 0x1E13, 0x1F17, 0xFFFF }, { 0x0E22, 0x1A35, 0xFFFF }, { 0x1D23, 0x1C27, 0xFFFF }, { 0x0D2A, 0x1E11, 0x143B, - 0x182F, 0xFFFF }, { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01, - 0x1A2D, 0xFFFF }, { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, { 0x0F12, 0x0D34, - 0x0A3A, 0x1F13, 0xFFFF }, { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, { 0x1E23, 0x1D27, 0xFFFF }, { 0x0F10, 0x1F11, - 0x153B, 0x192F, 0xFFFF }, { 0x0D2C, 0x123D, 0xFFFF }, - }; - - struct etc1_block - { - // big endian uint64: - // bit ofs: 56 48 40 32 24 16 8 0 - // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 - union - { - uint64 m_uint64; - uint8 m_bytes[8]; - }; - - uint8 m_low_color[2]; - uint8 m_high_color[2]; - - enum { cNumSelectorBytes = 4 }; - uint8 m_selectors[cNumSelectorBytes]; - - inline void clear() - { - zero_this(this); - } - - inline uint get_byte_bits(uint ofs, uint num) const - { - RG_ETC1_ASSERT((ofs + num) <= 64U); - RG_ETC1_ASSERT(num && (num <= 8U)); - RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); - const uint byte_ofs = 7 - (ofs >> 
3); - const uint byte_bit_ofs = ofs & 7; - return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1); - } - - inline void set_byte_bits(uint ofs, uint num, uint bits) - { - RG_ETC1_ASSERT((ofs + num) <= 64U); - RG_ETC1_ASSERT(num && (num < 32U)); - RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); - RG_ETC1_ASSERT(bits < (1U << num)); - const uint byte_ofs = 7 - (ofs >> 3); - const uint byte_bit_ofs = ofs & 7; - const uint mask = (1 << num) - 1; - m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs); - m_bytes[byte_ofs] |= (bits << byte_bit_ofs); - } - - // false = left/right subblocks - // true = upper/lower subblocks - inline bool get_flip_bit() const - { - return (m_bytes[3] & 1) != 0; - } - - inline void set_flip_bit(bool flip) - { - m_bytes[3] &= ~1; - m_bytes[3] |= static_cast<uint8>(flip); - } - - inline bool get_diff_bit() const - { - return (m_bytes[3] & 2) != 0; - } - - inline void set_diff_bit(bool diff) - { - m_bytes[3] &= ~2; - m_bytes[3] |= (static_cast<uint>(diff) << 1); - } - - // Returns intensity modifier table (0-7) used by subblock subblock_id. - // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2) - inline uint get_inten_table(uint subblock_id) const - { - RG_ETC1_ASSERT(subblock_id < 2); - const uint ofs = subblock_id ? 2 : 5; - return (m_bytes[3] >> ofs) & 7; - } - - // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1) - inline void set_inten_table(uint subblock_id, uint t) - { - RG_ETC1_ASSERT(subblock_id < 2); - RG_ETC1_ASSERT(t < 8); - const uint ofs = subblock_id ? 2 : 5; - m_bytes[3] &= ~(7 << ofs); - m_bytes[3] |= (t << ofs); - } - - // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. 
- inline uint get_selector(uint x, uint y) const - { - RG_ETC1_ASSERT((x | y) < 4); - - const uint bit_index = x * 4 + y; - const uint byte_bit_ofs = bit_index & 7; - const uint8 *p = &m_bytes[7 - (bit_index >> 3)]; - const uint lsb = (p[0] >> byte_bit_ofs) & 1; - const uint msb = (p[-2] >> byte_bit_ofs) & 1; - const uint val = lsb | (msb << 1); - - return g_etc1_to_selector_index[val]; - } - - // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables. - inline void set_selector(uint x, uint y, uint val) - { - RG_ETC1_ASSERT((x | y | val) < 4); - const uint bit_index = x * 4 + y; - - uint8 *p = &m_bytes[7 - (bit_index >> 3)]; - - const uint byte_bit_ofs = bit_index & 7; - const uint mask = 1 << byte_bit_ofs; - - const uint etc1_val = g_selector_index_to_etc1[val]; - - const uint lsb = etc1_val & 1; - const uint msb = etc1_val >> 1; - - p[0] &= ~mask; - p[0] |= (lsb << byte_bit_ofs); - - p[-2] &= ~mask; - p[-2] |= (msb << byte_bit_ofs); - } - - inline void set_base4_color(uint idx, uint16 c) - { - if (idx) - { - set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15); - set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15); - set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15); - } - else - { - set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15); - set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15); - set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15); - } - } - - inline uint16 get_base4_color(uint idx) const - { - uint r, g, b; - if (idx) - { - r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4); - g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4); - b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4); - } - else - { - r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4); - g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4); - b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4); - } - return static_cast<uint16>(b | (g << 4U) | (r << 8U)); - } - - inline void set_base5_color(uint16 c) - { - set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 
10) & 31); - set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31); - set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31); - } - - inline uint16 get_base5_color() const - { - const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5); - const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5); - const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5); - return static_cast<uint16>(b | (g << 5U) | (r << 10U)); - } - - void set_delta3_color(uint16 c) - { - set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7); - set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7); - set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7); - } - - inline uint16 get_delta3_color() const - { - const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3); - const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3); - const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); - return static_cast<uint16>(b | (g << 3U) | (r << 6U)); - } - - // Base color 5 - static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U); - static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U); - - static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U); - static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled); - - static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U); - static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U); - - // Delta color 3 - // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) - static uint16 pack_delta3(int r, int g, int b); - - // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) - static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3); - - // Abs color 4 - static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U); - static 
uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U); - - static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U); - static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled); - - // subblock colors - static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx); - static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx); - static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx); - - static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4) - { - if (color4) - { - dst.r = src.r | (src.r << 4); - dst.g = src.g | (src.g << 4); - dst.b = src.b | (src.b << 4); - } - else - { - dst.r = (src.r >> 2) | (src.r << 3); - dst.g = (src.g >> 2) | (src.g << 3); - dst.b = (src.b >> 2) | (src.b << 3); - } - dst.a = src.a; - } - }; - - // Returns pointer to sorted array. 
- template<typename T, typename Q> - T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices) - { - RG_ETC1_ASSERT((key_ofs >= 0) && (key_ofs < sizeof(T))); - RG_ETC1_ASSERT((key_size >= 1) && (key_size <= 4)); - - if (init_indices) - { - T* p = pIndices0; - T* q = pIndices0 + (num_indices >> 1) * 2; - uint i; - for (i = 0; p != q; p += 2, i += 2) - { - p[0] = static_cast<T>(i); - p[1] = static_cast<T>(i + 1); - } - - if (num_indices & 1) - *p = static_cast<T>(i); - } - - uint hist[256 * 4]; - - memset(hist, 0, sizeof(hist[0]) * 256 * key_size); - -#define RG_ETC1_GET_KEY(p) (*(const uint*)((const uint8*)(pKeys + *(p)) + key_ofs)) -#define RG_ETC1_GET_KEY_FROM_INDEX(i) (*(const uint*)((const uint8*)(pKeys + (i)) + key_ofs)) - - if (key_size == 4) - { - T* p = pIndices0; - T* q = pIndices0 + num_indices; - for ( ; p != q; p++) - { - const uint key = RG_ETC1_GET_KEY(p); - - hist[ key & 0xFF]++; - hist[256 + ((key >> 8) & 0xFF)]++; - hist[512 + ((key >> 16) & 0xFF)]++; - hist[768 + ((key >> 24) & 0xFF)]++; - } - } - else if (key_size == 3) - { - T* p = pIndices0; - T* q = pIndices0 + num_indices; - for ( ; p != q; p++) - { - const uint key = RG_ETC1_GET_KEY(p); - - hist[ key & 0xFF]++; - hist[256 + ((key >> 8) & 0xFF)]++; - hist[512 + ((key >> 16) & 0xFF)]++; - } - } - else if (key_size == 2) - { - T* p = pIndices0; - T* q = pIndices0 + (num_indices >> 1) * 2; - - for ( ; p != q; p += 2) - { - const uint key0 = RG_ETC1_GET_KEY(p); - const uint key1 = RG_ETC1_GET_KEY(p+1); - - hist[ key0 & 0xFF]++; - hist[256 + ((key0 >> 8) & 0xFF)]++; - - hist[ key1 & 0xFF]++; - hist[256 + ((key1 >> 8) & 0xFF)]++; - } - - if (num_indices & 1) - { - const uint key = RG_ETC1_GET_KEY(p); - - hist[ key & 0xFF]++; - hist[256 + ((key >> 8) & 0xFF)]++; - } - } - else - { - RG_ETC1_ASSERT(key_size == 1); - if (key_size != 1) - return NULL; - - T* p = pIndices0; - T* q = pIndices0 + (num_indices >> 1) * 2; - - for ( ; 
p != q; p += 2) - { - const uint key0 = RG_ETC1_GET_KEY(p); - const uint key1 = RG_ETC1_GET_KEY(p+1); - - hist[key0 & 0xFF]++; - hist[key1 & 0xFF]++; - } - - if (num_indices & 1) - { - const uint key = RG_ETC1_GET_KEY(p); - - hist[key & 0xFF]++; - } - } - - T* pCur = pIndices0; - T* pNew = pIndices1; - - for (uint pass = 0; pass < key_size; pass++) - { - const uint* pHist = &hist[pass << 8]; - - uint offsets[256]; - - uint cur_ofs = 0; - for (uint i = 0; i < 256; i += 2) - { - offsets[i] = cur_ofs; - cur_ofs += pHist[i]; - - offsets[i+1] = cur_ofs; - cur_ofs += pHist[i+1]; - } - - const uint pass_shift = pass << 3; - - T* p = pCur; - T* q = pCur + (num_indices >> 1) * 2; - - for ( ; p != q; p += 2) - { - uint index0 = p[0]; - uint index1 = p[1]; - - uint c0 = (RG_ETC1_GET_KEY_FROM_INDEX(index0) >> pass_shift) & 0xFF; - uint c1 = (RG_ETC1_GET_KEY_FROM_INDEX(index1) >> pass_shift) & 0xFF; - - if (c0 == c1) - { - uint dst_offset0 = offsets[c0]; - - offsets[c0] = dst_offset0 + 2; - - pNew[dst_offset0] = static_cast<T>(index0); - pNew[dst_offset0 + 1] = static_cast<T>(index1); - } - else - { - uint dst_offset0 = offsets[c0]++; - uint dst_offset1 = offsets[c1]++; - - pNew[dst_offset0] = static_cast<T>(index0); - pNew[dst_offset1] = static_cast<T>(index1); - } - } - - if (num_indices & 1) - { - uint index = *p; - uint c = (RG_ETC1_GET_KEY_FROM_INDEX(index) >> pass_shift) & 0xFF; - - uint dst_offset = offsets[c]; - offsets[c] = dst_offset + 1; - - pNew[dst_offset] = static_cast<T>(index); - } - - T* t = pCur; - pCur = pNew; - pNew = t; - } - - return pCur; - } - -#undef RG_ETC1_GET_KEY -#undef RG_ETC1_GET_KEY_FROM_INDEX - - uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias) - { - return pack_color5(color.r, color.g, color.b, scaled, bias); - } - - uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias) - { - if (scaled) - { - r = (r * 31U + bias) / 255U; - g = (g * 31U + bias) / 255U; - b = (b * 31U + bias) / 255U; - } 
- - r = rg_etc1::minimum(r, 31U); - g = rg_etc1::minimum(g, 31U); - b = rg_etc1::minimum(b, 31U); - - return static_cast<uint16>(b | (g << 5U) | (r << 10U)); - } - - color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha) - { - uint b = packed_color5 & 31U; - uint g = (packed_color5 >> 5U) & 31U; - uint r = (packed_color5 >> 10U) & 31U; - - if (scaled) - { - b = (b << 3U) | (b >> 2U); - g = (g << 3U) | (g >> 2U); - r = (r << 3U) | (r >> 2U); - } - - return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U)); - } - - void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled) - { - color_quad_u8 c(unpack_color5(packed_color5, scaled, 0)); - r = c.r; - g = c.g; - b = c.b; - } - - bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) - { - int dc_r, dc_g, dc_b; - unpack_delta3(dc_r, dc_g, dc_b, packed_delta3); - - int b = (packed_color5 & 31U) + dc_b; - int g = ((packed_color5 >> 5U) & 31U) + dc_g; - int r = ((packed_color5 >> 10U) & 31U) + dc_r; - - bool success = true; - if (static_cast<uint>(r | g | b) > 31U) - { - success = false; - r = rg_etc1::clamp<int>(r, 0, 31); - g = rg_etc1::clamp<int>(g, 0, 31); - b = rg_etc1::clamp<int>(b, 0, 31); - } - - if (scaled) - { - b = (b << 3U) | (b >> 2U); - g = (g << 3U) | (g >> 2U); - r = (r << 3U) | (r >> 2U); - } - - result.set_noclamp_rgba(r, g, b, rg_etc1::minimum(alpha, 255U)); - return success; - } - - bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) - { - color_quad_u8 result; - const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); - r = result.r; - g = result.g; - b = result.b; - return success; - } - - uint16 etc1_block::pack_delta3(int r, int g, int b) - { - RG_ETC1_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); - RG_ETC1_ASSERT((g >= 
cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); - RG_ETC1_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); - if (r < 0) r += 8; - if (g < 0) g += 8; - if (b < 0) b += 8; - return static_cast<uint16>(b | (g << 3) | (r << 6)); - } - - void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3) - { - r = (packed_delta3 >> 6) & 7; - g = (packed_delta3 >> 3) & 7; - b = packed_delta3 & 7; - if (r >= 4) r -= 8; - if (g >= 4) g -= 8; - if (b >= 4) b -= 8; - } - - uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias) - { - return pack_color4(color.r, color.g, color.b, scaled, bias); - } - - uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias) - { - if (scaled) - { - r = (r * 15U + bias) / 255U; - g = (g * 15U + bias) / 255U; - b = (b * 15U + bias) / 255U; - } - - r = rg_etc1::minimum(r, 15U); - g = rg_etc1::minimum(g, 15U); - b = rg_etc1::minimum(b, 15U); - - return static_cast<uint16>(b | (g << 4U) | (r << 8U)); - } - - color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha) - { - uint b = packed_color4 & 15U; - uint g = (packed_color4 >> 4U) & 15U; - uint r = (packed_color4 >> 8U) & 15U; - - if (scaled) - { - b = (b << 4U) | b; - g = (g << 4U) | g; - r = (r << 4U) | r; - } - - return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U)); - } - - void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled) - { - color_quad_u8 c(unpack_color4(packed_color4, scaled, 0)); - r = c.r; - g = c.g; - b = c.b; - } - - void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx) - { - RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); - const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - - uint r, g, b; - unpack_color5(r, g, b, packed_color5, true); - - const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b); - - const int y0 = 
pInten_modifer_table[0]; - pDst[0].set(ir + y0, ig + y0, ib + y0); - - const int y1 = pInten_modifer_table[1]; - pDst[1].set(ir + y1, ig + y1, ib + y1); - - const int y2 = pInten_modifer_table[2]; - pDst[2].set(ir + y2, ig + y2, ib + y2); - - const int y3 = pInten_modifer_table[3]; - pDst[3].set(ir + y3, ig + y3, ib + y3); - } - - bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx) - { - RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); - const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - - uint r, g, b; - bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true); - - const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b); - - const int y0 = pInten_modifer_table[0]; - pDst[0].set(ir + y0, ig + y0, ib + y0); - - const int y1 = pInten_modifer_table[1]; - pDst[1].set(ir + y1, ig + y1, ib + y1); - - const int y2 = pInten_modifer_table[2]; - pDst[2].set(ir + y2, ig + y2, ib + y2); - - const int y3 = pInten_modifer_table[3]; - pDst[3].set(ir + y3, ig + y3, ib + y3); - - return success; - } - - void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx) - { - RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); - const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - - uint r, g, b; - unpack_color4(r, g, b, packed_color4, true); - - const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b); - - const int y0 = pInten_modifer_table[0]; - pDst[0].set(ir + y0, ig + y0, ib + y0); - - const int y1 = pInten_modifer_table[1]; - pDst[1].set(ir + y1, ig + y1, ib + y1); - - const int y2 = pInten_modifer_table[2]; - pDst[2].set(ir + y2, ig + y2, ib + y2); - - const int y3 = pInten_modifer_table[3]; - pDst[3].set(ir + y3, ig + y3, ib + y3); - } - - bool unpack_etc1_block(const void* pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha) - { - 
color_quad_u8* pDst = reinterpret_cast<color_quad_u8*>(pDst_pixels_rgba); - const etc1_block& block = *static_cast<const etc1_block*>(pETC1_block); - - const bool diff_flag = block.get_diff_bit(); - const bool flip_flag = block.get_flip_bit(); - const uint table_index0 = block.get_inten_table(0); - const uint table_index1 = block.get_inten_table(1); - - color_quad_u8 subblock_colors0[4]; - color_quad_u8 subblock_colors1[4]; - bool success = true; - - if (diff_flag) - { - const uint16 base_color5 = block.get_base5_color(); - const uint16 delta_color3 = block.get_delta3_color(); - etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); - - if (!etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1)) - success = false; - } - else - { - const uint16 base_color4_0 = block.get_base4_color(0); - etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); - - const uint16 base_color4_1 = block.get_base4_color(1); - etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); - } - - if (preserve_alpha) - { - if (flip_flag) - { - for (uint y = 0; y < 2; y++) - { - pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); - pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); - pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); - pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]); - pDst += 4; - } - - for (uint y = 2; y < 4; y++) - { - pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); - pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); - pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); - pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); - pDst += 4; - } - } - else - { - for (uint y = 0; y < 4; y++) - { - pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); - pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); - pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); 
- pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); - pDst += 4; - } - } - } - else - { - if (flip_flag) - { - // 0000 - // 0000 - // 1111 - // 1111 - for (uint y = 0; y < 2; y++) - { - pDst[0] = subblock_colors0[block.get_selector(0, y)]; - pDst[1] = subblock_colors0[block.get_selector(1, y)]; - pDst[2] = subblock_colors0[block.get_selector(2, y)]; - pDst[3] = subblock_colors0[block.get_selector(3, y)]; - pDst += 4; - } - - for (uint y = 2; y < 4; y++) - { - pDst[0] = subblock_colors1[block.get_selector(0, y)]; - pDst[1] = subblock_colors1[block.get_selector(1, y)]; - pDst[2] = subblock_colors1[block.get_selector(2, y)]; - pDst[3] = subblock_colors1[block.get_selector(3, y)]; - pDst += 4; - } - } - else - { - // 0011 - // 0011 - // 0011 - // 0011 - for (uint y = 0; y < 4; y++) - { - pDst[0] = subblock_colors0[block.get_selector(0, y)]; - pDst[1] = subblock_colors0[block.get_selector(1, y)]; - pDst[2] = subblock_colors1[block.get_selector(2, y)]; - pDst[3] = subblock_colors1[block.get_selector(3, y)]; - pDst += 4; - } - } - } - - return success; - } - - struct etc1_solution_coordinates - { - inline etc1_solution_coordinates() : - m_unscaled_color(0, 0, 0, 0), - m_inten_table(0), - m_color4(false) - { - } - - inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) : - m_unscaled_color(r, g, b, 255), - m_inten_table(inten_table), - m_color4(color4) - { - } - - inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) : - m_unscaled_color(c), - m_inten_table(inten_table), - m_color4(color4) - { - } - - inline etc1_solution_coordinates(const etc1_solution_coordinates& other) - { - *this = other; - } - - inline etc1_solution_coordinates& operator= (const etc1_solution_coordinates& rhs) - { - m_unscaled_color = rhs.m_unscaled_color; - m_inten_table = rhs.m_inten_table; - m_color4 = rhs.m_color4; - return *this; - } - - inline void clear() - { - m_unscaled_color.clear(); - m_inten_table = 0; - 
m_color4 = false; - } - - inline color_quad_u8 get_scaled_color() const - { - int br, bg, bb; - if (m_color4) - { - br = m_unscaled_color.r | (m_unscaled_color.r << 4); - bg = m_unscaled_color.g | (m_unscaled_color.g << 4); - bb = m_unscaled_color.b | (m_unscaled_color.b << 4); - } - else - { - br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); - bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); - bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); - } - return color_quad_u8(br, bg, bb); - } - - inline void get_block_colors(color_quad_u8* pBlock_colors) - { - int br, bg, bb; - if (m_color4) - { - br = m_unscaled_color.r | (m_unscaled_color.r << 4); - bg = m_unscaled_color.g | (m_unscaled_color.g << 4); - bb = m_unscaled_color.b | (m_unscaled_color.b << 4); - } - else - { - br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); - bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); - bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); - } - const int* pInten_table = g_etc1_inten_tables[m_inten_table]; - pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0]); - pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1]); - pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2]); - pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3]); - } - - color_quad_u8 m_unscaled_color; - uint m_inten_table; - bool m_color4; - }; - - class etc1_optimizer - { - etc1_optimizer(const etc1_optimizer&); - etc1_optimizer& operator= (const etc1_optimizer&); - - public: - etc1_optimizer() - { - clear(); - } - - void clear() - { - m_pParams = NULL; - m_pResult = NULL; - m_pSorted_luma = NULL; - m_pSorted_luma_indices = NULL; - } - - struct params : etc1_pack_params - { - params() - { - clear(); - } - - params(const etc1_pack_params& base_params) : - etc1_pack_params(base_params) - { - clear_optimizer_params(); - } - - 
void clear() - { - etc1_pack_params::clear(); - clear_optimizer_params(); - } - - void clear_optimizer_params() - { - m_num_src_pixels = 0; - m_pSrc_pixels = 0; - - m_use_color4 = false; - static const int s_default_scan_delta[] = { 0 }; - m_pScan_deltas = s_default_scan_delta; - m_scan_delta_size = 1; - - m_base_color5.clear(); - m_constrain_against_base_color5 = false; - } - - uint m_num_src_pixels; - const color_quad_u8* m_pSrc_pixels; - - bool m_use_color4; - const int* m_pScan_deltas; - uint m_scan_delta_size; - - color_quad_u8 m_base_color5; - bool m_constrain_against_base_color5; - }; - - struct results - { - uint64 m_error; - color_quad_u8 m_block_color_unscaled; - uint m_block_inten_table; - uint m_n; - uint8* m_pSelectors; - bool m_block_color4; - - inline results& operator= (const results& rhs) - { - m_block_color_unscaled = rhs.m_block_color_unscaled; - m_block_color4 = rhs.m_block_color4; - m_block_inten_table = rhs.m_block_inten_table; - m_error = rhs.m_error; - RG_ETC1_ASSERT(m_n == rhs.m_n); - memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n); - return *this; - } - }; - - void init(const params& params, results& result); - bool compute(); - - private: - struct potential_solution - { - potential_solution() : m_coords(), m_error(cUINT64_MAX), m_valid(false) - { - } - - etc1_solution_coordinates m_coords; - uint8 m_selectors[8]; - uint64 m_error; - bool m_valid; - - void clear() - { - m_coords.clear(); - m_error = cUINT64_MAX; - m_valid = false; - } - }; - - const params* m_pParams; - results* m_pResult; - - int m_limit; - - vec3F m_avg_color; - int m_br, m_bg, m_bb; - uint16 m_luma[8]; - uint32 m_sorted_luma[2][8]; - const uint32* m_pSorted_luma_indices; - uint32* m_pSorted_luma; - - uint8 m_selectors[8]; - uint8 m_best_selectors[8]; - - potential_solution m_best_solution; - potential_solution m_trial_solution; - uint8 m_temp_selectors[8]; - - bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, 
potential_solution* pBest_solution); - bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); - }; - - bool etc1_optimizer::compute() - { - const uint n = m_pParams->m_num_src_pixels; - const int scan_delta_size = m_pParams->m_scan_delta_size; - - // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. - // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index. - for (int zdi = 0; zdi < scan_delta_size; zdi++) - { - const int zd = m_pParams->m_pScan_deltas[zdi]; - const int mbb = m_bb + zd; - if (mbb < 0) continue; else if (mbb > m_limit) break; - - for (int ydi = 0; ydi < scan_delta_size; ydi++) - { - const int yd = m_pParams->m_pScan_deltas[ydi]; - const int mbg = m_bg + yd; - if (mbg < 0) continue; else if (mbg > m_limit) break; - - for (int xdi = 0; xdi < scan_delta_size; xdi++) - { - const int xd = m_pParams->m_pScan_deltas[xdi]; - const int mbr = m_br + xd; - if (mbr < 0) continue; else if (mbr > m_limit) break; - - etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); - if (m_pParams->m_quality == cHighQuality) - { - if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) - continue; - } - else - { - if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution)) - continue; - } - - // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. - // Now, for each component, attempt to refine the current solution by solving a simple linear equation. 
For example, for 4 colors: - // The goal is: - // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0 - // Rearranging this: - // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0 - // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0 - // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0 - // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0 - // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0 - // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 - // So what this means: - // optimal_block_color = avg_input - avg_inten_delta - // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta. - // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula. - // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. - - const uint max_refinement_trials = (m_pParams->m_quality == cLowQuality) ? 2 : (((xd | yd | zd) == 0) ? 
4 : 2); - for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) - { - const uint8* pSelectors = m_best_solution.m_selectors; - const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; - - int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; - const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color()); - for (uint r = 0; r < n; r++) - { - const uint s = *pSelectors++; - const int yd = pInten_table[s]; - // Compute actual delta being applied to each pixel, taking into account clamping. - delta_sum_r += rg_etc1::clamp<int>(base_color.r + yd, 0, 255) - base_color.r; - delta_sum_g += rg_etc1::clamp<int>(base_color.g + yd, 0, 255) - base_color.g; - delta_sum_b += rg_etc1::clamp<int>(base_color.b + yd, 0, 255) - base_color.b; - } - if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) - break; - const float avg_delta_r_f = static_cast<float>(delta_sum_r) / n; - const float avg_delta_g_f = static_cast<float>(delta_sum_g) / n; - const float avg_delta_b_f = static_cast<float>(delta_sum_b) / n; - const int br1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); - const int bg1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); - const int bb1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); - - bool skip = false; - - if ((mbr == br1) && (mbg == bg1) && (mbb == bb1)) - skip = true; - else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b)) - skip = true; - else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1)) - skip = true; - - if (skip) - break; - - etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4); - if (m_pParams->m_quality == cHighQuality) - { - if 
(!evaluate_solution(coords1, m_trial_solution, &m_best_solution)) - break; - } - else - { - if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution)) - break; - } - - } // refinement_trial - - } // xdi - } // ydi - } // zdi - - if (!m_best_solution.m_valid) - { - m_pResult->m_error = cUINT32_MAX; - return false; - } - - const uint8* pSelectors = m_best_solution.m_selectors; - -#ifdef RG_ETC1_BUILD_DEBUG - { - color_quad_u8 block_colors[4]; - m_best_solution.m_coords.get_block_colors(block_colors); - - const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; - uint64 actual_error = 0; - for (uint i = 0; i < n; i++) - actual_error += pSrc_pixels[i].squared_distance_rgb(block_colors[pSelectors[i]]); - - RG_ETC1_ASSERT(actual_error == m_best_solution.m_error); - } -#endif - - m_pResult->m_error = m_best_solution.m_error; - - m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color; - m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4; - - m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table; - memcpy(m_pResult->m_pSelectors, pSelectors, n); - m_pResult->m_n = n; - - return true; - } - - void etc1_optimizer::init(const params& p, results& r) - { - // This version is hardcoded for 8 pixel subblocks. - RG_ETC1_ASSERT(p.m_num_src_pixels == 8); - - m_pParams = &p; - m_pResult = &r; - - const uint n = 8; - - m_limit = m_pParams->m_use_color4 ? 
15 : 31; - - vec3F avg_color(0.0f); - - for (uint i = 0; i < n; i++) - { - const color_quad_u8& c = m_pParams->m_pSrc_pixels[i]; - const vec3F fc(c.r, c.g, c.b); - - avg_color += fc; - - m_luma[i] = static_cast<uint16>(c.r + c.g + c.b); - m_sorted_luma[0][i] = i; - } - avg_color *= (1.0f / static_cast<float>(n)); - m_avg_color = avg_color; - - m_br = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit); - m_bg = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); - m_bb = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); - - if (m_pParams->m_quality <= cMediumQuality) - { - m_pSorted_luma_indices = indirect_radix_sort(n, m_sorted_luma[0], m_sorted_luma[1], m_luma, 0, sizeof(m_luma[0]), false); - m_pSorted_luma = m_sorted_luma[0]; - if (m_pSorted_luma_indices == m_sorted_luma[0]) - m_pSorted_luma = m_sorted_luma[1]; - - for (uint i = 0; i < n; i++) - m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; - } - - m_best_solution.m_coords.clear(); - m_best_solution.m_valid = false; - m_best_solution.m_error = cUINT64_MAX; - } - - bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) - { - trial_solution.m_valid = false; - - if (m_pParams->m_constrain_against_base_color5) - { - const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; - const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; - const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; - - if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax)) - return false; - } - - const color_quad_u8 base_color(coords.get_scaled_color()); - - const uint n = 8; - - trial_solution.m_error = cUINT64_MAX; - - for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) - { - const int* 
pInten_table = g_etc1_inten_tables[inten_table]; - - color_quad_u8 block_colors[4]; - for (uint s = 0; s < 4; s++) - { - const int yd = pInten_table[s]; - block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); - } - - uint64 total_error = 0; - - const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; - for (uint c = 0; c < n; c++) - { - const color_quad_u8& src_pixel = *pSrc_pixels++; - - uint best_selector_index = 0; - uint best_error = rg_etc1::square(src_pixel.r - block_colors[0].r) + rg_etc1::square(src_pixel.g - block_colors[0].g) + rg_etc1::square(src_pixel.b - block_colors[0].b); - - uint trial_error = rg_etc1::square(src_pixel.r - block_colors[1].r) + rg_etc1::square(src_pixel.g - block_colors[1].g) + rg_etc1::square(src_pixel.b - block_colors[1].b); - if (trial_error < best_error) - { - best_error = trial_error; - best_selector_index = 1; - } - - trial_error = rg_etc1::square(src_pixel.r - block_colors[2].r) + rg_etc1::square(src_pixel.g - block_colors[2].g) + rg_etc1::square(src_pixel.b - block_colors[2].b); - if (trial_error < best_error) - { - best_error = trial_error; - best_selector_index = 2; - } - - trial_error = rg_etc1::square(src_pixel.r - block_colors[3].r) + rg_etc1::square(src_pixel.g - block_colors[3].g) + rg_etc1::square(src_pixel.b - block_colors[3].b); - if (trial_error < best_error) - { - best_error = trial_error; - best_selector_index = 3; - } - - m_temp_selectors[c] = static_cast<uint8>(best_selector_index); - - total_error += best_error; - if (total_error >= trial_solution.m_error) - break; - } - - if (total_error < trial_solution.m_error) - { - trial_solution.m_error = total_error; - trial_solution.m_coords.m_inten_table = inten_table; - memcpy(trial_solution.m_selectors, m_temp_selectors, 8); - trial_solution.m_valid = true; - } - } - trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; - trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; - - bool success = false; - if 
(pBest_solution) - { - if (trial_solution.m_error < pBest_solution->m_error) - { - *pBest_solution = trial_solution; - success = true; - } - } - - return success; - } - - bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) - { - if (m_pParams->m_constrain_against_base_color5) - { - const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; - const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; - const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; - - if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax)) - { - trial_solution.m_valid = false; - return false; - } - } - - const color_quad_u8 base_color(coords.get_scaled_color()); - - const uint n = 8; - - trial_solution.m_error = cUINT64_MAX; - - for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) - { - const int* pInten_table = g_etc1_inten_tables[inten_table]; - - uint block_inten[4]; - color_quad_u8 block_colors[4]; - for (uint s = 0; s < 4; s++) - { - const int yd = pInten_table[s]; - color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); - block_colors[s] = block_color; - block_inten[s] = block_color.r + block_color.g + block_color.b; - } - - // evaluate_solution_fast() enforces/assumesd a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors. - // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast. 
- // 0 1 2 3 - // 01 12 23 - const uint block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; - - uint64 total_error = 0; - const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; - if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) - { - if (block_inten[0] > m_pSorted_luma[n - 1]) - { - const uint min_error = labs(block_inten[0] - m_pSorted_luma[n - 1]); - if (min_error >= trial_solution.m_error) - continue; - } - - memset(&m_temp_selectors[0], 0, n); - - for (uint c = 0; c < n; c++) - total_error += block_colors[0].squared_distance_rgb(pSrc_pixels[c]); - } - else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) - { - if (m_pSorted_luma[0] > block_inten[3]) - { - const uint min_error = labs(m_pSorted_luma[0] - block_inten[3]); - if (min_error >= trial_solution.m_error) - continue; - } - - memset(&m_temp_selectors[0], 3, n); - - for (uint c = 0; c < n; c++) - total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[c]); - } - else - { - uint cur_selector = 0, c; - for (c = 0; c < n; c++) - { - const uint y = m_pSorted_luma[c]; - while ((y * 2) >= block_inten_midpoints[cur_selector]) - if (++cur_selector > 2) - goto done; - const uint sorted_pixel_index = m_pSorted_luma_indices[c]; - m_temp_selectors[sorted_pixel_index] = static_cast<uint8>(cur_selector); - total_error += block_colors[cur_selector].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]); - } -done: - while (c < n) - { - const uint sorted_pixel_index = m_pSorted_luma_indices[c]; - m_temp_selectors[sorted_pixel_index] = 3; - total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]); - ++c; - } - } - - if (total_error < trial_solution.m_error) - { - trial_solution.m_error = total_error; - trial_solution.m_coords.m_inten_table = inten_table; - memcpy(trial_solution.m_selectors, m_temp_selectors, n); - trial_solution.m_valid = true; - if (!total_error) - break; - } - } - 
trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; - trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; - - bool success = false; - if (pBest_solution) - { - if (trial_solution.m_error < pBest_solution->m_error) - { - *pBest_solution = trial_solution; - success = true; - } - } - - return success; - } - - static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c) - { - const uint limit = diff ? 32 : 16; limit; - RG_ETC1_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit)); - int c; - if (diff) - c = (packed_c >> 2) | (packed_c << 3); - else - c = packed_c | (packed_c << 4); - c += g_etc1_inten_tables[inten][selector]; - c = rg_etc1::clamp<int>(c, 0, 255); - return c; - } - - static inline int mul_8bit(int a, int b) { int t = a*b + 128; return (t + (t >> 8)) >> 8; } - - void pack_etc1_block_init() - { - for (uint diff = 0; diff < 2; diff++) - { - const uint limit = diff ? 32 : 16; - - for (uint inten = 0; inten < 8; inten++) - { - for (uint selector = 0; selector < 4; selector++) - { - const uint inverse_table_index = diff + (inten << 1) + (selector << 4); - for (uint color = 0; color < 256; color++) - { - uint best_error = cUINT32_MAX, best_packed_c = 0; - for (uint packed_c = 0; packed_c < limit; packed_c++) - { - int v = etc1_decode_value(diff, inten, selector, packed_c); - uint err = labs(v - static_cast<int>(color)); - if (err < best_error) - { - best_error = err; - best_packed_c = packed_c; - if (!best_error) - break; - } - } - RG_ETC1_ASSERT(best_error <= 255); - g_etc1_inverse_lookup[inverse_table_index][color] = static_cast<uint16>(best_packed_c | (best_error << 8)); - } - } - } - } - - uint expand5[32]; - for(int i = 0; i < 32; i++) - expand5[i] = (i << 3) | (i >> 2); - - for(int i = 0; i < 256 + 16; i++) - { - int v = clamp<int>(i - 8, 0, 255); - g_quant5_tab[i] = static_cast<uint8>(expand5[mul_8bit(v,31)]); - } - } - - // Packs solid color blocks efficiently using a set of small 
precomputed tables. - // For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time. - static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor, etc1_pack_params& pack_params) - { - pack_params; - RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]); - - static uint s_next_comp[4] = { 1, 2, 0, 1 }; - - uint best_error = cUINT32_MAX, best_i = 0; - int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; - - // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. - for (uint i = 0; i < 3; i++) - { - const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; - - const int delta_range = 1; - for (int delta = -delta_range; delta <= delta_range; delta++) - { - const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255); - - const uint16* pTable; - if (!c_plus_delta) - pTable = g_color8_to_etc_block_config_0_255[0]; - else if (c_plus_delta == 255) - pTable = g_color8_to_etc_block_config_0_255[1]; - else - pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; - - do - { - const uint x = *pTable++; - -#ifdef RG_ETC1_BUILD_DEBUG - const uint diff = x & 1; - const uint inten = (x >> 1) & 7; - const uint selector = (x >> 4) & 3; - const uint p0 = (x >> 8) & 255; - RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta); -#endif - - const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; - uint16 p1 = pInverse_table[c1]; - uint16 p2 = pInverse_table[c2]; - const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8); - if (trial_error < best_error) - { - best_error = trial_error; - best_x = x; - best_packed_c1 = p1 & 0xFF; - best_packed_c2 = p2 & 0xFF; - best_i = i; - if (!best_error) - goto 
found_perfect_match; - } - } while (*pTable != 0xFFFF); - } - } -found_perfect_match: - - const uint diff = best_x & 1; - const uint inten = (best_x >> 1) & 7; - - block.m_bytes[3] = static_cast<uint8>(((inten | (inten << 3)) << 2) | (diff << 1)); - - const uint etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3]; - *reinterpret_cast<uint16*>(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0; - *reinterpret_cast<uint16*>(&block.m_bytes[6]) = (etc1_selector & 1) ? 0xFFFF : 0; - - const uint best_packed_c0 = (best_x >> 8) & 255; - if (diff) - { - block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 << 3); - block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 << 3); - block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 << 3); - } - else - { - block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 | (best_packed_c0 << 4)); - block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 | (best_packed_c1 << 4)); - block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 | (best_packed_c2 << 4)); - } - - return best_error; - } - - static uint pack_etc1_block_solid_color_constrained( - etc1_optimizer::results& results, - uint num_colors, const uint8* pColor, - etc1_pack_params& pack_params, - bool use_diff, - const color_quad_u8* pBase_color5_unscaled) - { - RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]); - - pack_params; - static uint s_next_comp[4] = { 1, 2, 0, 1 }; - - uint best_error = cUINT32_MAX, best_i = 0; - int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; - - // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. 
- for (uint i = 0; i < 3; i++) - { - const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; - - const int delta_range = 1; - for (int delta = -delta_range; delta <= delta_range; delta++) - { - const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255); - - const uint16* pTable; - if (!c_plus_delta) - pTable = g_color8_to_etc_block_config_0_255[0]; - else if (c_plus_delta == 255) - pTable = g_color8_to_etc_block_config_0_255[1]; - else - pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; - - do - { - const uint x = *pTable++; - const uint diff = x & 1; - if (static_cast<uint>(use_diff) != diff) - { - if (*pTable == 0xFFFF) - break; - continue; - } - - if ((diff) && (pBase_color5_unscaled)) - { - const int p0 = (x >> 8) & 255; - int delta = p0 - static_cast<int>(pBase_color5_unscaled->c[i]); - if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax)) - { - if (*pTable == 0xFFFF) - break; - continue; - } - } - -#ifdef RG_ETC1_BUILD_DEBUG - { - const uint inten = (x >> 1) & 7; - const uint selector = (x >> 4) & 3; - const uint p0 = (x >> 8) & 255; - RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta); - } -#endif - - const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; - uint16 p1 = pInverse_table[c1]; - uint16 p2 = pInverse_table[c2]; - - if ((diff) && (pBase_color5_unscaled)) - { - int delta1 = (p1 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[s_next_comp[i]]); - int delta2 = (p2 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[s_next_comp[i + 1]]); - if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax)) - { - if (*pTable == 0xFFFF) - break; - continue; - } - } - - const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8); - if (trial_error < best_error) - { - best_error = trial_error; - best_x = x; - best_packed_c1 = p1 & 
0xFF; - best_packed_c2 = p2 & 0xFF; - best_i = i; - if (!best_error) - goto found_perfect_match; - } - } while (*pTable != 0xFFFF); - } - } -found_perfect_match: - - if (best_error == cUINT32_MAX) - return best_error; - - best_error *= num_colors; - - results.m_n = num_colors; - results.m_block_color4 = !(best_x & 1); - results.m_block_inten_table = (best_x >> 1) & 7; - memset(results.m_pSelectors, (best_x >> 4) & 3, num_colors); - - const uint best_packed_c0 = (best_x >> 8) & 255; - results.m_block_color_unscaled[best_i] = static_cast<uint8>(best_packed_c0); - results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1); - results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast<uint8>(best_packed_c2); - results.m_error = best_error; - - return best_error; - } - - // Function originally from RYG's public domain real-time DXT1 compressor, modified for 555. - static void dither_block_555(color_quad_u8* dest, const color_quad_u8* block) - { - int err[8],*ep1 = err,*ep2 = err+4; - uint8 *quant = g_quant5_tab+8; - - memset(dest, 0xFF, sizeof(color_quad_u8)*16); - - // process channels seperately - for(int ch=0;ch<3;ch++) - { - uint8* bp = (uint8*)block; - uint8* dp = (uint8*)dest; - - bp += ch; dp += ch; - - memset(err,0, sizeof(err)); - for(int y = 0; y < 4; y++) - { - // pixel 0 - dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)]; - ep1[0] = bp[ 0] - dp[ 0]; - - // pixel 1 - dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)]; - ep1[1] = bp[ 4] - dp[ 4]; - - // pixel 2 - dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)]; - ep1[2] = bp[ 8] - dp[ 8]; - - // pixel 3 - dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)]; - ep1[3] = bp[12] - dp[12]; - - // advance to next line - int* tmp = ep1; ep1 = ep2; ep2 = tmp; - bp += 16; - dp += 16; - } - } - } - - unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params) - 
{ - const color_quad_u8* pSrc_pixels = reinterpret_cast<const color_quad_u8*>(pSrc_pixels_rgba); - etc1_block& dst_block = *static_cast<etc1_block*>(pETC1_block); - -#ifdef RG_ETC1_BUILD_DEBUG - // Ensure all alpha values are 0xFF. - for (uint i = 0; i < 16; i++) - { - RG_ETC1_ASSERT(pSrc_pixels[i].a == 255); - } -#endif - - color_quad_u8 src_pixel0(pSrc_pixels[0]); - - // Check for solid block. - const uint32 first_pixel_u32 = pSrc_pixels->m_u32; - int r; - for (r = 15; r >= 1; --r) - if (pSrc_pixels[r].m_u32 != first_pixel_u32) - break; - if (!r) - return static_cast<unsigned int>(16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r, pack_params)); - - color_quad_u8 dithered_pixels[16]; - if (pack_params.m_dithering) - { - dither_block_555(dithered_pixels, pSrc_pixels); - pSrc_pixels = dithered_pixels; - } - - etc1_optimizer optimizer; - - uint64 best_error = cUINT64_MAX; - uint best_flip = false, best_use_color4 = false; - - uint8 best_selectors[2][8]; - etc1_optimizer::results best_results[2]; - for (uint i = 0; i < 2; i++) - { - best_results[i].m_n = 8; - best_results[i].m_pSelectors = best_selectors[i]; - } - - uint8 selectors[3][8]; - etc1_optimizer::results results[3]; - - for (uint i = 0; i < 3; i++) - { - results[i].m_n = 8; - results[i].m_pSelectors = selectors[i]; - } - - color_quad_u8 subblock_pixels[8]; - - etc1_optimizer::params params(pack_params); - params.m_num_src_pixels = 8; - params.m_pSrc_pixels = subblock_pixels; - - for (uint flip = 0; flip < 2; flip++) - { - for (uint use_color4 = 0; use_color4 < 2; use_color4++) - { - uint64 trial_error = 0; - - uint subblock; - for (subblock = 0; subblock < 2; subblock++) - { - if (flip) - memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8); - else - { - const color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2; - subblock_pixels[0] = pSrc_col[0]; subblock_pixels[1] = pSrc_col[4]; subblock_pixels[2] = pSrc_col[8]; subblock_pixels[3] = pSrc_col[12]; - subblock_pixels[4] 
= pSrc_col[1]; subblock_pixels[5] = pSrc_col[5]; subblock_pixels[6] = pSrc_col[9]; subblock_pixels[7] = pSrc_col[13]; - } - - results[2].m_error = cUINT64_MAX; - if ((params.m_quality >= cMediumQuality) && ((subblock) || (use_color4))) - { - const uint32 subblock_pixel0_u32 = subblock_pixels[0].m_u32; - for (r = 7; r >= 1; --r) - if (subblock_pixels[r].m_u32 != subblock_pixel0_u32) - break; - if (!r) - { - pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixels[0].r, pack_params, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : NULL); - } - } - - params.m_use_color4 = (use_color4 != 0); - params.m_constrain_against_base_color5 = false; - - if ((!use_color4) && (subblock)) - { - params.m_constrain_against_base_color5 = true; - params.m_base_color5 = results[0].m_block_color_unscaled; - } - - if (params.m_quality == cHighQuality) - { - static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 }; - params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_4); - params.m_pScan_deltas = s_scan_delta_0_to_4; - } - else if (params.m_quality == cMediumQuality) - { - static const int s_scan_delta_0_to_1[] = { -1, 0, 1 }; - params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_1); - params.m_pScan_deltas = s_scan_delta_0_to_1; - } - else - { - static const int s_scan_delta_0[] = { 0 }; - params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0); - params.m_pScan_deltas = s_scan_delta_0; - } - - optimizer.init(params, results[subblock]); - if (!optimizer.compute()) - break; - - if (params.m_quality >= cMediumQuality) - { - // TODO: Fix fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions. 
- const uint refinement_error_thresh0 = 3000; - const uint refinement_error_thresh1 = 6000; - if (results[subblock].m_error > refinement_error_thresh0) - { - if (params.m_quality == cMediumQuality) - { - static const int s_scan_delta_2_to_3[] = { -3, -2, 2, 3 }; - params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_2_to_3); - params.m_pScan_deltas = s_scan_delta_2_to_3; - } - else - { - static const int s_scan_delta_5_to_5[] = { -5, 5 }; - static const int s_scan_delta_5_to_8[] = { -8, -7, -6, -5, 5, 6, 7, 8 }; - if (results[subblock].m_error > refinement_error_thresh1) - { - params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_8); - params.m_pScan_deltas = s_scan_delta_5_to_8; - } - else - { - params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_5); - params.m_pScan_deltas = s_scan_delta_5_to_5; - } - } - - if (!optimizer.compute()) - break; - } - - if (results[2].m_error < results[subblock].m_error) - results[subblock] = results[2]; - } - - trial_error += results[subblock].m_error; - if (trial_error >= best_error) - break; - } - - if (subblock < 2) - continue; - - best_error = trial_error; - best_results[0] = results[0]; - best_results[1] = results[1]; - best_flip = flip; - best_use_color4 = use_color4; - - } // use_color4 - - } // flip - - int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r; - int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g; - int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b; - RG_ETC1_ASSERT(best_use_color4 || (rg_etc1::minimum(dr, dg, db) >= cETC1ColorDeltaMin) && (rg_etc1::maximum(dr, dg, db) <= cETC1ColorDeltaMax)); - - if (best_use_color4) - { - dst_block.m_bytes[0] = static_cast<uint8>(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4)); - dst_block.m_bytes[1] = static_cast<uint8>(best_results[1].m_block_color_unscaled.g | 
(best_results[0].m_block_color_unscaled.g << 4)); - dst_block.m_bytes[2] = static_cast<uint8>(best_results[1].m_block_color_unscaled.b | (best_results[0].m_block_color_unscaled.b << 4)); - } - else - { - if (dr < 0) dr += 8; dst_block.m_bytes[0] = static_cast<uint8>((best_results[0].m_block_color_unscaled.r << 3) | dr); - if (dg < 0) dg += 8; dst_block.m_bytes[1] = static_cast<uint8>((best_results[0].m_block_color_unscaled.g << 3) | dg); - if (db < 0) db += 8; dst_block.m_bytes[2] = static_cast<uint8>((best_results[0].m_block_color_unscaled.b << 3) | db); - } - - dst_block.m_bytes[3] = static_cast<uint8>( (best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip ); - - uint selector0 = 0, selector1 = 0; - if (best_flip) - { - // flipped: - // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, - // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } - // - // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, - // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } - const uint8* pSelectors0 = best_results[0].m_pSelectors; - const uint8* pSelectors1 = best_results[1].m_pSelectors; - for (int x = 3; x >= 0; --x) - { - uint b; - b = g_selector_index_to_etc1[pSelectors1[4 + x]]; - selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); - - b = g_selector_index_to_etc1[pSelectors1[x]]; - selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); - - b = g_selector_index_to_etc1[pSelectors0[4 + x]]; - selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); - - b = g_selector_index_to_etc1[pSelectors0[x]]; - selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); - } - } - else - { - // non-flipped: - // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, - // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } - // - // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, - // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } - for (int subblock = 1; subblock >= 0; --subblock) - { - const uint8* 
pSelectors = best_results[subblock].m_pSelectors + 4; - for (uint i = 0; i < 2; i++) - { - uint b; - b = g_selector_index_to_etc1[pSelectors[3]]; - selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); - - b = g_selector_index_to_etc1[pSelectors[2]]; - selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); - - b = g_selector_index_to_etc1[pSelectors[1]]; - selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); - - b = g_selector_index_to_etc1[pSelectors[0]]; - selector0 = (selector0 << 1) | (b & 1);selector1 = (selector1 << 1) | (b >> 1); - - pSelectors -= 4; - } - } - } - - dst_block.m_bytes[4] = static_cast<uint8>(selector1 >> 8); dst_block.m_bytes[5] = static_cast<uint8>(selector1 & 0xFF); - dst_block.m_bytes[6] = static_cast<uint8>(selector0 >> 8); dst_block.m_bytes[7] = static_cast<uint8>(selector0 & 0xFF); - - return static_cast<unsigned int>(best_error); - } - -} // namespace rg_etc1 diff --git a/thirdparty/rg-etc1/rg_etc1.h b/thirdparty/rg-etc1/rg_etc1.h deleted file mode 100644 index 9ce89a6cc6..0000000000 --- a/thirdparty/rg-etc1/rg_etc1.h +++ /dev/null @@ -1,76 +0,0 @@ -// File: rg_etc1.h - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com> -// Please see ZLIB license at the end of this file. -#pragma once - -namespace rg_etc1 -{ - // Unpacks an 8-byte ETC1 compressed block to a block of 4x4 32bpp RGBA pixels. - // Returns false if the block is invalid. Invalid blocks will still be unpacked with clamping. - // This function is thread safe, and does not dynamically allocate any memory. - // If preserve_alpha is true, the alpha channel of the destination pixels will not be overwritten. Otherwise, alpha will be set to 255. - bool unpack_etc1_block(const void *pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha = false); - - // Quality setting = the higher the quality, the slower. 
- // To pack large textures, it is highly recommended to call pack_etc1_block() in parallel, on different blocks, from multiple threads (particularly when using cHighQuality). - enum etc1_quality - { - cLowQuality, - cMediumQuality, - cHighQuality, - }; - - struct etc1_pack_params - { - etc1_quality m_quality; - bool m_dithering; - - inline etc1_pack_params() - { - clear(); - } - - void clear() - { - m_quality = cHighQuality; - m_dithering = false; - } - }; - - // Important: pack_etc1_block_init() must be called before calling pack_etc1_block(). - void pack_etc1_block_init(); - - // Packs a 4x4 block of 32bpp RGBA pixels to an 8-byte ETC1 block. - // 32-bit RGBA pixels must always be arranged as (R,G,B,A) (R first, A last) in memory, independent of platform endianness. A should always be 255. - // Returns squared error of result. - // This function is thread safe, and does not dynamically allocate any memory. - // pack_etc1_block() does not currently support "perceptual" colorspace metrics - it primarily optimizes for RGB RMSE. - unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params); - -} // namespace rg_etc1 - -//------------------------------------------------------------------------------ -// -// rg_etc1 uses the ZLIB license: -// http://opensource.org/licenses/Zlib -// -// Copyright (c) 2012 Rich Geldreich -// -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. -// -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: -// -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. 
If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. -// -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. -// -// 3. This notice may not be removed or altered from any source distribution. -// -//------------------------------------------------------------------------------ diff --git a/thirdparty/tinyexr/tinyexr.cc b/thirdparty/tinyexr/tinyexr.cc new file mode 100644 index 0000000000..969a6d505d --- /dev/null +++ b/thirdparty/tinyexr/tinyexr.cc @@ -0,0 +1,2 @@ +#define TINYEXR_IMPLEMENTATION +#include "tinyexr.h" diff --git a/thirdparty/tinyexr/tinyexr.h b/thirdparty/tinyexr/tinyexr.h new file mode 100644 index 0000000000..c82768be9a --- /dev/null +++ b/thirdparty/tinyexr/tinyexr.h @@ -0,0 +1,12419 @@ +/* +Copyright (c) 2014 - 2017, Syoyo Fujita +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Syoyo Fujita nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +// TinyEXR contains some OpenEXR code, which is licensed under ------------ + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas +// Digital Ltd. LLC +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Industrial Light & Magic nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// End of OpenEXR license ------------------------------------------------- + +#ifndef TINYEXR_H_ +#define TINYEXR_H_ + +// +// +// Do this: +// #define TINYEXR_IMPLEMENTATION +// before you include this file in *one* C or C++ file to create the +// implementation. +// +// // i.e. it should look like this: +// #include ... +// #include ... +// #include ... +// #define TINYEXR_IMPLEMENTATION +// #include "tinyexr.h" +// +// + +#include <stddef.h> // for size_t +#include <stdint.h> // guess stdint.h is available(C99) + +// -- GODOT change for old MinGW on Travis CI -- +#if defined(__MINGW32__) +#include <_mingw.h> // for __MINGW64_VERSION_MAJOR +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// Use embedded miniz or not to decode ZIP format pixel. Linking with zlib +// required if this flag is 0. +#ifndef TINYEXR_USE_MINIZ +#define TINYEXR_USE_MINIZ (1) +#endif + +// Disable PIZ compression when applying cpplint. +#ifndef TINYEXR_USE_PIZ +#define TINYEXR_USE_PIZ (1) +#endif + +#ifndef TINYEXR_USE_ZFP +#define TINYEXR_USE_ZFP (0) // TinyEXR extension.
+// http://computation.llnl.gov/projects/floating-point-compression +#endif + +#define TINYEXR_SUCCESS (0) +#define TINYEXR_ERROR_INVALID_MAGIC_NUMBER (-1) +#define TINYEXR_ERROR_INVALID_EXR_VERSION (-2) +#define TINYEXR_ERROR_INVALID_ARGUMENT (-3) +#define TINYEXR_ERROR_INVALID_DATA (-4) +#define TINYEXR_ERROR_INVALID_FILE (-5) +#define TINYEXR_ERROR_INVALID_PARAMETER (-5) +#define TINYEXR_ERROR_CANT_OPEN_FILE (-6) +#define TINYEXR_ERROR_UNSUPPORTED_FORMAT (-7) +#define TINYEXR_ERROR_INVALID_HEADER (-8) + +// @note { OpenEXR file format: http://www.openexr.com/openexrfilelayout.pdf } + +// pixel type: possible values are: UINT = 0 HALF = 1 FLOAT = 2 +#define TINYEXR_PIXELTYPE_UINT (0) +#define TINYEXR_PIXELTYPE_HALF (1) +#define TINYEXR_PIXELTYPE_FLOAT (2) + +#define TINYEXR_MAX_ATTRIBUTES (128) + +#define TINYEXR_COMPRESSIONTYPE_NONE (0) +#define TINYEXR_COMPRESSIONTYPE_RLE (1) +#define TINYEXR_COMPRESSIONTYPE_ZIPS (2) +#define TINYEXR_COMPRESSIONTYPE_ZIP (3) +#define TINYEXR_COMPRESSIONTYPE_PIZ (4) +#define TINYEXR_COMPRESSIONTYPE_ZFP (128) // TinyEXR extension + +#define TINYEXR_ZFP_COMPRESSIONTYPE_RATE (0) +#define TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION (1) +#define TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY (2) + +#define TINYEXR_TILE_ONE_LEVEL (0) +#define TINYEXR_TILE_MIPMAP_LEVELS (1) +#define TINYEXR_TILE_RIPMAP_LEVELS (2) + +#define TINYEXR_TILE_ROUND_DOWN (0) +#define TINYEXR_TILE_ROUND_UP (1) + +typedef struct _EXRVersion { + int version; // this must be 2 + int tiled; // tile format image + int long_name; // long name attribute + int non_image; // deep image(EXR 2.0) + int multipart; // multi-part(EXR 2.0) +} EXRVersion; + +typedef struct _EXRAttribute { + char name[256]; // name and type are up to 255 chars long. 
+ char type[256]; + unsigned char *value; // uint8_t* + int size; + int pad0; +} EXRAttribute; + +typedef struct _EXRChannelInfo { + char name[256]; // less than 255 bytes long + int pixel_type; + int x_sampling; + int y_sampling; + unsigned char p_linear; + unsigned char pad[3]; +} EXRChannelInfo; + +typedef struct _EXRTile { + int offset_x; + int offset_y; + int level_x; + int level_y; + + int width; // actual width in a tile. + int height; // actual height in a tile. + + unsigned char **images; // image[channels][pixels] +} EXRTile; + +typedef struct _EXRHeader { + float pixel_aspect_ratio; + int line_order; + int data_window[4]; + int display_window[4]; + float screen_window_center[2]; + float screen_window_width; + + int chunk_count; + + // Properties for tiled format(`tiledesc`). + int tiled; + int tile_size_x; + int tile_size_y; + int tile_level_mode; + int tile_rounding_mode; + + int long_name; + int non_image; + int multipart; + unsigned int header_len; + + // Custom attributes(excludes required attributes(e.g. `channels`, + // `compression`, etc)) + int num_custom_attributes; + EXRAttribute custom_attributes[TINYEXR_MAX_ATTRIBUTES]; + + EXRChannelInfo *channels; // [num_channels] + + int *pixel_types; // Loaded pixel type(TINYEXR_PIXELTYPE_*) of `images` for + // each channel. This is overwritten with `requested_pixel_types` when + // loading. + int num_channels; + + int compression_type; // compression type(TINYEXR_COMPRESSIONTYPE_*) + int *requested_pixel_types; // Filled initially by + // ParseEXRHeaderFrom(Memory|File), then users + // can edit it(only valid for HALF pixel type + // channel) + +} EXRHeader; + +typedef struct _EXRMultiPartHeader { + int num_headers; + EXRHeader *headers; + +} EXRMultiPartHeader; + +typedef struct _EXRImage { + EXRTile *tiles; // Tiled pixel data. The application must reconstruct image + // from tiles manually. NULL if scanline format. + unsigned char **images; // image[channels][pixels]. NULL if tiled format.
+ + int width; + int height; + int num_channels; + + // Properties for tile format. + int num_tiles; + +} EXRImage; + +typedef struct _EXRMultiPartImage { + int num_images; + EXRImage *images; + +} EXRMultiPartImage; + +typedef struct _DeepImage { + const char **channel_names; + float ***image; // image[channels][scanlines][samples] + int **offset_table; // offset_table[scanline][offsets] + int num_channels; + int width; + int height; + int pad0; +} DeepImage; + +// @deprecated { to be removed. } +// Loads single-frame OpenEXR image. Assume EXR image contains RGB(A) channels. +// Application must free image data as returned by `out_rgba` +// Result image format is: float x RGBA x width x height +// Returns negative value and may set error string in `err` when there's an +// error +extern int LoadEXR(float **out_rgba, int *width, int *height, + const char *filename, const char **err); + +// @deprecated { to be removed. } +// Saves single-frame OpenEXR image. Assume EXR image contains RGB(A) channels. +// components must be 3(RGB) or 4(RGBA). +// Result image format is: float x RGB(A) x width x height +extern int SaveEXR(const float *data, int width, int height, int components, + const char *filename); + +// Initialize EXRHeader struct +extern void InitEXRHeader(EXRHeader *exr_header); + +// Initialize EXRImage struct +extern void InitEXRImage(EXRImage *exr_image); + +// Frees internal data of EXRHeader struct +extern int FreeEXRHeader(EXRHeader *exr_header); + +// Frees internal data of EXRImage struct +extern int FreeEXRImage(EXRImage *exr_image); + +// Parse EXR version header of a file. +extern int ParseEXRVersionFromFile(EXRVersion *version, const char *filename); + +// Parse EXR version header from memory-mapped EXR data. +extern int ParseEXRVersionFromMemory(EXRVersion *version, + const unsigned char *memory, size_t size); + +// Parse single-part OpenEXR header from a file and initialize `EXRHeader`.
+extern int ParseEXRHeaderFromFile(EXRHeader *header, const EXRVersion *version, + const char *filename, const char **err); + +// Parse single-part OpenEXR header from a memory and initialize `EXRHeader`. +extern int ParseEXRHeaderFromMemory(EXRHeader *header, + const EXRVersion *version, + const unsigned char *memory, size_t size, + const char **err); + +// Parse multi-part OpenEXR headers from a file and initialize `EXRHeader*` +// array. +extern int ParseEXRMultipartHeaderFromFile(EXRHeader ***headers, + int *num_headers, + const EXRVersion *version, + const char *filename, + const char **err); + +// Parse multi-part OpenEXR headers from a memory and initialize `EXRHeader*` +// array +extern int ParseEXRMultipartHeaderFromMemory(EXRHeader ***headers, + int *num_headers, + const EXRVersion *version, + const unsigned char *memory, + size_t size, const char **err); + +// Loads single-part OpenEXR image from a file. +// Application must setup `ParseEXRHeaderFromFile` before calling this function. +// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +extern int LoadEXRImageFromFile(EXRImage *image, const EXRHeader *header, + const char *filename, const char **err); + +// Loads single-part OpenEXR image from a memory. +// Application must setup `EXRHeader` with +// `ParseEXRHeaderFromMemory` before calling this function. +// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +extern int LoadEXRImageFromMemory(EXRImage *image, const EXRHeader *header, + const unsigned char *memory, + const size_t size, const char **err); + +// Loads multi-part OpenEXR image from a file. +// Application must setup `ParseEXRMultipartHeaderFromFile` before calling this +// function. 
+// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +extern int LoadEXRMultipartImageFromFile(EXRImage *images, + const EXRHeader **headers, + unsigned int num_parts, + const char *filename, + const char **err); + +// Loads multi-part OpenEXR image from a memory. +// Application must setup `EXRHeader*` array with +// `ParseEXRMultipartHeaderFromMemory` before calling this function. +// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +extern int LoadEXRMultipartImageFromMemory(EXRImage *images, + const EXRHeader **headers, + unsigned int num_parts, + const unsigned char *memory, + const size_t size, const char **err); + +// Saves multi-channel, single-frame OpenEXR image to a file. +// Returns negative value and may set error string in `err` when there's an +// error +extern int SaveEXRImageToFile(const EXRImage *image, + const EXRHeader *exr_header, const char *filename, + const char **err); + +// Saves multi-channel, single-frame OpenEXR image to a memory. +// Image is compressed using the EXRHeader.compression_type value. +// Returns the number of bytes if successful. +// Returns negative value and may set error string in `err` when there's an +// error +extern size_t SaveEXRImageToMemory(const EXRImage *image, + const EXRHeader *exr_header, + unsigned char **memory, const char **err); + +// Loads single-frame OpenEXR deep image. +// Application must free memory of variables in DeepImage(image, offset_table) +// Returns negative value and may set error string in `err` when there's an +// error +extern int LoadDeepEXR(DeepImage *out_image, const char *filename, + const char **err); + +// NOT YET IMPLEMENTED: +// Saves single-frame OpenEXR deep image.
+// Returns negative value and may set error string in `err` when there's an +// error +// extern int SaveDeepEXR(const DeepImage *in_image, const char *filename, +// const char **err); + +// NOT YET IMPLEMENTED: +// Loads multi-part OpenEXR deep image. +// Application must free memory of variables in DeepImage(image, offset_table) +// extern int LoadMultiPartDeepEXR(DeepImage **out_image, int num_parts, const +// char *filename, +// const char **err); + +// For emscripten. +// Loads single-frame OpenEXR image from memory. Assume EXR image contains +// RGB(A) channels. +// `out_rgba` must have enough memory(at least sizeof(float) x 4(RGBA) x width x +// height) +// Returns negative value and may set error string in `err` when there's an +// error +extern int LoadEXRFromMemory(float *out_rgba, const unsigned char *memory, + size_t size, const char **err); + +#ifdef __cplusplus +} +#endif + +#endif // TINYEXR_H_ + +#ifdef TINYEXR_IMPLEMENTATION +#ifndef TINYEXR_IMPLEMENTATION_DEIFNED +#define TINYEXR_IMPLEMENTATION_DEIFNED + +#include <algorithm> +#include <cassert> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <sstream> + +#include <string> +#include <vector> + +#if __cplusplus > 199711L +// C++11 +#include <cstdint> +#endif // __cplusplus > 199711L + +#ifdef _OPENMP +#include <omp.h> +#endif + +#if TINYEXR_USE_MINIZ +#else +#include "zlib.h" +#endif + +#if TINYEXR_USE_ZFP +#include "zfp.h" +#endif + +namespace tinyexr { + +#if __cplusplus > 199711L +// C++11 +typedef uint64_t tinyexr_uint64; +typedef int64_t tinyexr_int64; +#else +// Although `long long` is not a standard type pre C++11, assume it is defined +// as a compiler's extension.
+#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++11-long-long" +#endif +typedef unsigned long long tinyexr_uint64; +typedef long long tinyexr_int64; +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +#endif + +#if TINYEXR_USE_MINIZ + +namespace miniz { + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++11-long-long" +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wpadded" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wc++11-extensions" +#pragma clang diagnostic ignored "-Wconversion" +#ifdef __APPLE__ +#if __clang_major__ >= 8 && __clang__minor__ > 1 +#pragma clang diagnostic ignored "-Wcomma" +#endif +#endif +#pragma clang diagnostic ignored "-Wunused-function" +#endif + +/* miniz.c v1.15 - public domain deflate/inflate, zlib-subset, ZIP + reading/writing/appending, PNG writing + See "unlicense" statement at the end of this file. + Rich Geldreich <richgel99@gmail.com>, last updated Oct. 13, 2013 + Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: + http://www.ietf.org/rfc/rfc1951.txt + + Most API's defined in miniz.c are optional. For example, to disable the + archive related functions just define + MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO + (see the list below for more macros). + + * Change History + 10/13/13 v1.15 r4 - Interim bugfix release while I work on the next major + release with Zip64 support (almost there!): + - Critical fix for the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY bug + (thanks kahmyong.moon@hp.com) which could cause locate files to not find + files. This bug + would only have occured in earlier versions if you explicitly used this + flag, OR if you used mz_zip_extract_archive_file_to_heap() or + mz_zip_add_mem_to_archive_file_in_place() + (which used this flag). 
If you can't switch to v1.15 but want to fix + this bug, just remove the uses of this flag from both helper funcs (and of + course don't use the flag). + - Bugfix in mz_zip_reader_extract_to_mem_no_alloc() from kymoon when + pUser_read_buf is not NULL and compressed size is > uncompressed size + - Fixing mz_zip_reader_extract_*() funcs so they don't try to extract + compressed data from directory entries, to account for weird zipfiles which + contain zero-size compressed data on dir entries. + Hopefully this fix won't cause any issues on weird zip archives, + because it assumes the low 16-bits of zip external attributes are DOS + attributes (which I believe they always are in practice). + - Fixing mz_zip_reader_is_file_a_directory() so it doesn't check the + internal attributes, just the filename and external attributes + - mz_zip_reader_init_file() - missing MZ_FCLOSE() call if the seek failed + - Added cmake support for Linux builds which builds all the examples, + tested with clang v3.3 and gcc v4.6. + - Clang fix for tdefl_write_image_to_png_file_in_memory() from toffaletti + - Merged MZ_FORCEINLINE fix from hdeanclark + - Fix <time.h> include before config #ifdef, thanks emil.brink + - Added tdefl_write_image_to_png_file_in_memory_ex(): supports Y flipping + (super useful for OpenGL apps), and explicit control over the compression + level (so you can + set it to 1 for real-time compression). + - Merged in some compiler fixes from paulharris's github repro. + - Retested this build under Windows (VS 2010, including static analysis), + tcc 0.9.26, gcc v4.6 and clang v3.3. + - Added example6.c, which dumps an image of the mandelbrot set to a PNG + file. + - Modified example2 to help test the + MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY flag more. 
+ - In r3: Bugfix to mz_zip_writer_add_file() found during merge: Fix + possible src file fclose() leak if alignment bytes+local header file write + faiiled + - In r4: Minor bugfix to mz_zip_writer_add_from_zip_reader(): + Was pushing the wrong central dir header offset, appears harmless in this + release, but it became a problem in the zip64 branch + 5/20/12 v1.14 - MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE, + #include <time.h> (thanks fermtect). + 5/19/12 v1.13 - From jason@cornsyrup.org and kelwert@mtu.edu - Fix + mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit. + - Temporarily/locally slammed in "typedef unsigned long mz_ulong" and + re-ran a randomized regression test on ~500k files. + - Eliminated a bunch of warnings when compiling with GCC 32-bit/64. + - Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze + (static analysis) option and fixed all warnings (except for the silly + "Use of the comma-operator in a tested expression.." analysis warning, + which I purposely use to work around a MSVC compiler warning). + - Created 32-bit and 64-bit Codeblocks projects/workspace. Built and + tested Linux executables. The codeblocks workspace is compatible with + Linux+Win32/x64. + - Added miniz_tester solution/project, which is a useful little app + derived from LZHAM's tester app that I use as part of the regression test. + - Ran miniz.c and tinfl.c through another series of regression testing on + ~500,000 files and archives. + - Modified example5.c so it purposely disables a bunch of high-level + functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the + MINIZ_NO_STDIO bug report.) + - Fix ftell() usage in examples so they exit with an error on files which + are too large (a limitation of the examples, not miniz itself). + 4/12/12 v1.12 - More comments, added low-level example5.c, fixed a couple + minor level_and_flags issues in the archive API's. 
+ level_and_flags can now be set to MZ_DEFAULT_COMPRESSION. Thanks to Bruce + Dawson <bruced@valvesoftware.com> for the feedback/bug report. + 5/28/11 v1.11 - Added statement from unlicense.org + 5/27/11 v1.10 - Substantial compressor optimizations: + - Level 1 is now ~4x faster than before. The L1 compressor's throughput + now varies between 70-110MB/sec. on a + - Core i7 (actual throughput varies depending on the type of data, and x64 + vs. x86). + - Improved baseline L2-L9 compression perf. Also, greatly improved + compression perf. issues on some file types. + - Refactored the compression code for better readability and + maintainability. + - Added level 10 compression level (L10 has slightly better ratio than + level 9, but could have a potentially large + drop in throughput on some files). + 5/15/11 v1.09 - Initial stable release. + + * Low-level Deflate/Inflate implementation notes: + + Compression: Use the "tdefl" API's. The compressor supports raw, static, + and dynamic blocks, lazy or + greedy parsing, match length filtering, RLE-only, and Huffman-only streams. + It performs and compresses + approximately as well as zlib. + + Decompression: Use the "tinfl" API's. The entire decompressor is + implemented as a single function + coroutine: see tinfl_decompress(). It supports decompression into a 32KB + (or larger power of 2) wrapping buffer, or into a memory + block large enough to hold the entire file. + + The low-level tdefl/tinfl API's do not make any use of dynamic memory + allocation. + + * zlib-style API notes: + + miniz.c implements a fairly large subset of zlib. 
There's enough + functionality present for it to be a drop-in + zlib replacement in many apps: + The z_stream struct, optional memory allocation callbacks + deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound + inflateInit/inflateInit2/inflate/inflateEnd + compress, compress2, compressBound, uncompress + CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly + routines. + Supports raw deflate streams or standard zlib streams with adler-32 + checking. + + Limitations: + The callback API's are not implemented yet. No support for gzip headers or + zlib static dictionaries. + I've tried to closely emulate zlib's various flavors of stream flushing + and return status codes, but + there are no guarantees that miniz.c pulls this off perfectly. + + * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, + originally written by + Alex Evans. Supports 1-4 bytes/pixel images. + + * ZIP archive API notes: + + The ZIP archive API's where designed with simplicity and efficiency in + mind, with just enough abstraction to + get the job done with minimal fuss. There are simple API's to retrieve file + information, read files from + existing archives, create new archives, append new files to existing + archives, or clone archive data from + one archive to another. It supports archives located in memory or the heap, + on disk (using stdio.h), + or you can specify custom file read/write callbacks. + + - Archive reading: Just call this function to read a single file from a + disk archive: + + void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const + char *pArchive_name, + size_t *pSize, mz_uint zip_flags); + + For more complex cases, use the "mz_zip_reader" functions. Upon opening an + archive, the entire central + directory is located and read as-is into memory, and subsequent file access + only occurs when reading individual files. 
+ + - Archives file scanning: The simple way is to use this function to scan a + loaded archive for a specific file: + + int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, + const char *pComment, mz_uint flags); + + The locate operation can optionally check file comments too, which (as one + example) can be used to identify + multiple versions of the same file in an archive. This function uses a + simple linear search through the central + directory, so it's not very fast. + + Alternately, you can iterate through all the files in an archive (using + mz_zip_reader_get_num_files()) and + retrieve detailed info on each file by calling mz_zip_reader_file_stat(). + + - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer + immediately writes compressed file data + to disk and builds an exact image of the central directory in memory. The + central directory image is written + all at once at the end of the archive file when the archive is finalized. + + The archive writer can optionally align each file's local header and file + data to any power of 2 alignment, + which can be useful when the archive will be read from optical media. Also, + the writer supports placing + arbitrary data blobs at the very beginning of ZIP archives. Archives + written using either feature are still + readable by any ZIP tool. + + - Archive appending: The simple way to add a single file to an archive is + to call this function: + + mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, + const char *pArchive_name, + const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 + comment_size, mz_uint level_and_flags); + + The archive will be created if it doesn't already exist, otherwise it'll be + appended to. 
+ Note the appending is done in-place and is not an atomic operation, so if + something goes wrong + during the operation it's possible the archive could be left without a + central directory (although the local + file headers and file data will be fine, so the archive will be + recoverable). + + For more complex archive modification scenarios: + 1. The safest way is to use an mz_zip_reader to read the existing archive, + cloning only those bits you want to + preserve into a new archive using the + mz_zip_writer_add_from_zip_reader() function (which copies the + compressed file data as-is). When you're done, delete the old archive and + rename the newly written archive, and + you're done. This is safe but requires a bunch of temporary disk space or + heap memory. + + 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using + mz_zip_writer_init_from_reader(), + append new files as needed, then finalize the archive which will write an + updated central directory to the + original archive. (This is basically what + mz_zip_add_mem_to_archive_file_in_place() does.) There's a + possibility that the archive's central directory could be lost with this + method if anything goes wrong, though. + + - ZIP archive support limitations: + No zip64 or spanning support. Extraction functions can only handle + unencrypted, stored or deflated files. + Requires streams capable of seeking. + + * This is a header file library, like stb_image.c. To get only a header file, + either cut and paste the + below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then + include miniz.c from it. + + * Important: For best perf. 
be sure to customize the below macros for your + target platform: + #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 + #define MINIZ_LITTLE_ENDIAN 1 + #define MINIZ_HAS_64BIT_REGISTERS 1 + + * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before + including miniz.c to ensure miniz + uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be + able to process large files + (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). +*/ + +#ifndef MINIZ_HEADER_INCLUDED +#define MINIZ_HEADER_INCLUDED + +//#include <stdlib.h> + +// Defines to completely disable specific portions of miniz.c: +// If all macros here are defined the only functionality remaining will be +// CRC-32, adler-32, tinfl, and tdefl. + +// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on +// stdio for file I/O. +//#define MINIZ_NO_STDIO + +// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able +// to get the current time, or +// get/set file times, and the C run-time funcs that get/set times won't be +// called. +// The current downside is the times written to your archives will be from 1979. +#define MINIZ_NO_TIME + +// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. +#define MINIZ_NO_ARCHIVE_APIS + +// Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP archive +// API's. +//#define MINIZ_NO_ARCHIVE_WRITING_APIS + +// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression +// API's. +//#define MINIZ_NO_ZLIB_APIS + +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib names, to prevent +// conflicts against stock zlib. +//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. 
+// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom +// user alloc/free/realloc +// callbacks to the zlib and archive API's, and a few stand-alone helper API's +// which don't provide custom user +// functions (such as tdefl_compress_mem_to_heap() and +// tinfl_decompress_mem_to_heap()) won't work. +//#define MINIZ_NO_MALLOC + +#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) +// TODO: Work around the "include file 'sys\utime.h'" error when compiling with +// tcc on Linux +#define MINIZ_NO_TIME +#endif + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) +//#include <time.h> +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ + defined(__i386) || defined(__i486__) || defined(__i486) || \ + defined(i386) || defined(__ia64__) || defined(__x86_64__) +// MINIZ_X86_OR_X64_CPU is only used to help set the below macros. +#define MINIZ_X86_OR_X64_CPU 1 +#endif + +#if defined(__sparcv9) +// Big endian +#else +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU +// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. +// NOTE(review): if the compiler defines neither __BYTE_ORDER__ nor +// __ORDER_LITTLE_ENDIAN__, both identifiers evaluate as 0 inside #if, so the +// comparison above is true and MINIZ_LITTLE_ENDIAN gets defined regardless of +// the target's byte order -- confirm this is acceptable for such toolchains. +#define MINIZ_LITTLE_ENDIAN 1 +#endif +#endif + +#if MINIZ_X86_OR_X64_CPU +// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient +// integer loads and stores from unaligned addresses. +//#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES \ + 0 // disable to suppress compiler warnings +#endif + +#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || \ + defined(_LP64) || defined(__LP64__) || defined(__ia64__) || \ + defined(__x86_64__) +// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are +// reasonably fast (and don't involve compiler generated calls to helper +// functions). +#define MINIZ_HAS_64BIT_REGISTERS 1 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// ------------------- zlib-style API Definitions. 
+ +// For more compatibility with zlib, miniz.c uses unsigned long for some +// parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! +typedef unsigned long mz_ulong; + +// mz_free() internally uses the MZ_FREE() macro (which by default calls free() +// unless you've modified the MZ_MALLOC macro) to release a block allocated from +// the heap. +void mz_free(void *p); + +#define MZ_ADLER32_INIT (1) +// mz_adler32() returns the initial adler-32 value to use when called with +// ptr==NULL. +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); + +#define MZ_CRC32_INIT (0) +// mz_crc32() returns the initial CRC-32 value to use when called with +// ptr==NULL. +mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); + +// Compression strategies. +enum { + MZ_DEFAULT_STRATEGY = 0, + MZ_FILTERED = 1, + MZ_HUFFMAN_ONLY = 2, + MZ_RLE = 3, + MZ_FIXED = 4 +}; + +// Method +#define MZ_DEFLATED 8 + +#ifndef MINIZ_NO_ZLIB_APIS + +// Heap allocation callbacks. +// Note that mz_alloc_func parameter types purposely differ from zlib's: +// items/size is size_t, not unsigned long. +typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); +typedef void (*mz_free_func)(void *opaque, void *address); +typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, + size_t size); + +#define MZ_VERSION "9.1.15" +#define MZ_VERNUM 0x91F0 +#define MZ_VER_MAJOR 9 +#define MZ_VER_MINOR 1 +#define MZ_VER_REVISION 15 +#define MZ_VER_SUBREVISION 0 + +// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The +// other values are for advanced use (refer to the zlib docs). +enum { + MZ_NO_FLUSH = 0, + MZ_PARTIAL_FLUSH = 1, + MZ_SYNC_FLUSH = 2, + MZ_FULL_FLUSH = 3, + MZ_FINISH = 4, + MZ_BLOCK = 5 +}; + +// Return status codes. MZ_PARAM_ERROR is non-standard. 
+enum { + MZ_OK = 0, + MZ_STREAM_END = 1, + MZ_NEED_DICT = 2, + MZ_ERRNO = -1, + MZ_STREAM_ERROR = -2, + MZ_DATA_ERROR = -3, + MZ_MEM_ERROR = -4, + MZ_BUF_ERROR = -5, + MZ_VERSION_ERROR = -6, + MZ_PARAM_ERROR = -10000 +}; + +// Compression levels: 0-9 are the standard zlib-style levels, 10 is best +// possible compression (not zlib compatible, and may be very slow), +// MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. +enum { + MZ_NO_COMPRESSION = 0, + MZ_BEST_SPEED = 1, + MZ_BEST_COMPRESSION = 9, + MZ_UBER_COMPRESSION = 10, + MZ_DEFAULT_LEVEL = 6, + MZ_DEFAULT_COMPRESSION = -1 +}; + +// Window bits +#define MZ_DEFAULT_WINDOW_BITS 15 + +struct mz_internal_state; + +// Compression/decompression stream struct. +typedef struct mz_stream_s { + const unsigned char *next_in; // pointer to next byte to read + unsigned int avail_in; // number of bytes available at next_in + mz_ulong total_in; // total number of bytes consumed so far + + unsigned char *next_out; // pointer to next byte to write + unsigned int avail_out; // number of bytes that can be written to next_out + mz_ulong total_out; // total number of bytes produced so far + + char *msg; // error msg (unused) + struct mz_internal_state *state; // internal state, allocated by zalloc/zfree + + mz_alloc_func + zalloc; // optional heap allocation function (defaults to malloc) + mz_free_func zfree; // optional heap free function (defaults to free) + void *opaque; // heap alloc function user pointer + + int data_type; // data_type (unused) + mz_ulong adler; // adler32 of the source or uncompressed data + mz_ulong reserved; // not used +} mz_stream; + +typedef mz_stream *mz_streamp; + +// Returns the version string of miniz.c. +const char *mz_version(void); + +// mz_deflateInit() initializes a compressor with default options: +// Parameters: +// pStream must point to an initialized mz_stream struct. +// level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. 
+// level 1 enables a specially optimized compression function that's been +// optimized purely for performance, not ratio. +// (This special func. is currently only enabled when +// MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if the input parameters are bogus. +// MZ_MEM_ERROR on out of memory. +int mz_deflateInit(mz_streamp pStream, int level); + +// mz_deflateInit2() is like mz_deflateInit(), except with more control: +// Additional parameters: +// method must be MZ_DEFLATED +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with +// zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no +// header or footer) +// mem_level must be between [1, 9] (it's checked but ignored by miniz.c) +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, + int mem_level, int strategy); + +// Quickly resets a compressor without having to reallocate anything. Same as +// calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). +int mz_deflateReset(mz_streamp pStream); + +// mz_deflate() compresses the input to output, consuming as much of the input +// and producing as much output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update +// the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or +// MZ_FINISH. +// Return values: +// MZ_OK on success (when flushing, or if more input is needed but not +// available, and/or there's more output to be written but the output buffer +// is full). +// MZ_STREAM_END if all input has been consumed and all output bytes have been +// written. Don't call mz_deflate() on the stream anymore. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if one of the parameters is invalid. 
+// MZ_BUF_ERROR if no forward progress is possible because the input and/or +// output buffers are empty. (Fill up the input buffer or free up some output +// space and try again.) +int mz_deflate(mz_streamp pStream, int flush); + +// mz_deflateEnd() deinitializes a compressor: +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +int mz_deflateEnd(mz_streamp pStream); + +// mz_deflateBound() returns a (very) conservative upper bound on the amount of +// data that could be generated by deflate(), assuming flush is set to only +// MZ_NO_FLUSH or MZ_FINISH. +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); + +// Single-call compression functions mz_compress() and mz_compress2(): +// Returns MZ_OK on success, or one of the error codes from mz_deflate() on +// failure. +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, + const unsigned char *pSource, mz_ulong source_len); +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, + const unsigned char *pSource, mz_ulong source_len, int level); + +// mz_compressBound() returns a (very) conservative upper bound on the amount of +// data that could be generated by calling mz_compress(). +mz_ulong mz_compressBound(mz_ulong source_len); + +// Initializes a decompressor. +int mz_inflateInit(mz_streamp pStream); + +// mz_inflateInit2() is like mz_inflateInit() with an additional option that +// controls the window size and whether or not the stream has been wrapped with +// a zlib header/footer: +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or +// -MZ_DEFAULT_WINDOW_BITS (raw deflate). +int mz_inflateInit2(mz_streamp pStream, int window_bits); + +// Decompresses the input stream to the output, consuming only as much of the +// input as needed, and writing as much to the output as possible. +// Parameters: +// pStream is the stream to read from and write to. 
You must initialize/update +// the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. +// On the first call, if flush is MZ_FINISH it's assumed the input and output +// buffers are both sized large enough to decompress the entire stream in a +// single call (this is slightly faster). +// MZ_FINISH implies that there are no more source bytes available beside +// what's already in the input buffer, and that the output buffer is large +// enough to hold the rest of the decompressed data. +// Return values: +// MZ_OK on success. Either more input is needed but not available, and/or +// there's more output to be written but the output buffer is full. +// MZ_STREAM_END if all needed input has been consumed and all output bytes +// have been written. For zlib streams, the adler-32 of the decompressed data +// has also been verified. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_DATA_ERROR if the deflate stream is invalid. +// MZ_PARAM_ERROR if one of the parameters is invalid. +// MZ_BUF_ERROR if no forward progress is possible because the input buffer is +// empty but the inflater needs more input to continue, or if the output +// buffer is not large enough. Call mz_inflate() again +// with more input data, or with more room in the output buffer (except when +// using single call decompression, described above). +int mz_inflate(mz_streamp pStream, int flush); + +// Deinitializes a decompressor. +int mz_inflateEnd(mz_streamp pStream); + +// Single-call decompression. +// Returns MZ_OK on success, or one of the error codes from mz_inflate() on +// failure. +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, + const unsigned char *pSource, mz_ulong source_len); + +// Returns a string description of the specified error code, or NULL if the +// error code is invalid. 
+const char *mz_error(int err); + +// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used +// as a drop-in replacement for the subset of zlib that miniz.c supports. +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you +// use zlib in the same project. +#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES +typedef unsigned char Byte; +typedef unsigned int uInt; +typedef mz_ulong uLong; +typedef Byte Bytef; +typedef uInt uIntf; +typedef char charf; +typedef int intf; +typedef void *voidpf; +typedef uLong uLongf; +typedef void *voidp; +typedef void *const voidpc; +#define Z_NULL 0 +#define Z_NO_FLUSH MZ_NO_FLUSH +#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH +#define Z_SYNC_FLUSH MZ_SYNC_FLUSH +#define Z_FULL_FLUSH MZ_FULL_FLUSH +#define Z_FINISH MZ_FINISH +#define Z_BLOCK MZ_BLOCK +#define Z_OK MZ_OK +#define Z_STREAM_END MZ_STREAM_END +#define Z_NEED_DICT MZ_NEED_DICT +#define Z_ERRNO MZ_ERRNO +#define Z_STREAM_ERROR MZ_STREAM_ERROR +#define Z_DATA_ERROR MZ_DATA_ERROR +#define Z_MEM_ERROR MZ_MEM_ERROR +#define Z_BUF_ERROR MZ_BUF_ERROR +#define Z_VERSION_ERROR MZ_VERSION_ERROR +#define Z_PARAM_ERROR MZ_PARAM_ERROR +#define Z_NO_COMPRESSION MZ_NO_COMPRESSION +#define Z_BEST_SPEED MZ_BEST_SPEED +#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION +#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION +#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY +#define Z_FILTERED MZ_FILTERED +#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY +#define Z_RLE MZ_RLE +#define Z_FIXED MZ_FIXED +#define Z_DEFLATED MZ_DEFLATED +#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS +#define alloc_func mz_alloc_func +#define free_func mz_free_func +#define internal_state mz_internal_state +#define z_stream mz_stream +#define deflateInit mz_deflateInit +#define deflateInit2 mz_deflateInit2 +#define deflateReset mz_deflateReset +#define deflate mz_deflate +#define deflateEnd mz_deflateEnd +#define deflateBound mz_deflateBound +#define compress mz_compress +#define compress2 
mz_compress2 +#define compressBound mz_compressBound +#define inflateInit mz_inflateInit +#define inflateInit2 mz_inflateInit2 +#define inflate mz_inflate +#define inflateEnd mz_inflateEnd +#define uncompress mz_uncompress +#define crc32 mz_crc32 +#define adler32 mz_adler32 +#define MAX_WBITS 15 +#define MAX_MEM_LEVEL 9 +#define zError mz_error +#define ZLIB_VERSION MZ_VERSION +#define ZLIB_VERNUM MZ_VERNUM +#define ZLIB_VER_MAJOR MZ_VER_MAJOR +#define ZLIB_VER_MINOR MZ_VER_MINOR +#define ZLIB_VER_REVISION MZ_VER_REVISION +#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION +#define zlibVersion mz_version +#define zlib_version mz_version() +#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +#endif // MINIZ_NO_ZLIB_APIS + +// ------------------- Types and macros + +typedef unsigned char mz_uint8; +typedef signed short mz_int16; +typedef unsigned short mz_uint16; +typedef unsigned int mz_uint32; +typedef unsigned int mz_uint; +typedef long long mz_int64; +typedef unsigned long long mz_uint64; +typedef int mz_bool; + +#define MZ_FALSE (0) +#define MZ_TRUE (1) + +// An attempt to work around MSVC's spammy "warning C4127: conditional +// expression is constant" message. 
+#ifdef _MSC_VER +#define MZ_MACRO_END while (0, 0) +#else +#define MZ_MACRO_END while (0) +#endif + +// ------------------- ZIP archive reading/writing + +#ifndef MINIZ_NO_ARCHIVE_APIS + +enum { + MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024, + MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260, + MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256 +}; + +typedef struct { + mz_uint32 m_file_index; + mz_uint32 m_central_dir_ofs; + mz_uint16 m_version_made_by; + mz_uint16 m_version_needed; + mz_uint16 m_bit_flag; + mz_uint16 m_method; +#ifndef MINIZ_NO_TIME + time_t m_time; +#endif + mz_uint32 m_crc32; + mz_uint64 m_comp_size; + mz_uint64 m_uncomp_size; + mz_uint16 m_internal_attr; + mz_uint32 m_external_attr; + mz_uint64 m_local_header_ofs; + mz_uint32 m_comment_size; + char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; + char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; +} mz_zip_archive_file_stat; + +typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, + void *pBuf, size_t n); +typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, + const void *pBuf, size_t n); + +struct mz_zip_internal_state_tag; +typedef struct mz_zip_internal_state_tag mz_zip_internal_state; + +typedef enum { + MZ_ZIP_MODE_INVALID = 0, + MZ_ZIP_MODE_READING = 1, + MZ_ZIP_MODE_WRITING = 2, + MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 +} mz_zip_mode; + +typedef struct mz_zip_archive_tag { + mz_uint64 m_archive_size; + mz_uint64 m_central_directory_file_ofs; + mz_uint m_total_files; + mz_zip_mode m_zip_mode; + + mz_uint m_file_offset_alignment; + + mz_alloc_func m_pAlloc; + mz_free_func m_pFree; + mz_realloc_func m_pRealloc; + void *m_pAlloc_opaque; + + mz_file_read_func m_pRead; + mz_file_write_func m_pWrite; + void *m_pIO_opaque; + + mz_zip_internal_state *m_pState; + +} mz_zip_archive; + +typedef enum { + MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, + MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, + MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, + MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800 +} mz_zip_flags; 
+ +// ZIP archive reading + +// Inits a ZIP archive reader. +// These functions read and validate the archive's central directory. +mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, + mz_uint32 flags); +mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, + size_t size, mz_uint32 flags); + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, + mz_uint32 flags); +#endif + +// Returns the total number of files in the archive. +mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip); + +// Returns detailed information about an archive file entry. +mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, + mz_zip_archive_file_stat *pStat); + +// Determines if an archive file entry is a directory entry. +mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, + mz_uint file_index); +mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, + mz_uint file_index); + +// Retrieves the filename of an archive file entry. +// Returns the number of bytes written to pFilename, or if filename_buf_size is +// 0 this function returns the number of bytes needed to fully store the +// filename. +mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, + char *pFilename, mz_uint filename_buf_size); + +// Attempts to locate a file in the archive's central directory. +// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH +// Returns -1 if the file cannot be found. +int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, + const char *pComment, mz_uint flags); + +// Extracts an archive file to a memory buffer using no memory allocation. 
+mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, + mz_uint file_index, void *pBuf, + size_t buf_size, mz_uint flags, + void *pUser_read_buf, + size_t user_read_buf_size); +mz_bool mz_zip_reader_extract_file_to_mem_no_alloc( + mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, + mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); + +// Extracts an archive file to a memory buffer. +mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, + void *pBuf, size_t buf_size, + mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, + const char *pFilename, void *pBuf, + size_t buf_size, mz_uint flags); + +// Extracts an archive file to a dynamically allocated heap buffer. +void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, + size_t *pSize, mz_uint flags); +void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, + const char *pFilename, size_t *pSize, + mz_uint flags); + +// Extracts an archive file using a callback function to output the file's data. +mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, + mz_uint file_index, + mz_file_write_func pCallback, + void *pOpaque, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, + const char *pFilename, + mz_file_write_func pCallback, + void *pOpaque, mz_uint flags); + +#ifndef MINIZ_NO_STDIO +// Extracts an archive file to a disk file and sets its last accessed and +// modified times. +// This function only extracts files, not archive directory records. 
+mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, + const char *pDst_filename, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, + const char *pArchive_filename, + const char *pDst_filename, + mz_uint flags); +#endif + +// Ends archive reading, freeing all allocations, and closing the input archive +// file if mz_zip_reader_init_file() was used. +mz_bool mz_zip_reader_end(mz_zip_archive *pZip); + +// ZIP archive writing + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +// Inits a ZIP archive writer. +mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); +mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, + size_t size_to_reserve_at_beginning, + size_t initial_allocation_size); + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, + mz_uint64 size_to_reserve_at_beginning); +#endif + +// Converts a ZIP archive reader object into a writer object, to allow efficient +// in-place file appends to occur on an existing archive. +// For archives opened using mz_zip_reader_init_file, pFilename must be the +// archive's filename so it can be reopened for writing. If the file can't be +// reopened, mz_zip_reader_end() will be called. +// For archives opened using mz_zip_reader_init_mem, the memory block must be +// growable using the realloc callback (which defaults to realloc unless you've +// overridden it). +// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's +// user provided m_pWrite function cannot be NULL. +// Note: In-place archive modification is not recommended unless you know what +// you're doing, because if execution stops or something goes wrong before +// the archive is finalized the file's central directory will be hosed. +mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, + const char *pFilename); + +// Adds the contents of a memory buffer to an archive. 
These functions record +// the current local time into the archive. +// To add a directory entry, call this method with an archive name ending in a +// forwardslash with empty buffer. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, +// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or +// just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, + const void *pBuf, size_t buf_size, + mz_uint level_and_flags); +mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, + const char *pArchive_name, const void *pBuf, + size_t buf_size, const void *pComment, + mz_uint16 comment_size, + mz_uint level_and_flags, mz_uint64 uncomp_size, + mz_uint32 uncomp_crc32); + +#ifndef MINIZ_NO_STDIO +// Adds the contents of a disk file to an archive. This function also records +// the disk file's modified time into the archive. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, +// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or +// just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, + const char *pSrc_filename, const void *pComment, + mz_uint16 comment_size, mz_uint level_and_flags); +#endif + +// Adds a file to an archive by fully cloning the data from another archive. +// This function fully clones the source file's compressed data (no +// recompression), along with its full filename, extra data, and comment fields. +mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, + mz_zip_archive *pSource_zip, + mz_uint file_index); + +// Finalizes the archive by writing the central directory records followed by +// the end of central directory record. +// After an archive is finalized, the only valid call on the mz_zip_archive +// struct is mz_zip_writer_end(). +// An archive must be manually finalized by calling this function for it to be +// valid. 
+mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); +mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, + size_t *pSize); + +// Ends archive writing, freeing all allocations, and closing the output file if +// mz_zip_writer_init_file() was used. +// Note for the archive to be valid, it must have been finalized before ending. +mz_bool mz_zip_writer_end(mz_zip_archive *pZip); + +// Misc. high-level helper functions: + +// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) +// appends a memory blob to a ZIP archive. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, +// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or +// just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_add_mem_to_archive_file_in_place( + const char *pZip_filename, const char *pArchive_name, const void *pBuf, + size_t buf_size, const void *pComment, mz_uint16 comment_size, + mz_uint level_and_flags); + +// Reads a single file from an archive into a heap block. +// Returns NULL on failure. +void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, + const char *pArchive_name, + size_t *pSize, mz_uint zip_flags); + +#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +#endif // #ifndef MINIZ_NO_ARCHIVE_APIS + +// ------------------- Low-level Decompression API Definitions + +// Decompression flags used by tinfl_decompress(). +// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and +// ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the +// input is a raw deflate stream. +// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available +// beyond the end of the supplied input buffer. If clear, the input buffer +// contains all remaining input. +// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large +// enough to hold the entire decompressed stream. 
If clear, the output buffer is +// at least the size of the dictionary (typically 32KB). +// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the +// decompressed bytes. +enum { + TINFL_FLAG_PARSE_ZLIB_HEADER = 1, + TINFL_FLAG_HAS_MORE_INPUT = 2, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, + TINFL_FLAG_COMPUTE_ADLER32 = 8 +}; + +// High level decompression functions: +// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block +// allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data +// to decompress. +// On return: +// Function returns a pointer to the decompressed data, or NULL on failure. +// *pOut_len will be set to the decompressed data's size, which could be larger +// than src_buf_len on uncompressible data. +// The caller must call mz_free() on the returned block when it's no longer +// needed. +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, + size_t *pOut_len, int flags); + +// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block +// in memory. +// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes +// written on success. +#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, + const void *pSrc_buf, size_t src_buf_len, + int flags); + +// tinfl_decompress_mem_to_callback() decompresses a block in memory to an +// internal 32KB buffer, and a user provided callback function will be called to +// flush the buffer. +// Returns 1 on success or 0 on failure. 
+typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, + tinfl_put_buf_func_ptr pPut_buf_func, + void *pPut_buf_user, int flags); + +struct tinfl_decompressor_tag; +typedef struct tinfl_decompressor_tag tinfl_decompressor; + +// Max size of LZ dictionary. +#define TINFL_LZ_DICT_SIZE 32768 + +// Return status. +typedef enum { + TINFL_STATUS_BAD_PARAM = -3, + TINFL_STATUS_ADLER32_MISMATCH = -2, + TINFL_STATUS_FAILED = -1, + TINFL_STATUS_DONE = 0, + TINFL_STATUS_NEEDS_MORE_INPUT = 1, + TINFL_STATUS_HAS_MORE_OUTPUT = 2 +} tinfl_status; + +// Initializes the decompressor to its initial state. +#define tinfl_init(r) \ + do { \ + (r)->m_state = 0; \ + } \ + MZ_MACRO_END +#define tinfl_get_adler32(r) (r)->m_check_adler32 + +// Main low-level decompressor coroutine function. This is the only function +// actually needed for decompression. All the other functions are just +// high-level helpers for improved usability. +// This is a universal API, i.e. it can be used as a building block to build any +// desired higher level decompression API. In the limit case, it can be called +// once per every byte input or output. +tinfl_status tinfl_decompress(tinfl_decompressor *r, + const mz_uint8 *pIn_buf_next, + size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, + mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, + const mz_uint32 decomp_flags); + +// Internal/private bits follow. 
+enum { + TINFL_MAX_HUFF_TABLES = 3, + TINFL_MAX_HUFF_SYMBOLS_0 = 288, + TINFL_MAX_HUFF_SYMBOLS_1 = 32, + TINFL_MAX_HUFF_SYMBOLS_2 = 19, + TINFL_FAST_LOOKUP_BITS = 10, + TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS +}; + +typedef struct { + mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; + mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], + m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; +} tinfl_huff_table; + +#if MINIZ_HAS_64BIT_REGISTERS +#define TINFL_USE_64BIT_BITBUF 1 +#endif + +#if TINFL_USE_64BIT_BITBUF +typedef mz_uint64 tinfl_bit_buf_t; +#define TINFL_BITBUF_SIZE (64) +#else +typedef mz_uint32 tinfl_bit_buf_t; +#define TINFL_BITBUF_SIZE (32) +#endif + +struct tinfl_decompressor_tag { + mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, + m_check_adler32, m_dist, m_counter, m_num_extra, + m_table_sizes[TINFL_MAX_HUFF_TABLES]; + tinfl_bit_buf_t m_bit_buf; + size_t m_dist_from_out_buf_start; + tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; + mz_uint8 m_raw_header[4], + m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; +}; + +// ------------------- Low-level Compression API Definitions + +// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly +// slower, and raw/dynamic blocks will be output more frequently). +// With 1, TDEFL_LZ_CODE_BUF_SIZE is 24KB instead of 64KB and TDEFL_LZ_HASH_BITS +// is 12 instead of 15 (see the #if TDEFL_LESS_MEMORY enums further down). +#define TDEFL_LESS_MEMORY 0 + +// tdefl_init() compression flags logically OR'd together (low 12 bits contain +// the max. number of probes per dictionary search): +// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes +// per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap +// compression), 4095=Huffman+LZ (slowest/best compression). +enum { + TDEFL_HUFFMAN_ONLY = 0, + TDEFL_DEFAULT_MAX_PROBES = 128, + TDEFL_MAX_PROBES_MASK = 0xFFF +}; + +// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before +// the deflate data, and the Adler-32 of the source data at the end. Otherwise, +// you'll get raw deflate data. 
+// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even +// when not writing zlib headers). +// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more +// efficient lazy parsing. +// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's +// initialization time to the minimum, but the output may vary from run to run +// given the same input (depending on the contents of memory). +// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) +// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. +// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. +// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. +// The low 12 bits are reserved to control the max # of hash probes per +// dictionary lookup (see TDEFL_MAX_PROBES_MASK). +enum { + TDEFL_WRITE_ZLIB_HEADER = 0x01000, + TDEFL_COMPUTE_ADLER32 = 0x02000, + TDEFL_GREEDY_PARSING_FLAG = 0x04000, + TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, + TDEFL_RLE_MATCHES = 0x10000, + TDEFL_FILTER_MATCHES = 0x20000, + TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, + TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 +}; + +// High level compression functions: +// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block +// allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of source block to compress. +// flags: The max match finder probes (default is 128) logically OR'd against +// the above flags. Higher probes are slower but improve compression. +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pOut_len will be set to the compressed data's size, which could be larger +// than src_buf_len on uncompressible data. +// The caller must free() the returned block when it's no longer needed. 
+void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, + size_t *pOut_len, int flags); + +// tdefl_compress_mem_to_mem() compresses a block in memory to another block in +// memory. +// Returns 0 on failure. +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, + const void *pSrc_buf, size_t src_buf_len, + int flags); + +// Compresses an image to a compressed PNG file in memory. +// On entry: +// pImage, w, h, and num_chans describe the image to compress. num_chans may be +// 1, 2, 3, or 4. +// The image pitch in bytes per scanline will be w*num_chans. The leftmost +// pixel on the top scanline is stored first in memory. +// level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, +// MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL +// If flip is true, the image will be flipped on the Y axis (useful for OpenGL +// apps). +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pLen_out will be set to the size of the PNG image file. +// The caller must mz_free() the returned heap block (which will typically be +// larger than *pLen_out) when it's no longer needed. +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, + int h, int num_chans, + size_t *pLen_out, + mz_uint level, mz_bool flip); +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, + int num_chans, size_t *pLen_out); + +// Output stream interface. The compressor uses this interface to write +// compressed data. It'll typically be called with TDEFL_OUT_BUF_SIZE bytes at a +// time. +// NOTE(review): a false return presumably surfaces as +// TDEFL_STATUS_PUT_BUF_FAILED - confirm against the tdefl implementation. +typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len, + void *pUser); + +// tdefl_compress_mem_to_output() compresses a block to an output stream. The +// above helpers use this function internally. 
+mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, + tdefl_put_buf_func_ptr pPut_buf_func, + void *pPut_buf_user, int flags); + +enum { + TDEFL_MAX_HUFF_TABLES = 3, + TDEFL_MAX_HUFF_SYMBOLS_0 = 288, + TDEFL_MAX_HUFF_SYMBOLS_1 = 32, + TDEFL_MAX_HUFF_SYMBOLS_2 = 19, + TDEFL_LZ_DICT_SIZE = 32768, + TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, + TDEFL_MIN_MATCH_LEN = 3, + TDEFL_MAX_MATCH_LEN = 258 +}; + +// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed +// output block (using static/fixed Huffman codes). +// In both configurations below it is 1.3x TDEFL_LZ_CODE_BUF_SIZE. +#if TDEFL_LESS_MEMORY +enum { + TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, + TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, + TDEFL_MAX_HUFF_SYMBOLS = 288, + TDEFL_LZ_HASH_BITS = 12, + TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, + TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, + TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS +}; +#else +enum { + TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, + TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, + TDEFL_MAX_HUFF_SYMBOLS = 288, + TDEFL_LZ_HASH_BITS = 15, + TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, + TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, + TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS +}; +#endif + +// The low-level tdefl functions below may be used directly if the above helper +// functions aren't flexible enough. The low-level functions don't make any heap +// allocations, unlike the above helper functions. +// Negative tdefl_status values are errors: mz_deflate() reports them as +// MZ_STREAM_ERROR, and reports TDEFL_STATUS_DONE as MZ_STREAM_END. +typedef enum { + TDEFL_STATUS_BAD_PARAM = -2, + TDEFL_STATUS_PUT_BUF_FAILED = -1, + TDEFL_STATUS_OKAY = 0, + TDEFL_STATUS_DONE = 1 +} tdefl_status; + +// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums, because mz_deflate() +// casts its int flush argument straight to tdefl_flush. +typedef enum { + TDEFL_NO_FLUSH = 0, + TDEFL_SYNC_FLUSH = 2, + TDEFL_FULL_FLUSH = 3, + TDEFL_FINISH = 4 +} tdefl_flush; + +// tdefl's compression state structure. 
+typedef struct { + tdefl_put_buf_func_ptr m_pPut_buf_func; + void *m_pPut_buf_user; + mz_uint m_flags, m_max_probes[2]; + int m_greedy_parsing; + mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; + mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; + mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, + m_bit_buffer; + mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, + m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, + m_wants_to_finish; + tdefl_status m_prev_return_status; + const void *m_pIn_buf; + void *m_pOut_buf; + size_t *m_pIn_buf_size, *m_pOut_buf_size; + tdefl_flush m_flush; + const mz_uint8 *m_pSrc; + size_t m_src_buf_left, m_out_buf_ofs; + mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; + mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; + mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; + mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; + mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; +} tdefl_compressor; + +// Initializes the compressor. +// There is no corresponding deinit() function because the tdefl API's do not +// dynamically allocate memory. +// pPut_buf_func: If non-NULL, output data will be supplied to the specified +// callback. In this case, the user should call the tdefl_compress_buffer() API +// for compression. +// If pPut_buf_func is NULL the user should always call the tdefl_compress() +// API. +// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, +// etc.) 
+tdefl_status tdefl_init(tdefl_compressor *d, + tdefl_put_buf_func_ptr pPut_buf_func, + void *pPut_buf_user, int flags); + +// Compresses a block of data, consuming as much of the specified input buffer +// as possible, and writing as much compressed data to the specified output +// buffer as possible. +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, + size_t *pIn_buf_size, void *pOut_buf, + size_t *pOut_buf_size, tdefl_flush flush); + +// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a +// non-NULL tdefl_put_buf_func_ptr. +// tdefl_compress_buffer() always consumes the entire input buffer. +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, + size_t in_buf_size, tdefl_flush flush); + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); +mz_uint32 tdefl_get_adler32(tdefl_compressor *d); + +// tdefl_create_comp_flags_from_zip_params() is not available when +// MINIZ_NO_ZLIB_APIS is defined, because it uses some of the zlib-style API's +// macros. +#ifndef MINIZ_NO_ZLIB_APIS +// Create tdefl_init() flags given zlib-style compression parameters +// (mz_deflateInit2() passes the result to tdefl_init()). +// level may range from [0,10] (where 10 is absolute max compression, but may be +// much slower on some files) +// window_bits may be -15 (raw deflate) or 15 (zlib) +// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, +// MZ_RLE, or MZ_FIXED +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, + int strategy); +#endif // #ifndef MINIZ_NO_ZLIB_APIS + +#ifdef __cplusplus +} +#endif + +#endif // MINIZ_HEADER_INCLUDED + +// ------------------- End of Header: Implementation follows. (If you only want +// the header, define MINIZ_HEADER_FILE_ONLY.) + +#ifndef MINIZ_HEADER_FILE_ONLY + +// Compile-time size checks: the array size becomes -1 (a compile error) when an +// mz_uintNN type does not have the expected width. +typedef unsigned char mz_validate_uint16[sizeof(mz_uint16) == 2 ? 1 : -1]; +typedef unsigned char mz_validate_uint32[sizeof(mz_uint32) == 4 ? 1 : -1]; +typedef unsigned char mz_validate_uint64[sizeof(mz_uint64) == 8 ? 
1 : -1]; + +//#include <assert.h> +//#include <string.h> +// NOTE(review): with the two includes above commented out, assert() (used by +// MZ_ASSERT) and memset() (used by MZ_CLEAR_OBJ) must already be declared by +// whatever includes this code - confirm against the top of the file. + +#define MZ_ASSERT(x) assert(x) + +#ifdef MINIZ_NO_MALLOC +#define MZ_MALLOC(x) NULL +#define MZ_FREE(x) (void)x, ((void)0) +#define MZ_REALLOC(p, x) NULL +#else +#define MZ_MALLOC(x) malloc(x) +#define MZ_FREE(x) free(x) +#define MZ_REALLOC(p, x) realloc(p, x) +#endif + +#define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +#define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) +#define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) +#else +#define MZ_READ_LE16(p) \ + ((mz_uint32)(((const mz_uint8 *)(p))[0]) | \ + ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) +#define MZ_READ_LE32(p) \ + ((mz_uint32)(((const mz_uint8 *)(p))[0]) | \ + ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | \ + ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | \ + ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) +#endif + +#ifdef _MSC_VER +#define MZ_FORCEINLINE __forceinline +#elif defined(__GNUC__) +#define MZ_FORCEINLINE inline __attribute__((__always_inline__)) +#else +#define MZ_FORCEINLINE inline +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// ------------------- zlib-style API's + +// mz_adler32() folds the buf_len bytes at ptr into the running Adler-32 value +// `adler` (low 16 bits = s1, high bits = s2) and returns the updated checksum. +// A NULL ptr returns MZ_ADLER32_INIT. The input is consumed in chunks of at +// most 5552 bytes, and both sums are reduced modulo 65521 after each chunk. +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) { + mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); + size_t block_len = buf_len % 5552; + if (!ptr) return MZ_ADLER32_INIT; + while (buf_len) { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { + s1 += ptr[0], s2 += s1; + s1 += ptr[1], s2 += s1; + s1 += ptr[2], s2 += s1; + s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; + s1 += ptr[5], s2 += s1; + s1 += ptr[6], s2 += s1; + s1 += ptr[7], s2 += s1; + } + for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; + buf_len -= block_len; + block_len = 5552; + } + return (s2 << 16) + s1; +} + +// 
Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C +// implementation that balances processor cache usage against speed": +// http://www.geocities.com/malbrain/ +mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) { + static const mz_uint32 s_crc32[16] = { + 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, + 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, + 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c}; + mz_uint32 crcu32 = (mz_uint32)crc; + if (!ptr) return MZ_CRC32_INIT; + crcu32 = ~crcu32; + while (buf_len--) { + mz_uint8 b = *ptr++; + crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; + crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; + } + return ~crcu32; +} + +void mz_free(void *p) { MZ_FREE(p); } + +#ifndef MINIZ_NO_ZLIB_APIS + +static void *def_alloc_func(void *opaque, size_t items, size_t size) { + (void)opaque, (void)items, (void)size; + return MZ_MALLOC(items * size); +} +static void def_free_func(void *opaque, void *address) { + (void)opaque, (void)address; + MZ_FREE(address); +} +static void *def_realloc_func(void *opaque, void *address, size_t items, + size_t size) { + (void)opaque, (void)address, (void)items, (void)size; + return MZ_REALLOC(address, items * size); +} + +const char *mz_version(void) { return MZ_VERSION; } + +int mz_deflateInit(mz_streamp pStream, int level) { + return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, + MZ_DEFAULT_STRATEGY); +} + +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, + int mem_level, int strategy) { + tdefl_compressor *pComp; + mz_uint comp_flags = + TDEFL_COMPUTE_ADLER32 | + tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); + + if (!pStream) return MZ_STREAM_ERROR; + if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || + ((window_bits != MZ_DEFAULT_WINDOW_BITS) && + (-window_bits != MZ_DEFAULT_WINDOW_BITS))) + return 
MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = MZ_ADLER32_INIT; + pStream->msg = NULL; + pStream->reserved = 0; + pStream->total_in = 0; + pStream->total_out = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, + sizeof(tdefl_compressor)); + if (!pComp) return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pComp; + + if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) { + mz_deflateEnd(pStream); + return MZ_PARAM_ERROR; + } + + return MZ_OK; +} + +int mz_deflateReset(mz_streamp pStream) { + if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || + (!pStream->zfree)) + return MZ_STREAM_ERROR; + pStream->total_in = pStream->total_out = 0; + tdefl_init((tdefl_compressor *)pStream->state, NULL, NULL, + ((tdefl_compressor *)pStream->state)->m_flags); + return MZ_OK; +} + +int mz_deflate(mz_streamp pStream, int flush) { + size_t in_bytes, out_bytes; + mz_ulong orig_total_in, orig_total_out; + int mz_status = MZ_OK; + + if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || + (!pStream->next_out)) + return MZ_STREAM_ERROR; + if (!pStream->avail_out) return MZ_BUF_ERROR; + + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + + if (((tdefl_compressor *)pStream->state)->m_prev_return_status == + TDEFL_STATUS_DONE) + return (flush == MZ_FINISH) ? 
MZ_STREAM_END : MZ_BUF_ERROR; + + orig_total_in = pStream->total_in; + orig_total_out = pStream->total_out; + for (;;) { + tdefl_status defl_status; + in_bytes = pStream->avail_in; + out_bytes = pStream->avail_out; + + defl_status = tdefl_compress((tdefl_compressor *)pStream->state, + pStream->next_in, &in_bytes, pStream->next_out, + &out_bytes, (tdefl_flush)flush); + pStream->next_in += (mz_uint)in_bytes; + pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tdefl_get_adler32((tdefl_compressor *)pStream->state); + + pStream->next_out += (mz_uint)out_bytes; + pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (defl_status < 0) { + mz_status = MZ_STREAM_ERROR; + break; + } else if (defl_status == TDEFL_STATUS_DONE) { + mz_status = MZ_STREAM_END; + break; + } else if (!pStream->avail_out) + break; + else if ((!pStream->avail_in) && (flush != MZ_FINISH)) { + if ((flush) || (pStream->total_in != orig_total_in) || + (pStream->total_out != orig_total_out)) + break; + return MZ_BUF_ERROR; // Can't make forward progress without some input. + } + } + return mz_status; +} + +int mz_deflateEnd(mz_streamp pStream) { + if (!pStream) return MZ_STREAM_ERROR; + if (pStream->state) { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) { + (void)pStream; + // This is really over conservative. (And lame, but it's actually pretty + // tricky to compute a true upper bound given the way tdefl's blocking works.) + return MZ_MAX(128 + (source_len * 110) / 100, + 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); +} + +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, + const unsigned char *pSource, mz_ulong source_len, int level) { + int status; + mz_stream stream; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). 
+ if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_deflateInit(&stream, level); + if (status != MZ_OK) return status; + + status = mz_deflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) { + mz_deflateEnd(&stream); + return (status == MZ_OK) ? MZ_BUF_ERROR : status; + } + + *pDest_len = stream.total_out; + return mz_deflateEnd(&stream); +} + +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, + const unsigned char *pSource, mz_ulong source_len) { + return mz_compress2(pDest, pDest_len, pSource, source_len, + MZ_DEFAULT_COMPRESSION); +} + +mz_ulong mz_compressBound(mz_ulong source_len) { + return mz_deflateBound(NULL, source_len); +} + +typedef struct { + tinfl_decompressor m_decomp; + mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; + int m_window_bits; + mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; + tinfl_status m_last_status; +} inflate_state; + +int mz_inflateInit2(mz_streamp pStream, int window_bits) { + inflate_state *pDecomp; + if (!pStream) return MZ_STREAM_ERROR; + if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && + (-window_bits != MZ_DEFAULT_WINDOW_BITS)) + return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 0; + pStream->msg = NULL; + pStream->total_in = 0; + pStream->total_out = 0; + pStream->reserved = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pDecomp = (inflate_state *)pStream->zalloc(pStream->opaque, 1, + sizeof(inflate_state)); + if (!pDecomp) return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pDecomp; + + tinfl_init(&pDecomp->m_decomp); + pDecomp->m_dict_ofs = 0; + pDecomp->m_dict_avail = 0; + pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; + pDecomp->m_first_call = 1; + pDecomp->m_has_flushed = 0; + pDecomp->m_window_bits = 
window_bits; + + return MZ_OK; +} + +int mz_inflateInit(mz_streamp pStream) { + return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); +} + +int mz_inflate(mz_streamp pStream, int flush) { + inflate_state *pState; + mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; + size_t in_bytes, out_bytes, orig_avail_in; + tinfl_status status; + + if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR; + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) + return MZ_STREAM_ERROR; + + pState = (inflate_state *)pStream->state; + if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; + orig_avail_in = pStream->avail_in; + + first_call = pState->m_first_call; + pState->m_first_call = 0; + if (pState->m_last_status < 0) return MZ_DATA_ERROR; + + if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + pState->m_has_flushed |= (flush == MZ_FINISH); + + if ((flush == MZ_FINISH) && (first_call)) { + // MZ_FINISH on the first call implies that the input and output buffers are + // large enough to hold the entire compressed/decompressed file. 
+ decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + in_bytes = pStream->avail_in; + out_bytes = pStream->avail_out; + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, + pStream->next_out, pStream->next_out, &out_bytes, + decomp_flags); + pState->m_last_status = status; + pStream->next_in += (mz_uint)in_bytes; + pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + pStream->next_out += (mz_uint)out_bytes; + pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (status < 0) + return MZ_DATA_ERROR; + else if (status != TINFL_STATUS_DONE) { + pState->m_last_status = TINFL_STATUS_FAILED; + return MZ_BUF_ERROR; + } + return MZ_STREAM_END; + } + // flush != MZ_FINISH then we must assume there's more input. + if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; + + if (pState->m_dict_avail) { + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; + pStream->avail_out -= n; + pStream->total_out += n; + pState->m_dict_avail -= n; + pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + return ((pState->m_last_status == TINFL_STATUS_DONE) && + (!pState->m_dict_avail)) + ? 
MZ_STREAM_END + : MZ_OK; + } + + for (;;) { + in_bytes = pStream->avail_in; + out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; + + status = tinfl_decompress( + &pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, + pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); + pState->m_last_status = status; + + pStream->next_in += (mz_uint)in_bytes; + pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + + pState->m_dict_avail = (mz_uint)out_bytes; + + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; + pStream->avail_out -= n; + pStream->total_out += n; + pState->m_dict_avail -= n; + pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + + if (status < 0) + return MZ_DATA_ERROR; // Stream is corrupted (there could be some + // uncompressed data left in the output dictionary - + // oh well). + else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) + return MZ_BUF_ERROR; // Signal caller that we can't make forward progress + // without supplying more input or by setting flush + // to MZ_FINISH. + else if (flush == MZ_FINISH) { + // The output buffer MUST be large to hold the remaining uncompressed data + // when flush==MZ_FINISH. + if (status == TINFL_STATUS_DONE) + return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; + // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's + // at least 1 more byte on the way. If there's no more room left in the + // output buffer then something is wrong. + else if (!pStream->avail_out) + return MZ_BUF_ERROR; + } else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || + (!pStream->avail_out) || (pState->m_dict_avail)) + break; + } + + return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) + ? 
MZ_STREAM_END + : MZ_OK; +} + +int mz_inflateEnd(mz_streamp pStream) { + if (!pStream) return MZ_STREAM_ERROR; + if (pStream->state) { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, + const unsigned char *pSource, mz_ulong source_len) { + mz_stream stream; + int status; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_inflateInit(&stream); + if (status != MZ_OK) return status; + + status = mz_inflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) { + mz_inflateEnd(&stream); + return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR + : status; + } + *pDest_len = stream.total_out; + + return mz_inflateEnd(&stream); +} + +const char *mz_error(int err) { + static struct { + int m_err; + const char *m_pDesc; + } s_error_descs[] = {{MZ_OK, ""}, + {MZ_STREAM_END, "stream end"}, + {MZ_NEED_DICT, "need dictionary"}, + {MZ_ERRNO, "file error"}, + {MZ_STREAM_ERROR, "stream error"}, + {MZ_DATA_ERROR, "data error"}, + {MZ_MEM_ERROR, "out of memory"}, + {MZ_BUF_ERROR, "buf error"}, + {MZ_VERSION_ERROR, "version error"}, + {MZ_PARAM_ERROR, "parameter error"}}; + mz_uint i; + for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) + if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; + return NULL; +} + +#endif // MINIZ_NO_ZLIB_APIS + +// ------------------- Low-level Decompression (completely independent from all +// compression API's) + +#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) +#define TINFL_MEMSET(p, c, l) memset(p, c, l) + +#define TINFL_CR_BEGIN \ + switch (r->m_state) { \ + case 0: +#define TINFL_CR_RETURN(state_index, result) \ + do 
{ \ + status = result; \ + r->m_state = state_index; \ + goto common_exit; \ + case state_index:; \ + } \ + MZ_MACRO_END +#define TINFL_CR_RETURN_FOREVER(state_index, result) \ + do { \ + for (;;) { \ + TINFL_CR_RETURN(state_index, result); \ + } \ + } \ + MZ_MACRO_END +#define TINFL_CR_FINISH } + +// TODO: If the caller has indicated that there's no more input, and we attempt +// to read beyond the input buf, then something is wrong with the input because +// the inflator never +// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of +// the stream with 0's in this scenario. +#define TINFL_GET_BYTE(state_index, c) \ + do { \ + if (pIn_buf_cur >= pIn_buf_end) { \ + for (;;) { \ + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \ + TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \ + if (pIn_buf_cur < pIn_buf_end) { \ + c = *pIn_buf_cur++; \ + break; \ + } \ + } else { \ + c = 0; \ + break; \ + } \ + } \ + } else \ + c = *pIn_buf_cur++; \ + } \ + MZ_MACRO_END + +#define TINFL_NEED_BITS(state_index, n) \ + do { \ + mz_uint c; \ + TINFL_GET_BYTE(state_index, c); \ + bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ + num_bits += 8; \ + } while (num_bits < (mz_uint)(n)) +#define TINFL_SKIP_BITS(state_index, n) \ + do { \ + if (num_bits < (mz_uint)(n)) { \ + TINFL_NEED_BITS(state_index, n); \ + } \ + bit_buf >>= (n); \ + num_bits -= (n); \ + } \ + MZ_MACRO_END +#define TINFL_GET_BITS(state_index, b, n) \ + do { \ + if (num_bits < (mz_uint)(n)) { \ + TINFL_NEED_BITS(state_index, n); \ + } \ + b = bit_buf & ((1 << (n)) - 1); \ + bit_buf >>= (n); \ + num_bits -= (n); \ + } \ + MZ_MACRO_END + +// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes +// remaining in the input buffer falls below 2. +// It reads just enough bytes from the input stream that are needed to decode +// the next Huffman code (and absolutely no more). 
It works by trying to fully +// decode a +// Huffman code by using whatever bits are currently present in the bit buffer. +// If this fails, it reads another byte, and tries again until it succeeds or +// until the +// bit buffer contains >=15 bits (deflate's max. Huffman code size). +#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \ + do { \ + temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \ + if (temp >= 0) { \ + code_len = temp >> 9; \ + if ((code_len) && (num_bits >= code_len)) break; \ + } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \ + code_len = TINFL_FAST_LOOKUP_BITS; \ + do { \ + temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ + } while ((temp < 0) && (num_bits >= (code_len + 1))); \ + if (temp >= 0) break; \ + } \ + TINFL_GET_BYTE(state_index, c); \ + bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ + num_bits += 8; \ + } while (num_bits < 15); + +// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex +// than you would initially expect because the zlib API expects the decompressor +// to never read +// beyond the final byte of the deflate stream. (In other words, when this macro +// wants to read another byte from the input, it REALLY needs another byte in +// order to fully +// decode the next Huffman code.) Handling this properly is particularly +// important on raw deflate (non-zlib) streams, which aren't followed by a byte +// aligned adler-32. +// The slow path is only executed at the very end of the input buffer. 
+#define TINFL_HUFF_DECODE(state_index, sym, pHuff) \ + do { \ + int temp; \ + mz_uint code_len, c; \ + if (num_bits < 15) { \ + if ((pIn_buf_end - pIn_buf_cur) < 2) { \ + TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \ + } else { \ + bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | \ + (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); \ + pIn_buf_cur += 2; \ + num_bits += 16; \ + } \ + } \ + if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= \ + 0) \ + code_len = temp >> 9, temp &= 511; \ + else { \ + code_len = TINFL_FAST_LOOKUP_BITS; \ + do { \ + temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ + } while (temp < 0); \ + } \ + sym = temp; \ + bit_buf >>= code_len; \ + num_bits -= code_len; \ + } \ + MZ_MACRO_END + +tinfl_status tinfl_decompress(tinfl_decompressor *r, + const mz_uint8 *pIn_buf_next, + size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, + mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, + const mz_uint32 decomp_flags) { + static const int s_length_base[31] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + static const int s_length_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, + 4, 4, 5, 5, 5, 5, 0, 0, 0}; + static const int s_dist_base[32] = { + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, + 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, + 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0}; + static const int s_dist_extra[32] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; + static const mz_uint8 s_length_dezigzag[19] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + static const int s_min_table_sizes[3] = {257, 1, 4}; + + tinfl_status status = TINFL_STATUS_FAILED; + mz_uint32 num_bits, dist, counter, num_extra; + tinfl_bit_buf_t bit_buf; + const mz_uint8 *pIn_buf_cur = pIn_buf_next, + *const 
pIn_buf_end = pIn_buf_next + *pIn_buf_size; + mz_uint8 *pOut_buf_cur = pOut_buf_next, + *const pOut_buf_end = pOut_buf_next + *pOut_buf_size; + size_t out_buf_size_mask = + (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) + ? (size_t)-1 + : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, + dist_from_out_buf_start; + + // Ensure the output buffer's size is a power of 2, unless the output buffer + // is large enough to hold the entire output file (in which case it doesn't + // matter). + if (((out_buf_size_mask + 1) & out_buf_size_mask) || + (pOut_buf_next < pOut_buf_start)) { + *pIn_buf_size = *pOut_buf_size = 0; + return TINFL_STATUS_BAD_PARAM; + } + + num_bits = r->m_num_bits; + bit_buf = r->m_bit_buf; + dist = r->m_dist; + counter = r->m_counter; + num_extra = r->m_num_extra; + dist_from_out_buf_start = r->m_dist_from_out_buf_start; + TINFL_CR_BEGIN + + bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; + r->m_z_adler32 = r->m_check_adler32 = 1; + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) { + TINFL_GET_BYTE(1, r->m_zhdr0); + TINFL_GET_BYTE(2, r->m_zhdr1); + counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || + (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); + if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) + counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || + ((out_buf_size_mask + 1) < + (size_t)(1ULL << (8U + (r->m_zhdr0 >> 4))))); + if (counter) { + TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); + } + } + + do { + TINFL_GET_BITS(3, r->m_final, 3); + r->m_type = r->m_final >> 1; + if (r->m_type == 0) { + TINFL_SKIP_BITS(5, num_bits & 7); + for (counter = 0; counter < 4; ++counter) { + if (num_bits) + TINFL_GET_BITS(6, r->m_raw_header[counter], 8); + else + TINFL_GET_BYTE(7, r->m_raw_header[counter]); + } + if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != + (mz_uint)(0xFFFF ^ + (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { + TINFL_CR_RETURN_FOREVER(39, 
TINFL_STATUS_FAILED); + } + while ((counter) && (num_bits)) { + TINFL_GET_BITS(51, dist, 8); + while (pOut_buf_cur >= pOut_buf_end) { + TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); + } + *pOut_buf_cur++ = (mz_uint8)dist; + counter--; + } + while (counter) { + size_t n; + while (pOut_buf_cur >= pOut_buf_end) { + TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); + } + while (pIn_buf_cur >= pIn_buf_end) { + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { + TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT); + } else { + TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED); + } + } + n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), + (size_t)(pIn_buf_end - pIn_buf_cur)), + counter); + TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); + pIn_buf_cur += n; + pOut_buf_cur += n; + counter -= (mz_uint)n; + } + } else if (r->m_type == 3) { + TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); + } else { + if (r->m_type == 1) { + mz_uint8 *p = r->m_tables[0].m_code_size; + mz_uint i; + r->m_table_sizes[0] = 288; + r->m_table_sizes[1] = 32; + TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); + for (i = 0; i <= 143; ++i) *p++ = 8; + for (; i <= 255; ++i) *p++ = 9; + for (; i <= 279; ++i) *p++ = 7; + for (; i <= 287; ++i) *p++ = 8; + } else { + for (counter = 0; counter < 3; counter++) { + TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); + r->m_table_sizes[counter] += s_min_table_sizes[counter]; + } + MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); + for (counter = 0; counter < r->m_table_sizes[2]; counter++) { + mz_uint s; + TINFL_GET_BITS(14, s, 3); + r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; + } + r->m_table_sizes[2] = 19; + } + for (; (int)r->m_type >= 0; r->m_type--) { + int tree_next, tree_cur; + tinfl_huff_table *pTable; + mz_uint i, j, used_syms, total, sym_index, next_code[17], + total_syms[16]; + pTable = &r->m_tables[r->m_type]; + MZ_CLEAR_OBJ(total_syms); + MZ_CLEAR_OBJ(pTable->m_look_up); + MZ_CLEAR_OBJ(pTable->m_tree); + 
for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) + total_syms[pTable->m_code_size[i]]++; + used_syms = 0, total = 0; + next_code[0] = next_code[1] = 0; + for (i = 1; i <= 15; ++i) { + used_syms += total_syms[i]; + next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); + } + if ((65536 != total) && (used_syms > 1)) { + TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); + } + for (tree_next = -1, sym_index = 0; + sym_index < r->m_table_sizes[r->m_type]; ++sym_index) { + mz_uint rev_code = 0, l, cur_code, + code_size = pTable->m_code_size[sym_index]; + if (!code_size) continue; + cur_code = next_code[code_size]++; + for (l = code_size; l > 0; l--, cur_code >>= 1) + rev_code = (rev_code << 1) | (cur_code & 1); + if (code_size <= TINFL_FAST_LOOKUP_BITS) { + mz_int16 k = (mz_int16)((code_size << 9) | sym_index); + while (rev_code < TINFL_FAST_LOOKUP_SIZE) { + pTable->m_look_up[rev_code] = k; + rev_code += (1 << code_size); + } + continue; + } + if (0 == + (tree_cur = pTable->m_look_up[rev_code & + (TINFL_FAST_LOOKUP_SIZE - 1)])) { + pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = + (mz_int16)tree_next; + tree_cur = tree_next; + tree_next -= 2; + } + rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); + for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) { + tree_cur -= ((rev_code >>= 1) & 1); + if (!pTable->m_tree[-tree_cur - 1]) { + pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; + tree_cur = tree_next; + tree_next -= 2; + } else + tree_cur = pTable->m_tree[-tree_cur - 1]; + } + tree_cur -= ((rev_code >>= 1) & 1); + pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; + } + if (r->m_type == 2) { + for (counter = 0; + counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);) { + mz_uint s; + TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); + if (dist < 16) { + r->m_len_codes[counter++] = (mz_uint8)dist; + continue; + } + if ((dist == 16) && (!counter)) { + TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); + } + num_extra = "\02\03\07"[dist - 16]; + 
TINFL_GET_BITS(18, s, num_extra); + s += "\03\03\013"[dist - 16]; + TINFL_MEMSET(r->m_len_codes + counter, + (dist == 16) ? r->m_len_codes[counter - 1] : 0, s); + counter += s; + } + if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) { + TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); + } + TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, + r->m_table_sizes[0]); + TINFL_MEMCPY(r->m_tables[1].m_code_size, + r->m_len_codes + r->m_table_sizes[0], + r->m_table_sizes[1]); + } + } + for (;;) { + mz_uint8 *pSrc; + for (;;) { + if (((pIn_buf_end - pIn_buf_cur) < 4) || + ((pOut_buf_end - pOut_buf_cur) < 2)) { + TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); + if (counter >= 256) break; + while (pOut_buf_cur >= pOut_buf_end) { + TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); + } + *pOut_buf_cur++ = (mz_uint8)counter; + } else { + int sym2; + mz_uint code_len; +#if TINFL_USE_64BIT_BITBUF + if (num_bits < 30) { + bit_buf |= + (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); + pIn_buf_cur += 4; + num_bits += 32; + } +#else + if (num_bits < 15) { + bit_buf |= + (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); + pIn_buf_cur += 2; + num_bits += 16; + } +#endif + if ((sym2 = + r->m_tables[0] + .m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= + 0) + code_len = sym2 >> 9; + else { + code_len = TINFL_FAST_LOOKUP_BITS; + do { + sym2 = r->m_tables[0] + .m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; + } while (sym2 < 0); + } + counter = sym2; + bit_buf >>= code_len; + num_bits -= code_len; + if (counter & 256) break; + +#if !TINFL_USE_64BIT_BITBUF + if (num_bits < 15) { + bit_buf |= + (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); + pIn_buf_cur += 2; + num_bits += 16; + } +#endif + if ((sym2 = + r->m_tables[0] + .m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= + 0) + code_len = sym2 >> 9; + else { + code_len = TINFL_FAST_LOOKUP_BITS; + do { + sym2 = r->m_tables[0] + .m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; + 
} while (sym2 < 0); + } + bit_buf >>= code_len; + num_bits -= code_len; + + pOut_buf_cur[0] = (mz_uint8)counter; + if (sym2 & 256) { + pOut_buf_cur++; + counter = sym2; + break; + } + pOut_buf_cur[1] = (mz_uint8)sym2; + pOut_buf_cur += 2; + } + } + if ((counter &= 511) == 256) break; + + num_extra = s_length_extra[counter - 257]; + counter = s_length_base[counter - 257]; + if (num_extra) { + mz_uint extra_bits; + TINFL_GET_BITS(25, extra_bits, num_extra); + counter += extra_bits; + } + + TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); + num_extra = s_dist_extra[dist]; + dist = s_dist_base[dist]; + if (num_extra) { + mz_uint extra_bits; + TINFL_GET_BITS(27, extra_bits, num_extra); + dist += extra_bits; + } + + dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; + if ((dist > dist_from_out_buf_start) && + (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) { + TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); + } + + pSrc = pOut_buf_start + + ((dist_from_out_buf_start - dist) & out_buf_size_mask); + + if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) { + while (counter--) { + while (pOut_buf_cur >= pOut_buf_end) { + TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); + } + *pOut_buf_cur++ = + pOut_buf_start[(dist_from_out_buf_start++ - dist) & + out_buf_size_mask]; + } + continue; + } +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + else if ((counter >= 9) && (counter <= dist)) { + const mz_uint8 *pSrc_end = pSrc + (counter & ~7); + do { + ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; + ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; + pOut_buf_cur += 8; + } while ((pSrc += 8) < pSrc_end); + if ((counter &= 7) < 3) { + if (counter) { + pOut_buf_cur[0] = pSrc[0]; + if (counter > 1) pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + continue; + } + } +#endif + do { + pOut_buf_cur[0] = pSrc[0]; + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur[2] = pSrc[2]; + pOut_buf_cur += 3; + pSrc += 3; + } while ((int)(counter -= 3) > 
2); + if ((int)counter > 0) { + pOut_buf_cur[0] = pSrc[0]; + if ((int)counter > 1) pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + } + } + } while (!(r->m_final & 1)); + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) { + TINFL_SKIP_BITS(32, num_bits & 7); + for (counter = 0; counter < 4; ++counter) { + mz_uint s; + if (num_bits) + TINFL_GET_BITS(41, s, 8); + else + TINFL_GET_BYTE(42, s); + r->m_z_adler32 = (r->m_z_adler32 << 8) | s; + } + } + TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); + TINFL_CR_FINISH + +common_exit: + r->m_num_bits = num_bits; + r->m_bit_buf = bit_buf; + r->m_dist = dist; + r->m_counter = counter; + r->m_num_extra = num_extra; + r->m_dist_from_out_buf_start = dist_from_out_buf_start; + *pIn_buf_size = pIn_buf_cur - pIn_buf_next; + *pOut_buf_size = pOut_buf_cur - pOut_buf_next; + if ((decomp_flags & + (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && + (status >= 0)) { + const mz_uint8 *ptr = pOut_buf_next; + size_t buf_len = *pOut_buf_size; + mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, + s2 = r->m_check_adler32 >> 16; + size_t block_len = buf_len % 5552; + while (buf_len) { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { + s1 += ptr[0], s2 += s1; + s1 += ptr[1], s2 += s1; + s1 += ptr[2], s2 += s1; + s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; + s1 += ptr[5], s2 += s1; + s1 += ptr[6], s2 += s1; + s1 += ptr[7], s2 += s1; + } + for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; + buf_len -= block_len; + block_len = 5552; + } + r->m_check_adler32 = (s2 << 16) + s1; + if ((status == TINFL_STATUS_DONE) && + (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && + (r->m_check_adler32 != r->m_z_adler32)) + status = TINFL_STATUS_ADLER32_MISMATCH; + } + return status; +} + +// Higher level helper functions. 
+void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, + size_t *pOut_len, int flags) { + tinfl_decompressor decomp; + void *pBuf = NULL, *pNew_buf; + size_t src_buf_ofs = 0, out_buf_capacity = 0; + *pOut_len = 0; + tinfl_init(&decomp); + for (;;) { + size_t src_buf_size = src_buf_len - src_buf_ofs, + dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; + tinfl_status status = tinfl_decompress( + &decomp, (const mz_uint8 *)pSrc_buf + src_buf_ofs, &src_buf_size, + (mz_uint8 *)pBuf, pBuf ? (mz_uint8 *)pBuf + *pOut_len : NULL, + &dst_buf_size, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) { + MZ_FREE(pBuf); + *pOut_len = 0; + return NULL; + } + src_buf_ofs += src_buf_size; + *pOut_len += dst_buf_size; + if (status == TINFL_STATUS_DONE) break; + new_out_buf_capacity = out_buf_capacity * 2; + if (new_out_buf_capacity < 128) new_out_buf_capacity = 128; + pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); + if (!pNew_buf) { + MZ_FREE(pBuf); + *pOut_len = 0; + return NULL; + } + pBuf = pNew_buf; + out_buf_capacity = new_out_buf_capacity; + } + return pBuf; +} + +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, + const void *pSrc_buf, size_t src_buf_len, + int flags) { + tinfl_decompressor decomp; + tinfl_status status; + tinfl_init(&decomp); + status = + tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf, &src_buf_len, + (mz_uint8 *)pOut_buf, (mz_uint8 *)pOut_buf, &out_buf_len, + (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + return (status != TINFL_STATUS_DONE) ? 
TINFL_DECOMPRESS_MEM_TO_MEM_FAILED + : out_buf_len; +} + +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, + tinfl_put_buf_func_ptr pPut_buf_func, + void *pPut_buf_user, int flags) { + int result = 0; + tinfl_decompressor decomp; + mz_uint8 *pDict = (mz_uint8 *)MZ_MALLOC(TINFL_LZ_DICT_SIZE); + size_t in_buf_ofs = 0, dict_ofs = 0; + if (!pDict) return TINFL_STATUS_FAILED; + tinfl_init(&decomp); + for (;;) { + size_t in_buf_size = *pIn_buf_size - in_buf_ofs, + dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; + tinfl_status status = + tinfl_decompress(&decomp, (const mz_uint8 *)pIn_buf + in_buf_ofs, + &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, + (flags & + ~(TINFL_FLAG_HAS_MORE_INPUT | + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); + in_buf_ofs += in_buf_size; + if ((dst_buf_size) && + (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) + break; + if (status != TINFL_STATUS_HAS_MORE_OUTPUT) { + result = (status == TINFL_STATUS_DONE); + break; + } + dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); + } + MZ_FREE(pDict); + *pIn_buf_size = in_buf_ofs; + return result; +} + +// ------------------- Low-level Compression (independent from all decompression +// API's) + +// Purposely making these tables static for faster init and thread safety. 
// Indexed by the stored match length byte (0..255). Yields the
// literal/length symbol (257..285) whose code is emitted for that match.
static const mz_uint16 s_tdefl_len_sym[256] = {
    257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268,
    268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272,
    272, 272, 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274,
    274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276,
    276, 276, 276, 276, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277,
    277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
    278, 278, 278, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, 279,
    279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280,
    280, 280, 280, 280, 280, 280, 280, 280, 281, 281, 281, 281, 281, 281, 281,
    281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281,
    281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 282, 282, 282, 282, 282,
    282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282,
    282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 283, 283, 283,
    283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283,
    283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 284,
    284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284,
    284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284,
    285};

// Indexed like s_tdefl_len_sym. Yields how many low bits of the stored match
// length are written as extra bits after the length symbol.
static const mz_uint8 s_tdefl_len_extra[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0};

// Distance symbol for stored match distances below 512, indexed by
// (match_dist & 511).
static const mz_uint8 s_tdefl_small_dist_sym[512] = {
    0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,
    8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17};

// Extra-bit count matching s_tdefl_small_dist_sym (same index).
static const mz_uint8 s_tdefl_small_dist_extra[512] = {
    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};

// Distance symbol for stored match distances of 512 and above, indexed by
// (match_dist >> 8).
static const mz_uint8 s_tdefl_large_dist_sym[128] = {
    0,  0,  18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24,
    24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26,
    26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27,
    27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
    29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29};

// Extra-bit count matching s_tdefl_large_dist_sym (same index).
static const mz_uint8 s_tdefl_large_dist_extra[128] = {
    0,  0,  8,  8,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13};

// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted
// values.
+typedef struct { mz_uint16 m_key, m_sym_index; } tdefl_sym_freq; +static tdefl_sym_freq *tdefl_radix_sort_syms(mz_uint num_syms, + tdefl_sym_freq *pSyms0, + tdefl_sym_freq *pSyms1) { + mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; + tdefl_sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1; + MZ_CLEAR_OBJ(hist); + for (i = 0; i < num_syms; i++) { + mz_uint freq = pSyms0[i].m_key; + hist[freq & 0xFF]++; + hist[256 + ((freq >> 8) & 0xFF)]++; + } + while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) + total_passes--; + for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) { + const mz_uint32 *pHist = &hist[pass << 8]; + mz_uint offsets[256], cur_ofs = 0; + for (i = 0; i < 256; i++) { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + } + for (i = 0; i < num_syms; i++) + pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = + pCur_syms[i]; + { + tdefl_sym_freq *t = pCur_syms; + pCur_syms = pNew_syms; + pNew_syms = t; + } + } + return pCur_syms; +} + +// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, +// alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. 
// Replaces, in place, the m_key of each of the n entries of A with a code
// length. The caller in this file passes the array returned by
// tdefl_radix_sort_syms() (symbol frequencies in ascending order) and
// afterwards histograms the resulting m_key values as code sizes.
//
// m_key is reused for three different meanings as the function progresses
// (weight, then index of the parent node, then depth), so the statement
// order below must not be changed.
static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) {
  int root, leaf, next, avbl, used, dpth;
  // Nothing to do for an empty list; a single symbol gets a 1-bit code.
  if (n == 0)
    return;
  else if (n == 1) {
    A[0].m_key = 1;
    return;
  }
  // Pass 1: combine the two smallest available weights (next unused leaf or
  // next unmerged internal node) into internal node `next`. A consumed
  // internal node has its m_key overwritten with the index of its parent.
  A[0].m_key += A[1].m_key;
  root = 0;
  leaf = 2;
  for (next = 1; next < n - 1; next++) {
    if (leaf >= n || A[root].m_key < A[leaf].m_key) {
      A[next].m_key = A[root].m_key;
      A[root++].m_key = (mz_uint16)next;
    } else
      A[next].m_key = A[leaf++].m_key;
    if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key)) {
      A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key);
      A[root++].m_key = (mz_uint16)next;
    } else
      A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key);
  }
  // Pass 2: turn parent indices into depths. The last internal node (n - 2)
  // is the root at depth 0; every other node is one deeper than its parent.
  A[n - 2].m_key = 0;
  for (next = n - 3; next >= 0; next--)
    A[next].m_key = A[A[next].m_key].m_key + 1;
  // Pass 3: walk the depths upwards. At each depth, `avbl` is the number of
  // node slots and `used` the number taken by internal nodes; the remaining
  // slots are leaves, whose depth is written from the end of the array.
  avbl = 1;
  used = dpth = 0;
  root = n - 2;
  next = n - 1;
  while (avbl > 0) {
    while (root >= 0 && (int)A[root].m_key == dpth) {
      used++;
      root--;
    }
    while (avbl > used) {
      A[next--].m_key = (mz_uint16)(dpth);
      avbl--;
    }
    avbl = 2 * used;
    dpth++;
    used = 0;
  }
}

// Limits canonical Huffman code table's max code size.
// Largest code size that the per-length histograms (num_codes[]) used below
// have room for.
enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 };

// pNum_codes[i] is the number of codes of length i. All codes longer than
// max_code_size are first counted as max_code_size; the loop then repeatedly
// drops one code from the longest length and moves one code from the longest
// shorter non-empty length one level down (adding two at that level), until
// the weighted total equals exactly 1 << max_code_size.
// Lists with at most one code are left untouched.
static void tdefl_huffman_enforce_max_code_size(int *pNum_codes,
                                                int code_list_len,
                                                int max_code_size) {
  int i;
  mz_uint32 total = 0;
  if (code_list_len <= 1) return;
  for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++)
    pNum_codes[max_code_size] += pNum_codes[i];
  // total = sum over lengths of count << (max_code_size - length).
  for (i = max_code_size; i > 0; i--)
    total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i));
  while (total != (1UL << max_code_size)) {
    pNum_codes[max_code_size]--;
    for (i = max_code_size - 1; i > 0; i--)
      if (pNum_codes[i]) {
        pNum_codes[i]--;
        pNum_codes[i + 1] += 2;
        break;
      }
    total--;
  }
}

// Fills d->m_huff_codes[table_num] (and, unless static_table is set,
// d->m_huff_code_sizes[table_num]) for the first table_len symbols.
//
//   static_table != 0 - the code sizes are already in d->m_huff_code_sizes
//                       and are only histogrammed.
//   static_table == 0 - code sizes are derived from the symbol frequencies in
//                       d->m_huff_count[table_num] and limited to
//                       code_size_limit bits.
//
// In both cases canonical codes are then assigned per length and stored
// bit-reversed.
static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num,
                                         int table_len, int code_size_limit,
                                         int static_table) {
  int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE];
  mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1];
  MZ_CLEAR_OBJ(num_codes);
  if (static_table) {
    for (i = 0; i < table_len; i++)
      num_codes[d->m_huff_code_sizes[table_num][i]]++;
  } else {
    tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS],
        *pSyms;
    int num_used_syms = 0;
    const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0];
    // Collect only the symbols that actually occur.
    for (i = 0; i < table_len; i++)
      if (pSym_count[i]) {
        syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i];
        syms0[num_used_syms++].m_sym_index = (mz_uint16)i;
      }

    // Sort by frequency, then convert frequencies to code lengths in place.
    pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1);
    tdefl_calculate_minimum_redundancy(pSyms, num_used_syms);

    for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++;

    tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms,
                                        code_size_limit);

    MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]);
    MZ_CLEAR_OBJ(d->m_huff_codes[table_num]);
    // Hand out the (possibly adjusted) lengths: the shortest lengths go to
    // the entries at the end of the sorted array.
    for (i = 1, j = num_used_syms; i <= code_size_limit; i++)
      for (l = num_codes[i]; l > 0; l--)
        d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i);
  }

  // First canonical code of each length.
  next_code[1] = 0;
  for (j = 0, i = 2; i <= code_size_limit; i++)
    next_code[i] = j = ((j + num_codes[i - 1]) << 1);

  for (i = 0; i < table_len; i++) {
    mz_uint rev_code = 0, code, code_size;
    if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue;
    code = next_code[code_size]++;
    // Store the code with its bits reversed.
    for (l = code_size; l > 0; l--, code >>= 1)
      rev_code = (rev_code << 1) | (code & 1);
    d->m_huff_codes[table_num][i] = (mz_uint16)rev_code;
  }
}

// Appends the low `l` bits of `b` to d->m_bit_buffer and flushes every
// complete byte to d->m_pOutput_buf. Requires a `tdefl_compressor *d` in
// scope. Once the output pointer has reached d->m_pOutput_buf_end, completed
// bytes are dropped rather than written.
#define TDEFL_PUT_BITS(b, l)                               \
  do {                                                     \
    mz_uint bits = b;                                      \
    mz_uint len = l;                                       \
    MZ_ASSERT(bits <= ((1U << len) - 1U));                 \
    d->m_bit_buffer |= (bits << d->m_bits_in);             \
    d->m_bits_in += len;                                   \
    while (d->m_bits_in >= 8) {                            \
      if (d->m_pOutput_buf < d->m_pOutput_buf_end)         \
        *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \
      d->m_bit_buffer >>= 8;                               \
      d->m_bits_in -= 8;                                   \
    }                                                      \
  }                                                        \
  MZ_MACRO_END

// Flushes a pending run of repeats of prev_code_size into packed_code_sizes
// and d->m_huff_count[2]: runs shorter than 3 are written out literally,
// longer ones as symbol 16 followed by (run length - 3). Uses the locals
// rle_repeat_count, prev_code_size, packed_code_sizes and
// num_packed_code_sizes of the expansion site.
#define TDEFL_RLE_PREV_CODE_SIZE()                                        \
  {                                                                       \
    if (rle_repeat_count) {                                               \
      if (rle_repeat_count < 3) {                                         \
        d->m_huff_count[2][prev_code_size] = (mz_uint16)(                 \
            d->m_huff_count[2][prev_code_size] + rle_repeat_count);       \
        while (rle_repeat_count--)                                        \
          packed_code_sizes[num_packed_code_sizes++] = prev_code_size;    \
      } else {                                                            \
        d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); \
        packed_code_sizes[num_packed_code_sizes++] = 16;                  \
        packed_code_sizes[num_packed_code_sizes++] =                      \
            (mz_uint8)(rle_repeat_count - 3);                             \
      }                                                                   \
      rle_repeat_count = 0;                                               \
    }                                                                     \
  }

// Flushes a pending run of zero code sizes: runs shorter than 3 are written
// out literally, runs of 3..10 as symbol 17 followed by (run - 3), longer
// runs as symbol 18 followed by (run - 11). Uses the locals rle_z_count,
// packed_code_sizes and num_packed_code_sizes of the expansion site.
#define TDEFL_RLE_ZERO_CODE_SIZE()                                            \
  {                                                                           \
    if (rle_z_count) {                                                        \
      if (rle_z_count < 3) {                                                  \
        d->m_huff_count[2][0] =                                               \
            (mz_uint16)(d->m_huff_count[2][0] + rle_z_count);                 \
        while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \
      } else if (rle_z_count <= 10) {                                         \
        d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1);     \
        packed_code_sizes[num_packed_code_sizes++] = 17;                      \
        packed_code_sizes[num_packed_code_sizes++] =                          \
            (mz_uint8)(rle_z_count - 3);                                      \
      } else {                                                                \
        d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1);     \
        packed_code_sizes[num_packed_code_sizes++] = 18;                      \
        packed_code_sizes[num_packed_code_sizes++] =                          \
            (mz_uint8)(rle_z_count - 11);                                     \
      }                                                                       \
      rle_z_count = 0;                                                        \
    }                                                                         \
  }

// Order in which the code sizes of table 2 are written to the block header
// (same sequence as s_length_dezigzag in tinfl_decompress).
static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = {
    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};

// Builds tables 0 and 1 from the collected symbol counts, run-length packs
// their code sizes, builds table 2 from the packed symbols and writes the
// block type bits plus the table description through TDEFL_PUT_BITS.
static void tdefl_start_dynamic_block(tdefl_compressor *d) {
  int num_lit_codes, num_dist_codes, num_bit_lengths;
  mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count,
      rle_repeat_count, packed_code_sizes_index;
  mz_uint8
      code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1],
      packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1],
      prev_code_size = 0xFF;

  // Symbol 256 is always given a code: tdefl_compress_lz_codes() emits it
  // after the last LZ code.
  d->m_huff_count[0][256] = 1;

  tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE);
  tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE);

  // Trim trailing unused symbols, keeping at least 257 / 1 entries.
  for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--)
    if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break;
  for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--)
    if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break;

  // Both size lists are packed as one sequence.
  memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes);
  memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0],
         num_dist_codes);
  total_code_sizes_to_pack = num_lit_codes + num_dist_codes;
  num_packed_code_sizes = 0;
  rle_z_count = 0;
  rle_repeat_count = 0;

  memset(&d->m_huff_count[2][0], 0,
         sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2);
  for (i = 0; i < total_code_sizes_to_pack; i++) {
    mz_uint8 code_size = code_sizes_to_pack[i];
    if (!code_size) {
      TDEFL_RLE_PREV_CODE_SIZE();
      // A zero run is flushed as soon as it reaches 138 entries.
      if (++rle_z_count == 138) {
        TDEFL_RLE_ZERO_CODE_SIZE();
      }
    } else {
      TDEFL_RLE_ZERO_CODE_SIZE();
      if (code_size != prev_code_size) {
        TDEFL_RLE_PREV_CODE_SIZE();
        d->m_huff_count[2][code_size] =
            (mz_uint16)(d->m_huff_count[2][code_size] + 1);
        packed_code_sizes[num_packed_code_sizes++] = code_size;
      } else if (++rle_repeat_count == 6) {
        // A repeat run is flushed as soon as it reaches 6 entries.
        TDEFL_RLE_PREV_CODE_SIZE();
      }
    }
    prev_code_size = code_size;
  }
  // Flush whichever run is still pending.
  if (rle_repeat_count) {
    TDEFL_RLE_PREV_CODE_SIZE();
  } else {
    TDEFL_RLE_ZERO_CODE_SIZE();
  }

  tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE);

  // 2-bit block type field, value 2.
  TDEFL_PUT_BITS(2, 2);

  TDEFL_PUT_BITS(num_lit_codes - 257, 5);
  TDEFL_PUT_BITS(num_dist_codes - 1, 5);

  // Number of table-2 code sizes to send (in swizzled order), at least 4.
  for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--)
    if (d->m_huff_code_sizes
            [2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]])
      break;
  num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1));
  TDEFL_PUT_BITS(num_bit_lengths - 4, 4);
  for (i = 0; (int)i < num_bit_lengths; i++)
    TDEFL_PUT_BITS(
        d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3);

  // Emit the packed code sizes; symbols 16..18 are followed by their
  // repeat-count byte, written with 2, 3 or 7 bits respectively.
  for (packed_code_sizes_index = 0;
       packed_code_sizes_index < num_packed_code_sizes;) {
    mz_uint code = packed_code_sizes[packed_code_sizes_index++];
    MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2);
    TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]);
    if (code >= 16)
      TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++],
                     "\02\03\07"[code - 16]);
  }
}

// Installs the fixed code sizes (8/9/7/8 bits for table 0, 5 bits for all 32
// entries of table 1), derives the codes from them and writes the 2-bit
// block type field with value 1.
static void tdefl_start_static_block(tdefl_compressor *d) {
  mz_uint i;
  mz_uint8 *p = &d->m_huff_code_sizes[0][0];

  for (i = 0; i <= 143; ++i) *p++ = 8;
  for (; i <= 255; ++i) *p++ = 9;
  for (; i <= 279; ++i) *p++ = 7;
  for (; i <= 287; ++i) *p++ = 8;

  memset(d->m_huff_code_sizes[1], 5, 32);

  tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE);
  tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE);

  TDEFL_PUT_BITS(1, 2);
}

// mz_bitmasks[n] has the low n bits set (n = 0..16).
static const mz_uint mz_bitmasks[17] = {
    0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF,
    0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF};

#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && \
    MINIZ_HAS_64BIT_REGISTERS
static
mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) { + mz_uint flags; + mz_uint8 *pLZ_codes; + mz_uint8 *pOutput_buf = d->m_pOutput_buf; + mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; + mz_uint64 bit_buffer = d->m_bit_buffer; + mz_uint bits_in = d->m_bits_in; + +#define TDEFL_PUT_BITS_FAST(b, l) \ + { \ + bit_buffer |= (((mz_uint64)(b)) << bits_in); \ + bits_in += (l); \ + } + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; + flags >>= 1) { + if (flags == 1) flags = *pLZ_codes++ | 0x100; + + if (flags & 1) { + mz_uint s0, s1, n0, n1, sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], + match_dist = *(const mz_uint16 *)(pLZ_codes + 1); + pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], + d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], + s_tdefl_len_extra[match_len]); + + // This sequence coaxes MSVC into using cmov's vs. jmp's. + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + n0 = s_tdefl_small_dist_extra[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[match_dist >> 8]; + n1 = s_tdefl_large_dist_extra[match_dist >> 8]; + sym = (match_dist < 512) ? s0 : s1; + num_extra_bits = (match_dist < 512) ? 
n0 : n1; + + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], + d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], + num_extra_bits); + } else { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], + d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], + d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], + d->m_huff_code_sizes[0][lit]); + } + } + } + + if (pOutput_buf >= d->m_pOutput_buf_end) return MZ_FALSE; + + *(mz_uint64 *)pOutput_buf = bit_buffer; + pOutput_buf += (bits_in >> 3); + bit_buffer >>= (bits_in & ~7); + bits_in &= 7; + } + +#undef TDEFL_PUT_BITS_FAST + + d->m_pOutput_buf = pOutput_buf; + d->m_bits_in = 0; + d->m_bit_buffer = 0; + + while (bits_in) { + mz_uint32 n = MZ_MIN(bits_in, 16); + TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); + bit_buffer >>= n; + bits_in -= n; + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#else +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) { + mz_uint flags; + mz_uint8 *pLZ_codes; + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; + flags >>= 1) { + if (flags == 1) flags = *pLZ_codes++ | 0x100; + if (flags & 1) { + mz_uint sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], + match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); + pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], + 
d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], + s_tdefl_len_extra[match_len]); + + if (match_dist < 512) { + sym = s_tdefl_small_dist_sym[match_dist]; + num_extra_bits = s_tdefl_small_dist_extra[match_dist]; + } else { + sym = s_tdefl_large_dist_sym[match_dist >> 8]; + num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; + } + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } else { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && + // MINIZ_HAS_64BIT_REGISTERS + +static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) { + if (static_block) + tdefl_start_static_block(d); + else + tdefl_start_dynamic_block(d); + return tdefl_compress_lz_codes(d); +} + +static int tdefl_flush_block(tdefl_compressor *d, int flush) { + mz_uint saved_bit_buf, saved_bits_in; + mz_uint8 *pSaved_output_buf; + mz_bool comp_block_succeeded = MZ_FALSE; + int n, use_raw_block = + ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && + (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; + mz_uint8 *pOutput_buf_start = + ((d->m_pPut_buf_func == NULL) && + ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) + ? 
((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) + : d->m_output_buf; + + d->m_pOutput_buf = pOutput_buf_start; + d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; + + MZ_ASSERT(!d->m_output_flush_remaining); + d->m_output_flush_ofs = 0; + d->m_output_flush_remaining = 0; + + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); + d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); + + if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) { + TDEFL_PUT_BITS(0x78, 8); + TDEFL_PUT_BITS(0x01, 8); + } + + TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); + + pSaved_output_buf = d->m_pOutput_buf; + saved_bit_buf = d->m_bit_buffer; + saved_bits_in = d->m_bits_in; + + if (!use_raw_block) + comp_block_succeeded = + tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || + (d->m_total_lz_bytes < 48)); + + // If the block gets expanded, forget the current contents of the output + // buffer and send a raw block instead. + if (((use_raw_block) || + ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= + d->m_total_lz_bytes))) && + ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size)) { + mz_uint i; + d->m_pOutput_buf = pSaved_output_buf; + d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + TDEFL_PUT_BITS(0, 2); + if (d->m_bits_in) { + TDEFL_PUT_BITS(0, 8 - d->m_bits_in); + } + for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) { + TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); + } + for (i = 0; i < d->m_total_lz_bytes; ++i) { + TDEFL_PUT_BITS( + d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], + 8); + } + } + // Check for the extremely unlikely (if not impossible) case of the compressed + // block not fitting into the output buffer when using dynamic codes. 
+ else if (!comp_block_succeeded) { + d->m_pOutput_buf = pSaved_output_buf; + d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + tdefl_compress_block(d, MZ_TRUE); + } + + if (flush) { + if (flush == TDEFL_FINISH) { + if (d->m_bits_in) { + TDEFL_PUT_BITS(0, 8 - d->m_bits_in); + } + if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { + mz_uint i, a = d->m_adler32; + for (i = 0; i < 4; i++) { + TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); + a <<= 8; + } + } + } else { + mz_uint i, z = 0; + TDEFL_PUT_BITS(0, 3); + if (d->m_bits_in) { + TDEFL_PUT_BITS(0, 8 - d->m_bits_in); + } + for (i = 2; i; --i, z ^= 0xFFFF) { + TDEFL_PUT_BITS(z & 0xFFFF, 16); + } + } + } + + MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); + + memset(&d->m_huff_count[0][0], 0, + sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, + sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; + d->m_pLZ_flags = d->m_lz_code_buf; + d->m_num_flags_left = 8; + d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; + d->m_total_lz_bytes = 0; + d->m_block_index++; + + if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) { + if (d->m_pPut_buf_func) { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) + return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); + } else if (pOutput_buf_start == d->m_output_buf) { + int bytes_to_copy = (int)MZ_MIN( + (size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, + bytes_to_copy); + d->m_out_buf_ofs += bytes_to_copy; + if ((n -= bytes_to_copy) != 0) { + d->m_output_flush_ofs = bytes_to_copy; + d->m_output_flush_remaining = n; + } + } else { + d->m_out_buf_ofs += n; + } + } + + return d->m_output_flush_remaining; +} + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES +#define TDEFL_READ_UNALIGNED_WORD(p) *(const 
mz_uint16 *)(p) +static MZ_FORCEINLINE void tdefl_find_match( + tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, + mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) { + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, + match_len = *pMatch_len, probe_pos = pos, next_probe_pos, + probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint16 *s = (const mz_uint16 *)(d->m_dict + pos), *p, *q; + mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), + s01 = TDEFL_READ_UNALIGNED_WORD(s); + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); + if (max_match_len <= match_len) return; + for (;;) { + for (;;) { + if (--num_probes_left == 0) return; +#define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || \ + ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ + return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) \ + break; + TDEFL_PROBE; + TDEFL_PROBE; + TDEFL_PROBE; + } + if (!dist) break; + q = (const mz_uint16 *)(d->m_dict + probe_pos); + if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; + p = s; + probe_len = 32; + do { + } while ( + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (--probe_len > 0)); + if (!probe_len) { + *pMatch_dist = dist; + *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); + break; + } else if ((probe_len = ((mz_uint)(p - s) * 2) + + (mz_uint)(*(const mz_uint8 *)p == + *(const mz_uint8 *)q)) > match_len) { + *pMatch_dist = dist; + if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == + max_match_len) + break; + c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + 
match_len - 1]); + } + } +} +#else +static MZ_FORCEINLINE void tdefl_find_match( + tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, + mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) { + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, + match_len = *pMatch_len, probe_pos = pos, next_probe_pos, + probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint8 *s = d->m_dict + pos, *p, *q; + mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); + if (max_match_len <= match_len) return; + for (;;) { + for (;;) { + if (--num_probes_left == 0) return; +#define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || \ + ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ + return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if ((d->m_dict[probe_pos + match_len] == c0) && \ + (d->m_dict[probe_pos + match_len - 1] == c1)) \ + break; + TDEFL_PROBE; + TDEFL_PROBE; + TDEFL_PROBE; + } + if (!dist) break; + p = s; + q = d->m_dict + probe_pos; + for (probe_len = 0; probe_len < max_match_len; probe_len++) + if (*p++ != *q++) break; + if (probe_len > match_len) { + *pMatch_dist = dist; + if ((*pMatch_len = match_len = probe_len) == max_match_len) return; + c0 = d->m_dict[pos + match_len]; + c1 = d->m_dict[pos + match_len - 1]; + } + } +} +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +static mz_bool tdefl_compress_fast(tdefl_compressor *d) { + // Faster, minimally featured LZRW1-style match+parse loop with better + // register utilization. Intended for applications where raw throughput is + // valued more highly than ratio. 
+ mz_uint lookahead_pos = d->m_lookahead_pos, + lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, + total_lz_bytes = d->m_total_lz_bytes, + num_flags_left = d->m_num_flags_left; + mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; + mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + + while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) { + const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; + mz_uint dst_pos = + (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN( + d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); + d->m_src_buf_left -= num_bytes_to_process; + lookahead_size += num_bytes_to_process; + + while (num_bytes_to_process) { + mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); + memcpy(d->m_dict + dst_pos, d->m_pSrc, n); + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, + MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); + d->m_pSrc += n; + dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; + num_bytes_to_process -= n; + } + + dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); + if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) + break; + + while (lookahead_size >= 4) { + mz_uint cur_match_dist, cur_match_len = 1; + mz_uint8 *pCur_dict = d->m_dict + cur_pos; + mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF; + mz_uint hash = + (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & + TDEFL_LEVEL1_HASH_SIZE_MASK; + mz_uint probe_pos = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)lookahead_pos; + + if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= + dict_size) && + ((*(const mz_uint32 *)(d->m_dict + + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & + 0xFFFFFF) == first_trigram)) { + const mz_uint16 *p = (const mz_uint16 *)pCur_dict; + const mz_uint16 *q = 
(const mz_uint16 *)(d->m_dict + probe_pos); + mz_uint32 probe_len = 32; + do { + } while ((TDEFL_READ_UNALIGNED_WORD(++p) == + TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == + TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == + TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == + TDEFL_READ_UNALIGNED_WORD(++q)) && + (--probe_len > 0)); + cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); + if (!probe_len) + cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; + + if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || + ((cur_match_len == TDEFL_MIN_MATCH_LEN) && + (cur_match_dist >= 8U * 1024U))) { + cur_match_len = 1; + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } else { + mz_uint32 s0, s1; + cur_match_len = MZ_MIN(cur_match_len, lookahead_size); + + MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && + (cur_match_dist >= 1) && + (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); + + cur_match_dist--; + + pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); + *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; + pLZ_code_buf += 3; + *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); + + s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; + s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; + d->m_huff_count[1][(cur_match_dist < 512) ? 
s0 : s1]++; + + d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - + TDEFL_MIN_MATCH_LEN]]++; + } + } else { + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + + if (--num_flags_left == 0) { + num_flags_left = 8; + pLZ_flags = pLZ_code_buf++; + } + + total_lz_bytes += cur_match_len; + lookahead_pos += cur_match_len; + dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; + MZ_ASSERT(lookahead_size >= cur_match_len); + lookahead_size -= cur_match_len; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { + int n; + d->m_lookahead_pos = lookahead_pos; + d->m_lookahead_size = lookahead_size; + d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; + d->m_pLZ_code_buf = pLZ_code_buf; + d->m_pLZ_flags = pLZ_flags; + d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? 
MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; + pLZ_code_buf = d->m_pLZ_code_buf; + pLZ_flags = d->m_pLZ_flags; + num_flags_left = d->m_num_flags_left; + } + } + + while (lookahead_size) { + mz_uint8 lit = d->m_dict[cur_pos]; + + total_lz_bytes++; + *pLZ_code_buf++ = lit; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + if (--num_flags_left == 0) { + num_flags_left = 8; + pLZ_flags = pLZ_code_buf++; + } + + d->m_huff_count[0][lit]++; + + lookahead_pos++; + dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + lookahead_size--; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { + int n; + d->m_lookahead_pos = lookahead_pos; + d->m_lookahead_size = lookahead_size; + d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; + d->m_pLZ_code_buf = pLZ_code_buf; + d->m_pLZ_flags = pLZ_flags; + d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; + pLZ_code_buf = d->m_pLZ_code_buf; + pLZ_flags = d->m_pLZ_flags; + num_flags_left = d->m_num_flags_left; + } + } + } + + d->m_lookahead_pos = lookahead_pos; + d->m_lookahead_size = lookahead_size; + d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; + d->m_pLZ_code_buf = pLZ_code_buf; + d->m_pLZ_flags = pLZ_flags; + d->m_num_flags_left = num_flags_left; + return MZ_TRUE; +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + +static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, + mz_uint8 lit) { + d->m_total_lz_bytes++; + *d->m_pLZ_code_buf++ = lit; + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); + if (--d->m_num_flags_left == 0) { + d->m_num_flags_left = 8; + d->m_pLZ_flags = d->m_pLZ_code_buf++; + } + d->m_huff_count[0][lit]++; +} + +static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, + mz_uint match_len, + mz_uint match_dist) { + mz_uint32 s0, s1; + + 
MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && + (match_dist <= TDEFL_LZ_DICT_SIZE)); + + d->m_total_lz_bytes += match_len; + + d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); + + match_dist -= 1; + d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); + d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); + d->m_pLZ_code_buf += 3; + + *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); + if (--d->m_num_flags_left == 0) { + d->m_num_flags_left = 8; + d->m_pLZ_flags = d->m_pLZ_code_buf++; + } + + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; + d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++; + + if (match_len >= TDEFL_MIN_MATCH_LEN) + d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; +} + +static mz_bool tdefl_compress_normal(tdefl_compressor *d) { + const mz_uint8 *pSrc = d->m_pSrc; + size_t src_buf_left = d->m_src_buf_left; + tdefl_flush flush = d->m_flush; + + while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) { + mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; + // Update dictionary and hash chains. Keeps the lookahead size equal to + // TDEFL_MAX_MATCH_LEN. 
+ if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) { + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & + TDEFL_LZ_DICT_SIZE_MASK, + ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; + mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] + << TDEFL_LZ_HASH_SHIFT) ^ + d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN( + src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); + const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process; + src_buf_left -= num_bytes_to_process; + d->m_lookahead_size += num_bytes_to_process; + while (pSrc != pSrc_end) { + mz_uint8 c = *pSrc++; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)(ins_pos); + dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + ins_pos++; + } + } else { + while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) { + mz_uint8 c = *pSrc++; + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & + TDEFL_LZ_DICT_SIZE_MASK; + src_buf_left--; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) { + mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; + mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] + << (TDEFL_LZ_HASH_SHIFT * 2)) ^ + (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] + << TDEFL_LZ_HASH_SHIFT) ^ + c) & + (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)(ins_pos); + } + } + } + d->m_dict_size = + MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); + if ((!flush) && (d->m_lookahead_size < 
TDEFL_MAX_MATCH_LEN)) break; + + // Simple lazy/greedy parsing state machine. + len_to_move = 1; + cur_match_dist = 0; + cur_match_len = + d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); + cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) { + if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) { + mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; + cur_match_len = 0; + while (cur_match_len < d->m_lookahead_size) { + if (d->m_dict[cur_pos + cur_match_len] != c) break; + cur_match_len++; + } + if (cur_match_len < TDEFL_MIN_MATCH_LEN) + cur_match_len = 0; + else + cur_match_dist = 1; + } + } else { + tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, + d->m_lookahead_size, &cur_match_dist, &cur_match_len); + } + if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && + (cur_match_dist >= 8U * 1024U)) || + (cur_pos == cur_match_dist) || + ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) { + cur_match_dist = cur_match_len = 0; + } + if (d->m_saved_match_len) { + if (cur_match_len > d->m_saved_match_len) { + tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); + if (cur_match_len >= 128) { + tdefl_record_match(d, cur_match_len, cur_match_dist); + d->m_saved_match_len = 0; + len_to_move = cur_match_len; + } else { + d->m_saved_lit = d->m_dict[cur_pos]; + d->m_saved_match_dist = cur_match_dist; + d->m_saved_match_len = cur_match_len; + } + } else { + tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); + len_to_move = d->m_saved_match_len - 1; + d->m_saved_match_len = 0; + } + } else if (!cur_match_dist) + tdefl_record_literal(d, + d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); + else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || + (cur_match_len >= 128)) { + tdefl_record_match(d, cur_match_len, cur_match_dist); + len_to_move = cur_match_len; + } else { + d->m_saved_lit = 
d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; + d->m_saved_match_dist = cur_match_dist; + d->m_saved_match_len = cur_match_len; + } + // Move the lookahead forward by len_to_move bytes. + d->m_lookahead_pos += len_to_move; + MZ_ASSERT(d->m_lookahead_size >= len_to_move); + d->m_lookahead_size -= len_to_move; + d->m_dict_size = + MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE); + // Check if it's time to flush the current LZ codes to the internal output + // buffer. + if ((d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || + ((d->m_total_lz_bytes > 31 * 1024) && + (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= + d->m_total_lz_bytes) || + (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))) { + int n; + d->m_pSrc = pSrc; + d->m_src_buf_left = src_buf_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + } + } + + d->m_pSrc = pSrc; + d->m_src_buf_left = src_buf_left; + return MZ_TRUE; +} + +static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) { + if (d->m_pIn_buf_size) { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + } + + if (d->m_pOut_buf_size) { + size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, + d->m_output_flush_remaining); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, + d->m_output_buf + d->m_output_flush_ofs, n); + d->m_output_flush_ofs += (mz_uint)n; + d->m_output_flush_remaining -= (mz_uint)n; + d->m_out_buf_ofs += n; + + *d->m_pOut_buf_size = d->m_out_buf_ofs; + } + + return (d->m_finished && !d->m_output_flush_remaining) ? 
TDEFL_STATUS_DONE + : TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, + size_t *pIn_buf_size, void *pOut_buf, + size_t *pOut_buf_size, tdefl_flush flush) { + if (!d) { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return TDEFL_STATUS_BAD_PARAM; + } + + d->m_pIn_buf = pIn_buf; + d->m_pIn_buf_size = pIn_buf_size; + d->m_pOut_buf = pOut_buf; + d->m_pOut_buf_size = pOut_buf_size; + d->m_pSrc = (const mz_uint8 *)(pIn_buf); + d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0; + d->m_out_buf_ofs = 0; + d->m_flush = flush; + + if (((d->m_pPut_buf_func != NULL) == + ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || + (d->m_prev_return_status != TDEFL_STATUS_OKAY) || + (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || + (pIn_buf_size && *pIn_buf_size && !pIn_buf) || + (pOut_buf_size && *pOut_buf_size && !pOut_buf)) { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); + } + d->m_wants_to_finish |= (flush == TDEFL_FINISH); + + if ((d->m_output_flush_remaining) || (d->m_finished)) + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && + ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && + ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | + TDEFL_RLE_MATCHES)) == 0)) { + if (!tdefl_compress_fast(d)) return d->m_prev_return_status; + } else +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + { + if (!tdefl_compress_normal(d)) return d->m_prev_return_status; + } + + if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && + (pIn_buf)) + d->m_adler32 = + (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, + d->m_pSrc - (const mz_uint8 *)pIn_buf); + + if ((flush) && (!d->m_lookahead_size) && 
(!d->m_src_buf_left) && + (!d->m_output_flush_remaining)) { + if (tdefl_flush_block(d, flush) < 0) return d->m_prev_return_status; + d->m_finished = (flush == TDEFL_FINISH); + if (flush == TDEFL_FULL_FLUSH) { + MZ_CLEAR_OBJ(d->m_hash); + MZ_CLEAR_OBJ(d->m_next); + d->m_dict_size = 0; + } + } + + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); +} + +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, + size_t in_buf_size, tdefl_flush flush) { + MZ_ASSERT(d->m_pPut_buf_func); + return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); +} + +tdefl_status tdefl_init(tdefl_compressor *d, + tdefl_put_buf_func_ptr pPut_buf_func, + void *pPut_buf_user, int flags) { + d->m_pPut_buf_func = pPut_buf_func; + d->m_pPut_buf_user = pPut_buf_user; + d->m_flags = (mz_uint)(flags); + d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; + d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; + d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; + if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash); + d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = + d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; + d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = + d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; + d->m_pLZ_flags = d->m_lz_code_buf; + d->m_num_flags_left = 8; + d->m_pOutput_buf = d->m_output_buf; + d->m_pOutput_buf_end = d->m_output_buf; + d->m_prev_return_status = TDEFL_STATUS_OKAY; + d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; + d->m_adler32 = 1; + d->m_pIn_buf = NULL; + d->m_pOut_buf = NULL; + d->m_pIn_buf_size = NULL; + d->m_pOut_buf_size = NULL; + d->m_flush = TDEFL_NO_FLUSH; + d->m_pSrc = NULL; + d->m_src_buf_left = 0; + d->m_out_buf_ofs = 0; + memset(&d->m_huff_count[0][0], 0, + sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + 
memset(&d->m_huff_count[1][0], 0, + sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + return TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) { + return d->m_prev_return_status; +} + +mz_uint32 tdefl_get_adler32(tdefl_compressor *d) { return d->m_adler32; } + +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, + tdefl_put_buf_func_ptr pPut_buf_func, + void *pPut_buf_user, int flags) { + tdefl_compressor *pComp; + mz_bool succeeded; + if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; + pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); + if (!pComp) return MZ_FALSE; + succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == + TDEFL_STATUS_OKAY); + succeeded = + succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == + TDEFL_STATUS_DONE); + MZ_FREE(pComp); + return succeeded; +} + +typedef struct { + size_t m_size, m_capacity; + mz_uint8 *m_pBuf; + mz_bool m_expandable; +} tdefl_output_buffer; + +static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, + void *pUser) { + tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; + size_t new_size = p->m_size + len; + if (new_size > p->m_capacity) { + size_t new_capacity = p->m_capacity; + mz_uint8 *pNew_buf; + if (!p->m_expandable) return MZ_FALSE; + do { + new_capacity = MZ_MAX(128U, new_capacity << 1U); + } while (new_size > new_capacity); + pNew_buf = (mz_uint8 *)MZ_REALLOC(p->m_pBuf, new_capacity); + if (!pNew_buf) return MZ_FALSE; + p->m_pBuf = pNew_buf; + p->m_capacity = new_capacity; + } + memcpy((mz_uint8 *)p->m_pBuf + p->m_size, pBuf, len); + p->m_size = new_size; + return MZ_TRUE; +} + +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, + size_t *pOut_len, int flags) { + tdefl_output_buffer out_buf; + MZ_CLEAR_OBJ(out_buf); + if (!pOut_len) + return MZ_FALSE; + else + *pOut_len = 0; + out_buf.m_expandable = MZ_TRUE; + if 
(!tdefl_compress_mem_to_output( + pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) + return NULL; + *pOut_len = out_buf.m_size; + return out_buf.m_pBuf; +} + +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, + const void *pSrc_buf, size_t src_buf_len, + int flags) { + tdefl_output_buffer out_buf; + MZ_CLEAR_OBJ(out_buf); + if (!pOut_buf) return 0; + out_buf.m_pBuf = (mz_uint8 *)pOut_buf; + out_buf.m_capacity = out_buf_len; + if (!tdefl_compress_mem_to_output( + pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) + return 0; + return out_buf.m_size; +} + +#ifndef MINIZ_NO_ZLIB_APIS +static const mz_uint s_tdefl_num_probes[11] = {0, 1, 6, 32, 16, 32, + 128, 256, 512, 768, 1500}; + +// level may actually range from [0,10] (10 is a "hidden" max level, where we +// want a bit more compression and it's fine if throughput to fall off a cliff +// on some files). +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, + int strategy) { + mz_uint comp_flags = + s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | + ((level <= 3) ? 
TDEFL_GREEDY_PARSING_FLAG : 0); + if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; + + if (!level) + comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; + else if (strategy == MZ_FILTERED) + comp_flags |= TDEFL_FILTER_MATCHES; + else if (strategy == MZ_HUFFMAN_ONLY) + comp_flags &= ~TDEFL_MAX_PROBES_MASK; + else if (strategy == MZ_FIXED) + comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; + else if (strategy == MZ_RLE) + comp_flags |= TDEFL_RLE_MATCHES; + + return comp_flags; +} +#endif // MINIZ_NO_ZLIB_APIS + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4204) // nonstandard extension used : non-constant + // aggregate initializer (also supported by GNU + // C and C99, so no big deal) +#pragma warning(disable : 4244) // 'initializing': conversion from '__int64' to + // 'int', possible loss of data +#pragma warning( \ + disable : 4267) // 'argument': conversion from '__int64' to 'int', + // possible loss of data +#pragma warning(disable : 4996) // 'strdup': The POSIX name for this item is + // deprecated. Instead, use the ISO C and C++ + // conformant name: _strdup. +#endif + +// Simple PNG writer function by Alex Evans, 2011. Released into the public +// domain: https://gist.github.com/908299, more context at +// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. +// This is actually a modification of Alex's original code so PNG files +// generated by this function pass pngcheck. +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, + int h, int num_chans, + size_t *pLen_out, + mz_uint level, mz_bool flip) { + // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was + // defined. 
+ static const mz_uint s_tdefl_png_num_probes[11] = { + 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500}; + tdefl_compressor *pComp = + (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); + tdefl_output_buffer out_buf; + int i, bpl = w * num_chans, y, z; + mz_uint32 c; + *pLen_out = 0; + if (!pComp) return NULL; + MZ_CLEAR_OBJ(out_buf); + out_buf.m_expandable = MZ_TRUE; + out_buf.m_capacity = 57 + MZ_MAX(64, (1 + bpl) * h); + if (NULL == (out_buf.m_pBuf = (mz_uint8 *)MZ_MALLOC(out_buf.m_capacity))) { + MZ_FREE(pComp); + return NULL; + } + // write dummy header + for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); + // compress image data + tdefl_init( + pComp, tdefl_output_buffer_putter, &out_buf, + s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER); + for (y = 0; y < h; ++y) { + tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); + tdefl_compress_buffer(pComp, + (mz_uint8 *)pImage + (flip ? (h - 1 - y) : y) * bpl, + bpl, TDEFL_NO_FLUSH); + } + if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != + TDEFL_STATUS_DONE) { + MZ_FREE(pComp); + MZ_FREE(out_buf.m_pBuf); + return NULL; + } + // write real header + *pLen_out = out_buf.m_size - 41; + { + static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06}; + mz_uint8 pnghdr[41] = {0x89, + 0x50, + 0x4e, + 0x47, + 0x0d, + 0x0a, + 0x1a, + 0x0a, + 0x00, + 0x00, + 0x00, + 0x0d, + 0x49, + 0x48, + 0x44, + 0x52, + 0, + 0, + (mz_uint8)(w >> 8), + (mz_uint8)w, + 0, + 0, + (mz_uint8)(h >> 8), + (mz_uint8)h, + 8, + chans[num_chans], + 0, + 0, + 0, + 0, + 0, + 0, + 0, + (mz_uint8)(*pLen_out >> 24), + (mz_uint8)(*pLen_out >> 16), + (mz_uint8)(*pLen_out >> 8), + (mz_uint8)*pLen_out, + 0x49, + 0x44, + 0x41, + 0x54}; + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, pnghdr + 12, 17); + for (i = 0; i < 4; ++i, c <<= 8) + ((mz_uint8 *)(pnghdr + 29))[i] = (mz_uint8)(c >> 24); + memcpy(out_buf.m_pBuf, pnghdr, 41); + } + // write footer (IDAT CRC-32, followed by IEND chunk) + if 
(!tdefl_output_buffer_putter( + "\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { + *pLen_out = 0; + MZ_FREE(pComp); + MZ_FREE(out_buf.m_pBuf); + return NULL; + } + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4, + *pLen_out + 4); + for (i = 0; i < 4; ++i, c <<= 8) + (out_buf.m_pBuf + out_buf.m_size - 16)[i] = (mz_uint8)(c >> 24); + // compute final size of file, grab compressed data buffer and return + *pLen_out += 57; + MZ_FREE(pComp); + return out_buf.m_pBuf; +} +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, + int num_chans, size_t *pLen_out) { + // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we + // can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's + // where #defined out) + return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, + pLen_out, 6, MZ_FALSE); +} + +// ------------------- .ZIP archive reading + +#ifndef MINIZ_NO_ARCHIVE_APIS +#error "No arvhive APIs" + +#ifdef MINIZ_NO_STDIO +#define MZ_FILE void * +#else +#include <stdio.h> +#include <sys/stat.h> + +// -- GODOT change for old MinGW on Travis CI -- +//#if defined(_MSC_VER) || defined(__MINGW64__) +#if defined(_MSC_VER) || (defined(__MINGW32__) && __MINGW64_VERSION_MAJOR >= 3) +// -- GODOT end -- +static FILE *mz_fopen(const char *pFilename, const char *pMode) { + FILE *pFile = NULL; + fopen_s(&pFile, pFilename, pMode); + return pFile; +} +static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) { + FILE *pFile = NULL; + if (freopen_s(&pFile, pPath, pMode, pStream)) return NULL; + return pFile; +} +#ifndef MINIZ_NO_TIME +#include <sys/utime.h> +#endif +#define MZ_FILE FILE +#define MZ_FOPEN mz_fopen +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 _ftelli64 +#define MZ_FSEEK64 _fseeki64 +#define MZ_FILE_STAT_STRUCT _stat +#define MZ_FILE_STAT _stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN 
mz_freopen +#define MZ_DELETE_FILE remove +#elif defined(__MINGW32__) +#ifndef MINIZ_NO_TIME +#include <sys/utime.h> +#endif +#define MZ_FILE FILE +#define MZ_FOPEN(f, m) fopen(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftello64 +#define MZ_FSEEK64 fseeko64 +#define MZ_FILE_STAT_STRUCT _stat +#define MZ_FILE_STAT _stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(f, m, s) freopen(f, m, s) +#define MZ_DELETE_FILE remove +#elif defined(__TINYC__) +#ifndef MINIZ_NO_TIME +#include <sys/utime.h> +#endif +#define MZ_FILE FILE +#define MZ_FOPEN(f, m) fopen(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftell +#define MZ_FSEEK64 fseek +#define MZ_FILE_STAT_STRUCT stat +#define MZ_FILE_STAT stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(f, m, s) freopen(f, m, s) +#define MZ_DELETE_FILE remove +#elif defined(__GNUC__) && defined(_LARGEFILE64_SOURCE) && _LARGEFILE64_SOURCE +#ifndef MINIZ_NO_TIME +#include <utime.h> +#endif +#define MZ_FILE FILE +#define MZ_FOPEN(f, m) fopen64(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftello64 +#define MZ_FSEEK64 fseeko64 +#define MZ_FILE_STAT_STRUCT stat64 +#define MZ_FILE_STAT stat64 +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(p, m, s) freopen64(p, m, s) +#define MZ_DELETE_FILE remove +#else +#ifndef MINIZ_NO_TIME +#include <utime.h> +#endif +#define MZ_FILE FILE +#define MZ_FOPEN(f, m) fopen(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftello +#define MZ_FSEEK64 fseeko +#define MZ_FILE_STAT_STRUCT stat +#define MZ_FILE_STAT stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(f, m, s) freopen(f, m, s) +#define MZ_DELETE_FILE remove +#endif // #ifdef _MSC_VER +#endif // #ifdef MINIZ_NO_STDIO + +#define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c)) + +// Various ZIP archive enums. 
To completely avoid cross platform compiler +// alignment and platform endian issues, miniz.c doesn't use structs for any of +// this stuff. +enum { + // ZIP archive identifiers and record sizes + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50, + MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50, + MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50, + MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30, + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46, + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22, + // Central directory header record offsets + MZ_ZIP_CDH_SIG_OFS = 0, + MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4, + MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6, + MZ_ZIP_CDH_BIT_FLAG_OFS = 8, + MZ_ZIP_CDH_METHOD_OFS = 10, + MZ_ZIP_CDH_FILE_TIME_OFS = 12, + MZ_ZIP_CDH_FILE_DATE_OFS = 14, + MZ_ZIP_CDH_CRC32_OFS = 16, + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20, + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24, + MZ_ZIP_CDH_FILENAME_LEN_OFS = 28, + MZ_ZIP_CDH_EXTRA_LEN_OFS = 30, + MZ_ZIP_CDH_COMMENT_LEN_OFS = 32, + MZ_ZIP_CDH_DISK_START_OFS = 34, + MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36, + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38, + MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42, + // Local directory header offsets + MZ_ZIP_LDH_SIG_OFS = 0, + MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4, + MZ_ZIP_LDH_BIT_FLAG_OFS = 6, + MZ_ZIP_LDH_METHOD_OFS = 8, + MZ_ZIP_LDH_FILE_TIME_OFS = 10, + MZ_ZIP_LDH_FILE_DATE_OFS = 12, + MZ_ZIP_LDH_CRC32_OFS = 14, + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18, + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22, + MZ_ZIP_LDH_FILENAME_LEN_OFS = 26, + MZ_ZIP_LDH_EXTRA_LEN_OFS = 28, + // End of central directory offsets + MZ_ZIP_ECDH_SIG_OFS = 0, + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4, + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6, + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8, + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10, + MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12, + MZ_ZIP_ECDH_CDIR_OFS_OFS = 16, + MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20, +}; + +typedef struct { + void *m_p; + size_t m_size, m_capacity; + mz_uint m_element_size; +} mz_zip_array; + +struct mz_zip_internal_state_tag { + mz_zip_array 
m_central_dir; + mz_zip_array m_central_dir_offsets; + mz_zip_array m_sorted_central_dir_offsets; + MZ_FILE *m_pFile; + void *m_pMem; + size_t m_mem_size; + size_t m_mem_capacity; +}; + +#define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) \ + (array_ptr)->m_element_size = element_size +#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) \ + ((element_type *)((array_ptr)->m_p))[index] + +static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip, + mz_zip_array *pArray) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p); + memset(pArray, 0, sizeof(mz_zip_array)); +} + +static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip, + mz_zip_array *pArray, + size_t min_new_capacity, + mz_uint growing) { + void *pNew_p; + size_t new_capacity = min_new_capacity; + MZ_ASSERT(pArray->m_element_size); + if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE; + if (growing) { + new_capacity = MZ_MAX(1, pArray->m_capacity); + while (new_capacity < min_new_capacity) new_capacity *= 2; + } + if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, + pArray->m_element_size, new_capacity))) + return MZ_FALSE; + pArray->m_p = pNew_p; + pArray->m_capacity = new_capacity; + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip, + mz_zip_array *pArray, + size_t new_capacity, + mz_uint growing) { + if (new_capacity > pArray->m_capacity) { + if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing)) + return MZ_FALSE; + } + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip, + mz_zip_array *pArray, + size_t new_size, + mz_uint growing) { + if (new_size > pArray->m_capacity) { + if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing)) + return MZ_FALSE; + } + pArray->m_size = new_size; + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip, + mz_zip_array *pArray, + size_t n) { + 
return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE); +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive *pZip, + mz_zip_array *pArray, + const void *pElements, + size_t n) { + size_t orig_size = pArray->m_size; + if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE)) + return MZ_FALSE; + memcpy((mz_uint8 *)pArray->m_p + orig_size * pArray->m_element_size, + pElements, n * pArray->m_element_size); + return MZ_TRUE; +} + +#ifndef MINIZ_NO_TIME +static time_t mz_zip_dos_to_time_t(int dos_time, int dos_date) { + struct tm tm; + memset(&tm, 0, sizeof(tm)); + tm.tm_isdst = -1; + tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900; + tm.tm_mon = ((dos_date >> 5) & 15) - 1; + tm.tm_mday = dos_date & 31; + tm.tm_hour = (dos_time >> 11) & 31; + tm.tm_min = (dos_time >> 5) & 63; + tm.tm_sec = (dos_time << 1) & 62; + return mktime(&tm); +} + +static void mz_zip_time_to_dos_time(time_t time, mz_uint16 *pDOS_time, + mz_uint16 *pDOS_date) { +#ifdef _MSC_VER + struct tm tm_struct; + struct tm *tm = &tm_struct; + errno_t err = localtime_s(tm, &time); + if (err) { + *pDOS_date = 0; + *pDOS_time = 0; + return; + } +#else + struct tm *tm = localtime(&time); +#endif + *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + + ((tm->tm_sec) >> 1)); + *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + + ((tm->tm_mon + 1) << 5) + tm->tm_mday); +} +#endif + +#ifndef MINIZ_NO_STDIO +static mz_bool mz_zip_get_file_modified_time(const char *pFilename, + mz_uint16 *pDOS_time, + mz_uint16 *pDOS_date) { +#ifdef MINIZ_NO_TIME + (void)pFilename; + *pDOS_date = *pDOS_time = 0; +#else + struct MZ_FILE_STAT_STRUCT file_stat; + // On Linux with x86 glibc, this call will fail on large files (>= 0x80000000 + // bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh. 
+ if (MZ_FILE_STAT(pFilename, &file_stat) != 0) return MZ_FALSE; + mz_zip_time_to_dos_time(file_stat.st_mtime, pDOS_time, pDOS_date); +#endif // #ifdef MINIZ_NO_TIME + return MZ_TRUE; +} + +#ifndef MINIZ_NO_TIME +static mz_bool mz_zip_set_file_times(const char *pFilename, time_t access_time, + time_t modified_time) { + struct utimbuf t; + t.actime = access_time; + t.modtime = modified_time; + return !utime(pFilename, &t); +} +#endif // #ifndef MINIZ_NO_TIME +#endif // #ifndef MINIZ_NO_STDIO + +static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, + mz_uint32 flags) { + (void)flags; + if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) + return MZ_FALSE; + + if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; + if (!pZip->m_pFree) pZip->m_pFree = def_free_func; + if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; + + pZip->m_zip_mode = MZ_ZIP_MODE_READING; + pZip->m_archive_size = 0; + pZip->m_central_directory_file_ofs = 0; + pZip->m_total_files = 0; + + if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc( + pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) + return MZ_FALSE; + memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, + sizeof(mz_uint8)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, + sizeof(mz_uint32)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, + sizeof(mz_uint32)); + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool +mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, + const mz_zip_array *pCentral_dir_offsets, + mz_uint l_index, mz_uint r_index) { + const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT( + pCentral_dir_array, mz_uint8, + MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, + l_index)), + *pE; + const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT( + pCentral_dir_array, mz_uint8, + MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, 
mz_uint32, r_index)); + mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), + r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS); + mz_uint8 l = 0, r = 0; + pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pE = pL + MZ_MIN(l_len, r_len); + while (pL < pE) { + if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break; + pL++; + pR++; + } + return (pL == pE) ? (l_len < r_len) : (l < r); +} + +#define MZ_SWAP_UINT32(a, b) \ + do { \ + mz_uint32 t = a; \ + a = b; \ + b = t; \ + } \ + MZ_MACRO_END + +// Heap sort of lowercased filenames, used to help accelerate plain central +// directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), +// but it could allocate memory.) +static void mz_zip_reader_sort_central_dir_offsets_by_filename( + mz_zip_archive *pZip) { + mz_zip_internal_state *pState = pZip->m_pState; + const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; + const mz_zip_array *pCentral_dir = &pState->m_central_dir; + mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT( + &pState->m_sorted_central_dir_offsets, mz_uint32, 0); + const int size = pZip->m_total_files; + int start = (size - 2) >> 1, end; + while (start >= 0) { + int child, root = start; + for (;;) { + if ((child = (root << 1) + 1) >= size) break; + child += + (((child + 1) < size) && + (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, + pIndices[child], pIndices[child + 1]))); + if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, + pIndices[root], pIndices[child])) + break; + MZ_SWAP_UINT32(pIndices[root], pIndices[child]); + root = child; + } + start--; + } + + end = size - 1; + while (end > 0) { + int child, root = 0; + MZ_SWAP_UINT32(pIndices[end], pIndices[0]); + for (;;) { + if ((child = (root << 1) + 1) >= end) break; + child += + (((child + 1) < end) && + mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, + pIndices[child], pIndices[child + 1])); + if 
(!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, + pIndices[root], pIndices[child])) + break; + MZ_SWAP_UINT32(pIndices[root], pIndices[child]); + root = child; + } + end--; + } +} + +static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip, + mz_uint32 flags) { + mz_uint cdir_size, num_this_disk, cdir_disk_index; + mz_uint64 cdir_ofs; + mz_int64 cur_file_ofs; + const mz_uint8 *p; + mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; + mz_uint8 *pBuf = (mz_uint8 *)buf_u32; + mz_bool sort_central_dir = + ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0); + // Basic sanity checks - reject files which are too small, and check the first + // 4 bytes of the file to make sure a local header is there. + if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + // Find the end of central directory record by scanning the file from the end + // towards the beginning. + cur_file_ofs = + MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0); + for (;;) { + int i, + n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n) + return MZ_FALSE; + for (i = n - 4; i >= 0; --i) + if (MZ_READ_LE32(pBuf + i) == MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) break; + if (i >= 0) { + cur_file_ofs += i; + break; + } + if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= + (0xFFFF + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) + return MZ_FALSE; + cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0); + } + // Read and verify the end of central directory record. 
+ if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if ((MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) || + ((pZip->m_total_files = + MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS)) != + MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS))) + return MZ_FALSE; + + num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS); + cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS); + if (((num_this_disk | cdir_disk_index) != 0) && + ((num_this_disk != 1) || (cdir_disk_index != 1))) + return MZ_FALSE; + + if ((cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS)) < + pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + + cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS); + if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size) return MZ_FALSE; + + pZip->m_central_directory_file_ofs = cdir_ofs; + + if (pZip->m_total_files) { + mz_uint i, n; + + // Read the entire central directory into a heap block, and allocate another + // heap block to hold the unsorted central dir file record offsets, and + // another to hold the sorted indices. + if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, + MZ_FALSE)) || + (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, + pZip->m_total_files, MZ_FALSE))) + return MZ_FALSE; + + if (sort_central_dir) { + if (!mz_zip_array_resize(pZip, + &pZip->m_pState->m_sorted_central_dir_offsets, + pZip->m_total_files, MZ_FALSE)) + return MZ_FALSE; + } + + if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, + pZip->m_pState->m_central_dir.m_p, + cdir_size) != cdir_size) + return MZ_FALSE; + + // Now create an index into the central directory file records, do some + // basic sanity checking on each record, and check for zip64 entries (which + // are not yet supported). 
+ p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p; + for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) { + mz_uint total_header_size, comp_size, decomp_size, disk_index; + if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) || + (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG)) + return MZ_FALSE; + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, + i) = + (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p); + if (sort_central_dir) + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, + mz_uint32, i) = i; + comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && + (decomp_size != comp_size)) || + (decomp_size && !comp_size) || (decomp_size == 0xFFFFFFFF) || + (comp_size == 0xFFFFFFFF)) + return MZ_FALSE; + disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS); + if ((disk_index != num_this_disk) && (disk_index != 1)) return MZ_FALSE; + if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) + + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size) + return MZ_FALSE; + if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) + + MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) > + n) + return MZ_FALSE; + n -= total_header_size; + p += total_header_size; + } + } + + if (sort_central_dir) + mz_zip_reader_sort_central_dir_offsets_by_filename(pZip); + + return MZ_TRUE; +} + +mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, + mz_uint32 flags) { + if ((!pZip) || (!pZip->m_pRead)) return MZ_FALSE; + if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; + pZip->m_archive_size = size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; +} + +static size_t mz_zip_mem_read_func(void 
*pOpaque, mz_uint64 file_ofs, + void *pBuf, size_t n) { + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + size_t s = (file_ofs >= pZip->m_archive_size) + ? 0 + : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n); + memcpy(pBuf, (const mz_uint8 *)pZip->m_pState->m_pMem + file_ofs, s); + return s; +} + +mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, + size_t size, mz_uint32 flags) { + if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; + pZip->m_archive_size = size; + pZip->m_pRead = mz_zip_mem_read_func; + pZip->m_pIO_opaque = pZip; +#ifdef __cplusplus + pZip->m_pState->m_pMem = const_cast<void *>(pMem); +#else + pZip->m_pState->m_pMem = (void *)pMem; +#endif + pZip->m_pState->m_mem_size = size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs, + void *pBuf, size_t n) { + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); + if (((mz_int64)file_ofs < 0) || + (((cur_ofs != (mz_int64)file_ofs)) && + (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) + return 0; + return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile); +} + +mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, + mz_uint32 flags) { + mz_uint64 file_size; + MZ_FILE *pFile = MZ_FOPEN(pFilename, "rb"); + if (!pFile) return MZ_FALSE; + if (MZ_FSEEK64(pFile, 0, SEEK_END)) { + MZ_FCLOSE(pFile); + return MZ_FALSE; + } + file_size = MZ_FTELL64(pFile); + if (!mz_zip_reader_init_internal(pZip, flags)) { + MZ_FCLOSE(pFile); + return MZ_FALSE; + } + pZip->m_pRead = mz_zip_file_read_func; + pZip->m_pIO_opaque = pZip; + pZip->m_pState->m_pFile = pFile; + pZip->m_archive_size = file_size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; +} 
+#endif // #ifndef MINIZ_NO_STDIO + +mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip) { + return pZip ? pZip->m_total_files : 0; +} + +static MZ_FORCEINLINE const mz_uint8 *mz_zip_reader_get_cdh( + mz_zip_archive *pZip, mz_uint file_index) { + if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files) || + (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return NULL; + return &MZ_ZIP_ARRAY_ELEMENT( + &pZip->m_pState->m_central_dir, mz_uint8, + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, + file_index)); +} + +mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, + mz_uint file_index) { + mz_uint m_bit_flag; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) return MZ_FALSE; + m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); + return (m_bit_flag & 1); +} + +mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, + mz_uint file_index) { + mz_uint filename_len, external_attr; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) return MZ_FALSE; + + // First see if the filename ends with a '/' character. + filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + if (filename_len) { + if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/') + return MZ_TRUE; + } + + // Bugfix: This code was also checking if the internal attribute was non-zero, + // which wasn't correct. + // Most/all zip writers (hopefully) set DOS file/directory attributes in the + // low 16-bits, so check for the DOS directory flag and ignore the source OS + // ID in the created by field. + // FIXME: Remove this check? Is it necessary - we already check the filename. 
+ external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); + if ((external_attr & 0x10) != 0) return MZ_TRUE; + + return MZ_FALSE; +} + +mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, + mz_zip_archive_file_stat *pStat) { + mz_uint n; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if ((!p) || (!pStat)) return MZ_FALSE; + + // Unpack the central directory record. + pStat->m_file_index = file_index; + pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT( + &pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index); + pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS); + pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS); + pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); + pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); +#ifndef MINIZ_NO_TIME + pStat->m_time = + mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS), + MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS)); +#endif + pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS); + pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS); + pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); + pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); + + // Copy as much of the filename and comment as possible. 
+ n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1); + memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); + pStat->m_filename[n] = '\0'; + + n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS); + n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1); + pStat->m_comment_size = n; + memcpy(pStat->m_comment, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS), + n); + pStat->m_comment[n] = '\0'; + + return MZ_TRUE; +} + +mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, + char *pFilename, mz_uint filename_buf_size) { + mz_uint n; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) { + if (filename_buf_size) pFilename[0] = '\0'; + return 0; + } + n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + if (filename_buf_size) { + n = MZ_MIN(n, filename_buf_size - 1); + memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); + pFilename[n] = '\0'; + } + return n + 1; +} + +static MZ_FORCEINLINE mz_bool mz_zip_reader_string_equal(const char *pA, + const char *pB, + mz_uint len, + mz_uint flags) { + mz_uint i; + if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE) return 0 == memcmp(pA, pB, len); + for (i = 0; i < len; ++i) + if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i])) return MZ_FALSE; + return MZ_TRUE; +} + +static MZ_FORCEINLINE int mz_zip_reader_filename_compare( + const mz_zip_array *pCentral_dir_array, + const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR, + mz_uint r_len) { + const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT( + pCentral_dir_array, mz_uint8, + MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, + l_index)), + *pE; + mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS); + mz_uint8 l = 0, r = 0; + pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pE = pL + MZ_MIN(l_len, r_len); + while (pL < pE) { + if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break; 
+ pL++; + pR++; + } + return (pL == pE) ? (int)(l_len - r_len) : (l - r); +} + +static int mz_zip_reader_locate_file_binary_search(mz_zip_archive *pZip, + const char *pFilename) { + mz_zip_internal_state *pState = pZip->m_pState; + const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; + const mz_zip_array *pCentral_dir = &pState->m_central_dir; + mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT( + &pState->m_sorted_central_dir_offsets, mz_uint32, 0); + const int size = pZip->m_total_files; + const mz_uint filename_len = (mz_uint)strlen(pFilename); + int l = 0, h = size - 1; + while (l <= h) { + int m = (l + h) >> 1, file_index = pIndices[m], + comp = + mz_zip_reader_filename_compare(pCentral_dir, pCentral_dir_offsets, + file_index, pFilename, filename_len); + if (!comp) + return file_index; + else if (comp < 0) + l = m + 1; + else + h = m - 1; + } + return -1; +} + +int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, + const char *pComment, mz_uint flags) { + mz_uint file_index; + size_t name_len, comment_len; + if ((!pZip) || (!pZip->m_pState) || (!pName) || + (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return -1; + if (((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) && + (!pComment) && (pZip->m_pState->m_sorted_central_dir_offsets.m_size)) + return mz_zip_reader_locate_file_binary_search(pZip, pName); + name_len = strlen(pName); + if (name_len > 0xFFFF) return -1; + comment_len = pComment ? 
strlen(pComment) : 0; + if (comment_len > 0xFFFF) return -1; + for (file_index = 0; file_index < pZip->m_total_files; file_index++) { + const mz_uint8 *pHeader = &MZ_ZIP_ARRAY_ELEMENT( + &pZip->m_pState->m_central_dir, mz_uint8, + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, + file_index)); + mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS); + const char *pFilename = + (const char *)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + if (filename_len < name_len) continue; + if (comment_len) { + mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS), + file_comment_len = + MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS); + const char *pFile_comment = pFilename + filename_len + file_extra_len; + if ((file_comment_len != comment_len) || + (!mz_zip_reader_string_equal(pComment, pFile_comment, + file_comment_len, flags))) + continue; + } + if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) { + int ofs = filename_len - 1; + do { + if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') || + (pFilename[ofs] == ':')) + break; + } while (--ofs >= 0); + ofs++; + pFilename += ofs; + filename_len -= ofs; + } + if ((filename_len == name_len) && + (mz_zip_reader_string_equal(pName, pFilename, filename_len, flags))) + return file_index; + } + return -1; +} + +mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, + mz_uint file_index, void *pBuf, + size_t buf_size, mz_uint flags, + void *pUser_read_buf, + size_t user_read_buf_size) { + int status = TINFL_STATUS_DONE; + mz_uint64 needed_size, cur_file_ofs, comp_remaining, + out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail; + mz_zip_archive_file_stat file_stat; + void *pRead_buf; + mz_uint32 + local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / + sizeof(mz_uint32)]; + mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + tinfl_decompressor inflator; + + if ((buf_size) && (!pBuf)) return MZ_FALSE; + + if 
(!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; + + // Empty file, or a directory (but not always a directory - I've seen odd zips + // with directories that have compressed data which inflates to 0 bytes) + if (!file_stat.m_comp_size) return MZ_TRUE; + + // Entry is a subdirectory (I've seen old zips with dir entries which have + // compressed deflate data which inflates to 0 bytes, but these entries claim + // to uncompress to 512 bytes in the headers). + // I'm torn how to handle this case - should it fail instead? + if (mz_zip_reader_is_file_a_directory(pZip, file_index)) return MZ_TRUE; + + // Encryption and patch files are not supported. + if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE; + + // This function only supports stored and deflate. + if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && + (file_stat.m_method != MZ_DEFLATED)) + return MZ_FALSE; + + // Ensure supplied output buffer is large enough. + needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size + : file_stat.m_uncomp_size; + if (buf_size < needed_size) return MZ_FALSE; + + // Read and parse the local directory entry. + cur_file_ofs = file_stat.m_local_header_ofs; + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + + cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) + return MZ_FALSE; + + if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) { + // The file is stored or the caller has requested the compressed data. 
+ if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, + (size_t)needed_size) != needed_size) + return MZ_FALSE; + return ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) != 0) || + (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, + (size_t)file_stat.m_uncomp_size) == file_stat.m_crc32); + } + + // Decompress the file either directly from memory or from a file input + // buffer. + tinfl_init(&inflator); + + if (pZip->m_pState->m_pMem) { + // Read directly from the archive in memory. + pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; + read_buf_size = read_buf_avail = file_stat.m_comp_size; + comp_remaining = 0; + } else if (pUser_read_buf) { + // Use a user provided read buffer. + if (!user_read_buf_size) return MZ_FALSE; + pRead_buf = (mz_uint8 *)pUser_read_buf; + read_buf_size = user_read_buf_size; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } else { + // Temporarily allocate a read buffer. + read_buf_size = + MZ_MIN(file_stat.m_comp_size, (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && + (read_buf_size > 0x7FFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) +#endif + return MZ_FALSE; + if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, + (size_t)read_buf_size))) + return MZ_FALSE; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + + do { + size_t in_buf_size, + out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs); + if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, + (size_t)read_buf_avail) != read_buf_avail) { + status = TINFL_STATUS_FAILED; + break; + } + cur_file_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + read_buf_ofs = 0; + } + in_buf_size = (size_t)read_buf_avail; + status = tinfl_decompress( + &inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, 
&in_buf_size, + (mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | + (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0)); + read_buf_avail -= in_buf_size; + read_buf_ofs += in_buf_size; + out_buf_ofs += out_buf_size; + } while (status == TINFL_STATUS_NEEDS_MORE_INPUT); + + if (status == TINFL_STATUS_DONE) { + // Make sure the entire file was decompressed, and check its CRC. + if ((out_buf_ofs != file_stat.m_uncomp_size) || + (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, + (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32)) + status = TINFL_STATUS_FAILED; + } + + if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf)) + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + + return status == TINFL_STATUS_DONE; +} + +mz_bool mz_zip_reader_extract_file_to_mem_no_alloc( + mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, + mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) { + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) return MZ_FALSE; + return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, + flags, pUser_read_buf, + user_read_buf_size); +} + +mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, + void *pBuf, size_t buf_size, + mz_uint flags) { + return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, + flags, NULL, 0); +} + +mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, + const char *pFilename, void *pBuf, + size_t buf_size, mz_uint flags) { + return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf, + buf_size, flags, NULL, 0); +} + +void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, + size_t *pSize, mz_uint flags) { + mz_uint64 comp_size, uncomp_size, alloc_size; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + void *pBuf; + + if (pSize) *pSize = 0; + if (!p) return NULL; + 
+ comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + + alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? comp_size : uncomp_size; +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) +#endif + return NULL; + if (NULL == + (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size))) + return NULL; + + if (!mz_zip_reader_extract_to_mem(pZip, file_index, pBuf, (size_t)alloc_size, + flags)) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return NULL; + } + + if (pSize) *pSize = (size_t)alloc_size; + return pBuf; +} + +void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, + const char *pFilename, size_t *pSize, + mz_uint flags) { + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) { + if (pSize) *pSize = 0; + return MZ_FALSE; + } + return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags); +} + +mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, + mz_uint file_index, + mz_file_write_func pCallback, + void *pOpaque, mz_uint flags) { + int status = TINFL_STATUS_DONE; + mz_uint file_crc32 = MZ_CRC32_INIT; + mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, + out_buf_ofs = 0, cur_file_ofs; + mz_zip_archive_file_stat file_stat; + void *pRead_buf = NULL; + void *pWrite_buf = NULL; + mz_uint32 + local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / + sizeof(mz_uint32)]; + mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; + + // Empty file, or a directory (but not always a directory - I've seen odd zips + // with directories that have compressed data which inflates to 0 bytes) + if (!file_stat.m_comp_size) return MZ_TRUE; + + // Entry is a subdirectory 
(I've seen old zips with dir entries which have + // compressed deflate data which inflates to 0 bytes, but these entries claim + // to uncompress to 512 bytes in the headers). + // I'm torn how to handle this case - should it fail instead? + if (mz_zip_reader_is_file_a_directory(pZip, file_index)) return MZ_TRUE; + + // Encryption and patch files are not supported. + if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE; + + // This function only supports stored and deflate. + if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && + (file_stat.m_method != MZ_DEFLATED)) + return MZ_FALSE; + + // Read and parse the local directory entry. + cur_file_ofs = file_stat.m_local_header_ofs; + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + + cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) + return MZ_FALSE; + + // Decompress the file either directly from memory or from a file input + // buffer. + if (pZip->m_pState->m_pMem) { + pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; + read_buf_size = read_buf_avail = file_stat.m_comp_size; + comp_remaining = 0; + } else { + read_buf_size = + MZ_MIN(file_stat.m_comp_size, (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); + if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, + (size_t)read_buf_size))) + return MZ_FALSE; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + + if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) { + // The file is stored or the caller has requested the compressed data. 
+ if (pZip->m_pState->m_pMem) { +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && + (file_stat.m_comp_size > 0xFFFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && + (file_stat.m_comp_size > 0xFFFFFFFF)) +#endif + return MZ_FALSE; + if (pCallback(pOpaque, out_buf_ofs, pRead_buf, + (size_t)file_stat.m_comp_size) != file_stat.m_comp_size) + status = TINFL_STATUS_FAILED; + else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + file_crc32 = + (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, + (size_t)file_stat.m_comp_size); + cur_file_ofs += file_stat.m_comp_size; + out_buf_ofs += file_stat.m_comp_size; + comp_remaining = 0; + } else { + while (comp_remaining) { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, + (size_t)read_buf_avail) != read_buf_avail) { + status = TINFL_STATUS_FAILED; + break; + } + + if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + file_crc32 = (mz_uint32)mz_crc32( + file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail); + + if (pCallback(pOpaque, out_buf_ofs, pRead_buf, + (size_t)read_buf_avail) != read_buf_avail) { + status = TINFL_STATUS_FAILED; + break; + } + cur_file_ofs += read_buf_avail; + out_buf_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + } + } + } else { + tinfl_decompressor inflator; + tinfl_init(&inflator); + + if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, + TINFL_LZ_DICT_SIZE))) + status = TINFL_STATUS_FAILED; + else { + do { + mz_uint8 *pWrite_buf_cur = + (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); + size_t in_buf_size, + out_buf_size = + TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); + if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, + (size_t)read_buf_avail) != read_buf_avail) { + status = TINFL_STATUS_FAILED; 
+ break; + } + cur_file_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + read_buf_ofs = 0; + } + + in_buf_size = (size_t)read_buf_avail; + status = tinfl_decompress( + &inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, + (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size, + comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0); + read_buf_avail -= in_buf_size; + read_buf_ofs += in_buf_size; + + if (out_buf_size) { + if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) != + out_buf_size) { + status = TINFL_STATUS_FAILED; + break; + } + file_crc32 = + (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size); + if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size) { + status = TINFL_STATUS_FAILED; + break; + } + } + } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) || + (status == TINFL_STATUS_HAS_MORE_OUTPUT)); + } + } + + if ((status == TINFL_STATUS_DONE) && + (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) { + // Make sure the entire file was decompressed, and check its CRC. 
+ if ((out_buf_ofs != file_stat.m_uncomp_size) || + (file_crc32 != file_stat.m_crc32)) + status = TINFL_STATUS_FAILED; + } + + if (!pZip->m_pState->m_pMem) pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + if (pWrite_buf) pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf); + + return status == TINFL_STATUS_DONE; +} + +mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, + const char *pFilename, + mz_file_write_func pCallback, + void *pOpaque, mz_uint flags) { + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) return MZ_FALSE; + return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque, + flags); +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs, + const void *pBuf, size_t n) { + (void)ofs; + return MZ_FWRITE(pBuf, 1, n, (MZ_FILE *)pOpaque); +} + +mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, + const char *pDst_filename, + mz_uint flags) { + mz_bool status; + mz_zip_archive_file_stat file_stat; + MZ_FILE *pFile; + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; + pFile = MZ_FOPEN(pDst_filename, "wb"); + if (!pFile) return MZ_FALSE; + status = mz_zip_reader_extract_to_callback( + pZip, file_index, mz_zip_file_write_callback, pFile, flags); + if (MZ_FCLOSE(pFile) == EOF) return MZ_FALSE; +#ifndef MINIZ_NO_TIME + if (status) + mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time); +#endif + return status; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_bool mz_zip_reader_end(mz_zip_archive *pZip) { + if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || + (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return MZ_FALSE; + + if (pZip->m_pState) { + mz_zip_internal_state *pState = pZip->m_pState; + pZip->m_pState = NULL; + mz_zip_array_clear(pZip, &pState->m_central_dir); + mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); + 
mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); + +#ifndef MINIZ_NO_STDIO + if (pState->m_pFile) { + MZ_FCLOSE(pState->m_pFile); + pState->m_pFile = NULL; + } +#endif // #ifndef MINIZ_NO_STDIO + + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + } + pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; + + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, + const char *pArchive_filename, + const char *pDst_filename, + mz_uint flags) { + int file_index = + mz_zip_reader_locate_file(pZip, pArchive_filename, NULL, flags); + if (file_index < 0) return MZ_FALSE; + return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags); +} +#endif + +// ------------------- .ZIP archive writing + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +static void mz_write_le16(mz_uint8 *p, mz_uint16 v) { + p[0] = (mz_uint8)v; + p[1] = (mz_uint8)(v >> 8); +} +static void mz_write_le32(mz_uint8 *p, mz_uint32 v) { + p[0] = (mz_uint8)v; + p[1] = (mz_uint8)(v >> 8); + p[2] = (mz_uint8)(v >> 16); + p[3] = (mz_uint8)(v >> 24); +} +#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v)) +#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v)) + +mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size) { + if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) || + (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) + return MZ_FALSE; + + if (pZip->m_file_offset_alignment) { + // Ensure user specified file offset alignment is a power of 2. 
+ if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1)) + return MZ_FALSE; + } + + if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; + if (!pZip->m_pFree) pZip->m_pFree = def_free_func; + if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; + + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; + pZip->m_archive_size = existing_size; + pZip->m_central_directory_file_ofs = 0; + pZip->m_total_files = 0; + + if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc( + pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) + return MZ_FALSE; + memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, + sizeof(mz_uint8)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, + sizeof(mz_uint32)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, + sizeof(mz_uint32)); + return MZ_TRUE; +} + +static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs, + const void *pBuf, size_t n) { + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_zip_internal_state *pState = pZip->m_pState; + mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size); +#ifdef _MSC_VER + if ((!n) || + ((0, sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) +#else + if ((!n) || + ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) +#endif + return 0; + if (new_size > pState->m_mem_capacity) { + void *pNew_block; + size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity); + while (new_capacity < new_size) new_capacity *= 2; + if (NULL == (pNew_block = pZip->m_pRealloc( + pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity))) + return 0; + pState->m_pMem = pNew_block; + pState->m_mem_capacity = new_capacity; + } + memcpy((mz_uint8 *)pState->m_pMem + file_ofs, pBuf, n); + pState->m_mem_size = (size_t)new_size; + return n; +} + +mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, + size_t 
size_to_reserve_at_beginning, + size_t initial_allocation_size) { + pZip->m_pWrite = mz_zip_heap_write_func; + pZip->m_pIO_opaque = pZip; + if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE; + if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size, + size_to_reserve_at_beginning))) { + if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc( + pZip->m_pAlloc_opaque, 1, initial_allocation_size))) { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + pZip->m_pState->m_mem_capacity = initial_allocation_size; + } + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs, + const void *pBuf, size_t n) { + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); + if (((mz_int64)file_ofs < 0) || + (((cur_ofs != (mz_int64)file_ofs)) && + (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) + return 0; + return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile); +} + +mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, + mz_uint64 size_to_reserve_at_beginning) { + MZ_FILE *pFile; + pZip->m_pWrite = mz_zip_file_write_func; + pZip->m_pIO_opaque = pZip; + if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE; + if (NULL == (pFile = MZ_FOPEN(pFilename, "wb"))) { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + pZip->m_pState->m_pFile = pFile; + if (size_to_reserve_at_beginning) { + mz_uint64 cur_ofs = 0; + char buf[4096]; + MZ_CLEAR_OBJ(buf); + do { + size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n) { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + cur_ofs += n; + size_to_reserve_at_beginning -= n; + } while (size_to_reserve_at_beginning); + } + return MZ_TRUE; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, + const char 
*pFilename) { + mz_zip_internal_state *pState; + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return MZ_FALSE; + // No sense in trying to write to an archive that's already at the support max + // size + if ((pZip->m_total_files == 0xFFFF) || + ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + pState = pZip->m_pState; + + if (pState->m_pFile) { +#ifdef MINIZ_NO_STDIO + pFilename; + return MZ_FALSE; +#else + // Archive is being read from stdio - try to reopen as writable. + if (pZip->m_pIO_opaque != pZip) return MZ_FALSE; + if (!pFilename) return MZ_FALSE; + pZip->m_pWrite = mz_zip_file_write_func; + if (NULL == + (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) { + // The mz_zip_archive is now in a bogus state because pState->m_pFile is + // NULL, so just close it. + mz_zip_reader_end(pZip); + return MZ_FALSE; + } +#endif // #ifdef MINIZ_NO_STDIO + } else if (pState->m_pMem) { + // Archive lives in a memory block. Assume it's from the heap that we can + // resize using the realloc callback. + if (pZip->m_pIO_opaque != pZip) return MZ_FALSE; + pState->m_mem_capacity = pState->m_mem_size; + pZip->m_pWrite = mz_zip_heap_write_func; + } + // Archive is being read via a user provided read function - make sure the + // user has specified a write function too. + else if (!pZip->m_pWrite) + return MZ_FALSE; + + // Start writing new files at the archive's current central directory + // location. 
+ pZip->m_archive_size = pZip->m_central_directory_file_ofs; + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; + pZip->m_central_directory_file_ofs = 0; + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, + const void *pBuf, size_t buf_size, + mz_uint level_and_flags) { + return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0, + level_and_flags, 0, 0); +} + +typedef struct { + mz_zip_archive *m_pZip; + mz_uint64 m_cur_archive_file_ofs; + mz_uint64 m_comp_size; +} mz_zip_writer_add_state; + +static mz_bool mz_zip_writer_add_put_buf_callback(const void *pBuf, int len, + void *pUser) { + mz_zip_writer_add_state *pState = (mz_zip_writer_add_state *)pUser; + if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque, + pState->m_cur_archive_file_ofs, pBuf, + len) != len) + return MZ_FALSE; + pState->m_cur_archive_file_ofs += len; + pState->m_comp_size += len; + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_create_local_dir_header( + mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, + mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size, + mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, + mz_uint16 dos_time, mz_uint16 dos_date) { + (void)pZip; + memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 
20 : 0); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, comp_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, uncomp_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size); + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_create_central_dir_header( + mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, + mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size, + mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, + mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, + mz_uint64 local_header_ofs, mz_uint32 ext_attributes) { + (void)pZip; + memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 
20 : 0); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, comp_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, uncomp_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_header_ofs); + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_add_to_central_dir( + mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size, + const void *pExtra, mz_uint16 extra_size, const void *pComment, + mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, + mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, + mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, + mz_uint32 ext_attributes) { + mz_zip_internal_state *pState = pZip->m_pState; + mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size; + size_t orig_central_dir_size = pState->m_central_dir.m_size; + mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; + + // No zip64 support yet + if ((local_header_ofs > 0xFFFFFFFF) || + (((mz_uint64)pState->m_central_dir.m_size + + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size + + comment_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_central_dir_header( + pZip, central_dir_header, filename_size, extra_size, comment_size, + uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, + dos_date, local_header_ofs, ext_attributes)) + return MZ_FALSE; + + if ((!mz_zip_array_push_back(pZip, 
&pState->m_central_dir, central_dir_header, + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, + filename_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, + extra_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, + comment_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, + &central_dir_ofs, 1))) { + // Try to push the central directory array back into its original state. + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, + MZ_FALSE); + return MZ_FALSE; + } + + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name) { + // Basic ZIP archive filename validity checks: Valid filenames cannot start + // with a forward slash, cannot contain a drive letter, and cannot use + // DOS-style backward slashes. + if (*pArchive_name == '/') return MZ_FALSE; + while (*pArchive_name) { + if ((*pArchive_name == '\\') || (*pArchive_name == ':')) return MZ_FALSE; + pArchive_name++; + } + return MZ_TRUE; +} + +static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment( + mz_zip_archive *pZip) { + mz_uint32 n; + if (!pZip->m_file_offset_alignment) return 0; + n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1)); + return (pZip->m_file_offset_alignment - n) & + (pZip->m_file_offset_alignment - 1); +} + +static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip, + mz_uint64 cur_file_ofs, mz_uint32 n) { + char buf[4096]; + memset(buf, 0, MZ_MIN(sizeof(buf), n)); + while (n) { + mz_uint32 s = MZ_MIN(sizeof(buf), n); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s) + return MZ_FALSE; + cur_file_ofs += s; + n -= s; + } + return MZ_TRUE; +} + +mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, + const char *pArchive_name, const void *pBuf, + size_t buf_size, const void *pComment, + mz_uint16 comment_size, + mz_uint level_and_flags, 
mz_uint64 uncomp_size, + mz_uint32 uncomp_crc32) { + mz_uint16 method = 0, dos_time = 0, dos_date = 0; + mz_uint level, ext_attributes = 0, num_alignment_padding_bytes; + mz_uint64 local_dir_header_ofs = pZip->m_archive_size, + cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0; + size_t archive_name_size; + mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + tdefl_compressor *pComp = NULL; + mz_bool store_data_uncompressed; + mz_zip_internal_state *pState; + + if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; + level = level_and_flags & 0xF; + store_data_uncompressed = + ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)); + + if ((!pZip) || (!pZip->m_pState) || + (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) || + (!pArchive_name) || ((comment_size) && (!pComment)) || + (pZip->m_total_files == 0xFFFF) || (level > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + + pState = pZip->m_pState; + + if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size)) + return MZ_FALSE; + // No zip64 support yet + if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; + +#ifndef MINIZ_NO_TIME + { + time_t cur_time; + time(&cur_time); + mz_zip_time_to_dos_time(cur_time, &dos_time, &dos_date); + } +#endif // #ifndef MINIZ_NO_TIME + + archive_name_size = strlen(pArchive_name); + if (archive_name_size > 0xFFFF) return MZ_FALSE; + + num_alignment_padding_bytes = + mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || + ((pZip->m_archive_size + num_alignment_padding_bytes + + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + + comment_size + archive_name_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) { + // Set DOS Subdirectory attribute bit. 
+ ext_attributes |= 0x10; + // Subdirectories cannot contain data. + if ((buf_size) || (uncomp_size)) return MZ_FALSE; + } + + // Try to do any allocations before writing to the archive, so if an + // allocation fails the file remains unmodified. (A good idea if we're doing + // an in-place modification.) + if ((!mz_zip_array_ensure_room( + pZip, &pState->m_central_dir, + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size)) || + (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1))) + return MZ_FALSE; + + if ((!store_data_uncompressed) && (buf_size)) { + if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc( + pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)))) + return MZ_FALSE; + } + + if (!mz_zip_writer_write_zeros( + pZip, cur_archive_file_ofs, + num_alignment_padding_bytes + sizeof(local_dir_header))) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + local_dir_header_ofs += num_alignment_padding_bytes; + if (pZip->m_file_offset_alignment) { + MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == + 0); + } + cur_archive_file_ofs += + num_alignment_padding_bytes + sizeof(local_dir_header); + + MZ_CLEAR_OBJ(local_dir_header); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, + archive_name_size) != archive_name_size) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + cur_archive_file_ofs += archive_name_size; + + if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) { + uncomp_crc32 = + (mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, buf_size); + uncomp_size = buf_size; + if (uncomp_size <= 3) { + level = 0; + store_data_uncompressed = MZ_TRUE; + } + } + + if (store_data_uncompressed) { + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, + buf_size) != buf_size) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + + cur_archive_file_ofs += buf_size; + comp_size = buf_size; + + if 
(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) method = MZ_DEFLATED; + } else if (buf_size) { + mz_zip_writer_add_state state; + + state.m_pZip = pZip; + state.m_cur_archive_file_ofs = cur_archive_file_ofs; + state.m_comp_size = 0; + + if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, + tdefl_create_comp_flags_from_zip_params( + level, -15, MZ_DEFAULT_STRATEGY)) != + TDEFL_STATUS_OKAY) || + (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) != + TDEFL_STATUS_DONE)) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + + comp_size = state.m_comp_size; + cur_archive_file_ofs = state.m_cur_archive_file_ofs; + + method = MZ_DEFLATED; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + pComp = NULL; + + // no zip64 support yet + if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_local_dir_header( + pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, + comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) + return MZ_FALSE; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, + sizeof(local_dir_header)) != sizeof(local_dir_header)) + return MZ_FALSE; + + if (!mz_zip_writer_add_to_central_dir( + pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, + comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, + dos_time, dos_date, local_dir_header_ofs, ext_attributes)) + return MZ_FALSE; + + pZip->m_total_files++; + pZip->m_archive_size = cur_archive_file_ofs; + + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, + const char *pSrc_filename, const void *pComment, + mz_uint16 comment_size, + mz_uint level_and_flags) { + mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes; + mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0; + mz_uint64 local_dir_header_ofs = 
pZip->m_archive_size, + cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = 0, + comp_size = 0; + size_t archive_name_size; + mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + MZ_FILE *pSrc_file = NULL; + + if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; + level = level_and_flags & 0xF; + + if ((!pZip) || (!pZip->m_pState) || + (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) || + ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; + + archive_name_size = strlen(pArchive_name); + if (archive_name_size > 0xFFFF) return MZ_FALSE; + + num_alignment_padding_bytes = + mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || + ((pZip->m_archive_size + num_alignment_padding_bytes + + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + + comment_size + archive_name_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_get_file_modified_time(pSrc_filename, &dos_time, &dos_date)) + return MZ_FALSE; + + pSrc_file = MZ_FOPEN(pSrc_filename, "rb"); + if (!pSrc_file) return MZ_FALSE; + MZ_FSEEK64(pSrc_file, 0, SEEK_END); + uncomp_size = MZ_FTELL64(pSrc_file); + MZ_FSEEK64(pSrc_file, 0, SEEK_SET); + + if (uncomp_size > 0xFFFFFFFF) { + // No zip64 support yet + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + if (uncomp_size <= 3) level = 0; + + if (!mz_zip_writer_write_zeros( + pZip, cur_archive_file_ofs, + num_alignment_padding_bytes + sizeof(local_dir_header))) { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + local_dir_header_ofs += num_alignment_padding_bytes; + if (pZip->m_file_offset_alignment) { + MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == + 0); + } + cur_archive_file_ofs += + num_alignment_padding_bytes + sizeof(local_dir_header); + + 
MZ_CLEAR_OBJ(local_dir_header); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, + archive_name_size) != archive_name_size) { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + cur_archive_file_ofs += archive_name_size; + + if (uncomp_size) { + mz_uint64 uncomp_remaining = uncomp_size; + void *pRead_buf = + pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE); + if (!pRead_buf) { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + if (!level) { + while (uncomp_remaining) { + mz_uint n = + (mz_uint)MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining); + if ((MZ_FREAD(pRead_buf, 1, n, pSrc_file) != n) || + (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, + n) != n)) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + uncomp_crc32 = + (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n); + uncomp_remaining -= n; + cur_archive_file_ofs += n; + } + comp_size = uncomp_size; + } else { + mz_bool result = MZ_FALSE; + mz_zip_writer_add_state state; + tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc( + pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + state.m_pZip = pZip; + state.m_cur_archive_file_ofs = cur_archive_file_ofs; + state.m_comp_size = 0; + + if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, + tdefl_create_comp_flags_from_zip_params( + level, -15, MZ_DEFAULT_STRATEGY)) != + TDEFL_STATUS_OKAY) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + for (;;) { + size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining, + (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); + tdefl_status status; + + if (MZ_FREAD(pRead_buf, 1, in_buf_size, pSrc_file) != in_buf_size) + break; + + uncomp_crc32 = (mz_uint32)mz_crc32( + 
uncomp_crc32, (const mz_uint8 *)pRead_buf, in_buf_size); + uncomp_remaining -= in_buf_size; + + status = tdefl_compress_buffer( + pComp, pRead_buf, in_buf_size, + uncomp_remaining ? TDEFL_NO_FLUSH : TDEFL_FINISH); + if (status == TDEFL_STATUS_DONE) { + result = MZ_TRUE; + break; + } else if (status != TDEFL_STATUS_OKAY) + break; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + + if (!result) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + comp_size = state.m_comp_size; + cur_archive_file_ofs = state.m_cur_archive_file_ofs; + + method = MZ_DEFLATED; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + } + + MZ_FCLOSE(pSrc_file); + pSrc_file = NULL; + + // no zip64 support yet + if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_local_dir_header( + pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, + comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) + return MZ_FALSE; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, + sizeof(local_dir_header)) != sizeof(local_dir_header)) + return MZ_FALSE; + + if (!mz_zip_writer_add_to_central_dir( + pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, + comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, + dos_time, dos_date, local_dir_header_ofs, ext_attributes)) + return MZ_FALSE; + + pZip->m_total_files++; + pZip->m_archive_size = cur_archive_file_ofs; + + return MZ_TRUE; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, + mz_zip_archive *pSource_zip, + mz_uint file_index) { + mz_uint n, bit_flags, num_alignment_padding_bytes; + mz_uint64 comp_bytes_remaining, local_dir_header_ofs; + mz_uint64 cur_src_file_ofs, cur_dst_file_ofs; + mz_uint32 + local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / + sizeof(mz_uint32)]; + mz_uint8 
*pLocal_header = (mz_uint8 *)local_header_u32; + mz_uint8 central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; + size_t orig_central_dir_size; + mz_zip_internal_state *pState; + void *pBuf; + const mz_uint8 *pSrc_central_header; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) + return MZ_FALSE; + if (NULL == + (pSrc_central_header = mz_zip_reader_get_cdh(pSource_zip, file_index))) + return MZ_FALSE; + pState = pZip->m_pState; + + num_alignment_padding_bytes = + mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || + ((pZip->m_archive_size + num_alignment_padding_bytes + + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) > + 0xFFFFFFFF)) + return MZ_FALSE; + + cur_src_file_ofs = + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS); + cur_dst_file_ofs = pZip->m_archive_size; + + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, + pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; + + if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs, + num_alignment_padding_bytes)) + return MZ_FALSE; + cur_dst_file_ofs += num_alignment_padding_bytes; + local_dir_header_ofs = cur_dst_file_ofs; + if (pZip->m_file_offset_alignment) { + MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == + 0); + } + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header, + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; + + n = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + comp_bytes_remaining = + n + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); 
+ + if (NULL == (pBuf = pZip->m_pAlloc( + pZip->m_pAlloc_opaque, 1, + (size_t)MZ_MAX(sizeof(mz_uint32) * 4, + MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, + comp_bytes_remaining))))) + return MZ_FALSE; + + while (comp_bytes_remaining) { + n = (mz_uint)MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining); + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, + n) != n) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + cur_src_file_ofs += n; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + cur_dst_file_ofs += n; + + comp_bytes_remaining -= n; + } + + bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); + if (bit_flags & 8) { + // Copy data descriptor + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, + sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + + n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == 0x08074b50) ? 
4 : 3); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + + cur_src_file_ofs += n; + cur_dst_file_ofs += n; + } + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + + // no zip64 support yet + if (cur_dst_file_ofs > 0xFFFFFFFF) return MZ_FALSE; + + orig_central_dir_size = pState->m_central_dir.m_size; + + memcpy(central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, + local_dir_header_ofs); + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_header, + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) + return MZ_FALSE; + + n = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS) + + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS) + + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS); + if (!mz_zip_array_push_back( + pZip, &pState->m_central_dir, + pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n)) { + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, + MZ_FALSE); + return MZ_FALSE; + } + + if (pState->m_central_dir.m_size > 0xFFFFFFFF) return MZ_FALSE; + n = (mz_uint32)orig_central_dir_size; + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) { + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, + MZ_FALSE); + return MZ_FALSE; + } + + pZip->m_total_files++; + pZip->m_archive_size = cur_dst_file_ofs; + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip) { + mz_zip_internal_state *pState; + mz_uint64 central_dir_ofs, central_dir_size; + mz_uint8 hdr[MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE]; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) + return MZ_FALSE; + + pState = pZip->m_pState; + + // no zip64 support yet + if ((pZip->m_total_files > 0xFFFF) || + ((pZip->m_archive_size + pState->m_central_dir.m_size + + 
MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + central_dir_ofs = 0; + central_dir_size = 0; + if (pZip->m_total_files) { + // Write central directory + central_dir_ofs = pZip->m_archive_size; + central_dir_size = pState->m_central_dir.m_size; + pZip->m_central_directory_file_ofs = central_dir_ofs; + if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs, + pState->m_central_dir.m_p, + (size_t)central_dir_size) != central_dir_size) + return MZ_FALSE; + pZip->m_archive_size += central_dir_size; + } + + // Write end of central directory record + MZ_CLEAR_OBJ(hdr); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS, + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, + pZip->m_total_files); + MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, central_dir_size); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, central_dir_ofs); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, + sizeof(hdr)) != sizeof(hdr)) + return MZ_FALSE; +#ifndef MINIZ_NO_STDIO + if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF)) return MZ_FALSE; +#endif // #ifndef MINIZ_NO_STDIO + + pZip->m_archive_size += sizeof(hdr); + + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED; + return MZ_TRUE; +} + +mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, + size_t *pSize) { + if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pSize)) return MZ_FALSE; + if (pZip->m_pWrite != mz_zip_heap_write_func) return MZ_FALSE; + if (!mz_zip_writer_finalize_archive(pZip)) return MZ_FALSE; + + *pBuf = pZip->m_pState->m_pMem; + *pSize = pZip->m_pState->m_mem_size; + pZip->m_pState->m_pMem = NULL; + pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0; + return MZ_TRUE; +} + +mz_bool mz_zip_writer_end(mz_zip_archive *pZip) { + mz_zip_internal_state *pState; + mz_bool status = MZ_TRUE; + if ((!pZip) || 
(!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || + ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) && + (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED))) + return MZ_FALSE; + + pState = pZip->m_pState; + pZip->m_pState = NULL; + mz_zip_array_clear(pZip, &pState->m_central_dir); + mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); + mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); + +#ifndef MINIZ_NO_STDIO + if (pState->m_pFile) { + MZ_FCLOSE(pState->m_pFile); + pState->m_pFile = NULL; + } +#endif // #ifndef MINIZ_NO_STDIO + + if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem)) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem); + pState->m_pMem = NULL; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; + return status; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_add_mem_to_archive_file_in_place( + const char *pZip_filename, const char *pArchive_name, const void *pBuf, + size_t buf_size, const void *pComment, mz_uint16 comment_size, + mz_uint level_and_flags) { + mz_bool status, created_new_archive = MZ_FALSE; + mz_zip_archive zip_archive; + struct MZ_FILE_STAT_STRUCT file_stat; + MZ_CLEAR_OBJ(zip_archive); + if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; + if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) || + ((comment_size) && (!pComment)) || + ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; + if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0) { + // Create a new archive. + if (!mz_zip_writer_init_file(&zip_archive, pZip_filename, 0)) + return MZ_FALSE; + created_new_archive = MZ_TRUE; + } else { + // Append to an existing archive. 
+ if (!mz_zip_reader_init_file( + &zip_archive, pZip_filename, + level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) + return MZ_FALSE; + if (!mz_zip_writer_init_from_reader(&zip_archive, pZip_filename)) { + mz_zip_reader_end(&zip_archive); + return MZ_FALSE; + } + } + status = + mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size, + pComment, comment_size, level_and_flags, 0, 0); + // Always finalize, even if adding failed for some reason, so we have a valid + // central directory. (This may not always succeed, but we can try.) + if (!mz_zip_writer_finalize_archive(&zip_archive)) status = MZ_FALSE; + if (!mz_zip_writer_end(&zip_archive)) status = MZ_FALSE; + if ((!status) && (created_new_archive)) { + // It's a new archive and something went wrong, so just delete it. + int ignoredStatus = MZ_DELETE_FILE(pZip_filename); + (void)ignoredStatus; + } + return status; +} + +void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, + const char *pArchive_name, + size_t *pSize, mz_uint flags) { + int file_index; + mz_zip_archive zip_archive; + void *p = NULL; + + if (pSize) *pSize = 0; + + if ((!pZip_filename) || (!pArchive_name)) return NULL; + + MZ_CLEAR_OBJ(zip_archive); + if (!mz_zip_reader_init_file( + &zip_archive, pZip_filename, + flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) + return NULL; + + if ((file_index = mz_zip_reader_locate_file(&zip_archive, pArchive_name, NULL, + flags)) >= 0) + p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags); + + mz_zip_reader_end(&zip_archive); + return p; +} + +#endif // #ifndef MINIZ_NO_STDIO + +#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +#endif // #ifndef MINIZ_NO_ARCHIVE_APIS + +#ifdef __cplusplus +} +#endif + +#endif // MINIZ_HEADER_FILE_ONLY + +/* + This is free and unencumbered software released into the public domain. 
+ + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. + + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to <http://unlicense.org/> +*/ + +// ---------------------- end of miniz ---------------------------------------- + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +} +#else + +// Reuse MINIZ_LITTE_ENDIAN macro + +#if defined(__sparcv9) +// Big endian +#else +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU +// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. 
+#define MINIZ_LITTLE_ENDIAN 1 +#endif +#endif + +#endif // TINYEXR_USE_MINIZ + +// static bool IsBigEndian(void) { +// union { +// unsigned int i; +// char c[4]; +// } bint = {0x01020304}; +// +// return bint.c[0] == 1; +//} + +static const int kEXRVersionSize = 8; + +static void swap2(unsigned short *val) { +#ifdef MINIZ_LITTLE_ENDIAN + (void)val; +#else + unsigned short tmp = *val; + unsigned char *dst = reinterpret_cast<unsigned char *>(val); + unsigned char *src = reinterpret_cast<unsigned char *>(&tmp); + + dst[0] = src[1]; + dst[1] = src[0]; +#endif +} + +static void swap4(unsigned int *val) { +#ifdef MINIZ_LITTLE_ENDIAN + (void)val; +#else + unsigned int tmp = *val; + unsigned char *dst = reinterpret_cast<unsigned char *>(val); + unsigned char *src = reinterpret_cast<unsigned char *>(&tmp); + + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; +#endif +} + +static void swap8(tinyexr::tinyexr_uint64 *val) { +#ifdef MINIZ_LITTLE_ENDIAN + (void)val; +#else + tinyexr::tinyexr_uint64 tmp = (*val); + unsigned char *dst = reinterpret_cast<unsigned char *>(val); + unsigned char *src = reinterpret_cast<unsigned char *>(&tmp); + + dst[0] = src[7]; + dst[1] = src[6]; + dst[2] = src[5]; + dst[3] = src[4]; + dst[4] = src[3]; + dst[5] = src[2]; + dst[6] = src[1]; + dst[7] = src[0]; +#endif +} + +// https://gist.github.com/rygorous/2156668 +// Reuse MINIZ_LITTLE_ENDIAN flag from miniz. 
+union FP32 { + unsigned int u; + float f; + struct { +#if MINIZ_LITTLE_ENDIAN + unsigned int Mantissa : 23; + unsigned int Exponent : 8; + unsigned int Sign : 1; +#else + unsigned int Sign : 1; + unsigned int Exponent : 8; + unsigned int Mantissa : 23; +#endif + } s; +}; + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpadded" +#endif + +union FP16 { + unsigned short u; + struct { +#if MINIZ_LITTLE_ENDIAN + unsigned int Mantissa : 10; + unsigned int Exponent : 5; + unsigned int Sign : 1; +#else + unsigned int Sign : 1; + unsigned int Exponent : 5; + unsigned int Mantissa : 10; +#endif + } s; +}; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +static FP32 half_to_float(FP16 h) { + static const FP32 magic = {113 << 23}; + static const unsigned int shifted_exp = 0x7c00 + << 13; // exponent mask after shift + FP32 o; + + o.u = (h.u & 0x7fffU) << 13U; // exponent/mantissa bits + unsigned int exp_ = shifted_exp & o.u; // just the exponent + o.u += (127 - 15) << 23; // exponent adjust + + // handle exponent special cases + if (exp_ == shifted_exp) // Inf/NaN? + o.u += (128 - 16) << 23; // extra exp adjust + else if (exp_ == 0) // Zero/Denormal? + { + o.u += 1 << 23; // extra exp adjust + o.f -= magic.f; // renormalize + } + + o.u |= (h.u & 0x8000U) << 16U; // sign bit + return o; +} + +static FP16 float_to_half_full(FP32 f) { + FP16 o = {0}; + + // Based on ISPC reference code (with minor modifications) + if (f.s.Exponent == 0) // Signed zero/denormal (which will underflow) + o.s.Exponent = 0; + else if (f.s.Exponent == 255) // Inf or NaN (all exponent bits set) + { + o.s.Exponent = 31; + o.s.Mantissa = f.s.Mantissa ? 
0x200 : 0; // NaN->qNaN and Inf->Inf + } else // Normalized number + { + // Exponent unbias the single, then bias the halfp + int newexp = f.s.Exponent - 127 + 15; + if (newexp >= 31) // Overflow, return signed infinity + o.s.Exponent = 31; + else if (newexp <= 0) // Underflow + { + if ((14 - newexp) <= 24) // Mantissa might be non-zero + { + unsigned int mant = f.s.Mantissa | 0x800000; // Hidden 1 bit + o.s.Mantissa = mant >> (14 - newexp); + if ((mant >> (13 - newexp)) & 1) // Check for rounding + o.u++; // Round, might overflow into exp bit, but this is OK + } + } else { + o.s.Exponent = static_cast<unsigned int>(newexp); + o.s.Mantissa = f.s.Mantissa >> 13; + if (f.s.Mantissa & 0x1000) // Check for rounding + o.u++; // Round, might overflow to inf, this is OK + } + } + + o.s.Sign = f.s.Sign; + return o; +} + +// NOTE: From OpenEXR code +// #define IMF_INCREASING_Y 0 +// #define IMF_DECREASING_Y 1 +// #define IMF_RAMDOM_Y 2 +// +// #define IMF_NO_COMPRESSION 0 +// #define IMF_RLE_COMPRESSION 1 +// #define IMF_ZIPS_COMPRESSION 2 +// #define IMF_ZIP_COMPRESSION 3 +// #define IMF_PIZ_COMPRESSION 4 +// #define IMF_PXR24_COMPRESSION 5 +// #define IMF_B44_COMPRESSION 6 +// #define IMF_B44A_COMPRESSION 7 + +static const char *ReadString(std::string *s, const char *ptr) { + // Read untile NULL(\0). + const char *p = ptr; + const char *q = ptr; + while ((*q) != 0) q++; + + (*s) = std::string(p, q); + + return q + 1; // skip '\0' +} + +static bool ReadAttribute(std::string *name, std::string *type, + std::vector<unsigned char> *data, size_t *marker_size, + const char *marker, size_t size) { + size_t name_len = strnlen(marker, size); + if (name_len == size) { + // String does not have a terminating character. 
+ return false; + } + *name = std::string(marker, name_len); + + marker += name_len + 1; + size -= name_len + 1; + + size_t type_len = strnlen(marker, size); + if (type_len == size) { + return false; + } + *type = std::string(marker, type_len); + + marker += type_len + 1; + size -= type_len + 1; + + if (size < sizeof(uint32_t)) { + return false; + } + + uint32_t data_len; + memcpy(&data_len, marker, sizeof(uint32_t)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&data_len)); + + marker += sizeof(uint32_t); + size -= sizeof(uint32_t); + + if (size < data_len) { + return false; + } + + data->resize(static_cast<size_t>(data_len)); + memcpy(&data->at(0), marker, static_cast<size_t>(data_len)); + + *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t) + data_len; + return true; +} + +static void WriteAttributeToMemory(std::vector<unsigned char> *out, + const char *name, const char *type, + const unsigned char *data, int len) { + out->insert(out->end(), name, name + strlen(name) + 1); + out->insert(out->end(), type, type + strlen(type) + 1); + + int outLen = len; + tinyexr::swap4(reinterpret_cast<unsigned int *>(&outLen)); + out->insert(out->end(), reinterpret_cast<unsigned char *>(&outLen), + reinterpret_cast<unsigned char *>(&outLen) + sizeof(int)); + out->insert(out->end(), data, data + len); +} + +typedef struct { + std::string name; // less than 255 bytes long + int pixel_type; + int x_sampling; + int y_sampling; + unsigned char p_linear; + unsigned char pad[3]; +} ChannelInfo; + +typedef struct { + std::vector<tinyexr::ChannelInfo> channels; + std::vector<EXRAttribute> attributes; + + int data_window[4]; + int line_order; + int display_window[4]; + float screen_window_center[2]; + float screen_window_width; + float pixel_aspect_ratio; + + int chunk_count; + + // Tiled format + int tile_size_x; + int tile_size_y; + int tile_level_mode; + int tile_rounding_mode; + + unsigned int header_len; + + int compression_type; + + void clear() { + channels.clear(); 
+ attributes.clear(); + + data_window[0] = 0; + data_window[1] = 0; + data_window[2] = 0; + data_window[3] = 0; + line_order = 0; + display_window[0] = 0; + display_window[1] = 0; + display_window[2] = 0; + display_window[3] = 0; + screen_window_center[0] = 0.0f; + screen_window_center[1] = 0.0f; + screen_window_width = 0.0f; + pixel_aspect_ratio = 0.0f; + + chunk_count = 0; + + // Tiled format + tile_size_x = 0; + tile_size_y = 0; + tile_level_mode = 0; + tile_rounding_mode = 0; + + header_len = 0; + compression_type = 0; + } +} HeaderInfo; + +static void ReadChannelInfo(std::vector<ChannelInfo> &channels, + const std::vector<unsigned char> &data) { + const char *p = reinterpret_cast<const char *>(&data.at(0)); + + for (;;) { + if ((*p) == 0) { + break; + } + ChannelInfo info; + p = ReadString(&info.name, p); + + memcpy(&info.pixel_type, p, sizeof(int)); + p += 4; + info.p_linear = static_cast<unsigned char>(p[0]); // uchar + p += 1 + 3; // reserved: uchar[3] + memcpy(&info.x_sampling, p, sizeof(int)); // int + p += 4; + memcpy(&info.y_sampling, p, sizeof(int)); // int + p += 4; + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&info.pixel_type)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&info.x_sampling)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&info.y_sampling)); + + channels.push_back(info); + } +} + +static void WriteChannelInfo(std::vector<unsigned char> &data, + const std::vector<ChannelInfo> &channels) { + size_t sz = 0; + + // Calculate total size. 
+ for (size_t c = 0; c < channels.size(); c++) { + sz += strlen(channels[c].name.c_str()) + 1; // +1 for \0 + sz += 16; // 4 * int + } + data.resize(sz + 1); + + unsigned char *p = &data.at(0); + + for (size_t c = 0; c < channels.size(); c++) { + memcpy(p, channels[c].name.c_str(), strlen(channels[c].name.c_str())); + p += strlen(channels[c].name.c_str()); + (*p) = '\0'; + p++; + + int pixel_type = channels[c].pixel_type; + int x_sampling = channels[c].x_sampling; + int y_sampling = channels[c].y_sampling; + tinyexr::swap4(reinterpret_cast<unsigned int *>(&pixel_type)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&x_sampling)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&y_sampling)); + + memcpy(p, &pixel_type, sizeof(int)); + p += sizeof(int); + + (*p) = channels[c].p_linear; + p += 4; + + memcpy(p, &x_sampling, sizeof(int)); + p += sizeof(int); + + memcpy(p, &y_sampling, sizeof(int)); + p += sizeof(int); + } + + (*p) = '\0'; +} + +static void CompressZip(unsigned char *dst, + tinyexr::tinyexr_uint64 &compressedSize, + const unsigned char *src, unsigned long src_size) { + std::vector<unsigned char> tmpBuf(src_size); + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfZipCompressor.cpp + // + + // + // Reorder the pixel data. + // + + const char *srcPtr = reinterpret_cast<const char *>(src); + + { + char *t1 = reinterpret_cast<char *>(&tmpBuf.at(0)); + char *t2 = reinterpret_cast<char *>(&tmpBuf.at(0)) + (src_size + 1) / 2; + const char *stop = srcPtr + src_size; + + for (;;) { + if (srcPtr < stop) + *(t1++) = *(srcPtr++); + else + break; + + if (srcPtr < stop) + *(t2++) = *(srcPtr++); + else + break; + } + } + + // + // Predictor. 
+ // + + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + src_size; + int p = t[-1]; + + while (t < stop) { + int d = int(t[0]) - p + (128 + 256); + p = t[0]; + t[0] = static_cast<unsigned char>(d); + ++t; + } + } + +#if TINYEXR_USE_MINIZ + // + // Compress the data using miniz + // + + miniz::mz_ulong outSize = miniz::mz_compressBound(src_size); + int ret = miniz::mz_compress( + dst, &outSize, static_cast<const unsigned char *>(&tmpBuf.at(0)), + src_size); + assert(ret == miniz::MZ_OK); + (void)ret; + + compressedSize = outSize; +#else + uLong outSize = compressBound(static_cast<uLong>(src_size)); + int ret = compress(dst, &outSize, static_cast<const Bytef *>(&tmpBuf.at(0)), + src_size); + assert(ret == Z_OK); + + compressedSize = outSize; +#endif +} + +static void DecompressZip(unsigned char *dst, + unsigned long *uncompressed_size /* inout */, + const unsigned char *src, unsigned long src_size) { + std::vector<unsigned char> tmpBuf(*uncompressed_size); + +#if TINYEXR_USE_MINIZ + int ret = + miniz::mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); + assert(ret == miniz::MZ_OK); + (void)ret; +#else + int ret = uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); + assert(ret == Z_OK); + (void)ret; +#endif + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfZipCompressor.cpp + // + + // Predictor. + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + (*uncompressed_size); + + while (t < stop) { + int d = int(t[-1]) + int(t[0]) - 128; + t[0] = static_cast<unsigned char>(d); + ++t; + } + } + + // Reorder the pixel data. 
+ { + const char *t1 = reinterpret_cast<const char *>(&tmpBuf.at(0)); + const char *t2 = reinterpret_cast<const char *>(&tmpBuf.at(0)) + + (*uncompressed_size + 1) / 2; + char *s = reinterpret_cast<char *>(dst); + char *stop = s + (*uncompressed_size); + + for (;;) { + if (s < stop) + *(s++) = *(t1++); + else + break; + + if (s < stop) + *(s++) = *(t2++); + else + break; + } + } +} + +// RLE code from OpenEXR -------------------------------------- + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wsign-conversion" +#endif + +const int MIN_RUN_LENGTH = 3; +const int MAX_RUN_LENGTH = 127; + +// +// Compress an array of bytes, using run-length encoding, +// and return the length of the compressed data. +// + +static int rleCompress(int inLength, const char in[], signed char out[]) { + const char *inEnd = in + inLength; + const char *runStart = in; + const char *runEnd = in + 1; + signed char *outWrite = out; + + while (runStart < inEnd) { + while (runEnd < inEnd && *runStart == *runEnd && + runEnd - runStart - 1 < MAX_RUN_LENGTH) { + ++runEnd; + } + + if (runEnd - runStart >= MIN_RUN_LENGTH) { + // + // Compressable run + // + + *outWrite++ = static_cast<char>(runEnd - runStart) - 1; + *outWrite++ = *(reinterpret_cast<const signed char *>(runStart)); + runStart = runEnd; + } else { + // + // Uncompressable run + // + + while (runEnd < inEnd && + ((runEnd + 1 >= inEnd || *runEnd != *(runEnd + 1)) || + (runEnd + 2 >= inEnd || *(runEnd + 1) != *(runEnd + 2))) && + runEnd - runStart < MAX_RUN_LENGTH) { + ++runEnd; + } + + *outWrite++ = static_cast<char>(runStart - runEnd); + + while (runStart < runEnd) { + *outWrite++ = *(reinterpret_cast<const signed char *>(runStart++)); + } + } + + ++runEnd; + } + + return static_cast<int>(outWrite - out); +} + +// +// Uncompress an array of bytes compressed with rleCompress(). 
+// Returns the length of the oncompressed data, or 0 if the +// length of the uncompressed data would be more than maxLength. +// + +static int rleUncompress(int inLength, int maxLength, const signed char in[], + char out[]) { + char *outStart = out; + + while (inLength > 0) { + if (*in < 0) { + int count = -(static_cast<int>(*in++)); + inLength -= count + 1; + + if (0 > (maxLength -= count)) return 0; + + memcpy(out, in, count); + out += count; + in += count; + } else { + int count = *in++; + inLength -= 2; + + if (0 > (maxLength -= count + 1)) return 0; + + memset(out, *reinterpret_cast<const char *>(in), count + 1); + out += count + 1; + + in++; + } + } + + return static_cast<int>(out - outStart); +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +// End of RLE code from OpenEXR ----------------------------------- + +static void CompressRle(unsigned char *dst, + tinyexr::tinyexr_uint64 &compressedSize, + const unsigned char *src, unsigned long src_size) { + std::vector<unsigned char> tmpBuf(src_size); + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfRleCompressor.cpp + // + + // + // Reorder the pixel data. + // + + const char *srcPtr = reinterpret_cast<const char *>(src); + + { + char *t1 = reinterpret_cast<char *>(&tmpBuf.at(0)); + char *t2 = reinterpret_cast<char *>(&tmpBuf.at(0)) + (src_size + 1) / 2; + const char *stop = srcPtr + src_size; + + for (;;) { + if (srcPtr < stop) + *(t1++) = *(srcPtr++); + else + break; + + if (srcPtr < stop) + *(t2++) = *(srcPtr++); + else + break; + } + } + + // + // Predictor. + // + + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + src_size; + int p = t[-1]; + + while (t < stop) { + int d = int(t[0]) - p + (128 + 256); + p = t[0]; + t[0] = static_cast<unsigned char>(d); + ++t; + } + } + + // outSize will be (srcSiz * 3) / 2 at max. 
+ int outSize = rleCompress(static_cast<int>(src_size), + reinterpret_cast<const char *>(&tmpBuf.at(0)), + reinterpret_cast<signed char *>(dst)); + assert(outSize > 0); + + compressedSize = static_cast<tinyexr::tinyexr_uint64>(outSize); +} + +static void DecompressRle(unsigned char *dst, + const unsigned long uncompressed_size, + const unsigned char *src, unsigned long src_size) { + std::vector<unsigned char> tmpBuf(uncompressed_size); + + int ret = rleUncompress(static_cast<int>(src_size), + static_cast<int>(uncompressed_size), + reinterpret_cast<const signed char *>(src), + reinterpret_cast<char *>(&tmpBuf.at(0))); + assert(ret == static_cast<int>(uncompressed_size)); + (void)ret; + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfRleCompressor.cpp + // + + // Predictor. + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + uncompressed_size; + + while (t < stop) { + int d = int(t[-1]) + int(t[0]) - 128; + t[0] = static_cast<unsigned char>(d); + ++t; + } + } + + // Reorder the pixel data. 
+ { + const char *t1 = reinterpret_cast<const char *>(&tmpBuf.at(0)); + const char *t2 = reinterpret_cast<const char *>(&tmpBuf.at(0)) + + (uncompressed_size + 1) / 2; + char *s = reinterpret_cast<char *>(dst); + char *stop = s + uncompressed_size; + + for (;;) { + if (s < stop) + *(s++) = *(t1++); + else + break; + + if (s < stop) + *(s++) = *(t2++); + else + break; + } + } +} + +#if TINYEXR_USE_PIZ + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++11-long-long" +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wpadded" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wc++11-extensions" +#pragma clang diagnostic ignored "-Wconversion" +#endif + +// +// PIZ compress/uncompress, based on OpenEXR's ImfPizCompressor.cpp +// +// ----------------------------------------------------------------- +// Copyright (c) 2004, Industrial Light & Magic, a division of Lucas +// Digital Ltd. LLC) +// (3 clause BSD license) +// + +struct PIZChannelData { + unsigned short *start; + unsigned short *end; + int nx; + int ny; + int ys; + int size; +}; + +//----------------------------------------------------------------------------- +// +// 16-bit Haar Wavelet encoding and decoding +// +// The source code in this file is derived from the encoding +// and decoding routines written by Christian Rouet for his +// PIZ image file format. +// +//----------------------------------------------------------------------------- + +// +// Wavelet basis functions without modulo arithmetic; they produce +// the best compression ratios when the wavelet-transformed data are +// Huffman-encoded, but the wavelet transform works only for 14-bit +// data (untransformed data values must be less than (1 << 14)). 
//

// Forward 14-bit step: l receives the floor-average of a and b,
// h receives their difference (both computed in signed 16-bit).
inline void wenc14(unsigned short a, unsigned short b, unsigned short &l,
                   unsigned short &h) {
  const short sa = static_cast<short>(a);
  const short sb = static_cast<short>(b);

  const short average = static_cast<short>((sa + sb) >> 1);
  const short difference = static_cast<short>(sa - sb);

  l = static_cast<unsigned short>(average);
  h = static_cast<unsigned short>(difference);
}

// Inverse of wenc14(): rebuild a and b from average l and difference h.
inline void wdec14(unsigned short l, unsigned short h, unsigned short &a,
                   unsigned short &b) {
  const int diff = static_cast<short>(h);
  const int first = static_cast<short>(l) + (diff & 1) + (diff >> 1);

  a = static_cast<unsigned short>(static_cast<short>(first));
  b = static_cast<unsigned short>(static_cast<short>(first - diff));
}

//
// Wavelet basis functions with modulo arithmetic; they work with full
// 16-bit data, but Huffman-encoding the wavelet-transformed data doesn't
// compress the data quite as well.
//

const int NBITS = 16;
const int A_OFFSET = 1 << (NBITS - 1);
const int M_OFFSET = 1 << (NBITS - 1);
const int MOD_MASK = (1 << NBITS) - 1;

// Forward 16-bit step using arithmetic modulo (1 << NBITS).
inline void wenc16(unsigned short a, unsigned short b, unsigned short &l,
                   unsigned short &h) {
  const int shifted = (a + A_OFFSET) & MOD_MASK;
  const int difference = shifted - b;
  int average = (shifted + b) >> 1;

  if (difference < 0) average = (average + M_OFFSET) & MOD_MASK;

  l = static_cast<unsigned short>(average);
  h = static_cast<unsigned short>(difference & MOD_MASK);
}

// Inverse of wenc16().
inline void wdec16(unsigned short l, unsigned short h, unsigned short &a,
                   unsigned short &b) {
  const int average = l;
  const int difference = h;
  const int second = (average - (difference >> 1)) & MOD_MASK;
  const int first = (difference + second - A_OFFSET) & MOD_MASK;

  b = static_cast<unsigned short>(second);
  a = static_cast<unsigned short>(first);
}

// Encode one 2x2 cell in place: first along x, then along y.
static inline void wav2EncodeQuad(bool use14, unsigned short *p00,
                                  unsigned short *p01, unsigned short *p10,
                                  unsigned short *p11) {
  unsigned short t00, t01, t10, t11;

  if (use14) {
    wenc14(*p00, *p01, t00, t01);
    wenc14(*p10, *p11, t10, t11);
    wenc14(t00, t10, *p00, *p10);
    wenc14(t01, t11, *p01, *p11);
  } else {
    wenc16(*p00, *p01, t00, t01);
    wenc16(*p10, *p11, t10, t11);
    wenc16(t00, t10, *p00, *p10);
    wenc16(t01, t11, *p01, *p11);
  }
}

// Encode a single pair in place (1D step for an odd row or column).
static inline void wav2EncodePair(bool use14, unsigned short *first,
                                  unsigned short *second) {
  unsigned short low;

  if (use14)
    wenc14(*first, *second, low, *second);
  else
    wenc16(*first, *second, low, *second);

  *first = low;
}

// Decode one 2x2 cell in place: first along y, then along x.
static inline void wav2DecodeQuad(bool use14, unsigned short *p00,
                                  unsigned short *p01, unsigned short *p10,
                                  unsigned short *p11) {
  unsigned short t00, t01, t10, t11;

  if (use14) {
    wdec14(*p00, *p10, t00, t10);
    wdec14(*p01, *p11, t01, t11);
    wdec14(t00, t01, *p00, *p01);
    wdec14(t10, t11, *p10, *p11);
  } else {
    wdec16(*p00, *p10, t00, t10);
    wdec16(*p01, *p11, t01, t11);
    wdec16(t00, t01, *p00, *p01);
    wdec16(t10, t11, *p10, *p11);
  }
}

// Decode a single pair in place (1D step for an odd row or column).
static inline void wav2DecodePair(bool use14, unsigned short *first,
                                  unsigned short *second) {
  unsigned short low;

  if (use14)
    wdec14(*first, *second, low, *second);
  else
    wdec16(*first, *second, low, *second);

  *first = low;
}

//
// 2D Wavelet encoding:
//

static void wav2Encode(
    unsigned short *in,  // io: values are transformed in place
    int nx,              // i : x size
    int ox,              // i : x offset
    int ny,              // i : y size
    int oy,              // i : y offset
    unsigned short mx)   // i : maximum in[x][y] value
{
  const bool use14 = (mx < (1 << 14));
  const int shortSide = (nx > ny) ? ny : nx;
  int step = 1;  // == 1 << level
  int span = 2;  // == 1 << (level+1)

  //
  // Hierarchical loop on the smaller dimension
  //

  while (span <= shortSide) {
    const int rowStep = oy * step;
    const int rowSpan = oy * span;
    const int colStep = ox * step;
    const int colSpan = ox * span;

    unsigned short *row = in;
    unsigned short *const lastRow = in + oy * (ny - span);

    //
    // Rows that form complete 2x2 cells
    //

    while (row <= lastRow) {
      unsigned short *cell = row;
      unsigned short *const lastCell = row + ox * (nx - span);

      while (cell <= lastCell) {
        unsigned short *below = cell + rowStep;
        wav2EncodeQuad(use14, cell, cell + colStep, below, below + colStep);
        cell += colSpan;
      }

      //
      // Left-over odd column: 1D step along y
      //

      if (nx & step) wav2EncodePair(use14, cell, cell + rowStep);

      row += rowSpan;
    }

    //
    // Left-over odd line: 1D step along x
    //

    if (ny & step) {
      unsigned short *cell = row;
      unsigned short *const lastCell = row + ox * (nx - span);

      while (cell <= lastCell) {
        wav2EncodePair(use14, cell, cell + colStep);
        cell += colSpan;
      }
    }

    //
    // Next level
    //

    step = span;
    span <<= 1;
  }
}

//
// 2D Wavelet decoding:
//

static void wav2Decode(
    unsigned short *in,  // io: values are transformed in place
    int nx,              // i : x size
    int ox,              // i : x offset
    int ny,              // i : y size
    int oy,              // i : y offset
    unsigned short mx)   // i : maximum in[x][y] value
{
  const bool use14 = (mx < (1 << 14));
  const int shortSide = (nx > ny) ? ny : nx;

  //
  // Search max level: span becomes the largest power of two that does
  // not exceed the smaller dimension, step is half of it.
  //

  int span = 1;
  while (span <= shortSide) span <<= 1;
  span >>= 1;

  int step = span >> 1;

  //
  // Hierarchical loop on the smaller dimension, coarsest level first
  //

  while (step >= 1) {
    const int rowStep = oy * step;
    const int rowSpan = oy * span;
    const int colStep = ox * step;
    const int colSpan = ox * span;

    unsigned short *row = in;
    unsigned short *const lastRow = in + oy * (ny - span);

    //
    // Rows that form complete 2x2 cells
    //

    while (row <= lastRow) {
      unsigned short *cell = row;
      unsigned short *const lastCell = row + ox * (nx - span);

      while (cell <= lastCell) {
        unsigned short *below = cell + rowStep;
        wav2DecodeQuad(use14, cell, cell + colStep, below, below + colStep);
        cell += colSpan;
      }

      //
      // Left-over odd column: 1D step along y
      //

      if (nx & step) wav2DecodePair(use14, cell, cell + rowStep);

      row += rowSpan;
    }

    //
    // Left-over odd line: 1D step along x
    //

    if (ny & step) {
      unsigned short *cell = row;
      unsigned short *const lastCell = row + ox * (nx - span);

      while (cell <= lastCell) {
        wav2DecodePair(use14, cell, cell + colStep);
        cell += colSpan;
      }
    }

    //
    // Next level
    //

    span = step;
    step >>= 1;
  }
}

//-----------------------------------------------------------------------------
//
//  16-bit Huffman compression and decompression.
//
//  The source code in this file is derived from the 8-bit
//  Huffman compression and decompression routines written
//  by Christian Rouet for his PIZ image file format.
//
//-----------------------------------------------------------------------------

// Adds some modification for tinyexr.

const int HUF_ENCBITS = 16;  // literal (value) bit length
const int HUF_DECBITS = 14;  // decoding bit size (>= 8)

const int HUF_ENCSIZE = (1 << HUF_ENCBITS) + 1;  // encoding table size
const int HUF_DECSIZE = 1 << HUF_DECBITS;        // decoding table size
const int HUF_DECMASK = HUF_DECSIZE - 1;

struct HufDec {  // short code    long code
  //-------------------------------
  int len : 8;   // code length    0
  int lit : 24;  // lit      p size
  int *p;        // 0      lits
};

// An encoding table entry packs the code length into the low 6 bits
// and the code itself into the bits above.
inline long long hufLength(long long code) { return code & 63; }

inline long long hufCode(long long code) { return code >> 6; }

// Append the low nBits of `bits` to the bit accumulator c (lc valid bits)
// and flush every complete byte to *out, advancing out.
inline void outputBits(int nBits, long long bits, long long &c, int &lc,
                       char *&out) {
  c <<= nBits;
  lc += nBits;

  c |= bits;

  while (lc >= 8) *out++ = static_cast<char>((c >> (lc -= 8)));
}

// Read nBits from the stream, pulling whole bytes from *in into the
// accumulator c as needed. The caller must make sure the bytes exist.
inline long long getBits(int nBits, long long &c, int &lc, const char *&in) {
  while (lc < nBits) {
    c = (c << 8) | *(reinterpret_cast<const unsigned char *>(in++));
    lc += 8;
  }

  lc -= nBits;
  return (c >> lc) & ((1 << nBits) - 1);
}

//
// ENCODING TABLE BUILDING & (UN)PACKING
//

//
// Build a "canonical" Huffman code table:
//  - for each (uncompressed) symbol, hcode contains the length
//    of the corresponding code (in the compressed data)
//  - canonical codes are computed and stored in hcode
//  - the rules for constructing canonical codes are as follows:
//    * shorter codes (if filled with zeroes to the right)
//      have a numerically higher value than longer codes
//    * for codes with the same length, numerical values
//      increase with numerical symbol values
//  - because the canonical code table can be constructed from
//    symbol lengths alone, the code table can be transmitted
//    without sending the actual code values
//  - see http://www.compressconsult.com/huffman/
//
// Every hcode[i] must be in the range 0..58 on entry.
//

static void hufCanonicalCodeTable(long long hcode[HUF_ENCSIZE]) {
  long long n[59];

  //
  // For each i from 0 through 58, count the
  // number of different codes of length i, and
  // store the count in n[i].
  //

  for (int i = 0; i <= 58; ++i) n[i] = 0;

  for (int i = 0; i < HUF_ENCSIZE; ++i) n[hcode[i]] += 1;

  //
  // For each i from 58 through 1, compute the
  // numerically lowest code with length i, and
  // store that code in n[i].
  //

  long long c = 0;

  for (int i = 58; i > 0; --i) {
    long long nc = ((c + n[i]) >> 1);
    n[i] = c;
    c = nc;
  }

  //
  // hcode[i] contains the length, l, of the
  // code for symbol i.  Assign the next available
  // code of length l to the symbol and store both
  // l and the code in hcode[i].
  //

  for (int i = 0; i < HUF_ENCSIZE; ++i) {
    int l = static_cast<int>(hcode[i]);

    if (l > 0) hcode[i] = l | (n[l]++ << 6);
  }
}

//
// Compute Huffman codes (based on frq input) and store them in frq:
//  - code structure is : [63:lsb - 6:msb] | [5-0: bit length];
//  - max code length is 58 bits;
//  - codes outside the range [im-iM] have a null length (unused values);
//  - original frequencies are destroyed;
//  - encoding tables are used by hufEncode() and hufBuildDecTable();
//

// Orders frequency pointers so that the smallest count is on top of the heap.
struct FHeapCompare {
  bool operator()(long long *a, long long *b) { return *a > *b; }
};

static void hufBuildEncTable(
    long long *frq,  // io: input frequencies [HUF_ENCSIZE], output table
    int *im,         //  o: min frq index
    int *iM)         //  o: max frq index
{
  //
  // This function assumes that when it is called, array frq
  // indicates the frequency of all possible symbols in the data
  // that are to be Huffman-encoded.  (frq[i] contains the number
  // of occurrences of symbol i in the data.)  At least one entry
  // must be non-zero.
  //
  // The loop below does three things:
  //
  // 1) Finds the minimum and maximum indices that point
  //    to non-zero entries in frq:
  //
  //     frq[im] != 0, and frq[i] == 0 for all i < im
  //     frq[iM] != 0, and frq[i] == 0 for all i > iM
  //
  // 2) Fills array fHeap with pointers to all non-zero
  //    entries in frq.
  //
  // 3) Initializes array hlink such that hlink[i] == i
  //    for all array entries.
  //

  // The three work tables together need more than a megabyte, which is
  // too much for the stack of many threads, so they live on the heap.
  std::vector<int> hlink(HUF_ENCSIZE);
  std::vector<long long *> fHeapStorage(HUF_ENCSIZE);
  long long **fHeap = &fHeapStorage[0];

  *im = 0;

  while (!frq[*im]) (*im)++;

  int nf = 0;

  for (int i = *im; i < HUF_ENCSIZE; i++) {
    hlink[i] = i;

    if (frq[i]) {
      fHeap[nf] = &frq[i];
      nf++;
      *iM = i;
    }
  }

  //
  // Add a pseudo-symbol, with a frequency count of 1, to frq;
  // adjust the fHeap and hlink array accordingly.  Function
  // hufEncode() uses the pseudo-symbol for run-length encoding.
  //

  (*iM)++;
  frq[*iM] = 1;
  fHeap[nf] = &frq[*iM];
  nf++;

  //
  // Build an array, scode, such that scode[i] contains the number
  // of bits assigned to symbol i.  Conceptually this is done by
  // constructing a tree whose leaves are the symbols with non-zero
  // frequency:
  //
  //     Make a heap that contains all symbols with a non-zero frequency,
  //     with the least frequent symbol on top.
  //
  //     Repeat until only one symbol is left on the heap:
  //
  //         Take the two least frequent symbols off the top of the heap.
  //         Create a new node that has first two nodes as children, and
  //         whose frequency is the sum of the frequencies of the first
  //         two nodes.  Put the new node back into the heap.
  //
  // The last node left on the heap is the root of the tree.  For each
  // leaf node, the distance between the root and the leaf is the length
  // of the code for the corresponding symbol.
  //
  // The loop below doesn't actually build the tree; instead we compute
  // the distances of the leaves from the root on the fly.  When a new
  // node is added to the heap, then that node's descendants are linked
  // into a single linear list that starts at the new node, and the code
  // lengths of the descendants (that is, their distance from the root
  // of the tree) are incremented by one.
  //

  std::make_heap(fHeap, fHeap + nf, FHeapCompare());

  std::vector<long long> scode(HUF_ENCSIZE, 0);

  while (nf > 1) {
    //
    // Find the indices, mm and m, of the two smallest non-zero frq
    // values in fHeap, add the smallest frq to the second-smallest
    // frq, and remove the smallest frq value from fHeap.
    //

    int mm = static_cast<int>(fHeap[0] - frq);
    std::pop_heap(fHeap, fHeap + nf, FHeapCompare());
    --nf;

    int m = static_cast<int>(fHeap[0] - frq);
    std::pop_heap(fHeap, fHeap + nf, FHeapCompare());

    frq[m] += frq[mm];
    std::push_heap(fHeap, fHeap + nf, FHeapCompare());

    //
    // The entries in scode are linked into lists with the
    // entries in hlink serving as "next" pointers and with
    // the end of a list marked by hlink[j] == j.
    //
    // Traverse the lists that start at scode[m] and scode[mm].
    // For each element visited, increment the length of the
    // corresponding code by one bit. (If we visit scode[j]
    // during the traversal, then the code for symbol j becomes
    // one bit longer.)
    //
    // Merge the lists that start at scode[m] and scode[mm]
    // into a single list that starts at scode[m].
    //

    //
    // Add a bit to all codes in the first list.
    //

    for (int j = m;; j = hlink[j]) {
      scode[j]++;

      assert(scode[j] <= 58);

      if (hlink[j] == j) {
        //
        // Merge the two lists.
        //

        hlink[j] = mm;
        break;
      }
    }

    //
    // Add a bit to all codes in the second list
    //

    for (int j = mm;; j = hlink[j]) {
      scode[j]++;

      assert(scode[j] <= 58);

      if (hlink[j] == j) break;
    }
  }

  //
  // Build a canonical Huffman code table, replacing the code
  // lengths in scode with (code, code length) pairs.  Copy the
  // code table from scode into frq.
  //

  hufCanonicalCodeTable(&scode[0]);
  memcpy(frq, &scode[0], sizeof(long long) * HUF_ENCSIZE);
}

//
// Pack an encoding table:
//  - only code lengths, not actual codes, are stored
//  - runs of zeroes are compressed as follows:
//
//    unpacked    packed
//    --------------------------------
//    1 zero    0  (6 bits)
//    2 zeroes    59
//    3 zeroes    60
//    4 zeroes    61
//    5 zeroes    62
//    n zeroes (6 or more)  63 n-6  (6 + 8 bits)
//

const int SHORT_ZEROCODE_RUN = 59;
const int LONG_ZEROCODE_RUN = 63;
const int SHORTEST_LONG_RUN = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;
const int LONGEST_LONG_RUN = 255 + SHORTEST_LONG_RUN;

static void hufPackEncTable(
    const long long *hcode,  // i : encoding table [HUF_ENCSIZE]
    int im,                  // i : min hcode index
    int iM,                  // i : max hcode index
    char **pcode)            //  o: ptr to packed table (updated)
{
  char *p = *pcode;
  long long c = 0;
  int lc = 0;

  for (; im <= iM; im++) {
    int l = hufLength(hcode[im]);

    if (l == 0) {
      int zerun = 1;

      // Extend the run over the following zero-length entries.
      while ((im < iM) && (zerun < LONGEST_LONG_RUN)) {
        if (hufLength(hcode[im + 1]) > 0) break;
        im++;
        zerun++;
      }

      if (zerun >= 2) {
        if (zerun >= SHORTEST_LONG_RUN) {
          outputBits(6, LONG_ZEROCODE_RUN, c, lc, p);
          outputBits(8, zerun - SHORTEST_LONG_RUN, c, lc, p);
        } else {
          outputBits(6, SHORT_ZEROCODE_RUN + zerun - 2, c, lc, p);
        }
        continue;
      }
    }

    outputBits(6, l, c, lc, p);
  }

  // Flush the final partial byte, left-aligned.
  if (lc > 0) *p++ = (unsigned char)(c << (8 - lc));

  *pcode = p;
}

//
// Unpack an encoding table packed by hufPackEncTable():
// returns false if the table is truncated or a zero run would
// extend past iM; *pcode is only updated on success.
//

static bool hufUnpackEncTable(
    const char **pcode,  // io: ptr to packed table (updated)
    int ni,              // i : input size (in bytes)
    int im,              // i : min hcode index
    int iM,              // i : max hcode index
    long long *hcode)    //  o: encoding table [HUF_ENCSIZE]
{
  memset(hcode, 0, sizeof(long long) * HUF_ENCSIZE);

  const char *p = *pcode;
  long long c = 0;
  int lc = 0;

  for (; im <= iM; im++) {
    // getBits() fetches another byte whenever fewer than 6 bits are
    // buffered; refuse to do so once all ni input bytes are consumed.
    if (lc < 6 && p - *pcode >= ni) {
      return false;
    }

    long long l = hcode[im] = getBits(6, c, lc, p);  // code length

    if (l == (long long)LONG_ZEROCODE_RUN) {
      // Same guard for the 8-bit run length that follows.
      if (lc < 8 && p - *pcode >= ni) {
        return false;
      }

      int zerun = getBits(8, c, lc, p) + SHORTEST_LONG_RUN;

      if (im + zerun > iM + 1) {
        return false;
      }

      while (zerun--) hcode[im++] = 0;

      im--;
    } else if (l >= (long long)SHORT_ZEROCODE_RUN) {
      int zerun = l - SHORT_ZEROCODE_RUN + 2;

      if (im + zerun > iM + 1) {
        return false;
      }

      while (zerun--) hcode[im++] = 0;

      im--;
    }
  }

  *pcode = const_cast<char *>(p);

  hufCanonicalCodeTable(hcode);

  return true;
}

//
// DECODING TABLE BUILDING
//

//
// Clear a newly allocated decoding table so that it contains only zeroes.
//

static void hufClearDecTable(HufDec *hdecod)  // io: (allocated by caller)
//     decoding table [HUF_DECSIZE]
{
  for (int i = 0; i < HUF_DECSIZE; i++) {
    hdecod[i].len = 0;
    hdecod[i].lit = 0;
    hdecod[i].p = NULL;
  }
  // memset(hdecod, 0, sizeof(HufDec) * HUF_DECSIZE);
}

//
// Build a decoding hash table based on the encoding table hcode:
//  - short codes (<= HUF_DECBITS) are resolved with a single table access;
//  - long code entry allocations are not optimized, because long codes are
//    unfrequent;
//  - decoding tables are used by hufDecode();
//  - returns false if hcode is not a valid code table; long-code lists
//    allocated before the error was found stay attached to hdecod, so the
//    caller must call hufFreeDecTable() in either case.
//

static bool hufBuildDecTable(const long long *hcode,  // i : encoding table
                             int im,                  // i : min index in hcode
                             int iM,                  // i : max index in hcode
                             HufDec *hdecod)  //  o: (allocated by caller)
//     decoding table [HUF_DECSIZE]
{
  //
  // Init hashtable & loop on all codes.
  // Assumes that hufClearDecTable(hdecod) has already been called.
  //

  for (; im <= iM; im++) {
    long long c = hufCode(hcode[im]);
    int l = hufLength(hcode[im]);

    if (c >> l) {
      //
      // Error: c is supposed to be an l-bit code,
      // but c contains a value that is greater
      // than the largest l-bit number.
      //

      // invalidTableEntry();
      return false;
    }

    if (l > HUF_DECBITS) {
      //
      // Long code: add a secondary entry
      //

      HufDec *pl = hdecod + (c >> (l - HUF_DECBITS));

      if (pl->len) {
        //
        // Error: a short code has already
        // been stored in table entry *pl.
        //

        // invalidTableEntry();
        return false;
      }

      pl->lit++;

      if (pl->p) {
        // Grow the symbol list by one element.
        int *p = pl->p;
        pl->p = new int[pl->lit];

        for (int i = 0; i < pl->lit - 1; ++i) pl->p[i] = p[i];

        delete[] p;
      } else {
        pl->p = new int[1];
      }

      pl->p[pl->lit - 1] = im;
    } else if (l) {
      //
      // Short code: init all primary entries
      //

      HufDec *pl = hdecod + (c << (HUF_DECBITS - l));

      for (long long i = 1ULL << (HUF_DECBITS - l); i > 0; i--, pl++) {
        if (pl->len || pl->p) {
          //
          // Error: a short code or a long code has
          // already been stored in table entry *pl.
          //

          // invalidTableEntry();
          return false;
        }

        pl->len = l;
        pl->lit = im;
      }
    }
  }

  return true;
}

//
// Free the long code entries of a decoding table built by hufBuildDecTable()
//

static void hufFreeDecTable(HufDec *hdecod)  // io: Decoding table
{
  for (int i = 0; i < HUF_DECSIZE; i++) {
    if (hdecod[i].p) {
      delete[] hdecod[i].p;
      hdecod[i].p = 0;
    }
  }
}

//
// ENCODING
//

// Write the code stored in the table entry `code` to the bit stream.
inline void outputCode(long long code, long long &c, int &lc, char *&out) {
  outputBits(hufLength(code), hufCode(code), c, lc, out);
}

inline void sendCode(long long sCode, int runCount, long long runCode,
                     long long &c, int &lc, char *&out) {
  //
  // Output a run of runCount instances of the symbol sCount.
  // Output the symbols explicitly, or if that is shorter, output
  // the sCode symbol once followed by a runCode symbol and runCount
  // expressed as an 8-bit number.
  //

  if (hufLength(sCode) + hufLength(runCode) + 8 < hufLength(sCode) * runCount) {
    outputCode(sCode, c, lc, out);
    outputCode(runCode, c, lc, out);
    outputBits(8, runCount, c, lc, out);
  } else {
    // runCount counts the repeats after the first symbol, so the
    // symbol is written runCount + 1 times.
    while (runCount-- >= 0) outputCode(sCode, c, lc, out);
  }
}

//
// Encode (compress) ni values based on the Huffman encoding table hcode:
//

static int hufEncode  // return: output size (in bits)
    (const long long *hcode,    // i : encoding table
     const unsigned short *in,  // i : uncompressed input buffer
     const int ni,              // i : number of values in `in` (>= 1)
     int rlc,                   // i : rl code
     char *out)                 //  o: compressed output buffer
{
  char *outStart = out;
  long long c = 0;  // bits not yet written to out
  int lc = 0;       // number of valid bits in c (LSB)
  int s = in[0];
  int cs = 0;

  //
  // Loop on input values
  //

  for (int i = 1; i < ni; i++) {
    //
    // Count same values or send code
    //

    if (s == in[i] && cs < 255) {
      cs++;
    } else {
      sendCode(hcode[s], cs, hcode[rlc], c, lc, out);
      cs = 0;
    }

    s = in[i];
  }

  //
  // Send remaining code
  //

  sendCode(hcode[s], cs, hcode[rlc], c, lc, out);

  // Store the last partial byte, left-aligned.
  if (lc) *out = (c << (8 - lc)) & 0xff;

  return (out - outStart) * 8 + lc;
}

//
// DECODING
//

//
// In order to force the compiler to inline them,
// getChar() and getCode() are implemented as macros
// instead of "inline" functions.
+// + +#define getChar(c, lc, in) \ + { \ + c = (c << 8) | *(unsigned char *)(in++); \ + lc += 8; \ + } + +#define getCode(po, rlc, c, lc, in, out, oe) \ + { \ + if (po == rlc) { \ + if (lc < 8) getChar(c, lc, in); \ + \ + lc -= 8; \ + \ + unsigned char cs = (c >> lc); \ + \ + if (out + cs > oe) return false; \ + \ + unsigned short s = out[-1]; \ + \ + while (cs-- > 0) *out++ = s; \ + } else if (out < oe) { \ + *out++ = po; \ + } else { \ + return false; \ + } \ + } + +// +// Decode (uncompress) ni bits based on encoding & decoding tables: +// + +static bool hufDecode(const long long *hcode, // i : encoding table + const HufDec *hdecod, // i : decoding table + const char *in, // i : compressed input buffer + int ni, // i : input size (in bits) + int rlc, // i : run-length code + int no, // i : expected output size (in bytes) + unsigned short *out) // o: uncompressed output buffer +{ + long long c = 0; + int lc = 0; + unsigned short *outb = out; + unsigned short *oe = out + no; + const char *ie = in + (ni + 7) / 8; // input byte size + + // + // Loop on input bytes + // + + while (in < ie) { + getChar(c, lc, in); + + // + // Access decoding table + // + + while (lc >= HUF_DECBITS) { + const HufDec pl = hdecod[(c >> (lc - HUF_DECBITS)) & HUF_DECMASK]; + + if (pl.len) { + // + // Get short code + // + + lc -= pl.len; + getCode(pl.lit, rlc, c, lc, in, out, oe); + } else { + if (!pl.p) { + return false; + } + // invalidCode(); // wrong code + + // + // Search long code + // + + int j; + + for (j = 0; j < pl.lit; j++) { + int l = hufLength(hcode[pl.p[j]]); + + while (lc < l && in < ie) // get more bits + getChar(c, lc, in); + + if (lc >= l) { + if (hufCode(hcode[pl.p[j]]) == + ((c >> (lc - l)) & (((long long)(1) << l) - 1))) { + // + // Found : get long code + // + + lc -= l; + getCode(pl.p[j], rlc, c, lc, in, out, oe); + break; + } + } + } + + if (j == pl.lit) { + return false; + // invalidCode(); // Not found + } + } + } + } + + // + // Get remaining (short) codes + // 
+ + int i = (8 - ni) & 7; + c >>= i; + lc -= i; + + while (lc > 0) { + const HufDec pl = hdecod[(c << (HUF_DECBITS - lc)) & HUF_DECMASK]; + + if (pl.len) { + lc -= pl.len; + getCode(pl.lit, rlc, c, lc, in, out, oe); + } else { + return false; + // invalidCode(); // wrong (long) code + } + } + + if (out - outb != no) { + return false; + } + // notEnoughData (); + + return true; +} + +static void countFrequencies(long long freq[HUF_ENCSIZE], + const unsigned short data[/*n*/], int n) { + for (int i = 0; i < HUF_ENCSIZE; ++i) freq[i] = 0; + + for (int i = 0; i < n; ++i) ++freq[data[i]]; +} + +static void writeUInt(char buf[4], unsigned int i) { + unsigned char *b = (unsigned char *)buf; + + b[0] = i; + b[1] = i >> 8; + b[2] = i >> 16; + b[3] = i >> 24; +} + +static unsigned int readUInt(const char buf[4]) { + const unsigned char *b = (const unsigned char *)buf; + + return (b[0] & 0x000000ff) | ((b[1] << 8) & 0x0000ff00) | + ((b[2] << 16) & 0x00ff0000) | ((b[3] << 24) & 0xff000000); +} + +// +// EXTERNAL INTERFACE +// + +static int hufCompress(const unsigned short raw[], int nRaw, + char compressed[]) { + if (nRaw == 0) return 0; + + long long freq[HUF_ENCSIZE]; + + countFrequencies(freq, raw, nRaw); + + int im = 0; + int iM = 0; + hufBuildEncTable(freq, &im, &iM); + + char *tableStart = compressed + 20; + char *tableEnd = tableStart; + hufPackEncTable(freq, im, iM, &tableEnd); + int tableLength = tableEnd - tableStart; + + char *dataStart = tableEnd; + int nBits = hufEncode(freq, raw, nRaw, iM, dataStart); + int data_length = (nBits + 7) / 8; + + writeUInt(compressed, im); + writeUInt(compressed + 4, iM); + writeUInt(compressed + 8, tableLength); + writeUInt(compressed + 12, nBits); + writeUInt(compressed + 16, 0); // room for future extensions + + return dataStart + data_length - compressed; +} + +static bool hufUncompress(const char compressed[], int nCompressed, + unsigned short raw[], int nRaw) { + if (nCompressed == 0) { + if (nRaw != 0) return false; + + return 
false; + } + + int im = readUInt(compressed); + int iM = readUInt(compressed + 4); + // int tableLength = readUInt (compressed + 8); + int nBits = readUInt(compressed + 12); + + if (im < 0 || im >= HUF_ENCSIZE || iM < 0 || iM >= HUF_ENCSIZE) return false; + + const char *ptr = compressed + 20; + + // + // Fast decoder needs at least 2x64-bits of compressed data, and + // needs to be run-able on this platform. Otherwise, fall back + // to the original decoder + // + + // if (FastHufDecoder::enabled() && nBits > 128) + //{ + // FastHufDecoder fhd (ptr, nCompressed - (ptr - compressed), im, iM, iM); + // fhd.decode ((unsigned char*)ptr, nBits, raw, nRaw); + //} + // else + { + std::vector<long long> freq(HUF_ENCSIZE); + std::vector<HufDec> hdec(HUF_DECSIZE); + + hufClearDecTable(&hdec.at(0)); + + hufUnpackEncTable(&ptr, nCompressed - (ptr - compressed), im, iM, + &freq.at(0)); + + { + if (nBits > 8 * (nCompressed - (ptr - compressed))) { + return false; + } + + hufBuildDecTable(&freq.at(0), im, iM, &hdec.at(0)); + hufDecode(&freq.at(0), &hdec.at(0), ptr, nBits, iM, nRaw, raw); + } + // catch (...) 
+ //{ + // hufFreeDecTable (hdec); + // throw; + //} + + hufFreeDecTable(&hdec.at(0)); + } + + return true; +} + +// +// Functions to compress the range of values in the pixel data +// + +const int USHORT_RANGE = (1 << 16); +const int BITMAP_SIZE = (USHORT_RANGE >> 3); + +static void bitmapFromData(const unsigned short data[/*nData*/], int nData, + unsigned char bitmap[BITMAP_SIZE], + unsigned short &minNonZero, + unsigned short &maxNonZero) { + for (int i = 0; i < BITMAP_SIZE; ++i) bitmap[i] = 0; + + for (int i = 0; i < nData; ++i) bitmap[data[i] >> 3] |= (1 << (data[i] & 7)); + + bitmap[0] &= ~1; // zero is not explicitly stored in + // the bitmap; we assume that the + // data always contain zeroes + minNonZero = BITMAP_SIZE - 1; + maxNonZero = 0; + + for (int i = 0; i < BITMAP_SIZE; ++i) { + if (bitmap[i]) { + if (minNonZero > i) minNonZero = i; + if (maxNonZero < i) maxNonZero = i; + } + } +} + +static unsigned short forwardLutFromBitmap( + const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) { + int k = 0; + + for (int i = 0; i < USHORT_RANGE; ++i) { + if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7)))) + lut[i] = k++; + else + lut[i] = 0; + } + + return k - 1; // maximum value stored in lut[], +} // i.e. number of ones in bitmap minus 1 + +static unsigned short reverseLutFromBitmap( + const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) { + int k = 0; + + for (int i = 0; i < USHORT_RANGE; ++i) { + if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7)))) lut[k++] = i; + } + + int n = k - 1; + + while (k < USHORT_RANGE) lut[k++] = 0; + + return n; // maximum k where lut[k] is non-zero, +} // i.e. 
number of ones in bitmap minus 1 + +static void applyLut(const unsigned short lut[USHORT_RANGE], + unsigned short data[/*nData*/], int nData) { + for (int i = 0; i < nData; ++i) data[i] = lut[data[i]]; +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif // __clang__ + +static bool CompressPiz(unsigned char *outPtr, unsigned int &outSize, + const unsigned char *inPtr, size_t inSize, + const std::vector<ChannelInfo> &channelInfo, + int data_width, int num_lines) { + unsigned char bitmap[BITMAP_SIZE]; + unsigned short minNonZero; + unsigned short maxNonZero; + +#if !MINIZ_LITTLE_ENDIAN + // @todo { PIZ compression on BigEndian architecture. } + assert(0); + return false; +#endif + + // Assume `inSize` is multiple of 2 or 4. + std::vector<unsigned short> tmpBuffer(inSize / sizeof(unsigned short)); + + std::vector<PIZChannelData> channelData(channelInfo.size()); + unsigned short *tmpBufferEnd = &tmpBuffer.at(0); + + for (size_t c = 0; c < channelData.size(); c++) { + PIZChannelData &cd = channelData[c]; + + cd.start = tmpBufferEnd; + cd.end = cd.start; + + cd.nx = data_width; + cd.ny = num_lines; + // cd.ys = c.channel().ySampling; + + size_t pixelSize = sizeof(int); // UINT and FLOAT + if (channelInfo[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + pixelSize = sizeof(short); + } + + cd.size = static_cast<int>(pixelSize / sizeof(short)); + + tmpBufferEnd += cd.nx * cd.ny * cd.size; + } + + const unsigned char *ptr = inPtr; + for (int y = 0; y < num_lines; ++y) { + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + // if (modp (y, cd.ys) != 0) + // continue; + + size_t n = static_cast<size_t>(cd.nx * cd.size); + memcpy(cd.end, ptr, n * sizeof(unsigned short)); + ptr += n * sizeof(unsigned short); + cd.end += n; + } + } + + bitmapFromData(&tmpBuffer.at(0), static_cast<int>(tmpBuffer.size()), bitmap, + minNonZero, maxNonZero); + + unsigned short lut[USHORT_RANGE]; + unsigned short maxValue = forwardLutFromBitmap(bitmap, lut); 
+ applyLut(lut, &tmpBuffer.at(0), static_cast<int>(tmpBuffer.size())); + + // + // Store range compression info in _outBuffer + // + + char *buf = reinterpret_cast<char *>(outPtr); + + memcpy(buf, &minNonZero, sizeof(unsigned short)); + buf += sizeof(unsigned short); + memcpy(buf, &maxNonZero, sizeof(unsigned short)); + buf += sizeof(unsigned short); + + if (minNonZero <= maxNonZero) { + memcpy(buf, reinterpret_cast<char *>(&bitmap[0] + minNonZero), + maxNonZero - minNonZero + 1); + buf += maxNonZero - minNonZero + 1; + } + + // + // Apply wavelet encoding + // + + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + for (int j = 0; j < cd.size; ++j) { + wav2Encode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size, + maxValue); + } + } + + // + // Apply Huffman encoding; append the result to _outBuffer + // + + // length header(4byte), then huff data. Initialize length header with zero, + // then later fill it by `length`. + char *lengthPtr = buf; + int zero = 0; + memcpy(buf, &zero, sizeof(int)); + buf += sizeof(int); + + int length = + hufCompress(&tmpBuffer.at(0), static_cast<int>(tmpBuffer.size()), buf); + memcpy(lengthPtr, &length, sizeof(int)); + + outSize = static_cast<unsigned int>( + (reinterpret_cast<unsigned char *>(buf) - outPtr) + + static_cast<unsigned int>(length)); + return true; +} + +static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, + size_t tmpBufSize, int num_channels, + const EXRChannelInfo *channels, int data_width, + int num_lines) { + unsigned char bitmap[BITMAP_SIZE]; + unsigned short minNonZero; + unsigned short maxNonZero; + +#if !MINIZ_LITTLE_ENDIAN + // @todo { PIZ compression on BigEndian architecture. 
} + assert(0); + return false; +#endif + + memset(bitmap, 0, BITMAP_SIZE); + + const unsigned char *ptr = inPtr; + minNonZero = *(reinterpret_cast<const unsigned short *>(ptr)); + maxNonZero = *(reinterpret_cast<const unsigned short *>(ptr + 2)); + ptr += 4; + + if (maxNonZero >= BITMAP_SIZE) { + return false; + } + + if (minNonZero <= maxNonZero) { + memcpy(reinterpret_cast<char *>(&bitmap[0] + minNonZero), ptr, + maxNonZero - minNonZero + 1); + ptr += maxNonZero - minNonZero + 1; + } + + unsigned short lut[USHORT_RANGE]; + memset(lut, 0, sizeof(unsigned short) * USHORT_RANGE); + unsigned short maxValue = reverseLutFromBitmap(bitmap, lut); + + // + // Huffman decoding + // + + int length; + + length = *(reinterpret_cast<const int *>(ptr)); + ptr += sizeof(int); + + std::vector<unsigned short> tmpBuffer(tmpBufSize); + hufUncompress(reinterpret_cast<const char *>(ptr), length, &tmpBuffer.at(0), + static_cast<int>(tmpBufSize)); + + // + // Wavelet decoding + // + + std::vector<PIZChannelData> channelData(static_cast<size_t>(num_channels)); + + unsigned short *tmpBufferEnd = &tmpBuffer.at(0); + + for (size_t i = 0; i < static_cast<size_t>(num_channels); ++i) { + const EXRChannelInfo &chan = channels[i]; + + size_t pixelSize = sizeof(int); // UINT and FLOAT + if (chan.pixel_type == TINYEXR_PIXELTYPE_HALF) { + pixelSize = sizeof(short); + } + + channelData[i].start = tmpBufferEnd; + channelData[i].end = channelData[i].start; + channelData[i].nx = data_width; + channelData[i].ny = num_lines; + // channelData[i].ys = 1; + channelData[i].size = static_cast<int>(pixelSize / sizeof(short)); + + tmpBufferEnd += channelData[i].nx * channelData[i].ny * channelData[i].size; + } + + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + for (int j = 0; j < cd.size; ++j) { + wav2Decode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size, + maxValue); + } + } + + // + // Expand the pixel data to their original range + // + + applyLut(lut, 
&tmpBuffer.at(0), static_cast<int>(tmpBufSize)); + + for (int y = 0; y < num_lines; y++) { + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + // if (modp (y, cd.ys) != 0) + // continue; + + size_t n = static_cast<size_t>(cd.nx * cd.size); + memcpy(outPtr, cd.end, static_cast<size_t>(n * sizeof(unsigned short))); + outPtr += n * sizeof(unsigned short); + cd.end += n; + } + } + + return true; +} +#endif // TINYEXR_USE_PIZ + +#if TINYEXR_USE_ZFP +struct ZFPCompressionParam { + double rate; + int precision; + double tolerance; + int type; // TINYEXR_ZFP_COMPRESSIONTYPE_* + + ZFPCompressionParam() { + type = TINYEXR_ZFP_COMPRESSIONTYPE_RATE; + rate = 2.0; + precision = 0; + tolerance = 0.0f; + } +}; + +bool FindZFPCompressionParam(ZFPCompressionParam *param, + const EXRAttribute *attributes, + int num_attributes) { + bool foundType = false; + + for (int i = 0; i < num_attributes; i++) { + if ((strcmp(attributes[i].name, "zfpCompressionType") == 0) && + (attributes[i].size == 1)) { + param->type = static_cast<int>(attributes[i].value[0]); + + foundType = true; + } + } + + if (!foundType) { + return false; + } + + if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { + for (int i = 0; i < num_attributes; i++) { + if ((strcmp(attributes[i].name, "zfpCompressionRate") == 0) && + (attributes[i].size == 8)) { + param->rate = *(reinterpret_cast<double *>(attributes[i].value)); + return true; + } + } + } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { + for (int i = 0; i < num_attributes; i++) { + if ((strcmp(attributes[i].name, "zfpCompressionPrecision") == 0) && + (attributes[i].size == 4)) { + param->rate = *(reinterpret_cast<int *>(attributes[i].value)); + return true; + } + } + } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { + for (int i = 0; i < num_attributes; i++) { + if ((strcmp(attributes[i].name, "zfpCompressionTolerance") == 0) && + (attributes[i].size == 8)) { + param->tolerance = 
*(reinterpret_cast<double *>(attributes[i].value)); + return true; + } + } + } else { + assert(0); + } + + return false; +} + +// Assume pixel format is FLOAT for all channels. +static bool DecompressZfp(float *dst, int dst_width, int dst_num_lines, + int num_channels, const unsigned char *src, + unsigned long src_size, + const ZFPCompressionParam ¶m) { + size_t uncompressed_size = dst_width * dst_num_lines * num_channels; + + zfp_stream *zfp = NULL; + zfp_field *field = NULL; + + assert((dst_width % 4) == 0); + assert((dst_num_lines % 4) == 0); + + if ((dst_width & 3U) || (dst_num_lines & 3U)) { + return false; + } + + field = + zfp_field_2d(reinterpret_cast<void *>(const_cast<unsigned char *>(src)), + zfp_type_float, dst_width, dst_num_lines * num_channels); + zfp = zfp_stream_open(NULL); + + if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { + zfp_stream_set_rate(zfp, param.rate, zfp_type_float, /* dimention */ 2, + /* write random access */ 0); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { + zfp_stream_set_precision(zfp, param.precision, zfp_type_float); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { + zfp_stream_set_accuracy(zfp, param.tolerance, zfp_type_float); + } else { + assert(0); + } + + size_t buf_size = zfp_stream_maximum_size(zfp, field); + std::vector<unsigned char> buf(buf_size); + memcpy(&buf.at(0), src, src_size); + + bitstream *stream = stream_open(&buf.at(0), buf_size); + zfp_stream_set_bit_stream(zfp, stream); + zfp_stream_rewind(zfp); + + size_t image_size = dst_width * dst_num_lines; + + for (int c = 0; c < num_channels; c++) { + // decompress 4x4 pixel block. 
+ for (int y = 0; y < dst_num_lines; y += 4) { + for (int x = 0; x < dst_width; x += 4) { + float fblock[16]; + zfp_decode_block_float_2(zfp, fblock); + for (int j = 0; j < 4; j++) { + for (int i = 0; i < 4; i++) { + dst[c * image_size + ((y + j) * dst_width + (x + i))] = + fblock[j * 4 + i]; + } + } + } + } + } + + zfp_field_free(field); + zfp_stream_close(zfp); + stream_close(stream); + + return true; +} + +// Assume pixel format is FLOAT for all channels. +bool CompressZfp(std::vector<unsigned char> *outBuf, unsigned int *outSize, + const float *inPtr, int width, int num_lines, int num_channels, + const ZFPCompressionParam ¶m) { + zfp_stream *zfp = NULL; + zfp_field *field = NULL; + + assert((width % 4) == 0); + assert((num_lines % 4) == 0); + + if ((width & 3U) || (num_lines & 3U)) { + return false; + } + + // create input array. + field = zfp_field_2d(reinterpret_cast<void *>(const_cast<float *>(inPtr)), + zfp_type_float, width, num_lines * num_channels); + + zfp = zfp_stream_open(NULL); + + if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { + zfp_stream_set_rate(zfp, param.rate, zfp_type_float, 2, 0); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { + zfp_stream_set_precision(zfp, param.precision, zfp_type_float); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { + zfp_stream_set_accuracy(zfp, param.tolerance, zfp_type_float); + } else { + assert(0); + } + + size_t buf_size = zfp_stream_maximum_size(zfp, field); + + outBuf->resize(buf_size); + + bitstream *stream = stream_open(&outBuf->at(0), buf_size); + zfp_stream_set_bit_stream(zfp, stream); + zfp_field_free(field); + + size_t image_size = width * num_lines; + + for (int c = 0; c < num_channels; c++) { + // compress 4x4 pixel block. 
+ for (int y = 0; y < num_lines; y += 4) { + for (int x = 0; x < width; x += 4) { + float fblock[16]; + for (int j = 0; j < 4; j++) { + for (int i = 0; i < 4; i++) { + fblock[j * 4 + i] = + inPtr[c * image_size + ((y + j) * width + (x + i))]; + } + } + zfp_encode_block_float_2(zfp, fblock); + } + } + } + + zfp_stream_flush(zfp); + (*outSize) = zfp_stream_compressed_size(zfp); + + zfp_stream_close(zfp); + + return true; +} + +#endif + +// +// ----------------------------------------------------------------- +// + +static void DecodePixelData(/* out */ unsigned char **out_images, + const int *requested_pixel_types, + const unsigned char *data_ptr, size_t data_len, + int compression_type, int line_order, int width, + int height, int x_stride, int y, int line_no, + int num_lines, size_t pixel_data_size, + size_t num_attributes, + const EXRAttribute *attributes, size_t num_channels, + const EXRChannelInfo *channels, + const std::vector<size_t> &channel_offset_list) { + if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { // PIZ +#if TINYEXR_USE_PIZ + // Allocate original data size. + std::vector<unsigned char> outBuf(static_cast<size_t>( + static_cast<size_t>(width * num_lines) * pixel_data_size)); + size_t tmpBufLen = static_cast<size_t>( + static_cast<size_t>(width * num_lines) * pixel_data_size); + + bool ret = tinyexr::DecompressPiz( + reinterpret_cast<unsigned char *>(&outBuf.at(0)), data_ptr, tmpBufLen, + static_cast<int>(num_channels), channels, width, num_lines); + + assert(ret); + (void)ret; + + // For PIZ_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... 
+ for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) { + const unsigned short *line_ptr = reinterpret_cast<unsigned short *>( + &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) + + channel_offset_list[c] * static_cast<size_t>(width))); + for (size_t u = 0; u < static_cast<size_t>(width); u++) { + FP16 hf; + + hf.u = line_ptr[u]; + + tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *image = + reinterpret_cast<unsigned short **>(out_images)[c]; + if (line_order == 0) { + image += (static_cast<size_t>(line_no) + v) * + static_cast<size_t>(x_stride) + + u; + } else { + image += static_cast<size_t>( + (height - 1 - (line_no + static_cast<int>(v)))) * + static_cast<size_t>(x_stride) + + u; + } + *image = hf.u; + } else { // HALF -> FLOAT + FP32 f32 = half_to_float(hf); + float *image = reinterpret_cast<float **>(out_images)[c]; + if (line_order == 0) { + image += (static_cast<size_t>(line_no) + v) * + static_cast<size_t>(x_stride) + + u; + } else { + image += static_cast<size_t>( + (height - 1 - (line_no + static_cast<int>(v)))) * + static_cast<size_t>(x_stride) + + u; + } + *image = f32.f; + } + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT); + + for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) { + const unsigned int *line_ptr = reinterpret_cast<unsigned int *>( + &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) + + channel_offset_list[c] * static_cast<size_t>(width))); + for (size_t u = 0; u < static_cast<size_t>(width); u++) { + unsigned int val = line_ptr[u]; + + tinyexr::swap4(&val); + + unsigned int *image = + reinterpret_cast<unsigned int **>(out_images)[c]; + if (line_order == 0) { + image += (static_cast<size_t>(line_no) + v) * 
+ static_cast<size_t>(x_stride) + + u; + } else { + image += static_cast<size_t>( + (height - 1 - (line_no + static_cast<int>(v)))) * + static_cast<size_t>(x_stride) + + u; + } + *image = val; + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); + for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) { + const float *line_ptr = reinterpret_cast<float *>(&outBuf.at( + v * pixel_data_size * static_cast<size_t>(x_stride) + + channel_offset_list[c] * static_cast<size_t>(x_stride))); + for (size_t u = 0; u < static_cast<size_t>(width); u++) { + float val = line_ptr[u]; + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&val)); + + float *image = reinterpret_cast<float **>(out_images)[c]; + if (line_order == 0) { + image += (static_cast<size_t>(line_no) + v) * + static_cast<size_t>(x_stride) + + u; + } else { + image += static_cast<size_t>( + (height - 1 - (line_no + static_cast<int>(v)))) * + static_cast<size_t>(x_stride) + + u; + } + *image = val; + } + } + } else { + assert(0); + } + } +#else + assert(0 && "PIZ is enabled in this build"); +#endif + + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS || + compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + // Allocate original data size. + std::vector<unsigned char> outBuf(static_cast<size_t>(width) * + static_cast<size_t>(num_lines) * + pixel_data_size); + + unsigned long dstLen = static_cast<unsigned long>(outBuf.size()); + assert(dstLen > 0); + tinyexr::DecompressZip(reinterpret_cast<unsigned char *>(&outBuf.at(0)), + &dstLen, data_ptr, + static_cast<unsigned long>(data_len)); + + // For ZIP_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... 
for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... + for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) { + const unsigned short *line_ptr = reinterpret_cast<unsigned short *>( + &outBuf.at(v * static_cast<size_t>(pixel_data_size) * + static_cast<size_t>(width) + + channel_offset_list[c] * static_cast<size_t>(width))); + for (size_t u = 0; u < static_cast<size_t>(width); u++) { + tinyexr::FP16 hf; + + hf.u = line_ptr[u]; + + tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *image = + reinterpret_cast<unsigned short **>(out_images)[c]; + if (line_order == 0) { + image += (static_cast<size_t>(line_no) + v) * + static_cast<size_t>(x_stride) + + u; + } else { + image += (static_cast<size_t>(height) - 1U - + (static_cast<size_t>(line_no) + v)) * + static_cast<size_t>(x_stride) + + u; + } + *image = hf.u; + } else { // HALF -> FLOAT + tinyexr::FP32 f32 = half_to_float(hf); + float *image = reinterpret_cast<float **>(out_images)[c]; + if (line_order == 0) { + image += (static_cast<size_t>(line_no) + v) * + static_cast<size_t>(x_stride) + + u; + } else { + image += (static_cast<size_t>(height) - 1U - + (static_cast<size_t>(line_no) + v)) * + static_cast<size_t>(x_stride) + + u; + } + *image = f32.f; + } + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT); + + for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) { + const unsigned int *line_ptr = reinterpret_cast<unsigned int *>( + &outBuf.at(v * pixel_data_size * 
static_cast<size_t>(width) + + channel_offset_list[c] * static_cast<size_t>(width))); + for (size_t u = 0; u < static_cast<size_t>(width); u++) { + unsigned int val = line_ptr[u]; + + tinyexr::swap4(&val); + + unsigned int *image = + reinterpret_cast<unsigned int **>(out_images)[c]; + if (line_order == 0) { + image += (static_cast<size_t>(line_no) + v) * + static_cast<size_t>(x_stride) + + u; + } else { + image += (static_cast<size_t>(height) - 1U - + (static_cast<size_t>(line_no) + v)) * + static_cast<size_t>(x_stride) + + u; + } + *image = val; + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); + for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) { + const float *line_ptr = reinterpret_cast<float *>( + &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) + + channel_offset_list[c] * static_cast<size_t>(width))); + for (size_t u = 0; u < static_cast<size_t>(width); u++) { + float val = line_ptr[u]; + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&val)); + + float *image = reinterpret_cast<float **>(out_images)[c]; + if (line_order == 0) { + image += (static_cast<size_t>(line_no) + v) * + static_cast<size_t>(x_stride) + + u; + } else { + image += (static_cast<size_t>(height) - 1U - + (static_cast<size_t>(line_no) + v)) * + static_cast<size_t>(x_stride) + + u; + } + *image = val; + } + } + } else { + assert(0); + } + } + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) { + // Allocate original data size. 
+ std::vector<unsigned char> outBuf(static_cast<size_t>(width) * + static_cast<size_t>(num_lines) * + pixel_data_size); + + unsigned long dstLen = static_cast<unsigned long>(outBuf.size()); + assert(dstLen > 0); + tinyexr::DecompressRle(reinterpret_cast<unsigned char *>(&outBuf.at(0)), + dstLen, data_ptr, + static_cast<unsigned long>(data_len)); + + // For RLE_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... + for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) { + const unsigned short *line_ptr = reinterpret_cast<unsigned short *>( + &outBuf.at(v * static_cast<size_t>(pixel_data_size) * + static_cast<size_t>(width) + + channel_offset_list[c] * static_cast<size_t>(width))); + for (size_t u = 0; u < static_cast<size_t>(width); u++) { + tinyexr::FP16 hf; + + hf.u = line_ptr[u]; + + tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *image = + reinterpret_cast<unsigned short **>(out_images)[c]; + if (line_order == 0) { + image += (static_cast<size_t>(line_no) + v) * + static_cast<size_t>(x_stride) + + u; + } else { + image += (static_cast<size_t>(height) - 1U - + (static_cast<size_t>(line_no) + v)) * + static_cast<size_t>(x_stride) + + u; + } + *image = hf.u; + } else { // HALF -> FLOAT + tinyexr::FP32 f32 = half_to_float(hf); + float *image = reinterpret_cast<float **>(out_images)[c]; + if (line_order == 0) { + image += (static_cast<size_t>(line_no) + v) * + 
static_cast<size_t>(x_stride) + + u; + } else { + image += (static_cast<size_t>(height) - 1U - + (static_cast<size_t>(line_no) + v)) * + static_cast<size_t>(x_stride) + + u; + } + *image = f32.f; + } + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT); + + for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) { + const unsigned int *line_ptr = reinterpret_cast<unsigned int *>( + &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) + + channel_offset_list[c] * static_cast<size_t>(width))); + for (size_t u = 0; u < static_cast<size_t>(width); u++) { + unsigned int val = line_ptr[u]; + + tinyexr::swap4(&val); + + unsigned int *image = + reinterpret_cast<unsigned int **>(out_images)[c]; + if (line_order == 0) { + image += (static_cast<size_t>(line_no) + v) * + static_cast<size_t>(x_stride) + + u; + } else { + image += (static_cast<size_t>(height) - 1U - + (static_cast<size_t>(line_no) + v)) * + static_cast<size_t>(x_stride) + + u; + } + *image = val; + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); + for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) { + const float *line_ptr = reinterpret_cast<float *>( + &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) + + channel_offset_list[c] * static_cast<size_t>(width))); + for (size_t u = 0; u < static_cast<size_t>(width); u++) { + float val = line_ptr[u]; + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&val)); + + float *image = reinterpret_cast<float **>(out_images)[c]; + if (line_order == 0) { + image += (static_cast<size_t>(line_no) + v) * + static_cast<size_t>(x_stride) + + u; + } else { + image += (static_cast<size_t>(height) - 1U - + (static_cast<size_t>(line_no) + v)) * + static_cast<size_t>(x_stride) + + u; + } + *image = val; + } + } + } else { + assert(0); + } + } + } else if (compression_type == 
TINYEXR_COMPRESSIONTYPE_ZFP) { +#if TINYEXR_USE_ZFP + tinyexr::ZFPCompressionParam zfp_compression_param; + if (!FindZFPCompressionParam(&zfp_compression_param, attributes, + num_attributes)) { + assert(0); + return; + } + + // Allocate original data size. + std::vector<unsigned char> outBuf(static_cast<size_t>(width) * + static_cast<size_t>(num_lines) * + pixel_data_size); + + unsigned long dstLen = outBuf.size(); + assert(dstLen > 0); + tinyexr::DecompressZfp(reinterpret_cast<float *>(&outBuf.at(0)), width, + num_lines, num_channels, data_ptr, + static_cast<unsigned long>(data_len), + zfp_compression_param); + + // For ZFP_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... 
+ for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) { + assert(channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT); + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); + for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) { + const float *line_ptr = reinterpret_cast<float *>( + &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) + + channel_offset_list[c] * static_cast<size_t>(width))); + for (size_t u = 0; u < static_cast<size_t>(width); u++) { + float val = line_ptr[u]; + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&val)); + + float *image = reinterpret_cast<float **>(out_images)[c]; + if (line_order == 0) { + image += (static_cast<size_t>(line_no) + v) * + static_cast<size_t>(x_stride) + + u; + } else { + image += (static_cast<size_t>(height) - 1U - + (static_cast<size_t>(line_no) + v)) * + static_cast<size_t>(x_stride) + + u; + } + *image = val; + } + } + } else { + assert(0); + } + } +#else + (void)attributes; + (void)num_attributes; + (void)num_channels; + assert(0); +#endif + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { + for (size_t c = 0; c < num_channels; c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + const unsigned short *line_ptr = + reinterpret_cast<const unsigned short *>( + data_ptr + + c * static_cast<size_t>(width) * sizeof(unsigned short)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *outLine = + reinterpret_cast<unsigned short *>(out_images[c]); + if (line_order == 0) { + outLine += y * x_stride; + } else { + outLine += (height - 1 - y) * x_stride; + } + + for (int u = 0; u < width; u++) { + tinyexr::FP16 hf; + + hf.u = line_ptr[u]; + + tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u)); + + outLine[u] = hf.u; + } + } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { + float *outLine = reinterpret_cast<float *>(out_images[c]); + if (line_order == 0) { 
+ outLine += y * x_stride; + } else { + outLine += (height - 1 - y) * x_stride; + } + + for (int u = 0; u < width; u++) { + tinyexr::FP16 hf; + + hf.u = line_ptr[u]; + + tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u)); + + tinyexr::FP32 f32 = half_to_float(hf); + + outLine[u] = f32.f; + } + } else { + assert(0); + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + const float *line_ptr = reinterpret_cast<const float *>( + data_ptr + c * static_cast<size_t>(width) * sizeof(float)); + + float *outLine = reinterpret_cast<float *>(out_images[c]); + if (line_order == 0) { + outLine += y * x_stride; + } else { + outLine += (height - 1 - y) * x_stride; + } + + for (int u = 0; u < width; u++) { + float val = line_ptr[u]; + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&val)); + + outLine[u] = val; + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + const unsigned int *line_ptr = reinterpret_cast<const unsigned int *>( + data_ptr + c * static_cast<size_t>(width) * sizeof(unsigned int)); + + unsigned int *outLine = reinterpret_cast<unsigned int *>(out_images[c]); + if (line_order == 0) { + outLine += y * x_stride; + } else { + outLine += (height - 1 - y) * x_stride; + } + + for (int u = 0; u < width; u++) { + unsigned int val = line_ptr[u]; + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&val)); + + outLine[u] = val; + } + } + } + } +} + +static void DecodeTiledPixelData( + unsigned char **out_images, int *width, int *height, + const int *requested_pixel_types, const unsigned char *data_ptr, + size_t data_len, int compression_type, int line_order, int data_width, + int data_height, int tile_offset_x, int tile_offset_y, int tile_size_x, + int tile_size_y, size_t pixel_data_size, size_t num_attributes, + const EXRAttribute *attributes, size_t num_channels, + const EXRChannelInfo *channels, + const std::vector<size_t> &channel_offset_list) { + assert(tile_offset_x * tile_size_x < data_width); + assert(tile_offset_y * 
tile_size_y < data_height); + + // Compute actual image size in a tile. + if ((tile_offset_x + 1) * tile_size_x >= data_width) { + (*width) = data_width - (tile_offset_x * tile_size_x); + } else { + (*width) = tile_size_x; + } + + if ((tile_offset_y + 1) * tile_size_y >= data_height) { + (*height) = data_height - (tile_offset_y * tile_size_y); + } else { + (*height) = tile_size_y; + } + + // Image size = tile size. + DecodePixelData(out_images, requested_pixel_types, data_ptr, data_len, + compression_type, line_order, (*width), tile_size_y, + /* stride */ tile_size_x, /* y */ 0, /* line_no */ 0, + (*height), pixel_data_size, num_attributes, attributes, + num_channels, channels, channel_offset_list); +} + +static void ComputeChannelLayout(std::vector<size_t> *channel_offset_list, + int *pixel_data_size, size_t *channel_offset, + int num_channels, + const EXRChannelInfo *channels) { + channel_offset_list->resize(static_cast<size_t>(num_channels)); + + (*pixel_data_size) = 0; + (*channel_offset) = 0; + + for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) { + (*channel_offset_list)[c] = (*channel_offset); + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + (*pixel_data_size) += sizeof(unsigned short); + (*channel_offset) += sizeof(unsigned short); + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + (*pixel_data_size) += sizeof(float); + (*channel_offset) += sizeof(float); + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + (*pixel_data_size) += sizeof(unsigned int); + (*channel_offset) += sizeof(unsigned int); + } else { + assert(0); + } + } +} + +static unsigned char **AllocateImage(int num_channels, + const EXRChannelInfo *channels, + const int *requested_pixel_types, + int data_width, int data_height) { + unsigned char **images = + reinterpret_cast<unsigned char **>(static_cast<float **>( + malloc(sizeof(float *) * static_cast<size_t>(num_channels)))); + + for (size_t c = 0; c < static_cast<size_t>(num_channels); 
c++) { + size_t data_len = + static_cast<size_t>(data_width) * static_cast<size_t>(data_height); + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + // pixel_data_size += sizeof(unsigned short); + // channel_offset += sizeof(unsigned short); + // Alloc internal image for half type. + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + images[c] = + reinterpret_cast<unsigned char *>(static_cast<unsigned short *>( + malloc(sizeof(unsigned short) * data_len))); + } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { + images[c] = reinterpret_cast<unsigned char *>( + static_cast<float *>(malloc(sizeof(float) * data_len))); + } else { + assert(0); + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + // pixel_data_size += sizeof(float); + // channel_offset += sizeof(float); + images[c] = reinterpret_cast<unsigned char *>( + static_cast<float *>(malloc(sizeof(float) * data_len))); + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + // pixel_data_size += sizeof(unsigned int); + // channel_offset += sizeof(unsigned int); + images[c] = reinterpret_cast<unsigned char *>( + static_cast<unsigned int *>(malloc(sizeof(unsigned int) * data_len))); + } else { + assert(0); + } + } + + return images; +} + +static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, + const EXRVersion *version, std::string *err, + const unsigned char *buf, size_t size) { + const char *marker = reinterpret_cast<const char *>(&buf[0]); + + if (empty_header) { + (*empty_header) = false; + } + + if (version->multipart) { + if (size > 0 && marker[0] == '\0') { + // End of header list. 
+ if (empty_header) { + (*empty_header) = true; + } + return TINYEXR_SUCCESS; + } + } + + // According to the spec, the header of every OpenEXR file must contain at + // least the following attributes: + // + // channels chlist + // compression compression + // dataWindow box2i + // displayWindow box2i + // lineOrder lineOrder + // pixelAspectRatio float + // screenWindowCenter v2f + // screenWindowWidth float + bool has_channels = false; + bool has_compression = false; + bool has_data_window = false; + bool has_display_window = false; + bool has_line_order = false; + bool has_pixel_aspect_ratio = false; + bool has_screen_window_center = false; + bool has_screen_window_width = false; + + info->data_window[0] = 0; + info->data_window[1] = 0; + info->data_window[2] = 0; + info->data_window[3] = 0; + info->line_order = 0; // @fixme + info->display_window[0] = 0; + info->display_window[1] = 0; + info->display_window[2] = 0; + info->display_window[3] = 0; + info->screen_window_center[0] = 0.0f; + info->screen_window_center[1] = 0.0f; + info->screen_window_width = -1.0f; + info->pixel_aspect_ratio = -1.0f; + + info->tile_size_x = -1; + info->tile_size_y = -1; + info->tile_level_mode = -1; + info->tile_rounding_mode = -1; + + info->attributes.clear(); + + // Read attributes + size_t orig_size = size; + for (;;) { + if (0 == size) { + return TINYEXR_ERROR_INVALID_DATA; + } else if (marker[0] == '\0') { + size--; + break; + } + + std::string attr_name; + std::string attr_type; + std::vector<unsigned char> data; + size_t marker_size; + if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size, + marker, size)) { + return TINYEXR_ERROR_INVALID_DATA; + } + marker += marker_size; + size -= marker_size; + + if (version->tiled && attr_name.compare("tiles") == 0) { + unsigned int x_size, y_size; + unsigned char tile_mode; + assert(data.size() == 9); + memcpy(&x_size, &data.at(0), sizeof(int)); + memcpy(&y_size, &data.at(4), sizeof(int)); + tile_mode = data[8]; + 
tinyexr::swap4(&x_size); + tinyexr::swap4(&y_size); + + info->tile_size_x = static_cast<int>(x_size); + info->tile_size_y = static_cast<int>(y_size); + + // mode = levelMode + roundingMode * 16 + info->tile_level_mode = tile_mode & 0x3; + info->tile_rounding_mode = (tile_mode >> 4) & 0x1; + + } else if (attr_name.compare("compression") == 0) { + bool ok = false; + if ((data[0] >= TINYEXR_COMPRESSIONTYPE_NONE) && + (data[0] < TINYEXR_COMPRESSIONTYPE_PIZ)) { + ok = true; + } + + if (data[0] == TINYEXR_COMPRESSIONTYPE_PIZ) { +#if TINYEXR_USE_PIZ + ok = true; +#else + if (err) { + (*err) = "PIZ compression is not supported."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; +#endif + } + + if (data[0] == TINYEXR_COMPRESSIONTYPE_ZFP) { +#if TINYEXR_USE_ZFP + ok = true; +#else + if (err) { + (*err) = "ZFP compression is not supported."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; +#endif + } + + if (!ok) { + if (err) { + (*err) = "Unknown compression type."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + info->compression_type = static_cast<int>(data[0]); + has_compression = true; + + } else if (attr_name.compare("channels") == 0) { + // name: zero-terminated string, from 1 to 255 bytes long + // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2 + // pLinear: unsigned char, possible values are 0 and 1 + // reserved: three chars, should be zero + // xSampling: int + // ySampling: int + + ReadChannelInfo(info->channels, data); + + if (info->channels.size() < 1) { + if (err) { + (*err) = "# of channels is zero."; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + has_channels = true; + + } else if (attr_name.compare("dataWindow") == 0) { + memcpy(&info->data_window[0], &data.at(0), sizeof(int)); + memcpy(&info->data_window[1], &data.at(4), sizeof(int)); + memcpy(&info->data_window[2], &data.at(8), sizeof(int)); + memcpy(&info->data_window[3], &data.at(12), sizeof(int)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->data_window[0])); + 
tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->data_window[1])); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->data_window[2])); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->data_window[3])); + + has_data_window = true; + } else if (attr_name.compare("displayWindow") == 0) { + memcpy(&info->display_window[0], &data.at(0), sizeof(int)); + memcpy(&info->display_window[1], &data.at(4), sizeof(int)); + memcpy(&info->display_window[2], &data.at(8), sizeof(int)); + memcpy(&info->display_window[3], &data.at(12), sizeof(int)); + tinyexr::swap4( + reinterpret_cast<unsigned int *>(&info->display_window[0])); + tinyexr::swap4( + reinterpret_cast<unsigned int *>(&info->display_window[1])); + tinyexr::swap4( + reinterpret_cast<unsigned int *>(&info->display_window[2])); + tinyexr::swap4( + reinterpret_cast<unsigned int *>(&info->display_window[3])); + + has_display_window = true; + } else if (attr_name.compare("lineOrder") == 0) { + info->line_order = static_cast<int>(data[0]); + has_line_order = true; + } else if (attr_name.compare("pixelAspectRatio") == 0) { + memcpy(&info->pixel_aspect_ratio, &data.at(0), sizeof(float)); + tinyexr::swap4( + reinterpret_cast<unsigned int *>(&info->pixel_aspect_ratio)); + has_pixel_aspect_ratio = true; + } else if (attr_name.compare("screenWindowCenter") == 0) { + memcpy(&info->screen_window_center[0], &data.at(0), sizeof(float)); + memcpy(&info->screen_window_center[1], &data.at(4), sizeof(float)); + tinyexr::swap4( + reinterpret_cast<unsigned int *>(&info->screen_window_center[0])); + tinyexr::swap4( + reinterpret_cast<unsigned int *>(&info->screen_window_center[1])); + has_screen_window_center = true; + } else if (attr_name.compare("screenWindowWidth") == 0) { + memcpy(&info->screen_window_width, &data.at(0), sizeof(float)); + tinyexr::swap4( + reinterpret_cast<unsigned int *>(&info->screen_window_width)); + + has_screen_window_width = true; + } else if (attr_name.compare("chunkCount") == 0) { + 
memcpy(&info->chunk_count, &data.at(0), sizeof(int)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->chunk_count)); + } else { + // Custom attribute(up to TINYEXR_MAX_ATTRIBUTES) + if (info->attributes.size() < TINYEXR_MAX_ATTRIBUTES) { + EXRAttribute attrib; + strncpy(attrib.name, attr_name.c_str(), 255); + attrib.name[255] = '\0'; + strncpy(attrib.type, attr_type.c_str(), 255); + attrib.type[255] = '\0'; + attrib.size = static_cast<int>(data.size()); + attrib.value = static_cast<unsigned char *>(malloc(data.size())); + memcpy(reinterpret_cast<char *>(attrib.value), &data.at(0), + data.size()); + info->attributes.push_back(attrib); + } + } + } + + // Check if required attributes exist + { + std::stringstream ss_err; + + if (!has_compression) { + ss_err << "\"compression\" attribute not found in the header." + << std::endl; + } + + if (!has_channels) { + ss_err << "\"channels\" attribute not found in the header." << std::endl; + } + + if (!has_line_order) { + ss_err << "\"lineOrder\" attribute not found in the header." << std::endl; + } + + if (!has_display_window) { + ss_err << "\"displayWindow\" attribute not found in the header." + << std::endl; + } + + if (!has_data_window) { + ss_err << "\"dataWindow\" attribute not found in the header." + << std::endl; + } + + if (!has_pixel_aspect_ratio) { + ss_err << "\"pixelAspectRatio\" attribute not found in the header." + << std::endl; + } + + if (!has_screen_window_width) { + ss_err << "\"screenWindowWidth\" attribute not found in the header." + << std::endl; + } + + if (!has_screen_window_center) { + ss_err << "\"screenWindowCenter\" attribute not found in the header." + << std::endl; + } + + if (!(ss_err.str().empty())) { + if (err) { + (*err) += ss_err.str(); + } + return TINYEXR_ERROR_INVALID_HEADER; + } + } + + info->header_len = static_cast<unsigned int>(orig_size - size); + + return TINYEXR_SUCCESS; +} + +// C++ HeaderInfo to C EXRHeader conversion. 
+static void ConvertHeader(EXRHeader *exr_header, const HeaderInfo &info) { + exr_header->pixel_aspect_ratio = info.pixel_aspect_ratio; + exr_header->screen_window_center[0] = info.screen_window_center[0]; + exr_header->screen_window_center[1] = info.screen_window_center[1]; + exr_header->screen_window_width = info.screen_window_width; + exr_header->chunk_count = info.chunk_count; + exr_header->display_window[0] = info.display_window[0]; + exr_header->display_window[1] = info.display_window[1]; + exr_header->display_window[2] = info.display_window[2]; + exr_header->display_window[3] = info.display_window[3]; + exr_header->data_window[0] = info.data_window[0]; + exr_header->data_window[1] = info.data_window[1]; + exr_header->data_window[2] = info.data_window[2]; + exr_header->data_window[3] = info.data_window[3]; + exr_header->line_order = info.line_order; + exr_header->compression_type = info.compression_type; + + exr_header->tile_size_x = info.tile_size_x; + exr_header->tile_size_y = info.tile_size_y; + exr_header->tile_level_mode = info.tile_level_mode; + exr_header->tile_rounding_mode = info.tile_rounding_mode; + + exr_header->num_channels = static_cast<int>(info.channels.size()); + + exr_header->channels = static_cast<EXRChannelInfo *>(malloc( + sizeof(EXRChannelInfo) * static_cast<size_t>(exr_header->num_channels))); + for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) { + strncpy(exr_header->channels[c].name, info.channels[c].name.c_str(), 255); + // manually add '\0' for safety. 
+ exr_header->channels[c].name[255] = '\0'; + + exr_header->channels[c].pixel_type = info.channels[c].pixel_type; + exr_header->channels[c].p_linear = info.channels[c].p_linear; + exr_header->channels[c].x_sampling = info.channels[c].x_sampling; + exr_header->channels[c].y_sampling = info.channels[c].y_sampling; + } + + exr_header->pixel_types = static_cast<int *>( + malloc(sizeof(int) * static_cast<size_t>(exr_header->num_channels))); + for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) { + exr_header->pixel_types[c] = info.channels[c].pixel_type; + } + + // Initially fill with values of `pixel_types` + exr_header->requested_pixel_types = static_cast<int *>( + malloc(sizeof(int) * static_cast<size_t>(exr_header->num_channels))); + for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) { + exr_header->requested_pixel_types[c] = info.channels[c].pixel_type; + } + + assert(info.attributes.size() < TINYEXR_MAX_ATTRIBUTES); + exr_header->num_custom_attributes = static_cast<int>(info.attributes.size()); + + for (size_t i = 0; i < info.attributes.size(); i++) { + memcpy(exr_header->custom_attributes[i].name, info.attributes[i].name, 256); + memcpy(exr_header->custom_attributes[i].type, info.attributes[i].type, 256); + exr_header->custom_attributes[i].size = info.attributes[i].size; + // Just copy poiner + exr_header->custom_attributes[i].value = info.attributes[i].value; + } + + exr_header->header_len = info.header_len; +} + +static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, + const std::vector<tinyexr::tinyexr_uint64> &offsets, + const unsigned char *head) { + int num_channels = exr_header->num_channels; + + int num_scanline_blocks = 1; + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanline_blocks = 16; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + num_scanline_blocks = 32; + } else if (exr_header->compression_type == 
TINYEXR_COMPRESSIONTYPE_ZFP) { + num_scanline_blocks = 16; + } + + int data_width = exr_header->data_window[2] - exr_header->data_window[0] + 1; + int data_height = exr_header->data_window[3] - exr_header->data_window[1] + 1; + + size_t num_blocks = offsets.size(); + + std::vector<size_t> channel_offset_list; + int pixel_data_size = 0; + size_t channel_offset = 0; + tinyexr::ComputeChannelLayout(&channel_offset_list, &pixel_data_size, + &channel_offset, num_channels, + exr_header->channels); + + if (exr_header->tiled) { + size_t num_tiles = offsets.size(); // = # of blocks + + exr_image->tiles = static_cast<EXRTile *>( + malloc(sizeof(EXRTile) * static_cast<size_t>(num_tiles))); + + for (size_t tile_idx = 0; tile_idx < num_tiles; tile_idx++) { + // Allocate memory for each tile. + exr_image->tiles[tile_idx].images = tinyexr::AllocateImage( + num_channels, exr_header->channels, exr_header->requested_pixel_types, + data_width, data_height); + + // 16 byte: tile coordinates + // 4 byte : data size + // ~ : data(uncompressed or compressed) + const unsigned char *data_ptr = + reinterpret_cast<const unsigned char *>(head + offsets[tile_idx]); + + int tile_coordinates[4]; + memcpy(tile_coordinates, data_ptr, sizeof(int) * 4); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&tile_coordinates[0])); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&tile_coordinates[1])); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&tile_coordinates[2])); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&tile_coordinates[3])); + + // @todo{ LoD } + assert(tile_coordinates[2] == 0); + assert(tile_coordinates[3] == 0); + + int data_len; + memcpy(&data_len, data_ptr + 16, + sizeof(int)); // 16 = sizeof(tile_coordinates) + tinyexr::swap4(reinterpret_cast<unsigned int *>(&data_len)); + assert(data_len >= 4); + + // Move to data addr: 20 = 16 + 4; + data_ptr += 20; + + tinyexr::DecodeTiledPixelData( + exr_image->tiles[tile_idx].images, + &(exr_image->tiles[tile_idx].width), + 
&(exr_image->tiles[tile_idx].height), + exr_header->requested_pixel_types, data_ptr, + static_cast<size_t>(data_len), exr_header->compression_type, + exr_header->line_order, data_width, data_height, tile_coordinates[0], + tile_coordinates[1], exr_header->tile_size_x, exr_header->tile_size_y, + static_cast<size_t>(pixel_data_size), + static_cast<size_t>(exr_header->num_custom_attributes), + exr_header->custom_attributes, + static_cast<size_t>(exr_header->num_channels), exr_header->channels, + channel_offset_list); + + exr_image->tiles[tile_idx].offset_x = tile_coordinates[0]; + exr_image->tiles[tile_idx].offset_y = tile_coordinates[1]; + exr_image->tiles[tile_idx].level_x = tile_coordinates[2]; + exr_image->tiles[tile_idx].level_y = tile_coordinates[3]; + + exr_image->num_tiles = static_cast<int>(num_tiles); + } + } else { // scanline format + + exr_image->images = tinyexr::AllocateImage( + num_channels, exr_header->channels, exr_header->requested_pixel_types, + data_width, data_height); + +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (int y = 0; y < static_cast<int>(num_blocks); y++) { + size_t y_idx = static_cast<size_t>(y); + const unsigned char *data_ptr = + reinterpret_cast<const unsigned char *>(head + offsets[y_idx]); + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(uncompressed or compressed) + int line_no; + memcpy(&line_no, data_ptr, sizeof(int)); + int data_len; + memcpy(&data_len, data_ptr + 4, sizeof(int)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&line_no)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&data_len)); + + int end_line_no = (std::min)(line_no + num_scanline_blocks, + (exr_header->data_window[3] + 1)); + + int num_lines = end_line_no - line_no; + assert(num_lines > 0); + + // Move to data addr: 8 = 4 + 4; + data_ptr += 8; + + // Adjust line_no with data_window.bmin.y + line_no -= exr_header->data_window[1]; + + tinyexr::DecodePixelData( + exr_image->images, exr_header->requested_pixel_types, 
data_ptr, + static_cast<size_t>(data_len), exr_header->compression_type, + exr_header->line_order, data_width, data_height, data_width, y, + line_no, num_lines, static_cast<size_t>(pixel_data_size), + static_cast<size_t>(exr_header->num_custom_attributes), + exr_header->custom_attributes, + static_cast<size_t>(exr_header->num_channels), exr_header->channels, + channel_offset_list); + } // omp parallel + } + + // Overwrite `pixel_type` with `requested_pixel_type`. + { + for (int c = 0; c < exr_header->num_channels; c++) { + exr_header->pixel_types[c] = exr_header->requested_pixel_types[c]; + } + } + + { + exr_image->num_channels = num_channels; + + exr_image->width = data_width; + exr_image->height = data_height; + } + + return TINYEXR_SUCCESS; +} + +static bool ReconstructLineOffsets( + std::vector<tinyexr::tinyexr_uint64> *offsets, size_t n, + const unsigned char *head, const unsigned char *marker, const size_t size) { + assert(head < marker); + assert(offsets->size() == n); + + for (size_t i = 0; i < n; i++) { + size_t offset = static_cast<size_t>(marker - head); + // Offset should not exceed whole EXR file/data size. 
+ if (offset >= size) { + return false; + } + + int y; + unsigned int data_len; + + memcpy(&y, marker, sizeof(int)); + memcpy(&data_len, marker + 4, sizeof(unsigned int)); + + if (data_len >= size) { + return false; + } + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&y)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&data_len)); + + (*offsets)[i] = offset; + + marker += data_len + 8; // 8 = 4 bytes(y) + 4 bytes(data_len) + } + + return true; +} + +static int DecodeEXRImage(EXRImage *exr_image, const EXRHeader *exr_header, + const unsigned char *head, + const unsigned char *marker, const size_t size, + const char **err) { + if (exr_image == NULL || exr_header == NULL || head == NULL || + marker == NULL || (size <= tinyexr::kEXRVersionSize)) { + if (err) { + (*err) = "Invalid argument."; + } + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + int num_scanline_blocks = 1; + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanline_blocks = 16; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + num_scanline_blocks = 32; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + num_scanline_blocks = 16; + } + + int data_width = exr_header->data_window[2] - exr_header->data_window[0] + 1; + int data_height = exr_header->data_window[3] - exr_header->data_window[1] + 1; + + // Read offset tables. + size_t num_blocks; + + if (exr_header->chunk_count > 0) { + // Use `chunkCount` attribute. 
+ num_blocks = static_cast<size_t>(exr_header->chunk_count); + } else if (exr_header->tiled) { + // @todo { LoD } + size_t num_x_tiles = static_cast<size_t>(data_width) / + static_cast<size_t>(exr_header->tile_size_x); + if (num_x_tiles * static_cast<size_t>(exr_header->tile_size_x) < + static_cast<size_t>(data_width)) { + num_x_tiles++; + } + size_t num_y_tiles = static_cast<size_t>(data_height) / + static_cast<size_t>(exr_header->tile_size_y); + if (num_y_tiles * static_cast<size_t>(exr_header->tile_size_y) < + static_cast<size_t>(data_height)) { + num_y_tiles++; + } + + num_blocks = num_x_tiles * num_y_tiles; + } else { + num_blocks = static_cast<size_t>(data_height) / + static_cast<size_t>(num_scanline_blocks); + if (num_blocks * static_cast<size_t>(num_scanline_blocks) < + static_cast<size_t>(data_height)) { + num_blocks++; + } + } + + std::vector<tinyexr::tinyexr_uint64> offsets(num_blocks); + + for (size_t y = 0; y < num_blocks; y++) { + tinyexr::tinyexr_uint64 offset; + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64)); + tinyexr::swap8(&offset); + if (offset >= size) { + if (err) { + (*err) = "Invalid offset value."; + } + return TINYEXR_ERROR_INVALID_DATA; + } + marker += sizeof(tinyexr::tinyexr_uint64); // = 8 + offsets[y] = offset; + } + + // If line offsets are invalid, we try to reconstruct it. + // See OpenEXR/IlmImf/ImfScanLineInputFile.cpp::readLineOffsets() for details. + for (size_t y = 0; y < num_blocks; y++) { + if (offsets[y] <= 0) { + // TODO(syoyo) Report as warning? + // if (err) { + // stringstream ss; + // ss << "Incomplete lineOffsets." 
<< std::endl; + // (*err) += ss.str(); + //} + bool ret = + ReconstructLineOffsets(&offsets, num_blocks, head, marker, size); + if (ret) { + // OK + break; + } else { + if (err) { + (*err) = "Cannot reconstruct lineOffset table."; + } + return TINYEXR_ERROR_INVALID_DATA; + } + } + } + + return DecodeChunk(exr_image, exr_header, offsets, head); +} + +} // namespace tinyexr + +int LoadEXR(float **out_rgba, int *width, int *height, const char *filename, + const char **err) { + if (out_rgba == NULL) { + if (err) { + (*err) = "Invalid argument.\n"; + } + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + EXRVersion exr_version; + EXRImage exr_image; + EXRHeader exr_header; + InitEXRHeader(&exr_header); + InitEXRImage(&exr_image); + + { + int ret = ParseEXRVersionFromFile(&exr_version, filename); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + if (exr_version.multipart || exr_version.non_image) { + if (err) { + (*err) = "Loading multipart or DeepImage is not supported yet.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; // @fixme. + } + } + + { + int ret = ParseEXRHeaderFromFile(&exr_header, &exr_version, filename, err); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + } + + // Read HALF channel as FLOAT. 
+ for (int i = 0; i < exr_header.num_channels; i++) { + if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) { + exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; + } + } + + { + int ret = LoadEXRImageFromFile(&exr_image, &exr_header, filename, err); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + } + + // RGBA + int idxR = -1; + int idxG = -1; + int idxB = -1; + int idxA = -1; + for (int c = 0; c < exr_header.num_channels; c++) { + if (strcmp(exr_header.channels[c].name, "R") == 0) { + idxR = c; + } else if (strcmp(exr_header.channels[c].name, "G") == 0) { + idxG = c; + } else if (strcmp(exr_header.channels[c].name, "B") == 0) { + idxB = c; + } else if (strcmp(exr_header.channels[c].name, "A") == 0) { + idxA = c; + } + } + + if (idxR == -1) { + if (err) { + (*err) = "R channel not found\n"; + } + + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxG == -1) { + if (err) { + (*err) = "G channel not found\n"; + } + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxB == -1) { + if (err) { + (*err) = "B channel not found\n"; + } + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + (*out_rgba) = reinterpret_cast<float *>( + malloc(4 * sizeof(float) * static_cast<size_t>(exr_image.width) * + static_cast<size_t>(exr_image.height))); + for (int i = 0; i < exr_image.width * exr_image.height; i++) { + (*out_rgba)[4 * i + 0] = + reinterpret_cast<float **>(exr_image.images)[idxR][i]; + (*out_rgba)[4 * i + 1] = + reinterpret_cast<float **>(exr_image.images)[idxG][i]; + (*out_rgba)[4 * i + 2] = + reinterpret_cast<float **>(exr_image.images)[idxB][i]; + if (idxA != -1) { + (*out_rgba)[4 * i + 3] = + reinterpret_cast<float **>(exr_image.images)[idxA][i]; + } else { + (*out_rgba)[4 * i + 3] = 1.0; + } + } + + (*width) = exr_image.width; + (*height) = exr_image.height; + + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + + return TINYEXR_SUCCESS; +} + +int 
ParseEXRHeaderFromMemory(EXRHeader *exr_header, const EXRVersion *version, + const unsigned char *memory, size_t size, + const char **err) { + if (memory == NULL || exr_header == NULL) { + if (err) { + (*err) = "Invalid argument.\n"; + } + + // Invalid argument + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (size < tinyexr::kEXRVersionSize) { + return TINYEXR_ERROR_INVALID_DATA; + } + + const unsigned char *marker = memory + tinyexr::kEXRVersionSize; + size_t marker_size = size - tinyexr::kEXRVersionSize; + + tinyexr::HeaderInfo info; + info.clear(); + + std::string err_str; + int ret = ParseEXRHeader(&info, NULL, version, &err_str, marker, marker_size); + + if (ret != TINYEXR_SUCCESS) { + if (err && !err_str.empty()) { + (*err) = strdup(err_str.c_str()); // May leak + } + } + + ConvertHeader(exr_header, info); + + // transfoer `tiled` from version. + exr_header->tiled = version->tiled; + + return ret; +} + +int LoadEXRFromMemory(float *out_rgba, const unsigned char *memory, size_t size, + const char **err) { + if (out_rgba == NULL || memory == NULL) { + if (err) { + (*err) = "Invalid argument.\n"; + } + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + EXRVersion exr_version; + EXRImage exr_image; + EXRHeader exr_header; + + InitEXRHeader(&exr_header); + + int ret = ParseEXRVersionFromMemory(&exr_version, memory, size); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + ret = ParseEXRHeaderFromMemory(&exr_header, &exr_version, memory, size, err); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + InitEXRImage(&exr_image); + ret = LoadEXRImageFromMemory(&exr_image, &exr_header, memory, size, err); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + // RGBA + int idxR = -1; + int idxG = -1; + int idxB = -1; + int idxA = -1; + for (int c = 0; c < exr_header.num_channels; c++) { + if (strcmp(exr_header.channels[c].name, "R") == 0) { + idxR = c; + } else if (strcmp(exr_header.channels[c].name, "G") == 0) { + idxG = c; + } else if 
(strcmp(exr_header.channels[c].name, "B") == 0) { + idxB = c; + } else if (strcmp(exr_header.channels[c].name, "A") == 0) { + idxA = c; + } + } + + if (idxR == -1) { + if (err) { + (*err) = "R channel not found\n"; + } + + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxG == -1) { + if (err) { + (*err) = "G channel not found\n"; + } + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxB == -1) { + if (err) { + (*err) = "B channel not found\n"; + } + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + // Assume `out_rgba` have enough memory allocated. + for (int i = 0; i < exr_image.width * exr_image.height; i++) { + out_rgba[4 * i + 0] = reinterpret_cast<float **>(exr_image.images)[idxR][i]; + out_rgba[4 * i + 1] = reinterpret_cast<float **>(exr_image.images)[idxG][i]; + out_rgba[4 * i + 2] = reinterpret_cast<float **>(exr_image.images)[idxB][i]; + if (idxA > 0) { + out_rgba[4 * i + 3] = + reinterpret_cast<float **>(exr_image.images)[idxA][i]; + } else { + out_rgba[4 * i + 3] = 1.0; + } + } + + return TINYEXR_SUCCESS; +} + +int LoadEXRImageFromFile(EXRImage *exr_image, const EXRHeader *exr_header, + const char *filename, const char **err) { + if (exr_image == NULL) { + if (err) { + (*err) = "Invalid argument."; + } + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + +// -- GODOT change for old MinGW on Travis CI -- +//#ifdef _WIN32 +#if defined(_MSC_VER) || (defined(__MINGW32__) && __MINGW64_VERSION_MAJOR >= 3) +// -- GODOT end -- + FILE *fp = NULL; + fopen_s(&fp, filename, "rb"); +#else + FILE *fp = fopen(filename, "rb"); +#endif + if (!fp) { + if (err) { + (*err) = "Cannot read file."; + } + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + size_t filesize; + // Compute size + fseek(fp, 0, SEEK_END); + filesize = static_cast<size_t>(ftell(fp)); + fseek(fp, 0, SEEK_SET); + + std::vector<unsigned char> buf(filesize); // @todo { use mmap } + { + size_t ret; + ret = fread(&buf[0], 1, filesize, 
fp); + assert(ret == filesize); + fclose(fp); + (void)ret; + } + + return LoadEXRImageFromMemory(exr_image, exr_header, &buf.at(0), filesize, + err); +} + +int LoadEXRImageFromMemory(EXRImage *exr_image, const EXRHeader *exr_header, + const unsigned char *memory, const size_t size, + const char **err) { + if (exr_image == NULL || memory == NULL || + (size < tinyexr::kEXRVersionSize)) { + if (err) { + (*err) = "Invalid argument."; + } + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (exr_header->header_len == 0) { + if (err) { + (*err) = "EXRHeader is not initialized."; + } + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + const unsigned char *head = memory; + const unsigned char *marker = reinterpret_cast<const unsigned char *>( + memory + exr_header->header_len + + 8); // +8 for magic number + version header. + return tinyexr::DecodeEXRImage(exr_image, exr_header, head, marker, size, + err); +} + +size_t SaveEXRImageToMemory(const EXRImage *exr_image, + const EXRHeader *exr_header, + unsigned char **memory_out, const char **err) { + if (exr_image == NULL || memory_out == NULL || + exr_header->compression_type < 0) { + if (err) { + (*err) = "Invalid argument."; + } + return 0; // @fixme + } + +#if !TINYEXR_USE_PIZ + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + if (err) { + (*err) = "PIZ compression is not supported in this build."; + } + return 0; + } +#endif + +#if !TINYEXR_USE_ZFP + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + if (err) { + (*err) = "ZFP compression is not supported in this build."; + } + return 0; + } +#endif + +#if TINYEXR_USE_ZFP + for (size_t i = 0; i < static_cast<size_t>(exr_header->num_channels); i++) { + if (exr_header->requested_pixel_types[i] != TINYEXR_PIXELTYPE_FLOAT) { + if (err) { + (*err) = "Pixel type must be FLOAT for ZFP compression."; + } + return 0; + } + } +#endif + + std::vector<unsigned char> memory; + + // Header + { + const char header[] = {0x76, 0x2f, 0x31, 0x01}; + 
memory.insert(memory.end(), header, header + 4); + } + + // Version, scanline. + { + char marker[] = {2, 0, 0, 0}; + /* @todo + if (exr_header->tiled) { + marker[1] |= 0x2; + } + if (exr_header->long_name) { + marker[1] |= 0x4; + } + if (exr_header->non_image) { + marker[1] |= 0x8; + } + if (exr_header->multipart) { + marker[1] |= 0x10; + } + */ + memory.insert(memory.end(), marker, marker + 4); + } + + int num_scanlines = 1; + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanlines = 16; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + num_scanlines = 32; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + num_scanlines = 16; + } + + // Write attributes. + std::vector<tinyexr::ChannelInfo> channels; + { + std::vector<unsigned char> data; + + for (int c = 0; c < exr_header->num_channels; c++) { + tinyexr::ChannelInfo info; + info.p_linear = 0; + info.pixel_type = exr_header->requested_pixel_types[c]; + info.x_sampling = 1; + info.y_sampling = 1; + info.name = std::string(exr_header->channels[c].name); + channels.push_back(info); + } + + tinyexr::WriteChannelInfo(data, channels); + + tinyexr::WriteAttributeToMemory(&memory, "channels", "chlist", &data.at(0), + static_cast<int>(data.size())); + } + + { + int comp = exr_header->compression_type; + tinyexr::swap4(reinterpret_cast<unsigned int *>(&comp)); + tinyexr::WriteAttributeToMemory( + &memory, "compression", "compression", + reinterpret_cast<const unsigned char *>(&comp), 1); + } + + { + int data[4] = {0, 0, exr_image->width - 1, exr_image->height - 1}; + tinyexr::swap4(reinterpret_cast<unsigned int *>(&data[0])); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&data[1])); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&data[2])); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&data[3])); + tinyexr::WriteAttributeToMemory( + &memory, "dataWindow", "box2i", + reinterpret_cast<const unsigned char *>(data), sizeof(int) * 4); + 
tinyexr::WriteAttributeToMemory( + &memory, "displayWindow", "box2i", + reinterpret_cast<const unsigned char *>(data), sizeof(int) * 4); + } + + { + unsigned char line_order = 0; // @fixme { read line_order from EXRHeader } + tinyexr::WriteAttributeToMemory(&memory, "lineOrder", "lineOrder", + &line_order, 1); + } + + { + float aspectRatio = 1.0f; + tinyexr::swap4(reinterpret_cast<unsigned int *>(&aspectRatio)); + tinyexr::WriteAttributeToMemory( + &memory, "pixelAspectRatio", "float", + reinterpret_cast<const unsigned char *>(&aspectRatio), sizeof(float)); + } + + { + float center[2] = {0.0f, 0.0f}; + tinyexr::swap4(reinterpret_cast<unsigned int *>(¢er[0])); + tinyexr::swap4(reinterpret_cast<unsigned int *>(¢er[1])); + tinyexr::WriteAttributeToMemory( + &memory, "screenWindowCenter", "v2f", + reinterpret_cast<const unsigned char *>(center), 2 * sizeof(float)); + } + + { + float w = static_cast<float>(exr_image->width); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&w)); + tinyexr::WriteAttributeToMemory(&memory, "screenWindowWidth", "float", + reinterpret_cast<const unsigned char *>(&w), + sizeof(float)); + } + + // Custom attributes + if (exr_header->num_custom_attributes > 0) { + for (int i = 0; i < exr_header->num_custom_attributes; i++) { + tinyexr::WriteAttributeToMemory( + &memory, exr_header->custom_attributes[i].name, + exr_header->custom_attributes[i].type, + reinterpret_cast<const unsigned char *>( + exr_header->custom_attributes[i].value), + exr_header->custom_attributes[i].size); + } + } + + { // end of header + unsigned char e = 0; + memory.push_back(e); + } + + int num_blocks = exr_image->height / num_scanlines; + if (num_blocks * num_scanlines < exr_image->height) { + num_blocks++; + } + + std::vector<tinyexr::tinyexr_uint64> offsets(static_cast<size_t>(num_blocks)); + + size_t headerSize = memory.size(); + tinyexr::tinyexr_uint64 offset = + headerSize + + static_cast<size_t>(num_blocks) * + sizeof( + tinyexr::tinyexr_int64); // sizeof(header) + 
sizeof(offsetTable) + + std::vector<unsigned char> data; + + std::vector<std::vector<unsigned char> > data_list( + static_cast<size_t>(num_blocks)); + std::vector<size_t> channel_offset_list( + static_cast<size_t>(exr_header->num_channels)); + + int pixel_data_size = 0; + size_t channel_offset = 0; + for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) { + channel_offset_list[c] = channel_offset; + if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + pixel_data_size += sizeof(unsigned short); + channel_offset += sizeof(unsigned short); + } else if (exr_header->requested_pixel_types[c] == + TINYEXR_PIXELTYPE_FLOAT) { + pixel_data_size += sizeof(float); + channel_offset += sizeof(float); + } else if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT) { + pixel_data_size += sizeof(unsigned int); + channel_offset += sizeof(unsigned int); + } else { + assert(0); + } + } + +#if TINYEXR_USE_ZFP + tinyexr::ZFPCompressionParam zfp_compression_param; + + // Use ZFP compression parameter from custom attributes(if such a parameter + // exists) + { + bool ret = tinyexr::FindZFPCompressionParam( + &zfp_compression_param, exr_header->custom_attributes, + exr_header->num_custom_attributes); + + if (!ret) { + // Use predefined compression parameter. 
+ zfp_compression_param.type = 0; + zfp_compression_param.rate = 2; + } + } +#endif + +// Use signed int since some OpenMP compiler doesn't allow unsigned type for +// `parallel for` +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (int i = 0; i < num_blocks; i++) { + size_t ii = static_cast<size_t>(i); + int start_y = num_scanlines * i; + int endY = (std::min)(num_scanlines * (i + 1), exr_image->height); + int h = endY - start_y; + + std::vector<unsigned char> buf( + static_cast<size_t>(exr_image->width * h * pixel_data_size)); + + for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) { + if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { + for (int y = 0; y < h; y++) { + for (int x = 0; x < exr_image->width; x++) { + tinyexr::FP16 h16; + h16.u = reinterpret_cast<unsigned short **>( + exr_image->images)[c][(y + start_y) * exr_image->width + x]; + + tinyexr::FP32 f32 = half_to_float(h16); + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&f32.f)); + + // Assume increasing Y + float *line_ptr = reinterpret_cast<float *>(&buf.at( + static_cast<size_t>(pixel_data_size * y * exr_image->width) + + channel_offset_list[c] * + static_cast<size_t>(exr_image->width))); + line_ptr[x] = f32.f; + } + } + } else if (exr_header->requested_pixel_types[c] == + TINYEXR_PIXELTYPE_HALF) { + for (int y = 0; y < h; y++) { + for (int x = 0; x < exr_image->width; x++) { + unsigned short val = reinterpret_cast<unsigned short **>( + exr_image->images)[c][(y + start_y) * exr_image->width + x]; + + tinyexr::swap2(&val); + + // Assume increasing Y + unsigned short *line_ptr = reinterpret_cast<unsigned short *>( + &buf.at(static_cast<size_t>(pixel_data_size * y * + exr_image->width) + + channel_offset_list[c] * + static_cast<size_t>(exr_image->width))); + line_ptr[x] = val; + } + } + } else { + assert(0); + } + + } else if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { + if 
(exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + for (int y = 0; y < h; y++) { + for (int x = 0; x < exr_image->width; x++) { + tinyexr::FP32 f32; + f32.f = reinterpret_cast<float **>( + exr_image->images)[c][(y + start_y) * exr_image->width + x]; + + tinyexr::FP16 h16; + h16 = float_to_half_full(f32); + + tinyexr::swap2(reinterpret_cast<unsigned short *>(&h16.u)); + + // Assume increasing Y + unsigned short *line_ptr = reinterpret_cast<unsigned short *>( + &buf.at(static_cast<size_t>(pixel_data_size * y * + exr_image->width) + + channel_offset_list[c] * + static_cast<size_t>(exr_image->width))); + line_ptr[x] = h16.u; + } + } + } else if (exr_header->requested_pixel_types[c] == + TINYEXR_PIXELTYPE_FLOAT) { + for (int y = 0; y < h; y++) { + for (int x = 0; x < exr_image->width; x++) { + float val = reinterpret_cast<float **>( + exr_image->images)[c][(y + start_y) * exr_image->width + x]; + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&val)); + + // Assume increasing Y + float *line_ptr = reinterpret_cast<float *>(&buf.at( + static_cast<size_t>(pixel_data_size * y * exr_image->width) + + channel_offset_list[c] * + static_cast<size_t>(exr_image->width))); + line_ptr[x] = val; + } + } + } else { + assert(0); + } + } else if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_UINT) { + for (int y = 0; y < h; y++) { + for (int x = 0; x < exr_image->width; x++) { + unsigned int val = reinterpret_cast<unsigned int **>( + exr_image->images)[c][(y + start_y) * exr_image->width + x]; + + tinyexr::swap4(&val); + + // Assume increasing Y + unsigned int *line_ptr = reinterpret_cast<unsigned int *>(&buf.at( + static_cast<size_t>(pixel_data_size * y * exr_image->width) + + channel_offset_list[c] * + static_cast<size_t>(exr_image->width))); + line_ptr[x] = val; + } + } + } + } + + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(uncompressed) + std::vector<unsigned char> 
header(8); + unsigned int data_len = static_cast<unsigned int>(buf.size()); + memcpy(&header.at(0), &start_y, sizeof(int)); + memcpy(&header.at(4), &data_len, sizeof(unsigned int)); + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0))); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4))); + + data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); + data_list[ii].insert(data_list[ii].end(), buf.begin(), + buf.begin() + data_len); + + } else if ((exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || + (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { +#if TINYEXR_USE_MINIZ + std::vector<unsigned char> block(tinyexr::miniz::mz_compressBound( + static_cast<unsigned long>(buf.size()))); +#else + std::vector<unsigned char> block( + compressBound(static_cast<uLong>(buf.size()))); +#endif + tinyexr::tinyexr_uint64 outSize = block.size(); + + tinyexr::CompressZip(&block.at(0), outSize, + reinterpret_cast<const unsigned char *>(&buf.at(0)), + static_cast<unsigned long>(buf.size())); + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + std::vector<unsigned char> header(8); + unsigned int data_len = static_cast<unsigned int>(outSize); // truncate + memcpy(&header.at(0), &start_y, sizeof(int)); + memcpy(&header.at(4), &data_len, sizeof(unsigned int)); + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0))); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4))); + + data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); + data_list[ii].insert(data_list[ii].end(), block.begin(), + block.begin() + data_len); + + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_RLE) { + // (buf.size() * 3) / 2 would be enough. 
+ std::vector<unsigned char> block((buf.size() * 3) / 2); + + tinyexr::tinyexr_uint64 outSize = block.size(); + + tinyexr::CompressRle(&block.at(0), outSize, + reinterpret_cast<const unsigned char *>(&buf.at(0)), + static_cast<unsigned long>(buf.size())); + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + std::vector<unsigned char> header(8); + unsigned int data_len = static_cast<unsigned int>(outSize); // truncate + memcpy(&header.at(0), &start_y, sizeof(int)); + memcpy(&header.at(4), &data_len, sizeof(unsigned int)); + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0))); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4))); + + data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); + data_list[ii].insert(data_list[ii].end(), block.begin(), + block.begin() + data_len); + + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { +#if TINYEXR_USE_PIZ + unsigned int bufLen = + 1024 + static_cast<unsigned int>( + 1.2 * static_cast<unsigned int>( + buf.size())); // @fixme { compute good bound. 
} + std::vector<unsigned char> block(bufLen); + unsigned int outSize = static_cast<unsigned int>(block.size()); + + CompressPiz(&block.at(0), outSize, + reinterpret_cast<const unsigned char *>(&buf.at(0)), + buf.size(), channels, exr_image->width, h); + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + std::vector<unsigned char> header(8); + unsigned int data_len = outSize; + memcpy(&header.at(0), &start_y, sizeof(int)); + memcpy(&header.at(4), &data_len, sizeof(unsigned int)); + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0))); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4))); + + data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); + data_list[ii].insert(data_list[ii].end(), block.begin(), + block.begin() + data_len); + +#else + assert(0); +#endif + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { +#if TINYEXR_USE_ZFP + std::vector<unsigned char> block; + unsigned int outSize; + + tinyexr::CompressZfp( + &block, &outSize, reinterpret_cast<const float *>(&buf.at(0)), + exr_image->width, h, exr_header->num_channels, zfp_compression_param); + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + std::vector<unsigned char> header(8); + unsigned int data_len = outSize; + memcpy(&header.at(0), &start_y, sizeof(int)); + memcpy(&header.at(4), &data_len, sizeof(unsigned int)); + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0))); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4))); + + data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); + data_list[ii].insert(data_list[ii].end(), block.begin(), + block.begin() + data_len); + +#else + assert(0); +#endif + } else { + assert(0); + } + } // omp parallel + + for (size_t i = 0; i < static_cast<size_t>(num_blocks); i++) { + data.insert(data.end(), data_list[i].begin(), data_list[i].end()); + + offsets[i] = offset; + 
tinyexr::swap8(reinterpret_cast<tinyexr::tinyexr_uint64 *>(&offsets[i])); + offset += data_list[i].size(); + } + + { + memory.insert( + memory.end(), reinterpret_cast<unsigned char *>(&offsets.at(0)), + reinterpret_cast<unsigned char *>(&offsets.at(0)) + + sizeof(tinyexr::tinyexr_uint64) * static_cast<size_t>(num_blocks)); + } + + { memory.insert(memory.end(), data.begin(), data.end()); } + + assert(memory.size() > 0); + + (*memory_out) = static_cast<unsigned char *>(malloc(memory.size())); + memcpy((*memory_out), &memory.at(0), memory.size()); + + return memory.size(); // OK +} +
// Serializes `exr_image` (as described by `exr_header`) with
// SaveEXRImageToMemory() and writes the resulting bytes to `filename`.
//
// Returns TINYEXR_SUCCESS when every byte was written, otherwise:
//   TINYEXR_ERROR_INVALID_ARGUMENT   - a NULL image/header/filename or a
//                                      negative compression type.
//   TINYEXR_ERROR_UNSUPPORTED_FORMAT - PIZ or ZFP requested but compiled out.
//   TINYEXR_ERROR_INVALID_DATA       - serialization produced no data.
//   TINYEXR_ERROR_CANT_OPEN_FILE     - the file could not be opened, or could
//                                      not be written completely.
// When `err` is non-NULL it receives a static message for the failures that
// are detected in this function.
int SaveEXRImageToFile(const EXRImage *exr_image, const EXRHeader *exr_header,
                       const char *filename, const char **err) {
  // exr_header is dereferenced in this very condition, so it has to be
  // validated together with the other pointers.
  if (exr_image == NULL || exr_header == NULL || filename == NULL ||
      exr_header->compression_type < 0) {
    if (err) {
      (*err) = "Invalid argument.";
    }
    return TINYEXR_ERROR_INVALID_ARGUMENT;
  }

#if !TINYEXR_USE_PIZ
  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
    if (err) {
      (*err) = "PIZ compression is not supported in this build.";
    }
    // This used to be `return 0`, which is not an error code, so callers
    // could not tell that nothing had been written.
    return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
  }
#endif

#if !TINYEXR_USE_ZFP
  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
    if (err) {
      (*err) = "ZFP compression is not supported in this build.";
    }
    return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
  }
#endif

  // Serialize before touching the file system so that a failed serialization
  // does not leave an empty, truncated file behind.
  unsigned char *mem = NULL;
  const size_t mem_size =
      SaveEXRImageToMemory(exr_image, exr_header, &mem, err);
  if (mem_size == 0 || mem == NULL) {
    // The success path of SaveEXRImageToMemory returns a size that it asserts
    // to be > 0, so 0 can only mean failure.
    // NOTE(review): whether SaveEXRImageToMemory fills in `err` on failure is
    // not visible from here, so `err` is left as that call set it - confirm.
    free(mem);
    return TINYEXR_ERROR_INVALID_DATA;
  }

// -- GODOT change for old MinGW on Travis CI --
//#ifdef _WIN32
#if defined(_MSC_VER) || (defined(__MINGW32__) && __MINGW64_VERSION_MAJOR >= 3)
// -- GODOT end --
  FILE *fp = NULL;
  fopen_s(&fp, filename, "wb");
#else
  FILE *fp = fopen(filename, "wb");
#endif
  if (!fp) {
    free(mem);
    if (err) {
      (*err) = "Cannot write a file.";
    }
    return TINYEXR_ERROR_CANT_OPEN_FILE;
  }

  const size_t written = fwrite(mem, 1, mem_size, fp);
  free(mem);

  // fclose() flushes buffered data, so its result is part of the write
  // status as well.
  const bool closed = (fclose(fp) == 0);

  if (written != mem_size || !closed) {
    if (err) {
      (*err) = "Cannot write a file.";
    }
    return TINYEXR_ERROR_CANT_OPEN_FILE;
  }

  return TINYEXR_SUCCESS;
}
+ +int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { + if (deep_image
== NULL) { + if (err) { + (*err) = "Invalid argument."; + } + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + FILE *fp = fopen(filename, "rb"); + if (!fp) { + if (err) { + (*err) = "Cannot read file."; + } + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + size_t filesize; + // Compute size + fseek(fp, 0, SEEK_END); + filesize = static_cast<size_t>(ftell(fp)); + fseek(fp, 0, SEEK_SET); + + if (filesize == 0) { + fclose(fp); + if (err) { + (*err) = "File size is zero."; + } + return TINYEXR_ERROR_INVALID_FILE; + } + + std::vector<char> buf(filesize); // @todo { use mmap } + { + size_t ret; + ret = fread(&buf[0], 1, filesize, fp); + assert(ret == filesize); + (void)ret; + } + fclose(fp); + + const char *head = &buf[0]; + const char *marker = &buf[0]; + + // Header check. + { + const char header[] = {0x76, 0x2f, 0x31, 0x01}; + + if (memcmp(marker, header, 4) != 0) { + if (err) { + (*err) = "Invalid magic number."; + } + return TINYEXR_ERROR_INVALID_MAGIC_NUMBER; + } + marker += 4; + } + + // Version, scanline. + { + // ver 2.0, scanline, deep bit on(0x800) + // must be [2, 0, 0, 0] + if (marker[0] != 2 || marker[1] != 8 || marker[2] != 0 || marker[3] != 0) { + if (err) { + (*err) = "Unsupported version or scanline."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + marker += 4; + } + + int dx = -1; + int dy = -1; + int dw = -1; + int dh = -1; + int num_scanline_blocks = 1; // 16 for ZIP compression. 
+ int compression_type = -1; + int num_channels = -1; + std::vector<tinyexr::ChannelInfo> channels; + + // Read attributes + size_t size = filesize - tinyexr::kEXRVersionSize; + for (;;) { + if (0 == size) { + return TINYEXR_ERROR_INVALID_DATA; + } else if (marker[0] == '\0') { + size--; + break; + } + + std::string attr_name; + std::string attr_type; + std::vector<unsigned char> data; + size_t marker_size; + if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size, + marker, size)) { + return TINYEXR_ERROR_INVALID_DATA; + } + marker += marker_size; + size -= marker_size; + + if (attr_name.compare("compression") == 0) { + compression_type = data[0]; + if (compression_type > TINYEXR_COMPRESSIONTYPE_PIZ) { + if (err) { + (*err) = "Unsupported compression type."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanline_blocks = 16; + } + + } else if (attr_name.compare("channels") == 0) { + // name: zero-terminated string, from 1 to 255 bytes long + // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2 + // pLinear: unsigned char, possible values are 0 and 1 + // reserved: three chars, should be zero + // xSampling: int + // ySampling: int + + tinyexr::ReadChannelInfo(channels, data); + + num_channels = static_cast<int>(channels.size()); + + if (num_channels < 1) { + if (err) { + (*err) = "Invalid channels format."; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + } else if (attr_name.compare("dataWindow") == 0) { + memcpy(&dx, &data.at(0), sizeof(int)); + memcpy(&dy, &data.at(4), sizeof(int)); + memcpy(&dw, &data.at(8), sizeof(int)); + memcpy(&dh, &data.at(12), sizeof(int)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&dx)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&dy)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&dw)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&dh)); + + } else if (attr_name.compare("displayWindow") == 0) { + int x; + int 
y; + int w; + int h; + memcpy(&x, &data.at(0), sizeof(int)); + memcpy(&y, &data.at(4), sizeof(int)); + memcpy(&w, &data.at(8), sizeof(int)); + memcpy(&h, &data.at(12), sizeof(int)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&x)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&y)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&w)); + tinyexr::swap4(reinterpret_cast<unsigned int *>(&h)); + } + } + + assert(dx >= 0); + assert(dy >= 0); + assert(dw >= 0); + assert(dh >= 0); + assert(num_channels >= 1); + + int data_width = dw - dx + 1; + int data_height = dh - dy + 1; + + std::vector<float> image( + static_cast<size_t>(data_width * data_height * 4)); // 4 = RGBA + + // Read offset tables. + int num_blocks = data_height / num_scanline_blocks; + if (num_blocks * num_scanline_blocks < data_height) { + num_blocks++; + } + + std::vector<tinyexr::tinyexr_int64> offsets(static_cast<size_t>(num_blocks)); + + for (size_t y = 0; y < static_cast<size_t>(num_blocks); y++) { + tinyexr::tinyexr_int64 offset; + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_int64)); + tinyexr::swap8(reinterpret_cast<tinyexr::tinyexr_uint64 *>(&offset)); + marker += sizeof(tinyexr::tinyexr_int64); // = 8 + offsets[y] = offset; + } + +#if TINYEXR_USE_PIZ + if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) || + (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ)) { +#else + if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { +#endif + // OK + } else { + if (err) { + (*err) = "Unsupported format."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + deep_image->image = static_cast<float ***>( + malloc(sizeof(float **) * 
static_cast<size_t>(num_channels))); + for (int c = 0; c < num_channels; c++) { + deep_image->image[c] = static_cast<float **>( + malloc(sizeof(float *) * static_cast<size_t>(data_height))); + for (int y = 0; y < data_height; y++) { + } + } + + deep_image->offset_table = static_cast<int **>( + malloc(sizeof(int *) * static_cast<size_t>(data_height))); + for (int y = 0; y < data_height; y++) { + deep_image->offset_table[y] = static_cast<int *>( + malloc(sizeof(int) * static_cast<size_t>(data_width))); + } + + for (size_t y = 0; y < static_cast<size_t>(num_blocks); y++) { + const unsigned char *data_ptr = + reinterpret_cast<const unsigned char *>(head + offsets[y]); + + // int: y coordinate + // int64: packed size of pixel offset table + // int64: packed size of sample data + // int64: unpacked size of sample data + // compressed pixel offset table + // compressed sample data + int line_no; + tinyexr::tinyexr_int64 packedOffsetTableSize; + tinyexr::tinyexr_int64 packedSampleDataSize; + tinyexr::tinyexr_int64 unpackedSampleDataSize; + memcpy(&line_no, data_ptr, sizeof(int)); + memcpy(&packedOffsetTableSize, data_ptr + 4, + sizeof(tinyexr::tinyexr_int64)); + memcpy(&packedSampleDataSize, data_ptr + 12, + sizeof(tinyexr::tinyexr_int64)); + memcpy(&unpackedSampleDataSize, data_ptr + 20, + sizeof(tinyexr::tinyexr_int64)); + + tinyexr::swap4(reinterpret_cast<unsigned int *>(&line_no)); + tinyexr::swap8( + reinterpret_cast<tinyexr::tinyexr_uint64 *>(&packedOffsetTableSize)); + tinyexr::swap8( + reinterpret_cast<tinyexr::tinyexr_uint64 *>(&packedSampleDataSize)); + tinyexr::swap8( + reinterpret_cast<tinyexr::tinyexr_uint64 *>(&unpackedSampleDataSize)); + + std::vector<int> pixelOffsetTable(static_cast<size_t>(data_width)); + + // decode pixel offset table. 
+ { + unsigned long dstLen = + static_cast<unsigned long>(pixelOffsetTable.size() * sizeof(int)); + tinyexr::DecompressZip( + reinterpret_cast<unsigned char *>(&pixelOffsetTable.at(0)), &dstLen, + data_ptr + 28, static_cast<unsigned long>(packedOffsetTableSize)); + + assert(dstLen == pixelOffsetTable.size() * sizeof(int)); + for (size_t i = 0; i < static_cast<size_t>(data_width); i++) { + deep_image->offset_table[y][i] = pixelOffsetTable[i]; + } + } + + std::vector<unsigned char> sample_data( + static_cast<size_t>(unpackedSampleDataSize)); + + // decode sample data. + { + unsigned long dstLen = static_cast<unsigned long>(unpackedSampleDataSize); + tinyexr::DecompressZip( + reinterpret_cast<unsigned char *>(&sample_data.at(0)), &dstLen, + data_ptr + 28 + packedOffsetTableSize, + static_cast<unsigned long>(packedSampleDataSize)); + assert(dstLen == static_cast<unsigned long>(unpackedSampleDataSize)); + } + + // decode sample + int sampleSize = -1; + std::vector<int> channel_offset_list(static_cast<size_t>(num_channels)); + { + int channel_offset = 0; + for (size_t i = 0; i < static_cast<size_t>(num_channels); i++) { + channel_offset_list[i] = channel_offset; + if (channels[i].pixel_type == TINYEXR_PIXELTYPE_UINT) { // UINT + channel_offset += 4; + } else if (channels[i].pixel_type == TINYEXR_PIXELTYPE_HALF) { // half + channel_offset += 2; + } else if (channels[i].pixel_type == + TINYEXR_PIXELTYPE_FLOAT) { // float + channel_offset += 4; + } else { + assert(0); + } + } + sampleSize = channel_offset; + } + assert(sampleSize >= 2); + + assert(static_cast<size_t>( + pixelOffsetTable[static_cast<size_t>(data_width - 1)] * + sampleSize) == sample_data.size()); + int samples_per_line = static_cast<int>(sample_data.size()) / sampleSize; + + // + // Alloc memory + // + + // + // pixel data is stored as image[channels][pixel_samples] + // + { + tinyexr::tinyexr_uint64 data_offset = 0; + for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) { + deep_image->image[c][y] 
= static_cast<float *>( + malloc(sizeof(float) * static_cast<size_t>(samples_per_line))); + + if (channels[c].pixel_type == 0) { // UINT + for (size_t x = 0; x < static_cast<size_t>(samples_per_line); x++) { + unsigned int ui = *reinterpret_cast<unsigned int *>( + &sample_data.at(data_offset + x * sizeof(int))); + deep_image->image[c][y][x] = static_cast<float>(ui); // @fixme + } + data_offset += + sizeof(unsigned int) * static_cast<size_t>(samples_per_line); + } else if (channels[c].pixel_type == 1) { // half + for (size_t x = 0; x < static_cast<size_t>(samples_per_line); x++) { + tinyexr::FP16 f16; + f16.u = *reinterpret_cast<unsigned short *>( + &sample_data.at(data_offset + x * sizeof(short))); + tinyexr::FP32 f32 = half_to_float(f16); + deep_image->image[c][y][x] = f32.f; + } + data_offset += sizeof(short) * static_cast<size_t>(samples_per_line); + } else { // float + for (size_t x = 0; x < static_cast<size_t>(samples_per_line); x++) { + float f = *reinterpret_cast<float *>( + &sample_data.at(data_offset + x * sizeof(float))); + deep_image->image[c][y][x] = f; + } + data_offset += sizeof(float) * static_cast<size_t>(samples_per_line); + } + } + } + } // y + + deep_image->width = data_width; + deep_image->height = data_height; + + deep_image->channel_names = static_cast<const char **>( + malloc(sizeof(const char *) * static_cast<size_t>(num_channels))); + for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) { +#ifdef _WIN32 + deep_image->channel_names[c] = _strdup(channels[c].name.c_str()); +#else + deep_image->channel_names[c] = strdup(channels[c].name.c_str()); +#endif + } + deep_image->num_channels = num_channels; + + return TINYEXR_SUCCESS; +} + +void InitEXRImage(EXRImage *exr_image) { + if (exr_image == NULL) { + return; + } + + exr_image->width = 0; + exr_image->height = 0; + exr_image->num_channels = 0; + + exr_image->images = NULL; + exr_image->tiles = NULL; + + exr_image->num_tiles = 0; +} + +void InitEXRHeader(EXRHeader *exr_header) { + if 
(exr_header == NULL) { + return; + } + + memset(exr_header, 0, sizeof(EXRHeader)); +} + +int FreeEXRHeader(EXRHeader *exr_header) { + if (exr_header == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (exr_header->channels) { + free(exr_header->channels); + } + + if (exr_header->pixel_types) { + free(exr_header->pixel_types); + } + + if (exr_header->requested_pixel_types) { + free(exr_header->requested_pixel_types); + } + + for (int i = 0; i < exr_header->num_custom_attributes; i++) { + if (exr_header->custom_attributes[i].value) { + free(exr_header->custom_attributes[i].value); + } + } + + return TINYEXR_SUCCESS; +} + +int FreeEXRImage(EXRImage *exr_image) { + if (exr_image == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + for (int i = 0; i < exr_image->num_channels; i++) { + if (exr_image->images && exr_image->images[i]) { + free(exr_image->images[i]); + } + } + + if (exr_image->images) { + free(exr_image->images); + } + + if (exr_image->tiles) { + for (int tid = 0; tid < exr_image->num_tiles; tid++) { + for (int i = 0; i < exr_image->num_channels; i++) { + if (exr_image->tiles[tid].images && exr_image->tiles[tid].images[i]) { + free(exr_image->tiles[tid].images[i]); + } + } + if (exr_image->tiles[tid].images) { + free(exr_image->tiles[tid].images); + } + } + } + + return TINYEXR_SUCCESS; +} + +int ParseEXRHeaderFromFile(EXRHeader *exr_header, const EXRVersion *exr_version, + const char *filename, const char **err) { + if (exr_header == NULL || exr_version == NULL || filename == NULL) { + if (err) { + (*err) = "Invalid argument."; + } + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + +// -- GODOT change for old MinGW on Travis CI -- +//#ifdef _WIN32 +#if defined(_MSC_VER) || (defined(__MINGW32__) && __MINGW64_VERSION_MAJOR >= 3) +// -- GODOT end -- + FILE *fp = NULL; + fopen_s(&fp, filename, "rb"); +#else + FILE *fp = fopen(filename, "rb"); +#endif + if (!fp) { + if (err) { + (*err) = "Cannot read file."; + } + return 
TINYEXR_ERROR_CANT_OPEN_FILE; + } + + size_t filesize; + // Compute size + fseek(fp, 0, SEEK_END); + filesize = static_cast<size_t>(ftell(fp)); + fseek(fp, 0, SEEK_SET); + + std::vector<unsigned char> buf(filesize); // @todo { use mmap } + { + size_t ret; + ret = fread(&buf[0], 1, filesize, fp); + assert(ret == filesize); + fclose(fp); + + if (ret != filesize) { + if (err) { + (*err) = "fread error."; + } + return TINYEXR_ERROR_INVALID_FILE; + } + } + + return ParseEXRHeaderFromMemory(exr_header, exr_version, &buf.at(0), filesize, + err); +} + +int ParseEXRMultipartHeaderFromMemory(EXRHeader ***exr_headers, + int *num_headers, + const EXRVersion *exr_version, + const unsigned char *memory, size_t size, + const char **err) { + if (memory == NULL || exr_headers == NULL || num_headers == NULL || + exr_version == NULL) { + // Invalid argument + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (size < tinyexr::kEXRVersionSize) { + return TINYEXR_ERROR_INVALID_DATA; + } + + const unsigned char *marker = memory + tinyexr::kEXRVersionSize; + size_t marker_size = size - tinyexr::kEXRVersionSize; + + std::vector<tinyexr::HeaderInfo> infos; + + for (;;) { + tinyexr::HeaderInfo info; + info.clear(); + + std::string err_str; + bool empty_header = false; + int ret = ParseEXRHeader(&info, &empty_header, exr_version, &err_str, + marker, marker_size); + + if (ret != TINYEXR_SUCCESS) { + if (err) { + (*err) = strdup(err_str.c_str()); // may leak + } + return ret; + } + + if (empty_header) { + marker += 1; // skip '\0' + break; + } + + // `chunkCount` must exist in the header. + if (info.chunk_count == 0) { + if (err) { + (*err) = "`chunkCount' attribute is not found in the header."; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + infos.push_back(info); + + // move to next header. + marker += info.header_len; + size -= info.header_len; + } + + // allocate memory for EXRHeader and create array of EXRHeader pointers. 
+ (*exr_headers) = + static_cast<EXRHeader **>(malloc(sizeof(EXRHeader *) * infos.size())); + for (size_t i = 0; i < infos.size(); i++) { + EXRHeader *exr_header = static_cast<EXRHeader *>(malloc(sizeof(EXRHeader))); + + ConvertHeader(exr_header, infos[i]); + + // transfoer `tiled` from version. + exr_header->tiled = exr_version->tiled; + + (*exr_headers)[i] = exr_header; + } + + (*num_headers) = static_cast<int>(infos.size()); + + return TINYEXR_SUCCESS; +} + +int ParseEXRMultipartHeaderFromFile(EXRHeader ***exr_headers, int *num_headers, + const EXRVersion *exr_version, + const char *filename, const char **err) { + if (exr_headers == NULL || num_headers == NULL || exr_version == NULL || + filename == NULL) { + if (err) { + (*err) = "Invalid argument."; + } + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + +// -- GODOT change for old MinGW on Travis CI -- +//#ifdef _WIN32 +#if defined(_MSC_VER) || (defined(__MINGW32__) && __MINGW64_VERSION_MAJOR >= 3) +// -- GODOT end -- + FILE *fp = NULL; + fopen_s(&fp, filename, "rb"); +#else + FILE *fp = fopen(filename, "rb"); +#endif + if (!fp) { + if (err) { + (*err) = "Cannot read file."; + } + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + size_t filesize; + // Compute size + fseek(fp, 0, SEEK_END); + filesize = static_cast<size_t>(ftell(fp)); + fseek(fp, 0, SEEK_SET); + + std::vector<unsigned char> buf(filesize); // @todo { use mmap } + { + size_t ret; + ret = fread(&buf[0], 1, filesize, fp); + assert(ret == filesize); + fclose(fp); + + if (ret != filesize) { + if (err) { + (*err) = "fread error."; + } + return TINYEXR_ERROR_INVALID_FILE; + } + } + + return ParseEXRMultipartHeaderFromMemory( + exr_headers, num_headers, exr_version, &buf.at(0), filesize, err); +} + +int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory, + size_t size) { + if (version == NULL || memory == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (size < tinyexr::kEXRVersionSize) { + return TINYEXR_ERROR_INVALID_DATA; + 
} + + const unsigned char *marker = memory; + + // Header check. + { + const char header[] = {0x76, 0x2f, 0x31, 0x01}; + + if (memcmp(marker, header, 4) != 0) { + return TINYEXR_ERROR_INVALID_MAGIC_NUMBER; + } + marker += 4; + } + + version->tiled = false; + version->long_name = false; + version->non_image = false; + version->multipart = false; + + // Parse version header. + { + // must be 2 + if (marker[0] != 2) { + return TINYEXR_ERROR_INVALID_EXR_VERSION; + } + + if (version == NULL) { + return TINYEXR_SUCCESS; // May OK + } + + version->version = 2; + + if (marker[1] & 0x2) { // 9th bit + version->tiled = true; + } + if (marker[1] & 0x4) { // 10th bit + version->long_name = true; + } + if (marker[1] & 0x8) { // 11th bit + version->non_image = true; // (deep image) + } + if (marker[1] & 0x10) { // 12th bit + version->multipart = true; + } + } + + return TINYEXR_SUCCESS; +} + +int ParseEXRVersionFromFile(EXRVersion *version, const char *filename) { + if (filename == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + +// -- GODOT change for old MinGW on Travis CI -- +//#ifdef _WIN32 +#if defined(_MSC_VER) || (defined(__MINGW32__) && __MINGW64_VERSION_MAJOR >= 3) +// -- GODOT end -- + FILE *fp = NULL; + fopen_s(&fp, filename, "rb"); +#else + FILE *fp = fopen(filename, "rb"); +#endif + if (!fp) { + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + size_t file_size; + // Compute size + fseek(fp, 0, SEEK_END); + file_size = static_cast<size_t>(ftell(fp)); + fseek(fp, 0, SEEK_SET); + + if (file_size < tinyexr::kEXRVersionSize) { + return TINYEXR_ERROR_INVALID_FILE; + } + + unsigned char buf[tinyexr::kEXRVersionSize]; + size_t ret = fread(&buf[0], 1, tinyexr::kEXRVersionSize, fp); + fclose(fp); + + if (ret != tinyexr::kEXRVersionSize) { + return TINYEXR_ERROR_INVALID_FILE; + } + + return ParseEXRVersionFromMemory(version, buf, tinyexr::kEXRVersionSize); +} + +int LoadEXRMultipartImageFromMemory(EXRImage *exr_images, + const EXRHeader **exr_headers, + unsigned int 
num_parts, + const unsigned char *memory, + const size_t size, const char **err) { + if (exr_images == NULL || exr_headers == NULL || num_parts == 0 || + memory == NULL || (size <= tinyexr::kEXRVersionSize)) { + if (err) { + (*err) = "Invalid argument."; + } + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + // compute total header size. + size_t total_header_size = 0; + for (unsigned int i = 0; i < num_parts; i++) { + if (exr_headers[i]->header_len == 0) { + if (err) { + (*err) = "EXRHeader is not initialized."; + } + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + total_header_size += exr_headers[i]->header_len; + } + + const char *marker = reinterpret_cast<const char *>( + memory + total_header_size + 4 + + 4); // +8 for magic number and version header. + + marker += 1; // Skip empty header. + + // NOTE 1: + // In multipart image, There is 'part number' before chunk data. + // 4 byte : part number + // 4+ : chunk + // + // NOTE 2: + // EXR spec says 'part number' is 'unsigned long' but actually this is + // 'unsigned int(4 bytes)' in OpenEXR implementation... + // http://www.openexr.com/openexrfilelayout.pdf + + // Load chunk offset table. + std::vector<std::vector<tinyexr::tinyexr_uint64> > chunk_offset_table_list; + for (size_t i = 0; i < static_cast<size_t>(num_parts); i++) { + std::vector<tinyexr::tinyexr_uint64> offset_table( + static_cast<size_t>(exr_headers[i]->chunk_count)); + + for (size_t c = 0; c < offset_table.size(); c++) { + tinyexr::tinyexr_uint64 offset; + memcpy(&offset, marker, 8); + tinyexr::swap8(&offset); + + if (offset >= size) { + if (err) { + (*err) = "Invalid offset size."; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + offset_table[c] = offset + 4; // +4 to skip 'part number' + marker += 8; + } + + chunk_offset_table_list.push_back(offset_table); + } + + // Decode image. 
+ for (size_t i = 0; i < static_cast<size_t>(num_parts); i++) { + std::vector<tinyexr::tinyexr_uint64> &offset_table = + chunk_offset_table_list[i]; + + // First check 'part number' is identitical to 'i' + for (size_t c = 0; c < offset_table.size(); c++) { + const unsigned char *part_number_addr = + memory + offset_table[c] - 4; // -4 to move to 'part number' field. + unsigned int part_no; + memcpy(&part_no, part_number_addr, sizeof(unsigned int)); // 4 + tinyexr::swap4(&part_no); + + if (part_no != i) { + assert(0); + return TINYEXR_ERROR_INVALID_DATA; + } + } + + int ret = tinyexr::DecodeChunk(&exr_images[i], exr_headers[i], offset_table, + memory); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + } + + return TINYEXR_SUCCESS; +} + +int LoadEXRMultipartImageFromFile(EXRImage *exr_images, + const EXRHeader **exr_headers, + unsigned int num_parts, const char *filename, + const char **err) { + if (exr_images == NULL || exr_headers == NULL || num_parts == 0) { + if (err) { + (*err) = "Invalid argument."; + } + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + +// -- GODOT change for old MinGW on Travis CI -- +//#ifdef _WIN32 +#if defined(_MSC_VER) || (defined(__MINGW32__) && __MINGW64_VERSION_MAJOR >= 3) +// -- GODOT end -- + FILE *fp = NULL; + fopen_s(&fp, filename, "rb"); +#else + FILE *fp = fopen(filename, "rb"); +#endif + if (!fp) { + if (err) { + (*err) = "Cannot read file."; + } + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + size_t filesize; + // Compute size + fseek(fp, 0, SEEK_END); + filesize = static_cast<size_t>(ftell(fp)); + fseek(fp, 0, SEEK_SET); + + std::vector<unsigned char> buf(filesize); // @todo { use mmap } + { + size_t ret; + ret = fread(&buf[0], 1, filesize, fp); + assert(ret == filesize); + fclose(fp); + (void)ret; + } + + return LoadEXRMultipartImageFromMemory(exr_images, exr_headers, num_parts, + &buf.at(0), filesize, err); +} + +int SaveEXR(const float *data, int width, int height, int components, + const char *outfilename) { + if (components 
== 3 || components == 4) { + // OK + } else { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + // Assume at least 16x16 pixels. + if (width < 16) return TINYEXR_ERROR_INVALID_ARGUMENT; + if (height < 16) return TINYEXR_ERROR_INVALID_ARGUMENT; + + EXRHeader header; + InitEXRHeader(&header); + + EXRImage image; + InitEXRImage(&image); + + image.num_channels = components; + + std::vector<float> images[4]; + images[0].resize(static_cast<size_t>(width * height)); + images[1].resize(static_cast<size_t>(width * height)); + images[2].resize(static_cast<size_t>(width * height)); + images[3].resize(static_cast<size_t>(width * height)); + + // Split RGB(A)RGB(A)RGB(A)... into R, G and B(and A) layers + for (size_t i = 0; i < static_cast<size_t>(width * height); i++) { + images[0][i] = data[static_cast<size_t>(components) * i + 0]; + images[1][i] = data[static_cast<size_t>(components) * i + 1]; + images[2][i] = data[static_cast<size_t>(components) * i + 2]; + if (components == 4) { + images[3][i] = data[static_cast<size_t>(components) * i + 3]; + } + } + + float *image_ptr[4] = {0, 0, 0, 0}; + if (components == 4) { + image_ptr[0] = &(images[3].at(0)); // A + image_ptr[1] = &(images[2].at(0)); // B + image_ptr[2] = &(images[1].at(0)); // G + image_ptr[3] = &(images[0].at(0)); // R + } else { + image_ptr[0] = &(images[2].at(0)); // B + image_ptr[1] = &(images[1].at(0)); // G + image_ptr[2] = &(images[0].at(0)); // R + } + + image.images = reinterpret_cast<unsigned char **>(image_ptr); + image.width = width; + image.height = height; + + header.num_channels = components; + header.channels = static_cast<EXRChannelInfo *>(malloc( + sizeof(EXRChannelInfo) * static_cast<size_t>(header.num_channels))); + // Must be (A)BGR order, since most of EXR viewers expect this channel order. 
+ if (components == 4) { + strncpy(header.channels[0].name, "A", 255); + header.channels[0].name[strlen("A")] = '\0'; + strncpy(header.channels[1].name, "B", 255); + header.channels[1].name[strlen("B")] = '\0'; + strncpy(header.channels[2].name, "G", 255); + header.channels[2].name[strlen("G")] = '\0'; + strncpy(header.channels[3].name, "R", 255); + header.channels[3].name[strlen("R")] = '\0'; + } else { + strncpy(header.channels[0].name, "B", 255); + header.channels[0].name[strlen("B")] = '\0'; + strncpy(header.channels[1].name, "G", 255); + header.channels[1].name[strlen("G")] = '\0'; + strncpy(header.channels[2].name, "R", 255); + header.channels[2].name[strlen("R")] = '\0'; + } + + header.pixel_types = static_cast<int *>( + malloc(sizeof(int) * static_cast<size_t>(header.num_channels))); + header.requested_pixel_types = static_cast<int *>( + malloc(sizeof(int) * static_cast<size_t>(header.num_channels))); + for (int i = 0; i < header.num_channels; i++) { + header.pixel_types[i] = + TINYEXR_PIXELTYPE_FLOAT; // pixel type of input image + header.requested_pixel_types[i] = + TINYEXR_PIXELTYPE_HALF; // pixel type of output image to be stored in + // .EXR + } + + const char *err; + int ret = SaveEXRImageToFile(&image, &header, outfilename, &err); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + free(header.channels); + free(header.pixel_types); + free(header.requested_pixel_types); + + return ret; +} + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif // TINYEXR_IMPLEMENTATION_DEIFNED +#endif // TINYEXR_IMPLEMENTATION diff --git a/thirdparty/zstd/LICENSE b/thirdparty/zstd/LICENSE new file mode 100644 index 0000000000..a793a80289 --- /dev/null +++ b/thirdparty/zstd/LICENSE @@ -0,0 +1,30 @@ +BSD License + +For Zstandard software + +Copyright (c) 2016-present, Facebook, Inc. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Facebook nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/thirdparty/zstd/PATENTS b/thirdparty/zstd/PATENTS new file mode 100644 index 0000000000..15b4a2ea5c --- /dev/null +++ b/thirdparty/zstd/PATENTS @@ -0,0 +1,33 @@ +Additional Grant of Patent Rights Version 2 + +"Software" means the Zstandard software distributed by Facebook, Inc. + +Facebook, Inc. 
("Facebook") hereby grants to each recipient of the Software +("you") a perpetual, worldwide, royalty-free, non-exclusive, irrevocable +(subject to the termination provision below) license under any Necessary +Claims, to make, have made, use, sell, offer to sell, import, and otherwise +transfer the Software. For avoidance of doubt, no license is granted under +Facebook’s rights in any patent claims that are infringed by (i) modifications +to the Software made by you or any third party or (ii) the Software in +combination with any software or other technology. + +The license granted hereunder will terminate, automatically and without notice, +if you (or any of your subsidiaries, corporate affiliates or agents) initiate +directly or indirectly, or take a direct financial interest in, any Patent +Assertion: (i) against Facebook or any of its subsidiaries or corporate +affiliates, (ii) against any party if such Patent Assertion arises in whole or +in part from any software, technology, product or service of Facebook or any of +its subsidiaries or corporate affiliates, or (iii) against any party relating +to the Software. Notwithstanding the foregoing, if Facebook or any of its +subsidiaries or corporate affiliates files a lawsuit alleging patent +infringement against you in the first instance, and you respond by filing a +patent infringement counterclaim in that lawsuit against that party that is +unrelated to the Software, the license granted hereunder will not terminate +under section (i) of this paragraph due to such counterclaim. + +A "Necessary Claim" is a claim of a patent owned by Facebook that is +necessarily infringed by the Software standing alone. + +A "Patent Assertion" is any lawsuit or other action alleging direct, indirect, +or contributory infringement or inducement to infringe any patent, including a +cross-claim or counterclaim. 
diff --git a/thirdparty/zstd/README.md b/thirdparty/zstd/README.md new file mode 100644 index 0000000000..7caee5fd3f --- /dev/null +++ b/thirdparty/zstd/README.md @@ -0,0 +1,146 @@ + __Zstandard__, or `zstd` as short version, is a fast lossless compression algorithm, + targeting real-time compression scenarios at zlib-level and better compression ratios. + +It is provided as an open-source BSD-licensed **C** library, +and a command line utility producing and decoding `.zst` and `.gz` files. +For other programming languages, +you can consult a list of known ports on [Zstandard homepage](http://www.zstd.net/#other-languages). + +|Branch |Status | +|------------|---------| +|master | [](https://travis-ci.org/facebook/zstd) | +|dev | [](https://travis-ci.org/facebook/zstd) | + +As a reference, several fast compression algorithms were tested and compared +on a server running Linux Debian (`Linux version 4.8.0-1-amd64`), +with a Core i7-6700K CPU @ 4.0GHz, +using [lzbench], an open-source in-memory benchmark by @inikep +compiled with GCC 6.3.0, +on the [Silesia compression corpus]. + +[lzbench]: https://github.com/inikep/lzbench +[Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia + +| Compressor name | Ratio | Compression| Decompress.| +| --------------- | ------| -----------| ---------- | +| **zstd 1.1.3 -1** | 2.877 | 430 MB/s | 1110 MB/s | +| zlib 1.2.8 -1 | 2.743 | 110 MB/s | 400 MB/s | +| brotli 0.5.2 -0 | 2.708 | 400 MB/s | 430 MB/s | +| quicklz 1.5.0 -1 | 2.238 | 550 MB/s | 710 MB/s | +| lzo1x 2.09 -1 | 2.108 | 650 MB/s | 830 MB/s | +| lz4 1.7.5 | 2.101 | 720 MB/s | 3600 MB/s | +| snappy 1.1.3 | 2.091 | 500 MB/s | 1650 MB/s | +| lzf 3.6 -1 | 2.077 | 400 MB/s | 860 MB/s | + +[zlib]:http://www.zlib.net/ +[LZ4]: http://www.lz4.org/ + +Zstd can also offer stronger compression ratios at the cost of compression speed. +Speed vs Compression trade-off is configurable by small increments. 
Decompression speed is preserved and remains roughly the same at all settings, a property shared by most LZ compression algorithms, such as [zlib] or lzma. + +The following tests were run +on a server running Linux Debian (`Linux version 4.8.0-1-amd64`) +with a Core i7-6700K CPU @ 4.0GHz, +using [lzbench], an open-source in-memory benchmark by @inikep +compiled with GCC 6.3.0, +on the [Silesia compression corpus]. + +Compression Speed vs Ratio | Decompression Speed +---------------------------|-------------------- + |  + +Several algorithms can produce higher compression ratios, but at slower speeds, falling outside of the graph. +For a larger picture including very slow modes, [click on this link](doc/images/DCspeed5.png) . + + +### The case for Small Data compression + +Previous charts provide results applicable to typical file and stream scenarios (several MB). Small data comes with different perspectives. + +The smaller the amount of data to compress, the more difficult it is to compress. This problem is common to all compression algorithms, and the reason is that compression algorithms learn from past data how to compress future data. But at the beginning of a new data set, there is no "past" to build upon. + +To solve this situation, Zstd offers a __training mode__, which can be used to tune the algorithm for a selected type of data. +Training Zstandard is achieved by providing it with a few samples (one file per sample). The result of this training is stored in a file called "dictionary", which must be loaded before compression and decompression. +Using this dictionary, the compression ratio achievable on small data improves dramatically. + +The following example uses the `github-users` [sample set](https://github.com/facebook/zstd/releases/tag/v1.1.3), created from [github public API](https://developer.github.com/v3/users/#get-all-users). +It consists of roughly 10K records weighing about 1KB each.
+ +Compression Ratio | Compression Speed | Decompression Speed +------------------|-------------------|-------------------- + |  |  + + +These compression gains are achieved while simultaneously providing _faster_ compression and decompression speeds. + +Training works if there is some correlation in a family of small data samples. The more data-specific a dictionary is, the more efficient it is (there is no _universal dictionary_). +Hence, deploying one dictionary per type of data will provide the greatest benefits. +Dictionary gains are mostly effective in the first few KB. Then, the compression algorithm will gradually use previously decoded content to better compress the rest of the file. + +#### Dictionary compression How To : + +1) Create the dictionary + +`zstd --train FullPathToTrainingSet/* -o dictionaryName` + +2) Compress with dictionary + +`zstd -D dictionaryName FILE` + +3) Decompress with dictionary + +`zstd -D dictionaryName --decompress FILE.zst` + + +### Build + +Once you have the repository cloned, there are multiple ways provided to build Zstandard. + +#### Makefile + +If your system is compatible with a standard `make` (or `gmake`) binary generator, +you can simply run it at the root directory. +It will generate `zstd` within root directory. + +Other available options include : +- `make install` : create and install zstd binary, library and man page +- `make test` : create and run `zstd` and test tools on local platform + +#### cmake + +A `cmake` project generator is provided within `build/cmake`. +It can generate Makefiles or other build scripts +to create `zstd` binary, and `libzstd` dynamic and static libraries. + +#### Meson + +A Meson project is provided within `contrib/meson`. 
+ +#### Visual Studio (Windows) + +Going into the `build` directory, you will find additional possibilities : +- Projects for Visual Studio 2005, 2008 and 2010 + + VS2010 project is compatible with VS2012, VS2013 and VS2015 +- Automated build scripts for Visual compiler by @KrzysFR , in `build/VS_scripts`, + which will build `zstd` cli and `libzstd` library without any need to open Visual Studio solution. + + +### Status + +Zstandard is currently deployed within Facebook. It is used daily to compress and decompress very large amounts of data in multiple formats and use cases. +Zstandard is considered safe for production environments. + +### License + +Zstandard is [BSD-licensed](LICENSE). We also provide an [additional patent grant](PATENTS). + +### Contributing + +The "dev" branch is the one where all contributions will be merged before reaching "master". +If you plan to propose a patch, please commit into the "dev" branch or its own feature branch. +Direct commits to "master" are not permitted. +For more information, please read [CONTRIBUTING](CONTRIBUTING.md). + +### Miscellaneous + +Zstd entropy stage is provided by [Huff0 and FSE, from Finite State Entropy library](https://github.com/Cyan4973/FiniteStateEntropy). diff --git a/thirdparty/zstd/common/bitstream.h b/thirdparty/zstd/common/bitstream.h new file mode 100644 index 0000000000..ca42850df3 --- /dev/null +++ b/thirdparty/zstd/common/bitstream.h @@ -0,0 +1,446 @@ +/* ****************************************************************** + bitstream + Part of FSE library + header file (to include) + Copyright (C) 2013-2017, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* +* This API consists of small unitary functions, which must be inlined for best performance. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. 
+*/ + +/*-**************************************** +* Dependencies +******************************************/ +#include "mem.h" /* unaligned access routines */ +#include "error_private.h" /* error codes and messages */ + + +/*-************************************* +* Debug +***************************************/ +#if defined(BIT_DEBUG) && (BIT_DEBUG>=1) +# include <assert.h> +#else +# define assert(condition) ((void)0) +#endif + + +/*========================================= +* Target specific +=========================================*/ +#if defined(__BMI__) && defined(__GNUC__) +# include <immintrin.h> /* support for bextr (experimental) */ +#endif + +#define STREAM_ACCUMULATOR_MIN_32 25 +#define STREAM_ACCUMULATOR_MIN_64 57 +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) + +/*-****************************************** +* bitStream encoding API (write forward) +********************************************/ +/* bitStream can mix input from multiple sources. +* A critical property of these streams is that they encode and decode in **reverse** direction. +* So the first bit sequence you add will be the last to be read, like a LIFO stack. +*/ +typedef struct +{ + size_t bitContainer; + unsigned bitPos; + char* startPtr; + char* ptr; + char* endPtr; +} BIT_CStream_t; + +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); + +/* Start with initCStream, providing the size of buffer to write into. +* bitStream will never write outside of this buffer. +* `dstCapacity` must be > sizeof(bitC->bitContainer), otherwise @return will be an error code. +* +* bits are first added to a local register.
+* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is an explicit operation, performed by the flushBits function. +* Hence keep track how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. +* +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. +* +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes. +* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +*/ + + +/*-******************************************** +* bitStream decoding API (read backward) +**********************************************/ +typedef struct +{ + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; + const char* limitPtr; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. 
+* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). +*/ + + +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/*-************************************************************** +* Internal functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (register U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + _BitScanReverse ( &r, val ); + return (unsigned) r; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return 31 - __builtin_clz (val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; +# endif +} + +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, + 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, + 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ + + +/*-************************************************************** +* bitStream encoding 
+****************************************************************/ +/*! BIT_initCStream() : + * `dstCapacity` must be > sizeof(size_t) + * @return : 0 if success, + otherwise an error code (can be tested using ERR_isError() ) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, + void* startPtr, size_t dstCapacity) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); + return 0; +} + +/*! BIT_addBits() : + can add up to 26 bits into `bitC`. + Does not check for register overflow ! */ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + assert((value>>nbBits) == 0); + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_flushBitsFast() : + * assumption : bitContainer has not overflowed + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert( bitC->bitPos <= (sizeof(bitC->bitContainer)*8) ); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + assert(bitC->ptr <= bitC->endPtr); + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_flushBits() : + * assumption : bitContainer has not overflowed + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. 
+ * overflow will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert( bitC->bitPos <= (sizeof(bitC->bitContainer)*8) ); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_closeCStream() : + * @return : size of CStream, in bytes, + or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) +{ + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); +} + + +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : +* Initialize a BIT_DStream_t. +* `bitD` : a pointer to an already allocated BIT_DStream_t structure. +* `srcSize` must be the *exact* size of the bitStream, in bytes. +* @return : size of stream (== srcSize) or an errorCode if a problem is detected +*/ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + bitD->start = (const char*)srcBuffer; + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); + + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + } else { + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; + default:; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; + } + + return srcSize; +} + +MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +{ + return bitContainer >> start; +} + +MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +{ +#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */ +# if defined(__x86_64__) + if (sizeof(bitContainer)==8) + return _bextr_u64(bitContainer, start, nbBits); + else +# endif + return _bextr_u32(bitContainer, start, nbBits); +#else + return (bitContainer >> start) & BIT_mask[nbBits]; +#endif +} + +MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +{ + return bitContainer & BIT_mask[nbBits]; +} + +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified. + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. 
+ * @return : value extracted + */ + MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +{ +#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); +#else + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); +#endif +} + +/*! BIT_lookBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) +{ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + assert(nbBits >= 1); + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); +} + +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. + * @return : extracted value. + */ +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_readBitsFast() : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t const value = BIT_lookBitsFast(bitD, nbBits); + assert(nbBits >= 1); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_reloadDStream() : +* Refill `bitD` from buffer previously set in BIT_initDStream() . +* This function is safe, it guarantees it will not read beyond src buffer. +* @return : status of `BIT_DStream_t` internal register. 
+ if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */ +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->limitPtr) { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; + } + if (bitD->ptr == bitD->start) { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + /* start < ptr < limitPtr */ + { U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ + return result; + } +} + +/*! BIT_endOfDStream() : +* @return Tells if DStream has exactly reached its end (all bits consumed). +*/ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ diff --git a/thirdparty/zstd/common/entropy_common.c b/thirdparty/zstd/common/entropy_common.c new file mode 100644 index 0000000000..b37a082fee --- /dev/null +++ b/thirdparty/zstd/common/entropy_common.c @@ -0,0 +1,221 @@ +/* + Common functions of New Generation Entropy library + Copyright (C) 2016, Yann Collet. 
+ + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +*************************************************************************** */ + +/* ************************************* +* Dependencies +***************************************/ +#include "mem.h" +#include "error_private.h" /* ERR_*, ERROR */ +#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ +#include "huf.h" + + +/*=== Version ===*/ +unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } + + +/*=== Error Management ===*/ +unsigned FSE_isError(size_t code) { return ERR_isError(code); } +const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } + +unsigned HUF_isError(size_t code) { return ERR_isError(code); } +const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } + + +/*-************************************************************** +* FSE NCount encoding-decoding +****************************************************************/ +size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + const BYTE* const istart = (const BYTE*) headerBuffer; + const BYTE* const iend = istart + hbSize; + const BYTE* ip = istart; + int nbBits; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + int previous0 = 0; + + if (hbSize < 4) return ERROR(srcSize_wrong); + bitStream = MEM_readLE32(ip); + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<<nbBits)+1; + threshold = 1<<nbBits; + nbBits++; + + while ((remaining>1) & (charnum<=*maxSVPtr)) { + if (previous0) { + unsigned n0 = 
charnum; + while ((bitStream & 0xFFFF) == 0xFFFF) { + n0 += 24; + if (ip < iend-5) { + ip += 2; + bitStream = MEM_readLE32(ip) >> bitCount; + } else { + bitStream >>= 16; + bitCount += 16; + } } + while ((bitStream & 3) == 3) { + n0 += 3; + bitStream >>= 2; + bitCount += 2; + } + n0 += bitStream & 3; + bitCount += 2; + if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); + while (charnum < n0) normalizedCounter[charnum++] = 0; + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + ip += bitCount>>3; + bitCount &= 7; + bitStream = MEM_readLE32(ip) >> bitCount; + } else { + bitStream >>= 2; + } } + { int const max = (2*threshold-1) - remaining; + int count; + + if ((bitStream & (threshold-1)) < (U32)max) { + count = bitStream & (threshold-1); + bitCount += nbBits-1; + } else { + count = bitStream & (2*threshold-1); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + remaining -= count < 0 ? -count : count; /* -1 means +1 */ + normalizedCounter[charnum++] = (short)count; + previous0 = !count; + while (remaining < threshold) { + nbBits--; + threshold >>= 1; + } + + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> (bitCount & 31); + } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ + if (remaining != 1) return ERROR(corruption_detected); + if (bitCount > 32) return ERROR(corruption_detected); + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + return ip-istart; +} + + +/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. + `rankStats` is assumed to be a table of at least (HUF_TABLELOG_MAX + 1) U32. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . 
+*/ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 weightTotal; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; + + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; + /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ + + if (iSize >= 128) { /* special header */ + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + if (oSize >= hwSize) return ERROR(corruption_detected); + ip += 1; + { U32 n; + for (n=0; n<oSize; n+=2) { + huffWeight[n] = ip[n/2] >> 4; + huffWeight[n+1] = ip[n/2] & 15; + } } } + else { /* header compressed with FSE (normal case) */ + FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); + weightTotal = 0; + { U32 n; for (n=0; n<oSize; n++) { + if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected); + rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } } + if (weightTotal == 0) return ERROR(corruption_detected); + + /* get last non-null symbol weight (implied, total must be 2^n) */ + { U32 const tableLog = BIT_highbit32(weightTotal) + 1; + if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); + *tableLogPtr = tableLog; + /* determine last weight */ + { U32 const total = 1 << tableLog; + U32 const rest = total - weightTotal; + U32 const verif = 1 << BIT_highbit32(rest); + U32 const lastWeight = BIT_highbit32(rest) + 1; + if (verif != rest) return 
ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankStats[lastWeight]++; + } } + + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + + /* results */ + *nbSymbolsPtr = (U32)(oSize+1); + return iSize+1; +} diff --git a/thirdparty/zstd/common/error_private.c b/thirdparty/zstd/common/error_private.c new file mode 100644 index 0000000000..b3287245f1 --- /dev/null +++ b/thirdparty/zstd/common/error_private.c @@ -0,0 +1,44 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +/* The purpose of this file is to have a single list of error strings embedded in binary */ + +#include "error_private.h" + +const char* ERR_getErrorString(ERR_enum code) +{ + static const char* const notErrorCode = "Unspecified error code"; + switch( code ) + { + case PREFIX(no_error): return "No error detected"; + case PREFIX(GENERIC): return "Error (generic)"; + case PREFIX(prefix_unknown): return "Unknown frame descriptor"; + case PREFIX(version_unsupported): return "Version not supported"; + case PREFIX(parameter_unknown): return "Unknown parameter type"; + case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; + case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode"; + case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; + case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound"; + case PREFIX(init_missing): return "Context should be init first"; + case 
PREFIX(memory_allocation): return "Allocation error : not enough memory"; + case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case PREFIX(srcSize_wrong): return "Src size is incorrect"; + case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong): return "Dictionary mismatch"; + case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; + case PREFIX(maxCode): + default: return notErrorCode; + } +} diff --git a/thirdparty/zstd/common/error_private.h b/thirdparty/zstd/common/error_private.h new file mode 100644 index 0000000000..1bc2e49548 --- /dev/null +++ b/thirdparty/zstd/common/error_private.h @@ -0,0 +1,76 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. 
+ */ + +/* Note : this module is expected to remain private, do not expose it */ + +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************************** +* Dependencies +******************************************/ +#include <stddef.h> /* size_t */ +#include "zstd_errors.h" /* enum list */ + + +/* **************************************** +* Compiler-specific +******************************************/ +#if defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Customization (error_public.h) +******************************************/ +typedef ZSTD_ErrorCode ERR_enum; +#define PREFIX(name) ZSTD_error_##name + + +/*-**************************************** +* Error codes handling +******************************************/ +#ifdef ERROR +# undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#endif +#define ERROR(name) ((size_t)-PREFIX(name)) + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } + + +/*-**************************************** +* Error Strings +******************************************/ + +const char* ERR_getErrorString(ERR_enum code); /* error_private.c */ + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + return ERR_getErrorString(ERR_getErrorCode(code)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ diff --git a/thirdparty/zstd/common/fse.h 
b/thirdparty/zstd/common/fse.h new file mode 100644 index 0000000000..6d5d41def1 --- /dev/null +++ b/thirdparty/zstd/common/fse.h @@ -0,0 +1,698 @@ +/* ****************************************************************** + FSE : Finite State Entropy codec + Public Prototypes declaration + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ +#ifndef FSE_H +#define FSE_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*-***************************************** +* Dependencies +******************************************/ +#include <stddef.h> /* size_t, ptrdiff_t */ + + +/*-***************************************** +* FSE_PUBLIC_API : control library symbols visibility +******************************************/ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define FSE_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define FSE_PUBLIC_API +#endif + +/*------ Version ------*/ +#define FSE_VERSION_MAJOR 0 +#define FSE_VERSION_MINOR 9 +#define FSE_VERSION_RELEASE 0 + +#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE +#define FSE_QUOTE(str) #str +#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) +#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) + +#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) +FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ + +/*-**************************************** +* FSE simple functions +******************************************/ +/*! FSE_compress() : + Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. + 'dst' buffer must be already allocated. 
Compression runs faster if dstCapacity >= FSE_compressBound(srcSize). + @return : size of compressed data (<= dstCapacity). + Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. + if FSE_isError(return), compression failed (more details using FSE_getErrorName()) +*/ +FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/*! FSE_decompress(): + Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', + into already allocated destination buffer 'dst', of size 'dstCapacity'. + @return : size of regenerated data (<= dstCapacity), + or an error code, which can be tested using FSE_isError() . + + ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! + Why ? : making this distinction requires a header. + Header management is intentionally delegated to the user layer, which can better manage special cases. +*/ +FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); + + +/*-***************************************** +* Tool functions +******************************************/ +FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ + +/* Error Management */ +FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ + + +/*-***************************************** +* FSE advanced functions +******************************************/ +/*! 
FSE_compress2() : + Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' + Both parameters can be defined as '0' to mean : use default value + @return : size of compressed data + Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. + if FSE_isError(return), it's an error code. +*/ +FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + + +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' + +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. decode the data stream using decoding table 'DTable' + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ + +/* *** COMPRESSION *** */ + +/*! FSE_count(): + Provides the precise count of each byte within a table 'count'. + 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + *maxSymbolValuePtr will be updated if detected smaller than initial value. + @return : the count of the most frequent symbol (which is not identified). + if return == srcSize, there is only one symbol. + Can also return an error code, which can be tested with FSE_isError(). 
*/ +FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); + +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + + +/*! Constructor and Destructor of FSE_CTable. + Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ +FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue); +FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); + +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). 
+ @return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. + @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); + +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). + +The next step is to normalize the frequencies. +FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. +You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). + +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. 
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). + +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. +For guaranteed success, buffer size must be at least FSE_NCountWriteBound(). +The result of the function is the number of bytes written into 'buffer'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). + +'normalizedCounter' can then be used to create the compression table 'CTable'. +The space required by 'CTable' must be already allocated, using FSE_createCTable(). +You can then use FSE_buildCTable() to fill 'CTable'. +If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). + +'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). +Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. +If it returns '0', compressed data could not fit into 'dst'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). +*/ + + +/* *** DECOMPRESSION *** */ + +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). 
+ maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); + +/*! Constructor and Destructor of FSE_DTable. + Note that its size depends on 'tableLog' */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); +FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); + +/*! FSE_buildDTable(): + Builds 'dt', which must be already allocated, using FSE_createDTable(). + return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_decompress_usingDTable(): + Decompress compressed source `cSrc` of size `cSrcSize` using `dt` + into `dst` which must be already allocated. + @return : size of regenerated data (necessarily <= `dstCapacity`), + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); + +/*! +Tutorial : +---------- +(Note : these functions only decompress FSE-compressed blocks. + If block is uncompressed, use memcpy() instead + If block is a single repeated byte, use memset() instead ) + +The first step is to obtain the normalized frequencies of symbols. +This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. +In practice, that means it's necessary to know 'maxSymbolValue' beforehand, +or size the table to handle worst case situations (typically 256). +FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. 
+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. +Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. +This is performed by the function FSE_buildDTable(). +The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) +*/ + + +#ifdef FSE_STATIC_LINKING_ONLY + +/* *** Dependency *** */ +#include "bitstream.h" + + +/* ***************************************** +* Static allocation +*******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) (size + (size>>7)) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog)) + +/* or use the size to malloc() space directly. 
Pay attention to alignment restrictions though */ +#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable)) +#define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable)) + + +/* ***************************************** +* FSE advanced API +*******************************************/ +/* FSE_count_wksp() : + * Same as FSE_count(), but using an externally provided scratch buffer. + * `workSpace` size must be table of >= `1024` unsigned + */ +size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, unsigned* workSpace); + +/** FSE_countFast() : + * same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr + */ +size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); + +/* FSE_countFast_wksp() : + * Same as FSE_countFast(), but using an externally provided scratch buffer. + * `workSpace` must be a table of minimum `1024` unsigned + */ +size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace); + +/*! FSE_count_simple + * Same as FSE_countFast(), but does not use any additional memory (not even on stack). + * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`). +*/ +size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); + + + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); +/**< same as FSE_optimalTableLog(), which used `minus==2` */ + +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). + * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. 
+ */ +#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) +size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); +/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ + +size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); +/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * `wkspSize` must be >= `(1<<tableLog)`. + */ +size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); +/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ + +size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); +/**< build a fake FSE_DTable, designed to always generate the same symbolValue */ + +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog); +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */ + + +/* ***************************************** +* FSE symbol compression API +*******************************************/ +/*! + This API consists of small unitary functions, which highly benefit from being inlined. + Hence their body are included in next section. 
+*/ +typedef struct { + ptrdiff_t value; + const void* stateTable; + const void* symbolTT; + unsigned stateLog; +} FSE_CState_t; + +static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct); + +static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol); + +static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr); + +/**< +These functions are inner components of FSE_compress_usingCTable(). +They allow the creation of custom streams, mixing multiple tables and bit sources. + +A key property to keep in mind is that encoding and decoding are done **in reverse direction**. +So the first symbol you will encode is the last you will decode, like a LIFO stack. + +You will need a few variables to track your CStream. They are : + +FSE_CTable ct; // Provided by FSE_buildCTable() +BIT_CStream_t bitStream; // bitStream tracking structure +FSE_CState_t state; // State tracking structure (can have several) + + +The first thing to do is to init bitStream and state. + size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize); + FSE_initCState(&state, ct); + +Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError(); +You can then encode your input data, byte after byte. +FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time. +Remember decoding will be done in reverse direction. + FSE_encodeSymbol(&bitStream, &state, symbol); + +At any time, you can also add any bit sequence. +Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders + BIT_addBits(&bitStream, bitField, nbBits); + +The above methods don't commit data to memory, they just store it into local register, for speed. +Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +Writing data to memory is a manual operation, performed by the flushBits function. 
+ BIT_flushBits(&bitStream); + +Your last FSE encoding operation shall be to flush your last state value(s). + FSE_flushCState(&bitStream, &state); + +Finally, you must close the bitStream. +The function returns the size of CStream in bytes. +If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible) +If there is an error, it returns an errorCode (which can be tested using FSE_isError()). + size_t size = BIT_closeCStream(&bitStream); +*/ + + +/* ***************************************** +* FSE symbol decompression API +*******************************************/ +typedef struct { + size_t state; + const void* table; /* precise table may vary, depending on U16 */ +} FSE_DState_t; + + +static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt); + +static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); + +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); + +/**< +Let's now decompose FSE_decompress_usingDTable() into its unitary components. +You will decode FSE-encoded symbols from the bitStream, +and also any other bitFields you put in, **in reverse order**. + +You will need a few variables to track your bitStream. They are : + +BIT_DStream_t DStream; // Stream context +FSE_DState_t DState; // State context. Multiple ones are possible +FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable() + +The first thing to do is to init the bitStream. + errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize); + +You should then retrieve your initial state(s) +(in reverse flushing order if you have several ones) : + FSE_initDState(&DState, &DStream, DTablePtr); + +You can then decode your data, symbol after symbol. +For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'. +Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out). 
+ unsigned char symbol = FSE_decodeSymbol(&DState, &DStream); + +You can retrieve any bitfield you eventually stored into the bitStream (in reverse order) +Note : maximum allowed nbBits is 25, for 32-bits compatibility + size_t bitField = BIT_readBits(&DStream, nbBits); + +All above operations only read from local register (whose size depends on size_t). +Refueling the register from memory is manually performed by the reload method. + endSignal = BIT_reloadDStream(&DStream); + +BIT_reloadDStream() result tells if there is still some more data to read from DStream. +BIT_DStream_unfinished : there is still some data left into the DStream. +BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled. +BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed. +BIT_DStream_overflow : Dstream went too far. Decompression result is corrupted. + +When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop, +to properly detect the exact end of stream. +After each decoded symbol, check if DStream is fully consumed using this simple test : + BIT_reloadDStream(&DStream) >= BIT_DStream_completed + +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. 
+ FSE_endOfDState(&DState); +*/ + + +/* ***************************************** +* FSE unsafe API +*******************************************/ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ + + +/* ***************************************** +* Implementation of inlined functions +*******************************************/ +typedef struct { + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) +{ + const void* ptr = ct; + const U16* u16ptr = (const U16*) ptr; + const U32 tableLog = MEM_read16(ptr); + statePtr->value = (ptrdiff_t)1<<tableLog; + statePtr->stateTable = u16ptr+2; + statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1)); + statePtr->stateLog = tableLog; +} + + +/*! FSE_initCState2() : +* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) +* uses the smallest state value possible, saving the cost of this symbol */ +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) +{ + FSE_initCState(statePtr, ct); + { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + } +} + +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol) +{ + FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*)(statePtr->stateTable); + U32 const 
nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + + +/* ====== Decompression ====== */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/*! 
FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + + +#ifndef FSE_COMMONDEFS_ONLY + +/* ************************************************************** +* Tuning parameters +****************************************************************/ +/*!MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#ifndef FSE_MAX_MEMORY_USAGE +# define FSE_MAX_MEMORY_USAGE 14 +#endif +#ifndef FSE_DEFAULT_MEMORY_USAGE +# define FSE_DEFAULT_MEMORY_USAGE 13 +#endif + +/*!FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. 
+* Required for proper stack allocation */ +#ifndef FSE_MAX_SYMBOL_VALUE +# define FSE_MAX_SYMBOL_VALUE 255 +#endif + +/* ************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION +#define FSE_DECODE_TYPE FSE_decode_t + + +#endif /* !FSE_COMMONDEFS_ONLY */ + + +/* *************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG) +#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1) +#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2) +#define FSE_MIN_TABLELOG 5 + +#define FSE_TABLELOG_ABSOLUTE_MAX 15 +#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX +# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + +#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3) + + +#endif /* FSE_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* FSE_H */ diff --git a/thirdparty/zstd/common/fse_decompress.c b/thirdparty/zstd/common/fse_decompress.c new file mode 100644 index 0000000000..8474a4c079 --- /dev/null +++ b/thirdparty/zstd/common/fse_decompress.c @@ -0,0 +1,328 @@ +/* ****************************************************************** + FSE : Finite State Entropy decoder + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include <intrin.h> /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +#else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +# else +# define FORCE_INLINE static +# endif /* 
__STDC_VERSION__ */ +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +#include <stdlib.h> /* malloc, free, qsort */ +#include <string.h> /* memcpy, memset */ +#include "bitstream.h" +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError +#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + +/* check and forward error code */ +#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; } + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ +FSE_DTable* FSE_createDTable (unsigned tableLog) +{ + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); +} + +void FSE_freeDTable (FSE_DTable* dt) +{ + free(dt); +} + +size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); + U16 
symbolNext[FSE_MAX_SYMBOL_VALUE+1]; + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + { FSE_DTableHeader DTableH; + DTableH.tableLog = (U16)tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s<maxSV1; s++) { + if (normalizedCounter[s]==-1) { + tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; + symbolNext[s] = 1; + } else { + if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; + symbolNext[s] = normalizedCounter[s]; + } } } + memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + { U32 const tableMask = tableSize-1; + U32 const step = FSE_TABLESTEP(tableSize); + U32 s, position = 0; + for (s=0; s<maxSV1; s++) { + int i; + for (i=0; i<normalizedCounter[s]; i++) { + tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s; + position = (position + step) & tableMask; + while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; u<tableSize; u++) { + FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol); + U16 nextState = symbolNext[symbol]++; + tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) ); + tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); + } } + + return 0; +} + + +#ifndef FSE_COMMONDEFS_ONLY + +/*-******************************************************* +* Decompression (Byte symbols) +*********************************************************/ +size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE 
symbolValue) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + void* dPtr = dt + 1; + FSE_decode_t* const cell = (FSE_decode_t*)dPtr; + + DTableH->tableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + void* dPtr = dt + 1; + FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSV1 = tableMask+1; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s<maxSV1; s++) { + dinfo[s].newState = 0; + dinfo[s].symbol = (BYTE)s; + dinfo[s].nbBits = (BYTE)nbBits; + } + + return 0; +} + +FORCE_INLINE size_t FSE_decompress_usingDTable_generic( + void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt, const unsigned fast) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax-3; + + BIT_DStream_t bitD; + FSE_DState_t state1; + FSE_DState_t state2; + + /* Init */ + CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize)); + + FSE_initDState(&state1, &bitD, dt); + FSE_initDState(&state2, &bitD, dt); + +#define FSE_GETSYMBOL(statePtr) fast ? 
FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) + + /* 4 symbols per loop */ + for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) { + op[0] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) { + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state1); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state2); + break; + } + + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state2); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state1); + break; + } } + + return op-ostart; +} + + +size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + 
short counting[FSE_MAX_SYMBOL_VALUE+1]; + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + + /* normal FSE decoding mode */ + size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); + if (FSE_isError(NCountLength)) return NCountLength; + //if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + ip += NCountLength; + cSrcSize -= NCountLength; + + CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) ); + + return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */ +} + + +typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + +size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) +{ + DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ + return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG); +} + + + +#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/thirdparty/zstd/common/huf.h b/thirdparty/zstd/common/huf.h new file mode 100644 index 0000000000..7873ca3d42 --- /dev/null +++ b/thirdparty/zstd/common/huf.h @@ -0,0 +1,283 @@ +/* ****************************************************************** + Huffman coder, part of New Generation Entropy library + header file + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ +#ifndef HUF_H_298734234 +#define HUF_H_298734234 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* *** Dependencies *** */ +#include <stddef.h> /* size_t */ + + +/* *** library symbols visibility *** */ +/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, + * HUF symbols remain "private" (internal symbols for library only). 
+ * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define HUF_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ +#else +# define HUF_PUBLIC_API +#endif + + +/* *** simple functions *** */ +/** +HUF_compress() : + Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. + 'dst' buffer must be already allocated. + Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). + `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. + @return : size of compressed data (<= `dstCapacity`). + Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single repeated byte symbol (RLE compression). + if HUF_isError(return), compression failed (more details using HUF_getErrorName()) +*/ +HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/** +HUF_decompress() : + Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', + into already allocated buffer 'dst', of minimum size 'dstSize'. + `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. + Note : in contrast with FSE, HUF_decompress can regenerate + RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, + because it knows size to regenerate. 
+ @return : size of regenerated data (== originalSize), + or an error code, which can be tested using HUF_isError() +*/ +HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize); + + +/* *** Tool functions *** */ +#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ +HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ + +/* Error Management */ +HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ + + +/* *** Advanced function *** */ + +/** HUF_compress2() : + * Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog`. + * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ +HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + +/** HUF_compress4X_wksp() : + * Same as HUF_compress2(), but uses externally allocated `workSpace`. + * `workspace` must have minimum alignment of 4, and be at least as large as following macro */ +#define HUF_WORKSPACE_SIZE (6 << 10) +#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) +HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + + + +/* ****************************************************************** + * WARNING !! + * The following section contains advanced and experimental definitions + * which shall never be used in the context of dll + * because they are not guaranteed to remain stable in the future. + * Only consider them in association with static linking. 
+ *******************************************************************/ +#ifdef HUF_STATIC_LINKING_ONLY + +/* *** Dependencies *** */ +#include "mem.h" /* U32 */ + + +/* *** Constants *** */ +#define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */ +#define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */ +#define HUF_SYMBOLVALUE_MAX 255 + +#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_TABLELOG_MAX. Beyond that value, code does not work */ +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" +#endif + + +/* **************************************** +* Static allocation +******************************************/ +/* HUF buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of HUF's Compression Table */ +#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) +#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \ + void* name##hv = &(name##hb); \ + HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ + +/* static allocation of HUF's DTable */ +typedef U32 HUF_DTable; +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } +#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } + + +/* 
**************************************** +* Advanced decompression functions +******************************************/ +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ + + +/* **************************************** +* HUF detailed API +******************************************/ +/*! +HUF_compress() does the following: +1. count symbol occurrence from source[] into table count[] using FSE_count() +2. (optional) refine tableLog using HUF_optimalTableLog() +3. build Huffman table from count using HUF_buildCTable() +4. save Huffman table to memory buffer using HUF_writeCTable() +5. encode the data stream using HUF_compress4X_usingCTable() + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and regenerate 'CTable' using external methods. 
+*/ +/* FSE_count() : find it within "fse.h" */ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); +typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ +size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); +size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); + +typedef enum { + HUF_repeat_none, /**< Cannot use the previous table */ + HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ + HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } HUF_repeat; +/** HUF_compress4X_repeat() : +* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. +* If it uses hufTable it does not modify hufTable or repeat. +* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. +* If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-byte boundaries, and be at least as large as a table of 1024 unsigned. + */ +size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize); + +/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). 
+ `huffWeight` is destination buffer. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize); + +/** HUF_readCTable() : +* Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* src, size_t srcSize); + + +/* +HUF_decompress() does the following: +1. select the decompression algorithm (X2, X4) based on pre-computed heuristics +2. build Huffman table from save, using HUF_readDTableXn() +3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable +*/ + +/** HUF_selectDecoder() : +* Tells which decoder is likely to decode faster, +* based on a set of pre-determined metrics. +* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . +* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); + +size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize); + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); + + +/* single stream variants */ + +size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 
unsigned */ +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +/** HUF_compress1X_repeat() : +* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. +* If it uses hufTable it does not modify hufTable or repeat. +* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. +* If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ + +size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */ +size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); + +#endif /* HUF_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* 
HUF_H_298734234 */ diff --git a/thirdparty/zstd/common/mem.h b/thirdparty/zstd/common/mem.h new file mode 100644 index 0000000000..4773a8b930 --- /dev/null +++ b/thirdparty/zstd/common/mem.h @@ -0,0 +1,373 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#ifndef MEM_H_MODULE +#define MEM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-**************************************** +* Dependencies +******************************************/ +#include <stddef.h> /* size_t, ptrdiff_t */ +#include <string.h> /* memcpy */ + + +/*-**************************************** +* Compiler specifics +******************************************/ +#if defined(_MSC_VER) /* Visual Studio */ +# include <stdlib.h> /* _byteswap_ulong */ +# include <intrin.h> /* _byteswap_* */ +#endif +#if defined(__GNUC__) +# define MEM_STATIC static __inline __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + +/* code only tested on 32 and 64 bits systems */ +#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } +MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } + + +/*-************************************************************** +* Basic Types +*****************************************************************/ +#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) 
/* C99 */) ) +# include <stdint.h> + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; + typedef intptr_t iPtrDiff; + typedef uintptr_t uPtrDiff; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef signed short S16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef signed long long S64; + typedef ptrdiff_t iPtrDiff; + typedef size_t uPtrDiff; +#endif + + +/*-************************************************************** +* Memory I/O +*****************************************************************/ +/* MEM_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets depending on alignment. + * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || defined(__GNUC__) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard, by lying on structure alignment. +Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } +MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } /* size_t-wide read; returns size_t, same signature as the memcpy-based fallback */ + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + __pragma( pack(push, 1) ) + typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign; + __pragma( 
pack(pop) ) +#else + typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; +#endif + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; } /* size_t-wide read through the packed union; returns size_t, same signature as the memcpy-based fallback */ + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } + +#else + +/* default method, safe and standard. + can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC size_t MEM_readST(const void* memPtr) +{ + size_t val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* MEM_FORCE_MEMORY_ACCESS */ + +MEM_STATIC U32 MEM_swap32(U32 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_ulong(in); +#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403) + return __builtin_bswap32(in); +#else + return ((in << 24) & 0xff000000 ) | + ((in << 8) & 0x00ff0000 ) | + ((in >> 8) & 0x0000ff00 ) | + ((in >> 24) & 0x000000ff ); +#endif +} + +MEM_STATIC U64 
MEM_swap64(U64 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403) + return __builtin_bswap64(in); +#else + return ((in << 56) & 0xff00000000000000ULL) | + ((in << 40) & 0x00ff000000000000ULL) | + ((in << 24) & 0x0000ff0000000000ULL) | + ((in << 8) & 0x000000ff00000000ULL) | + ((in >> 8) & 0x00000000ff000000ULL) | + ((in >> 24) & 0x0000000000ff0000ULL) | + ((in >> 40) & 0x000000000000ff00ULL) | + ((in >> 56) & 0x00000000000000ffULL); +#endif +} + +MEM_STATIC size_t MEM_swapST(size_t in) +{ + if (MEM_32bits()) + return (size_t)MEM_swap32((U32)in); + else + return (size_t)MEM_swap64((U64)in); +} + +/*=== Little endian r/w ===*/ + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) { + MEM_write16(memPtr, val); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE*)memPtr)[2] = (BYTE)(val>>16); +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + return MEM_swap32(MEM_read32(memPtr)); +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, val32); + else + MEM_write32(memPtr, MEM_swap32(val32)); +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + return MEM_swap64(MEM_read64(memPtr)); +} + +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + 
MEM_write64(memPtr, val64); + else + MEM_write64(memPtr, MEM_swap64(val64)); +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + +/*=== Big endian r/w ===*/ + +MEM_STATIC U32 MEM_readBE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap32(MEM_read32(memPtr)); + else + return MEM_read32(memPtr); +} + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, MEM_swap32(val32)); + else + MEM_write32(memPtr, val32); +} + +MEM_STATIC U64 MEM_readBE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap64(MEM_read64(memPtr)); + else + return MEM_read64(memPtr); +} + +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, MEM_swap64(val64)); + else + MEM_write64(memPtr, val64); +} + +MEM_STATIC size_t MEM_readBEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readBE32(memPtr); + else + return (size_t)MEM_readBE64(memPtr); +} + +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeBE32(memPtr, (U32)val); + else + MEM_writeBE64(memPtr, (U64)val); +} + + +/* function safe only for comparisons */ +MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} + +#if defined (__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ diff --git a/thirdparty/zstd/common/pool.c b/thirdparty/zstd/common/pool.c new file mode 100644 index 0000000000..e439fe1b0d --- /dev/null +++ b/thirdparty/zstd/common/pool.c @@ -0,0 +1,194 @@ +/** + 
* Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + + +/* ====== Dependencies ======= */ +#include <stddef.h> /* size_t */ +#include <stdlib.h> /* malloc, calloc, free */ +#include "pool.h" + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +#ifdef ZSTD_MULTITHREAD + +#include "threading.h" /* pthread adaptation */ + +/* A job is a function and an opaque argument */ +typedef struct POOL_job_s { + POOL_function function; + void *opaque; +} POOL_job; + +struct POOL_ctx_s { + /* Keep track of the threads */ + pthread_t *threads; + size_t numThreads; + + /* The queue is a circular buffer */ + POOL_job *queue; + size_t queueHead; + size_t queueTail; + size_t queueSize; + /* The mutex protects the queue */ + pthread_mutex_t queueMutex; + /* Condition variable for pushers to wait on when the queue is full */ + pthread_cond_t queuePushCond; + /* Condition variables for poppers to wait on when the queue is empty */ + pthread_cond_t queuePopCond; + /* Indicates if the queue is shutting down */ + int shutdown; +}; + +/* POOL_thread() : + Work thread for the thread pool. + Waits for jobs and executes them. + @returns : NULL on failure else non-null. 
+*/ +static void* POOL_thread(void* opaque) { + POOL_ctx* const ctx = (POOL_ctx*)opaque; + if (!ctx) { return NULL; } + for (;;) { + /* Lock the mutex and wait for a non-empty queue or until shutdown */ + pthread_mutex_lock(&ctx->queueMutex); + while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) { + pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); + } + /* empty => shutting down: so stop */ + if (ctx->queueHead == ctx->queueTail) { + pthread_mutex_unlock(&ctx->queueMutex); + return opaque; + } + /* Pop a job off the queue */ + { POOL_job const job = ctx->queue[ctx->queueHead]; + ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; + /* Unlock the mutex, signal a pusher, and run the job */ + pthread_mutex_unlock(&ctx->queueMutex); + pthread_cond_signal(&ctx->queuePushCond); + job.function(job.opaque); + } + } + /* Unreachable */ +} + +POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) { + POOL_ctx *ctx; + /* Check the parameters */ + if (!numThreads || !queueSize) { return NULL; } + /* Allocate the context and zero initialize */ + ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx)); + if (!ctx) { return NULL; } + /* Initialize the job queue. + * It needs one extra space since one space is wasted to differentiate empty + * and full queues. 
+ */ + ctx->queueSize = queueSize + 1; + ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job)); + ctx->queueHead = 0; + ctx->queueTail = 0; + pthread_mutex_init(&ctx->queueMutex, NULL); + pthread_cond_init(&ctx->queuePushCond, NULL); + pthread_cond_init(&ctx->queuePopCond, NULL); + ctx->shutdown = 0; + /* Allocate space for the thread handles */ + ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t)); + ctx->numThreads = 0; + /* Check for errors */ + if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } + /* Initialize the threads */ + { size_t i; + for (i = 0; i < numThreads; ++i) { + if (pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { + ctx->numThreads = i; + POOL_free(ctx); + return NULL; + } } + ctx->numThreads = numThreads; + } + return ctx; +} + +/*! POOL_join() : + Shutdown the queue, wake any sleeping threads, and join all of the threads. +*/ +static void POOL_join(POOL_ctx *ctx) { + /* Shut down the queue */ + pthread_mutex_lock(&ctx->queueMutex); + ctx->shutdown = 1; + pthread_mutex_unlock(&ctx->queueMutex); + /* Wake up sleeping threads */ + pthread_cond_broadcast(&ctx->queuePushCond); + pthread_cond_broadcast(&ctx->queuePopCond); + /* Join all of the threads */ + { size_t i; + for (i = 0; i < ctx->numThreads; ++i) { + pthread_join(ctx->threads[i], NULL); + } } +} + +void POOL_free(POOL_ctx *ctx) { + if (!ctx) { return; } + POOL_join(ctx); + pthread_mutex_destroy(&ctx->queueMutex); + pthread_cond_destroy(&ctx->queuePushCond); + pthread_cond_destroy(&ctx->queuePopCond); + if (ctx->queue) free(ctx->queue); + if (ctx->threads) free(ctx->threads); + free(ctx); +} + +void POOL_add(void *ctxVoid, POOL_function function, void *opaque) { + POOL_ctx *ctx = (POOL_ctx *)ctxVoid; + if (!ctx) { return; } + + pthread_mutex_lock(&ctx->queueMutex); + { POOL_job const job = {function, opaque}; + /* Wait until there is space in the queue for the new job */ + size_t newTail = (ctx->queueTail + 1) % ctx->queueSize; + while 
(ctx->queueHead == newTail && !ctx->shutdown) { + pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + newTail = (ctx->queueTail + 1) % ctx->queueSize; + } + /* The queue is still going => there is space */ + if (!ctx->shutdown) { + ctx->queue[ctx->queueTail] = job; + ctx->queueTail = newTail; + } + } + pthread_mutex_unlock(&ctx->queueMutex); + pthread_cond_signal(&ctx->queuePopCond); +} + +#else /* ZSTD_MULTITHREAD not defined */ +/* No multi-threading support */ + +/* We don't need any data, but if it is empty malloc() might return NULL. */ +struct POOL_ctx_s { + int data; +}; + +POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) { + (void)numThreads; + (void)queueSize; + return (POOL_ctx *)malloc(sizeof(POOL_ctx)); +} + +void POOL_free(POOL_ctx *ctx) { + if (ctx) free(ctx); +} + +void POOL_add(void *ctx, POOL_function function, void *opaque) { + (void)ctx; + function(opaque); +} + +#endif /* ZSTD_MULTITHREAD */ diff --git a/thirdparty/zstd/common/pool.h b/thirdparty/zstd/common/pool.h new file mode 100644 index 0000000000..50cb25b12c --- /dev/null +++ b/thirdparty/zstd/common/pool.h @@ -0,0 +1,56 @@ +/** + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ +#ifndef POOL_H +#define POOL_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +#include <stddef.h> /* size_t */ + +typedef struct POOL_ctx_s POOL_ctx; + +/*! POOL_create() : + Create a thread pool with at most `numThreads` threads. + `numThreads` must be at least 1. + The maximum number of queued jobs before blocking is `queueSize`. + `queueSize` must be at least 1. + @return : The POOL_ctx pointer on success else NULL. +*/ +POOL_ctx *POOL_create(size_t numThreads, size_t queueSize); + +/*! 
POOL_free() : + Free a thread pool returned by POOL_create(). +*/ +void POOL_free(POOL_ctx *ctx); + +/*! POOL_function : + The function type that can be added to a thread pool. +*/ +typedef void (*POOL_function)(void *); +/*! POOL_add_function : + The function type for a generic thread pool add function. +*/ +typedef void (*POOL_add_function)(void *, POOL_function, void *); + +/*! POOL_add() : + Add the job `function(opaque)` to the thread pool. + Possibly blocks until there is room in the queue. + Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed. +*/ +void POOL_add(void *ctx, POOL_function function, void *opaque); + + +#if defined (__cplusplus) +} +#endif + +#endif diff --git a/thirdparty/zstd/common/threading.c b/thirdparty/zstd/common/threading.c new file mode 100644 index 0000000000..32d58796a9 --- /dev/null +++ b/thirdparty/zstd/common/threading.c @@ -0,0 +1,80 @@ + +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + */ + +/** + * This file will hold wrapper for systems, which do not support pthreads + */ + +/* When ZSTD_MULTITHREAD is not defined, this file would become an empty translation unit. +* Include some ISO C header code to prevent this and portably avoid related warnings. 
+* (Visual C++: C4206 / GCC: -Wpedantic / Clang: -Wempty-translation-unit) +*/ +#include <stddef.h> + + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ + + +/* === Dependencies === */ +#include <process.h> +#include <errno.h> +#include "threading.h" + + +/* === Implementation === */ + +static unsigned __stdcall worker(void *arg) +{ + pthread_t* const thread = (pthread_t*) arg; + thread->arg = thread->start_routine(thread->arg); + return 0; +} + +int pthread_create(pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg) +{ + (void)unused; + thread->arg = arg; + thread->start_routine = start_routine; + thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); + + if (!thread->handle) + return errno; + else + return 0; +} + +int _pthread_join(pthread_t * thread, void **value_ptr) /* waits for the thread to finish, stores its result in *value_ptr when non-NULL, then releases the thread handle; returns 0 on success */ +{ + DWORD result; + + if (!thread->handle) return 0; + + result = WaitForSingleObject(thread->handle, INFINITE); + switch (result) { + case WAIT_OBJECT_0: + if (value_ptr) *value_ptr = thread->arg; CloseHandle(thread->handle); thread->handle = NULL; /* a handle returned by _beginthreadex must be closed by its owner; NULL makes a repeated join a no-op */ + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return GetLastError(); + } +} + +#endif /* ZSTD_MULTITHREAD */ diff --git a/thirdparty/zstd/common/threading.h b/thirdparty/zstd/common/threading.h new file mode 100644 index 0000000000..c0086139ea --- /dev/null +++ b/thirdparty/zstd/common/threading.h @@ -0,0 +1,104 @@ + +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. 
+ * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + */ + +#ifndef THREADING_H_938743 +#define THREADING_H_938743 + +#if defined (__cplusplus) +extern "C" { +#endif + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ +#ifdef WINVER +# undef WINVER +#endif +#define WINVER 0x0600 + +#ifdef _WIN32_WINNT +# undef _WIN32_WINNT +#endif +#define _WIN32_WINNT 0x0600 + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +#include <windows.h> + +/* mutex */ +#define pthread_mutex_t CRITICAL_SECTION +#define pthread_mutex_init(a,b) InitializeCriticalSection((a)) +#define pthread_mutex_destroy(a) DeleteCriticalSection((a)) +#define pthread_mutex_lock(a) EnterCriticalSection((a)) +#define pthread_mutex_unlock(a) LeaveCriticalSection((a)) + +/* condition variable */ +#define pthread_cond_t CONDITION_VARIABLE +#define pthread_cond_init(a, b) InitializeConditionVariable((a)) +#define pthread_cond_destroy(a) /* No delete */ +#define pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) +#define pthread_cond_signal(a) WakeConditionVariable((a)) +#define pthread_cond_broadcast(a) WakeAllConditionVariable((a)) + +/* pthread_create() and pthread_join() */ +typedef struct { + HANDLE handle; + void* (*start_routine)(void*); + void* arg; +} pthread_t; + +int pthread_create(pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg); + +#define pthread_join(a, b) _pthread_join(&(a), (b)) +int _pthread_join(pthread_t* thread, void** value_ptr); + +/** + * add here more wrappers as required + */ + + +#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ +/* === POSIX Systems === */ +# include <pthread.h> + +#else /* ZSTD_MULTITHREAD not defined */ +/* No multithreading support */ + +#define pthread_mutex_t int /* #define rather than 
typedef, as sometimes pthread support is implicit, resulting in duplicated symbols */ +#define pthread_mutex_init(a,b) +#define pthread_mutex_destroy(a) +#define pthread_mutex_lock(a) +#define pthread_mutex_unlock(a) + +#define pthread_cond_t int +#define pthread_cond_init(a,b) +#define pthread_cond_destroy(a) +#define pthread_cond_wait(a,b) +#define pthread_cond_signal(a) +#define pthread_cond_broadcast(a) + +/* do not use pthread_t */ + +#endif /* ZSTD_MULTITHREAD */ + +#if defined (__cplusplus) +} +#endif + +#endif /* THREADING_H_938743 */ diff --git a/thirdparty/zstd/common/xxhash.c b/thirdparty/zstd/common/xxhash.c new file mode 100644 index 0000000000..eb44222c5f --- /dev/null +++ b/thirdparty/zstd/common/xxhash.c @@ -0,0 +1,869 @@ +/* +* xxHash - Fast Hash algorithm +* Copyright (C) 2012-2016, Yann Collet +* +* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following disclaimer +* in the documentation and/or other materials provided with the +* distribution. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* You can contact the author at : +* - xxHash homepage: http://www.xxhash.com +* - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independant Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. + * Should endian-independance be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. 
+ * The check costs one initial branch per hash; set to 0 when the input data + * is guaranteed to be aligned. + */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for malloc(), free() */ +#include <stdlib.h> +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free (void* p) { free(p); } +/* for memcpy() */ +#include <string.h> +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } + +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +#endif +#include "xxhash.h" + + +/* ************************************* +* Compiler Specific Options +***************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# define FORCE_INLINE static __forceinline +#else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +# else +# define FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +#endif + + +/* ************************************* +* Basic Types +***************************************/ +#ifndef MEM_MODULE +# define MEM_MODULE +# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include <stdint.h> + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +# else + 
typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */ +# endif +#endif + + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. 
+ * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN + static const int g_one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) 
+#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + +FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************* +* Constants +***************************************/ +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 = 668265263U; +static const U32 PRIME32_5 = 374761393U; + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; + +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ************************** +* Utils +****************************/ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + + +/* *************************** +* Simple Hash Functions +*****************************/ + +static U32 XXH32_round(U32 seed, U32 input) +{ + seed += input * PRIME32_2; + seed = XXH_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} + +FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) { + 
const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; + v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; + v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; + v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; + } while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p+4<=bEnd) { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p<bEnd) { + h32 += (*p) * PRIME32_5; + h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; + p++; + } + + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_CREATESTATE_STATIC(state); + XXH32_reset(state, seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +static U64 XXH64_round(U64 acc, U64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= 
PRIME64_1; + return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p<bEnd) { + h64 ^= (*p) * PRIME64_5; + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + p++; + } + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + 
XXH64_CREATESTATE_STATIC(state); + XXH64_reset(state, seed); + XXH64_update(state, input, len); + return XXH64_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +/* ************************************************** +* Advanced Hash Functions +****************************************************/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + + +/*** Hash feed ***/ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* 
statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (unsigned)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = 
v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem32; + const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; + U32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } + + h32 += state->total_len_32; + + while (p+4<=bEnd) { + h32 += XXH_readLE32(p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p<bEnd) { + h32 += (*p) * PRIME32_5; + h32 = XXH_rotl32(h32, 11) * PRIME32_1; + p++; + } + + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); +} + + + +/* **** XXH64 **** */ + +FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return 
XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem64; + const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; + U64 h64; + + if (state->total_len >= 32) { + U64 const v1 
= state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 + PRIME64_5; + } + + h64 += (U64) state->total_len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p<bEnd) { + h64 ^= (*p) * PRIME64_5; + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + p++; + } + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); +} + + +/* ************************** +* Canonical representation +****************************/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. 
+*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} diff --git a/thirdparty/zstd/common/xxhash.h b/thirdparty/zstd/common/xxhash.h new file mode 100644 index 0000000000..9bad1f59f6 --- /dev/null +++ b/thirdparty/zstd/common/xxhash.h @@ -0,0 +1,305 @@ +/* + xxHash - Extremely Fast Hash algorithm + Header File + Copyright (C) 2012-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MurmurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of the quality of the hash function. +It depends on successfully passing the SMHasher test set. +10 is a perfect score. + +A 64-bits version, named XXH64, has been available since r35. +It offers much better speed, but for 64-bits applications only.
+Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + + +/* **************************** +* Definitions +******************************/ +#include <stddef.h> /* size_t */ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + +/* **************************** +* API modifier +******************************/ +/** XXH_PRIVATE_API +* This is useful if you want to include xxhash functions in `static` mode +* in order to inline them, and remove their symbol from the public list. +* Methodology : +* #define XXH_PRIVATE_API +* #include "xxhash.h" +* `xxhash.c` is automatically included. +* It's not useful to compile and link it as a separate module anymore. +*/ +#ifdef XXH_PRIVATE_API +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else +# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif /* XXH_PRIVATE_API */ + +/*!XXH_NAMESPACE, aka Namespace Emulation : + +If you want to include _and expose_ xxHash functions from within your own library, +but also want to avoid symbol collisions with another library which also includes xxHash, + +you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library +with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). 
+ +Note that no change is required within the calling program as long as it includes `xxhash.h` : +regular symbol name will be automatically translated by this header. +*/ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 2 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/* **************************** +* Simple Hash Functions +******************************/ 
+typedef unsigned int XXH32_hash_t; +typedef unsigned long long XXH64_hash_t; + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*! +XXH32() : + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). +*/ + + +/* **************************** +* Streaming Hash Functions +******************************/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ + +/*! 
State allocation, compatible with dynamic libraries */ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + + +/* hash streaming : reset() seeds a state, update() feeds input, digest() returns the hash of everything fed so far */ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/* +These functions generate the xxHash of an input provided in multiple segments. +Note that, for small inputs, they are slower than the single-call functions, due to state management. +For small inputs, prefer `XXH32()` and `XXH64()` . + +An XXH state must first be allocated, using XXH*_createState() . + +Start a new hash by initializing the state with a seed, using XXH*_reset(). + +Then, feed the hash state by calling XXH*_update() as many times as necessary. +The input must be allocated and read-accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + +Finally, a hash value can be produced at any time, by using XXH*_digest(). +This function returns the nn-bit hash as an XXH32_hash_t (unsigned int) or XXH64_hash_t (unsigned long long). + +It's still possible to continue inserting input into the hash state after a digest, +and generate new hashes later on, by calling XXH*_digest() again. + +When done, free XXH state space if it was allocated dynamically.
+*/ + + +/* ************************** +* Utils +****************************/ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ +# define restrict /* `restrict` is not available before C99 : make it expand to nothing */ +#endif + +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); + + +/* ************************** +* Canonical representation +****************************/ +/* The default result types of XXH functions are primitive unsigned 32-bit and 64-bit integers. +* The canonical representation uses the human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of a hash result into and from its canonical format. +* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. +*/ +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + +#endif /* XXHASH_H_5627135585666179 */ + + + +/* ================================================================================================ + This section contains definitions which are not guaranteed to remain stable. + They may change in future versions, becoming incompatible with a different version of the library. + They shall only be used with static linking. + Never use these definitions in association with dynamic linking !
+=================================================================================================== */ +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) +#define XXH_STATIC_H_3543687687345 + +/* These definitions are only meant to allow allocation of an XXH state + statically, on the stack, or inside a struct for example. + Do not use members directly. */ + + struct XXH32_state_s { + unsigned total_len_32; + unsigned large_len; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; /* buffer defined as U32 for alignment */ + unsigned memsize; + unsigned reserved; /* never read nor written, will be removed in a future version */ + }; /* typedef'd to XXH32_state_t */ + + struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ + unsigned memsize; + unsigned reserved[2]; /* never read nor written, will be removed in a future version */ + }; /* typedef'd to XXH64_state_t */ + + +# ifdef XXH_PRIVATE_API +# include "xxhash.c" /* include xxhash functions as `static`, for inlining */ +# endif + +#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) */ + + +#if defined (__cplusplus) +} +#endif diff --git a/thirdparty/zstd/common/zstd_common.c b/thirdparty/zstd/common/zstd_common.c new file mode 100644 index 0000000000..8408a589ae --- /dev/null +++ b/thirdparty/zstd/common/zstd_common.c @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory.
+ */ + + + +/*-************************************* +* Dependencies +***************************************/ +#include <stdlib.h> /* malloc, free */ +#include "error_private.h" +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */ + + +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; } + + +/*-**************************************** +* ZSTD Error Management +******************************************/ +/*! ZSTD_isError() : +* tells if a return value is an error code */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + +/*! ZSTD_getErrorName() : +* provides error code string from function result (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + +/*! ZSTD_getErrorCode() : +* convert a `size_t` function result into a proper ZSTD_ErrorCode enum */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } + +/*!
ZSTD_getErrorString() : +* provides error code string from a ZSTD_ErrorCode enum value */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } + + +/*=************************************************************** +* Custom allocator +****************************************************************/ +/* default allocator : plain stdlib malloc()/free(); the `opaque` argument is ignored */ +void* ZSTD_defaultAllocFunction(void* opaque, size_t size) +{ + void* address = malloc(size); + (void)opaque; + return address; +} + +void ZSTD_defaultFreeFunction(void* opaque, void* address) +{ + (void)opaque; + free(address); +} + +void* ZSTD_malloc(size_t size, ZSTD_customMem customMem) +{ + return customMem.customAlloc(customMem.opaque, size); +} + +void ZSTD_free(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) + customMem.customFree(customMem.opaque, ptr); +} diff --git a/thirdparty/zstd/common/zstd_errors.h b/thirdparty/zstd/common/zstd_errors.h new file mode 100644 index 0000000000..3d579d9693 --- /dev/null +++ b/thirdparty/zstd/common/zstd_errors.h @@ -0,0 +1,75 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory.
+ */ + +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*===== dependency =====*/ +#include <stddef.h> /* size_t */ + + +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) +#else +# define ZSTDERRORLIB_VISIBILITY +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* Not strictly required, but lets the compiler generate better code, saving a function pointer load from the IAT and an indirect jump. */ +#else +# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY +#endif + +/*-**************************************** +* error codes list +******************************************/ +typedef enum { + ZSTD_error_no_error, + ZSTD_error_GENERIC, + ZSTD_error_prefix_unknown, + ZSTD_error_version_unsupported, + ZSTD_error_parameter_unknown, + ZSTD_error_frameParameter_unsupported, + ZSTD_error_frameParameter_unsupportedBy32bits, + ZSTD_error_frameParameter_windowTooLarge, + ZSTD_error_compressionParameter_unsupported, + ZSTD_error_init_missing, + ZSTD_error_memory_allocation, + ZSTD_error_stage_wrong, + ZSTD_error_dstSize_tooSmall, + ZSTD_error_srcSize_wrong, + ZSTD_error_corruption_detected, + ZSTD_error_checksum_wrong, + ZSTD_error_tableLog_tooLarge, + ZSTD_error_maxSymbolValue_tooLarge, + ZSTD_error_maxSymbolValue_tooSmall, + ZSTD_error_dictionary_corrupted, + ZSTD_error_dictionary_wrong, + ZSTD_error_dictionaryCreation_failed, + ZSTD_error_maxCode +} ZSTD_ErrorCode; + +/*!
ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be compared directly against the enum list published in this header (zstd_errors.h) */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_ERRORS_H_398273423 */ diff --git a/thirdparty/zstd/common/zstd_internal.h b/thirdparty/zstd/common/zstd_internal.h new file mode 100644 index 0000000000..2533333ba8 --- /dev/null +++ b/thirdparty/zstd/common/zstd_internal.h @@ -0,0 +1,284 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE + +/*-******************************************************* +* Compiler specifics +*********************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include <intrin.h> /* For Visual 2005 */ +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +# else +# define FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +#endif + +#ifdef _MSC_VER +# define FORCE_NOINLINE static __declspec(noinline) +#else +# ifdef __GNUC__ +# define FORCE_NOINLINE static
__attribute__((__noinline__)) +# else +# define FORCE_NOINLINE static +# endif +#endif + + +/*-************************************* +* Dependencies +***************************************/ +#include "mem.h" +#include "error_private.h" +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#endif +#include "xxhash.h" /* XXH_reset, update, digest */ + + +/*-************************************* +* shared macros +***************************************/ +#undef MIN +#undef MAX +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) +#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */ +#define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check; on error, return ERROR(e) instead of the original code */ + + +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_NUM (1<<12) +#define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */ + +#define ZSTD_REP_NUM 3 /* number of repcodes */ +#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */ +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM) +static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; + +#define ZSTD_BLOCKHEADERSIZE 3 /* the C standard doesn't allow a `static const` variable to be initialized from another `static const` variable, hence the macro */ +static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; + +#define
MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ + +#define HufLog 12 +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; + +#define LONGNBSEQ 0x7F00 + +#define MINMATCH 3 + +#define Litbits 8 +#define MaxLit ((1<<Litbits) - 1) +#define MaxML 52 +#define MaxLL 35 +#define MaxOff 28 +#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */ +#define MLFSELog 9 +#define LLFSELog 9 +#define OffFSELog 8 + +static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12, + 13,14,15,16 }; +static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, + -1,-1,-1,-1 }; +#define LL_DEFAULTNORMLOG 6 /* for static allocation */ +static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; + +static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11, + 12,13,14,15,16 }; +static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, + -1,-1,-1,-1,-1 }; +#define ML_DEFAULTNORMLOG 6 /* for static allocation */ +static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; + +static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 }; +#define OF_DEFAULTNORMLOG 5 /* for static allocation */ +static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; + + +/*-******************************************* +* Shared functions to include for inlining +*********************************************/ +static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst,
src, 8); } +#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } + +/*! ZSTD_wildcopy() : +* custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) : it copies in 8-byte steps and the do/while loop always runs at least once */ +#define WILDCOPY_OVERLENGTH 8 +MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length) +{ + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + length; + do + COPY8(op, ip) + while (op < oend); +} + +MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* same as ZSTD_wildcopy(), but bounded by an end pointer; should be faster for decoding, but strangely, not verified on all platforms */ +{ + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + BYTE* const oend = (BYTE*)dstEnd; + do + COPY8(op, ip) + while (op < oend); +} + + +/*-******************************************* +* Private interfaces +*********************************************/ +typedef struct ZSTD_stats_s ZSTD_stats_t; + +typedef struct { + U32 off; + U32 len; +} ZSTD_match_t; + +typedef struct { + U32 price; + U32 off; + U32 mlen; + U32 litlen; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_optimal_t; + + +typedef struct seqDef_s { + U32 offset; + U16 litLength; + U16 matchLength; +} seqDef; + + +typedef struct { + seqDef* sequencesStart; + seqDef* sequences; + BYTE* litStart; + BYTE* lit; + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ + U32 longLengthPos; + /* opt */ + ZSTD_optimal_t* priceTable; + ZSTD_match_t* matchTable; + U32* matchLengthFreq; + U32* litLengthFreq; + U32* litFreq; + U32* offCodeFreq; + U32 matchLengthSum; + U32 matchSum; + U32 litLengthSum; + U32 litSum; + U32 offCodeSum; + U32 log2matchLengthSum; + U32 log2matchSum; + U32 log2litLengthSum; + U32 log2litSum; + U32 log2offCodeSum; + U32 factor; + U32 staticPrices; + U32 cachedPrice; + U32 cachedLitLength; + const BYTE* cachedLiterals; +} seqStore_t; + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); +void ZSTD_seqToCodes(const seqStore_t*
seqStorePtr); +int ZSTD_isSkipFrame(ZSTD_DCtx* dctx); + +/* custom memory allocation functions */ +void* ZSTD_defaultAllocFunction(void* opaque, size_t size); +void ZSTD_defaultFreeFunction(void* opaque, void* address); +#ifndef ZSTD_DLL_IMPORT +static const ZSTD_customMem defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL }; +#endif +void* ZSTD_malloc(size_t size, ZSTD_customMem customMem); +void ZSTD_free(void* ptr, ZSTD_customMem customMem); + + +/*====== common function : ZSTD_highbit32() returns the index of the highest set bit of `val` ======*/ + +MEM_STATIC U32 ZSTD_highbit32(U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + _BitScanReverse(&r, val); + return (unsigned)r; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic; NOTE(review): __builtin_clz(0) is undefined, so val is presumably never 0 here - confirm against callers */ + return 31 - __builtin_clz(val); +# else /* Software version */ + static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + int r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + + +/* hidden functions */ + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); + + +#endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/thirdparty/zstd/compress/fse_compress.c b/thirdparty/zstd/compress/fse_compress.c new file mode 100644 index 0000000000..26e8052ddc --- /dev/null +++ b/thirdparty/zstd/compress/fse_compress.c @@ -0,0 +1,857 @@ +/* ****************************************************************** + FSE : Finite State Entropy encoder + Copyright (C) 2013-2015, Yann Collet.
+ + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include <intrin.h> /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +#else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +# else +# define FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +#include <stdlib.h> /* malloc, free, qsort */ +#include <string.h> /* memcpy, memset */ +#include <stdio.h> /* printf (debug) */ +#include "bitstream.h" +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* compile-time check : the enum initializer divides by zero when `c` is false; use only *after* variable declarations */ + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + The code below is designed to be included + for type-specific functions (template emulation in C). + The objective is to write these functions only once, for improved maintenance. +*/ + +/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * wkspSize must be at least `(1<<tableLog) * sizeof(FSE_FUNCTION_TYPE)` bytes, otherwise ERROR(tableLog_tooLarge) is returned; sizing it for the largest tableLog handles the worst case. + * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements. + */ +size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) +{ + U32 const tableSize = 1 << tableLog; + U32 const tableMask = tableSize - 1; + void* const ptr = ct; + U16* const tableU16 = ( (U16*) ptr) + 2; + void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ?
tableSize>>1 : 1) ; + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + U32 const step = FSE_TABLESTEP(tableSize); + U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; + + FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace; + U32 highThreshold = tableSize-1; + + /* CTable header : reject a workspace that is too small, then store tableLog and maxSymbolValue in the two U16 slots before tableU16 */ + if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); + tableU16[-2] = (U16) tableLog; + tableU16[-1] = (U16) maxSymbolValue; + + /* For explanations on how to distribute symbol values over the table : + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + + /* symbol start positions */ + { U32 u; + cumul[0] = 0; + for (u=1; u<=maxSymbolValue+1; u++) { + if (normalizedCounter[u-1]==-1) { /* Low probability symbol (count -1) : takes one cell at the top end of the table */ + cumul[u] = cumul[u-1] + 1; + tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); + } else { + cumul[u] = cumul[u-1] + normalizedCounter[u-1]; + } } + cumul[maxSymbolValue+1] = tableSize+1; + } + + /* Spread symbols */ + { U32 position = 0; + U32 symbol; + for (symbol=0; symbol<=maxSymbolValue; symbol++) { + int nbOccurences; + for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) { + tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; + position = (position + step) & tableMask; + while (position > highThreshold) position = (position + step) & tableMask; /* skip the cells reserved for low probability symbols */ + } } + + if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ + } + + /* Build table */ + { U32 u; for (u=0; u<tableSize; u++) { + FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */ + tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */ + } } + + /* Build Symbol Transformation Table */ + { unsigned total = 0; + unsigned s; + for (s=0; s<=maxSymbolValue; s++) { + switch (normalizedCounter[s]) + { + case 0:
break; + + case -1: + case 1: + symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog); + symbolTT[s].deltaFindState = total - 1; + total ++; + break; + default : + { + U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1); + U32 const minStatePlus = normalizedCounter[s] << maxBitsOut; + symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; + symbolTT[s].deltaFindState = total - normalizedCounter[s]; + total += normalizedCounter[s]; + } } } } + + return 0; +} + + +size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* on-stack scratch buffer; memset() is not necessary, even if a static analyzer complains about it */ + return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol)); +} + + + +#ifndef FSE_COMMONDEFS_ONLY + +/*-************************************************************** +* FSE NCount encoding-decoding +****************************************************************/ +size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) +{ + size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3; + return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ?
use default */ +} + +static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) +{ + BYTE* const ostart = (BYTE*) header; + BYTE* out = ostart; + BYTE* const oend = ostart + headerBufferSize; + int nbBits; + const int tableSize = 1 << tableLog; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + int previous0 = 0; + + bitStream = 0; + bitCount = 0; + /* Table Size : stored as (tableLog - FSE_MIN_TABLELOG) on 4 bits */ + bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; + bitCount += 4; + + /* Init */ + remaining = tableSize+1; /* +1 for extra accuracy */ + threshold = tableSize; + nbBits = tableLog+1; + + while (remaining>1) { /* stops at 1 */ + if (previous0) { + unsigned start = charnum; + while (!normalizedCounter[charnum]) charnum++; + while (charnum >= start+24) { + start+=24; + bitStream += 0xFFFFU << bitCount; + if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE) bitStream; + out[1] = (BYTE)(bitStream>>8); + out+=2; + bitStream>>=16; + } + while (charnum >= start+3) { + start+=3; + bitStream += 3 << bitCount; + bitCount += 2; + } + bitStream += (charnum-start) << bitCount; + bitCount += 2; + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + { int count = normalizedCounter[charnum++]; + int const max = (2*threshold-1)-remaining; + remaining -= count < 0 ? -count : count; + count++; /* +1 for extra accuracy */ + if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...)
[threshold+max 2*threshold[ */ + bitStream += count << bitCount; + bitCount += nbBits; + bitCount -= (count<max); + previous0 = (count==1); + if (remaining<1) return ERROR(GENERIC); + while (remaining<threshold) nbBits--, threshold>>=1; + } + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + + /* flush remaining bitStream */ + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out+= (bitCount+7) /8; + + if (charnum > maxSymbolValue + 1) return ERROR(GENERIC); + + return (out-ostart); +} + + +size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported; the calls below pick the bounds-checked writer (writeIsSafe==0) when bufferSize is below FSE_NCountWriteBound() */ + + if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); + + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); +} + + + +/*-************************************************************** +* Counting histogram +****************************************************************/ +/*! FSE_count_simple + This function counts byte values within `src`, and store the histogram into table `count`. + It doesn't use any additional memory. + But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. + For this reason, prefer using a table `count` with 256 elements.
+ @return : count of most numerous element +*/ +size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const end = ip + srcSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max=0; + + memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); + if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } + + while (ip<end) count[*ip++]++; + + while (!count[maxSymbolValue]) maxSymbolValue--; + *maxSymbolValuePtr = maxSymbolValue; + + { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; } + + return (size_t)max; +} + + +/* FSE_count_parallel_wksp() : + * Same as FSE_count_parallel(), but using an externally provided scratch buffer. + * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */ +static size_t FSE_count_parallel_wksp( + unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + unsigned checkMax, unsigned* const workSpace) +{ + const BYTE* ip = (const BYTE*)source; + const BYTE* const iend = ip+sourceSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max=0; + U32* const Counting1 = workSpace; + U32* const Counting2 = Counting1 + 256; + U32* const Counting3 = Counting2 + 256; + U32* const Counting4 = Counting3 + 256; + + memset(Counting1, 0, 4*256*sizeof(unsigned)); + + /* safety checks */ + if (!sourceSize) { + memset(count, 0, maxSymbolValue + 1); + *maxSymbolValuePtr = 0; + return 0; + } + if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ + + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; + while (ip < iend-15) { + U32 c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = 
cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + } + ip-=4; + } + + /* finish last symbols */ + while (ip<iend) Counting1[*ip++]++; + + if (checkMax) { /* verify stats will fit into destination table */ + U32 s; for (s=255; s>maxSymbolValue; s--) { + Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; + if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); + } } + + { U32 s; for (s=0; s<=maxSymbolValue; s++) { + count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; + if (count[s] > max) max = count[s]; + } } + + while (!count[maxSymbolValue]) maxSymbolValue--; + *maxSymbolValuePtr = maxSymbolValue; + return (size_t)max; +} + +/* FSE_countFast_wksp() : + * Same as FSE_countFast(), but using an externally provided scratch buffer. + * `workSpace` size must be table of >= `1024` unsigned */ +size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, unsigned* workSpace) +{ + if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); + return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); +} + +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + unsigned tmpCounters[1024]; + return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters); +} + +/* FSE_count_wksp() : + * Same as FSE_count(), but using an externally provided scratch buffer. 
/* FSE_count_wksp() :
 * Same as FSE_count(), but the scratch memory is supplied by the caller.
 * `workSpace` must be a table of >= `1024` unsigned.
 * When `*maxSymbolValuePtr` is below 255, the checked parallel counter is used;
 * otherwise the limit is set to 255 and the fast path is taken. */
size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
                 const void* source, size_t sourceSize, unsigned* workSpace)
{
    unsigned const mustCheckRange = (*maxSymbolValuePtr < 255);

    if (!mustCheckRange) {
        *maxSymbolValuePtr = 255;
        return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
    }

    return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
}

/* FSE_count() :
 * Histogram of `src`, using a local 1024-entry scratch table.
 * @return : whatever FSE_count_wksp() returns */
size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
                 const void* src, size_t srcSize)
{
    unsigned scratch[1024];
    return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, scratch);
}



/*-**************************************************************
*  FSE Compression Code
****************************************************************/
+*/ +size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); +} + +FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) +{ + size_t size; + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); + return (FSE_CTable*)malloc(size); +} + +void FSE_freeCTable (FSE_CTable* ct) { free(ct); } + +/* provides the minimum logSize to safely represent a distribution */ +static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) +{ + U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; + return minBits; +} + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) +{ + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 tableLog = maxTableLog; + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; + if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; + return tableLog; +} + +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); +} + + +/* Secondary normalization method. + To be used when primary method fails. 
*/ + +static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) +{ + short const NOT_YET_ASSIGNED = -2; + U32 s; + U32 distributed = 0; + U32 ToDistribute; + + /* Init */ + U32 const lowThreshold = (U32)(total >> tableLog); + U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == 0) { + norm[s]=0; + continue; + } + if (count[s] <= lowThreshold) { + norm[s] = -1; + distributed++; + total -= count[s]; + continue; + } + if (count[s] <= lowOne) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + + norm[s]=NOT_YET_ASSIGNED; + } + ToDistribute = (1 << tableLog) - distributed; + + if ((total / ToDistribute) > lowOne) { + /* risk of rounding to zero */ + lowOne = (U32)((total * 3) / (ToDistribute * 2)); + for (s=0; s<=maxSymbolValue; s++) { + if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } } + ToDistribute = (1 << tableLog) - distributed; + } + + if (distributed == maxSymbolValue+1) { + /* all values are pretty poor; + probably incompressible data (should have already been detected); + find max, then give all remaining points to max */ + U32 maxV = 0, maxC = 0; + for (s=0; s<=maxSymbolValue; s++) + if (count[s] > maxC) maxV=s, maxC=count[s]; + norm[maxV] += (short)ToDistribute; + return 0; + } + + if (total == 0) { + /* all of the symbols were low enough for the lowOne or lowThreshold */ + for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) + if (norm[s] > 0) ToDistribute--, norm[s]++; + return 0; + } + + { U64 const vStepLog = 62 - tableLog; + U64 const mid = (1ULL << (vStepLog-1)) - 1; + U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */ + U64 tmpTotal = mid; + for (s=0; s<=maxSymbolValue; s++) { + if (norm[s]==NOT_YET_ASSIGNED) { + U64 const end = tmpTotal + (count[s] * rStep); + U32 const sStart = 
(U32)(tmpTotal >> vStepLog); + U32 const sEnd = (U32)(end >> vStepLog); + U32 const weight = sEnd - sStart; + if (weight < 1) + return ERROR(GENERIC); + norm[s] = (short)weight; + tmpTotal = end; + } } } + + return 0; +} + + +size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t total, + unsigned maxSymbolValue) +{ + /* Sanity checks */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ + if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ + + { U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + U64 const scale = 62 - tableLog; + U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */ + U64 const vStep = 1ULL<<(scale-20); + int stillToDistribute = 1<<tableLog; + unsigned s; + unsigned largest=0; + short largestP=0; + U32 lowThreshold = (U32)(total >> tableLog); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == total) return 0; /* rle special case */ + if (count[s] == 0) { normalizedCounter[s]=0; continue; } + if (count[s] <= lowThreshold) { + normalizedCounter[s] = -1; + stillToDistribute--; + } else { + short proba = (short)((count[s]*step) >> scale); + if (proba<8) { + U64 restToBeat = vStep * rtbTable[proba]; + proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat; + } + if (proba > largestP) largestP=proba, largest=s; + normalizedCounter[s] = proba; + stillToDistribute -= proba; + } } + if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { + /* corner case, need another normalization method */ + size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); + if (FSE_isError(errorCode)) return errorCode; + } + else normalizedCounter[largest] += 
(short)stillToDistribute; + } + +#if 0 + { /* Print Table (debug) */ + U32 s; + U32 nTotal = 0; + for (s=0; s<=maxSymbolValue; s++) + printf("%3i: %4i \n", s, normalizedCounter[s]); + for (s=0; s<=maxSymbolValue; s++) + nTotal += abs(normalizedCounter[s]); + if (nTotal != (1U<<tableLog)) + printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog); + getchar(); + } +#endif + + return tableLog; +} + + +/* fake FSE_CTable, for raw (uncompressed) input */ +size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) +{ + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSymbolValue = tableMask; + void* const ptr = ct; + U16* const tableU16 = ( (U16*) ptr) + 2; + void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */ + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* header */ + tableU16[-2] = (U16) nbBits; + tableU16[-1] = (U16) maxSymbolValue; + + /* Build table */ + for (s=0; s<tableSize; s++) + tableU16[s] = (U16)(tableSize + s); + + /* Build Symbol Transformation Table */ + { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); + for (s=0; s<=maxSymbolValue; s++) { + symbolTT[s].deltaNbBits = deltaNbBits; + symbolTT[s].deltaFindState = s-1; + } } + + return 0; +} + +/* fake FSE_CTable, for rle input (always same symbol) */ +size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) +{ + void* ptr = ct; + U16* tableU16 = ( (U16*) ptr) + 2; + void* FSCTptr = (U32*)ptr + 2; + FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr; + + /* header */ + tableU16[-2] = (U16) 0; + tableU16[-1] = (U16) symbolValue; + + /* Build table */ + tableU16[0] = 0; + tableU16[1] = 0; /* just in case */ + + /* Build Symbol Transformation Table */ + symbolTT[symbolValue].deltaNbBits = 0; + 
symbolTT[symbolValue].deltaFindState = 0; + + return 0; +} + + +static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, + const void* src, size_t srcSize, + const FSE_CTable* ct, const unsigned fast) +{ + const BYTE* const istart = (const BYTE*) src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip=iend; + + BIT_CStream_t bitC; + FSE_CState_t CState1, CState2; + + /* init */ + if (srcSize <= 2) return 0; + { size_t const initError = BIT_initCStream(&bitC, dst, dstSize); + if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ } + +#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) + + if (srcSize & 1) { + FSE_initCState2(&CState1, ct, *--ip); + FSE_initCState2(&CState2, ct, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } else { + FSE_initCState2(&CState2, ct, *--ip); + FSE_initCState2(&CState1, ct, *--ip); + } + + /* join to mod 4 */ + srcSize -= 2; + if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } + + /* 2 or 4 encoding per loop */ + while ( ip>istart ) { + + FSE_encodeSymbol(&bitC, &CState2, *--ip); + + if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ + FSE_FLUSHBITS(&bitC); + + FSE_encodeSymbol(&bitC, &CState1, *--ip); + + if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + } + + FSE_FLUSHBITS(&bitC); + } + + FSE_flushCState(&bitC, &CState2); + FSE_flushCState(&bitC, &CState1); + return BIT_closeCStream(&bitC); +} + +size_t FSE_compress_usingCTable (void* dst, size_t dstSize, + const void* src, size_t srcSize, + const FSE_CTable* ct) +{ + unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + + if (fast) + 
return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); + else + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); +} + + +size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } + +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). + * `wkspSize` size must be `(1<<tableLog)`. + */ +size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + U32 count[FSE_MAX_SYMBOL_VALUE+1]; + S16 norm[FSE_MAX_SYMBOL_VALUE+1]; + FSE_CTable* CTable = (FSE_CTable*)workSpace; + size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue); + void* scratchBuffer = (void*)(CTable + CTableSize); + size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable)); + + /* init conditions */ + if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge); + if (srcSize <= 1) return 0; /* Not compressible */ + if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG; + + /* Scan input and build symbol stats */ + { CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) ); + if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */ + if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ + if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ + } + + tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) ); + + /* Write table 
description header */ + { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); + op += nc_err; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + /* check compressibility */ + if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; + + return op-ostart; +} + +typedef struct { + FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; +} fseWkspMax_t; + +size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) +{ + fseWkspMax_t scratchBuffer; + FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); +} + +size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); +} + + +#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/thirdparty/zstd/compress/huf_compress.c b/thirdparty/zstd/compress/huf_compress.c new file mode 100644 index 0000000000..fe11aafb8f --- /dev/null +++ b/thirdparty/zstd/compress/huf_compress.c @@ -0,0 +1,684 @@ +/* ****************************************************************** + Huffman encoder, part of New Generation Entropy library + Copyright (C) 2013-2016, Yann Collet. 
+ + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +#include <string.h> /* memcpy, memset */ +#include <stdio.h> /* printf (debug) */ +#include "bitstream.h" +#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ +#include "fse.h" /* header compression */ +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + + +/* ************************************************************** +* Utils +****************************************************************/ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); +} + + +/* ******************************************************* +* HUF : Huffman block compression +*********************************************************/ +/* HUF_compressWeights() : + * Same as FSE_compress(), but dedicated to huff0's weights compression. 
+ * The use case needs much less stack memory. + * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. + */ +#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 +size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + U32 maxSymbolValue = HUF_TABLELOG_MAX; + U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; + + FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; + BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER]; + + U32 count[HUF_TABLELOG_MAX+1]; + S16 norm[HUF_TABLELOG_MAX+1]; + + /* init conditions */ + if (wtSize <= 1) return 0; /* Not compressible */ + + /* Scan input and build symbol stats */ + { CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) ); + if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ + if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ + } + + tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); + + /* Write table description header */ + { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); + op += hSize; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + return op-ostart; +} + + +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt within "huf.h" */ + +/*! HUF_writeCTable() : + `CTable` : Huffman tree to save, using huf representation. 
+ @return : size of saved CTable */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, + const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) +{ + BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; + BYTE* op = (BYTE*)dst; + U32 n; + + /* check conditions */ + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + + /* convert to weight */ + bitsToWeight[0] = 0; + for (n=1; n<huffLog+1; n++) + bitsToWeight[n] = (BYTE)(huffLog + 1 - n); + for (n=0; n<maxSymbolValue; n++) + huffWeight[n] = bitsToWeight[CTable[n].nbBits]; + + /* attempt weights compression by FSE */ + { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) ); + if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ + op[0] = (BYTE)hSize; + return hSize+1; + } } + + /* write raw values as 4-bits (max : 15) */ + if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ + if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ + op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); + huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ + for (n=0; n<maxSymbolValue; n+=2) + op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]); + return ((maxSymbolValue+1)/2) + 1; +} + + +size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize) +{ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ + U32 tableLog = 0; + U32 nbSymbols = 0; + + /* get symbol weights */ + CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); + + /* check result */ + if 
(tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall); + + /* Prepare base value per rank */ + { U32 n, nextRankStart = 0; + for (n=1; n<=tableLog; n++) { + U32 current = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = current; + } } + + /* fill nbBits */ + { U32 n; for (n=0; n<nbSymbols; n++) { + const U32 w = huffWeight[n]; + CTable[n].nbBits = (BYTE)(tableLog + 1 - w); + } } + + /* fill val */ + { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */ + U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; + { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; } + /* determine stating value per rank */ + valPerRank[tableLog+1] = 0; /* for w==0 */ + { U16 min = 0; + U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */ + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + /* assign value within rank, symbol order */ + { U32 n; for (n=0; n<=maxSymbolValue; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } + } + + return readSize; +} + + +typedef struct nodeElt_s { + U32 count; + U16 parent; + BYTE byte; + BYTE nbBits; +} nodeElt; + +static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) +{ + const U32 largestBits = huffNode[lastNonNull].nbBits; + if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */ + + /* there are several too large elements (at least >= 2) */ + { int totalCost = 0; + const U32 baseCost = 1 << (largestBits - maxNbBits); + U32 n = lastNonNull; + + while (huffNode[n].nbBits > maxNbBits) { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); + huffNode[n].nbBits = (BYTE)maxNbBits; + n --; + } /* n stops at huffNode[n].nbBits <= maxNbBits */ + while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ + + /* renorm totalCost */ + 
totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ + + /* repay normalized cost */ + { U32 const noSymbol = 0xF0F0F0F0; + U32 rankLast[HUF_TABLELOG_MAX+2]; + int pos; + + /* Get pos of last (smallest) symbol per rank */ + memset(rankLast, 0xF0, sizeof(rankLast)); + { U32 currentNbBits = maxNbBits; + for (pos=n ; pos >= 0; pos--) { + if (huffNode[pos].nbBits >= currentNbBits) continue; + currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ + rankLast[maxNbBits-currentNbBits] = pos; + } } + + while (totalCost > 0) { + U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; + for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { + U32 highPos = rankLast[nBitsToDecrease]; + U32 lowPos = rankLast[nBitsToDecrease-1]; + if (highPos == noSymbol) continue; + if (lowPos == noSymbol) break; + { U32 const highTotal = huffNode[highPos].count; + U32 const lowTotal = 2 * huffNode[lowPos].count; + if (highTotal <= lowTotal) break; + } } + /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) 
*/ + while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + nBitsToDecrease ++; + totalCost -= 1 << (nBitsToDecrease-1); + if (rankLast[nBitsToDecrease-1] == noSymbol) + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ + huffNode[rankLast[nBitsToDecrease]].nbBits ++; + if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ + rankLast[nBitsToDecrease] = noSymbol; + else { + rankLast[nBitsToDecrease]--; + if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) + rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ + } } /* while (totalCost > 0) */ + + while (totalCost < 0) { /* Sometimes, cost correction overshoot */ + if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ + while (huffNode[n].nbBits == maxNbBits) n--; + huffNode[n+1].nbBits--; + rankLast[1] = n+1; + totalCost++; + continue; + } + huffNode[ rankLast[1] + 1 ].nbBits--; + rankLast[1]++; + totalCost ++; + } } } /* there are several too large elements (at least >= 2) */ + + return maxNbBits; +} + + +typedef struct { + U32 base; + U32 current; +} rankPos; + +static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) +{ + rankPos rank[32]; + U32 n; + + memset(rank, 0, sizeof(rank)); + for (n=0; n<=maxSymbolValue; n++) { + U32 r = BIT_highbit32(count[n] + 1); + rank[r].base ++; + } + for (n=30; n>0; n--) rank[n-1].base += rank[n].base; + for (n=0; n<32; n++) rank[n].current = rank[n].base; + for (n=0; n<=maxSymbolValue; n++) { + U32 const c = count[n]; + U32 const r = BIT_highbit32(c+1) + 1; + U32 pos = rank[r].current++; + while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--; + huffNode[pos].count = c; + huffNode[pos].byte = (BYTE)n; + } +} + 
+ +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned. + */ +#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) +typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1]; +size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) +{ + nodeElt* const huffNode0 = (nodeElt*)workSpace; + nodeElt* const huffNode = huffNode0+1; + U32 n, nonNullRank; + int lowS, lowN; + U16 nodeNb = STARTNODE; + U32 nodeRoot; + + /* safety checks */ + if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC); /* workSpace is not large enough */ + if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC); + memset(huffNode0, 0, sizeof(huffNodeTable)); + + /* sort, decreasing order */ + HUF_sort(huffNode, count, maxSymbolValue); + + /* init for parents */ + nonNullRank = maxSymbolValue; + while(huffNode[nonNullRank].count == 0) nonNullRank--; + lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; + huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; + huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; + nodeNb++; lowS-=2; + for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); + huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ + + /* create parents */ + while (nodeNb <= nodeRoot) { + U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? 
lowS-- : lowN++; + huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; + huffNode[n1].parent = huffNode[n2].parent = nodeNb; + nodeNb++; + } + + /* distribute weights (unlimited tree height) */ + huffNode[nodeRoot].nbBits = 0; + for (n=nodeRoot-1; n>=STARTNODE; n--) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + for (n=0; n<=nonNullRank; n++) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + + /* enforce maxTableLog */ + maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); + + /* fill result into tree (val, nbBits) */ + { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; + U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; + if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ + for (n=0; n<=nonNullRank; n++) + nbPerRank[huffNode[n].nbBits]++; + /* determine stating value per rank */ + { U16 min = 0; + for (n=maxNbBits; n>0; n--) { + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + for (n=0; n<=maxSymbolValue; n++) + tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ + for (n=0; n<=maxSymbolValue; n++) + tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ + } + + return maxNbBits; +} + +/** HUF_buildCTable() : + * Note : count is used before tree is written, so they can safely overlap + */ +size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) +{ + huffNodeTable nodeTable; + return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); +} + +static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) +{ + size_t nbBits = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + nbBits += CTable[s].nbBits * count[s]; + } + return nbBits >> 3; +} + +static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, 
unsigned maxSymbolValue) { + int bad = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + bad |= (count[s] != 0) & (CTable[s].nbBits == 0); + } + return !bad; +} + +static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +{ + BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); +} + +size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } + +#define HUF_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) + +#define HUF_FLUSHBITS_1(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) + +#define HUF_FLUSHBITS_2(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) + +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + const BYTE* ip = (const BYTE*) src; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + size_t n; + const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize)); + BIT_CStream_t bitC; + + /* init */ + if (dstSize < 8) return 0; /* not enough space to compress */ + { size_t const initErr = BIT_initCStream(&bitC, op, oend-op); + if (HUF_isError(initErr)) return 0; } + + n = srcSize & ~3; /* join to mod 4 */ + switch (srcSize & 3) + { + case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); + HUF_FLUSHBITS_2(&bitC); + case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); + HUF_FLUSHBITS_1(&bitC); + case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); + HUF_FLUSHBITS(&bitC); + case 0 : + default: ; + } + + for (; n>0; n-=4) { /* note : n&3==0 at this stage */ + HUF_encodeSymbol(&bitC, ip[n- 1], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 2], CTable); + HUF_FLUSHBITS_2(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 3], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 4], CTable); + HUF_FLUSHBITS(&bitC); + } + + return BIT_closeCStream(&bitC); +} + + 
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ + if (srcSize < 12) return 0; /* no saving possible : too small input */ + op += 6; /* jumpTable */ + + { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) ); + if (cSize==0) return 0; + MEM_writeLE16(ostart, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) ); + if (cSize==0) return 0; + MEM_writeLE16(ostart+2, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) ); + if (cSize==0) return 0; + MEM_writeLE16(ostart+4, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable) ); + if (cSize==0) return 0; + op += cSize; + } + + return op-ostart; +} + + +static size_t HUF_compressCTable_internal( + BYTE* const ostart, BYTE* op, BYTE* const oend, + const void* src, size_t srcSize, + unsigned singleStream, const HUF_CElt* CTable) +{ + size_t const cSize = singleStream ? 
+ HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : + HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable); + if (HUF_isError(cSize)) { return cSize; } + if (cSize==0) { return 0; } /* uncompressible */ + op += cSize; + /* check compressibility */ + if ((size_t)(op-ostart) >= srcSize-1) { return 0; } + return op-ostart; +} + + +/* `workSpace` must a table of at least 1024 unsigned */ +static size_t HUF_compress_internal ( + void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + unsigned singleStream, + void* workSpace, size_t wkspSize, + HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + U32* count; + size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1); + HUF_CElt* CTable; + size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1); + + /* checks & inits */ + if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) return ERROR(GENERIC); + if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */ + if (!dstSize) return 0; /* cannot fit within dst budget */ + if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; + if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; + + count = (U32*)workSpace; + workSpace = (BYTE*)workSpace + countSize; + wkspSize -= countSize; + CTable = (HUF_CElt*)workSpace; + workSpace = (BYTE*)workSpace + CTableSize; + wkspSize -= CTableSize; + + /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */ + if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { + return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); + } + + /* Scan 
input and build symbol stats */ + { CHECK_V_F(largest, FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) ); + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ + } + + /* Check validity of previous table */ + if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) { + *repeat = HUF_repeat_none; + } + /* Heuristic : use existing table for small inputs */ + if (preferRepeat && repeat && *repeat != HUF_repeat_none) { + return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); + } + + /* Build Huffman Tree */ + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { CHECK_V_F(maxBits, HUF_buildCTable_wksp (CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize) ); + huffLog = (U32)maxBits; + /* Zero the unused symbols so we can check it for validity */ + memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt)); + } + + /* Write table description header */ + { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog) ); + /* Check if using the previous table will be beneficial */ + if (repeat && *repeat != HUF_repeat_none) { + size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue); + size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue); + if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { + return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); + } + } + /* Use the new table */ + if (hSize + 12ul >= srcSize) { return 0; } + op += hSize; + if (repeat) { *repeat = HUF_repeat_none; } + if (oldHufTable) { memcpy(oldHufTable, CTable, CTableSize); } /* Save the new table */ + } + return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, 
singleStream, CTable); +} + + +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0); +} + +size_t HUF_compress1X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat); +} + +size_t HUF_compress1X (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[1024]; + return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} + +size_t HUF_compress4X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0); +} + +size_t HUF_compress4X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat); +} + +size_t HUF_compress2 (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[1024]; + return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 
workSpace, sizeof(workSpace)); +} + +size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT); +} diff --git a/thirdparty/zstd/compress/zstd_compress.c b/thirdparty/zstd/compress/zstd_compress.c new file mode 100644 index 0000000000..c08b315dab --- /dev/null +++ b/thirdparty/zstd/compress/zstd_compress.c @@ -0,0 +1,3598 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + + +/*-************************************* +* Dependencies +***************************************/ +#include <string.h> /* memset */ +#include "mem.h" +#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#include "zstd_internal.h" /* includes zstd.h */ + + +/*-************************************* +* Debug +***************************************/ +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1) +# include <assert.h> +#else +# define assert(condition) ((void)0) +#endif + +#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; } + +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2) +# include <stdio.h> + static unsigned g_debugLevel = ZSTD_DEBUG; +# define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); } +#else +# define DEBUGLOG(l, ...) 
{} /* disabled */ +#endif + + +/*-************************************* +* Constants +***************************************/ +static const U32 g_searchStrength = 8; /* control skip over incompressible data */ +#define HASH_READ_SIZE 8 +typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; + +/* entropy tables always have same size */ +static size_t const hufCTable_size = HUF_CTABLE_SIZE(255); +static size_t const litlengthCTable_size = FSE_CTABLE_SIZE(LLFSELog, MaxLL); +static size_t const offcodeCTable_size = FSE_CTABLE_SIZE(OffFSELog, MaxOff); +static size_t const matchlengthCTable_size = FSE_CTABLE_SIZE(MLFSELog, MaxML); +static size_t const entropyScratchSpace_size = HUF_WORKSPACE_SIZE; + + +/*-************************************* +* Helper functions +***************************************/ +size_t ZSTD_compressBound(size_t srcSize) { + size_t const lowLimit = 256 KB; + size_t const margin = (srcSize < lowLimit) ? (lowLimit-srcSize) >> 12 : 0; /* from 64 to 0 */ + return srcSize + (srcSize >> 8) + margin; +} + + +/*-************************************* +* Sequence storage +***************************************/ +static void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthID = 0; +} + + +/*-************************************* +* Context memory management +***************************************/ +struct ZSTD_CCtx_s { + const BYTE* nextSrc; /* next block here to continue on current prefix */ + const BYTE* base; /* All regular indexes relative to this position */ + const BYTE* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more data */ + U32 nextToUpdate; /* index from which to continue dictionary update */ + U32 nextToUpdate3; /* index from which to continue dictionary update */ + U32 hashLog3; /* dispatch table : 
larger == faster, more memory */ + U32 loadedDictEnd; /* index of end of dictionary */ + U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */ + U32 forceRawDict; /* Force loading dictionary in "content-only" mode (no header analysis) */ + ZSTD_compressionStage_e stage; + U32 rep[ZSTD_REP_NUM]; + U32 repToConfirm[ZSTD_REP_NUM]; + U32 dictID; + ZSTD_parameters params; + void* workSpace; + size_t workSpaceSize; + size_t blockSize; + U64 frameContentSize; + U64 consumedSrcSize; + XXH64_state_t xxhState; + ZSTD_customMem customMem; + + seqStore_t seqStore; /* sequences storage ptrs */ + U32* hashTable; + U32* hashTable3; + U32* chainTable; + HUF_repeat hufCTable_repeatMode; + HUF_CElt* hufCTable; + U32 fseCTables_ready; + FSE_CTable* offcodeCTable; + FSE_CTable* matchlengthCTable; + FSE_CTable* litlengthCTable; + unsigned* entropyScratchSpace; +}; + +ZSTD_CCtx* ZSTD_createCCtx(void) +{ + return ZSTD_createCCtx_advanced(defaultCustomMem); +} + +ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_CCtx* cctx; + + if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; + if (!customMem.customAlloc || !customMem.customFree) return NULL; + + cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem); + if (!cctx) return NULL; + memset(cctx, 0, sizeof(ZSTD_CCtx)); + cctx->customMem = customMem; + return cctx; +} + +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support free on NULL */ + ZSTD_free(cctx->workSpace, cctx->customMem); + ZSTD_free(cctx, cctx->customMem); + return 0; /* reserved as a potential error code in the future */ +} + +size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*cctx) + cctx->workSpaceSize; +} + +size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value) +{ + switch(param) + { + case ZSTD_p_forceWindow : cctx->forceWindow = value>0; 
cctx->loadedDictEnd = 0; return 0; + case ZSTD_p_forceRawDict : cctx->forceRawDict = value>0; return 0; + default: return ERROR(parameter_unknown); + } +} + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */ +{ + return &(ctx->seqStore); +} + +static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx* cctx) +{ + return cctx->params; +} + + +/** ZSTD_checkParams() : + ensure param values remain within authorized range. + @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) +{ +# define CLAMPCHECK(val,min,max) { if ((val<min) | (val>max)) return ERROR(compressionParameter_unsupported); } + CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); + CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); + CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); + CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); + if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2) return ERROR(compressionParameter_unsupported); + return 0; +} + + +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +{ + U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); + return hashLog - btScale; +} + +/** ZSTD_adjustCParams() : + optimize `cPar` for a given input (`srcSize` and `dictSize`). + mostly downsizing to reduce memory consumption and initialization. + Both `srcSize` and `dictSize` are optional (use 0 if unknown), + but if both are 0, no optimization can be done. + Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. 
*/ +ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize) +{ + if (srcSize+dictSize == 0) return cPar; /* no size information available : no adjustment */ + + /* resize params, to use less memory when necessary */ + { U32 const minSrcSize = (srcSize==0) ? 500 : 0; + U64 const rSize = srcSize + dictSize + minSrcSize; + if (rSize < ((U64)1<<ZSTD_WINDOWLOG_MAX)) { + U32 const srcLog = MAX(ZSTD_HASHLOG_MIN, ZSTD_highbit32((U32)(rSize)-1) + 1); + if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; + } } + if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog; + { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); + if (cycleLog > cPar.windowLog) cPar.chainLog -= (cycleLog - cPar.windowLog); + } + + if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ + + return cPar; +} + + +size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams) +{ + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << cParams.windowLog); + U32 const divider = (cParams.searchLength==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = blockSize + 11*maxNbSeq; + + size_t const chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog); + size_t const hSize = ((size_t)1) << cParams.hashLog; + U32 const hashLog3 = (cParams.searchLength>3) ? 
0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog); + size_t const h3Size = ((size_t)1) << hashLog3; + size_t const entropySpace = hufCTable_size + litlengthCTable_size + + offcodeCTable_size + matchlengthCTable_size + + entropyScratchSpace_size; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + + size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32) + + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t)); + size_t const neededSpace = entropySpace + tableSpace + tokenSpace + + (((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btopt2)) ? optSpace : 0); + + return sizeof(ZSTD_CCtx) + neededSpace; +} + + +static U32 ZSTD_equivalentParams(ZSTD_parameters param1, ZSTD_parameters param2) +{ + return (param1.cParams.hashLog == param2.cParams.hashLog) + & (param1.cParams.chainLog == param2.cParams.chainLog) + & (param1.cParams.strategy == param2.cParams.strategy) + & ((param1.cParams.searchLength==3) == (param2.cParams.searchLength==3)); +} + +/*! ZSTD_continueCCtx() : + reuse CCtx without reset (note : requires no dictionary) */ +static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 frameContentSize) +{ + U32 const end = (U32)(cctx->nextSrc - cctx->base); + cctx->params = params; + cctx->frameContentSize = frameContentSize; + cctx->consumedSrcSize = 0; + cctx->lowLimit = end; + cctx->dictLimit = end; + cctx->nextToUpdate = end+1; + cctx->stage = ZSTDcs_init; + cctx->dictID = 0; + cctx->loadedDictEnd = 0; + { int i; for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = repStartValue[i]; } + cctx->seqStore.litLengthSum = 0; /* force reset of btopt stats */ + XXH64_reset(&cctx->xxhState, 0); + return 0; +} + +typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e; + +/*! 
ZSTD_resetCCtx_internal() : + note : `params` must be validated */ +static size_t ZSTD_resetCCtx_internal (ZSTD_CCtx* zc, + ZSTD_parameters params, U64 frameContentSize, + ZSTD_compResetPolicy_e const crp) +{ + if (crp == ZSTDcrp_continue) + if (ZSTD_equivalentParams(params, zc->params)) { + zc->fseCTables_ready = 0; + zc->hufCTable_repeatMode = HUF_repeat_none; + return ZSTD_continueCCtx(zc, params, frameContentSize); + } + + { size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog); + U32 const divider = (params.cParams.searchLength==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = blockSize + 11*maxNbSeq; + size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog); + size_t const hSize = ((size_t)1) << params.cParams.hashLog; + U32 const hashLog3 = (params.cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog); + size_t const h3Size = ((size_t)1) << hashLog3; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + void* ptr; + + /* Check if workSpace is large enough, alloc a new one if needed */ + { size_t const entropySpace = hufCTable_size + litlengthCTable_size + + offcodeCTable_size + matchlengthCTable_size + + entropyScratchSpace_size; + size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32) + + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t)); + size_t const optSpace = ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) ? 
optPotentialSpace : 0; + size_t const neededSpace = entropySpace + optSpace + tableSpace + tokenSpace; + if (zc->workSpaceSize < neededSpace) { + zc->workSpaceSize = 0; + ZSTD_free(zc->workSpace, zc->customMem); + zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); + if (zc->workSpace == NULL) return ERROR(memory_allocation); + zc->workSpaceSize = neededSpace; + ptr = zc->workSpace; + + /* entropy space */ + zc->hufCTable = (HUF_CElt*)ptr; + ptr = (char*)zc->hufCTable + hufCTable_size; /* note : HUF_CElt* is incomplete type, size is estimated via macro */ + zc->offcodeCTable = (FSE_CTable*) ptr; + ptr = (char*)ptr + offcodeCTable_size; + zc->matchlengthCTable = (FSE_CTable*) ptr; + ptr = (char*)ptr + matchlengthCTable_size; + zc->litlengthCTable = (FSE_CTable*) ptr; + ptr = (char*)ptr + litlengthCTable_size; + assert(((size_t)ptr & 3) == 0); /* ensure correct alignment */ + zc->entropyScratchSpace = (unsigned*) ptr; + } } + + /* init params */ + zc->params = params; + zc->blockSize = blockSize; + zc->frameContentSize = frameContentSize; + zc->consumedSrcSize = 0; + + XXH64_reset(&zc->xxhState, 0); + zc->stage = ZSTDcs_init; + zc->dictID = 0; + zc->loadedDictEnd = 0; + zc->fseCTables_ready = 0; + zc->hufCTable_repeatMode = HUF_repeat_none; + zc->nextToUpdate = 1; + zc->nextSrc = NULL; + zc->base = NULL; + zc->dictBase = NULL; + zc->dictLimit = 0; + zc->lowLimit = 0; + { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = repStartValue[i]; } + zc->hashLog3 = hashLog3; + zc->seqStore.litLengthSum = 0; + + /* ensure entropy tables are close together at the beginning */ + assert((void*)zc->hufCTable == zc->workSpace); + assert((char*)zc->offcodeCTable == (char*)zc->hufCTable + hufCTable_size); + assert((char*)zc->matchlengthCTable == (char*)zc->offcodeCTable + offcodeCTable_size); + assert((char*)zc->litlengthCTable == (char*)zc->matchlengthCTable + matchlengthCTable_size); + assert((char*)zc->entropyScratchSpace == (char*)zc->litlengthCTable + 
litlengthCTable_size); + ptr = (char*)zc->entropyScratchSpace + entropyScratchSpace_size; + + /* opt parser space */ + if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) { + assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ + zc->seqStore.litFreq = (U32*)ptr; + zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits); + zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); + zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1); + ptr = zc->seqStore.offCodeFreq + (MaxOff+1); + zc->seqStore.matchTable = (ZSTD_match_t*)ptr; + ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1; + zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr; + ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1; + } + + /* table Space */ + if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */ + assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ + zc->hashTable = (U32*)(ptr); + zc->chainTable = zc->hashTable + hSize; + zc->hashTable3 = zc->chainTable + chainSize; + ptr = zc->hashTable3 + h3Size; + + /* sequences storage */ + zc->seqStore.sequencesStart = (seqDef*)ptr; + ptr = zc->seqStore.sequencesStart + maxNbSeq; + zc->seqStore.llCode = (BYTE*) ptr; + zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq; + zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq; + zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq; + + return 0; + } +} + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { + int i; + for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0; +} + + +/*! ZSTD_copyCCtx_internal() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). 
+ * pledgedSrcSize=0 means "empty" if fParams.contentSizeFlag=1 + * @return : 0, or an error code */ +size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, + ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize) +{ + if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong); + + memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); + { ZSTD_parameters params = srcCCtx->params; + params.fParams = fParams; + DEBUGLOG(5, "ZSTD_resetCCtx_internal : dictIDFlag : %u \n", !fParams.noDictIDFlag); + ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset); + } + + /* copy tables */ + { size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog); + size_t const hSize = (size_t)1 << srcCCtx->params.cParams.hashLog; + size_t const h3Size = (size_t)1 << srcCCtx->hashLog3; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + assert((U32*)dstCCtx->chainTable == (U32*)dstCCtx->hashTable + hSize); /* chainTable must follow hashTable */ + assert((U32*)dstCCtx->hashTable3 == (U32*)dstCCtx->chainTable + chainSize); + memcpy(dstCCtx->hashTable, srcCCtx->hashTable, tableSpace); /* presumes all tables follow each other */ + } + + /* copy dictionary offsets */ + dstCCtx->nextToUpdate = srcCCtx->nextToUpdate; + dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3; + dstCCtx->nextSrc = srcCCtx->nextSrc; + dstCCtx->base = srcCCtx->base; + dstCCtx->dictBase = srcCCtx->dictBase; + dstCCtx->dictLimit = srcCCtx->dictLimit; + dstCCtx->lowLimit = srcCCtx->lowLimit; + dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd; + dstCCtx->dictID = srcCCtx->dictID; + + /* copy entropy tables */ + dstCCtx->fseCTables_ready = srcCCtx->fseCTables_ready; + if (srcCCtx->fseCTables_ready) { + memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, litlengthCTable_size); + memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, matchlengthCTable_size); + 
memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, offcodeCTable_size); + } + dstCCtx->hufCTable_repeatMode = srcCCtx->hufCTable_repeatMode; + if (srcCCtx->hufCTable_repeatMode) { + memcpy(dstCCtx->hufCTable, srcCCtx->hufCTable, hufCTable_size); + } + + return 0; +} + +/*! ZSTD_copyCCtx() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * pledgedSrcSize==0 means "unknown". +* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) +{ + ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + fParams.contentSizeFlag = pledgedSrcSize>0; + + return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, fParams, pledgedSrcSize); +} + + +/*! ZSTD_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue) +{ + U32 u; + for (u=0 ; u < size ; u++) { + if (table[u] < reducerValue) table[u] = 0; + else table[u] -= reducerValue; + } +} + +/*! ZSTD_reduceIndex() : +* rescale all indexes to avoid future overflow (indexes are U32) */ +static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) +{ + { U32 const hSize = 1 << zc->params.cParams.hashLog; + ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); } + + { U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog); + ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); } + + { U32 const h3Size = (zc->hashLog3) ? 
1 << zc->hashLog3 : 0; + ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); } +} + + +/*-******************************************************* +* Block entropic compression +*********************************************************/ + +/* See doc/zstd_compression_format.md for detailed format description */ + +size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); + memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); + MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw); + return ZSTD_blockHeaderSize+srcSize; +} + + +static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall); + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); + break; + default: /*note : should not be necessary : flSize is within {1,2,3} */ + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); + break; + } + + memcpy(ostart + flSize, src, srcSize); + return srcSize + flSize; +} + +static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); + break; + default: /*note : should not be necessary : flSize is necessarily 
within {1,2,3} */ + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); + break; + } + + ostart[flSize] = *(const BYTE*)src; + return flSize+1; +} + + +static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; } + +static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t const minGain = ZSTD_minGain(srcSize); + size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + U32 singleStream = srcSize < 256; + symbolEncodingType_e hType = set_compressed; + size_t cLitSize; + + + /* small ? don't even attempt compression (speed opt) */ +# define LITERAL_NOENTROPY 63 + { size_t const minLitSize = zc->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ + { HUF_repeat repeat = zc->hufCTable_repeatMode; + int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0; + if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; + cLitSize = singleStream ? 
HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, + zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat) + : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, + zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat); + if (repeat != HUF_repeat_none) { hType = set_repeat; } /* reused the existing table */ + else { zc->hufCTable_repeatMode = HUF_repeat_check; } /* now have a table to reuse */ + } + + if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) { + zc->hufCTable_repeatMode = HUF_repeat_none; + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + if (cLitSize==1) { + zc->hufCTable_repeatMode = HUF_repeat_none; + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + default: /* should not be necessary, lhSize is only {3,4,5} */ + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + } + return lhSize+cLitSize; +} + +static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24 }; + +static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 
36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + + +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) +{ + BYTE const LL_deltaCode = 19; + BYTE const ML_deltaCode = 36; + const seqDef* const sequences = seqStorePtr->sequencesStart; + BYTE* const llCodeTable = seqStorePtr->llCode; + BYTE* const ofCodeTable = seqStorePtr->ofCode; + BYTE* const mlCodeTable = seqStorePtr->mlCode; + U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + U32 u; + for (u=0; u<nbSeq; u++) { + U32 const llv = sequences[u].litLength; + U32 const mlv = sequences[u].matchLength; + llCodeTable[u] = (llv> 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv]; + ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset); + mlCodeTable[u] = (mlv>127) ? 
(BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv]; + } + if (seqStorePtr->longLengthID==1) + llCodeTable[seqStorePtr->longLengthPos] = MaxLL; + if (seqStorePtr->longLengthID==2) + mlCodeTable[seqStorePtr->longLengthPos] = MaxML; +} + +MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + size_t srcSize) +{ + const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN; + const seqStore_t* seqStorePtr = &(zc->seqStore); + U32 count[MaxSeq+1]; + S16 norm[MaxSeq+1]; + FSE_CTable* CTable_LitLength = zc->litlengthCTable; + FSE_CTable* CTable_OffsetBits = zc->offcodeCTable; + FSE_CTable* CTable_MatchLength = zc->matchlengthCTable; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + const seqDef* const sequences = seqStorePtr->sequencesStart; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* seqHead; + BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)]; + + /* Compress literals */ + { const BYTE* const literals = seqStorePtr->litStart; + size_t const litSize = seqStorePtr->lit - literals; + size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize); + if (ZSTD_isError(cSize)) return cSize; + op += cSize; + } + + /* Sequences Header */ + if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); + if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) goto _check_compressibility; + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + +#define MIN_SEQ_FOR_DYNAMIC_FSE 64 +#define 
MAX_SEQ_FOR_STATIC_FSE 1000 + + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + + /* CTable for Literal Lengths */ + { U32 max = MaxLL; + size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->entropyScratchSpace); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = llCodeTable[0]; + FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); + LLtype = set_rle; + } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + LLtype = set_repeat; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { + FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); + LLtype = set_basic; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); + if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return NCountSize; + op += NCountSize; } + FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); + LLtype = set_compressed; + } } + + /* CTable for Offsets */ + { U32 max = MaxOff; + size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->entropyScratchSpace); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = ofCodeTable[0]; + FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); + Offtype = set_rle; + } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + Offtype = set_repeat; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) { + FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); + Offtype = set_basic; + } else { + size_t 
nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); + if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return NCountSize; + op += NCountSize; } + FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); + Offtype = set_compressed; + } } + + /* CTable for MatchLengths */ + { U32 max = MaxML; + size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->entropyScratchSpace); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = *mlCodeTable; + FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); + MLtype = set_rle; + } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + MLtype = set_repeat; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) { + FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); + MLtype = set_basic; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); + if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return NCountSize; + op += NCountSize; } + FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); + MLtype = set_compressed; + } } + + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + zc->fseCTables_ready = 0; + + /* Encoding Sequences */ + { BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t 
stateLitLength; + + CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */ + + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + if (longOffsets) { + U32 const ofBits = ofCodeTable[nbSeq-1]; + int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); + BIT_flushBits(&blockStream); + } + BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, + ofBits - extraBits); + } else { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + } + BIT_flushBits(&blockStream); + + { size_t n; + for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */ + BYTE const llCode = llCodeTable[n]; + BYTE const ofCode = ofCodeTable[n]; + BYTE const mlCode = mlCodeTable[n]; + U32 const llBits = LL_bits[llCode]; + U32 const ofBits = ofCode; /* 32b*/ /* 64b*/ + U32 const mlBits = ML_bits[mlCode]; + /* (7)*/ /* (7)*/ + FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ + FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ + if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].litLength, llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) 
BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + if (longOffsets) { + int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[n].offset, extraBits); + BIT_flushBits(&blockStream); /* (7)*/ + } + BIT_addBits(&blockStream, sequences[n].offset >> extraBits, + ofBits - extraBits); /* 31 */ + } else { + BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + } + BIT_flushBits(&blockStream); /* (7)*/ + } } + + FSE_flushCState(&blockStream, &stateMatchLength); + FSE_flushCState(&blockStream, &stateOffsetBits); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ + op += streamSize; + } } + + /* check compressibility */ +_check_compressibility: + { size_t const minGain = ZSTD_minGain(srcSize); + size_t const maxCSize = srcSize - minGain; + if ((size_t)(op-ostart) >= maxCSize) { + zc->hufCTable_repeatMode = HUF_repeat_none; + return 0; + } } + + /* confirm repcodes */ + { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; } + + return op - ostart; +} + +#if 0 /* for debug */ +# define STORESEQ_DEBUG +#include <stdio.h> /* fprintf */ +U32 g_startDebug = 0; +const BYTE* g_start = NULL; +#endif + +/*! ZSTD_storeSeq() : + Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. + `offsetCode` : distance to match, or 0 == repCode. 
+ `matchCode` : matchLength - MINMATCH +*/ +MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode) +{ +#ifdef STORESEQ_DEBUG + if (g_startDebug) { + const U32 pos = (U32)((const BYTE*)literals - g_start); + if (g_start==NULL) g_start = (const BYTE*)literals; + if ((pos > 1895000) && (pos < 1895300)) + DEBUGLOG(5, "Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", + pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); + } +#endif + /* copy Literals */ + ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength>0xFFFF) { + seqStorePtr->longLengthID = 1; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].litLength = (U16)litLength; + + /* match offset */ + seqStorePtr->sequences[0].offset = offsetCode + 1; + + /* match Length */ + if (matchCode>0xFFFF) { + seqStorePtr->longLengthID = 2; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].matchLength = (U16)matchCode; + + seqStorePtr->sequences++; +} + + +/*-************************************* +* Match length counter +***************************************/ +static unsigned ZSTD_NbCommonBytes (register size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 
0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) +{ + const BYTE* const pStart = pIn; + const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); + + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } + if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == 
MEM_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++; + return (size_t)(pIn - pStart); +} + +/** ZSTD_count_2segments() : +* can count match length with `ip` & `match` in 2 different segments. +* convention : on reaching mEnd, match count continue starting from iStart +*/ +static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart) +{ + const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd); + size_t const matchLength = ZSTD_count(ip, match, vEnd); + if (match + matchLength != mEnd) return matchLength; + return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd); +} + + +/*-************************************* +* Hashes +***************************************/ +static const U32 prime3bytes = 506832829U; +static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } +MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ + +static const U32 prime4bytes = 2654435761U; +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return 
ZSTD_hash7(MEM_readLE64(p), h); } + +static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; +static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } +static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } + +static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); + case 8: return ZSTD_hash8Ptr(p, hBits); + } +} + + +/*-************************************* +* Fast Scan +***************************************/ +static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) +{ + U32* const hashTable = zc->hashTable; + U32 const hBits = zc->params.cParams.hashLog; + const BYTE* const base = zc->base; + const BYTE* ip = base + zc->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const size_t fastHashFillStep = 3; + + while(ip <= iend) { + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base); + ip += fastHashFillStep; + } +} + + +FORCE_INLINE +void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* const hashTable = cctx->hashTable; + U32 const hBits = cctx->params.cParams.hashLog; + seqStore_t* seqStorePtr = &(cctx->seqStore); + const BYTE* const base = cctx->base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = cctx->dictLimit; + const BYTE* const lowest = base + lowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1]; + U32 offsetSaved = 0; + + /* init */ + ip += (ip==lowest); + { U32 const maxRep = (U32)(ip-lowest); + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > 
maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h = ZSTD_hashPtr(ip, hBits, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + hashTable[h] = current; /* update hash table */ + + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + U32 offset; + if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip-match); + while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* save reps for next block */ + cctx->repToConfirm[0] = offset_1 ? 
offset_1 : offsetSaved; + cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + const U32 mls = ctx->params.cParams.searchLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return; + } +} + + +static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* hashTable = ctx->hashTable; + const U32 hBits = ctx->params.cParams.hashLog; + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = ctx->lowLimit; + const BYTE* const dictStart = dictBase + lowestIndex; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1]; + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hBits, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* matchBase = matchIndex < dictLimit ? 
dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ + const BYTE* repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* repMatch = repBase + repIndex; + size_t mLength; + hashTable[h] = current; /* update hash table */ + + if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + if ( (matchIndex < lowestIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; + U32 offset; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset = current - matchIndex; + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } } + + /* found a match : store it */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < dictLimit ? 
dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + U32 const mls = ctx->params.cParams.searchLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return; + } +} + + +/*-************************************* +* Double Fast +***************************************/ +static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U32 mls) +{ + U32* const hashLarge = cctx->hashTable; + U32 const hBitsL = cctx->params.cParams.hashLog; + U32* const hashSmall = cctx->chainTable; + U32 const hBitsS = cctx->params.cParams.chainLog; + const BYTE* const base = cctx->base; + const BYTE* ip = base + cctx->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; 
+ const size_t fastHashFillStep = 3; + + while(ip <= iend) { + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base); + hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base); + ip += fastHashFillStep; + } +} + + +FORCE_INLINE +void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* const hashLong = cctx->hashTable; + const U32 hBitsL = cctx->params.cParams.hashLog; + U32* const hashSmall = cctx->chainTable; + const U32 hBitsS = cctx->params.cParams.chainLog; + seqStore_t* seqStorePtr = &(cctx->seqStore); + const BYTE* const base = cctx->base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = cctx->dictLimit; + const BYTE* const lowest = base + lowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1]; + U32 offsetSaved = 0; + + /* init */ + ip += (ip==lowest); + { U32 const maxRep = (U32)(ip-lowest); + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); + size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndexL = hashLong[h2]; + U32 const matchIndexS = hashSmall[h]; + const BYTE* matchLong = base + matchIndexL; + const BYTE* match = base + matchIndexS; + hashLong[h2] = hashSmall[h] = current; /* update hash tables */ + + assert(offset_1 <= current); /* supposed guaranteed by construction */ + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + /* favor repcode */ + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, 
mLength-MINMATCH); + } else { + U32 offset; + if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) { + size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndexL3 = hashLong[hl3]; + const BYTE* matchL3 = base + matchIndexL3; + hashLong[hl3] = current + 1; + if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) { + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + ip++; + offset = (U32)(ip-matchL3); + while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + } else { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip-match); + while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + } else { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + + /* check immediate repcode */ + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> 
offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* save reps for next block */ + cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; + cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + const U32 mls = ctx->params.cParams.searchLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return; + } +} + + +static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* const hashLong = ctx->hashTable; + U32 const hBitsL = ctx->params.cParams.hashLog; + U32* const hashSmall = ctx->chainTable; + U32 const hBitsS = ctx->params.cParams.chainLog; + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = ctx->lowLimit; + const BYTE* const dictStart = dictBase + lowestIndex; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = 
iend - 8; + U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1]; + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 matchIndex = hashSmall[hSmall]; + const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + + const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); + const U32 matchLongIndex = hashLong[hLong]; + const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base; + const BYTE* matchLong = matchLongBase + matchLongIndex; + + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ + const BYTE* repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* repMatch = repBase + repIndex; + size_t mLength; + hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ + + if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { + const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? 
dictStart : lowPrefixPtr; + U32 offset; + mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8; + offset = current - matchLongIndex; + while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) { + size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndex3 = hashLong[h3]; + const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base; + const BYTE* match3 = match3Base + matchIndex3; + U32 offset; + hashLong[h3] = current + 1; + if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { + const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr; + mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8; + ip++; + offset = current+1 - matchIndex3; + while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ + } else { + const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex < dictLimit ? 
dictStart : lowPrefixPtr; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4; + offset = current - matchIndex; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } } + + /* found a match : store it */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; + hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; + hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < dictLimit ? 
dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + U32 const mls = ctx->params.cParams.searchLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return; + } +} + + +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. +* ip : assumed <= iend-8 . 
+* @return : nb of positions added */ +static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares, + U32 extDict) +{ + U32* const hashTable = zc->hashTable; + U32 const hashLog = zc->params.cParams.hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = zc->chainTable; + U32 const btLog = zc->params.cParams.chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 0 : current - btMask; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = zc->lowLimit; + U32 matchEndIdx = current+8; + size_t bestLength = 8; +#ifdef ZSTD_C_PREDICT + U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); +#endif /* ZSTD_C_PREDICT */ + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + +#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, 
stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } + + if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ + + if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + 
commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + if (matchEndIdx > current + 8) return matchEndIdx - current - 8; + return 1; +} + + +static size_t ZSTD_insertBtAndFindBestMatch ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 nbCompares, const U32 mls, + U32 extDict) +{ + U32* const hashTable = zc->hashTable; + U32 const hashLog = zc->params.cParams.hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = zc->chainTable; + U32 const btLog = zc->params.cParams.chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 
0 : current - btMask; + const U32 windowLow = zc->lowLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current+8; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; + if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger 
than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; + return bestLength; +} + + +static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +{ + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + + while(idx < target) + idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0); +} + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +static size_t ZSTD_BtFindBestMatch ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + } +} + + +static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +{ + const BYTE* const base = zc->base; + const U32 target = 
(U32)(ip - base); + U32 idx = zc->nextToUpdate; + + while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); +} + + +/** Tree updater, providing best match */ +static size_t ZSTD_BtFindBestMatch_extDict ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 7 : + case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + } +} + + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. 
not within extDict) */ +FORCE_INLINE +U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) +{ + U32* const hashTable = zc->hashTable; + const U32 hashLog = zc->params.cParams.hashLog; + U32* const chainTable = zc->chainTable; + const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1; + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + zc->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + + + +FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls, const U32 extDict) +{ + U32* const chainTable = zc->chainTable; + const U32 chainSize = (1 << zc->params.cParams.chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 lowLimit = zc->lowLimit; + const U32 current = (U32)(ip-base); + const U32 minChain = current > chainSize ? 
current - chainSize : 0; + int nbAttempts=maxNbAttempts; + size_t ml=4-1; + + /* HC4 match finder */ + U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); + + for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { + const BYTE* match; + size_t currentMl=0; + if ((!extDict) || matchIndex >= dictLimit) { + match = base + matchIndex; + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + match = dictBase + matchIndex; + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + return ml; +} + + +FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); + case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); + } +} + + +FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_CCtx* zc, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); + case 5 : return 
ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); + } +} + + +/* ******************************* +* Common parser - lazy strategy +*********************************/ +FORCE_INLINE +void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base + ctx->dictLimit; + + U32 const maxSearches = 1 << ctx->params.cParams.searchLog; + U32 const mls = ctx->params.cParams.searchLength; + + typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, + size_t* offsetPtr, + U32 maxNbAttempts, U32 matchLengthSearch); + searchMax_f const searchMax = searchMethod ? 
ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; + U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0; + + /* init */ + ip += (ip==base); + ctx->nextToUpdate3 = ctx->nextToUpdate; + { U32 const maxRep = (U32)(ip-base); + if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; + if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; + } + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + + /* check repCode */ + if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) { + /* repcode : we take it */ + matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + if (depth==0) goto _storeSequence; + } + + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip<ilimit) { + ip ++; + if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && 
(ip<ilimit)) { + ip ++; + if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(ml2 * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((ml2 >= 4) && (gain2 > gain1)) + matchLength = ml2, offset = 0, start = ip; + } + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + while ( (start > anchor) + && (start > base+offset-ZSTD_REP_MOVE) + && (start[-1] == start[-1-offset+ZSTD_REP_MOVE]) ) /* only search for offset within prefix */ + { start--; matchLength++; } + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while ( (ip <= ilimit) + && ((offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } + + /* Save reps for next block */ + ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset; + ctx->repToConfirm[1] = offset_2 ? 
offset_2 : savedOffset; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); +} + +static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); +} + +static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); +} + +static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); +} + + +FORCE_INLINE +void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const U32 dictLimit = ctx->dictLimit; + const U32 lowestIndex = ctx->lowLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + ctx->lowLimit; + + const U32 maxSearches = 1 << ctx->params.cParams.searchLog; + const U32 mls = ctx->params.cParams.searchLength; + + typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, + size_t* offsetPtr, + U32 maxNbAttempts, U32 matchLengthSearch); + searchMax_f searchMax = searchMethod ? 
ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; + + U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1]; + + /* init */ + ctx->nextToUpdate3 = ctx->nextToUpdate; + ip += (ip == prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 current = (U32)(ip-base); + + /* check repCode */ + { const U32 repIndex = (U32)(current+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip<ilimit) { + ip ++; + current++; + /* check repCode */ + if (offset) { + const U32 repIndex = (U32)(current - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 1 */ + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip<ilimit)) { + ip ++; + current++; + /* check repCode */ + if (offset) { + const U32 repIndex = (U32)(current - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 2 */ + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repIndex = (U32)((ip-base) - offset_2); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } + break; + } } + + /* Save reps for next block */ + ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); +} + +static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); +} + +static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); +} + +static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); +} + + +/* The optimal parser */ +#include "zstd_opt.h" + +static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +#ifdef ZSTD_OPT_H_91842398743 + ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0); +#else + (void)ctx; (void)src; (void)srcSize; + return; +#endif +} + +static void ZSTD_compressBlock_btopt2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +#ifdef ZSTD_OPT_H_91842398743 + ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1); +#else + (void)ctx; (void)src; (void)srcSize; + return; +#endif +} + +static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +#ifdef ZSTD_OPT_H_91842398743 + 
ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0); +#else + (void)ctx; (void)src; (void)srcSize; + return; +#endif +} + +static void ZSTD_compressBlock_btopt2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +#ifdef ZSTD_OPT_H_91842398743 + ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1); +#else + (void)ctx; (void)src; (void)srcSize; + return; +#endif +} + + +typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize); + +static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) +{ + static const ZSTD_blockCompressor blockCompressor[2][8] = { + { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, + ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, + ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2 }, + { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, + ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, + ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict } + }; + + return blockCompressor[extDict][(U32)strat]; +} + + +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit); + const BYTE* const base = zc->base; + const BYTE* const istart = (const BYTE*)src; + const U32 current = (U32)(istart-base); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */ + ZSTD_resetSeqStore(&(zc->seqStore)); + if (current > zc->nextToUpdate + 384) + zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384)); /* limited update after finding a very long match */ + blockCompressor(zc, src, srcSize); + return 
ZSTD_compressSequences(zc, dst, dstCapacity, srcSize); +} + + +/*! ZSTD_compress_generic() : +* Compress a chunk of data into one or multiple blocks. +* All blocks will be terminated, all input will be consumed. +* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. +* Frame is supposed already started (header already produced) +* @return : compressed size, or an error code +*/ +static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastFrameChunk) +{ + size_t blockSize = cctx->blockSize; + size_t remaining = srcSize; + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + U32 const maxDist = 1 << cctx->params.cParams.windowLog; + + if (cctx->params.fParams.checksumFlag && srcSize) + XXH64_update(&cctx->xxhState, src, srcSize); + + while (remaining) { + U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); + size_t cSize; + + if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) + return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ + if (remaining < blockSize) blockSize = remaining; + + /* preemptive overflow correction */ + if (cctx->lowLimit > (3U<<29)) { + U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1; + U32 const current = (U32)(ip - cctx->base); + U32 const newCurrent = (current & cycleMask) + (1 << cctx->params.cParams.windowLog); + U32 const correction = current - newCurrent; + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30); + ZSTD_reduceIndex(cctx, correction); + cctx->base += correction; + cctx->dictBase += correction; + cctx->lowLimit -= correction; + cctx->dictLimit -= correction; + if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0; + else cctx->nextToUpdate -= correction; + } + + if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) { + /* enforce maxDist */ + U32 
const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist; + if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit; + if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit; + } + + cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize); + if (ZSTD_isError(cSize)) return cSize; + + if (cSize == 0) { /* block is not compressible */ + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3); + if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); + MEM_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */ + memcpy(op + ZSTD_blockHeaderSize, ip, blockSize); + cSize = ZSTD_blockHeaderSize+blockSize; + } else { + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader24); + cSize += ZSTD_blockHeaderSize; + } + + remaining -= blockSize; + dstCapacity -= cSize; + ip += blockSize; + op += cSize; + } + + if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; + return op-ostart; +} + + +static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, + ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID) +{ BYTE* const op = (BYTE*)dst; + U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ + U32 const checksumFlag = params.fParams.checksumFlag>0; + U32 const windowSize = 1U << params.cParams.windowLog; + U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); + BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); + U32 const fcsCode = params.fParams.contentSizeFlag ? 
+ (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : /* 0-3 */ + 0; + BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); + size_t pos; + + if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall); + DEBUGLOG(5, "ZSTD_writeFrameHeader : dictIDFlag : %u \n", !params.fParams.noDictIDFlag); + DEBUGLOG(5, "ZSTD_writeFrameHeader : dictID : %u \n", dictID); + DEBUGLOG(5, "ZSTD_writeFrameHeader : dictIDSizeCode : %u \n", dictIDSizeCode); + + MEM_writeLE32(dst, ZSTD_MAGICNUMBER); + op[4] = frameHeaderDecriptionByte; pos=5; + if (!singleSegment) op[pos++] = windowLogByte; + switch(dictIDSizeCode) + { + default: /* impossible */ + case 0 : break; + case 1 : op[pos] = (BYTE)(dictID); pos++; break; + case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; + case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; + } + switch(fcsCode) + { + default: /* impossible */ + case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; + case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; + case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; + case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; + } + return pos; +} + + +static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 frame, U32 lastFrameChunk) +{ + const BYTE* const ip = (const BYTE*) src; + size_t fhSize = 0; + + if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */ + + if (frame && (cctx->stage==ZSTDcs_init)) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID); + if (ZSTD_isError(fhSize)) return fhSize; + dstCapacity -= fhSize; + dst = (char*)dst + fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + /* Check if blocks follow each other */ + if (src != 
cctx->nextSrc) { + /* not contiguous */ + ptrdiff_t const delta = cctx->nextSrc - ip; + cctx->lowLimit = cctx->dictLimit; + cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base); + cctx->dictBase = cctx->base; + cctx->base -= delta; + cctx->nextToUpdate = cctx->dictLimit; + if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) cctx->lowLimit = cctx->dictLimit; /* too small extDict */ + } + + /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ + if ((ip+srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) { + ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase; + U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx; + cctx->lowLimit = lowLimitMax; + } + + cctx->nextSrc = ip + srcSize; + + if (srcSize) { + size_t const cSize = frame ? + ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : + ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); + if (ZSTD_isError(cSize)) return cSize; + cctx->consumedSrcSize += srcSize; + return cSize + fhSize; + } else + return fhSize; +} + + +size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); +} + + +size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx) +{ + return MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog); +} + +size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx); + if (srcSize > blockSizeMax) return ERROR(srcSize_wrong); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); +} + +/*! 
ZSTD_loadDictionaryContent() : + * @return : 0, or an error code + */ +static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize) +{ + const BYTE* const ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + /* input becomes current prefix */ + zc->lowLimit = zc->dictLimit; + zc->dictLimit = (U32)(zc->nextSrc - zc->base); + zc->dictBase = zc->base; + zc->base += ip - zc->nextSrc; + zc->nextToUpdate = zc->dictLimit; + zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base); + + zc->nextSrc = iend; + if (srcSize <= HASH_READ_SIZE) return 0; + + switch(zc->params.cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength); + break; + + case ZSTD_dfast: + ZSTD_fillDoubleHashTable (zc, iend, zc->params.cParams.searchLength); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + if (srcSize >= HASH_READ_SIZE) + ZSTD_insertAndFindFirstIndex(zc, iend-HASH_READ_SIZE, zc->params.cParams.searchLength); + break; + + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btopt2: + if (srcSize >= HASH_READ_SIZE) + ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength); + break; + + default: + return ERROR(GENERIC); /* strategy doesn't exist; impossible */ + } + + zc->nextToUpdate = (U32)(iend - zc->base); + return 0; +} + + +/* Dictionaries that assign zero probability to symbols that show up causes problems + when FSE encoding. Refuse dictionaries that assign zero probability to symbols + that we may encounter during compression. + NOTE: This behavior is not standard and could be improved in the future. 
*/ +static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { + U32 s; + if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted); + for (s = 0; s <= maxSymbolValue; ++s) { + if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted); + } + return 0; +} + + +/* Dictionary format : + * See : + * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format + */ +/*! ZSTD_loadZstdDictionary() : + * @return : 0, or an error code + * assumptions : magic number supposed already checked + * dictSize supposed > 8 + */ +static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff; + BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)]; + + dictPtr += 4; /* skip magic number */ + cctx->dictID = cctx->params.fParams.noDictIDFlag ? 
0 : MEM_readLE32(dictPtr); + dictPtr += 4; + + { size_t const hufHeaderSize = HUF_readCTable(cctx->hufCTable, 255, dictPtr, dictEnd-dictPtr); + if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); + dictPtr += hufHeaderSize; + } + + { unsigned offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); + if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); + /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ + CHECK_E( FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, scratchBuffer, sizeof(scratchBuffer)), + dictionary_corrupted); + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); + /* Every match length code must have non-zero probability */ + CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); + CHECK_E( FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, scratchBuffer, sizeof(scratchBuffer)), + dictionary_corrupted); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); + /* Every literal length code must have non-zero probability 
*/ + CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); + CHECK_E( FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, scratchBuffer, sizeof(scratchBuffer)), + dictionary_corrupted); + dictPtr += litlengthHeaderSize; + } + + if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); + cctx->rep[0] = MEM_readLE32(dictPtr+0); + cctx->rep[1] = MEM_readLE32(dictPtr+4); + cctx->rep[2] = MEM_readLE32(dictPtr+8); + dictPtr += 12; + + { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + U32 offcodeMax = MaxOff; + if (dictContentSize <= ((U32)-1) - 128 KB) { + U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ + offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ + } + /* All offset values <= dictContentSize + 128 KB must be representable */ + CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); + /* All repCodes must be <= dictContentSize and != 0*/ + { U32 u; + for (u=0; u<3; u++) { + if (cctx->rep[u] == 0) return ERROR(dictionary_corrupted); + if (cctx->rep[u] > dictContentSize) return ERROR(dictionary_corrupted); + } } + + cctx->fseCTables_ready = 1; + cctx->hufCTable_repeatMode = HUF_repeat_valid; + return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize); + } +} + +/** ZSTD_compress_insertDictionary() : +* @return : 0, or an error code */ +static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + if ((dict==NULL) || (dictSize<=8)) return 0; + + /* dict as pure content */ + if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (cctx->forceRawDict)) + return ZSTD_loadDictionaryContent(cctx, dict, dictSize); + + /* dict as zstd dictionary */ + return ZSTD_loadZstdDictionary(cctx, dict, dictSize); +} + +/*! 
ZSTD_compressBegin_internal() : +* @return : 0, or an error code */ +static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, U64 pledgedSrcSize) +{ + ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue; + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + CHECK_F(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, crp)); + return ZSTD_compress_insertDictionary(cctx, dict, dictSize); +} + + +/*! ZSTD_compressBegin_advanced() : +* @return : 0, or an error code */ +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + /* compression parameters verification and optimization */ + CHECK_F(ZSTD_checkCParams(params.cParams)); + return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize); +} + + +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); + return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0); +} + + +size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) +{ + return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); +} + + +/*! ZSTD_writeEpilogue() : +* Ends a frame. 
+* @return : nb of bytes written into dst (or an error code) */ +static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + size_t fhSize = 0; + + if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */ + + /* special case : empty frame */ + if (cctx->stage == ZSTDcs_init) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0); + if (ZSTD_isError(fhSize)) return fhSize; + dstCapacity -= fhSize; + op += fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + if (cctx->stage != ZSTDcs_ending) { + /* write one last empty block, make it the "last" block */ + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; + if (dstCapacity<4) return ERROR(dstSize_tooSmall); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + } + + if (cctx->params.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + if (dstCapacity<4) return ERROR(dstSize_tooSmall); + MEM_writeLE32(op, checksum); + op += 4; + } + + cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ + return op-ostart; +} + + +size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t endResult; + size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 1 /* last chunk */); + if (ZSTD_isError(cSize)) return cSize; + endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); + if (ZSTD_isError(endResult)) return endResult; + if (cctx->params.fParams.contentSizeFlag) { /* control src size */ + if (cctx->frameContentSize != cctx->consumedSrcSize) return ERROR(srcSize_wrong); + } + return cSize + endResult; +} + + +static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* 
dict,size_t dictSize,
+                               ZSTD_parameters params)
+{   /* one-shot helper : begin a frame (srcSize is passed as the pledged source size), then end it with the whole input */
+    CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize));   /* CHECK_F is defined outside this chunk; presumably returns the error code on failure */
+    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+/*! ZSTD_compress_advanced() :
+ *  Checks `params.cParams` with ZSTD_checkCParams(), then compresses `src` into `dst` in a single call.
+ * @return : value returned by ZSTD_compress_internal(), or the error reported by the parameter check */
+size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                         const void* dict,size_t dictSize,
+                               ZSTD_parameters params)
+{
+    CHECK_F(ZSTD_checkCParams(params.cParams));
+    return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
+}
+/*! ZSTD_compress_usingDict() :
+ *  Derives parameters from `compressionLevel`, `srcSize` and the dictionary size
+ *  (counted as 0 when `dict` is NULL), forces contentSizeFlag to 1, then compresses in a single call. */
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
+                               const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dict ? dictSize : 0);
+    params.fParams.contentSizeFlag = 1;
+    return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
+}
+/*! ZSTD_compressCCtx() :
+ *  ZSTD_compress_usingDict() without a dictionary (dict = NULL, dictSize = 0). */
+size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
+{
+    return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
+}
+/*! ZSTD_compress() :
+ *  Single-call compression using a context that lives on the stack of this function. */
+size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
+{
+    size_t result;
+    ZSTD_CCtx ctxBody;
+    memset(&ctxBody, 0, sizeof(ctxBody));   /* start from an all-zero context */
+    memcpy(&ctxBody.customMem, &defaultCustomMem, sizeof(ZSTD_customMem));   /* the stack context uses the default allocator */
+    result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
+    ZSTD_free(ctxBody.workSpace, defaultCustomMem);  /* can't free ctxBody itself, as it's on stack; free only heap content */
+    return result;
+}
+
+
+/* =====  Dictionary API  ===== */
+
+struct ZSTD_CDict_s {
+    void* dictBuffer;          /* private copy of the dictionary made by ZSTD_createCDict_advanced(), or NULL when no copy was made */
+    const void* dictContent;   /* dictionary bytes actually used : the private copy, or the caller's buffer */
+    size_t dictContentSize;    /* the dictSize given at creation */
+    ZSTD_CCtx* refContext;     /* context on which compression was begun with this dictionary at creation time */
+};  /* typedef'd to ZSTD_CDict within "zstd.h" */
+/*! ZSTD_sizeof_CDict() :
+ *  Sum of the reference context size, the private dictionary copy (if any) and the CDict struct itself. */
+size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
+{
+    if (cdict==NULL) return 0;   /* support sizeof on NULL */
+    return
ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
+}
+
+/* ZSTD_makeParams() :
+ * Bundles compression parameters and frame parameters into one ZSTD_parameters value. */
+static ZSTD_parameters ZSTD_makeParams(ZSTD_compressionParameters cParams, ZSTD_frameParameters fParams)
+{
+    ZSTD_parameters params;
+    params.cParams = cParams;
+    params.fParams = fParams;
+    return params;
+}
+
+/*! ZSTD_createCDict_advanced() :
+ *  Creates a digested dictionary : allocates the CDict and a reference context, optionally
+ *  copies the dictionary, then begins compression on the reference context with `cParams`
+ *  and all-zero frame parameters.
+ *  `byReference` != 0 (or an empty / NULL dictionary) : `dictBuffer` is referenced, not copied.
+ *  `customMem` : both function pointers NULL selects defaultCustomMem; exactly one NULL is rejected.
+ * @return : the new CDict, or NULL on invalid allocator, allocation failure or initialization error.
+ *           Everything allocated here is released again before NULL is returned. */
+ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference,
+                                      ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
+{
+    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
+    if (!customMem.customAlloc || !customMem.customFree) return NULL;
+
+    {   ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
+        ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
+
+        if (!cdict || !cctx) {
+            ZSTD_free(cdict, customMem);
+            ZSTD_freeCCtx(cctx);
+            return NULL;
+        }
+
+        if ((byReference) || (!dictBuffer) || (!dictSize)) {
+            cdict->dictBuffer = NULL;
+            cdict->dictContent = dictBuffer;
+        } else {
+            void* const internalBuffer = ZSTD_malloc(dictSize, customMem);
+            if (!internalBuffer) {
+                /* release the context through ZSTD_freeCCtx(), like the other failure paths :
+                 * a plain ZSTD_free() would release the struct only, not what the context owns */
+                ZSTD_freeCCtx(cctx);
+                ZSTD_free(cdict, customMem);
+                return NULL;
+            }
+            memcpy(internalBuffer, dictBuffer, dictSize);
+            cdict->dictBuffer = internalBuffer;
+            cdict->dictContent = internalBuffer;
+        }
+
+        {   ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */, 0 /* checksumFlag */, 0 /* noDictIDFlag */ };   /* dummy */
+            ZSTD_parameters const params = ZSTD_makeParams(cParams, fParams);
+            size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
+            if (ZSTD_isError(errorCode)) {
+                ZSTD_free(cdict->dictBuffer, customMem);
+                ZSTD_free(cdict, customMem);
+                ZSTD_freeCCtx(cctx);
+                return NULL;
+            }   }
+
+        cdict->refContext = cctx;
+        cdict->dictContentSize = dictSize;
+        return cdict;
+    }
+}
+
+ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_customMem const allocator = { NULL,
NULL, NULL };   /* all-NULL allocator : ZSTD_createCDict_advanced() replaces it with defaultCustomMem */
+    ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    return ZSTD_createCDict_advanced(dict, dictSize, 0, cParams, allocator);   /* byReference = 0 */
+}
+/*! ZSTD_createCDict_byReference() :
+ *  Same as ZSTD_createCDict(), but passes byReference = 1 : the CDict points at `dict` instead of
+ *  holding a private copy. NOTE(review): the caller presumably has to keep `dict` valid for the
+ *  lifetime of the CDict - confirm against the public header. */
+ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_customMem const allocator = { NULL, NULL, NULL };
+    ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    return ZSTD_createCDict_advanced(dict, dictSize, 1, cParams, allocator);
+}
+/*! ZSTD_freeCDict() :
+ *  Releases the reference context, the private dictionary copy (if any) and the CDict itself.
+ * @return : 0 */
+size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
+{
+    if (cdict==NULL) return 0;   /* support free on NULL */
+    {   ZSTD_customMem const cMem = cdict->refContext->customMem;   /* copied first : the context holding it is freed on the next line */
+        ZSTD_freeCCtx(cdict->refContext);
+        ZSTD_free(cdict->dictBuffer, cMem);
+        ZSTD_free(cdict, cMem);
+        return 0;
+    }
+}
+/* ZSTD_getParamsFromCDict() : parameters of the CDict's reference context; cdict is dereferenced without a NULL check */
+static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
+    return ZSTD_getParamsFromCCtx(cdict->refContext);
+}
+
+/* ZSTD_compressBegin_usingCDict_advanced() :
+ * cdict must be != NULL (a NULL cdict is answered with ERROR(GENERIC)).
+ * When the CDict holds dictionary content, its reference context is copied into cctx together
+ * with `fParams` and `pledgedSrcSize`; otherwise compression is begun without a dictionary, using
+ * the reference context's parameters with `fParams` substituted.
+ * @return : 0, or an error code */
+size_t ZSTD_compressBegin_usingCDict_advanced(
+    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
+{
+    if (cdict==NULL) return ERROR(GENERIC);   /* does not support NULL cdict */
+    DEBUGLOG(5, "ZSTD_compressBegin_usingCDict_advanced : dictIDFlag == %u \n", !fParams.noDictIDFlag);
+    if (cdict->dictContentSize)
+        CHECK_F( ZSTD_copyCCtx_internal(cctx, cdict->refContext, fParams, pledgedSrcSize) )   /* no ';' on purpose : the `else` below only parses if CHECK_F expands to a complete statement (macro defined outside this chunk) */
+    else {
+        ZSTD_parameters params = cdict->refContext->params;
+        params.fParams = fParams;
+        CHECK_F(ZSTD_compressBegin_internal(cctx, NULL, 0, params, pledgedSrcSize));
+    }
+    return 0;
+}
+
+/* ZSTD_compressBegin_usingCDict() :
+ * Begins compression with a digested dictionary, using all-zero frame parameters
+ * (content size, checksum, noDictID) and a pledged source size of 0.
+ * This function takes no pledgedSrcSize argument; use
+ * ZSTD_compressBegin_usingCDict_advanced() to supply one. */
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    DEBUGLOG(5,
"ZSTD_compressBegin_usingCDict : dictIDFlag == %u \n", !fParams.noDictIDFlag); + return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, 0); +} + +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + CHECK_F (ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */ + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. + * Note that compression parameters are decided at CDict creation time + * while frame parameters are hardcoded */ +size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + + + +/* ****************************************************************** +* Streaming +********************************************************************/ + +typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage; + +struct ZSTD_CStream_s { + ZSTD_CCtx* cctx; + ZSTD_CDict* cdictLocal; + const ZSTD_CDict* cdict; + char* inBuff; + size_t inBuffSize; + size_t inToCompress; + size_t inBuffPos; + size_t inBuffTarget; + size_t blockSize; + char* outBuff; + size_t outBuffSize; + size_t outBuffContentSize; + size_t outBuffFlushedSize; + ZSTD_cStreamStage stage; + U32 checksum; + U32 frameEnded; + U64 pledgedSrcSize; + ZSTD_parameters params; + ZSTD_customMem customMem; +}; /* typedef'd to ZSTD_CStream within "zstd.h" */ + +ZSTD_CStream* ZSTD_createCStream(void) +{ + return 
ZSTD_createCStream_advanced(defaultCustomMem); +} + +ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) +{ + ZSTD_CStream* zcs; + + if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; + if (!customMem.customAlloc || !customMem.customFree) return NULL; + + zcs = (ZSTD_CStream*)ZSTD_malloc(sizeof(ZSTD_CStream), customMem); + if (zcs==NULL) return NULL; + memset(zcs, 0, sizeof(ZSTD_CStream)); + memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem)); + zcs->cctx = ZSTD_createCCtx_advanced(customMem); + if (zcs->cctx == NULL) { ZSTD_freeCStream(zcs); return NULL; } + return zcs; +} + +size_t ZSTD_freeCStream(ZSTD_CStream* zcs) +{ + if (zcs==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = zcs->customMem; + ZSTD_freeCCtx(zcs->cctx); + zcs->cctx = NULL; + ZSTD_freeCDict(zcs->cdictLocal); + zcs->cdictLocal = NULL; + ZSTD_free(zcs->inBuff, cMem); + zcs->inBuff = NULL; + ZSTD_free(zcs->outBuff, cMem); + zcs->outBuff = NULL; + ZSTD_free(zcs, cMem); + return 0; + } +} + + +/*====== Initialization ======*/ + +size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } + +size_t ZSTD_CStreamOutSize(void) +{ + return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; +} + +static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) +{ + if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */ + + DEBUGLOG(5, "ZSTD_resetCStream_internal : dictIDFlag == %u \n", !zcs->params.fParams.noDictIDFlag); + + if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict_advanced(zcs->cctx, zcs->cdict, zcs->params.fParams, pledgedSrcSize)) + else CHECK_F(ZSTD_compressBegin_internal(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize)); + + zcs->inToCompress = 0; + zcs->inBuffPos = 0; + zcs->inBuffTarget = zcs->blockSize; + zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; + 
zcs->stage = zcss_load; + zcs->frameEnded = 0; + zcs->pledgedSrcSize = pledgedSrcSize; + return 0; /* ready to go */ +} + +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) +{ + + zcs->params.fParams.contentSizeFlag = (pledgedSrcSize > 0); + DEBUGLOG(5, "ZSTD_resetCStream : dictIDFlag == %u \n", !zcs->params.fParams.noDictIDFlag); + return ZSTD_resetCStream_internal(zcs, pledgedSrcSize); +} + +/* ZSTD_initCStream_internal() : + * params are supposed validated at this stage + * and zcs->cdict is supposed to be correct */ +static size_t ZSTD_initCStream_stage2(ZSTD_CStream* zcs, + const ZSTD_parameters params, + unsigned long long pledgedSrcSize) +{ + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + + /* allocate buffers */ + { size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog; + if (zcs->inBuffSize < neededInBuffSize) { + zcs->inBuffSize = 0; + ZSTD_free(zcs->inBuff, zcs->customMem); + zcs->inBuff = (char*) ZSTD_malloc(neededInBuffSize, zcs->customMem); + if (zcs->inBuff == NULL) return ERROR(memory_allocation); + zcs->inBuffSize = neededInBuffSize; + } + zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize); + } + if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize)+1) { + size_t const outBuffSize = ZSTD_compressBound(zcs->blockSize)+1; + zcs->outBuffSize = 0; + ZSTD_free(zcs->outBuff, zcs->customMem); + zcs->outBuff = (char*) ZSTD_malloc(outBuffSize, zcs->customMem); + if (zcs->outBuff == NULL) return ERROR(memory_allocation); + zcs->outBuffSize = outBuffSize; + } + + zcs->checksum = params.fParams.checksumFlag > 0; + zcs->params = params; + + DEBUGLOG(5, "ZSTD_initCStream_stage2 : dictIDFlag == %u \n", !params.fParams.noDictIDFlag); + return ZSTD_resetCStream_internal(zcs, pledgedSrcSize); +} + +/* ZSTD_initCStream_usingCDict_advanced() : + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const 
ZSTD_CDict* cdict, unsigned long long pledgedSrcSize, ZSTD_frameParameters fParams) +{ + if (!cdict) return ERROR(GENERIC); /* cannot handle NULL cdict (does not know what to do) */ + { ZSTD_parameters params = ZSTD_getParamsFromCDict(cdict); + params.fParams = fParams; + zcs->cdict = cdict; + return ZSTD_initCStream_stage2(zcs, params, pledgedSrcSize); + } +} + +/* note : cdict must outlive compression session */ +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 0 /* content */, 0 /* checksum */, 0 /* noDictID */ }; + return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, 0, fParams); +} + +static size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + zcs->cdict = NULL; + + if (dict && dictSize >= 8) { + ZSTD_freeCDict(zcs->cdictLocal); + zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0 /* copy */, params.cParams, zcs->customMem); + if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); + zcs->cdict = zcs->cdictLocal; + } + + DEBUGLOG(5, "ZSTD_initCStream_internal : dictIDFlag == %u \n", !params.fParams.noDictIDFlag); + return ZSTD_initCStream_stage2(zcs, params, pledgedSrcSize); +} + +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + CHECK_F( ZSTD_checkCParams(params.cParams) ); + DEBUGLOG(5, "ZSTD_initCStream_advanced : dictIDFlag == %u \n", !params.fParams.noDictIDFlag); + return ZSTD_initCStream_internal(zcs, dict, dictSize, params, pledgedSrcSize); +} + +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); + return ZSTD_initCStream_internal(zcs, dict, dictSize, params, 
0); +} + +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize) +{ + ZSTD_parameters params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0); + params.fParams.contentSizeFlag = (pledgedSrcSize>0); + return ZSTD_initCStream_internal(zcs, NULL, 0, params, pledgedSrcSize); +} + +size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) +{ + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0); + return ZSTD_initCStream_internal(zcs, NULL, 0, params, 0); +} + +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) +{ + if (zcs==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize; +} + +/*====== Compression ======*/ + +typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e; + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + memcpy(dst, src, length); + return length; +} + +static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr, + ZSTD_flush_e const flush) +{ + U32 someMoreWork = 1; + const char* const istart = (const char*)src; + const char* const iend = istart + *srcSizePtr; + const char* ip = istart; + char* const ostart = (char*)dst; + char* const oend = ostart + *dstCapacityPtr; + char* op = ostart; + + while (someMoreWork) { + switch(zcs->stage) + { + case zcss_init: return ERROR(init_missing); /* call ZBUFF_compressInit() first ! 
*/ + + case zcss_load: + /* complete inBuffer */ + { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; + size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend-ip); + zcs->inBuffPos += loaded; + ip += loaded; + if ( (zcs->inBuffPos==zcs->inToCompress) || (!flush && (toLoad != loaded)) ) { + someMoreWork = 0; break; /* not enough input to get a full block : stop there, wait for more */ + } } + /* compress current block (note : this stage cannot be stopped in the middle) */ + { void* cDst; + size_t cSize; + size_t const iSize = zcs->inBuffPos - zcs->inToCompress; + size_t oSize = oend-op; + if (oSize >= ZSTD_compressBound(iSize)) + cDst = op; /* compress directly into output buffer (avoid flush stage) */ + else + cDst = zcs->outBuff, oSize = zcs->outBuffSize; + cSize = (flush == zsf_end) ? + ZSTD_compressEnd(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize) : + ZSTD_compressContinue(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize); + if (ZSTD_isError(cSize)) return cSize; + if (flush == zsf_end) zcs->frameEnded = 1; + /* prepare next block */ + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; + if (zcs->inBuffTarget > zcs->inBuffSize) + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; /* note : inBuffSize >= blockSize */ + zcs->inToCompress = zcs->inBuffPos; + if (cDst == op) { op += cSize; break; } /* no need to flush */ + zcs->outBuffContentSize = cSize; + zcs->outBuffFlushedSize = 0; + zcs->stage = zcss_flush; /* pass-through to flush stage */ + } + + case zcss_flush: + { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + op += flushed; + zcs->outBuffFlushedSize += flushed; + if (toFlush!=flushed) { someMoreWork = 0; break; } /* dst too small to store flushed data : stop there */ + zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; + zcs->stage = zcss_load; + 
break; + } + + case zcss_final: + someMoreWork = 0; /* do nothing */ + break; + + default: + return ERROR(GENERIC); /* impossible */ + } + } + + *srcSizePtr = ip - istart; + *dstCapacityPtr = op - ostart; + if (zcs->frameEnded) return 0; + { size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos; + if (hintInSize==0) hintInSize = zcs->blockSize; + return hintInSize; + } +} + +size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + size_t sizeRead = input->size - input->pos; + size_t sizeWritten = output->size - output->pos; + size_t const result = ZSTD_compressStream_generic(zcs, + (char*)(output->dst) + output->pos, &sizeWritten, + (const char*)(input->src) + input->pos, &sizeRead, zsf_gather); + input->pos += sizeRead; + output->pos += sizeWritten; + return result; +} + + +/*====== Finalize ======*/ + +/*! ZSTD_flushStream() : +* @return : amount of data remaining to flush */ +size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + size_t srcSize = 0; + size_t sizeWritten = output->size - output->pos; + size_t const result = ZSTD_compressStream_generic(zcs, + (char*)(output->dst) + output->pos, &sizeWritten, + &srcSize, &srcSize, /* use a valid src address instead of NULL */ + zsf_flush); + output->pos += sizeWritten; + if (ZSTD_isError(result)) return result; + return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */ +} + + +size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + BYTE* const ostart = (BYTE*)(output->dst) + output->pos; + BYTE* const oend = (BYTE*)(output->dst) + output->size; + BYTE* op = ostart; + + if (zcs->stage != zcss_final) { + /* flush whatever remains */ + size_t srcSize = 0; + size_t sizeWritten = output->size - output->pos; + size_t const notEnded = ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, + &srcSize /* use a valid src address instead of NULL */, &srcSize, zsf_end); + size_t const remainingToFlush = zcs->outBuffContentSize - 
zcs->outBuffFlushedSize; + op += sizeWritten; + if (remainingToFlush) { + output->pos += sizeWritten; + return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4); + } + /* create epilogue */ + zcs->stage = zcss_final; + zcs->outBuffContentSize = !notEnded ? 0 : + /* write epilogue, including final empty block, into outBuff */ + ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL, 0); + if (ZSTD_isError(zcs->outBuffContentSize)) return zcs->outBuffContentSize; + } + + /* flush epilogue */ + { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + op += flushed; + zcs->outBuffFlushedSize += flushed; + output->pos += op-ostart; + if (toFlush==flushed) zcs->stage = zcss_init; /* end reached */ + return toFlush - flushed; + } +} + + + +/*-===== Pre-defined compression levels =====-*/ + +#define ZSTD_DEFAULT_CLEVEL 1 +#define ZSTD_MAX_CLEVEL 22 +int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } + +static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" */ + /* W, C, H, S, L, TL, strat */ + { 18, 12, 12, 1, 7, 16, ZSTD_fast }, /* level 0 - never used */ + { 19, 13, 14, 1, 7, 16, ZSTD_fast }, /* level 1 */ + { 19, 15, 16, 1, 6, 16, ZSTD_fast }, /* level 2 */ + { 20, 16, 17, 1, 5, 16, ZSTD_dfast }, /* level 3.*/ + { 20, 18, 18, 1, 5, 16, ZSTD_dfast }, /* level 4.*/ + { 20, 15, 18, 3, 5, 16, ZSTD_greedy }, /* level 5 */ + { 21, 16, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */ + { 21, 17, 20, 3, 5, 16, ZSTD_lazy }, /* level 7 */ + { 21, 18, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 21, 20, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ + { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 13 */ + { 22, 
21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 14 */ + { 22, 21, 21, 5, 5, 16, ZSTD_btlazy2 }, /* level 15 */ + { 23, 22, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 16 */ + { 23, 21, 22, 4, 5, 24, ZSTD_btopt }, /* level 17 */ + { 23, 22, 22, 5, 4, 32, ZSTD_btopt }, /* level 18 */ + { 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19 */ + { 25, 25, 23, 7, 3, 64, ZSTD_btopt2 }, /* level 20 */ + { 26, 26, 23, 7, 3,256, ZSTD_btopt2 }, /* level 21 */ + { 27, 27, 25, 9, 3,512, ZSTD_btopt2 }, /* level 22 */ +}, +{ /* for srcSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - not used */ + { 18, 13, 14, 1, 6, 8, ZSTD_fast }, /* level 1 */ + { 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */ + { 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */ + { 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/ + { 18, 16, 17, 4, 5, 8, ZSTD_greedy }, /* level 5.*/ + { 18, 16, 17, 3, 5, 8, ZSTD_lazy }, /* level 6.*/ + { 18, 17, 17, 4, 4, 8, ZSTD_lazy }, /* level 7 */ + { 18, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/ + { 18, 18, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 12.*/ + { 18, 19, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13 */ + { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/ + { 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/ + { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/ + { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/ + { 18, 19, 18, 11, 3,512, ZSTD_btopt2 }, /* level 20.*/ + { 18, 19, 18, 12, 3,512, ZSTD_btopt2 }, /* level 21.*/ + { 18, 19, 18, 13, 3,512, ZSTD_btopt2 }, /* level 22.*/ +}, +{ /* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 17, 12, 12, 1, 7, 8, ZSTD_fast }, /* level 0 - not used */ + { 17, 12, 13, 1, 6, 8, ZSTD_fast }, /* level 1 */ 
+ { 17, 13, 16, 1, 5, 8, ZSTD_fast }, /* level 2 */ + { 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */ + { 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */ + { 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */ + { 17, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */ + { 17, 15, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */ + { 17, 17, 17, 8, 4, 8, ZSTD_lazy2 }, /* level 12 */ + { 17, 18, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13.*/ + { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/ + { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/ + { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/ + { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/ + { 17, 18, 17, 9, 3,256, ZSTD_btopt2 }, /* level 20.*/ + { 17, 18, 17, 10, 3,256, ZSTD_btopt2 }, /* level 21.*/ + { 17, 18, 17, 11, 3,512, ZSTD_btopt2 }, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 14, 12, 12, 1, 7, 6, ZSTD_fast }, /* level 0 - not used */ + { 14, 14, 14, 1, 6, 6, ZSTD_fast }, /* level 1 */ + { 14, 14, 14, 1, 4, 6, ZSTD_fast }, /* level 2 */ + { 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/ + { 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/ + { 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/ + { 14, 14, 14, 4, 4, 6, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 5, 4, 6, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14, 6, 4, 6, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 6, 4, 6, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/ + { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/ + { 14, 15, 15, 6, 3, 48, 
ZSTD_btopt }, /* level 14.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ + { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/ + { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btopt2 }, /* level 20.*/ + { 14, 15, 15, 9, 3,256, ZSTD_btopt2 }, /* level 21.*/ + { 14, 15, 15, 10, 3,256, ZSTD_btopt2 }, /* level 22.*/ +}, +}; + +/*! ZSTD_getCParams() : +* @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`. +* Size values are optional, provide 0 if not known or unused */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) +{ + ZSTD_compressionParameters cp; + size_t const addedSize = srcSize ? 0 : 500; + U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1; + U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ + if (compressionLevel <= 0) compressionLevel = ZSTD_DEFAULT_CLEVEL; /* 0 == default; no negative compressionLevel yet */ + if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; + cp = ZSTD_defaultCParameters[tableID][compressionLevel]; + if (MEM_32bits()) { /* auto-correction, for 32-bits mode */ + if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX; + if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX; + if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX; + } + cp = ZSTD_adjustCParams(cp, srcSize, dictSize); + return cp; +} + +/*! ZSTD_getParams() : +* same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`). 
+* All fields of `ZSTD_frameParameters` are set to default (0) */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) { + ZSTD_parameters params; + ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize); + memset(&params, 0, sizeof(params)); + params.cParams = cParams; + return params; +} diff --git a/thirdparty/zstd/compress/zstd_opt.h b/thirdparty/zstd/compress/zstd_opt.h new file mode 100644 index 0000000000..5437611912 --- /dev/null +++ b/thirdparty/zstd/compress/zstd_opt.h @@ -0,0 +1,921 @@ +/** + * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + + +/* Note : this file is intended to be included within zstd_compress.c */ + + +#ifndef ZSTD_OPT_H_91842398743 +#define ZSTD_OPT_H_91842398743 + + +#define ZSTD_LITFREQ_ADD 2 +#define ZSTD_FREQ_DIV 4 +#define ZSTD_MAX_PRICE (1<<30) + +/*-************************************* +* Price functions for optimal parser +***************************************/ +FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr) +{ + ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum+1); + ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum+1); + ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum+1); + ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1); + ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum)); +} + + +MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize) +{ + unsigned u; + + ssPtr->cachedLiterals = NULL; + ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; + ssPtr->staticPrices = 0; + + if (ssPtr->litLengthSum == 0) { + 
if (srcSize <= 1024) ssPtr->staticPrices = 1; + + for (u=0; u<=MaxLit; u++) + ssPtr->litFreq[u] = 0; + for (u=0; u<srcSize; u++) + ssPtr->litFreq[src[u]]++; + + ssPtr->litSum = 0; + ssPtr->litLengthSum = MaxLL+1; + ssPtr->matchLengthSum = MaxML+1; + ssPtr->offCodeSum = (MaxOff+1); + ssPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits); + + for (u=0; u<=MaxLit; u++) { + ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->litSum += ssPtr->litFreq[u]; + } + for (u=0; u<=MaxLL; u++) + ssPtr->litLengthFreq[u] = 1; + for (u=0; u<=MaxML; u++) + ssPtr->matchLengthFreq[u] = 1; + for (u=0; u<=MaxOff; u++) + ssPtr->offCodeFreq[u] = 1; + } else { + ssPtr->matchLengthSum = 0; + ssPtr->litLengthSum = 0; + ssPtr->offCodeSum = 0; + ssPtr->matchSum = 0; + ssPtr->litSum = 0; + + for (u=0; u<=MaxLit; u++) { + ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1)); + ssPtr->litSum += ssPtr->litFreq[u]; + } + for (u=0; u<=MaxLL; u++) { + ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1)); + ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; + } + for (u=0; u<=MaxML; u++) { + ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; + ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); + } + ssPtr->matchSum *= ZSTD_LITFREQ_ADD; + for (u=0; u<=MaxOff; u++) { + ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; + } + } + + ZSTD_setLog2Prices(ssPtr); +} + + +FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BYTE* literals) +{ + U32 price, u; + + if (ssPtr->staticPrices) + return ZSTD_highbit32((U32)litLength+1) + (litLength*6); + + if (litLength == 0) + return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1); + + /* literals */ + if (ssPtr->cachedLiterals == literals) { + U32 const additional = litLength - ssPtr->cachedLitLength; + const BYTE* literals2 = 
ssPtr->cachedLiterals + ssPtr->cachedLitLength; + price = ssPtr->cachedPrice + additional * ssPtr->log2litSum; + for (u=0; u < additional; u++) + price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]]+1); + ssPtr->cachedPrice = price; + ssPtr->cachedLitLength = litLength; + } else { + price = litLength * ssPtr->log2litSum; + for (u=0; u < litLength; u++) + price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]]+1); + + if (litLength >= 12) { + ssPtr->cachedLiterals = literals; + ssPtr->cachedPrice = price; + ssPtr->cachedLitLength = litLength; + } + } + + /* literal Length */ + { const BYTE LL_deltaCode = 19; + const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; + price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode]+1); + } + + return price; +} + + +FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra) +{ + /* offset */ + U32 price; + BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); + + if (seqStorePtr->staticPrices) + return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode; + + price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1); + if (!ultra && offCode >= 20) price += (offCode-19)*2; + + /* match Length */ + { const BYTE ML_deltaCode = 36; + const BYTE mlCode = (matchLength>127) ? 
(BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; + price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode]+1); + } + + return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor; +} + + +MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +{ + U32 u; + + /* literals */ + seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD; + for (u=0; u < litLength; u++) + seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + + /* literal Length */ + { const BYTE LL_deltaCode = 19; + const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; + seqStorePtr->litLengthFreq[llCode]++; + seqStorePtr->litLengthSum++; + } + + /* match offset */ + { BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); + seqStorePtr->offCodeSum++; + seqStorePtr->offCodeFreq[offCode]++; + } + + /* match Length */ + { const BYTE ML_deltaCode = 36; + const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; + seqStorePtr->matchLengthFreq[mlCode]++; + seqStorePtr->matchLengthSum++; + } + + ZSTD_setLog2Prices(seqStorePtr); +} + + +#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ + { \ + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \ + opt[pos].mlen = mlen_; \ + opt[pos].off = offset_; \ + opt[pos].litlen = litlen_; \ + opt[pos].price = price_; \ + } + + + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (i.e. 
not within extDict) */ +FORCE_INLINE +U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) +{ + U32* const hashTable3 = zc->hashTable3; + U32 const hashLog3 = zc->hashLog3; + const BYTE* const base = zc->base; + U32 idx = zc->nextToUpdate3; + const U32 target = zc->nextToUpdate3 = (U32)(ip - base); + const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3); + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + return hashTable3[hash3]; +} + + +/*-************************************* +* Binary Tree search +***************************************/ +static U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + U32 nbCompares, const U32 mls, + U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen) +{ + const BYTE* const base = zc->base; + const U32 current = (U32)(ip-base); + const U32 hashLog = zc->params.cParams.hashLog; + const size_t h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const hashTable = zc->hashTable; + U32 matchIndex = hashTable[h]; + U32* const bt = zc->chainTable; + const U32 btLog = zc->params.cParams.chainLog - 1; + const U32 btMask= (1U << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const U32 btLow = btMask >= current ? 0 : current - btMask; + const U32 windowLow = zc->lowLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current+8; + U32 dummy32; /* to be nullified at the end */ + U32 mnum = 0; + + const U32 minMatch = (mls == 3) ? 
3 : 4; + size_t bestLength = minMatchLen-1; + + if (minMatch == 3) { /* HC3 match finder */ + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); + if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) { + const BYTE* match; + size_t currentMl=0; + if ((!extDict) || matchIndex3 >= dictLimit) { + match = base + matchIndex3; + if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit); + } else { + match = dictBase + matchIndex3; + if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; + } + + /* save best solution */ + if (currentMl > bestLength) { + bestLength = currentMl; + matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3; + matches[mnum].len = (U32)currentMl; + mnum++; + if (currentMl > ZSTD_OPT_NUM) goto update; + if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/ + } + } + } + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) { + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1; + } + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; + 
bestLength = matchLength; + matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex; + matches[mnum].len = (U32)matchLength; + mnum++; + if (matchLength > ZSTD_OPT_NUM) break; + if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + +update: + zc->nextToUpdate = (matchEndIdx > current + 8) ? 
matchEndIdx - 8 : current+1; + return mnum; +} + + +/** Tree updater, providing best match */ +static U32 ZSTD_BtGetAllMatches ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen); +} + + +static U32 ZSTD_BtGetAllMatches_selectMLS ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iHighLimit, + const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) +{ + switch(matchLengthSearch) + { + case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); + default : + case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); + case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); + case 7 : + case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); + } +} + +/** Tree updater, providing best match */ +static U32 ZSTD_BtGetAllMatches_extDict ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen); +} + + +static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iHighLimit, + const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) +{ + 
switch(matchLengthSearch) + { + case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); + default : + case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); + case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); + case 7 : + case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); + } +} + + +/*-******************************* +* Optimal parser +*********************************/ +FORCE_INLINE +void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, const int ultra) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const BYTE* const prefixStart = base + ctx->dictLimit; + + const U32 maxSearches = 1U << ctx->params.cParams.searchLog; + const U32 sufficient_len = ctx->params.cParams.targetLength; + const U32 mls = ctx->params.cParams.searchLength; + const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 
3 : 4; + + ZSTD_optimal_t* opt = seqStorePtr->priceTable; + ZSTD_match_t* matches = seqStorePtr->matchTable; + const BYTE* inr; + U32 offset, rep[ZSTD_REP_NUM]; + + /* init */ + ctx->nextToUpdate3 = ctx->nextToUpdate; + ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize); + ip += (ip==prefixStart); + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, match_num, last_pos, litlen, price; + U32 u, mlen, best_mlen, best_off, litLength; + memset(opt, 0, sizeof(ZSTD_optimal_t)); + last_pos = 0; + litlen = (U32)(ip - anchor); + + /* check repCode */ + { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); + for (i=(ip == anchor); i<last_i; i++) { + const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; + if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart)) + && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) { + mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch; + if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; + goto _storeSequence; + } + best_off = i - (ip == anchor); + do { + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ + mlen--; + } while (mlen >= minMatch); + } } } + + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); + + if (!last_pos && !match_num) { ip++; continue; } + + if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + cur = 0; + last_pos = 1; + goto _storeSequence; + } + + /* set prices using matches at position = 0 */ + best_mlen = (last_pos) ? last_pos : minMatch; + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? 
matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + while (mlen <= best_mlen) { + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ + mlen++; + } } + + if (last_pos < minMatch) { ip++; continue; } + + /* initialize opt[0] */ + { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; } + opt[0].mlen = 1; + opt[0].litlen = litlen; + + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + inr = ip + cur; + + if (opt[cur-1].mlen == 1) { + litlen = opt[cur-1].litlen + 1; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); + } else + price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); + } else { + litlen = 1; + price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); + } + + if (cur > last_pos || price <= opt[cur].price) + SET_PRICE(cur, 1, 0, litlen, price); + + if (cur == last_pos) break; + + if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ + continue; + + mlen = opt[cur].mlen; + if (opt[cur].off > ZSTD_REP_MOVE_OPT) { + opt[cur].rep[2] = opt[cur-mlen].rep[1]; + opt[cur].rep[1] = opt[cur-mlen].rep[0]; + opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; + } else { + opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); + } + + best_mlen = minMatch; + { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); + for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */ + const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? 
(opt[cur].rep[0] - 1) : opt[cur].rep[i]; + if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart)) + && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) { + mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; + + if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; last_pos = cur + 1; + goto _storeSequence; + } + + best_off = i - (opt[cur].mlen != 1); + if (mlen > best_mlen) best_mlen = mlen; + + do { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); + } else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); + } + + if (cur + mlen > last_pos || price <= opt[cur + mlen].price) + SET_PRICE(cur + mlen, mlen, i, litlen, price); + mlen--; + } while (mlen >= minMatch); + } } } + + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen); + + if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + last_pos = cur + 1; + goto _storeSequence; + } + + /* set prices using matches at position = cur */ + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? 
matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + + while (mlen <= best_mlen) { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); + else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); + } + + if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) + SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); + + mlen++; + } } } + + best_mlen = opt[last_pos].mlen; + best_off = opt[last_pos].off; + cur = last_pos - best_mlen; + + /* store sequence */ +_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ + opt[0].mlen = 1; + + while (1) { + mlen = opt[cur].mlen; + offset = opt[cur].off; + opt[cur].mlen = best_mlen; + opt[cur].off = best_off; + best_mlen = mlen; + best_off = offset; + if (mlen > cur) break; + cur -= mlen; + } + + for (u = 0; u <= last_pos;) { + u += opt[u].mlen; + } + + for (cur=0; cur < last_pos; ) { + mlen = opt[cur].mlen; + if (mlen == 1) { ip++; cur++; continue; } + offset = opt[cur].off; + cur += mlen; + litLength = (U32)(ip - anchor); + + if (offset > ZSTD_REP_MOVE_OPT) { + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = offset - ZSTD_REP_MOVE_OPT; + offset--; + } else { + if (offset != 0) { + best_off = (offset==ZSTD_REP_MOVE_OPT) ? 
(rep[0] - 1) : (rep[offset]); + if (offset != 1) rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = best_off; + } + if (litLength==0) offset--; + } + + ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + anchor = ip = ip + mlen; + } } /* for (cur=0; cur < last_pos; ) */ + + /* Save reps for next block */ + { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; } + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +FORCE_INLINE +void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, const int ultra) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const U32 lowestIndex = ctx->lowLimit; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + + const U32 maxSearches = 1U << ctx->params.cParams.searchLog; + const U32 sufficient_len = ctx->params.cParams.targetLength; + const U32 mls = ctx->params.cParams.searchLength; + const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 
3 : 4; + + ZSTD_optimal_t* opt = seqStorePtr->priceTable; + ZSTD_match_t* matches = seqStorePtr->matchTable; + const BYTE* inr; + + /* init */ + U32 offset, rep[ZSTD_REP_NUM]; + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } + + ctx->nextToUpdate3 = ctx->nextToUpdate; + ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize); + ip += (ip==prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, match_num, last_pos, litlen, price; + U32 u, mlen, best_mlen, best_off, litLength; + U32 current = (U32)(ip-base); + memset(opt, 0, sizeof(ZSTD_optimal_t)); + last_pos = 0; + opt[0].litlen = (U32)(ip - anchor); + + /* check repCode */ + { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); + for (i = (ip==anchor); i<last_i; i++) { + const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; + const U32 repIndex = (U32)(current - repCur); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( (repCur > 0 && repCur <= (S32)current) + && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ + && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; + + if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; + goto _storeSequence; + } + + best_off = i - (ip==anchor); + litlen = opt[0].litlen; + do { + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ + mlen--; + } while (mlen >= minMatch); + } } } + + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */ + + if (!last_pos && !match_num) { ip++; continue; } + + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; } + opt[0].mlen = 1; + + if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + cur = 0; + last_pos = 1; + goto _storeSequence; + } + + best_mlen = (last_pos) ? last_pos : minMatch; + + /* set prices using matches at position = 0 */ + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? 
matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + litlen = opt[0].litlen; + while (mlen <= best_mlen) { + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, matches[u].off, litlen, price); + mlen++; + } } + + if (last_pos < minMatch) { + ip++; continue; + } + + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + inr = ip + cur; + + if (opt[cur-1].mlen == 1) { + litlen = opt[cur-1].litlen + 1; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); + } else + price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); + } else { + litlen = 1; + price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); + } + + if (cur > last_pos || price <= opt[cur].price) + SET_PRICE(cur, 1, 0, litlen, price); + + if (cur == last_pos) break; + + if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ + continue; + + mlen = opt[cur].mlen; + if (opt[cur].off > ZSTD_REP_MOVE_OPT) { + opt[cur].rep[2] = opt[cur-mlen].rep[1]; + opt[cur].rep[1] = opt[cur-mlen].rep[0]; + opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; + } else { + opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); + } + + best_mlen = minMatch; + { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); + for (i = (mlen != 1); i<last_i; i++) { + const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; + const U32 repIndex = (U32)(current+cur - repCur); + const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( (repCur > 0 && repCur <= (S32)(current+cur)) + && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ + && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; + + if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; last_pos = cur + 1; + goto _storeSequence; + } + + best_off = i - (opt[cur].mlen != 1); + if (mlen > best_mlen) best_mlen = mlen; + + do { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); + } else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); + } + + if (cur + mlen > last_pos || price <= opt[cur + mlen].price) + SET_PRICE(cur + mlen, mlen, i, litlen, price); + mlen--; + } while (mlen >= minMatch); + } } } + + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); + + if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + last_pos = cur + 1; + goto _storeSequence; + } + + /* set prices using matches at position = cur */ + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? 
matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + + while (mlen <= best_mlen) { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); + else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); + } + + if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) + SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); + + mlen++; + } } } /* for (cur = 1; cur <= last_pos; cur++) */ + + best_mlen = opt[last_pos].mlen; + best_off = opt[last_pos].off; + cur = last_pos - best_mlen; + + /* store sequence */ +_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ + opt[0].mlen = 1; + + while (1) { + mlen = opt[cur].mlen; + offset = opt[cur].off; + opt[cur].mlen = best_mlen; + opt[cur].off = best_off; + best_mlen = mlen; + best_off = offset; + if (mlen > cur) break; + cur -= mlen; + } + + for (u = 0; u <= last_pos; ) { + u += opt[u].mlen; + } + + for (cur=0; cur < last_pos; ) { + mlen = opt[cur].mlen; + if (mlen == 1) { ip++; cur++; continue; } + offset = opt[cur].off; + cur += mlen; + litLength = (U32)(ip - anchor); + + if (offset > ZSTD_REP_MOVE_OPT) { + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = offset - ZSTD_REP_MOVE_OPT; + offset--; + } else { + if (offset != 0) { + best_off = (offset==ZSTD_REP_MOVE_OPT) ? 
(rep[0] - 1) : (rep[offset]); + if (offset != 1) rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = best_off; + } + + if (litLength==0) offset--; + } + + ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + anchor = ip = ip + mlen; + } } /* for (cur=0; cur < last_pos; ) */ + + /* Save reps for next block */ + { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; } + + /* Last Literals */ + { size_t lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + +#endif /* ZSTD_OPT_H_91842398743 */ diff --git a/thirdparty/zstd/compress/zstdmt_compress.c b/thirdparty/zstd/compress/zstdmt_compress.c new file mode 100644 index 0000000000..fc7f52a290 --- /dev/null +++ b/thirdparty/zstd/compress/zstdmt_compress.c @@ -0,0 +1,751 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + + +/* ====== Tuning parameters ====== */ +#define ZSTDMT_NBTHREADS_MAX 128 + + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +/* ====== Dependencies ====== */ +#include <stdlib.h> /* malloc */ +#include <string.h> /* memcpy */ +#include "pool.h" /* threadpool */ +#include "threading.h" /* mutex */ +#include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */ +#include "zstdmt_compress.h" + + +/* ====== Debug ====== */ +#if 0 + +# include <stdio.h> +# include <unistd.h> +# include <sys/times.h> + static unsigned g_debugLevel = 5; +# define DEBUGLOGRAW(l, ...) 
if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); } +# define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); } + +# define DEBUG_PRINTHEX(l,p,n) { \ + unsigned debug_u; \ + for (debug_u=0; debug_u<(n); debug_u++) \ + DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \ + DEBUGLOGRAW(l, " \n"); \ +} + +static unsigned long long GetCurrentClockTimeMicroseconds(void) +{ + static clock_t _ticksPerSecond = 0; + if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK); + + { struct tms junk; clock_t newTicks = (clock_t) times(&junk); + return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); } +} + +#define MUTEX_WAIT_TIME_DLEVEL 5 +#define PTHREAD_MUTEX_LOCK(mutex) \ +if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \ + unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \ + pthread_mutex_lock(mutex); \ + { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \ + unsigned long long const elapsedTime = (afterTime-beforeTime); \ + if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \ + DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \ + elapsedTime, #mutex); \ + } } \ +} else pthread_mutex_lock(mutex); + +#else + +# define DEBUGLOG(l, ...) 
{} /* disabled */ +# define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m) +# define DEBUG_PRINTHEX(l,p,n) {} + +#endif + + +/* ===== Buffer Pool ===== */ + +typedef struct buffer_s { + void* start; + size_t size; +} buffer_t; + +static const buffer_t g_nullBuffer = { NULL, 0 }; + +typedef struct ZSTDMT_bufferPool_s { + unsigned totalBuffers; + unsigned nbBuffers; + buffer_t bTable[1]; /* variable size */ +} ZSTDMT_bufferPool; + +static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads) +{ + unsigned const maxNbBuffers = 2*nbThreads + 2; + ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)calloc(1, sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t)); + if (bufPool==NULL) return NULL; + bufPool->totalBuffers = maxNbBuffers; + bufPool->nbBuffers = 0; + return bufPool; +} + +static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool) +{ + unsigned u; + if (!bufPool) return; /* compatibility with free on NULL */ + for (u=0; u<bufPool->totalBuffers; u++) + free(bufPool->bTable[u].start); + free(bufPool); +} + +/* assumption : invocation from main thread only ! */ +static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) +{ + if (pool->nbBuffers) { /* try to use an existing buffer */ + buffer_t const buf = pool->bTable[--(pool->nbBuffers)]; + size_t const availBufferSize = buf.size; + if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) /* large enough, but not too much */ + return buf; + free(buf.start); /* size conditions not respected : scratch this buffer and create a new one */ + } + /* create new buffer */ + { buffer_t buffer; + void* const start = malloc(bSize); + if (start==NULL) bSize = 0; + buffer.start = start; /* note : start can be NULL if malloc fails ! 
*/ + buffer.size = bSize; + return buffer; + } +} + +/* store buffer for later re-use, up to pool capacity */ +static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf) +{ + if (buf.start == NULL) return; /* release on NULL */ + if (pool->nbBuffers < pool->totalBuffers) { + pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */ + return; + } + /* Reached bufferPool capacity (should not happen) */ + free(buf.start); +} + + +/* ===== CCtx Pool ===== */ + +typedef struct { + unsigned totalCCtx; + unsigned availCCtx; + ZSTD_CCtx* cctx[1]; /* variable size */ +} ZSTDMT_CCtxPool; + +/* assumption : CCtxPool invocation only from main thread */ + +/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */ +static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) +{ + unsigned u; + for (u=0; u<pool->totalCCtx; u++) + ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */ + free(pool); +} + +/* ZSTDMT_createCCtxPool() : + * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */ +static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads) +{ + ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*)); + if (!cctxPool) return NULL; + cctxPool->totalCCtx = nbThreads; + cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */ + cctxPool->cctx[0] = ZSTD_createCCtx(); + if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; } + DEBUGLOG(1, "cctxPool created, with %u threads", nbThreads); + return cctxPool; +} + +static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool) +{ + if (pool->availCCtx) { + pool->availCCtx--; + return pool->cctx[pool->availCCtx]; + } + return ZSTD_createCCtx(); /* note : can be NULL, when creation fails ! 
*/ +} + +static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return; /* compatibility with release on NULL */ + if (pool->availCCtx < pool->totalCCtx) + pool->cctx[pool->availCCtx++] = cctx; + else + /* pool overflow : should not happen, since totalCCtx==nbThreads */ + ZSTD_freeCCtx(cctx); +} + + +/* ===== Thread worker ===== */ + +typedef struct { + buffer_t buffer; + size_t filled; +} inBuff_t; + +typedef struct { + ZSTD_CCtx* cctx; + buffer_t src; + const void* srcStart; + size_t srcSize; + size_t dictSize; + buffer_t dstBuff; + size_t cSize; + size_t dstFlushed; + unsigned firstChunk; + unsigned lastChunk; + unsigned jobCompleted; + unsigned jobScanned; + pthread_mutex_t* jobCompleted_mutex; + pthread_cond_t* jobCompleted_cond; + ZSTD_parameters params; + ZSTD_CDict* cdict; + unsigned long long fullFrameSize; +} ZSTDMT_jobDescription; + +/* ZSTDMT_compressChunk() : POOL_function type */ +void ZSTDMT_compressChunk(void* jobDescription) +{ + ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; + const void* const src = (const char*)job->srcStart + job->dictSize; + buffer_t const dstBuff = job->dstBuff; + DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", + job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize); + if (job->cdict) { /* should only happen for first segment */ + size_t const initError = ZSTD_compressBegin_usingCDict_advanced(job->cctx, job->cdict, job->params.fParams, job->fullFrameSize); + if (job->cdict) DEBUGLOG(3, "using CDict "); + if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } + } else { /* srcStart points at reloaded section */ + if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0; /* ensure no srcSize control */ + { size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */ + size_t const initError = 
ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize); + if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; } + ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1); + } } + if (!job->firstChunk) { /* flush and overwrite frame header when it's not first segment */ + size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0); + if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } + ZSTD_invalidateRepCodes(job->cctx); + } + + DEBUGLOG(4, "Compressing : "); + DEBUG_PRINTHEX(4, job->srcStart, 12); + job->cSize = (job->lastChunk) ? + ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) : + ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize); + DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)", + (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk); + DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize)); + +_endJob: + PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex); + job->jobCompleted = 1; + job->jobScanned = 0; + pthread_cond_signal(job->jobCompleted_cond); + pthread_mutex_unlock(job->jobCompleted_mutex); +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +struct ZSTDMT_CCtx_s { + POOL_ctx* factory; + ZSTDMT_bufferPool* buffPool; + ZSTDMT_CCtxPool* cctxPool; + pthread_mutex_t jobCompleted_mutex; + pthread_cond_t jobCompleted_cond; + size_t targetSectionSize; + size_t marginSize; + size_t inBuffSize; + size_t dictSize; + size_t targetDictSize; + inBuff_t inBuff; + ZSTD_parameters params; + XXH64_state_t xxhState; + unsigned nbThreads; + unsigned jobIDMask; + unsigned doneJobID; + unsigned nextJobID; + unsigned frameEnded; + unsigned allJobsCompleted; + unsigned overlapRLog; + unsigned long long 
frameContentSize; + size_t sectionSize; + ZSTD_CDict* cdict; + ZSTD_CStream* cstream; + ZSTDMT_jobDescription jobs[1]; /* variable size (must lies at the end) */ +}; + +ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) +{ + ZSTDMT_CCtx* cctx; + U32 const minNbJobs = nbThreads + 2; + U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1; + U32 const nbJobs = 1 << nbJobsLog2; + DEBUGLOG(5, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n", + nbThreads, minNbJobs, nbJobsLog2, nbJobs); + if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL; + cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbJobs*sizeof(ZSTDMT_jobDescription)); + if (!cctx) return NULL; + cctx->nbThreads = nbThreads; + cctx->jobIDMask = nbJobs - 1; + cctx->allJobsCompleted = 1; + cctx->sectionSize = 0; + cctx->overlapRLog = 3; + cctx->factory = POOL_create(nbThreads, 1); + cctx->buffPool = ZSTDMT_createBufferPool(nbThreads); + cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads); + if (!cctx->factory | !cctx->buffPool | !cctx->cctxPool) { /* one object was not created */ + ZSTDMT_freeCCtx(cctx); + return NULL; + } + if (nbThreads==1) { + cctx->cstream = ZSTD_createCStream(); + if (!cctx->cstream) { + ZSTDMT_freeCCtx(cctx); return NULL; + } } + pthread_mutex_init(&cctx->jobCompleted_mutex, NULL); /* Todo : check init function return */ + pthread_cond_init(&cctx->jobCompleted_cond, NULL); + DEBUGLOG(4, "mt_cctx created, for %u threads \n", nbThreads); + return cctx; +} + +/* ZSTDMT_releaseAllJobResources() : + * Ensure all workers are killed first. 
*/ +static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) +{ + unsigned jobID; + for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { + ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff); + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src); + mtctx->jobs[jobID].src = g_nullBuffer; + ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx); + mtctx->jobs[jobID].cctx = NULL; + } + memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription)); + ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer); + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->allJobsCompleted = 1; +} + +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx==NULL) return 0; /* compatible with free on NULL */ + POOL_free(mtctx->factory); + if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */ + ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources into pools first */ + ZSTDMT_freeCCtxPool(mtctx->cctxPool); + ZSTD_freeCDict(mtctx->cdict); + ZSTD_freeCStream(mtctx->cstream); + pthread_mutex_destroy(&mtctx->jobCompleted_mutex); + pthread_cond_destroy(&mtctx->jobCompleted_cond); + free(mtctx); + return 0; +} + +size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value) +{ + switch(parameter) + { + case ZSTDMT_p_sectionSize : + mtctx->sectionSize = value; + return 0; + case ZSTDMT_p_overlapSectionLog : + DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value); + mtctx->overlapRLog = (value >= 9) ? 
0 : 9 - value; + return 0; + default : + return ERROR(compressionParameter_unsupported); + } +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0); + U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3; + size_t const overlapSize = (size_t)1 << (params.cParams.windowLog - overlapLog); + size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2); + unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + 1; + unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads); + size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks; + size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0xFFFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */ + size_t remainingSrcSize = srcSize; + const char* const srcStart = (const char*)src; + unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? 
nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize)); /* presumes avgChunkSize >= 256 KB, which should be the case */ + size_t frameStartPos = 0, dstBufferPos = 0; + + DEBUGLOG(3, "windowLog : %2u => chunkTargetSize : %u bytes ", params.cParams.windowLog, (U32)chunkTargetSize); + DEBUGLOG(2, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize); + params.fParams.contentSizeFlag = 1; + + if (nbChunks==1) { /* fallback to single-thread mode */ + ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0]; + return ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel); + } + + { unsigned u; + for (u=0; u<nbChunks; u++) { + size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize); + size_t const dstBufferCapacity = ZSTD_compressBound(chunkSize); + buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity }; + buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity); + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool); + size_t dictSize = u ? 
overlapSize : 0; + + if ((cctx==NULL) || (dstBuffer.start==NULL)) { + mtctx->jobs[u].cSize = ERROR(memory_allocation); /* job result */ + mtctx->jobs[u].jobCompleted = 1; + nbChunks = u+1; + break; /* let's wait for previous jobs to complete, but don't start new ones */ + } + + mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize; + mtctx->jobs[u].dictSize = dictSize; + mtctx->jobs[u].srcSize = chunkSize; + mtctx->jobs[u].fullFrameSize = srcSize; + mtctx->jobs[u].params = params; + mtctx->jobs[u].dstBuff = dstBuffer; + mtctx->jobs[u].cctx = cctx; + mtctx->jobs[u].firstChunk = (u==0); + mtctx->jobs[u].lastChunk = (u==nbChunks-1); + mtctx->jobs[u].jobCompleted = 0; + mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex; + mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond; + + DEBUGLOG(3, "posting job %u (%u bytes)", u, (U32)chunkSize); + DEBUG_PRINTHEX(3, mtctx->jobs[u].srcStart, 12); + POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]); + + frameStartPos += chunkSize; + dstBufferPos += dstBufferCapacity; + remainingSrcSize -= chunkSize; + } } + /* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */ + + { unsigned chunkID; + size_t error = 0, dstPos = 0; + for (chunkID=0; chunkID<nbChunks; chunkID++) { + DEBUGLOG(3, "waiting for chunk %u ", chunkID); + PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex); + while (mtctx->jobs[chunkID].jobCompleted==0) { + DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", chunkID); + pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex); + } + pthread_mutex_unlock(&mtctx->jobCompleted_mutex); + DEBUGLOG(3, "ready to write chunk %u ", chunkID); + + ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx); + mtctx->jobs[chunkID].cctx = NULL; + mtctx->jobs[chunkID].srcStart = NULL; + { size_t const cSize = mtctx->jobs[chunkID].cSize; + if (ZSTD_isError(cSize)) error = cSize; + if ((!error) && (dstPos + cSize > dstCapacity)) 
error = ERROR(dstSize_tooSmall); + if (chunkID) { /* note : chunk 0 is already written directly into dst */ + if (!error) + memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap if chunk decompressed within dst */ + if (chunkID >= compressWithinDst) /* otherwise, it decompresses within dst */ + ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff); + mtctx->jobs[chunkID].dstBuff = g_nullBuffer; + } + dstPos += cSize ; + } + } + if (!error) DEBUGLOG(3, "compressed size : %u ", (U32)dstPos); + return error ? error : dstPos; + } + +} + + +/* ====================================== */ +/* ======= Streaming API ======= */ +/* ====================================== */ + +static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) { + while (zcs->doneJobID < zcs->nextJobID) { + unsigned const jobID = zcs->doneJobID & zcs->jobIDMask; + PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); + while (zcs->jobs[jobID].jobCompleted==0) { + DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */ + pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); + } + pthread_mutex_unlock(&zcs->jobCompleted_mutex); + zcs->doneJobID++; + } +} + + +static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, + const void* dict, size_t dictSize, unsigned updateDict, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + ZSTD_customMem const cmem = { NULL, NULL, NULL }; + DEBUGLOG(3, "Started new compression, with windowLog : %u", params.cParams.windowLog); + if (zcs->nbThreads==1) return ZSTD_initCStream_advanced(zcs->cstream, dict, dictSize, params, pledgedSrcSize); + if (zcs->allJobsCompleted == 0) { /* previous job not correctly finished */ + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + zcs->allJobsCompleted = 1; + } + zcs->params = params; + if (updateDict) { + ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL; + if (dict 
&& dictSize) { + zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params.cParams, cmem); + if (zcs->cdict == NULL) return ERROR(memory_allocation); + } } + zcs->frameContentSize = pledgedSrcSize; + zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog); + DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog); + DEBUGLOG(3, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10)); + zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2); + zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize); + zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize); + DEBUGLOG(3, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10)); + zcs->marginSize = zcs->targetSectionSize >> 2; + zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize; + zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); + if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation); + zcs->inBuff.filled = 0; + zcs->dictSize = 0; + zcs->doneJobID = 0; + zcs->nextJobID = 0; + zcs->frameEnded = 0; + zcs->allJobsCompleted = 0; + if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0); + return 0; +} + +size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + return ZSTDMT_initCStream_internal(zcs, dict, dictSize, 1, params, pledgedSrcSize); +} + +/* ZSTDMT_resetCStream() : + * pledgedSrcSize is optional and can be zero == unknown */ +size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize) +{ + if (zcs->nbThreads==1) return ZSTD_resetCStream(zcs->cstream, pledgedSrcSize); + return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize); +} + +size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { + ZSTD_parameters const params = 
ZSTD_getParams(compressionLevel, 0, 0); + return ZSTDMT_initCStream_internal(zcs, NULL, 0, 1, params, 0); +} + + +static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame) +{ + size_t const dstBufferCapacity = ZSTD_compressBound(srcSize); + buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); + unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; + + if ((cctx==NULL) || (dstBuffer.start==NULL)) { + zcs->jobs[jobID].jobCompleted = 1; + zcs->nextJobID++; + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + return ERROR(memory_allocation); + } + + DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ", zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize); + zcs->jobs[jobID].src = zcs->inBuff.buffer; + zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; + zcs->jobs[jobID].srcSize = srcSize; + zcs->jobs[jobID].dictSize = zcs->dictSize; /* note : zcs->inBuff.filled is presumed >= srcSize + dictSize */ + zcs->jobs[jobID].params = zcs->params; + if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0; /* do not calculate checksum within sections, just keep it in header for first section */ + zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? 
zcs->cdict : NULL; + zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize; + zcs->jobs[jobID].dstBuff = dstBuffer; + zcs->jobs[jobID].cctx = cctx; + zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0); + zcs->jobs[jobID].lastChunk = endFrame; + zcs->jobs[jobID].jobCompleted = 0; + zcs->jobs[jobID].dstFlushed = 0; + zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex; + zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond; + + /* get a new buffer for next input */ + if (!endFrame) { + size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize); + zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); + if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */ + zcs->jobs[jobID].jobCompleted = 1; + zcs->nextJobID++; + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + return ERROR(memory_allocation); + } + DEBUGLOG(5, "inBuff filled to %u", (U32)zcs->inBuff.filled); + zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize; + DEBUGLOG(5, "new job : filled to %u, with %u dict and %u src", (U32)zcs->inBuff.filled, (U32)newDictSize, (U32)(zcs->inBuff.filled - newDictSize)); + memmove(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize, zcs->inBuff.filled); + DEBUGLOG(5, "new inBuff pre-filled"); + zcs->dictSize = newDictSize; + } else { + zcs->inBuff.buffer = g_nullBuffer; + zcs->inBuff.filled = 0; + zcs->dictSize = 0; + zcs->frameEnded = 1; + if (zcs->nextJobID == 0) + zcs->params.fParams.checksumFlag = 0; /* single chunk : checksum is calculated directly within worker thread */ + } + + DEBUGLOG(3, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask); + POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is 
exhausted */ + zcs->nextJobID++; + return 0; +} + + +/* ZSTDMT_flushNextJob() : + * output : will be updated with amount of data flushed . + * blockToFlush : if >0, the function will block and wait if there is no data available to flush . + * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */ +static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush) +{ + unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask; + if (zcs->doneJobID == zcs->nextJobID) return 0; /* all flushed ! */ + PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); + while (zcs->jobs[wJobID].jobCompleted==0) { + DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID); + if (!blockToFlush) { pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; } /* nothing ready to be flushed => skip */ + pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); /* block when nothing available to flush */ + } + pthread_mutex_unlock(&zcs->jobCompleted_mutex); + /* compression job completed : output can be flushed */ + { ZSTDMT_jobDescription job = zcs->jobs[wJobID]; + if (!job.jobScanned) { + if (ZSTD_isError(job.cSize)) { + DEBUGLOG(5, "compression error detected "); + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + return job.cSize; + } + ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); + zcs->jobs[wJobID].cctx = NULL; + DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag); + if (zcs->params.fParams.checksumFlag) { + XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize); + if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */ + U32 const checksum = (U32)XXH64_digest(&zcs->xxhState); + DEBUGLOG(4, "writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum); + job.cSize += 4; + 
zcs->jobs[wJobID].cSize += 4; + } } + ZSTDMT_releaseBuffer(zcs->buffPool, job.src); + zcs->jobs[wJobID].srcStart = NULL; + zcs->jobs[wJobID].src = g_nullBuffer; + zcs->jobs[wJobID].jobScanned = 1; + } + { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); + DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID); + memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); + output->pos += toWrite; + job.dstFlushed += toWrite; + } + if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => move to next one */ + ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); + zcs->jobs[wJobID].dstBuff = g_nullBuffer; + zcs->jobs[wJobID].jobCompleted = 0; + zcs->doneJobID++; + } else { + zcs->jobs[wJobID].dstFlushed = job.dstFlushed; + } + /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */ + if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed); + if (zcs->doneJobID < zcs->nextJobID) return 1; /* still some buffer to flush */ + zcs->allJobsCompleted = zcs->frameEnded; /* frame completed and entirely flushed */ + return 0; /* everything flushed */ +} } + + +size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + size_t const newJobThreshold = zcs->dictSize + zcs->targetSectionSize + zcs->marginSize; + if (zcs->frameEnded) return ERROR(stage_wrong); /* current frame being ended. Only flush is allowed. 
Restart with init */ + if (zcs->nbThreads==1) return ZSTD_compressStream(zcs->cstream, output, input); + + /* fill input buffer */ + { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled); + memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.filled, input->src, toLoad); + input->pos += toLoad; + zcs->inBuff.filled += toLoad; + } + + if ( (zcs->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */ + && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { /* avoid overwriting job round buffer */ + CHECK_F( ZSTDMT_createCompressionJob(zcs, zcs->targetSectionSize, 0) ); + } + + /* check for data to flush */ + CHECK_F( ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)) ); /* block if it wasn't possible to create new job due to saturation */ + + /* recommended next input size : fill current input buffer */ + return zcs->inBuffSize - zcs->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */ +} + + +static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame) +{ + size_t const srcSize = zcs->inBuff.filled - zcs->dictSize; + + if (srcSize) DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize); + if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded)) + && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { + CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) ); + } + + /* check if there is any data available to flush */ + DEBUGLOG(5, "zcs->doneJobID : %u ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID); + return ZSTDMT_flushNextJob(zcs, output, 1); +} + + +size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output) +{ + if (zcs->nbThreads==1) return ZSTD_flushStream(zcs->cstream, output); + return ZSTDMT_flushStream_internal(zcs, output, 0); +} + +size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output) +{ + if (zcs->nbThreads==1) return 
ZSTD_endStream(zcs->cstream, output); + return ZSTDMT_flushStream_internal(zcs, output, 1); +} diff --git a/thirdparty/zstd/compress/zstdmt_compress.h b/thirdparty/zstd/compress/zstdmt_compress.h new file mode 100644 index 0000000000..27f78ee031 --- /dev/null +++ b/thirdparty/zstd/compress/zstdmt_compress.h @@ -0,0 +1,78 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + + #ifndef ZSTDMT_COMPRESS_H + #define ZSTDMT_COMPRESS_H + + #if defined (__cplusplus) + extern "C" { + #endif + + +/* Note : All prototypes defined in this file shall be considered experimental. + * There is no guarantee of API continuity (yet) on any of these prototypes */ + +/* === Dependencies === */ +#include <stddef.h> /* size_t */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ +#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */ + + +/* === Simple one-pass functions === */ + +typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; +ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads); +ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx); + +ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + + +/* === Streaming functions === */ + +ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel); +ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ + +ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 
: still some data to be flushed; or an error code (ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ + + +/* === Advanced functions and parameters === */ + +#ifndef ZSTDMT_SECTION_SIZE_MIN +# define ZSTDMT_SECTION_SIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */ +#endif + +ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, const void* dict, size_t dictSize, /**< dict can be released after init, a local copy is preserved within zcs */ + ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ + +/* ZSDTMT_parameter : + * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */ +typedef enum { + ZSTDMT_p_sectionSize, /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */ + ZSTDMT_p_overlapSectionLog /* Log of overlapped section; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */ +} ZSDTMT_parameter; + +/* ZSTDMT_setMTCtxParameter() : + * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter. + * The function must be called typically after ZSTD_createCCtx(). + * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions. 
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDMT_COMPRESS_H */ diff --git a/thirdparty/zstd/decompress/huf_decompress.c b/thirdparty/zstd/decompress/huf_decompress.c new file mode 100644 index 0000000000..ea35c36201 --- /dev/null +++ b/thirdparty/zstd/decompress/huf_decompress.c @@ -0,0 +1,888 @@ +/* ****************************************************************** + Huffman decoder, part of New Generation Entropy library + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +# else +# define FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +#endif + + +/* ************************************************************** +* Dependencies +****************************************************************/ +#include <string.h> /* memcpy, memset */ +#include "bitstream.h" /* BIT_* */ +#include "fse.h" /* header compression */ +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* 
use only *after* variable declarations */ + + +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ + +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; + +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + memcpy(&dtd, table, sizeof(dtd)); + return dtd; +} + + +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ + +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ + +size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize) +{ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ + U32 tableLog = 0; + U32 nbSymbols = 0; + size_t iSize; + void* const dtPtr = DTable + 1; + HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; + + HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... 
*/ + + iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + memcpy(DTable, &dtd, sizeof(dtd)); + } + + /* Calculate starting value for each rank */ + { U32 n, nextRankStart = 0; + for (n=1; n<tableLog+1; n++) { + U32 const current = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = current; + } } + + /* fill DTable */ + { U32 n; + for (n=0; n<nbSymbols; n++) { + U32 const w = huffWeight[n]; + U32 const length = (1 << w) >> 1; + U32 u; + HUF_DEltX2 D; + D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); + for (u = rankVal[w]; u < rankVal[w] + length; u++) + dt[u] = D; + rankVal[w] += length; + } } + + return iSize; +} + + +static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + BYTE const c = dt[val].byte; + BIT_skipBits(Dstream, dt[val].nbBits); + return c; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) + +FORCE_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 4 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + 
HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + + /* closer to the end */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + /* no more data to retrieve from bitstream, hence no need to reload */ + while (p < pEnd) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + return pEnd-pStart; +} + +static size_t HUF_decompress1X2_usingDTable_internal( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + dstSize; + const void* dtPtr = DTable + 1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + BIT_DStream_t bitD; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); + if (HUF_isError(errorCode)) return errorCode; } + + HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog); + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + return dstSize; +} + +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); +} + +size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2 (DCtx, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return 
HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} + + +static size_t HUF_decompress4X2_usingDTable_internal( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + /* Check */ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable + 1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); + if (HUF_isError(errorCode)) return errorCode; } + { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2); + if (HUF_isError(errorCode)) return errorCode; } + { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3); + if (HUF_isError(errorCode)) return errorCode; } + { size_t const errorCode = BIT_initDStream(&bitD4, istart4, 
length4); + if (HUF_isError(errorCode)) return errorCode; } + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) { + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endSignal) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; + } +} + + +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if 
(dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); +} + + +size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2 (dctx, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx); +} + +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} + + +/* *************************/ +/* double-symbols decoding */ +/* *************************/ +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */ + +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; + +/* HUF_fillDTableX4Level2() : + * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ +static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed, + const U32* rankValOrigin, const int minWeight, + const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, + U32 nbBitsBaseline, U16 baseSeq) +{ + HUF_DEltX4 DElt; + U32 rankVal[HUF_TABLELOG_MAX + 1]; + + /* get pre-calculated rankVal */ + memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill skipped values */ + if (minWeight>1) { + U32 i, skipSize = rankVal[minWeight]; + MEM_writeLE16(&(DElt.sequence), baseSeq); + DElt.nbBits = (BYTE)(consumed); + DElt.length = 1; + for (i = 0; i < skipSize; i++) + DTable[i] = DElt; + } + + /* fill DTable */ + { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */ + const U32 symbol = sortedSymbols[s].symbol; + const U32 weight = 
sortedSymbols[s].weight; + const U32 nbBits = nbBitsBaseline - weight; + const U32 length = 1 << (sizeLog-nbBits); + const U32 start = rankVal[weight]; + U32 i = start; + const U32 end = start + length; + + MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8))); + DElt.nbBits = (BYTE)(nbBits + consumed); + DElt.length = 2; + do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */ + + rankVal[weight] += length; + } } +} + +typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1]; + +static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, + const sortedSymbol_t* sortedList, const U32 sortedListSize, + const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, + const U32 nbBitsBaseline) +{ + U32 rankVal[HUF_TABLELOG_MAX + 1]; + const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ + const U32 minBits = nbBitsBaseline - maxWeight; + U32 s; + + memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill DTable */ + for (s=0; s<sortedListSize; s++) { + const U16 symbol = sortedList[s].symbol; + const U32 weight = sortedList[s].weight; + const U32 nbBits = nbBitsBaseline - weight; + const U32 start = rankVal[weight]; + const U32 length = 1 << (targetLog-nbBits); + + if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */ + U32 sortedRank; + int minWeight = nbBits + scaleLog; + if (minWeight < 1) minWeight = 1; + sortedRank = rankStart[minWeight]; + HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits, + rankValOrigin[nbBits], minWeight, + sortedList+sortedRank, sortedListSize-sortedRank, + nbBitsBaseline, symbol); + } else { + HUF_DEltX4 DElt; + MEM_writeLE16(&(DElt.sequence), symbol); + DElt.nbBits = (BYTE)(nbBits); + DElt.length = 1; + { U32 const end = start + length; + U32 u; + for (u = start; u < end; u++) DTable[u] = DElt; + } } + rankVal[weight] += length; + } +} + +size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize) 
+{ + BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; + sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; + U32 rankStats[HUF_TABLELOG_MAX + 1] = { 0 }; + U32 rankStart0[HUF_TABLELOG_MAX + 2] = { 0 }; + U32* const rankStart = rankStart0+1; + rankVal_t rankVal; + U32 tableLog, maxW, sizeOfSort, nbSymbols; + DTableDesc dtd = HUF_getDTableDesc(DTable); + U32 const maxTableLog = dtd.maxTableLog; + size_t iSize; + void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ + HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr; + + HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ + if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ + + iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* check result */ + if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ + + /* find maxWeight */ + for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ + + /* Get start index of each weight */ + { U32 w, nextRankStart = 0; + for (w=1; w<maxW+1; w++) { + U32 current = nextRankStart; + nextRankStart += rankStats[w]; + rankStart[w] = current; + } + rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ + sizeOfSort = nextRankStart; + } + + /* sort symbols by weight */ + { U32 s; + for (s=0; s<nbSymbols; s++) { + U32 const w = weightList[s]; + U32 const r = rankStart[w]++; + sortedSymbol[r].symbol = (BYTE)s; + sortedSymbol[r].weight = (BYTE)w; + } + rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */ + } + + /* Build rankVal */ + { U32* const rankVal0 = rankVal[0]; + { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */ + U32 nextRankVal = 0; + U32 
w; + for (w=1; w<maxW+1; w++) { + U32 current = nextRankVal; + nextRankVal += rankStats[w] << (w+rescale); + rankVal0[w] = current; + } } + { U32 const minBits = tableLog+1 - maxW; + U32 consumed; + for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) { + U32* const rankValPtr = rankVal[consumed]; + U32 w; + for (w = 1; w < maxW+1; w++) { + rankValPtr[w] = rankVal0[w] >> consumed; + } } } } + + HUF_fillDTableX4(dt, maxTableLog, + sortedSymbol, sizeOfSort, + rankStart0, rankVal, maxW, + tableLog+1); + + dtd.tableLog = (BYTE)maxTableLog; + dtd.tableType = 1; + memcpy(DTable, &dtd, sizeof(dtd)); + return iSize; +} + + +static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; +} + +static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 1); + if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); + else { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ + } } + return 1; +} + + +#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +FORCE_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX4_2(p, bitDPtr); + HUF_DECODE_SYMBOLX4_1(p, bitDPtr); + HUF_DECODE_SYMBOLX4_2(p, bitDPtr); + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + } + + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + + +static size_t HUF_decompress1X4_usingDTable_internal( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BIT_DStream_t bitD; + + /* Init */ + { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); + if (HUF_isError(errorCode)) return errorCode; + } + + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog); + } + + 
/* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + +size_t HUF_decompress1X4_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); +} + +size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX4 (DCtx, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx); +} + +size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} + +static size_t HUF_decompress4X4_usingDTable_internal( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; + const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const 
istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); + if (HUF_isError(errorCode)) return errorCode; } + { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2); + if (HUF_isError(errorCode)) return errorCode; } + { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3); + if (HUF_isError(errorCode)) return errorCode; } + { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4); + if (HUF_isError(errorCode)) return errorCode; } + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) { + HUF_DECODE_SYMBOLX4_2(op1, &bitD1); + HUF_DECODE_SYMBOLX4_2(op2, &bitD2); + HUF_DECODE_SYMBOLX4_2(op3, &bitD3); + HUF_DECODE_SYMBOLX4_2(op4, &bitD4); + HUF_DECODE_SYMBOLX4_1(op1, &bitD1); + HUF_DECODE_SYMBOLX4_1(op2, &bitD2); + HUF_DECODE_SYMBOLX4_1(op3, &bitD3); + HUF_DECODE_SYMBOLX4_1(op4, &bitD4); + HUF_DECODE_SYMBOLX4_2(op1, &bitD1); + HUF_DECODE_SYMBOLX4_2(op2, &bitD2); + HUF_DECODE_SYMBOLX4_2(op3, &bitD3); + HUF_DECODE_SYMBOLX4_2(op4, &bitD4); + HUF_DECODE_SYMBOLX4_0(op1, &bitD1); + HUF_DECODE_SYMBOLX4_0(op2, &bitD2); + HUF_DECODE_SYMBOLX4_0(op3, &bitD3); + HUF_DECODE_SYMBOLX4_0(op4, &bitD4); + + endSignal = 
BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + + +size_t HUF_decompress4X4_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); +} + + +size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX4 (dctx, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx); +} + +size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} + + +/* ********************************/ +/* Generic decompression selector */ +/* ********************************/ + +size_t 
HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); + return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) : + HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable); +} + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); + return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) : + HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable); +} + + +typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +{ + /* single, double, quad */ + {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ + {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ + {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ + {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ + {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ + {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ + {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ + {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ + {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ + {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ + {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ + {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ + {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ + {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ + {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ + {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ +}; + +/** HUF_selectDecoder() : +* Tells which decoder is likely to decode faster, +* 
based on a set of pre-determined metrics. +* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . +* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +{ + /* decoder timing evaluation */ + U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */ + + return DTime1 < DTime0; +} + + +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); + +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 }; + + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); + } +} + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return algoNb ? 
HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; + } +} + +size_t HUF_decompress4X_hufOnly (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) return ERROR(corruption_detected); /* invalid */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; + } +} + +size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return algoNb ? HUF_decompress1X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress1X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; + } +} diff --git a/thirdparty/zstd/decompress/zstd_decompress.c b/thirdparty/zstd/decompress/zstd_decompress.c new file mode 100644 index 0000000000..910f9ab783 --- /dev/null +++ b/thirdparty/zstd/decompress/zstd_decompress.c @@ -0,0 +1,2376 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. 
+ */ + + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * HEAPMODE : + * Select how default decompression function ZSTD_decompress() will allocate memory, + * in memory stack (0), or in memory heap (1, requires malloc()) + */ +#ifndef ZSTD_HEAPMODE +# define ZSTD_HEAPMODE 1 +#endif + +/*! +* LEGACY_SUPPORT : +* if set to 1, ZSTD_decompress() can decode older formats (v0.1+) +*/ +#ifndef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 0 +#endif + +/*! +* MAXWINDOWSIZE_DEFAULT : +* maximum window size accepted by DStream, by default. +* Frames requiring more memory will be rejected. +*/ +#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT +# define ZSTD_MAXWINDOWSIZE_DEFAULT ((1 << ZSTD_WINDOWLOG_MAX) + 1) /* defined within zstd.h */ +#endif + + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include <string.h> /* memcpy, memmove, memset */ +#include "mem.h" /* low level memory routines */ +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#include "zstd_internal.h" + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +# include "zstd_legacy.h" +#endif + + +#if defined(_MSC_VER) +# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define ZSTD_PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) +#elif defined(__GNUC__) +# define ZSTD_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) +#else +# define ZSTD_PREFETCH(ptr) /* disabled */ +#endif + +/*-************************************* +* Macros +***************************************/ +#define ZSTD_isError ERR_isError /* for inlining */ +#define FSE_isError ERR_isError +#define HUF_isError ERR_isError + + +/*_******************************************************* +* Memory operations 
+**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } + + +/*-************************************************************* +* Context management +***************************************************************/ +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; + +typedef struct { + FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; + FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)]; + FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; +} ZSTD_entropyTables_t; + +struct ZSTD_DCtx_s +{ + const FSE_DTable* LLTptr; + const FSE_DTable* MLTptr; + const FSE_DTable* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyTables_t entropy; + const void* previousDstEnd; /* detect continuity */ + const void* base; /* start of current segment */ + const void* vBase; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t expected; + ZSTD_frameParams fParams; + blockType_e bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + XXH64_state_t xxhState; + size_t headerSize; + U32 dictID; + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH]; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + +size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) { return (dctx==NULL) ? 
0 : sizeof(ZSTD_DCtx); } + +size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } + +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) +{ + dctx->expected = ZSTD_frameHeaderSize_prefix; + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->previousDstEnd = NULL; + dctx->base = NULL; + dctx->vBase = NULL; + dctx->dictEnd = NULL; + dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + dctx->litEntropy = dctx->fseEntropy = 0; + dctx->dictID = 0; + MEM_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + dctx->LLTptr = dctx->entropy.LLTable; + dctx->MLTptr = dctx->entropy.MLTable; + dctx->OFTptr = dctx->entropy.OFTable; + dctx->HUFptr = dctx->entropy.hufTable; + return 0; +} + +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_DCtx* dctx; + + if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; + if (!customMem.customAlloc || !customMem.customFree) return NULL; + + dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(ZSTD_DCtx), customMem); + if (!dctx) return NULL; + memcpy(&dctx->customMem, &customMem, sizeof(customMem)); + ZSTD_decompressBegin(dctx); + return dctx; +} + +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + return ZSTD_createDCtx_advanced(defaultCustomMem); +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support free on NULL */ + ZSTD_free(dctx, dctx->customMem); + return 0; /* reserved as a potential error code in the future */ +} + +void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) +{ + size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max; + memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */ +} + +static void ZSTD_refDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict); + + 
+/*-************************************************************* +* Decompression section +***************************************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +unsigned ZSTD_isFrame(const void* buffer, size_t size) +{ + if (size < 4) return 0; + { U32 const magic = MEM_readLE32(buffer); + if (magic == ZSTD_MAGICNUMBER) return 1; + if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) return 1; + } +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(buffer, size)) return 1; +#endif + return 0; +} + + +/** ZSTD_frameHeaderSize() : +* srcSize must be >= ZSTD_frameHeaderSize_prefix. +* @return : size of the Frame Header */ +static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +{ + if (srcSize < ZSTD_frameHeaderSize_prefix) return ERROR(srcSize_wrong); + { BYTE const fhd = ((const BYTE*)src)[4]; + U32 const dictID= fhd & 3; + U32 const singleSegment = (fhd >> 5) & 1; + U32 const fcsId = fhd >> 6; + return ZSTD_frameHeaderSize_prefix + !singleSegment + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + + (singleSegment && !fcsId); + } +} + + +/** ZSTD_getFrameParams() : +* decode Frame Header, or require larger `srcSize`. 
+* @return : 0, `fparamsPtr` is correctly filled, +* >0, `srcSize` is too small, result is expected `srcSize`, +* or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + + if (srcSize < ZSTD_frameHeaderSize_prefix) return ZSTD_frameHeaderSize_prefix; + if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) { + if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + if (srcSize < ZSTD_skippableHeaderSize) return ZSTD_skippableHeaderSize; /* magic number + skippable frame length */ + memset(fparamsPtr, 0, sizeof(*fparamsPtr)); + fparamsPtr->frameContentSize = MEM_readLE32((const char *)src + 4); + fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */ + return 0; + } + return ERROR(prefix_unknown); + } + + /* ensure there is enough `srcSize` to fully read/decode frame header */ + { size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize); + if (srcSize < fhsize) return fhsize; } + + { BYTE const fhdByte = ip[4]; + size_t pos = 5; + U32 const dictIDSizeCode = fhdByte&3; + U32 const checksumFlag = (fhdByte>>2)&1; + U32 const singleSegment = (fhdByte>>5)&1; + U32 const fcsID = fhdByte>>6; + U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; + U32 windowSize = 0; + U32 dictID = 0; + U64 frameContentSize = 0; + if ((fhdByte & 0x08) != 0) return ERROR(frameParameter_unsupported); /* reserved bits, which must be zero */ + if (!singleSegment) { + BYTE const wlByte = ip[pos++]; + U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + if (windowLog > ZSTD_WINDOWLOG_MAX) return ERROR(frameParameter_windowTooLarge); /* avoids issue with 1 << windowLog */ + windowSize = (1U << windowLog); + windowSize += (windowSize >> 3) * (wlByte&7); + } + + switch(dictIDSizeCode) + { + default: /* impossible */ + case 0 : break; + case 1 : dictID = ip[pos]; pos++; break; + case 2 : dictID = MEM_readLE16(ip+pos); 
pos+=2; break; + case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; + } + switch(fcsID) + { + default: /* impossible */ + case 0 : if (singleSegment) frameContentSize = ip[pos]; break; + case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; + case 2 : frameContentSize = MEM_readLE32(ip+pos); break; + case 3 : frameContentSize = MEM_readLE64(ip+pos); break; + } + if (!windowSize) windowSize = (U32)frameContentSize; + if (windowSize > windowSizeMax) return ERROR(frameParameter_windowTooLarge); + fparamsPtr->frameContentSize = frameContentSize; + fparamsPtr->windowSize = windowSize; + fparamsPtr->dictID = dictID; + fparamsPtr->checksumFlag = checksumFlag; + } + return 0; +} + +/** ZSTD_getFrameContentSize() : +* compatible with legacy mode +* @return : decompressed size of the single frame pointed to be `src` if known, otherwise +* - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined +* - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ +unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) +{ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize); + return ret == 0 ? 
ZSTD_CONTENTSIZE_UNKNOWN : ret; + } +#endif + { + ZSTD_frameParams fParams; + if (ZSTD_getFrameParams(&fParams, src, srcSize) != 0) return ZSTD_CONTENTSIZE_ERROR; + if (fParams.windowSize == 0) { + /* Either skippable or empty frame, size == 0 either way */ + return 0; + } else if (fParams.frameContentSize != 0) { + return fParams.frameContentSize; + } else { + return ZSTD_CONTENTSIZE_UNKNOWN; + } + } +} + +/** ZSTD_findDecompressedSize() : + * compatible with legacy mode + * `srcSize` must be the exact length of some number of ZSTD compressed and/or + * skippable frames + * @return : decompressed size of the frames contained */ +unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) +{ + { + unsigned long long totalDstSize = 0; + while (srcSize >= ZSTD_frameHeaderSize_prefix) { + const U32 magicNumber = MEM_readLE32(src); + + if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t skippableSize; + if (srcSize < ZSTD_skippableHeaderSize) + return ERROR(srcSize_wrong); + skippableSize = MEM_readLE32((const BYTE *)src + 4) + + ZSTD_skippableHeaderSize; + if (srcSize < skippableSize) { + return ZSTD_CONTENTSIZE_ERROR; + } + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } + + { + unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; + + /* check for overflow */ + if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; + totalDstSize += ret; + } + { + size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); + if (ZSTD_isError(frameSrcSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + + src = (const BYTE *)src + frameSrcSize; + srcSize -= frameSrcSize; + } + } + + if (srcSize) { + return ZSTD_CONTENTSIZE_ERROR; + } + + return totalDstSize; + } +} + +/** ZSTD_getDecompressedSize() : +* compatible with legacy mode +* @return : decompressed size if known, 0 otherwise + note : 0 can mean any of the 
following : + - decompressed size is not present within frame header + - frame header unknown / not supported + - frame header not complete (`srcSize` too small) */ +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + return ret >= ZSTD_CONTENTSIZE_ERROR ? 0 : ret; +} + + +/** ZSTD_decodeFrameHeader() : +* `headerSize` must be the size provided by ZSTD_frameHeaderSize(). +* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) +{ + size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, headerSize); + if (ZSTD_isError(result)) return result; /* invalid header */ + if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */ + if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong); + if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); + return 0; +} + + +typedef struct +{ + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; + +/*! 
ZSTD_getcBlockSize() : +* Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr) +{ + if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) return 1; + if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected); + return cSize; + } +} + + +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall); + memcpy(dst, src, srcSize); + return srcSize; +} + + +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + size_t regenSize) +{ + if (srcSize != 1) return ERROR(srcSize_wrong); + if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall); + memset(dst, *(const BYTE*)src, regenSize); + return regenSize; +} + +/*! 
ZSTD_decodeLiteralsBlock() : + @return : nb of bytes read from src (< srcSize ) */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected); + + { const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + + switch(litEncType) + { + case set_repeat: + if (dctx->litEntropy==0) return ERROR(dictionary_corrupted); + /* fall-through */ + case set_compressed: + if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */ + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + (istart[4] << 10); + break; + } + if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected); + if (litCSize + lhSize > srcSize) return ERROR(corruption_detected); + + if (HUF_isError((litEncType==set_repeat) ? + ( singleStream ? + HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) : + HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) ) : + ( singleStream ? 
+ HUF_decompress1X2_DCtx(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) : + HUF_decompress4X_hufOnly (dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize)) )) + return ERROR(corruption_detected); + + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + dctx->litEntropy = 1; + if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; + memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return litCSize + lhSize; + } + + case set_basic: + { size_t litSize, lhSize; + U32 const lhlCode = ((istart[0]) >> 2) & 3; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + break; + } + + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ + if (litSize+lhSize > srcSize) return ERROR(corruption_detected); + memcpy(dctx->litBuffer, istart+lhSize, litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return lhSize+litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+lhSize; + dctx->litSize = litSize; + return lhSize+litSize; + } + + case set_rle: + { U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t litSize, lhSize; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */ + break; + } + if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return 
ERROR(corruption_detected); + memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize+1; + } + default: + return ERROR(corruption_detected); /* impossible */ + } + } +} + + +typedef union { + FSE_decode_t realData; + U32 alignedBy4; +} FSE_decode_t4; + +/* Default FSE distribution table for Literal Lengths */ +static const FSE_decode_t4 LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = { + { { LL_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */ + /* base, symbol, bits */ + { { 0, 0, 4 } }, { { 16, 0, 4 } }, { { 32, 1, 5 } }, { { 0, 3, 5 } }, + { { 0, 4, 5 } }, { { 0, 6, 5 } }, { { 0, 7, 5 } }, { { 0, 9, 5 } }, + { { 0, 10, 5 } }, { { 0, 12, 5 } }, { { 0, 14, 6 } }, { { 0, 16, 5 } }, + { { 0, 18, 5 } }, { { 0, 19, 5 } }, { { 0, 21, 5 } }, { { 0, 22, 5 } }, + { { 0, 24, 5 } }, { { 32, 25, 5 } }, { { 0, 26, 5 } }, { { 0, 27, 6 } }, + { { 0, 29, 6 } }, { { 0, 31, 6 } }, { { 32, 0, 4 } }, { { 0, 1, 4 } }, + { { 0, 2, 5 } }, { { 32, 4, 5 } }, { { 0, 5, 5 } }, { { 32, 7, 5 } }, + { { 0, 8, 5 } }, { { 32, 10, 5 } }, { { 0, 11, 5 } }, { { 0, 13, 6 } }, + { { 32, 16, 5 } }, { { 0, 17, 5 } }, { { 32, 19, 5 } }, { { 0, 20, 5 } }, + { { 32, 22, 5 } }, { { 0, 23, 5 } }, { { 0, 25, 4 } }, { { 16, 25, 4 } }, + { { 32, 26, 5 } }, { { 0, 28, 6 } }, { { 0, 30, 6 } }, { { 48, 0, 4 } }, + { { 16, 1, 4 } }, { { 32, 2, 5 } }, { { 32, 3, 5 } }, { { 32, 5, 5 } }, + { { 32, 6, 5 } }, { { 32, 8, 5 } }, { { 32, 9, 5 } }, { { 32, 11, 5 } }, + { { 32, 12, 5 } }, { { 0, 15, 6 } }, { { 32, 17, 5 } }, { { 32, 18, 5 } }, + { { 32, 20, 5 } }, { { 32, 21, 5 } }, { { 32, 23, 5 } }, { { 32, 24, 5 } }, + { { 0, 35, 6 } }, { { 0, 34, 6 } }, { { 0, 33, 6 } }, { { 0, 32, 6 } }, +}; /* LL_defaultDTable */ + +/* Default FSE distribution table for Match Lengths */ +static const FSE_decode_t4 ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = { + { { ML_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, 
fastMode, fastMode */ + /* base, symbol, bits */ + { { 0, 0, 6 } }, { { 0, 1, 4 } }, { { 32, 2, 5 } }, { { 0, 3, 5 } }, + { { 0, 5, 5 } }, { { 0, 6, 5 } }, { { 0, 8, 5 } }, { { 0, 10, 6 } }, + { { 0, 13, 6 } }, { { 0, 16, 6 } }, { { 0, 19, 6 } }, { { 0, 22, 6 } }, + { { 0, 25, 6 } }, { { 0, 28, 6 } }, { { 0, 31, 6 } }, { { 0, 33, 6 } }, + { { 0, 35, 6 } }, { { 0, 37, 6 } }, { { 0, 39, 6 } }, { { 0, 41, 6 } }, + { { 0, 43, 6 } }, { { 0, 45, 6 } }, { { 16, 1, 4 } }, { { 0, 2, 4 } }, + { { 32, 3, 5 } }, { { 0, 4, 5 } }, { { 32, 6, 5 } }, { { 0, 7, 5 } }, + { { 0, 9, 6 } }, { { 0, 12, 6 } }, { { 0, 15, 6 } }, { { 0, 18, 6 } }, + { { 0, 21, 6 } }, { { 0, 24, 6 } }, { { 0, 27, 6 } }, { { 0, 30, 6 } }, + { { 0, 32, 6 } }, { { 0, 34, 6 } }, { { 0, 36, 6 } }, { { 0, 38, 6 } }, + { { 0, 40, 6 } }, { { 0, 42, 6 } }, { { 0, 44, 6 } }, { { 32, 1, 4 } }, + { { 48, 1, 4 } }, { { 16, 2, 4 } }, { { 32, 4, 5 } }, { { 32, 5, 5 } }, + { { 32, 7, 5 } }, { { 32, 8, 5 } }, { { 0, 11, 6 } }, { { 0, 14, 6 } }, + { { 0, 17, 6 } }, { { 0, 20, 6 } }, { { 0, 23, 6 } }, { { 0, 26, 6 } }, + { { 0, 29, 6 } }, { { 0, 52, 6 } }, { { 0, 51, 6 } }, { { 0, 50, 6 } }, + { { 0, 49, 6 } }, { { 0, 48, 6 } }, { { 0, 47, 6 } }, { { 0, 46, 6 } }, +}; /* ML_defaultDTable */ + +/* Default FSE distribution table for Offset Codes */ +static const FSE_decode_t4 OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = { + { { OF_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */ + /* base, symbol, bits */ + { { 0, 0, 5 } }, { { 0, 6, 4 } }, + { { 0, 9, 5 } }, { { 0, 15, 5 } }, + { { 0, 21, 5 } }, { { 0, 3, 5 } }, + { { 0, 7, 4 } }, { { 0, 12, 5 } }, + { { 0, 18, 5 } }, { { 0, 23, 5 } }, + { { 0, 5, 5 } }, { { 0, 8, 4 } }, + { { 0, 14, 5 } }, { { 0, 20, 5 } }, + { { 0, 2, 5 } }, { { 16, 7, 4 } }, + { { 0, 11, 5 } }, { { 0, 17, 5 } }, + { { 0, 22, 5 } }, { { 0, 4, 5 } }, + { { 16, 8, 4 } }, { { 0, 13, 5 } }, + { { 0, 19, 5 } }, { { 0, 1, 5 } }, + { { 16, 6, 4 } }, { { 0, 10, 5 } }, + { { 0, 16, 5 } }, { { 0, 
28, 5 } }, + { { 0, 27, 5 } }, { { 0, 26, 5 } }, + { { 0, 25, 5 } }, { { 0, 24, 5 } }, +}; /* OF_defaultDTable */ + +/*! ZSTD_buildSeqTable() : + @return : nb bytes read from src, + or an error code if it fails, testable with ZSTD_isError() +*/ +static size_t ZSTD_buildSeqTable(FSE_DTable* DTableSpace, const FSE_DTable** DTablePtr, + symbolEncodingType_e type, U32 max, U32 maxLog, + const void* src, size_t srcSize, + const FSE_decode_t4* defaultTable, U32 flagRepeatTable) +{ + const void* const tmpPtr = defaultTable; /* bypass strict aliasing */ + switch(type) + { + case set_rle : + if (!srcSize) return ERROR(srcSize_wrong); + if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected); + FSE_buildDTable_rle(DTableSpace, *(const BYTE*)src); + *DTablePtr = DTableSpace; + return 1; + case set_basic : + *DTablePtr = (const FSE_DTable*)tmpPtr; + return 0; + case set_repeat: + if (!flagRepeatTable) return ERROR(corruption_detected); + return 0; + default : /* impossible */ + case set_compressed : + { U32 tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + if (FSE_isError(headerSize)) return ERROR(corruption_detected); + if (tableLog > maxLog) return ERROR(corruption_detected); + FSE_buildDTable(DTableSpace, norm, max, tableLog); + *DTablePtr = DTableSpace; + return headerSize; + } } +} + +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + + /* check */ + if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); + + /* SeqHead */ + { int nbSeq = *ip++; + if (!nbSeq) { *nbSeqPtr=0; return 1; } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) { + if (ip+2 > iend) return ERROR(srcSize_wrong); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + } else { + if (ip >= iend) return ERROR(srcSize_wrong); + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } 
+ } + *nbSeqPtr = nbSeq; + } + + /* FSE table descriptors */ + if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */ + { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); + symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); + symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); + ip++; + + /* Build DTables */ + { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, + LLtype, MaxLL, LLFSELog, + ip, iend-ip, LL_defaultDTable, dctx->fseEntropy); + if (ZSTD_isError(llhSize)) return ERROR(corruption_detected); + ip += llhSize; + } + { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, + OFtype, MaxOff, OffFSELog, + ip, iend-ip, OF_defaultDTable, dctx->fseEntropy); + if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected); + ip += ofhSize; + } + { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, + MLtype, MaxML, MLFSELog, + ip, iend-ip, ML_defaultDTable, dctx->fseEntropy); + if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected); + ip += mlhSize; + } + } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t matchLength; + size_t offset; + const BYTE* match; +} seq_t; + +typedef struct { + BIT_DStream_t DStream; + FSE_DState_t stateLL; + FSE_DState_t stateOffb; + FSE_DState_t stateML; + size_t prevOffset[ZSTD_REP_NUM]; + const BYTE* base; + size_t pos; + uPtrDiff gotoDict; +} seqState_t; + + +FORCE_NOINLINE +size_t ZSTD_execSequenceLast7(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - 
WILDCOPY_OVERLENGTH; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + /* check */ + if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ + if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + if (oLitEnd <= oend_w) return ERROR(GENERIC); /* Precondition */ + + /* copy literals */ + if (op < oend_w) { + ZSTD_wildcopy(op, *litPtr, oend_w - op); + *litPtr += oend_w - op; + op = oend_w; + } + while (op < oLitEnd) *op++ = *(*litPtr)++; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - base)) { + /* offset beyond prefix */ + if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); + match = dictEnd - (base-match); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = base; + } } + while (op < oMatchEnd) *op++ = *match++; + return sequenceLength; +} + + +static seq_t ZSTD_decodeSequence(seqState_t* seqState) +{ + seq_t seq; + + U32 const llCode = FSE_peekSymbol(&seqState->stateLL); + U32 const mlCode = FSE_peekSymbol(&seqState->stateML); + U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ + + U32 const llBits = LL_bits[llCode]; + U32 const mlBits = ML_bits[mlCode]; + U32 const ofBits = ofCode; + U32 const totalBits = llBits+mlBits+ofBits; + + static const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + + static const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 
14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + static const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD }; + + /* sequence */ + { size_t offset; + if (!ofCode) + offset = 0; + else { + offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + + if (ofCode <= 1) { + offset += (llCode==0); + if (offset) { + size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } else { + offset = seqState->prevOffset[0]; + } + } else { + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + seq.offset = offset; + } + + seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream); + + seq.litLength = LL_base[llCode] + ((llCode>15) ? 
BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ + if (MEM_32bits() || + (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream); + + /* ANS state update */ + FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + + return seq; +} + + +FORCE_INLINE +size_t ZSTD_execSequence(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + /* check */ + if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ + if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); + + /* copy Literals */ + ZSTD_copy8(op, *litPtr); + if (sequence.litLength > 8) + ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - base)) { + /* offset beyond prefix -> go into extDict */ + if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); 
+ match = dictEnd + (match - base); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = base; + if (op > oend_w || sequence.matchLength < MINMATCH) { + U32 i; + for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; + return sequenceLength; + } + } } + /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ + + /* match within prefix */ + if (sequence.offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= sub2; + } else { + ZSTD_copy8(op, match); + } + op += 8; match += 8; + + if (oMatchEnd > oend-(16-MINMATCH)) { + if (op < oend_w) { + ZSTD_wildcopy(op, match, oend_w - op); + match += oend_w - op; + op = oend_w; + } + while (op < oMatchEnd) *op++ = *match++; + } else { + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ + } + return sequenceLength; +} + + +static size_t ZSTD_decompressSequences( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const base = (const BYTE*) (dctx->base); + const BYTE* const vBase = (const BYTE*) (dctx->vBase); + const BYTE* const 
dictEnd = (const BYTE*) (dctx->dictEnd); + int nbSeq; + + /* Build Decoding Tables */ + { size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + ip += seqHSize; + } + + /* Regen sequences */ + if (nbSeq) { + seqState_t seqState; + dctx->fseEntropy = 1; + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } + CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); + FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { + nbSeq--; + { seq_t const sequence = ZSTD_decodeSequence(&seqState); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd); + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } } + + /* check if reached exact end */ + if (nbSeq) return ERROR(corruption_detected); + /* save reps for next block */ + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + + return op-ostart; +} + + +FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t* seqState, int const longOffsets) +{ + seq_t seq; + + U32 const llCode = FSE_peekSymbol(&seqState->stateLL); + U32 const mlCode = FSE_peekSymbol(&seqState->stateML); + U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ + + U32 const llBits = LL_bits[llCode]; + U32 const mlBits = ML_bits[mlCode]; + U32 const ofBits = ofCode; + U32 const totalBits = llBits+mlBits+ofBits; + + static 
const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + + static const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + static const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD }; + + /* sequence */ + { size_t offset; + if (!ofCode) + offset = 0; + else { + if (longOffsets) { + int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN); + offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + } else { + offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + } + + if (ofCode <= 1) { + offset += (llCode==0); + if (offset) { + size_t temp = (offset==3) ? 
seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } else { + offset = seqState->prevOffset[0]; + } + } else { + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + seq.offset = offset; + } + + seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream); + + seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ + if (MEM_32bits() || + (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream); + + { size_t const pos = seqState->pos + seq.litLength; + seq.match = seqState->base + pos - seq.offset; /* single memory segment */ + if (seq.offset > pos) seq.match += seqState->gotoDict; /* separate memory segment */ + seqState->pos = pos + seq.matchLength; + } + + /* ANS state update */ + FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + + return seq; +} + +static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, unsigned const windowSize) { + if (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN) { + return ZSTD_decodeSequenceLong_generic(seqState, 1); + } else { + return ZSTD_decodeSequenceLong_generic(seqState, 0); + } +} + +FORCE_INLINE +size_t ZSTD_execSequenceLong(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const 
BYTE* const litLimit, + const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = sequence.match; + + /* check */ +#if 1 + if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ + if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); +#endif + + /* copy Literals */ + ZSTD_copy8(op, *litPtr); + if (sequence.litLength > 8) + ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* copy Match */ +#if 1 + if (sequence.offset > (size_t)(oLitEnd - base)) { + /* offset beyond prefix */ + if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = base; + if (op > oend_w || sequence.matchLength < MINMATCH) { + U32 i; + for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; + return sequenceLength; + } + } } + /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ +#endif + + /* match within prefix */ + if (sequence.offset < 8) { + /* close range match, overlap 
*/ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= sub2; + } else { + ZSTD_copy8(op, match); + } + op += 8; match += 8; + + if (oMatchEnd > oend-(16-MINMATCH)) { + if (op < oend_w) { + ZSTD_wildcopy(op, match, oend_w - op); + match += oend_w - op; + op = oend_w; + } + while (op < oMatchEnd) *op++ = *match++; + } else { + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ + } + return sequenceLength; +} + +static size_t ZSTD_decompressSequencesLong( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const base = (const BYTE*) (dctx->base); + const BYTE* const vBase = (const BYTE*) (dctx->vBase); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + unsigned const windowSize = dctx->fParams.windowSize; + int nbSeq; + + /* Build Decoding Tables */ + { size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + ip += seqHSize; + } + + /* Regen sequences */ + if (nbSeq) { +#define STORED_SEQS 4 +#define STOSEQ_MASK (STORED_SEQS-1) +#define ADVANCED_SEQS 4 + seq_t sequences[STORED_SEQS]; + int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); + seqState_t seqState; + int seqNb; + dctx->fseEntropy = 1; + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } + seqState.base = base; + 
seqState.pos = (size_t)(op-base); + seqState.gotoDict = (uPtrDiff)dictEnd - (uPtrDiff)base; /* cast to avoid undefined behaviour */ + CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); + FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + /* prepare in advance */ + for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) { + sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, windowSize); + } + if (seqNb<seqAdvance) return ERROR(corruption_detected); + + /* decode and decompress */ + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) { + seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize); + size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd); + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + ZSTD_PREFETCH(sequence.match); + sequences[seqNb&STOSEQ_MASK] = sequence; + op += oneSeqSize; + } + if (seqNb<nbSeq) return ERROR(corruption_detected); + + /* finish queue */ + seqNb -= seqAdvance; + for ( ; seqNb<nbSeq ; seqNb++) { + size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd); + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } + + /* save reps for next block */ + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + + return op-ostart; +} + + +static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t 
dstCapacity, + const void* src, size_t srcSize) +{ /* blockType == blockCompressed */ + const BYTE* ip = (const BYTE*)src; + + if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(srcSize_wrong); + + /* Decode literals section */ + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; + } + if (sizeof(size_t) > 4) /* do not enable prefetching on 32-bits x86, as it's performance detrimental */ + /* likely because of register pressure */ + /* if that's the correct cause, then 32-bits ARM should be affected differently */ + /* it would be good to test this on ARM real hardware, to see if prefetch version improves speed */ + if (dctx->fParams.windowSize > (1<<23)) + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize); + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); +} + + +static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) +{ + if (dst != dctx->previousDstEnd) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); + dctx->base = dst; + dctx->previousDstEnd = dst; + } +} + +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t dSize; + ZSTD_checkContinuity(dctx, dst); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); + dctx->previousDstEnd = (char*)dst + dSize; + return dSize; +} + + +/** ZSTD_insertBlock() : + insert `src` block into `dctx` history. Useful to track uncompressed blocks. 
*/ +ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +{ + ZSTD_checkContinuity(dctx, blockStart); + dctx->previousDstEnd = (const char*)blockStart + blockSize; + return blockSize; +} + + +size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length) +{ + if (length > dstCapacity) return ERROR(dstSize_tooSmall); + memset(dst, byte, length); + return length; +} + +/** ZSTD_findFrameCompressedSize() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame + * `srcSize` must be at least as large as the frame contained + * @return : the compressed size of the frame starting at `src` */ +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) +{ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) return ZSTD_findFrameCompressedSizeLegacy(src, srcSize); +#endif + if (srcSize >= ZSTD_skippableHeaderSize && + (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + 4); + } else { + const BYTE* ip = (const BYTE*)src; + const BYTE* const ipstart = ip; + size_t remainingSize = srcSize; + ZSTD_frameParams fParams; + + size_t const headerSize = ZSTD_frameHeaderSize(ip, remainingSize); + if (ZSTD_isError(headerSize)) return headerSize; + + /* Frame Header */ + { size_t const ret = ZSTD_getFrameParams(&fParams, ip, remainingSize); + if (ZSTD_isError(ret)) return ret; + if (ret > 0) return ERROR(srcSize_wrong); + } + + ip += headerSize; + remainingSize -= headerSize; + + /* Loop on each block */ + while (1) { + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) return ERROR(srcSize_wrong); + + ip += ZSTD_blockHeaderSize + cBlockSize; + 
remainingSize -= ZSTD_blockHeaderSize + cBlockSize; + + if (blockProperties.lastBlock) break; + } + + if (fParams.checksumFlag) { /* Frame content checksum */ + if (remainingSize < 4) return ERROR(srcSize_wrong); + ip += 4; + remainingSize -= 4; + } + + return ip - ipstart; + } +} + +/*! ZSTD_decompressFrame() : +* @dctx must be properly initialized */ +static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void** srcPtr, size_t *srcSizePtr) +{ + const BYTE* ip = (const BYTE*)(*srcPtr); + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t remainingSize = *srcSizePtr; + + /* check */ + if (remainingSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + + /* Frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix); + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + if (remainingSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + CHECK_F(ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize)); + ip += frameHeaderSize; remainingSize -= frameHeaderSize; + } + + /* Loop on each block */ + while (1) { + size_t decodedSize; + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + ip += ZSTD_blockHeaderSize; + remainingSize -= ZSTD_blockHeaderSize; + if (cBlockSize > remainingSize) return ERROR(srcSize_wrong); + + switch(blockProperties.blockType) + { + case bt_compressed: + decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize); + break; + case bt_raw : + decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); + break; + case bt_rle : + decodedSize = ZSTD_generateNxBytes(op, oend-op, *ip, blockProperties.origSize); + break; + case bt_reserved : + default: + return ERROR(corruption_detected); + } + + if 
(ZSTD_isError(decodedSize)) return decodedSize; + if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, op, decodedSize); + op += decodedSize; + ip += cBlockSize; + remainingSize -= cBlockSize; + if (blockProperties.lastBlock) break; + } + + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + if (remainingSize<4) return ERROR(checksum_wrong); + checkRead = MEM_readLE32(ip); + if (checkRead != checkCalc) return ERROR(checksum_wrong); + ip += 4; + remainingSize -= 4; + } + + /* Allow caller to get size read */ + *srcPtr = ip; + *srcSizePtr = remainingSize; + return op-ostart; +} + +static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict); +static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict); + +static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void *dict, size_t dictSize, + const ZSTD_DDict* ddict) +{ + void* const dststart = dst; + + if (ddict) { + if (dict) { + /* programmer error, these two cases should be mutually exclusive */ + return ERROR(GENERIC); + } + + dict = ZSTD_DDictDictContent(ddict); + dictSize = ZSTD_DDictDictSize(ddict); + } + + while (srcSize >= ZSTD_frameHeaderSize_prefix) { + U32 magicNumber; + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + size_t decodedSize; + size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); + if (ZSTD_isError(frameSize)) return frameSize; + + decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); + + dst = (BYTE*)dst + decodedSize; + dstCapacity -= decodedSize; + + src = (const BYTE*)src + frameSize; + srcSize -= frameSize; + + continue; + } +#endif + + magicNumber = MEM_readLE32(src); + if (magicNumber != ZSTD_MAGICNUMBER) { + if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t 
skippableSize; + if (srcSize < ZSTD_skippableHeaderSize) + return ERROR(srcSize_wrong); + skippableSize = MEM_readLE32((const BYTE *)src + 4) + + ZSTD_skippableHeaderSize; + if (srcSize < skippableSize) { + return ERROR(srcSize_wrong); + } + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } else { + return ERROR(prefix_unknown); + } + } + + if (ddict) { + /* we were called from ZSTD_decompress_usingDDict */ + ZSTD_refDDict(dctx, ddict); + } else { + /* this will initialize correctly with no dict if dict == NULL, so + * use this in all cases but ddict */ + CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); + } + ZSTD_checkContinuity(dctx, dst); + + { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, + &src, &srcSize); + if (ZSTD_isError(res)) return res; + /* don't need to bounds check this, ZSTD_decompressFrame will have + * already */ + dst = (BYTE*)dst + res; + dstCapacity -= res; + } + } + + if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */ + + return (BYTE*)dst - (BYTE*)dststart; +} + +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize) +{ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); +} + + +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); +} + + +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1) + size_t regenSize; + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + if (dctx==NULL) return ERROR(memory_allocation); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); + ZSTD_freeDCtx(dctx); + return regenSize; +#else /* stack mode */ + ZSTD_DCtx dctx; + return 
ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); +#endif +} + + +/*-************************************** +* Advanced Streaming Decompression API +* Bufferless and synchronous +****************************************/ +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } + +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { + switch(dctx->stage) + { + default: /* should not happen */ + case ZSTDds_getFrameHeaderSize: + case ZSTDds_decodeFrameHeader: + return ZSTDnit_frameHeader; + case ZSTDds_decodeBlockHeader: + return ZSTDnit_blockHeader; + case ZSTDds_decompressBlock: + return ZSTDnit_block; + case ZSTDds_decompressLastBlock: + return ZSTDnit_lastBlock; + case ZSTDds_checkChecksum: + return ZSTDnit_checksum; + case ZSTDds_decodeSkippableHeader: + case ZSTDds_skipFrame: + return ZSTDnit_skippableFrame; + } +} + +int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } /* for zbuff */ + +/** ZSTD_decompressContinue() : +* @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) +* or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + /* Sanity check */ + if (srcSize != dctx->expected) return ERROR(srcSize_wrong); + if (dstCapacity) ZSTD_checkContinuity(dctx, dst); + + switch (dctx->stage) + { + case ZSTDds_getFrameHeaderSize : + if (srcSize != ZSTD_frameHeaderSize_prefix) return ERROR(srcSize_wrong); /* impossible */ + if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix); + dctx->expected = ZSTD_skippableHeaderSize - ZSTD_frameHeaderSize_prefix; /* magic number + skippable frame length */ + dctx->stage = ZSTDds_decodeSkippableHeader; + return 0; + } + dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_prefix); + if 
(ZSTD_isError(dctx->headerSize)) return dctx->headerSize; + memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix); + if (dctx->headerSize > ZSTD_frameHeaderSize_prefix) { + dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_prefix; + dctx->stage = ZSTDds_decodeFrameHeader; + return 0; + } + dctx->expected = 0; /* not necessary to copy more */ + + case ZSTDds_decodeFrameHeader: + memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); + CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); + dctx->expected = ZSTD_blockHeaderSize; + dctx->stage = ZSTDds_decodeBlockHeader; + return 0; + + case ZSTDds_decodeBlockHeader: + { blockProperties_t bp; + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + dctx->expected = cBlockSize; + dctx->bType = bp.blockType; + dctx->rleSize = bp.origSize; + if (cBlockSize) { + dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; + return 0; + } + /* empty block */ + if (bp.lastBlock) { + if (dctx->fParams.checksumFlag) { + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* end of frame */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->expected = 3; /* go directly to next header */ + dctx->stage = ZSTDds_decodeBlockHeader; + } + return 0; + } + case ZSTDds_decompressLastBlock: + case ZSTDds_decompressBlock: + { size_t rSize; + switch(dctx->bType) + { + case bt_compressed: + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); + break; + case bt_raw : + rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + break; + case bt_rle : + rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize); + break; + case bt_reserved : /* should never happen */ + default: + return ERROR(corruption_detected); + } + if (ZSTD_isError(rSize)) return rSize; + if (dctx->fParams.checksumFlag) 
XXH64_update(&dctx->xxhState, dst, rSize); + + if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ + if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* ends here */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->stage = ZSTDds_decodeBlockHeader; + dctx->expected = ZSTD_blockHeaderSize; + dctx->previousDstEnd = (char*)dst + rSize; + } + return rSize; + } + case ZSTDds_checkChecksum: + { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); /* srcSize == 4, guaranteed by dctx->expected */ + if (check32 != h32) return ERROR(checksum_wrong); + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } + case ZSTDds_decodeSkippableHeader: + { memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); + dctx->expected = MEM_readLE32(dctx->headerBuffer + 4); + dctx->stage = ZSTDds_skipFrame; + return 0; + } + case ZSTDds_skipFrame: + { dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } + default: + return ERROR(GENERIC); /* impossible */ + } +} + + +static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + dctx->dictEnd = dctx->previousDstEnd; + dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); + dctx->base = dict; + dctx->previousDstEnd = (const char*)dict + dictSize; + return 0; +} + +/* ZSTD_loadEntropy() : + * dict : must point at beginning of a valid zstd dictionary + * @return : size of entropy tables read */ +static size_t ZSTD_loadEntropy(ZSTD_entropyTables_t* entropy, const void* const dict, size_t const dictSize) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + + if (dictSize <= 8) return ERROR(dictionary_corrupted); + dictPtr += 8; /* skip header = magic + dictID */ + + + { 
size_t const hSize = HUF_readDTableX4(entropy->hufTable, dictPtr, dictEnd-dictPtr); + if (HUF_isError(hSize)) return ERROR(dictionary_corrupted); + dictPtr += hSize; + } + + { short offcodeNCount[MaxOff+1]; + U32 offcodeMaxValue = MaxOff, offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); + if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); + CHECK_E(FSE_buildDTable(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted); + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); + CHECK_E(FSE_buildDTable(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); + CHECK_E(FSE_buildDTable(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted); + dictPtr += litlengthHeaderSize; + } + + if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); + { int i; + size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); + for (i=0; i<3; i++) { + U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; + if (rep==0 || rep >= dictContentSize) return 
ERROR(dictionary_corrupted); + entropy->rep[i] = rep; + } } + + return dictPtr - (const BYTE*)dict; +} + +static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); + { U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_DICT_MAGIC) { + return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ + } } + dctx->dictID = MEM_readLE32((const char*)dict + 4); + + /* load entropy tables */ + { size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize); + if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted); + dict = (const char*)dict + eSize; + dictSize -= eSize; + } + dctx->litEntropy = dctx->fseEntropy = 1; + + /* reference dictionary content */ + return ZSTD_refDictContent(dctx, dict, dictSize); +} + +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + CHECK_F(ZSTD_decompressBegin(dctx)); + if (dict && dictSize) CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted); + return 0; +} + + +/* ====== ZSTD_DDict ====== */ + +struct ZSTD_DDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictSize; + ZSTD_entropyTables_t entropy; + U32 dictID; + U32 entropyPresent; + ZSTD_customMem cMem; +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict) +{ + return ddict->dictContent; +} + +static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict) +{ + return ddict->dictSize; +} + +static void ZSTD_refDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict) +{ + ZSTD_decompressBegin(dstDCtx); /* init */ + if (ddict) { /* support refDDict on NULL */ + dstDCtx->dictID = ddict->dictID; + dstDCtx->base = ddict->dictContent; + dstDCtx->vBase = ddict->dictContent; + dstDCtx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; + dstDCtx->previousDstEnd = dstDCtx->dictEnd; + if 
(ddict->entropyPresent) { + dstDCtx->litEntropy = 1; + dstDCtx->fseEntropy = 1; + dstDCtx->LLTptr = ddict->entropy.LLTable; + dstDCtx->MLTptr = ddict->entropy.MLTable; + dstDCtx->OFTptr = ddict->entropy.OFTable; + dstDCtx->HUFptr = ddict->entropy.hufTable; + dstDCtx->entropy.rep[0] = ddict->entropy.rep[0]; + dstDCtx->entropy.rep[1] = ddict->entropy.rep[1]; + dstDCtx->entropy.rep[2] = ddict->entropy.rep[2]; + } else { + dstDCtx->litEntropy = 0; + dstDCtx->fseEntropy = 0; + } + } +} + +static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict) +{ + ddict->dictID = 0; + ddict->entropyPresent = 0; + if (ddict->dictSize < 8) return 0; + { U32 const magic = MEM_readLE32(ddict->dictContent); + if (magic != ZSTD_DICT_MAGIC) return 0; /* pure content mode */ + } + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + 4); + + /* load entropy tables */ + CHECK_E( ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted ); + ddict->entropyPresent = 1; + return 0; +} + + +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem) +{ + if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; + if (!customMem.customAlloc || !customMem.customFree) return NULL; + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); + if (!ddict) return NULL; + ddict->cMem = customMem; + + if ((byReference) || (!dict) || (!dictSize)) { + ddict->dictBuffer = NULL; + ddict->dictContent = dict; + } else { + void* const internalBuffer = ZSTD_malloc(dictSize, customMem); + if (!internalBuffer) { ZSTD_freeDDict(ddict); return NULL; } + memcpy(internalBuffer, dict, dictSize); + ddict->dictBuffer = internalBuffer; + ddict->dictContent = internalBuffer; + } + ddict->dictSize = dictSize; + ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + /* parse dictionary content */ + { size_t const 
errorCode = ZSTD_loadEntropy_inDDict(ddict); + if (ZSTD_isError(errorCode)) { + ZSTD_freeDDict(ddict); + return NULL; + } } + + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, to start decompression without startup delay. +* `dict` content is copied inside DDict. +* Consequently, `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, 0, allocator); +} + + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, to start decompression without startup delay. + * Dictionary content is simply referenced, it will be accessed during decompression. + * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ +ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dictBuffer, dictSize, 1, allocator); +} + + +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = ddict->cMem; + ZSTD_free(ddict->dictBuffer, cMem); + ZSTD_free(ddict, cMem); + return 0; + } +} + +size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; +} + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) +{ + if (dictSize < 8) return 0; + if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return 0; + return MEM_readLE32((const char*)dict + 4); +} + +/*! 
ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; + return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); +} + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This can happen for one of the following reasons : + * - The frame does not require a dictionary (most common case). + * - The frame was built with dictID intentionally removed. + * The needed dictionary is then hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, frame header could not be decoded. + * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. + * - This is not a Zstandard frame. + * To identify the exact failure cause, use + * ZSTD_getFrameParams(), which provides a more precise error code. */ +unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) +{ + ZSTD_frameParams zfp = { 0 , 0 , 0 , 0 }; + size_t const hError = ZSTD_getFrameParams(&zfp, src, srcSize); + if (ZSTD_isError(hError)) return 0; /* ZSTD_getFrameParams() reported an error : report no dictID */ + return zfp.dictID; +} + + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Use dictionary without significant overhead.
*/ +size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict) +{ + /* pass content and size in case legacy frames are encountered */ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, + NULL, 0, + ddict); +} + + +/*===================================== +* Streaming decompression +*====================================*/ + +typedef enum { zdss_init, zdss_loadHeader, + zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; + +/* *** Resource management *** */ +struct ZSTD_DStream_s { + ZSTD_DCtx* dctx; + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; + ZSTD_frameParams fParams; + ZSTD_dStreamStage stage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t blockSize; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; /* tmp buffer to store frame header */ + size_t lhSize; + ZSTD_customMem customMem; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; + U32 hostageByte; +}; /* typedef'd to ZSTD_DStream within "zstd.h" */ + + +ZSTD_DStream* ZSTD_createDStream(void) +{ + return ZSTD_createDStream_advanced(defaultCustomMem); +} + +ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) +{ + ZSTD_DStream* zds; + + if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; + if (!customMem.customAlloc || !customMem.customFree) return NULL; + + zds = (ZSTD_DStream*) ZSTD_malloc(sizeof(ZSTD_DStream), customMem); + if (zds==NULL) return NULL; + memset(zds, 0, sizeof(ZSTD_DStream)); + memcpy(&zds->customMem, &customMem, sizeof(ZSTD_customMem)); + zds->dctx = ZSTD_createDCtx_advanced(customMem); + if (zds->dctx == NULL) { ZSTD_freeDStream(zds); return NULL; } + zds->stage = zdss_init; + zds->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + return zds; +} + +size_t ZSTD_freeDStream(ZSTD_DStream* zds) +{ + if 
(zds==NULL) return 0; /* support free on null */ + { ZSTD_customMem const cMem = zds->customMem; + ZSTD_freeDCtx(zds->dctx); + zds->dctx = NULL; + ZSTD_freeDDict(zds->ddictLocal); + zds->ddictLocal = NULL; + ZSTD_free(zds->inBuff, cMem); + zds->inBuff = NULL; + ZSTD_free(zds->outBuff, cMem); + zds->outBuff = NULL; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (zds->legacyContext) + ZSTD_freeLegacyStreamContext(zds->legacyContext, zds->previousLegacyVersion); +#endif + ZSTD_free(zds, cMem); + return 0; + } +} + + +/* *** Initialization *** */ + +size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize; } +size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } + +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) +{ + zds->stage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; + ZSTD_freeDDict(zds->ddictLocal); + if (dict && dictSize >= 8) { + zds->ddictLocal = ZSTD_createDDict(dict, dictSize); + if (zds->ddictLocal == NULL) return ERROR(memory_allocation); + } else zds->ddictLocal = NULL; + zds->ddict = zds->ddictLocal; + zds->legacyVersion = 0; + zds->hostageByte = 0; + return ZSTD_frameHeaderSize_prefix; +} + +size_t ZSTD_initDStream(ZSTD_DStream* zds) +{ + return ZSTD_initDStream_usingDict(zds, NULL, 0); +} + +/* ZSTD_initDStream_usingDDict() : + * ddict will just be referenced, and must outlive decompression session */ +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict) +{ + size_t const initResult = ZSTD_initDStream(zds); + zds->ddict = ddict; + return initResult; +} + +size_t ZSTD_resetDStream(ZSTD_DStream* zds) +{ + zds->stage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; + zds->legacyVersion = 0; + zds->hostageByte = 0; + return ZSTD_frameHeaderSize_prefix; +} + +size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, + ZSTD_DStreamParameter_e paramType, 
unsigned paramValue) +{ + switch(paramType) + { + default : return ERROR(parameter_unknown); + case DStream_p_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break; + } + return 0; +} + + +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds) +{ + if (zds==NULL) return 0; /* support sizeof NULL */ + return sizeof(*zds) + + ZSTD_sizeof_DCtx(zds->dctx) + + ZSTD_sizeof_DDict(zds->ddictLocal) + + zds->inBuffSize + zds->outBuffSize; +} + + +/* ***** Decompression ***** */ + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + memcpy(dst, src, length); + return length; +} + + +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + const char* const istart = (const char*)(input->src) + input->pos; + const char* const iend = (const char*)(input->src) + input->size; + const char* ip = istart; + char* const ostart = (char*)(output->dst) + output->pos; + char* const oend = (char*)(output->dst) + output->size; + char* op = ostart; + U32 someMoreWork = 1; + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + if (zds->legacyVersion) + return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); +#endif + + while (someMoreWork) { + switch(zds->stage) + { + case zdss_init : + ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */ + /* fall-through */ + + case zdss_loadHeader : + { size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize); + if (ZSTD_isError(hSize)) +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + { U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); + if (legacyVersion) { + const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL; + size_t const dictSize = zds->ddict ? 
zds->ddict->dictSize : 0; + CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion, + dict, dictSize)); + zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; + return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); + } else { + return hSize; /* error */ + } } +#else + return hSize; +#endif + if (hSize != 0) { /* need more input */ + size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ + if (toLoad > (size_t)(iend-ip)) { /* not enough input to load full header */ + memcpy(zds->headerBuffer + zds->lhSize, ip, iend-ip); + zds->lhSize += iend-ip; + input->pos = input->size; + return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + break; + } } + + /* check for single-pass mode opportunity */ + if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */ + && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { + size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); + if (cSize <= (size_t)(iend-istart)) { + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds->dctx, op, oend-op, istart, cSize, zds->ddict); + if (ZSTD_isError(decompressedSize)) return decompressedSize; + ip = istart + cSize; + op += decompressedSize; + zds->dctx->expected = 0; + zds->stage = zdss_init; + someMoreWork = 0; + break; + } } + + /* Consume header */ + ZSTD_refDDict(zds->dctx, zds->ddict); + { size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); /* == ZSTD_frameHeaderSize_prefix */ + CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size)); + { size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); + CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer+h1Size, h2Size)); + } } + + 
zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); + if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge); + + /* Adapt buffer sizes to frame header instructions */ + { size_t const blockSize = MIN(zds->fParams.windowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); + size_t const neededOutSize = zds->fParams.windowSize + blockSize + WILDCOPY_OVERLENGTH * 2; + zds->blockSize = blockSize; + if (zds->inBuffSize < blockSize) { + ZSTD_free(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->inBuff = (char*)ZSTD_malloc(blockSize, zds->customMem); + if (zds->inBuff == NULL) return ERROR(memory_allocation); + zds->inBuffSize = blockSize; + } + if (zds->outBuffSize < neededOutSize) { + ZSTD_free(zds->outBuff, zds->customMem); + zds->outBuffSize = 0; + zds->outBuff = (char*)ZSTD_malloc(neededOutSize, zds->customMem); + if (zds->outBuff == NULL) return ERROR(memory_allocation); + zds->outBuffSize = neededOutSize; + } } + zds->stage = zdss_read; + /* pass-through */ + + case zdss_read: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); + if (neededInSize==0) { /* end of frame */ + zds->stage = zdss_init; + someMoreWork = 0; + break; + } + if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ + const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx); + size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, + zds->outBuff + zds->outStart, (isSkipFrame ? 
0 : zds->outBuffSize - zds->outStart), + ip, neededInSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + ip += neededInSize; + if (!decodedSize && !isSkipFrame) break; /* this was just a header */ + zds->outEnd = zds->outStart + decodedSize; + zds->stage = zdss_flush; + break; + } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zds->stage = zdss_load; + /* pass-through */ + } + + case zdss_load: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); + size_t const toLoad = neededInSize - zds->inPos; /* should always be <= remaining space within inBuff */ + size_t loadedSize; + if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected); /* should never happen */ + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); + ip += loadedSize; + zds->inPos += loadedSize; + if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ + + /* decode loaded input */ + { const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx); + size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, + zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart, + zds->inBuff, neededInSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + zds->inPos = 0; /* input is consumed */ + if (!decodedSize && !isSkipFrame) { zds->stage = zdss_read; break; } /* this was just a header */ + zds->outEnd = zds->outStart + decodedSize; + zds->stage = zdss_flush; + /* pass-through */ + } } + + case zdss_flush: + { size_t const toFlushSize = zds->outEnd - zds->outStart; + size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize); + op += flushedSize; + zds->outStart += flushedSize; + if (flushedSize == toFlushSize) { /* flush completed */ + zds->stage = zdss_read; + if (zds->outStart + zds->blockSize > zds->outBuffSize) + zds->outStart = zds->outEnd = 0; + break; + } + /* cannot complete flush */ + someMoreWork = 0; + break; + } + default: 
return ERROR(GENERIC); /* impossible */ + } } + + /* result */ + input->pos += (size_t)(ip-istart); + output->pos += (size_t)(op-ostart); + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds->dctx); + if (!nextSrcSizeHint) { /* frame fully decoded */ + if (zds->outEnd == zds->outStart) { /* output fully flushed */ + if (zds->hostageByte) { + if (input->pos >= input->size) { zds->stage = zdss_read; return 1; } /* can't release hostage (not present) */ + input->pos++; /* release hostage */ + } + return 0; + } + if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ + input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ + zds->hostageByte=1; + } + return 1; + } + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->dctx) == ZSTDnit_block); /* preload header of next block */ + if (zds->inPos > nextSrcSizeHint) return ERROR(GENERIC); /* should never happen */ + nextSrcSizeHint -= zds->inPos; /* already loaded*/ + return nextSrcSizeHint; + } +} diff --git a/thirdparty/zstd/zstd.h b/thirdparty/zstd/zstd.h new file mode 100644 index 0000000000..f8050c1361 --- /dev/null +++ b/thirdparty/zstd/zstd.h @@ -0,0 +1,795 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. 
+ */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 + +/* ====== Dependency ======*/ +#include <stddef.h> /* size_t */ + + +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) +#else +# define ZSTDLIB_VISIBILITY +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDLIB_API ZSTDLIB_VISIBILITY +#endif + + +/******************************************************************************************************* + Introduction + + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios + at zlib-level and better compression ratios. The zstd compression library provides in-memory compression and + decompression functions. The library supports compression levels from 1 up to ZSTD_maxCLevel() which is 22. + Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory. + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit memory management) + - unbounded multiple steps (described as Streaming compression) + The compression ratio achievable on small data can be highly improved using compression with a dictionary in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Fast dictionary API) + + Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h. 
+ These APIs shall never be used with a dynamic library. + They are not "stable", their definition may change in the future. Only static linking is allowed. +*********************************************************************************************************/ + +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 2 +#define ZSTD_VERSION_RELEASE 0 + +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) + +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) +ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< library version number; to be used when checking dll version */ + + +/*************************************** +* Simple API +***************************************/ +/*! ZSTD_compress() : + * Compresses `src` content as a single zstd compressed frame into already allocated `dst`. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity`), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize. + * If the user cannot determine such an upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/*!
ZSTD_getDecompressedSize() : + * NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize. + * ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single + * frame, but distinguishes empty frames from frames with an unknown size, or errors. + * + * Additionally, ZSTD_findDecompressedSize can be used instead. It can handle multiple + * concatenated frames in one buffer, and so is more general. + * As a result however, it requires more computation and entire frames to be passed to it, + * as opposed to ZSTD_getFrameContentSize which requires only a single frame's header. + * + * 'src' is the start of a zstd compressed frame. + * @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise. + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==0`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can still use ZSTD_decompress() while relying on implied limits. + * (For example, data may be necessarily cut into blocks <= 16 KB). + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. 
*/ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + + +/*====== Helper functions ======*/ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ + + +/*************************************** +* Explicit memory management +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, and re-use it for each successive compression operation. + * This will make the workload friendlier to the system's memory. + * Use one context per thread for parallel execution in multi-threaded environments. */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); + +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context just once, and re-use it for each successive decompression operation. + * This will make the workload friendlier to the system's memory. + * Use one context per thread for parallel execution in multi-threaded environments. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); + +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()).
*/ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : +* Compression using a predefined Dictionary (see dictBuilder/zdict.h). +* Note : This function loads the dictionary, resulting in significant startup delay. +* Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : +* Decompression using a predefined Dictionary (see dictBuilder/zdict.h). +* Dictionary must be identical to the one used during compression. +* Note : This function loads the dictionary, resulting in significant startup delay. +* Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/**************************** +* Fast dictionary API +****************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! ZSTD_createCDict() : +* When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. +* ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. +* ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only. +* `dictBuffer` can be released after ZSTD_CDict creation, as its content is copied within CDict */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel); + +/*! ZSTD_freeCDict() : +* Function frees memory allocated by ZSTD_createCDict(). 
*/ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. + * Note that compression level is decided during dictionary creation. + * Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + + +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/*! ZSTD_createDDict() : +* Create a digested dictionary, ready to start decompression operation without startup delay. +* dictBuffer can be released after DDict creation, as its content is copied inside DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_freeDDict() : +* Function frees memory allocated with ZSTD_createDDict() */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a digested Dictionary. +* Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/**************************** +* Streaming +****************************/ + +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; + +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. 
Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; + + + +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. +* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream in situations where many streaming operations will be achieved consecutively, +* since it will play nicer with system's memory, by re-using already allocated memory. +* Use one separate ZSTD_CStream per thread for parallel execution. +* +* Start a new compression by initializing ZSTD_CStream. +* Use ZSTD_initCStream() to start a new compression operation. +* Use ZSTD_initCStream_usingDict() or ZSTD_initCStream_usingCDict() for a compression which requires a dictionary (experimental section) +* +* Use ZSTD_compressStream() repetitively to consume input stream. +* The function will automatically update both `pos` fields. +* Note that it may not consume the entire input, in which case `pos < size`, +* and it's up to the caller to present again remaining data. +* @return : a size hint, preferred nb of bytes to use as input for next function call +* or an error code, which can be tested using ZSTD_isError(). +* Note 1 : it's just a hint, to help latency a little, any other value will work fine. +* Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize() +* +* At any moment, it's possible to flush whatever data remains within internal buffer, using ZSTD_flushStream(). +* `output->pos` will be updated. +* Note that some content might still be left within internal buffer if `output->size` is too small. +* @return : nb of bytes still present within internal buffer (0 if it's empty) +* or an error code, which can be tested using ZSTD_isError(). +* +* ZSTD_endStream() instructs to finish a frame. 
+* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* Similar to ZSTD_flushStream(), it may not be able to flush the full content if `output->size` is too small. +* In which case, call again ZSTD_endStream() to complete the flush. +* @return : nb of bytes still present within internal buffer (0 if it's empty, hence compression completed) +* or an error code, which can be tested using ZSTD_isError(). +* +* *******************************************************************/ + +typedef struct ZSTD_CStream_s ZSTD_CStream; +/*===== ZSTD_CStream management functions =====*/ +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); + +/*===== Streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); + +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */ + + + +/*-*************************************************************************** +* Streaming decompression - HowTo +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation, +* or ZSTD_initDStream_usingDict() if decompression requires a dictionary. 
+* @return : recommended first input size +* +* Use ZSTD_decompressStream() repetitively to consume your input. +* The function will update both `pos` fields. +* If `input.pos < input.size`, some input has not been consumed. +* It's up to the caller to present again remaining data. +* If `output.pos < output.size`, decoder has flushed everything it could. +* @return : 0 when a frame is completely decoded and fully flushed, +* an error code, which can be tested using ZSTD_isError(), +* any other value > 0, which means there is still some decoding to do to complete current frame. +* The return value is a suggested next input size (a hint to improve latency) that will never load more than the current frame. +* *******************************************************************************/ + +typedef struct ZSTD_DStream_s ZSTD_DStream; +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); + +/*===== Streaming decompression functions =====*/ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ + +#endif /* ZSTD_H_235446 */ + + +#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) +#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY + +/**************************************************************************************** + * START OF ADVANCED AND EXPERIMENTAL FUNCTIONS + * The definitions in this section are considered experimental. + * They should never be used with a dynamic library, as they may change in the future. + * They are provided for advanced usages. 
+ * Use them only in association with static linking. + * ***************************************************************************************/ + +/* --- Constants ---*/ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U + +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) + +#define ZSTD_WINDOWLOG_MAX_32 27 +#define ZSTD_WINDOWLOG_MAX_64 27 +#define ZSTD_WINDOWLOG_MAX ((unsigned)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX +#define ZSTD_HASHLOG_MIN 6 +#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) +#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_HASHLOG3_MAX 17 +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_SEARCHLENGTH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_SEARCHLENGTH_MIN 3 /* only for ZSTD_btopt, other strategies are limited to 4 */ +#define ZSTD_TARGETLENGTH_MIN 4 +#define ZSTD_TARGETLENGTH_MAX 999 + +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */ +#define ZSTD_FRAMEHEADERSIZE_MIN 6 +static const size_t ZSTD_frameHeaderSize_prefix = 5; +static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN; +static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; +static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */ + + +/*--- Advanced types ---*/ +typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt, ZSTD_btopt2 } ZSTD_strategy; /* from faster to stronger */ + +typedef struct { + unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ + unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + unsigned hashLog; /**< dispatch 
table : larger == faster, more memory */ + unsigned searchLog; /**< nb of searches : larger == more compression, slower */ + unsigned searchLength; /**< match length searched : larger == faster decompression, sometimes less compression */ + unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; +} ZSTD_compressionParameters; + +typedef struct { + unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */ + unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */ + unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */ +} ZSTD_frameParameters; + +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; + +/*= Custom memory allocation functions */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; + +/*************************************** +* Compressed size functions +***************************************/ + +/*! ZSTD_findFrameCompressedSize() : + * `src` should point to the start of a ZSTD encoded frame or skippable frame + * `srcSize` must be at least as large as the frame + * @return : the compressed size of the frame pointed to by `src`, suitable to pass to + * `ZSTD_decompress` or similar, or an error code if given invalid input. */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + +/*************************************** +* Decompressed size functions +***************************************/ +/*! ZSTD_getFrameContentSize() : +* `src` should point to the start of a ZSTD encoded frame +* `srcSize` must be at least as large as the frame header. 
A value greater than or equal +* to `ZSTD_frameHeaderSize_max` is guaranteed to be large enough in all cases. +* @return : decompressed size of the frame pointed to by `src` if known, otherwise +* - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined +* - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + +/*! ZSTD_findDecompressedSize() : +* `src` should point to the start of a series of ZSTD encoded and/or skippable frames +* `srcSize` must be the _exact_ size of this series +* (i.e. there should be a frame boundary exactly `srcSize` bytes after `src`) +* @return : the decompressed size of all data in the contained frames, as a 64-bit value _if known_ +* - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN +* - if an error occurred: ZSTD_CONTENTSIZE_ERROR +* +* note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. +* When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. +* In which case, it's necessary to use streaming mode to decompress data. +* Optionally, application can still use ZSTD_decompress() while relying on implied limits. +* (For example, data may be necessarily cut into blocks <= 16 KB). +* note 2 : decompressed size is always present when compression is done with ZSTD_compress() +* note 3 : decompressed size can be very large (64-bits value), +* potentially larger than what local system can handle as a single memory segment. +* In which case, it's necessary to use streaming mode to decompress data. +* note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. +* Always ensure result fits within application's authorized limits. +* Each application can set its own limits.
+* note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to +* read each contained frame header. This is efficient as most of the data is skipped, +* however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); + + +/*************************************** +* Advanced compression functions +***************************************/ +/*! ZSTD_estimateCCtxSize() : + * Gives the amount of memory allocated for a ZSTD_CCtx given a set of compression parameters. + * Note : the prototype below takes only `cParams`; it has no `frameContentSize` argument. */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams); + +/*! ZSTD_createCCtx_advanced() : + * Create a ZSTD compression context using external alloc and free functions */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); + +/*! ZSTD_sizeof_CCtx() : + * Gives the amount of memory used by a given ZSTD_CCtx */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); + +typedef enum { + ZSTD_p_forceWindow, /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0) */ + ZSTD_p_forceRawDict /* Force loading dictionary in "content-only" mode (no header analysis) */ +} ZSTD_CCtxParameter; +/*! ZSTD_setCCtxParameter() : + * Set advanced parameters, selected through enum ZSTD_CCtxParameter + * @result : 0, or an error code (which can be tested with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value); + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is simply referenced, and therefore stays in dictBuffer.
+ * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + +/*! ZSTD_createCDict_advanced() : + * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference, + ZSTD_compressionParameters cParams, ZSTD_customMem customMem); + +/*! ZSTD_sizeof_CDict() : + * Gives the amount of memory used by a given ZSTD_CDict */ +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); + +/*! ZSTD_getCParams() : +* @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. +* `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_getParams() : +* same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. +* All fields of `ZSTD_frameParameters` are set to default (0) */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_checkCParams() : +* Ensure param values remain within authorized range */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustCParams() : +* optimize params for a given `srcSize` and `dictSize`. +* both values are optional, select `0` if unknown. */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + +/*!
ZSTD_compress_advanced() : +* Same as ZSTD_compress_usingDict(), with fine-tune control over each compression parameter */ +ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/*! ZSTD_compress_usingCDict_advanced() : +* Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams); + + +/*--- Advanced decompression functions ---*/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); + +/*! ZSTD_estimateDCtxSize() : + * Gives the potential amount of memory allocated to create a ZSTD_DCtx */ +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_createDCtx_advanced() : + * Create a ZSTD decompression context using external alloc and free functions */ +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); + +/*! ZSTD_sizeof_DCtx() : + * Gives the amount of memory used by a given ZSTD_DCtx */ +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is simply referenced, and therefore stays in dictBuffer. 
+ * It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_createDDict_advanced() : + * Create a ZSTD_DDict using external alloc and free, optionally by reference */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + unsigned byReference, ZSTD_customMem customMem); + +/*! ZSTD_sizeof_DDict() : + * Gives the amount of memory used by a given ZSTD_DDict */ +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); + +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could be for one of the following reasons : + * - The frame does not require a dictionary to be decoded (most common case). + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). + * - This is not a Zstandard frame.
+ * When identifying the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); + + +/******************************************************************** +* Advanced streaming functions +********************************************************************/ + +/*===== Advanced Streaming compression functions =====*/ +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); /**< size of CStream is variable, depending primarily on compression level */ +ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */ +ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */ +ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize, ZSTD_frameParameters fParams); /**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ + +/*! ZSTD_resetCStream() : + * start a new compression job, using same parameters from previous job. 
+ * This is typically useful to skip dictionary loading stage, since it will re-use it in-place.. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * pledgedSrcSize==0 means "srcSize unknown". + * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + + +/*===== Advanced Streaming decompression functions =====*/ +typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e; +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */ +ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */ +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); + + +/********************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with many restrictions (documented below). +* Prefer using normal streaming API for an easier experience +********************************************************************* */ + +/** + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. 
+ Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, + or ZSTD_compressBegin_advanced(), for finer parameter control. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffer only. + - Interface is synchronous : input is consumed entirely and produces 1 (or more) compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). + ZSTD_compressContinue() doesn't guarantee recovery after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). + It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consist of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. + + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames will be considered unfinished (corrupted) by decoders. + + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.
+*/ + +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize=0 means null-size */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */ + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + + +/*- + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameParams().
+ It fills a ZSTD_frameParams structure which provide important information to correctly decode the frame, + such as the minimum rolling buffer size to allocate to decompress data (`windowSize`), + and the dictionary ID used. + (Note : content size is optional, it may not be present. 0 means : content size unknown). + Note that these values could be wrong, either because of data malformation, or because an attacker is spoofing deliberate false information. + As a consequence, check that values remain within valid application range, especially `windowSize`, before allocation. + Each application can set its own limit, depending on local restrictions. For extended interoperability, it is recommended to support at least 8 MB. + Frame parameters are extracted from the beginning of the compressed frame. + Data fragment must be large enough to ensure successful decoding, typically `ZSTD_frameHeaderSize_max` bytes. + @result : 0 : successful decoding, the `ZSTD_frameParams` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict(). + Alternatively, you can copy a prepared context, using ZSTD_copyDCtx(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). 
+ + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize`. + They should preferably be located contiguously, prior to current block. + Alternatively, a round buffer of sufficient size is also possible. Sufficient size is determined by frame parameters. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference. + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by a decompressor. The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_decompressContinue() always returns 0. + For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowSize==0, which means that a frame is skippable. + Note : If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might actually be a Zstd encoded frame with no content. + For purposes of decompression, it is valid in both cases to skip the frame using + ZSTD_findFrameCompressedSize to find its size in bytes. + It also returns Frame Size as fparamsPtr->frameContentSize.
+*/ + +typedef struct { + unsigned long long frameContentSize; + unsigned windowSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameParams; + +/*===== Buffer-less streaming decompression functions =====*/ +ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input, see details below */ +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + +/** + Block functions + + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). + User will have to take in charge required information to regenerate data, such as compressed and content sizes. 
+ + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSizeMax() <= ZSTD_BLOCKSIZE_ABSOLUTEMAX + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block size, consider using the regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero. + In which case, nothing is produced into `dst`. + + User must test for such outcome and deal directly with uncompressed data + + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. +*/ + +#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) /* define, for static allocation */ +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert block into `dctx` history. Useful for uncompressed blocks */ + + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif |