diff options
Diffstat (limited to 'thirdparty/libwebp/src')
26 files changed, 223 insertions, 134 deletions
diff --git a/thirdparty/libwebp/src/dec/vp8_dec.c b/thirdparty/libwebp/src/dec/vp8_dec.c index 5f405e4c2a..2003935ec4 100644 --- a/thirdparty/libwebp/src/dec/vp8_dec.c +++ b/thirdparty/libwebp/src/dec/vp8_dec.c @@ -403,7 +403,7 @@ static const uint8_t kZigzag[16] = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; -// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2 +// See section 13-2: https://datatracker.ietf.org/doc/html/rfc6386#section-13.2 static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) { int v; if (!VP8GetBit(br, p[3], "coeffs")) { diff --git a/thirdparty/libwebp/src/dec/vp8i_dec.h b/thirdparty/libwebp/src/dec/vp8i_dec.h index 20526a87c4..9af22f8cc6 100644 --- a/thirdparty/libwebp/src/dec/vp8i_dec.h +++ b/thirdparty/libwebp/src/dec/vp8i_dec.h @@ -32,7 +32,7 @@ extern "C" { // version numbers #define DEC_MAJ_VERSION 1 #define DEC_MIN_VERSION 2 -#define DEC_REV_VERSION 1 +#define DEC_REV_VERSION 2 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). // Constraints are: We need to store one 16x16 block of luma samples (y), diff --git a/thirdparty/libwebp/src/dec/vp8l_dec.c b/thirdparty/libwebp/src/dec/vp8l_dec.c index 73c3b54fff..78db014030 100644 --- a/thirdparty/libwebp/src/dec/vp8l_dec.c +++ b/thirdparty/libwebp/src/dec/vp8l_dec.c @@ -84,7 +84,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = { // to 256 (green component values) + 24 (length prefix values) // + color_cache_size (between 0 and 2048). // All values computed for 8-bit first level lookup with Mark Adler's tool: -// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c +// https://github.com/madler/zlib/blob/v1.2.5/examples/enough.c #define FIXED_TABLE_SIZE (630 * 3 + 410) static const uint16_t kTableSize[12] = { FIXED_TABLE_SIZE + 654, diff --git a/thirdparty/libwebp/src/demux/anim_decode.c b/thirdparty/libwebp/src/demux/anim_decode.c index 2bf4dcffe0..e077ffb536 100644 --- a/thirdparty/libwebp/src/demux/anim_decode.c +++ b/thirdparty/libwebp/src/demux/anim_decode.c @@ -23,6 +23,14 @@ #define NUM_CHANNELS 4 +// Channel extraction from a uint32_t representation of a uint8_t RGBA/BGRA +// buffer. +#ifdef WORDS_BIGENDIAN +#define CHANNEL_SHIFT(i) (24 - (i) * 8) +#else +#define CHANNEL_SHIFT(i) ((i) * 8) +#endif + typedef void (*BlendRowFunc)(uint32_t* const, const uint32_t* const, int); static void BlendPixelRowNonPremult(uint32_t* const src, const uint32_t* const dst, int num_pixels); @@ -209,35 +217,35 @@ static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a, const uint8_t dst_channel = (dst >> shift) & 0xff; const uint32_t blend_unscaled = src_channel * src_a + dst_channel * dst_a; assert(blend_unscaled < (1ULL << 32) / scale); - return (blend_unscaled * scale) >> 24; + return (blend_unscaled * scale) >> CHANNEL_SHIFT(3); } // Blend 'src' over 'dst' assuming they are NOT pre-multiplied by alpha. static uint32_t BlendPixelNonPremult(uint32_t src, uint32_t dst) { - const uint8_t src_a = (src >> 24) & 0xff; + const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff; if (src_a == 0) { return dst; } else { - const uint8_t dst_a = (dst >> 24) & 0xff; + const uint8_t dst_a = (dst >> CHANNEL_SHIFT(3)) & 0xff; // This is the approximate integer arithmetic for the actual formula: // dst_factor_a = (dst_a * (255 - src_a)) / 255. const uint8_t dst_factor_a = (dst_a * (256 - src_a)) >> 8; const uint8_t blend_a = src_a + dst_factor_a; const uint32_t scale = (1UL << 24) / blend_a; - const uint8_t blend_r = - BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 0); - const uint8_t blend_g = - BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 8); - const uint8_t blend_b = - BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 16); + const uint8_t blend_r = BlendChannelNonPremult( + src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(0)); + const uint8_t blend_g = BlendChannelNonPremult( + src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(1)); + const uint8_t blend_b = BlendChannelNonPremult( + src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(2)); assert(src_a + dst_factor_a < 256); - return (blend_r << 0) | - (blend_g << 8) | - (blend_b << 16) | - ((uint32_t)blend_a << 24); + return ((uint32_t)blend_r << CHANNEL_SHIFT(0)) | + ((uint32_t)blend_g << CHANNEL_SHIFT(1)) | + ((uint32_t)blend_b << CHANNEL_SHIFT(2)) | + ((uint32_t)blend_a << CHANNEL_SHIFT(3)); } } @@ -247,7 +255,7 @@ static void BlendPixelRowNonPremult(uint32_t* const src, const uint32_t* const dst, int num_pixels) { int i; for (i = 0; i < num_pixels; ++i) { - const uint8_t src_alpha = (src[i] >> 24) & 0xff; + const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff; if (src_alpha != 0xff) { src[i] = BlendPixelNonPremult(src[i], dst[i]); } @@ -264,7 +272,7 @@ static WEBP_INLINE uint32_t ChannelwiseMultiply(uint32_t pix, uint32_t scale) { // Blend 'src' over 'dst' assuming they are pre-multiplied by alpha. static uint32_t BlendPixelPremult(uint32_t src, uint32_t dst) { - const uint8_t src_a = (src >> 24) & 0xff; + const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff; return src + ChannelwiseMultiply(dst, 256 - src_a); } @@ -274,7 +282,7 @@ static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst, int num_pixels) { int i; for (i = 0; i < num_pixels; ++i) { - const uint8_t src_alpha = (src[i] >> 24) & 0xff; + const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff; if (src_alpha != 0xff) { src[i] = BlendPixelPremult(src[i], dst[i]); } diff --git a/thirdparty/libwebp/src/demux/demux.c b/thirdparty/libwebp/src/demux/demux.c index 547a7725de..f04a2b8450 100644 --- a/thirdparty/libwebp/src/demux/demux.c +++ b/thirdparty/libwebp/src/demux/demux.c @@ -25,7 +25,7 @@ #define DMUX_MAJ_VERSION 1 #define DMUX_MIN_VERSION 2 -#define DMUX_REV_VERSION 1 +#define DMUX_REV_VERSION 2 typedef struct { size_t start_; // start location of the data diff --git a/thirdparty/libwebp/src/dsp/dsp.h b/thirdparty/libwebp/src/dsp/dsp.h index 513e159bb3..c4f57e4d5b 100644 --- a/thirdparty/libwebp/src/dsp/dsp.h +++ b/thirdparty/libwebp/src/dsp/dsp.h @@ -119,7 +119,12 @@ extern "C" { #define WEBP_USE_NEON #endif -#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM) +// Note: ARM64 is supported in Visual Studio 2017, but requires the direct +// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in +// arm_neon.h. +#if defined(_MSC_VER) && \ + ((_MSC_VER >= 1700 && defined(_M_ARM)) || \ + (_MSC_VER >= 1920 && defined(_M_ARM64))) #define WEBP_USE_NEON #define WEBP_USE_INTRINSICS #endif diff --git a/thirdparty/libwebp/src/dsp/enc_neon.c b/thirdparty/libwebp/src/dsp/enc_neon.c index 43bf1245c5..601962ba76 100644 --- a/thirdparty/libwebp/src/dsp/enc_neon.c +++ b/thirdparty/libwebp/src/dsp/enc_neon.c @@ -9,7 +9,7 @@ // // ARM NEON version of speed-critical encoding functions. // -// adapted from libvpx (http://www.webmproject.org/code/) +// adapted from libvpx (https://www.webmproject.org/code/) #include "src/dsp/dsp.h" diff --git a/thirdparty/libwebp/src/dsp/lossless.c b/thirdparty/libwebp/src/dsp/lossless.c index d8bbb02b35..84a54296fd 100644 --- a/thirdparty/libwebp/src/dsp/lossless.c +++ b/thirdparty/libwebp/src/dsp/lossless.c @@ -107,63 +107,77 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { //------------------------------------------------------------------------------ // Predictors -uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor0_C(const uint32_t* const left, + const uint32_t* const top) { (void)top; (void)left; return ARGB_BLACK; } -uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor1_C(const uint32_t* const left, + const uint32_t* const top) { (void)top; - return left; + return *left; } -uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor2_C(const uint32_t* const left, + const uint32_t* const top) { (void)left; return top[0]; } -uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor3_C(const uint32_t* const left, + const uint32_t* const top) { (void)left; return top[1]; } -uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor4_C(const uint32_t* const left, + const uint32_t* const top) { (void)left; return top[-1]; } -uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average3(left, top[0], top[1]); +uint32_t VP8LPredictor5_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average3(*left, top[0], top[1]); return pred; } -uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(left, top[-1]); +uint32_t VP8LPredictor6_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average2(*left, top[-1]); return pred; } -uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(left, top[0]); +uint32_t VP8LPredictor7_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average2(*left, top[0]); return pred; } -uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor8_C(const uint32_t* const left, + const uint32_t* const top) { const uint32_t pred = Average2(top[-1], top[0]); (void)left; return pred; } -uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor9_C(const uint32_t* const left, + const uint32_t* const top) { const uint32_t pred = Average2(top[0], top[1]); (void)left; return pred; } -uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average4(left, top[-1], top[0], top[1]); +uint32_t VP8LPredictor10_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average4(*left, top[-1], top[0], top[1]); return pred; } -uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Select(top[0], left, top[-1]); +uint32_t VP8LPredictor11_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Select(top[0], *left, top[-1]); return pred; } -uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]); +uint32_t VP8LPredictor12_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = ClampedAddSubtractFull(*left, top[0], top[-1]); return pred; } -uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]); +uint32_t VP8LPredictor13_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = ClampedAddSubtractHalf(*left, top[0], top[-1]); return pred; } diff --git a/thirdparty/libwebp/src/dsp/lossless.h b/thirdparty/libwebp/src/dsp/lossless.h index ebd316d1ed..c26c6bca07 100644 --- a/thirdparty/libwebp/src/dsp/lossless.h +++ b/thirdparty/libwebp/src/dsp/lossless.h @@ -28,23 +28,38 @@ extern "C" { //------------------------------------------------------------------------------ // Decoding -typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top); +typedef uint32_t (*VP8LPredictorFunc)(const uint32_t* const left, + const uint32_t* const top); extern VP8LPredictorFunc VP8LPredictors[16]; -uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor0_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor1_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor2_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor3_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor4_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor5_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor6_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor7_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor8_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor9_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor10_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor11_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor12_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor13_C(const uint32_t* const left, + const uint32_t* const top); // These Add/Sub function expects upper[-1] and out[-1] to be readable. typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in, diff --git a/thirdparty/libwebp/src/dsp/lossless_common.h b/thirdparty/libwebp/src/dsp/lossless_common.h index 96a106f9ee..6a2f736b5e 100644 --- a/thirdparty/libwebp/src/dsp/lossless_common.h +++ b/thirdparty/libwebp/src/dsp/lossless_common.h @@ -179,7 +179,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \ int x; \ assert(upper != NULL); \ for (x = 0; x < num_pixels; ++x) { \ - const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x); \ + const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x); \ out[x] = VP8LAddPixels(in[x], pred); \ } \ } diff --git a/thirdparty/libwebp/src/dsp/lossless_enc.c b/thirdparty/libwebp/src/dsp/lossless_enc.c index c3e8537ade..1580631e38 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc.c @@ -745,7 +745,7 @@ static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \ assert(upper != NULL); \ for (x = 0; x < num_pixels; ++x) { \ const uint32_t pred = \ - VP8LPredictor##PREDICTOR_I##_C(in[x - 1], upper + x); \ + VP8LPredictor##PREDICTOR_I##_C(&in[x - 1], upper + x); \ out[x] = VP8LSubPixels(in[x], pred); \ } \ } diff --git a/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c b/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c index 9888854d57..bfe5ea6b38 100644 --- a/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c +++ b/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c @@ -188,46 +188,51 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, return Average2(Average2(a0, a1), Average2(a2, a3)); } -static uint32_t Predictor5_MIPSdspR2(uint32_t left, const uint32_t* const top) { - return Average3(left, top[0], top[1]); +static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left, + const uint32_t* const top) { + return Average3(*left, top[0], top[1]); } -static uint32_t Predictor6_MIPSdspR2(uint32_t left, const uint32_t* const top) { - return Average2(left, top[-1]); +static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left, + const uint32_t* const top) { + return Average2(*left, top[-1]); } -static uint32_t Predictor7_MIPSdspR2(uint32_t left, const uint32_t* const top) { - return Average2(left, top[0]); +static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left, + const uint32_t* const top) { + return Average2(*left, top[0]); } -static uint32_t Predictor8_MIPSdspR2(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left, + const uint32_t* const top) { (void)left; return Average2(top[-1], top[0]); } -static uint32_t Predictor9_MIPSdspR2(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left, + const uint32_t* const top) { (void)left; return Average2(top[0], top[1]); } -static uint32_t Predictor10_MIPSdspR2(uint32_t left, +static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left, const uint32_t* const top) { - return Average4(left, top[-1], top[0], top[1]); + return Average4(*left, top[-1], top[0], top[1]); } -static uint32_t Predictor11_MIPSdspR2(uint32_t left, +static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left, const uint32_t* const top) { - return Select(top[0], left, top[-1]); + return Select(top[0], *left, top[-1]); } -static uint32_t Predictor12_MIPSdspR2(uint32_t left, +static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left, const uint32_t* const top) { - return ClampedAddSubtractFull(left, top[0], top[-1]); + return ClampedAddSubtractFull(*left, top[0], top[-1]); } -static uint32_t Predictor13_MIPSdspR2(uint32_t left, +static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left, const uint32_t* const top) { - return ClampedAddSubtractHalf(left, top[0], top[-1]); + return ClampedAddSubtractHalf(*left, top[0], top[-1]); } // Add green to blue and red channels (i.e. perform the inverse transform of diff --git a/thirdparty/libwebp/src/dsp/lossless_neon.c b/thirdparty/libwebp/src/dsp/lossless_neon.c index 76a1b6f873..89e3e013a0 100644 --- a/thirdparty/libwebp/src/dsp/lossless_neon.c +++ b/thirdparty/libwebp/src/dsp/lossless_neon.c @@ -188,17 +188,21 @@ static WEBP_INLINE uint32_t Average3_NEON(uint32_t a0, uint32_t a1, return avg; } -static uint32_t Predictor5_NEON(uint32_t left, const uint32_t* const top) { - return Average3_NEON(left, top[0], top[1]); +static uint32_t Predictor5_NEON(const uint32_t* const left, + const uint32_t* const top) { + return Average3_NEON(*left, top[0], top[1]); } -static uint32_t Predictor6_NEON(uint32_t left, const uint32_t* const top) { - return Average2_NEON(left, top[-1]); +static uint32_t Predictor6_NEON(const uint32_t* const left, + const uint32_t* const top) { + return Average2_NEON(*left, top[-1]); } -static uint32_t Predictor7_NEON(uint32_t left, const uint32_t* const top) { - return Average2_NEON(left, top[0]); +static uint32_t Predictor7_NEON(const uint32_t* const left, + const uint32_t* const top) { + return Average2_NEON(*left, top[0]); } -static uint32_t Predictor13_NEON(uint32_t left, const uint32_t* const top) { - return ClampedAddSubtractHalf_NEON(left, top[0], top[-1]); +static uint32_t Predictor13_NEON(const uint32_t* const left, + const uint32_t* const top) { + return ClampedAddSubtractHalf_NEON(*left, top[0], top[-1]); } // Batch versions of those functions. diff --git a/thirdparty/libwebp/src/dsp/lossless_sse2.c b/thirdparty/libwebp/src/dsp/lossless_sse2.c index 3a0eb440db..396cb0bdfc 100644 --- a/thirdparty/libwebp/src/dsp/lossless_sse2.c +++ b/thirdparty/libwebp/src/dsp/lossless_sse2.c @@ -138,42 +138,51 @@ static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1, return output; } -static uint32_t Predictor5_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average3_SSE2(left, top[0], top[1]); +static uint32_t Predictor5_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average3_SSE2(*left, top[0], top[1]); return pred; } -static uint32_t Predictor6_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2_SSE2(left, top[-1]); +static uint32_t Predictor6_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average2_SSE2(*left, top[-1]); return pred; } -static uint32_t Predictor7_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2_SSE2(left, top[0]); +static uint32_t Predictor7_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average2_SSE2(*left, top[0]); return pred; } -static uint32_t Predictor8_SSE2(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor8_SSE2(const uint32_t* const left, + const uint32_t* const top) { const uint32_t pred = Average2_SSE2(top[-1], top[0]); (void)left; return pred; } -static uint32_t Predictor9_SSE2(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor9_SSE2(const uint32_t* const left, + const uint32_t* const top) { const uint32_t pred = Average2_SSE2(top[0], top[1]); (void)left; return pred; } -static uint32_t Predictor10_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average4_SSE2(left, top[-1], top[0], top[1]); +static uint32_t Predictor10_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average4_SSE2(*left, top[-1], top[0], top[1]); return pred; } -static uint32_t Predictor11_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Select_SSE2(top[0], left, top[-1]); +static uint32_t Predictor11_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Select_SSE2(top[0], *left, top[-1]); return pred; } -static uint32_t Predictor12_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractFull_SSE2(left, top[0], top[-1]); +static uint32_t Predictor12_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = ClampedAddSubtractFull_SSE2(*left, top[0], top[-1]); return pred; } -static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractHalf_SSE2(left, top[0], top[-1]); +static uint32_t Predictor13_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = ClampedAddSubtractHalf_SSE2(*left, top[0], top[-1]); return pred; } diff --git a/thirdparty/libwebp/src/dsp/msa_macro.h b/thirdparty/libwebp/src/dsp/msa_macro.h index de026a1d9e..51f6c643ab 100644 --- a/thirdparty/libwebp/src/dsp/msa_macro.h +++ b/thirdparty/libwebp/src/dsp/msa_macro.h @@ -14,6 +14,10 @@ #ifndef WEBP_DSP_MSA_MACRO_H_ #define WEBP_DSP_MSA_MACRO_H_ +#include "src/dsp/dsp.h" + +#if defined(WEBP_USE_MSA) + #include <stdint.h> #include <msa.h> @@ -1389,4 +1393,5 @@ static WEBP_INLINE uint32_t func_hadd_uh_u32(v8u16 in) { } while (0) #define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__) +#endif // WEBP_USE_MSA #endif // WEBP_DSP_MSA_MACRO_H_ diff --git a/thirdparty/libwebp/src/dsp/neon.h b/thirdparty/libwebp/src/dsp/neon.h index aa1dea1301..c591f9b9a7 100644 --- a/thirdparty/libwebp/src/dsp/neon.h +++ b/thirdparty/libwebp/src/dsp/neon.h @@ -12,10 +12,12 @@ #ifndef WEBP_DSP_NEON_H_ #define WEBP_DSP_NEON_H_ -#include <arm_neon.h> - #include "src/dsp/dsp.h" +#if defined(WEBP_USE_NEON) + +#include <arm_neon.h> + // Right now, some intrinsics functions seem slower, so we disable them // everywhere except newer clang/gcc or aarch64 where the inline assembly is // incompatible. @@ -98,4 +100,5 @@ static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) { } while (0) #endif +#endif // WEBP_USE_NEON #endif // WEBP_DSP_NEON_H_ diff --git a/thirdparty/libwebp/src/dsp/yuv.h b/thirdparty/libwebp/src/dsp/yuv.h index c12be1d094..66a397d117 100644 --- a/thirdparty/libwebp/src/dsp/yuv.h +++ b/thirdparty/libwebp/src/dsp/yuv.h @@ -10,7 +10,7 @@ // inline YUV<->RGB conversion function // // The exact naming is Y'CbCr, following the ITU-R BT.601 standard. -// More information at: http://en.wikipedia.org/wiki/YCbCr +// More information at: https://en.wikipedia.org/wiki/YCbCr // Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16 // U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128 // V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128 diff --git a/thirdparty/libwebp/src/enc/frame_enc.c b/thirdparty/libwebp/src/enc/frame_enc.c index af538d83ba..b93d9e5b99 100644 --- a/thirdparty/libwebp/src/enc/frame_enc.c +++ b/thirdparty/libwebp/src/enc/frame_enc.c @@ -778,6 +778,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { // Roughly refresh the proba eight times per pass int max_count = (enc->mb_w_ * enc->mb_h_) >> 3; int num_pass_left = enc->config_->pass; + int remaining_progress = 40; // percents const int do_search = enc->do_search_; VP8EncIterator it; VP8EncProba* const proba = &enc->proba_; @@ -805,6 +806,9 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { uint64_t size_p0 = 0; uint64_t distortion = 0; int cnt = max_count; + // The final number of passes is not trivial to know in advance. + const int pass_progress = remaining_progress / (2 + num_pass_left); + remaining_progress -= pass_progress; VP8IteratorInit(enc, &it); SetLoopParams(enc, stats.q); if (is_last_pass) { @@ -832,7 +836,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { StoreSideInfo(&it); VP8StoreFilterStats(&it); VP8IteratorExport(&it); - ok = VP8IteratorProgress(&it, 20); + ok = VP8IteratorProgress(&it, pass_progress); } VP8IteratorSaveBoundary(&it); } while (ok && VP8IteratorNext(&it)); @@ -878,7 +882,8 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0, (const uint8_t*)proba->coeffs_, 1); } - ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_); + ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + remaining_progress, + &enc->percent_); return PostLoopFinalize(&it, ok); } diff --git a/thirdparty/libwebp/src/enc/predictor_enc.c b/thirdparty/libwebp/src/enc/predictor_enc.c index 2e6762ea0d..2b5c767280 100644 --- a/thirdparty/libwebp/src/enc/predictor_enc.c +++ b/thirdparty/libwebp/src/enc/predictor_enc.c @@ -249,7 +249,7 @@ static WEBP_INLINE void GetResidual( } else if (x == 0) { predict = upper_row[x]; // Top. } else { - predict = pred_func(current_row[x - 1], upper_row + x); + predict = pred_func(¤t_row[x - 1], upper_row + x); } #if (WEBP_NEAR_LOSSLESS == 1) if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 || diff --git a/thirdparty/libwebp/src/enc/quant_enc.c b/thirdparty/libwebp/src/enc/quant_enc.c index 01eb565c7f..6cede28ab4 100644 --- a/thirdparty/libwebp/src/enc/quant_enc.c +++ b/thirdparty/libwebp/src/enc/quant_enc.c @@ -585,6 +585,9 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, return rate * lambda + RD_DISTO_MULT * distortion; } +// Coefficient type. +enum { TYPE_I16_AC = 0, TYPE_I16_DC = 1, TYPE_CHROMA_A = 2, TYPE_I4_AC = 3 }; + static int TrellisQuantizeBlock(const VP8Encoder* const enc, int16_t in[16], int16_t out[16], int ctx0, int coeff_type, @@ -593,7 +596,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; CostArrayPtr const costs = (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type]; - const int first = (coeff_type == 0) ? 1 : 0; + const int first = (coeff_type == TYPE_I16_AC) ? 1 : 0; Node nodes[16][NUM_NODES]; ScoreState score_states[2][NUM_NODES]; ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA); @@ -657,16 +660,17 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, // test all alternate level values around level0. for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { Node* const cur = &NODE(n, m); - int level = level0 + m; + const int level = level0 + m; const int ctx = (level > 2) ? 2 : level; const int band = VP8EncBands[n + 1]; score_t base_score; - score_t best_cur_score = MAX_COST; - int best_prev = 0; // default, in case + score_t best_cur_score; + int best_prev; + score_t cost, score; - ss_cur[m].score = MAX_COST; ss_cur[m].costs = costs[n + 1][ctx]; if (level < 0 || level > thresh_level) { + ss_cur[m].score = MAX_COST; // Node is dead. continue; } @@ -682,18 +686,24 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, } // Inspect all possible non-dead predecessors. Retain only the best one. - for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) { + // The base_score is added to all scores so it is only added for the final + // value after the loop. + cost = VP8LevelCost(ss_prev[-MIN_DELTA].costs, level); + best_cur_score = + ss_prev[-MIN_DELTA].score + RDScoreTrellis(lambda, cost, 0); + best_prev = -MIN_DELTA; + for (p = -MIN_DELTA + 1; p <= MAX_DELTA; ++p) { // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically // eliminated since their score can't be better than the current best. - const score_t cost = VP8LevelCost(ss_prev[p].costs, level); + cost = VP8LevelCost(ss_prev[p].costs, level); // Examine node assuming it's a non-terminal one. - const score_t score = - base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0); + score = ss_prev[p].score + RDScoreTrellis(lambda, cost, 0); if (score < best_cur_score) { best_cur_score = score; best_prev = p; } } + best_cur_score += base_score; // Store best finding in current node. cur->sign = sign; cur->level = level; @@ -701,11 +711,11 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, ss_cur[m].score = best_cur_score; // Now, record best terminal node (and thus best entry in the graph). - if (level != 0) { + if (level != 0 && best_cur_score < best_score) { const score_t last_pos_cost = (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0; const score_t last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0); - const score_t score = best_cur_score + last_pos_score; + score = best_cur_score + last_pos_score; if (score < best_score) { best_score = score; best_path[0] = n; // best eob position @@ -717,10 +727,16 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, } // Fresh start - memset(in + first, 0, (16 - first) * sizeof(*in)); - memset(out + first, 0, (16 - first) * sizeof(*out)); + // Beware! We must preserve in[0]/out[0] value for TYPE_I16_AC case. + if (coeff_type == TYPE_I16_AC) { + memset(in + 1, 0, 15 * sizeof(*in)); + memset(out + 1, 0, 15 * sizeof(*out)); + } else { + memset(in, 0, 16 * sizeof(*in)); + memset(out, 0, 16 * sizeof(*out)); + } if (best_path[0] == -1) { - return 0; // skip! + return 0; // skip! } { @@ -775,9 +791,9 @@ static int ReconstructIntra16(VP8EncIterator* const it, for (y = 0, n = 0; y < 4; ++y) { for (x = 0; x < 4; ++x, ++n) { const int ctx = it->top_nz_[x] + it->left_nz_[y]; - const int non_zero = - TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0, - &dqm->y1_, dqm->lambda_trellis_i16_); + const int non_zero = TrellisQuantizeBlock( + enc, tmp[n], rd->y_ac_levels[n], ctx, TYPE_I16_AC, &dqm->y1_, + dqm->lambda_trellis_i16_); it->top_nz_[x] = it->left_nz_[y] = non_zero; rd->y_ac_levels[n][0] = 0; nz |= non_zero << n; @@ -818,7 +834,7 @@ static int ReconstructIntra4(VP8EncIterator* const it, if (DO_TRELLIS_I4 && it->do_trellis_) { const int x = it->i4_ & 3, y = it->i4_ >> 2; const int ctx = it->top_nz_[x] + it->left_nz_[y]; - nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_, + nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, TYPE_I4_AC, &dqm->y1_, dqm->lambda_trellis_i4_); } else { nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_); @@ -927,9 +943,9 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x, ++n) { const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; - const int non_zero = - TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2, - &dqm->uv_, dqm->lambda_trellis_uv_); + const int non_zero = TrellisQuantizeBlock( + enc, tmp[n], rd->uv_levels[n], ctx, TYPE_CHROMA_A, &dqm->uv_, + dqm->lambda_trellis_uv_); it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero; nz |= non_zero << n; } diff --git a/thirdparty/libwebp/src/enc/vp8i_enc.h b/thirdparty/libwebp/src/enc/vp8i_enc.h index 67e9509367..b4bba08f27 100644 --- a/thirdparty/libwebp/src/enc/vp8i_enc.h +++ b/thirdparty/libwebp/src/enc/vp8i_enc.h @@ -32,7 +32,7 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 1 #define ENC_MIN_VERSION 2 -#define ENC_REV_VERSION 1 +#define ENC_REV_VERSION 2 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost diff --git a/thirdparty/libwebp/src/mux/muxi.h b/thirdparty/libwebp/src/mux/muxi.h index 330da66754..d9bf9b3770 100644 --- a/thirdparty/libwebp/src/mux/muxi.h +++ b/thirdparty/libwebp/src/mux/muxi.h @@ -29,7 +29,7 @@ extern "C" { #define MUX_MAJ_VERSION 1 #define MUX_MIN_VERSION 2 -#define MUX_REV_VERSION 1 +#define MUX_REV_VERSION 2 // Chunk object. typedef struct WebPChunk WebPChunk; diff --git a/thirdparty/libwebp/src/utils/huffman_encode_utils.c b/thirdparty/libwebp/src/utils/huffman_encode_utils.c index fd7a47d8f7..585db91951 100644 --- a/thirdparty/libwebp/src/utils/huffman_encode_utils.c +++ b/thirdparty/libwebp/src/utils/huffman_encode_utils.c @@ -161,7 +161,7 @@ static void SetBitDepths(const HuffmanTree* const tree, // especially when population counts are longer than 2**tree_limit, but // we are not planning to use this with extremely long blocks. // -// See http://en.wikipedia.org/wiki/Huffman_coding +// See https://en.wikipedia.org/wiki/Huffman_coding static void GenerateOptimalTree(const uint32_t* const histogram, int histogram_size, HuffmanTree* tree, int tree_depth_limit, diff --git a/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c b/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c index f65b6cdbb6..97e7893704 100644 --- a/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c +++ b/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c @@ -30,7 +30,7 @@ #define DFIX 4 // extra precision for ordered dithering #define DSIZE 4 // dithering size (must be a power of two) -// cf. http://en.wikipedia.org/wiki/Ordered_dithering +// cf. https://en.wikipedia.org/wiki/Ordered_dithering static const uint8_t kOrderedDither[DSIZE][DSIZE] = { { 0, 8, 2, 10 }, // coefficients are in DFIX fixed-point precision { 12, 4, 14, 6 }, diff --git a/thirdparty/libwebp/src/utils/utils.c b/thirdparty/libwebp/src/utils/utils.c index 9e464c16ce..a7c3a70fef 100644 --- a/thirdparty/libwebp/src/utils/utils.c +++ b/thirdparty/libwebp/src/utils/utils.c @@ -23,7 +23,7 @@ // alloc/free etc) is printed. For debugging/tuning purpose only (it's slow, // and not multi-thread safe!). // An interesting alternative is valgrind's 'massif' tool: -// http://valgrind.org/docs/manual/ms-manual.html +// https://valgrind.org/docs/manual/ms-manual.html // Here is an example command line: /* valgrind --tool=massif --massif-out-file=massif.out \ --stacks=yes --alloc-fn=WebPSafeMalloc --alloc-fn=WebPSafeCalloc diff --git a/thirdparty/libwebp/src/webp/decode.h b/thirdparty/libwebp/src/webp/decode.h index 44fcd64a84..d98247509a 100644 --- a/thirdparty/libwebp/src/webp/decode.h +++ b/thirdparty/libwebp/src/webp/decode.h @@ -85,7 +85,7 @@ WEBP_EXTERN uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size, // Upon return, the Y buffer has a stride returned as '*stride', while U and V // have a common stride returned as '*uv_stride'. // Return NULL in case of error. -// (*) Also named Y'CbCr. See: http://en.wikipedia.org/wiki/YCbCr +// (*) Also named Y'CbCr. See: https://en.wikipedia.org/wiki/YCbCr WEBP_EXTERN uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size, int* width, int* height, uint8_t** u, uint8_t** v, |