libwebp: Sync with upstream 1.2.4

Changes: https://chromium.googlesource.com/webm/libwebp/+/1.2.4/NEWS
author: Rémi Verschelde <rverschelde@gmail.com> 2022-08-23 16:47:48 +0200
committer: Rémi Verschelde <rverschelde@gmail.com> 2022-08-23 19:05:53 +0200
commit: e5e34f21fc36c8a162bb46e845bb3465cd434150 (patch)
tree: 604818f4a22ad5ab35c8defba0baedad40d203ed /thirdparty/libwebp/src/dsp
parent: e95872eb7f4e94f8daac565d67020bb2260e92bd (diff)
11 files changed, 283 insertions, 545 deletions
diff --git a/thirdparty/libwebp/src/dsp/alpha_processing_neon.c b/thirdparty/libwebp/src/dsp/alpha_processing_neon.c
index 9e0ace9421..6716fb77f0 100644
--- a/thirdparty/libwebp/src/dsp/alpha_processing_neon.c
+++ b/thirdparty/libwebp/src/dsp/alpha_processing_neon.c
@@ -83,7 +83,7 @@ static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
 static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
                               int alpha_stride, int width, int height,
                               uint8_t* WEBP_RESTRICT dst, int dst_stride) {
-  uint32_t alpha_mask = 0xffffffffu;
+  uint32_t alpha_mask = 0xffu;
   uint8x8_t mask8 = vdup_n_u8(0xff);
   uint32_t tmp[2];
   int i, j;
@@ -107,6 +107,7 @@ static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
     dst += dst_stride;
   }
   vst1_u8((uint8_t*)tmp, mask8);
+  alpha_mask *= 0x01010101;
   alpha_mask &= tmp[0];
   alpha_mask &= tmp[1];
   return (alpha_mask != 0xffffffffu);
@@ -135,7 +136,7 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha,
 static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
                              int width, int height,
                              uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
-  uint32_t alpha_mask = 0xffffffffu;
+  uint32_t alpha_mask = 0xffu;
   uint8x8_t mask8 = vdup_n_u8(0xff);
   uint32_t tmp[2];
   int i, j;
@@ -157,6 +158,7 @@ static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
     alpha += alpha_stride;
   }
   vst1_u8((uint8_t*)tmp, mask8);
+  alpha_mask *= 0x01010101;
   alpha_mask &= tmp[0];
   alpha_mask &= tmp[1];
   return (alpha_mask == 0xffffffffu);
diff --git a/thirdparty/libwebp/src/dsp/cpu.c b/thirdparty/libwebp/src/dsp/cpu.c
index 3145e190a4..a4ba7f2cb7 100644
--- a/thirdparty/libwebp/src/dsp/cpu.c
+++ b/thirdparty/libwebp/src/dsp/cpu.c
@@ -11,7 +11,7 @@
 //
 // Author: Christian Duvivier (cduvivier@google.com)
 
-#include "src/dsp/dsp.h"
+#include "src/dsp/cpu.h"
 
 #if defined(WEBP_HAVE_NEON_RTCD)
 #include <stdio.h>
diff --git a/thirdparty/libwebp/src/dsp/cpu.h b/thirdparty/libwebp/src/dsp/cpu.h
new file mode 100644
index 0000000000..57a40d87d4
--- /dev/null
+++ b/thirdparty/libwebp/src/dsp/cpu.h
@@ -0,0 +1,254 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   CPU detection functions and macros.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DSP_CPU_H_
+#define WEBP_DSP_CPU_H_
+
+#ifdef HAVE_CONFIG_H
+#include "src/webp/config.h"
+#endif
+
+#include "src/webp/types.h"
+
+#if defined(__GNUC__)
+#define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
+#define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
+#else
+#define LOCAL_GCC_VERSION 0
+#define LOCAL_GCC_PREREQ(maj, min) 0
+#endif
+
+#if defined(__clang__)
+#define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
+#define LOCAL_CLANG_PREREQ(maj, min) \
+  (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
+#else
+#define LOCAL_CLANG_VERSION 0
+#define LOCAL_CLANG_PREREQ(maj, min) 0
+#endif
+
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
+#if !defined(HAVE_CONFIG_H)
+#if defined(_MSC_VER) && _MSC_VER > 1310 && \
+    (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
+    (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_SSE41  // Visual C++ SSE4.1 targets
+#endif
+#endif
+
+// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
+// files without intrinsics, allowing the corresponding Init() to be called.
+// Files containing intrinsics will need to be built targeting the instruction
+// set so should succeed on one of the earlier tests.
+#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
+    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
+#define WEBP_USE_SSE2
+#endif
+
+#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
+#define WEBP_HAVE_SSE2
+#endif
+
+#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
+    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
+#define WEBP_USE_SSE41
+#endif
+
+#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
+#define WEBP_HAVE_SSE41
+#endif
+
+#undef WEBP_MSC_SSE41
+#undef WEBP_MSC_SSE2
+
+// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
+// inline assembly would need to be modified for use with Native Client.
+#if ((defined(__ARM_NEON__) || defined(__aarch64__)) &&       \
+     (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
+    !defined(__native_client__)
+#define WEBP_USE_NEON
+#endif
+
+#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
+    defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
+#define WEBP_ANDROID_NEON  // Android targets that may have NEON
+#define WEBP_USE_NEON
+#endif
+
+// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
+// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
+// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with
+// vtbl4_u8(); a fix was made in 16.6.
+#if defined(_MSC_VER) && ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
+                          (_MSC_VER >= 1926 && defined(_M_ARM64)))
+#define WEBP_USE_NEON
+#define WEBP_USE_INTRINSICS
+#endif
+
+#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
+#define WEBP_HAVE_NEON
+#endif
+
+#if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \
+    (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
+#define WEBP_USE_MIPS32
+#if (__mips_isa_rev >= 2)
+#define WEBP_USE_MIPS32_R2
+#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
+#define WEBP_USE_MIPS_DSP_R2
+#endif
+#endif
+#endif
+
+#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
+#define WEBP_USE_MSA
+#endif
+
+#ifndef WEBP_DSP_OMIT_C_CODE
+#define WEBP_DSP_OMIT_C_CODE 1
+#endif
+
+#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
+#define WEBP_NEON_OMIT_C_CODE 1
+#else
+#define WEBP_NEON_OMIT_C_CODE 0
+#endif
+
+#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || \
+      defined(__aarch64__))
+#define WEBP_NEON_WORK_AROUND_GCC 1
+#else
+#define WEBP_NEON_WORK_AROUND_GCC 0
+#endif
+
+// This macro prevents thread_sanitizer from reporting known concurrent writes.
+#define WEBP_TSAN_IGNORE_FUNCTION
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#undef WEBP_TSAN_IGNORE_FUNCTION
+#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
+#endif
+#endif
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define WEBP_MSAN
+#endif
+#endif
+
+#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
+#include <pthread.h>  // NOLINT
+
+#define WEBP_DSP_INIT(func)                                         \
+  do {                                                              \
+    static volatile VP8CPUInfo func##_last_cpuinfo_used =           \
+        (VP8CPUInfo)&func##_last_cpuinfo_used;                      \
+    static pthread_mutex_t func##_lock = PTHREAD_MUTEX_INITIALIZER; \
+    if (pthread_mutex_lock(&func##_lock)) break;                    \
+    if (func##_last_cpuinfo_used != VP8GetCPUInfo) func();          \
+    func##_last_cpuinfo_used = VP8GetCPUInfo;                       \
+    (void)pthread_mutex_unlock(&func##_lock);                       \
+  } while (0)
+#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
+#define WEBP_DSP_INIT(func)                               \
+  do {                                                    \
+    static volatile VP8CPUInfo func##_last_cpuinfo_used = \
+        (VP8CPUInfo)&func##_last_cpuinfo_used;            \
+    if (func##_last_cpuinfo_used == VP8GetCPUInfo) break; \
+    func();                                               \
+    func##_last_cpuinfo_used = VP8GetCPUInfo;             \
+  } while (0)
+#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
+
+// Defines an Init + helper function that control multiple initialization of
+// function pointers / tables.
+/* Usage:
+   WEBP_DSP_INIT_FUNC(InitFunc) {
+     ...function body
+   }
+*/
+#define WEBP_DSP_INIT_FUNC(name)                                            \
+  static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void);                  \
+  WEBP_TSAN_IGNORE_FUNCTION void name(void) { WEBP_DSP_INIT(name##_body); } \
+  static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void)
+
+#define WEBP_UBSAN_IGNORE_UNDEF
+#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
+#if defined(__clang__) && defined(__has_attribute)
+#if __has_attribute(no_sanitize)
+// This macro prevents the undefined behavior sanitizer from reporting
+// failures. This is only meant to silence unaligned loads on platforms that
+// are known to support them.
+#undef WEBP_UBSAN_IGNORE_UNDEF
+#define WEBP_UBSAN_IGNORE_UNDEF __attribute__((no_sanitize("undefined")))
+
+// This macro prevents the undefined behavior sanitizer from reporting
+// failures related to unsigned integer overflows. This is only meant to
+// silence cases where this well defined behavior is expected.
+#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
+#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
+  __attribute__((no_sanitize("unsigned-integer-overflow")))
+#endif
+#endif
+
+// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
+// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
+#if !defined(WEBP_OFFSET_PTR)
+#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
+#endif
+
+// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
+#if !defined(WEBP_SWAP_16BIT_CSP)
+#define WEBP_SWAP_16BIT_CSP 0
+#endif
+
+// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
+#if !defined(WORDS_BIGENDIAN) &&                   \
+    (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
+     (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
+#define WORDS_BIGENDIAN
+#endif
+
+typedef enum {
+  kSSE2,
+  kSSE3,
+  kSlowSSSE3,  // special feature for slow SSSE3 architectures
+  kSSE4_1,
+  kAVX,
+  kAVX2,
+  kNEON,
+  kMIPS32,
+  kMIPSdspR2,
+  kMSA
+} CPUFeature;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// returns true if the CPU supports the feature.
+typedef int (*VP8CPUInfo)(CPUFeature feature);
+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  // WEBP_DSP_CPU_H_
diff --git a/thirdparty/libwebp/src/dsp/dsp.h b/thirdparty/libwebp/src/dsp/dsp.h
index c4f57e4d5b..d2000b8efc 100644
--- a/thirdparty/libwebp/src/dsp/dsp.h
+++ b/thirdparty/libwebp/src/dsp/dsp.h
@@ -18,6 +18,7 @@
 #include "src/webp/config.h"
 #endif
 
+#include "src/dsp/cpu.h"
 #include "src/webp/types.h"
 
 #ifdef __cplusplus
@@ -43,225 +44,6 @@ extern "C" {
 #define WEBP_RESTRICT
 #endif
 
-//------------------------------------------------------------------------------
-// CPU detection
-
-#if defined(__GNUC__)
-# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
-# define LOCAL_GCC_PREREQ(maj, min) \
-    (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
-#else
-# define LOCAL_GCC_VERSION 0
-# define LOCAL_GCC_PREREQ(maj, min) 0
-#endif
-
-#if defined(__clang__)
-# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
-# define LOCAL_CLANG_PREREQ(maj, min) \
-    (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
-#else
-# define LOCAL_CLANG_VERSION 0
-# define LOCAL_CLANG_PREREQ(maj, min) 0
-#endif
-
-#ifndef __has_builtin
-# define __has_builtin(x) 0
-#endif
-
-#if !defined(HAVE_CONFIG_H)
-#if defined(_MSC_VER) && _MSC_VER > 1310 && \
-    (defined(_M_X64) || defined(_M_IX86))
-#define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
-    (defined(_M_X64) || defined(_M_IX86))
-#define WEBP_MSC_SSE41  // Visual C++ SSE4.1 targets
-#endif
-#endif
-
-// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
-// files without intrinsics, allowing the corresponding Init() to be called.
-// Files containing intrinsics will need to be built targeting the instruction
-// set so should succeed on one of the earlier tests.
-#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
-    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
-#define WEBP_USE_SSE2
-#endif
-
-#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
-#define WEBP_HAVE_SSE2
-#endif
-
-#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
-    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
-#define WEBP_USE_SSE41
-#endif
-
-#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
-#define WEBP_HAVE_SSE41
-#endif
-
-#undef WEBP_MSC_SSE41
-#undef WEBP_MSC_SSE2
-
-// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
-// inline assembly would need to be modified for use with Native Client.
-#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \
-     (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
-    !defined(__native_client__)
-#define WEBP_USE_NEON
-#endif
-
-#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
-    defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
-#define WEBP_ANDROID_NEON  // Android targets that may have NEON
-#define WEBP_USE_NEON
-#endif
-
-// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
-// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
-// arm_neon.h.
-#if defined(_MSC_VER) && \
-  ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
-   (_MSC_VER >= 1920 && defined(_M_ARM64)))
-#define WEBP_USE_NEON
-#define WEBP_USE_INTRINSICS
-#endif
-
-#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
-#define WEBP_HAVE_NEON
-#endif
-
-#if defined(__mips__) && !defined(__mips64) && \
-    defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
-#define WEBP_USE_MIPS32
-#if (__mips_isa_rev >= 2)
-#define WEBP_USE_MIPS32_R2
-#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
-#define WEBP_USE_MIPS_DSP_R2
-#endif
-#endif
-#endif
-
-#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
-#define WEBP_USE_MSA
-#endif
-
-#ifndef WEBP_DSP_OMIT_C_CODE
-#define WEBP_DSP_OMIT_C_CODE 1
-#endif
-
-#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
-#define WEBP_NEON_OMIT_C_CODE 1
-#else
-#define WEBP_NEON_OMIT_C_CODE 0
-#endif
-
-#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
-#define WEBP_NEON_WORK_AROUND_GCC 1
-#else
-#define WEBP_NEON_WORK_AROUND_GCC 0
-#endif
-
-// This macro prevents thread_sanitizer from reporting known concurrent writes.
-#define WEBP_TSAN_IGNORE_FUNCTION
-#if defined(__has_feature)
-#if __has_feature(thread_sanitizer)
-#undef WEBP_TSAN_IGNORE_FUNCTION
-#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
-#endif
-#endif
-
-#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
-#include <pthread.h>  // NOLINT
-
-#define WEBP_DSP_INIT(func) do {                                    \
-  static volatile VP8CPUInfo func ## _last_cpuinfo_used =           \
-      (VP8CPUInfo)&func ## _last_cpuinfo_used;                      \
-  static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \
-  if (pthread_mutex_lock(&func ## _lock)) break;                    \
-  if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func();          \
-  func ## _last_cpuinfo_used = VP8GetCPUInfo;                       \
-  (void)pthread_mutex_unlock(&func ## _lock);                       \
-} while (0)
-#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
-#define WEBP_DSP_INIT(func) do {                                    \
-  static volatile VP8CPUInfo func ## _last_cpuinfo_used =           \
-      (VP8CPUInfo)&func ## _last_cpuinfo_used;                      \
-  if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break;           \
-  func();                                                           \
-  func ## _last_cpuinfo_used = VP8GetCPUInfo;                       \
-} while (0)
-#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
-
-// Defines an Init + helper function that control multiple initialization of
-// function pointers / tables.
-/* Usage:
-   WEBP_DSP_INIT_FUNC(InitFunc) {
-     ...function body
-   }
-*/
-#define WEBP_DSP_INIT_FUNC(name)                             \
-  static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \
-  WEBP_TSAN_IGNORE_FUNCTION void name(void) {                \
-    WEBP_DSP_INIT(name ## _body);                            \
-  }                                                          \
-  static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void)
-
-#define WEBP_UBSAN_IGNORE_UNDEF
-#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
-#if defined(__clang__) && defined(__has_attribute)
-#if __has_attribute(no_sanitize)
-// This macro prevents the undefined behavior sanitizer from reporting
-// failures. This is only meant to silence unaligned loads on platforms that
-// are known to support them.
-#undef WEBP_UBSAN_IGNORE_UNDEF
-#define WEBP_UBSAN_IGNORE_UNDEF \
-  __attribute__((no_sanitize("undefined")))
-
-// This macro prevents the undefined behavior sanitizer from reporting
-// failures related to unsigned integer overflows. This is only meant to
-// silence cases where this well defined behavior is expected.
-#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
-#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
-  __attribute__((no_sanitize("unsigned-integer-overflow")))
-#endif
-#endif
-
-// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
-// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
-#if !defined(WEBP_OFFSET_PTR)
-#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
-#endif
-
-// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
-#if !defined(WEBP_SWAP_16BIT_CSP)
-#define WEBP_SWAP_16BIT_CSP 0
-#endif
-
-// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
-#if !defined(WORDS_BIGENDIAN) && \
-    (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
-     (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
-#define WORDS_BIGENDIAN
-#endif
-
-typedef enum {
-  kSSE2,
-  kSSE3,
-  kSlowSSSE3,  // special feature for slow SSSE3 architectures
-  kSSE4_1,
-  kAVX,
-  kAVX2,
-  kNEON,
-  kMIPS32,
-  kMIPSdspR2,
-  kMSA
-} CPUFeature;
-// returns true if the CPU supports the feature.
-typedef int (*VP8CPUInfo)(CPUFeature feature);
-WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
 
 //------------------------------------------------------------------------------
 // Init stub generator
@@ -550,15 +332,6 @@ extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
 extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
                                     uint8_t* u, uint8_t* v, int width);
 
-// utilities for accurate RGB->YUV conversion
-extern uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* src, const uint16_t* ref,
-                                       uint16_t* dst, int len);
-extern void (*WebPSharpYUVUpdateRGB)(const int16_t* src, const int16_t* ref,
-                                     int16_t* dst, int len);
-extern void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B,
-                                     int len,
-                                     const uint16_t* best_y, uint16_t* out);
-
 // Must be called before using the above.
 void WebPInitConvertARGBToYUV(void);
 
diff --git a/thirdparty/libwebp/src/dsp/lossless.h b/thirdparty/libwebp/src/dsp/lossless.h
index c26c6bca07..de60d95d0b 100644
--- a/thirdparty/libwebp/src/dsp/lossless.h
+++ b/thirdparty/libwebp/src/dsp/lossless.h
@@ -182,9 +182,9 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
 // -----------------------------------------------------------------------------
 // Huffman-cost related functions.
 
-typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
-typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
-                                       int length);
+typedef float (*VP8LCostFunc)(const uint32_t* population, int length);
+typedef float (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
+                                      int length);
 typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
                                                 const int Y[256]);
 
@@ -198,7 +198,7 @@ typedef struct {        // small struct to hold counters
 } VP8LStreaks;
 
 typedef struct {            // small struct to hold bit entropy results
-  double entropy;           // entropy
+  float entropy;            // entropy
   uint32_t sum;             // sum of the population
   int nonzeros;             // number of non-zero elements in the population
   uint32_t max_val;         // maximum value in the population
diff --git a/thirdparty/libwebp/src/dsp/lossless_enc.c b/thirdparty/libwebp/src/dsp/lossless_enc.c
index 1580631e38..de6c4ace5f 100644
--- a/thirdparty/libwebp/src/dsp/lossless_enc.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc.c
@@ -402,7 +402,7 @@ static float FastLog2Slow_C(uint32_t v) {
 // Compute the combined Shanon's entropy for distribution {X} and {X+Y}
 static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
   int i;
-  double retval = 0.;
+  float retval = 0.f;
   int sumX = 0, sumXY = 0;
   for (i = 0; i < 256; ++i) {
     const int x = X[i];
@@ -418,7 +418,7 @@ static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
     }
   }
   retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
-  return (float)retval;
+  return retval;
 }
 
 void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
@@ -636,17 +636,17 @@ void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
 
 //------------------------------------------------------------------------------
 
-static double ExtraCost_C(const uint32_t* population, int length) {
+static float ExtraCost_C(const uint32_t* population, int length) {
   int i;
-  double cost = 0.;
+  float cost = 0.f;
   for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
   return cost;
 }
 
-static double ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
+static float ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
                                   int length) {
   int i;
-  double cost = 0.;
+  float cost = 0.f;
   for (i = 2; i < length - 2; ++i) {
     const int xy = X[i + 2] + Y[i + 2];
     cost += (i >> 1) * xy;
diff --git a/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c b/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
index 0412a093cf..639f786631 100644
--- a/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
@@ -103,8 +103,8 @@ static float FastLog2Slow_MIPS32(uint32_t v) {
 //     cost += i * *(pop + 1);
 //     pop += 2;
 //   }
-//   return (double)cost;
-static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
+//   return (float)cost;
+static float ExtraCost_MIPS32(const uint32_t* const population, int length) {
   int i, temp0, temp1;
   const uint32_t* pop = &population[4];
   const uint32_t* const LoopEnd = &population[length];
@@ -130,7 +130,7 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
     : "memory", "hi", "lo"
   );
 
-  return (double)((int64_t)temp0 << 32 | temp1);
+  return (float)((int64_t)temp0 << 32 | temp1);
 }
 
 // C version of this function:
@@ -148,9 +148,9 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
 //     pX += 2;
 //     pY += 2;
 //   }
-//   return (double)cost;
-static double ExtraCostCombined_MIPS32(const uint32_t* const X,
-                                       const uint32_t* const Y, int length) {
+//   return (float)cost;
+static float ExtraCostCombined_MIPS32(const uint32_t* const X,
+                                      const uint32_t* const Y, int length) {
   int i, temp0, temp1, temp2, temp3;
   const uint32_t* pX = &X[4];
   const uint32_t* pY = &Y[4];
@@ -183,7 +183,7 @@ static double ExtraCostCombined_MIPS32(const uint32_t* const X,
     : "memory", "hi", "lo"
   );
 
-  return (double)((int64_t)temp0 << 32 | temp1);
+  return (float)((int64_t)temp0 << 32 | temp1);
 }
 
 #define HUFFMAN_COST_PASS                                 \
@@ -347,24 +347,24 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
 static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
                              uint32_t* pout, int size) {
   uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
-  const uint32_t end = ((size) / 4) * 4;
+  const int end = ((size) / 4) * 4;
   const uint32_t* const LoopEnd = pa + end;
   int i;
   ASM_START
   ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)
   ASM_END_0
-  for (i = end; i < size; ++i) pout[i] = pa[i] + pb[i];
+  for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];
 }
 
 static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
   uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
-  const uint32_t end = ((size) / 4) * 4;
+  const int end = ((size) / 4) * 4;
   const uint32_t* const LoopEnd = pa + end;
   int i;
   ASM_START
   ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)
   ASM_END_1
-  for (i = end; i < size; ++i) pout[i] += pa[i];
+  for (i = 0; i < size - end; ++i) pout[i] += pa[i];
 }
 
 #undef ASM_END_1
diff --git a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
index b2f83b871c..948001a3d5 100644
--- a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
@@ -239,7 +239,7 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
 
 static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
   int i;
-  double retval = 0.;
+  float retval = 0.f;
   int sumX = 0, sumXY = 0;
   const __m128i zero = _mm_setzero_si128();
 
@@ -273,7 +273,7 @@ static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
     }
   }
   retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
-  return (float)retval;
+  return retval;
 }
 
 #else
diff --git a/thirdparty/libwebp/src/dsp/yuv.c b/thirdparty/libwebp/src/dsp/yuv.c
index 48466f8b11..d16c13d3ca 100644
--- a/thirdparty/libwebp/src/dsp/yuv.c
+++ b/thirdparty/libwebp/src/dsp/yuv.c
@@ -194,50 +194,6 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
 
 //-----------------------------------------------------------------------------
 
-#if !WEBP_NEON_OMIT_C_CODE
-#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
-static uint16_t clip_y(int v) {
-  return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
-}
-
-static uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src,
-                                  uint16_t* dst, int len) {
-  uint64_t diff = 0;
-  int i;
-  for (i = 0; i < len; ++i) {
-    const int diff_y = ref[i] - src[i];
-    const int new_y = (int)dst[i] + diff_y;
-    dst[i] = clip_y(new_y);
-    diff += (uint64_t)abs(diff_y);
-  }
-  return diff;
-}
-
-static void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src,
-                                int16_t* dst, int len) {
-  int i;
-  for (i = 0; i < len; ++i) {
-    const int diff_uv = ref[i] - src[i];
-    dst[i] += diff_uv;
-  }
-}
-
-static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len,
-                                const uint16_t* best_y, uint16_t* out) {
-  int i;
-  for (i = 0; i < len; ++i, ++A, ++B) {
-    const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4;
-    const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4;
-    out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
-    out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
-  }
-}
-#endif  // !WEBP_NEON_OMIT_C_CODE
-
-#undef MAX_Y
-
-//-----------------------------------------------------------------------------
-
 void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
 void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
 void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
@@ -247,18 +203,9 @@ void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
 void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
                             int src_width, int do_store);
 
-uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src,
-                                uint16_t* dst, int len);
-void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src,
-                              int16_t* dst, int len);
-void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len,
-                              const uint16_t* best_y, uint16_t* out);
-
 extern void WebPInitConvertARGBToYUVSSE2(void);
 extern void WebPInitConvertARGBToYUVSSE41(void);
 extern void WebPInitConvertARGBToYUVNEON(void);
-extern void WebPInitSharpYUVSSE2(void);
-extern void WebPInitSharpYUVNEON(void);
 
 WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
   WebPConvertARGBToY = ConvertARGBToY_C;
@@ -269,17 +216,10 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
 
   WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;
 
-#if !WEBP_NEON_OMIT_C_CODE
-  WebPSharpYUVUpdateY = SharpYUVUpdateY_C;
-  WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C;
-  WebPSharpYUVFilterRow = SharpYUVFilterRow_C;
-#endif
-
   if (VP8GetCPUInfo != NULL) {
 #if defined(WEBP_HAVE_SSE2)
     if (VP8GetCPUInfo(kSSE2)) {
       WebPInitConvertARGBToYUVSSE2();
-      WebPInitSharpYUVSSE2();
     }
 #endif  // WEBP_HAVE_SSE2
 #if defined(WEBP_HAVE_SSE41)
@@ -293,7 +233,6 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
   if (WEBP_NEON_OMIT_C_CODE ||
       (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
     WebPInitConvertARGBToYUVNEON();
-    WebPInitSharpYUVNEON();
   }
 #endif  // WEBP_HAVE_NEON
 
@@ -302,7 +241,4 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
   assert(WebPConvertRGB24ToY != NULL);
   assert(WebPConvertBGR24ToY != NULL);
   assert(WebPConvertRGBA32ToUV != NULL);
-  assert(WebPSharpYUVUpdateY != NULL);
-  assert(WebPSharpYUVUpdateRGB != NULL);
-  assert(WebPSharpYUVFilterRow != NULL);
 }
diff --git a/thirdparty/libwebp/src/dsp/yuv_neon.c b/thirdparty/libwebp/src/dsp/yuv_neon.c
index a34d60248f..ff77b00980 100644
--- a/thirdparty/libwebp/src/dsp/yuv_neon.c
+++ b/thirdparty/libwebp/src/dsp/yuv_neon.c
@@ -173,116 +173,8 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVNEON(void) {
   WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_NEON;
 }
 
-//------------------------------------------------------------------------------
-
-#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
-static uint16_t clip_y_NEON(int v) {
-  return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
-}
-
-static uint64_t SharpYUVUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
-                                     uint16_t* dst, int len) {
-  int i;
-  const int16x8_t zero = vdupq_n_s16(0);
-  const int16x8_t max = vdupq_n_s16(MAX_Y);
-  uint64x2_t sum = vdupq_n_u64(0);
-  uint64_t diff;
-
-  for (i = 0; i + 8 <= len; i += 8) {
-    const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
-    const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
-    const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
-    const int16x8_t D = vsubq_s16(A, B);       // diff_y
-    const int16x8_t F = vaddq_s16(C, D);       // new_y
-    const uint16x8_t H =
-        vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
-    const int16x8_t I = vabsq_s16(D);          // abs(diff_y)
-    vst1q_u16(dst + i, H);
-    sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
-  }
-  diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);
-  for (; i < len; ++i) {
-    const int diff_y = ref[i] - src[i];
-    const int new_y = (int)(dst[i]) + diff_y;
-    dst[i] = clip_y_NEON(new_y);
-    diff += (uint64_t)(abs(diff_y));
-  }
-  return diff;
-}
-
-static void SharpYUVUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
-                                   int16_t* dst, int len) {
-  int i;
-  for (i = 0; i + 8 <= len; i += 8) {
-    const int16x8_t A = vld1q_s16(ref + i);
-    const int16x8_t B = vld1q_s16(src + i);
-    const int16x8_t C = vld1q_s16(dst + i);
-    const int16x8_t D = vsubq_s16(A, B);   // diff_uv
-    const int16x8_t E = vaddq_s16(C, D);   // new_uv
-    vst1q_s16(dst + i, E);
-  }
-  for (; i < len; ++i) {
-    const int diff_uv = ref[i] - src[i];
-    dst[i] += diff_uv;
-  }
-}
-
-static void SharpYUVFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
-                                   const uint16_t* best_y, uint16_t* out) {
-  int i;
-  const int16x8_t max = vdupq_n_s16(MAX_Y);
-  const int16x8_t zero = vdupq_n_s16(0);
-  for (i = 0; i + 8 <= len; i += 8) {
-    const int16x8_t a0 = vld1q_s16(A + i + 0);
-    const int16x8_t a1 = vld1q_s16(A + i + 1);
-    const int16x8_t b0 = vld1q_s16(B + i + 0);
-    const int16x8_t b1 = vld1q_s16(B + i + 1);
-    const int16x8_t a0b1 = vaddq_s16(a0, b1);
-    const int16x8_t a1b0 = vaddq_s16(a1, b0);
-    const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0);  // A0+A1+B0+B1
-    const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1);    // 2*(A0+B1)
-    const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0);    // 2*(A1+B0)
-    const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
-    const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
-    const int16x8_t d0 = vaddq_s16(c1, a0);
-    const int16x8_t d1 = vaddq_s16(c0, a1);
-    const int16x8_t e0 = vrshrq_n_s16(d0, 1);
-    const int16x8_t e1 = vrshrq_n_s16(d1, 1);
-    const int16x8x2_t f = vzipq_s16(e0, e1);
-    const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
-    const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
-    const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
-    const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
-    const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);
-    const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
-    vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
-    vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
-  }
-  for (; i < len; ++i) {
-    const int a0b1 = A[i + 0] + B[i + 1];
-    const int a1b0 = A[i + 1] + B[i + 0];
-    const int a0a1b0b1 = a0b1 + a1b0 + 8;
-    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
-    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
-    out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0);
-    out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1);
-  }
-}
-#undef MAX_Y
-
-//------------------------------------------------------------------------------
-
-extern void WebPInitSharpYUVNEON(void);
-
-WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVNEON(void) {
-  WebPSharpYUVUpdateY = SharpYUVUpdateY_NEON;
-  WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_NEON;
-  WebPSharpYUVFilterRow = SharpYUVFilterRow_NEON;
-}
-
 #else  // !WEBP_USE_NEON
 
 WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVNEON)
-WEBP_DSP_INIT_STUB(WebPInitSharpYUVNEON)
 
 #endif  // WEBP_USE_NEON
diff --git a/thirdparty/libwebp/src/dsp/yuv_sse2.c b/thirdparty/libwebp/src/dsp/yuv_sse2.c
index baa48d5371..970bbb7884 100644
--- a/thirdparty/libwebp/src/dsp/yuv_sse2.c
+++ b/thirdparty/libwebp/src/dsp/yuv_sse2.c
@@ -747,128 +747,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) {
   WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE2;
 }
 
-//------------------------------------------------------------------------------
-
-#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
-static uint16_t clip_y(int v) {
-  return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
-}
-
-static uint64_t SharpYUVUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
-                                     uint16_t* dst, int len) {
-  uint64_t diff = 0;
-  uint32_t tmp[4];
-  int i;
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i max = _mm_set1_epi16(MAX_Y);
-  const __m128i one = _mm_set1_epi16(1);
-  __m128i sum = zero;
-
-  for (i = 0; i + 8 <= len; i += 8) {
-    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
-    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
-    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
-    const __m128i D = _mm_sub_epi16(A, B);       // diff_y
-    const __m128i E = _mm_cmpgt_epi16(zero, D);  // sign (-1 or 0)
-    const __m128i F = _mm_add_epi16(C, D);       // new_y
-    const __m128i G = _mm_or_si128(E, one);      // -1 or 1
-    const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
-    const __m128i I = _mm_madd_epi16(D, G);      // sum(abs(...))
-    _mm_storeu_si128((__m128i*)(dst + i), H);
-    sum = _mm_add_epi32(sum, I);
-  }
-  _mm_storeu_si128((__m128i*)tmp, sum);
-  diff = tmp[3] + tmp[2] + tmp[1] + tmp[0];
-  for (; i < len; ++i) {
-    const int diff_y = ref[i] - src[i];
-    const int new_y = (int)dst[i] + diff_y;
-    dst[i] = clip_y(new_y);
-    diff += (uint64_t)abs(diff_y);
-  }
-  return diff;
-}
-
-static void SharpYUVUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
-                                   int16_t* dst, int len) {
-  int i = 0;
-  for (i = 0; i + 8 <= len; i += 8) {
-    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
-    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
-    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
-    const __m128i D = _mm_sub_epi16(A, B);   // diff_uv
-    const __m128i E = _mm_add_epi16(C, D);   // new_uv
-    _mm_storeu_si128((__m128i*)(dst + i), E);
-  }
-  for (; i < len; ++i) {
-    const int diff_uv = ref[i] - src[i];
-    dst[i] += diff_uv;
-  }
-}
-
-static void SharpYUVFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
-                                   const uint16_t* best_y, uint16_t* out) {
-  int i;
-  const __m128i kCst8 = _mm_set1_epi16(8);
-  const __m128i max = _mm_set1_epi16(MAX_Y);
-  const __m128i zero = _mm_setzero_si128();
-  for (i = 0; i + 8 <= len; i += 8) {
-    const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0));
-    const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1));
-    const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0));
-    const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1));
-    const __m128i a0b1 = _mm_add_epi16(a0, b1);
-    const __m128i a1b0 = _mm_add_epi16(a1, b0);
-    const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0);  // A0+A1+B0+B1
-    const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
-    const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1);    // 2*(A0+B1)
-    const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0);    // 2*(A1+B0)
-    const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
-    const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
-    const __m128i d0 = _mm_add_epi16(c1, a0);
-    const __m128i d1 = _mm_add_epi16(c0, a1);
-    const __m128i e0 = _mm_srai_epi16(d0, 1);
-    const __m128i e1 = _mm_srai_epi16(d1, 1);
-    const __m128i f0 = _mm_unpacklo_epi16(e0, e1);
-    const __m128i f1 = _mm_unpackhi_epi16(e0, e1);
-    const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
-    const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8));
-    const __m128i h0 = _mm_add_epi16(g0, f0);
-    const __m128i h1 = _mm_add_epi16(g1, f1);
-    const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero);
-    const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero);
-    _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0);
-    _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1);
-  }
-  for (; i < len; ++i) {
-    //   (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
-    // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
-    // We reuse the common sub-expressions.
-    const int a0b1 = A[i + 0] + B[i + 1];
-    const int a1b0 = A[i + 1] + B[i + 0];
-    const int a0a1b0b1 = a0b1 + a1b0 + 8;
-    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
-    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
-    out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
-    out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
-  }
-}
-
-#undef MAX_Y
-
-//------------------------------------------------------------------------------
-
-extern void WebPInitSharpYUVSSE2(void);
-
-WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVSSE2(void) {
-  WebPSharpYUVUpdateY = SharpYUVUpdateY_SSE2;
-  WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_SSE2;
-  WebPSharpYUVFilterRow = SharpYUVFilterRow_SSE2;
-}
-
 #else  // !WEBP_USE_SSE2
 
 WEBP_DSP_INIT_STUB(WebPInitSamplersSSE2)
 WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE2)
-WEBP_DSP_INIT_STUB(WebPInitSharpYUVSSE2)
 
 #endif  // WEBP_USE_SSE2
author	Rémi Verschelde <rverschelde@gmail.com>	2022-08-23 16:47:48 +0200
committer	Rémi Verschelde <rverschelde@gmail.com>	2022-08-23 19:05:53 +0200
commit	e5e34f21fc36c8a162bb46e845bb3465cd434150 (patch)
tree	604818f4a22ad5ab35c8defba0baedad40d203ed /thirdparty/libwebp/src/dsp
parent	e95872eb7f4e94f8daac565d67020bb2260e92bd (diff)