summaryrefslogtreecommitdiff
path: root/thirdparty/libwebp/src/dsp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/libwebp/src/dsp')
-rw-r--r--thirdparty/libwebp/src/dsp/alpha_processing_neon.c6
-rw-r--r--thirdparty/libwebp/src/dsp/cpu.c2
-rw-r--r--thirdparty/libwebp/src/dsp/cpu.h254
-rw-r--r--thirdparty/libwebp/src/dsp/dsp.h229
-rw-r--r--thirdparty/libwebp/src/dsp/lossless.h8
-rw-r--r--thirdparty/libwebp/src/dsp/lossless_enc.c12
-rw-r--r--thirdparty/libwebp/src/dsp/lossless_enc_mips32.c22
-rw-r--r--thirdparty/libwebp/src/dsp/lossless_enc_sse2.c4
-rw-r--r--thirdparty/libwebp/src/dsp/yuv.c64
-rw-r--r--thirdparty/libwebp/src/dsp/yuv_neon.c108
-rw-r--r--thirdparty/libwebp/src/dsp/yuv_sse2.c119
11 files changed, 283 insertions, 545 deletions
diff --git a/thirdparty/libwebp/src/dsp/alpha_processing_neon.c b/thirdparty/libwebp/src/dsp/alpha_processing_neon.c
index 9e0ace9421..6716fb77f0 100644
--- a/thirdparty/libwebp/src/dsp/alpha_processing_neon.c
+++ b/thirdparty/libwebp/src/dsp/alpha_processing_neon.c
@@ -83,7 +83,7 @@ static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
int alpha_stride, int width, int height,
uint8_t* WEBP_RESTRICT dst, int dst_stride) {
- uint32_t alpha_mask = 0xffffffffu;
+ uint32_t alpha_mask = 0xffu;
uint8x8_t mask8 = vdup_n_u8(0xff);
uint32_t tmp[2];
int i, j;
@@ -107,6 +107,7 @@ static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
dst += dst_stride;
}
vst1_u8((uint8_t*)tmp, mask8);
+ alpha_mask *= 0x01010101;
alpha_mask &= tmp[0];
alpha_mask &= tmp[1];
return (alpha_mask != 0xffffffffu);
@@ -135,7 +136,7 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha,
static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
int width, int height,
uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
- uint32_t alpha_mask = 0xffffffffu;
+ uint32_t alpha_mask = 0xffu;
uint8x8_t mask8 = vdup_n_u8(0xff);
uint32_t tmp[2];
int i, j;
@@ -157,6 +158,7 @@ static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
alpha += alpha_stride;
}
vst1_u8((uint8_t*)tmp, mask8);
+ alpha_mask *= 0x01010101;
alpha_mask &= tmp[0];
alpha_mask &= tmp[1];
return (alpha_mask == 0xffffffffu);
diff --git a/thirdparty/libwebp/src/dsp/cpu.c b/thirdparty/libwebp/src/dsp/cpu.c
index 3145e190a4..a4ba7f2cb7 100644
--- a/thirdparty/libwebp/src/dsp/cpu.c
+++ b/thirdparty/libwebp/src/dsp/cpu.c
@@ -11,7 +11,7 @@
//
// Author: Christian Duvivier (cduvivier@google.com)
-#include "src/dsp/dsp.h"
+#include "src/dsp/cpu.h"
#if defined(WEBP_HAVE_NEON_RTCD)
#include <stdio.h>
diff --git a/thirdparty/libwebp/src/dsp/cpu.h b/thirdparty/libwebp/src/dsp/cpu.h
new file mode 100644
index 0000000000..57a40d87d4
--- /dev/null
+++ b/thirdparty/libwebp/src/dsp/cpu.h
@@ -0,0 +1,254 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// CPU detection functions and macros.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DSP_CPU_H_
+#define WEBP_DSP_CPU_H_
+
+#ifdef HAVE_CONFIG_H
+#include "src/webp/config.h"
+#endif
+
+#include "src/webp/types.h"
+
+#if defined(__GNUC__)
+#define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
+#define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
+#else
+#define LOCAL_GCC_VERSION 0
+#define LOCAL_GCC_PREREQ(maj, min) 0
+#endif
+
+#if defined(__clang__)
+#define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
+#define LOCAL_CLANG_PREREQ(maj, min) \
+ (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
+#else
+#define LOCAL_CLANG_VERSION 0
+#define LOCAL_CLANG_PREREQ(maj, min) 0
+#endif
+
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
+#if !defined(HAVE_CONFIG_H)
+#if defined(_MSC_VER) && _MSC_VER > 1310 && \
+ (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
+ (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
+#endif
+#endif
+
+// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
+// files without intrinsics, allowing the corresponding Init() to be called.
+// Files containing intrinsics will need to be built targeting the instruction
+// set so should succeed on one of the earlier tests.
+#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
+ (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
+#define WEBP_USE_SSE2
+#endif
+
+#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
+#define WEBP_HAVE_SSE2
+#endif
+
+#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
+ (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
+#define WEBP_USE_SSE41
+#endif
+
+#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
+#define WEBP_HAVE_SSE41
+#endif
+
+#undef WEBP_MSC_SSE41
+#undef WEBP_MSC_SSE2
+
+// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
+// inline assembly would need to be modified for use with Native Client.
+#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \
+ (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
+ !defined(__native_client__)
+#define WEBP_USE_NEON
+#endif
+
+#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
+ defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
+#define WEBP_ANDROID_NEON // Android targets that may have NEON
+#define WEBP_USE_NEON
+#endif
+
+// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
+// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
+// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with
+// vtbl4_u8(); a fix was made in 16.6.
+#if defined(_MSC_VER) && ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
+ (_MSC_VER >= 1926 && defined(_M_ARM64)))
+#define WEBP_USE_NEON
+#define WEBP_USE_INTRINSICS
+#endif
+
+#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
+#define WEBP_HAVE_NEON
+#endif
+
+#if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \
+ (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
+#define WEBP_USE_MIPS32
+#if (__mips_isa_rev >= 2)
+#define WEBP_USE_MIPS32_R2
+#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
+#define WEBP_USE_MIPS_DSP_R2
+#endif
+#endif
+#endif
+
+#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
+#define WEBP_USE_MSA
+#endif
+
+#ifndef WEBP_DSP_OMIT_C_CODE
+#define WEBP_DSP_OMIT_C_CODE 1
+#endif
+
+#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
+#define WEBP_NEON_OMIT_C_CODE 1
+#else
+#define WEBP_NEON_OMIT_C_CODE 0
+#endif
+
+#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || \
+ defined(__aarch64__))
+#define WEBP_NEON_WORK_AROUND_GCC 1
+#else
+#define WEBP_NEON_WORK_AROUND_GCC 0
+#endif
+
+// This macro prevents thread_sanitizer from reporting known concurrent writes.
+#define WEBP_TSAN_IGNORE_FUNCTION
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#undef WEBP_TSAN_IGNORE_FUNCTION
+#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
+#endif
+#endif
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define WEBP_MSAN
+#endif
+#endif
+
+#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
+#include <pthread.h> // NOLINT
+
+#define WEBP_DSP_INIT(func) \
+ do { \
+ static volatile VP8CPUInfo func##_last_cpuinfo_used = \
+ (VP8CPUInfo)&func##_last_cpuinfo_used; \
+ static pthread_mutex_t func##_lock = PTHREAD_MUTEX_INITIALIZER; \
+ if (pthread_mutex_lock(&func##_lock)) break; \
+ if (func##_last_cpuinfo_used != VP8GetCPUInfo) func(); \
+ func##_last_cpuinfo_used = VP8GetCPUInfo; \
+ (void)pthread_mutex_unlock(&func##_lock); \
+ } while (0)
+#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
+#define WEBP_DSP_INIT(func) \
+ do { \
+ static volatile VP8CPUInfo func##_last_cpuinfo_used = \
+ (VP8CPUInfo)&func##_last_cpuinfo_used; \
+ if (func##_last_cpuinfo_used == VP8GetCPUInfo) break; \
+ func(); \
+ func##_last_cpuinfo_used = VP8GetCPUInfo; \
+ } while (0)
+#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)
+
+// Defines an Init + helper function that control multiple initialization of
+// function pointers / tables.
+/* Usage:
+ WEBP_DSP_INIT_FUNC(InitFunc) {
+ ...function body
+ }
+*/
+#define WEBP_DSP_INIT_FUNC(name) \
+ static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void); \
+ WEBP_TSAN_IGNORE_FUNCTION void name(void) { WEBP_DSP_INIT(name##_body); } \
+ static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void)
+
+#define WEBP_UBSAN_IGNORE_UNDEF
+#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
+#if defined(__clang__) && defined(__has_attribute)
+#if __has_attribute(no_sanitize)
+// This macro prevents the undefined behavior sanitizer from reporting
+// failures. This is only meant to silence unaligned loads on platforms that
+// are known to support them.
+#undef WEBP_UBSAN_IGNORE_UNDEF
+#define WEBP_UBSAN_IGNORE_UNDEF __attribute__((no_sanitize("undefined")))
+
+// This macro prevents the undefined behavior sanitizer from reporting
+// failures related to unsigned integer overflows. This is only meant to
+// silence cases where this well defined behavior is expected.
+#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
+#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
+ __attribute__((no_sanitize("unsigned-integer-overflow")))
+#endif
+#endif
+
+// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
+// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
+#if !defined(WEBP_OFFSET_PTR)
+#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
+#endif
+
+// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
+#if !defined(WEBP_SWAP_16BIT_CSP)
+#define WEBP_SWAP_16BIT_CSP 0
+#endif
+
+// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
+#if !defined(WORDS_BIGENDIAN) && \
+ (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
+ (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
+#define WORDS_BIGENDIAN
+#endif
+
+typedef enum {
+ kSSE2,
+ kSSE3,
+ kSlowSSSE3, // special feature for slow SSSE3 architectures
+ kSSE4_1,
+ kAVX,
+ kAVX2,
+ kNEON,
+ kMIPS32,
+ kMIPSdspR2,
+ kMSA
+} CPUFeature;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// returns true if the CPU supports the feature.
+typedef int (*VP8CPUInfo)(CPUFeature feature);
+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_DSP_CPU_H_
diff --git a/thirdparty/libwebp/src/dsp/dsp.h b/thirdparty/libwebp/src/dsp/dsp.h
index c4f57e4d5b..d2000b8efc 100644
--- a/thirdparty/libwebp/src/dsp/dsp.h
+++ b/thirdparty/libwebp/src/dsp/dsp.h
@@ -18,6 +18,7 @@
#include "src/webp/config.h"
#endif
+#include "src/dsp/cpu.h"
#include "src/webp/types.h"
#ifdef __cplusplus
@@ -43,225 +44,6 @@ extern "C" {
#define WEBP_RESTRICT
#endif
-//------------------------------------------------------------------------------
-// CPU detection
-
-#if defined(__GNUC__)
-# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
-# define LOCAL_GCC_PREREQ(maj, min) \
- (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
-#else
-# define LOCAL_GCC_VERSION 0
-# define LOCAL_GCC_PREREQ(maj, min) 0
-#endif
-
-#if defined(__clang__)
-# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
-# define LOCAL_CLANG_PREREQ(maj, min) \
- (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
-#else
-# define LOCAL_CLANG_VERSION 0
-# define LOCAL_CLANG_PREREQ(maj, min) 0
-#endif
-
-#ifndef __has_builtin
-# define __has_builtin(x) 0
-#endif
-
-#if !defined(HAVE_CONFIG_H)
-#if defined(_MSC_VER) && _MSC_VER > 1310 && \
- (defined(_M_X64) || defined(_M_IX86))
-#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
- (defined(_M_X64) || defined(_M_IX86))
-#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
-#endif
-#endif
-
-// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
-// files without intrinsics, allowing the corresponding Init() to be called.
-// Files containing intrinsics will need to be built targeting the instruction
-// set so should succeed on one of the earlier tests.
-#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
- (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
-#define WEBP_USE_SSE2
-#endif
-
-#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
-#define WEBP_HAVE_SSE2
-#endif
-
-#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
- (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
-#define WEBP_USE_SSE41
-#endif
-
-#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
-#define WEBP_HAVE_SSE41
-#endif
-
-#undef WEBP_MSC_SSE41
-#undef WEBP_MSC_SSE2
-
-// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
-// inline assembly would need to be modified for use with Native Client.
-#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \
- (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
- !defined(__native_client__)
-#define WEBP_USE_NEON
-#endif
-
-#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
- defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
-#define WEBP_ANDROID_NEON // Android targets that may have NEON
-#define WEBP_USE_NEON
-#endif
-
-// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
-// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
-// arm_neon.h.
-#if defined(_MSC_VER) && \
- ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
- (_MSC_VER >= 1920 && defined(_M_ARM64)))
-#define WEBP_USE_NEON
-#define WEBP_USE_INTRINSICS
-#endif
-
-#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
-#define WEBP_HAVE_NEON
-#endif
-
-#if defined(__mips__) && !defined(__mips64) && \
- defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
-#define WEBP_USE_MIPS32
-#if (__mips_isa_rev >= 2)
-#define WEBP_USE_MIPS32_R2
-#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
-#define WEBP_USE_MIPS_DSP_R2
-#endif
-#endif
-#endif
-
-#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
-#define WEBP_USE_MSA
-#endif
-
-#ifndef WEBP_DSP_OMIT_C_CODE
-#define WEBP_DSP_OMIT_C_CODE 1
-#endif
-
-#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
-#define WEBP_NEON_OMIT_C_CODE 1
-#else
-#define WEBP_NEON_OMIT_C_CODE 0
-#endif
-
-#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
-#define WEBP_NEON_WORK_AROUND_GCC 1
-#else
-#define WEBP_NEON_WORK_AROUND_GCC 0
-#endif
-
-// This macro prevents thread_sanitizer from reporting known concurrent writes.
-#define WEBP_TSAN_IGNORE_FUNCTION
-#if defined(__has_feature)
-#if __has_feature(thread_sanitizer)
-#undef WEBP_TSAN_IGNORE_FUNCTION
-#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
-#endif
-#endif
-
-#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
-#include <pthread.h> // NOLINT
-
-#define WEBP_DSP_INIT(func) do { \
- static volatile VP8CPUInfo func ## _last_cpuinfo_used = \
- (VP8CPUInfo)&func ## _last_cpuinfo_used; \
- static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \
- if (pthread_mutex_lock(&func ## _lock)) break; \
- if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func(); \
- func ## _last_cpuinfo_used = VP8GetCPUInfo; \
- (void)pthread_mutex_unlock(&func ## _lock); \
-} while (0)
-#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
-#define WEBP_DSP_INIT(func) do { \
- static volatile VP8CPUInfo func ## _last_cpuinfo_used = \
- (VP8CPUInfo)&func ## _last_cpuinfo_used; \
- if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break; \
- func(); \
- func ## _last_cpuinfo_used = VP8GetCPUInfo; \
-} while (0)
-#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)
-
-// Defines an Init + helper function that control multiple initialization of
-// function pointers / tables.
-/* Usage:
- WEBP_DSP_INIT_FUNC(InitFunc) {
- ...function body
- }
-*/
-#define WEBP_DSP_INIT_FUNC(name) \
- static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \
- WEBP_TSAN_IGNORE_FUNCTION void name(void) { \
- WEBP_DSP_INIT(name ## _body); \
- } \
- static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void)
-
-#define WEBP_UBSAN_IGNORE_UNDEF
-#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
-#if defined(__clang__) && defined(__has_attribute)
-#if __has_attribute(no_sanitize)
-// This macro prevents the undefined behavior sanitizer from reporting
-// failures. This is only meant to silence unaligned loads on platforms that
-// are known to support them.
-#undef WEBP_UBSAN_IGNORE_UNDEF
-#define WEBP_UBSAN_IGNORE_UNDEF \
- __attribute__((no_sanitize("undefined")))
-
-// This macro prevents the undefined behavior sanitizer from reporting
-// failures related to unsigned integer overflows. This is only meant to
-// silence cases where this well defined behavior is expected.
-#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
-#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
- __attribute__((no_sanitize("unsigned-integer-overflow")))
-#endif
-#endif
-
-// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
-// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
-#if !defined(WEBP_OFFSET_PTR)
-#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
-#endif
-
-// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
-#if !defined(WEBP_SWAP_16BIT_CSP)
-#define WEBP_SWAP_16BIT_CSP 0
-#endif
-
-// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
-#if !defined(WORDS_BIGENDIAN) && \
- (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
- (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
-#define WORDS_BIGENDIAN
-#endif
-
-typedef enum {
- kSSE2,
- kSSE3,
- kSlowSSSE3, // special feature for slow SSSE3 architectures
- kSSE4_1,
- kAVX,
- kAVX2,
- kNEON,
- kMIPS32,
- kMIPSdspR2,
- kMSA
-} CPUFeature;
-// returns true if the CPU supports the feature.
-typedef int (*VP8CPUInfo)(CPUFeature feature);
-WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
//------------------------------------------------------------------------------
// Init stub generator
@@ -550,15 +332,6 @@ extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
uint8_t* u, uint8_t* v, int width);
-// utilities for accurate RGB->YUV conversion
-extern uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* src, const uint16_t* ref,
- uint16_t* dst, int len);
-extern void (*WebPSharpYUVUpdateRGB)(const int16_t* src, const int16_t* ref,
- int16_t* dst, int len);
-extern void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B,
- int len,
- const uint16_t* best_y, uint16_t* out);
-
// Must be called before using the above.
void WebPInitConvertARGBToYUV(void);
diff --git a/thirdparty/libwebp/src/dsp/lossless.h b/thirdparty/libwebp/src/dsp/lossless.h
index c26c6bca07..de60d95d0b 100644
--- a/thirdparty/libwebp/src/dsp/lossless.h
+++ b/thirdparty/libwebp/src/dsp/lossless.h
@@ -182,9 +182,9 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
// -----------------------------------------------------------------------------
// Huffman-cost related functions.
-typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
-typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
- int length);
+typedef float (*VP8LCostFunc)(const uint32_t* population, int length);
+typedef float (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
+ int length);
typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
const int Y[256]);
@@ -198,7 +198,7 @@ typedef struct { // small struct to hold counters
} VP8LStreaks;
typedef struct { // small struct to hold bit entropy results
- double entropy; // entropy
+ float entropy; // entropy
uint32_t sum; // sum of the population
int nonzeros; // number of non-zero elements in the population
uint32_t max_val; // maximum value in the population
diff --git a/thirdparty/libwebp/src/dsp/lossless_enc.c b/thirdparty/libwebp/src/dsp/lossless_enc.c
index 1580631e38..de6c4ace5f 100644
--- a/thirdparty/libwebp/src/dsp/lossless_enc.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc.c
@@ -402,7 +402,7 @@ static float FastLog2Slow_C(uint32_t v) {
// Compute the combined Shanon's entropy for distribution {X} and {X+Y}
static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
int i;
- double retval = 0.;
+ float retval = 0.f;
int sumX = 0, sumXY = 0;
for (i = 0; i < 256; ++i) {
const int x = X[i];
@@ -418,7 +418,7 @@ static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
}
}
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
- return (float)retval;
+ return retval;
}
void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
@@ -636,17 +636,17 @@ void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
//------------------------------------------------------------------------------
-static double ExtraCost_C(const uint32_t* population, int length) {
+static float ExtraCost_C(const uint32_t* population, int length) {
int i;
- double cost = 0.;
+ float cost = 0.f;
for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
return cost;
}
-static double ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
+static float ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
int length) {
int i;
- double cost = 0.;
+ float cost = 0.f;
for (i = 2; i < length - 2; ++i) {
const int xy = X[i + 2] + Y[i + 2];
cost += (i >> 1) * xy;
diff --git a/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c b/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
index 0412a093cf..639f786631 100644
--- a/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
@@ -103,8 +103,8 @@ static float FastLog2Slow_MIPS32(uint32_t v) {
// cost += i * *(pop + 1);
// pop += 2;
// }
-// return (double)cost;
-static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
+// return (float)cost;
+static float ExtraCost_MIPS32(const uint32_t* const population, int length) {
int i, temp0, temp1;
const uint32_t* pop = &population[4];
const uint32_t* const LoopEnd = &population[length];
@@ -130,7 +130,7 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
: "memory", "hi", "lo"
);
- return (double)((int64_t)temp0 << 32 | temp1);
+ return (float)((int64_t)temp0 << 32 | temp1);
}
// C version of this function:
@@ -148,9 +148,9 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
// pX += 2;
// pY += 2;
// }
-// return (double)cost;
-static double ExtraCostCombined_MIPS32(const uint32_t* const X,
- const uint32_t* const Y, int length) {
+// return (float)cost;
+static float ExtraCostCombined_MIPS32(const uint32_t* const X,
+ const uint32_t* const Y, int length) {
int i, temp0, temp1, temp2, temp3;
const uint32_t* pX = &X[4];
const uint32_t* pY = &Y[4];
@@ -183,7 +183,7 @@ static double ExtraCostCombined_MIPS32(const uint32_t* const X,
: "memory", "hi", "lo"
);
- return (double)((int64_t)temp0 << 32 | temp1);
+ return (float)((int64_t)temp0 << 32 | temp1);
}
#define HUFFMAN_COST_PASS \
@@ -347,24 +347,24 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
uint32_t* pout, int size) {
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
- const uint32_t end = ((size) / 4) * 4;
+ const int end = ((size) / 4) * 4;
const uint32_t* const LoopEnd = pa + end;
int i;
ASM_START
ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)
ASM_END_0
- for (i = end; i < size; ++i) pout[i] = pa[i] + pb[i];
+ for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];
}
static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
- const uint32_t end = ((size) / 4) * 4;
+ const int end = ((size) / 4) * 4;
const uint32_t* const LoopEnd = pa + end;
int i;
ASM_START
ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)
ASM_END_1
- for (i = end; i < size; ++i) pout[i] += pa[i];
+ for (i = 0; i < size - end; ++i) pout[i] += pa[i];
}
#undef ASM_END_1
diff --git a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
index b2f83b871c..948001a3d5 100644
--- a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
@@ -239,7 +239,7 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
int i;
- double retval = 0.;
+ float retval = 0.f;
int sumX = 0, sumXY = 0;
const __m128i zero = _mm_setzero_si128();
@@ -273,7 +273,7 @@ static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
}
}
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
- return (float)retval;
+ return retval;
}
#else
diff --git a/thirdparty/libwebp/src/dsp/yuv.c b/thirdparty/libwebp/src/dsp/yuv.c
index 48466f8b11..d16c13d3ca 100644
--- a/thirdparty/libwebp/src/dsp/yuv.c
+++ b/thirdparty/libwebp/src/dsp/yuv.c
@@ -194,50 +194,6 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
//-----------------------------------------------------------------------------
-#if !WEBP_NEON_OMIT_C_CODE
-#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic
-static uint16_t clip_y(int v) {
- return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
-}
-
-static uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src,
- uint16_t* dst, int len) {
- uint64_t diff = 0;
- int i;
- for (i = 0; i < len; ++i) {
- const int diff_y = ref[i] - src[i];
- const int new_y = (int)dst[i] + diff_y;
- dst[i] = clip_y(new_y);
- diff += (uint64_t)abs(diff_y);
- }
- return diff;
-}
-
-static void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src,
- int16_t* dst, int len) {
- int i;
- for (i = 0; i < len; ++i) {
- const int diff_uv = ref[i] - src[i];
- dst[i] += diff_uv;
- }
-}
-
-static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len,
- const uint16_t* best_y, uint16_t* out) {
- int i;
- for (i = 0; i < len; ++i, ++A, ++B) {
- const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4;
- const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4;
- out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
- out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
- }
-}
-#endif // !WEBP_NEON_OMIT_C_CODE
-
-#undef MAX_Y
-
-//-----------------------------------------------------------------------------
-
void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
@@ -247,18 +203,9 @@ void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
int src_width, int do_store);
-uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src,
- uint16_t* dst, int len);
-void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src,
- int16_t* dst, int len);
-void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len,
- const uint16_t* best_y, uint16_t* out);
-
extern void WebPInitConvertARGBToYUVSSE2(void);
extern void WebPInitConvertARGBToYUVSSE41(void);
extern void WebPInitConvertARGBToYUVNEON(void);
-extern void WebPInitSharpYUVSSE2(void);
-extern void WebPInitSharpYUVNEON(void);
WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
WebPConvertARGBToY = ConvertARGBToY_C;
@@ -269,17 +216,10 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;
-#if !WEBP_NEON_OMIT_C_CODE
- WebPSharpYUVUpdateY = SharpYUVUpdateY_C;
- WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C;
- WebPSharpYUVFilterRow = SharpYUVFilterRow_C;
-#endif
-
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_HAVE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {
WebPInitConvertARGBToYUVSSE2();
- WebPInitSharpYUVSSE2();
}
#endif // WEBP_HAVE_SSE2
#if defined(WEBP_HAVE_SSE41)
@@ -293,7 +233,6 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
if (WEBP_NEON_OMIT_C_CODE ||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
WebPInitConvertARGBToYUVNEON();
- WebPInitSharpYUVNEON();
}
#endif // WEBP_HAVE_NEON
@@ -302,7 +241,4 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
assert(WebPConvertRGB24ToY != NULL);
assert(WebPConvertBGR24ToY != NULL);
assert(WebPConvertRGBA32ToUV != NULL);
- assert(WebPSharpYUVUpdateY != NULL);
- assert(WebPSharpYUVUpdateRGB != NULL);
- assert(WebPSharpYUVFilterRow != NULL);
}
diff --git a/thirdparty/libwebp/src/dsp/yuv_neon.c b/thirdparty/libwebp/src/dsp/yuv_neon.c
index a34d60248f..ff77b00980 100644
--- a/thirdparty/libwebp/src/dsp/yuv_neon.c
+++ b/thirdparty/libwebp/src/dsp/yuv_neon.c
@@ -173,116 +173,8 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVNEON(void) {
WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_NEON;
}
-//------------------------------------------------------------------------------
-
-#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic
-static uint16_t clip_y_NEON(int v) {
- return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
-}
-
-static uint64_t SharpYUVUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
- uint16_t* dst, int len) {
- int i;
- const int16x8_t zero = vdupq_n_s16(0);
- const int16x8_t max = vdupq_n_s16(MAX_Y);
- uint64x2_t sum = vdupq_n_u64(0);
- uint64_t diff;
-
- for (i = 0; i + 8 <= len; i += 8) {
- const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
- const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
- const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
- const int16x8_t D = vsubq_s16(A, B); // diff_y
- const int16x8_t F = vaddq_s16(C, D); // new_y
- const uint16x8_t H =
- vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
- const int16x8_t I = vabsq_s16(D); // abs(diff_y)
- vst1q_u16(dst + i, H);
- sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
- }
- diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);
- for (; i < len; ++i) {
- const int diff_y = ref[i] - src[i];
- const int new_y = (int)(dst[i]) + diff_y;
- dst[i] = clip_y_NEON(new_y);
- diff += (uint64_t)(abs(diff_y));
- }
- return diff;
-}
-
-static void SharpYUVUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
- int16_t* dst, int len) {
- int i;
- for (i = 0; i + 8 <= len; i += 8) {
- const int16x8_t A = vld1q_s16(ref + i);
- const int16x8_t B = vld1q_s16(src + i);
- const int16x8_t C = vld1q_s16(dst + i);
- const int16x8_t D = vsubq_s16(A, B); // diff_uv
- const int16x8_t E = vaddq_s16(C, D); // new_uv
- vst1q_s16(dst + i, E);
- }
- for (; i < len; ++i) {
- const int diff_uv = ref[i] - src[i];
- dst[i] += diff_uv;
- }
-}
-
-static void SharpYUVFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
- const uint16_t* best_y, uint16_t* out) {
- int i;
- const int16x8_t max = vdupq_n_s16(MAX_Y);
- const int16x8_t zero = vdupq_n_s16(0);
- for (i = 0; i + 8 <= len; i += 8) {
- const int16x8_t a0 = vld1q_s16(A + i + 0);
- const int16x8_t a1 = vld1q_s16(A + i + 1);
- const int16x8_t b0 = vld1q_s16(B + i + 0);
- const int16x8_t b1 = vld1q_s16(B + i + 1);
- const int16x8_t a0b1 = vaddq_s16(a0, b1);
- const int16x8_t a1b0 = vaddq_s16(a1, b0);
- const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0); // A0+A1+B0+B1
- const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1); // 2*(A0+B1)
- const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0); // 2*(A1+B0)
- const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
- const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
- const int16x8_t d0 = vaddq_s16(c1, a0);
- const int16x8_t d1 = vaddq_s16(c0, a1);
- const int16x8_t e0 = vrshrq_n_s16(d0, 1);
- const int16x8_t e1 = vrshrq_n_s16(d1, 1);
- const int16x8x2_t f = vzipq_s16(e0, e1);
- const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
- const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
- const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
- const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
- const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);
- const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
- vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
- vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
- }
- for (; i < len; ++i) {
- const int a0b1 = A[i + 0] + B[i + 1];
- const int a1b0 = A[i + 1] + B[i + 0];
- const int a0a1b0b1 = a0b1 + a1b0 + 8;
- const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
- const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
- out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0);
- out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1);
- }
-}
-#undef MAX_Y
-
-//------------------------------------------------------------------------------
-
-extern void WebPInitSharpYUVNEON(void);
-
-WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVNEON(void) {
- WebPSharpYUVUpdateY = SharpYUVUpdateY_NEON;
- WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_NEON;
- WebPSharpYUVFilterRow = SharpYUVFilterRow_NEON;
-}
-
#else // !WEBP_USE_NEON
WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVNEON)
-WEBP_DSP_INIT_STUB(WebPInitSharpYUVNEON)
#endif // WEBP_USE_NEON
diff --git a/thirdparty/libwebp/src/dsp/yuv_sse2.c b/thirdparty/libwebp/src/dsp/yuv_sse2.c
index baa48d5371..970bbb7884 100644
--- a/thirdparty/libwebp/src/dsp/yuv_sse2.c
+++ b/thirdparty/libwebp/src/dsp/yuv_sse2.c
@@ -747,128 +747,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) {
WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE2;
}
-//------------------------------------------------------------------------------
-
-#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic
-static uint16_t clip_y(int v) {
- return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
-}
-
-static uint64_t SharpYUVUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
- uint16_t* dst, int len) {
- uint64_t diff = 0;
- uint32_t tmp[4];
- int i;
- const __m128i zero = _mm_setzero_si128();
- const __m128i max = _mm_set1_epi16(MAX_Y);
- const __m128i one = _mm_set1_epi16(1);
- __m128i sum = zero;
-
- for (i = 0; i + 8 <= len; i += 8) {
- const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
- const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
- const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
- const __m128i D = _mm_sub_epi16(A, B); // diff_y
- const __m128i E = _mm_cmpgt_epi16(zero, D); // sign (-1 or 0)
- const __m128i F = _mm_add_epi16(C, D); // new_y
- const __m128i G = _mm_or_si128(E, one); // -1 or 1
- const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
- const __m128i I = _mm_madd_epi16(D, G); // sum(abs(...))
- _mm_storeu_si128((__m128i*)(dst + i), H);
- sum = _mm_add_epi32(sum, I);
- }
- _mm_storeu_si128((__m128i*)tmp, sum);
- diff = tmp[3] + tmp[2] + tmp[1] + tmp[0];
- for (; i < len; ++i) {
- const int diff_y = ref[i] - src[i];
- const int new_y = (int)dst[i] + diff_y;
- dst[i] = clip_y(new_y);
- diff += (uint64_t)abs(diff_y);
- }
- return diff;
-}
-
-static void SharpYUVUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
- int16_t* dst, int len) {
- int i = 0;
- for (i = 0; i + 8 <= len; i += 8) {
- const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
- const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
- const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
- const __m128i D = _mm_sub_epi16(A, B); // diff_uv
- const __m128i E = _mm_add_epi16(C, D); // new_uv
- _mm_storeu_si128((__m128i*)(dst + i), E);
- }
- for (; i < len; ++i) {
- const int diff_uv = ref[i] - src[i];
- dst[i] += diff_uv;
- }
-}
-
-static void SharpYUVFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
- const uint16_t* best_y, uint16_t* out) {
- int i;
- const __m128i kCst8 = _mm_set1_epi16(8);
- const __m128i max = _mm_set1_epi16(MAX_Y);
- const __m128i zero = _mm_setzero_si128();
- for (i = 0; i + 8 <= len; i += 8) {
- const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0));
- const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1));
- const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0));
- const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1));
- const __m128i a0b1 = _mm_add_epi16(a0, b1);
- const __m128i a1b0 = _mm_add_epi16(a1, b0);
- const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0); // A0+A1+B0+B1
- const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
- const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1); // 2*(A0+B1)
- const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0); // 2*(A1+B0)
- const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
- const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
- const __m128i d0 = _mm_add_epi16(c1, a0);
- const __m128i d1 = _mm_add_epi16(c0, a1);
- const __m128i e0 = _mm_srai_epi16(d0, 1);
- const __m128i e1 = _mm_srai_epi16(d1, 1);
- const __m128i f0 = _mm_unpacklo_epi16(e0, e1);
- const __m128i f1 = _mm_unpackhi_epi16(e0, e1);
- const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
- const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8));
- const __m128i h0 = _mm_add_epi16(g0, f0);
- const __m128i h1 = _mm_add_epi16(g1, f1);
- const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero);
- const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero);
- _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0);
- _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1);
- }
- for (; i < len; ++i) {
- // (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
- // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
- // We reuse the common sub-expressions.
- const int a0b1 = A[i + 0] + B[i + 1];
- const int a1b0 = A[i + 1] + B[i + 0];
- const int a0a1b0b1 = a0b1 + a1b0 + 8;
- const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
- const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
- out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
- out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
- }
-}
-
-#undef MAX_Y
-
-//------------------------------------------------------------------------------
-
-extern void WebPInitSharpYUVSSE2(void);
-
-WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVSSE2(void) {
- WebPSharpYUVUpdateY = SharpYUVUpdateY_SSE2;
- WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_SSE2;
- WebPSharpYUVFilterRow = SharpYUVFilterRow_SSE2;
-}
-
#else // !WEBP_USE_SSE2
WEBP_DSP_INIT_STUB(WebPInitSamplersSSE2)
WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE2)
-WEBP_DSP_INIT_STUB(WebPInitSharpYUVSSE2)
#endif // WEBP_USE_SSE2