diff options
Diffstat (limited to 'thirdparty/libwebp/src/dsp')
-rw-r--r-- | thirdparty/libwebp/src/dsp/dec_neon.c | 9 | ||||
-rw-r--r-- | thirdparty/libwebp/src/dsp/dsp.h | 4 | ||||
-rw-r--r-- | thirdparty/libwebp/src/dsp/lossless.c | 11 | ||||
-rw-r--r-- | thirdparty/libwebp/src/dsp/lossless_common.h | 2 | ||||
-rw-r--r-- | thirdparty/libwebp/src/dsp/lossless_enc_sse2.c | 3 | ||||
-rw-r--r-- | thirdparty/libwebp/src/dsp/lossless_sse2.c | 3 | ||||
-rw-r--r-- | thirdparty/libwebp/src/dsp/upsampling_msa.c | 4 | ||||
-rw-r--r-- | thirdparty/libwebp/src/dsp/upsampling_neon.c | 14 |
8 files changed, 33 insertions, 17 deletions
diff --git a/thirdparty/libwebp/src/dsp/dec_neon.c b/thirdparty/libwebp/src/dsp/dec_neon.c index ffa697fcf9..239ec4167e 100644 --- a/thirdparty/libwebp/src/dsp/dec_neon.c +++ b/thirdparty/libwebp/src/dsp/dec_neon.c @@ -1361,7 +1361,8 @@ static void RD4_NEON(uint8_t* dst) { // Down-right const uint32_t J = dst[-1 + 1 * BPS]; const uint32_t K = dst[-1 + 2 * BPS]; const uint32_t L = dst[-1 + 3 * BPS]; - const uint64x1_t LKJI____ = vcreate_u64(L | (K << 8) | (J << 16) | (I << 24)); + const uint64x1_t LKJI____ = + vcreate_u64((uint64_t)L | (K << 8) | (J << 16) | (I << 24)); const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC); const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8)); const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16)); @@ -1427,10 +1428,16 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) { if (do_top) { const uint8x8_t A = vld1_u8(dst - BPS); // top row +#if defined(__aarch64__) + const uint16x8_t B = vmovl_u8(A); + const uint16_t p2 = vaddvq_u16(B); + sum_top = vdupq_n_u16(p2); +#else const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top const uint16x4_t p1 = vpadd_u16(p0, p0); const uint16x4_t p2 = vpadd_u16(p1, p1); sum_top = vcombine_u16(p2, p2); +#endif } if (do_left) { diff --git a/thirdparty/libwebp/src/dsp/dsp.h b/thirdparty/libwebp/src/dsp/dsp.h index fafc2d05d3..a784de334a 100644 --- a/thirdparty/libwebp/src/dsp/dsp.h +++ b/thirdparty/libwebp/src/dsp/dsp.h @@ -246,9 +246,9 @@ extern VP8Fdct VP8FTransform2; // performs two transforms at a time extern VP8WHT VP8FTransformWHT; // Predictions // *dst is the destination block. *top and *left can be NULL. -typedef void (*VP8IntraPreds)(uint8_t *dst, const uint8_t* left, +typedef void (*VP8IntraPreds)(uint8_t* dst, const uint8_t* left, const uint8_t* top); -typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top); +typedef void (*VP8Intra4Preds)(uint8_t* dst, const uint8_t* top); extern VP8Intra4Preds VP8EncPredLuma4; extern VP8IntraPreds VP8EncPredLuma16; extern VP8IntraPreds VP8EncPredChroma8; diff --git a/thirdparty/libwebp/src/dsp/lossless.c b/thirdparty/libwebp/src/dsp/lossless.c index d05af84e7b..aad5f43ec9 100644 --- a/thirdparty/libwebp/src/dsp/lossless.c +++ b/thirdparty/libwebp/src/dsp/lossless.c @@ -81,7 +81,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, // gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is // inlined. -#if defined(__arm__) && LOCAL_GCC_VERSION <= 0x409 +#if defined(__arm__) && defined(__GNUC__) && LOCAL_GCC_VERSION <= 0x409 # define LOCAL_INLINE __attribute__ ((noinline)) #else # define LOCAL_INLINE WEBP_INLINE @@ -167,15 +167,20 @@ static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) { return pred; } -GENERATE_PREDICTOR_ADD(Predictor0_C, PredictorAdd0_C) +static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper, + int num_pixels, uint32_t* out) { + int x; + (void)upper; + for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK); +} static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper, int num_pixels, uint32_t* out) { int i; uint32_t left = out[-1]; + (void)upper; for (i = 0; i < num_pixels; ++i) { out[i] = left = VP8LAddPixels(in[i], left); } - (void)upper; } GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C) GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C) diff --git a/thirdparty/libwebp/src/dsp/lossless_common.h b/thirdparty/libwebp/src/dsp/lossless_common.h index a2648d1737..9c2ebe6809 100644 --- a/thirdparty/libwebp/src/dsp/lossless_common.h +++ b/thirdparty/libwebp/src/dsp/lossless_common.h @@ -177,6 +177,7 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) { static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \ int num_pixels, uint32_t* out) { \ int x; \ + assert(upper != NULL); \ for (x = 0; x < num_pixels; ++x) { \ const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x); \ out[x] = VP8LAddPixels(in[x], pred); \ @@ -189,6 +190,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \ static void PREDICTOR_SUB(const uint32_t* in, const uint32_t* upper, \ int num_pixels, uint32_t* out) { \ int x; \ + assert(upper != NULL); \ for (x = 0; x < num_pixels; ++x) { \ const uint32_t pred = (PREDICTOR)(in[x - 1], upper + x); \ out[x] = VP8LSubPixels(in[x], pred); \ diff --git a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c index 8adc52139b..e676f6fdc9 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c @@ -455,8 +455,9 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper, _mm_storeu_si128((__m128i*)&out[i], res); } if (i != num_pixels) { - VP8LPredictorsSub_C[0](in + i, upper + i, num_pixels - i, out + i); + VP8LPredictorsSub_C[0](in + i, NULL, num_pixels - i, out + i); } + (void)upper; } #define GENERATE_PREDICTOR_1(X, IN) \ diff --git a/thirdparty/libwebp/src/dsp/lossless_sse2.c b/thirdparty/libwebp/src/dsp/lossless_sse2.c index 17d7576419..aef0cee1b3 100644 --- a/thirdparty/libwebp/src/dsp/lossless_sse2.c +++ b/thirdparty/libwebp/src/dsp/lossless_sse2.c @@ -191,8 +191,9 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper, _mm_storeu_si128((__m128i*)&out[i], res); } if (i != num_pixels) { - VP8LPredictorsAdd_C[0](in + i, upper + i, num_pixels - i, out + i); + VP8LPredictorsAdd_C[0](in + i, NULL, num_pixels - i, out + i); } + (void)upper; } // Predictor1: left. diff --git a/thirdparty/libwebp/src/dsp/upsampling_msa.c b/thirdparty/libwebp/src/dsp/upsampling_msa.c index 99eea70e7d..f2e03e85e9 100644 --- a/thirdparty/libwebp/src/dsp/upsampling_msa.c +++ b/thirdparty/libwebp/src/dsp/upsampling_msa.c @@ -576,9 +576,9 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y, \ const uint32_t l_uv = ((cur_u[0]) | ((cur_v[0]) << 16)); \ const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \ const uint8_t* ptop_y = &top_y[1]; \ - uint8_t *ptop_dst = top_dst + XSTEP; \ + uint8_t* ptop_dst = top_dst + XSTEP; \ const uint8_t* pbot_y = &bot_y[1]; \ - uint8_t *pbot_dst = bot_dst + XSTEP; \ + uint8_t* pbot_dst = bot_dst + XSTEP; \ \ FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst); \ if (bot_y != NULL) { \ diff --git a/thirdparty/libwebp/src/dsp/upsampling_neon.c b/thirdparty/libwebp/src/dsp/upsampling_neon.c index 17cbc9f911..6ba71a7de5 100644 --- a/thirdparty/libwebp/src/dsp/upsampling_neon.c +++ b/thirdparty/libwebp/src/dsp/upsampling_neon.c @@ -58,8 +58,8 @@ } while (0) // Turn the macro into a function for reducing code-size when non-critical -static void Upsample16Pixels_NEON(const uint8_t *r1, const uint8_t *r2, - uint8_t *out) { +static void Upsample16Pixels_NEON(const uint8_t* r1, const uint8_t* r2, + uint8_t* out) { UPSAMPLE_16PIXELS(r1, r2, out); } @@ -190,14 +190,14 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 }; } #define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \ -static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y, \ - const uint8_t *top_u, const uint8_t *top_v, \ - const uint8_t *cur_u, const uint8_t *cur_v, \ - uint8_t *top_dst, uint8_t *bottom_dst, int len) { \ +static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ + const uint8_t* top_u, const uint8_t* top_v, \ + const uint8_t* cur_u, const uint8_t* cur_v, \ + uint8_t* top_dst, uint8_t* bottom_dst, int len) { \ int block; \ /* 16 byte aligned array to cache reconstructed u and v */ \ uint8_t uv_buf[2 * 32 + 15]; \ - uint8_t *const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \ + uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \ const int uv_len = (len + 1) >> 1; \ /* 9 pixels must be read-able for each block */ \ const int num_blocks = (uv_len - 1) >> 3; \ |