diff options
Diffstat (limited to 'thirdparty/libwebp/src')
84 files changed, 2746 insertions, 2638 deletions
diff --git a/thirdparty/libwebp/src/dec/alpha_dec.c b/thirdparty/libwebp/src/dec/alpha_dec.c index bce735bfc2..0b93a30b32 100644 --- a/thirdparty/libwebp/src/dec/alpha_dec.c +++ b/thirdparty/libwebp/src/dec/alpha_dec.c @@ -183,7 +183,7 @@ const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec, assert(dec != NULL && io != NULL); if (row < 0 || num_rows <= 0 || row + num_rows > height) { - return NULL; // sanity check. + return NULL; } if (!dec->is_alpha_decoded_) { diff --git a/thirdparty/libwebp/src/dec/buffer_dec.c b/thirdparty/libwebp/src/dec/buffer_dec.c index 3cd94eb4d9..4786cf0ddb 100644 --- a/thirdparty/libwebp/src/dec/buffer_dec.c +++ b/thirdparty/libwebp/src/dec/buffer_dec.c @@ -102,7 +102,7 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { int stride; uint64_t size; - if ((uint64_t)w * kModeBpp[mode] >= (1ull << 32)) { + if ((uint64_t)w * kModeBpp[mode] >= (1ull << 31)) { return VP8_STATUS_INVALID_PARAM; } stride = w * kModeBpp[mode]; @@ -117,7 +117,6 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { } total_size = size + 2 * uv_size + a_size; - // Security/sanity checks output = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*output)); if (output == NULL) { return VP8_STATUS_OUT_OF_MEMORY; @@ -156,11 +155,11 @@ VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) { } if (WebPIsRGBMode(buffer->colorspace)) { WebPRGBABuffer* const buf = &buffer->u.RGBA; - buf->rgba += (buffer->height - 1) * buf->stride; + buf->rgba += (int64_t)(buffer->height - 1) * buf->stride; buf->stride = -buf->stride; } else { WebPYUVABuffer* const buf = &buffer->u.YUVA; - const int H = buffer->height; + const int64_t H = buffer->height; buf->y += (H - 1) * buf->y_stride; buf->y_stride = -buf->y_stride; buf->u += ((H - 1) >> 1) * buf->u_stride; @@ -188,8 +187,7 @@ VP8StatusCode WebPAllocateDecBuffer(int width, int height, const int ch = options->crop_height; const int x = options->crop_left & ~1; const int y = options->crop_top & ~1; - if 
(x < 0 || y < 0 || cw <= 0 || ch <= 0 || - x + cw > width || y + ch > height) { + if (!WebPCheckCropDimensions(width, height, x, y, cw, ch)) { return VP8_STATUS_INVALID_PARAM; // out of frame boundary. } width = cw; diff --git a/thirdparty/libwebp/src/dec/frame_dec.c b/thirdparty/libwebp/src/dec/frame_dec.c index 04609a8e56..91ca1f8609 100644 --- a/thirdparty/libwebp/src/dec/frame_dec.c +++ b/thirdparty/libwebp/src/dec/frame_dec.c @@ -705,7 +705,7 @@ static int AllocateMemory(VP8Decoder* const dec) { + cache_size + alpha_size + WEBP_ALIGN_CST; uint8_t* mem; - if (needed != (size_t)needed) return 0; // check for overflow + if (!CheckSizeOverflow(needed)) return 0; // check for overflow if (needed > dec->mem_size_) { WebPSafeFree(dec->mem_); dec->mem_size_ = 0; diff --git a/thirdparty/libwebp/src/dec/io_dec.c b/thirdparty/libwebp/src/dec/io_dec.c index e603f19c98..5ef6298886 100644 --- a/thirdparty/libwebp/src/dec/io_dec.c +++ b/thirdparty/libwebp/src/dec/io_dec.c @@ -25,21 +25,16 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) { WebPDecBuffer* output = p->output; const WebPYUVABuffer* const buf = &output->u.YUVA; - uint8_t* const y_dst = buf->y + io->mb_y * buf->y_stride; - uint8_t* const u_dst = buf->u + (io->mb_y >> 1) * buf->u_stride; - uint8_t* const v_dst = buf->v + (io->mb_y >> 1) * buf->v_stride; + uint8_t* const y_dst = buf->y + (size_t)io->mb_y * buf->y_stride; + uint8_t* const u_dst = buf->u + (size_t)(io->mb_y >> 1) * buf->u_stride; + uint8_t* const v_dst = buf->v + (size_t)(io->mb_y >> 1) * buf->v_stride; const int mb_w = io->mb_w; const int mb_h = io->mb_h; const int uv_w = (mb_w + 1) / 2; const int uv_h = (mb_h + 1) / 2; - int j; - for (j = 0; j < mb_h; ++j) { - memcpy(y_dst + j * buf->y_stride, io->y + j * io->y_stride, mb_w); - } - for (j = 0; j < uv_h; ++j) { - memcpy(u_dst + j * buf->u_stride, io->u + j * io->uv_stride, uv_w); - memcpy(v_dst + j * buf->v_stride, io->v + j * io->uv_stride, uv_w); - } + WebPCopyPlane(io->y, 
io->y_stride, y_dst, buf->y_stride, mb_w, mb_h); + WebPCopyPlane(io->u, io->uv_stride, u_dst, buf->u_stride, uv_w, uv_h); + WebPCopyPlane(io->v, io->uv_stride, v_dst, buf->v_stride, uv_w, uv_h); return io->mb_h; } @@ -47,7 +42,7 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) { static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) { WebPDecBuffer* const output = p->output; WebPRGBABuffer* const buf = &output->u.RGBA; - uint8_t* const dst = buf->rgba + io->mb_y * buf->stride; + uint8_t* const dst = buf->rgba + (size_t)io->mb_y * buf->stride; WebPSamplerProcessPlane(io->y, io->y_stride, io->u, io->v, io->uv_stride, dst, buf->stride, io->mb_w, io->mb_h, @@ -62,7 +57,7 @@ static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) { static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) { int num_lines_out = io->mb_h; // a priori guess const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + io->mb_y * buf->stride; + uint8_t* dst = buf->rgba + (size_t)io->mb_y * buf->stride; WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace]; const uint8_t* cur_y = io->y; const uint8_t* cur_u = io->u; @@ -133,7 +128,7 @@ static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p, const WebPYUVABuffer* const buf = &p->output->u.YUVA; const int mb_w = io->mb_w; const int mb_h = io->mb_h; - uint8_t* dst = buf->a + io->mb_y * buf->a_stride; + uint8_t* dst = buf->a + (size_t)io->mb_y * buf->a_stride; int j; (void)expected_num_lines_out; assert(expected_num_lines_out == mb_h); @@ -186,7 +181,7 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p, (colorspace == MODE_ARGB || colorspace == MODE_Argb); const WebPRGBABuffer* const buf = &p->output->u.RGBA; int num_rows; - const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows); + const size_t start_y = GetAlphaSourceRow(io, &alpha, &num_rows); uint8_t* const base_rgba = buf->rgba + start_y * 
buf->stride; uint8_t* const dst = base_rgba + (alpha_first ? 0 : 3); const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w, @@ -210,7 +205,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p, const WEBP_CSP_MODE colorspace = p->output->colorspace; const WebPRGBABuffer* const buf = &p->output->u.RGBA; int num_rows; - const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows); + const size_t start_y = GetAlphaSourceRow(io, &alpha, &num_rows); uint8_t* const base_rgba = buf->rgba + start_y * buf->stride; #if (WEBP_SWAP_16BIT_CSP == 1) uint8_t* alpha_dst = base_rgba; @@ -276,9 +271,9 @@ static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) { static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p, int expected_num_lines_out) { const WebPYUVABuffer* const buf = &p->output->u.YUVA; - uint8_t* const dst_a = buf->a + p->last_y * buf->a_stride; + uint8_t* const dst_a = buf->a + (size_t)p->last_y * buf->a_stride; if (io->a != NULL) { - uint8_t* const dst_y = buf->y + p->last_y * buf->y_stride; + uint8_t* const dst_y = buf->y + (size_t)p->last_y * buf->y_stride; const int num_lines_out = Rescale(io->a, io->width, io->mb_h, p->scaler_a); assert(expected_num_lines_out == num_lines_out); if (num_lines_out > 0) { // unmultiply the Y @@ -303,46 +298,57 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) { const int uv_out_height = (out_height + 1) >> 1; const int uv_in_width = (io->mb_w + 1) >> 1; const int uv_in_height = (io->mb_h + 1) >> 1; - const size_t work_size = 2 * out_width; // scratch memory for luma rescaler + // scratch memory for luma rescaler + const size_t work_size = 2 * (size_t)out_width; const size_t uv_work_size = 2 * uv_out_width; // and for each u/v ones - size_t tmp_size, rescaler_size; + uint64_t total_size; + size_t rescaler_size; rescaler_t* work; WebPRescaler* scalers; const int num_rescalers = has_alpha ? 
4 : 3; - tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work); + total_size = ((uint64_t)work_size + 2 * uv_work_size) * sizeof(*work); if (has_alpha) { - tmp_size += work_size * sizeof(*work); + total_size += (uint64_t)work_size * sizeof(*work); } rescaler_size = num_rescalers * sizeof(*p->scaler_y) + WEBP_ALIGN_CST; + total_size += rescaler_size; + if (!CheckSizeOverflow(total_size)) { + return 0; + } - p->memory = WebPSafeMalloc(1ULL, tmp_size + rescaler_size); + p->memory = WebPSafeMalloc(1ULL, (size_t)total_size); if (p->memory == NULL) { return 0; // memory error } work = (rescaler_t*)p->memory; - scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + tmp_size); + scalers = (WebPRescaler*)WEBP_ALIGN( + (const uint8_t*)work + total_size - rescaler_size); p->scaler_y = &scalers[0]; p->scaler_u = &scalers[1]; p->scaler_v = &scalers[2]; p->scaler_a = has_alpha ? &scalers[3] : NULL; - WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, - buf->y, out_width, out_height, buf->y_stride, 1, - work); - WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, - buf->u, uv_out_width, uv_out_height, buf->u_stride, 1, - work + work_size); - WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, - buf->v, uv_out_width, uv_out_height, buf->v_stride, 1, - work + work_size + uv_work_size); + if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, + buf->y, out_width, out_height, buf->y_stride, 1, + work) || + !WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, + buf->u, uv_out_width, uv_out_height, buf->u_stride, 1, + work + work_size) || + !WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, + buf->v, uv_out_width, uv_out_height, buf->v_stride, 1, + work + work_size + uv_work_size)) { + return 0; + } p->emit = EmitRescaledYUV; if (has_alpha) { - WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, - buf->a, out_width, out_height, buf->a_stride, 1, - work + work_size + 2 * uv_work_size); + if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, + buf->a, 
out_width, out_height, buf->a_stride, 1, + work + work_size + 2 * uv_work_size)) { + return 0; + } p->emit_alpha = EmitRescaledAlphaYUV; WebPInitAlphaProcessing(); } @@ -356,7 +362,7 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) { const WebPYUV444Converter convert = WebPYUV444Converters[p->output->colorspace]; const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + y_pos * buf->stride; + uint8_t* dst = buf->rgba + (size_t)y_pos * buf->stride; int num_lines_out = 0; // For RGB rescaling, because of the YUV420, current scan position // U/V can be +1/-1 line from the Y one. Hence the double test. @@ -383,15 +389,15 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) { while (j < mb_h) { const int y_lines_in = WebPRescalerImport(p->scaler_y, mb_h - j, - io->y + j * io->y_stride, io->y_stride); + io->y + (size_t)j * io->y_stride, io->y_stride); j += y_lines_in; if (WebPRescaleNeededLines(p->scaler_u, uv_mb_h - uv_j)) { - const int u_lines_in = - WebPRescalerImport(p->scaler_u, uv_mb_h - uv_j, - io->u + uv_j * io->uv_stride, io->uv_stride); - const int v_lines_in = - WebPRescalerImport(p->scaler_v, uv_mb_h - uv_j, - io->v + uv_j * io->uv_stride, io->uv_stride); + const int u_lines_in = WebPRescalerImport( + p->scaler_u, uv_mb_h - uv_j, io->u + (size_t)uv_j * io->uv_stride, + io->uv_stride); + const int v_lines_in = WebPRescalerImport( + p->scaler_v, uv_mb_h - uv_j, io->v + (size_t)uv_j * io->uv_stride, + io->uv_stride); (void)v_lines_in; // remove a gcc warning assert(u_lines_in == v_lines_in); uv_j += u_lines_in; @@ -403,7 +409,7 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) { static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) { const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride; + uint8_t* const base_rgba = buf->rgba + (size_t)y_pos * buf->stride; const WEBP_CSP_MODE colorspace = 
p->output->colorspace; const int alpha_first = (colorspace == MODE_ARGB || colorspace == MODE_Argb); @@ -431,7 +437,7 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) { static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos, int max_lines_out) { const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride; + uint8_t* const base_rgba = buf->rgba + (size_t)y_pos * buf->stride; #if (WEBP_SWAP_16BIT_CSP == 1) uint8_t* alpha_dst = base_rgba; #else @@ -470,7 +476,7 @@ static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p, int lines_left = expected_num_out_lines; const int y_end = p->last_y + lines_left; while (lines_left > 0) { - const int row_offset = scaler->src_y - io->mb_y; + const int64_t row_offset = (int64_t)scaler->src_y - io->mb_y; WebPRescalerImport(scaler, io->mb_h + io->mb_y - scaler->src_y, io->a + row_offset * io->width, io->width); lines_left -= p->emit_alpha_row(p, y_end - lines_left, lines_left); @@ -485,51 +491,58 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { const int out_height = io->scaled_height; const int uv_in_width = (io->mb_w + 1) >> 1; const int uv_in_height = (io->mb_h + 1) >> 1; - const size_t work_size = 2 * out_width; // scratch memory for one rescaler + // scratch memory for one rescaler + const size_t work_size = 2 * (size_t)out_width; rescaler_t* work; // rescalers work area uint8_t* tmp; // tmp storage for scaled YUV444 samples before RGB conversion - size_t tmp_size1, tmp_size2, total_size, rescaler_size; + uint64_t tmp_size1, tmp_size2, total_size; + size_t rescaler_size; WebPRescaler* scalers; const int num_rescalers = has_alpha ? 
4 : 3; - tmp_size1 = 3 * work_size; - tmp_size2 = 3 * out_width; - if (has_alpha) { - tmp_size1 += work_size; - tmp_size2 += out_width; - } + tmp_size1 = (uint64_t)num_rescalers * work_size; + tmp_size2 = (uint64_t)num_rescalers * out_width; total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp); rescaler_size = num_rescalers * sizeof(*p->scaler_y) + WEBP_ALIGN_CST; + total_size += rescaler_size; + if (!CheckSizeOverflow(total_size)) { + return 0; + } - p->memory = WebPSafeMalloc(1ULL, total_size + rescaler_size); + p->memory = WebPSafeMalloc(1ULL, (size_t)total_size); if (p->memory == NULL) { return 0; // memory error } work = (rescaler_t*)p->memory; tmp = (uint8_t*)(work + tmp_size1); - scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + total_size); + scalers = (WebPRescaler*)WEBP_ALIGN( + (const uint8_t*)work + total_size - rescaler_size); p->scaler_y = &scalers[0]; p->scaler_u = &scalers[1]; p->scaler_v = &scalers[2]; p->scaler_a = has_alpha ? &scalers[3] : NULL; - WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, - tmp + 0 * out_width, out_width, out_height, 0, 1, - work + 0 * work_size); - WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, - tmp + 1 * out_width, out_width, out_height, 0, 1, - work + 1 * work_size); - WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, - tmp + 2 * out_width, out_width, out_height, 0, 1, - work + 2 * work_size); + if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, + tmp + 0 * out_width, out_width, out_height, 0, 1, + work + 0 * work_size) || + !WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, + tmp + 1 * out_width, out_width, out_height, 0, 1, + work + 1 * work_size) || + !WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, + tmp + 2 * out_width, out_width, out_height, 0, 1, + work + 2 * work_size)) { + return 0; + } p->emit = EmitRescaledRGB; WebPInitYUV444Converters(); if (has_alpha) { - WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, - tmp + 3 * out_width, out_width, 
out_height, 0, 1, - work + 3 * work_size); + if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, + tmp + 3 * out_width, out_width, out_height, 0, 1, + work + 3 * work_size)) { + return 0; + } p->emit_alpha = EmitRescaledAlphaRGB; if (p->output->colorspace == MODE_RGBA_4444 || p->output->colorspace == MODE_rgbA_4444) { diff --git a/thirdparty/libwebp/src/dec/vp8_dec.c b/thirdparty/libwebp/src/dec/vp8_dec.c index 57efb69041..2003935ec4 100644 --- a/thirdparty/libwebp/src/dec/vp8_dec.c +++ b/thirdparty/libwebp/src/dec/vp8_dec.c @@ -335,7 +335,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { io->scaled_width = io->width; io->scaled_height = io->height; - io->mb_w = io->width; // sanity check + io->mb_w = io->width; // for soundness io->mb_h = io->height; // ditto VP8ResetProba(&dec->proba_); @@ -403,7 +403,7 @@ static const uint8_t kZigzag[16] = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; -// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2 +// See section 13-2: https://datatracker.ietf.org/doc/html/rfc6386#section-13.2 static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) { int v; if (!VP8GetBit(br, p[3], "coeffs")) { @@ -494,13 +494,11 @@ static int GetCoeffsAlt(VP8BitReader* const br, return 16; } -static WEBP_TSAN_IGNORE_FUNCTION void InitGetCoeffs(void) { - if (GetCoeffs == NULL) { - if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) { - GetCoeffs = GetCoeffsAlt; - } else { - GetCoeffs = GetCoeffsFast; - } +WEBP_DSP_INIT_FUNC(InitGetCoeffs) { + if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) { + GetCoeffs = GetCoeffsAlt; + } else { + GetCoeffs = GetCoeffsFast; } } diff --git a/thirdparty/libwebp/src/dec/vp8i_dec.h b/thirdparty/libwebp/src/dec/vp8i_dec.h index 600a684410..30c1bd3ef9 100644 --- a/thirdparty/libwebp/src/dec/vp8i_dec.h +++ b/thirdparty/libwebp/src/dec/vp8i_dec.h @@ -31,8 +31,8 @@ extern "C" { // version numbers #define DEC_MAJ_VERSION 1 -#define DEC_MIN_VERSION 1 
-#define DEC_REV_VERSION 0 +#define DEC_MIN_VERSION 2 +#define DEC_REV_VERSION 4 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). // Constraints are: We need to store one 16x16 block of luma samples (y), diff --git a/thirdparty/libwebp/src/dec/vp8l_dec.c b/thirdparty/libwebp/src/dec/vp8l_dec.c index 93615d4ed2..1348055128 100644 --- a/thirdparty/libwebp/src/dec/vp8l_dec.c +++ b/thirdparty/libwebp/src/dec/vp8l_dec.c @@ -84,7 +84,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = { // to 256 (green component values) + 24 (length prefix values) // + color_cache_size (between 0 and 2048). // All values computed for 8-bit first level lookup with Mark Adler's tool: -// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c +// https://github.com/madler/zlib/blob/v1.2.5/examples/enough.c #define FIXED_TABLE_SIZE (630 * 3 + 410) static const uint16_t kTableSize[12] = { FIXED_TABLE_SIZE + 654, @@ -178,7 +178,7 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) { //------------------------------------------------------------------------------ // Decodes the next Huffman code from bit-stream. -// FillBitWindow(br) needs to be called at minimum every second call +// VP8LFillBitWindow(br) needs to be called at minimum every second call // to ReadSymbol, in order to pre-fetch enough bits. static WEBP_INLINE int ReadSymbol(const HuffmanCode* table, VP8LBitReader* const br) { @@ -321,7 +321,7 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec, // The first code is either 1 bit or 8 bit code. int symbol = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8); code_lengths[symbol] = 1; - // The second code (if present), is always 8 bit long. + // The second code (if present), is always 8 bits long. 
if (num_symbols == 2) { symbol = VP8LReadBits(br, 8); code_lengths[symbol] = 1; @@ -559,8 +559,11 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { memory += work_size * sizeof(*work); scaled_data = (uint32_t*)memory; - WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data, - out_width, out_height, 0, num_channels, work); + if (!WebPRescalerInit(dec->rescaler, in_width, in_height, + (uint8_t*)scaled_data, out_width, out_height, + 0, num_channels, work)) { + return 0; + } return 1; } #endif // WEBP_REDUCE_SIZE @@ -574,13 +577,14 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace, int rgba_stride, uint8_t* const rgba) { uint32_t* const src = (uint32_t*)rescaler->dst; + uint8_t* dst = rgba; const int dst_width = rescaler->dst_width; int num_lines_out = 0; while (WebPRescalerHasPendingOutput(rescaler)) { - uint8_t* const dst = rgba + num_lines_out * rgba_stride; WebPRescalerExportRow(rescaler); WebPMultARGBRow(src, dst_width, 1); VP8LConvertFromBGRA(src, dst_width, colorspace, dst); + dst += rgba_stride; ++num_lines_out; } return num_lines_out; @@ -594,8 +598,8 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec, int num_lines_in = 0; int num_lines_out = 0; while (num_lines_in < mb_h) { - uint8_t* const row_in = in + num_lines_in * in_stride; - uint8_t* const row_out = out + num_lines_out * out_stride; + uint8_t* const row_in = in + (uint64_t)num_lines_in * in_stride; + uint8_t* const row_out = out + (uint64_t)num_lines_out * out_stride; const int lines_left = mb_h - num_lines_in; const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left); int lines_imported; @@ -796,7 +800,8 @@ static void ProcessRows(VP8LDecoder* const dec, int row) { const WebPDecBuffer* const output = dec->output_; if (WebPIsRGBMode(output->colorspace)) { // convert to RGBA const WebPRGBABuffer* const buf = 
&output->u.RGBA; - uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride; + uint8_t* const rgba = + buf->rgba + (int64_t)dec->last_out_row_ * buf->stride; const int num_rows_out = #if !defined(WEBP_REDUCE_SIZE) io->use_scaling ? @@ -947,7 +952,6 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) { break; default: goto Copy; - break; } CopySmallPattern8b(src, dst, length, pattern); return; @@ -1277,7 +1281,7 @@ static int ExpandColorMap(int num_colors, VP8LTransform* const transform) { uint8_t* const new_data = (uint8_t*)new_color_map; new_color_map[0] = transform->data_[0]; for (i = 4; i < 4 * num_colors; ++i) { - // Equivalent to AddPixelEq(), on a byte-basis. + // Equivalent to VP8LAddPixels(), on a byte-basis. new_data[i] = (data[i] + new_data[i - 4]) & 0xff; } for (; i < 4 * final_num_colors; ++i) { @@ -1515,7 +1519,7 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) { assert(dec->width_ <= final_width); dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint32_t)); if (dec->pixels_ == NULL) { - dec->argb_cache_ = NULL; // for sanity check + dec->argb_cache_ = NULL; // for soundness dec->status_ = VP8_STATUS_OUT_OF_MEMORY; return 0; } @@ -1525,7 +1529,7 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) { static int AllocateInternalBuffers8b(VP8LDecoder* const dec) { const uint64_t total_num_pixels = (uint64_t)dec->width_ * dec->height_; - dec->argb_cache_ = NULL; // for sanity check + dec->argb_cache_ = NULL; // for soundness dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint8_t)); if (dec->pixels_ == NULL) { dec->status_ = VP8_STATUS_OUT_OF_MEMORY; @@ -1667,7 +1671,6 @@ int VP8LDecodeImage(VP8LDecoder* const dec) { VP8Io* io = NULL; WebPDecParams* params = NULL; - // Sanity checks. 
if (dec == NULL) return 0; assert(dec->hdr_.huffman_tables_ != NULL); diff --git a/thirdparty/libwebp/src/dec/webp_dec.c b/thirdparty/libwebp/src/dec/webp_dec.c index 42d098874d..77a54c55d2 100644 --- a/thirdparty/libwebp/src/dec/webp_dec.c +++ b/thirdparty/libwebp/src/dec/webp_dec.c @@ -785,6 +785,13 @@ VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size, //------------------------------------------------------------------------------ // Cropping and rescaling. +int WebPCheckCropDimensions(int image_width, int image_height, + int x, int y, int w, int h) { + return !(x < 0 || y < 0 || w <= 0 || h <= 0 || + x >= image_width || w > image_width || w > image_width - x || + y >= image_height || h > image_height || h > image_height - y); +} + int WebPIoInitFromOptions(const WebPDecoderOptions* const options, VP8Io* const io, WEBP_CSP_MODE src_colorspace) { const int W = io->width; @@ -792,7 +799,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, int x = 0, y = 0, w = W, h = H; // Cropping - io->use_cropping = (options != NULL) && (options->use_cropping > 0); + io->use_cropping = (options != NULL) && options->use_cropping; if (io->use_cropping) { w = options->crop_width; h = options->crop_height; @@ -802,7 +809,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, x &= ~1; y &= ~1; } - if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) { + if (!WebPCheckCropDimensions(W, H, x, y, w, h)) { return 0; // out of frame boundary error } } @@ -814,7 +821,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, io->mb_h = h; // Scaling - io->use_scaling = (options != NULL) && (options->use_scaling > 0); + io->use_scaling = (options != NULL) && options->use_scaling; if (io->use_scaling) { int scaled_width = options->scaled_width; int scaled_height = options->scaled_height; @@ -835,8 +842,8 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, if (io->use_scaling) { // disable filter 
(only for large downscaling ratio). - io->bypass_filtering = (io->scaled_width < W * 3 / 4) && - (io->scaled_height < H * 3 / 4); + io->bypass_filtering |= (io->scaled_width < W * 3 / 4) && + (io->scaled_height < H * 3 / 4); io->fancy_upsampling = 0; } return 1; diff --git a/thirdparty/libwebp/src/dec/webpi_dec.h b/thirdparty/libwebp/src/dec/webpi_dec.h index 24baff5d27..3b97388c71 100644 --- a/thirdparty/libwebp/src/dec/webpi_dec.h +++ b/thirdparty/libwebp/src/dec/webpi_dec.h @@ -77,6 +77,10 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers); //------------------------------------------------------------------------------ // Misc utils +// Returns true if crop dimensions are within image bounds. +int WebPCheckCropDimensions(int image_width, int image_height, + int x, int y, int w, int h); + // Initializes VP8Io with custom setup, io and teardown functions. The default // hooks will use the supplied 'params' as io->opaque handle. void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io); diff --git a/thirdparty/libwebp/src/demux/anim_decode.c b/thirdparty/libwebp/src/demux/anim_decode.c index 05dd707371..e077ffb536 100644 --- a/thirdparty/libwebp/src/demux/anim_decode.c +++ b/thirdparty/libwebp/src/demux/anim_decode.c @@ -23,6 +23,14 @@ #define NUM_CHANNELS 4 +// Channel extraction from a uint32_t representation of a uint8_t RGBA/BGRA +// buffer. 
+#ifdef WORDS_BIGENDIAN +#define CHANNEL_SHIFT(i) (24 - (i) * 8) +#else +#define CHANNEL_SHIFT(i) ((i) * 8) +#endif + typedef void (*BlendRowFunc)(uint32_t* const, const uint32_t* const, int); static void BlendPixelRowNonPremult(uint32_t* const src, const uint32_t* const dst, int num_pixels); @@ -87,11 +95,19 @@ WebPAnimDecoder* WebPAnimDecoderNewInternal( int abi_version) { WebPAnimDecoderOptions options; WebPAnimDecoder* dec = NULL; + WebPBitstreamFeatures features; if (webp_data == NULL || WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_DEMUX_ABI_VERSION)) { return NULL; } + // Validate the bitstream before doing expensive allocations. The demuxer may + // be more tolerant than the decoder. + if (WebPGetFeatures(webp_data->bytes, webp_data->size, &features) != + VP8_STATUS_OK) { + return NULL; + } + // Note: calloc() so that the pointer members are initialized to NULL. dec = (WebPAnimDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec)); if (dec == NULL) goto Error; @@ -145,7 +161,7 @@ static int ZeroFillCanvas(uint8_t* buf, uint32_t canvas_width, uint32_t canvas_height) { const uint64_t size = (uint64_t)canvas_width * canvas_height * NUM_CHANNELS * sizeof(*buf); - if (size != (size_t)size) return 0; + if (!CheckSizeOverflow(size)) return 0; memset(buf, 0, (size_t)size); return 1; } @@ -166,7 +182,7 @@ static void ZeroFillFrameRect(uint8_t* buf, int buf_stride, int x_offset, static int CopyCanvas(const uint8_t* src, uint8_t* dst, uint32_t width, uint32_t height) { const uint64_t size = (uint64_t)width * height * NUM_CHANNELS; - if (size != (size_t)size) return 0; + if (!CheckSizeOverflow(size)) return 0; assert(src != NULL && dst != NULL); memcpy(dst, src, (size_t)size); return 1; @@ -201,35 +217,35 @@ static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a, const uint8_t dst_channel = (dst >> shift) & 0xff; const uint32_t blend_unscaled = src_channel * src_a + dst_channel * dst_a; assert(blend_unscaled < (1ULL << 32) / scale); - return (blend_unscaled * scale) >> 
24; + return (blend_unscaled * scale) >> CHANNEL_SHIFT(3); } // Blend 'src' over 'dst' assuming they are NOT pre-multiplied by alpha. static uint32_t BlendPixelNonPremult(uint32_t src, uint32_t dst) { - const uint8_t src_a = (src >> 24) & 0xff; + const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff; if (src_a == 0) { return dst; } else { - const uint8_t dst_a = (dst >> 24) & 0xff; + const uint8_t dst_a = (dst >> CHANNEL_SHIFT(3)) & 0xff; // This is the approximate integer arithmetic for the actual formula: // dst_factor_a = (dst_a * (255 - src_a)) / 255. const uint8_t dst_factor_a = (dst_a * (256 - src_a)) >> 8; const uint8_t blend_a = src_a + dst_factor_a; const uint32_t scale = (1UL << 24) / blend_a; - const uint8_t blend_r = - BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 0); - const uint8_t blend_g = - BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 8); - const uint8_t blend_b = - BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 16); + const uint8_t blend_r = BlendChannelNonPremult( + src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(0)); + const uint8_t blend_g = BlendChannelNonPremult( + src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(1)); + const uint8_t blend_b = BlendChannelNonPremult( + src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(2)); assert(src_a + dst_factor_a < 256); - return (blend_r << 0) | - (blend_g << 8) | - (blend_b << 16) | - ((uint32_t)blend_a << 24); + return ((uint32_t)blend_r << CHANNEL_SHIFT(0)) | + ((uint32_t)blend_g << CHANNEL_SHIFT(1)) | + ((uint32_t)blend_b << CHANNEL_SHIFT(2)) | + ((uint32_t)blend_a << CHANNEL_SHIFT(3)); } } @@ -239,7 +255,7 @@ static void BlendPixelRowNonPremult(uint32_t* const src, const uint32_t* const dst, int num_pixels) { int i; for (i = 0; i < num_pixels; ++i) { - const uint8_t src_alpha = (src[i] >> 24) & 0xff; + const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff; if (src_alpha != 0xff) { src[i] = BlendPixelNonPremult(src[i], dst[i]); } @@ 
-256,7 +272,7 @@ static WEBP_INLINE uint32_t ChannelwiseMultiply(uint32_t pix, uint32_t scale) { // Blend 'src' over 'dst' assuming they are pre-multiplied by alpha. static uint32_t BlendPixelPremult(uint32_t src, uint32_t dst) { - const uint8_t src_a = (src >> 24) & 0xff; + const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff; return src + ChannelwiseMultiply(dst, 256 - src_a); } @@ -266,7 +282,7 @@ static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst, int num_pixels) { int i; for (i = 0; i < num_pixels; ++i) { - const uint8_t src_alpha = (src[i] >> 24) & 0xff; + const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff; if (src_alpha != 0xff) { src[i] = BlendPixelPremult(src[i], dst[i]); } @@ -346,12 +362,15 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec, { const uint8_t* in = iter.fragment.bytes; const size_t in_size = iter.fragment.size; - const size_t out_offset = - (iter.y_offset * width + iter.x_offset) * NUM_CHANNELS; + const uint32_t stride = width * NUM_CHANNELS; // at most 25 + 2 bits + const uint64_t out_offset = (uint64_t)iter.y_offset * stride + + (uint64_t)iter.x_offset * NUM_CHANNELS; // 53b + const uint64_t size = (uint64_t)iter.height * stride; // at most 25 + 27b WebPDecoderConfig* const config = &dec->config_; WebPRGBABuffer* const buf = &config->output.u.RGBA; - buf->stride = NUM_CHANNELS * width; - buf->size = buf->stride * iter.height; + if ((size_t)size != size) goto Error; + buf->stride = (int)stride; + buf->size = (size_t)size; buf->rgba = dec->curr_frame_ + out_offset; if (WebPDecode(in, in_size, config) != VP8_STATUS_OK) { diff --git a/thirdparty/libwebp/src/demux/demux.c b/thirdparty/libwebp/src/demux/demux.c index 1b3cc2e0a8..41387ec2d6 100644 --- a/thirdparty/libwebp/src/demux/demux.c +++ b/thirdparty/libwebp/src/demux/demux.c @@ -24,8 +24,8 @@ #include "src/webp/format_constants.h" #define DMUX_MAJ_VERSION 1 -#define DMUX_MIN_VERSION 1 -#define DMUX_REV_VERSION 0 +#define DMUX_MIN_VERSION 2 
+#define DMUX_REV_VERSION 4 typedef struct { size_t start_; // start location of the data @@ -221,12 +221,16 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size, const size_t chunk_start_offset = mem->start_; const uint32_t fourcc = ReadLE32(mem); const uint32_t payload_size = ReadLE32(mem); - const uint32_t payload_size_padded = payload_size + (payload_size & 1); - const size_t payload_available = (payload_size_padded > MemDataSize(mem)) - ? MemDataSize(mem) : payload_size_padded; - const size_t chunk_size = CHUNK_HEADER_SIZE + payload_available; + uint32_t payload_size_padded; + size_t payload_available; + size_t chunk_size; if (payload_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR; + + payload_size_padded = payload_size + (payload_size & 1); + payload_available = (payload_size_padded > MemDataSize(mem)) + ? MemDataSize(mem) : payload_size_padded; + chunk_size = CHUNK_HEADER_SIZE + payload_available; if (SizeIsInvalid(mem, payload_size_padded)) return PARSE_ERROR; if (payload_size_padded > MemDataSize(mem)) status = PARSE_NEED_MORE_DATA; @@ -312,6 +316,7 @@ static ParseStatus ParseAnimationFrame( int bits; MemBuffer* const mem = &dmux->mem_; Frame* frame; + size_t start_offset; ParseStatus status = NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame); if (status != PARSE_OK) return status; @@ -332,7 +337,11 @@ static ParseStatus ParseAnimationFrame( // Store a frame only if the animation flag is set there is some data for // this frame is available. 
+ start_offset = mem->start_; status = StoreFrame(dmux->num_frames_ + 1, anmf_payload_size, mem, frame); + if (status != PARSE_ERROR && mem->start_ - start_offset > anmf_payload_size) { + status = PARSE_ERROR; + } if (status != PARSE_ERROR && is_animation && frame->frame_num_ > 0) { added_frame = AddFrame(dmux, frame); if (added_frame) { @@ -446,9 +455,11 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) { const size_t chunk_start_offset = mem->start_; const uint32_t fourcc = ReadLE32(mem); const uint32_t chunk_size = ReadLE32(mem); - const uint32_t chunk_size_padded = chunk_size + (chunk_size & 1); + uint32_t chunk_size_padded; if (chunk_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR; + + chunk_size_padded = chunk_size + (chunk_size & 1); if (SizeIsInvalid(mem, chunk_size_padded)) return PARSE_ERROR; switch (fourcc) { @@ -603,7 +614,6 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) { while (f != NULL) { const int cur_frame_set = f->frame_num_; - int frame_count = 0; // Check frame properties. 
for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) { @@ -638,8 +648,6 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) { dmux->canvas_width_, dmux->canvas_height_)) { return 0; } - - ++frame_count; } } return 1; diff --git a/thirdparty/libwebp/src/dsp/alpha_processing.c b/thirdparty/libwebp/src/dsp/alpha_processing.c index 819d1391f2..1892929a43 100644 --- a/thirdparty/libwebp/src/dsp/alpha_processing.c +++ b/thirdparty/libwebp/src/dsp/alpha_processing.c @@ -157,7 +157,8 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) { } } -void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha, +void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr, + const uint8_t* WEBP_RESTRICT const alpha, int width, int inverse) { int x; for (x = 0; x < width; ++x) { @@ -178,7 +179,8 @@ void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha, #undef MFIX void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse); -void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha, +void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr, + const uint8_t* WEBP_RESTRICT const alpha, int width, int inverse); //------------------------------------------------------------------------------ @@ -193,8 +195,8 @@ void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows, } } -void WebPMultRows(uint8_t* ptr, int stride, - const uint8_t* alpha, int alpha_stride, +void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride, + const uint8_t* WEBP_RESTRICT alpha, int alpha_stride, int width, int num_rows, int inverse) { int n; for (n = 0; n < num_rows; ++n) { @@ -290,9 +292,9 @@ static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444, } #if !WEBP_NEON_OMIT_C_CODE -static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride, +static int DispatchAlpha_C(const uint8_t* WEBP_RESTRICT alpha, int alpha_stride, int width, int height, - uint8_t* dst, int dst_stride) { + uint8_t* WEBP_RESTRICT dst, int dst_stride) { 
uint32_t alpha_mask = 0xff; int i, j; @@ -309,9 +311,10 @@ static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride, return (alpha_mask != 0xff); } -static void DispatchAlphaToGreen_C(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint32_t* dst, int dst_stride) { +static void DispatchAlphaToGreen_C(const uint8_t* WEBP_RESTRICT alpha, + int alpha_stride, int width, int height, + uint32_t* WEBP_RESTRICT dst, + int dst_stride) { int i, j; for (j = 0; j < height; ++j) { for (i = 0; i < width; ++i) { @@ -322,9 +325,9 @@ static void DispatchAlphaToGreen_C(const uint8_t* alpha, int alpha_stride, } } -static int ExtractAlpha_C(const uint8_t* argb, int argb_stride, +static int ExtractAlpha_C(const uint8_t* WEBP_RESTRICT argb, int argb_stride, int width, int height, - uint8_t* alpha, int alpha_stride) { + uint8_t* WEBP_RESTRICT alpha, int alpha_stride) { uint8_t alpha_mask = 0xff; int i, j; @@ -340,7 +343,8 @@ static int ExtractAlpha_C(const uint8_t* argb, int argb_stride, return (alpha_mask == 0xff); } -static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) { +static void ExtractGreen_C(const uint32_t* WEBP_RESTRICT argb, + uint8_t* WEBP_RESTRICT alpha, int size) { int i; for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8; } @@ -359,6 +363,11 @@ static int HasAlpha32b_C(const uint8_t* src, int length) { return 0; } +static void AlphaReplace_C(uint32_t* src, int length, uint32_t color) { + int x; + for (x = 0; x < length; ++x) if ((src[x] >> 24) == 0) src[x] = color; +} + //------------------------------------------------------------------------------ // Simple channel manipulations. 
@@ -367,8 +376,11 @@ static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) { } #ifdef WORDS_BIGENDIAN -static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g, - const uint8_t* b, int len, uint32_t* out) { +static void PackARGB_C(const uint8_t* WEBP_RESTRICT a, + const uint8_t* WEBP_RESTRICT r, + const uint8_t* WEBP_RESTRICT g, + const uint8_t* WEBP_RESTRICT b, + int len, uint32_t* WEBP_RESTRICT out) { int i; for (i = 0; i < len; ++i) { out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]); @@ -376,8 +388,10 @@ static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g, } #endif -static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b, - int len, int step, uint32_t* out) { +static void PackRGB_C(const uint8_t* WEBP_RESTRICT r, + const uint8_t* WEBP_RESTRICT g, + const uint8_t* WEBP_RESTRICT b, + int len, int step, uint32_t* WEBP_RESTRICT out) { int i, offset = 0; for (i = 0; i < len; ++i) { out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]); @@ -387,19 +401,26 @@ static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b, void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int); void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int); -int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int); -void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int); -int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int); -void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size); +int (*WebPDispatchAlpha)(const uint8_t* WEBP_RESTRICT, int, int, int, + uint8_t* WEBP_RESTRICT, int); +void (*WebPDispatchAlphaToGreen)(const uint8_t* WEBP_RESTRICT, int, int, int, + uint32_t* WEBP_RESTRICT, int); +int (*WebPExtractAlpha)(const uint8_t* WEBP_RESTRICT, int, int, int, + uint8_t* WEBP_RESTRICT, int); +void (*WebPExtractGreen)(const uint32_t* WEBP_RESTRICT argb, + uint8_t* WEBP_RESTRICT alpha, int size); #ifdef 
WORDS_BIGENDIAN void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g, const uint8_t* b, int, uint32_t*); #endif -void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, - int len, int step, uint32_t* out); +void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r, + const uint8_t* WEBP_RESTRICT g, + const uint8_t* WEBP_RESTRICT b, + int len, int step, uint32_t* WEBP_RESTRICT out); int (*WebPHasAlpha8b)(const uint8_t* src, int length); int (*WebPHasAlpha32b)(const uint8_t* src, int length); +void (*WebPAlphaReplace)(uint32_t* src, int length, uint32_t color); //------------------------------------------------------------------------------ // Init function @@ -428,13 +449,14 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) { WebPHasAlpha8b = HasAlpha8b_C; WebPHasAlpha32b = HasAlpha32b_C; + WebPAlphaReplace = AlphaReplace_C; // If defined, use CPUInfo() to overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPInitAlphaProcessingSSE2(); -#if defined(WEBP_USE_SSE41) +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { WebPInitAlphaProcessingSSE41(); } @@ -448,7 +470,7 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { WebPInitAlphaProcessingNEON(); @@ -469,4 +491,5 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) { assert(WebPPackRGB != NULL); assert(WebPHasAlpha8b != NULL); assert(WebPHasAlpha32b != NULL); + assert(WebPAlphaReplace != NULL); } diff --git a/thirdparty/libwebp/src/dsp/alpha_processing_neon.c b/thirdparty/libwebp/src/dsp/alpha_processing_neon.c index 9d55421704..6716fb77f0 100644 --- a/thirdparty/libwebp/src/dsp/alpha_processing_neon.c +++ b/thirdparty/libwebp/src/dsp/alpha_processing_neon.c @@ -80,10 +80,10 @@ static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int 
alpha_first, //------------------------------------------------------------------------------ -static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint8_t* dst, int dst_stride) { - uint32_t alpha_mask = 0xffffffffu; +static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha, + int alpha_stride, int width, int height, + uint8_t* WEBP_RESTRICT dst, int dst_stride) { + uint32_t alpha_mask = 0xffu; uint8x8_t mask8 = vdup_n_u8(0xff); uint32_t tmp[2]; int i, j; @@ -107,14 +107,16 @@ static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride, dst += dst_stride; } vst1_u8((uint8_t*)tmp, mask8); + alpha_mask *= 0x01010101; alpha_mask &= tmp[0]; alpha_mask &= tmp[1]; return (alpha_mask != 0xffffffffu); } -static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint32_t* dst, int dst_stride) { +static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha, + int alpha_stride, int width, int height, + uint32_t* WEBP_RESTRICT dst, + int dst_stride) { int i, j; uint8x8x4_t greens; // leave A/R/B channels zero'd. 
greens.val[0] = vdup_n_u8(0); @@ -131,10 +133,10 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride, } } -static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride, +static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride, int width, int height, - uint8_t* alpha, int alpha_stride) { - uint32_t alpha_mask = 0xffffffffu; + uint8_t* WEBP_RESTRICT alpha, int alpha_stride) { + uint32_t alpha_mask = 0xffu; uint8x8_t mask8 = vdup_n_u8(0xff); uint32_t tmp[2]; int i, j; @@ -156,13 +158,14 @@ static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride, alpha += alpha_stride; } vst1_u8((uint8_t*)tmp, mask8); + alpha_mask *= 0x01010101; alpha_mask &= tmp[0]; alpha_mask &= tmp[1]; return (alpha_mask == 0xffffffffu); } -static void ExtractGreen_NEON(const uint32_t* argb, - uint8_t* alpha, int size) { +static void ExtractGreen_NEON(const uint32_t* WEBP_RESTRICT argb, + uint8_t* WEBP_RESTRICT alpha, int size) { int i; for (i = 0; i + 16 <= size; i += 16) { const uint8x16x4_t rgbX = vld4q_u8((const uint8_t*)(argb + i)); diff --git a/thirdparty/libwebp/src/dsp/alpha_processing_sse2.c b/thirdparty/libwebp/src/dsp/alpha_processing_sse2.c index 2871c56d84..a5f8c9f7c7 100644 --- a/thirdparty/libwebp/src/dsp/alpha_processing_sse2.c +++ b/thirdparty/libwebp/src/dsp/alpha_processing_sse2.c @@ -18,9 +18,9 @@ //------------------------------------------------------------------------------ -static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint8_t* dst, int dst_stride) { +static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha, + int alpha_stride, int width, int height, + uint8_t* WEBP_RESTRICT dst, int dst_stride) { // alpha_and stores an 'and' operation of all the alpha[] values. The final // value is not 0xff if any of the alpha[] is not equal to 0xff. 
uint32_t alpha_and = 0xff; @@ -72,9 +72,10 @@ static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride, return (alpha_and != 0xff); } -static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint32_t* dst, int dst_stride) { +static void DispatchAlphaToGreen_SSE2(const uint8_t* WEBP_RESTRICT alpha, + int alpha_stride, int width, int height, + uint32_t* WEBP_RESTRICT dst, + int dst_stride) { int i, j; const __m128i zero = _mm_setzero_si128(); const int limit = width & ~15; @@ -98,9 +99,9 @@ static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride, } } -static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride, +static int ExtractAlpha_SSE2(const uint8_t* WEBP_RESTRICT argb, int argb_stride, int width, int height, - uint8_t* alpha, int alpha_stride) { + uint8_t* WEBP_RESTRICT alpha, int alpha_stride) { // alpha_and stores an 'and' operation of all the alpha[] values. The final // value is not 0xff if any of the alpha[] is not equal to 0xff. 
uint32_t alpha_and = 0xff; @@ -265,6 +266,27 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) { return 0; } +static void AlphaReplace_SSE2(uint32_t* src, int length, uint32_t color) { + const __m128i m_color = _mm_set1_epi32(color); + const __m128i zero = _mm_setzero_si128(); + int i = 0; + for (; i + 8 <= length; i += 8) { + const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0)); + const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 4)); + const __m128i b0 = _mm_srai_epi32(a0, 24); + const __m128i b1 = _mm_srai_epi32(a1, 24); + const __m128i c0 = _mm_cmpeq_epi32(b0, zero); + const __m128i c1 = _mm_cmpeq_epi32(b1, zero); + const __m128i d0 = _mm_and_si128(c0, m_color); + const __m128i d1 = _mm_and_si128(c1, m_color); + const __m128i e0 = _mm_andnot_si128(c0, a0); + const __m128i e1 = _mm_andnot_si128(c1, a1); + _mm_storeu_si128((__m128i*)(src + i + 0), _mm_or_si128(d0, e0)); + _mm_storeu_si128((__m128i*)(src + i + 4), _mm_or_si128(d1, e1)); + } + for (; i < length; ++i) if ((src[i] >> 24) == 0) src[i] = color; +} + // ----------------------------------------------------------------------------- // Apply alpha value to rows @@ -296,7 +318,8 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) { if (width > 0) WebPMultARGBRow_C(ptr + x, width, inverse); } -static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha, +static void MultRow_SSE2(uint8_t* WEBP_RESTRICT const ptr, + const uint8_t* WEBP_RESTRICT const alpha, int width, int inverse) { int x = 0; if (!inverse) { @@ -334,6 +357,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) { WebPHasAlpha8b = HasAlpha8b_SSE2; WebPHasAlpha32b = HasAlpha32b_SSE2; + WebPAlphaReplace = AlphaReplace_SSE2; } #else // !WEBP_USE_SSE2 diff --git a/thirdparty/libwebp/src/dsp/alpha_processing_sse41.c b/thirdparty/libwebp/src/dsp/alpha_processing_sse41.c index 56040f9c88..cdf877ce49 100644 --- a/thirdparty/libwebp/src/dsp/alpha_processing_sse41.c 
+++ b/thirdparty/libwebp/src/dsp/alpha_processing_sse41.c @@ -19,9 +19,9 @@ //------------------------------------------------------------------------------ -static int ExtractAlpha_SSE41(const uint8_t* argb, int argb_stride, - int width, int height, - uint8_t* alpha, int alpha_stride) { +static int ExtractAlpha_SSE41(const uint8_t* WEBP_RESTRICT argb, + int argb_stride, int width, int height, + uint8_t* WEBP_RESTRICT alpha, int alpha_stride) { // alpha_and stores an 'and' operation of all the alpha[] values. The final // value is not 0xff if any of the alpha[] is not equal to 0xff. uint32_t alpha_and = 0xff; diff --git a/thirdparty/libwebp/src/dsp/cost.c b/thirdparty/libwebp/src/dsp/cost.c index cc681cdd4b..460ec4f2a7 100644 --- a/thirdparty/libwebp/src/dsp/cost.c +++ b/thirdparty/libwebp/src/dsp/cost.c @@ -395,12 +395,12 @@ WEBP_DSP_INIT_FUNC(VP8EncDspCostInit) { VP8EncDspCostInitMIPSdspR2(); } #endif -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8EncDspCostInitSSE2(); } #endif -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (VP8GetCPUInfo(kNEON)) { VP8EncDspCostInitNEON(); } diff --git a/thirdparty/libwebp/src/dsp/cpu.c b/thirdparty/libwebp/src/dsp/cpu.c index 0fa5b6a5ce..a4ba7f2cb7 100644 --- a/thirdparty/libwebp/src/dsp/cpu.c +++ b/thirdparty/libwebp/src/dsp/cpu.c @@ -11,7 +11,7 @@ // // Author: Christian Duvivier (cduvivier@google.com) -#include "src/dsp/dsp.h" +#include "src/dsp/cpu.h" #if defined(WEBP_HAVE_NEON_RTCD) #include <stdio.h> @@ -55,12 +55,18 @@ static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) : "a"(info_type), "c"(0)); } -#elif (defined(_M_X64) || defined(_M_IX86)) && \ - defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1 +#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + +#if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1 #include 
<intrin.h> #define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0 -#elif defined(WEBP_MSC_SSE2) +#define WEBP_HAVE_MSC_CPUID +#elif _MSC_VER > 1310 +#include <intrin.h> #define GetCPUInfo __cpuid +#define WEBP_HAVE_MSC_CPUID +#endif + #endif // NaCl has no support for xgetbv or the raw opcode. @@ -94,7 +100,7 @@ static WEBP_INLINE uint64_t xgetbv(void) { #define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. #endif -#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2) +#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_HAVE_MSC_CPUID) // helper function for run-time detection of slow SSSE3 platforms static int CheckSlowModel(int info) { @@ -179,9 +185,34 @@ static int AndroidCPUInfo(CPUFeature feature) { return 0; } VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo; -#elif defined(WEBP_USE_NEON) -// define a dummy function to enable turning off NEON at runtime by setting -// VP8DecGetCPUInfo = NULL +#elif defined(EMSCRIPTEN) // also needs to be before generic NEON test +// Use compile flags as an indicator of SIMD support instead of a runtime check. +static int wasmCPUInfo(CPUFeature feature) { + switch (feature) { +#ifdef WEBP_HAVE_SSE2 + case kSSE2: + return 1; +#endif +#ifdef WEBP_HAVE_SSE41 + case kSSE3: + case kSlowSSSE3: + case kSSE4_1: + return 1; +#endif +#ifdef WEBP_HAVE_NEON + case kNEON: + return 1; +#endif + default: + break; + } + return 0; +} +VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo; +#elif defined(WEBP_HAVE_NEON) +// In most cases this function doesn't check for NEON support (it's assumed by +// the configuration), but enables turning off NEON at runtime, for testing +// purposes, by setting VP8DecGetCPUInfo = NULL. 
static int armCPUInfo(CPUFeature feature) { if (feature != kNEON) return 0; #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD) diff --git a/thirdparty/libwebp/src/dsp/cpu.h b/thirdparty/libwebp/src/dsp/cpu.h new file mode 100644 index 0000000000..57a40d87d4 --- /dev/null +++ b/thirdparty/libwebp/src/dsp/cpu.h @@ -0,0 +1,254 @@ +// Copyright 2022 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// CPU detection functions and macros. +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_DSP_CPU_H_ +#define WEBP_DSP_CPU_H_ + +#ifdef HAVE_CONFIG_H +#include "src/webp/config.h" +#endif + +#include "src/webp/types.h" + +#if defined(__GNUC__) +#define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__) +#define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) +#else +#define LOCAL_GCC_VERSION 0 +#define LOCAL_GCC_PREREQ(maj, min) 0 +#endif + +#if defined(__clang__) +#define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__) +#define LOCAL_CLANG_PREREQ(maj, min) \ + (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min))) +#else +#define LOCAL_CLANG_VERSION 0 +#define LOCAL_CLANG_PREREQ(maj, min) 0 +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +#if !defined(HAVE_CONFIG_H) +#if defined(_MSC_VER) && _MSC_VER > 1310 && \ + (defined(_M_X64) || defined(_M_IX86)) +#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets +#endif + +#if defined(_MSC_VER) && _MSC_VER >= 1500 && \ + (defined(_M_X64) || defined(_M_IX86)) +#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets +#endif +#endif + +// WEBP_HAVE_* are used to indicate 
the presence of the instruction set in dsp +// files without intrinsics, allowing the corresponding Init() to be called. +// Files containing intrinsics will need to be built targeting the instruction +// set so should succeed on one of the earlier tests. +#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \ + (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2)) +#define WEBP_USE_SSE2 +#endif + +#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2) +#define WEBP_HAVE_SSE2 +#endif + +#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \ + (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41)) +#define WEBP_USE_SSE41 +#endif + +#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41) +#define WEBP_HAVE_SSE41 +#endif + +#undef WEBP_MSC_SSE41 +#undef WEBP_MSC_SSE2 + +// The intrinsics currently cause compiler errors with arm-nacl-gcc and the +// inline assembly would need to be modified for use with Native Client. +#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \ + (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \ + !defined(__native_client__) +#define WEBP_USE_NEON +#endif + +#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \ + defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H) +#define WEBP_ANDROID_NEON // Android targets that may have NEON +#define WEBP_USE_NEON +#endif + +// Note: ARM64 is supported in Visual Studio 2017, but requires the direct +// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in +// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with +// vtbl4_u8(); a fix was made in 16.6. 
+#if defined(_MSC_VER) && ((_MSC_VER >= 1700 && defined(_M_ARM)) || \ + (_MSC_VER >= 1926 && defined(_M_ARM64))) +#define WEBP_USE_NEON +#define WEBP_USE_INTRINSICS +#endif + +#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON) +#define WEBP_HAVE_NEON +#endif + +#if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \ + (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) +#define WEBP_USE_MIPS32 +#if (__mips_isa_rev >= 2) +#define WEBP_USE_MIPS32_R2 +#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2) +#define WEBP_USE_MIPS_DSP_R2 +#endif +#endif +#endif + +#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5) +#define WEBP_USE_MSA +#endif + +#ifndef WEBP_DSP_OMIT_C_CODE +#define WEBP_DSP_OMIT_C_CODE 1 +#endif + +#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE +#define WEBP_NEON_OMIT_C_CODE 1 +#else +#define WEBP_NEON_OMIT_C_CODE 0 +#endif + +#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || \ + defined(__aarch64__)) +#define WEBP_NEON_WORK_AROUND_GCC 1 +#else +#define WEBP_NEON_WORK_AROUND_GCC 0 +#endif + +// This macro prevents thread_sanitizer from reporting known concurrent writes. 
+#define WEBP_TSAN_IGNORE_FUNCTION +#if defined(__has_feature) +#if __has_feature(thread_sanitizer) +#undef WEBP_TSAN_IGNORE_FUNCTION +#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread)) +#endif +#endif + +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#define WEBP_MSAN +#endif +#endif + +#if defined(WEBP_USE_THREAD) && !defined(_WIN32) +#include <pthread.h> // NOLINT + +#define WEBP_DSP_INIT(func) \ + do { \ + static volatile VP8CPUInfo func##_last_cpuinfo_used = \ + (VP8CPUInfo)&func##_last_cpuinfo_used; \ + static pthread_mutex_t func##_lock = PTHREAD_MUTEX_INITIALIZER; \ + if (pthread_mutex_lock(&func##_lock)) break; \ + if (func##_last_cpuinfo_used != VP8GetCPUInfo) func(); \ + func##_last_cpuinfo_used = VP8GetCPUInfo; \ + (void)pthread_mutex_unlock(&func##_lock); \ + } while (0) +#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32)) +#define WEBP_DSP_INIT(func) \ + do { \ + static volatile VP8CPUInfo func##_last_cpuinfo_used = \ + (VP8CPUInfo)&func##_last_cpuinfo_used; \ + if (func##_last_cpuinfo_used == VP8GetCPUInfo) break; \ + func(); \ + func##_last_cpuinfo_used = VP8GetCPUInfo; \ + } while (0) +#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32) + +// Defines an Init + helper function that control multiple initialization of +// function pointers / tables. +/* Usage: + WEBP_DSP_INIT_FUNC(InitFunc) { + ...function body + } +*/ +#define WEBP_DSP_INIT_FUNC(name) \ + static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void); \ + WEBP_TSAN_IGNORE_FUNCTION void name(void) { WEBP_DSP_INIT(name##_body); } \ + static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void) + +#define WEBP_UBSAN_IGNORE_UNDEF +#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW +#if defined(__clang__) && defined(__has_attribute) +#if __has_attribute(no_sanitize) +// This macro prevents the undefined behavior sanitizer from reporting +// failures. This is only meant to silence unaligned loads on platforms that +// are known to support them. 
+#undef WEBP_UBSAN_IGNORE_UNDEF +#define WEBP_UBSAN_IGNORE_UNDEF __attribute__((no_sanitize("undefined"))) + +// This macro prevents the undefined behavior sanitizer from reporting +// failures related to unsigned integer overflows. This is only meant to +// silence cases where this well defined behavior is expected. +#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW +#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \ + __attribute__((no_sanitize("unsigned-integer-overflow"))) +#endif +#endif + +// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'. +// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning. +#if !defined(WEBP_OFFSET_PTR) +#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off))) +#endif + +// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility) +#if !defined(WEBP_SWAP_16BIT_CSP) +#define WEBP_SWAP_16BIT_CSP 0 +#endif + +// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) +#if !defined(WORDS_BIGENDIAN) && \ + (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ + (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) +#define WORDS_BIGENDIAN +#endif + +typedef enum { + kSSE2, + kSSE3, + kSlowSSSE3, // special feature for slow SSSE3 architectures + kSSE4_1, + kAVX, + kAVX2, + kNEON, + kMIPS32, + kMIPSdspR2, + kMSA +} CPUFeature; + +#ifdef __cplusplus +extern "C" { +#endif + +// returns true if the CPU supports the feature. +typedef int (*VP8CPUInfo)(CPUFeature feature); +WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // WEBP_DSP_CPU_H_ diff --git a/thirdparty/libwebp/src/dsp/dec.c b/thirdparty/libwebp/src/dsp/dec.c index 1119842dd3..537c701282 100644 --- a/thirdparty/libwebp/src/dsp/dec.c +++ b/thirdparty/libwebp/src/dsp/dec.c @@ -807,10 +807,10 @@ WEBP_DSP_INIT_FUNC(VP8DspInit) { // If defined, use CPUInfo() to overwrite some pointers with faster versions. 
if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8DspInitSSE2(); -#if defined(WEBP_USE_SSE41) +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { VP8DspInitSSE41(); } @@ -834,7 +834,7 @@ WEBP_DSP_INIT_FUNC(VP8DspInit) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { VP8DspInitNEON(); diff --git a/thirdparty/libwebp/src/dsp/dec_neon.c b/thirdparty/libwebp/src/dsp/dec_neon.c index 239ec4167e..fa851707e2 100644 --- a/thirdparty/libwebp/src/dsp/dec_neon.c +++ b/thirdparty/libwebp/src/dsp/dec_neon.c @@ -1283,12 +1283,12 @@ static void DC4_NEON(uint8_t* dst) { // DC const uint8x8_t A = vld1_u8(dst - BPS); // top row const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top const uint16x4_t p1 = vpadd_u16(p0, p0); - const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + 0 * BPS - 1)); - const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + 1 * BPS - 1)); - const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + 2 * BPS - 1)); - const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + 3 * BPS - 1)); - const uint16x8_t s0 = vaddq_u16(L0, L1); - const uint16x8_t s1 = vaddq_u16(L2, L3); + const uint8x8_t L0 = vld1_u8(dst + 0 * BPS - 1); + const uint8x8_t L1 = vld1_u8(dst + 1 * BPS - 1); + const uint8x8_t L2 = vld1_u8(dst + 2 * BPS - 1); + const uint8x8_t L3 = vld1_u8(dst + 3 * BPS - 1); + const uint16x8_t s0 = vaddl_u8(L0, L1); + const uint16x8_t s1 = vaddl_u8(L2, L3); const uint16x8_t s01 = vaddq_u16(s0, s1); const uint16x8_t sum = vaddq_u16(s01, vcombine_u16(p1, p1)); const uint8x8_t dc0 = vrshrn_n_u16(sum, 3); // (sum + 4) >> 3 @@ -1429,8 +1429,7 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) { if (do_top) { const uint8x8_t A = vld1_u8(dst - BPS); // top row #if defined(__aarch64__) - const uint16x8_t B = vmovl_u8(A); - const uint16_t p2 = vaddvq_u16(B); + const uint16_t p2 = vaddlv_u8(A); sum_top = 
vdupq_n_u16(p2); #else const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top @@ -1441,18 +1440,18 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) { } if (do_left) { - const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + 0 * BPS - 1)); - const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + 1 * BPS - 1)); - const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + 2 * BPS - 1)); - const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + 3 * BPS - 1)); - const uint16x8_t L4 = vmovl_u8(vld1_u8(dst + 4 * BPS - 1)); - const uint16x8_t L5 = vmovl_u8(vld1_u8(dst + 5 * BPS - 1)); - const uint16x8_t L6 = vmovl_u8(vld1_u8(dst + 6 * BPS - 1)); - const uint16x8_t L7 = vmovl_u8(vld1_u8(dst + 7 * BPS - 1)); - const uint16x8_t s0 = vaddq_u16(L0, L1); - const uint16x8_t s1 = vaddq_u16(L2, L3); - const uint16x8_t s2 = vaddq_u16(L4, L5); - const uint16x8_t s3 = vaddq_u16(L6, L7); + const uint8x8_t L0 = vld1_u8(dst + 0 * BPS - 1); + const uint8x8_t L1 = vld1_u8(dst + 1 * BPS - 1); + const uint8x8_t L2 = vld1_u8(dst + 2 * BPS - 1); + const uint8x8_t L3 = vld1_u8(dst + 3 * BPS - 1); + const uint8x8_t L4 = vld1_u8(dst + 4 * BPS - 1); + const uint8x8_t L5 = vld1_u8(dst + 5 * BPS - 1); + const uint8x8_t L6 = vld1_u8(dst + 6 * BPS - 1); + const uint8x8_t L7 = vld1_u8(dst + 7 * BPS - 1); + const uint16x8_t s0 = vaddl_u8(L0, L1); + const uint16x8_t s1 = vaddl_u8(L2, L3); + const uint16x8_t s2 = vaddl_u8(L4, L5); + const uint16x8_t s3 = vaddl_u8(L6, L7); const uint16x8_t s01 = vaddq_u16(s0, s1); const uint16x8_t s23 = vaddq_u16(s2, s3); sum_left = vaddq_u16(s01, s23); @@ -1512,29 +1511,34 @@ static WEBP_INLINE void DC16_NEON(uint8_t* dst, int do_top, int do_left) { if (do_top) { const uint8x16_t A = vld1q_u8(dst - BPS); // top row +#if defined(__aarch64__) + const uint16_t p3 = vaddlvq_u8(A); + sum_top = vdupq_n_u16(p3); +#else const uint16x8_t p0 = vpaddlq_u8(A); // cascading summation of the top const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); const uint16x4_t p2 = 
vpadd_u16(p1, p1); const uint16x4_t p3 = vpadd_u16(p2, p2); sum_top = vcombine_u16(p3, p3); +#endif } if (do_left) { int i; sum_left = vdupq_n_u16(0); for (i = 0; i < 16; i += 8) { - const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + (i + 0) * BPS - 1)); - const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + (i + 1) * BPS - 1)); - const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + (i + 2) * BPS - 1)); - const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + (i + 3) * BPS - 1)); - const uint16x8_t L4 = vmovl_u8(vld1_u8(dst + (i + 4) * BPS - 1)); - const uint16x8_t L5 = vmovl_u8(vld1_u8(dst + (i + 5) * BPS - 1)); - const uint16x8_t L6 = vmovl_u8(vld1_u8(dst + (i + 6) * BPS - 1)); - const uint16x8_t L7 = vmovl_u8(vld1_u8(dst + (i + 7) * BPS - 1)); - const uint16x8_t s0 = vaddq_u16(L0, L1); - const uint16x8_t s1 = vaddq_u16(L2, L3); - const uint16x8_t s2 = vaddq_u16(L4, L5); - const uint16x8_t s3 = vaddq_u16(L6, L7); + const uint8x8_t L0 = vld1_u8(dst + (i + 0) * BPS - 1); + const uint8x8_t L1 = vld1_u8(dst + (i + 1) * BPS - 1); + const uint8x8_t L2 = vld1_u8(dst + (i + 2) * BPS - 1); + const uint8x8_t L3 = vld1_u8(dst + (i + 3) * BPS - 1); + const uint8x8_t L4 = vld1_u8(dst + (i + 4) * BPS - 1); + const uint8x8_t L5 = vld1_u8(dst + (i + 5) * BPS - 1); + const uint8x8_t L6 = vld1_u8(dst + (i + 6) * BPS - 1); + const uint8x8_t L7 = vld1_u8(dst + (i + 7) * BPS - 1); + const uint16x8_t s0 = vaddl_u8(L0, L1); + const uint16x8_t s1 = vaddl_u8(L2, L3); + const uint16x8_t s2 = vaddl_u8(L4, L5); + const uint16x8_t s3 = vaddl_u8(L6, L7); const uint16x8_t s01 = vaddq_u16(s0, s1); const uint16x8_t s23 = vaddq_u16(s2, s3); const uint16x8_t sum = vaddq_u16(s01, s23); diff --git a/thirdparty/libwebp/src/dsp/dsp.h b/thirdparty/libwebp/src/dsp/dsp.h index a784de334a..d2000b8efc 100644 --- a/thirdparty/libwebp/src/dsp/dsp.h +++ b/thirdparty/libwebp/src/dsp/dsp.h @@ -18,6 +18,7 @@ #include "src/webp/config.h" #endif +#include "src/dsp/cpu.h" #include "src/webp/types.h" #ifdef __cplusplus @@ -27,199 +28,22 @@ extern 
"C" { #define BPS 32 // this is the common stride for enc/dec //------------------------------------------------------------------------------ -// CPU detection - +// WEBP_RESTRICT + +// Declares a pointer with the restrict type qualifier if available. +// This allows code to hint to the compiler that only this pointer references a +// particular object or memory region within the scope of the block in which it +// is declared. This may allow for improved optimizations due to the lack of +// pointer aliasing. See also: +// https://en.cppreference.com/w/c/language/restrict #if defined(__GNUC__) -# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__) -# define LOCAL_GCC_PREREQ(maj, min) \ - (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) -#else -# define LOCAL_GCC_VERSION 0 -# define LOCAL_GCC_PREREQ(maj, min) 0 -#endif - -#if defined(__clang__) -# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__) -# define LOCAL_CLANG_PREREQ(maj, min) \ - (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min))) +#define WEBP_RESTRICT __restrict__ +#elif defined(_MSC_VER) +#define WEBP_RESTRICT __restrict #else -# define LOCAL_CLANG_VERSION 0 -# define LOCAL_CLANG_PREREQ(maj, min) 0 -#endif - -#ifndef __has_builtin -# define __has_builtin(x) 0 -#endif - -// for now, none of the optimizations below are available in emscripten -#if !defined(EMSCRIPTEN) - -#if defined(_MSC_VER) && _MSC_VER > 1310 && \ - (defined(_M_X64) || defined(_M_IX86)) -#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets -#endif - -#if defined(_MSC_VER) && _MSC_VER >= 1500 && \ - (defined(_M_X64) || defined(_M_IX86)) -#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets -#endif - -// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp -// files without intrinsics, allowing the corresponding Init() to be called. -// Files containing intrinsics will need to be built targeting the instruction -// set so should succeed on one of the earlier tests. 
-#if defined(__SSE2__) || defined(WEBP_MSC_SSE2) || defined(WEBP_HAVE_SSE2) -#define WEBP_USE_SSE2 -#endif - -#if defined(__SSE4_1__) || defined(WEBP_MSC_SSE41) || defined(WEBP_HAVE_SSE41) -#define WEBP_USE_SSE41 -#endif - -// The intrinsics currently cause compiler errors with arm-nacl-gcc and the -// inline assembly would need to be modified for use with Native Client. -#if (defined(__ARM_NEON__) || \ - defined(__aarch64__) || defined(WEBP_HAVE_NEON)) && \ - !defined(__native_client__) -#define WEBP_USE_NEON -#endif - -#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \ - defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H) -#define WEBP_ANDROID_NEON // Android targets that may have NEON -#define WEBP_USE_NEON -#endif - -#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM) -#define WEBP_USE_NEON -#define WEBP_USE_INTRINSICS +#define WEBP_RESTRICT #endif -#if defined(__mips__) && !defined(__mips64) && \ - defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) -#define WEBP_USE_MIPS32 -#if (__mips_isa_rev >= 2) -#define WEBP_USE_MIPS32_R2 -#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2) -#define WEBP_USE_MIPS_DSP_R2 -#endif -#endif -#endif - -#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5) -#define WEBP_USE_MSA -#endif - -#endif /* EMSCRIPTEN */ - -#ifndef WEBP_DSP_OMIT_C_CODE -#define WEBP_DSP_OMIT_C_CODE 1 -#endif - -#if (defined(__aarch64__) || defined(__ARM_NEON__)) && WEBP_DSP_OMIT_C_CODE -#define WEBP_NEON_OMIT_C_CODE 1 -#else -#define WEBP_NEON_OMIT_C_CODE 0 -#endif - -#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__)) -#define WEBP_NEON_WORK_AROUND_GCC 1 -#else -#define WEBP_NEON_WORK_AROUND_GCC 0 -#endif - -// This macro prevents thread_sanitizer from reporting known concurrent writes. 
-#define WEBP_TSAN_IGNORE_FUNCTION -#if defined(__has_feature) -#if __has_feature(thread_sanitizer) -#undef WEBP_TSAN_IGNORE_FUNCTION -#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread)) -#endif -#endif - -#if defined(WEBP_USE_THREAD) && !defined(_WIN32) -#include <pthread.h> // NOLINT - -#define WEBP_DSP_INIT(func) do { \ - static volatile VP8CPUInfo func ## _last_cpuinfo_used = \ - (VP8CPUInfo)&func ## _last_cpuinfo_used; \ - static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \ - if (pthread_mutex_lock(&func ## _lock)) break; \ - if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func(); \ - func ## _last_cpuinfo_used = VP8GetCPUInfo; \ - (void)pthread_mutex_unlock(&func ## _lock); \ -} while (0) -#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32)) -#define WEBP_DSP_INIT(func) do { \ - static volatile VP8CPUInfo func ## _last_cpuinfo_used = \ - (VP8CPUInfo)&func ## _last_cpuinfo_used; \ - if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break; \ - func(); \ - func ## _last_cpuinfo_used = VP8GetCPUInfo; \ -} while (0) -#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32) - -// Defines an Init + helper function that control multiple initialization of -// function pointers / tables. -/* Usage: - WEBP_DSP_INIT_FUNC(InitFunc) { - ...function body - } -*/ -#define WEBP_DSP_INIT_FUNC(name) \ - static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \ - WEBP_TSAN_IGNORE_FUNCTION void name(void) { \ - WEBP_DSP_INIT(name ## _body); \ - } \ - static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void) - -#define WEBP_UBSAN_IGNORE_UNDEF -#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW -#if defined(__clang__) && defined(__has_attribute) -#if __has_attribute(no_sanitize) -// This macro prevents the undefined behavior sanitizer from reporting -// failures. This is only meant to silence unaligned loads on platforms that -// are known to support them. 
-#undef WEBP_UBSAN_IGNORE_UNDEF -#define WEBP_UBSAN_IGNORE_UNDEF \ - __attribute__((no_sanitize("undefined"))) - -// This macro prevents the undefined behavior sanitizer from reporting -// failures related to unsigned integer overflows. This is only meant to -// silence cases where this well defined behavior is expected. -#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW -#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \ - __attribute__((no_sanitize("unsigned-integer-overflow"))) -#endif -#endif - -// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility) -#if !defined(WEBP_SWAP_16BIT_CSP) -#define WEBP_SWAP_16BIT_CSP 0 -#endif - -// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) -#if !defined(WORDS_BIGENDIAN) && \ - (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ - (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) -#define WORDS_BIGENDIAN -#endif - -typedef enum { - kSSE2, - kSSE3, - kSlowSSSE3, // special feature for slow SSSE3 architectures - kSSE4_1, - kAVX, - kAVX2, - kNEON, - kMIPS32, - kMIPSdspR2, - kMSA -} CPUFeature; -// returns true if the CPU supports the feature. -typedef int (*VP8CPUInfo)(CPUFeature feature); -WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; //------------------------------------------------------------------------------ // Init stub generator @@ -508,15 +332,6 @@ extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v, extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb, uint8_t* u, uint8_t* v, int width); -// utilities for accurate RGB->YUV conversion -extern uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* src, const uint16_t* ref, - uint16_t* dst, int len); -extern void (*WebPSharpYUVUpdateRGB)(const int16_t* src, const int16_t* ref, - int16_t* dst, int len); -extern void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, - int len, - const uint16_t* best_y, uint16_t* out); - // Must be called before using the above. 
void WebPInitConvertARGBToYUV(void); @@ -572,26 +387,29 @@ extern void (*WebPApplyAlphaMultiply4444)( // Dispatch the values from alpha[] plane to the ARGB destination 'dst'. // Returns true if alpha[] plane has non-trivial values different from 0xff. -extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint8_t* dst, int dst_stride); +extern int (*WebPDispatchAlpha)(const uint8_t* WEBP_RESTRICT alpha, + int alpha_stride, int width, int height, + uint8_t* WEBP_RESTRICT dst, int dst_stride); // Transfer packed 8b alpha[] values to green channel in dst[], zero'ing the // A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units. -extern void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint32_t* dst, int dst_stride); +extern void (*WebPDispatchAlphaToGreen)(const uint8_t* WEBP_RESTRICT alpha, + int alpha_stride, int width, int height, + uint32_t* WEBP_RESTRICT dst, + int dst_stride); // Extract the alpha values from 32b values in argb[] and pack them into alpha[] // (this is the opposite of WebPDispatchAlpha). // Returns true if there's only trivial 0xff alpha values. -extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride, - int width, int height, - uint8_t* alpha, int alpha_stride); +extern int (*WebPExtractAlpha)(const uint8_t* WEBP_RESTRICT argb, + int argb_stride, int width, int height, + uint8_t* WEBP_RESTRICT alpha, + int alpha_stride); // Extract the green values from 32b values in argb[] and pack them into alpha[] // (this is the opposite of WebPDispatchAlphaToGreen). -extern void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size); +extern void (*WebPExtractGreen)(const uint32_t* WEBP_RESTRICT argb, + uint8_t* WEBP_RESTRICT alpha, int size); // Pre-Multiply operation transforms x into x * A / 255 (where x=Y,R,G or B). // Un-Multiply operation transforms x into x * 255 / A. 
@@ -604,34 +422,42 @@ void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows, int inverse); // Same for a row of single values, with side alpha values. -extern void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha, +extern void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr, + const uint8_t* WEBP_RESTRICT const alpha, int width, int inverse); // Same a WebPMultRow(), but for several 'num_rows' rows. -void WebPMultRows(uint8_t* ptr, int stride, - const uint8_t* alpha, int alpha_stride, +void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride, + const uint8_t* WEBP_RESTRICT alpha, int alpha_stride, int width, int num_rows, int inverse); // Plain-C versions, used as fallback by some implementations. -void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha, +void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr, + const uint8_t* WEBP_RESTRICT const alpha, int width, int inverse); void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse); #ifdef WORDS_BIGENDIAN // ARGB packing function: a/r/g/b input is rgba or bgra order. -extern void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, - const uint8_t* g, const uint8_t* b, int len, - uint32_t* out); +extern void (*WebPPackARGB)(const uint8_t* WEBP_RESTRICT a, + const uint8_t* WEBP_RESTRICT r, + const uint8_t* WEBP_RESTRICT g, + const uint8_t* WEBP_RESTRICT b, + int len, uint32_t* WEBP_RESTRICT out); #endif // RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order. -extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, - int len, int step, uint32_t* out); +extern void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r, + const uint8_t* WEBP_RESTRICT g, + const uint8_t* WEBP_RESTRICT b, + int len, int step, uint32_t* WEBP_RESTRICT out); // This function returns true if src[i] contains a value different from 0xff. 
extern int (*WebPHasAlpha8b)(const uint8_t* src, int length); // This function returns true if src[4*i] contains a value different from 0xff. extern int (*WebPHasAlpha32b)(const uint8_t* src, int length); +// replaces transparent values in src[] by 'color'. +extern void (*WebPAlphaReplace)(uint32_t* src, int length, uint32_t color); // To be called first before using the above. void WebPInitAlphaProcessing(void); diff --git a/thirdparty/libwebp/src/dsp/enc.c b/thirdparty/libwebp/src/dsp/enc.c index 2fddbc4c52..ea47a3fd95 100644 --- a/thirdparty/libwebp/src/dsp/enc.c +++ b/thirdparty/libwebp/src/dsp/enc.c @@ -773,10 +773,10 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) { // If defined, use CPUInfo() to overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8EncDspInitSSE2(); -#if defined(WEBP_USE_SSE41) +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { VP8EncDspInitSSE41(); } @@ -800,7 +800,7 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { VP8EncDspInitNEON(); diff --git a/thirdparty/libwebp/src/dsp/enc_neon.c b/thirdparty/libwebp/src/dsp/enc_neon.c index 43bf1245c5..601962ba76 100644 --- a/thirdparty/libwebp/src/dsp/enc_neon.c +++ b/thirdparty/libwebp/src/dsp/enc_neon.c @@ -9,7 +9,7 @@ // // ARM NEON version of speed-critical encoding functions. 
// -// adapted from libvpx (http://www.webmproject.org/code/) +// adapted from libvpx (https://www.webmproject.org/code/) #include "src/dsp/dsp.h" diff --git a/thirdparty/libwebp/src/dsp/filters.c b/thirdparty/libwebp/src/dsp/filters.c index 9e910d99c9..4506567ba3 100644 --- a/thirdparty/libwebp/src/dsp/filters.c +++ b/thirdparty/libwebp/src/dsp/filters.c @@ -254,7 +254,7 @@ WEBP_DSP_INIT_FUNC(VP8FiltersInit) { #endif if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8FiltersInitSSE2(); } @@ -271,7 +271,7 @@ WEBP_DSP_INIT_FUNC(VP8FiltersInit) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { VP8FiltersInitNEON(); diff --git a/thirdparty/libwebp/src/dsp/filters_sse2.c b/thirdparty/libwebp/src/dsp/filters_sse2.c index 4b3f2d020f..5c33ec15e2 100644 --- a/thirdparty/libwebp/src/dsp/filters_sse2.c +++ b/thirdparty/libwebp/src/dsp/filters_sse2.c @@ -320,7 +320,12 @@ extern void VP8FiltersInitSSE2(void); WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) { WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_SSE2; +#if defined(CHROMIUM) + // TODO(crbug.com/654974) + (void)VerticalUnfilter_SSE2; +#else WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_SSE2; +#endif WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_SSE2; WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_SSE2; diff --git a/thirdparty/libwebp/src/dsp/lossless.c b/thirdparty/libwebp/src/dsp/lossless.c index aad5f43ec9..84a54296fd 100644 --- a/thirdparty/libwebp/src/dsp/lossless.c +++ b/thirdparty/libwebp/src/dsp/lossless.c @@ -107,63 +107,77 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { //------------------------------------------------------------------------------ // Predictors -static uint32_t Predictor0_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor0_C(const uint32_t* 
const left, + const uint32_t* const top) { (void)top; (void)left; return ARGB_BLACK; } -static uint32_t Predictor1_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor1_C(const uint32_t* const left, + const uint32_t* const top) { (void)top; - return left; + return *left; } -static uint32_t Predictor2_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor2_C(const uint32_t* const left, + const uint32_t* const top) { (void)left; return top[0]; } -static uint32_t Predictor3_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor3_C(const uint32_t* const left, + const uint32_t* const top) { (void)left; return top[1]; } -static uint32_t Predictor4_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor4_C(const uint32_t* const left, + const uint32_t* const top) { (void)left; return top[-1]; } -static uint32_t Predictor5_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average3(left, top[0], top[1]); +uint32_t VP8LPredictor5_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average3(*left, top[0], top[1]); return pred; } -static uint32_t Predictor6_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(left, top[-1]); +uint32_t VP8LPredictor6_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average2(*left, top[-1]); return pred; } -static uint32_t Predictor7_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(left, top[0]); +uint32_t VP8LPredictor7_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average2(*left, top[0]); return pred; } -static uint32_t Predictor8_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor8_C(const uint32_t* const left, + const uint32_t* const top) { const uint32_t pred = Average2(top[-1], top[0]); (void)left; return pred; } -static uint32_t Predictor9_C(uint32_t left, const uint32_t* const 
top) { +uint32_t VP8LPredictor9_C(const uint32_t* const left, + const uint32_t* const top) { const uint32_t pred = Average2(top[0], top[1]); (void)left; return pred; } -static uint32_t Predictor10_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average4(left, top[-1], top[0], top[1]); +uint32_t VP8LPredictor10_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average4(*left, top[-1], top[0], top[1]); return pred; } -static uint32_t Predictor11_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Select(top[0], left, top[-1]); +uint32_t VP8LPredictor11_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Select(top[0], *left, top[-1]); return pred; } -static uint32_t Predictor12_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]); +uint32_t VP8LPredictor12_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = ClampedAddSubtractFull(*left, top[0], top[-1]); return pred; } -static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]); +uint32_t VP8LPredictor13_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = ClampedAddSubtractHalf(*left, top[0], top[-1]); return pred; } @@ -182,18 +196,18 @@ static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper, out[i] = left = VP8LAddPixels(in[i], left); } } -GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C) -GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C) -GENERATE_PREDICTOR_ADD(Predictor4_C, PredictorAdd4_C) -GENERATE_PREDICTOR_ADD(Predictor5_C, PredictorAdd5_C) -GENERATE_PREDICTOR_ADD(Predictor6_C, PredictorAdd6_C) -GENERATE_PREDICTOR_ADD(Predictor7_C, PredictorAdd7_C) -GENERATE_PREDICTOR_ADD(Predictor8_C, PredictorAdd8_C) -GENERATE_PREDICTOR_ADD(Predictor9_C, PredictorAdd9_C) 
-GENERATE_PREDICTOR_ADD(Predictor10_C, PredictorAdd10_C) -GENERATE_PREDICTOR_ADD(Predictor11_C, PredictorAdd11_C) -GENERATE_PREDICTOR_ADD(Predictor12_C, PredictorAdd12_C) -GENERATE_PREDICTOR_ADD(Predictor13_C, PredictorAdd13_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor2_C, PredictorAdd2_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor3_C, PredictorAdd3_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor4_C, PredictorAdd4_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor5_C, PredictorAdd5_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor6_C, PredictorAdd6_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor7_C, PredictorAdd7_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor8_C, PredictorAdd8_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor9_C, PredictorAdd9_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor10_C, PredictorAdd10_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor11_C, PredictorAdd11_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor12_C, PredictorAdd12_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor13_C, PredictorAdd13_C) //------------------------------------------------------------------------------ @@ -562,7 +576,6 @@ VP8LPredictorFunc VP8LPredictors[16]; // exposed plain-C implementations VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16]; -VP8LPredictorFunc VP8LPredictors_C[16]; VP8LTransformColorInverseFunc VP8LTransformColorInverse; @@ -576,6 +589,7 @@ VP8LMapARGBFunc VP8LMapColor32b; VP8LMapAlphaFunc VP8LMapColor8b; extern void VP8LDspInitSSE2(void); +extern void VP8LDspInitSSE41(void); extern void VP8LDspInitNEON(void); extern void VP8LDspInitMIPSdspR2(void); extern void VP8LDspInitMSA(void); @@ -600,8 +614,7 @@ extern void VP8LDspInitMSA(void); } while (0); WEBP_DSP_INIT_FUNC(VP8LDspInit) { - COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors) - COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors_C) + COPY_PREDICTOR_ARRAY(VP8LPredictor, VP8LPredictors) COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd) COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C) @@ -623,9 +636,14 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) { // If defined, use CPUInfo() to overwrite 
some pointers with faster versions. if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8LDspInitSSE2(); +#if defined(WEBP_HAVE_SSE41) + if (VP8GetCPUInfo(kSSE4_1)) { + VP8LDspInitSSE41(); + } +#endif } #endif #if defined(WEBP_USE_MIPS_DSP_R2) @@ -640,7 +658,7 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { VP8LDspInitNEON(); diff --git a/thirdparty/libwebp/src/dsp/lossless.h b/thirdparty/libwebp/src/dsp/lossless.h index f709cc86b2..de60d95d0b 100644 --- a/thirdparty/libwebp/src/dsp/lossless.h +++ b/thirdparty/libwebp/src/dsp/lossless.h @@ -28,9 +28,39 @@ extern "C" { //------------------------------------------------------------------------------ // Decoding -typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top); +typedef uint32_t (*VP8LPredictorFunc)(const uint32_t* const left, + const uint32_t* const top); extern VP8LPredictorFunc VP8LPredictors[16]; -extern VP8LPredictorFunc VP8LPredictors_C[16]; + +uint32_t VP8LPredictor0_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor1_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor2_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor3_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor4_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor5_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor6_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor7_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor8_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor9_C(const uint32_t* const left, + const uint32_t* const top); 
+uint32_t VP8LPredictor10_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor11_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor12_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor13_C(const uint32_t* const left, + const uint32_t* const top); + // These Add/Sub function expects upper[-1] and out[-1] to be readable. typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in, const uint32_t* upper, int num_pixels, @@ -152,9 +182,9 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16]; // ----------------------------------------------------------------------------- // Huffman-cost related functions. -typedef double (*VP8LCostFunc)(const uint32_t* population, int length); -typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y, - int length); +typedef float (*VP8LCostFunc)(const uint32_t* population, int length); +typedef float (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y, + int length); typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256], const int Y[256]); @@ -168,7 +198,7 @@ typedef struct { // small struct to hold counters } VP8LStreaks; typedef struct { // small struct to hold bit entropy results - double entropy; // entropy + float entropy; // entropy uint32_t sum; // sum of the population int nonzeros; // number of non-zero elements in the population uint32_t max_val; // maximum value in the population diff --git a/thirdparty/libwebp/src/dsp/lossless_common.h b/thirdparty/libwebp/src/dsp/lossless_common.h index 9c2ebe6809..6a2f736b5e 100644 --- a/thirdparty/libwebp/src/dsp/lossless_common.h +++ b/thirdparty/libwebp/src/dsp/lossless_common.h @@ -179,24 +179,11 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \ int x; \ assert(upper != NULL); \ for (x = 0; x < num_pixels; ++x) { \ - const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x); \ + const uint32_t pred = (PREDICTOR)(&out[x - 1], 
upper + x); \ out[x] = VP8LAddPixels(in[x], pred); \ } \ } -// It subtracts the prediction from the input pixel and stores the residual -// in the output pixel. -#define GENERATE_PREDICTOR_SUB(PREDICTOR, PREDICTOR_SUB) \ -static void PREDICTOR_SUB(const uint32_t* in, const uint32_t* upper, \ - int num_pixels, uint32_t* out) { \ - int x; \ - assert(upper != NULL); \ - for (x = 0; x < num_pixels; ++x) { \ - const uint32_t pred = (PREDICTOR)(in[x - 1], upper + x); \ - out[x] = VP8LSubPixels(in[x], pred); \ - } \ -} - #ifdef __cplusplus } // extern "C" #endif diff --git a/thirdparty/libwebp/src/dsp/lossless_enc.c b/thirdparty/libwebp/src/dsp/lossless_enc.c index 9c36055afc..de6c4ace5f 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc.c @@ -329,6 +329,15 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = { static float FastSLog2Slow_C(uint32_t v) { assert(v >= LOG_LOOKUP_IDX_MAX); if (v < APPROX_LOG_WITH_CORRECTION_MAX) { +#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ) + // use clz if available + const int log_cnt = BitsLog2Floor(v) - 7; + const uint32_t y = 1 << log_cnt; + int correction = 0; + const float v_f = (float)v; + const uint32_t orig_v = v; + v >>= log_cnt; +#else int log_cnt = 0; uint32_t y = 1; int correction = 0; @@ -339,6 +348,7 @@ static float FastSLog2Slow_C(uint32_t v) { v = v >> 1; y = y << 1; } while (v >= LOG_LOOKUP_IDX_MAX); +#endif // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256 // Xf = floor(Xf) * (1 + (v % y) / v) // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v) @@ -355,6 +365,14 @@ static float FastSLog2Slow_C(uint32_t v) { static float FastLog2Slow_C(uint32_t v) { assert(v >= LOG_LOOKUP_IDX_MAX); if (v < APPROX_LOG_WITH_CORRECTION_MAX) { +#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ) + // use clz if available + const int log_cnt = BitsLog2Floor(v) - 7; + const uint32_t y = 1 << log_cnt; + const uint32_t orig_v = v; + double log_2; + v >>= log_cnt; +#else int log_cnt = 0; 
uint32_t y = 1; const uint32_t orig_v = v; @@ -364,6 +382,7 @@ static float FastLog2Slow_C(uint32_t v) { v = v >> 1; y = y << 1; } while (v >= LOG_LOOKUP_IDX_MAX); +#endif log_2 = kLog2Table[v] + log_cnt; if (orig_v >= APPROX_LOG_MAX) { // Since the division is still expensive, add this correction factor only @@ -383,7 +402,7 @@ static float FastLog2Slow_C(uint32_t v) { // Compute the combined Shanon's entropy for distribution {X} and {X+Y} static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) { int i; - double retval = 0.; + float retval = 0.f; int sumX = 0, sumXY = 0; for (i = 0; i < 256; ++i) { const int x = X[i]; @@ -399,7 +418,7 @@ static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) { } } retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); - return (float)retval; + return retval; } void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) { @@ -617,17 +636,17 @@ void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits, //------------------------------------------------------------------------------ -static double ExtraCost_C(const uint32_t* population, int length) { +static float ExtraCost_C(const uint32_t* population, int length) { int i; - double cost = 0.; + float cost = 0.f; for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2]; return cost; } -static double ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y, +static float ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y, int length) { int i; - double cost = 0.; + float cost = 0.f; for (i = 2; i < length - 2; ++i) { const int xy = X[i + 2] + Y[i + 2]; cost += (i >> 1) * xy; @@ -702,140 +721,6 @@ void VP8LHistogramAdd(const VP8LHistogram* const a, //------------------------------------------------------------------------------ // Image transforms. 
-static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { - return (((a0 ^ a1) & 0xfefefefeu) >> 1) + (a0 & a1); -} - -static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { - return Average2(Average2(a0, a2), a1); -} - -static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, - uint32_t a2, uint32_t a3) { - return Average2(Average2(a0, a1), Average2(a2, a3)); -} - -static WEBP_INLINE uint32_t Clip255(uint32_t a) { - if (a < 256) { - return a; - } - // return 0, when a is a negative integer. - // return 255, when a is positive. - return ~a >> 24; -} - -static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) { - return Clip255(a + b - c); -} - -static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, - uint32_t c2) { - const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24); - const int r = AddSubtractComponentFull((c0 >> 16) & 0xff, - (c1 >> 16) & 0xff, - (c2 >> 16) & 0xff); - const int g = AddSubtractComponentFull((c0 >> 8) & 0xff, - (c1 >> 8) & 0xff, - (c2 >> 8) & 0xff); - const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff); - return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; -} - -static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) { - return Clip255(a + (a - b) / 2); -} - -static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, - uint32_t c2) { - const uint32_t ave = Average2(c0, c1); - const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24); - const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff); - const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff); - const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff); - return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; -} - -// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined. 
-#if defined(__arm__) && \ - (LOCAL_GCC_VERSION == 0x409 || LOCAL_GCC_VERSION == 0x408) -# define LOCAL_INLINE __attribute__ ((noinline)) -#else -# define LOCAL_INLINE WEBP_INLINE -#endif - -static LOCAL_INLINE int Sub3(int a, int b, int c) { - const int pb = b - c; - const int pa = a - c; - return abs(pb) - abs(pa); -} - -#undef LOCAL_INLINE - -static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { - const int pa_minus_pb = - Sub3((a >> 24) , (b >> 24) , (c >> 24) ) + - Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) + - Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) + - Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff); - return (pa_minus_pb <= 0) ? a : b; -} - -//------------------------------------------------------------------------------ -// Predictors - -static uint32_t Predictor2(uint32_t left, const uint32_t* const top) { - (void)left; - return top[0]; -} -static uint32_t Predictor3(uint32_t left, const uint32_t* const top) { - (void)left; - return top[1]; -} -static uint32_t Predictor4(uint32_t left, const uint32_t* const top) { - (void)left; - return top[-1]; -} -static uint32_t Predictor5(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average3(left, top[0], top[1]); - return pred; -} -static uint32_t Predictor6(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(left, top[-1]); - return pred; -} -static uint32_t Predictor7(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(left, top[0]); - return pred; -} -static uint32_t Predictor8(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(top[-1], top[0]); - (void)left; - return pred; -} -static uint32_t Predictor9(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(top[0], top[1]); - (void)left; - return pred; -} -static uint32_t Predictor10(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average4(left, top[-1], top[0], top[1]); - 
return pred; -} -static uint32_t Predictor11(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Select(top[0], left, top[-1]); - return pred; -} -static uint32_t Predictor12(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]); - return pred; -} -static uint32_t Predictor13(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]); - return pred; -} - -//------------------------------------------------------------------------------ - static void PredictorSub0_C(const uint32_t* in, const uint32_t* upper, int num_pixels, uint32_t* out) { int i; @@ -850,18 +735,33 @@ static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper, (void)upper; } -GENERATE_PREDICTOR_SUB(Predictor2, PredictorSub2_C) -GENERATE_PREDICTOR_SUB(Predictor3, PredictorSub3_C) -GENERATE_PREDICTOR_SUB(Predictor4, PredictorSub4_C) -GENERATE_PREDICTOR_SUB(Predictor5, PredictorSub5_C) -GENERATE_PREDICTOR_SUB(Predictor6, PredictorSub6_C) -GENERATE_PREDICTOR_SUB(Predictor7, PredictorSub7_C) -GENERATE_PREDICTOR_SUB(Predictor8, PredictorSub8_C) -GENERATE_PREDICTOR_SUB(Predictor9, PredictorSub9_C) -GENERATE_PREDICTOR_SUB(Predictor10, PredictorSub10_C) -GENERATE_PREDICTOR_SUB(Predictor11, PredictorSub11_C) -GENERATE_PREDICTOR_SUB(Predictor12, PredictorSub12_C) -GENERATE_PREDICTOR_SUB(Predictor13, PredictorSub13_C) +// It subtracts the prediction from the input pixel and stores the residual +// in the output pixel. 
+#define GENERATE_PREDICTOR_SUB(PREDICTOR_I) \ +static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \ + const uint32_t* upper, \ + int num_pixels, uint32_t* out) { \ + int x; \ + assert(upper != NULL); \ + for (x = 0; x < num_pixels; ++x) { \ + const uint32_t pred = \ + VP8LPredictor##PREDICTOR_I##_C(&in[x - 1], upper + x); \ + out[x] = VP8LSubPixels(in[x], pred); \ + } \ +} + +GENERATE_PREDICTOR_SUB(2) +GENERATE_PREDICTOR_SUB(3) +GENERATE_PREDICTOR_SUB(4) +GENERATE_PREDICTOR_SUB(5) +GENERATE_PREDICTOR_SUB(6) +GENERATE_PREDICTOR_SUB(7) +GENERATE_PREDICTOR_SUB(8) +GENERATE_PREDICTOR_SUB(9) +GENERATE_PREDICTOR_SUB(10) +GENERATE_PREDICTOR_SUB(11) +GENERATE_PREDICTOR_SUB(12) +GENERATE_PREDICTOR_SUB(13) //------------------------------------------------------------------------------ @@ -962,10 +862,10 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) { // If defined, use CPUInfo() to overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8LEncDspInitSSE2(); -#if defined(WEBP_USE_SSE41) +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { VP8LEncDspInitSSE41(); } @@ -989,7 +889,7 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { VP8LEncDspInitNEON(); diff --git a/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c b/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c index 0412a093cf..639f786631 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c @@ -103,8 +103,8 @@ static float FastLog2Slow_MIPS32(uint32_t v) { // cost += i * *(pop + 1); // pop += 2; // } -// return (double)cost; -static double ExtraCost_MIPS32(const uint32_t* const population, int length) { +// return (float)cost; +static float ExtraCost_MIPS32(const uint32_t* const population, int length) { int i, 
temp0, temp1; const uint32_t* pop = &population[4]; const uint32_t* const LoopEnd = &population[length]; @@ -130,7 +130,7 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) { : "memory", "hi", "lo" ); - return (double)((int64_t)temp0 << 32 | temp1); + return (float)((int64_t)temp0 << 32 | temp1); } // C version of this function: @@ -148,9 +148,9 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) { // pX += 2; // pY += 2; // } -// return (double)cost; -static double ExtraCostCombined_MIPS32(const uint32_t* const X, - const uint32_t* const Y, int length) { +// return (float)cost; +static float ExtraCostCombined_MIPS32(const uint32_t* const X, + const uint32_t* const Y, int length) { int i, temp0, temp1, temp2, temp3; const uint32_t* pX = &X[4]; const uint32_t* pY = &Y[4]; @@ -183,7 +183,7 @@ static double ExtraCostCombined_MIPS32(const uint32_t* const X, : "memory", "hi", "lo" ); - return (double)((int64_t)temp0 << 32 | temp1); + return (float)((int64_t)temp0 << 32 | temp1); } #define HUFFMAN_COST_PASS \ @@ -347,24 +347,24 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[], static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb, uint32_t* pout, int size) { uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; - const uint32_t end = ((size) / 4) * 4; + const int end = ((size) / 4) * 4; const uint32_t* const LoopEnd = pa + end; int i; ASM_START ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout) ASM_END_0 - for (i = end; i < size; ++i) pout[i] = pa[i] + pb[i]; + for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i]; } static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) { uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; - const uint32_t end = ((size) / 4) * 4; + const int end = ((size) / 4) * 4; const uint32_t* const LoopEnd = pa + end; int i; ASM_START ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout) ASM_END_1 - for (i = end; i < size; ++i) 
pout[i] += pa[i]; + for (i = 0; i < size - end; ++i) pout[i] += pa[i]; } #undef ASM_END_1 diff --git a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c index e676f6fdc9..948001a3d5 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c @@ -232,76 +232,55 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) { //------------------------------------------------------------------------------ // Entropy -// Checks whether the X or Y contribution is worth computing and adding. -// Used in loop unrolling. -#define ANALYZE_X_OR_Y(x_or_y, j) \ - do { \ - if ((x_or_y)[i + (j)] != 0) retval -= VP8LFastSLog2((x_or_y)[i + (j)]); \ - } while (0) - -// Checks whether the X + Y contribution is worth computing and adding. -// Used in loop unrolling. -#define ANALYZE_XY(j) \ - do { \ - if (tmp[j] != 0) { \ - retval -= VP8LFastSLog2(tmp[j]); \ - ANALYZE_X_OR_Y(X, j); \ - } \ - } while (0) +// TODO(https://crbug.com/webp/499): this function produces different results +// from the C code due to use of double/float resulting in output differences +// when compared to -noasm. +#if !(defined(WEBP_HAVE_SLOW_CLZ_CTZ) || defined(__i386__) || defined(_M_IX86)) static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) { int i; - double retval = 0.; - int sumX, sumXY; - int32_t tmp[4]; - __m128i zero = _mm_setzero_si128(); - // Sums up X + Y, 4 ints at a time (and will merge it at the end for sumXY). - __m128i sumXY_128 = zero; - __m128i sumX_128 = zero; - - for (i = 0; i < 256; i += 4) { - const __m128i x = _mm_loadu_si128((const __m128i*)(X + i)); - const __m128i y = _mm_loadu_si128((const __m128i*)(Y + i)); - - // Check if any X is non-zero: this actually provides a speedup as X is - // usually sparse. 
- if (_mm_movemask_epi8(_mm_cmpeq_epi32(x, zero)) != 0xFFFF) { - const __m128i xy_128 = _mm_add_epi32(x, y); - sumXY_128 = _mm_add_epi32(sumXY_128, xy_128); - - sumX_128 = _mm_add_epi32(sumX_128, x); - - // Analyze the different X + Y. - _mm_storeu_si128((__m128i*)tmp, xy_128); - - ANALYZE_XY(0); - ANALYZE_XY(1); - ANALYZE_XY(2); - ANALYZE_XY(3); - } else { - // X is fully 0, so only deal with Y. - sumXY_128 = _mm_add_epi32(sumXY_128, y); - - ANALYZE_X_OR_Y(Y, 0); - ANALYZE_X_OR_Y(Y, 1); - ANALYZE_X_OR_Y(Y, 2); - ANALYZE_X_OR_Y(Y, 3); + float retval = 0.f; + int sumX = 0, sumXY = 0; + const __m128i zero = _mm_setzero_si128(); + + for (i = 0; i < 256; i += 16) { + const __m128i x0 = _mm_loadu_si128((const __m128i*)(X + i + 0)); + const __m128i y0 = _mm_loadu_si128((const __m128i*)(Y + i + 0)); + const __m128i x1 = _mm_loadu_si128((const __m128i*)(X + i + 4)); + const __m128i y1 = _mm_loadu_si128((const __m128i*)(Y + i + 4)); + const __m128i x2 = _mm_loadu_si128((const __m128i*)(X + i + 8)); + const __m128i y2 = _mm_loadu_si128((const __m128i*)(Y + i + 8)); + const __m128i x3 = _mm_loadu_si128((const __m128i*)(X + i + 12)); + const __m128i y3 = _mm_loadu_si128((const __m128i*)(Y + i + 12)); + const __m128i x4 = _mm_packs_epi16(_mm_packs_epi32(x0, x1), + _mm_packs_epi32(x2, x3)); + const __m128i y4 = _mm_packs_epi16(_mm_packs_epi32(y0, y1), + _mm_packs_epi32(y2, y3)); + const int32_t mx = _mm_movemask_epi8(_mm_cmpgt_epi8(x4, zero)); + int32_t my = _mm_movemask_epi8(_mm_cmpgt_epi8(y4, zero)) | mx; + while (my) { + const int32_t j = BitsCtz(my); + int xy; + if ((mx >> j) & 1) { + const int x = X[i + j]; + sumXY += x; + retval -= VP8LFastSLog2(x); + } + xy = X[i + j] + Y[i + j]; + sumX += xy; + retval -= VP8LFastSLog2(xy); + my &= my - 1; } } + retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); + return retval; +} - // Sum up sumX_128 to get sumX. 
- _mm_storeu_si128((__m128i*)tmp, sumX_128); - sumX = tmp[3] + tmp[2] + tmp[1] + tmp[0]; +#else - // Sum up sumXY_128 to get sumXY. - _mm_storeu_si128((__m128i*)tmp, sumXY_128); - sumXY = tmp[3] + tmp[2] + tmp[1] + tmp[0]; +#define DONT_USE_COMBINED_SHANNON_ENTROPY_SSE2_FUNC // won't be faster - retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); - return (float)retval; -} -#undef ANALYZE_X_OR_Y -#undef ANALYZE_XY +#endif //------------------------------------------------------------------------------ @@ -460,20 +439,22 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper, (void)upper; } -#define GENERATE_PREDICTOR_1(X, IN) \ -static void PredictorSub##X##_SSE2(const uint32_t* in, const uint32_t* upper, \ - int num_pixels, uint32_t* out) { \ - int i; \ - for (i = 0; i + 4 <= num_pixels; i += 4) { \ - const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \ - const __m128i pred = _mm_loadu_si128((const __m128i*)&(IN)); \ - const __m128i res = _mm_sub_epi8(src, pred); \ - _mm_storeu_si128((__m128i*)&out[i], res); \ - } \ - if (i != num_pixels) { \ - VP8LPredictorsSub_C[(X)](in + i, upper + i, num_pixels - i, out + i); \ - } \ -} +#define GENERATE_PREDICTOR_1(X, IN) \ + static void PredictorSub##X##_SSE2(const uint32_t* const in, \ + const uint32_t* const upper, \ + int num_pixels, uint32_t* const out) { \ + int i; \ + for (i = 0; i + 4 <= num_pixels; i += 4) { \ + const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \ + const __m128i pred = _mm_loadu_si128((const __m128i*)&(IN)); \ + const __m128i res = _mm_sub_epi8(src, pred); \ + _mm_storeu_si128((__m128i*)&out[i], res); \ + } \ + if (i != num_pixels) { \ + VP8LPredictorsSub_C[(X)](in + i, WEBP_OFFSET_PTR(upper, i), \ + num_pixels - i, out + i); \ + } \ + } GENERATE_PREDICTOR_1(1, in[i - 1]) // Predictor1: L GENERATE_PREDICTOR_1(2, upper[i]) // Predictor2: T @@ -657,7 +638,9 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) { VP8LCollectColorRedTransforms = 
CollectColorRedTransforms_SSE2; VP8LAddVector = AddVector_SSE2; VP8LAddVectorEq = AddVectorEq_SSE2; +#if !defined(DONT_USE_COMBINED_SHANNON_ENTROPY_SSE2_FUNC) VP8LCombinedShannonEntropy = CombinedShannonEntropy_SSE2; +#endif VP8LVectorMismatch = VectorMismatch_SSE2; VP8LBundleColorMap = BundleColorMap_SSE2; diff --git a/thirdparty/libwebp/src/dsp/lossless_enc_sse41.c b/thirdparty/libwebp/src/dsp/lossless_enc_sse41.c index 719d8ed25e..ad358a6f25 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc_sse41.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse41.c @@ -44,46 +44,47 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data, //------------------------------------------------------------------------------ // Color Transform -#define SPAN 8 +#define MK_CST_16(HI, LO) \ + _mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff))) + static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride, int tile_width, int tile_height, int green_to_blue, int red_to_blue, int histo[]) { - const __m128i mults_r = _mm_set1_epi16(CST_5b(red_to_blue)); - const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_blue)); - const __m128i mask_g = _mm_set1_epi16((short)0xff00); // green mask - const __m128i mask_gb = _mm_set1_epi32(0xffff); // green/blue mask - const __m128i mask_b = _mm_set1_epi16(0x00ff); // blue mask - const __m128i shuffler_lo = _mm_setr_epi8(-1, 2, -1, 6, -1, 10, -1, 14, -1, - -1, -1, -1, -1, -1, -1, -1); - const __m128i shuffler_hi = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, - 2, -1, 6, -1, 10, -1, 14); - int y; - for (y = 0; y < tile_height; ++y) { - const uint32_t* const src = argb + y * stride; - int i, x; - for (x = 0; x + SPAN <= tile_width; x += SPAN) { - uint16_t values[SPAN]; - const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]); - const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]); - const __m128i r0 = _mm_shuffle_epi8(in0, shuffler_lo); - const __m128i r1 = _mm_shuffle_epi8(in1, 
shuffler_hi); - const __m128i r = _mm_or_si128(r0, r1); // r 0 - const __m128i gb0 = _mm_and_si128(in0, mask_gb); - const __m128i gb1 = _mm_and_si128(in1, mask_gb); - const __m128i gb = _mm_packus_epi32(gb0, gb1); // g b - const __m128i g = _mm_and_si128(gb, mask_g); // g 0 - const __m128i A = _mm_mulhi_epi16(r, mults_r); // x dbr - const __m128i B = _mm_mulhi_epi16(g, mults_g); // x dbg - const __m128i C = _mm_sub_epi8(gb, B); // x b' - const __m128i D = _mm_sub_epi8(C, A); // x b'' - const __m128i E = _mm_and_si128(D, mask_b); // 0 b'' - _mm_storeu_si128((__m128i*)values, E); - for (i = 0; i < SPAN; ++i) ++histo[values[i]]; + const __m128i mult = + MK_CST_16(CST_5b(red_to_blue) + 256,CST_5b(green_to_blue)); + const __m128i perm = + _mm_setr_epi8(-1, 1, -1, 2, -1, 5, -1, 6, -1, 9, -1, 10, -1, 13, -1, 14); + if (tile_width >= 4) { + int y; + for (y = 0; y < tile_height; ++y) { + const uint32_t* const src = argb + y * stride; + const __m128i A1 = _mm_loadu_si128((const __m128i*)src); + const __m128i B1 = _mm_shuffle_epi8(A1, perm); + const __m128i C1 = _mm_mulhi_epi16(B1, mult); + const __m128i D1 = _mm_sub_epi16(A1, C1); + __m128i E = _mm_add_epi16(_mm_srli_epi32(D1, 16), D1); + int x; + for (x = 4; x + 4 <= tile_width; x += 4) { + const __m128i A2 = _mm_loadu_si128((const __m128i*)(src + x)); + __m128i B2, C2, D2; + ++histo[_mm_extract_epi8(E, 0)]; + B2 = _mm_shuffle_epi8(A2, perm); + ++histo[_mm_extract_epi8(E, 4)]; + C2 = _mm_mulhi_epi16(B2, mult); + ++histo[_mm_extract_epi8(E, 8)]; + D2 = _mm_sub_epi16(A2, C2); + ++histo[_mm_extract_epi8(E, 12)]; + E = _mm_add_epi16(_mm_srli_epi32(D2, 16), D2); + } + ++histo[_mm_extract_epi8(E, 0)]; + ++histo[_mm_extract_epi8(E, 4)]; + ++histo[_mm_extract_epi8(E, 8)]; + ++histo[_mm_extract_epi8(E, 12)]; } } { - const int left_over = tile_width & (SPAN - 1); + const int left_over = tile_width & 3; if (left_over > 0) { VP8LCollectColorBlueTransforms_C(argb + tile_width - left_over, stride, left_over, tile_height, @@ -95,33 +96,37 
@@ static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride, static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride, int tile_width, int tile_height, int green_to_red, int histo[]) { - const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_red)); - const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask - const __m128i mask = _mm_set1_epi16(0xff); - - int y; - for (y = 0; y < tile_height; ++y) { - const uint32_t* const src = argb + y * stride; - int i, x; - for (x = 0; x + SPAN <= tile_width; x += SPAN) { - uint16_t values[SPAN]; - const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]); - const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]); - const __m128i g0 = _mm_and_si128(in0, mask_g); // 0 0 | g 0 - const __m128i g1 = _mm_and_si128(in1, mask_g); - const __m128i g = _mm_packus_epi32(g0, g1); // g 0 - const __m128i A0 = _mm_srli_epi32(in0, 16); // 0 0 | x r - const __m128i A1 = _mm_srli_epi32(in1, 16); - const __m128i A = _mm_packus_epi32(A0, A1); // x r - const __m128i B = _mm_mulhi_epi16(g, mults_g); // x dr - const __m128i C = _mm_sub_epi8(A, B); // x r' - const __m128i D = _mm_and_si128(C, mask); // 0 r' - _mm_storeu_si128((__m128i*)values, D); - for (i = 0; i < SPAN; ++i) ++histo[values[i]]; + + const __m128i mult = MK_CST_16(0, CST_5b(green_to_red)); + const __m128i mask_g = _mm_set1_epi32(0x0000ff00); + if (tile_width >= 4) { + int y; + for (y = 0; y < tile_height; ++y) { + const uint32_t* const src = argb + y * stride; + const __m128i A1 = _mm_loadu_si128((const __m128i*)src); + const __m128i B1 = _mm_and_si128(A1, mask_g); + const __m128i C1 = _mm_madd_epi16(B1, mult); + __m128i D = _mm_sub_epi16(A1, C1); + int x; + for (x = 4; x + 4 <= tile_width; x += 4) { + const __m128i A2 = _mm_loadu_si128((const __m128i*)(src + x)); + __m128i B2, C2; + ++histo[_mm_extract_epi8(D, 2)]; + B2 = _mm_and_si128(A2, mask_g); + ++histo[_mm_extract_epi8(D, 6)]; + C2 = _mm_madd_epi16(B2, mult); + 
++histo[_mm_extract_epi8(D, 10)]; + ++histo[_mm_extract_epi8(D, 14)]; + D = _mm_sub_epi16(A2, C2); + } + ++histo[_mm_extract_epi8(D, 2)]; + ++histo[_mm_extract_epi8(D, 6)]; + ++histo[_mm_extract_epi8(D, 10)]; + ++histo[_mm_extract_epi8(D, 14)]; } } { - const int left_over = tile_width & (SPAN - 1); + const int left_over = tile_width & 3; if (left_over > 0) { VP8LCollectColorRedTransforms_C(argb + tile_width - left_over, stride, left_over, tile_height, green_to_red, @@ -130,6 +135,8 @@ static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride, } } +#undef MK_CST_16 + //------------------------------------------------------------------------------ // Entry point diff --git a/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c b/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c index 9888854d57..bfe5ea6b38 100644 --- a/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c +++ b/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c @@ -188,46 +188,51 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, return Average2(Average2(a0, a1), Average2(a2, a3)); } -static uint32_t Predictor5_MIPSdspR2(uint32_t left, const uint32_t* const top) { - return Average3(left, top[0], top[1]); +static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left, + const uint32_t* const top) { + return Average3(*left, top[0], top[1]); } -static uint32_t Predictor6_MIPSdspR2(uint32_t left, const uint32_t* const top) { - return Average2(left, top[-1]); +static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left, + const uint32_t* const top) { + return Average2(*left, top[-1]); } -static uint32_t Predictor7_MIPSdspR2(uint32_t left, const uint32_t* const top) { - return Average2(left, top[0]); +static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left, + const uint32_t* const top) { + return Average2(*left, top[0]); } -static uint32_t Predictor8_MIPSdspR2(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor8_MIPSdspR2(const uint32_t* const 
left, + const uint32_t* const top) { (void)left; return Average2(top[-1], top[0]); } -static uint32_t Predictor9_MIPSdspR2(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left, + const uint32_t* const top) { (void)left; return Average2(top[0], top[1]); } -static uint32_t Predictor10_MIPSdspR2(uint32_t left, +static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left, const uint32_t* const top) { - return Average4(left, top[-1], top[0], top[1]); + return Average4(*left, top[-1], top[0], top[1]); } -static uint32_t Predictor11_MIPSdspR2(uint32_t left, +static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left, const uint32_t* const top) { - return Select(top[0], left, top[-1]); + return Select(top[0], *left, top[-1]); } -static uint32_t Predictor12_MIPSdspR2(uint32_t left, +static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left, const uint32_t* const top) { - return ClampedAddSubtractFull(left, top[0], top[-1]); + return ClampedAddSubtractFull(*left, top[0], top[-1]); } -static uint32_t Predictor13_MIPSdspR2(uint32_t left, +static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left, const uint32_t* const top) { - return ClampedAddSubtractHalf(left, top[0], top[-1]); + return ClampedAddSubtractHalf(*left, top[0], top[-1]); } // Add green to blue and red channels (i.e. 
perform the inverse transform of diff --git a/thirdparty/libwebp/src/dsp/lossless_neon.c b/thirdparty/libwebp/src/dsp/lossless_neon.c index 76a1b6f873..89e3e013a0 100644 --- a/thirdparty/libwebp/src/dsp/lossless_neon.c +++ b/thirdparty/libwebp/src/dsp/lossless_neon.c @@ -188,17 +188,21 @@ static WEBP_INLINE uint32_t Average3_NEON(uint32_t a0, uint32_t a1, return avg; } -static uint32_t Predictor5_NEON(uint32_t left, const uint32_t* const top) { - return Average3_NEON(left, top[0], top[1]); +static uint32_t Predictor5_NEON(const uint32_t* const left, + const uint32_t* const top) { + return Average3_NEON(*left, top[0], top[1]); } -static uint32_t Predictor6_NEON(uint32_t left, const uint32_t* const top) { - return Average2_NEON(left, top[-1]); +static uint32_t Predictor6_NEON(const uint32_t* const left, + const uint32_t* const top) { + return Average2_NEON(*left, top[-1]); } -static uint32_t Predictor7_NEON(uint32_t left, const uint32_t* const top) { - return Average2_NEON(left, top[0]); +static uint32_t Predictor7_NEON(const uint32_t* const left, + const uint32_t* const top) { + return Average2_NEON(*left, top[0]); } -static uint32_t Predictor13_NEON(uint32_t left, const uint32_t* const top) { - return ClampedAddSubtractHalf_NEON(left, top[0], top[-1]); +static uint32_t Predictor13_NEON(const uint32_t* const left, + const uint32_t* const top) { + return ClampedAddSubtractHalf_NEON(*left, top[0], top[-1]); } // Batch versions of those functions. 
diff --git a/thirdparty/libwebp/src/dsp/lossless_sse2.c b/thirdparty/libwebp/src/dsp/lossless_sse2.c index aef0cee1b3..396cb0bdfc 100644 --- a/thirdparty/libwebp/src/dsp/lossless_sse2.c +++ b/thirdparty/libwebp/src/dsp/lossless_sse2.c @@ -18,7 +18,6 @@ #include "src/dsp/common_sse2.h" #include "src/dsp/lossless.h" #include "src/dsp/lossless_common.h" -#include <assert.h> #include <emmintrin.h> //------------------------------------------------------------------------------ @@ -139,42 +138,51 @@ static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1, return output; } -static uint32_t Predictor5_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average3_SSE2(left, top[0], top[1]); +static uint32_t Predictor5_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average3_SSE2(*left, top[0], top[1]); return pred; } -static uint32_t Predictor6_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2_SSE2(left, top[-1]); +static uint32_t Predictor6_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average2_SSE2(*left, top[-1]); return pred; } -static uint32_t Predictor7_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2_SSE2(left, top[0]); +static uint32_t Predictor7_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average2_SSE2(*left, top[0]); return pred; } -static uint32_t Predictor8_SSE2(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor8_SSE2(const uint32_t* const left, + const uint32_t* const top) { const uint32_t pred = Average2_SSE2(top[-1], top[0]); (void)left; return pred; } -static uint32_t Predictor9_SSE2(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor9_SSE2(const uint32_t* const left, + const uint32_t* const top) { const uint32_t pred = Average2_SSE2(top[0], top[1]); (void)left; return pred; } -static uint32_t 
Predictor10_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average4_SSE2(left, top[-1], top[0], top[1]); +static uint32_t Predictor10_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average4_SSE2(*left, top[-1], top[0], top[1]); return pred; } -static uint32_t Predictor11_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Select_SSE2(top[0], left, top[-1]); +static uint32_t Predictor11_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Select_SSE2(top[0], *left, top[-1]); return pred; } -static uint32_t Predictor12_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractFull_SSE2(left, top[0], top[-1]); +static uint32_t Predictor12_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = ClampedAddSubtractFull_SSE2(*left, top[0], top[-1]); return pred; } -static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractHalf_SSE2(left, top[0], top[-1]); +static uint32_t Predictor13_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = ClampedAddSubtractHalf_SSE2(*left, top[0], top[-1]); return pred; } diff --git a/thirdparty/libwebp/src/dsp/lossless_sse41.c b/thirdparty/libwebp/src/dsp/lossless_sse41.c new file mode 100644 index 0000000000..b0d6daa7fe --- /dev/null +++ b/thirdparty/libwebp/src/dsp/lossless_sse41.c @@ -0,0 +1,132 @@ +// Copyright 2021 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
+// ----------------------------------------------------------------------------- +// +// SSE41 variant of methods for lossless decoder + +#include "src/dsp/dsp.h" + +#if defined(WEBP_USE_SSE41) + +#include "src/dsp/common_sse41.h" +#include "src/dsp/lossless.h" +#include "src/dsp/lossless_common.h" + +//------------------------------------------------------------------------------ +// Color-space conversion functions + +static void TransformColorInverse_SSE41(const VP8LMultipliers* const m, + const uint32_t* const src, + int num_pixels, uint32_t* dst) { +// sign-extended multiplying constants, pre-shifted by 5. +#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend + const __m128i mults_rb = _mm_set1_epi32((uint32_t)CST(green_to_red_) << 16 | + (CST(green_to_blue_) & 0xffff)); + const __m128i mults_b2 = _mm_set1_epi32(CST(red_to_blue_)); +#undef CST + const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); + const __m128i perm1 = _mm_setr_epi8(-1, 1, -1, 1, -1, 5, -1, 5, + -1, 9, -1, 9, -1, 13, -1, 13); + const __m128i perm2 = _mm_setr_epi8(-1, 2, -1, -1, -1, 6, -1, -1, + -1, 10, -1, -1, -1, 14, -1, -1); + int i; + for (i = 0; i + 4 <= num_pixels; i += 4) { + const __m128i A = _mm_loadu_si128((const __m128i*)(src + i)); + const __m128i B = _mm_shuffle_epi8(A, perm1); // argb -> g0g0 + const __m128i C = _mm_mulhi_epi16(B, mults_rb); + const __m128i D = _mm_add_epi8(A, C); + const __m128i E = _mm_shuffle_epi8(D, perm2); + const __m128i F = _mm_mulhi_epi16(E, mults_b2); + const __m128i G = _mm_add_epi8(D, F); + const __m128i out = _mm_blendv_epi8(G, A, mask_ag); + _mm_storeu_si128((__m128i*)&dst[i], out); + } + // Fall-back to C-version for left-overs. 
+ if (i != num_pixels) { + VP8LTransformColorInverse_C(m, src + i, num_pixels - i, dst + i); + } +} + +//------------------------------------------------------------------------------ + +#define ARGB_TO_RGB_SSE41 do { \ + while (num_pixels >= 16) { \ + const __m128i in0 = _mm_loadu_si128(in + 0); \ + const __m128i in1 = _mm_loadu_si128(in + 1); \ + const __m128i in2 = _mm_loadu_si128(in + 2); \ + const __m128i in3 = _mm_loadu_si128(in + 3); \ + const __m128i a0 = _mm_shuffle_epi8(in0, perm0); \ + const __m128i a1 = _mm_shuffle_epi8(in1, perm1); \ + const __m128i a2 = _mm_shuffle_epi8(in2, perm2); \ + const __m128i a3 = _mm_shuffle_epi8(in3, perm3); \ + const __m128i b0 = _mm_blend_epi16(a0, a1, 0xc0); \ + const __m128i b1 = _mm_blend_epi16(a1, a2, 0xf0); \ + const __m128i b2 = _mm_blend_epi16(a2, a3, 0xfc); \ + _mm_storeu_si128(out + 0, b0); \ + _mm_storeu_si128(out + 1, b1); \ + _mm_storeu_si128(out + 2, b2); \ + in += 4; \ + out += 3; \ + num_pixels -= 16; \ + } \ +} while (0) + +static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels, + uint8_t* dst) { + const __m128i* in = (const __m128i*)src; + __m128i* out = (__m128i*)dst; + const __m128i perm0 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, + 8, 14, 13, 12, -1, -1, -1, -1); + const __m128i perm1 = _mm_shuffle_epi32(perm0, 0x39); + const __m128i perm2 = _mm_shuffle_epi32(perm0, 0x4e); + const __m128i perm3 = _mm_shuffle_epi32(perm0, 0x93); + + ARGB_TO_RGB_SSE41; + + // left-overs + if (num_pixels > 0) { + VP8LConvertBGRAToRGB_C((const uint32_t*)in, num_pixels, (uint8_t*)out); + } +} + +static void ConvertBGRAToBGR_SSE41(const uint32_t* src, + int num_pixels, uint8_t* dst) { + const __m128i* in = (const __m128i*)src; + __m128i* out = (__m128i*)dst; + const __m128i perm0 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, + 12, 13, 14, -1, -1, -1, -1); + const __m128i perm1 = _mm_shuffle_epi32(perm0, 0x39); + const __m128i perm2 = _mm_shuffle_epi32(perm0, 0x4e); + const __m128i perm3 = _mm_shuffle_epi32(perm0, 
0x93); + + ARGB_TO_RGB_SSE41; + + // left-overs + if (num_pixels > 0) { + VP8LConvertBGRAToBGR_C((const uint32_t*)in, num_pixels, (uint8_t*)out); + } +} + +#undef ARGB_TO_RGB_SSE41 + +//------------------------------------------------------------------------------ +// Entry point + +extern void VP8LDspInitSSE41(void); + +WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE41(void) { + VP8LTransformColorInverse = TransformColorInverse_SSE41; + VP8LConvertBGRAToRGB = ConvertBGRAToRGB_SSE41; + VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE41; +} + +#else // !WEBP_USE_SSE41 + +WEBP_DSP_INIT_STUB(VP8LDspInitSSE41) + +#endif // WEBP_USE_SSE41 diff --git a/thirdparty/libwebp/src/dsp/msa_macro.h b/thirdparty/libwebp/src/dsp/msa_macro.h index de026a1d9e..51f6c643ab 100644 --- a/thirdparty/libwebp/src/dsp/msa_macro.h +++ b/thirdparty/libwebp/src/dsp/msa_macro.h @@ -14,6 +14,10 @@ #ifndef WEBP_DSP_MSA_MACRO_H_ #define WEBP_DSP_MSA_MACRO_H_ +#include "src/dsp/dsp.h" + +#if defined(WEBP_USE_MSA) + #include <stdint.h> #include <msa.h> @@ -1389,4 +1393,5 @@ static WEBP_INLINE uint32_t func_hadd_uh_u32(v8u16 in) { } while (0) #define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__) +#endif // WEBP_USE_MSA #endif // WEBP_DSP_MSA_MACRO_H_ diff --git a/thirdparty/libwebp/src/dsp/neon.h b/thirdparty/libwebp/src/dsp/neon.h index aa1dea1301..c591f9b9a7 100644 --- a/thirdparty/libwebp/src/dsp/neon.h +++ b/thirdparty/libwebp/src/dsp/neon.h @@ -12,10 +12,12 @@ #ifndef WEBP_DSP_NEON_H_ #define WEBP_DSP_NEON_H_ -#include <arm_neon.h> - #include "src/dsp/dsp.h" +#if defined(WEBP_USE_NEON) + +#include <arm_neon.h> + // Right now, some intrinsics functions seem slower, so we disable them // everywhere except newer clang/gcc or aarch64 where the inline assembly is // incompatible. 
@@ -98,4 +100,5 @@ static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) { } while (0) #endif +#endif // WEBP_USE_NEON #endif // WEBP_DSP_NEON_H_ diff --git a/thirdparty/libwebp/src/dsp/rescaler.c b/thirdparty/libwebp/src/dsp/rescaler.c index c5a01e82df..14620ce4f1 100644 --- a/thirdparty/libwebp/src/dsp/rescaler.c +++ b/thirdparty/libwebp/src/dsp/rescaler.c @@ -38,8 +38,9 @@ void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk, int x_out = channel; // simple bilinear interpolation int accum = wrk->x_add; - int left = src[x_in]; - int right = (wrk->src_width > 1) ? src[x_in + x_stride] : left; + rescaler_t left = (rescaler_t)src[x_in]; + rescaler_t right = + (wrk->src_width > 1) ? (rescaler_t)src[x_in + x_stride] : left; x_in += x_stride; while (1) { wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum; @@ -50,7 +51,7 @@ void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk, left = right; x_in += x_stride; assert(x_in < wrk->src_width * x_stride); - right = src[x_in]; + right = (rescaler_t)src[x_in]; accum += wrk->x_add; } } @@ -213,7 +214,7 @@ WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) { WebPRescalerImportRowShrink = WebPRescalerImportRowShrink_C; if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPRescalerDspInitSSE2(); } @@ -235,7 +236,7 @@ WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { WebPRescalerDspInitNEON(); diff --git a/thirdparty/libwebp/src/dsp/ssim.c b/thirdparty/libwebp/src/dsp/ssim.c index 989ce8254c..f85c2e6e5b 100644 --- a/thirdparty/libwebp/src/dsp/ssim.c +++ b/thirdparty/libwebp/src/dsp/ssim.c @@ -150,7 +150,7 @@ WEBP_DSP_INIT_FUNC(VP8SSIMDspInit) { #endif if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { VP8SSIMDspInitSSE2(); } diff --git 
a/thirdparty/libwebp/src/dsp/upsampling.c b/thirdparty/libwebp/src/dsp/upsampling.c index 9b60da5bbb..87f771f3eb 100644 --- a/thirdparty/libwebp/src/dsp/upsampling.c +++ b/thirdparty/libwebp/src/dsp/upsampling.c @@ -233,12 +233,12 @@ WEBP_DSP_INIT_FUNC(WebPInitYUV444Converters) { WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444_C; if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPInitYUV444ConvertersSSE2(); } #endif -#if defined(WEBP_USE_SSE41) +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { WebPInitYUV444ConvertersSSE41(); } @@ -278,12 +278,12 @@ WEBP_DSP_INIT_FUNC(WebPInitUpsamplers) { // If defined, use CPUInfo() to overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPInitUpsamplersSSE2(); } #endif -#if defined(WEBP_USE_SSE41) +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { WebPInitUpsamplersSSE41(); } @@ -300,7 +300,7 @@ WEBP_DSP_INIT_FUNC(WebPInitUpsamplers) { #endif } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { WebPInitUpsamplersNEON(); diff --git a/thirdparty/libwebp/src/dsp/yuv.c b/thirdparty/libwebp/src/dsp/yuv.c index 14e67fc28e..d16c13d3ca 100644 --- a/thirdparty/libwebp/src/dsp/yuv.c +++ b/thirdparty/libwebp/src/dsp/yuv.c @@ -90,16 +90,16 @@ WEBP_DSP_INIT_FUNC(WebPInitSamplers) { // If defined, use CPUInfo() to overwrite some pointers with faster versions. 
if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPInitSamplersSSE2(); } -#endif // WEBP_USE_SSE2 -#if defined(WEBP_USE_SSE41) +#endif // WEBP_HAVE_SSE2 +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { WebPInitSamplersSSE41(); } -#endif // WEBP_USE_SSE41 +#endif // WEBP_HAVE_SSE41 #if defined(WEBP_USE_MIPS32) if (VP8GetCPUInfo(kMIPS32)) { WebPInitSamplersMIPS32(); @@ -194,50 +194,6 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb, //----------------------------------------------------------------------------- -#if !WEBP_NEON_OMIT_C_CODE -#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic -static uint16_t clip_y(int v) { - return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; -} - -static uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src, - uint16_t* dst, int len) { - uint64_t diff = 0; - int i; - for (i = 0; i < len; ++i) { - const int diff_y = ref[i] - src[i]; - const int new_y = (int)dst[i] + diff_y; - dst[i] = clip_y(new_y); - diff += (uint64_t)abs(diff_y); - } - return diff; -} - -static void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src, - int16_t* dst, int len) { - int i; - for (i = 0; i < len; ++i) { - const int diff_uv = ref[i] - src[i]; - dst[i] += diff_uv; - } -} - -static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len, - const uint16_t* best_y, uint16_t* out) { - int i; - for (i = 0; i < len; ++i, ++A, ++B) { - const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4; - const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4; - out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0); - out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1); - } -} -#endif // !WEBP_NEON_OMIT_C_CODE - -#undef MAX_Y - -//----------------------------------------------------------------------------- - void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width); void (*WebPConvertBGR24ToY)(const uint8_t* bgr, 
uint8_t* y, int width); void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb, @@ -247,18 +203,9 @@ void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width); void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v, int src_width, int do_store); -uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src, - uint16_t* dst, int len); -void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src, - int16_t* dst, int len); -void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len, - const uint16_t* best_y, uint16_t* out); - extern void WebPInitConvertARGBToYUVSSE2(void); extern void WebPInitConvertARGBToYUVSSE41(void); extern void WebPInitConvertARGBToYUVNEON(void); -extern void WebPInitSharpYUVSSE2(void); -extern void WebPInitSharpYUVNEON(void); WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { WebPConvertARGBToY = ConvertARGBToY_C; @@ -269,40 +216,29 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C; -#if !WEBP_NEON_OMIT_C_CODE - WebPSharpYUVUpdateY = SharpYUVUpdateY_C; - WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C; - WebPSharpYUVFilterRow = SharpYUVFilterRow_C; -#endif - if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPInitConvertARGBToYUVSSE2(); - WebPInitSharpYUVSSE2(); } -#endif // WEBP_USE_SSE2 -#if defined(WEBP_USE_SSE41) +#endif // WEBP_HAVE_SSE2 +#if defined(WEBP_HAVE_SSE41) if (VP8GetCPUInfo(kSSE4_1)) { WebPInitConvertARGBToYUVSSE41(); } -#endif // WEBP_USE_SSE41 +#endif // WEBP_HAVE_SSE41 } -#if defined(WEBP_USE_NEON) +#if defined(WEBP_HAVE_NEON) if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { WebPInitConvertARGBToYUVNEON(); - WebPInitSharpYUVNEON(); } -#endif // WEBP_USE_NEON +#endif // WEBP_HAVE_NEON assert(WebPConvertARGBToY != NULL); assert(WebPConvertARGBToUV != NULL); assert(WebPConvertRGB24ToY != NULL); assert(WebPConvertBGR24ToY != NULL); 
assert(WebPConvertRGBA32ToUV != NULL); - assert(WebPSharpYUVUpdateY != NULL); - assert(WebPSharpYUVUpdateRGB != NULL); - assert(WebPSharpYUVFilterRow != NULL); } diff --git a/thirdparty/libwebp/src/dsp/yuv.h b/thirdparty/libwebp/src/dsp/yuv.h index c12be1d094..66a397d117 100644 --- a/thirdparty/libwebp/src/dsp/yuv.h +++ b/thirdparty/libwebp/src/dsp/yuv.h @@ -10,7 +10,7 @@ // inline YUV<->RGB conversion function // // The exact naming is Y'CbCr, following the ITU-R BT.601 standard. -// More information at: http://en.wikipedia.org/wiki/YCbCr +// More information at: https://en.wikipedia.org/wiki/YCbCr // Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16 // U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128 // V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128 diff --git a/thirdparty/libwebp/src/dsp/yuv_neon.c b/thirdparty/libwebp/src/dsp/yuv_neon.c index a34d60248f..ff77b00980 100644 --- a/thirdparty/libwebp/src/dsp/yuv_neon.c +++ b/thirdparty/libwebp/src/dsp/yuv_neon.c @@ -173,116 +173,8 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVNEON(void) { WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_NEON; } -//------------------------------------------------------------------------------ - -#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic -static uint16_t clip_y_NEON(int v) { - return (v < 0) ? 0 : (v > MAX_Y) ? 
MAX_Y : (uint16_t)v; -} - -static uint64_t SharpYUVUpdateY_NEON(const uint16_t* ref, const uint16_t* src, - uint16_t* dst, int len) { - int i; - const int16x8_t zero = vdupq_n_s16(0); - const int16x8_t max = vdupq_n_s16(MAX_Y); - uint64x2_t sum = vdupq_n_u64(0); - uint64_t diff; - - for (i = 0; i + 8 <= len; i += 8) { - const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i)); - const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i)); - const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i)); - const int16x8_t D = vsubq_s16(A, B); // diff_y - const int16x8_t F = vaddq_s16(C, D); // new_y - const uint16x8_t H = - vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero)); - const int16x8_t I = vabsq_s16(D); // abs(diff_y) - vst1q_u16(dst + i, H); - sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I))); - } - diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1); - for (; i < len; ++i) { - const int diff_y = ref[i] - src[i]; - const int new_y = (int)(dst[i]) + diff_y; - dst[i] = clip_y_NEON(new_y); - diff += (uint64_t)(abs(diff_y)); - } - return diff; -} - -static void SharpYUVUpdateRGB_NEON(const int16_t* ref, const int16_t* src, - int16_t* dst, int len) { - int i; - for (i = 0; i + 8 <= len; i += 8) { - const int16x8_t A = vld1q_s16(ref + i); - const int16x8_t B = vld1q_s16(src + i); - const int16x8_t C = vld1q_s16(dst + i); - const int16x8_t D = vsubq_s16(A, B); // diff_uv - const int16x8_t E = vaddq_s16(C, D); // new_uv - vst1q_s16(dst + i, E); - } - for (; i < len; ++i) { - const int diff_uv = ref[i] - src[i]; - dst[i] += diff_uv; - } -} - -static void SharpYUVFilterRow_NEON(const int16_t* A, const int16_t* B, int len, - const uint16_t* best_y, uint16_t* out) { - int i; - const int16x8_t max = vdupq_n_s16(MAX_Y); - const int16x8_t zero = vdupq_n_s16(0); - for (i = 0; i + 8 <= len; i += 8) { - const int16x8_t a0 = vld1q_s16(A + i + 0); - const int16x8_t a1 = vld1q_s16(A + i + 1); - const int16x8_t b0 = vld1q_s16(B + i + 0); - const 
int16x8_t b1 = vld1q_s16(B + i + 1); - const int16x8_t a0b1 = vaddq_s16(a0, b1); - const int16x8_t a1b0 = vaddq_s16(a1, b0); - const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0); // A0+A1+B0+B1 - const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1); // 2*(A0+B1) - const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0); // 2*(A1+B0) - const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3); - const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3); - const int16x8_t d0 = vaddq_s16(c1, a0); - const int16x8_t d1 = vaddq_s16(c0, a1); - const int16x8_t e0 = vrshrq_n_s16(d0, 1); - const int16x8_t e1 = vrshrq_n_s16(d1, 1); - const int16x8x2_t f = vzipq_s16(e0, e1); - const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0)); - const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8)); - const int16x8_t h0 = vaddq_s16(g0, f.val[0]); - const int16x8_t h1 = vaddq_s16(g1, f.val[1]); - const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero); - const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero); - vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0)); - vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1)); - } - for (; i < len; ++i) { - const int a0b1 = A[i + 0] + B[i + 1]; - const int a1b0 = A[i + 1] + B[i + 0]; - const int a0a1b0b1 = a0b1 + a1b0 + 8; - const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; - const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; - out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0); - out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1); - } -} -#undef MAX_Y - -//------------------------------------------------------------------------------ - -extern void WebPInitSharpYUVNEON(void); - -WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVNEON(void) { - WebPSharpYUVUpdateY = SharpYUVUpdateY_NEON; - WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_NEON; - WebPSharpYUVFilterRow = SharpYUVFilterRow_NEON; -} - #else // !WEBP_USE_NEON WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVNEON) 
-WEBP_DSP_INIT_STUB(WebPInitSharpYUVNEON) #endif // WEBP_USE_NEON diff --git a/thirdparty/libwebp/src/dsp/yuv_sse2.c b/thirdparty/libwebp/src/dsp/yuv_sse2.c index baa48d5371..970bbb7884 100644 --- a/thirdparty/libwebp/src/dsp/yuv_sse2.c +++ b/thirdparty/libwebp/src/dsp/yuv_sse2.c @@ -747,128 +747,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) { WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE2; } -//------------------------------------------------------------------------------ - -#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic -static uint16_t clip_y(int v) { - return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; -} - -static uint64_t SharpYUVUpdateY_SSE2(const uint16_t* ref, const uint16_t* src, - uint16_t* dst, int len) { - uint64_t diff = 0; - uint32_t tmp[4]; - int i; - const __m128i zero = _mm_setzero_si128(); - const __m128i max = _mm_set1_epi16(MAX_Y); - const __m128i one = _mm_set1_epi16(1); - __m128i sum = zero; - - for (i = 0; i + 8 <= len; i += 8) { - const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i)); - const __m128i B = _mm_loadu_si128((const __m128i*)(src + i)); - const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i)); - const __m128i D = _mm_sub_epi16(A, B); // diff_y - const __m128i E = _mm_cmpgt_epi16(zero, D); // sign (-1 or 0) - const __m128i F = _mm_add_epi16(C, D); // new_y - const __m128i G = _mm_or_si128(E, one); // -1 or 1 - const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero); - const __m128i I = _mm_madd_epi16(D, G); // sum(abs(...)) - _mm_storeu_si128((__m128i*)(dst + i), H); - sum = _mm_add_epi32(sum, I); - } - _mm_storeu_si128((__m128i*)tmp, sum); - diff = tmp[3] + tmp[2] + tmp[1] + tmp[0]; - for (; i < len; ++i) { - const int diff_y = ref[i] - src[i]; - const int new_y = (int)dst[i] + diff_y; - dst[i] = clip_y(new_y); - diff += (uint64_t)abs(diff_y); - } - return diff; -} - -static void SharpYUVUpdateRGB_SSE2(const int16_t* ref, const int16_t* src, - int16_t* dst, int 
len) { - int i = 0; - for (i = 0; i + 8 <= len; i += 8) { - const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i)); - const __m128i B = _mm_loadu_si128((const __m128i*)(src + i)); - const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i)); - const __m128i D = _mm_sub_epi16(A, B); // diff_uv - const __m128i E = _mm_add_epi16(C, D); // new_uv - _mm_storeu_si128((__m128i*)(dst + i), E); - } - for (; i < len; ++i) { - const int diff_uv = ref[i] - src[i]; - dst[i] += diff_uv; - } -} - -static void SharpYUVFilterRow_SSE2(const int16_t* A, const int16_t* B, int len, - const uint16_t* best_y, uint16_t* out) { - int i; - const __m128i kCst8 = _mm_set1_epi16(8); - const __m128i max = _mm_set1_epi16(MAX_Y); - const __m128i zero = _mm_setzero_si128(); - for (i = 0; i + 8 <= len; i += 8) { - const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0)); - const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1)); - const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0)); - const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1)); - const __m128i a0b1 = _mm_add_epi16(a0, b1); - const __m128i a1b0 = _mm_add_epi16(a1, b0); - const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0); // A0+A1+B0+B1 - const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8); - const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1); // 2*(A0+B1) - const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0); // 2*(A1+B0) - const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3); - const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3); - const __m128i d0 = _mm_add_epi16(c1, a0); - const __m128i d1 = _mm_add_epi16(c0, a1); - const __m128i e0 = _mm_srai_epi16(d0, 1); - const __m128i e1 = _mm_srai_epi16(d1, 1); - const __m128i f0 = _mm_unpacklo_epi16(e0, e1); - const __m128i f1 = _mm_unpackhi_epi16(e0, e1); - const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0)); - const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8)); - 
const __m128i h0 = _mm_add_epi16(g0, f0); - const __m128i h1 = _mm_add_epi16(g1, f1); - const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero); - const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero); - _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0); - _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1); - } - for (; i < len; ++i) { - // (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 = - // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4 - // We reuse the common sub-expressions. - const int a0b1 = A[i + 0] + B[i + 1]; - const int a1b0 = A[i + 1] + B[i + 0]; - const int a0a1b0b1 = a0b1 + a1b0 + 8; - const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; - const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; - out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0); - out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1); - } -} - -#undef MAX_Y - -//------------------------------------------------------------------------------ - -extern void WebPInitSharpYUVSSE2(void); - -WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVSSE2(void) { - WebPSharpYUVUpdateY = SharpYUVUpdateY_SSE2; - WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_SSE2; - WebPSharpYUVFilterRow = SharpYUVFilterRow_SSE2; -} - #else // !WEBP_USE_SSE2 WEBP_DSP_INIT_STUB(WebPInitSamplersSSE2) WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE2) -WEBP_DSP_INIT_STUB(WebPInitSharpYUVSSE2) #endif // WEBP_USE_SSE2 diff --git a/thirdparty/libwebp/src/enc/alpha_enc.c b/thirdparty/libwebp/src/enc/alpha_enc.c index dce9ca957d..f7c02690e3 100644 --- a/thirdparty/libwebp/src/enc/alpha_enc.c +++ b/thirdparty/libwebp/src/enc/alpha_enc.c @@ -86,7 +86,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, // a decoder bug related to alpha with color cache. // See: https://code.google.com/p/webp/issues/detail?id=239 // Need to re-enable this later. 
- ok = (VP8LEncodeStream(&config, &picture, bw, 0 /*use_cache*/) == VP8_ENC_OK); + ok = VP8LEncodeStream(&config, &picture, bw, /*use_cache=*/0); WebPPictureFree(&picture); ok = ok && !bw->error_; if (!ok) { @@ -303,7 +303,7 @@ static int EncodeAlpha(VP8Encoder* const enc, int ok = 1; const int reduce_levels = (quality < 100); - // quick sanity checks + // quick correctness checks assert((uint64_t)data_size == (uint64_t)width * height); // as per spec assert(enc != NULL && pic != NULL && pic->a != NULL); assert(output != NULL && output_size != NULL); @@ -361,7 +361,7 @@ static int EncodeAlpha(VP8Encoder* const enc, //------------------------------------------------------------------------------ // Main calls -static int CompressAlphaJob(void* arg1, void* dummy) { +static int CompressAlphaJob(void* arg1, void* unused) { VP8Encoder* const enc = (VP8Encoder*)arg1; const WebPConfig* config = enc->config_; uint8_t* alpha_data = NULL; @@ -375,13 +375,13 @@ static int CompressAlphaJob(void* arg1, void* dummy) { filter, effort_level, &alpha_data, &alpha_size)) { return 0; } - if (alpha_size != (uint32_t)alpha_size) { // Sanity check. + if (alpha_size != (uint32_t)alpha_size) { // Soundness check. 
WebPSafeFree(alpha_data); return 0; } enc->alpha_data_size_ = (uint32_t)alpha_size; enc->alpha_data_ = alpha_data; - (void)dummy; + (void)unused; return 1; } diff --git a/thirdparty/libwebp/src/enc/analysis_enc.c b/thirdparty/libwebp/src/enc/analysis_enc.c index 687757ae03..ebb784261c 100644 --- a/thirdparty/libwebp/src/enc/analysis_enc.c +++ b/thirdparty/libwebp/src/enc/analysis_enc.c @@ -126,16 +126,6 @@ static void InitHistogram(VP8Histogram* const histo) { histo->last_non_zero = 1; } -static void MergeHistograms(const VP8Histogram* const in, - VP8Histogram* const out) { - if (in->max_value > out->max_value) { - out->max_value = in->max_value; - } - if (in->last_non_zero > out->last_non_zero) { - out->last_non_zero = in->last_non_zero; - } -} - //------------------------------------------------------------------------------ // Simplified k-Means, to assign Nb segments based on alpha-histogram @@ -285,49 +275,6 @@ static int FastMBAnalyze(VP8EncIterator* const it) { return 0; } -static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it, - int best_alpha) { - uint8_t modes[16]; - const int max_mode = MAX_INTRA4_MODE; - int i4_alpha; - VP8Histogram total_histo; - int cur_histo = 0; - InitHistogram(&total_histo); - - VP8IteratorStartI4(it); - do { - int mode; - int best_mode_alpha = DEFAULT_ALPHA; - VP8Histogram histos[2]; - const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; - - VP8MakeIntra4Preds(it); - for (mode = 0; mode < max_mode; ++mode) { - int alpha; - - InitHistogram(&histos[cur_histo]); - VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode], - 0, 1, &histos[cur_histo]); - alpha = GetAlpha(&histos[cur_histo]); - if (IS_BETTER_ALPHA(alpha, best_mode_alpha)) { - best_mode_alpha = alpha; - modes[it->i4_] = mode; - cur_histo ^= 1; // keep track of best histo so far. 
- } - } - // accumulate best histogram - MergeHistograms(&histos[cur_histo ^ 1], &total_histo); - // Note: we reuse the original samples for predictors - } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF_ENC)); - - i4_alpha = GetAlpha(&total_histo); - if (IS_BETTER_ALPHA(i4_alpha, best_alpha)) { - VP8SetIntra4Mode(it, modes); - best_alpha = i4_alpha; - } - return best_alpha; -} - static int MBAnalyzeBestUVMode(VP8EncIterator* const it) { int best_alpha = DEFAULT_ALPHA; int smallest_alpha = 0; @@ -371,13 +318,6 @@ static void MBAnalyze(VP8EncIterator* const it, best_alpha = FastMBAnalyze(it); } else { best_alpha = MBAnalyzeBestIntra16Mode(it); - if (enc->method_ >= 5) { - // We go and make a fast decision for intra4/intra16. - // It's usually not a good and definitive pick, but helps seeding the - // stats about level bit-cost. - // TODO(skal): improve criterion. - best_alpha = MBAnalyzeBestIntra4Mode(it, best_alpha); - } } best_uv_alpha = MBAnalyzeBestUVMode(it); diff --git a/thirdparty/libwebp/src/enc/backward_references_cost_enc.c b/thirdparty/libwebp/src/enc/backward_references_cost_enc.c index 516abd73eb..6968ef3c9f 100644 --- a/thirdparty/libwebp/src/enc/backward_references_cost_enc.c +++ b/thirdparty/libwebp/src/enc/backward_references_cost_enc.c @@ -15,10 +15,11 @@ // #include <assert.h> +#include <float.h> +#include "src/dsp/lossless_common.h" #include "src/enc/backward_references_enc.h" #include "src/enc/histogram_enc.h" -#include "src/dsp/lossless_common.h" #include "src/utils/color_cache_utils.h" #include "src/utils/utils.h" @@ -30,15 +31,15 @@ extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs, const PixOrCopy v); typedef struct { - double alpha_[VALUES_IN_BYTE]; - double red_[VALUES_IN_BYTE]; - double blue_[VALUES_IN_BYTE]; - double distance_[NUM_DISTANCE_CODES]; - double* literal_; + float alpha_[VALUES_IN_BYTE]; + float red_[VALUES_IN_BYTE]; + float blue_[VALUES_IN_BYTE]; + float distance_[NUM_DISTANCE_CODES]; + float* literal_; } 
CostModel; static void ConvertPopulationCountTableToBitEstimates( - int num_symbols, const uint32_t population_counts[], double output[]) { + int num_symbols, const uint32_t population_counts[], float output[]) { uint32_t sum = 0; int nonzeros = 0; int i; @@ -51,7 +52,7 @@ static void ConvertPopulationCountTableToBitEstimates( if (nonzeros <= 1) { memset(output, 0, num_symbols * sizeof(*output)); } else { - const double logsum = VP8LFastLog2(sum); + const float logsum = VP8LFastLog2(sum); for (i = 0; i < num_symbols; ++i) { output[i] = logsum - VP8LFastLog2(population_counts[i]); } @@ -75,8 +76,8 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits, } ConvertPopulationCountTableToBitEstimates( - VP8LHistogramNumCodes(histo->palette_code_bits_), - histo->literal_, m->literal_); + VP8LHistogramNumCodes(histo->palette_code_bits_), histo->literal_, + m->literal_); ConvertPopulationCountTableToBitEstimates( VALUES_IN_BYTE, histo->red_, m->red_); ConvertPopulationCountTableToBitEstimates( @@ -92,27 +93,27 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits, return ok; } -static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) { +static WEBP_INLINE float GetLiteralCost(const CostModel* const m, uint32_t v) { return m->alpha_[v >> 24] + m->red_[(v >> 16) & 0xff] + m->literal_[(v >> 8) & 0xff] + m->blue_[v & 0xff]; } -static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) { +static WEBP_INLINE float GetCacheCost(const CostModel* const m, uint32_t idx) { const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx; return m->literal_[literal_idx]; } -static WEBP_INLINE double GetLengthCost(const CostModel* const m, - uint32_t length) { +static WEBP_INLINE float GetLengthCost(const CostModel* const m, + uint32_t length) { int code, extra_bits; VP8LPrefixEncodeBits(length, &code, &extra_bits); return m->literal_[VALUES_IN_BYTE + code] + extra_bits; } -static WEBP_INLINE double 
GetDistanceCost(const CostModel* const m, - uint32_t distance) { +static WEBP_INLINE float GetDistanceCost(const CostModel* const m, + uint32_t distance) { int code, extra_bits; VP8LPrefixEncodeBits(distance, &code, &extra_bits); return m->distance_[code] + extra_bits; @@ -122,20 +123,20 @@ static WEBP_INLINE void AddSingleLiteralWithCostModel( const uint32_t* const argb, VP8LColorCache* const hashers, const CostModel* const cost_model, int idx, int use_color_cache, float prev_cost, float* const cost, uint16_t* const dist_array) { - double cost_val = prev_cost; + float cost_val = prev_cost; const uint32_t color = argb[idx]; const int ix = use_color_cache ? VP8LColorCacheContains(hashers, color) : -1; if (ix >= 0) { // use_color_cache is true and hashers contains color - const double mul0 = 0.68; + const float mul0 = 0.68f; cost_val += GetCacheCost(cost_model, ix) * mul0; } else { - const double mul1 = 0.82; + const float mul1 = 0.82f; if (use_color_cache) VP8LColorCacheInsert(hashers, color); cost_val += GetLiteralCost(cost_model, color) * mul1; } if (cost[idx] > cost_val) { - cost[idx] = (float)cost_val; + cost[idx] = cost_val; dist_array[idx] = 1; // only one is inserted. } } @@ -172,7 +173,7 @@ struct CostInterval { // The GetLengthCost(cost_model, k) are cached in a CostCacheInterval. typedef struct { - double cost_; + float cost_; int start_; int end_; // Exclusive. } CostCacheInterval; @@ -187,7 +188,7 @@ typedef struct { int count_; // The number of stored intervals. CostCacheInterval* cache_intervals_; size_t cache_intervals_size_; - double cost_cache_[MAX_LENGTH]; // Contains the GetLengthCost(cost_model, k). + float cost_cache_[MAX_LENGTH]; // Contains the GetLengthCost(cost_model, k). float* costs_; uint16_t* dist_array_; // Most of the time, we only need few intervals -> use a free-list, to avoid @@ -262,10 +263,13 @@ static int CostManagerInit(CostManager* const manager, CostManagerInitFreeList(manager); // Fill in the cost_cache_. 
+ // Has to be done in two passes due to a GCC bug on i686 + // related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323 + for (i = 0; i < cost_cache_size; ++i) { + manager->cost_cache_[i] = GetLengthCost(cost_model, i); + } manager->cache_intervals_size_ = 1; - manager->cost_cache_[0] = GetLengthCost(cost_model, 0); for (i = 1; i < cost_cache_size; ++i) { - manager->cost_cache_[i] = GetLengthCost(cost_model, i); // Get the number of bound intervals. if (manager->cost_cache_[i] != manager->cost_cache_[i - 1]) { ++manager->cache_intervals_size_; @@ -294,7 +298,7 @@ static int CostManagerInit(CostManager* const manager, cur->end_ = 1; cur->cost_ = manager->cost_cache_[0]; for (i = 1; i < cost_cache_size; ++i) { - const double cost_val = manager->cost_cache_[i]; + const float cost_val = manager->cost_cache_[i]; if (cost_val != cur->cost_) { ++cur; // Initialize an interval. @@ -303,6 +307,8 @@ static int CostManagerInit(CostManager* const manager, } cur->end_ = i + 1; } + assert((size_t)(cur - manager->cache_intervals_) + 1 == + manager->cache_intervals_size_); } manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_)); @@ -311,7 +317,7 @@ static int CostManagerInit(CostManager* const manager, return 0; } // Set the initial costs_ high for every pixel as we will keep the minimum. - for (i = 0; i < pix_count; ++i) manager->costs_[i] = 1e38f; + for (i = 0; i < pix_count; ++i) manager->costs_[i] = FLT_MAX; return 1; } @@ -457,7 +463,7 @@ static WEBP_INLINE void InsertInterval(CostManager* const manager, // If handling the interval or one of its subintervals becomes to heavy, its // contribution is added to the costs right away. 
static WEBP_INLINE void PushInterval(CostManager* const manager, - double distance_cost, int position, + float distance_cost, int position, int len) { size_t i; CostInterval* interval = manager->head_; @@ -474,7 +480,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager, const int k = j - position; float cost_tmp; assert(k >= 0 && k < MAX_LENGTH); - cost_tmp = (float)(distance_cost + manager->cost_cache_[k]); + cost_tmp = distance_cost + manager->cost_cache_[k]; if (manager->costs_[j] > cost_tmp) { manager->costs_[j] = cost_tmp; @@ -492,7 +498,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager, const int end = position + (cost_cache_intervals[i].end_ > len ? len : cost_cache_intervals[i].end_); - const float cost = (float)(distance_cost + cost_cache_intervals[i].cost_); + const float cost = distance_cost + cost_cache_intervals[i].cost_; for (; interval != NULL && interval->start_ < end; interval = interval_next) { @@ -570,22 +576,21 @@ static int BackwardReferencesHashChainDistanceOnly( const int pix_count = xsize * ysize; const int use_color_cache = (cache_bits > 0); const size_t literal_array_size = - sizeof(double) * (NUM_LITERAL_CODES + NUM_LENGTH_CODES + - ((cache_bits > 0) ? 
(1 << cache_bits) : 0)); + sizeof(float) * (VP8LHistogramNumCodes(cache_bits)); const size_t cost_model_size = sizeof(CostModel) + literal_array_size; CostModel* const cost_model = (CostModel*)WebPSafeCalloc(1ULL, cost_model_size); VP8LColorCache hashers; CostManager* cost_manager = - (CostManager*)WebPSafeMalloc(1ULL, sizeof(*cost_manager)); + (CostManager*)WebPSafeCalloc(1ULL, sizeof(*cost_manager)); int offset_prev = -1, len_prev = -1; - double offset_cost = -1; + float offset_cost = -1.f; int first_offset_is_constant = -1; // initialized with 'impossible' value int reach = 0; if (cost_model == NULL || cost_manager == NULL) goto Error; - cost_model->literal_ = (double*)(cost_model + 1); + cost_model->literal_ = (float*)(cost_model + 1); if (use_color_cache) { cc_init = VP8LColorCacheInit(&hashers, cache_bits); if (!cc_init) goto Error; @@ -675,7 +680,7 @@ static int BackwardReferencesHashChainDistanceOnly( } ok = !refs->error_; -Error: + Error: if (cc_init) VP8LColorCacheClear(&hashers); CostManagerClear(cost_manager); WebPSafeFree(cost_model); diff --git a/thirdparty/libwebp/src/enc/backward_references_enc.c b/thirdparty/libwebp/src/enc/backward_references_enc.c index d445b40fc5..49a0fac034 100644 --- a/thirdparty/libwebp/src/enc/backward_references_enc.c +++ b/thirdparty/libwebp/src/enc/backward_references_enc.c @@ -10,16 +10,20 @@ // Author: Jyrki Alakuijala (jyrki@google.com) // +#include "src/enc/backward_references_enc.h" + #include <assert.h> +#include <float.h> #include <math.h> -#include "src/enc/backward_references_enc.h" -#include "src/enc/histogram_enc.h" +#include "src/dsp/dsp.h" #include "src/dsp/lossless.h" #include "src/dsp/lossless_common.h" -#include "src/dsp/dsp.h" +#include "src/enc/histogram_enc.h" +#include "src/enc/vp8i_enc.h" #include "src/utils/color_cache_utils.h" #include "src/utils/utils.h" +#include "src/webp/encode.h" #define MIN_BLOCK_SIZE 256 // minimum block size for backward references @@ -103,6 +107,20 @@ void 
VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) { } } +// Swaps the content of two VP8LBackwardRefs. +static void BackwardRefsSwap(VP8LBackwardRefs* const refs1, + VP8LBackwardRefs* const refs2) { + const int point_to_refs1 = + (refs1->tail_ != NULL && refs1->tail_ == &refs1->refs_); + const int point_to_refs2 = + (refs2->tail_ != NULL && refs2->tail_ == &refs2->refs_); + const VP8LBackwardRefs tmp = *refs1; + *refs1 = *refs2; + *refs2 = tmp; + if (point_to_refs2) refs1->tail_ = &refs1->refs_; + if (point_to_refs1) refs2->tail_ = &refs2->refs_; +} + void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) { assert(refs != NULL); memset(refs, 0, sizeof(*refs)); @@ -154,6 +172,22 @@ static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) { return b; } +// Return 1 on success, 0 on error. +static int BackwardRefsClone(const VP8LBackwardRefs* const from, + VP8LBackwardRefs* const to) { + const PixOrCopyBlock* block_from = from->refs_; + VP8LClearBackwardRefs(to); + while (block_from != NULL) { + PixOrCopyBlock* const block_to = BackwardRefsNewBlock(to); + if (block_to == NULL) return 0; + memcpy(block_to->start_, block_from->start_, + block_from->size_ * sizeof(PixOrCopy)); + block_to->size_ = block_from->size_; + block_from = block_from->next_; + } + return 1; +} + extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs, const PixOrCopy v); void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs, @@ -224,10 +258,13 @@ static WEBP_INLINE int MaxFindCopyLength(int len) { int VP8LHashChainFill(VP8LHashChain* const p, int quality, const uint32_t* const argb, int xsize, int ysize, - int low_effort) { + int low_effort, const WebPPicture* const pic, + int percent_range, int* const percent) { const int size = xsize * ysize; const int iter_max = GetMaxItersForQuality(quality); const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize); + int remaining_percent = percent_range; + int percent_start = *percent; int 
pos; int argb_comp; uint32_t base_position; @@ -245,7 +282,13 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality, hash_to_first_index = (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index)); - if (hash_to_first_index == NULL) return 0; + if (hash_to_first_index == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; + } + + percent_range = remaining_percent / 2; + remaining_percent -= percent_range; // Set the int32_t array to -1. memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index)); @@ -292,12 +335,22 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality, hash_to_first_index[hash_code] = pos++; argb_comp = argb_comp_next; } + + if (!WebPReportProgress( + pic, percent_start + percent_range * pos / (size - 2), percent)) { + WebPSafeFree(hash_to_first_index); + return 0; + } } // Process the penultimate pixel. chain[pos] = hash_to_first_index[GetPixPairHash64(argb + pos)]; WebPSafeFree(hash_to_first_index); + percent_start += percent_range; + if (!WebPReportProgress(pic, percent_start, percent)) return 0; + percent_range = remaining_percent; + // Find the best match interval at each pixel, defined by an offset to the // pixel and a length. The right-most pixel cannot match anything to the right // (hence a best length of 0) and the left-most pixel nothing to the left @@ -386,8 +439,17 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality, max_base_position = base_position; } } + + if (!WebPReportProgress(pic, + percent_start + percent_range * + (size - 2 - base_position) / + (size - 2), + percent)) { + return 0; + } } - return 1; + + return WebPReportProgress(pic, percent_start + percent_range, percent); } static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache, @@ -697,7 +759,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality, int* const best_cache_bits) { int i; const int cache_bits_max = (quality <= 25) ? 
0 : *best_cache_bits; - double entropy_min = MAX_ENTROPY; + float entropy_min = MAX_ENTROPY; int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 }; VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1]; VP8LRefsCursor c = VP8LRefsCursorInit(refs); @@ -753,12 +815,18 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality, } } } else { + int code, extra_bits, extra_bits_value; // We should compute the contribution of the (distance,length) // histograms but those are the same independently from the cache size. // As those constant contributions are in the end added to the other - // histogram contributions, we can safely ignore them. + // histogram contributions, we can ignore them, except for the length + // prefix that is part of the literal_ histogram. int len = PixOrCopyLength(v); uint32_t argb_prev = *argb ^ 0xffffffffu; + VP8LPrefixEncode(len, &code, &extra_bits, &extra_bits_value); + for (i = 0; i <= cache_bits_max; ++i) { + ++histos[i]->literal_[NUM_LITERAL_CODES + code]; + } // Update the color caches. 
do { if (*argb != argb_prev) { @@ -776,14 +844,14 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality, } for (i = 0; i <= cache_bits_max; ++i) { - const double entropy = VP8LHistogramEstimateBits(histos[i]); + const float entropy = VP8LHistogramEstimateBits(histos[i]); if (i == 0 || entropy < entropy_min) { entropy_min = entropy; *best_cache_bits = i; } } ok = 1; -Error: + Error: for (i = 0; i <= cache_bits_max; ++i) { if (cc_init[i]) VP8LColorCacheClear(&hashers[i]); VP8LFreeHistogram(histos[i]); @@ -842,16 +910,21 @@ extern int VP8LBackwardReferencesTraceBackwards( int xsize, int ysize, const uint32_t* const argb, int cache_bits, const VP8LHashChain* const hash_chain, const VP8LBackwardRefs* const refs_src, VP8LBackwardRefs* const refs_dst); -static VP8LBackwardRefs* GetBackwardReferences( - int width, int height, const uint32_t* const argb, int quality, - int lz77_types_to_try, int* const cache_bits, - const VP8LHashChain* const hash_chain, VP8LBackwardRefs* best, - VP8LBackwardRefs* worst) { - const int cache_bits_initial = *cache_bits; - double bit_cost_best = -1; +static int GetBackwardReferences(int width, int height, + const uint32_t* const argb, int quality, + int lz77_types_to_try, int cache_bits_max, + int do_no_cache, + const VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs, + int* const cache_bits_best) { VP8LHistogram* histo = NULL; - int lz77_type, lz77_type_best = 0; + int i, lz77_type; + // Index 0 is for a color cache, index 1 for no cache (if needed). + int lz77_types_best[2] = {0, 0}; + float bit_costs_best[2] = {FLT_MAX, FLT_MAX}; VP8LHashChain hash_chain_box; + VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 
2 : 1]; + int status = 0; memset(&hash_chain_box, 0, sizeof(hash_chain_box)); histo = VP8LAllocateHistogram(MAX_COLOR_CACHE_BITS); @@ -860,86 +933,136 @@ static VP8LBackwardRefs* GetBackwardReferences( for (lz77_type = 1; lz77_types_to_try; lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) { int res = 0; - double bit_cost; - int cache_bits_tmp = cache_bits_initial; + float bit_cost = 0.f; if ((lz77_types_to_try & lz77_type) == 0) continue; switch (lz77_type) { case kLZ77RLE: - res = BackwardReferencesRle(width, height, argb, 0, worst); + res = BackwardReferencesRle(width, height, argb, 0, refs_tmp); break; case kLZ77Standard: // Compute LZ77 with no cache (0 bits), as the ideal LZ77 with a color // cache is not that different in practice. - res = BackwardReferencesLz77(width, height, argb, 0, hash_chain, worst); + res = BackwardReferencesLz77(width, height, argb, 0, hash_chain, + refs_tmp); break; case kLZ77Box: if (!VP8LHashChainInit(&hash_chain_box, width * height)) goto Error; res = BackwardReferencesLz77Box(width, height, argb, 0, hash_chain, - &hash_chain_box, worst); + &hash_chain_box, refs_tmp); break; default: assert(0); } if (!res) goto Error; - // Next, try with a color cache and update the references. - if (!CalculateBestCacheSize(argb, quality, worst, &cache_bits_tmp)) { - goto Error; - } - if (cache_bits_tmp > 0) { - if (!BackwardRefsWithLocalCache(argb, cache_bits_tmp, worst)) { - goto Error; + // Start with the no color cache case. + for (i = 1; i >= 0; --i) { + int cache_bits = (i == 1) ? 0 : cache_bits_max; + + if (i == 1 && !do_no_cache) continue; + + if (i == 0) { + // Try with a color cache. + if (!CalculateBestCacheSize(argb, quality, refs_tmp, &cache_bits)) { + goto Error; + } + if (cache_bits > 0) { + if (!BackwardRefsWithLocalCache(argb, cache_bits, refs_tmp)) { + goto Error; + } + } + } + + if (i == 0 && do_no_cache && cache_bits == 0) { + // No need to re-compute bit_cost as it was computed at i == 1. 
+ } else { + VP8LHistogramCreate(histo, refs_tmp, cache_bits); + bit_cost = VP8LHistogramEstimateBits(histo); } - } - // Keep the best backward references. - VP8LHistogramCreate(histo, worst, cache_bits_tmp); - bit_cost = VP8LHistogramEstimateBits(histo); - if (lz77_type_best == 0 || bit_cost < bit_cost_best) { - VP8LBackwardRefs* const tmp = worst; - worst = best; - best = tmp; - bit_cost_best = bit_cost; - *cache_bits = cache_bits_tmp; - lz77_type_best = lz77_type; + if (bit_cost < bit_costs_best[i]) { + if (i == 1) { + // Do not swap as the full cache analysis would have the wrong + // VP8LBackwardRefs to start with. + if (!BackwardRefsClone(refs_tmp, &refs[1])) goto Error; + } else { + BackwardRefsSwap(refs_tmp, &refs[0]); + } + bit_costs_best[i] = bit_cost; + lz77_types_best[i] = lz77_type; + if (i == 0) *cache_bits_best = cache_bits; + } } } - assert(lz77_type_best > 0); + assert(lz77_types_best[0] > 0); + assert(!do_no_cache || lz77_types_best[1] > 0); // Improve on simple LZ77 but only for high quality (TraceBackwards is // costly). - if ((lz77_type_best == kLZ77Standard || lz77_type_best == kLZ77Box) && - quality >= 25) { - const VP8LHashChain* const hash_chain_tmp = - (lz77_type_best == kLZ77Standard) ? hash_chain : &hash_chain_box; - if (VP8LBackwardReferencesTraceBackwards(width, height, argb, *cache_bits, - hash_chain_tmp, best, worst)) { - double bit_cost_trace; - VP8LHistogramCreate(histo, worst, *cache_bits); + for (i = 1; i >= 0; --i) { + if (i == 1 && !do_no_cache) continue; + if ((lz77_types_best[i] == kLZ77Standard || + lz77_types_best[i] == kLZ77Box) && + quality >= 25) { + const VP8LHashChain* const hash_chain_tmp = + (lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box; + const int cache_bits = (i == 1) ? 
0 : *cache_bits_best; + float bit_cost_trace; + if (!VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits, + hash_chain_tmp, &refs[i], + refs_tmp)) { + goto Error; + } + VP8LHistogramCreate(histo, refs_tmp, cache_bits); bit_cost_trace = VP8LHistogramEstimateBits(histo); - if (bit_cost_trace < bit_cost_best) best = worst; + if (bit_cost_trace < bit_costs_best[i]) { + BackwardRefsSwap(refs_tmp, &refs[i]); + } } - } - BackwardReferences2DLocality(width, best); + BackwardReferences2DLocality(width, &refs[i]); + + if (i == 1 && lz77_types_best[0] == lz77_types_best[1] && + *cache_bits_best == 0) { + // If the best cache size is 0 and we have the same best LZ77, just copy + // the data over and stop here. + if (!BackwardRefsClone(&refs[1], &refs[0])) goto Error; + break; + } + } + status = 1; -Error: + Error: VP8LHashChainClear(&hash_chain_box); VP8LFreeHistogram(histo); - return best; + return status; } -VP8LBackwardRefs* VP8LGetBackwardReferences( +int VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int low_effort, int lz77_types_to_try, int* const cache_bits, - const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_tmp1, - VP8LBackwardRefs* const refs_tmp2) { + int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, + int* const cache_bits_best, const WebPPicture* const pic, int percent_range, + int* const percent) { if (low_effort) { - return GetBackwardReferencesLowEffort(width, height, argb, cache_bits, - hash_chain, refs_tmp1); + VP8LBackwardRefs* refs_best; + *cache_bits_best = cache_bits_max; + refs_best = GetBackwardReferencesLowEffort( + width, height, argb, cache_bits_best, hash_chain, refs); + if (refs_best == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; + } + // Set it in first position. 
+ BackwardRefsSwap(refs_best, &refs[0]); } else { - return GetBackwardReferences(width, height, argb, quality, - lz77_types_to_try, cache_bits, hash_chain, - refs_tmp1, refs_tmp2); + if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try, + cache_bits_max, do_no_cache, hash_chain, refs, + cache_bits_best)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; + } } + + return WebPReportProgress(pic, *percent + percent_range, percent); } diff --git a/thirdparty/libwebp/src/enc/backward_references_enc.h b/thirdparty/libwebp/src/enc/backward_references_enc.h index 103ddfdcb7..4dff1c27b5 100644 --- a/thirdparty/libwebp/src/enc/backward_references_enc.h +++ b/thirdparty/libwebp/src/enc/backward_references_enc.h @@ -16,6 +16,7 @@ #include <assert.h> #include <stdlib.h> #include "src/webp/types.h" +#include "src/webp/encode.h" #include "src/webp/format_constants.h" #ifdef __cplusplus @@ -133,10 +134,11 @@ struct VP8LHashChain { // Must be called first, to set size. int VP8LHashChainInit(VP8LHashChain* const p, int size); -// Pre-compute the best matches for argb. +// Pre-compute the best matches for argb. pic and percent are for progress. int VP8LHashChainFill(VP8LHashChain* const p, int quality, const uint32_t* const argb, int xsize, int ysize, - int low_effort); + int low_effort, const WebPPicture* const pic, + int percent_range, int* const percent); void VP8LHashChainClear(VP8LHashChain* const p); // release memory static WEBP_INLINE int VP8LHashChainFindOffset(const VP8LHashChain* const p, @@ -218,14 +220,22 @@ enum VP8LLZ77Type { // Evaluates best possible backward references for specified quality. // The input cache_bits to 'VP8LGetBackwardReferences' sets the maximum cache // bits to use (passing 0 implies disabling the local color cache). -// The optimal cache bits is evaluated and set for the *cache_bits parameter. -// The return value is the pointer to the best of the two backward refs viz, -// refs[0] or refs[1]. 
-VP8LBackwardRefs* VP8LGetBackwardReferences( +// The optimal cache bits is evaluated and set for the *cache_bits_best +// parameter with the matching refs_best. +// If do_no_cache == 0, refs is an array of 2 values and the best +// VP8LBackwardRefs is put in the first element. +// If do_no_cache != 0, refs is an array of 3 values and the best +// VP8LBackwardRefs is put in the first element, the best value with no-cache in +// the second element. +// In both cases, the last element is used as temporary internally. +// pic and percent are for progress. +// Returns false in case of error (stored in pic->error_code). +int VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int low_effort, int lz77_types_to_try, int* const cache_bits, - const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_tmp1, - VP8LBackwardRefs* const refs_tmp2); + int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, + int* const cache_bits_best, const WebPPicture* const pic, int percent_range, + int* const percent); #ifdef __cplusplus } diff --git a/thirdparty/libwebp/src/enc/config_enc.c b/thirdparty/libwebp/src/enc/config_enc.c index 9d4828978e..3518b41403 100644 --- a/thirdparty/libwebp/src/enc/config_enc.c +++ b/thirdparty/libwebp/src/enc/config_enc.c @@ -39,6 +39,8 @@ int WebPConfigInitInternal(WebPConfig* config, config->partitions = 0; config->segments = 4; config->pass = 1; + config->qmin = 0; + config->qmax = 100; config->show_compressed = 0; config->preprocessing = 0; config->autofilter = 0; @@ -106,6 +108,9 @@ int WebPValidateConfig(const WebPConfig* config) { if (config->filter_type < 0 || config->filter_type > 1) return 0; if (config->autofilter < 0 || config->autofilter > 1) return 0; if (config->pass < 1 || config->pass > 10) return 0; + if (config->qmin < 0 || config->qmax > 100 || config->qmin > config->qmax) { + return 0; + } if 
(config->show_compressed < 0 || config->show_compressed > 1) return 0; if (config->preprocessing < 0 || config->preprocessing > 7) return 0; if (config->partitions < 0 || config->partitions > 3) return 0; diff --git a/thirdparty/libwebp/src/enc/frame_enc.c b/thirdparty/libwebp/src/enc/frame_enc.c index 1aec376e44..b93d9e5b99 100644 --- a/thirdparty/libwebp/src/enc/frame_enc.c +++ b/thirdparty/libwebp/src/enc/frame_enc.c @@ -31,10 +31,15 @@ // we allow 2k of extra head-room in PARTITION0 limit. #define PARTITION0_SIZE_LIMIT ((VP8_MAX_PARTITION0_SIZE - 2048ULL) << 11) +static float Clamp(float v, float min, float max) { + return (v < min) ? min : (v > max) ? max : v; +} + typedef struct { // struct for organizing convergence in either size or PSNR int is_first; float dq; float q, last_q; + float qmin, qmax; double value, last_value; // PSNR or size double target; int do_size_search; @@ -47,7 +52,9 @@ static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) { s->is_first = 1; s->dq = 10.f; - s->q = s->last_q = enc->config_->quality; + s->qmin = 1.f * enc->config_->qmin; + s->qmax = 1.f * enc->config_->qmax; + s->q = s->last_q = Clamp(enc->config_->quality, s->qmin, s->qmax); s->target = do_size_search ? (double)target_size : (target_PSNR > 0.) ? target_PSNR : 40.; // default, just in case @@ -56,10 +63,6 @@ static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) { return do_size_search; } -static float Clamp(float v, float min, float max) { - return (v < min) ? min : (v > max) ? 
max : v; -} - static float ComputeNextQ(PassStats* const s) { float dq; if (s->is_first) { @@ -75,7 +78,7 @@ static float ComputeNextQ(PassStats* const s) { s->dq = Clamp(dq, -30.f, 30.f); s->last_q = s->q; s->last_value = s->value; - s->q = Clamp(s->q + s->dq, 0.f, 100.f); + s->q = Clamp(s->q + s->dq, s->qmin, s->qmax); return s->q; } @@ -775,6 +778,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { // Roughly refresh the proba eight times per pass int max_count = (enc->mb_w_ * enc->mb_h_) >> 3; int num_pass_left = enc->config_->pass; + int remaining_progress = 40; // percents const int do_search = enc->do_search_; VP8EncIterator it; VP8EncProba* const proba = &enc->proba_; @@ -802,6 +806,9 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { uint64_t size_p0 = 0; uint64_t distortion = 0; int cnt = max_count; + // The final number of passes is not trivial to know in advance. + const int pass_progress = remaining_progress / (2 + num_pass_left); + remaining_progress -= pass_progress; VP8IteratorInit(enc, &it); SetLoopParams(enc, stats.q); if (is_last_pass) { @@ -829,7 +836,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { StoreSideInfo(&it); VP8StoreFilterStats(&it); VP8IteratorExport(&it); - ok = VP8IteratorProgress(&it, 20); + ok = VP8IteratorProgress(&it, pass_progress); } VP8IteratorSaveBoundary(&it); } while (ok && VP8IteratorNext(&it)); @@ -848,9 +855,10 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { } #if (DEBUG_SEARCH > 0) - printf("#%2d metric:%.1lf -> %.1lf last_q=%.2lf q=%.2lf dq=%.2lf\n", + printf("#%2d metric:%.1lf -> %.1lf last_q=%.2lf q=%.2lf dq=%.2lf " + " range:[%.1f, %.1f]\n", num_pass_left, stats.last_value, stats.value, - stats.last_q, stats.q, stats.dq); + stats.last_q, stats.q, stats.dq, stats.qmin, stats.qmax); #endif if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) { ++num_pass_left; @@ -874,7 +882,8 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0, (const 
uint8_t*)proba->coeffs_, 1); } - ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_); + ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + remaining_progress, + &enc->percent_); return PostLoopFinalize(&it, ok); } diff --git a/thirdparty/libwebp/src/enc/histogram_enc.c b/thirdparty/libwebp/src/enc/histogram_enc.c index a4e6bf3a98..8418def2e1 100644 --- a/thirdparty/libwebp/src/enc/histogram_enc.c +++ b/thirdparty/libwebp/src/enc/histogram_enc.c @@ -13,15 +13,17 @@ #include "src/webp/config.h" #endif +#include <float.h> #include <math.h> -#include "src/enc/backward_references_enc.h" -#include "src/enc/histogram_enc.h" #include "src/dsp/lossless.h" #include "src/dsp/lossless_common.h" +#include "src/enc/backward_references_enc.h" +#include "src/enc/histogram_enc.h" +#include "src/enc/vp8i_enc.h" #include "src/utils/utils.h" -#define MAX_COST 1.e38 +#define MAX_BIT_COST FLT_MAX // Number of partitions for the three dominant (literal, red and blue) symbol // costs. @@ -208,6 +210,7 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, } else if (PixOrCopyIsCacheIdx(v)) { const int literal_ix = NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v); + assert(histo->palette_code_bits_ != 0); ++histo->literal_[literal_ix]; } else { int code, extra_bits; @@ -227,8 +230,8 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, // ----------------------------------------------------------------------------- // Entropy-related functions. -static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) { - double mix; +static WEBP_INLINE float BitsEntropyRefine(const VP8LBitEntropy* entropy) { + float mix; if (entropy->nonzeros < 5) { if (entropy->nonzeros <= 1) { return 0; @@ -237,67 +240,67 @@ static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) { // Let's mix in a bit of entropy to favor good clustering when // distributions of these are combined. 
if (entropy->nonzeros == 2) { - return 0.99 * entropy->sum + 0.01 * entropy->entropy; + return 0.99f * entropy->sum + 0.01f * entropy->entropy; } // No matter what the entropy says, we cannot be better than min_limit // with Huffman coding. I am mixing a bit of entropy into the // min_limit since it produces much better (~0.5 %) compression results // perhaps because of better entropy clustering. if (entropy->nonzeros == 3) { - mix = 0.95; + mix = 0.95f; } else { - mix = 0.7; // nonzeros == 4. + mix = 0.7f; // nonzeros == 4. } } else { - mix = 0.627; + mix = 0.627f; } { - double min_limit = 2 * entropy->sum - entropy->max_val; - min_limit = mix * min_limit + (1.0 - mix) * entropy->entropy; + float min_limit = 2.f * entropy->sum - entropy->max_val; + min_limit = mix * min_limit + (1.f - mix) * entropy->entropy; return (entropy->entropy < min_limit) ? min_limit : entropy->entropy; } } -double VP8LBitsEntropy(const uint32_t* const array, int n) { +float VP8LBitsEntropy(const uint32_t* const array, int n) { VP8LBitEntropy entropy; VP8LBitsEntropyUnrefined(array, n, &entropy); return BitsEntropyRefine(&entropy); } -static double InitialHuffmanCost(void) { +static float InitialHuffmanCost(void) { // Small bias because Huffman code length is typically not stored in // full length. static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3; - static const double kSmallBias = 9.1; + static const float kSmallBias = 9.1f; return kHuffmanCodeOfHuffmanCodeSize - kSmallBias; } // Finalize the Huffman cost based on streak numbers and length type (<3 or >=3) -static double FinalHuffmanCost(const VP8LStreaks* const stats) { +static float FinalHuffmanCost(const VP8LStreaks* const stats) { // The constants in this function are experimental and got rounded from // their original values in 1/8 when switched to 1/1024. 
- double retval = InitialHuffmanCost(); + float retval = InitialHuffmanCost(); // Second coefficient: Many zeros in the histogram are covered efficiently // by a run-length encode. Originally 2/8. - retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1]; + retval += stats->counts[0] * 1.5625f + 0.234375f * stats->streaks[0][1]; // Second coefficient: Constant values are encoded less efficiently, but still // RLE'ed. Originally 6/8. - retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1]; + retval += stats->counts[1] * 2.578125f + 0.703125f * stats->streaks[1][1]; // 0s are usually encoded more efficiently than non-0s. // Originally 15/8. - retval += 1.796875 * stats->streaks[0][0]; + retval += 1.796875f * stats->streaks[0][0]; // Originally 26/8. - retval += 3.28125 * stats->streaks[1][0]; + retval += 3.28125f * stats->streaks[1][0]; return retval; } // Get the symbol entropy for the distribution 'population'. // Set 'trivial_sym', if there's only one symbol present in the distribution. -static double PopulationCost(const uint32_t* const population, int length, - uint32_t* const trivial_sym, - uint8_t* const is_used) { +static float PopulationCost(const uint32_t* const population, int length, + uint32_t* const trivial_sym, + uint8_t* const is_used) { VP8LBitEntropy bit_entropy; VP8LStreaks stats; VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats); @@ -313,11 +316,10 @@ static double PopulationCost(const uint32_t* const population, int length, // trivial_at_end is 1 if the two histograms only have one element that is // non-zero: both the zero-th one, or both the last one. 
-static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X, - const uint32_t* const Y, - int length, int is_X_used, - int is_Y_used, - int trivial_at_end) { +static WEBP_INLINE float GetCombinedEntropy(const uint32_t* const X, + const uint32_t* const Y, int length, + int is_X_used, int is_Y_used, + int trivial_at_end) { VP8LStreaks stats; if (trivial_at_end) { // This configuration is due to palettization that transforms an indexed @@ -355,7 +357,7 @@ static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X, } // Estimates the Entropy + Huffman + other block overhead size cost. -double VP8LHistogramEstimateBits(VP8LHistogram* const p) { +float VP8LHistogramEstimateBits(VP8LHistogram* const p) { return PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL, &p->is_used_[0]) @@ -372,8 +374,7 @@ double VP8LHistogramEstimateBits(VP8LHistogram* const p) { static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, const VP8LHistogram* const b, - double cost_threshold, - double* cost) { + float cost_threshold, float* cost) { const int palette_code_bits = a->palette_code_bits_; int trivial_at_end = 0; assert(a->palette_code_bits_ == b->palette_code_bits_); @@ -438,12 +439,11 @@ static WEBP_INLINE void HistogramAdd(const VP8LHistogram* const a, // Since the previous score passed is 'cost_threshold', we only need to compare // the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out // early. 
-static double HistogramAddEval(const VP8LHistogram* const a, - const VP8LHistogram* const b, - VP8LHistogram* const out, - double cost_threshold) { - double cost = 0; - const double sum_cost = a->bit_cost_ + b->bit_cost_; +static float HistogramAddEval(const VP8LHistogram* const a, + const VP8LHistogram* const b, + VP8LHistogram* const out, float cost_threshold) { + float cost = 0; + const float sum_cost = a->bit_cost_ + b->bit_cost_; cost_threshold += sum_cost; if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) { @@ -458,10 +458,10 @@ static double HistogramAddEval(const VP8LHistogram* const a, // Same as HistogramAddEval(), except that the resulting histogram // is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit // the term C(b) which is constant over all the evaluations. -static double HistogramAddThresh(const VP8LHistogram* const a, - const VP8LHistogram* const b, - double cost_threshold) { - double cost; +static float HistogramAddThresh(const VP8LHistogram* const a, + const VP8LHistogram* const b, + float cost_threshold) { + float cost; assert(a != NULL && b != NULL); cost = -a->bit_cost_; GetCombinedHistogramEntropy(a, b, cost_threshold, &cost); @@ -472,24 +472,22 @@ static double HistogramAddThresh(const VP8LHistogram* const a, // The structure to keep track of cost range for the three dominant entropy // symbols. -// TODO(skal): Evaluate if float can be used here instead of double for -// representing the entropy costs. 
typedef struct { - double literal_max_; - double literal_min_; - double red_max_; - double red_min_; - double blue_max_; - double blue_min_; + float literal_max_; + float literal_min_; + float red_max_; + float red_min_; + float blue_max_; + float blue_min_; } DominantCostRange; static void DominantCostRangeInit(DominantCostRange* const c) { c->literal_max_ = 0.; - c->literal_min_ = MAX_COST; + c->literal_min_ = MAX_BIT_COST; c->red_max_ = 0.; - c->red_min_ = MAX_COST; + c->red_min_ = MAX_BIT_COST; c->blue_max_ = 0.; - c->blue_min_ = MAX_COST; + c->blue_min_ = MAX_BIT_COST; } static void UpdateDominantCostRange( @@ -504,10 +502,9 @@ static void UpdateDominantCostRange( static void UpdateHistogramCost(VP8LHistogram* const h) { uint32_t alpha_sym, red_sym, blue_sym; - const double alpha_cost = - PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym, - &h->is_used_[3]); - const double distance_cost = + const float alpha_cost = + PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym, &h->is_used_[3]); + const float distance_cost = PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) + VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES); const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_); @@ -528,10 +525,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) { } } -static int GetBinIdForEntropy(double min, double max, double val) { - const double range = max - min; +static int GetBinIdForEntropy(float min, float max, float val) { + const float range = max - min; if (range > 0.) { - const double delta = val - min; + const float delta = val - min; return (int)((NUM_PARTITIONS - 1e-6) * delta / range); } else { return 0; @@ -640,15 +637,11 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo, // Merges some histograms with same bin_id together if it's advantageous. // Sets the remaining histograms to NULL. 
-static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo, - int* num_used, - const uint16_t* const clusters, - uint16_t* const cluster_mappings, - VP8LHistogram* cur_combo, - const uint16_t* const bin_map, - int num_bins, - double combine_cost_factor, - int low_effort) { +static void HistogramCombineEntropyBin( + VP8LHistogramSet* const image_histo, int* num_used, + const uint16_t* const clusters, uint16_t* const cluster_mappings, + VP8LHistogram* cur_combo, const uint16_t* const bin_map, int num_bins, + float combine_cost_factor, int low_effort) { VP8LHistogram** const histograms = image_histo->histograms; int idx; struct { @@ -678,11 +671,10 @@ static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo, cluster_mappings[clusters[idx]] = clusters[first]; } else { // try to merge #idx into #first (both share the same bin_id) - const double bit_cost = histograms[idx]->bit_cost_; - const double bit_cost_thresh = -bit_cost * combine_cost_factor; - const double curr_cost_diff = - HistogramAddEval(histograms[first], histograms[idx], - cur_combo, bit_cost_thresh); + const float bit_cost = histograms[idx]->bit_cost_; + const float bit_cost_thresh = -bit_cost * combine_cost_factor; + const float curr_cost_diff = HistogramAddEval( + histograms[first], histograms[idx], cur_combo, bit_cost_thresh); if (curr_cost_diff < bit_cost_thresh) { // Try to merge two histograms only if the combo is a trivial one or // the two candidate histograms are already non-trivial. @@ -730,8 +722,8 @@ static uint32_t MyRand(uint32_t* const seed) { typedef struct { int idx1; int idx2; - double cost_diff; - double cost_combo; + float cost_diff; + float cost_combo; } HistogramPair; typedef struct { @@ -786,10 +778,9 @@ static void HistoQueueUpdateHead(HistoQueue* const histo_queue, // Update the cost diff and combo of a pair of histograms. This needs to be // called when the the histograms have been merged with a third one. 
static void HistoQueueUpdatePair(const VP8LHistogram* const h1, - const VP8LHistogram* const h2, - double threshold, + const VP8LHistogram* const h2, float threshold, HistogramPair* const pair) { - const double sum_cost = h1->bit_cost_ + h2->bit_cost_; + const float sum_cost = h1->bit_cost_ + h2->bit_cost_; pair->cost_combo = 0.; GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair->cost_combo); pair->cost_diff = pair->cost_combo - sum_cost; @@ -798,9 +789,9 @@ static void HistoQueueUpdatePair(const VP8LHistogram* const h1, // Create a pair from indices "idx1" and "idx2" provided its cost // is inferior to "threshold", a negative entropy. // It returns the cost of the pair, or 0. if it superior to threshold. -static double HistoQueuePush(HistoQueue* const histo_queue, - VP8LHistogram** const histograms, int idx1, - int idx2, double threshold) { +static float HistoQueuePush(HistoQueue* const histo_queue, + VP8LHistogram** const histograms, int idx1, + int idx2, float threshold) { const VP8LHistogram* h1; const VP8LHistogram* h2; HistogramPair pair; @@ -944,8 +935,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo, ++tries_with_no_success < num_tries_no_success; ++iter) { int* mapping_index; - double best_cost = - (histo_queue.size == 0) ? 0. : histo_queue.queue[0].cost_diff; + float best_cost = + (histo_queue.size == 0) ? 0.f : histo_queue.queue[0].cost_diff; int best_idx1 = -1, best_idx2 = 1; const uint32_t rand_range = (*num_used - 1) * (*num_used); // (*num_used) / 2 was chosen empirically. Less means faster but worse @@ -954,7 +945,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo, // Pick random samples. for (j = 0; *num_used >= 2 && j < num_tries; ++j) { - double curr_cost; + float curr_cost; // Choose two different histograms at random and try to combine them. 
const uint32_t tmp = MyRand(&seed) % rand_range; uint32_t idx1 = tmp / (*num_used - 1); @@ -1033,7 +1024,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo, *do_greedy = (*num_used <= min_cluster_size); ok = 1; -End: + End: HistoQueueClear(&histo_queue); WebPSafeFree(mappings); return ok; @@ -1056,7 +1047,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in, if (out_size > 1) { for (i = 0; i < in_size; ++i) { int best_out = 0; - double best_bits = MAX_COST; + float best_bits = MAX_BIT_COST; int k; if (in_histo[i] == NULL) { // Arbitrarily set to the previous value if unused to help future LZ77. @@ -1064,7 +1055,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in, continue; } for (k = 0; k < out_size; ++k) { - double cur_bits; + float cur_bits; cur_bits = HistogramAddThresh(out_histo[k], in_histo[i], best_bits); if (k == 0 || cur_bits < best_bits) { best_bits = cur_bits; @@ -1092,13 +1083,13 @@ static void HistogramRemap(const VP8LHistogramSet* const in, } } -static double GetCombineCostFactor(int histo_size, int quality) { - double combine_cost_factor = 0.16; +static float GetCombineCostFactor(int histo_size, int quality) { + float combine_cost_factor = 0.16f; if (quality < 90) { - if (histo_size > 256) combine_cost_factor /= 2.; - if (histo_size > 512) combine_cost_factor /= 2.; - if (histo_size > 1024) combine_cost_factor /= 2.; - if (quality <= 50) combine_cost_factor /= 2.; + if (histo_size > 256) combine_cost_factor /= 2.f; + if (histo_size > 512) combine_cost_factor /= 2.f; + if (histo_size > 1024) combine_cost_factor /= 2.f; + if (quality <= 50) combine_cost_factor /= 2.f; } return combine_cost_factor; } @@ -1168,15 +1159,17 @@ static void RemoveEmptyHistograms(VP8LHistogramSet* const image_histo) { } int VP8LGetHistoImageSymbols(int xsize, int ysize, - const VP8LBackwardRefs* const refs, - int quality, int low_effort, - int histo_bits, int cache_bits, + const VP8LBackwardRefs* const refs, int quality, + 
int low_effort, int histogram_bits, int cache_bits, VP8LHistogramSet* const image_histo, VP8LHistogram* const tmp_histo, - uint16_t* const histogram_symbols) { - int ok = 0; - const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1; - const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1; + uint16_t* const histogram_symbols, + const WebPPicture* const pic, int percent_range, + int* const percent) { + const int histo_xsize = + histogram_bits ? VP8LSubSampleSize(xsize, histogram_bits) : 1; + const int histo_ysize = + histogram_bits ? VP8LSubSampleSize(ysize, histogram_bits) : 1; const int image_histo_raw_size = histo_xsize * histo_ysize; VP8LHistogramSet* const orig_histo = VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits); @@ -1189,10 +1182,13 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, WebPSafeMalloc(2 * image_histo_raw_size, sizeof(map_tmp)); uint16_t* const cluster_mappings = map_tmp + image_histo_raw_size; int num_used = image_histo_raw_size; - if (orig_histo == NULL || map_tmp == NULL) goto Error; + if (orig_histo == NULL || map_tmp == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + goto Error; + } // Construct the histograms from backward references. - HistogramBuild(xsize, histo_bits, refs, orig_histo); + HistogramBuild(xsize, histogram_bits, refs, orig_histo); // Copies the histograms and computes its bit_cost. // histogram_symbols is optimized HistogramCopyAndAnalyze(orig_histo, image_histo, &num_used, @@ -1203,16 +1199,15 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, if (entropy_combine) { uint16_t* const bin_map = map_tmp; - const double combine_cost_factor = + const float combine_cost_factor = GetCombineCostFactor(image_histo_raw_size, quality); const uint32_t num_clusters = num_used; HistogramAnalyzeEntropyBin(image_histo, bin_map, low_effort); // Collapse histograms with similar entropy. 
- HistogramCombineEntropyBin(image_histo, &num_used, histogram_symbols, - cluster_mappings, tmp_histo, bin_map, - entropy_combine_num_bins, combine_cost_factor, - low_effort); + HistogramCombineEntropyBin( + image_histo, &num_used, histogram_symbols, cluster_mappings, tmp_histo, + bin_map, entropy_combine_num_bins, combine_cost_factor, low_effort); OptimizeHistogramSymbols(image_histo, cluster_mappings, num_clusters, map_tmp, histogram_symbols); } @@ -1226,11 +1221,13 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, int do_greedy; if (!HistogramCombineStochastic(image_histo, &num_used, threshold_size, &do_greedy)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } if (do_greedy) { RemoveEmptyHistograms(image_histo); if (!HistogramCombineGreedy(image_histo, &num_used)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } } @@ -1240,10 +1237,12 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, RemoveEmptyHistograms(image_histo); HistogramRemap(orig_histo, image_histo, histogram_symbols); - ok = 1; + if (!WebPReportProgress(pic, *percent + percent_range, percent)) { + goto Error; + } Error: VP8LFreeHistogramSet(orig_histo); WebPSafeFree(map_tmp); - return ok; + return (pic->error_code == VP8_ENC_OK); } diff --git a/thirdparty/libwebp/src/enc/histogram_enc.h b/thirdparty/libwebp/src/enc/histogram_enc.h index 54c2d21783..4c0bb97464 100644 --- a/thirdparty/libwebp/src/enc/histogram_enc.h +++ b/thirdparty/libwebp/src/enc/histogram_enc.h @@ -40,10 +40,10 @@ typedef struct { int palette_code_bits_; uint32_t trivial_symbol_; // True, if histograms for Red, Blue & Alpha // literal symbols are single valued. - double bit_cost_; // cached value of bit cost. - double literal_cost_; // Cached values of dominant entropy costs: - double red_cost_; // literal, red & blue. - double blue_cost_; + float bit_cost_; // cached value of bit cost. 
+ float literal_cost_; // Cached values of dominant entropy costs: + float red_cost_; // literal, red & blue. + float blue_cost_; uint8_t is_used_[5]; // 5 for literal, red, blue, alpha, distance } VP8LHistogram; @@ -64,8 +64,8 @@ void VP8LHistogramCreate(VP8LHistogram* const p, const VP8LBackwardRefs* const refs, int palette_code_bits); -// Return the size of the histogram for a given palette_code_bits. -int VP8LGetHistogramSize(int palette_code_bits); +// Return the size of the histogram for a given cache_bits. +int VP8LGetHistogramSize(int cache_bits); // Set the palette_code_bits and reset the stats. // If init_arrays is true, the arrays are also filled with 0's. @@ -105,21 +105,23 @@ static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) { ((palette_code_bits > 0) ? (1 << palette_code_bits) : 0); } -// Builds the histogram image. +// Builds the histogram image. pic and percent are for progress. +// Returns false in case of error (stored in pic->error_code). int VP8LGetHistoImageSymbols(int xsize, int ysize, - const VP8LBackwardRefs* const refs, - int quality, int low_effort, - int histogram_bits, int cache_bits, - VP8LHistogramSet* const image_in, + const VP8LBackwardRefs* const refs, int quality, + int low_effort, int histogram_bits, int cache_bits, + VP8LHistogramSet* const image_histo, VP8LHistogram* const tmp_histo, - uint16_t* const histogram_symbols); + uint16_t* const histogram_symbols, + const WebPPicture* const pic, int percent_range, + int* const percent); // Returns the entropy for the symbols in the input array. -double VP8LBitsEntropy(const uint32_t* const array, int n); +float VP8LBitsEntropy(const uint32_t* const array, int n); // Estimate how many bits the combined entropy of literals and distance // approximately maps to. 
-double VP8LHistogramEstimateBits(VP8LHistogram* const p); +float VP8LHistogramEstimateBits(VP8LHistogram* const p); #ifdef __cplusplus } diff --git a/thirdparty/libwebp/src/enc/picture_csp_enc.c b/thirdparty/libwebp/src/enc/picture_csp_enc.c index 718e014ed2..fabebcf202 100644 --- a/thirdparty/libwebp/src/enc/picture_csp_enc.c +++ b/thirdparty/libwebp/src/enc/picture_csp_enc.c @@ -15,12 +15,19 @@ #include <stdlib.h> #include <math.h> +#include "sharpyuv/sharpyuv.h" +#include "sharpyuv/sharpyuv_csp.h" #include "src/enc/vp8i_enc.h" #include "src/utils/random_utils.h" #include "src/utils/utils.h" #include "src/dsp/dsp.h" #include "src/dsp/lossless.h" #include "src/dsp/yuv.h" +#include "src/dsp/cpu.h" + +#if defined(WEBP_USE_THREAD) && !defined(_WIN32) +#include <pthread.h> +#endif // Uncomment to disable gamma-compression during RGB->U/V averaging #define USE_GAMMA_COMPRESSION @@ -61,16 +68,14 @@ static int CheckNonOpaque(const uint8_t* alpha, int width, int height, // Checking for the presence of non-opaque alpha. 
int WebPPictureHasTransparency(const WebPPicture* picture) { if (picture == NULL) return 0; - if (!picture->use_argb) { - return CheckNonOpaque(picture->a, picture->width, picture->height, - 1, picture->a_stride); - } else { + if (picture->use_argb) { const int alpha_offset = ALPHA_OFFSET; return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset, picture->width, picture->height, 4, picture->argb_stride * sizeof(*picture->argb)); } - return 0; + return CheckNonOpaque(picture->a, picture->width, picture->height, + 1, picture->a_stride); } //------------------------------------------------------------------------------ @@ -78,29 +83,30 @@ int WebPPictureHasTransparency(const WebPPicture* picture) { #if defined(USE_GAMMA_COMPRESSION) -// gamma-compensates loss of resolution during chroma subsampling -#define kGamma 0.80 // for now we use a different gamma value than kGammaF -#define kGammaFix 12 // fixed-point precision for linear values -#define kGammaScale ((1 << kGammaFix) - 1) -#define kGammaTabFix 7 // fixed-point fractional bits precision -#define kGammaTabScale (1 << kGammaTabFix) -#define kGammaTabRounder (kGammaTabScale >> 1) -#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix)) +// Gamma correction compensates loss of resolution during chroma subsampling. 
+#define GAMMA_FIX 12 // fixed-point precision for linear values +#define GAMMA_TAB_FIX 7 // fixed-point fractional bits precision +#define GAMMA_TAB_SIZE (1 << (GAMMA_FIX - GAMMA_TAB_FIX)) +static const double kGamma = 0.80; +static const int kGammaScale = ((1 << GAMMA_FIX) - 1); +static const int kGammaTabScale = (1 << GAMMA_TAB_FIX); +static const int kGammaTabRounder = (1 << GAMMA_TAB_FIX >> 1); -static int kLinearToGammaTab[kGammaTabSize + 1]; +static int kLinearToGammaTab[GAMMA_TAB_SIZE + 1]; static uint16_t kGammaToLinearTab[256]; static volatile int kGammaTablesOk = 0; +static void InitGammaTables(void); -static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) { +WEBP_DSP_INIT_FUNC(InitGammaTables) { if (!kGammaTablesOk) { int v; - const double scale = (double)(1 << kGammaTabFix) / kGammaScale; + const double scale = (double)(1 << GAMMA_TAB_FIX) / kGammaScale; const double norm = 1. / 255.; for (v = 0; v <= 255; ++v) { kGammaToLinearTab[v] = (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5); } - for (v = 0; v <= kGammaTabSize; ++v) { + for (v = 0; v <= GAMMA_TAB_SIZE; ++v) { kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5); } kGammaTablesOk = 1; @@ -112,12 +118,12 @@ static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { } static WEBP_INLINE int Interpolate(int v) { - const int tab_pos = v >> (kGammaTabFix + 2); // integer part + const int tab_pos = v >> (GAMMA_TAB_FIX + 2); // integer part const int x = v & ((kGammaTabScale << 2) - 1); // fractional part const int v0 = kLinearToGammaTab[tab_pos]; const int v1 = kLinearToGammaTab[tab_pos + 1]; const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate - assert(tab_pos + 1 < kGammaTabSize + 1); + assert(tab_pos + 1 < GAMMA_TAB_SIZE + 1); return y; } @@ -125,7 +131,7 @@ static WEBP_INLINE int Interpolate(int v) { // U/V value, suitable for RGBToU/V calls. 
static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { const int y = Interpolate(base_value << shift); // final uplifted value - return (y + kGammaTabRounder) >> kGammaTabFix; // descale + return (y + kGammaTabRounder) >> GAMMA_TAB_FIX; // descale } #else @@ -159,414 +165,41 @@ static int RGBToV(int r, int g, int b, VP8Random* const rg) { //------------------------------------------------------------------------------ // Sharp RGB->YUV conversion -static const int kNumIterations = 4; static const int kMinDimensionIterativeConversion = 4; -// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some -// banding sometimes. Better use extra precision. -#define SFIX 2 // fixed-point precision of RGB and Y/W -typedef int16_t fixed_t; // signed type with extra SFIX precision for UV -typedef uint16_t fixed_y_t; // unsigned type with extra SFIX precision for W - -#define SHALF (1 << SFIX >> 1) -#define MAX_Y_T ((256 << SFIX) - 1) -#define SROUNDER (1 << (YUV_FIX + SFIX - 1)) - -#if defined(USE_GAMMA_COMPRESSION) - -// We use tables of different size and precision for the Rec709 / BT2020 -// transfer function. -#define kGammaF (1./0.45) -static uint32_t kLinearToGammaTabS[kGammaTabSize + 2]; -#define GAMMA_TO_LINEAR_BITS 14 -static uint32_t kGammaToLinearTabS[MAX_Y_T + 1]; // size scales with Y_FIX -static volatile int kGammaTablesSOk = 0; - -static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesS(void) { - assert(2 * GAMMA_TO_LINEAR_BITS < 32); // we use uint32_t intermediate values - if (!kGammaTablesSOk) { - int v; - const double norm = 1. / MAX_Y_T; - const double scale = 1. / kGammaTabSize; - const double a = 0.09929682680944; - const double thresh = 0.018053968510807; - const double final_scale = 1 << GAMMA_TO_LINEAR_BITS; - for (v = 0; v <= MAX_Y_T; ++v) { - const double g = norm * v; - double value; - if (g <= thresh * 4.5) { - value = g / 4.5; - } else { - const double a_rec = 1. / (1. 
+ a); - value = pow(a_rec * (g + a), kGammaF); - } - kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5); - } - for (v = 0; v <= kGammaTabSize; ++v) { - const double g = scale * v; - double value; - if (g <= thresh) { - value = 4.5 * g; - } else { - value = (1. + a) * pow(g, 1. / kGammaF) - a; - } - // we already incorporate the 1/2 rounding constant here - kLinearToGammaTabS[v] = - (uint32_t)(MAX_Y_T * value) + (1 << GAMMA_TO_LINEAR_BITS >> 1); - } - // to prevent small rounding errors to cause read-overflow: - kLinearToGammaTabS[kGammaTabSize + 1] = kLinearToGammaTabS[kGammaTabSize]; - kGammaTablesSOk = 1; - } -} - -// return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS -static WEBP_INLINE uint32_t GammaToLinearS(int v) { - return kGammaToLinearTabS[v]; -} - -static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) { - // 'value' is in GAMMA_TO_LINEAR_BITS fractional precision - const uint32_t v = value * kGammaTabSize; - const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS; - // fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision - const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS); // fractional part - // v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1]) - const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0]; - const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1]; - // Final interpolation. Note that rounding is already included. - const uint32_t v2 = (v1 - v0) * x; // note: v1 >= v0. 
- const uint32_t result = v0 + (v2 >> GAMMA_TO_LINEAR_BITS); - return result; -} - -#else - -static void InitGammaTablesS(void) {} -static WEBP_INLINE uint32_t GammaToLinearS(int v) { - return (v << GAMMA_TO_LINEAR_BITS) / MAX_Y_T; -} -static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) { - return (MAX_Y_T * value) >> GAMMA_TO_LINEAR_BITS; -} - -#endif // USE_GAMMA_COMPRESSION - -//------------------------------------------------------------------------------ - -static uint8_t clip_8b(fixed_t v) { - return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u; -} - -static fixed_y_t clip_y(int y) { - return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T; -} - -//------------------------------------------------------------------------------ - -static int RGBToGray(int r, int g, int b) { - const int luma = 13933 * r + 46871 * g + 4732 * b + YUV_HALF; - return (luma >> YUV_FIX); -} - -static uint32_t ScaleDown(int a, int b, int c, int d) { - const uint32_t A = GammaToLinearS(a); - const uint32_t B = GammaToLinearS(b); - const uint32_t C = GammaToLinearS(c); - const uint32_t D = GammaToLinearS(d); - return LinearToGammaS((A + B + C + D + 2) >> 2); -} - -static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) { - int i; - for (i = 0; i < w; ++i) { - const uint32_t R = GammaToLinearS(src[0 * w + i]); - const uint32_t G = GammaToLinearS(src[1 * w + i]); - const uint32_t B = GammaToLinearS(src[2 * w + i]); - const uint32_t Y = RGBToGray(R, G, B); - dst[i] = (fixed_y_t)LinearToGammaS(Y); - } -} - -static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2, - fixed_t* dst, int uv_w) { - int i; - for (i = 0; i < uv_w; ++i) { - const int r = ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], - src2[0 * uv_w + 0], src2[0 * uv_w + 1]); - const int g = ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], - src2[2 * uv_w + 0], src2[2 * uv_w + 1]); - const int b = ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], - src2[4 * uv_w + 0], src2[4 
* uv_w + 1]); - const int W = RGBToGray(r, g, b); - dst[0 * uv_w] = (fixed_t)(r - W); - dst[1 * uv_w] = (fixed_t)(g - W); - dst[2 * uv_w] = (fixed_t)(b - W); - dst += 1; - src1 += 2; - src2 += 2; - } -} - -static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) { - int i; - for (i = 0; i < w; ++i) { - y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]); - } -} - //------------------------------------------------------------------------------ +// Main function -static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) { - const int v0 = (A * 3 + B + 2) >> 2; - return clip_y(v0 + W0); -} - -//------------------------------------------------------------------------------ - -static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { // 8bit -> SFIX - return ((fixed_y_t)a << SFIX) | SHALF; -} - -static void ImportOneRow(const uint8_t* const r_ptr, - const uint8_t* const g_ptr, - const uint8_t* const b_ptr, - int step, - int pic_width, - fixed_y_t* const dst) { - int i; - const int w = (pic_width + 1) & ~1; - for (i = 0; i < pic_width; ++i) { - const int off = i * step; - dst[i + 0 * w] = UpLift(r_ptr[off]); - dst[i + 1 * w] = UpLift(g_ptr[off]); - dst[i + 2 * w] = UpLift(b_ptr[off]); - } - if (pic_width & 1) { // replicate rightmost pixel - dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1]; - dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1]; - dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1]; - } -} - -static void InterpolateTwoRows(const fixed_y_t* const best_y, - const fixed_t* prev_uv, - const fixed_t* cur_uv, - const fixed_t* next_uv, - int w, - fixed_y_t* out1, - fixed_y_t* out2) { - const int uv_w = w >> 1; - const int len = (w - 1) >> 1; // length to filter - int k = 3; - while (k-- > 0) { // process each R/G/B segments in turn - // special boundary case for i==0 - out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]); - out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w]); - - WebPSharpYUVFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 
1); - WebPSharpYUVFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1); - - // special boundary case for i == w - 1 when w is even - if (!(w & 1)) { - out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1], - best_y[w - 1 + 0]); - out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1], - best_y[w - 1 + w]); - } - out1 += w; - out2 += w; - prev_uv += uv_w; - cur_uv += uv_w; - next_uv += uv_w; - } -} - -static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) { - const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER; - return clip_8b(16 + (luma >> (YUV_FIX + SFIX))); -} +extern void SharpYuvInit(VP8CPUInfo cpu_info_func); -static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) { - const int u = -9719 * r - 19081 * g + 28800 * b + SROUNDER; - return clip_8b(128 + (u >> (YUV_FIX + SFIX))); -} +static void SafeInitSharpYuv(void) { +#if defined(WEBP_USE_THREAD) && !defined(_WIN32) + static pthread_mutex_t initsharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; + if (pthread_mutex_lock(&initsharpyuv_lock)) return; +#endif -static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) { - const int v = +28800 * r - 24116 * g - 4684 * b + SROUNDER; - return clip_8b(128 + (v >> (YUV_FIX + SFIX))); -} + SharpYuvInit(VP8GetCPUInfo); -static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, - WebPPicture* const picture) { - int i, j; - uint8_t* dst_y = picture->y; - uint8_t* dst_u = picture->u; - uint8_t* dst_v = picture->v; - const fixed_t* const best_uv_base = best_uv; - const int w = (picture->width + 1) & ~1; - const int h = (picture->height + 1) & ~1; - const int uv_w = w >> 1; - const int uv_h = h >> 1; - for (best_uv = best_uv_base, j = 0; j < picture->height; ++j) { - for (i = 0; i < picture->width; ++i) { - const int off = (i >> 1); - const int W = best_y[i]; - const int r = best_uv[off + 0 * uv_w] + W; - const int g = best_uv[off + 1 * uv_w] + W; - const int b = best_uv[off + 2 * uv_w] + W; - dst_y[i] = ConvertRGBToY(r, g, b); 
- } - best_y += w; - best_uv += (j & 1) * 3 * uv_w; - dst_y += picture->y_stride; - } - for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) { - for (i = 0; i < uv_w; ++i) { - const int off = i; - const int r = best_uv[off + 0 * uv_w]; - const int g = best_uv[off + 1 * uv_w]; - const int b = best_uv[off + 2 * uv_w]; - dst_u[i] = ConvertRGBToU(r, g, b); - dst_v[i] = ConvertRGBToV(r, g, b); - } - best_uv += 3 * uv_w; - dst_u += picture->uv_stride; - dst_v += picture->uv_stride; - } - return 1; +#if defined(WEBP_USE_THREAD) && !defined(_WIN32) + (void)pthread_mutex_unlock(&initsharpyuv_lock); +#endif } -//------------------------------------------------------------------------------ -// Main function - -#define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T))) - static int PreprocessARGB(const uint8_t* r_ptr, const uint8_t* g_ptr, const uint8_t* b_ptr, int step, int rgb_stride, WebPPicture* const picture) { - // we expand the right/bottom border if needed - const int w = (picture->width + 1) & ~1; - const int h = (picture->height + 1) & ~1; - const int uv_w = w >> 1; - const int uv_h = h >> 1; - uint64_t prev_diff_y_sum = ~0; - int j, iter; - - // TODO(skal): allocate one big memory chunk. But for now, it's easier - // for valgrind debugging to have several chunks. 
- fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch - fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t); - fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t); - fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t); - fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); - fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); - fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t); - fixed_y_t* best_y = best_y_base; - fixed_y_t* target_y = target_y_base; - fixed_t* best_uv = best_uv_base; - fixed_t* target_uv = target_uv_base; - const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h); - int ok; - - if (best_y_base == NULL || best_uv_base == NULL || - target_y_base == NULL || target_uv_base == NULL || - best_rgb_y == NULL || best_rgb_uv == NULL || - tmp_buffer == NULL) { - ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto End; - } - assert(picture->width >= kMinDimensionIterativeConversion); - assert(picture->height >= kMinDimensionIterativeConversion); - - WebPInitConvertARGBToYUV(); - - // Import RGB samples to W/RGB representation. 
- for (j = 0; j < picture->height; j += 2) { - const int is_last_row = (j == picture->height - 1); - fixed_y_t* const src1 = tmp_buffer + 0 * w; - fixed_y_t* const src2 = tmp_buffer + 3 * w; - - // prepare two rows of input - ImportOneRow(r_ptr, g_ptr, b_ptr, step, picture->width, src1); - if (!is_last_row) { - ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride, - step, picture->width, src2); - } else { - memcpy(src2, src1, 3 * w * sizeof(*src2)); - } - StoreGray(src1, best_y + 0, w); - StoreGray(src2, best_y + w, w); - - UpdateW(src1, target_y, w); - UpdateW(src2, target_y + w, w); - UpdateChroma(src1, src2, target_uv, uv_w); - memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv)); - best_y += 2 * w; - best_uv += 3 * uv_w; - target_y += 2 * w; - target_uv += 3 * uv_w; - r_ptr += 2 * rgb_stride; - g_ptr += 2 * rgb_stride; - b_ptr += 2 * rgb_stride; - } - - // Iterate and resolve clipping conflicts. - for (iter = 0; iter < kNumIterations; ++iter) { - const fixed_t* cur_uv = best_uv_base; - const fixed_t* prev_uv = best_uv_base; - uint64_t diff_y_sum = 0; - - best_y = best_y_base; - best_uv = best_uv_base; - target_y = target_y_base; - target_uv = target_uv_base; - for (j = 0; j < h; j += 2) { - fixed_y_t* const src1 = tmp_buffer + 0 * w; - fixed_y_t* const src2 = tmp_buffer + 3 * w; - { - const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 
3 * uv_w : 0); - InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2); - prev_uv = cur_uv; - cur_uv = next_uv; - } - - UpdateW(src1, best_rgb_y + 0 * w, w); - UpdateW(src2, best_rgb_y + 1 * w, w); - UpdateChroma(src1, src2, best_rgb_uv, uv_w); - - // update two rows of Y and one row of RGB - diff_y_sum += WebPSharpYUVUpdateY(target_y, best_rgb_y, best_y, 2 * w); - WebPSharpYUVUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w); - - best_y += 2 * w; - best_uv += 3 * uv_w; - target_y += 2 * w; - target_uv += 3 * uv_w; - } - // test exit condition - if (iter > 0) { - if (diff_y_sum < diff_y_threshold) break; - if (diff_y_sum > prev_diff_y_sum) break; - } - prev_diff_y_sum = diff_y_sum; + const int ok = SharpYuvConvert( + r_ptr, g_ptr, b_ptr, step, rgb_stride, /*rgb_bit_depth=*/8, + picture->y, picture->y_stride, picture->u, picture->uv_stride, picture->v, + picture->uv_stride, /*yuv_bit_depth=*/8, picture->width, + picture->height, SharpYuvGetConversionMatrix(kSharpYuvMatrixWebp)); + if (!ok) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } - // final reconstruction - ok = ConvertWRGBToYUV(best_y_base, best_uv_base, picture); - - End: - WebPSafeFree(best_y_base); - WebPSafeFree(best_uv_base); - WebPSafeFree(target_y_base); - WebPSafeFree(target_uv_base); - WebPSafeFree(best_rgb_y); - WebPSafeFree(best_rgb_uv); - WebPSafeFree(tmp_buffer); return ok; } -#undef SAFE_ALLOC //------------------------------------------------------------------------------ // "Fast" regular RGB->YUV @@ -591,8 +224,8 @@ static const int kAlphaFix = 19; // and constant are adjusted very tightly to fit 32b arithmetic. // In particular, they use the fact that the operands for 'v / a' are actually // derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3 -// with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid -// overflow is: kGammaFix + kAlphaFix <= 31. +// with ai in [0..255] and pi in [0..1<<GAMMA_FIX). 
The constraint to avoid +// overflow is: GAMMA_FIX + kAlphaFix <= 31. static const uint32_t kInvAlpha[4 * 0xff + 1] = { 0, /* alpha = 0 */ 524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536, @@ -818,11 +451,20 @@ static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr, dst[0] = SUM4(r_ptr + j, step); dst[1] = SUM4(g_ptr + j, step); dst[2] = SUM4(b_ptr + j, step); + // MemorySanitizer may raise false positives with data that passes through + // RGBA32PackedToPlanar_16b_SSE41() due to incorrect modeling of shuffles. + // See https://crbug.com/webp/573. +#ifdef WEBP_MSAN + dst[3] = 0; +#endif } if (width & 1) { dst[0] = SUM2(r_ptr + j); dst[1] = SUM2(g_ptr + j); dst[2] = SUM2(b_ptr + j); +#ifdef WEBP_MSAN + dst[3] = 0; +#endif } } @@ -863,18 +505,18 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr, use_iterative_conversion = 0; } - if (!WebPPictureAllocYUVA(picture, width, height)) { + if (!WebPPictureAllocYUVA(picture)) { return 0; } if (has_alpha) { assert(step == 4); #if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE) - assert(kAlphaFix + kGammaFix <= 31); + assert(kAlphaFix + GAMMA_FIX <= 31); #endif } if (use_iterative_conversion) { - InitGammaTablesS(); + SafeInitSharpYuv(); if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) { return 0; } @@ -1044,7 +686,7 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) { return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); } // Allocate a new argb buffer (discarding the previous one). - if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0; + if (!WebPPictureAllocARGB(picture)) return 0; picture->use_argb = 1; // Convert @@ -1106,6 +748,8 @@ static int Import(WebPPicture* const picture, const int width = picture->width; const int height = picture->height; + if (abs(rgb_stride) < (import_alpha ? 4 : 3) * width) return 0; + if (!picture->use_argb) { const uint8_t* a_ptr = import_alpha ? 
rgb + 3 : NULL; return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride, @@ -1163,24 +807,24 @@ static int Import(WebPPicture* const picture, #if !defined(WEBP_REDUCE_CSP) int WebPPictureImportBGR(WebPPicture* picture, - const uint8_t* rgb, int rgb_stride) { - return (picture != NULL && rgb != NULL) - ? Import(picture, rgb, rgb_stride, 3, 1, 0) + const uint8_t* bgr, int bgr_stride) { + return (picture != NULL && bgr != NULL) + ? Import(picture, bgr, bgr_stride, 3, 1, 0) : 0; } int WebPPictureImportBGRA(WebPPicture* picture, - const uint8_t* rgba, int rgba_stride) { - return (picture != NULL && rgba != NULL) - ? Import(picture, rgba, rgba_stride, 4, 1, 1) + const uint8_t* bgra, int bgra_stride) { + return (picture != NULL && bgra != NULL) + ? Import(picture, bgra, bgra_stride, 4, 1, 1) : 0; } int WebPPictureImportBGRX(WebPPicture* picture, - const uint8_t* rgba, int rgba_stride) { - return (picture != NULL && rgba != NULL) - ? Import(picture, rgba, rgba_stride, 4, 1, 0) + const uint8_t* bgrx, int bgrx_stride) { + return (picture != NULL && bgrx != NULL) + ? Import(picture, bgrx, bgrx_stride, 4, 1, 0) : 0; } @@ -1201,9 +845,9 @@ int WebPPictureImportRGBA(WebPPicture* picture, } int WebPPictureImportRGBX(WebPPicture* picture, - const uint8_t* rgba, int rgba_stride) { - return (picture != NULL && rgba != NULL) - ? Import(picture, rgba, rgba_stride, 4, 0, 0) + const uint8_t* rgbx, int rgbx_stride) { + return (picture != NULL && rgbx != NULL) + ? 
Import(picture, rgbx, rgbx_stride, 4, 0, 0) : 0; } diff --git a/thirdparty/libwebp/src/enc/picture_enc.c b/thirdparty/libwebp/src/enc/picture_enc.c index c691622d03..3af6383d38 100644 --- a/thirdparty/libwebp/src/enc/picture_enc.c +++ b/thirdparty/libwebp/src/enc/picture_enc.c @@ -45,6 +45,22 @@ int WebPPictureInitInternal(WebPPicture* picture, int version) { //------------------------------------------------------------------------------ +int WebPValidatePicture(const WebPPicture* const picture) { + if (picture == NULL) return 0; + if (picture->width <= 0 || picture->height <= 0) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); + } + if (picture->width <= 0 || picture->width / 4 > INT_MAX / 4 || + picture->height <= 0 || picture->height / 4 > INT_MAX / 4) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); + } + if (picture->colorspace != WEBP_YUV420 && + picture->colorspace != WEBP_YUV420A) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); + } + return 1; +} + static void WebPPictureResetBufferARGB(WebPPicture* const picture) { picture->memory_argb_ = NULL; picture->argb = NULL; @@ -63,18 +79,17 @@ void WebPPictureResetBuffers(WebPPicture* const picture) { WebPPictureResetBufferYUVA(picture); } -int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) { +int WebPPictureAllocARGB(WebPPicture* const picture) { void* memory; + const int width = picture->width; + const int height = picture->height; const uint64_t argb_size = (uint64_t)width * height; - assert(picture != NULL); + if (!WebPValidatePicture(picture)) return 0; WebPSafeFree(picture->memory_argb_); WebPPictureResetBufferARGB(picture); - if (width <= 0 || height <= 0) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); - } // allocate a new buffer. 
memory = WebPSafeMalloc(argb_size + WEBP_ALIGN_CST, sizeof(*picture->argb)); if (memory == NULL) { @@ -86,10 +101,10 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) { return 1; } -int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) { - const WebPEncCSP uv_csp = - (WebPEncCSP)((int)picture->colorspace & WEBP_CSP_UV_MASK); +int WebPPictureAllocYUVA(WebPPicture* const picture) { const int has_alpha = (int)picture->colorspace & WEBP_CSP_ALPHA_BIT; + const int width = picture->width; + const int height = picture->height; const int y_stride = width; const int uv_width = (int)(((int64_t)width + 1) >> 1); const int uv_height = (int)(((int64_t)height + 1) >> 1); @@ -98,15 +113,11 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) { uint64_t y_size, uv_size, a_size, total_size; uint8_t* mem; - assert(picture != NULL); + if (!WebPValidatePicture(picture)) return 0; WebPSafeFree(picture->memory_); WebPPictureResetBufferYUVA(picture); - if (uv_csp != WEBP_YUV420) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); - } - // alpha a_width = has_alpha ? 
width : 0; a_stride = a_width; @@ -152,15 +163,12 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) { int WebPPictureAlloc(WebPPicture* picture) { if (picture != NULL) { - const int width = picture->width; - const int height = picture->height; - WebPPictureFree(picture); // erase previous buffer if (!picture->use_argb) { - return WebPPictureAllocYUVA(picture, width, height); + return WebPPictureAllocYUVA(picture); } else { - return WebPPictureAllocARGB(picture, width, height); + return WebPPictureAllocARGB(picture); } } return 1; diff --git a/thirdparty/libwebp/src/enc/picture_rescale_enc.c b/thirdparty/libwebp/src/enc/picture_rescale_enc.c index 58a6ae7b9d..839f91cacc 100644 --- a/thirdparty/libwebp/src/enc/picture_rescale_enc.c +++ b/thirdparty/libwebp/src/enc/picture_rescale_enc.c @@ -13,14 +13,15 @@ #include "src/webp/encode.h" -#if !defined(WEBP_REDUCE_SIZE) - #include <assert.h> #include <stdlib.h> #include "src/enc/vp8i_enc.h" + +#if !defined(WEBP_REDUCE_SIZE) #include "src/utils/rescaler_utils.h" #include "src/utils/utils.h" +#endif // !defined(WEBP_REDUCE_SIZE) #define HALVE(x) (((x) + 1) >> 1) @@ -56,6 +57,7 @@ static int AdjustAndCheckRectangle(const WebPPicture* const pic, return 1; } +#if !defined(WEBP_REDUCE_SIZE) int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { if (src == NULL || dst == NULL) return 0; if (src == dst) return 1; @@ -81,6 +83,7 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { } return 1; } +#endif // !defined(WEBP_REDUCE_SIZE) int WebPPictureIsView(const WebPPicture* picture) { if (picture == NULL) return 0; @@ -120,6 +123,7 @@ int WebPPictureView(const WebPPicture* src, return 1; } +#if !defined(WEBP_REDUCE_SIZE) //------------------------------------------------------------------------------ // Picture cropping @@ -164,22 +168,25 @@ int WebPPictureCrop(WebPPicture* pic, //------------------------------------------------------------------------------ // Simple picture 
rescaler -static void RescalePlane(const uint8_t* src, - int src_width, int src_height, int src_stride, - uint8_t* dst, - int dst_width, int dst_height, int dst_stride, - rescaler_t* const work, - int num_channels) { +static int RescalePlane(const uint8_t* src, + int src_width, int src_height, int src_stride, + uint8_t* dst, + int dst_width, int dst_height, int dst_stride, + rescaler_t* const work, + int num_channels) { WebPRescaler rescaler; int y = 0; - WebPRescalerInit(&rescaler, src_width, src_height, - dst, dst_width, dst_height, dst_stride, - num_channels, work); + if (!WebPRescalerInit(&rescaler, src_width, src_height, + dst, dst_width, dst_height, dst_stride, + num_channels, work)) { + return 0; + } while (y < src_height) { y += WebPRescalerImport(&rescaler, src_height - y, src + y * src_stride, src_stride); WebPRescalerExport(&rescaler); } + return 1; } static void AlphaMultiplyARGB(WebPPicture* const pic, int inverse) { @@ -195,52 +202,53 @@ static void AlphaMultiplyY(WebPPicture* const pic, int inverse) { } } -int WebPPictureRescale(WebPPicture* pic, int width, int height) { +int WebPPictureRescale(WebPPicture* picture, int width, int height) { WebPPicture tmp; int prev_width, prev_height; rescaler_t* work; - if (pic == NULL) return 0; - prev_width = pic->width; - prev_height = pic->height; + if (picture == NULL) return 0; + prev_width = picture->width; + prev_height = picture->height; if (!WebPRescalerGetScaledDimensions( prev_width, prev_height, &width, &height)) { return 0; } - PictureGrabSpecs(pic, &tmp); + PictureGrabSpecs(picture, &tmp); tmp.width = width; tmp.height = height; if (!WebPPictureAlloc(&tmp)) return 0; - if (!pic->use_argb) { + if (!picture->use_argb) { work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work)); if (work == NULL) { WebPPictureFree(&tmp); return 0; } // If present, we need to rescale alpha first (for AlphaMultiplyY). 
- if (pic->a != NULL) { + if (picture->a != NULL) { WebPInitAlphaProcessing(); - RescalePlane(pic->a, prev_width, prev_height, pic->a_stride, - tmp.a, width, height, tmp.a_stride, work, 1); + if (!RescalePlane(picture->a, prev_width, prev_height, picture->a_stride, + tmp.a, width, height, tmp.a_stride, work, 1)) { + return 0; + } } // We take transparency into account on the luma plane only. That's not // totally exact blending, but still is a good approximation. - AlphaMultiplyY(pic, 0); - RescalePlane(pic->y, prev_width, prev_height, pic->y_stride, - tmp.y, width, height, tmp.y_stride, work, 1); + AlphaMultiplyY(picture, 0); + if (!RescalePlane(picture->y, prev_width, prev_height, picture->y_stride, + tmp.y, width, height, tmp.y_stride, work, 1) || + !RescalePlane(picture->u, HALVE(prev_width), HALVE(prev_height), + picture->uv_stride, tmp.u, HALVE(width), HALVE(height), + tmp.uv_stride, work, 1) || + !RescalePlane(picture->v, HALVE(prev_width), HALVE(prev_height), + picture->uv_stride, tmp.v, HALVE(width), HALVE(height), + tmp.uv_stride, work, 1)) { + return 0; + } AlphaMultiplyY(&tmp, 1); - - RescalePlane(pic->u, - HALVE(prev_width), HALVE(prev_height), pic->uv_stride, - tmp.u, - HALVE(width), HALVE(height), tmp.uv_stride, work, 1); - RescalePlane(pic->v, - HALVE(prev_width), HALVE(prev_height), pic->uv_stride, - tmp.v, - HALVE(width), HALVE(height), tmp.uv_stride, work, 1); } else { work = (rescaler_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work)); if (work == NULL) { @@ -251,17 +259,17 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) { // weighting first (black-matting), scale the RGB values, and remove // the premultiplication afterward (while preserving the alpha channel). 
WebPInitAlphaProcessing(); - AlphaMultiplyARGB(pic, 0); - RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height, - pic->argb_stride * 4, - (uint8_t*)tmp.argb, width, height, - tmp.argb_stride * 4, - work, 4); + AlphaMultiplyARGB(picture, 0); + if (!RescalePlane((const uint8_t*)picture->argb, prev_width, prev_height, + picture->argb_stride * 4, (uint8_t*)tmp.argb, width, + height, tmp.argb_stride * 4, work, 4)) { + return 0; + } AlphaMultiplyARGB(&tmp, 1); } - WebPPictureFree(pic); + WebPPictureFree(picture); WebPSafeFree(work); - *pic = tmp; + *picture = tmp; return 1; } @@ -273,23 +281,6 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { return 0; } -int WebPPictureIsView(const WebPPicture* picture) { - (void)picture; - return 0; -} - -int WebPPictureView(const WebPPicture* src, - int left, int top, int width, int height, - WebPPicture* dst) { - (void)src; - (void)left; - (void)top; - (void)width; - (void)height; - (void)dst; - return 0; -} - int WebPPictureCrop(WebPPicture* pic, int left, int top, int width, int height) { (void)pic; diff --git a/thirdparty/libwebp/src/enc/picture_tools_enc.c b/thirdparty/libwebp/src/enc/picture_tools_enc.c index d0e8a495da..147cc18608 100644 --- a/thirdparty/libwebp/src/enc/picture_tools_enc.c +++ b/thirdparty/libwebp/src/enc/picture_tools_enc.c @@ -83,6 +83,19 @@ static int SmoothenBlock(const uint8_t* a_ptr, int a_stride, uint8_t* y_ptr, return (count == 0); } +void WebPReplaceTransparentPixels(WebPPicture* const pic, uint32_t color) { + if (pic != NULL && pic->use_argb) { + int y = pic->height; + uint32_t* argb = pic->argb; + color &= 0xffffffu; // force alpha=0 + WebPInitAlphaProcessing(); + while (y-- > 0) { + WebPAlphaReplace(argb, pic->width, color); + argb += pic->argb_stride; + } + } +} + void WebPCleanupTransparentArea(WebPPicture* pic) { int x, y, w, h; if (pic == NULL) return; @@ -165,24 +178,6 @@ void WebPCleanupTransparentArea(WebPPicture* pic) { #undef SIZE #undef SIZE2 -void 
WebPCleanupTransparentAreaLossless(WebPPicture* const pic) { - int x, y, w, h; - uint32_t* argb; - assert(pic != NULL && pic->use_argb); - w = pic->width; - h = pic->height; - argb = pic->argb; - - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) { - if ((argb[x] & 0xff000000) == 0) { - argb[x] = 0x00000000; - } - } - argb += pic->argb_stride; - } -} - //------------------------------------------------------------------------------ // Blend color and remove transparency info @@ -195,27 +190,28 @@ static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) { return (0xff000000u | (r << 16) | (g << 8) | b); } -void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { +void WebPBlendAlpha(WebPPicture* picture, uint32_t background_rgb) { const int red = (background_rgb >> 16) & 0xff; const int green = (background_rgb >> 8) & 0xff; const int blue = (background_rgb >> 0) & 0xff; int x, y; - if (pic == NULL) return; - if (!pic->use_argb) { - const int uv_width = (pic->width >> 1); // omit last pixel during u/v loop + if (picture == NULL) return; + if (!picture->use_argb) { + // omit last pixel during u/v loop + const int uv_width = (picture->width >> 1); const int Y0 = VP8RGBToY(red, green, blue, YUV_HALF); // VP8RGBToU/V expects the u/v values summed over four pixels const int U0 = VP8RGBToU(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF); const int V0 = VP8RGBToV(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF); - const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT; - uint8_t* y_ptr = pic->y; - uint8_t* u_ptr = pic->u; - uint8_t* v_ptr = pic->v; - uint8_t* a_ptr = pic->a; + const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT; + uint8_t* y_ptr = picture->y; + uint8_t* u_ptr = picture->u; + uint8_t* v_ptr = picture->v; + uint8_t* a_ptr = picture->a; if (!has_alpha || a_ptr == NULL) return; // nothing to do - for (y = 0; y < pic->height; ++y) { + for (y = 0; y < picture->height; ++y) { // Luma blending - for (x = 0; x < pic->width; ++x) { + for (x = 0; x 
< picture->width; ++x) { const uint8_t alpha = a_ptr[x]; if (alpha < 0xff) { y_ptr[x] = BLEND(Y0, y_ptr[x], alpha); @@ -224,7 +220,7 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { // Chroma blending every even line if ((y & 1) == 0) { uint8_t* const a_ptr2 = - (y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride; + (y + 1 == picture->height) ? a_ptr : a_ptr + picture->a_stride; for (x = 0; x < uv_width; ++x) { // Average four alpha values into a single blending weight. // TODO(skal): might lead to visible contouring. Can we do better? @@ -234,24 +230,24 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha); v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha); } - if (pic->width & 1) { // rightmost pixel + if (picture->width & 1) { // rightmost pixel const uint32_t alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]); u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha); v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha); } } else { - u_ptr += pic->uv_stride; - v_ptr += pic->uv_stride; + u_ptr += picture->uv_stride; + v_ptr += picture->uv_stride; } - memset(a_ptr, 0xff, pic->width); // reset alpha value to opaque - a_ptr += pic->a_stride; - y_ptr += pic->y_stride; + memset(a_ptr, 0xff, picture->width); // reset alpha value to opaque + a_ptr += picture->a_stride; + y_ptr += picture->y_stride; } } else { - uint32_t* argb = pic->argb; + uint32_t* argb = picture->argb; const uint32_t background = MakeARGB32(red, green, blue); - for (y = 0; y < pic->height; ++y) { - for (x = 0; x < pic->width; ++x) { + for (y = 0; y < picture->height; ++y) { + for (x = 0; x < picture->width; ++x) { const int alpha = (argb[x] >> 24) & 0xff; if (alpha != 0xff) { if (alpha > 0) { @@ -267,7 +263,7 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { } } } - argb += pic->argb_stride; + argb += picture->argb_stride; } } } diff --git a/thirdparty/libwebp/src/enc/predictor_enc.c b/thirdparty/libwebp/src/enc/predictor_enc.c 
index 2e6762ea0d..b3d44b59d5 100644 --- a/thirdparty/libwebp/src/enc/predictor_enc.c +++ b/thirdparty/libwebp/src/enc/predictor_enc.c @@ -16,6 +16,7 @@ #include "src/dsp/lossless.h" #include "src/dsp/lossless_common.h" +#include "src/enc/vp8i_enc.h" #include "src/enc/vp8li_enc.h" #define MAX_DIFF_COST (1e30f) @@ -31,10 +32,10 @@ static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; } // Methods to calculate Entropy (Shannon). static float PredictionCostSpatial(const int counts[256], int weight_0, - double exp_val) { + float exp_val) { const int significant_symbols = 256 >> 4; - const double exp_decay_factor = 0.6; - double bits = weight_0 * counts[0]; + const float exp_decay_factor = 0.6f; + float bits = (float)weight_0 * counts[0]; int i; for (i = 1; i < significant_symbols; ++i) { bits += exp_val * (counts[i] + counts[256 - i]); @@ -46,9 +47,9 @@ static float PredictionCostSpatial(const int counts[256], int weight_0, static float PredictionCostSpatialHistogram(const int accumulated[4][256], const int tile[4][256]) { int i; - double retval = 0; + float retval = 0.f; for (i = 0; i < 4; ++i) { - const double kExpValue = 0.94; + const float kExpValue = 0.94f; retval += PredictionCostSpatial(tile[i], 1, kExpValue); retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]); } @@ -249,7 +250,7 @@ static WEBP_INLINE void GetResidual( } else if (x == 0) { predict = upper_row[x]; // Top. } else { - predict = pred_func(current_row[x - 1], upper_row + x); + predict = pred_func(&current_row[x - 1], upper_row + x); } #if (WEBP_NEAR_LOSSLESS == 1) if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 || @@ -472,12 +473,15 @@ static void CopyImageWithPrediction(int width, int height, // with respect to predictions. If near_lossless_quality < 100, applies // near lossless processing, shaving off more bits of residuals for lower // qualities. 
-void VP8LResidualImage(int width, int height, int bits, int low_effort, - uint32_t* const argb, uint32_t* const argb_scratch, - uint32_t* const image, int near_lossless_quality, - int exact, int used_subtract_green) { +int VP8LResidualImage(int width, int height, int bits, int low_effort, + uint32_t* const argb, uint32_t* const argb_scratch, + uint32_t* const image, int near_lossless_quality, + int exact, int used_subtract_green, + const WebPPicture* const pic, int percent_range, + int* const percent) { const int tiles_per_row = VP8LSubSampleSize(width, bits); const int tiles_per_col = VP8LSubSampleSize(height, bits); + int percent_start = *percent; int tile_y; int histo[4][256]; const int max_quantization = 1 << VP8LNearLosslessBits(near_lossless_quality); @@ -491,17 +495,24 @@ void VP8LResidualImage(int width, int height, int bits, int low_effort, for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { int tile_x; for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { - const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y, - bits, histo, argb_scratch, argb, max_quantization, exact, - used_subtract_green, image); + const int pred = GetBestPredictorForTile( + width, height, tile_x, tile_y, bits, histo, argb_scratch, argb, + max_quantization, exact, used_subtract_green, image); image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8); } + + if (!WebPReportProgress( + pic, percent_start + percent_range * tile_y / tiles_per_col, + percent)) { + return 0; + } } } CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb, low_effort, max_quantization, exact, used_subtract_green); + return WebPReportProgress(pic, percent_start + percent_range, percent); } //------------------------------------------------------------------------------ @@ -532,7 +543,7 @@ static float PredictionCostCrossColor(const int accumulated[256], const int counts[256]) { // Favor low entropy, locally and globally. 
// Favor small absolute values for PredictionCostSpatial - static const double kExpValue = 2.4; + static const float kExpValue = 2.4f; return VP8LCombinedShannonEntropy(counts, accumulated) + PredictionCostSpatial(counts, 3, kExpValue); } @@ -714,11 +725,14 @@ static void CopyTileWithColorTransform(int xsize, int ysize, } } -void VP8LColorSpaceTransform(int width, int height, int bits, int quality, - uint32_t* const argb, uint32_t* image) { +int VP8LColorSpaceTransform(int width, int height, int bits, int quality, + uint32_t* const argb, uint32_t* image, + const WebPPicture* const pic, int percent_range, + int* const percent) { const int max_tile_size = 1 << bits; const int tile_xsize = VP8LSubSampleSize(width, bits); const int tile_ysize = VP8LSubSampleSize(height, bits); + int percent_start = *percent; int accumulated_red_histo[256] = { 0 }; int accumulated_blue_histo[256] = { 0 }; int tile_x, tile_y; @@ -768,5 +782,11 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality, } } } + if (!WebPReportProgress( + pic, percent_start + percent_range * tile_y / tile_ysize, + percent)) { + return 0; + } } + return 1; } diff --git a/thirdparty/libwebp/src/enc/quant_enc.c b/thirdparty/libwebp/src/enc/quant_enc.c index 01eb565c7f..6d8202d277 100644 --- a/thirdparty/libwebp/src/enc/quant_enc.c +++ b/thirdparty/libwebp/src/enc/quant_enc.c @@ -533,7 +533,8 @@ static void InitScore(VP8ModeScore* const rd) { rd->score = MAX_COST; } -static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { +static void CopyScore(VP8ModeScore* WEBP_RESTRICT const dst, + const VP8ModeScore* WEBP_RESTRICT const src) { dst->D = src->D; dst->SD = src->SD; dst->R = src->R; @@ -542,7 +543,8 @@ static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { dst->score = src->score; } -static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { +static void AddScore(VP8ModeScore* WEBP_RESTRICT const dst, + const VP8ModeScore* 
WEBP_RESTRICT const src) { dst->D += src->D; dst->SD += src->SD; dst->R += src->R; @@ -585,15 +587,18 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, return rate * lambda + RD_DISTO_MULT * distortion; } -static int TrellisQuantizeBlock(const VP8Encoder* const enc, +// Coefficient type. +enum { TYPE_I16_AC = 0, TYPE_I16_DC = 1, TYPE_CHROMA_A = 2, TYPE_I4_AC = 3 }; + +static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc, int16_t in[16], int16_t out[16], int ctx0, int coeff_type, - const VP8Matrix* const mtx, + const VP8Matrix* WEBP_RESTRICT const mtx, int lambda) { const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; CostArrayPtr const costs = (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type]; - const int first = (coeff_type == 0) ? 1 : 0; + const int first = (coeff_type == TYPE_I16_AC) ? 1 : 0; Node nodes[16][NUM_NODES]; ScoreState score_states[2][NUM_NODES]; ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA); @@ -657,16 +662,17 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, // test all alternate level values around level0. for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { Node* const cur = &NODE(n, m); - int level = level0 + m; + const int level = level0 + m; const int ctx = (level > 2) ? 2 : level; const int band = VP8EncBands[n + 1]; score_t base_score; - score_t best_cur_score = MAX_COST; - int best_prev = 0; // default, in case + score_t best_cur_score; + int best_prev; + score_t cost, score; - ss_cur[m].score = MAX_COST; ss_cur[m].costs = costs[n + 1][ctx]; if (level < 0 || level > thresh_level) { + ss_cur[m].score = MAX_COST; // Node is dead. continue; } @@ -682,18 +688,24 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, } // Inspect all possible non-dead predecessors. Retain only the best one. - for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) { + // The base_score is added to all scores so it is only added for the final + // value after the loop. 
+ cost = VP8LevelCost(ss_prev[-MIN_DELTA].costs, level); + best_cur_score = + ss_prev[-MIN_DELTA].score + RDScoreTrellis(lambda, cost, 0); + best_prev = -MIN_DELTA; + for (p = -MIN_DELTA + 1; p <= MAX_DELTA; ++p) { // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically // eliminated since their score can't be better than the current best. - const score_t cost = VP8LevelCost(ss_prev[p].costs, level); + cost = VP8LevelCost(ss_prev[p].costs, level); // Examine node assuming it's a non-terminal one. - const score_t score = - base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0); + score = ss_prev[p].score + RDScoreTrellis(lambda, cost, 0); if (score < best_cur_score) { best_cur_score = score; best_prev = p; } } + best_cur_score += base_score; // Store best finding in current node. cur->sign = sign; cur->level = level; @@ -701,11 +713,11 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, ss_cur[m].score = best_cur_score; // Now, record best terminal node (and thus best entry in the graph). - if (level != 0) { + if (level != 0 && best_cur_score < best_score) { const score_t last_pos_cost = (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0; const score_t last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0); - const score_t score = best_cur_score + last_pos_score; + score = best_cur_score + last_pos_score; if (score < best_score) { best_score = score; best_path[0] = n; // best eob position @@ -717,10 +729,16 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, } // Fresh start - memset(in + first, 0, (16 - first) * sizeof(*in)); - memset(out + first, 0, (16 - first) * sizeof(*out)); + // Beware! We must preserve in[0]/out[0] value for TYPE_I16_AC case. + if (coeff_type == TYPE_I16_AC) { + memset(in + 1, 0, 15 * sizeof(*in)); + memset(out + 1, 0, 15 * sizeof(*out)); + } else { + memset(in, 0, 16 * sizeof(*in)); + memset(out, 0, 16 * sizeof(*out)); + } if (best_path[0] == -1) { - return 0; // skip! + return 0; // skip! 
} { @@ -751,9 +769,9 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, // all at once. Output is the reconstructed block in *yuv_out, and the // quantized levels in *levels. -static int ReconstructIntra16(VP8EncIterator* const it, - VP8ModeScore* const rd, - uint8_t* const yuv_out, +static int ReconstructIntra16(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT const rd, + uint8_t* WEBP_RESTRICT const yuv_out, int mode) { const VP8Encoder* const enc = it->enc_; const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; @@ -775,9 +793,9 @@ static int ReconstructIntra16(VP8EncIterator* const it, for (y = 0, n = 0; y < 4; ++y) { for (x = 0; x < 4; ++x, ++n) { const int ctx = it->top_nz_[x] + it->left_nz_[y]; - const int non_zero = - TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0, - &dqm->y1_, dqm->lambda_trellis_i16_); + const int non_zero = TrellisQuantizeBlock( + enc, tmp[n], rd->y_ac_levels[n], ctx, TYPE_I16_AC, &dqm->y1_, + dqm->lambda_trellis_i16_); it->top_nz_[x] = it->left_nz_[y] = non_zero; rd->y_ac_levels[n][0] = 0; nz |= non_zero << n; @@ -803,10 +821,10 @@ static int ReconstructIntra16(VP8EncIterator* const it, return nz; } -static int ReconstructIntra4(VP8EncIterator* const it, +static int ReconstructIntra4(VP8EncIterator* WEBP_RESTRICT const it, int16_t levels[16], - const uint8_t* const src, - uint8_t* const yuv_out, + const uint8_t* WEBP_RESTRICT const src, + uint8_t* WEBP_RESTRICT const yuv_out, int mode) { const VP8Encoder* const enc = it->enc_; const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; @@ -818,7 +836,7 @@ static int ReconstructIntra4(VP8EncIterator* const it, if (DO_TRELLIS_I4 && it->do_trellis_) { const int x = it->i4_ & 3, y = it->i4_ >> 2; const int ctx = it->top_nz_[x] + it->left_nz_[y]; - nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_, + nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, TYPE_I4_AC, &dqm->y1_, dqm->lambda_trellis_i4_); } else { nz = 
VP8EncQuantizeBlock(tmp, levels, &dqm->y1_); @@ -839,7 +857,8 @@ static int ReconstructIntra4(VP8EncIterator* const it, // Quantize as usual, but also compute and return the quantization error. // Error is already divided by DSHIFT. -static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) { +static int QuantizeSingle(int16_t* WEBP_RESTRICT const v, + const VP8Matrix* WEBP_RESTRICT const mtx) { int V = *v; const int sign = (V < 0); if (sign) V = -V; @@ -853,9 +872,10 @@ static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) { return (sign ? -V : V) >> DSCALE; } -static void CorrectDCValues(const VP8EncIterator* const it, - const VP8Matrix* const mtx, - int16_t tmp[][16], VP8ModeScore* const rd) { +static void CorrectDCValues(const VP8EncIterator* WEBP_RESTRICT const it, + const VP8Matrix* WEBP_RESTRICT const mtx, + int16_t tmp[][16], + VP8ModeScore* WEBP_RESTRICT const rd) { // | top[0] | top[1] // --------+--------+--------- // left[0] | tmp[0] tmp[1] <-> err0 err1 @@ -886,8 +906,8 @@ static void CorrectDCValues(const VP8EncIterator* const it, } } -static void StoreDiffusionErrors(VP8EncIterator* const it, - const VP8ModeScore* const rd) { +static void StoreDiffusionErrors(VP8EncIterator* WEBP_RESTRICT const it, + const VP8ModeScore* WEBP_RESTRICT const rd) { int ch; for (ch = 0; ch <= 1; ++ch) { int8_t* const top = it->top_derr_[it->x_][ch]; @@ -906,8 +926,9 @@ static void StoreDiffusionErrors(VP8EncIterator* const it, //------------------------------------------------------------------------------ -static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, - uint8_t* const yuv_out, int mode) { +static int ReconstructUV(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT const rd, + uint8_t* WEBP_RESTRICT const yuv_out, int mode) { const VP8Encoder* const enc = it->enc_; const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; @@ -927,9 
+948,9 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x, ++n) { const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; - const int non_zero = - TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2, - &dqm->uv_, dqm->lambda_trellis_uv_); + const int non_zero = TrellisQuantizeBlock( + enc, tmp[n], rd->uv_levels[n], ctx, TYPE_CHROMA_A, &dqm->uv_, + dqm->lambda_trellis_uv_); it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero; nz |= non_zero << n; } @@ -978,7 +999,8 @@ static void SwapOut(VP8EncIterator* const it) { SwapPtr(&it->yuv_out_, &it->yuv_out2_); } -static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { +static void PickBestIntra16(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT rd) { const int kNumBlocks = 16; VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; const int lambda = dqm->lambda_i16_; @@ -1038,7 +1060,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { //------------------------------------------------------------------------------ // return the cost array corresponding to the surrounding prediction modes. 
-static const uint16_t* GetCostModeI4(VP8EncIterator* const it, +static const uint16_t* GetCostModeI4(VP8EncIterator* WEBP_RESTRICT const it, const uint8_t modes[16]) { const int preds_w = it->enc_->preds_w_; const int x = (it->i4_ & 3), y = it->i4_ >> 2; @@ -1047,7 +1069,8 @@ static const uint16_t* GetCostModeI4(VP8EncIterator* const it, return VP8FixedCostsI4[top][left]; } -static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { +static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT const rd) { const VP8Encoder* const enc = it->enc_; const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; const int lambda = dqm->lambda_i4_; @@ -1143,7 +1166,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { //------------------------------------------------------------------------------ -static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { +static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT const rd) { const int kNumBlocks = 8; const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; const int lambda = dqm->lambda_uv_; @@ -1195,7 +1219,8 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { //------------------------------------------------------------------------------ // Final reconstruction and quantization. -static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { +static void SimpleQuantize(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT const rd) { const VP8Encoder* const enc = it->enc_; const int is_i16 = (it->mb_->type_ == 1); int nz = 0; @@ -1220,9 +1245,9 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { } // Refine intra16/intra4 sub-modes based on distortion only (not rate). 
-static void RefineUsingDistortion(VP8EncIterator* const it, +static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it, int try_both_modes, int refine_uv_mode, - VP8ModeScore* const rd) { + VP8ModeScore* WEBP_RESTRICT const rd) { score_t best_score = MAX_COST; int nz = 0; int mode; @@ -1336,7 +1361,8 @@ static void RefineUsingDistortion(VP8EncIterator* const it, //------------------------------------------------------------------------------ // Entry point -int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, +int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT const rd, VP8RDLevel rd_opt) { int is_skipped; const int method = it->enc_->method_; diff --git a/thirdparty/libwebp/src/enc/syntax_enc.c b/thirdparty/libwebp/src/enc/syntax_enc.c index a9e5a6cf0f..e18cf650ca 100644 --- a/thirdparty/libwebp/src/enc/syntax_enc.c +++ b/thirdparty/libwebp/src/enc/syntax_enc.c @@ -349,7 +349,7 @@ int VP8EncWrite(VP8Encoder* const enc) { (enc->alpha_data_size_ & 1); riff_size += CHUNK_HEADER_SIZE + padded_alpha_size; } - // Sanity check. + // RIFF size should fit in 32-bits. if (riff_size > 0xfffffffeU) { return WebPEncodingSetError(pic, VP8_ENC_ERROR_FILE_TOO_BIG); } diff --git a/thirdparty/libwebp/src/enc/vp8i_enc.h b/thirdparty/libwebp/src/enc/vp8i_enc.h index fedcaeea27..71f76702ae 100644 --- a/thirdparty/libwebp/src/enc/vp8i_enc.h +++ b/thirdparty/libwebp/src/enc/vp8i_enc.h @@ -31,8 +31,8 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 1 -#define ENC_MIN_VERSION 1 -#define ENC_REV_VERSION 0 +#define ENC_MIN_VERSION 2 +#define ENC_REV_VERSION 4 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost @@ -286,8 +286,7 @@ int VP8IteratorNext(VP8EncIterator* const it); // save the yuv_out_ boundary values to top_/left_ arrays for next iterations. 
void VP8IteratorSaveBoundary(VP8EncIterator* const it); // Report progression based on macroblock rows. Return 0 for user-abort request. -int VP8IteratorProgress(const VP8EncIterator* const it, - int final_delta_percent); +int VP8IteratorProgress(const VP8EncIterator* const it, int delta); // Intra4x4 iterations void VP8IteratorStartI4(VP8EncIterator* const it); // returns true if not done. @@ -471,7 +470,8 @@ int VP8EncAnalyze(VP8Encoder* const enc); // Sets up segment's quantization values, base_quant_ and filter strengths. void VP8SetSegmentParams(VP8Encoder* const enc, float quality); // Pick best modes and fills the levels. Returns true if skipped. -int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, +int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT const rd, VP8RDLevel rd_opt); // in alpha.c @@ -491,23 +491,28 @@ int VP8FilterStrengthFromDelta(int sharpness, int delta); // misc utils for picture_*.c: +// Returns true if 'picture' is non-NULL and dimensions/colorspace are within +// their valid ranges. If returning false, the 'error_code' in 'picture' is +// updated. +int WebPValidatePicture(const WebPPicture* const picture); + // Remove reference to the ARGB/YUVA buffer (doesn't free anything). void WebPPictureResetBuffers(WebPPicture* const picture); -// Allocates ARGB buffer of given dimension (previous one is always free'd). -// Preserves the YUV(A) buffer. Returns false in case of error (invalid param, -// out-of-memory). -int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height); +// Allocates ARGB buffer according to set width/height (previous one is +// always free'd). Preserves the YUV(A) buffer. Returns false in case of error +// (invalid param, out-of-memory). +int WebPPictureAllocARGB(WebPPicture* const picture); -// Allocates YUVA buffer of given dimension (previous one is always free'd). -// Uses picture->csp to determine whether an alpha buffer is needed. 
+// Allocates YUVA buffer according to set width/height (previous one is always +// free'd). Uses picture->csp to determine whether an alpha buffer is needed. // Preserves the ARGB buffer. // Returns false in case of error (invalid param, out-of-memory). -int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height); +int WebPPictureAllocYUVA(WebPPicture* const picture); -// Clean-up the RGB samples under fully transparent area, to help lossless -// compressibility (no guarantee, though). Assumes that pic->use_argb is true. -void WebPCleanupTransparentAreaLossless(WebPPicture* const pic); +// Replace samples that are fully transparent by 'color' to help compressibility +// (no guarantee, though). Assumes pic->use_argb is true. +void WebPReplaceTransparentPixels(WebPPicture* const pic, uint32_t color); //------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/src/enc/vp8l_enc.c b/thirdparty/libwebp/src/enc/vp8l_enc.c index 2efd403f77..2b345df610 100644 --- a/thirdparty/libwebp/src/enc/vp8l_enc.c +++ b/thirdparty/libwebp/src/enc/vp8l_enc.c @@ -15,15 +15,16 @@ #include <assert.h> #include <stdlib.h> +#include "src/dsp/lossless.h" +#include "src/dsp/lossless_common.h" #include "src/enc/backward_references_enc.h" #include "src/enc/histogram_enc.h" #include "src/enc/vp8i_enc.h" #include "src/enc/vp8li_enc.h" -#include "src/dsp/lossless.h" -#include "src/dsp/lossless_common.h" #include "src/utils/bit_writer_utils.h" #include "src/utils/huffman_encode_utils.h" #include "src/utils/utils.h" +#include "src/webp/encode.h" #include "src/webp/format_constants.h" // Maximum number of histogram images (sub-blocks). @@ -65,25 +66,22 @@ static WEBP_INLINE void SwapColor(uint32_t* const col1, uint32_t* const col2) { *col2 = tmp; } -static void GreedyMinimizeDeltas(uint32_t palette[], int num_colors) { - // Find greedily always the closest color of the predicted color to minimize - // deltas in the palette. 
This reduces storage needs since the - // palette is stored with delta encoding. - uint32_t predict = 0x00000000; - int i, k; - for (i = 0; i < num_colors; ++i) { - int best_ix = i; - uint32_t best_score = ~0U; - for (k = i; k < num_colors; ++k) { - const uint32_t cur_score = PaletteColorDistance(palette[k], predict); - if (best_score > cur_score) { - best_score = cur_score; - best_ix = k; - } +static WEBP_INLINE int SearchColorNoIdx(const uint32_t sorted[], uint32_t color, + int num_colors) { + int low = 0, hi = num_colors; + if (sorted[low] == color) return low; // loop invariant: sorted[low] != color + while (1) { + const int mid = (low + hi) >> 1; + if (sorted[mid] == color) { + return mid; + } else if (sorted[mid] < color) { + low = mid; + } else { + hi = mid; } - SwapColor(&palette[best_ix], &palette[i]); - predict = palette[i]; } + assert(0); + return 0; } // The palette has been sorted by alpha. This function checks if the other @@ -92,7 +90,8 @@ static void GreedyMinimizeDeltas(uint32_t palette[], int num_colors) { // no benefit to re-organize them greedily. A monotonic development // would be spotted in green-only situations (like lossy alpha) or gray-scale // images. -static int PaletteHasNonMonotonousDeltas(uint32_t palette[], int num_colors) { +static int PaletteHasNonMonotonousDeltas(const uint32_t* const palette, + int num_colors) { uint32_t predict = 0x000000; int i; uint8_t sign_found = 0x00; @@ -115,28 +114,218 @@ static int PaletteHasNonMonotonousDeltas(uint32_t palette[], int num_colors) { return (sign_found & (sign_found << 1)) != 0; // two consequent signs. 
} +static void PaletteSortMinimizeDeltas(const uint32_t* const palette_sorted, + int num_colors, uint32_t* const palette) { + uint32_t predict = 0x00000000; + int i, k; + memcpy(palette, palette_sorted, num_colors * sizeof(*palette)); + if (!PaletteHasNonMonotonousDeltas(palette_sorted, num_colors)) return; + // Find greedily always the closest color of the predicted color to minimize + // deltas in the palette. This reduces storage needs since the + // palette is stored with delta encoding. + for (i = 0; i < num_colors; ++i) { + int best_ix = i; + uint32_t best_score = ~0U; + for (k = i; k < num_colors; ++k) { + const uint32_t cur_score = PaletteColorDistance(palette[k], predict); + if (best_score > cur_score) { + best_score = cur_score; + best_ix = k; + } + } + SwapColor(&palette[best_ix], &palette[i]); + predict = palette[i]; + } +} + +// Sort palette in increasing order and prepare an inverse mapping array. +static void PrepareMapToPalette(const uint32_t palette[], uint32_t num_colors, + uint32_t sorted[], uint32_t idx_map[]) { + uint32_t i; + memcpy(sorted, palette, num_colors * sizeof(*sorted)); + qsort(sorted, num_colors, sizeof(*sorted), PaletteCompareColorsForQsort); + for (i = 0; i < num_colors; ++i) { + idx_map[SearchColorNoIdx(sorted, palette[i], num_colors)] = i; + } +} + // ----------------------------------------------------------------------------- -// Palette +// Modified Zeng method from "A Survey on Palette Reordering +// Methods for Improving the Compression of Color-Indexed Images" by Armando J. +// Pinho and Antonio J. R. Neves. + +// Finds the biggest cooccurrence in the matrix. +static void CoOccurrenceFindMax(const uint32_t* const cooccurrence, + uint32_t num_colors, uint8_t* const c1, + uint8_t* const c2) { + // Find the index that is most frequently located adjacent to other + // (different) indexes. 
+ uint32_t best_sum = 0u; + uint32_t i, j, best_cooccurrence; + *c1 = 0u; + for (i = 0; i < num_colors; ++i) { + uint32_t sum = 0; + for (j = 0; j < num_colors; ++j) sum += cooccurrence[i * num_colors + j]; + if (sum > best_sum) { + best_sum = sum; + *c1 = i; + } + } + // Find the index that is most frequently found adjacent to *c1. + *c2 = 0u; + best_cooccurrence = 0u; + for (i = 0; i < num_colors; ++i) { + if (cooccurrence[*c1 * num_colors + i] > best_cooccurrence) { + best_cooccurrence = cooccurrence[*c1 * num_colors + i]; + *c2 = i; + } + } + assert(*c1 != *c2); +} -// If number of colors in the image is less than or equal to MAX_PALETTE_SIZE, -// creates a palette and returns true, else returns false. -static int AnalyzeAndCreatePalette(const WebPPicture* const pic, - int low_effort, - uint32_t palette[MAX_PALETTE_SIZE], - int* const palette_size) { - const int num_colors = WebPGetColorPalette(pic, palette); - if (num_colors > MAX_PALETTE_SIZE) { - *palette_size = 0; +// Builds the cooccurrence matrix +static int CoOccurrenceBuild(const WebPPicture* const pic, + const uint32_t* const palette, uint32_t num_colors, + uint32_t* cooccurrence) { + uint32_t *lines, *line_top, *line_current, *line_tmp; + int x, y; + const uint32_t* src = pic->argb; + uint32_t prev_pix = ~src[0]; + uint32_t prev_idx = 0u; + uint32_t idx_map[MAX_PALETTE_SIZE] = {0}; + uint32_t palette_sorted[MAX_PALETTE_SIZE]; + lines = (uint32_t*)WebPSafeMalloc(2 * pic->width, sizeof(*lines)); + if (lines == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); return 0; } - *palette_size = num_colors; - qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort); - if (!low_effort && PaletteHasNonMonotonousDeltas(palette, num_colors)) { - GreedyMinimizeDeltas(palette, num_colors); + line_top = &lines[0]; + line_current = &lines[pic->width]; + PrepareMapToPalette(palette, num_colors, palette_sorted, idx_map); + for (y = 0; y < pic->height; ++y) { + for (x = 0; x < 
pic->width; ++x) { + const uint32_t pix = src[x]; + if (pix != prev_pix) { + prev_idx = idx_map[SearchColorNoIdx(palette_sorted, pix, num_colors)]; + prev_pix = pix; + } + line_current[x] = prev_idx; + // 4-connectivity is what works best as mentioned in "On the relation + // between Memon's and the modified Zeng's palette reordering methods". + if (x > 0 && prev_idx != line_current[x - 1]) { + const uint32_t left_idx = line_current[x - 1]; + ++cooccurrence[prev_idx * num_colors + left_idx]; + ++cooccurrence[left_idx * num_colors + prev_idx]; + } + if (y > 0 && prev_idx != line_top[x]) { + const uint32_t top_idx = line_top[x]; + ++cooccurrence[prev_idx * num_colors + top_idx]; + ++cooccurrence[top_idx * num_colors + prev_idx]; + } + } + line_tmp = line_top; + line_top = line_current; + line_current = line_tmp; + src += pic->argb_stride; } + WebPSafeFree(lines); return 1; } +struct Sum { + uint8_t index; + uint32_t sum; +}; + +// Implements the modified Zeng method from "A Survey on Palette Reordering +// Methods for Improving the Compression of Color-Indexed Images" by Armando J. +// Pinho and Antonio J. R. Neves. +static int PaletteSortModifiedZeng( + const WebPPicture* const pic, const uint32_t* const palette_sorted, + uint32_t num_colors, uint32_t* const palette) { + uint32_t i, j, ind; + uint8_t remapping[MAX_PALETTE_SIZE]; + uint32_t* cooccurrence; + struct Sum sums[MAX_PALETTE_SIZE]; + uint32_t first, last; + uint32_t num_sums; + // TODO(vrabaud) check whether one color images should use palette or not. + if (num_colors <= 1) return 1; + // Build the co-occurrence matrix. + cooccurrence = + (uint32_t*)WebPSafeCalloc(num_colors * num_colors, sizeof(*cooccurrence)); + if (cooccurrence == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; + } + if (!CoOccurrenceBuild(pic, palette_sorted, num_colors, cooccurrence)) { + return 0; + } + + // Initialize the mapping list with the two best indices. 
+ CoOccurrenceFindMax(cooccurrence, num_colors, &remapping[0], &remapping[1]); + + // We need to append and prepend to the list of remapping. To this end, we + // actually define the next start/end of the list as indices in a vector (with + // a wrap around when the end is reached). + first = 0; + last = 1; + num_sums = num_colors - 2; // -2 because we know the first two values + if (num_sums > 0) { + // Initialize the sums with the first two remappings and find the best one + struct Sum* best_sum = &sums[0]; + best_sum->index = 0u; + best_sum->sum = 0u; + for (i = 0, j = 0; i < num_colors; ++i) { + if (i == remapping[0] || i == remapping[1]) continue; + sums[j].index = i; + sums[j].sum = cooccurrence[i * num_colors + remapping[0]] + + cooccurrence[i * num_colors + remapping[1]]; + if (sums[j].sum > best_sum->sum) best_sum = &sums[j]; + ++j; + } + + while (num_sums > 0) { + const uint8_t best_index = best_sum->index; + // Compute delta to know if we need to prepend or append the best index. + int32_t delta = 0; + const int32_t n = num_colors - num_sums; + for (ind = first, j = 0; (ind + j) % num_colors != last + 1; ++j) { + const uint16_t l_j = remapping[(ind + j) % num_colors]; + delta += (n - 1 - 2 * (int32_t)j) * + (int32_t)cooccurrence[best_index * num_colors + l_j]; + } + if (delta > 0) { + first = (first == 0) ? num_colors - 1 : first - 1; + remapping[first] = best_index; + } else { + ++last; + remapping[last] = best_index; + } + // Remove best_sum from sums. + *best_sum = sums[num_sums - 1]; + --num_sums; + // Update all the sums and find the best one. + best_sum = &sums[0]; + for (i = 0; i < num_sums; ++i) { + sums[i].sum += cooccurrence[best_index * num_colors + sums[i].index]; + if (sums[i].sum > best_sum->sum) best_sum = &sums[i]; + } + } + } + assert((last + 1) % num_colors == first); + WebPSafeFree(cooccurrence); + + // Re-map the palette. 
+ for (i = 0; i < num_colors; ++i) { + palette[i] = palette_sorted[remapping[(first + i) % num_colors]]; + } + return 1; +} + +// ----------------------------------------------------------------------------- +// Palette + // These five modes are evaluated and their respective entropy is computed. typedef enum { kDirect = 0, @@ -144,10 +333,18 @@ typedef enum { kSubGreen = 2, kSpatialSubGreen = 3, kPalette = 4, - kNumEntropyIx = 5 + kPaletteAndSpatial = 5, + kNumEntropyIx = 6 } EntropyIx; typedef enum { + kSortedDefault = 0, + kMinimizeDelta = 1, + kModifiedZeng = 2, + kUnusedPalette = 3, +} PaletteSorting; + +typedef enum { kHistoAlpha = 0, kHistoAlphaPred, kHistoGreen, @@ -241,8 +438,8 @@ static int AnalyzeEntropy(const uint32_t* argb, curr_row += argb_stride; } { - double entropy_comp[kHistoTotal]; - double entropy[kNumEntropyIx]; + float entropy_comp[kHistoTotal]; + float entropy[kNumEntropyIx]; int k; int last_mode_to_analyze = use_palette ? kPalette : kSpatialSubGreen; int j; @@ -354,14 +551,21 @@ static int GetTransformBits(int method, int histo_bits) { } // Set of parameters to be used in each iteration of the cruncher. -#define CRUNCH_CONFIGS_LZ77_MAX 2 +#define CRUNCH_SUBCONFIGS_MAX 2 +typedef struct { + int lz77_; + int do_no_cache_; +} CrunchSubConfig; typedef struct { int entropy_idx_; - int lz77s_types_to_try_[CRUNCH_CONFIGS_LZ77_MAX]; - int lz77s_types_to_try_size_; + PaletteSorting palette_sorting_type_; + CrunchSubConfig sub_configs_[CRUNCH_SUBCONFIGS_MAX]; + int sub_configs_size_; } CrunchConfig; -#define CRUNCH_CONFIGS_MAX kNumEntropyIx +// +2 because we add a palette sorting configuration for kPalette and +// kPaletteAndSpatial. +#define CRUNCH_CONFIGS_MAX (kNumEntropyIx + 2) static int EncoderAnalyze(VP8LEncoder* const enc, CrunchConfig crunch_configs[CRUNCH_CONFIGS_MAX], @@ -376,11 +580,20 @@ static int EncoderAnalyze(VP8LEncoder* const enc, int i; int use_palette; int n_lz77s; + // If set to 0, analyze the cache with the computed cache value. 
If 1, also + // analyze with no-cache. + int do_no_cache = 0; assert(pic != NULL && pic->argb != NULL); - use_palette = - AnalyzeAndCreatePalette(pic, low_effort, - enc->palette_, &enc->palette_size_); + // Check whether a palette is possible. + enc->palette_size_ = WebPGetColorPalette(pic, enc->palette_sorted_); + use_palette = (enc->palette_size_ <= MAX_PALETTE_SIZE); + if (!use_palette) { + enc->palette_size_ = 0; + } else { + qsort(enc->palette_sorted_, enc->palette_size_, + sizeof(*enc->palette_sorted_), PaletteCompareColorsForQsort); + } // Empirical bit sizes. enc->histo_bits_ = GetHistoBits(method, use_palette, @@ -390,6 +603,8 @@ static int EncoderAnalyze(VP8LEncoder* const enc, if (low_effort) { // AnalyzeEntropy is somewhat slow. crunch_configs[0].entropy_idx_ = use_palette ? kPalette : kSpatialSubGreen; + crunch_configs[0].palette_sorting_type_ = + use_palette ? kSortedDefault : kUnusedPalette; n_lz77s = 1; *crunch_configs_size = 1; } else { @@ -402,29 +617,59 @@ static int EncoderAnalyze(VP8LEncoder* const enc, return 0; } if (method == 6 && config->quality == 100) { + do_no_cache = 1; // Go brute force on all transforms. *crunch_configs_size = 0; for (i = 0; i < kNumEntropyIx; ++i) { - if (i != kPalette || use_palette) { + // We can only apply kPalette or kPaletteAndSpatial if we can indeed use + // a palette. + if ((i != kPalette && i != kPaletteAndSpatial) || use_palette) { assert(*crunch_configs_size < CRUNCH_CONFIGS_MAX); - crunch_configs[(*crunch_configs_size)++].entropy_idx_ = i; + crunch_configs[(*crunch_configs_size)].entropy_idx_ = i; + if (use_palette && (i == kPalette || i == kPaletteAndSpatial)) { + crunch_configs[(*crunch_configs_size)].palette_sorting_type_ = + kMinimizeDelta; + ++*crunch_configs_size; + // Also add modified Zeng's method. 
+ crunch_configs[(*crunch_configs_size)].entropy_idx_ = i; + crunch_configs[(*crunch_configs_size)].palette_sorting_type_ = + kModifiedZeng; + } else { + crunch_configs[(*crunch_configs_size)].palette_sorting_type_ = + kUnusedPalette; + } + ++*crunch_configs_size; } } } else { // Only choose the guessed best transform. *crunch_configs_size = 1; crunch_configs[0].entropy_idx_ = min_entropy_ix; + crunch_configs[0].palette_sorting_type_ = + use_palette ? kMinimizeDelta : kUnusedPalette; + if (config->quality >= 75 && method == 5) { + // Test with and without color cache. + do_no_cache = 1; + // If we have a palette, also check in combination with spatial. + if (min_entropy_ix == kPalette) { + *crunch_configs_size = 2; + crunch_configs[1].entropy_idx_ = kPaletteAndSpatial; + crunch_configs[1].palette_sorting_type_ = kMinimizeDelta; + } + } } } // Fill in the different LZ77s. - assert(n_lz77s <= CRUNCH_CONFIGS_LZ77_MAX); + assert(n_lz77s <= CRUNCH_SUBCONFIGS_MAX); for (i = 0; i < *crunch_configs_size; ++i) { int j; for (j = 0; j < n_lz77s; ++j) { - crunch_configs[i].lz77s_types_to_try_[j] = + assert(j < CRUNCH_SUBCONFIGS_MAX); + crunch_configs[i].sub_configs_[j].lz77_ = (j == 0) ? 
kLZ77Standard | kLZ77RLE : kLZ77Box; + crunch_configs[i].sub_configs_[j].do_no_cache_ = do_no_cache; } - crunch_configs[i].lz77s_types_to_try_size_ = n_lz77s; + crunch_configs[i].sub_configs_size_ = n_lz77s; } return 1; } @@ -440,7 +685,7 @@ static int EncoderInit(VP8LEncoder* const enc) { int i; if (!VP8LHashChainInit(&enc->hash_chain_, pix_cnt)) return 0; - for (i = 0; i < 3; ++i) VP8LBackwardRefsInit(&enc->refs_[i], refs_block_size); + for (i = 0; i < 4; ++i) VP8LBackwardRefsInit(&enc->refs_[i], refs_block_size); return 1; } @@ -708,11 +953,11 @@ static WEBP_INLINE void WriteHuffmanCodeWithExtraBits( VP8LPutBits(bw, (bits << depth) | symbol, depth + n_bits); } -static WebPEncodingError StoreImageToBitMask( +static int StoreImageToBitMask( VP8LBitWriter* const bw, int width, int histo_bits, const VP8LBackwardRefs* const refs, const uint16_t* histogram_symbols, - const HuffmanTreeCode* const huffman_codes) { + const HuffmanTreeCode* const huffman_codes, const WebPPicture* const pic) { const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1; const int tile_mask = (histo_bits == 0) ? 0 : -(1 << histo_bits); // x and y trace the position in the image. @@ -765,49 +1010,53 @@ static WebPEncodingError StoreImageToBitMask( } VP8LRefsCursorNext(&c); } - return bw->error_ ? VP8_ENC_ERROR_OUT_OF_MEMORY : VP8_ENC_OK; + if (bw->error_) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; + } + return 1; } -// Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31 -static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, - const uint32_t* const argb, - VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const refs_tmp1, - VP8LBackwardRefs* const refs_tmp2, - int width, int height, - int quality, int low_effort) { +// Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31. +// pic and percent are for progress. 
+static int EncodeImageNoHuffman(VP8LBitWriter* const bw, + const uint32_t* const argb, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs_array, int width, + int height, int quality, int low_effort, + const WebPPicture* const pic, int percent_range, + int* const percent) { int i; int max_tokens = 0; - WebPEncodingError err = VP8_ENC_OK; VP8LBackwardRefs* refs; HuffmanTreeToken* tokens = NULL; - HuffmanTreeCode huffman_codes[5] = { { 0, NULL, NULL } }; - const uint16_t histogram_symbols[1] = { 0 }; // only one tree, one symbol + HuffmanTreeCode huffman_codes[5] = {{0, NULL, NULL}}; + const uint16_t histogram_symbols[1] = {0}; // only one tree, one symbol int cache_bits = 0; VP8LHistogramSet* histogram_image = NULL; HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc( - 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); + 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); if (huff_tree == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } // Calculate backward references from ARGB image. 
- if (!VP8LHashChainFill(hash_chain, quality, argb, width, height, - low_effort)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + if (!VP8LHashChainFill(hash_chain, quality, argb, width, height, low_effort, + pic, percent_range / 2, percent)) { goto Error; } - refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, - kLZ77Standard | kLZ77RLE, &cache_bits, - hash_chain, refs_tmp1, refs_tmp2); - if (refs == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + if (!VP8LGetBackwardReferences(width, height, argb, quality, /*low_effort=*/0, + kLZ77Standard | kLZ77RLE, cache_bits, + /*do_no_cache=*/0, hash_chain, refs_array, + &cache_bits, pic, + percent_range - percent_range / 2, percent)) { goto Error; } + refs = &refs_array[0]; histogram_image = VP8LAllocateHistogramSet(1, cache_bits); if (histogram_image == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } VP8LHistogramSetClear(histogram_image); @@ -818,7 +1067,7 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, // Create Huffman bit lengths and codes for each histogram image. assert(histogram_image->size == 1); if (!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } @@ -835,7 +1084,7 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); if (tokens == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } @@ -847,27 +1096,32 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, } // Store actual literals. 
- err = StoreImageToBitMask(bw, width, 0, refs, histogram_symbols, - huffman_codes); + if (!StoreImageToBitMask(bw, width, 0, refs, histogram_symbols, huffman_codes, + pic)) { + goto Error; + } Error: WebPSafeFree(tokens); WebPSafeFree(huff_tree); VP8LFreeHistogramSet(histogram_image); WebPSafeFree(huffman_codes[0].codes); - return err; + return (pic->error_code == VP8_ENC_OK); } -static WebPEncodingError EncodeImageInternal( +// pic and percent are for progress. +static int EncodeImageInternal( VP8LBitWriter* const bw, const uint32_t* const argb, - VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[3], int width, + VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[4], int width, int height, int quality, int low_effort, int use_cache, const CrunchConfig* const config, int* cache_bits, int histogram_bits, - size_t init_byte_position, int* const hdr_size, int* const data_size) { - WebPEncodingError err = VP8_ENC_OK; + size_t init_byte_position, int* const hdr_size, int* const data_size, + const WebPPicture* const pic, int percent_range, int* const percent) { const uint32_t histogram_image_xysize = VP8LSubSampleSize(width, histogram_bits) * VP8LSubSampleSize(height, histogram_bits); + int remaining_percent = percent_range; + int percent_start = *percent; VP8LHistogramSet* histogram_image = NULL; VP8LHistogram* tmp_histo = NULL; int histogram_image_size = 0; @@ -876,112 +1130,135 @@ static WebPEncodingError EncodeImageInternal( 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); HuffmanTreeToken* tokens = NULL; HuffmanTreeCode* huffman_codes = NULL; - VP8LBackwardRefs* refs_best; - VP8LBackwardRefs* refs_tmp; - uint16_t* const histogram_symbols = - (uint16_t*)WebPSafeMalloc(histogram_image_xysize, - sizeof(*histogram_symbols)); - int lz77s_idx; + uint16_t* const histogram_symbols = (uint16_t*)WebPSafeMalloc( + histogram_image_xysize, sizeof(*histogram_symbols)); + int sub_configs_idx; + int cache_bits_init, write_histogram_image; VP8LBitWriter bw_init = 
*bw, bw_best; int hdr_size_tmp; + VP8LHashChain hash_chain_histogram; // histogram image hash chain + size_t bw_size_best = ~(size_t)0; assert(histogram_bits >= MIN_HUFFMAN_BITS); assert(histogram_bits <= MAX_HUFFMAN_BITS); assert(hdr_size != NULL); assert(data_size != NULL); - if (histogram_symbols == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + memset(&hash_chain_histogram, 0, sizeof(hash_chain_histogram)); + if (!VP8LBitWriterInit(&bw_best, 0)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + goto Error; + } + + // Make sure we can allocate the different objects. + if (huff_tree == NULL || histogram_symbols == NULL || + !VP8LHashChainInit(&hash_chain_histogram, histogram_image_xysize)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } + percent_range = remaining_percent / 5; + if (!VP8LHashChainFill(hash_chain, quality, argb, width, height, + low_effort, pic, percent_range, percent)) { + goto Error; + } + percent_start += percent_range; + remaining_percent -= percent_range; + if (use_cache) { // If the value is different from zero, it has been set during the // palette analysis. - if (*cache_bits == 0) *cache_bits = MAX_COLOR_CACHE_BITS; + cache_bits_init = (*cache_bits == 0) ? MAX_COLOR_CACHE_BITS : *cache_bits; } else { - *cache_bits = 0; + cache_bits_init = 0; } - // 'best_refs' is the reference to the best backward refs and points to one - // of refs_array[0] or refs_array[1]. - // Calculate backward references from ARGB image. - if (huff_tree == NULL || - !VP8LHashChainFill(hash_chain, quality, argb, width, height, - low_effort) || - !VP8LBitWriterInit(&bw_best, 0) || - (config->lz77s_types_to_try_size_ > 1 && - !VP8LBitWriterClone(bw, &bw_best))) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + // If several iterations will happen, clone into bw_best. 
+ if ((config->sub_configs_size_ > 1 || config->sub_configs_[0].do_no_cache_) && + !VP8LBitWriterClone(bw, &bw_best)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } - for (lz77s_idx = 0; lz77s_idx < config->lz77s_types_to_try_size_; - ++lz77s_idx) { - refs_best = VP8LGetBackwardReferences( - width, height, argb, quality, low_effort, - config->lz77s_types_to_try_[lz77s_idx], cache_bits, hash_chain, - &refs_array[0], &refs_array[1]); - if (refs_best == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - // Keep the best references aside and use the other element from the first - // two as a temporary for later usage. - refs_tmp = &refs_array[refs_best == &refs_array[0] ? 1 : 0]; - - histogram_image = - VP8LAllocateHistogramSet(histogram_image_xysize, *cache_bits); - tmp_histo = VP8LAllocateHistogram(*cache_bits); - if (histogram_image == NULL || tmp_histo == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - // Build histogram image and symbols from backward references. - if (!VP8LGetHistoImageSymbols(width, height, refs_best, quality, low_effort, - histogram_bits, *cache_bits, histogram_image, - tmp_histo, histogram_symbols)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - // Create Huffman bit lengths and codes for each histogram image. - histogram_image_size = histogram_image->size; - bit_array_size = 5 * histogram_image_size; - huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size, - sizeof(*huffman_codes)); - // Note: some histogram_image entries may point to tmp_histos[], so the - // latter need to outlive the following call to GetHuffBitLengthsAndCodes(). 
- if (huffman_codes == NULL || - !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + for (sub_configs_idx = 0; sub_configs_idx < config->sub_configs_size_; + ++sub_configs_idx) { + const CrunchSubConfig* const sub_config = + &config->sub_configs_[sub_configs_idx]; + int cache_bits_best, i_cache; + int i_remaining_percent = remaining_percent / config->sub_configs_size_; + int i_percent_range = i_remaining_percent / 4; + i_remaining_percent -= i_percent_range; + + if (!VP8LGetBackwardReferences( + width, height, argb, quality, low_effort, sub_config->lz77_, + cache_bits_init, sub_config->do_no_cache_, hash_chain, + &refs_array[0], &cache_bits_best, pic, i_percent_range, percent)) { goto Error; } - // Free combined histograms. - VP8LFreeHistogramSet(histogram_image); - histogram_image = NULL; - // Free scratch histograms. - VP8LFreeHistogram(tmp_histo); - tmp_histo = NULL; + for (i_cache = 0; i_cache < (sub_config->do_no_cache_ ? 2 : 1); ++i_cache) { + const int cache_bits_tmp = (i_cache == 0) ? cache_bits_best : 0; + // Speed-up: no need to study the no-cache case if it was already studied + // in i_cache == 0. + if (i_cache == 1 && cache_bits_best == 0) break; + + // Reset the bit writer for this iteration. + VP8LBitWriterReset(&bw_init, bw); + + // Build histogram image and symbols from backward references. + histogram_image = + VP8LAllocateHistogramSet(histogram_image_xysize, cache_bits_tmp); + tmp_histo = VP8LAllocateHistogram(cache_bits_tmp); + if (histogram_image == NULL || tmp_histo == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + goto Error; + } - // Color Cache parameters. 
- if (*cache_bits > 0) { - VP8LPutBits(bw, 1, 1); - VP8LPutBits(bw, *cache_bits, 4); - } else { - VP8LPutBits(bw, 0, 1); - } + i_percent_range = i_remaining_percent / 3; + i_remaining_percent -= i_percent_range; + if (!VP8LGetHistoImageSymbols( + width, height, &refs_array[i_cache], quality, low_effort, + histogram_bits, cache_bits_tmp, histogram_image, tmp_histo, + histogram_symbols, pic, i_percent_range, percent)) { + goto Error; + } + // Create Huffman bit lengths and codes for each histogram image. + histogram_image_size = histogram_image->size; + bit_array_size = 5 * histogram_image_size; + huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size, + sizeof(*huffman_codes)); + // Note: some histogram_image entries may point to tmp_histos[], so the + // latter need to outlive the following call to + // GetHuffBitLengthsAndCodes(). + if (huffman_codes == NULL || + !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + goto Error; + } + // Free combined histograms. + VP8LFreeHistogramSet(histogram_image); + histogram_image = NULL; + + // Free scratch histograms. + VP8LFreeHistogram(tmp_histo); + tmp_histo = NULL; + + // Color Cache parameters. + if (cache_bits_tmp > 0) { + VP8LPutBits(bw, 1, 1); + VP8LPutBits(bw, cache_bits_tmp, 4); + } else { + VP8LPutBits(bw, 0, 1); + } - // Huffman image + meta huffman. - { - const int write_histogram_image = (histogram_image_size > 1); + // Huffman image + meta huffman. 
+ write_histogram_image = (histogram_image_size > 1); VP8LPutBits(bw, write_histogram_image, 1); if (write_histogram_image) { - uint32_t* const histogram_argb = - (uint32_t*)WebPSafeMalloc(histogram_image_xysize, - sizeof(*histogram_argb)); + uint32_t* const histogram_argb = (uint32_t*)WebPSafeMalloc( + histogram_image_xysize, sizeof(*histogram_argb)); int max_index = 0; uint32_t i; if (histogram_argb == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } for (i = 0; i < histogram_image_xysize; ++i) { @@ -994,73 +1271,81 @@ static WebPEncodingError EncodeImageInternal( histogram_image_size = max_index; VP8LPutBits(bw, histogram_bits - 2, 3); - err = EncodeImageNoHuffman( - bw, histogram_argb, hash_chain, refs_tmp, &refs_array[2], - VP8LSubSampleSize(width, histogram_bits), - VP8LSubSampleSize(height, histogram_bits), quality, low_effort); + i_percent_range = i_remaining_percent / 2; + i_remaining_percent -= i_percent_range; + if (!EncodeImageNoHuffman( + bw, histogram_argb, &hash_chain_histogram, &refs_array[2], + VP8LSubSampleSize(width, histogram_bits), + VP8LSubSampleSize(height, histogram_bits), quality, low_effort, + pic, i_percent_range, percent)) { + WebPSafeFree(histogram_argb); + goto Error; + } WebPSafeFree(histogram_argb); - if (err != VP8_ENC_OK) goto Error; } - } - // Store Huffman codes. - { - int i; - int max_tokens = 0; - // Find maximum number of symbols for the huffman tree-set. - for (i = 0; i < 5 * histogram_image_size; ++i) { - HuffmanTreeCode* const codes = &huffman_codes[i]; - if (max_tokens < codes->num_symbols) { - max_tokens = codes->num_symbols; + // Store Huffman codes. + { + int i; + int max_tokens = 0; + // Find maximum number of symbols for the huffman tree-set. 
+ for (i = 0; i < 5 * histogram_image_size; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + if (max_tokens < codes->num_symbols) { + max_tokens = codes->num_symbols; + } + } + tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); + if (tokens == NULL) goto Error; + for (i = 0; i < 5 * histogram_image_size; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + StoreHuffmanCode(bw, huff_tree, tokens, codes); + ClearHuffmanTreeIfOnlyOneSymbol(codes); } } - tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); - if (tokens == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + // Store actual literals. + hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position); + if (!StoreImageToBitMask(bw, width, histogram_bits, &refs_array[i_cache], + histogram_symbols, huffman_codes, pic)) { goto Error; } - for (i = 0; i < 5 * histogram_image_size; ++i) { - HuffmanTreeCode* const codes = &huffman_codes[i]; - StoreHuffmanCode(bw, huff_tree, tokens, codes); - ClearHuffmanTreeIfOnlyOneSymbol(codes); + // Keep track of the smallest image so far. + if (VP8LBitWriterNumBytes(bw) < bw_size_best) { + bw_size_best = VP8LBitWriterNumBytes(bw); + *cache_bits = cache_bits_tmp; + *hdr_size = hdr_size_tmp; + *data_size = + (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size); + VP8LBitWriterSwap(bw, &bw_best); + } + WebPSafeFree(tokens); + tokens = NULL; + if (huffman_codes != NULL) { + WebPSafeFree(huffman_codes->codes); + WebPSafeFree(huffman_codes); + huffman_codes = NULL; } - } - // Store actual literals. - hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position); - err = StoreImageToBitMask(bw, width, histogram_bits, refs_best, - histogram_symbols, huffman_codes); - // Keep track of the smallest image so far. 
- if (lz77s_idx == 0 || - VP8LBitWriterNumBytes(bw) < VP8LBitWriterNumBytes(&bw_best)) { - *hdr_size = hdr_size_tmp; - *data_size = - (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size); - VP8LBitWriterSwap(bw, &bw_best); - } - // Reset the bit writer for the following iteration if any. - if (config->lz77s_types_to_try_size_ > 1) VP8LBitWriterReset(&bw_init, bw); - WebPSafeFree(tokens); - tokens = NULL; - if (huffman_codes != NULL) { - WebPSafeFree(huffman_codes->codes); - WebPSafeFree(huffman_codes); - huffman_codes = NULL; } } VP8LBitWriterSwap(bw, &bw_best); + if (!WebPReportProgress(pic, percent_start + remaining_percent, percent)) { + goto Error; + } + Error: WebPSafeFree(tokens); WebPSafeFree(huff_tree); VP8LFreeHistogramSet(histogram_image); VP8LFreeHistogram(tmp_histo); + VP8LHashChainClear(&hash_chain_histogram); if (huffman_codes != NULL) { WebPSafeFree(huffman_codes->codes); WebPSafeFree(huffman_codes); } WebPSafeFree(histogram_symbols); VP8LBitWriterWipeOut(&bw_best); - return err; + return (pic->error_code == VP8_ENC_OK); } // ----------------------------------------------------------------------------- @@ -1073,68 +1358,69 @@ static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height, VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height); } -static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, - int width, int height, - int quality, int low_effort, - int used_subtract_green, - VP8LBitWriter* const bw) { +static int ApplyPredictFilter(const VP8LEncoder* const enc, int width, + int height, int quality, int low_effort, + int used_subtract_green, VP8LBitWriter* const bw, + int percent_range, int* const percent) { const int pred_bits = enc->transform_bits_; const int transform_width = VP8LSubSampleSize(width, pred_bits); const int transform_height = VP8LSubSampleSize(height, pred_bits); // we disable near-lossless quantization if palette is used. 
- const int near_lossless_strength = enc->use_palette_ ? 100 - : enc->config_->near_lossless; + const int near_lossless_strength = + enc->use_palette_ ? 100 : enc->config_->near_lossless; - VP8LResidualImage(width, height, pred_bits, low_effort, enc->argb_, - enc->argb_scratch_, enc->transform_data_, - near_lossless_strength, enc->config_->exact, - used_subtract_green); + if (!VP8LResidualImage( + width, height, pred_bits, low_effort, enc->argb_, enc->argb_scratch_, + enc->transform_data_, near_lossless_strength, enc->config_->exact, + used_subtract_green, enc->pic_, percent_range / 2, percent)) { + return 0; + } VP8LPutBits(bw, TRANSFORM_PRESENT, 1); VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2); assert(pred_bits >= 2); VP8LPutBits(bw, pred_bits - 2, 3); return EncodeImageNoHuffman( bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_, - (VP8LBackwardRefs*)&enc->refs_[0], // cast const away - (VP8LBackwardRefs*)&enc->refs_[1], transform_width, transform_height, - quality, low_effort); + (VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height, + quality, low_effort, enc->pic_, percent_range - percent_range / 2, + percent); } -static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, - int width, int height, - int quality, int low_effort, - VP8LBitWriter* const bw) { +static int ApplyCrossColorFilter(const VP8LEncoder* const enc, int width, + int height, int quality, int low_effort, + VP8LBitWriter* const bw, int percent_range, + int* const percent) { const int ccolor_transform_bits = enc->transform_bits_; const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits); const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits); - VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality, - enc->argb_, enc->transform_data_); + if (!VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality, + enc->argb_, enc->transform_data_, enc->pic_, + percent_range / 2, percent)) { + return 0; 
+ } VP8LPutBits(bw, TRANSFORM_PRESENT, 1); VP8LPutBits(bw, CROSS_COLOR_TRANSFORM, 2); assert(ccolor_transform_bits >= 2); VP8LPutBits(bw, ccolor_transform_bits - 2, 3); return EncodeImageNoHuffman( bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_, - (VP8LBackwardRefs*)&enc->refs_[0], // cast const away - (VP8LBackwardRefs*)&enc->refs_[1], transform_width, transform_height, - quality, low_effort); + (VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height, + quality, low_effort, enc->pic_, percent_range - percent_range / 2, + percent); } // ----------------------------------------------------------------------------- -static WebPEncodingError WriteRiffHeader(const WebPPicture* const pic, - size_t riff_size, size_t vp8l_size) { +static int WriteRiffHeader(const WebPPicture* const pic, size_t riff_size, + size_t vp8l_size) { uint8_t riff[RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + VP8L_SIGNATURE_SIZE] = { 'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P', 'V', 'P', '8', 'L', 0, 0, 0, 0, VP8L_MAGIC_BYTE, }; PutLE32(riff + TAG_SIZE, (uint32_t)riff_size); PutLE32(riff + RIFF_HEADER_SIZE + TAG_SIZE, (uint32_t)vp8l_size); - if (!pic->writer(riff, sizeof(riff), pic)) { - return VP8_ENC_ERROR_BAD_WRITE; - } - return VP8_ENC_OK; + return pic->writer(riff, sizeof(riff), pic); } static int WriteImageSize(const WebPPicture* const pic, @@ -1154,36 +1440,29 @@ static int WriteRealAlphaAndVersion(VP8LBitWriter* const bw, int has_alpha) { return !bw->error_; } -static WebPEncodingError WriteImage(const WebPPicture* const pic, - VP8LBitWriter* const bw, - size_t* const coded_size) { - WebPEncodingError err = VP8_ENC_OK; +static int WriteImage(const WebPPicture* const pic, VP8LBitWriter* const bw, + size_t* const coded_size) { const uint8_t* const webpll_data = VP8LBitWriterFinish(bw); const size_t webpll_size = VP8LBitWriterNumBytes(bw); const size_t vp8l_size = VP8L_SIGNATURE_SIZE + webpll_size; const size_t pad = vp8l_size & 1; const size_t riff_size = TAG_SIZE + 
CHUNK_HEADER_SIZE + vp8l_size + pad; - err = WriteRiffHeader(pic, riff_size, vp8l_size); - if (err != VP8_ENC_OK) goto Error; - - if (!pic->writer(webpll_data, webpll_size, pic)) { - err = VP8_ENC_ERROR_BAD_WRITE; - goto Error; + if (!WriteRiffHeader(pic, riff_size, vp8l_size) || + !pic->writer(webpll_data, webpll_size, pic)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); + return 0; } if (pad) { const uint8_t pad_byte[1] = { 0 }; if (!pic->writer(pad_byte, 1, pic)) { - err = VP8_ENC_ERROR_BAD_WRITE; - goto Error; + WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); + return 0; } } *coded_size = CHUNK_HEADER_SIZE + riff_size; - return VP8_ENC_OK; - - Error: - return err; + return 1; } // ----------------------------------------------------------------------------- @@ -1199,18 +1478,16 @@ static void ClearTransformBuffer(VP8LEncoder* const enc) { // Flags influencing the memory allocated: // enc->transform_bits_ // enc->use_predict_, enc->use_cross_color_ -static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, - int width, int height) { - WebPEncodingError err = VP8_ENC_OK; +static int AllocateTransformBuffer(VP8LEncoder* const enc, int width, + int height) { const uint64_t image_size = width * height; // VP8LResidualImage needs room for 2 scanlines of uint32 pixels with an extra // pixel in each, plus 2 regular scanlines of bytes. // TODO(skal): Clean up by using arithmetic in bytes instead of words. const uint64_t argb_scratch_size = - enc->use_predict_ - ? (width + 1) * 2 + - (width * 2 + sizeof(uint32_t) - 1) / sizeof(uint32_t) - : 0; + enc->use_predict_ ? (width + 1) * 2 + (width * 2 + sizeof(uint32_t) - 1) / + sizeof(uint32_t) + : 0; const uint64_t transform_data_size = (enc->use_predict_ || enc->use_cross_color_) ? 
VP8LSubSampleSize(width, enc->transform_bits_) * @@ -1218,17 +1495,16 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, : 0; const uint64_t max_alignment_in_words = (WEBP_ALIGN_CST + sizeof(uint32_t) - 1) / sizeof(uint32_t); - const uint64_t mem_size = - image_size + max_alignment_in_words + - argb_scratch_size + max_alignment_in_words + - transform_data_size; + const uint64_t mem_size = image_size + max_alignment_in_words + + argb_scratch_size + max_alignment_in_words + + transform_data_size; uint32_t* mem = enc->transform_mem_; if (mem == NULL || mem_size > enc->transform_mem_size_) { ClearTransformBuffer(enc); mem = (uint32_t*)WebPSafeMalloc(mem_size, sizeof(*mem)); if (mem == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; + WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; } enc->transform_mem_ = mem; enc->transform_mem_size_ = (size_t)mem_size; @@ -1241,19 +1517,16 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, enc->transform_data_ = mem; enc->current_width_ = width; - Error: - return err; + return 1; } -static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { - WebPEncodingError err = VP8_ENC_OK; +static int MakeInputImageCopy(VP8LEncoder* const enc) { const WebPPicture* const picture = enc->pic_; const int width = picture->width; const int height = picture->height; - err = AllocateTransformBuffer(enc, width, height); - if (err != VP8_ENC_OK) return err; - if (enc->argb_content_ == kEncoderARGB) return VP8_ENC_OK; + if (!AllocateTransformBuffer(enc, width, height)) return 0; + if (enc->argb_content_ == kEncoderARGB) return 1; { uint32_t* dst = enc->argb_; @@ -1267,27 +1540,11 @@ static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { } enc->argb_content_ = kEncoderARGB; assert(enc->current_width_ == width); - return VP8_ENC_OK; + return 1; } // ----------------------------------------------------------------------------- -static WEBP_INLINE 
int SearchColorNoIdx(const uint32_t sorted[], uint32_t color, - int hi) { - int low = 0; - if (sorted[low] == color) return low; // loop invariant: sorted[low] != color - while (1) { - const int mid = (low + hi) >> 1; - if (sorted[mid] == color) { - return mid; - } else if (sorted[mid] < color) { - low = mid; - } else { - hi = mid; - } - } -} - #define APPLY_PALETTE_GREEDY_MAX 4 static WEBP_INLINE uint32_t SearchColorGreedy(const uint32_t palette[], @@ -1322,17 +1579,6 @@ static WEBP_INLINE uint32_t ApplyPaletteHash2(uint32_t color) { (32 - PALETTE_INV_SIZE_BITS); } -// Sort palette in increasing order and prepare an inverse mapping array. -static void PrepareMapToPalette(const uint32_t palette[], int num_colors, - uint32_t sorted[], uint32_t idx_map[]) { - int i; - memcpy(sorted, palette, num_colors * sizeof(*sorted)); - qsort(sorted, num_colors, sizeof(*sorted), PaletteCompareColorsForQsort); - for (i = 0; i < num_colors; ++i) { - idx_map[SearchColorNoIdx(sorted, palette[i], num_colors)] = i; - } -} - // Use 1 pixel cache for ARGB pixels. #define APPLY_PALETTE_FOR(COLOR_INDEX) do { \ uint32_t prev_pix = palette[0]; \ @@ -1356,16 +1602,19 @@ static void PrepareMapToPalette(const uint32_t palette[], int num_colors, // using 'row' as a temporary buffer of size 'width'. // We assume that all src[] values have a corresponding entry in the palette. // Note: src[] can be the same as dst[] -static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, - uint32_t* dst, uint32_t dst_stride, - const uint32_t* palette, int palette_size, - int width, int height, int xbits) { +static int ApplyPalette(const uint32_t* src, uint32_t src_stride, uint32_t* dst, + uint32_t dst_stride, const uint32_t* palette, + int palette_size, int width, int height, int xbits, + const WebPPicture* const pic) { // TODO(skal): this tmp buffer is not needed if VP8LBundleColorMap() can be // made to work in-place. 
uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row)); int x, y; - if (tmp_row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; + if (tmp_row == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; + } if (palette_size < APPLY_PALETTE_GREEDY_MAX) { APPLY_PALETTE_FOR(SearchColorGreedy(palette, palette_size, pix)); @@ -1410,7 +1659,7 @@ static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, } } WebPSafeFree(tmp_row); - return VP8_ENC_OK; + return 1; } #undef APPLY_PALETTE_FOR #undef PALETTE_INV_SIZE_BITS @@ -1418,9 +1667,7 @@ static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, #undef APPLY_PALETTE_GREEDY_MAX // Note: Expects "enc->palette_" to be set properly. -static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc, - int in_place) { - WebPEncodingError err = VP8_ENC_OK; +static int MapImageFromPalette(VP8LEncoder* const enc, int in_place) { const WebPPicture* const pic = enc->pic_; const int width = pic->width; const int height = pic->height; @@ -1438,19 +1685,22 @@ static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc, xbits = (palette_size <= 16) ? 1 : 0; } - err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height); - if (err != VP8_ENC_OK) return err; - - err = ApplyPalette(src, src_stride, + if (!AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height)) { + return 0; + } + if (!ApplyPalette(src, src_stride, enc->argb_, enc->current_width_, - palette, palette_size, width, height, xbits); + palette, palette_size, width, height, xbits, pic)) { + return 0; + } enc->argb_content_ = kEncoderPalette; - return err; + return 1; } // Save palette_[] to bitstream. 
static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort, - VP8LEncoder* const enc) { + VP8LEncoder* const enc, + int percent_range, int* const percent) { int i; uint32_t tmp_palette[MAX_PALETTE_SIZE]; const int palette_size = enc->palette_size_; @@ -1464,8 +1714,8 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort, } tmp_palette[0] = palette[0]; return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_, - &enc->refs_[0], &enc->refs_[1], palette_size, 1, - 20 /* quality */, low_effort); + &enc->refs_[0], palette_size, 1, /*quality=*/20, + low_effort, enc->pic_, percent_range, percent); } // ----------------------------------------------------------------------------- @@ -1491,7 +1741,7 @@ static void VP8LEncoderDelete(VP8LEncoder* enc) { if (enc != NULL) { int i; VP8LHashChainClear(&enc->hash_chain_); - for (i = 0; i < 3; ++i) VP8LBackwardRefsClear(&enc->refs_[i]); + for (i = 0; i < 4; ++i) VP8LBackwardRefsClear(&enc->refs_[i]); ClearTransformBuffer(enc); WebPSafeFree(enc); } @@ -1509,7 +1759,6 @@ typedef struct { CrunchConfig crunch_configs_[CRUNCH_CONFIGS_MAX]; int num_crunch_configs_; int red_and_blue_always_zero_; - WebPEncodingError err_; WebPAuxStats* stats_; } StreamEncodeContext; @@ -1526,7 +1775,6 @@ static int EncodeStreamHook(void* input, void* data2) { #if !defined(WEBP_DISABLE_STATS) WebPAuxStats* const stats = params->stats_; #endif - WebPEncodingError err = VP8_ENC_OK; const int quality = (int)config->quality; const int low_effort = (config->method == 0); #if (WEBP_NEAR_LOSSLESS == 1) @@ -1534,6 +1782,7 @@ static int EncodeStreamHook(void* input, void* data2) { #endif const int height = picture->height; const size_t byte_position = VP8LBitWriterNumBytes(bw); + int percent = 2; // for WebPProgressHook #if (WEBP_NEAR_LOSSLESS == 1) int use_near_lossless = 0; #endif @@ -1541,24 +1790,28 @@ static int EncodeStreamHook(void* input, void* data2) { int data_size = 0; int use_delta_palette = 0; int 
idx; - size_t best_size = 0; + size_t best_size = ~(size_t)0; VP8LBitWriter bw_init = *bw, bw_best; (void)data2; if (!VP8LBitWriterInit(&bw_best, 0) || (num_crunch_configs > 1 && !VP8LBitWriterClone(bw, &bw_best))) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } for (idx = 0; idx < num_crunch_configs; ++idx) { const int entropy_idx = crunch_configs[idx].entropy_idx_; - enc->use_palette_ = (entropy_idx == kPalette); + int remaining_percent = 97 / num_crunch_configs, percent_range; + enc->use_palette_ = + (entropy_idx == kPalette) || (entropy_idx == kPaletteAndSpatial); enc->use_subtract_green_ = (entropy_idx == kSubGreen) || (entropy_idx == kSpatialSubGreen); - enc->use_predict_ = - (entropy_idx == kSpatial) || (entropy_idx == kSpatialSubGreen); - if (low_effort) { + enc->use_predict_ = (entropy_idx == kSpatial) || + (entropy_idx == kSpatialSubGreen) || + (entropy_idx == kPaletteAndSpatial); + // When using a palette, R/B==0, hence no need to test for cross-color. + if (low_effort || enc->use_palette_) { enc->use_cross_color_ = 0; } else { enc->use_cross_color_ = red_and_blue_always_zero ? 
0 : enc->use_predict_; @@ -1573,11 +1826,10 @@ static int EncodeStreamHook(void* input, void* data2) { use_near_lossless = (config->near_lossless < 100) && !enc->use_palette_ && !enc->use_predict_; if (use_near_lossless) { - err = AllocateTransformBuffer(enc, width, height); - if (err != VP8_ENC_OK) goto Error; + if (!AllocateTransformBuffer(enc, width, height)) goto Error; if ((enc->argb_content_ != kEncoderNearLossless) && !VP8ApplyNearLossless(picture, config->near_lossless, enc->argb_)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } enc->argb_content_ = kEncoderNearLossless; @@ -1590,10 +1842,26 @@ static int EncodeStreamHook(void* input, void* data2) { // Encode palette if (enc->use_palette_) { - err = EncodePalette(bw, low_effort, enc); - if (err != VP8_ENC_OK) goto Error; - err = MapImageFromPalette(enc, use_delta_palette); - if (err != VP8_ENC_OK) goto Error; + if (crunch_configs[idx].palette_sorting_type_ == kSortedDefault) { + // Nothing to do, we have already sorted the palette. + memcpy(enc->palette_, enc->palette_sorted_, + enc->palette_size_ * sizeof(*enc->palette_)); + } else if (crunch_configs[idx].palette_sorting_type_ == kMinimizeDelta) { + PaletteSortMinimizeDeltas(enc->palette_sorted_, enc->palette_size_, + enc->palette_); + } else { + assert(crunch_configs[idx].palette_sorting_type_ == kModifiedZeng); + if (!PaletteSortModifiedZeng(enc->pic_, enc->palette_sorted_, + enc->palette_size_, enc->palette_)) { + goto Error; + } + } + percent_range = remaining_percent / 4; + if (!EncodePalette(bw, low_effort, enc, percent_range, &percent)) { + goto Error; + } + remaining_percent -= percent_range; + if (!MapImageFromPalette(enc, use_delta_palette)) goto Error; // If using a color cache, do not have it bigger than the number of // colors. 
if (use_cache && enc->palette_size_ < (1 << MAX_COLOR_CACHE_BITS)) { @@ -1604,8 +1872,7 @@ static int EncodeStreamHook(void* input, void* data2) { // In case image is not packed. if (enc->argb_content_ != kEncoderNearLossless && enc->argb_content_ != kEncoderPalette) { - err = MakeInputImageCopy(enc); - if (err != VP8_ENC_OK) goto Error; + if (!MakeInputImageCopy(enc)) goto Error; } // ----------------------------------------------------------------------- @@ -1616,15 +1883,22 @@ static int EncodeStreamHook(void* input, void* data2) { } if (enc->use_predict_) { - err = ApplyPredictFilter(enc, enc->current_width_, height, quality, - low_effort, enc->use_subtract_green_, bw); - if (err != VP8_ENC_OK) goto Error; + percent_range = remaining_percent / 3; + if (!ApplyPredictFilter(enc, enc->current_width_, height, quality, + low_effort, enc->use_subtract_green_, bw, + percent_range, &percent)) { + goto Error; + } + remaining_percent -= percent_range; } if (enc->use_cross_color_) { - err = ApplyCrossColorFilter(enc, enc->current_width_, height, quality, - low_effort, bw); - if (err != VP8_ENC_OK) goto Error; + percent_range = remaining_percent / 2; + if (!ApplyCrossColorFilter(enc, enc->current_width_, height, quality, + low_effort, bw, percent_range, &percent)) { + goto Error; + } + remaining_percent -= percent_range; } } @@ -1632,15 +1906,16 @@ static int EncodeStreamHook(void* input, void* data2) { // ------------------------------------------------------------------------- // Encode and write the transformed image. 
- err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_, - enc->current_width_, height, quality, low_effort, - use_cache, &crunch_configs[idx], - &enc->cache_bits_, enc->histo_bits_, - byte_position, &hdr_size, &data_size); - if (err != VP8_ENC_OK) goto Error; + if (!EncodeImageInternal( + bw, enc->argb_, &enc->hash_chain_, enc->refs_, enc->current_width_, + height, quality, low_effort, use_cache, &crunch_configs[idx], + &enc->cache_bits_, enc->histo_bits_, byte_position, &hdr_size, + &data_size, picture, remaining_percent, &percent)) { + goto Error; + } // If we are better than what we already have. - if (idx == 0 || VP8LBitWriterNumBytes(bw) < best_size) { + if (VP8LBitWriterNumBytes(bw) < best_size) { best_size = VP8LBitWriterNumBytes(bw); // Store the BitWriter. VP8LBitWriterSwap(bw, &bw_best); @@ -1667,18 +1942,15 @@ static int EncodeStreamHook(void* input, void* data2) { } VP8LBitWriterSwap(&bw_best, bw); -Error: + Error: VP8LBitWriterWipeOut(&bw_best); - params->err_ = err; // The hook should return false in case of error. - return (err == VP8_ENC_OK); + return (params->picture_->error_code == VP8_ENC_OK); } -WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, - const WebPPicture* const picture, - VP8LBitWriter* const bw_main, - int use_cache) { - WebPEncodingError err = VP8_ENC_OK; +int VP8LEncodeStream(const WebPConfig* const config, + const WebPPicture* const picture, + VP8LBitWriter* const bw_main, int use_cache) { VP8LEncoder* const enc_main = VP8LEncoderNew(config, picture); VP8LEncoder* enc_side = NULL; CrunchConfig crunch_configs[CRUNCH_CONFIGS_MAX]; @@ -1690,15 +1962,24 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, // The main thread uses picture->stats, the side thread uses stats_side. 
WebPAuxStats stats_side; VP8LBitWriter bw_side; + WebPPicture picture_side; const WebPWorkerInterface* const worker_interface = WebPGetWorkerInterface(); int ok_main; + if (enc_main == NULL || !VP8LBitWriterInit(&bw_side, 0)) { + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); + VP8LEncoderDelete(enc_main); + return 0; + } + + // Avoid "garbage value" error from Clang's static analysis tool. + WebPPictureInit(&picture_side); + // Analyze image (entropy, num_palettes etc) - if (enc_main == NULL || - !EncoderAnalyze(enc_main, crunch_configs, &num_crunch_configs_main, + if (!EncoderAnalyze(enc_main, crunch_configs, &num_crunch_configs_main, &red_and_blue_always_zero) || - !EncoderInit(enc_main) || !VP8LBitWriterInit(&bw_side, 0)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + !EncoderInit(enc_main)) { + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } @@ -1727,25 +2008,32 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, StreamEncodeContext* const param = (idx == 0) ? ¶ms_main : ¶ms_side; param->config_ = config; - param->picture_ = picture; param->use_cache_ = use_cache; param->red_and_blue_always_zero_ = red_and_blue_always_zero; if (idx == 0) { + param->picture_ = picture; param->stats_ = picture->stats; param->bw_ = bw_main; param->enc_ = enc_main; } else { + // Create a side picture (error_code is not thread-safe). + if (!WebPPictureView(picture, /*left=*/0, /*top=*/0, picture->width, + picture->height, &picture_side)) { + assert(0); + } + picture_side.progress_hook = NULL; // Progress hook is not thread-safe. + param->picture_ = &picture_side; // No need to free a view afterwards. param->stats_ = (picture->stats == NULL) ? NULL : &stats_side; // Create a side bit writer. if (!VP8LBitWriterClone(bw_main, &bw_side)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } param->bw_ = &bw_side; // Create a side encoder. 
- enc_side = VP8LEncoderNew(config, picture); + enc_side = VP8LEncoderNew(config, &picture_side); if (enc_side == NULL || !EncoderInit(enc_side)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } // Copy the values that were computed for the main encoder. @@ -1754,6 +2042,8 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, enc_side->palette_size_ = enc_main->palette_size_; memcpy(enc_side->palette_, enc_main->palette_, sizeof(enc_main->palette_)); + memcpy(enc_side->palette_sorted_, enc_main->palette_sorted_, + sizeof(enc_main->palette_sorted_)); param->enc_ = enc_side; } // Create the workers. @@ -1767,7 +2057,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, // Start the second thread if needed. if (num_crunch_configs_side != 0) { if (!worker_interface->Reset(&worker_side)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } #if !defined(WEBP_DISABLE_STATS) @@ -1777,8 +2067,6 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, memcpy(&stats_side, picture->stats, sizeof(stats_side)); } #endif - // This line is only useful to remove a Clang static analyzer warning. - params_side.err_ = VP8_ENC_OK; worker_interface->Launch(&worker_side); } // Execute the main thread. @@ -1790,7 +2078,10 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, const int ok_side = worker_interface->Sync(&worker_side); worker_interface->End(&worker_side); if (!ok_main || !ok_side) { - err = ok_main ? 
params_side.err_ : params_main.err_; + if (picture->error_code == VP8_ENC_OK) { + assert(picture_side.error_code != VP8_ENC_OK); + WebPEncodingSetError(picture, picture_side.error_code); + } goto Error; } if (VP8LBitWriterNumBytes(&bw_side) < VP8LBitWriterNumBytes(bw_main)) { @@ -1801,22 +2092,17 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, } #endif } - } else { - if (!ok_main) { - err = params_main.err_; - goto Error; - } } -Error: + Error: VP8LBitWriterWipeOut(&bw_side); VP8LEncoderDelete(enc_main); VP8LEncoderDelete(enc_side); - return err; + return (picture->error_code == VP8_ENC_OK); } #undef CRUNCH_CONFIGS_MAX -#undef CRUNCH_CONFIGS_LZ77_MAX +#undef CRUNCH_SUBCONFIGS_MAX int VP8LEncodeImage(const WebPConfig* const config, const WebPPicture* const picture) { @@ -1825,14 +2111,12 @@ int VP8LEncodeImage(const WebPConfig* const config, size_t coded_size; int percent = 0; int initial_size; - WebPEncodingError err = VP8_ENC_OK; VP8LBitWriter bw; if (picture == NULL) return 0; if (config == NULL || picture->argb == NULL) { - err = VP8_ENC_ERROR_NULL_PARAMETER; - WebPEncodingSetError(picture, err); + WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); return 0; } @@ -1843,13 +2127,13 @@ int VP8LEncodeImage(const WebPConfig* const config, initial_size = (config->image_hint == WEBP_HINT_GRAPH) ? width * height : width * height * 2; if (!VP8LBitWriterInit(&bw, initial_size)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } if (!WebPReportProgress(picture, 1, &percent)) { UserAbort: - err = VP8_ENC_ERROR_USER_ABORT; + WebPEncodingSetError(picture, VP8_ENC_ERROR_USER_ABORT); goto Error; } // Reset stats (for pure lossless coding) @@ -1865,28 +2149,26 @@ int VP8LEncodeImage(const WebPConfig* const config, // Write image size. 
if (!WriteImageSize(picture, &bw)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } has_alpha = WebPPictureHasTransparency(picture); // Write the non-trivial Alpha flag and lossless version. if (!WriteRealAlphaAndVersion(&bw, has_alpha)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } - if (!WebPReportProgress(picture, 5, &percent)) goto UserAbort; + if (!WebPReportProgress(picture, 2, &percent)) goto UserAbort; // Encode main image stream. - err = VP8LEncodeStream(config, picture, &bw, 1 /*use_cache*/); - if (err != VP8_ENC_OK) goto Error; + if (!VP8LEncodeStream(config, picture, &bw, 1 /*use_cache*/)) goto Error; - if (!WebPReportProgress(picture, 90, &percent)) goto UserAbort; + if (!WebPReportProgress(picture, 99, &percent)) goto UserAbort; // Finish the RIFF chunk. - err = WriteImage(picture, &bw, &coded_size); - if (err != VP8_ENC_OK) goto Error; + if (!WriteImage(picture, &bw, &coded_size)) goto Error; if (!WebPReportProgress(picture, 100, &percent)) goto UserAbort; @@ -1905,13 +2187,11 @@ int VP8LEncodeImage(const WebPConfig* const config, } Error: - if (bw.error_) err = VP8_ENC_ERROR_OUT_OF_MEMORY; - VP8LBitWriterWipeOut(&bw); - if (err != VP8_ENC_OK) { - WebPEncodingSetError(picture, err); - return 0; + if (bw.error_) { + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } - return 1; + VP8LBitWriterWipeOut(&bw); + return (picture->error_code == VP8_ENC_OK); } //------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/src/enc/vp8li_enc.h b/thirdparty/libwebp/src/enc/vp8li_enc.h index d2d0fc509c..3d35e1612d 100644 --- a/thirdparty/libwebp/src/enc/vp8li_enc.h +++ b/thirdparty/libwebp/src/enc/vp8li_enc.h @@ -69,9 +69,11 @@ typedef struct { int use_palette_; int palette_size_; uint32_t palette_[MAX_PALETTE_SIZE]; + // Sorted version of palette_ for cache purposes. 
+ uint32_t palette_sorted_[MAX_PALETTE_SIZE]; // Some 'scratch' (potentially large) objects. - struct VP8LBackwardRefs refs_[3]; // Backward Refs array for temporaries. + struct VP8LBackwardRefs refs_[4]; // Backward Refs array for temporaries. VP8LHashChain hash_chain_; // HashChain data for constructing // backward references. } VP8LEncoder; @@ -87,9 +89,10 @@ int VP8LEncodeImage(const WebPConfig* const config, // Encodes the main image stream using the supplied bit writer. // If 'use_cache' is false, disables the use of color cache. -WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, - const WebPPicture* const picture, - VP8LBitWriter* const bw, int use_cache); +// Returns false in case of error (stored in picture->error_code). +int VP8LEncodeStream(const WebPConfig* const config, + const WebPPicture* const picture, VP8LBitWriter* const bw, + int use_cache); #if (WEBP_NEAR_LOSSLESS == 1) // in near_lossless.c @@ -101,13 +104,18 @@ int VP8ApplyNearLossless(const WebPPicture* const picture, int quality, //------------------------------------------------------------------------------ // Image transforms in predictor.c. -void VP8LResidualImage(int width, int height, int bits, int low_effort, - uint32_t* const argb, uint32_t* const argb_scratch, - uint32_t* const image, int near_lossless, int exact, - int used_subtract_green); - -void VP8LColorSpaceTransform(int width, int height, int bits, int quality, - uint32_t* const argb, uint32_t* image); +// pic and percent are for progress. +// Returns false in case of error (stored in pic->error_code). 
+int VP8LResidualImage(int width, int height, int bits, int low_effort, + uint32_t* const argb, uint32_t* const argb_scratch, + uint32_t* const image, int near_lossless, int exact, + int used_subtract_green, const WebPPicture* const pic, + int percent_range, int* const percent); + +int VP8LColorSpaceTransform(int width, int height, int bits, int quality, + uint32_t* const argb, uint32_t* image, + const WebPPicture* const pic, int percent_range, + int* const percent); //------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/src/enc/webp_enc.c b/thirdparty/libwebp/src/enc/webp_enc.c index 9f4b10c26c..9620e05070 100644 --- a/thirdparty/libwebp/src/enc/webp_enc.c +++ b/thirdparty/libwebp/src/enc/webp_enc.c @@ -336,9 +336,7 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { if (!WebPValidateConfig(config)) { return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION); } - if (pic->width <= 0 || pic->height <= 0) { - return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION); - } + if (!WebPValidatePicture(pic)) return 0; if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION) { return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION); } @@ -400,7 +398,7 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { } if (!config->exact) { - WebPCleanupTransparentAreaLossless(pic); + WebPReplaceTransparentPixels(pic, 0x000000); } ok = VP8LEncodeImage(config, pic); // Sets pic->error in case of problem. 
diff --git a/thirdparty/libwebp/src/mux/anim_encode.c b/thirdparty/libwebp/src/mux/anim_encode.c index 7be99068f6..7078d9ae6b 100644 --- a/thirdparty/libwebp/src/mux/anim_encode.c +++ b/thirdparty/libwebp/src/mux/anim_encode.c @@ -248,9 +248,6 @@ WebPAnimEncoder* WebPAnimEncoderNewInternal( enc = (WebPAnimEncoder*)WebPSafeCalloc(1, sizeof(*enc)); if (enc == NULL) return NULL; - // sanity inits, so we can call WebPAnimEncoderDelete(): - enc->encoded_frames_ = NULL; - enc->mux_ = NULL; MarkNoError(enc); // Dimensions and options. @@ -421,7 +418,7 @@ static void MinimizeChangeRectangle(const WebPPicture* const src, const int max_allowed_diff_lossy = QualityToMaxDiff(quality); const int max_allowed_diff = is_lossless ? 0 : max_allowed_diff_lossy; - // Sanity checks. + // Assumption/correctness checks. assert(src->width == dst->width && src->height == dst->height); assert(rect->x_offset_ + rect->width_ <= dst->width); assert(rect->y_offset_ + rect->height_ <= dst->height); @@ -949,7 +946,8 @@ static int IncreasePreviousDuration(WebPAnimEncoder* const enc, int duration) { int new_duration; assert(enc->count_ >= 1); - assert(prev_enc_frame->sub_frame_.duration == + assert(!prev_enc_frame->is_key_frame_ || + prev_enc_frame->sub_frame_.duration == prev_enc_frame->key_frame_.duration); assert(prev_enc_frame->sub_frame_.duration == (prev_enc_frame->sub_frame_.duration & (MAX_DURATION - 1))); @@ -966,7 +964,7 @@ static int IncreasePreviousDuration(WebPAnimEncoder* const enc, int duration) { 0x10, 0x88, 0x88, 0x08 }; const WebPData lossless_1x1 = { - lossless_1x1_bytes, sizeof(lossless_1x1_bytes) + lossless_1x1_bytes, sizeof(lossless_1x1_bytes) }; const uint8_t lossy_1x1_bytes[] = { 0x52, 0x49, 0x46, 0x46, 0x40, 0x00, 0x00, 0x00, 0x57, 0x45, 0x42, 0x50, @@ -1358,6 +1356,12 @@ int WebPAnimEncoderAdd(WebPAnimEncoder* enc, WebPPicture* frame, int timestamp, if (!IncreasePreviousDuration(enc, (int)prev_frame_duration)) { return 0; } + // IncreasePreviousDuration() may add a frame 
to avoid exceeding + // MAX_DURATION which could cause CacheFrame() to over read encoded_frames_ + // before the next flush. + if (enc->count_ == enc->size_ && !FlushFrames(enc)) { + return 0; + } } else { enc->first_timestamp_ = timestamp; } diff --git a/thirdparty/libwebp/src/mux/muxedit.c b/thirdparty/libwebp/src/mux/muxedit.c index ccf14b2a0c..63e71a0aba 100644 --- a/thirdparty/libwebp/src/mux/muxedit.c +++ b/thirdparty/libwebp/src/mux/muxedit.c @@ -70,6 +70,7 @@ void WebPMuxDelete(WebPMux* mux) { err = ChunkAssignData(&chunk, data, copy_data, tag); \ if (err == WEBP_MUX_OK) { \ err = ChunkSetHead(&chunk, (LIST)); \ + if (err != WEBP_MUX_OK) ChunkRelease(&chunk); \ } \ return err; \ } @@ -235,7 +236,6 @@ WebPMuxError WebPMuxSetImage(WebPMux* mux, const WebPData* bitstream, WebPMuxImage wpi; WebPMuxError err; - // Sanity checks. if (mux == NULL || bitstream == NULL || bitstream->bytes == NULL || bitstream->size > MAX_CHUNK_PAYLOAD) { return WEBP_MUX_INVALID_ARGUMENT; @@ -267,7 +267,6 @@ WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPMuxFrameInfo* info, WebPMuxImage wpi; WebPMuxError err; - // Sanity checks. if (mux == NULL || info == NULL) return WEBP_MUX_INVALID_ARGUMENT; if (info->id != WEBP_CHUNK_ANMF) return WEBP_MUX_INVALID_ARGUMENT; diff --git a/thirdparty/libwebp/src/mux/muxi.h b/thirdparty/libwebp/src/mux/muxi.h index ad3e1bdb97..0f4af1784d 100644 --- a/thirdparty/libwebp/src/mux/muxi.h +++ b/thirdparty/libwebp/src/mux/muxi.h @@ -28,8 +28,8 @@ extern "C" { // Defines and constants. #define MUX_MAJ_VERSION 1 -#define MUX_MIN_VERSION 1 -#define MUX_REV_VERSION 0 +#define MUX_MIN_VERSION 2 +#define MUX_REV_VERSION 4 // Chunk object. 
typedef struct WebPChunk WebPChunk; diff --git a/thirdparty/libwebp/src/mux/muxinternal.c b/thirdparty/libwebp/src/mux/muxinternal.c index b9ee6717d3..75b6b416b9 100644 --- a/thirdparty/libwebp/src/mux/muxinternal.c +++ b/thirdparty/libwebp/src/mux/muxinternal.c @@ -155,17 +155,18 @@ WebPMuxError ChunkSetHead(WebPChunk* const chunk, WebPMuxError ChunkAppend(WebPChunk* const chunk, WebPChunk*** const chunk_list) { + WebPMuxError err; assert(chunk_list != NULL && *chunk_list != NULL); if (**chunk_list == NULL) { - ChunkSetHead(chunk, *chunk_list); + err = ChunkSetHead(chunk, *chunk_list); } else { WebPChunk* last_chunk = **chunk_list; while (last_chunk->next_ != NULL) last_chunk = last_chunk->next_; - ChunkSetHead(chunk, &last_chunk->next_); - *chunk_list = &last_chunk->next_; + err = ChunkSetHead(chunk, &last_chunk->next_); + if (err == WEBP_MUX_OK) *chunk_list = &last_chunk->next_; } - return WEBP_MUX_OK; + return err; } //------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/src/mux/muxread.c b/thirdparty/libwebp/src/mux/muxread.c index ae3b876bc5..80050396e1 100644 --- a/thirdparty/libwebp/src/mux/muxread.c +++ b/thirdparty/libwebp/src/mux/muxread.c @@ -56,7 +56,7 @@ static WebPMuxError ChunkVerifyAndAssign(WebPChunk* chunk, uint32_t chunk_size; WebPData chunk_data; - // Sanity checks. + // Correctness checks. if (data_size < CHUNK_HEADER_SIZE) return WEBP_MUX_NOT_ENOUGH_DATA; chunk_size = GetLE32(data + TAG_SIZE); if (chunk_size > MAX_CHUNK_PAYLOAD) return WEBP_MUX_BAD_DATA; @@ -155,7 +155,6 @@ static int MuxImageParse(const WebPChunk* const chunk, int copy_data, break; default: goto Fail; - break; } subchunk_size = ChunkDiskSize(&subchunk); bytes += subchunk_size; @@ -187,7 +186,6 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data, WebPChunk** chunk_list_ends[WEBP_CHUNK_NIL + 1] = { NULL }; ChunkInit(&chunk); - // Sanity checks. 
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_MUX_ABI_VERSION)) { return NULL; // version mismatch } @@ -264,7 +262,6 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data, if (!MuxImageParse(&chunk, copy_data, wpi)) goto Err; ChunkRelease(&chunk); goto PushImage; - break; default: // A non-image chunk. if (wpi->is_partial_) goto Err; // Encountered a non-image chunk before // getting all chunks of an image. @@ -483,7 +480,6 @@ WebPMuxError WebPMuxGetFrame( WebPMuxError err; WebPMuxImage* wpi; - // Sanity checks. if (mux == NULL || frame == NULL) { return WEBP_MUX_INVALID_ARGUMENT; } diff --git a/thirdparty/libwebp/src/utils/bit_reader_inl_utils.h b/thirdparty/libwebp/src/utils/bit_reader_inl_utils.h index 46b3880706..404b9a6d8c 100644 --- a/thirdparty/libwebp/src/utils/bit_reader_inl_utils.h +++ b/thirdparty/libwebp/src/utils/bit_reader_inl_utils.h @@ -55,7 +55,7 @@ void VP8LoadFinalBytes(VP8BitReader* const br); // makes sure br->value_ has at least BITS bits worth of data static WEBP_UBSAN_IGNORE_UNDEF WEBP_INLINE -void VP8LoadNewBytes(VP8BitReader* const br) { +void VP8LoadNewBytes(VP8BitReader* WEBP_RESTRICT const br) { assert(br != NULL && br->buf_ != NULL); // Read 'BITS' bits at a time if possible. if (br->buf_ < br->buf_max_) { @@ -104,7 +104,7 @@ void VP8LoadNewBytes(VP8BitReader* const br) { } // Read a bit with proba 'prob'. Speed-critical function! -static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, +static WEBP_INLINE int VP8GetBit(VP8BitReader* WEBP_RESTRICT const br, int prob, const char label[]) { // Don't move this declaration! 
It makes a big speed difference to store // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't @@ -137,7 +137,8 @@ static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, // simplified version of VP8GetBit() for prob=0x80 (note shift is always 1 here) static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE -int VP8GetSigned(VP8BitReader* const br, int v, const char label[]) { +int VP8GetSigned(VP8BitReader* WEBP_RESTRICT const br, int v, + const char label[]) { if (br->bits_ < 0) { VP8LoadNewBytes(br); } @@ -155,7 +156,7 @@ int VP8GetSigned(VP8BitReader* const br, int v, const char label[]) { } } -static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br, +static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* WEBP_RESTRICT const br, int prob, const char label[]) { // Don't move this declaration! It makes a big speed difference to store // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't diff --git a/thirdparty/libwebp/src/utils/bit_reader_utils.c b/thirdparty/libwebp/src/utils/bit_reader_utils.c index 60271c0ae0..857cd60988 100644 --- a/thirdparty/libwebp/src/utils/bit_reader_utils.c +++ b/thirdparty/libwebp/src/utils/bit_reader_utils.c @@ -41,14 +41,7 @@ void VP8InitBitReader(VP8BitReader* const br, br->bits_ = -8; // to load the very first 8bits br->eof_ = 0; VP8BitReaderSetBuffer(br, start, size); -// -- GODOT -- begin -#ifdef JAVASCRIPT_ENABLED // html5 required aligned reads - while(((uintptr_t)br->buf_ & 1) != 0 && !br->eof_) - VP8LoadFinalBytes(br); -#else VP8LoadNewBytes(br); -#endif -// -- GODOT -- end } void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset) { diff --git a/thirdparty/libwebp/src/utils/bit_reader_utils.h b/thirdparty/libwebp/src/utils/bit_reader_utils.h index 199dacf224..e64156e318 100644 --- a/thirdparty/libwebp/src/utils/bit_reader_utils.h +++ b/thirdparty/libwebp/src/utils/bit_reader_utils.h @@ -58,12 +58,6 @@ extern "C" { // BITS can be any multiple of 8 from 8 to 56 (inclusive). 
// Pick values that fit natural register size. -// -- GODOT -- start -#ifdef JAVASCRIPT_ENABLED -#define BITS 16 -#else -// -- GODOT -- end - #if defined(__i386__) || defined(_M_IX86) // x86 32bit #define BITS 24 #elif defined(__x86_64__) || defined(_M_X64) // x86 64bit @@ -78,10 +72,6 @@ extern "C" { #define BITS 24 #endif -// -- GODOT -- start -#endif -// -- GODOT -- end - //------------------------------------------------------------------------------ // Derived types and constants: // bit_t = natural register type for storing 'value_' (which is BITS+8 bits) diff --git a/thirdparty/libwebp/src/utils/bit_writer_utils.c b/thirdparty/libwebp/src/utils/bit_writer_utils.c index bef0e31ca5..2f408508f1 100644 --- a/thirdparty/libwebp/src/utils/bit_writer_utils.c +++ b/thirdparty/libwebp/src/utils/bit_writer_utils.c @@ -278,7 +278,7 @@ void VP8LPutBitsFlushBits(VP8LBitWriter* const bw) { // If needed, make some room by flushing some bits out. if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) { const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE; - if (extra_size != (size_t)extra_size || + if (!CheckSizeOverflow(extra_size) || !VP8LBitWriterResize(bw, (size_t)extra_size)) { bw->cur_ = bw->buf_; bw->error_ = 1; @@ -314,7 +314,7 @@ void VP8LPutBitsInternal(VP8LBitWriter* const bw, uint32_t bits, int n_bits) { while (used >= VP8L_WRITER_BITS) { if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) { const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE; - if (extra_size != (size_t)extra_size || + if (!CheckSizeOverflow(extra_size) || !VP8LBitWriterResize(bw, (size_t)extra_size)) { bw->cur_ = bw->buf_; bw->error_ = 1; diff --git a/thirdparty/libwebp/src/utils/color_cache_utils.c b/thirdparty/libwebp/src/utils/color_cache_utils.c index b09f538e8b..7b5222b6e5 100644 --- a/thirdparty/libwebp/src/utils/color_cache_utils.c +++ b/thirdparty/libwebp/src/utils/color_cache_utils.c @@ -20,22 +20,22 @@ 
//------------------------------------------------------------------------------ // VP8LColorCache. -int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) { +int VP8LColorCacheInit(VP8LColorCache* const color_cache, int hash_bits) { const int hash_size = 1 << hash_bits; - assert(cc != NULL); + assert(color_cache != NULL); assert(hash_bits > 0); - cc->colors_ = (uint32_t*)WebPSafeCalloc((uint64_t)hash_size, - sizeof(*cc->colors_)); - if (cc->colors_ == NULL) return 0; - cc->hash_shift_ = 32 - hash_bits; - cc->hash_bits_ = hash_bits; + color_cache->colors_ = (uint32_t*)WebPSafeCalloc( + (uint64_t)hash_size, sizeof(*color_cache->colors_)); + if (color_cache->colors_ == NULL) return 0; + color_cache->hash_shift_ = 32 - hash_bits; + color_cache->hash_bits_ = hash_bits; return 1; } -void VP8LColorCacheClear(VP8LColorCache* const cc) { - if (cc != NULL) { - WebPSafeFree(cc->colors_); - cc->colors_ = NULL; +void VP8LColorCacheClear(VP8LColorCache* const color_cache) { + if (color_cache != NULL) { + WebPSafeFree(color_cache->colors_); + color_cache->colors_ = NULL; } } diff --git a/thirdparty/libwebp/src/utils/huffman_encode_utils.c b/thirdparty/libwebp/src/utils/huffman_encode_utils.c index 6f3b1bbe02..585db91951 100644 --- a/thirdparty/libwebp/src/utils/huffman_encode_utils.c +++ b/thirdparty/libwebp/src/utils/huffman_encode_utils.c @@ -161,7 +161,7 @@ static void SetBitDepths(const HuffmanTree* const tree, // especially when population counts are longer than 2**tree_limit, but // we are not planning to use this with extremely long blocks. 
// -// See http://en.wikipedia.org/wiki/Huffman_coding +// See https://en.wikipedia.org/wiki/Huffman_coding static void GenerateOptimalTree(const uint32_t* const histogram, int histogram_size, HuffmanTree* tree, int tree_depth_limit, @@ -404,8 +404,7 @@ static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) { // Main entry point void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit, - uint8_t* const buf_rle, - HuffmanTree* const huff_tree, + uint8_t* const buf_rle, HuffmanTree* const huff_tree, HuffmanTreeCode* const huff_code) { const int num_symbols = huff_code->num_symbols; memset(buf_rle, 0, num_symbols * sizeof(*buf_rle)); diff --git a/thirdparty/libwebp/src/utils/huffman_encode_utils.h b/thirdparty/libwebp/src/utils/huffman_encode_utils.h index 3e6763ce49..3f7f1d8074 100644 --- a/thirdparty/libwebp/src/utils/huffman_encode_utils.h +++ b/thirdparty/libwebp/src/utils/huffman_encode_utils.h @@ -51,7 +51,7 @@ int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree, // huffman code tree. void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit, uint8_t* const buf_rle, HuffmanTree* const huff_tree, - HuffmanTreeCode* const tree); + HuffmanTreeCode* const huff_code); #ifdef __cplusplus } diff --git a/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c b/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c index f65b6cdbb6..97e7893704 100644 --- a/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c +++ b/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c @@ -30,7 +30,7 @@ #define DFIX 4 // extra precision for ordered dithering #define DSIZE 4 // dithering size (must be a power of two) -// cf. http://en.wikipedia.org/wiki/Ordered_dithering +// cf. 
https://en.wikipedia.org/wiki/Ordered_dithering static const uint8_t kOrderedDither[DSIZE][DSIZE] = { { 0, 8, 2, 10 }, // coefficients are in DFIX fixed-point precision { 12, 4, 14, 6 }, diff --git a/thirdparty/libwebp/src/utils/rescaler_utils.c b/thirdparty/libwebp/src/utils/rescaler_utils.c index 4bcae24af5..a0581a14b1 100644 --- a/thirdparty/libwebp/src/utils/rescaler_utils.c +++ b/thirdparty/libwebp/src/utils/rescaler_utils.c @@ -12,66 +12,74 @@ // Author: Skal (pascal.massimino@gmail.com) #include <assert.h> +#include <limits.h> #include <stdlib.h> #include <string.h> #include "src/dsp/dsp.h" #include "src/utils/rescaler_utils.h" +#include "src/utils/utils.h" //------------------------------------------------------------------------------ -void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height, - uint8_t* const dst, - int dst_width, int dst_height, int dst_stride, - int num_channels, rescaler_t* const work) { +int WebPRescalerInit(WebPRescaler* const rescaler, + int src_width, int src_height, + uint8_t* const dst, + int dst_width, int dst_height, int dst_stride, + int num_channels, rescaler_t* const work) { const int x_add = src_width, x_sub = dst_width; const int y_add = src_height, y_sub = dst_height; - wrk->x_expand = (src_width < dst_width); - wrk->y_expand = (src_height < dst_height); - wrk->src_width = src_width; - wrk->src_height = src_height; - wrk->dst_width = dst_width; - wrk->dst_height = dst_height; - wrk->src_y = 0; - wrk->dst_y = 0; - wrk->dst = dst; - wrk->dst_stride = dst_stride; - wrk->num_channels = num_channels; + const uint64_t total_size = 2ull * dst_width * num_channels * sizeof(*work); + if (!CheckSizeOverflow(total_size)) return 0; + + rescaler->x_expand = (src_width < dst_width); + rescaler->y_expand = (src_height < dst_height); + rescaler->src_width = src_width; + rescaler->src_height = src_height; + rescaler->dst_width = dst_width; + rescaler->dst_height = dst_height; + rescaler->src_y = 0; + rescaler->dst_y = 
0; + rescaler->dst = dst; + rescaler->dst_stride = dst_stride; + rescaler->num_channels = num_channels; // for 'x_expand', we use bilinear interpolation - wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add; - wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub; - if (!wrk->x_expand) { // fx_scale is not used otherwise - wrk->fx_scale = WEBP_RESCALER_FRAC(1, wrk->x_sub); + rescaler->x_add = rescaler->x_expand ? (x_sub - 1) : x_add; + rescaler->x_sub = rescaler->x_expand ? (x_add - 1) : x_sub; + if (!rescaler->x_expand) { // fx_scale is not used otherwise + rescaler->fx_scale = WEBP_RESCALER_FRAC(1, rescaler->x_sub); } // vertical scaling parameters - wrk->y_add = wrk->y_expand ? y_add - 1 : y_add; - wrk->y_sub = wrk->y_expand ? y_sub - 1 : y_sub; - wrk->y_accum = wrk->y_expand ? wrk->y_sub : wrk->y_add; - if (!wrk->y_expand) { + rescaler->y_add = rescaler->y_expand ? y_add - 1 : y_add; + rescaler->y_sub = rescaler->y_expand ? y_sub - 1 : y_sub; + rescaler->y_accum = rescaler->y_expand ? rescaler->y_sub : rescaler->y_add; + if (!rescaler->y_expand) { // This is WEBP_RESCALER_FRAC(dst_height, x_add * y_add) without the cast. - // Its value is <= WEBP_RESCALER_ONE, because dst_height <= wrk->y_add, and - // wrk->x_add >= 1; - const uint64_t ratio = - (uint64_t)dst_height * WEBP_RESCALER_ONE / (wrk->x_add * wrk->y_add); + // Its value is <= WEBP_RESCALER_ONE, because dst_height <= rescaler->y_add + // and rescaler->x_add >= 1; + const uint64_t num = (uint64_t)dst_height * WEBP_RESCALER_ONE; + const uint64_t den = (uint64_t)rescaler->x_add * rescaler->y_add; + const uint64_t ratio = num / den; if (ratio != (uint32_t)ratio) { // When ratio == WEBP_RESCALER_ONE, we can't represent the ratio with the // current fixed-point precision. This happens when src_height == - // wrk->y_add (which == src_height), and wrk->x_add == 1. + // rescaler->y_add (which == src_height), and rescaler->x_add == 1. // => We special-case fxy_scale = 0, in WebPRescalerExportRow(). 
- wrk->fxy_scale = 0; + rescaler->fxy_scale = 0; } else { - wrk->fxy_scale = (uint32_t)ratio; + rescaler->fxy_scale = (uint32_t)ratio; } - wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->y_sub); + rescaler->fy_scale = WEBP_RESCALER_FRAC(1, rescaler->y_sub); } else { - wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->x_add); - // wrk->fxy_scale is unused here. + rescaler->fy_scale = WEBP_RESCALER_FRAC(1, rescaler->x_add); + // rescaler->fxy_scale is unused here. } - wrk->irow = work; - wrk->frow = work + num_channels * dst_width; - memset(work, 0, 2 * dst_width * num_channels * sizeof(*work)); + rescaler->irow = work; + rescaler->frow = work + num_channels * dst_width; + memset(work, 0, (size_t)total_size); WebPRescalerDspInit(); + return 1; } int WebPRescalerGetScaledDimensions(int src_width, int src_height, @@ -82,6 +90,7 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height, { int width = *scaled_width; int height = *scaled_height; + const int max_size = INT_MAX / 2; // if width is unspecified, scale original proportionally to height ratio. if (width == 0 && src_height > 0) { @@ -94,7 +103,7 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height, (int)(((uint64_t)src_height * width + src_width - 1) / src_width); } // Check if the overall dimensions still make sense. - if (width <= 0 || height <= 0) { + if (width <= 0 || height <= 0 || width > max_size || height > max_size) { return 0; } @@ -107,31 +116,34 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height, //------------------------------------------------------------------------------ // all-in-one calls -int WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) { - const int num_lines = (wrk->y_accum + wrk->y_sub - 1) / wrk->y_sub; +int WebPRescaleNeededLines(const WebPRescaler* const rescaler, + int max_num_lines) { + const int num_lines = + (rescaler->y_accum + rescaler->y_sub - 1) / rescaler->y_sub; return (num_lines > max_num_lines) ? 
max_num_lines : num_lines; } -int WebPRescalerImport(WebPRescaler* const wrk, int num_lines, +int WebPRescalerImport(WebPRescaler* const rescaler, int num_lines, const uint8_t* src, int src_stride) { int total_imported = 0; - while (total_imported < num_lines && !WebPRescalerHasPendingOutput(wrk)) { - if (wrk->y_expand) { - rescaler_t* const tmp = wrk->irow; - wrk->irow = wrk->frow; - wrk->frow = tmp; + while (total_imported < num_lines && + !WebPRescalerHasPendingOutput(rescaler)) { + if (rescaler->y_expand) { + rescaler_t* const tmp = rescaler->irow; + rescaler->irow = rescaler->frow; + rescaler->frow = tmp; } - WebPRescalerImportRow(wrk, src); - if (!wrk->y_expand) { // Accumulate the contribution of the new row. + WebPRescalerImportRow(rescaler, src); + if (!rescaler->y_expand) { // Accumulate the contribution of the new row. int x; - for (x = 0; x < wrk->num_channels * wrk->dst_width; ++x) { - wrk->irow[x] += wrk->frow[x]; + for (x = 0; x < rescaler->num_channels * rescaler->dst_width; ++x) { + rescaler->irow[x] += rescaler->frow[x]; } } - ++wrk->src_y; + ++rescaler->src_y; src += src_stride; ++total_imported; - wrk->y_accum -= wrk->y_sub; + rescaler->y_accum -= rescaler->y_sub; } return total_imported; } diff --git a/thirdparty/libwebp/src/utils/rescaler_utils.h b/thirdparty/libwebp/src/utils/rescaler_utils.h index ca41e42c4a..ef201ef86c 100644 --- a/thirdparty/libwebp/src/utils/rescaler_utils.h +++ b/thirdparty/libwebp/src/utils/rescaler_utils.h @@ -47,12 +47,13 @@ struct WebPRescaler { }; // Initialize a rescaler given scratch area 'work' and dimensions of src & dst. -void WebPRescalerInit(WebPRescaler* const rescaler, - int src_width, int src_height, - uint8_t* const dst, - int dst_width, int dst_height, int dst_stride, - int num_channels, - rescaler_t* const work); +// Returns false in case of error. 
+int WebPRescalerInit(WebPRescaler* const rescaler, + int src_width, int src_height, + uint8_t* const dst, + int dst_width, int dst_height, int dst_stride, + int num_channels, + rescaler_t* const work); // If either 'scaled_width' or 'scaled_height' (but not both) is 0 the value // will be calculated preserving the aspect ratio, otherwise the values are diff --git a/thirdparty/libwebp/src/utils/utils.c b/thirdparty/libwebp/src/utils/utils.c index 764f752b82..a7c3a70fef 100644 --- a/thirdparty/libwebp/src/utils/utils.c +++ b/thirdparty/libwebp/src/utils/utils.c @@ -23,7 +23,7 @@ // alloc/free etc) is printed. For debugging/tuning purpose only (it's slow, // and not multi-thread safe!). // An interesting alternative is valgrind's 'massif' tool: -// http://valgrind.org/docs/manual/ms-manual.html +// https://valgrind.org/docs/manual/ms-manual.html // Here is an example command line: /* valgrind --tool=massif --massif-out-file=massif.out \ --stacks=yes --alloc-fn=WebPSafeMalloc --alloc-fn=WebPSafeCalloc @@ -101,6 +101,9 @@ static void Increment(int* const v) { #if defined(MALLOC_LIMIT) { const char* const malloc_limit_str = getenv("MALLOC_LIMIT"); +#if MALLOC_LIMIT > 1 + mem_limit = (size_t)MALLOC_LIMIT; +#endif if (malloc_limit_str != NULL) { mem_limit = atoi(malloc_limit_str); } @@ -169,16 +172,16 @@ static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) { const uint64_t total_size = nmemb * size; if (nmemb == 0) return 1; if ((uint64_t)size > WEBP_MAX_ALLOCABLE_MEMORY / nmemb) return 0; - if (total_size != (size_t)total_size) return 0; + if (!CheckSizeOverflow(total_size)) return 0; #if defined(PRINT_MEM_INFO) && defined(MALLOC_FAIL_AT) if (countdown_to_fail > 0 && --countdown_to_fail == 0) { return 0; // fake fail! 
} #endif -#if defined(MALLOC_LIMIT) +#if defined(PRINT_MEM_INFO) && defined(MALLOC_LIMIT) if (mem_limit > 0) { const uint64_t new_total_mem = (uint64_t)total_mem + total_size; - if (new_total_mem != (size_t)new_total_mem || + if (!CheckSizeOverflow(new_total_mem) || new_total_mem > mem_limit) { return 0; // fake fail! } @@ -231,7 +234,7 @@ void WebPFree(void* ptr) { void WebPCopyPlane(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height) { assert(src != NULL && dst != NULL); - assert(src_stride >= width && dst_stride >= width); + assert(abs(src_stride) >= width && abs(dst_stride) >= width); while (height-- > 0) { memcpy(dst, src, width); src += src_stride; diff --git a/thirdparty/libwebp/src/utils/utils.h b/thirdparty/libwebp/src/utils/utils.h index 2a3ec92678..ef04f108fe 100644 --- a/thirdparty/libwebp/src/utils/utils.h +++ b/thirdparty/libwebp/src/utils/utils.h @@ -42,6 +42,10 @@ extern "C" { #endif #endif // WEBP_MAX_ALLOCABLE_MEMORY +static WEBP_INLINE int CheckSizeOverflow(uint64_t size) { + return size == (size_t)size; +} + // size-checking safe malloc/calloc: verify that the requested size is not too // large, or return NULL. You don't need to call these for constructs like // malloc(sizeof(foo)), but only if there's picture-dependent size involved @@ -107,24 +111,33 @@ static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) { PutLE16(data + 2, (int)(val >> 16)); } -// Returns (int)floor(log2(n)). n must be > 0. // use GNU builtins where available. #if defined(__GNUC__) && \ ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) +// Returns (int)floor(log2(n)). n must be > 0. 
static WEBP_INLINE int BitsLog2Floor(uint32_t n) { return 31 ^ __builtin_clz(n); } +// counts the number of trailing zero +static WEBP_INLINE int BitsCtz(uint32_t n) { return __builtin_ctz(n); } #elif defined(_MSC_VER) && _MSC_VER > 1310 && \ (defined(_M_X64) || defined(_M_IX86)) #include <intrin.h> #pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward) static WEBP_INLINE int BitsLog2Floor(uint32_t n) { - unsigned long first_set_bit; + unsigned long first_set_bit; // NOLINT (runtime/int) _BitScanReverse(&first_set_bit, n); return first_set_bit; } -#else // default: use the C-version. +static WEBP_INLINE int BitsCtz(uint32_t n) { + unsigned long first_set_bit; // NOLINT (runtime/int) + _BitScanForward(&first_set_bit, n); + return first_set_bit; +} +#else // default: use the (slow) C-version. +#define WEBP_HAVE_SLOW_CLZ_CTZ // signal that the Clz/Ctz function are slow // Returns 31 ^ clz(n) = log2(n). This is the default C-implementation, either // based on table or not. Can be used as fallback if clz() is not available. #define WEBP_NEED_LOG_TABLE_8BIT @@ -139,6 +152,15 @@ static WEBP_INLINE int WebPLog2FloorC(uint32_t n) { } static WEBP_INLINE int BitsLog2Floor(uint32_t n) { return WebPLog2FloorC(n); } + +static WEBP_INLINE int BitsCtz(uint32_t n) { + int i; + for (i = 0; i < 32; ++i, n >>= 1) { + if (n & 1) return i; + } + return 32; +} + #endif //------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/src/webp/decode.h b/thirdparty/libwebp/src/webp/decode.h index 80dd0ef0cc..d98247509a 100644 --- a/thirdparty/libwebp/src/webp/decode.h +++ b/thirdparty/libwebp/src/webp/decode.h @@ -85,7 +85,7 @@ WEBP_EXTERN uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size, // Upon return, the Y buffer has a stride returned as '*stride', while U and V // have a common stride returned as '*uv_stride'. // Return NULL in case of error. -// (*) Also named Y'CbCr. 
See: http://en.wikipedia.org/wiki/YCbCr +// (*) Also named Y'CbCr. See: https://en.wikipedia.org/wiki/YCbCr WEBP_EXTERN uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size, int* width, int* height, uint8_t** u, uint8_t** v, @@ -453,7 +453,7 @@ struct WebPDecoderOptions { int scaled_width, scaled_height; // final resolution int use_threads; // if true, use multi-threaded decoding int dithering_strength; // dithering strength (0=Off, 100=full) - int flip; // flip output vertically + int flip; // if true, flip output vertically int alpha_dithering_strength; // alpha dithering strength in [0..100] uint32_t pad[5]; // padding for later use diff --git a/thirdparty/libwebp/src/webp/encode.h b/thirdparty/libwebp/src/webp/encode.h index 655166e7d4..56b68e2f10 100644 --- a/thirdparty/libwebp/src/webp/encode.h +++ b/thirdparty/libwebp/src/webp/encode.h @@ -148,7 +148,8 @@ struct WebPConfig { int use_delta_palette; // reserved for future lossless feature int use_sharp_yuv; // if needed, use sharp (and slow) RGB->YUV conversion - uint32_t pad[2]; // padding for later use + int qmin; // minimum permissible quality factor + int qmax; // maximum permissible quality factor }; // Enumerate some predefined settings for WebPConfig, depending on the type @@ -291,6 +292,11 @@ typedef enum WebPEncodingError { #define WEBP_MAX_DIMENSION 16383 // Main exchange structure (input samples, output bytes, statistics) +// +// Once WebPPictureInit() has been called, it's ok to make all the INPUT fields +// (use_argb, y/u/v, argb, ...) point to user-owned data, even if +// WebPPictureAlloc() has been called. Depending on the value use_argb, +// it's guaranteed that either *argb or *y/*u/*v content will be kept untouched. struct WebPPicture { // INPUT ////////////// @@ -435,7 +441,7 @@ WEBP_EXTERN int WebPPictureCrop(WebPPicture* picture, // the original dimension will be lost). 
Picture 'dst' need not be initialized // with WebPPictureInit() if it is different from 'src', since its content will // be overwritten. -// Returns false in case of memory allocation error or invalid parameters. +// Returns false in case of invalid parameters. WEBP_EXTERN int WebPPictureView(const WebPPicture* src, int left, int top, int width, int height, WebPPicture* dst); @@ -449,7 +455,7 @@ WEBP_EXTERN int WebPPictureIsView(const WebPPicture* picture); // dimension will be calculated preserving the aspect ratio. // No gamma correction is applied. // Returns false in case of error (invalid parameter or insufficient memory). -WEBP_EXTERN int WebPPictureRescale(WebPPicture* pic, int width, int height); +WEBP_EXTERN int WebPPictureRescale(WebPPicture* picture, int width, int height); // Colorspace conversion function to import RGB samples. // Previous buffer will be free'd, if any. @@ -520,7 +526,7 @@ WEBP_EXTERN int WebPPictureHasTransparency(const WebPPicture* picture); // Remove the transparency information (if present) by blending the color with // the background color 'background_rgb' (specified as 24bit RGB triplet). // After this call, all alpha values are reset to 0xff. -WEBP_EXTERN void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb); +WEBP_EXTERN void WebPBlendAlpha(WebPPicture* picture, uint32_t background_rgb); //------------------------------------------------------------------------------ // Main call |