diff options
Diffstat (limited to 'drivers/webp/dec')
-rw-r--r-- | drivers/webp/dec/alpha.c | 205 | ||||
-rw-r--r-- | drivers/webp/dec/buffer.c | 99 | ||||
-rw-r--r-- | drivers/webp/dec/decode_vp8.h | 17 | ||||
-rw-r--r-- | drivers/webp/dec/frame.c | 665 | ||||
-rw-r--r-- | drivers/webp/dec/idec.c | 307 | ||||
-rw-r--r-- | drivers/webp/dec/io.c | 269 | ||||
-rw-r--r-- | drivers/webp/dec/layer.c | 35 | ||||
-rw-r--r-- | drivers/webp/dec/quant.c | 17 | ||||
-rw-r--r-- | drivers/webp/dec/tree.c | 182 | ||||
-rw-r--r-- | drivers/webp/dec/vp8.c | 493 | ||||
-rw-r--r-- | drivers/webp/dec/vp8i.h | 218 | ||||
-rw-r--r-- | drivers/webp/dec/vp8l.c | 1095 | ||||
-rw-r--r-- | drivers/webp/dec/vp8li.h | 53 | ||||
-rw-r--r-- | drivers/webp/dec/webp.c | 220 | ||||
-rw-r--r-- | drivers/webp/dec/webpi.h | 35 |
15 files changed, 2224 insertions, 1686 deletions
diff --git a/drivers/webp/dec/alpha.c b/drivers/webp/dec/alpha.c index d1095fa555..52216fc4d6 100644 --- a/drivers/webp/dec/alpha.c +++ b/drivers/webp/dec/alpha.c @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Alpha-plane decompression. @@ -10,131 +12,156 @@ // Author: Skal (pascal.massimino@gmail.com) #include <stdlib.h> +#include "./alphai.h" #include "./vp8i.h" #include "./vp8li.h" -#include "../utils/filters.h" -#include "../utils/quant_levels.h" -#include "../format_constants.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -// TODO(skal): move to dsp/ ? -static void CopyPlane(const uint8_t* src, int src_stride, - uint8_t* dst, int dst_stride, int width, int height) { - while (height-- > 0) { - memcpy(dst, src, width); - src += src_stride; - dst += dst_stride; +#include "../dsp/dsp.h" +#include "../utils/quant_levels_dec.h" +#include "../utils/utils.h" +#include "../webp/format_constants.h" + +//------------------------------------------------------------------------------ +// ALPHDecoder object. + +ALPHDecoder* ALPHNew(void) { + ALPHDecoder* const dec = (ALPHDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec)); + return dec; +} + +void ALPHDelete(ALPHDecoder* const dec) { + if (dec != NULL) { + VP8LDelete(dec->vp8l_dec_); + dec->vp8l_dec_ = NULL; + WebPSafeFree(dec); } } //------------------------------------------------------------------------------ -// Decodes the compressed data 'data' of size 'data_size' into the 'output'. -// The 'output' buffer should be pre-allocated and must be of the same -// dimension 'height'x'stride', as that of the image. -// -// Returns 1 on successfully decoding the compressed alpha and -// 0 if either: -// error in bit-stream header (invalid compression mode or filter), or -// error returned by appropriate compression method. - -static int DecodeAlpha(const uint8_t* data, size_t data_size, - int width, int height, int stride, uint8_t* output) { - uint8_t* decoded_data = NULL; - const size_t decoded_size = height * width; - uint8_t* unfiltered_data = NULL; - WEBP_FILTER_TYPE filter; - int pre_processing; - int rsrv; +// Decoding. + +// Initialize alpha decoding by parsing the alpha header and decoding the image +// header for alpha data stored using lossless compression. +// Returns false in case of error in alpha header (data too short, invalid +// compression method or filter, error in lossless header data etc). 
+static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data, + size_t data_size, int width, int height, uint8_t* output) { int ok = 0; - int method; + const uint8_t* const alpha_data = data + ALPHA_HEADER_LEN; + const size_t alpha_data_size = data_size - ALPHA_HEADER_LEN; + int rsrv; - assert(width > 0 && height > 0 && stride >= width); + assert(width > 0 && height > 0); assert(data != NULL && output != NULL); + dec->width_ = width; + dec->height_ = height; + if (data_size <= ALPHA_HEADER_LEN) { return 0; } - method = (data[0] >> 0) & 0x03; - filter = (data[0] >> 2) & 0x03; - pre_processing = (data[0] >> 4) & 0x03; + dec->method_ = (data[0] >> 0) & 0x03; + dec->filter_ = (data[0] >> 2) & 0x03; + dec->pre_processing_ = (data[0] >> 4) & 0x03; rsrv = (data[0] >> 6) & 0x03; - if (method < ALPHA_NO_COMPRESSION || - method > ALPHA_LOSSLESS_COMPRESSION || - filter >= WEBP_FILTER_LAST || - pre_processing > ALPHA_PREPROCESSED_LEVELS || + if (dec->method_ < ALPHA_NO_COMPRESSION || + dec->method_ > ALPHA_LOSSLESS_COMPRESSION || + dec->filter_ >= WEBP_FILTER_LAST || + dec->pre_processing_ > ALPHA_PREPROCESSED_LEVELS || rsrv != 0) { return 0; } - if (method == ALPHA_NO_COMPRESSION) { - ok = (data_size >= decoded_size); - decoded_data = (uint8_t*)data + ALPHA_HEADER_LEN; + if (dec->method_ == ALPHA_NO_COMPRESSION) { + const size_t alpha_decoded_size = dec->width_ * dec->height_; + ok = (alpha_data_size >= alpha_decoded_size); } else { - decoded_data = (uint8_t*)malloc(decoded_size); - if (decoded_data == NULL) return 0; - ok = VP8LDecodeAlphaImageStream(width, height, - data + ALPHA_HEADER_LEN, - data_size - ALPHA_HEADER_LEN, - decoded_data); + assert(dec->method_ == ALPHA_LOSSLESS_COMPRESSION); + ok = VP8LDecodeAlphaHeader(dec, alpha_data, alpha_data_size, output); } + VP8FiltersInit(); + return ok; +} - if (ok) { - WebPFilterFunc unfilter_func = WebPUnfilters[filter]; - if (unfilter_func != NULL) { - unfiltered_data = (uint8_t*)malloc(decoded_size); - if (unfiltered_data == NULL) { - ok = 0; - goto Error; - } - // TODO(vikas): Implement on-the-fly decoding & filter mechanism to decode - // and apply filter per image-row. - unfilter_func(decoded_data, width, height, 1, width, unfiltered_data); - // Construct raw_data (height x stride) from alpha data (height x width). - CopyPlane(unfiltered_data, width, output, stride, width, height); - free(unfiltered_data); - } else { - // Construct raw_data (height x stride) from alpha data (height x width). - CopyPlane(decoded_data, width, output, stride, width, height); - } - if (pre_processing == ALPHA_PREPROCESSED_LEVELS) { - ok = DequantizeLevels(decoded_data, width, height); +// Decodes, unfilters and dequantizes *at least* 'num_rows' rows of alpha +// starting from row number 'row'. It assumes that rows up to (row - 1) have +// already been decoded. +// Returns false in case of bitstream error. 
+static int ALPHDecode(VP8Decoder* const dec, int row, int num_rows) { + ALPHDecoder* const alph_dec = dec->alph_dec_; + const int width = alph_dec->width_; + const int height = alph_dec->height_; + WebPUnfilterFunc unfilter_func = WebPUnfilters[alph_dec->filter_]; + uint8_t* const output = dec->alpha_plane_; + if (alph_dec->method_ == ALPHA_NO_COMPRESSION) { + const size_t offset = row * width; + const size_t num_pixels = num_rows * width; + assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN + offset + num_pixels); + memcpy(dec->alpha_plane_ + offset, + dec->alpha_data_ + ALPHA_HEADER_LEN + offset, num_pixels); + } else { // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION + assert(alph_dec->vp8l_dec_ != NULL); + if (!VP8LDecodeAlphaImageStream(alph_dec, row + num_rows)) { + return 0; } } - Error: - if (method != ALPHA_NO_COMPRESSION) { - free(decoded_data); + if (unfilter_func != NULL) { + unfilter_func(width, height, width, row, num_rows, output); } - return ok; + + if (row + num_rows == dec->pic_hdr_.height_) { + dec->is_alpha_decoded_ = 1; + } + return 1; } //------------------------------------------------------------------------------ +// Main entry point. const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec, int row, int num_rows) { - const int stride = dec->pic_hdr_.width_; + const int width = dec->pic_hdr_.width_; + const int height = dec->pic_hdr_.height_; - if (row < 0 || num_rows < 0 || row + num_rows > dec->pic_hdr_.height_) { + if (row < 0 || num_rows <= 0 || row + num_rows > height) { return NULL; // sanity check. } if (row == 0) { - // Decode everything during the first call. - if (!DecodeAlpha(dec->alpha_data_, (size_t)dec->alpha_data_size_, - dec->pic_hdr_.width_, dec->pic_hdr_.height_, stride, - dec->alpha_plane_)) { - return NULL; // Error. + // Initialize decoding. + assert(dec->alpha_plane_ != NULL); + dec->alph_dec_ = ALPHNew(); + if (dec->alph_dec_ == NULL) return NULL; + if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_, + width, height, dec->alpha_plane_)) { + ALPHDelete(dec->alph_dec_); + dec->alph_dec_ = NULL; + return NULL; + } + // if we allowed use of alpha dithering, check whether it's needed at all + if (dec->alph_dec_->pre_processing_ != ALPHA_PREPROCESSED_LEVELS) { + dec->alpha_dithering_ = 0; // disable dithering + } else { + num_rows = height; // decode everything in one pass } } + if (!dec->is_alpha_decoded_) { + int ok = 0; + assert(dec->alph_dec_ != NULL); + ok = ALPHDecode(dec, row, num_rows); + if (ok && dec->alpha_dithering_ > 0) { + ok = WebPDequantizeLevels(dec->alpha_plane_, width, height, + dec->alpha_dithering_); + } + if (!ok || dec->is_alpha_decoded_) { + ALPHDelete(dec->alph_dec_); + dec->alph_dec_ = NULL; + } + if (!ok) return NULL; // Error. + } + // Return a pointer to the current decoded row. - return dec->alpha_plane_ + row * stride; + return dec->alpha_plane_ + row * width; } - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/dec/buffer.c b/drivers/webp/dec/buffer.c index c159f6f248..9ed2b3fe1a 100644 --- a/drivers/webp/dec/buffer.c +++ b/drivers/webp/dec/buffer.c @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. 
// -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Everything about WebPDecBuffer @@ -15,10 +17,6 @@ #include "./webpi.h" #include "../utils/utils.h" -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - //------------------------------------------------------------------------------ // WebPDecBuffer @@ -35,6 +33,11 @@ static int IsValidColorspace(int webp_csp_mode) { return (webp_csp_mode >= MODE_RGB && webp_csp_mode < MODE_LAST); } +// strictly speaking, the very last (or first, if flipped) row +// doesn't require padding. +#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE) \ + (uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH) + static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) { int ok = 1; const WEBP_CSP_MODE mode = buffer->colorspace; @@ -44,33 +47,41 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) { ok = 0; } else if (!WebPIsRGBMode(mode)) { // YUV checks const WebPYUVABuffer* const buf = &buffer->u.YUVA; - const uint64_t y_size = (uint64_t)buf->y_stride * height; - const uint64_t u_size = (uint64_t)buf->u_stride * ((height + 1) / 2); - const uint64_t v_size = (uint64_t)buf->v_stride * ((height + 1) / 2); - const uint64_t a_size = (uint64_t)buf->a_stride * height; + const int uv_width = (width + 1) / 2; + const int uv_height = (height + 1) / 2; + const int y_stride = abs(buf->y_stride); + const int u_stride = abs(buf->u_stride); + const int v_stride = abs(buf->v_stride); + const int a_stride = abs(buf->a_stride); + const uint64_t y_size = MIN_BUFFER_SIZE(width, height, y_stride); + const uint64_t u_size = MIN_BUFFER_SIZE(uv_width, uv_height, u_stride); + const uint64_t v_size = MIN_BUFFER_SIZE(uv_width, uv_height, v_stride); + const uint64_t a_size = MIN_BUFFER_SIZE(width, height, a_stride); ok &= (y_size <= buf->y_size); ok &= (u_size <= buf->u_size); ok &= (v_size <= buf->v_size); - ok &= (buf->y_stride >= width); - ok &= (buf->u_stride >= (width + 1) / 2); - ok &= (buf->v_stride >= (width + 1) / 2); + ok &= (y_stride >= width); + ok &= (u_stride >= uv_width); + ok &= (v_stride >= uv_width); ok &= (buf->y != NULL); ok &= (buf->u != NULL); ok &= (buf->v != NULL); if (mode == MODE_YUVA) { - ok &= (buf->a_stride >= width); + ok &= (a_stride >= width); ok &= (a_size <= buf->a_size); ok &= (buf->a != NULL); } } else { // RGB checks const WebPRGBABuffer* const buf = &buffer->u.RGBA; - const uint64_t size = (uint64_t)buf->stride * height; + const int stride = abs(buf->stride); + const uint64_t size = MIN_BUFFER_SIZE(width, height, stride); ok &= (size <= buf->size); - ok &= (buf->stride >= width * kModeBpp[mode]); + ok &= (stride >= width * kModeBpp[mode]); ok &= (buf->rgba != NULL); } return ok ? 
VP8_STATUS_OK : VP8_STATUS_INVALID_PARAM; } +#undef MIN_BUFFER_SIZE static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { const int w = buffer->width; @@ -133,9 +144,35 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { return CheckDecBuffer(buffer); } +VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) { + if (buffer == NULL) { + return VP8_STATUS_INVALID_PARAM; + } + if (WebPIsRGBMode(buffer->colorspace)) { + WebPRGBABuffer* const buf = &buffer->u.RGBA; + buf->rgba += (buffer->height - 1) * buf->stride; + buf->stride = -buf->stride; + } else { + WebPYUVABuffer* const buf = &buffer->u.YUVA; + const int H = buffer->height; + buf->y += (H - 1) * buf->y_stride; + buf->y_stride = -buf->y_stride; + buf->u += ((H - 1) >> 1) * buf->u_stride; + buf->u_stride = -buf->u_stride; + buf->v += ((H - 1) >> 1) * buf->v_stride; + buf->v_stride = -buf->v_stride; + if (buf->a != NULL) { + buf->a += (H - 1) * buf->a_stride; + buf->a_stride = -buf->a_stride; + } + } + return VP8_STATUS_OK; +} + VP8StatusCode WebPAllocateDecBuffer(int w, int h, const WebPDecoderOptions* const options, WebPDecBuffer* const out) { + VP8StatusCode status; if (out == NULL || w <= 0 || h <= 0) { return VP8_STATUS_INVALID_PARAM; } @@ -152,18 +189,28 @@ VP8StatusCode WebPAllocateDecBuffer(int w, int h, h = ch; } if (options->use_scaling) { - if (options->scaled_width <= 0 || options->scaled_height <= 0) { + int scaled_width = options->scaled_width; + int scaled_height = options->scaled_height; + if (!WebPRescalerGetScaledDimensions( + w, h, &scaled_width, &scaled_height)) { return VP8_STATUS_INVALID_PARAM; } - w = options->scaled_width; - h = options->scaled_height; + w = scaled_width; + h = scaled_height; } } out->width = w; out->height = h; - // Then, allocate buffer for real - return AllocateBuffer(out); + // Then, allocate buffer for real. + status = AllocateBuffer(out); + if (status != VP8_STATUS_OK) return status; + + // Use the stride trick if vertical flip is needed. + if (options != NULL && options->flip) { + status = WebPFlipBuffer(out); + } + return status; } //------------------------------------------------------------------------------ @@ -180,8 +227,9 @@ int WebPInitDecBufferInternal(WebPDecBuffer* buffer, int version) { void WebPFreeDecBuffer(WebPDecBuffer* buffer) { if (buffer != NULL) { - if (!buffer->is_external_memory) - free(buffer->private_memory); + if (!buffer->is_external_memory) { + WebPSafeFree(buffer->private_memory); + } buffer->private_memory = NULL; } } @@ -210,6 +258,3 @@ void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst) { //------------------------------------------------------------------------------ -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/dec/decode_vp8.h b/drivers/webp/dec/decode_vp8.h index c26a9fc891..b9337bbec0 100644 --- a/drivers/webp/dec/decode_vp8.h +++ b/drivers/webp/dec/decode_vp8.h @@ -1,8 +1,10 @@ // Copyright 2010 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. 
All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Low-level API for VP8 decoder @@ -12,9 +14,9 @@ #ifndef WEBP_WEBP_DECODE_VP8_H_ #define WEBP_WEBP_DECODE_VP8_H_ -#include "../decode.h" +#include "../webp/decode.h" -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus extern "C" { #endif @@ -130,7 +132,8 @@ static WEBP_INLINE int VP8InitIo(VP8Io* const io) { return VP8InitIoInternal(io, WEBP_DECODER_ABI_VERSION); } -// Start decoding a new picture. Returns true if ok. +// Decode the VP8 frame header. Returns true if ok. +// Note: 'io->data' must be pointing to the start of the VP8 frame header. int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io); // Decode a picture. Will call VP8GetHeaders() if it wasn't done already. @@ -175,7 +178,7 @@ WEBP_EXTERN(int) VP8LGetInfo( const uint8_t* data, size_t data_size, // data available so far int* const width, int* const height, int* const has_alpha); -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus } // extern "C" #endif diff --git a/drivers/webp/dec/frame.c b/drivers/webp/dec/frame.c index 9c91a48e17..b882133eab 100644 --- a/drivers/webp/dec/frame.c +++ b/drivers/webp/dec/frame.c @@ -1,8 +1,10 @@ // Copyright 2010 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Frame-reconstruction function. Memory allocation. @@ -13,11 +15,180 @@ #include "./vp8i.h" #include "../utils/utils.h" -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif +//------------------------------------------------------------------------------ +// Main reconstruction function. + +static const int kScan[16] = { + 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, + 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, + 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, + 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS +}; + +static int CheckMode(int mb_x, int mb_y, int mode) { + if (mode == B_DC_PRED) { + if (mb_x == 0) { + return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT; + } else { + return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED; + } + } + return mode; +} -#define ALIGN_MASK (32 - 1) +static void Copy32b(uint8_t* const dst, const uint8_t* const src) { + memcpy(dst, src, 4); +} + +static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src, + uint8_t* const dst) { + switch (bits >> 30) { + case 3: + VP8Transform(src, dst, 0); + break; + case 2: + VP8TransformAC3(src, dst); + break; + case 1: + VP8TransformDC(src, dst); + break; + default: + break; + } +} + +static void DoUVTransform(uint32_t bits, const int16_t* const src, + uint8_t* const dst) { + if (bits & 0xff) { // any non-zero coeff at all? + if (bits & 0xaa) { // any non-zero AC coefficient? 
+ VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V + } else { + VP8TransformDCUV(src, dst); + } + } +} + +static void ReconstructRow(const VP8Decoder* const dec, + const VP8ThreadContext* ctx) { + int j; + int mb_x; + const int mb_y = ctx->mb_y_; + const int cache_id = ctx->id_; + uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; + uint8_t* const u_dst = dec->yuv_b_ + U_OFF; + uint8_t* const v_dst = dec->yuv_b_ + V_OFF; + + // Initialize left-most block. + for (j = 0; j < 16; ++j) { + y_dst[j * BPS - 1] = 129; + } + for (j = 0; j < 8; ++j) { + u_dst[j * BPS - 1] = 129; + v_dst[j * BPS - 1] = 129; + } + + // Init top-left sample on left column too. + if (mb_y > 0) { + y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129; + } else { + // we only need to do this init once at block (0,0). + // Afterward, it remains valid for the whole topmost row. + memset(y_dst - BPS - 1, 127, 16 + 4 + 1); + memset(u_dst - BPS - 1, 127, 8 + 1); + memset(v_dst - BPS - 1, 127, 8 + 1); + } + + // Reconstruct one row. + for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) { + const VP8MBData* const block = ctx->mb_data_ + mb_x; + + // Rotate in the left samples from previously decoded block. We move four + // pixels at a time for alignment reason, and because of in-loop filter. + if (mb_x > 0) { + for (j = -1; j < 16; ++j) { + Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]); + } + for (j = -1; j < 8; ++j) { + Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]); + Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]); + } + } + { + // bring top samples into the cache + VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x; + const int16_t* const coeffs = block->coeffs_; + uint32_t bits = block->non_zero_y_; + int n; + + if (mb_y > 0) { + memcpy(y_dst - BPS, top_yuv[0].y, 16); + memcpy(u_dst - BPS, top_yuv[0].u, 8); + memcpy(v_dst - BPS, top_yuv[0].v, 8); + } + + // predict and add residuals + if (block->is_i4x4_) { // 4x4 + uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16); + + if (mb_y > 0) { + if (mb_x >= dec->mb_w_ - 1) { // on rightmost border + memset(top_right, top_yuv[0].y[15], sizeof(*top_right)); + } else { + memcpy(top_right, top_yuv[1].y, sizeof(*top_right)); + } + } + // replicate the top-right pixels below + top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0]; + + // predict and add residuals for all 4x4 blocks in turn. + for (n = 0; n < 16; ++n, bits <<= 2) { + uint8_t* const dst = y_dst + kScan[n]; + VP8PredLuma4[block->imodes_[n]](dst); + DoTransform(bits, coeffs + n * 16, dst); + } + } else { // 16x16 + const int pred_func = CheckMode(mb_x, mb_y, block->imodes_[0]); + VP8PredLuma16[pred_func](y_dst); + if (bits != 0) { + for (n = 0; n < 16; ++n, bits <<= 2) { + DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]); + } + } + } + { + // Chroma + const uint32_t bits_uv = block->non_zero_uv_; + const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_); + VP8PredChroma8[pred_func](u_dst); + VP8PredChroma8[pred_func](v_dst); + DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst); + DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst); + } + + // stash away top samples for next block + if (mb_y < dec->mb_h_ - 1) { + memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16); + memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8); + memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8); + } + } + // Transfer reconstructed samples from yuv_b_ cache to final destination. 
+ { + const int y_offset = cache_id * 16 * dec->cache_y_stride_; + const int uv_offset = cache_id * 8 * dec->cache_uv_stride_; + uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset; + uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset; + uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset; + for (j = 0; j < 16; ++j) { + memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16); + } + for (j = 0; j < 8; ++j) { + memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8); + memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8); + } + } + } +} //------------------------------------------------------------------------------ // Filtering @@ -29,25 +200,18 @@ extern "C" { // U/V, so it's 8 samples total (because of the 2x upsampling). static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 }; -static WEBP_INLINE int hev_thresh_from_level(int level, int keyframe) { - if (keyframe) { - return (level >= 40) ? 2 : (level >= 15) ? 1 : 0; - } else { - return (level >= 40) ? 3 : (level >= 20) ? 2 : (level >= 15) ? 1 : 0; - } -} - static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) { const VP8ThreadContext* const ctx = &dec->thread_ctx_; + const int cache_id = ctx->id_; const int y_bps = dec->cache_y_stride_; - VP8FInfo* const f_info = ctx->f_info_ + mb_x; - uint8_t* const y_dst = dec->cache_y_ + ctx->id_ * 16 * y_bps + mb_x * 16; - const int level = f_info->f_level_; + const VP8FInfo* const f_info = ctx->f_info_ + mb_x; + uint8_t* const y_dst = dec->cache_y_ + cache_id * 16 * y_bps + mb_x * 16; const int ilevel = f_info->f_ilevel_; - const int limit = 2 * level + ilevel; - if (level == 0) { + const int limit = f_info->f_limit_; + if (limit == 0) { return; } + assert(limit >= 3); if (dec->filter_type_ == 1) { // simple if (mb_x > 0) { VP8SimpleHFilter16(y_dst, y_bps, limit + 4); @@ -63,10 +227,9 @@ static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) { } } else { // complex const int uv_bps = dec->cache_uv_stride_; - uint8_t* const u_dst = dec->cache_u_ + ctx->id_ * 8 * uv_bps + mb_x * 8; - uint8_t* const v_dst = dec->cache_v_ + ctx->id_ * 8 * uv_bps + mb_x * 8; - const int hev_thresh = - hev_thresh_from_level(level, dec->frm_hdr_.key_frame_); + uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8; + uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8; + const int hev_thresh = f_info->hev_thresh_; if (mb_x > 0) { VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh); VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh); @@ -97,53 +260,138 @@ static void FilterRow(const VP8Decoder* const dec) { } //------------------------------------------------------------------------------ +// Precompute the filtering strength for each segment and each i4x4/i16x16 mode. -void VP8StoreBlock(VP8Decoder* const dec) { +static void PrecomputeFilterStrengths(VP8Decoder* const dec) { if (dec->filter_type_ > 0) { - VP8FInfo* const info = dec->f_info_ + dec->mb_x_; - const int skip = dec->mb_info_[dec->mb_x_].skip_; - int level = dec->filter_levels_[dec->segment_]; - if (dec->filter_hdr_.use_lf_delta_) { - // TODO(skal): only CURRENT is handled for now. 
- level += dec->filter_hdr_.ref_lf_delta_[0]; - if (dec->is_i4x4_) { - level += dec->filter_hdr_.mode_lf_delta_[0]; + int s; + const VP8FilterHeader* const hdr = &dec->filter_hdr_; + for (s = 0; s < NUM_MB_SEGMENTS; ++s) { + int i4x4; + // First, compute the initial level + int base_level; + if (dec->segment_hdr_.use_segment_) { + base_level = dec->segment_hdr_.filter_strength_[s]; + if (!dec->segment_hdr_.absolute_delta_) { + base_level += hdr->level_; + } + } else { + base_level = hdr->level_; + } + for (i4x4 = 0; i4x4 <= 1; ++i4x4) { + VP8FInfo* const info = &dec->fstrengths_[s][i4x4]; + int level = base_level; + if (hdr->use_lf_delta_) { + level += hdr->ref_lf_delta_[0]; + if (i4x4) { + level += hdr->mode_lf_delta_[0]; + } + } + level = (level < 0) ? 0 : (level > 63) ? 63 : level; + if (level > 0) { + int ilevel = level; + if (hdr->sharpness_ > 0) { + if (hdr->sharpness_ > 4) { + ilevel >>= 2; + } else { + ilevel >>= 1; + } + if (ilevel > 9 - hdr->sharpness_) { + ilevel = 9 - hdr->sharpness_; + } + } + if (ilevel < 1) ilevel = 1; + info->f_ilevel_ = ilevel; + info->f_limit_ = 2 * level + ilevel; + info->hev_thresh_ = (level >= 40) ? 2 : (level >= 15) ? 1 : 0; + } else { + info->f_limit_ = 0; // no filtering + } + info->f_inner_ = i4x4; } } - level = (level < 0) ? 0 : (level > 63) ? 63 : level; - info->f_level_ = level; + } +} - if (dec->filter_hdr_.sharpness_ > 0) { - if (dec->filter_hdr_.sharpness_ > 4) { - level >>= 2; - } else { - level >>= 1; +//------------------------------------------------------------------------------ +// Dithering + +#define DITHER_AMP_TAB_SIZE 12 +static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = { + // roughly, it's dqm->uv_mat_[1] + 8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1 +}; + +void VP8InitDithering(const WebPDecoderOptions* const options, + VP8Decoder* const dec) { + assert(dec != NULL); + if (options != NULL) { + const int d = options->dithering_strength; + const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1; + const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100); + if (f > 0) { + int s; + int all_amp = 0; + for (s = 0; s < NUM_MB_SEGMENTS; ++s) { + VP8QuantMatrix* const dqm = &dec->dqm_[s]; + if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) { + // TODO(skal): should we specially dither more for uv_quant_ < 0? + const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_; + dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3; + } + all_amp |= dqm->dither_; } - if (level > 9 - dec->filter_hdr_.sharpness_) { - level = 9 - dec->filter_hdr_.sharpness_; + if (all_amp != 0) { + VP8InitRandom(&dec->dithering_rg_, 1.0f); + dec->dither_ = 1; } } + // potentially allow alpha dithering + dec->alpha_dithering_ = options->alpha_dithering_strength; + if (dec->alpha_dithering_ > 100) { + dec->alpha_dithering_ = 100; + } else if (dec->alpha_dithering_ < 0) { + dec->alpha_dithering_ = 0; + } + } +} - info->f_ilevel_ = (level < 1) ? 
1 : level; - info->f_inner_ = (!skip || dec->is_i4x4_); +// minimal amp that will provide a non-zero dithering effect +#define MIN_DITHER_AMP 4 +#define DITHER_DESCALE 4 +#define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1)) +#define DITHER_AMP_BITS 8 +#define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS) + +static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) { + int i, j; + for (j = 0; j < 8; ++j) { + for (i = 0; i < 8; ++i) { + // TODO: could be made faster with SSE2 + const int bits = + VP8RandomBits2(rg, DITHER_AMP_BITS + 1, amp) - DITHER_AMP_CENTER; + // Convert to range: [-2,2] for dither=50, [-4,4] for dither=100 + const int delta = (bits + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE; + const int v = (int)dst[i] + delta; + dst[i] = (v < 0) ? 0 : (v > 255) ? 255u : (uint8_t)v; + } + dst += bps; } - { - // Transfer samples to row cache - int y; - const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_; - const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_; - uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16 + y_offset; - uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset; - uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset; - for (y = 0; y < 16; ++y) { - memcpy(ydst + y * dec->cache_y_stride_, - dec->yuv_b_ + Y_OFF + y * BPS, 16); - } - for (y = 0; y < 8; ++y) { - memcpy(udst + y * dec->cache_uv_stride_, - dec->yuv_b_ + U_OFF + y * BPS, 8); - memcpy(vdst + y * dec->cache_uv_stride_, - dec->yuv_b_ + V_OFF + y * BPS, 8); +} + +static void DitherRow(VP8Decoder* const dec) { + int mb_x; + assert(dec->dither_); + for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) { + const VP8ThreadContext* const ctx = &dec->thread_ctx_; + const VP8MBData* const data = ctx->mb_data_ + mb_x; + const int cache_id = ctx->id_; + const int uv_bps = dec->cache_uv_stride_; + if (data->dither_ >= MIN_DITHER_AMP) { + uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8; + uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8; + Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_); + Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_); } } } @@ -165,25 +413,35 @@ void VP8StoreBlock(VP8Decoder* const dec) { static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { int ok = 1; const VP8ThreadContext* const ctx = &dec->thread_ctx_; + const int cache_id = ctx->id_; const int extra_y_rows = kFilterExtraRows[dec->filter_type_]; const int ysize = extra_y_rows * dec->cache_y_stride_; const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_; - const int y_offset = ctx->id_ * 16 * dec->cache_y_stride_; - const int uv_offset = ctx->id_ * 8 * dec->cache_uv_stride_; + const int y_offset = cache_id * 16 * dec->cache_y_stride_; + const int uv_offset = cache_id * 8 * dec->cache_uv_stride_; uint8_t* const ydst = dec->cache_y_ - ysize + y_offset; uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset; uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset; - const int first_row = (ctx->mb_y_ == 0); - const int last_row = (ctx->mb_y_ >= dec->br_mb_y_ - 1); - int y_start = MACROBLOCK_VPOS(ctx->mb_y_); - int y_end = MACROBLOCK_VPOS(ctx->mb_y_ + 1); + const int mb_y = ctx->mb_y_; + const int is_first_row = (mb_y == 0); + const int is_last_row = (mb_y >= dec->br_mb_y_ - 1); + + if (dec->mt_method_ == 2) { + ReconstructRow(dec, ctx); + } if (ctx->filter_row_) { FilterRow(dec); } - if (io->put) { - if (!first_row) { + if (dec->dither_) { + DitherRow(dec); + } + + if (io->put != NULL) { + int 
y_start = MACROBLOCK_VPOS(mb_y); + int y_end = MACROBLOCK_VPOS(mb_y + 1); + if (!is_first_row) { y_start -= extra_y_rows; io->y = ydst; io->u = udst; @@ -194,7 +452,7 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { io->v = dec->cache_v_ + uv_offset; } - if (!last_row) { + if (!is_last_row) { y_end -= extra_y_rows; } if (y_end > io->crop_bottom) { @@ -202,11 +460,8 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { } io->a = NULL; if (dec->alpha_data_ != NULL && y_start < y_end) { - // TODO(skal): several things to correct here: - // * testing presence of alpha with dec->alpha_data_ is not a good idea - // * we're actually decompressing the full plane only once. It should be - // more obvious from signature. - // * we could free alpha_data_ right after this call, but we don't own. + // TODO(skal): testing presence of alpha with dec->alpha_data_ is not a + // good idea. io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start); if (io->a == NULL) { return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, @@ -238,8 +493,8 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { } } // rotate top samples if needed - if (ctx->id_ + 1 == dec->num_caches_) { - if (!last_row) { + if (cache_id + 1 == dec->num_caches_) { + if (!is_last_row) { memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize); memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize); memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize); @@ -256,27 +511,40 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) { int ok = 1; VP8ThreadContext* const ctx = &dec->thread_ctx_; - if (!dec->use_threads_) { + const int filter_row = + (dec->filter_type_ > 0) && + (dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_); + if (dec->mt_method_ == 0) { // ctx->id_ and ctx->f_info_ are already set ctx->mb_y_ = dec->mb_y_; - ctx->filter_row_ = dec->filter_row_; + ctx->filter_row_ = filter_row; + ReconstructRow(dec, ctx); ok = FinishRow(dec, io); } else { WebPWorker* const worker = &dec->worker_; // Finish previous job *before* updating context - ok &= WebPWorkerSync(worker); + ok &= WebPGetWorkerInterface()->Sync(worker); assert(worker->status_ == OK); if (ok) { // spawn a new deblocking/output job ctx->io_ = *io; ctx->id_ = dec->cache_id_; ctx->mb_y_ = dec->mb_y_; - ctx->filter_row_ = dec->filter_row_; - if (ctx->filter_row_) { // just swap filter info + ctx->filter_row_ = filter_row; + if (dec->mt_method_ == 2) { // swap macroblock data + VP8MBData* const tmp = ctx->mb_data_; + ctx->mb_data_ = dec->mb_data_; + dec->mb_data_ = tmp; + } else { + // perform reconstruction directly in main thread + ReconstructRow(dec, ctx); + } + if (filter_row) { // swap filter info VP8FInfo* const tmp = ctx->f_info_; ctx->f_info_ = dec->f_info_; dec->f_info_ = tmp; } - WebPWorkerLaunch(worker); + // (reconstruct)+filter in parallel + WebPGetWorkerInterface()->Launch(worker); if (++dec->cache_id_ == dec->num_caches_) { dec->cache_id_ = 0; } @@ -290,8 +558,8 @@ int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) { VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) { // Call setup() first. This may trigger additional decoding features on 'io'. - // Note: Afterward, we must call teardown() not matter what. - if (io->setup && !io->setup(io)) { + // Note: Afterward, we must call teardown() no matter what. 
+ if (io->setup != NULL && !io->setup(io)) { VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed"); return dec->status_; } @@ -304,7 +572,7 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) { // Define the area where we can skip in-loop filtering, in case of cropping. // - // 'Simple' filter reads two luma samples outside of the macroblock and + // 'Simple' filter reads two luma samples outside of the macroblock // and filters one. It doesn't filter the chroma samples. Hence, we can // avoid doing the in-loop filtering before crop_top/crop_left position. // For the 'Complex' filter, 3 samples are read and up to 3 are filtered. @@ -339,16 +607,17 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) { dec->br_mb_y_ = dec->mb_h_; } } + PrecomputeFilterStrengths(dec); return VP8_STATUS_OK; } int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) { int ok = 1; - if (dec->use_threads_) { - ok = WebPWorkerSync(&dec->worker_); + if (dec->mt_method_ > 0) { + ok = WebPGetWorkerInterface()->Sync(&dec->worker_); } - if (io->teardown) { + if (io->teardown != NULL) { io->teardown(io); } return ok; @@ -384,9 +653,9 @@ int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) { // Initialize multi/single-thread worker static int InitThreadContext(VP8Decoder* const dec) { dec->cache_id_ = 0; - if (dec->use_threads_) { + if (dec->mt_method_ > 0) { WebPWorker* const worker = &dec->worker_; - if (!WebPWorkerReset(worker)) { + if (!WebPGetWorkerInterface()->Reset(worker)) { return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, "thread initialization failed."); } @@ -401,6 +670,28 @@ static int InitThreadContext(VP8Decoder* const dec) { return 1; } +int VP8GetThreadMethod(const WebPDecoderOptions* const options, + const WebPHeaderStructure* const headers, + int width, int height) { + if (options == NULL || options->use_threads == 0) { + return 0; + } + (void)headers; + (void)width; + (void)height; + assert(headers == NULL || !headers->is_lossless); +#if defined(WEBP_USE_THREAD) + if (width < MIN_WIDTH_FOR_THREADS) return 0; + // TODO(skal): tune the heuristic further +#if 0 + if (height < 2 * width) return 2; +#endif + return 2; +#else // !WEBP_USE_THREAD + return 0; +#endif +} + #undef MT_CACHE_LINES #undef ST_CACHE_LINES @@ -412,14 +703,15 @@ static int AllocateMemory(VP8Decoder* const dec) { const int mb_w = dec->mb_w_; // Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise. const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t); - const size_t top_size = (16 + 8 + 8) * mb_w; + const size_t top_size = sizeof(VP8TopSamples) * mb_w; const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB); const size_t f_info_size = (dec->filter_type_ > 0) ? - mb_w * (dec->use_threads_ ? 2 : 1) * sizeof(VP8FInfo) + mb_w * (dec->mt_method_ > 0 ? 2 : 1) * sizeof(VP8FInfo) : 0; const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_); - const size_t coeffs_size = 384 * sizeof(*dec->coeffs_); + const size_t mb_data_size = + (dec->mt_method_ == 2 ? 
2 : 1) * mb_w * sizeof(*dec->mb_data_); const size_t cache_height = (16 * num_caches + kFilterExtraRows[dec->filter_type_]) * 3 / 2; const size_t cache_size = top_size * cache_height; @@ -428,13 +720,13 @@ static int AllocateMemory(VP8Decoder* const dec) { (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL; const uint64_t needed = (uint64_t)intra_pred_mode_size + top_size + mb_info_size + f_info_size - + yuv_size + coeffs_size - + cache_size + alpha_size + ALIGN_MASK; + + yuv_size + mb_data_size + + cache_size + alpha_size + WEBP_ALIGN_CST; uint8_t* mem; if (needed != (size_t)needed) return 0; // check for overflow if (needed > dec->mem_size_) { - free(dec->mem_); + WebPSafeFree(dec->mem_); dec->mem_size_ = 0; dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t)); if (dec->mem_ == NULL) { @@ -449,12 +741,8 @@ static int AllocateMemory(VP8Decoder* const dec) { dec->intra_t_ = (uint8_t*)mem; mem += intra_pred_mode_size; - dec->y_t_ = (uint8_t*)mem; - mem += 16 * mb_w; - dec->u_t_ = (uint8_t*)mem; - mem += 8 * mb_w; - dec->v_t_ = (uint8_t*)mem; - mem += 8 * mb_w; + dec->yuv_t_ = (VP8TopSamples*)mem; + mem += top_size; dec->mb_info_ = ((VP8MB*)mem) + 1; mem += mb_info_size; @@ -463,20 +751,24 @@ static int AllocateMemory(VP8Decoder* const dec) { mem += f_info_size; dec->thread_ctx_.id_ = 0; dec->thread_ctx_.f_info_ = dec->f_info_; - if (dec->use_threads_) { + if (dec->mt_method_ > 0) { // secondary cache line. The deblocking process need to make use of the // filtering strength from previous macroblock row, while the new ones // are being decoded in parallel. We'll just swap the pointers. dec->thread_ctx_.f_info_ += mb_w; } - mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK); - assert((yuv_size & ALIGN_MASK) == 0); + mem = (uint8_t*)WEBP_ALIGN(mem); + assert((yuv_size & WEBP_ALIGN_CST) == 0); dec->yuv_b_ = (uint8_t*)mem; mem += yuv_size; - dec->coeffs_ = (int16_t*)mem; - mem += coeffs_size; + dec->mb_data_ = (VP8MBData*)mem; + dec->thread_ctx_.mb_data_ = (VP8MBData*)mem; + if (dec->mt_method_ == 2) { + dec->thread_ctx_.mb_data_ += mb_w; + } + mem += mb_data_size; dec->cache_y_stride_ = 16 * mb_w; dec->cache_uv_stride_ = 8 * mb_w; @@ -496,9 +788,11 @@ static int AllocateMemory(VP8Decoder* const dec) { // alpha plane dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL; mem += alpha_size; + assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_); - // note: left-info is initialized once for all. + // note: left/top-info is initialized once for all. memset(dec->mb_info_ - 1, 0, mb_info_size); + VP8InitScanline(dec); // initialize left too. // initialize top memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size); @@ -517,7 +811,7 @@ static void InitIo(VP8Decoder* const dec, VP8Io* io) { io->a = NULL; } -int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { +int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) { if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_. if (!AllocateMemory(dec)) return 0; InitIo(dec, io); @@ -526,154 +820,3 @@ int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { } //------------------------------------------------------------------------------ -// Main reconstruction function. 
- -static const int kScan[16] = { - 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, - 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, - 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, - 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS -}; - -static WEBP_INLINE int CheckMode(VP8Decoder* const dec, int mode) { - if (mode == B_DC_PRED) { - if (dec->mb_x_ == 0) { - return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT; - } else { - return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOP : B_DC_PRED; - } - } - return mode; -} - -static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) { - *(uint32_t*)dst = *(uint32_t*)src; -} - -void VP8ReconstructBlock(VP8Decoder* const dec) { - uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; - uint8_t* const u_dst = dec->yuv_b_ + U_OFF; - uint8_t* const v_dst = dec->yuv_b_ + V_OFF; - - // Rotate in the left samples from previously decoded block. We move four - // pixels at a time for alignment reason, and because of in-loop filter. - if (dec->mb_x_ > 0) { - int j; - for (j = -1; j < 16; ++j) { - Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]); - } - for (j = -1; j < 8; ++j) { - Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]); - Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]); - } - } else { - int j; - for (j = 0; j < 16; ++j) { - y_dst[j * BPS - 1] = 129; - } - for (j = 0; j < 8; ++j) { - u_dst[j * BPS - 1] = 129; - v_dst[j * BPS - 1] = 129; - } - // Init top-left sample on left column too - if (dec->mb_y_ > 0) { - y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129; - } - } - { - // bring top samples into the cache - uint8_t* const top_y = dec->y_t_ + dec->mb_x_ * 16; - uint8_t* const top_u = dec->u_t_ + dec->mb_x_ * 8; - uint8_t* const top_v = dec->v_t_ + dec->mb_x_ * 8; - const int16_t* coeffs = dec->coeffs_; - int n; - - if (dec->mb_y_ > 0) { - memcpy(y_dst - BPS, top_y, 16); - memcpy(u_dst - BPS, top_u, 8); - memcpy(v_dst - BPS, top_v, 8); - } else if (dec->mb_x_ == 0) { - // we only need to do this init once at block (0,0). - // Afterward, it remains valid for the whole topmost row. - memset(y_dst - BPS - 1, 127, 16 + 4 + 1); - memset(u_dst - BPS - 1, 127, 8 + 1); - memset(v_dst - BPS - 1, 127, 8 + 1); - } - - // predict and add residuals - - if (dec->is_i4x4_) { // 4x4 - uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16); - - if (dec->mb_y_ > 0) { - if (dec->mb_x_ >= dec->mb_w_ - 1) { // on rightmost border - top_right[0] = top_y[15] * 0x01010101u; - } else { - memcpy(top_right, top_y + 16, sizeof(*top_right)); - } - } - // replicate the top-right pixels below - top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0]; - - // predict and add residues for all 4x4 blocks in turn. 
- for (n = 0; n < 16; n++) { - uint8_t* const dst = y_dst + kScan[n]; - VP8PredLuma4[dec->imodes_[n]](dst); - if (dec->non_zero_ac_ & (1 << n)) { - VP8Transform(coeffs + n * 16, dst, 0); - } else if (dec->non_zero_ & (1 << n)) { // only DC is present - VP8TransformDC(coeffs + n * 16, dst); - } - } - } else { // 16x16 - const int pred_func = CheckMode(dec, dec->imodes_[0]); - VP8PredLuma16[pred_func](y_dst); - if (dec->non_zero_) { - for (n = 0; n < 16; n++) { - uint8_t* const dst = y_dst + kScan[n]; - if (dec->non_zero_ac_ & (1 << n)) { - VP8Transform(coeffs + n * 16, dst, 0); - } else if (dec->non_zero_ & (1 << n)) { // only DC is present - VP8TransformDC(coeffs + n * 16, dst); - } - } - } - } - { - // Chroma - const int pred_func = CheckMode(dec, dec->uvmode_); - VP8PredChroma8[pred_func](u_dst); - VP8PredChroma8[pred_func](v_dst); - - if (dec->non_zero_ & 0x0f0000) { // chroma-U - const int16_t* const u_coeffs = dec->coeffs_ + 16 * 16; - if (dec->non_zero_ac_ & 0x0f0000) { - VP8TransformUV(u_coeffs, u_dst); - } else { - VP8TransformDCUV(u_coeffs, u_dst); - } - } - if (dec->non_zero_ & 0xf00000) { // chroma-V - const int16_t* const v_coeffs = dec->coeffs_ + 20 * 16; - if (dec->non_zero_ac_ & 0xf00000) { - VP8TransformUV(v_coeffs, v_dst); - } else { - VP8TransformDCUV(v_coeffs, v_dst); - } - } - - // stash away top samples for next block - if (dec->mb_y_ < dec->mb_h_ - 1) { - memcpy(top_y, y_dst + 15 * BPS, 16); - memcpy(top_u, u_dst + 7 * BPS, 8); - memcpy(top_v, v_dst + 7 * BPS, 8); - } - } - } -} - -//------------------------------------------------------------------------------ - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/dec/idec.c b/drivers/webp/dec/idec.c index 7df790ced8..abafb9f3d1 100644 --- a/drivers/webp/dec/idec.c +++ b/drivers/webp/dec/idec.c @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Incremental decoding @@ -13,14 +15,11 @@ #include <string.h> #include <stdlib.h> +#include "./alphai.h" #include "./webpi.h" #include "./vp8i.h" #include "../utils/utils.h" -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - // In append mode, buffer allocations increase as multiples of this value. // Needs to be a power of 2. #define CHUNK_SIZE 4096 @@ -29,11 +28,13 @@ extern "C" { //------------------------------------------------------------------------------ // Data structures for memory and states -// Decoding states. State normally flows like HEADER->PARTS0->DATA->DONE. +// Decoding states. State normally flows as: +// WEBP_HEADER->VP8_HEADER->VP8_PARTS0->VP8_DATA->DONE for a lossy image, and +// WEBP_HEADER->VP8L_HEADER->VP8L_DATA->DONE for a lossless image. // If there is any error the decoder goes into state ERROR. typedef enum { - STATE_PRE_VP8, // All data before that of the first VP8 chunk. - STATE_VP8_FRAME_HEADER, // For VP8 Frame header (within VP8 chunk). 
+ STATE_WEBP_HEADER, // All the data before that of the VP8/VP8L chunk. + STATE_VP8_HEADER, // The VP8 Frame header (within the VP8 chunk). STATE_VP8_PARTS0, STATE_VP8_DATA, STATE_VP8L_HEADER, @@ -71,32 +72,41 @@ struct WebPIDecoder { MemBuffer mem_; // input memory buffer. WebPDecBuffer output_; // output buffer (when no external one is supplied) size_t chunk_size_; // Compressed VP8/VP8L size extracted from Header. + + int last_mb_y_; // last row reached for intra-mode decoding }; // MB context to restore in case VP8DecodeMB() fails typedef struct { VP8MB left_; VP8MB info_; - uint8_t intra_t_[4]; - uint8_t intra_l_[4]; - VP8BitReader br_; VP8BitReader token_br_; } MBContext; //------------------------------------------------------------------------------ // MemBuffer: incoming data handling -static void RemapBitReader(VP8BitReader* const br, ptrdiff_t offset) { - if (br->buf_ != NULL) { - br->buf_ += offset; - br->buf_end_ += offset; - } -} - static WEBP_INLINE size_t MemDataSize(const MemBuffer* mem) { return (mem->end_ - mem->start_); } +// Check if we need to preserve the compressed alpha data, as it may not have +// been decoded yet. +static int NeedCompressedAlpha(const WebPIDecoder* const idec) { + if (idec->state_ == STATE_WEBP_HEADER) { + // We haven't parsed the headers yet, so we don't know whether the image is + // lossy or lossless. This also means that we haven't parsed the ALPH chunk. + return 0; + } + if (idec->is_lossless_) { + return 0; // ALPH chunk is not present for lossless images. + } else { + const VP8Decoder* const dec = (VP8Decoder*)idec->dec_; + assert(dec != NULL); // Must be true as idec->state_ != STATE_WEBP_HEADER. + return (dec->alpha_data_ != NULL) && !dec->is_alpha_decoded_; + } +} + static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) { MemBuffer* const mem = &idec->mem_; const uint8_t* const new_base = mem->buf_ + mem->start_; @@ -112,16 +122,36 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) { if (offset != 0) { int p; for (p = 0; p <= last_part; ++p) { - RemapBitReader(dec->parts_ + p, offset); + VP8RemapBitReader(dec->parts_ + p, offset); } // Remap partition #0 data pointer to new offset, but only in MAP // mode (in APPEND mode, partition #0 is copied into a fixed memory). if (mem->mode_ == MEM_MODE_MAP) { - RemapBitReader(&dec->br_, offset); + VP8RemapBitReader(&dec->br_, offset); + } + } + { + const uint8_t* const last_start = dec->parts_[last_part].buf_; + assert(last_part >= 0); + VP8BitReaderSetBuffer(&dec->parts_[last_part], last_start, + mem->buf_ + mem->end_ - last_start); + } + if (NeedCompressedAlpha(idec)) { + ALPHDecoder* const alph_dec = dec->alph_dec_; + dec->alpha_data_ += offset; + if (alph_dec != NULL) { + if (alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION) { + VP8LDecoder* const alph_vp8l_dec = alph_dec->vp8l_dec_; + assert(alph_vp8l_dec != NULL); + assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN); + VP8LBitReaderSetBuffer(&alph_vp8l_dec->br_, + dec->alpha_data_ + ALPHA_HEADER_LEN, + dec->alpha_data_size_ - ALPHA_HEADER_LEN); + } else { // alph_dec->method_ == ALPHA_NO_COMPRESSION + // Nothing special to do in this case. 
+ } } } - assert(last_part >= 0); - dec->parts_[last_part].buf_end_ = mem->buf_ + mem->end_; } else { // Resize lossless bitreader VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_; VP8LBitReaderSetBuffer(&dec->br_, new_base, MemDataSize(mem)); @@ -133,8 +163,12 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) { // size if required and also updates VP8BitReader's if new memory is allocated. static int AppendToMemBuffer(WebPIDecoder* const idec, const uint8_t* const data, size_t data_size) { + VP8Decoder* const dec = (VP8Decoder*)idec->dec_; MemBuffer* const mem = &idec->mem_; - const uint8_t* const old_base = mem->buf_ + mem->start_; + const int need_compressed_alpha = NeedCompressedAlpha(idec); + const uint8_t* const old_start = mem->buf_ + mem->start_; + const uint8_t* const old_base = + need_compressed_alpha ? dec->alpha_data_ : old_start; assert(mem->mode_ == MEM_MODE_APPEND); if (data_size > MAX_CHUNK_PAYLOAD) { // security safeguard: trying to allocate more than what the format @@ -143,17 +177,18 @@ static int AppendToMemBuffer(WebPIDecoder* const idec, } if (mem->end_ + data_size > mem->buf_size_) { // Need some free memory - const size_t current_size = MemDataSize(mem); + const size_t new_mem_start = old_start - old_base; + const size_t current_size = MemDataSize(mem) + new_mem_start; const uint64_t new_size = (uint64_t)current_size + data_size; const uint64_t extra_size = (new_size + CHUNK_SIZE - 1) & ~(CHUNK_SIZE - 1); uint8_t* const new_buf = (uint8_t*)WebPSafeMalloc(extra_size, sizeof(*new_buf)); if (new_buf == NULL) return 0; memcpy(new_buf, old_base, current_size); - free(mem->buf_); + WebPSafeFree(mem->buf_); mem->buf_ = new_buf; mem->buf_size_ = (size_t)extra_size; - mem->start_ = 0; + mem->start_ = new_mem_start; mem->end_ = current_size; } @@ -161,14 +196,15 @@ static int AppendToMemBuffer(WebPIDecoder* const idec, mem->end_ += data_size; assert(mem->end_ <= mem->buf_size_); - DoRemap(idec, mem->buf_ + mem->start_ - old_base); + DoRemap(idec, mem->buf_ + mem->start_ - old_start); return 1; } static int RemapMemBuffer(WebPIDecoder* const idec, const uint8_t* const data, size_t data_size) { MemBuffer* const mem = &idec->mem_; - const uint8_t* const old_base = mem->buf_ + mem->start_; + const uint8_t* const old_buf = mem->buf_; + const uint8_t* const old_start = old_buf + mem->start_; assert(mem->mode_ == MEM_MODE_MAP); if (data_size < mem->buf_size_) return 0; // can't remap to a shorter buffer! @@ -176,7 +212,7 @@ static int RemapMemBuffer(WebPIDecoder* const idec, mem->buf_ = (uint8_t*)data; mem->end_ = mem->buf_size_ = data_size; - DoRemap(idec, mem->buf_ + mem->start_ - old_base); + DoRemap(idec, mem->buf_ + mem->start_ - old_start); return 1; } @@ -191,8 +227,8 @@ static void InitMemBuffer(MemBuffer* const mem) { static void ClearMemBuffer(MemBuffer* const mem) { assert(mem); if (mem->mode_ == MEM_MODE_APPEND) { - free(mem->buf_); - free((void*)mem->part0_buf_); + WebPSafeFree(mem->buf_); + WebPSafeFree((void*)mem->part0_buf_); } } @@ -206,35 +242,34 @@ static int CheckMemBufferMode(MemBuffer* const mem, MemBufferMode expected) { return 1; } +// To be called last. 
+static VP8StatusCode FinishDecoding(WebPIDecoder* const idec) { + const WebPDecoderOptions* const options = idec->params_.options; + WebPDecBuffer* const output = idec->params_.output; + + idec->state_ = STATE_DONE; + if (options != NULL && options->flip) { + return WebPFlipBuffer(output); + } else { + return VP8_STATUS_OK; + } +} + //------------------------------------------------------------------------------ // Macroblock-decoding contexts static void SaveContext(const VP8Decoder* dec, const VP8BitReader* token_br, MBContext* const context) { - const VP8BitReader* const br = &dec->br_; - const VP8MB* const left = dec->mb_info_ - 1; - const VP8MB* const info = dec->mb_info_ + dec->mb_x_; - - context->left_ = *left; - context->info_ = *info; - context->br_ = *br; + context->left_ = dec->mb_info_[-1]; + context->info_ = dec->mb_info_[dec->mb_x_]; context->token_br_ = *token_br; - memcpy(context->intra_t_, dec->intra_t_ + 4 * dec->mb_x_, 4); - memcpy(context->intra_l_, dec->intra_l_, 4); } static void RestoreContext(const MBContext* context, VP8Decoder* const dec, VP8BitReader* const token_br) { - VP8BitReader* const br = &dec->br_; - VP8MB* const left = dec->mb_info_ - 1; - VP8MB* const info = dec->mb_info_ + dec->mb_x_; - - *left = context->left_; - *info = context->info_; - *br = context->br_; + dec->mb_info_[-1] = context->left_; + dec->mb_info_[dec->mb_x_] = context->info_; *token_br = context->token_br_; - memcpy(dec->intra_t_ + 4 * dec->mb_x_, context->intra_t_, 4); - memcpy(dec->intra_l_, context->intra_l_, 4); } //------------------------------------------------------------------------------ @@ -242,7 +277,7 @@ static void RestoreContext(const MBContext* context, VP8Decoder* const dec, static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) { if (idec->state_ == STATE_VP8_DATA) { VP8Io* const io = &idec->io_; - if (io->teardown) { + if (io->teardown != NULL) { io->teardown(io); } } @@ -270,6 +305,7 @@ static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) { headers.data = data; headers.data_size = curr_size; + headers.have_all_data = 0; status = WebPParseHeaders(&headers); if (status == VP8_STATUS_NOT_ENOUGH_DATA) { return VP8_STATUS_SUSPENDED; // We haven't found a VP8 chunk yet. @@ -285,15 +321,9 @@ static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) { return VP8_STATUS_OUT_OF_MEMORY; } idec->dec_ = dec; -#ifdef WEBP_USE_THREAD - dec->use_threads_ = (idec->params_.options != NULL) && - (idec->params_.options->use_threads > 0); -#else - dec->use_threads_ = 0; -#endif dec->alpha_data_ = headers.alpha_data; dec->alpha_data_size_ = headers.alpha_data_size; - ChangeState(idec, STATE_VP8_FRAME_HEADER, headers.offset); + ChangeState(idec, STATE_VP8_HEADER, headers.offset); } else { VP8LDecoder* const dec = VP8LNew(); if (dec == NULL) { @@ -308,13 +338,14 @@ static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) { static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) { const uint8_t* data = idec->mem_.buf_ + idec->mem_.start_; const size_t curr_size = MemDataSize(&idec->mem_); + int width, height; uint32_t bits; if (curr_size < VP8_FRAME_HEADER_SIZE) { // Not enough data bytes to extract VP8 Frame Header. 
return VP8_STATUS_SUSPENDED; } - if (!VP8GetInfo(data, curr_size, idec->chunk_size_, NULL, NULL)) { + if (!VP8GetInfo(data, curr_size, idec->chunk_size_, &width, &height)) { return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR); } @@ -328,30 +359,32 @@ static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) { } // Partition #0 -static int CopyParts0Data(WebPIDecoder* const idec) { +static VP8StatusCode CopyParts0Data(WebPIDecoder* const idec) { VP8Decoder* const dec = (VP8Decoder*)idec->dec_; VP8BitReader* const br = &dec->br_; - const size_t psize = br->buf_end_ - br->buf_; + const size_t part_size = br->buf_end_ - br->buf_; MemBuffer* const mem = &idec->mem_; assert(!idec->is_lossless_); assert(mem->part0_buf_ == NULL); - assert(psize > 0); - assert(psize <= mem->part0_size_); // Format limit: no need for runtime check + // the following is a format limitation, no need for runtime check: + assert(part_size <= mem->part0_size_); + if (part_size == 0) { // can't have zero-size partition #0 + return VP8_STATUS_BITSTREAM_ERROR; + } if (mem->mode_ == MEM_MODE_APPEND) { // We copy and grab ownership of the partition #0 data. - uint8_t* const part0_buf = (uint8_t*)malloc(psize); + uint8_t* const part0_buf = (uint8_t*)WebPSafeMalloc(1ULL, part_size); if (part0_buf == NULL) { - return 0; + return VP8_STATUS_OUT_OF_MEMORY; } - memcpy(part0_buf, br->buf_, psize); + memcpy(part0_buf, br->buf_, part_size); mem->part0_buf_ = part0_buf; - br->buf_ = part0_buf; - br->buf_end_ = part0_buf + psize; + VP8BitReaderSetBuffer(br, part0_buf, part_size); } else { // Else: just keep pointers to the partition #0's data in dec_->br_. } - mem->start_ += psize; - return 1; + mem->start_ += part_size; + return VP8_STATUS_OK; } static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) { @@ -381,9 +414,14 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) { if (dec->status_ != VP8_STATUS_OK) { return IDecError(idec, dec->status_); } + // This change must be done before calling VP8InitFrame() + dec->mt_method_ = VP8GetThreadMethod(params->options, NULL, + io->width, io->height); + VP8InitDithering(params->options, dec); - if (!CopyParts0Data(idec)) { - return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY); + dec->status_ = CopyParts0Data(idec); + if (dec->status_ != VP8_STATUS_OK) { + return IDecError(idec, dec->status_); } // Finish setting up the decoding parameters. Will call io->setup(). @@ -407,50 +445,52 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) { VP8Io* const io = &idec->io_; assert(dec->ready_); - for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) { - VP8BitReader* token_br = &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)]; - if (dec->mb_x_ == 0) { - VP8InitScanline(dec); + if (idec->last_mb_y_ != dec->mb_y_) { + if (!VP8ParseIntraModeRow(&dec->br_, dec)) { + // note: normally, error shouldn't occur since we already have the whole + // partition0 available here in DecodeRemaining(). Reaching EOF while + // reading intra modes really means a BITSTREAM_ERROR. 
+ return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR); + } + idec->last_mb_y_ = dec->mb_y_; } - for (; dec->mb_x_ < dec->mb_w_; dec->mb_x_++) { + for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) { + VP8BitReader* const token_br = + &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)]; MBContext context; SaveContext(dec, token_br, &context); - if (!VP8DecodeMB(dec, token_br)) { - RestoreContext(&context, dec, token_br); // We shouldn't fail when MAX_MB data was available if (dec->num_parts_ == 1 && MemDataSize(&idec->mem_) > MAX_MB_SIZE) { return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR); } + RestoreContext(&context, dec, token_br); return VP8_STATUS_SUSPENDED; } - VP8ReconstructBlock(dec); - // Store data and save block's filtering params - VP8StoreBlock(dec); - // Release buffer only if there is only one partition if (dec->num_parts_ == 1) { idec->mem_.start_ = token_br->buf_ - idec->mem_.buf_; assert(idec->mem_.start_ <= idec->mem_.end_); } } + VP8InitScanline(dec); // Prepare for next scanline + + // Reconstruct, filter and emit the row. if (!VP8ProcessRow(dec, io)) { return IDecError(idec, VP8_STATUS_USER_ABORT); } - dec->mb_x_ = 0; } // Synchronize the thread and check for errors. if (!VP8ExitCritical(dec, io)) { return IDecError(idec, VP8_STATUS_USER_ABORT); } dec->ready_ = 0; - idec->state_ = STATE_DONE; - - return VP8_STATUS_OK; + return FinishDecoding(idec); } -static int ErrorStatusLossless(WebPIDecoder* const idec, VP8StatusCode status) { +static VP8StatusCode ErrorStatusLossless(WebPIDecoder* const idec, + VP8StatusCode status) { if (status == VP8_STATUS_SUSPENDED || status == VP8_STATUS_NOT_ENOUGH_DATA) { return VP8_STATUS_SUSPENDED; } @@ -467,9 +507,15 @@ static VP8StatusCode DecodeVP8LHeader(WebPIDecoder* const idec) { // Wait until there's enough data for decoding header. if (curr_size < (idec->chunk_size_ >> 3)) { - return VP8_STATUS_SUSPENDED; + dec->status_ = VP8_STATUS_SUSPENDED; + return ErrorStatusLossless(idec, dec->status_); } + if (!VP8LDecodeHeader(dec, io)) { + if (dec->status_ == VP8_STATUS_BITSTREAM_ERROR && + curr_size < idec->chunk_size_) { + dec->status_ = VP8_STATUS_SUSPENDED; + } return ErrorStatusLossless(idec, dec->status_); } // Allocate/verify output buffer now. @@ -488,33 +534,29 @@ static VP8StatusCode DecodeVP8LData(WebPIDecoder* const idec) { const size_t curr_size = MemDataSize(&idec->mem_); assert(idec->is_lossless_); - // At present Lossless decoder can't decode image incrementally. So wait till - // all the image data is aggregated before image can be decoded. - if (curr_size < idec->chunk_size_) { - return VP8_STATUS_SUSPENDED; - } + // Switch to incremental decoding if we don't have all the bytes available. + dec->incremental_ = (curr_size < idec->chunk_size_); if (!VP8LDecodeImage(dec)) { return ErrorStatusLossless(idec, dec->status_); } - - idec->state_ = STATE_DONE; - - return VP8_STATUS_OK; + assert(dec->status_ == VP8_STATUS_OK || dec->status_ == VP8_STATUS_SUSPENDED); + return (dec->status_ == VP8_STATUS_SUSPENDED) ? dec->status_ + : FinishDecoding(idec); } // Main decoding loop static VP8StatusCode IDecode(WebPIDecoder* idec) { VP8StatusCode status = VP8_STATUS_SUSPENDED; - if (idec->state_ == STATE_PRE_VP8) { + if (idec->state_ == STATE_WEBP_HEADER) { status = DecodeWebPHeaders(idec); } else { if (idec->dec_ == NULL) { return VP8_STATUS_SUSPENDED; // can't continue if we have no decoder. 
} } - if (idec->state_ == STATE_VP8_FRAME_HEADER) { + if (idec->state_ == STATE_VP8_HEADER) { status = DecodeVP8FrameHeader(idec); } if (idec->state_ == STATE_VP8_PARTS0) { @@ -536,20 +578,23 @@ static VP8StatusCode IDecode(WebPIDecoder* idec) { // Public functions WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) { - WebPIDecoder* idec = (WebPIDecoder*)calloc(1, sizeof(*idec)); + WebPIDecoder* idec = (WebPIDecoder*)WebPSafeCalloc(1ULL, sizeof(*idec)); if (idec == NULL) { return NULL; } - idec->state_ = STATE_PRE_VP8; + idec->state_ = STATE_WEBP_HEADER; idec->chunk_size_ = 0; + idec->last_mb_y_ = -1; + InitMemBuffer(&idec->mem_); WebPInitDecBuffer(&idec->output_); VP8InitIo(&idec->io_); WebPResetDecParams(&idec->params_); - idec->params_.output = output_buffer ? output_buffer : &idec->output_; + idec->params_.output = (output_buffer != NULL) ? output_buffer + : &idec->output_; WebPInitCustomIo(&idec->params_, &idec->io_); // Plug the I/O functions. return idec; @@ -581,14 +626,18 @@ void WebPIDelete(WebPIDecoder* idec) { if (idec == NULL) return; if (idec->dec_ != NULL) { if (!idec->is_lossless_) { - VP8Delete(idec->dec_); + if (idec->state_ == STATE_VP8_DATA) { + // Synchronize the thread, clean-up and check for errors. + VP8ExitCritical((VP8Decoder*)idec->dec_, &idec->io_); + } + VP8Delete((VP8Decoder*)idec->dec_); } else { - VP8LDelete(idec->dec_); + VP8LDelete((VP8LDecoder*)idec->dec_); } } ClearMemBuffer(&idec->mem_); WebPFreeDecBuffer(&idec->output_); - free(idec); + WebPSafeFree(idec); } //------------------------------------------------------------------------------ @@ -596,12 +645,22 @@ void WebPIDelete(WebPIDecoder* idec) { WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer, size_t output_buffer_size, int output_stride) { + const int is_external_memory = (output_buffer != NULL); WebPIDecoder* idec; + if (mode >= MODE_YUV) return NULL; + if (!is_external_memory) { // Overwrite parameters to sane values. + output_buffer_size = 0; + output_stride = 0; + } else { // A buffer was passed. Validate the other params. + if (output_stride == 0 || output_buffer_size == 0) { + return NULL; // invalid parameter. + } + } idec = WebPINewDecoder(NULL); if (idec == NULL) return NULL; idec->output_.colorspace = mode; - idec->output_.is_external_memory = 1; + idec->output_.is_external_memory = is_external_memory; idec->output_.u.RGBA.rgba = output_buffer; idec->output_.u.RGBA.stride = output_stride; idec->output_.u.RGBA.size = output_buffer_size; @@ -612,10 +671,30 @@ WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride, uint8_t* u, size_t u_size, int u_stride, uint8_t* v, size_t v_size, int v_stride, uint8_t* a, size_t a_size, int a_stride) { - WebPIDecoder* const idec = WebPINewDecoder(NULL); + const int is_external_memory = (luma != NULL); + WebPIDecoder* idec; + WEBP_CSP_MODE colorspace; + + if (!is_external_memory) { // Overwrite parameters to sane values. + luma_size = u_size = v_size = a_size = 0; + luma_stride = u_stride = v_stride = a_stride = 0; + u = v = a = NULL; + colorspace = MODE_YUVA; + } else { // A luma buffer was passed. Validate the other parameters. + if (u == NULL || v == NULL) return NULL; + if (luma_size == 0 || u_size == 0 || v_size == 0) return NULL; + if (luma_stride == 0 || u_stride == 0 || v_stride == 0) return NULL; + if (a != NULL) { + if (a_size == 0 || a_stride == 0) return NULL; + } + colorspace = (a == NULL) ? 
MODE_YUV : MODE_YUVA; + } + + idec = WebPINewDecoder(NULL); if (idec == NULL) return NULL; - idec->output_.colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA; - idec->output_.is_external_memory = 1; + + idec->output_.colorspace = colorspace; + idec->output_.is_external_memory = is_external_memory; idec->output_.u.YUVA.y = luma; idec->output_.u.YUVA.y_stride = luma_stride; idec->output_.u.YUVA.y_size = luma_size; @@ -768,7 +847,7 @@ int WebPISetIOHooks(WebPIDecoder* const idec, VP8IoSetupHook setup, VP8IoTeardownHook teardown, void* user_data) { - if (idec == NULL || idec->state_ > STATE_PRE_VP8) { + if (idec == NULL || idec->state_ > STATE_WEBP_HEADER) { return 0; } @@ -779,7 +858,3 @@ int WebPISetIOHooks(WebPIDecoder* const idec, return 1; } - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/dec/io.c b/drivers/webp/dec/io.c index 594804c2e6..13e469ab73 100644 --- a/drivers/webp/dec/io.c +++ b/drivers/webp/dec/io.c @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // functions for sample output. @@ -15,10 +17,7 @@ #include "./webpi.h" #include "../dsp/dsp.h" #include "../dsp/yuv.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif +#include "../utils/utils.h" //------------------------------------------------------------------------------ // Main YUV<->RGB conversion functions @@ -46,57 +45,17 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) { // Point-sampling U/V sampler. static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) { - WebPDecBuffer* output = p->output; - const WebPRGBABuffer* const buf = &output->u.RGBA; - uint8_t* dst = buf->rgba + io->mb_y * buf->stride; - const uint8_t* y_src = io->y; - const uint8_t* u_src = io->u; - const uint8_t* v_src = io->v; - const WebPSampleLinePairFunc sample = WebPSamplers[output->colorspace]; - const int mb_w = io->mb_w; - const int last = io->mb_h - 1; - int j; - for (j = 0; j < last; j += 2) { - sample(y_src, y_src + io->y_stride, u_src, v_src, - dst, dst + buf->stride, mb_w); - y_src += 2 * io->y_stride; - u_src += io->uv_stride; - v_src += io->uv_stride; - dst += 2 * buf->stride; - } - if (j == last) { // Just do the last line twice - sample(y_src, y_src, u_src, v_src, dst, dst, mb_w); - } + WebPDecBuffer* const output = p->output; + WebPRGBABuffer* const buf = &output->u.RGBA; + uint8_t* const dst = buf->rgba + io->mb_y * buf->stride; + WebPSamplerProcessPlane(io->y, io->y_stride, + io->u, io->v, io->uv_stride, + dst, buf->stride, io->mb_w, io->mb_h, + WebPSamplers[output->colorspace]); return io->mb_h; } //------------------------------------------------------------------------------ -// YUV444 -> RGB conversion - -#if 0 // TODO(skal): this is for future rescaling. 
-static int EmitRGB(const VP8Io* const io, WebPDecParams* const p) { - WebPDecBuffer* output = p->output; - const WebPRGBABuffer* const buf = &output->u.RGBA; - uint8_t* dst = buf->rgba + io->mb_y * buf->stride; - const uint8_t* y_src = io->y; - const uint8_t* u_src = io->u; - const uint8_t* v_src = io->v; - const WebPYUV444Converter convert = WebPYUV444Converters[output->colorspace]; - const int mb_w = io->mb_w; - const int last = io->mb_h; - int j; - for (j = 0; j < last; ++j) { - convert(y_src, u_src, v_src, dst, mb_w); - y_src += io->y_stride; - u_src += io->uv_stride; - v_src += io->uv_stride; - dst += buf->stride; - } - return io->mb_h; -} -#endif - -//------------------------------------------------------------------------------ // Fancy upsampling #ifdef FANCY_UPSAMPLING @@ -117,7 +76,7 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) { if (y == 0) { // First line is special cased. We mirror the u/v samples at boundary. - upsample(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, NULL, dst, mb_w); + upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, mb_w); } else { // We can finish the left-over line from previous call. upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v, @@ -160,14 +119,16 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) { //------------------------------------------------------------------------------ -static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p) { +static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p, + int expected_num_lines_out) { const uint8_t* alpha = io->a; const WebPYUVABuffer* const buf = &p->output->u.YUVA; const int mb_w = io->mb_w; const int mb_h = io->mb_h; uint8_t* dst = buf->a + io->mb_y * buf->a_stride; int j; - + (void)expected_num_lines_out; + assert(expected_num_lines_out == mb_h); if (alpha != NULL) { for (j = 0; j < mb_h; ++j) { memcpy(dst, alpha, mb_w * sizeof(*dst)); @@ -210,7 +171,8 @@ static int GetAlphaSourceRow(const VP8Io* const io, return start_y; } -static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) { +static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p, + int expected_num_lines_out) { const uint8_t* alpha = io->a; if (alpha != NULL) { const int mb_w = io->mb_w; @@ -221,21 +183,13 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) { int num_rows; const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows); uint8_t* const base_rgba = buf->rgba + start_y * buf->stride; - uint8_t* dst = base_rgba + (alpha_first ? 0 : 3); - uint32_t alpha_mask = 0xff; - int i, j; - - for (j = 0; j < num_rows; ++j) { - for (i = 0; i < mb_w; ++i) { - const uint32_t alpha_value = alpha[i]; - dst[4 * i] = alpha_value; - alpha_mask &= alpha_value; - } - alpha += io->width; - dst += buf->stride; - } - // alpha_mask is < 0xff if there's non-trivial alpha to premultiply with. - if (alpha_mask != 0xff && WebPIsPremultipliedMode(colorspace)) { + uint8_t* const dst = base_rgba + (alpha_first ? 0 : 3); + const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w, + num_rows, dst, buf->stride); + (void)expected_num_lines_out; + assert(expected_num_lines_out == num_rows); + // has_alpha is true if there's non-trivial alpha to premultiply with. 
+ if (has_alpha && WebPIsPremultipliedMode(colorspace)) { WebPApplyAlphaMultiply(base_rgba, alpha_first, mb_w, num_rows, buf->stride); } @@ -243,7 +197,8 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) { return 0; } -static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) { +static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p, + int expected_num_lines_out) { const uint8_t* alpha = io->a; if (alpha != NULL) { const int mb_w = io->mb_w; @@ -252,10 +207,13 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) { int num_rows; const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows); uint8_t* const base_rgba = buf->rgba + start_y * buf->stride; +#ifdef WEBP_SWAP_16BIT_CSP + uint8_t* alpha_dst = base_rgba; +#else uint8_t* alpha_dst = base_rgba + 1; +#endif uint32_t alpha_mask = 0x0f; int i, j; - for (j = 0; j < num_rows; ++j) { for (i = 0; i < mb_w; ++i) { // Fill in the alpha value (converted to 4 bits). @@ -266,6 +224,8 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) { alpha += io->width; alpha_dst += buf->stride; } + (void)expected_num_lines_out; + assert(expected_num_lines_out == num_rows); if (alpha_mask != 0x0f && WebPIsPremultipliedMode(colorspace)) { WebPApplyAlphaMultiply4444(base_rgba, mb_w, num_rows, buf->stride); } @@ -291,15 +251,35 @@ static int Rescale(const uint8_t* src, int src_stride, static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) { const int mb_h = io->mb_h; const int uv_mb_h = (mb_h + 1) >> 1; - const int num_lines_out = Rescale(io->y, io->y_stride, mb_h, &p->scaler_y); + WebPRescaler* const scaler = &p->scaler_y; + int num_lines_out = 0; + if (WebPIsAlphaMode(p->output->colorspace) && io->a != NULL) { + // Before rescaling, we premultiply the luma directly into the io->y + // internal buffer. This is OK since these samples are not used for + // intra-prediction (the top samples are saved in cache_y_/u_/v_). + // But we need to cast the const away, though. 
+ WebPMultRows((uint8_t*)io->y, io->y_stride, + io->a, io->width, io->mb_w, mb_h, 0); + } + num_lines_out = Rescale(io->y, io->y_stride, mb_h, scaler); Rescale(io->u, io->uv_stride, uv_mb_h, &p->scaler_u); Rescale(io->v, io->uv_stride, uv_mb_h, &p->scaler_v); return num_lines_out; } -static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p) { +static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p, + int expected_num_lines_out) { if (io->a != NULL) { - Rescale(io->a, io->width, io->mb_h, &p->scaler_a); + const WebPYUVABuffer* const buf = &p->output->u.YUVA; + uint8_t* dst_y = buf->y + p->last_y * buf->y_stride; + const uint8_t* src_a = buf->a + p->last_y * buf->a_stride; + const int num_lines_out = Rescale(io->a, io->width, io->mb_h, &p->scaler_a); + (void)expected_num_lines_out; + assert(expected_num_lines_out == num_lines_out); + if (num_lines_out > 0) { // unmultiply the Y + WebPMultRows(dst_y, buf->y_stride, src_a, buf->a_stride, + p->scaler_a.dst_width, num_lines_out, 1); + } } return 0; } @@ -316,39 +296,34 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) { const size_t work_size = 2 * out_width; // scratch memory for luma rescaler const size_t uv_work_size = 2 * uv_out_width; // and for each u/v ones size_t tmp_size; - int32_t* work; + rescaler_t* work; - tmp_size = work_size + 2 * uv_work_size; + tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work); if (has_alpha) { - tmp_size += work_size; + tmp_size += work_size * sizeof(*work); } - p->memory = calloc(1, tmp_size * sizeof(*work)); + p->memory = WebPSafeMalloc(1ULL, tmp_size); if (p->memory == NULL) { return 0; // memory error } - work = (int32_t*)p->memory; + work = (rescaler_t*)p->memory; WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h, buf->y, out_width, out_height, buf->y_stride, 1, - io->mb_w, out_width, io->mb_h, out_height, work); WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height, buf->u, uv_out_width, uv_out_height, buf->u_stride, 1, - uv_in_width, uv_out_width, - uv_in_height, uv_out_height, work + work_size); WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height, buf->v, uv_out_width, uv_out_height, buf->v_stride, 1, - uv_in_width, uv_out_width, - uv_in_height, uv_out_height, work + work_size + uv_work_size); p->emit = EmitRescaledYUV; if (has_alpha) { WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h, buf->a, out_width, out_height, buf->a_stride, 1, - io->mb_w, out_width, io->mb_h, out_height, work + work_size + 2 * uv_work_size); p->emit_alpha = EmitRescaledAlphaYUV; + WebPInitAlphaProcessing(); } return 1; } @@ -360,13 +335,13 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) { const WebPYUV444Converter convert = WebPYUV444Converters[p->output->colorspace]; const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride; + uint8_t* dst = buf->rgba + y_pos * buf->stride; int num_lines_out = 0; // For RGB rescaling, because of the YUV420, current scan position // U/V can be +1/-1 line from the Y one. Hence the double test. 
while (WebPRescalerHasPendingOutput(&p->scaler_y) && WebPRescalerHasPendingOutput(&p->scaler_u)) { - assert(p->last_y + y_pos + num_lines_out < p->output->height); + assert(y_pos + num_lines_out < p->output->height); assert(p->scaler_u.y_accum == p->scaler_v.y_accum); WebPRescalerExportRow(&p->scaler_y); WebPRescalerExportRow(&p->scaler_u); @@ -388,65 +363,69 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) { const int y_lines_in = WebPRescalerImport(&p->scaler_y, mb_h - j, io->y + j * io->y_stride, io->y_stride); - const int u_lines_in = - WebPRescalerImport(&p->scaler_u, uv_mb_h - uv_j, - io->u + uv_j * io->uv_stride, io->uv_stride); - const int v_lines_in = - WebPRescalerImport(&p->scaler_v, uv_mb_h - uv_j, - io->v + uv_j * io->uv_stride, io->uv_stride); - (void)v_lines_in; // remove a gcc warning - assert(u_lines_in == v_lines_in); j += y_lines_in; - uv_j += u_lines_in; - num_lines_out += ExportRGB(p, num_lines_out); + if (WebPRescaleNeededLines(&p->scaler_u, uv_mb_h - uv_j)) { + const int u_lines_in = + WebPRescalerImport(&p->scaler_u, uv_mb_h - uv_j, + io->u + uv_j * io->uv_stride, io->uv_stride); + const int v_lines_in = + WebPRescalerImport(&p->scaler_v, uv_mb_h - uv_j, + io->v + uv_j * io->uv_stride, io->uv_stride); + (void)v_lines_in; // remove a gcc warning + assert(u_lines_in == v_lines_in); + uv_j += u_lines_in; + } + num_lines_out += ExportRGB(p, p->last_y + num_lines_out); } return num_lines_out; } -static int ExportAlpha(WebPDecParams* const p, int y_pos) { +static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) { const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride; + uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride; const WEBP_CSP_MODE colorspace = p->output->colorspace; const int alpha_first = (colorspace == MODE_ARGB || colorspace == MODE_Argb); uint8_t* dst = base_rgba + (alpha_first ? 
0 : 3); int num_lines_out = 0; const int is_premult_alpha = WebPIsPremultipliedMode(colorspace); - uint32_t alpha_mask = 0xff; + uint32_t non_opaque = 0; const int width = p->scaler_a.dst_width; - while (WebPRescalerHasPendingOutput(&p->scaler_a)) { - int i; - assert(p->last_y + y_pos + num_lines_out < p->output->height); + while (WebPRescalerHasPendingOutput(&p->scaler_a) && + num_lines_out < max_lines_out) { + assert(y_pos + num_lines_out < p->output->height); WebPRescalerExportRow(&p->scaler_a); - for (i = 0; i < width; ++i) { - const uint32_t alpha_value = p->scaler_a.dst[i]; - dst[4 * i] = alpha_value; - alpha_mask &= alpha_value; - } + non_opaque |= WebPDispatchAlpha(p->scaler_a.dst, 0, width, 1, dst, 0); dst += buf->stride; ++num_lines_out; } - if (is_premult_alpha && alpha_mask != 0xff) { + if (is_premult_alpha && non_opaque) { WebPApplyAlphaMultiply(base_rgba, alpha_first, width, num_lines_out, buf->stride); } return num_lines_out; } -static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) { +static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos, + int max_lines_out) { const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride; + uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride; +#ifdef WEBP_SWAP_16BIT_CSP + uint8_t* alpha_dst = base_rgba; +#else uint8_t* alpha_dst = base_rgba + 1; +#endif int num_lines_out = 0; const WEBP_CSP_MODE colorspace = p->output->colorspace; const int width = p->scaler_a.dst_width; const int is_premult_alpha = WebPIsPremultipliedMode(colorspace); uint32_t alpha_mask = 0x0f; - while (WebPRescalerHasPendingOutput(&p->scaler_a)) { + while (WebPRescalerHasPendingOutput(&p->scaler_a) && + num_lines_out < max_lines_out) { int i; - assert(p->last_y + y_pos + num_lines_out < p->output->height); + assert(y_pos + num_lines_out < p->output->height); WebPRescalerExportRow(&p->scaler_a); for (i = 0; i < width; ++i) { // Fill in the alpha value (converted to 4 bits). 
@@ -463,15 +442,17 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) { return num_lines_out; } -static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p) { +static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p, + int expected_num_out_lines) { if (io->a != NULL) { WebPRescaler* const scaler = &p->scaler_a; - int j = 0; - int pos = 0; - while (j < io->mb_h) { - j += WebPRescalerImport(scaler, io->mb_h - j, - io->a + j * io->width, io->width); - pos += p->emit_alpha_row(p, pos); + int lines_left = expected_num_out_lines; + const int y_end = p->last_y + lines_left; + while (lines_left > 0) { + const int row_offset = scaler->src_y - io->mb_y; + WebPRescalerImport(scaler, io->mb_h + io->mb_y - scaler->src_y, + io->a + row_offset * io->width, io->width); + lines_left -= p->emit_alpha_row(p, y_end - lines_left, lines_left); } } return 0; @@ -484,9 +465,9 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { const int uv_in_width = (io->mb_w + 1) >> 1; const int uv_in_height = (io->mb_h + 1) >> 1; const size_t work_size = 2 * out_width; // scratch memory for one rescaler - int32_t* work; // rescalers work area + rescaler_t* work; // rescalers work area uint8_t* tmp; // tmp storage for scaled YUV444 samples before RGB conversion - size_t tmp_size1, tmp_size2; + size_t tmp_size1, tmp_size2, total_size; tmp_size1 = 3 * work_size; tmp_size2 = 3 * out_width; @@ -494,30 +475,28 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { tmp_size1 += work_size; tmp_size2 += out_width; } - p->memory = calloc(1, tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp)); + total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp); + p->memory = WebPSafeMalloc(1ULL, total_size); if (p->memory == NULL) { return 0; // memory error } - work = (int32_t*)p->memory; + work = (rescaler_t*)p->memory; tmp = (uint8_t*)(work + tmp_size1); WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h, tmp + 0 * out_width, out_width, out_height, 0, 1, - io->mb_w, out_width, io->mb_h, out_height, work + 0 * work_size); WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height, tmp + 1 * out_width, out_width, out_height, 0, 1, - io->mb_w, 2 * out_width, io->mb_h, 2 * out_height, work + 1 * work_size); WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height, tmp + 2 * out_width, out_width, out_height, 0, 1, - io->mb_w, 2 * out_width, io->mb_h, 2 * out_height, work + 2 * work_size); p->emit = EmitRescaledRGB; + WebPInitYUV444Converters(); if (has_alpha) { WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h, tmp + 3 * out_width, out_width, out_height, 0, 1, - io->mb_w, out_width, io->mb_h, out_height, work + 3 * work_size); p->emit_alpha = EmitRescaledAlphaRGB; if (p->output->colorspace == MODE_RGBA_4444 || @@ -526,6 +505,7 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { } else { p->emit_alpha_row = ExportAlpha; } + WebPInitAlphaProcessing(); } return 1; } @@ -546,7 +526,9 @@ static int CustomSetup(VP8Io* io) { if (!WebPIoInitFromOptions(p->options, io, is_alpha ? MODE_YUV : MODE_YUVA)) { return 0; } - + if (is_alpha && WebPIsPremultipliedMode(colorspace)) { + WebPInitUpsamplers(); + } if (io->use_scaling) { const int ok = is_rgb ? 
InitRGBRescaler(io, p) : InitYUVRescaler(io, p); if (!ok) { @@ -554,11 +536,12 @@ static int CustomSetup(VP8Io* io) { } } else { if (is_rgb) { + WebPInitSamplers(); p->emit = EmitSampledRGB; // default -#ifdef FANCY_UPSAMPLING if (io->fancy_upsampling) { +#ifdef FANCY_UPSAMPLING const int uv_width = (io->mb_w + 1) >> 1; - p->memory = malloc(io->mb_w + 2 * uv_width); + p->memory = WebPSafeMalloc(1ULL, (size_t)(io->mb_w + 2 * uv_width)); if (p->memory == NULL) { return 0; // memory error. } @@ -567,18 +550,20 @@ static int CustomSetup(VP8Io* io) { p->tmp_v = p->tmp_u + uv_width; p->emit = EmitFancyRGB; WebPInitUpsamplers(); - } #endif + } } else { p->emit = EmitYUV; } if (is_alpha) { // need transparency output - if (WebPIsPremultipliedMode(colorspace)) WebPInitPremultiply(); p->emit_alpha = (colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444) ? EmitAlphaRGBA4444 : is_rgb ? EmitAlphaRGB : EmitAlphaYUV; + if (is_rgb) { + WebPInitAlphaProcessing(); + } } } @@ -601,8 +586,8 @@ static int CustomPut(const VP8Io* io) { return 0; } num_lines_out = p->emit(io, p); - if (p->emit_alpha) { - p->emit_alpha(io, p); + if (p->emit_alpha != NULL) { + p->emit_alpha(io, p, num_lines_out); } p->last_y += num_lines_out; return 1; @@ -612,7 +597,7 @@ static int CustomPut(const VP8Io* io) { static void CustomTeardown(const VP8Io* io) { WebPDecParams* const p = (WebPDecParams*)io->opaque; - free(p->memory); + WebPSafeFree(p->memory); p->memory = NULL; } @@ -627,7 +612,3 @@ void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io) { } //------------------------------------------------------------------------------ - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/dec/layer.c b/drivers/webp/dec/layer.c deleted file mode 100644 index a3a5bdcfe8..0000000000 --- a/drivers/webp/dec/layer.c +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ -// ----------------------------------------------------------------------------- -// -// Enhancement layer (for YUV444/422) -// -// Author: Skal (pascal.massimino@gmail.com) - -#include <assert.h> -#include <stdlib.h> - -#include "./vp8i.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -//------------------------------------------------------------------------------ - -int VP8DecodeLayer(VP8Decoder* const dec) { - assert(dec); - assert(dec->layer_data_size_ > 0); - (void)dec; - - // TODO: handle enhancement layer here. - - return 1; -} - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/dec/quant.c b/drivers/webp/dec/quant.c index d54097af0d..5b648f942c 100644 --- a/drivers/webp/dec/quant.c +++ b/drivers/webp/dec/quant.c @@ -1,8 +1,10 @@ // Copyright 2010 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. 
All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Quantizer initialization @@ -11,10 +13,6 @@ #include "./vp8i.h" -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - static WEBP_INLINE int clip(int v, int M) { return v < 0 ? 0 : v > M ? M : v; } @@ -102,12 +100,11 @@ void VP8ParseQuant(VP8Decoder* const dec) { m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)]; m->uv_mat_[1] = kAcTable[clip(q + dquv_ac, 127)]; + + m->uv_quant_ = q + dquv_ac; // for dithering strength evaluation } } } //------------------------------------------------------------------------------ -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/dec/tree.c b/drivers/webp/dec/tree.c index 82484e4c55..c2007ea733 100644 --- a/drivers/webp/dec/tree.c +++ b/drivers/webp/dec/tree.c @@ -1,22 +1,21 @@ // Copyright 2010 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Coding trees and probas // // Author: Skal (pascal.massimino@gmail.com) -#include "vp8i.h" +#include "./vp8i.h" +#include "../utils/bit_reader_inl.h" #define USE_GENERIC_TREE -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - #ifdef USE_GENERIC_TREE static const int8_t kYModesIntra4[18] = { -B_DC_PRED, 1, @@ -31,61 +30,12 @@ static const int8_t kYModesIntra4[18] = { }; #endif -#ifndef ONLY_KEYFRAME_CODE - -// inter prediction modes -enum { - LEFT4 = 0, ABOVE4 = 1, ZERO4 = 2, NEW4 = 3, - NEARESTMV, NEARMV, ZEROMV, NEWMV, SPLITMV }; - -static const int8_t kYModesInter[8] = { - -DC_PRED, 1, - 2, 3, - -V_PRED, -H_PRED, - -TM_PRED, -B_PRED -}; - -static const int8_t kMBSplit[6] = { - -3, 1, - -2, 2, - -0, -1 -}; - -static const int8_t kMVRef[8] = { - -ZEROMV, 1, - -NEARESTMV, 2, - -NEARMV, 3, - -NEWMV, -SPLITMV -}; - -static const int8_t kMVRef4[6] = { - -LEFT4, 1, - -ABOVE4, 2, - -ZERO4, -NEW4 -}; -#endif - //------------------------------------------------------------------------------ // Default probabilities -// Inter -#ifndef ONLY_KEYFRAME_CODE -static const uint8_t kYModeProbaInter0[4] = { 112, 86, 140, 37 }; -static const uint8_t kUVModeProbaInter0[3] = { 162, 101, 204 }; -static const uint8_t kMVProba0[2][NUM_MV_PROBAS] = { - { 162, 128, 225, 146, 172, 147, 214, 39, - 156, 128, 129, 132, 75, 145, 178, 206, - 239, 254, 254 }, - { 164, 128, 204, 170, 119, 235, 140, 230, - 228, 128, 130, 130, 74, 148, 180, 203, - 236, 254, 254 } -}; -#endif - // Paragraph 13.5 static const uint8_t CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = { - // genereated using vp8_default_coef_probs() in entropy.c:129 { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } @@ -326,28 +276,38 @@ static const uint8_t 
kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = { void VP8ResetProba(VP8Proba* const proba) { memset(proba->segments_, 255u, sizeof(proba->segments_)); - memcpy(proba->coeffs_, CoeffsProba0, sizeof(CoeffsProba0)); -#ifndef ONLY_KEYFRAME_CODE - memcpy(proba->mv_, kMVProba0, sizeof(kMVProba0)); - memcpy(proba->ymode_, kYModeProbaInter0, sizeof(kYModeProbaInter0)); - memcpy(proba->uvmode_, kUVModeProbaInter0, sizeof(kUVModeProbaInter0)); -#endif + // proba->bands_[][] is initialized later } -void VP8ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec) { - uint8_t* const top = dec->intra_t_ + 4 * dec->mb_x_; +static void ParseIntraMode(VP8BitReader* const br, + VP8Decoder* const dec, int mb_x) { + uint8_t* const top = dec->intra_t_ + 4 * mb_x; uint8_t* const left = dec->intra_l_; - // Hardcoded 16x16 intra-mode decision tree. - dec->is_i4x4_ = !VP8GetBit(br, 145); // decide for B_PRED first - if (!dec->is_i4x4_) { + VP8MBData* const block = dec->mb_data_ + mb_x; + + // Note: we don't save segment map (yet), as we don't expect + // to decode more than 1 keyframe. + if (dec->segment_hdr_.update_map_) { + // Hardcoded tree parsing + block->segment_ = !VP8GetBit(br, dec->proba_.segments_[0]) + ? VP8GetBit(br, dec->proba_.segments_[1]) + : 2 + VP8GetBit(br, dec->proba_.segments_[2]); + } else { + block->segment_ = 0; // default for intra + } + if (dec->use_skip_proba_) block->skip_ = VP8GetBit(br, dec->skip_p_); + + block->is_i4x4_ = !VP8GetBit(br, 145); // decide for B_PRED first + if (!block->is_i4x4_) { + // Hardcoded 16x16 intra-mode decision tree. const int ymode = VP8GetBit(br, 156) ? (VP8GetBit(br, 128) ? TM_PRED : H_PRED) : (VP8GetBit(br, 163) ? V_PRED : DC_PRED); - dec->imodes_[0] = ymode; - memset(top, ymode, 4 * sizeof(top[0])); - memset(left, ymode, 4 * sizeof(left[0])); + block->imodes_[0] = ymode; + memset(top, ymode, 4 * sizeof(*top)); + memset(left, ymode, 4 * sizeof(*left)); } else { - uint8_t* modes = dec->imodes_; + uint8_t* modes = block->imodes_; int y; for (y = 0; y < 4; ++y) { int ymode = left[y]; @@ -356,10 +316,10 @@ void VP8ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec) { const uint8_t* const prob = kBModesProba[top[x]][ymode]; #ifdef USE_GENERIC_TREE // Generic tree-parsing - int i = 0; - do { + int i = kYModesIntra4[VP8GetBit(br, prob[0])]; + while (i > 0) { i = kYModesIntra4[2 * i + VP8GetBit(br, prob[i])]; - } while (i > 0); + } ymode = -i; #else // Hardcoded tree parsing @@ -374,15 +334,24 @@ void VP8ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec) { (!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED))); #endif // USE_GENERIC_TREE top[x] = ymode; - *modes++ = ymode; } + memcpy(modes, top, 4 * sizeof(*top)); + modes += 4; left[y] = ymode; } } // Hardcoded UVMode decision tree - dec->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED - : !VP8GetBit(br, 114) ? V_PRED - : VP8GetBit(br, 183) ? TM_PRED : H_PRED; + block->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED + : !VP8GetBit(br, 114) ? V_PRED + : VP8GetBit(br, 183) ? 
TM_PRED : H_PRED; +} + +int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) { + int mb_x; + for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) { + ParseIntraMode(br, dec, mb_x); + } + return !dec->br_.eof_; } //------------------------------------------------------------------------------ @@ -524,18 +493,13 @@ static const uint8_t } }; -#ifndef ONLY_KEYFRAME_CODE -static const uint8_t MVUpdateProba[2][NUM_MV_PROBAS] = { - { 237, 246, 253, 253, 254, 254, 254, 254, - 254, 254, 254, 254, 254, 254, 250, 250, - 252, 254, 254 }, - { 231, 243, 245, 253, 254, 254, 254, 254, - 254, 254, 254, 254, 254, 254, 251, 251, - 254, 254, 254 } +// Paragraph 9.9 + +static const int kBands[16 + 1] = { + 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, + 0 // extra entry as sentinel }; -#endif -// Paragraph 9.9 void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) { VP8Proba* const proba = &dec->proba_; int t, b, c, p; @@ -543,47 +507,19 @@ void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) { for (b = 0; b < NUM_BANDS; ++b) { for (c = 0; c < NUM_CTX; ++c) { for (p = 0; p < NUM_PROBAS; ++p) { - if (VP8GetBit(br, CoeffsUpdateProba[t][b][c][p])) { - proba->coeffs_[t][b][c][p] = VP8GetValue(br, 8); - } + const int v = VP8GetBit(br, CoeffsUpdateProba[t][b][c][p]) ? + VP8GetValue(br, 8) : CoeffsProba0[t][b][c][p]; + proba->bands_[t][b].probas_[c][p] = v; } } } + for (b = 0; b < 16 + 1; ++b) { + proba->bands_ptr_[t][b] = &proba->bands_[t][kBands[b]]; + } } dec->use_skip_proba_ = VP8Get(br); if (dec->use_skip_proba_) { dec->skip_p_ = VP8GetValue(br, 8); } -#ifndef ONLY_KEYFRAME_CODE - if (!dec->frm_hdr_.key_frame_) { - int i; - dec->intra_p_ = VP8GetValue(br, 8); - dec->last_p_ = VP8GetValue(br, 8); - dec->golden_p_ = VP8GetValue(br, 8); - if (VP8Get(br)) { // update y-mode - for (i = 0; i < 4; ++i) { - proba->ymode_[i] = VP8GetValue(br, 8); - } - } - if (VP8Get(br)) { // update uv-mode - for (i = 0; i < 3; ++i) { - proba->uvmode_[i] = VP8GetValue(br, 8); - } - } - // update MV - for (i = 0; i < 2; ++i) { - int k; - for (k = 0; k < NUM_MV_PROBAS; ++k) { - if (VP8GetBit(br, MVUpdateProba[i][k])) { - const int v = VP8GetValue(br, 7); - proba->mv_[i][k] = v ? v << 1 : 1; - } - } - } - } -#endif } -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/dec/vp8.c b/drivers/webp/dec/vp8.c index b0ccfa2a06..d89eb1c59e 100644 --- a/drivers/webp/dec/vp8.c +++ b/drivers/webp/dec/vp8.c @@ -1,8 +1,10 @@ // Copyright 2010 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
// ----------------------------------------------------------------------------- // // main entry for the decoder @@ -11,14 +13,12 @@ #include <stdlib.h> +#include "./alphai.h" #include "./vp8i.h" #include "./vp8li.h" #include "./webpi.h" -#include "../utils/bit_reader.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif +#include "../utils/bit_reader_inl.h" +#include "../utils/utils.h" //------------------------------------------------------------------------------ @@ -45,10 +45,10 @@ int VP8InitIoInternal(VP8Io* const io, int version) { } VP8Decoder* VP8New(void) { - VP8Decoder* const dec = (VP8Decoder*)calloc(1, sizeof(*dec)); + VP8Decoder* const dec = (VP8Decoder*)WebPSafeCalloc(1ULL, sizeof(*dec)); if (dec != NULL) { SetOk(dec); - WebPWorkerInit(&dec->worker_); + WebPGetWorkerInterface()->Init(&dec->worker_); dec->ready_ = 0; dec->num_parts_ = 1; } @@ -69,16 +69,13 @@ const char* VP8StatusMessage(VP8Decoder* const dec) { void VP8Delete(VP8Decoder* const dec) { if (dec != NULL) { VP8Clear(dec); - free(dec); + WebPSafeFree(dec); } } int VP8SetError(VP8Decoder* const dec, VP8StatusCode error, const char* const msg) { - // TODO This check would be unnecessary if alpha decompression was separated - // from VP8ProcessRow/FinishRow. This avoids setting 'dec->status_' to - // something other than VP8_STATUS_BITSTREAM_ERROR on alpha decompression - // failure. + // The oldest error reported takes precedence over the new one. if (dec->status_ == VP8_STATUS_OK) { dec->status_ = error; dec->error_msg_ = msg; @@ -121,6 +118,9 @@ int VP8GetInfo(const uint8_t* data, size_t data_size, size_t chunk_size, if (((bits >> 5)) >= chunk_size) { // partition_length return 0; // inconsistent size information. } + if (w == 0 || h == 0) { + return 0; // We don't support both width and height to be zero. + } if (width) { *width = w; @@ -190,25 +190,27 @@ static VP8StatusCode ParsePartitions(VP8Decoder* const dec, const uint8_t* sz = buf; const uint8_t* buf_end = buf + size; const uint8_t* part_start; - int last_part; - int p; + size_t size_left = size; + size_t last_part; + size_t p; dec->num_parts_ = 1 << VP8GetValue(br, 2); last_part = dec->num_parts_ - 1; - part_start = buf + last_part * 3; - if (buf_end < part_start) { + if (size < 3 * last_part) { // we can't even read the sizes with sz[]! That's a failure. return VP8_STATUS_NOT_ENOUGH_DATA; } + part_start = buf + last_part * 3; + size_left -= last_part * 3; for (p = 0; p < last_part; ++p) { - const uint32_t psize = sz[0] | (sz[1] << 8) | (sz[2] << 16); - const uint8_t* part_end = part_start + psize; - if (part_end > buf_end) part_end = buf_end; - VP8InitBitReader(dec->parts_ + p, part_start, part_end); - part_start = part_end; + size_t psize = sz[0] | (sz[1] << 8) | (sz[2] << 16); + if (psize > size_left) psize = size_left; + VP8InitBitReader(dec->parts_ + p, part_start, psize); + part_start += psize; + size_left -= psize; sz += 3; } - VP8InitBitReader(dec->parts_ + last_part, part_start, buf_end); + VP8InitBitReader(dec->parts_ + last_part, part_start, size_left); return (part_start < buf_end) ? VP8_STATUS_OK : VP8_STATUS_SUSPENDED; // Init is ok, but there's not enough data } @@ -236,20 +238,6 @@ static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) { } } dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 
1 : 2; - if (dec->filter_type_ > 0) { // precompute filter levels per segment - if (dec->segment_hdr_.use_segment_) { - int s; - for (s = 0; s < NUM_MB_SEGMENTS; ++s) { - int strength = dec->segment_hdr_.filter_strength_[s]; - if (!dec->segment_hdr_.absolute_delta_) { - strength += hdr->level_; - } - dec->filter_levels_[s] = strength; - } - } else { - dec->filter_levels_[0] = hdr->level_; - } - } return !br->eof_; } @@ -261,7 +249,6 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { VP8PictureHeader* pic_hdr; VP8BitReader* br; VP8StatusCode status; - WebPHeaderStructure headers; if (dec == NULL) { return 0; @@ -271,33 +258,8 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { return VP8SetError(dec, VP8_STATUS_INVALID_PARAM, "null VP8Io passed to VP8GetHeaders()"); } - - // Process Pre-VP8 chunks. - headers.data = io->data; - headers.data_size = io->data_size; - status = WebPParseHeaders(&headers); - if (status != VP8_STATUS_OK) { - return VP8SetError(dec, status, "Incorrect/incomplete header."); - } - if (headers.is_lossless) { - return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, - "Unexpected lossless format encountered."); - } - - if (dec->alpha_data_ == NULL) { - assert(dec->alpha_data_size_ == 0); - // We have NOT set alpha data yet. Set it now. - // (This is to ensure that dec->alpha_data_ is NOT reset to NULL if - // WebPParseHeaders() is called more than once, as in incremental decoding - // case.) - dec->alpha_data_ = headers.alpha_data; - dec->alpha_data_size_ = headers.alpha_data_size; - } - - // Process the VP8 frame header. - buf = headers.data + headers.offset; - buf_size = headers.data_size - headers.offset; - assert(headers.data_size >= headers.offset); // WebPParseHeaders' guarantee + buf = io->data; + buf_size = io->data_size; if (buf_size < 4) { return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "Truncated header."); @@ -355,7 +317,6 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { VP8ResetProba(&dec->proba_); ResetSegmentHeader(&dec->segment_hdr_); - dec->segment_ = 0; // default for intra } // Check if we have all the partition #0 available, and initialize dec->br_ @@ -366,7 +327,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { } br = &dec->br_; - VP8InitBitReader(br, buf, buf + frm_hdr->partition_length_); + VP8InitBitReader(br, buf, frm_hdr->partition_length_); buf += frm_hdr->partition_length_; buf_size -= frm_hdr->partition_length_; @@ -393,63 +354,14 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { // Frame buffer marking if (!frm_hdr->key_frame_) { - // Paragraph 9.7 -#ifndef ONLY_KEYFRAME_CODE - dec->buffer_flags_ = VP8Get(br) << 0; // update golden - dec->buffer_flags_ |= VP8Get(br) << 1; // update alt ref - if (!(dec->buffer_flags_ & 1)) { - dec->buffer_flags_ |= VP8GetValue(br, 2) << 2; - } - if (!(dec->buffer_flags_ & 2)) { - dec->buffer_flags_ |= VP8GetValue(br, 2) << 4; - } - dec->buffer_flags_ |= VP8Get(br) << 6; // sign bias golden - dec->buffer_flags_ |= VP8Get(br) << 7; // sign bias alt ref -#else return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE, "Not a key frame."); -#endif - } else { - dec->buffer_flags_ = 0x003 | 0x100; } - // Paragraph 9.8 -#ifndef ONLY_KEYFRAME_CODE - dec->update_proba_ = VP8Get(br); - if (!dec->update_proba_) { // save for later restore - dec->proba_saved_ = dec->proba_; - } - dec->buffer_flags_ &= 1 << 8; - dec->buffer_flags_ |= - (frm_hdr->key_frame_ || VP8Get(br)) << 8; // refresh last frame -#else - VP8Get(br); // just ignore the value of update_proba_ 
-#endif + VP8Get(br); // ignore the value of update_proba_ VP8ParseProba(br, dec); -#ifdef WEBP_EXPERIMENTAL_FEATURES - // Extensions - if (dec->pic_hdr_.colorspace_) { - const size_t kTrailerSize = 8; - const uint8_t kTrailerMarker = 0x01; - const uint8_t* ext_buf = buf - kTrailerSize; - size_t size; - - if (frm_hdr->partition_length_ < kTrailerSize || - ext_buf[kTrailerSize - 1] != kTrailerMarker) { - return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, - "RIFF: Inconsistent extra information."); - } - - // Layer - size = (ext_buf[0] << 0) | (ext_buf[1] << 8) | (ext_buf[2] << 16); - dec->layer_data_size_ = size; - dec->layer_data_ = NULL; // will be set later - dec->layer_colorspace_ = ext_buf[3]; - } -#endif - // sanitized state dec->ready_ = 1; return 1; @@ -458,11 +370,6 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { //------------------------------------------------------------------------------ // Residual decoding (Paragraph 13.2 / 13.3) -static const uint8_t kBands[16 + 1] = { - 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, - 0 // extra entry as sentinel -}; - static const uint8_t kCat3[] = { 173, 148, 140, 0 }; static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 }; static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 }; @@ -473,253 +380,226 @@ static const uint8_t kZigzag[16] = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; -typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting +// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2 +static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) { + int v; + if (!VP8GetBit(br, p[3])) { + if (!VP8GetBit(br, p[4])) { + v = 2; + } else { + v = 3 + VP8GetBit(br, p[5]); + } + } else { + if (!VP8GetBit(br, p[6])) { + if (!VP8GetBit(br, p[7])) { + v = 5 + VP8GetBit(br, 159); + } else { + v = 7 + 2 * VP8GetBit(br, 165); + v += VP8GetBit(br, 145); + } + } else { + const uint8_t* tab; + const int bit1 = VP8GetBit(br, p[8]); + const int bit0 = VP8GetBit(br, p[9 + bit1]); + const int cat = 2 * bit1 + bit0; + v = 0; + for (tab = kCat3456[cat]; *tab; ++tab) { + v += v + VP8GetBit(br, *tab); + } + v += 3 + (8 << cat); + } + } + return v; +} // Returns the position of the last non-zero coeff plus one -// (and 0 if there's no coeff at all) -static int GetCoeffs(VP8BitReader* const br, ProbaArray prob, +static int GetCoeffs(VP8BitReader* const br, const VP8BandProbas* const prob[], int ctx, const quant_t dq, int n, int16_t* out) { - // n is either 0 or 1 here. kBands[n] is not necessary for extracting '*p'. - const uint8_t* p = prob[n][ctx]; - if (!VP8GetBit(br, p[0])) { // first EOB is more a 'CBP' bit. 
- return 0; - } - while (1) { - ++n; - if (!VP8GetBit(br, p[1])) { - p = prob[kBands[n]][0]; - } else { // non zero coeff - int v, j; + const uint8_t* p = prob[n]->probas_[ctx]; + for (; n < 16; ++n) { + if (!VP8GetBit(br, p[0])) { + return n; // previous coeff was last non-zero coeff + } + while (!VP8GetBit(br, p[1])) { // sequence of zero coeffs + p = prob[++n]->probas_[0]; + if (n == 16) return 16; + } + { // non zero coeff + const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0]; + int v; if (!VP8GetBit(br, p[2])) { - p = prob[kBands[n]][1]; v = 1; + p = p_ctx[1]; } else { - if (!VP8GetBit(br, p[3])) { - if (!VP8GetBit(br, p[4])) { - v = 2; - } else { - v = 3 + VP8GetBit(br, p[5]); - } - } else { - if (!VP8GetBit(br, p[6])) { - if (!VP8GetBit(br, p[7])) { - v = 5 + VP8GetBit(br, 159); - } else { - v = 7 + 2 * VP8GetBit(br, 165); - v += VP8GetBit(br, 145); - } - } else { - const uint8_t* tab; - const int bit1 = VP8GetBit(br, p[8]); - const int bit0 = VP8GetBit(br, p[9 + bit1]); - const int cat = 2 * bit1 + bit0; - v = 0; - for (tab = kCat3456[cat]; *tab; ++tab) { - v += v + VP8GetBit(br, *tab); - } - v += 3 + (8 << cat); - } - } - p = prob[kBands[n]][2]; + v = GetLargeValue(br, p); + p = p_ctx[2]; } - j = kZigzag[n - 1]; - out[j] = VP8GetSigned(br, v) * dq[j > 0]; - if (n == 16 || !VP8GetBit(br, p[0])) { // EOB - return n; - } - } - if (n == 16) { - return 16; + out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0]; } } + return 16; } -// Alias-safe way of converting 4bytes to 32bits. -typedef union { - uint8_t i8[4]; - uint32_t i32; -} PackedNz; - -// Table to unpack four bits into four bytes -static const PackedNz kUnpackTab[16] = { - {{0, 0, 0, 0}}, {{1, 0, 0, 0}}, {{0, 1, 0, 0}}, {{1, 1, 0, 0}}, - {{0, 0, 1, 0}}, {{1, 0, 1, 0}}, {{0, 1, 1, 0}}, {{1, 1, 1, 0}}, - {{0, 0, 0, 1}}, {{1, 0, 0, 1}}, {{0, 1, 0, 1}}, {{1, 1, 0, 1}}, - {{0, 0, 1, 1}}, {{1, 0, 1, 1}}, {{0, 1, 1, 1}}, {{1, 1, 1, 1}} }; - -// Macro to pack four LSB of four bytes into four bits. -#if defined(__PPC__) || defined(_M_PPC) || defined(_ARCH_PPC) || \ - defined(__BIG_ENDIAN__) -#define PACK_CST 0x08040201U -#else -#define PACK_CST 0x01020408U -#endif -#define PACK(X, S) ((((X).i32 * PACK_CST) & 0xff000000) >> (S)) - -static void ParseResiduals(VP8Decoder* const dec, - VP8MB* const mb, VP8BitReader* const token_br) { - int out_t_nz, out_l_nz, first; - ProbaArray ac_prob; - const VP8QuantMatrix* q = &dec->dqm_[dec->segment_]; - int16_t* dst = dec->coeffs_; +static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) { + nz_coeffs <<= 2; + nz_coeffs |= (nz > 3) ? 3 : (nz > 1) ? 
2 : dc_nz; + return nz_coeffs; +} + +static int ParseResiduals(VP8Decoder* const dec, + VP8MB* const mb, VP8BitReader* const token_br) { + const VP8BandProbas* (* const bands)[16 + 1] = dec->proba_.bands_ptr_; + const VP8BandProbas* const * ac_proba; + VP8MBData* const block = dec->mb_data_ + dec->mb_x_; + const VP8QuantMatrix* const q = &dec->dqm_[block->segment_]; + int16_t* dst = block->coeffs_; VP8MB* const left_mb = dec->mb_info_ - 1; - PackedNz nz_ac, nz_dc; - PackedNz tnz, lnz; - uint32_t non_zero_ac = 0; - uint32_t non_zero_dc = 0; + uint8_t tnz, lnz; + uint32_t non_zero_y = 0; + uint32_t non_zero_uv = 0; int x, y, ch; + uint32_t out_t_nz, out_l_nz; + int first; - nz_dc.i32 = nz_ac.i32 = 0; memset(dst, 0, 384 * sizeof(*dst)); - if (!dec->is_i4x4_) { // parse DC + if (!block->is_i4x4_) { // parse DC int16_t dc[16] = { 0 }; - const int ctx = mb->dc_nz_ + left_mb->dc_nz_; - mb->dc_nz_ = left_mb->dc_nz_ = - (GetCoeffs(token_br, (ProbaArray)dec->proba_.coeffs_[1], - ctx, q->y2_mat_, 0, dc) > 0); + const int ctx = mb->nz_dc_ + left_mb->nz_dc_; + const int nz = GetCoeffs(token_br, bands[1], ctx, q->y2_mat_, 0, dc); + mb->nz_dc_ = left_mb->nz_dc_ = (nz > 0); + if (nz > 1) { // more than just the DC -> perform the full transform + VP8TransformWHT(dc, dst); + } else { // only DC is non-zero -> inlined simplified transform + int i; + const int dc0 = (dc[0] + 3) >> 3; + for (i = 0; i < 16 * 16; i += 16) dst[i] = dc0; + } first = 1; - ac_prob = (ProbaArray)dec->proba_.coeffs_[0]; - VP8TransformWHT(dc, dst); + ac_proba = bands[0]; } else { first = 0; - ac_prob = (ProbaArray)dec->proba_.coeffs_[3]; + ac_proba = bands[3]; } - tnz = kUnpackTab[mb->nz_ & 0xf]; - lnz = kUnpackTab[left_mb->nz_ & 0xf]; + tnz = mb->nz_ & 0x0f; + lnz = left_mb->nz_ & 0x0f; for (y = 0; y < 4; ++y) { - int l = lnz.i8[y]; + int l = lnz & 1; + uint32_t nz_coeffs = 0; for (x = 0; x < 4; ++x) { - const int ctx = l + tnz.i8[x]; - const int nz = GetCoeffs(token_br, ac_prob, ctx, - q->y1_mat_, first, dst); - tnz.i8[x] = l = (nz > 0); - nz_dc.i8[x] = (dst[0] != 0); - nz_ac.i8[x] = (nz > 1); + const int ctx = l + (tnz & 1); + const int nz = GetCoeffs(token_br, ac_proba, ctx, q->y1_mat_, first, dst); + l = (nz > first); + tnz = (tnz >> 1) | (l << 7); + nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0); dst += 16; } - lnz.i8[y] = l; - non_zero_dc |= PACK(nz_dc, 24 - y * 4); - non_zero_ac |= PACK(nz_ac, 24 - y * 4); + tnz >>= 4; + lnz = (lnz >> 1) | (l << 7); + non_zero_y = (non_zero_y << 8) | nz_coeffs; } - out_t_nz = PACK(tnz, 24); - out_l_nz = PACK(lnz, 24); + out_t_nz = tnz; + out_l_nz = lnz >> 4; - tnz = kUnpackTab[mb->nz_ >> 4]; - lnz = kUnpackTab[left_mb->nz_ >> 4]; for (ch = 0; ch < 4; ch += 2) { + uint32_t nz_coeffs = 0; + tnz = mb->nz_ >> (4 + ch); + lnz = left_mb->nz_ >> (4 + ch); for (y = 0; y < 2; ++y) { - int l = lnz.i8[ch + y]; + int l = lnz & 1; for (x = 0; x < 2; ++x) { - const int ctx = l + tnz.i8[ch + x]; - const int nz = - GetCoeffs(token_br, (ProbaArray)dec->proba_.coeffs_[2], - ctx, q->uv_mat_, 0, dst); - tnz.i8[ch + x] = l = (nz > 0); - nz_dc.i8[y * 2 + x] = (dst[0] != 0); - nz_ac.i8[y * 2 + x] = (nz > 1); + const int ctx = l + (tnz & 1); + const int nz = GetCoeffs(token_br, bands[2], ctx, q->uv_mat_, 0, dst); + l = (nz > 0); + tnz = (tnz >> 1) | (l << 3); + nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0); dst += 16; } - lnz.i8[ch + y] = l; + tnz >>= 2; + lnz = (lnz >> 1) | (l << 5); } - non_zero_dc |= PACK(nz_dc, 8 - ch * 2); - non_zero_ac |= PACK(nz_ac, 8 - ch * 2); + // Note: we don't really need the 
per-4x4 details for U/V blocks. + non_zero_uv |= nz_coeffs << (4 * ch); + out_t_nz |= (tnz << 4) << ch; + out_l_nz |= (lnz & 0xf0) << ch; } - out_t_nz |= PACK(tnz, 20); - out_l_nz |= PACK(lnz, 20); mb->nz_ = out_t_nz; left_mb->nz_ = out_l_nz; - dec->non_zero_ac_ = non_zero_ac; - dec->non_zero_ = non_zero_ac | non_zero_dc; - mb->skip_ = !dec->non_zero_; + block->non_zero_y_ = non_zero_y; + block->non_zero_uv_ = non_zero_uv; + + // We look at the mode-code of each block and check if some blocks have less + // than three non-zero coeffs (code < 2). This is to avoid dithering flat and + // empty blocks. + block->dither_ = (non_zero_uv & 0xaaaa) ? 0 : q->dither_; + + return !(non_zero_y | non_zero_uv); // will be used for further optimization } -#undef PACK //------------------------------------------------------------------------------ // Main loop int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) { - VP8BitReader* const br = &dec->br_; VP8MB* const left = dec->mb_info_ - 1; - VP8MB* const info = dec->mb_info_ + dec->mb_x_; - - // Note: we don't save segment map (yet), as we don't expect - // to decode more than 1 keyframe. - if (dec->segment_hdr_.update_map_) { - // Hardcoded tree parsing - dec->segment_ = !VP8GetBit(br, dec->proba_.segments_[0]) ? - VP8GetBit(br, dec->proba_.segments_[1]) : - 2 + VP8GetBit(br, dec->proba_.segments_[2]); - } - info->skip_ = dec->use_skip_proba_ ? VP8GetBit(br, dec->skip_p_) : 0; + VP8MB* const mb = dec->mb_info_ + dec->mb_x_; + VP8MBData* const block = dec->mb_data_ + dec->mb_x_; + int skip = dec->use_skip_proba_ ? block->skip_ : 0; - VP8ParseIntraMode(br, dec); - if (br->eof_) { - return 0; - } - - if (!info->skip_) { - ParseResiduals(dec, info, token_br); + if (!skip) { + skip = ParseResiduals(dec, mb, token_br); } else { - left->nz_ = info->nz_ = 0; - if (!dec->is_i4x4_) { - left->dc_nz_ = info->dc_nz_ = 0; + left->nz_ = mb->nz_ = 0; + if (!block->is_i4x4_) { + left->nz_dc_ = mb->nz_dc_ = 0; } - dec->non_zero_ = 0; - dec->non_zero_ac_ = 0; + block->non_zero_y_ = 0; + block->non_zero_uv_ = 0; + block->dither_ = 0; + } + + if (dec->filter_type_ > 0) { // store filter info + VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_; + *finfo = dec->fstrengths_[block->segment_][block->is_i4x4_]; + finfo->f_inner_ |= !skip; } - return (!token_br->eof_); + return !token_br->eof_; } void VP8InitScanline(VP8Decoder* const dec) { VP8MB* const left = dec->mb_info_ - 1; left->nz_ = 0; - left->dc_nz_ = 0; + left->nz_dc_ = 0; memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_)); - dec->filter_row_ = - (dec->filter_type_ > 0) && - (dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_); + dec->mb_x_ = 0; } static int ParseFrame(VP8Decoder* const dec, VP8Io* io) { for (dec->mb_y_ = 0; dec->mb_y_ < dec->br_mb_y_; ++dec->mb_y_) { + // Parse bitstream for this row. VP8BitReader* const token_br = &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)]; - VP8InitScanline(dec); - for (dec->mb_x_ = 0; dec->mb_x_ < dec->mb_w_; dec->mb_x_++) { + if (!VP8ParseIntraModeRow(&dec->br_, dec)) { + return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, + "Premature end-of-partition0 encountered."); + } + for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) { if (!VP8DecodeMB(dec, token_br)) { return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "Premature end-of-file encountered."); } - VP8ReconstructBlock(dec); - - // Store data and save block's filtering params - VP8StoreBlock(dec); } + VP8InitScanline(dec); // Prepare for next scanline + + // Reconstruct, filter and emit the row. 
if (!VP8ProcessRow(dec, io)) { return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted."); } } - if (dec->use_threads_ && !WebPWorkerSync(&dec->worker_)) { - return 0; - } - - // Finish -#ifndef ONLY_KEYFRAME_CODE - if (!dec->update_proba_) { - dec->proba_ = dec->proba_saved_; + if (dec->mt_method_ > 0) { + if (!WebPGetWorkerInterface()->Sync(&dec->worker_)) return 0; } -#endif - -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (dec->layer_data_size_ > 0) { - if (!VP8DecodeLayer(dec)) { - return 0; - } - } -#endif return 1; } @@ -768,12 +648,10 @@ void VP8Clear(VP8Decoder* const dec) { if (dec == NULL) { return; } - if (dec->use_threads_) { - WebPWorkerEnd(&dec->worker_); - } - if (dec->mem_) { - free(dec->mem_); - } + WebPGetWorkerInterface()->End(&dec->worker_); + ALPHDelete(dec->alph_dec_); + dec->alph_dec_ = NULL; + WebPSafeFree(dec->mem_); dec->mem_ = NULL; dec->mem_size_ = 0; memset(&dec->br_, 0, sizeof(dec->br_)); @@ -782,6 +660,3 @@ void VP8Clear(VP8Decoder* const dec) { //------------------------------------------------------------------------------ -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/dec/vp8i.h b/drivers/webp/dec/vp8i.h index 4382edfd8e..b5f2b23009 100644 --- a/drivers/webp/dec/vp8i.h +++ b/drivers/webp/dec/vp8i.h @@ -1,8 +1,10 @@ // Copyright 2010 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // VP8 decoder: internal header. @@ -13,12 +15,14 @@ #define WEBP_DEC_VP8I_H_ #include <string.h> // for memcpy() +#include "./common.h" #include "./vp8li.h" #include "../utils/bit_reader.h" +#include "../utils/random.h" #include "../utils/thread.h" #include "../dsp/dsp.h" -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus extern "C" { #endif @@ -27,48 +31,10 @@ extern "C" { // version numbers #define DEC_MAJ_VERSION 0 -#define DEC_MIN_VERSION 2 -#define DEC_REV_VERSION 0 - -#define ONLY_KEYFRAME_CODE // to remove any code related to P-Frames - -// intra prediction modes -enum { B_DC_PRED = 0, // 4x4 modes - B_TM_PRED, - B_VE_PRED, - B_HE_PRED, - B_RD_PRED, - B_VR_PRED, - B_LD_PRED, - B_VL_PRED, - B_HD_PRED, - B_HU_PRED, - NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10 - - // Luma16 or UV modes - DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED, - H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED, - B_PRED = NUM_BMODES, // refined I4x4 mode - - // special modes - B_DC_PRED_NOTOP = 4, - B_DC_PRED_NOLEFT = 5, - B_DC_PRED_NOTOPLEFT = 6, - NUM_B_DC_MODES = 7 }; - -enum { MB_FEATURE_TREE_PROBS = 3, - NUM_MB_SEGMENTS = 4, - NUM_REF_LF_DELTAS = 4, - NUM_MODE_LF_DELTAS = 4, // I4x4, ZERO, *, SPLIT - MAX_NUM_PARTITIONS = 8, - // Probabilities - NUM_TYPES = 4, - NUM_BANDS = 8, - NUM_CTX = 3, - NUM_PROBAS = 11, - NUM_MV_PROBAS = 19 }; - -// YUV-cache parameters. +#define DEC_MIN_VERSION 4 +#define DEC_REV_VERSION 4 + +// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). 
// Constraints are: We need to store one 16x16 block of luma samples (y), // and two 8x8 chroma blocks (u/v). These are better be 16-bytes aligned, // in order to be SIMD-friendly. We also need to store the top, left and @@ -90,14 +56,15 @@ enum { MB_FEATURE_TREE_PROBS = 3, // 'y' = y-samples 'u' = u-samples 'v' = u-samples // '|' = left sample, '-' = top sample, '+' = top-left sample // 't' = extra top-right sample for 4x4 modes -// With this layout, BPS (=Bytes Per Scan-line) is one cacheline size. -#define BPS 32 // this is the common stride used by yuv[] #define YUV_SIZE (BPS * 17 + BPS * 9) #define Y_SIZE (BPS * 17) #define Y_OFF (BPS * 1 + 8) #define U_OFF (Y_OFF + BPS * 16 + BPS) #define V_OFF (U_OFF + 16) +// minimal width under which lossy multi-threading is always disabled +#define MIN_WIDTH_FOR_THREADS 512 + //------------------------------------------------------------------------------ // Headers @@ -126,15 +93,19 @@ typedef struct { int8_t filter_strength_[NUM_MB_SEGMENTS]; // filter strength for segments } VP8SegmentHeader; +// probas associated to one of the contexts +typedef uint8_t VP8ProbaArray[NUM_PROBAS]; + +typedef struct { // all the probas associated to one band + VP8ProbaArray probas_[NUM_CTX]; +} VP8BandProbas; + // Struct collecting all frame-persistent probabilities. typedef struct { uint8_t segments_[MB_FEATURE_TREE_PROBS]; // Type: 0:Intra16-AC 1:Intra16-DC 2:Chroma 3:Intra4 - uint8_t coeffs_[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS]; -#ifndef ONLY_KEYFRAME_CODE - uint8_t ymode_[4], uvmode_[3]; - uint8_t mv_[2][NUM_MV_PROBAS]; -#endif + VP8BandProbas bands_[NUM_TYPES][NUM_BANDS]; + const VP8BandProbas* bands_ptr_[NUM_TYPES][16 + 1]; } VP8Proba; // Filter parameters @@ -151,32 +122,61 @@ typedef struct { // Informations about the macroblocks. typedef struct { // filter specs - unsigned int f_level_:6; // filter strength: 0..63 - unsigned int f_ilevel_:6; // inner limit: 1..63 - unsigned int f_inner_:1; // do inner filtering? + uint8_t f_limit_; // filter limit in [3..189], or 0 if no filtering + uint8_t f_ilevel_; // inner limit in [1..63] + uint8_t f_inner_; // do inner filtering? + uint8_t hev_thresh_; // high edge variance threshold in [0..2] } VP8FInfo; -typedef struct { // used for syntax-parsing - unsigned int nz_; // non-zero AC/DC coeffs - unsigned int dc_nz_:1; // non-zero DC coeffs - unsigned int skip_:1; // block type +typedef struct { // Top/Left Contexts used for syntax-parsing + uint8_t nz_; // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma) + uint8_t nz_dc_; // non-zero DC coeff (1bit) } VP8MB; // Dequantization matrices typedef int quant_t[2]; // [DC / AC]. Can be 'uint16_t[2]' too (~slower). typedef struct { quant_t y1_mat_, y2_mat_, uv_mat_; + + int uv_quant_; // U/V quantizer value + int dither_; // dithering amplitude (0 = off, max=255) } VP8QuantMatrix; +// Data needed to reconstruct a macroblock +typedef struct { + int16_t coeffs_[384]; // 384 coeffs = (16+4+4) * 4*4 + uint8_t is_i4x4_; // true if intra4x4 + uint8_t imodes_[16]; // one 16x16 mode (#0) or sixteen 4x4 modes + uint8_t uvmode_; // chroma prediction mode + // bit-wise info about the content of each sub-4x4 blocks (in decoding order). + // Each of the 4x4 blocks for y/u/v is associated with a 2b code according to: + // code=0 -> no coefficient + // code=1 -> only DC + // code=2 -> first three coefficients are non-zero + // code=3 -> more than three coefficients are non-zero + // This allows to call specialized transform functions. 
+ uint32_t non_zero_y_; + uint32_t non_zero_uv_; + uint8_t dither_; // local dithering strength (deduced from non_zero_*) + uint8_t skip_; + uint8_t segment_; +} VP8MBData; + // Persistent information needed by the parallel processing typedef struct { - int id_; // cache row to process (in [0..2]) - int mb_y_; // macroblock position of the row - int filter_row_; // true if row-filtering is needed - VP8FInfo* f_info_; // filter strengths - VP8Io io_; // copy of the VP8Io to pass to put() + int id_; // cache row to process (in [0..2]) + int mb_y_; // macroblock position of the row + int filter_row_; // true if row-filtering is needed + VP8FInfo* f_info_; // filter strengths (swapped with dec->f_info_) + VP8MBData* mb_data_; // reconstruction data (swapped with dec->mb_data_) + VP8Io io_; // copy of the VP8Io to pass to put() } VP8ThreadContext; +// Saved top samples, per macroblock. Fits into a cache-line. +typedef struct { + uint8_t y[16], u[8], v[8]; +} VP8TopSamples; + //------------------------------------------------------------------------------ // VP8Decoder: the main opaque structure handed over to user @@ -196,7 +196,8 @@ struct VP8Decoder { // Worker WebPWorker worker_; - int use_threads_; // use multi-thread + int mt_method_; // multi-thread method: 0=off, 1=[parse+recon][filter] + // 2=[parse][recon+filter] int cache_id_; // current cache row int num_caches_; // number of cached rows of 16 pixels (1, 2 or 3) VP8ThreadContext thread_ctx_; // Thread context @@ -213,12 +214,9 @@ struct VP8Decoder { // per-partition boolean decoders. VP8BitReader parts_[MAX_NUM_PARTITIONS]; - // buffer refresh flags - // bit 0: refresh Gold, bit 1: refresh Alt - // bit 2-3: copy to Gold, bit 4-5: copy to Alt - // bit 6: Gold sign bias, bit 7: Alt sign bias - // bit 8: refresh last frame - uint32_t buffer_flags_; + // Dithering strength, deduced from decoding options + int dither_; // whether to use dithering or not + VP8Random dithering_rg_; // random generator for dithering // dequantization (one set of DC/AC dequant factor per segment) VP8QuantMatrix dqm_[NUM_MB_SEGMENTS]; @@ -227,24 +225,18 @@ struct VP8Decoder { VP8Proba proba_; int use_skip_proba_; uint8_t skip_p_; -#ifndef ONLY_KEYFRAME_CODE - uint8_t intra_p_, last_p_, golden_p_; - VP8Proba proba_saved_; - int update_proba_; -#endif // Boundary data cache and persistent buffers. - uint8_t* intra_t_; // top intra modes values: 4 * mb_w_ - uint8_t intra_l_[4]; // left intra modes values - uint8_t* y_t_; // top luma samples: 16 * mb_w_ - uint8_t* u_t_, *v_t_; // top u/v samples: 8 * mb_w_ each + uint8_t* intra_t_; // top intra modes values: 4 * mb_w_ + uint8_t intra_l_[4]; // left intra modes values - VP8MB* mb_info_; // contextual macroblock info (mb_w_ + 1) - VP8FInfo* f_info_; // filter strength info - uint8_t* yuv_b_; // main block for Y/U/V (size = YUV_SIZE) - int16_t* coeffs_; // 384 coeffs = (16+8+8) * 4*4 + VP8TopSamples* yuv_t_; // top y/u/v samples - uint8_t* cache_y_; // macroblock row for storing unfiltered samples + VP8MB* mb_info_; // contextual macroblock info (mb_w_ + 1) + VP8FInfo* f_info_; // filter strength info + uint8_t* yuv_b_; // main block for Y/U/V (size = YUV_SIZE) + + uint8_t* cache_y_; // macroblock row for storing unfiltered samples uint8_t* cache_u_; uint8_t* cache_v_; int cache_y_stride_; @@ -256,31 +248,19 @@ struct VP8Decoder { // Per macroblock non-persistent infos. 
int mb_x_, mb_y_; // current position, in macroblock units - uint8_t is_i4x4_; // true if intra4x4 - uint8_t imodes_[16]; // one 16x16 mode (#0) or sixteen 4x4 modes - uint8_t uvmode_; // chroma prediction mode - uint8_t segment_; // block's segment - - // bit-wise info about the content of each sub-4x4 blocks: there are 16 bits - // for luma (bits #0->#15), then 4 bits for chroma-u (#16->#19) and 4 bits for - // chroma-v (#20->#23), each corresponding to one 4x4 block in decoding order. - // If the bit is set, the 4x4 block contains some non-zero coefficients. - uint32_t non_zero_; - uint32_t non_zero_ac_; + VP8MBData* mb_data_; // parsed reconstruction data // Filtering side-info - int filter_type_; // 0=off, 1=simple, 2=complex - int filter_row_; // per-row flag - uint8_t filter_levels_[NUM_MB_SEGMENTS]; // precalculated per-segment + int filter_type_; // 0=off, 1=simple, 2=complex + VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2]; // precalculated per-segment/type - // extensions - const uint8_t* alpha_data_; // compressed alpha data (if present) + // Alpha + struct ALPHDecoder* alph_dec_; // alpha-plane decoder object + const uint8_t* alpha_data_; // compressed alpha data (if present) size_t alpha_data_size_; - uint8_t* alpha_plane_; // output. Persistent, contains the whole data. - - int layer_colorspace_; - const uint8_t* layer_data_; // compressed layer data (if present) - size_t layer_data_size_; + int is_alpha_decoded_; // true if alpha_data_ is decoded in alpha_plane_ + uint8_t* alpha_plane_; // output. Persistent, contains the whole data. + int alpha_dithering_; // derived from decoding options (0=off, 100=full). }; //------------------------------------------------------------------------------ @@ -293,15 +273,14 @@ int VP8SetError(VP8Decoder* const dec, // in tree.c void VP8ResetProba(VP8Proba* const proba); void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec); -void VP8ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec); +// parses one row of intra mode data in partition 0, returns !eof +int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec); // in quant.c void VP8ParseQuant(VP8Decoder* const dec); // in frame.c -int VP8InitFrame(VP8Decoder* const dec, VP8Io* io); -// Predict a block and add residual -void VP8ReconstructBlock(VP8Decoder* const dec); +int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io); // Call io->setup() and finish setting up scan parameters. // After this call returns, one must always call VP8ExitCritical() with the // same parameters. Both functions should be used in pair. Returns VP8_STATUS_OK @@ -310,10 +289,16 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io); // Must always be called in pair with VP8EnterCritical(). // Returns false in case of error. int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io); -// Process the last decoded row (filtering + output) +// Return the multi-threading method to use (0=off), depending +// on options and bitstream size. Only for lossy decoding. +int VP8GetThreadMethod(const WebPDecoderOptions* const options, + const WebPHeaderStructure* const headers, + int width, int height); +// Initialize dithering post-process if needed. +void VP8InitDithering(const WebPDecoderOptions* const options, + VP8Decoder* const dec); +// Process the last decoded row (filtering + output). 
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io); -// Store a block, along with filtering params -void VP8StoreBlock(VP8Decoder* const dec); // To be called at the start of a new scanline, to initialize predictors. void VP8InitScanline(VP8Decoder* const dec); // Decode one macroblock. Returns false if there is not enough data. @@ -323,12 +308,9 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br); const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec, int row, int num_rows); -// in layer.c -int VP8DecodeLayer(VP8Decoder* const dec); - //------------------------------------------------------------------------------ -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus } // extern "C" #endif diff --git a/drivers/webp/dec/vp8l.c b/drivers/webp/dec/vp8l.c index 897e4395c7..19665a007d 100644 --- a/drivers/webp/dec/vp8l.c +++ b/drivers/webp/dec/vp8l.c @@ -1,8 +1,10 @@ // Copyright 2012 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // main entry for the decoder @@ -10,18 +12,17 @@ // Authors: Vikas Arora (vikaas.arora@gmail.com) // Jyrki Alakuijala (jyrki@google.com) -#include <stdio.h> #include <stdlib.h> + +#include "./alphai.h" #include "./vp8li.h" +#include "../dsp/dsp.h" #include "../dsp/lossless.h" #include "../dsp/yuv.h" +#include "../utils/endian_inl.h" #include "../utils/huffman.h" #include "../utils/utils.h" -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - #define NUM_ARGB_CACHE_ROWS 16 static const int kCodeLengthLiterals = 16; @@ -50,6 +51,9 @@ static const uint16_t kAlphabetSize[HUFFMAN_CODES_PER_META_CODE] = { NUM_DISTANCE_CODES }; +static const uint8_t kLiteralMap[HUFFMAN_CODES_PER_META_CODE] = { + 0, 1, 1, 1, 0 +}; #define NUM_CODE_LENGTH_CODES 19 static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = { @@ -57,19 +61,43 @@ static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = { }; #define CODE_TO_PLANE_CODES 120 -static const uint8_t code_to_plane_lut[CODE_TO_PLANE_CODES] = { - 0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a, - 0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a, - 0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b, - 0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03, - 0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c, - 0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e, - 0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b, - 0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f, - 0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b, - 0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41, - 0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f, - 0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70 +static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = { + 0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a, + 0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 
0x3a, + 0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b, + 0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03, + 0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c, + 0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e, + 0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b, + 0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f, + 0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b, + 0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41, + 0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f, + 0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70 +}; + +// Memory needed for lookup tables of one Huffman tree group. Red, blue, alpha +// and distance alphabets are constant (256 for red, blue and alpha, 40 for +// distance) and lookup table sizes for them in worst case are 630 and 410 +// respectively. Size of green alphabet depends on color cache size and is equal +// to 256 (green component values) + 24 (length prefix values) +// + color_cache_size (between 0 and 2048). +// All values computed for 8-bit first level lookup with Mark Adler's tool: +// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c +#define FIXED_TABLE_SIZE (630 * 3 + 410) +static const int kTableSize[12] = { + FIXED_TABLE_SIZE + 654, + FIXED_TABLE_SIZE + 656, + FIXED_TABLE_SIZE + 658, + FIXED_TABLE_SIZE + 662, + FIXED_TABLE_SIZE + 670, + FIXED_TABLE_SIZE + 686, + FIXED_TABLE_SIZE + 718, + FIXED_TABLE_SIZE + 782, + FIXED_TABLE_SIZE + 912, + FIXED_TABLE_SIZE + 1168, + FIXED_TABLE_SIZE + 1680, + FIXED_TABLE_SIZE + 2704 }; static int DecodeImageStream(int xsize, int ysize, @@ -80,27 +108,28 @@ static int DecodeImageStream(int xsize, int ysize, //------------------------------------------------------------------------------ int VP8LCheckSignature(const uint8_t* const data, size_t size) { - return (size >= 1) && (data[0] == VP8L_MAGIC_BYTE); + return (size >= VP8L_FRAME_HEADER_SIZE && + data[0] == VP8L_MAGIC_BYTE && + (data[4] >> 5) == 0); // version } static int ReadImageInfo(VP8LBitReader* const br, int* const width, int* const height, int* const has_alpha) { - const uint8_t signature = VP8LReadBits(br, 8); - if (!VP8LCheckSignature(&signature, 1)) { - return 0; - } + if (VP8LReadBits(br, 8) != VP8L_MAGIC_BYTE) return 0; *width = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1; *height = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1; *has_alpha = VP8LReadBits(br, 1); - VP8LReadBits(br, VP8L_VERSION_BITS); // Read/ignore the version number. - return 1; + if (VP8LReadBits(br, VP8L_VERSION_BITS) != 0) return 0; + return !br->eos_; } int VP8LGetInfo(const uint8_t* data, size_t data_size, int* const width, int* const height, int* const has_alpha) { if (data == NULL || data_size < VP8L_FRAME_HEADER_SIZE) { return 0; // not enough data + } else if (!VP8LCheckSignature(data, data_size)) { + return 0; // bad signature } else { int w, h, a; VP8LBitReader br; @@ -138,39 +167,80 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) { if (plane_code > CODE_TO_PLANE_CODES) { return plane_code - CODE_TO_PLANE_CODES; } else { - const int dist_code = code_to_plane_lut[plane_code - 1]; + const int dist_code = kCodeToPlane[plane_code - 1]; const int yoffset = dist_code >> 4; const int xoffset = 8 - (dist_code & 0xf); const int dist = yoffset * xsize + xoffset; - return (dist >= 1) ? dist : 1; + return (dist >= 1) ? 
dist : 1; // dist<1 can happen if xsize is very small } } //------------------------------------------------------------------------------ // Decodes the next Huffman code from bit-stream. // FillBitWindow(br) needs to be called at minimum every second call -// to ReadSymbolUnsafe. -static int ReadSymbolUnsafe(const HuffmanTree* tree, VP8LBitReader* const br) { - const HuffmanTreeNode* node = tree->root_; - assert(node != NULL); - while (!HuffmanTreeNodeIsLeaf(node)) { - node = HuffmanTreeNextNode(node, VP8LReadOneBitUnsafe(br)); - } - return node->symbol_; +// to ReadSymbol, in order to pre-fetch enough bits. +static WEBP_INLINE int ReadSymbol(const HuffmanCode* table, + VP8LBitReader* const br) { + int nbits; + uint32_t val = VP8LPrefetchBits(br); + table += val & HUFFMAN_TABLE_MASK; + nbits = table->bits - HUFFMAN_TABLE_BITS; + if (nbits > 0) { + VP8LSetBitPos(br, br->bit_pos_ + HUFFMAN_TABLE_BITS); + val = VP8LPrefetchBits(br); + table += table->value; + table += val & ((1 << nbits) - 1); + } + VP8LSetBitPos(br, br->bit_pos_ + table->bits); + return table->value; } -static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree, - VP8LBitReader* const br) { - const int read_safe = (br->pos_ + 8 > br->len_); - if (!read_safe) { - return ReadSymbolUnsafe(tree, br); +// Reads packed symbol depending on GREEN channel +#define BITS_SPECIAL_MARKER 0x100 // something large enough (and a bit-mask) +#define PACKED_NON_LITERAL_CODE 0 // must be < NUM_LITERAL_CODES +static WEBP_INLINE int ReadPackedSymbols(const HTreeGroup* group, + VP8LBitReader* const br, + uint32_t* const dst) { + const uint32_t val = VP8LPrefetchBits(br) & (HUFFMAN_PACKED_TABLE_SIZE - 1); + const HuffmanCode32 code = group->packed_table[val]; + assert(group->use_packed_table); + if (code.bits < BITS_SPECIAL_MARKER) { + VP8LSetBitPos(br, br->bit_pos_ + code.bits); + *dst = code.value; + return PACKED_NON_LITERAL_CODE; } else { - const HuffmanTreeNode* node = tree->root_; - assert(node != NULL); - while (!HuffmanTreeNodeIsLeaf(node)) { - node = HuffmanTreeNextNode(node, VP8LReadOneBit(br)); + VP8LSetBitPos(br, br->bit_pos_ + code.bits - BITS_SPECIAL_MARKER); + assert(code.value >= NUM_LITERAL_CODES); + return code.value; + } +} + +static int AccumulateHCode(HuffmanCode hcode, int shift, + HuffmanCode32* const huff) { + huff->bits += hcode.bits; + huff->value |= (uint32_t)hcode.value << shift; + assert(huff->bits <= HUFFMAN_TABLE_BITS); + return hcode.bits; +} + +static void BuildPackedTable(HTreeGroup* const htree_group) { + uint32_t code; + for (code = 0; code < HUFFMAN_PACKED_TABLE_SIZE; ++code) { + uint32_t bits = code; + HuffmanCode32* const huff = &htree_group->packed_table[bits]; + HuffmanCode hcode = htree_group->htrees[GREEN][bits]; + if (hcode.value >= NUM_LITERAL_CODES) { + huff->bits = hcode.bits + BITS_SPECIAL_MARKER; + huff->value = hcode.value; + } else { + huff->bits = 0; + huff->value = 0; + bits >>= AccumulateHCode(hcode, 8, huff); + bits >>= AccumulateHCode(htree_group->htrees[RED][bits], 16, huff); + bits >>= AccumulateHCode(htree_group->htrees[BLUE][bits], 0, huff); + bits >>= AccumulateHCode(htree_group->htrees[ALPHA][bits], 24, huff); + (void)bits; } - return node->symbol_; } } @@ -182,19 +252,18 @@ static int ReadHuffmanCodeLengths( int symbol; int max_symbol; int prev_code_len = DEFAULT_CODE_LENGTH; - HuffmanTree tree; + HuffmanCode table[1 << LENGTHS_TABLE_BITS]; - if (!HuffmanTreeBuildImplicit(&tree, code_length_code_lengths, - NUM_CODE_LENGTH_CODES)) { - dec->status_ = VP8_STATUS_BITSTREAM_ERROR; - return 
0; + if (!VP8LBuildHuffmanTable(table, LENGTHS_TABLE_BITS, + code_length_code_lengths, + NUM_CODE_LENGTH_CODES)) { + goto End; } if (VP8LReadBits(br, 1)) { // use length const int length_nbits = 2 + 2 * VP8LReadBits(br, 3); max_symbol = 2 + VP8LReadBits(br, length_nbits); if (max_symbol > num_symbols) { - dec->status_ = VP8_STATUS_BITSTREAM_ERROR; goto End; } } else { @@ -203,10 +272,13 @@ static int ReadHuffmanCodeLengths( symbol = 0; while (symbol < num_symbols) { + const HuffmanCode* p; int code_len; if (max_symbol-- == 0) break; VP8LFillBitWindow(br); - code_len = ReadSymbol(&tree, br); + p = &table[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK]; + VP8LSetBitPos(br, br->bit_pos_ + p->bits); + code_len = p->value; if (code_len < kCodeLengthLiterals) { code_lengths[symbol++] = code_len; if (code_len != 0) prev_code_len = code_len; @@ -217,7 +289,6 @@ static int ReadHuffmanCodeLengths( const int repeat_offset = kCodeLengthRepeatOffsets[slot]; int repeat = VP8LReadBits(br, extra_bits) + repeat_offset; if (symbol + repeat > num_symbols) { - dec->status_ = VP8_STATUS_BITSTREAM_ERROR; goto End; } else { const int length = use_prev ? prev_code_len : 0; @@ -228,36 +299,34 @@ static int ReadHuffmanCodeLengths( ok = 1; End: - HuffmanTreeRelease(&tree); + if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR; return ok; } +// 'code_lengths' is pre-allocated temporary buffer, used for creating Huffman +// tree. static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec, - HuffmanTree* const tree) { + int* const code_lengths, HuffmanCode* const table) { int ok = 0; + int size = 0; VP8LBitReader* const br = &dec->br_; const int simple_code = VP8LReadBits(br, 1); + memset(code_lengths, 0, alphabet_size * sizeof(*code_lengths)); + if (simple_code) { // Read symbols, codes & code lengths directly. - int symbols[2]; - int codes[2]; - int code_lengths[2]; const int num_symbols = VP8LReadBits(br, 1) + 1; const int first_symbol_len_code = VP8LReadBits(br, 1); // The first code is either 1 bit or 8 bit code. - symbols[0] = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8); - codes[0] = 0; - code_lengths[0] = num_symbols - 1; + int symbol = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8); + code_lengths[symbol] = 1; // The second code (if present), is always 8 bit long. if (num_symbols == 2) { - symbols[1] = VP8LReadBits(br, 8); - codes[1] = 1; - code_lengths[1] = num_symbols - 1; + symbol = VP8LReadBits(br, 8); + code_lengths[symbol] = 1; } - ok = HuffmanTreeBuildExplicit(tree, code_lengths, codes, symbols, - alphabet_size, num_symbols); + ok = 1; } else { // Decode Huffman-coded code lengths. 
- int* code_lengths = NULL; int i; int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = { 0 }; const int num_codes = VP8LReadBits(br, 4) + 4; @@ -266,42 +335,23 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec, return 0; } - code_lengths = - (int*)WebPSafeCalloc((uint64_t)alphabet_size, sizeof(*code_lengths)); - if (code_lengths == NULL) { - dec->status_ = VP8_STATUS_OUT_OF_MEMORY; - return 0; - } - for (i = 0; i < num_codes; ++i) { code_length_code_lengths[kCodeLengthCodeOrder[i]] = VP8LReadBits(br, 3); } ok = ReadHuffmanCodeLengths(dec, code_length_code_lengths, alphabet_size, code_lengths); - if (ok) { - ok = HuffmanTreeBuildImplicit(tree, code_lengths, alphabet_size); - } - free(code_lengths); } - ok = ok && !br->error_; - if (!ok) { + + ok = ok && !br->eos_; + if (ok) { + size = VP8LBuildHuffmanTable(table, HUFFMAN_TABLE_BITS, + code_lengths, alphabet_size); + } + if (!ok || size == 0) { dec->status_ = VP8_STATUS_BITSTREAM_ERROR; return 0; } - return 1; -} - -static void DeleteHtreeGroups(HTreeGroup* htree_groups, int num_htree_groups) { - if (htree_groups != NULL) { - int i, j; - for (i = 0; i < num_htree_groups; ++i) { - HuffmanTree* const htrees = htree_groups[i].htrees_; - for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) { - HuffmanTreeRelease(&htrees[j]); - } - } - free(htree_groups); - } + return size; } static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, @@ -311,7 +361,12 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, VP8LMetadata* const hdr = &dec->hdr_; uint32_t* huffman_image = NULL; HTreeGroup* htree_groups = NULL; + HuffmanCode* huffman_tables = NULL; + HuffmanCode* next = NULL; int num_htree_groups = 1; + int max_alphabet_size = 0; + int* code_lengths = NULL; + const int table_size = kTableSize[color_cache_bits]; if (allow_recursion && VP8LReadBits(br, 1)) { // use meta Huffman codes. @@ -321,51 +376,108 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, const int huffman_pixs = huffman_xsize * huffman_ysize; if (!DecodeImageStream(huffman_xsize, huffman_ysize, 0, dec, &huffman_image)) { - dec->status_ = VP8_STATUS_BITSTREAM_ERROR; goto Error; } hdr->huffman_subsample_bits_ = huffman_precision; for (i = 0; i < huffman_pixs; ++i) { // The huffman data is stored in red and green bytes. - const int index = (huffman_image[i] >> 8) & 0xffff; - huffman_image[i] = index; - if (index >= num_htree_groups) { - num_htree_groups = index + 1; + const int group = (huffman_image[i] >> 8) & 0xffff; + huffman_image[i] = group; + if (group >= num_htree_groups) { + num_htree_groups = group + 1; } } } - if (br->error_) goto Error; + if (br->eos_) goto Error; + + // Find maximum alphabet size for the htree group. 
+ for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) { + int alphabet_size = kAlphabetSize[j]; + if (j == 0 && color_cache_bits > 0) { + alphabet_size += 1 << color_cache_bits; + } + if (max_alphabet_size < alphabet_size) { + max_alphabet_size = alphabet_size; + } + } + + huffman_tables = (HuffmanCode*)WebPSafeMalloc(num_htree_groups * table_size, + sizeof(*huffman_tables)); + htree_groups = VP8LHtreeGroupsNew(num_htree_groups); + code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size, + sizeof(*code_lengths)); - assert(num_htree_groups <= 0x10000); - htree_groups = - (HTreeGroup*)WebPSafeCalloc((uint64_t)num_htree_groups, - sizeof(*htree_groups)); - if (htree_groups == NULL) { + if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) { dec->status_ = VP8_STATUS_OUT_OF_MEMORY; goto Error; } + next = huffman_tables; for (i = 0; i < num_htree_groups; ++i) { - HuffmanTree* const htrees = htree_groups[i].htrees_; + HTreeGroup* const htree_group = &htree_groups[i]; + HuffmanCode** const htrees = htree_group->htrees; + int size; + int total_size = 0; + int is_trivial_literal = 1; + int max_bits = 0; for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) { int alphabet_size = kAlphabetSize[j]; + htrees[j] = next; if (j == 0 && color_cache_bits > 0) { alphabet_size += 1 << color_cache_bits; } - if (!ReadHuffmanCode(alphabet_size, dec, htrees + j)) goto Error; + size = ReadHuffmanCode(alphabet_size, dec, code_lengths, next); + if (is_trivial_literal && kLiteralMap[j] == 1) { + is_trivial_literal = (next->bits == 0); + } + total_size += next->bits; + next += size; + if (size == 0) { + goto Error; + } + if (j <= ALPHA) { + int local_max_bits = code_lengths[0]; + int k; + for (k = 1; k < alphabet_size; ++k) { + if (code_lengths[k] > local_max_bits) { + local_max_bits = code_lengths[k]; + } + } + max_bits += local_max_bits; + } } + htree_group->is_trivial_literal = is_trivial_literal; + htree_group->is_trivial_code = 0; + if (is_trivial_literal) { + const int red = htrees[RED][0].value; + const int blue = htrees[BLUE][0].value; + const int alpha = htrees[ALPHA][0].value; + htree_group->literal_arb = + ((uint32_t)alpha << 24) | (red << 16) | blue; + if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) { + htree_group->is_trivial_code = 1; + htree_group->literal_arb |= htrees[GREEN][0].value << 8; + } + } + htree_group->use_packed_table = !htree_group->is_trivial_code && + (max_bits < HUFFMAN_PACKED_BITS); + if (htree_group->use_packed_table) BuildPackedTable(htree_group); } + WebPSafeFree(code_lengths); // All OK. Finalize pointers and return. hdr->huffman_image_ = huffman_image; hdr->num_htree_groups_ = num_htree_groups; hdr->htree_groups_ = htree_groups; + hdr->huffman_tables_ = huffman_tables; return 1; Error: - free(huffman_image); - DeleteHtreeGroups(htree_groups, num_htree_groups); + WebPSafeFree(code_lengths); + WebPSafeFree(huffman_image); + WebPSafeFree(huffman_tables); + VP8LHtreeGroupsFree(htree_groups); return 0; } @@ -379,13 +491,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { const int in_height = io->mb_h; const int out_height = io->scaled_height; const uint64_t work_size = 2 * num_channels * (uint64_t)out_width; - int32_t* work; // Rescaler work area. - const uint64_t scaled_data_size = num_channels * (uint64_t)out_width; + rescaler_t* work; // Rescaler work area. + const uint64_t scaled_data_size = (uint64_t)out_width; uint32_t* scaled_data; // Temporary storage for scaled BGRA data. 
const uint64_t memory_size = sizeof(*dec->rescaler) + work_size * sizeof(*work) + scaled_data_size * sizeof(*scaled_data); - uint8_t* memory = (uint8_t*)WebPSafeCalloc(memory_size, sizeof(*memory)); + uint8_t* memory = (uint8_t*)WebPSafeMalloc(memory_size, sizeof(*memory)); if (memory == NULL) { dec->status_ = VP8_STATUS_OUT_OF_MEMORY; return 0; @@ -395,13 +507,12 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { dec->rescaler = (WebPRescaler*)memory; memory += sizeof(*dec->rescaler); - work = (int32_t*)memory; + work = (rescaler_t*)memory; memory += work_size * sizeof(*work); scaled_data = (uint32_t*)memory; WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data, - out_width, out_height, 0, num_channels, - in_width, out_width, in_height, out_height, work); + out_width, out_height, 0, num_channels, work); return 1; } @@ -411,12 +522,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { // We have special "export" function since we need to convert from BGRA static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace, int rgba_stride, uint8_t* const rgba) { - const uint32_t* const src = (const uint32_t*)rescaler->dst; + uint32_t* const src = (uint32_t*)rescaler->dst; const int dst_width = rescaler->dst_width; int num_lines_out = 0; while (WebPRescalerHasPendingOutput(rescaler)) { uint8_t* const dst = rgba + num_lines_out * rgba_stride; WebPRescalerExportRow(rescaler); + WebPMultARGBRow(src, dst_width, 1); VP8LConvertFromBGRA(src, dst_width, colorspace, dst); ++num_lines_out; } @@ -424,18 +536,22 @@ static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace, } // Emit scaled rows. -static int EmitRescaledRows(const VP8LDecoder* const dec, - const uint32_t* const data, int in_stride, int mb_h, - uint8_t* const out, int out_stride) { +static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec, + uint8_t* in, int in_stride, int mb_h, + uint8_t* const out, int out_stride) { const WEBP_CSP_MODE colorspace = dec->output_->colorspace; - const uint8_t* const in = (const uint8_t*)data; int num_lines_in = 0; int num_lines_out = 0; while (num_lines_in < mb_h) { - const uint8_t* const row_in = in + num_lines_in * in_stride; + uint8_t* const row_in = in + num_lines_in * in_stride; uint8_t* const row_out = out + num_lines_out * out_stride; - num_lines_in += WebPRescalerImport(dec->rescaler, mb_h - num_lines_in, - row_in, in_stride); + const int lines_left = mb_h - num_lines_in; + const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left); + assert(needed_lines > 0 && needed_lines <= lines_left); + WebPMultARGBRows(row_in, in_stride, + dec->rescaler->src_width, needed_lines, 0); + WebPRescalerImport(dec->rescaler, lines_left, row_in, in_stride); + num_lines_in += needed_lines; num_lines_out += Export(dec->rescaler, colorspace, out_stride, row_out); } return num_lines_out; @@ -443,11 +559,10 @@ static int EmitRescaledRows(const VP8LDecoder* const dec, // Emit rows without any scaling. 
static int EmitRows(WEBP_CSP_MODE colorspace, - const uint32_t* const data, int in_stride, + const uint8_t* row_in, int in_stride, int mb_w, int mb_h, uint8_t* const out, int out_stride) { int lines = mb_h; - const uint8_t* row_in = (const uint8_t*)data; uint8_t* row_out = out; while (lines-- > 0) { VP8LConvertFromBGRA((const uint32_t*)row_in, mb_w, colorspace, row_out); @@ -463,72 +578,37 @@ static int EmitRows(WEBP_CSP_MODE colorspace, static void ConvertToYUVA(const uint32_t* const src, int width, int y_pos, const WebPDecBuffer* const output) { const WebPYUVABuffer* const buf = &output->u.YUVA; + // first, the luma plane - { - int i; - uint8_t* const y = buf->y + y_pos * buf->y_stride; - for (i = 0; i < width; ++i) { - const uint32_t p = src[i]; - y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff); - } - } + WebPConvertARGBToY(src, buf->y + y_pos * buf->y_stride, width); // then U/V planes { uint8_t* const u = buf->u + (y_pos >> 1) * buf->u_stride; uint8_t* const v = buf->v + (y_pos >> 1) * buf->v_stride; - const int uv_width = width >> 1; - int i; - for (i = 0; i < uv_width; ++i) { - const uint32_t v0 = src[2 * i + 0]; - const uint32_t v1 = src[2 * i + 1]; - // VP8RGBToU/V expects four accumulated pixels. Hence we need to - // scale r/g/b value by a factor 2. We just shift v0/v1 one bit less. - const int r = ((v0 >> 15) & 0x1fe) + ((v1 >> 15) & 0x1fe); - const int g = ((v0 >> 7) & 0x1fe) + ((v1 >> 7) & 0x1fe); - const int b = ((v0 << 1) & 0x1fe) + ((v1 << 1) & 0x1fe); - if (!(y_pos & 1)) { // even lines: store values - u[i] = VP8RGBToU(r, g, b); - v[i] = VP8RGBToV(r, g, b); - } else { // odd lines: average with previous values - const int tmp_u = VP8RGBToU(r, g, b); - const int tmp_v = VP8RGBToV(r, g, b); - // Approximated average-of-four. But it's an acceptable diff. - u[i] = (u[i] + tmp_u + 1) >> 1; - v[i] = (v[i] + tmp_v + 1) >> 1; - } - } - if (width & 1) { // last pixel - const uint32_t v0 = src[2 * i + 0]; - const int r = (v0 >> 14) & 0x3fc; - const int g = (v0 >> 6) & 0x3fc; - const int b = (v0 << 2) & 0x3fc; - if (!(y_pos & 1)) { // even lines - u[i] = VP8RGBToU(r, g, b); - v[i] = VP8RGBToV(r, g, b); - } else { // odd lines (note: we could just skip this) - const int tmp_u = VP8RGBToU(r, g, b); - const int tmp_v = VP8RGBToV(r, g, b); - u[i] = (u[i] + tmp_u + 1) >> 1; - v[i] = (v[i] + tmp_v + 1) >> 1; - } - } + // even lines: store values + // odd lines: average with previous values + WebPConvertARGBToUV(src, u, v, width, !(y_pos & 1)); } // Lastly, store alpha if needed. 
if (buf->a != NULL) { - int i; uint8_t* const a = buf->a + y_pos * buf->a_stride; - for (i = 0; i < width; ++i) a[i] = (src[i] >> 24); +#if defined(WORDS_BIGENDIAN) + WebPExtractAlpha((uint8_t*)src + 0, 0, width, 1, a, 0); +#else + WebPExtractAlpha((uint8_t*)src + 3, 0, width, 1, a, 0); +#endif } } static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) { WebPRescaler* const rescaler = dec->rescaler; - const uint32_t* const src = (const uint32_t*)rescaler->dst; + uint32_t* const src = (uint32_t*)rescaler->dst; const int dst_width = rescaler->dst_width; int num_lines_out = 0; while (WebPRescalerHasPendingOutput(rescaler)) { WebPRescalerExportRow(rescaler); + WebPMultARGBRow(src, dst_width, 1); ConvertToYUVA(src, dst_width, y_pos, dec->output_); ++y_pos; ++num_lines_out; @@ -537,28 +617,28 @@ static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) { } static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec, - const uint32_t* const data, - int in_stride, int mb_h) { - const uint8_t* const in = (const uint8_t*)data; + uint8_t* in, int in_stride, int mb_h) { int num_lines_in = 0; int y_pos = dec->last_out_row_; while (num_lines_in < mb_h) { - const uint8_t* const row_in = in + num_lines_in * in_stride; - num_lines_in += WebPRescalerImport(dec->rescaler, mb_h - num_lines_in, - row_in, in_stride); + const int lines_left = mb_h - num_lines_in; + const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left); + WebPMultARGBRows(in, in_stride, dec->rescaler->src_width, needed_lines, 0); + WebPRescalerImport(dec->rescaler, lines_left, in, in_stride); + num_lines_in += needed_lines; + in += needed_lines * in_stride; y_pos += ExportYUVA(dec, y_pos); } return y_pos; } static int EmitRowsYUVA(const VP8LDecoder* const dec, - const uint32_t* const data, int in_stride, + const uint8_t* in, int in_stride, int mb_w, int num_rows) { int y_pos = dec->last_out_row_; - const uint8_t* row_in = (const uint8_t*)data; while (num_rows-- > 0) { - ConvertToYUVA((const uint32_t*)row_in, mb_w, y_pos, dec->output_); - row_in += in_stride; + ConvertToYUVA((const uint32_t*)in, mb_w, y_pos, dec->output_); + in += in_stride; ++y_pos; } return y_pos; @@ -569,11 +649,11 @@ static int EmitRowsYUVA(const VP8LDecoder* const dec, // Sets io->mb_y, io->mb_h & io->mb_w according to start row, end row and // crop options. Also updates the input data pointer, so that it points to the -// start of the cropped window. -// Note that 'pixel_stride' is in units of 'uint32_t' (and not 'bytes). +// start of the cropped window. Note that pixels are in ARGB format even if +// 'in_data' is uint8_t*. // Returns true if the crop window is not empty. static int SetCropWindow(VP8Io* const io, int y_start, int y_end, - const uint32_t** const in_data, int pixel_stride) { + uint8_t** const in_data, int pixel_stride) { assert(y_start < y_end); assert(io->crop_left < io->crop_right); if (y_end > io->crop_bottom) { @@ -582,11 +662,11 @@ static int SetCropWindow(VP8Io* const io, int y_start, int y_end, if (y_start < io->crop_top) { const int delta = io->crop_top - y_start; y_start = io->crop_top; - *in_data += pixel_stride * delta; + *in_data += delta * pixel_stride; } if (y_start >= y_end) return 0; // Crop window is empty. - *in_data += io->crop_left; + *in_data += io->crop_left * sizeof(uint32_t); io->mb_y = y_start - io->crop_top; io->mb_w = io->crop_right - io->crop_left; @@ -634,10 +714,24 @@ static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows, } } +// Special method for paletted alpha data. 
+static void ApplyInverseTransformsAlpha(VP8LDecoder* const dec, int num_rows, + const uint8_t* const rows) { + const int start_row = dec->last_row_; + const int end_row = start_row + num_rows; + const uint8_t* rows_in = rows; + uint8_t* rows_out = (uint8_t*)dec->io_->opaque + dec->io_->width * start_row; + VP8LTransform* const transform = &dec->transforms_[0]; + assert(dec->next_transform_ == 1); + assert(transform->type_ == COLOR_INDEXING_TRANSFORM); + VP8LColorIndexInverseTransformAlpha(transform, start_row, end_row, rows_in, + rows_out); +} + // Processes (transforms, scales & color-converts) the rows decoded after the // last call. static void ProcessRows(VP8LDecoder* const dec, int row) { - const uint32_t* const rows = dec->argb_ + dec->width_ * dec->last_row_; + const uint32_t* const rows = dec->pixels_ + dec->width_ * dec->last_row_; const int num_rows = row - dec->last_row_; if (num_rows <= 0) return; // Nothing to be done. @@ -646,18 +740,18 @@ static void ProcessRows(VP8LDecoder* const dec, int row) { // Emit output. { VP8Io* const io = dec->io_; - const uint32_t* rows_data = dec->argb_cache_; - if (!SetCropWindow(io, dec->last_row_, row, &rows_data, io->width)) { + uint8_t* rows_data = (uint8_t*)dec->argb_cache_; + const int in_stride = io->width * sizeof(uint32_t); // in unit of RGBA + if (!SetCropWindow(io, dec->last_row_, row, &rows_data, in_stride)) { // Nothing to output (this time). } else { const WebPDecBuffer* const output = dec->output_; - const int in_stride = io->width * sizeof(*rows_data); - if (output->colorspace < MODE_YUV) { // convert to RGBA + if (WebPIsRGBMode(output->colorspace)) { // convert to RGBA const WebPRGBABuffer* const buf = &output->u.RGBA; uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride; const int num_rows_out = io->use_scaling ? - EmitRescaledRows(dec, rows_data, in_stride, io->mb_h, - rgba, buf->stride) : + EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h, + rgba, buf->stride) : EmitRows(output->colorspace, rows_data, in_stride, io->mb_w, io->mb_h, rgba, buf->stride); // Update 'last_out_row_'. @@ -676,50 +770,317 @@ static void ProcessRows(VP8LDecoder* const dec, int row) { assert(dec->last_row_ <= dec->height_); } -static int DecodeImageData(VP8LDecoder* const dec, - uint32_t* const data, int width, int height, - ProcessRowsFunc process_func) { +// Row-processing for the special case when alpha data contains only one +// transform (color indexing), and trivial non-green literals. +static int Is8bOptimizable(const VP8LMetadata* const hdr) { + int i; + if (hdr->color_cache_size_ > 0) return 0; + // When the Huffman tree contains only one symbol, we can skip the + // call to ReadSymbol() for red/blue/alpha channels. 
+ for (i = 0; i < hdr->num_htree_groups_; ++i) { + HuffmanCode** const htrees = hdr->htree_groups_[i].htrees; + if (htrees[RED][0].bits > 0) return 0; + if (htrees[BLUE][0].bits > 0) return 0; + if (htrees[ALPHA][0].bits > 0) return 0; + } + return 1; +} + +static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) { + const int num_rows = row - dec->last_row_; + const uint8_t* const in = + (uint8_t*)dec->pixels_ + dec->width_ * dec->last_row_; + if (num_rows > 0) { + ApplyInverseTransformsAlpha(dec, num_rows, in); + } + dec->last_row_ = dec->last_out_row_ = row; +} + +//------------------------------------------------------------------------------ +// Helper functions for fast pattern copy (8b and 32b) + +// cyclic rotation of pattern word +static WEBP_INLINE uint32_t Rotate8b(uint32_t V) { +#if defined(WORDS_BIGENDIAN) + return ((V & 0xff000000u) >> 24) | (V << 8); +#else + return ((V & 0xffu) << 24) | (V >> 8); +#endif +} + +// copy 1, 2 or 4-bytes pattern +static WEBP_INLINE void CopySmallPattern8b(const uint8_t* src, uint8_t* dst, + int length, uint32_t pattern) { + int i; + // align 'dst' to 4-bytes boundary. Adjust the pattern along the way. + while ((uintptr_t)dst & 3) { + *dst++ = *src++; + pattern = Rotate8b(pattern); + --length; + } + // Copy the pattern 4 bytes at a time. + for (i = 0; i < (length >> 2); ++i) { + ((uint32_t*)dst)[i] = pattern; + } + // Finish with left-overs. 'pattern' is still correctly positioned, + // so no Rotate8b() call is needed. + for (i <<= 2; i < length; ++i) { + dst[i] = src[i]; + } +} + +static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) { + const uint8_t* src = dst - dist; + if (length >= 8) { + uint32_t pattern = 0; + switch (dist) { + case 1: + pattern = src[0]; +#if defined(__arm__) || defined(_M_ARM) // arm doesn't like multiply that much + pattern |= pattern << 8; + pattern |= pattern << 16; +#elif defined(WEBP_USE_MIPS_DSP_R2) + __asm__ volatile ("replv.qb %0, %0" : "+r"(pattern)); +#else + pattern = 0x01010101u * pattern; +#endif + break; + case 2: + memcpy(&pattern, src, sizeof(uint16_t)); +#if defined(__arm__) || defined(_M_ARM) + pattern |= pattern << 16; +#elif defined(WEBP_USE_MIPS_DSP_R2) + __asm__ volatile ("replv.ph %0, %0" : "+r"(pattern)); +#else + pattern = 0x00010001u * pattern; +#endif + break; + case 4: + memcpy(&pattern, src, sizeof(uint32_t)); + break; + default: + goto Copy; + break; + } + CopySmallPattern8b(src, dst, length, pattern); + return; + } + Copy: + if (dist >= length) { // no overlap -> use memcpy() + memcpy(dst, src, length * sizeof(*dst)); + } else { + int i; + for (i = 0; i < length; ++i) dst[i] = src[i]; + } +} + +// copy pattern of 1 or 2 uint32_t's +static WEBP_INLINE void CopySmallPattern32b(const uint32_t* src, + uint32_t* dst, + int length, uint64_t pattern) { + int i; + if ((uintptr_t)dst & 4) { // Align 'dst' to 8-bytes boundary. + *dst++ = *src++; + pattern = (pattern >> 32) | (pattern << 32); + --length; + } + assert(0 == ((uintptr_t)dst & 7)); + for (i = 0; i < (length >> 1); ++i) { + ((uint64_t*)dst)[i] = pattern; // Copy the pattern 8 bytes at a time. + } + if (length & 1) { // Finish with left-over. 
+ dst[i << 1] = src[i << 1]; + } +} + +static WEBP_INLINE void CopyBlock32b(uint32_t* const dst, + int dist, int length) { + const uint32_t* const src = dst - dist; + if (dist <= 2 && length >= 4 && ((uintptr_t)dst & 3) == 0) { + uint64_t pattern; + if (dist == 1) { + pattern = (uint64_t)src[0]; + pattern |= pattern << 32; + } else { + memcpy(&pattern, src, sizeof(pattern)); + } + CopySmallPattern32b(src, dst, length, pattern); + } else if (dist >= length) { // no overlap + memcpy(dst, src, length * sizeof(*dst)); + } else { + int i; + for (i = 0; i < length; ++i) dst[i] = src[i]; + } +} + +//------------------------------------------------------------------------------ + +static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data, + int width, int height, int last_row) { int ok = 1; - int col = 0, row = 0; + int row = dec->last_pixel_ / width; + int col = dec->last_pixel_ % width; VP8LBitReader* const br = &dec->br_; VP8LMetadata* const hdr = &dec->hdr_; - HTreeGroup* htree_group = hdr->htree_groups_; - uint32_t* src = data; - uint32_t* last_cached = data; - uint32_t* const src_end = data + width * height; + const HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row); + int pos = dec->last_pixel_; // current position + const int end = width * height; // End of data + const int last = width * last_row; // Last pixel to decode const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES; - const int color_cache_limit = len_code_limit + hdr->color_cache_size_; - VP8LColorCache* const color_cache = - (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL; const int mask = hdr->huffman_mask_; - assert(htree_group != NULL); + assert(pos < end); + assert(last_row <= height); + assert(Is8bOptimizable(hdr)); - while (!br->eos_ && src < src_end) { + while (!br->eos_ && pos < last) { int code; - // Only update when changing tile. Note we could use the following test: - // if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed - // but that's actually slower and requires storing the previous col/row + // Only update when changing tile. if ((col & mask) == 0) { htree_group = GetHtreeGroupForPos(hdr, col, row); } VP8LFillBitWindow(br); - code = ReadSymbol(&htree_group->htrees_[GREEN], br); - if (code < NUM_LITERAL_CODES) { // Literal. 
- int red, green, blue, alpha; - red = ReadSymbol(&htree_group->htrees_[RED], br); - green = code; + code = ReadSymbol(htree_group->htrees[GREEN], br); + if (code < NUM_LITERAL_CODES) { // Literal + data[pos] = code; + ++pos; + ++col; + if (col >= width) { + col = 0; + ++row; + if (row % NUM_ARGB_CACHE_ROWS == 0) { + ExtractPalettedAlphaRows(dec, row); + } + } + } else if (code < len_code_limit) { // Backward reference + int dist_code, dist; + const int length_sym = code - NUM_LITERAL_CODES; + const int length = GetCopyLength(length_sym, br); + const int dist_symbol = ReadSymbol(htree_group->htrees[DIST], br); VP8LFillBitWindow(br); - blue = ReadSymbol(&htree_group->htrees_[BLUE], br); - alpha = ReadSymbol(&htree_group->htrees_[ALPHA], br); - *src = (alpha << 24) + (red << 16) + (green << 8) + blue; - AdvanceByOne: + dist_code = GetCopyDistance(dist_symbol, br); + dist = PlaneCodeToDistance(width, dist_code); + if (pos >= dist && end - pos >= length) { + CopyBlock8b(data + pos, dist, length); + } else { + ok = 0; + goto End; + } + pos += length; + col += length; + while (col >= width) { + col -= width; + ++row; + if (row % NUM_ARGB_CACHE_ROWS == 0) { + ExtractPalettedAlphaRows(dec, row); + } + } + if (pos < last && (col & mask)) { + htree_group = GetHtreeGroupForPos(hdr, col, row); + } + } else { // Not reached + ok = 0; + goto End; + } + assert(br->eos_ == VP8LIsEndOfStream(br)); + } + // Process the remaining rows corresponding to last row-block. + ExtractPalettedAlphaRows(dec, row); + + End: + if (!ok || (br->eos_ && pos < end)) { + ok = 0; + dec->status_ = br->eos_ ? VP8_STATUS_SUSPENDED + : VP8_STATUS_BITSTREAM_ERROR; + } else { + dec->last_pixel_ = pos; + } + return ok; +} + +static void SaveState(VP8LDecoder* const dec, int last_pixel) { + assert(dec->incremental_); + dec->saved_br_ = dec->br_; + dec->saved_last_pixel_ = last_pixel; + if (dec->hdr_.color_cache_size_ > 0) { + VP8LColorCacheCopy(&dec->hdr_.color_cache_, &dec->hdr_.saved_color_cache_); + } +} + +static void RestoreState(VP8LDecoder* const dec) { + assert(dec->br_.eos_); + dec->status_ = VP8_STATUS_SUSPENDED; + dec->br_ = dec->saved_br_; + dec->last_pixel_ = dec->saved_last_pixel_; + if (dec->hdr_.color_cache_size_ > 0) { + VP8LColorCacheCopy(&dec->hdr_.saved_color_cache_, &dec->hdr_.color_cache_); + } +} + +#define SYNC_EVERY_N_ROWS 8 // minimum number of rows between check-points +static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data, + int width, int height, int last_row, + ProcessRowsFunc process_func) { + int row = dec->last_pixel_ / width; + int col = dec->last_pixel_ % width; + VP8LBitReader* const br = &dec->br_; + VP8LMetadata* const hdr = &dec->hdr_; + HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row); + uint32_t* src = data + dec->last_pixel_; + uint32_t* last_cached = src; + uint32_t* const src_end = data + width * height; // End of data + uint32_t* const src_last = data + width * last_row; // Last pixel to decode + const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES; + const int color_cache_limit = len_code_limit + hdr->color_cache_size_; + int next_sync_row = dec->incremental_ ? row : 1 << 24; + VP8LColorCache* const color_cache = + (hdr->color_cache_size_ > 0) ? 
&hdr->color_cache_ : NULL; + const int mask = hdr->huffman_mask_; + assert(htree_group != NULL); + assert(src < src_end); + assert(src_last <= src_end); + + while (src < src_last) { + int code; + if (row >= next_sync_row) { + SaveState(dec, (int)(src - data)); + next_sync_row = row + SYNC_EVERY_N_ROWS; + } + // Only update when changing tile. Note we could use this test: + // if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed + // but that's actually slower and needs storing the previous col/row. + if ((col & mask) == 0) htree_group = GetHtreeGroupForPos(hdr, col, row); + if (htree_group->is_trivial_code) { + *src = htree_group->literal_arb; + goto AdvanceByOne; + } + VP8LFillBitWindow(br); + if (htree_group->use_packed_table) { + code = ReadPackedSymbols(htree_group, br, src); + if (code == PACKED_NON_LITERAL_CODE) goto AdvanceByOne; + } else { + code = ReadSymbol(htree_group->htrees[GREEN], br); + } + if (br->eos_) break; // early out + if (code < NUM_LITERAL_CODES) { // Literal + if (htree_group->is_trivial_literal) { + *src = htree_group->literal_arb | (code << 8); + } else { + int red, blue, alpha; + red = ReadSymbol(htree_group->htrees[RED], br); + VP8LFillBitWindow(br); + blue = ReadSymbol(htree_group->htrees[BLUE], br); + alpha = ReadSymbol(htree_group->htrees[ALPHA], br); + if (br->eos_) break; + *src = ((uint32_t)alpha << 24) | (red << 16) | (code << 8) | blue; + } + AdvanceByOne: ++src; ++col; if (col >= width) { col = 0; ++row; - if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) { + if ((row % NUM_ARGB_CACHE_ROWS == 0) && (process_func != NULL)) { process_func(dec, row); } if (color_cache != NULL) { @@ -728,40 +1089,39 @@ static int DecodeImageData(VP8LDecoder* const dec, } } } - } else if (code < len_code_limit) { // Backward reference + } else if (code < len_code_limit) { // Backward reference int dist_code, dist; const int length_sym = code - NUM_LITERAL_CODES; const int length = GetCopyLength(length_sym, br); - const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br); + const int dist_symbol = ReadSymbol(htree_group->htrees[DIST], br); VP8LFillBitWindow(br); dist_code = GetCopyDistance(dist_symbol, br); dist = PlaneCodeToDistance(width, dist_code); - if (src - data < dist || src_end - src < length) { - ok = 0; - goto End; - } - { - int i; - for (i = 0; i < length; ++i) src[i] = src[i - dist]; - src += length; + if (br->eos_) break; + if (src - data < (ptrdiff_t)dist || src_end - src < (ptrdiff_t)length) { + goto Error; + } else { + CopyBlock32b(src, dist, length); } + src += length; col += length; while (col >= width) { col -= width; ++row; - if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) { + if ((row % NUM_ARGB_CACHE_ROWS == 0) && (process_func != NULL)) { process_func(dec, row); } } - if (src < src_end) { - htree_group = GetHtreeGroupForPos(hdr, col, row); - if (color_cache != NULL) { - while (last_cached < src) { - VP8LColorCacheInsert(color_cache, *last_cached++); - } + // Because of the check done above (before 'src' was incremented by + // 'length'), the following holds true. + assert(src <= src_end); + if (col & mask) htree_group = GetHtreeGroupForPos(hdr, col, row); + if (color_cache != NULL) { + while (last_cached < src) { + VP8LColorCacheInsert(color_cache, *last_cached++); } } - } else if (code < color_cache_limit) { // Color cache. 
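
/*
 * Illustrative sketch (not part of the patch): how DecodeImageData() and
 * DecodeAlphaData() interpret a symbol read from the GREEN Huffman tree.
 * The constants mirror format_constants.h (NUM_LITERAL_CODES = 256,
 * NUM_LENGTH_CODES = 24 in the lossless format); 'ClassifySymbol' is a
 * hypothetical helper, not a libwebp function.
 */
enum SymbolKind { SYMBOL_LITERAL, SYMBOL_BACKREF, SYMBOL_CACHE, SYMBOL_INVALID };

static enum SymbolKind ClassifySymbol(int code, int color_cache_size) {
  const int kNumLiteralCodes = 256;                 /* green value or palette index */
  const int kLenCodeLimit = kNumLiteralCodes + 24;  /* + NUM_LENGTH_CODES */
  if (code < kNumLiteralCodes) return SYMBOL_LITERAL;
  if (code < kLenCodeLimit) return SYMBOL_BACKREF;  /* LZ77 length prefix follows */
  if (code < kLenCodeLimit + color_cache_size) return SYMBOL_CACHE;
  return SYMBOL_INVALID;                            /* the "Not reached" branch */
}
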
+ } else if (code < color_cache_limit) { // Color cache const int key = code - len_code_limit; assert(color_cache != NULL); while (last_cached < src) { @@ -769,33 +1129,38 @@ static int DecodeImageData(VP8LDecoder* const dec, } *src = VP8LColorCacheLookup(color_cache, key); goto AdvanceByOne; - } else { // Not reached. - ok = 0; - goto End; + } else { // Not reached + goto Error; } - ok = !br->error_; - if (!ok) goto End; + assert(br->eos_ == VP8LIsEndOfStream(br)); } - // Process the remaining rows corresponding to last row-block. - if (process_func != NULL) process_func(dec, row); - End: - if (br->error_ || !ok || (br->eos_ && src < src_end)) { - ok = 0; - dec->status_ = (!br->eos_) ? - VP8_STATUS_BITSTREAM_ERROR : VP8_STATUS_SUSPENDED; - } else if (src == src_end) { - dec->state_ = READ_DATA; + if (dec->incremental_ && br->eos_ && src < src_end) { + RestoreState(dec); + } else if (!br->eos_) { + // Process the remaining rows corresponding to last row-block. + if (process_func != NULL) { + process_func(dec, row); + } + dec->status_ = VP8_STATUS_OK; + dec->last_pixel_ = (int)(src - data); // end-of-scan marker + } else { + // if not incremental, and we are past the end of buffer (eos_=1), then this + // is a real bitstream error. + goto Error; } + return 1; - return ok; + Error: + dec->status_ = VP8_STATUS_BITSTREAM_ERROR; + return 0; } // ----------------------------------------------------------------------------- // VP8LTransform static void ClearTransform(VP8LTransform* const transform) { - free(transform->data_); + WebPSafeFree(transform->data_); transform->data_ = NULL; } @@ -819,7 +1184,7 @@ static int ExpandColorMap(int num_colors, VP8LTransform* const transform) { } for (; i < 4 * final_num_colors; ++i) new_data[i] = 0; // black tail. - free(transform->data_); + WebPSafeFree(transform->data_); transform->data_ = new_color_map; } return 1; @@ -882,16 +1247,18 @@ static int ReadTransform(int* const xsize, int const* ysize, // VP8LMetadata static void InitMetadata(VP8LMetadata* const hdr) { - assert(hdr); + assert(hdr != NULL); memset(hdr, 0, sizeof(*hdr)); } static void ClearMetadata(VP8LMetadata* const hdr) { - assert(hdr); + assert(hdr != NULL); - free(hdr->huffman_image_); - DeleteHtreeGroups(hdr->htree_groups_, hdr->num_htree_groups_); + WebPSafeFree(hdr->huffman_image_); + WebPSafeFree(hdr->huffman_tables_); + VP8LHtreeGroupsFree(hdr->htree_groups_); VP8LColorCacheClear(&hdr->color_cache_); + VP8LColorCacheClear(&hdr->saved_color_cache_); InitMetadata(hdr); } @@ -899,11 +1266,13 @@ static void ClearMetadata(VP8LMetadata* const hdr) { // VP8LDecoder VP8LDecoder* VP8LNew(void) { - VP8LDecoder* const dec = (VP8LDecoder*)calloc(1, sizeof(*dec)); + VP8LDecoder* const dec = (VP8LDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec)); if (dec == NULL) return NULL; dec->status_ = VP8_STATUS_OK; - dec->action_ = READ_DIM; dec->state_ = READ_DIM; + + VP8LDspInit(); // Init critical function pointers. 
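
/*
 * Illustrative sketch (not part of the patch): the color cache consulted by
 * the color-cache branch above is a small direct-mapped table indexed by a
 * multiplicative hash of the ARGB value. The 0x1e35a7bd multiplier is the
 * one given in the WebP lossless bitstream specification (stated here as an
 * assumption); the names below are simplified stand-ins for VP8LColorCache.
 */
#include <stdint.h>

typedef struct { uint32_t* colors; int hash_bits; } SimpleColorCache;

static uint32_t CacheKey(uint32_t argb, int hash_bits) {
  return (0x1e35a7bdU * argb) >> (32 - hash_bits);
}
static void CacheInsert(SimpleColorCache* const cc, uint32_t argb) {
  cc->colors[CacheKey(argb, cc->hash_bits)] = argb;
}
static uint32_t CacheLookup(const SimpleColorCache* const cc, uint32_t key) {
  return cc->colors[key];  /* 'key' is (code - len_code_limit), as in the decoder */
}
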
+ return dec; } @@ -912,15 +1281,15 @@ void VP8LClear(VP8LDecoder* const dec) { if (dec == NULL) return; ClearMetadata(&dec->hdr_); - free(dec->argb_); - dec->argb_ = NULL; + WebPSafeFree(dec->pixels_); + dec->pixels_ = NULL; for (i = 0; i < dec->next_transform_; ++i) { ClearTransform(&dec->transforms_[i]); } dec->next_transform_ = 0; dec->transforms_seen_ = 0; - free(dec->rescaler_memory); + WebPSafeFree(dec->rescaler_memory); dec->rescaler_memory = NULL; dec->output_ = NULL; // leave no trace behind @@ -929,7 +1298,7 @@ void VP8LClear(VP8LDecoder* const dec) { void VP8LDelete(VP8LDecoder* const dec) { if (dec != NULL) { VP8LClear(dec); - free(dec); + WebPSafeFree(dec); } } @@ -1009,19 +1378,14 @@ static int DecodeImageStream(int xsize, int ysize, } // Use the Huffman trees to decode the LZ77 encoded data. - ok = DecodeImageData(dec, data, transform_xsize, transform_ysize, NULL); - ok = ok && !br->error_; + ok = DecodeImageData(dec, data, transform_xsize, transform_ysize, + transform_ysize, NULL); + ok = ok && !br->eos_; End: - if (!ok) { - free(data); + WebPSafeFree(data); ClearMetadata(hdr); - // If not enough data (br.eos_) resulted in BIT_STREAM_ERROR, update the - // status appropriately. - if (dec->status_ == VP8_STATUS_BITSTREAM_ERROR && dec->br_.eos_) { - dec->status_ = VP8_STATUS_SUSPENDED; - } } else { if (decoded_data != NULL) { *decoded_data = data; @@ -1031,41 +1395,52 @@ static int DecodeImageStream(int xsize, int ysize, assert(data == NULL); assert(is_level0); } + dec->last_pixel_ = 0; // Reset for future DECODE_DATA_FUNC() calls. if (!is_level0) ClearMetadata(hdr); // Clean up temporary data behind. } return ok; } //------------------------------------------------------------------------------ -// Allocate dec->argb_ and dec->argb_cache_ using dec->width_ and dec->height_ - -static int AllocateARGBBuffers(VP8LDecoder* const dec, int final_width) { +// Allocate internal buffers dec->pixels_ and dec->argb_cache_. +static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) { const uint64_t num_pixels = (uint64_t)dec->width_ * dec->height_; // Scratch buffer corresponding to top-prediction row for transforming the - // first row in the row-blocks. - const uint64_t cache_top_pixels = final_width; - // Scratch buffer for temporary BGRA storage. + // first row in the row-blocks. Not needed for paletted alpha. + const uint64_t cache_top_pixels = (uint16_t)final_width; + // Scratch buffer for temporary BGRA storage. Not needed for paletted alpha. 
const uint64_t cache_pixels = (uint64_t)final_width * NUM_ARGB_CACHE_ROWS; const uint64_t total_num_pixels = num_pixels + cache_top_pixels + cache_pixels; assert(dec->width_ <= final_width); - dec->argb_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(*dec->argb_)); - if (dec->argb_ == NULL) { + dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint32_t)); + if (dec->pixels_ == NULL) { dec->argb_cache_ = NULL; // for sanity check dec->status_ = VP8_STATUS_OUT_OF_MEMORY; return 0; } - dec->argb_cache_ = dec->argb_ + num_pixels + cache_top_pixels; + dec->argb_cache_ = dec->pixels_ + num_pixels + cache_top_pixels; + return 1; +} + +static int AllocateInternalBuffers8b(VP8LDecoder* const dec) { + const uint64_t total_num_pixels = (uint64_t)dec->width_ * dec->height_; + dec->argb_cache_ = NULL; // for sanity check + dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint8_t)); + if (dec->pixels_ == NULL) { + dec->status_ = VP8_STATUS_OUT_OF_MEMORY; + return 0; + } return 1; } //------------------------------------------------------------------------------ -// Special row-processing that only stores the alpha data. +// Special row-processing that only stores the alpha data. static void ExtractAlphaRows(VP8LDecoder* const dec, int row) { const int num_rows = row - dec->last_row_; - const uint32_t* const in = dec->argb_ + dec->width_ * dec->last_row_; + const uint32_t* const in = dec->pixels_ + dec->width_ * dec->last_row_; if (num_rows <= 0) return; // Nothing to be done. ApplyInverseTransforms(dec, num_rows, in); @@ -1079,44 +1454,77 @@ static void ExtractAlphaRows(VP8LDecoder* const dec, int row) { int i; for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff; } - dec->last_row_ = dec->last_out_row_ = row; } -int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data, - size_t data_size, uint8_t* const output) { - VP8Io io; +int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec, + const uint8_t* const data, size_t data_size, + uint8_t* const output) { int ok = 0; - VP8LDecoder* const dec = VP8LNew(); - if (dec == NULL) return 0; - - dec->width_ = width; - dec->height_ = height; - dec->io_ = &io; + VP8LDecoder* dec; + VP8Io* io; + assert(alph_dec != NULL); + alph_dec->vp8l_dec_ = VP8LNew(); + if (alph_dec->vp8l_dec_ == NULL) return 0; + dec = alph_dec->vp8l_dec_; + + dec->width_ = alph_dec->width_; + dec->height_ = alph_dec->height_; + dec->io_ = &alph_dec->io_; + io = dec->io_; - VP8InitIo(&io); - WebPInitCustomIo(NULL, &io); // Just a sanity Init. io won't be used. - io.opaque = output; - io.width = width; - io.height = height; + VP8InitIo(io); + WebPInitCustomIo(NULL, io); // Just a sanity Init. io won't be used. + io->opaque = output; + io->width = alph_dec->width_; + io->height = alph_dec->height_; dec->status_ = VP8_STATUS_OK; VP8LInitBitReader(&dec->br_, data, data_size); - dec->action_ = READ_HDR; - if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Err; + if (!DecodeImageStream(alph_dec->width_, alph_dec->height_, 1, dec, NULL)) { + goto Err; + } - // Allocate output (note that dec->width_ may have changed here). - if (!AllocateARGBBuffers(dec, width)) goto Err; + // Special case: if alpha data uses only the color indexing transform and + // doesn't use color cache (a frequent case), we will use DecodeAlphaData() + // method that only needs allocation of 1 byte per pixel (alpha channel). 
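
/*
 * Illustrative sketch (not part of the patch): buffer sizes implied by the
 * two allocators above. Paletted alpha stores one byte per pixel and needs
 * no BGRA scratch rows; otherwise the decoder stores 4-byte BGRA pixels
 * plus one top-prediction row and NUM_ARGB_CACHE_ROWS scratch rows
 * (16 is assumed here for NUM_ARGB_CACHE_ROWS, purely for the arithmetic).
 */
#include <stdint.h>

static uint64_t InternalBufferBytes(int width, int height,
                                    int final_width, int use_8b_decode) {
  const uint64_t num_pixels = (uint64_t)width * height;
  if (use_8b_decode) {
    return num_pixels * sizeof(uint8_t);      /* ~ AllocateInternalBuffers8b() */
  } else {
    const int kCacheRows = 16;                /* assumption: NUM_ARGB_CACHE_ROWS */
    const uint64_t scratch = (uint64_t)final_width * (1 + kCacheRows);
    return (num_pixels + scratch) * sizeof(uint32_t);  /* ~ ...Buffers32b() */
  }
}
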
+ if (dec->next_transform_ == 1 && + dec->transforms_[0].type_ == COLOR_INDEXING_TRANSFORM && + Is8bOptimizable(&dec->hdr_)) { + alph_dec->use_8b_decode = 1; + ok = AllocateInternalBuffers8b(dec); + } else { + // Allocate internal buffers (note that dec->width_ may have changed here). + alph_dec->use_8b_decode = 0; + ok = AllocateInternalBuffers32b(dec, alph_dec->width_); + } - // Decode (with special row processing). - dec->action_ = READ_DATA; - ok = DecodeImageData(dec, dec->argb_, dec->width_, dec->height_, - ExtractAlphaRows); + if (!ok) goto Err; + + return 1; Err: - VP8LDelete(dec); - return ok; + VP8LDelete(alph_dec->vp8l_dec_); + alph_dec->vp8l_dec_ = NULL; + return 0; +} + +int VP8LDecodeAlphaImageStream(ALPHDecoder* const alph_dec, int last_row) { + VP8LDecoder* const dec = alph_dec->vp8l_dec_; + assert(dec != NULL); + assert(last_row <= dec->height_); + + if (dec->last_pixel_ == dec->width_ * dec->height_) { + return 1; // done + } + + // Decode (with special row processing). + return alph_dec->use_8b_decode ? + DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_, + last_row) : + DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_, + last_row, ExtractAlphaRows); } //------------------------------------------------------------------------------ @@ -1141,14 +1549,13 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) { io->width = width; io->height = height; - dec->action_ = READ_HDR; if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Error; return 1; Error: - VP8LClear(dec); - assert(dec->status_ != VP8_STATUS_OK); - return 0; + VP8LClear(dec); + assert(dec->status_ != VP8_STATUS_OK); + return 0; } int VP8LDecodeImage(VP8LDecoder* const dec) { @@ -1158,33 +1565,57 @@ int VP8LDecodeImage(VP8LDecoder* const dec) { // Sanity checks. if (dec == NULL) return 0; + assert(dec->hdr_.huffman_tables_ != NULL); + assert(dec->hdr_.htree_groups_ != NULL); + assert(dec->hdr_.num_htree_groups_ > 0); + io = dec->io_; assert(io != NULL); params = (WebPDecParams*)io->opaque; assert(params != NULL); - dec->output_ = params->output; - assert(dec->output_ != NULL); // Initialization. - if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) { - dec->status_ = VP8_STATUS_INVALID_PARAM; - goto Err; - } + if (dec->state_ != READ_DATA) { + dec->output_ = params->output; + assert(dec->output_ != NULL); + + if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) { + dec->status_ = VP8_STATUS_INVALID_PARAM; + goto Err; + } - if (!AllocateARGBBuffers(dec, io->width)) goto Err; + if (!AllocateInternalBuffers32b(dec, io->width)) goto Err; - if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err; + if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err; + + if (io->use_scaling || WebPIsPremultipliedMode(dec->output_->colorspace)) { + // need the alpha-multiply functions for premultiplied output or rescaling + WebPInitAlphaProcessing(); + } + if (!WebPIsRGBMode(dec->output_->colorspace)) { + WebPInitConvertARGBToYUV(); + if (dec->output_->u.YUVA.a != NULL) WebPInitAlphaProcessing(); + } + if (dec->incremental_) { + if (dec->hdr_.color_cache_size_ > 0 && + dec->hdr_.saved_color_cache_.colors_ == NULL) { + if (!VP8LColorCacheInit(&dec->hdr_.saved_color_cache_, + dec->hdr_.color_cache_.hash_bits_)) { + dec->status_ = VP8_STATUS_OUT_OF_MEMORY; + goto Err; + } + } + } + dec->state_ = READ_DATA; + } // Decode. 
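
/*
 * Illustrative sketch (not part of the patch): the checkpoint/rollback
 * pattern behind incremental decoding. When incremental_ is set,
 * DecodeImageData() snapshots the bit reader and the last decoded pixel
 * every SYNC_EVERY_N_ROWS rows (SaveState); if the bit reader runs out of
 * data mid-image, RestoreState() rolls back to that snapshot and the call
 * reports VP8_STATUS_SUSPENDED so decoding can resume when more bytes
 * arrive. The real code also snapshots the color cache, omitted here;
 * the types below are simplified stand-ins for the decoder state.
 */
typedef struct { int bit_pos; } FakeBitReader;   /* stands in for VP8LBitReader */
typedef struct {
  FakeBitReader br, saved_br;
  int last_pixel, saved_last_pixel;
} FakeDecoder;

static void Checkpoint(FakeDecoder* const dec) {  /* ~ SaveState() */
  dec->saved_br = dec->br;
  dec->saved_last_pixel = dec->last_pixel;
}
static void Rollback(FakeDecoder* const dec) {    /* ~ RestoreState() */
  dec->br = dec->saved_br;
  dec->last_pixel = dec->saved_last_pixel;
}
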
- dec->action_ = READ_DATA; - if (!DecodeImageData(dec, dec->argb_, dec->width_, dec->height_, - ProcessRows)) { + if (!DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_, + dec->height_, ProcessRows)) { goto Err; } - // Cleanup. params->last_y = dec->last_out_row_; - VP8LClear(dec); return 1; Err: @@ -1194,7 +1625,3 @@ int VP8LDecodeImage(VP8LDecoder* const dec) { } //------------------------------------------------------------------------------ - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/dec/vp8li.h b/drivers/webp/dec/vp8li.h index 5f6cd6a01c..8886e47f62 100644 --- a/drivers/webp/dec/vp8li.h +++ b/drivers/webp/dec/vp8li.h @@ -1,8 +1,10 @@ // Copyright 2012 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Lossless decoder: internal header. @@ -18,9 +20,8 @@ #include "../utils/bit_reader.h" #include "../utils/color_cache.h" #include "../utils/huffman.h" -#include "../format_constants.h" -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus extern "C" { #endif @@ -40,12 +41,9 @@ struct VP8LTransform { }; typedef struct { - HuffmanTree htrees_[HUFFMAN_CODES_PER_META_CODE]; -} HTreeGroup; - -typedef struct { int color_cache_size_; VP8LColorCache color_cache_; + VP8LColorCache saved_color_cache_; // for incremental int huffman_mask_; int huffman_subsample_bits_; @@ -53,24 +51,32 @@ typedef struct { uint32_t *huffman_image_; int num_htree_groups_; HTreeGroup *htree_groups_; + HuffmanCode *huffman_tables_; } VP8LMetadata; -typedef struct { +typedef struct VP8LDecoder VP8LDecoder; +struct VP8LDecoder { VP8StatusCode status_; - VP8LDecodeState action_; VP8LDecodeState state_; VP8Io *io_; const WebPDecBuffer *output_; // shortcut to io->opaque->output - uint32_t *argb_; // Internal data: always in BGRA color mode. + uint32_t *pixels_; // Internal data: either uint8_t* for alpha + // or uint32_t* for BGRA. uint32_t *argb_cache_; // Scratch buffer for temporary BGRA storage. VP8LBitReader br_; + int incremental_; // if true, incremental decoding is expected + VP8LBitReader saved_br_; // note: could be local variables too + int saved_last_pixel_; int width_; int height_; int last_row_; // last input row decoded so far. + int last_pixel_; // last pixel decoded so far. However, it may + // not be transformed, scaled and + // color-converted yet. int last_out_row_; // last row output so far. VP8LMetadata hdr_; @@ -82,18 +88,27 @@ typedef struct { uint8_t *rescaler_memory; // Working memory for rescaling work. WebPRescaler *rescaler; // Common rescaler for all channels. -} VP8LDecoder; +}; //------------------------------------------------------------------------------ // internal functions. Not public. +struct ALPHDecoder; // Defined in dec/alphai.h. + // in vp8l.c -// Decodes a raw image stream (without header) and store the alpha data -// into *output, which must be of size width x height. 
Returns false in case -// of error. -int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data, - size_t data_size, uint8_t* const output); +// Decodes image header for alpha data stored using lossless compression. +// Returns false in case of error. +int VP8LDecodeAlphaHeader(struct ALPHDecoder* const alph_dec, + const uint8_t* const data, size_t data_size, + uint8_t* const output); + +// Decodes *at least* 'last_row' rows of alpha. If some of the initial rows are +// already decoded in previous call(s), it will resume decoding from where it +// was paused. +// Returns false in case of bitstream error. +int VP8LDecodeAlphaImageStream(struct ALPHDecoder* const alph_dec, + int last_row); // Allocates and initialize a new lossless decoder instance. VP8LDecoder* VP8LNew(void); @@ -114,7 +129,7 @@ void VP8LDelete(VP8LDecoder* const dec); //------------------------------------------------------------------------------ -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus } // extern "C" #endif diff --git a/drivers/webp/dec/webp.c b/drivers/webp/dec/webp.c index f44bc2b8ae..952178fa89 100644 --- a/drivers/webp/dec/webp.c +++ b/drivers/webp/dec/webp.c @@ -1,8 +1,10 @@ // Copyright 2010 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Main decoding functions for WEBP images. @@ -14,11 +16,8 @@ #include "./vp8i.h" #include "./vp8li.h" #include "./webpi.h" -#include "../format_constants.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif +#include "../utils/utils.h" +#include "../webp/mux_types.h" // ALPHA_FLAG //------------------------------------------------------------------------------ // RIFF layout is: @@ -40,27 +39,20 @@ extern "C" { // 20..23 VP8X flags bit-map corresponding to the chunk-types present. // 24..26 Width of the Canvas Image. // 27..29 Height of the Canvas Image. -// There can be extra chunks after the "VP8X" chunk (ICCP, TILE, FRM, VP8, -// META ...) +// There can be extra chunks after the "VP8X" chunk (ICCP, FRGM, ANMF, VP8, +// VP8L, XMP, EXIF ...) // All sizes are in little-endian order. // Note: chunk data size must be padded to multiple of 2 when written. -static WEBP_INLINE uint32_t get_le24(const uint8_t* const data) { - return data[0] | (data[1] << 8) | (data[2] << 16); -} - -static WEBP_INLINE uint32_t get_le32(const uint8_t* const data) { - return (uint32_t)get_le24(data) | (data[3] << 24); -} - // Validates the RIFF container (if detected) and skips over it. -// If a RIFF container is detected, -// Returns VP8_STATUS_BITSTREAM_ERROR for invalid header, and -// VP8_STATUS_OK otherwise. +// If a RIFF container is detected, returns: +// VP8_STATUS_BITSTREAM_ERROR for invalid header, +// VP8_STATUS_NOT_ENOUGH_DATA for truncated data if have_all_data is true, +// and VP8_STATUS_OK otherwise. // In case there are not enough bytes (partial RIFF container), return 0 for // *riff_size. 
Else return the RIFF size extracted from the header. static VP8StatusCode ParseRIFF(const uint8_t** const data, - size_t* const data_size, + size_t* const data_size, int have_all_data, size_t* const riff_size) { assert(data != NULL); assert(data_size != NULL); @@ -71,11 +63,17 @@ static VP8StatusCode ParseRIFF(const uint8_t** const data, if (memcmp(*data + 8, "WEBP", TAG_SIZE)) { return VP8_STATUS_BITSTREAM_ERROR; // Wrong image file signature. } else { - const uint32_t size = get_le32(*data + TAG_SIZE); + const uint32_t size = GetLE32(*data + TAG_SIZE); // Check that we have at least one chunk (i.e "WEBP" + "VP8?nnnn"). if (size < TAG_SIZE + CHUNK_HEADER_SIZE) { return VP8_STATUS_BITSTREAM_ERROR; } + if (size > MAX_CHUNK_PAYLOAD) { + return VP8_STATUS_BITSTREAM_ERROR; + } + if (have_all_data && (size > *data_size - CHUNK_HEADER_SIZE)) { + return VP8_STATUS_NOT_ENOUGH_DATA; // Truncated bitstream. + } // We have a RIFF container. Skip it. *riff_size = size; *data += RIFF_HEADER_SIZE; @@ -111,7 +109,7 @@ static VP8StatusCode ParseVP8X(const uint8_t** const data, if (!memcmp(*data, "VP8X", TAG_SIZE)) { int width, height; uint32_t flags; - const uint32_t chunk_size = get_le32(*data + TAG_SIZE); + const uint32_t chunk_size = GetLE32(*data + TAG_SIZE); if (chunk_size != VP8X_CHUNK_SIZE) { return VP8_STATUS_BITSTREAM_ERROR; // Wrong chunk size. } @@ -120,9 +118,9 @@ static VP8StatusCode ParseVP8X(const uint8_t** const data, if (*data_size < vp8x_size) { return VP8_STATUS_NOT_ENOUGH_DATA; // Insufficient data. } - flags = get_le32(*data + 8); - width = 1 + get_le24(*data + 12); - height = 1 + get_le24(*data + 15); + flags = GetLE32(*data + 8); + width = 1 + GetLE24(*data + 12); + height = 1 + GetLE24(*data + 15); if (width * (uint64_t)height >= MAX_IMAGE_AREA) { return VP8_STATUS_BITSTREAM_ERROR; // image is too large } @@ -176,7 +174,10 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data, return VP8_STATUS_NOT_ENOUGH_DATA; } - chunk_size = get_le32(buf + TAG_SIZE); + chunk_size = GetLE32(buf + TAG_SIZE); + if (chunk_size > MAX_CHUNK_PAYLOAD) { + return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size. + } // For odd-sized chunk-payload, there's one byte padding at the end. disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1; total_size += disk_chunk_size; @@ -186,6 +187,15 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data, return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size. } + // Start of a (possibly incomplete) VP8/VP8L chunk implies that we have + // parsed all the optional chunks. + // Note: This check must occur before the check 'buf_size < disk_chunk_size' + // below to allow incomplete VP8/VP8L chunks. + if (!memcmp(buf, "VP8 ", TAG_SIZE) || + !memcmp(buf, "VP8L", TAG_SIZE)) { + return VP8_STATUS_OK; + } + if (buf_size < disk_chunk_size) { // Insufficient data. return VP8_STATUS_NOT_ENOUGH_DATA; } @@ -193,9 +203,6 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data, if (!memcmp(buf, "ALPH", TAG_SIZE)) { // A valid ALPH header. *alpha_data = buf + CHUNK_HEADER_SIZE; *alpha_size = chunk_size; - } else if (!memcmp(buf, "VP8 ", TAG_SIZE) || - !memcmp(buf, "VP8L", TAG_SIZE)) { // A valid VP8/VP8L header. - return VP8_STATUS_OK; // Found. } // We have a full and valid chunk; skip it. @@ -213,9 +220,8 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data, // extracted from the VP8/VP8L chunk header. // The flag '*is_lossless' is set to 1 in case of VP8L chunk / raw VP8L data. 
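
/*
 * Illustrative sketch (not part of the patch): the GetLE24()/GetLE32()
 * calls that replace the removed local get_le24()/get_le32() simply
 * assemble little-endian values byte by byte. Per the RIFF layout comment
 * earlier in this file, a VP8X canvas size is stored minus one in 3-byte
 * fields at file offsets 24..26 (width) and 27..29 (height), with the
 * flags word at 20..23. The helper names here are hypothetical.
 */
#include <stdint.h>
#include <stddef.h>

static uint32_t LE24(const uint8_t* const p) {
  return p[0] | (p[1] << 8) | ((uint32_t)p[2] << 16);
}
static uint32_t LE32(const uint8_t* const p) {
  return LE24(p) | ((uint32_t)p[3] << 24);
}

/* Reads flags and canvas size from a complete "RIFF....WEBPVP8X...." header. */
static int ReadVP8XCanvasSize(const uint8_t* const data, size_t size,
                              uint32_t* const flags,
                              int* const width, int* const height) {
  if (size < 30) return 0;
  *flags  = LE32(data + 20);
  *width  = 1 + (int)LE24(data + 24);
  *height = 1 + (int)LE24(data + 27);
  return 1;
}
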
static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr, - size_t* const data_size, - size_t riff_size, - size_t* const chunk_size, + size_t* const data_size, int have_all_data, + size_t riff_size, size_t* const chunk_size, int* const is_lossless) { const uint8_t* const data = *data_ptr; const int is_vp8 = !memcmp(data, "VP8 ", TAG_SIZE); @@ -234,10 +240,13 @@ static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr, if (is_vp8 || is_vp8l) { // Bitstream contains VP8/VP8L header. - const uint32_t size = get_le32(data + TAG_SIZE); + const uint32_t size = GetLE32(data + TAG_SIZE); if ((riff_size >= minimal_size) && (size > riff_size - minimal_size)) { return VP8_STATUS_BITSTREAM_ERROR; // Inconsistent size information. } + if (have_all_data && (size > *data_size - CHUNK_HEADER_SIZE)) { + return VP8_STATUS_NOT_ENOUGH_DATA; // Truncated bitstream. + } // Skip over CHUNK_HEADER_SIZE bytes from VP8/VP8L Header. *chunk_size = size; *data_ptr += CHUNK_HEADER_SIZE; @@ -270,9 +279,19 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data, int* const width, int* const height, int* const has_alpha, + int* const has_animation, + int* const format, WebPHeaderStructure* const headers) { + int canvas_width = 0; + int canvas_height = 0; + int image_width = 0; + int image_height = 0; int found_riff = 0; int found_vp8x = 0; + int animation_present = 0; + int fragments_present = 0; + const int have_all_data = (headers != NULL) ? headers->have_all_data : 0; + VP8StatusCode status; WebPHeaderStructure hdrs; @@ -284,7 +303,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data, hdrs.data_size = data_size; // Skip over RIFF header. - status = ParseRIFF(&data, &data_size, &hdrs.riff_size); + status = ParseRIFF(&data, &data_size, have_all_data, &hdrs.riff_size); if (status != VP8_STATUS_OK) { return status; // Wrong RIFF header / insufficient data. } @@ -293,22 +312,35 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data, // Skip over VP8X. { uint32_t flags = 0; - status = ParseVP8X(&data, &data_size, &found_vp8x, width, height, &flags); + status = ParseVP8X(&data, &data_size, &found_vp8x, + &canvas_width, &canvas_height, &flags); if (status != VP8_STATUS_OK) { return status; // Wrong VP8X / insufficient data. } + animation_present = !!(flags & ANIMATION_FLAG); + fragments_present = !!(flags & FRAGMENTS_FLAG); if (!found_riff && found_vp8x) { // Note: This restriction may be removed in the future, if it becomes // necessary to send VP8X chunk to the decoder. return VP8_STATUS_BITSTREAM_ERROR; } - if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG_BIT); - if (found_vp8x && headers == NULL) { - return VP8_STATUS_OK; // Return features from VP8X header. + if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG); + if (has_animation != NULL) *has_animation = animation_present; + if (format != NULL) *format = 0; // default = undefined + + image_width = canvas_width; + image_height = canvas_height; + if (found_vp8x && (animation_present || fragments_present) && + headers == NULL) { + status = VP8_STATUS_OK; + goto ReturnWidthHeight; // Just return features from VP8X header. } } - if (data_size < TAG_SIZE) return VP8_STATUS_NOT_ENOUGH_DATA; + if (data_size < TAG_SIZE) { + status = VP8_STATUS_NOT_ENOUGH_DATA; + goto ReturnWidthHeight; + } // Skip over optional chunks if data started with "RIFF + VP8X" or "ALPH". 
if ((found_riff && found_vp8x) || @@ -316,43 +348,49 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data, status = ParseOptionalChunks(&data, &data_size, hdrs.riff_size, &hdrs.alpha_data, &hdrs.alpha_data_size); if (status != VP8_STATUS_OK) { - return status; // Found an invalid chunk size / insufficient data. + goto ReturnWidthHeight; // Invalid chunk size / insufficient data. } } // Skip over VP8/VP8L header. - status = ParseVP8Header(&data, &data_size, hdrs.riff_size, + status = ParseVP8Header(&data, &data_size, have_all_data, hdrs.riff_size, &hdrs.compressed_size, &hdrs.is_lossless); if (status != VP8_STATUS_OK) { - return status; // Wrong VP8/VP8L chunk-header / insufficient data. + goto ReturnWidthHeight; // Wrong VP8/VP8L chunk-header / insufficient data. } if (hdrs.compressed_size > MAX_CHUNK_PAYLOAD) { return VP8_STATUS_BITSTREAM_ERROR; } + if (format != NULL && !(animation_present || fragments_present)) { + *format = hdrs.is_lossless ? 2 : 1; + } + if (!hdrs.is_lossless) { if (data_size < VP8_FRAME_HEADER_SIZE) { - return VP8_STATUS_NOT_ENOUGH_DATA; + status = VP8_STATUS_NOT_ENOUGH_DATA; + goto ReturnWidthHeight; } // Validates raw VP8 data. - if (!VP8GetInfo(data, data_size, - (uint32_t)hdrs.compressed_size, width, height)) { + if (!VP8GetInfo(data, data_size, (uint32_t)hdrs.compressed_size, + &image_width, &image_height)) { return VP8_STATUS_BITSTREAM_ERROR; } } else { if (data_size < VP8L_FRAME_HEADER_SIZE) { - return VP8_STATUS_NOT_ENOUGH_DATA; + status = VP8_STATUS_NOT_ENOUGH_DATA; + goto ReturnWidthHeight; } // Validates raw VP8L data. - if (!VP8LGetInfo(data, data_size, width, height, has_alpha)) { + if (!VP8LGetInfo(data, data_size, &image_width, &image_height, has_alpha)) { return VP8_STATUS_BITSTREAM_ERROR; } } - - if (has_alpha != NULL) { - // If the data did not contain a VP8X/VP8L chunk the only definitive way - // to set this is by looking for alpha data (from an ALPH chunk). - *has_alpha |= (hdrs.alpha_data != NULL); + // Validates image size coherency. + if (found_vp8x) { + if (canvas_width != image_width || canvas_height != image_height) { + return VP8_STATUS_BITSTREAM_ERROR; + } } if (headers != NULL) { *headers = hdrs; @@ -360,21 +398,44 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data, assert((uint64_t)(data - headers->data) < MAX_CHUNK_PAYLOAD); assert(headers->offset == headers->data_size - data_size); } - return VP8_STATUS_OK; // Return features from VP8 header. + ReturnWidthHeight: + if (status == VP8_STATUS_OK || + (status == VP8_STATUS_NOT_ENOUGH_DATA && found_vp8x && headers == NULL)) { + if (has_alpha != NULL) { + // If the data did not contain a VP8X/VP8L chunk the only definitive way + // to set this is by looking for alpha data (from an ALPH chunk). + *has_alpha |= (hdrs.alpha_data != NULL); + } + if (width != NULL) *width = image_width; + if (height != NULL) *height = image_height; + return VP8_STATUS_OK; + } else { + return status; + } } VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) { + VP8StatusCode status; + int has_animation = 0; assert(headers != NULL); // fill out headers, ignore width/height/has_alpha. - return ParseHeadersInternal(headers->data, headers->data_size, - NULL, NULL, NULL, headers); + status = ParseHeadersInternal(headers->data, headers->data_size, + NULL, NULL, NULL, &has_animation, + NULL, headers); + if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) { + // TODO(jzern): full support of animation frames will require API additions. 
+ if (has_animation) { + status = VP8_STATUS_UNSUPPORTED_FEATURE; + } + } + return status; } //------------------------------------------------------------------------------ // WebPDecParams void WebPResetDecParams(WebPDecParams* const params) { - if (params) { + if (params != NULL) { memset(params, 0, sizeof(*params)); } } @@ -391,6 +452,7 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size, headers.data = data; headers.data_size = data_size; + headers.have_all_data = 1; status = WebPParseHeaders(&headers); // Process Pre-VP8 chunks. if (status != VP8_STATUS_OK) { return status; @@ -407,11 +469,6 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size, if (dec == NULL) { return VP8_STATUS_OUT_OF_MEMORY; } -#ifdef WEBP_USE_THREAD - dec->use_threads_ = params->options && (params->options->use_threads > 0); -#else - dec->use_threads_ = 0; -#endif dec->alpha_data_ = headers.alpha_data; dec->alpha_data_size_ = headers.alpha_data_size; @@ -423,6 +480,10 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size, status = WebPAllocateDecBuffer(io.width, io.height, params->options, params->output); if (status == VP8_STATUS_OK) { // Decode + // This change must be done before calling VP8Decode() + dec->mt_method_ = VP8GetThreadMethod(params->options, &headers, + io.width, io.height); + VP8InitDithering(params->options, dec); if (!VP8Decode(dec, &io)) { status = dec->status_; } @@ -452,6 +513,10 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size, if (status != VP8_STATUS_OK) { WebPFreeDecBuffer(params->output); } + + if (params->options != NULL && params->options->flip) { + status = WebPFlipBuffer(params->output); + } return status; } @@ -609,7 +674,6 @@ uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size, static void DefaultFeatures(WebPBitstreamFeatures* const features) { assert(features != NULL); memset(features, 0, sizeof(*features)); - features->bitstream_version = 0; } static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size, @@ -619,10 +683,11 @@ static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size, } DefaultFeatures(features); - // Only parse enough of the data to retrieve width/height/has_alpha. + // Only parse enough of the data to retrieve the features. return ParseHeadersInternal(data, data_size, &features->width, &features->height, - &features->has_alpha, NULL); + &features->has_alpha, &features->has_animation, + &features->format, NULL); } //------------------------------------------------------------------------------ @@ -666,19 +731,13 @@ int WebPInitDecoderConfigInternal(WebPDecoderConfig* config, VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size, WebPBitstreamFeatures* features, int version) { - VP8StatusCode status; if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) { return VP8_STATUS_INVALID_PARAM; // version mismatch } if (features == NULL) { return VP8_STATUS_INVALID_PARAM; } - - status = GetFeatures(data, data_size, features); - if (status == VP8_STATUS_NOT_ENOUGH_DATA) { - return VP8_STATUS_BITSTREAM_ERROR; // Not-enough-data treated as error. 
- } - return status; + return GetFeatures(data, data_size, features); } VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size, @@ -722,9 +781,9 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, h = options->crop_height; x = options->crop_left; y = options->crop_top; - if (!WebPIsRGBMode(src_colorspace)) { // only snap for YUV420 or YUV422 + if (!WebPIsRGBMode(src_colorspace)) { // only snap for YUV420 x &= ~1; - y &= ~1; // TODO(later): only for YUV420, not YUV422. + y &= ~1; } if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) { return 0; // out of frame boundary error @@ -740,11 +799,13 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, // Scaling io->use_scaling = (options != NULL) && (options->use_scaling > 0); if (io->use_scaling) { - if (options->scaled_width <= 0 || options->scaled_height <= 0) { + int scaled_width = options->scaled_width; + int scaled_height = options->scaled_height; + if (!WebPRescalerGetScaledDimensions(w, h, &scaled_width, &scaled_height)) { return 0; } - io->scaled_width = options->scaled_width; - io->scaled_height = options->scaled_height; + io->scaled_width = scaled_width; + io->scaled_height = scaled_height; } // Filter @@ -766,6 +827,3 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, //------------------------------------------------------------------------------ -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/dec/webpi.h b/drivers/webp/dec/webpi.h index 44e5744411..c75a2e4a5b 100644 --- a/drivers/webp/dec/webpi.h +++ b/drivers/webp/dec/webpi.h @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Internal header: WebP decoding parameters and custom IO on buffer @@ -12,7 +14,7 @@ #ifndef WEBP_DEC_WEBPI_H_ #define WEBP_DEC_WEBPI_H_ -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus extern "C" { #endif @@ -24,7 +26,10 @@ extern "C" { typedef struct WebPDecParams WebPDecParams; typedef int (*OutputFunc)(const VP8Io* const io, WebPDecParams* const p); -typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos); +typedef int (*OutputAlphaFunc)(const VP8Io* const io, WebPDecParams* const p, + int expected_num_out_lines); +typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos, + int max_out_lines); struct WebPDecParams { WebPDecBuffer* output; // output buffer. @@ -38,7 +43,7 @@ struct WebPDecParams { void* memory; // overall scratch memory for the output work. 
OutputFunc emit; // output RGB or YUV samples - OutputFunc emit_alpha; // output alpha channel + OutputAlphaFunc emit_alpha; // output alpha channel OutputRowFunc emit_alpha_row; // output one line of rescaled alpha values }; @@ -52,6 +57,7 @@ void WebPResetDecParams(WebPDecParams* const params); typedef struct { const uint8_t* data; // input buffer size_t data_size; // input buffer size + int have_all_data; // true if all data is known to be available size_t offset; // offset to main data chunk (VP8 or VP8L) const uint8_t* alpha_data; // points to alpha chunk (if present) size_t alpha_data_size; // alpha chunk size @@ -61,10 +67,10 @@ typedef struct { } WebPHeaderStructure; // Skips over all valid chunks prior to the first VP8/VP8L frame header. -// Returns VP8_STATUS_OK on success, -// VP8_STATUS_BITSTREAM_ERROR if an invalid header/chunk is found, and -// VP8_STATUS_NOT_ENOUGH_DATA if case of insufficient data. -// In 'headers', compressed_size, offset, alpha_data, alpha_size and lossless +// Returns: VP8_STATUS_OK, VP8_STATUS_BITSTREAM_ERROR (invalid header/chunk), +// VP8_STATUS_NOT_ENOUGH_DATA (partial input) or VP8_STATUS_UNSUPPORTED_FEATURE +// in the case of non-decodable features (animation for instance). +// In 'headers', compressed_size, offset, alpha_data, alpha_size, and lossless // fields are updated appropriately upon success. VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers); @@ -91,10 +97,15 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, // dimension / etc.). If *options is not NULL, also verify that the options' // parameters are valid and apply them to the width/height dimensions of the // output buffer. This takes cropping / scaling / rotation into account. +// Also incorporates the options->flip flag to flip the buffer parameters if +// needed. VP8StatusCode WebPAllocateDecBuffer(int width, int height, const WebPDecoderOptions* const options, WebPDecBuffer* const buffer); +// Flip buffer vertically by negating the various strides. +VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer); + // Copy 'src' into 'dst' buffer, making sure 'dst' is not marked as owner of the // memory (still held by 'src'). void WebPCopyDecBuffer(const WebPDecBuffer* const src, @@ -103,11 +114,9 @@ void WebPCopyDecBuffer(const WebPDecBuffer* const src, // Copy and transfer ownership from src to dst (beware of parameter order!) void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst); - - //------------------------------------------------------------------------------ -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus } // extern "C" #endif |
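
// Caller-side sketch (not part of the patch): how the features, options and
// buffer-freeing paths touched above are typically driven through the public
// decode.h API. This is a minimal illustration under the assumption that the
// usual libwebp entry points (WebPInitDecoderConfig, WebPGetFeatures,
// WebPDecode, WebPFreeDecBuffer) are available; error handling is reduced to
// early returns.
#include <stdio.h>
#include "webp/decode.h"

static int DecodeHalfSizeFlippedBGRA(const uint8_t* data, size_t data_size) {
  WebPDecoderConfig config;
  if (!WebPInitDecoderConfig(&config)) return 0;   // ABI version check
  if (WebPGetFeatures(data, data_size, &config.input) != VP8_STATUS_OK) return 0;
  printf("%d x %d, alpha=%d\n",
         config.input.width, config.input.height, config.input.has_alpha);
  config.options.flip = 1;                         // handled via WebPFlipBuffer()
  config.options.use_scaling = 1;                  // validated in WebPIoInitFromOptions()
  config.options.scaled_width = config.input.width / 2;
  config.options.scaled_height = config.input.height / 2;
  config.output.colorspace = MODE_BGRA;
  if (WebPDecode(data, data_size, &config) != VP8_STATUS_OK) return 0;
  // ... use config.output.u.RGBA here ...
  WebPFreeDecBuffer(&config.output);               // release decoder-owned memory
  return 1;
}
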