diff options
Diffstat (limited to 'drivers/webp/dec/vp8l.c')
-rw-r--r-- | drivers/webp/dec/vp8l.c | 1095 |
1 files changed, 761 insertions, 334 deletions
diff --git a/drivers/webp/dec/vp8l.c b/drivers/webp/dec/vp8l.c index 897e4395c7..19665a007d 100644 --- a/drivers/webp/dec/vp8l.c +++ b/drivers/webp/dec/vp8l.c @@ -1,8 +1,10 @@ // Copyright 2012 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // main entry for the decoder @@ -10,18 +12,17 @@ // Authors: Vikas Arora (vikaas.arora@gmail.com) // Jyrki Alakuijala (jyrki@google.com) -#include <stdio.h> #include <stdlib.h> + +#include "./alphai.h" #include "./vp8li.h" +#include "../dsp/dsp.h" #include "../dsp/lossless.h" #include "../dsp/yuv.h" +#include "../utils/endian_inl.h" #include "../utils/huffman.h" #include "../utils/utils.h" -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - #define NUM_ARGB_CACHE_ROWS 16 static const int kCodeLengthLiterals = 16; @@ -50,6 +51,9 @@ static const uint16_t kAlphabetSize[HUFFMAN_CODES_PER_META_CODE] = { NUM_DISTANCE_CODES }; +static const uint8_t kLiteralMap[HUFFMAN_CODES_PER_META_CODE] = { + 0, 1, 1, 1, 0 +}; #define NUM_CODE_LENGTH_CODES 19 static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = { @@ -57,19 +61,43 @@ static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = { }; #define CODE_TO_PLANE_CODES 120 -static const uint8_t code_to_plane_lut[CODE_TO_PLANE_CODES] = { - 0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a, - 0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a, - 0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b, - 0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03, - 0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c, - 0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e, - 0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b, - 0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f, - 0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b, - 0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41, - 0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f, - 0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70 +static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = { + 0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a, + 0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a, + 0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b, + 0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03, + 0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c, + 0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e, + 0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b, + 0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f, + 0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b, + 0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41, + 0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f, + 0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70 +}; + +// Memory needed for lookup tables of one Huffman tree group. Red, blue, alpha +// and distance alphabets are constant (256 for red, blue and alpha, 40 for +// distance) and lookup table sizes for them in worst case are 630 and 410 +// respectively. Size of green alphabet depends on color cache size and is equal +// to 256 (green component values) + 24 (length prefix values) +// + color_cache_size (between 0 and 2048). +// All values computed for 8-bit first level lookup with Mark Adler's tool: +// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c +#define FIXED_TABLE_SIZE (630 * 3 + 410) +static const int kTableSize[12] = { + FIXED_TABLE_SIZE + 654, + FIXED_TABLE_SIZE + 656, + FIXED_TABLE_SIZE + 658, + FIXED_TABLE_SIZE + 662, + FIXED_TABLE_SIZE + 670, + FIXED_TABLE_SIZE + 686, + FIXED_TABLE_SIZE + 718, + FIXED_TABLE_SIZE + 782, + FIXED_TABLE_SIZE + 912, + FIXED_TABLE_SIZE + 1168, + FIXED_TABLE_SIZE + 1680, + FIXED_TABLE_SIZE + 2704 }; static int DecodeImageStream(int xsize, int ysize, @@ -80,27 +108,28 @@ static int DecodeImageStream(int xsize, int ysize, //------------------------------------------------------------------------------ int VP8LCheckSignature(const uint8_t* const data, size_t size) { - return (size >= 1) && (data[0] == VP8L_MAGIC_BYTE); + return (size >= VP8L_FRAME_HEADER_SIZE && + data[0] == VP8L_MAGIC_BYTE && + (data[4] >> 5) == 0); // version } static int ReadImageInfo(VP8LBitReader* const br, int* const width, int* const height, int* const has_alpha) { - const uint8_t signature = VP8LReadBits(br, 8); - if (!VP8LCheckSignature(&signature, 1)) { - return 0; - } + if (VP8LReadBits(br, 8) != VP8L_MAGIC_BYTE) return 0; *width = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1; *height = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1; *has_alpha = VP8LReadBits(br, 1); - VP8LReadBits(br, VP8L_VERSION_BITS); // Read/ignore the version number. - return 1; + if (VP8LReadBits(br, VP8L_VERSION_BITS) != 0) return 0; + return !br->eos_; } int VP8LGetInfo(const uint8_t* data, size_t data_size, int* const width, int* const height, int* const has_alpha) { if (data == NULL || data_size < VP8L_FRAME_HEADER_SIZE) { return 0; // not enough data + } else if (!VP8LCheckSignature(data, data_size)) { + return 0; // bad signature } else { int w, h, a; VP8LBitReader br; @@ -138,39 +167,80 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) { if (plane_code > CODE_TO_PLANE_CODES) { return plane_code - CODE_TO_PLANE_CODES; } else { - const int dist_code = code_to_plane_lut[plane_code - 1]; + const int dist_code = kCodeToPlane[plane_code - 1]; const int yoffset = dist_code >> 4; const int xoffset = 8 - (dist_code & 0xf); const int dist = yoffset * xsize + xoffset; - return (dist >= 1) ? dist : 1; + return (dist >= 1) ? dist : 1; // dist<1 can happen if xsize is very small } } //------------------------------------------------------------------------------ // Decodes the next Huffman code from bit-stream. // FillBitWindow(br) needs to be called at minimum every second call -// to ReadSymbolUnsafe. -static int ReadSymbolUnsafe(const HuffmanTree* tree, VP8LBitReader* const br) { - const HuffmanTreeNode* node = tree->root_; - assert(node != NULL); - while (!HuffmanTreeNodeIsLeaf(node)) { - node = HuffmanTreeNextNode(node, VP8LReadOneBitUnsafe(br)); - } - return node->symbol_; +// to ReadSymbol, in order to pre-fetch enough bits. +static WEBP_INLINE int ReadSymbol(const HuffmanCode* table, + VP8LBitReader* const br) { + int nbits; + uint32_t val = VP8LPrefetchBits(br); + table += val & HUFFMAN_TABLE_MASK; + nbits = table->bits - HUFFMAN_TABLE_BITS; + if (nbits > 0) { + VP8LSetBitPos(br, br->bit_pos_ + HUFFMAN_TABLE_BITS); + val = VP8LPrefetchBits(br); + table += table->value; + table += val & ((1 << nbits) - 1); + } + VP8LSetBitPos(br, br->bit_pos_ + table->bits); + return table->value; } -static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree, - VP8LBitReader* const br) { - const int read_safe = (br->pos_ + 8 > br->len_); - if (!read_safe) { - return ReadSymbolUnsafe(tree, br); +// Reads packed symbol depending on GREEN channel +#define BITS_SPECIAL_MARKER 0x100 // something large enough (and a bit-mask) +#define PACKED_NON_LITERAL_CODE 0 // must be < NUM_LITERAL_CODES +static WEBP_INLINE int ReadPackedSymbols(const HTreeGroup* group, + VP8LBitReader* const br, + uint32_t* const dst) { + const uint32_t val = VP8LPrefetchBits(br) & (HUFFMAN_PACKED_TABLE_SIZE - 1); + const HuffmanCode32 code = group->packed_table[val]; + assert(group->use_packed_table); + if (code.bits < BITS_SPECIAL_MARKER) { + VP8LSetBitPos(br, br->bit_pos_ + code.bits); + *dst = code.value; + return PACKED_NON_LITERAL_CODE; } else { - const HuffmanTreeNode* node = tree->root_; - assert(node != NULL); - while (!HuffmanTreeNodeIsLeaf(node)) { - node = HuffmanTreeNextNode(node, VP8LReadOneBit(br)); + VP8LSetBitPos(br, br->bit_pos_ + code.bits - BITS_SPECIAL_MARKER); + assert(code.value >= NUM_LITERAL_CODES); + return code.value; + } +} + +static int AccumulateHCode(HuffmanCode hcode, int shift, + HuffmanCode32* const huff) { + huff->bits += hcode.bits; + huff->value |= (uint32_t)hcode.value << shift; + assert(huff->bits <= HUFFMAN_TABLE_BITS); + return hcode.bits; +} + +static void BuildPackedTable(HTreeGroup* const htree_group) { + uint32_t code; + for (code = 0; code < HUFFMAN_PACKED_TABLE_SIZE; ++code) { + uint32_t bits = code; + HuffmanCode32* const huff = &htree_group->packed_table[bits]; + HuffmanCode hcode = htree_group->htrees[GREEN][bits]; + if (hcode.value >= NUM_LITERAL_CODES) { + huff->bits = hcode.bits + BITS_SPECIAL_MARKER; + huff->value = hcode.value; + } else { + huff->bits = 0; + huff->value = 0; + bits >>= AccumulateHCode(hcode, 8, huff); + bits >>= AccumulateHCode(htree_group->htrees[RED][bits], 16, huff); + bits >>= AccumulateHCode(htree_group->htrees[BLUE][bits], 0, huff); + bits >>= AccumulateHCode(htree_group->htrees[ALPHA][bits], 24, huff); + (void)bits; } - return node->symbol_; } } @@ -182,19 +252,18 @@ static int ReadHuffmanCodeLengths( int symbol; int max_symbol; int prev_code_len = DEFAULT_CODE_LENGTH; - HuffmanTree tree; + HuffmanCode table[1 << LENGTHS_TABLE_BITS]; - if (!HuffmanTreeBuildImplicit(&tree, code_length_code_lengths, - NUM_CODE_LENGTH_CODES)) { - dec->status_ = VP8_STATUS_BITSTREAM_ERROR; - return 0; + if (!VP8LBuildHuffmanTable(table, LENGTHS_TABLE_BITS, + code_length_code_lengths, + NUM_CODE_LENGTH_CODES)) { + goto End; } if (VP8LReadBits(br, 1)) { // use length const int length_nbits = 2 + 2 * VP8LReadBits(br, 3); max_symbol = 2 + VP8LReadBits(br, length_nbits); if (max_symbol > num_symbols) { - dec->status_ = VP8_STATUS_BITSTREAM_ERROR; goto End; } } else { @@ -203,10 +272,13 @@ static int ReadHuffmanCodeLengths( symbol = 0; while (symbol < num_symbols) { + const HuffmanCode* p; int code_len; if (max_symbol-- == 0) break; VP8LFillBitWindow(br); - code_len = ReadSymbol(&tree, br); + p = &table[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK]; + VP8LSetBitPos(br, br->bit_pos_ + p->bits); + code_len = p->value; if (code_len < kCodeLengthLiterals) { code_lengths[symbol++] = code_len; if (code_len != 0) prev_code_len = code_len; @@ -217,7 +289,6 @@ static int ReadHuffmanCodeLengths( const int repeat_offset = kCodeLengthRepeatOffsets[slot]; int repeat = VP8LReadBits(br, extra_bits) + repeat_offset; if (symbol + repeat > num_symbols) { - dec->status_ = VP8_STATUS_BITSTREAM_ERROR; goto End; } else { const int length = use_prev ? prev_code_len : 0; @@ -228,36 +299,34 @@ static int ReadHuffmanCodeLengths( ok = 1; End: - HuffmanTreeRelease(&tree); + if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR; return ok; } +// 'code_lengths' is pre-allocated temporary buffer, used for creating Huffman +// tree. static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec, - HuffmanTree* const tree) { + int* const code_lengths, HuffmanCode* const table) { int ok = 0; + int size = 0; VP8LBitReader* const br = &dec->br_; const int simple_code = VP8LReadBits(br, 1); + memset(code_lengths, 0, alphabet_size * sizeof(*code_lengths)); + if (simple_code) { // Read symbols, codes & code lengths directly. - int symbols[2]; - int codes[2]; - int code_lengths[2]; const int num_symbols = VP8LReadBits(br, 1) + 1; const int first_symbol_len_code = VP8LReadBits(br, 1); // The first code is either 1 bit or 8 bit code. - symbols[0] = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8); - codes[0] = 0; - code_lengths[0] = num_symbols - 1; + int symbol = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8); + code_lengths[symbol] = 1; // The second code (if present), is always 8 bit long. if (num_symbols == 2) { - symbols[1] = VP8LReadBits(br, 8); - codes[1] = 1; - code_lengths[1] = num_symbols - 1; + symbol = VP8LReadBits(br, 8); + code_lengths[symbol] = 1; } - ok = HuffmanTreeBuildExplicit(tree, code_lengths, codes, symbols, - alphabet_size, num_symbols); + ok = 1; } else { // Decode Huffman-coded code lengths. - int* code_lengths = NULL; int i; int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = { 0 }; const int num_codes = VP8LReadBits(br, 4) + 4; @@ -266,42 +335,23 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec, return 0; } - code_lengths = - (int*)WebPSafeCalloc((uint64_t)alphabet_size, sizeof(*code_lengths)); - if (code_lengths == NULL) { - dec->status_ = VP8_STATUS_OUT_OF_MEMORY; - return 0; - } - for (i = 0; i < num_codes; ++i) { code_length_code_lengths[kCodeLengthCodeOrder[i]] = VP8LReadBits(br, 3); } ok = ReadHuffmanCodeLengths(dec, code_length_code_lengths, alphabet_size, code_lengths); - if (ok) { - ok = HuffmanTreeBuildImplicit(tree, code_lengths, alphabet_size); - } - free(code_lengths); } - ok = ok && !br->error_; - if (!ok) { + + ok = ok && !br->eos_; + if (ok) { + size = VP8LBuildHuffmanTable(table, HUFFMAN_TABLE_BITS, + code_lengths, alphabet_size); + } + if (!ok || size == 0) { dec->status_ = VP8_STATUS_BITSTREAM_ERROR; return 0; } - return 1; -} - -static void DeleteHtreeGroups(HTreeGroup* htree_groups, int num_htree_groups) { - if (htree_groups != NULL) { - int i, j; - for (i = 0; i < num_htree_groups; ++i) { - HuffmanTree* const htrees = htree_groups[i].htrees_; - for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) { - HuffmanTreeRelease(&htrees[j]); - } - } - free(htree_groups); - } + return size; } static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, @@ -311,7 +361,12 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, VP8LMetadata* const hdr = &dec->hdr_; uint32_t* huffman_image = NULL; HTreeGroup* htree_groups = NULL; + HuffmanCode* huffman_tables = NULL; + HuffmanCode* next = NULL; int num_htree_groups = 1; + int max_alphabet_size = 0; + int* code_lengths = NULL; + const int table_size = kTableSize[color_cache_bits]; if (allow_recursion && VP8LReadBits(br, 1)) { // use meta Huffman codes. @@ -321,51 +376,108 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, const int huffman_pixs = huffman_xsize * huffman_ysize; if (!DecodeImageStream(huffman_xsize, huffman_ysize, 0, dec, &huffman_image)) { - dec->status_ = VP8_STATUS_BITSTREAM_ERROR; goto Error; } hdr->huffman_subsample_bits_ = huffman_precision; for (i = 0; i < huffman_pixs; ++i) { // The huffman data is stored in red and green bytes. - const int index = (huffman_image[i] >> 8) & 0xffff; - huffman_image[i] = index; - if (index >= num_htree_groups) { - num_htree_groups = index + 1; + const int group = (huffman_image[i] >> 8) & 0xffff; + huffman_image[i] = group; + if (group >= num_htree_groups) { + num_htree_groups = group + 1; } } } - if (br->error_) goto Error; + if (br->eos_) goto Error; + + // Find maximum alphabet size for the htree group. + for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) { + int alphabet_size = kAlphabetSize[j]; + if (j == 0 && color_cache_bits > 0) { + alphabet_size += 1 << color_cache_bits; + } + if (max_alphabet_size < alphabet_size) { + max_alphabet_size = alphabet_size; + } + } + + huffman_tables = (HuffmanCode*)WebPSafeMalloc(num_htree_groups * table_size, + sizeof(*huffman_tables)); + htree_groups = VP8LHtreeGroupsNew(num_htree_groups); + code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size, + sizeof(*code_lengths)); - assert(num_htree_groups <= 0x10000); - htree_groups = - (HTreeGroup*)WebPSafeCalloc((uint64_t)num_htree_groups, - sizeof(*htree_groups)); - if (htree_groups == NULL) { + if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) { dec->status_ = VP8_STATUS_OUT_OF_MEMORY; goto Error; } + next = huffman_tables; for (i = 0; i < num_htree_groups; ++i) { - HuffmanTree* const htrees = htree_groups[i].htrees_; + HTreeGroup* const htree_group = &htree_groups[i]; + HuffmanCode** const htrees = htree_group->htrees; + int size; + int total_size = 0; + int is_trivial_literal = 1; + int max_bits = 0; for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) { int alphabet_size = kAlphabetSize[j]; + htrees[j] = next; if (j == 0 && color_cache_bits > 0) { alphabet_size += 1 << color_cache_bits; } - if (!ReadHuffmanCode(alphabet_size, dec, htrees + j)) goto Error; + size = ReadHuffmanCode(alphabet_size, dec, code_lengths, next); + if (is_trivial_literal && kLiteralMap[j] == 1) { + is_trivial_literal = (next->bits == 0); + } + total_size += next->bits; + next += size; + if (size == 0) { + goto Error; + } + if (j <= ALPHA) { + int local_max_bits = code_lengths[0]; + int k; + for (k = 1; k < alphabet_size; ++k) { + if (code_lengths[k] > local_max_bits) { + local_max_bits = code_lengths[k]; + } + } + max_bits += local_max_bits; + } } + htree_group->is_trivial_literal = is_trivial_literal; + htree_group->is_trivial_code = 0; + if (is_trivial_literal) { + const int red = htrees[RED][0].value; + const int blue = htrees[BLUE][0].value; + const int alpha = htrees[ALPHA][0].value; + htree_group->literal_arb = + ((uint32_t)alpha << 24) | (red << 16) | blue; + if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) { + htree_group->is_trivial_code = 1; + htree_group->literal_arb |= htrees[GREEN][0].value << 8; + } + } + htree_group->use_packed_table = !htree_group->is_trivial_code && + (max_bits < HUFFMAN_PACKED_BITS); + if (htree_group->use_packed_table) BuildPackedTable(htree_group); } + WebPSafeFree(code_lengths); // All OK. Finalize pointers and return. hdr->huffman_image_ = huffman_image; hdr->num_htree_groups_ = num_htree_groups; hdr->htree_groups_ = htree_groups; + hdr->huffman_tables_ = huffman_tables; return 1; Error: - free(huffman_image); - DeleteHtreeGroups(htree_groups, num_htree_groups); + WebPSafeFree(code_lengths); + WebPSafeFree(huffman_image); + WebPSafeFree(huffman_tables); + VP8LHtreeGroupsFree(htree_groups); return 0; } @@ -379,13 +491,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { const int in_height = io->mb_h; const int out_height = io->scaled_height; const uint64_t work_size = 2 * num_channels * (uint64_t)out_width; - int32_t* work; // Rescaler work area. - const uint64_t scaled_data_size = num_channels * (uint64_t)out_width; + rescaler_t* work; // Rescaler work area. + const uint64_t scaled_data_size = (uint64_t)out_width; uint32_t* scaled_data; // Temporary storage for scaled BGRA data. const uint64_t memory_size = sizeof(*dec->rescaler) + work_size * sizeof(*work) + scaled_data_size * sizeof(*scaled_data); - uint8_t* memory = (uint8_t*)WebPSafeCalloc(memory_size, sizeof(*memory)); + uint8_t* memory = (uint8_t*)WebPSafeMalloc(memory_size, sizeof(*memory)); if (memory == NULL) { dec->status_ = VP8_STATUS_OUT_OF_MEMORY; return 0; @@ -395,13 +507,12 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { dec->rescaler = (WebPRescaler*)memory; memory += sizeof(*dec->rescaler); - work = (int32_t*)memory; + work = (rescaler_t*)memory; memory += work_size * sizeof(*work); scaled_data = (uint32_t*)memory; WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data, - out_width, out_height, 0, num_channels, - in_width, out_width, in_height, out_height, work); + out_width, out_height, 0, num_channels, work); return 1; } @@ -411,12 +522,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { // We have special "export" function since we need to convert from BGRA static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace, int rgba_stride, uint8_t* const rgba) { - const uint32_t* const src = (const uint32_t*)rescaler->dst; + uint32_t* const src = (uint32_t*)rescaler->dst; const int dst_width = rescaler->dst_width; int num_lines_out = 0; while (WebPRescalerHasPendingOutput(rescaler)) { uint8_t* const dst = rgba + num_lines_out * rgba_stride; WebPRescalerExportRow(rescaler); + WebPMultARGBRow(src, dst_width, 1); VP8LConvertFromBGRA(src, dst_width, colorspace, dst); ++num_lines_out; } @@ -424,18 +536,22 @@ static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace, } // Emit scaled rows. -static int EmitRescaledRows(const VP8LDecoder* const dec, - const uint32_t* const data, int in_stride, int mb_h, - uint8_t* const out, int out_stride) { +static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec, + uint8_t* in, int in_stride, int mb_h, + uint8_t* const out, int out_stride) { const WEBP_CSP_MODE colorspace = dec->output_->colorspace; - const uint8_t* const in = (const uint8_t*)data; int num_lines_in = 0; int num_lines_out = 0; while (num_lines_in < mb_h) { - const uint8_t* const row_in = in + num_lines_in * in_stride; + uint8_t* const row_in = in + num_lines_in * in_stride; uint8_t* const row_out = out + num_lines_out * out_stride; - num_lines_in += WebPRescalerImport(dec->rescaler, mb_h - num_lines_in, - row_in, in_stride); + const int lines_left = mb_h - num_lines_in; + const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left); + assert(needed_lines > 0 && needed_lines <= lines_left); + WebPMultARGBRows(row_in, in_stride, + dec->rescaler->src_width, needed_lines, 0); + WebPRescalerImport(dec->rescaler, lines_left, row_in, in_stride); + num_lines_in += needed_lines; num_lines_out += Export(dec->rescaler, colorspace, out_stride, row_out); } return num_lines_out; @@ -443,11 +559,10 @@ static int EmitRescaledRows(const VP8LDecoder* const dec, // Emit rows without any scaling. static int EmitRows(WEBP_CSP_MODE colorspace, - const uint32_t* const data, int in_stride, + const uint8_t* row_in, int in_stride, int mb_w, int mb_h, uint8_t* const out, int out_stride) { int lines = mb_h; - const uint8_t* row_in = (const uint8_t*)data; uint8_t* row_out = out; while (lines-- > 0) { VP8LConvertFromBGRA((const uint32_t*)row_in, mb_w, colorspace, row_out); @@ -463,72 +578,37 @@ static int EmitRows(WEBP_CSP_MODE colorspace, static void ConvertToYUVA(const uint32_t* const src, int width, int y_pos, const WebPDecBuffer* const output) { const WebPYUVABuffer* const buf = &output->u.YUVA; + // first, the luma plane - { - int i; - uint8_t* const y = buf->y + y_pos * buf->y_stride; - for (i = 0; i < width; ++i) { - const uint32_t p = src[i]; - y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff); - } - } + WebPConvertARGBToY(src, buf->y + y_pos * buf->y_stride, width); // then U/V planes { uint8_t* const u = buf->u + (y_pos >> 1) * buf->u_stride; uint8_t* const v = buf->v + (y_pos >> 1) * buf->v_stride; - const int uv_width = width >> 1; - int i; - for (i = 0; i < uv_width; ++i) { - const uint32_t v0 = src[2 * i + 0]; - const uint32_t v1 = src[2 * i + 1]; - // VP8RGBToU/V expects four accumulated pixels. Hence we need to - // scale r/g/b value by a factor 2. We just shift v0/v1 one bit less. - const int r = ((v0 >> 15) & 0x1fe) + ((v1 >> 15) & 0x1fe); - const int g = ((v0 >> 7) & 0x1fe) + ((v1 >> 7) & 0x1fe); - const int b = ((v0 << 1) & 0x1fe) + ((v1 << 1) & 0x1fe); - if (!(y_pos & 1)) { // even lines: store values - u[i] = VP8RGBToU(r, g, b); - v[i] = VP8RGBToV(r, g, b); - } else { // odd lines: average with previous values - const int tmp_u = VP8RGBToU(r, g, b); - const int tmp_v = VP8RGBToV(r, g, b); - // Approximated average-of-four. But it's an acceptable diff. - u[i] = (u[i] + tmp_u + 1) >> 1; - v[i] = (v[i] + tmp_v + 1) >> 1; - } - } - if (width & 1) { // last pixel - const uint32_t v0 = src[2 * i + 0]; - const int r = (v0 >> 14) & 0x3fc; - const int g = (v0 >> 6) & 0x3fc; - const int b = (v0 << 2) & 0x3fc; - if (!(y_pos & 1)) { // even lines - u[i] = VP8RGBToU(r, g, b); - v[i] = VP8RGBToV(r, g, b); - } else { // odd lines (note: we could just skip this) - const int tmp_u = VP8RGBToU(r, g, b); - const int tmp_v = VP8RGBToV(r, g, b); - u[i] = (u[i] + tmp_u + 1) >> 1; - v[i] = (v[i] + tmp_v + 1) >> 1; - } - } + // even lines: store values + // odd lines: average with previous values + WebPConvertARGBToUV(src, u, v, width, !(y_pos & 1)); } // Lastly, store alpha if needed. if (buf->a != NULL) { - int i; uint8_t* const a = buf->a + y_pos * buf->a_stride; - for (i = 0; i < width; ++i) a[i] = (src[i] >> 24); +#if defined(WORDS_BIGENDIAN) + WebPExtractAlpha((uint8_t*)src + 0, 0, width, 1, a, 0); +#else + WebPExtractAlpha((uint8_t*)src + 3, 0, width, 1, a, 0); +#endif } } static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) { WebPRescaler* const rescaler = dec->rescaler; - const uint32_t* const src = (const uint32_t*)rescaler->dst; + uint32_t* const src = (uint32_t*)rescaler->dst; const int dst_width = rescaler->dst_width; int num_lines_out = 0; while (WebPRescalerHasPendingOutput(rescaler)) { WebPRescalerExportRow(rescaler); + WebPMultARGBRow(src, dst_width, 1); ConvertToYUVA(src, dst_width, y_pos, dec->output_); ++y_pos; ++num_lines_out; @@ -537,28 +617,28 @@ static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) { } static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec, - const uint32_t* const data, - int in_stride, int mb_h) { - const uint8_t* const in = (const uint8_t*)data; + uint8_t* in, int in_stride, int mb_h) { int num_lines_in = 0; int y_pos = dec->last_out_row_; while (num_lines_in < mb_h) { - const uint8_t* const row_in = in + num_lines_in * in_stride; - num_lines_in += WebPRescalerImport(dec->rescaler, mb_h - num_lines_in, - row_in, in_stride); + const int lines_left = mb_h - num_lines_in; + const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left); + WebPMultARGBRows(in, in_stride, dec->rescaler->src_width, needed_lines, 0); + WebPRescalerImport(dec->rescaler, lines_left, in, in_stride); + num_lines_in += needed_lines; + in += needed_lines * in_stride; y_pos += ExportYUVA(dec, y_pos); } return y_pos; } static int EmitRowsYUVA(const VP8LDecoder* const dec, - const uint32_t* const data, int in_stride, + const uint8_t* in, int in_stride, int mb_w, int num_rows) { int y_pos = dec->last_out_row_; - const uint8_t* row_in = (const uint8_t*)data; while (num_rows-- > 0) { - ConvertToYUVA((const uint32_t*)row_in, mb_w, y_pos, dec->output_); - row_in += in_stride; + ConvertToYUVA((const uint32_t*)in, mb_w, y_pos, dec->output_); + in += in_stride; ++y_pos; } return y_pos; @@ -569,11 +649,11 @@ static int EmitRowsYUVA(const VP8LDecoder* const dec, // Sets io->mb_y, io->mb_h & io->mb_w according to start row, end row and // crop options. Also updates the input data pointer, so that it points to the -// start of the cropped window. -// Note that 'pixel_stride' is in units of 'uint32_t' (and not 'bytes). +// start of the cropped window. Note that pixels are in ARGB format even if +// 'in_data' is uint8_t*. // Returns true if the crop window is not empty. static int SetCropWindow(VP8Io* const io, int y_start, int y_end, - const uint32_t** const in_data, int pixel_stride) { + uint8_t** const in_data, int pixel_stride) { assert(y_start < y_end); assert(io->crop_left < io->crop_right); if (y_end > io->crop_bottom) { @@ -582,11 +662,11 @@ static int SetCropWindow(VP8Io* const io, int y_start, int y_end, if (y_start < io->crop_top) { const int delta = io->crop_top - y_start; y_start = io->crop_top; - *in_data += pixel_stride * delta; + *in_data += delta * pixel_stride; } if (y_start >= y_end) return 0; // Crop window is empty. - *in_data += io->crop_left; + *in_data += io->crop_left * sizeof(uint32_t); io->mb_y = y_start - io->crop_top; io->mb_w = io->crop_right - io->crop_left; @@ -634,10 +714,24 @@ static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows, } } +// Special method for paletted alpha data. +static void ApplyInverseTransformsAlpha(VP8LDecoder* const dec, int num_rows, + const uint8_t* const rows) { + const int start_row = dec->last_row_; + const int end_row = start_row + num_rows; + const uint8_t* rows_in = rows; + uint8_t* rows_out = (uint8_t*)dec->io_->opaque + dec->io_->width * start_row; + VP8LTransform* const transform = &dec->transforms_[0]; + assert(dec->next_transform_ == 1); + assert(transform->type_ == COLOR_INDEXING_TRANSFORM); + VP8LColorIndexInverseTransformAlpha(transform, start_row, end_row, rows_in, + rows_out); +} + // Processes (transforms, scales & color-converts) the rows decoded after the // last call. static void ProcessRows(VP8LDecoder* const dec, int row) { - const uint32_t* const rows = dec->argb_ + dec->width_ * dec->last_row_; + const uint32_t* const rows = dec->pixels_ + dec->width_ * dec->last_row_; const int num_rows = row - dec->last_row_; if (num_rows <= 0) return; // Nothing to be done. @@ -646,18 +740,18 @@ static void ProcessRows(VP8LDecoder* const dec, int row) { // Emit output. { VP8Io* const io = dec->io_; - const uint32_t* rows_data = dec->argb_cache_; - if (!SetCropWindow(io, dec->last_row_, row, &rows_data, io->width)) { + uint8_t* rows_data = (uint8_t*)dec->argb_cache_; + const int in_stride = io->width * sizeof(uint32_t); // in unit of RGBA + if (!SetCropWindow(io, dec->last_row_, row, &rows_data, in_stride)) { // Nothing to output (this time). } else { const WebPDecBuffer* const output = dec->output_; - const int in_stride = io->width * sizeof(*rows_data); - if (output->colorspace < MODE_YUV) { // convert to RGBA + if (WebPIsRGBMode(output->colorspace)) { // convert to RGBA const WebPRGBABuffer* const buf = &output->u.RGBA; uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride; const int num_rows_out = io->use_scaling ? - EmitRescaledRows(dec, rows_data, in_stride, io->mb_h, - rgba, buf->stride) : + EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h, + rgba, buf->stride) : EmitRows(output->colorspace, rows_data, in_stride, io->mb_w, io->mb_h, rgba, buf->stride); // Update 'last_out_row_'. @@ -676,50 +770,317 @@ static void ProcessRows(VP8LDecoder* const dec, int row) { assert(dec->last_row_ <= dec->height_); } -static int DecodeImageData(VP8LDecoder* const dec, - uint32_t* const data, int width, int height, - ProcessRowsFunc process_func) { +// Row-processing for the special case when alpha data contains only one +// transform (color indexing), and trivial non-green literals. +static int Is8bOptimizable(const VP8LMetadata* const hdr) { + int i; + if (hdr->color_cache_size_ > 0) return 0; + // When the Huffman tree contains only one symbol, we can skip the + // call to ReadSymbol() for red/blue/alpha channels. + for (i = 0; i < hdr->num_htree_groups_; ++i) { + HuffmanCode** const htrees = hdr->htree_groups_[i].htrees; + if (htrees[RED][0].bits > 0) return 0; + if (htrees[BLUE][0].bits > 0) return 0; + if (htrees[ALPHA][0].bits > 0) return 0; + } + return 1; +} + +static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) { + const int num_rows = row - dec->last_row_; + const uint8_t* const in = + (uint8_t*)dec->pixels_ + dec->width_ * dec->last_row_; + if (num_rows > 0) { + ApplyInverseTransformsAlpha(dec, num_rows, in); + } + dec->last_row_ = dec->last_out_row_ = row; +} + +//------------------------------------------------------------------------------ +// Helper functions for fast pattern copy (8b and 32b) + +// cyclic rotation of pattern word +static WEBP_INLINE uint32_t Rotate8b(uint32_t V) { +#if defined(WORDS_BIGENDIAN) + return ((V & 0xff000000u) >> 24) | (V << 8); +#else + return ((V & 0xffu) << 24) | (V >> 8); +#endif +} + +// copy 1, 2 or 4-bytes pattern +static WEBP_INLINE void CopySmallPattern8b(const uint8_t* src, uint8_t* dst, + int length, uint32_t pattern) { + int i; + // align 'dst' to 4-bytes boundary. Adjust the pattern along the way. + while ((uintptr_t)dst & 3) { + *dst++ = *src++; + pattern = Rotate8b(pattern); + --length; + } + // Copy the pattern 4 bytes at a time. + for (i = 0; i < (length >> 2); ++i) { + ((uint32_t*)dst)[i] = pattern; + } + // Finish with left-overs. 'pattern' is still correctly positioned, + // so no Rotate8b() call is needed. + for (i <<= 2; i < length; ++i) { + dst[i] = src[i]; + } +} + +static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) { + const uint8_t* src = dst - dist; + if (length >= 8) { + uint32_t pattern = 0; + switch (dist) { + case 1: + pattern = src[0]; +#if defined(__arm__) || defined(_M_ARM) // arm doesn't like multiply that much + pattern |= pattern << 8; + pattern |= pattern << 16; +#elif defined(WEBP_USE_MIPS_DSP_R2) + __asm__ volatile ("replv.qb %0, %0" : "+r"(pattern)); +#else + pattern = 0x01010101u * pattern; +#endif + break; + case 2: + memcpy(&pattern, src, sizeof(uint16_t)); +#if defined(__arm__) || defined(_M_ARM) + pattern |= pattern << 16; +#elif defined(WEBP_USE_MIPS_DSP_R2) + __asm__ volatile ("replv.ph %0, %0" : "+r"(pattern)); +#else + pattern = 0x00010001u * pattern; +#endif + break; + case 4: + memcpy(&pattern, src, sizeof(uint32_t)); + break; + default: + goto Copy; + break; + } + CopySmallPattern8b(src, dst, length, pattern); + return; + } + Copy: + if (dist >= length) { // no overlap -> use memcpy() + memcpy(dst, src, length * sizeof(*dst)); + } else { + int i; + for (i = 0; i < length; ++i) dst[i] = src[i]; + } +} + +// copy pattern of 1 or 2 uint32_t's +static WEBP_INLINE void CopySmallPattern32b(const uint32_t* src, + uint32_t* dst, + int length, uint64_t pattern) { + int i; + if ((uintptr_t)dst & 4) { // Align 'dst' to 8-bytes boundary. + *dst++ = *src++; + pattern = (pattern >> 32) | (pattern << 32); + --length; + } + assert(0 == ((uintptr_t)dst & 7)); + for (i = 0; i < (length >> 1); ++i) { + ((uint64_t*)dst)[i] = pattern; // Copy the pattern 8 bytes at a time. + } + if (length & 1) { // Finish with left-over. + dst[i << 1] = src[i << 1]; + } +} + +static WEBP_INLINE void CopyBlock32b(uint32_t* const dst, + int dist, int length) { + const uint32_t* const src = dst - dist; + if (dist <= 2 && length >= 4 && ((uintptr_t)dst & 3) == 0) { + uint64_t pattern; + if (dist == 1) { + pattern = (uint64_t)src[0]; + pattern |= pattern << 32; + } else { + memcpy(&pattern, src, sizeof(pattern)); + } + CopySmallPattern32b(src, dst, length, pattern); + } else if (dist >= length) { // no overlap + memcpy(dst, src, length * sizeof(*dst)); + } else { + int i; + for (i = 0; i < length; ++i) dst[i] = src[i]; + } +} + +//------------------------------------------------------------------------------ + +static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data, + int width, int height, int last_row) { int ok = 1; - int col = 0, row = 0; + int row = dec->last_pixel_ / width; + int col = dec->last_pixel_ % width; VP8LBitReader* const br = &dec->br_; VP8LMetadata* const hdr = &dec->hdr_; - HTreeGroup* htree_group = hdr->htree_groups_; - uint32_t* src = data; - uint32_t* last_cached = data; - uint32_t* const src_end = data + width * height; + const HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row); + int pos = dec->last_pixel_; // current position + const int end = width * height; // End of data + const int last = width * last_row; // Last pixel to decode const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES; - const int color_cache_limit = len_code_limit + hdr->color_cache_size_; - VP8LColorCache* const color_cache = - (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL; const int mask = hdr->huffman_mask_; - assert(htree_group != NULL); + assert(pos < end); + assert(last_row <= height); + assert(Is8bOptimizable(hdr)); - while (!br->eos_ && src < src_end) { + while (!br->eos_ && pos < last) { int code; - // Only update when changing tile. Note we could use the following test: - // if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed - // but that's actually slower and requires storing the previous col/row + // Only update when changing tile. if ((col & mask) == 0) { htree_group = GetHtreeGroupForPos(hdr, col, row); } VP8LFillBitWindow(br); - code = ReadSymbol(&htree_group->htrees_[GREEN], br); - if (code < NUM_LITERAL_CODES) { // Literal. - int red, green, blue, alpha; - red = ReadSymbol(&htree_group->htrees_[RED], br); - green = code; + code = ReadSymbol(htree_group->htrees[GREEN], br); + if (code < NUM_LITERAL_CODES) { // Literal + data[pos] = code; + ++pos; + ++col; + if (col >= width) { + col = 0; + ++row; + if (row % NUM_ARGB_CACHE_ROWS == 0) { + ExtractPalettedAlphaRows(dec, row); + } + } + } else if (code < len_code_limit) { // Backward reference + int dist_code, dist; + const int length_sym = code - NUM_LITERAL_CODES; + const int length = GetCopyLength(length_sym, br); + const int dist_symbol = ReadSymbol(htree_group->htrees[DIST], br); VP8LFillBitWindow(br); - blue = ReadSymbol(&htree_group->htrees_[BLUE], br); - alpha = ReadSymbol(&htree_group->htrees_[ALPHA], br); - *src = (alpha << 24) + (red << 16) + (green << 8) + blue; - AdvanceByOne: + dist_code = GetCopyDistance(dist_symbol, br); + dist = PlaneCodeToDistance(width, dist_code); + if (pos >= dist && end - pos >= length) { + CopyBlock8b(data + pos, dist, length); + } else { + ok = 0; + goto End; + } + pos += length; + col += length; + while (col >= width) { + col -= width; + ++row; + if (row % NUM_ARGB_CACHE_ROWS == 0) { + ExtractPalettedAlphaRows(dec, row); + } + } + if (pos < last && (col & mask)) { + htree_group = GetHtreeGroupForPos(hdr, col, row); + } + } else { // Not reached + ok = 0; + goto End; + } + assert(br->eos_ == VP8LIsEndOfStream(br)); + } + // Process the remaining rows corresponding to last row-block. + ExtractPalettedAlphaRows(dec, row); + + End: + if (!ok || (br->eos_ && pos < end)) { + ok = 0; + dec->status_ = br->eos_ ? VP8_STATUS_SUSPENDED + : VP8_STATUS_BITSTREAM_ERROR; + } else { + dec->last_pixel_ = pos; + } + return ok; +} + +static void SaveState(VP8LDecoder* const dec, int last_pixel) { + assert(dec->incremental_); + dec->saved_br_ = dec->br_; + dec->saved_last_pixel_ = last_pixel; + if (dec->hdr_.color_cache_size_ > 0) { + VP8LColorCacheCopy(&dec->hdr_.color_cache_, &dec->hdr_.saved_color_cache_); + } +} + +static void RestoreState(VP8LDecoder* const dec) { + assert(dec->br_.eos_); + dec->status_ = VP8_STATUS_SUSPENDED; + dec->br_ = dec->saved_br_; + dec->last_pixel_ = dec->saved_last_pixel_; + if (dec->hdr_.color_cache_size_ > 0) { + VP8LColorCacheCopy(&dec->hdr_.saved_color_cache_, &dec->hdr_.color_cache_); + } +} + +#define SYNC_EVERY_N_ROWS 8 // minimum number of rows between check-points +static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data, + int width, int height, int last_row, + ProcessRowsFunc process_func) { + int row = dec->last_pixel_ / width; + int col = dec->last_pixel_ % width; + VP8LBitReader* const br = &dec->br_; + VP8LMetadata* const hdr = &dec->hdr_; + HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row); + uint32_t* src = data + dec->last_pixel_; + uint32_t* last_cached = src; + uint32_t* const src_end = data + width * height; // End of data + uint32_t* const src_last = data + width * last_row; // Last pixel to decode + const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES; + const int color_cache_limit = len_code_limit + hdr->color_cache_size_; + int next_sync_row = dec->incremental_ ? row : 1 << 24; + VP8LColorCache* const color_cache = + (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL; + const int mask = hdr->huffman_mask_; + assert(htree_group != NULL); + assert(src < src_end); + assert(src_last <= src_end); + + while (src < src_last) { + int code; + if (row >= next_sync_row) { + SaveState(dec, (int)(src - data)); + next_sync_row = row + SYNC_EVERY_N_ROWS; + } + // Only update when changing tile. Note we could use this test: + // if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed + // but that's actually slower and needs storing the previous col/row. + if ((col & mask) == 0) htree_group = GetHtreeGroupForPos(hdr, col, row); + if (htree_group->is_trivial_code) { + *src = htree_group->literal_arb; + goto AdvanceByOne; + } + VP8LFillBitWindow(br); + if (htree_group->use_packed_table) { + code = ReadPackedSymbols(htree_group, br, src); + if (code == PACKED_NON_LITERAL_CODE) goto AdvanceByOne; + } else { + code = ReadSymbol(htree_group->htrees[GREEN], br); + } + if (br->eos_) break; // early out + if (code < NUM_LITERAL_CODES) { // Literal + if (htree_group->is_trivial_literal) { + *src = htree_group->literal_arb | (code << 8); + } else { + int red, blue, alpha; + red = ReadSymbol(htree_group->htrees[RED], br); + VP8LFillBitWindow(br); + blue = ReadSymbol(htree_group->htrees[BLUE], br); + alpha = ReadSymbol(htree_group->htrees[ALPHA], br); + if (br->eos_) break; + *src = ((uint32_t)alpha << 24) | (red << 16) | (code << 8) | blue; + } + AdvanceByOne: ++src; ++col; if (col >= width) { col = 0; ++row; - if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) { + if ((row % NUM_ARGB_CACHE_ROWS == 0) && (process_func != NULL)) { process_func(dec, row); } if (color_cache != NULL) { @@ -728,40 +1089,39 @@ static int DecodeImageData(VP8LDecoder* const dec, } } } - } else if (code < len_code_limit) { // Backward reference + } else if (code < len_code_limit) { // Backward reference int dist_code, dist; const int length_sym = code - NUM_LITERAL_CODES; const int length = GetCopyLength(length_sym, br); - const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br); + const int dist_symbol = ReadSymbol(htree_group->htrees[DIST], br); VP8LFillBitWindow(br); dist_code = GetCopyDistance(dist_symbol, br); dist = PlaneCodeToDistance(width, dist_code); - if (src - data < dist || src_end - src < length) { - ok = 0; - goto End; - } - { - int i; - for (i = 0; i < length; ++i) src[i] = src[i - dist]; - src += length; + if (br->eos_) break; + if (src - data < (ptrdiff_t)dist || src_end - src < (ptrdiff_t)length) { + goto Error; + } else { + CopyBlock32b(src, dist, length); } + src += length; col += length; while (col >= width) { col -= width; ++row; - if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) { + if ((row % NUM_ARGB_CACHE_ROWS == 0) && (process_func != NULL)) { process_func(dec, row); } } - if (src < src_end) { - htree_group = GetHtreeGroupForPos(hdr, col, row); - if (color_cache != NULL) { - while (last_cached < src) { - VP8LColorCacheInsert(color_cache, *last_cached++); - } + // Because of the check done above (before 'src' was incremented by + // 'length'), the following holds true. + assert(src <= src_end); + if (col & mask) htree_group = GetHtreeGroupForPos(hdr, col, row); + if (color_cache != NULL) { + while (last_cached < src) { + VP8LColorCacheInsert(color_cache, *last_cached++); } } - } else if (code < color_cache_limit) { // Color cache. + } else if (code < color_cache_limit) { // Color cache const int key = code - len_code_limit; assert(color_cache != NULL); while (last_cached < src) { @@ -769,33 +1129,38 @@ static int DecodeImageData(VP8LDecoder* const dec, } *src = VP8LColorCacheLookup(color_cache, key); goto AdvanceByOne; - } else { // Not reached. - ok = 0; - goto End; + } else { // Not reached + goto Error; } - ok = !br->error_; - if (!ok) goto End; + assert(br->eos_ == VP8LIsEndOfStream(br)); } - // Process the remaining rows corresponding to last row-block. - if (process_func != NULL) process_func(dec, row); - End: - if (br->error_ || !ok || (br->eos_ && src < src_end)) { - ok = 0; - dec->status_ = (!br->eos_) ? - VP8_STATUS_BITSTREAM_ERROR : VP8_STATUS_SUSPENDED; - } else if (src == src_end) { - dec->state_ = READ_DATA; + if (dec->incremental_ && br->eos_ && src < src_end) { + RestoreState(dec); + } else if (!br->eos_) { + // Process the remaining rows corresponding to last row-block. + if (process_func != NULL) { + process_func(dec, row); + } + dec->status_ = VP8_STATUS_OK; + dec->last_pixel_ = (int)(src - data); // end-of-scan marker + } else { + // if not incremental, and we are past the end of buffer (eos_=1), then this + // is a real bitstream error. + goto Error; } + return 1; - return ok; + Error: + dec->status_ = VP8_STATUS_BITSTREAM_ERROR; + return 0; } // ----------------------------------------------------------------------------- // VP8LTransform static void ClearTransform(VP8LTransform* const transform) { - free(transform->data_); + WebPSafeFree(transform->data_); transform->data_ = NULL; } @@ -819,7 +1184,7 @@ static int ExpandColorMap(int num_colors, VP8LTransform* const transform) { } for (; i < 4 * final_num_colors; ++i) new_data[i] = 0; // black tail. - free(transform->data_); + WebPSafeFree(transform->data_); transform->data_ = new_color_map; } return 1; @@ -882,16 +1247,18 @@ static int ReadTransform(int* const xsize, int const* ysize, // VP8LMetadata static void InitMetadata(VP8LMetadata* const hdr) { - assert(hdr); + assert(hdr != NULL); memset(hdr, 0, sizeof(*hdr)); } static void ClearMetadata(VP8LMetadata* const hdr) { - assert(hdr); + assert(hdr != NULL); - free(hdr->huffman_image_); - DeleteHtreeGroups(hdr->htree_groups_, hdr->num_htree_groups_); + WebPSafeFree(hdr->huffman_image_); + WebPSafeFree(hdr->huffman_tables_); + VP8LHtreeGroupsFree(hdr->htree_groups_); VP8LColorCacheClear(&hdr->color_cache_); + VP8LColorCacheClear(&hdr->saved_color_cache_); InitMetadata(hdr); } @@ -899,11 +1266,13 @@ static void ClearMetadata(VP8LMetadata* const hdr) { // VP8LDecoder VP8LDecoder* VP8LNew(void) { - VP8LDecoder* const dec = (VP8LDecoder*)calloc(1, sizeof(*dec)); + VP8LDecoder* const dec = (VP8LDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec)); if (dec == NULL) return NULL; dec->status_ = VP8_STATUS_OK; - dec->action_ = READ_DIM; dec->state_ = READ_DIM; + + VP8LDspInit(); // Init critical function pointers. + return dec; } @@ -912,15 +1281,15 @@ void VP8LClear(VP8LDecoder* const dec) { if (dec == NULL) return; ClearMetadata(&dec->hdr_); - free(dec->argb_); - dec->argb_ = NULL; + WebPSafeFree(dec->pixels_); + dec->pixels_ = NULL; for (i = 0; i < dec->next_transform_; ++i) { ClearTransform(&dec->transforms_[i]); } dec->next_transform_ = 0; dec->transforms_seen_ = 0; - free(dec->rescaler_memory); + WebPSafeFree(dec->rescaler_memory); dec->rescaler_memory = NULL; dec->output_ = NULL; // leave no trace behind @@ -929,7 +1298,7 @@ void VP8LClear(VP8LDecoder* const dec) { void VP8LDelete(VP8LDecoder* const dec) { if (dec != NULL) { VP8LClear(dec); - free(dec); + WebPSafeFree(dec); } } @@ -1009,19 +1378,14 @@ static int DecodeImageStream(int xsize, int ysize, } // Use the Huffman trees to decode the LZ77 encoded data. - ok = DecodeImageData(dec, data, transform_xsize, transform_ysize, NULL); - ok = ok && !br->error_; + ok = DecodeImageData(dec, data, transform_xsize, transform_ysize, + transform_ysize, NULL); + ok = ok && !br->eos_; End: - if (!ok) { - free(data); + WebPSafeFree(data); ClearMetadata(hdr); - // If not enough data (br.eos_) resulted in BIT_STREAM_ERROR, update the - // status appropriately. - if (dec->status_ == VP8_STATUS_BITSTREAM_ERROR && dec->br_.eos_) { - dec->status_ = VP8_STATUS_SUSPENDED; - } } else { if (decoded_data != NULL) { *decoded_data = data; @@ -1031,41 +1395,52 @@ static int DecodeImageStream(int xsize, int ysize, assert(data == NULL); assert(is_level0); } + dec->last_pixel_ = 0; // Reset for future DECODE_DATA_FUNC() calls. if (!is_level0) ClearMetadata(hdr); // Clean up temporary data behind. } return ok; } //------------------------------------------------------------------------------ -// Allocate dec->argb_ and dec->argb_cache_ using dec->width_ and dec->height_ - -static int AllocateARGBBuffers(VP8LDecoder* const dec, int final_width) { +// Allocate internal buffers dec->pixels_ and dec->argb_cache_. +static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) { const uint64_t num_pixels = (uint64_t)dec->width_ * dec->height_; // Scratch buffer corresponding to top-prediction row for transforming the - // first row in the row-blocks. - const uint64_t cache_top_pixels = final_width; - // Scratch buffer for temporary BGRA storage. + // first row in the row-blocks. Not needed for paletted alpha. + const uint64_t cache_top_pixels = (uint16_t)final_width; + // Scratch buffer for temporary BGRA storage. Not needed for paletted alpha. const uint64_t cache_pixels = (uint64_t)final_width * NUM_ARGB_CACHE_ROWS; const uint64_t total_num_pixels = num_pixels + cache_top_pixels + cache_pixels; assert(dec->width_ <= final_width); - dec->argb_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(*dec->argb_)); - if (dec->argb_ == NULL) { + dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint32_t)); + if (dec->pixels_ == NULL) { dec->argb_cache_ = NULL; // for sanity check dec->status_ = VP8_STATUS_OUT_OF_MEMORY; return 0; } - dec->argb_cache_ = dec->argb_ + num_pixels + cache_top_pixels; + dec->argb_cache_ = dec->pixels_ + num_pixels + cache_top_pixels; + return 1; +} + +static int AllocateInternalBuffers8b(VP8LDecoder* const dec) { + const uint64_t total_num_pixels = (uint64_t)dec->width_ * dec->height_; + dec->argb_cache_ = NULL; // for sanity check + dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint8_t)); + if (dec->pixels_ == NULL) { + dec->status_ = VP8_STATUS_OUT_OF_MEMORY; + return 0; + } return 1; } //------------------------------------------------------------------------------ -// Special row-processing that only stores the alpha data. +// Special row-processing that only stores the alpha data. static void ExtractAlphaRows(VP8LDecoder* const dec, int row) { const int num_rows = row - dec->last_row_; - const uint32_t* const in = dec->argb_ + dec->width_ * dec->last_row_; + const uint32_t* const in = dec->pixels_ + dec->width_ * dec->last_row_; if (num_rows <= 0) return; // Nothing to be done. ApplyInverseTransforms(dec, num_rows, in); @@ -1079,44 +1454,77 @@ static void ExtractAlphaRows(VP8LDecoder* const dec, int row) { int i; for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff; } - dec->last_row_ = dec->last_out_row_ = row; } -int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data, - size_t data_size, uint8_t* const output) { - VP8Io io; +int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec, + const uint8_t* const data, size_t data_size, + uint8_t* const output) { int ok = 0; - VP8LDecoder* const dec = VP8LNew(); - if (dec == NULL) return 0; - - dec->width_ = width; - dec->height_ = height; - dec->io_ = &io; + VP8LDecoder* dec; + VP8Io* io; + assert(alph_dec != NULL); + alph_dec->vp8l_dec_ = VP8LNew(); + if (alph_dec->vp8l_dec_ == NULL) return 0; + dec = alph_dec->vp8l_dec_; + + dec->width_ = alph_dec->width_; + dec->height_ = alph_dec->height_; + dec->io_ = &alph_dec->io_; + io = dec->io_; - VP8InitIo(&io); - WebPInitCustomIo(NULL, &io); // Just a sanity Init. io won't be used. - io.opaque = output; - io.width = width; - io.height = height; + VP8InitIo(io); + WebPInitCustomIo(NULL, io); // Just a sanity Init. io won't be used. + io->opaque = output; + io->width = alph_dec->width_; + io->height = alph_dec->height_; dec->status_ = VP8_STATUS_OK; VP8LInitBitReader(&dec->br_, data, data_size); - dec->action_ = READ_HDR; - if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Err; + if (!DecodeImageStream(alph_dec->width_, alph_dec->height_, 1, dec, NULL)) { + goto Err; + } - // Allocate output (note that dec->width_ may have changed here). - if (!AllocateARGBBuffers(dec, width)) goto Err; + // Special case: if alpha data uses only the color indexing transform and + // doesn't use color cache (a frequent case), we will use DecodeAlphaData() + // method that only needs allocation of 1 byte per pixel (alpha channel). + if (dec->next_transform_ == 1 && + dec->transforms_[0].type_ == COLOR_INDEXING_TRANSFORM && + Is8bOptimizable(&dec->hdr_)) { + alph_dec->use_8b_decode = 1; + ok = AllocateInternalBuffers8b(dec); + } else { + // Allocate internal buffers (note that dec->width_ may have changed here). + alph_dec->use_8b_decode = 0; + ok = AllocateInternalBuffers32b(dec, alph_dec->width_); + } - // Decode (with special row processing). - dec->action_ = READ_DATA; - ok = DecodeImageData(dec, dec->argb_, dec->width_, dec->height_, - ExtractAlphaRows); + if (!ok) goto Err; + + return 1; Err: - VP8LDelete(dec); - return ok; + VP8LDelete(alph_dec->vp8l_dec_); + alph_dec->vp8l_dec_ = NULL; + return 0; +} + +int VP8LDecodeAlphaImageStream(ALPHDecoder* const alph_dec, int last_row) { + VP8LDecoder* const dec = alph_dec->vp8l_dec_; + assert(dec != NULL); + assert(last_row <= dec->height_); + + if (dec->last_pixel_ == dec->width_ * dec->height_) { + return 1; // done + } + + // Decode (with special row processing). + return alph_dec->use_8b_decode ? + DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_, + last_row) : + DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_, + last_row, ExtractAlphaRows); } //------------------------------------------------------------------------------ @@ -1141,14 +1549,13 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) { io->width = width; io->height = height; - dec->action_ = READ_HDR; if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Error; return 1; Error: - VP8LClear(dec); - assert(dec->status_ != VP8_STATUS_OK); - return 0; + VP8LClear(dec); + assert(dec->status_ != VP8_STATUS_OK); + return 0; } int VP8LDecodeImage(VP8LDecoder* const dec) { @@ -1158,33 +1565,57 @@ int VP8LDecodeImage(VP8LDecoder* const dec) { // Sanity checks. if (dec == NULL) return 0; + assert(dec->hdr_.huffman_tables_ != NULL); + assert(dec->hdr_.htree_groups_ != NULL); + assert(dec->hdr_.num_htree_groups_ > 0); + io = dec->io_; assert(io != NULL); params = (WebPDecParams*)io->opaque; assert(params != NULL); - dec->output_ = params->output; - assert(dec->output_ != NULL); // Initialization. - if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) { - dec->status_ = VP8_STATUS_INVALID_PARAM; - goto Err; - } + if (dec->state_ != READ_DATA) { + dec->output_ = params->output; + assert(dec->output_ != NULL); + + if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) { + dec->status_ = VP8_STATUS_INVALID_PARAM; + goto Err; + } - if (!AllocateARGBBuffers(dec, io->width)) goto Err; + if (!AllocateInternalBuffers32b(dec, io->width)) goto Err; - if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err; + if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err; + + if (io->use_scaling || WebPIsPremultipliedMode(dec->output_->colorspace)) { + // need the alpha-multiply functions for premultiplied output or rescaling + WebPInitAlphaProcessing(); + } + if (!WebPIsRGBMode(dec->output_->colorspace)) { + WebPInitConvertARGBToYUV(); + if (dec->output_->u.YUVA.a != NULL) WebPInitAlphaProcessing(); + } + if (dec->incremental_) { + if (dec->hdr_.color_cache_size_ > 0 && + dec->hdr_.saved_color_cache_.colors_ == NULL) { + if (!VP8LColorCacheInit(&dec->hdr_.saved_color_cache_, + dec->hdr_.color_cache_.hash_bits_)) { + dec->status_ = VP8_STATUS_OUT_OF_MEMORY; + goto Err; + } + } + } + dec->state_ = READ_DATA; + } // Decode. - dec->action_ = READ_DATA; - if (!DecodeImageData(dec, dec->argb_, dec->width_, dec->height_, - ProcessRows)) { + if (!DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_, + dec->height_, ProcessRows)) { goto Err; } - // Cleanup. params->last_y = dec->last_out_row_; - VP8LClear(dec); return 1; Err: @@ -1194,7 +1625,3 @@ int VP8LDecodeImage(VP8LDecoder* const dec) { } //------------------------------------------------------------------------------ - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif |