diff options
author | RĂ©mi Verschelde <remi@verschelde.fr> | 2016-07-14 09:03:14 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-07-14 09:03:14 +0200 |
commit | 1f2110956b8f66fc3c6c89f74f0dfeb6c2265e45 (patch) | |
tree | 6232703f2e5ddeb2ff9bb6fda050f673aec6a402 /drivers/webp/enc | |
parent | 26baaf447abb85c7a1670141ffa6a41f3287601e (diff) | |
parent | e55c6f823251fcff366c7ce93b3ab0bf1fdedd68 (diff) |
Merge pull request #5592 from volzhs/libwebp-0.5.1
Update webp driver to 0.5.1
Diffstat (limited to 'drivers/webp/enc')
-rw-r--r-- | drivers/webp/enc/alpha.c | 12 | ||||
-rw-r--r-- | drivers/webp/enc/backward_references.c | 1136 | ||||
-rw-r--r-- | drivers/webp/enc/backward_references.h | 18 | ||||
-rw-r--r-- | drivers/webp/enc/filter.c | 91 | ||||
-rw-r--r-- | drivers/webp/enc/histogram.c | 453 | ||||
-rw-r--r-- | drivers/webp/enc/histogram.h | 13 | ||||
-rw-r--r-- | drivers/webp/enc/near_lossless.c | 56 | ||||
-rw-r--r-- | drivers/webp/enc/picture.c | 2 | ||||
-rw-r--r-- | drivers/webp/enc/picture_csp.c | 24 | ||||
-rw-r--r-- | drivers/webp/enc/picture_psnr.c | 6 | ||||
-rw-r--r-- | drivers/webp/enc/picture_tools.c | 20 | ||||
-rw-r--r-- | drivers/webp/enc/quant.c | 202 | ||||
-rw-r--r-- | drivers/webp/enc/vp8enci.h | 29 | ||||
-rw-r--r-- | drivers/webp/enc/vp8l.c | 210 | ||||
-rw-r--r-- | drivers/webp/enc/vp8li.h | 20 | ||||
-rw-r--r-- | drivers/webp/enc/webpenc.c | 54 |
16 files changed, 1568 insertions, 778 deletions
diff --git a/drivers/webp/enc/alpha.c b/drivers/webp/enc/alpha.c index 1842b36401..464df4db09 100644 --- a/drivers/webp/enc/alpha.c +++ b/drivers/webp/enc/alpha.c @@ -67,6 +67,11 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, WebPConfigInit(&config); config.lossless = 1; + // Enable exact, or it would alter RGB values of transparent alpha, which is + // normally OK but not here since we are not encoding the input image but an + // internal encoding-related image containing necessary exact information in + // RGB channels. + config.exact = 1; config.method = effort_level; // impact is very small // Set a low default quality for encoding alpha. Ensure that Alpha quality at // lower methods (3 and below) is less than the threshold for triggering @@ -74,7 +79,11 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, config.quality = 8.f * effort_level; assert(config.quality >= 0 && config.quality <= 100.f); - ok = (VP8LEncodeStream(&config, &picture, bw) == VP8_ENC_OK); + // TODO(urvang): Temporary fix to avoid generating images that trigger + // a decoder bug related to alpha with color cache. + // See: https://code.google.com/p/webp/issues/detail?id=239 + // Need to re-enable this later. + ok = (VP8LEncodeStream(&config, &picture, bw, 0 /*use_cache*/) == VP8_ENC_OK); WebPPictureFree(&picture); ok = ok && !bw->error_; if (!ok) { @@ -113,7 +122,6 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, assert(method >= ALPHA_NO_COMPRESSION); assert(method <= ALPHA_LOSSLESS_COMPRESSION); assert(sizeof(header) == ALPHA_HEADER_LEN); - // TODO(skal): have a common function and #define's to validate alpha params. filter_func = WebPFilters[filter]; if (filter_func != NULL) { diff --git a/drivers/webp/enc/backward_references.c b/drivers/webp/enc/backward_references.c index 049125e521..136a24a8c3 100644 --- a/drivers/webp/enc/backward_references.c +++ b/drivers/webp/enc/backward_references.c @@ -16,6 +16,7 @@ #include "./backward_references.h" #include "./histogram.h" #include "../dsp/lossless.h" +#include "../dsp/dsp.h" #include "../utils/color_cache.h" #include "../utils/utils.h" @@ -26,11 +27,19 @@ #define MAX_ENTROPY (1e30f) // 1M window (4M bytes) minus 120 special codes for short distances. -#define WINDOW_SIZE ((1 << 20) - 120) +#define WINDOW_SIZE_BITS 20 +#define WINDOW_SIZE ((1 << WINDOW_SIZE_BITS) - 120) // Bounds for the match length. #define MIN_LENGTH 2 -#define MAX_LENGTH 4096 +// If you change this, you need MAX_LENGTH_BITS + WINDOW_SIZE_BITS <= 32 as it +// is used in VP8LHashChain. +#define MAX_LENGTH_BITS 12 +// We want the max value to be attainable and stored in MAX_LENGTH_BITS bits. +#define MAX_LENGTH ((1 << MAX_LENGTH_BITS) - 1) +#if MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32 +#error "MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32" +#endif // ----------------------------------------------------------------------------- @@ -56,46 +65,19 @@ static int DistanceToPlaneCode(int xsize, int dist) { return dist + 120; } +// Returns the exact index where array1 and array2 are different. For an index +// inferior or equal to best_len_match, the return value just has to be strictly +// inferior to best_len_match. The current behavior is to return 0 if this index +// is best_len_match, and the index itself otherwise. +// If no two elements are the same, it returns max_limit. static WEBP_INLINE int FindMatchLength(const uint32_t* const array1, const uint32_t* const array2, - int best_len_match, - int max_limit) { -#if !defined(__x86_64__) - // TODO(vrabaud): Compare on other architectures. - int match_len = 0; + int best_len_match, int max_limit) { // Before 'expensive' linear match, check if the two arrays match at the // current best length index. if (array1[best_len_match] != array2[best_len_match]) return 0; - while (match_len < max_limit && array1[match_len] == array2[match_len]) { - ++match_len; - } - return match_len; -#else - const uint32_t* array1_32 = array1; - const uint32_t* array2_32 = array2; - // max value is aligned to (uint64_t*) array1 - const uint32_t* const array1_32_max = array1 + (max_limit & ~1); - // Before 'expensive' linear match, check if the two arrays match at the - // current best length index. - if (array1[best_len_match] != array2[best_len_match]) return 0; - - // TODO(vrabaud): add __predict_true on bound checking? - while (array1_32 < array1_32_max) { - if (*(uint64_t*)array1_32 == *(uint64_t*)array2_32) { - array1_32 += 2; - array2_32 += 2; - } else { - // if the uint32_t pointed to are the same, then the following ones have - // to be different - return (array1_32 - array1) + (*array1_32 == *array2_32); - } - } - - // Deal with the potential last uint32_t. - if ((max_limit & 1) && (*array1_32 != *array2_32)) return max_limit - 1; - return max_limit; -#endif + return VP8LVectorMismatch(array1, array2, max_limit); } // ----------------------------------------------------------------------------- @@ -207,34 +189,24 @@ int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src, // ----------------------------------------------------------------------------- // Hash chains -// initialize as empty -static void HashChainReset(VP8LHashChain* const p) { - int i; - assert(p != NULL); - for (i = 0; i < p->size_; ++i) { - p->chain_[i] = -1; - } - for (i = 0; i < HASH_SIZE; ++i) { - p->hash_to_first_index_[i] = -1; - } -} - int VP8LHashChainInit(VP8LHashChain* const p, int size) { assert(p->size_ == 0); - assert(p->chain_ == NULL); + assert(p->offset_length_ == NULL); assert(size > 0); - p->chain_ = (int*)WebPSafeMalloc(size, sizeof(*p->chain_)); - if (p->chain_ == NULL) return 0; + p->offset_length_ = + (uint32_t*)WebPSafeMalloc(size, sizeof(*p->offset_length_)); + if (p->offset_length_ == NULL) return 0; p->size_ = size; - HashChainReset(p); + return 1; } void VP8LHashChainClear(VP8LHashChain* const p) { assert(p != NULL); - WebPSafeFree(p->chain_); + WebPSafeFree(p->offset_length_); + p->size_ = 0; - p->chain_ = NULL; + p->offset_length_ = NULL; } // ----------------------------------------------------------------------------- @@ -250,18 +222,10 @@ static WEBP_INLINE uint32_t GetPixPairHash64(const uint32_t* const argb) { return key; } -// Insertion of two pixels at a time. -static void HashChainInsert(VP8LHashChain* const p, - const uint32_t* const argb, int pos) { - const uint32_t hash_code = GetPixPairHash64(argb); - p->chain_[pos] = p->hash_to_first_index_[hash_code]; - p->hash_to_first_index_[hash_code] = pos; -} - // Returns the maximum number of hash chain lookups to do for a -// given compression quality. Return value in range [6, 86]. -static int GetMaxItersForQuality(int quality, int low_effort) { - return (low_effort ? 6 : 8) + (quality * quality) / 128; +// given compression quality. Return value in range [8, 86]. +static int GetMaxItersForQuality(int quality) { + return 8 + (quality * quality) / 128; } static int GetWindowSizeForHashChain(int quality, int xsize) { @@ -277,63 +241,120 @@ static WEBP_INLINE int MaxFindCopyLength(int len) { return (len < MAX_LENGTH) ? len : MAX_LENGTH; } -static void HashChainFindOffset(const VP8LHashChain* const p, int base_position, - const uint32_t* const argb, int len, - int window_size, int* const distance_ptr) { - const uint32_t* const argb_start = argb + base_position; - const int min_pos = - (base_position > window_size) ? base_position - window_size : 0; - int pos; - assert(len <= MAX_LENGTH); - for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)]; - pos >= min_pos; - pos = p->chain_[pos]) { - const int curr_length = - FindMatchLength(argb + pos, argb_start, len - 1, len); - if (curr_length == len) break; - } - *distance_ptr = base_position - pos; -} - -static int HashChainFindCopy(const VP8LHashChain* const p, - int base_position, - const uint32_t* const argb, int max_len, - int window_size, int iter_max, - int* const distance_ptr, - int* const length_ptr) { - const uint32_t* const argb_start = argb + base_position; - int iter = iter_max; - int best_length = 0; - int best_distance = 0; - const int min_pos = - (base_position > window_size) ? base_position - window_size : 0; +int VP8LHashChainFill(VP8LHashChain* const p, int quality, + const uint32_t* const argb, int xsize, int ysize) { + const int size = xsize * ysize; + const int iter_max = GetMaxItersForQuality(quality); + const int iter_min = iter_max - quality / 10; + const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize); int pos; - int length_max = 256; - if (max_len < length_max) { - length_max = max_len; + uint32_t base_position; + int32_t* hash_to_first_index; + // Temporarily use the p->offset_length_ as a hash chain. + int32_t* chain = (int32_t*)p->offset_length_; + assert(p->size_ != 0); + assert(p->offset_length_ != NULL); + + hash_to_first_index = + (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index)); + if (hash_to_first_index == NULL) return 0; + + // Set the int32_t array to -1. + memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index)); + // Fill the chain linking pixels with the same hash. + for (pos = 0; pos < size - 1; ++pos) { + const uint32_t hash_code = GetPixPairHash64(argb + pos); + chain[pos] = hash_to_first_index[hash_code]; + hash_to_first_index[hash_code] = pos; } - for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)]; - pos >= min_pos; - pos = p->chain_[pos]) { - int curr_length; - int distance; - if (--iter < 0) { - break; + WebPSafeFree(hash_to_first_index); + + // Find the best match interval at each pixel, defined by an offset to the + // pixel and a length. The right-most pixel cannot match anything to the right + // (hence a best length of 0) and the left-most pixel nothing to the left + // (hence an offset of 0). + p->offset_length_[0] = p->offset_length_[size - 1] = 0; + for (base_position = size - 2 < 0 ? 0 : size - 2; base_position > 0;) { + const int max_len = MaxFindCopyLength(size - 1 - base_position); + const uint32_t* const argb_start = argb + base_position; + int iter = iter_max; + int best_length = 0; + uint32_t best_distance = 0; + const int min_pos = + (base_position > window_size) ? base_position - window_size : 0; + const int length_max = (max_len < 256) ? max_len : 256; + uint32_t max_base_position; + + for (pos = chain[base_position]; pos >= min_pos; pos = chain[pos]) { + int curr_length; + if (--iter < 0) { + break; + } + assert(base_position > (uint32_t)pos); + + curr_length = + FindMatchLength(argb + pos, argb_start, best_length, max_len); + if (best_length < curr_length) { + best_length = curr_length; + best_distance = base_position - pos; + // Stop if we have reached the maximum length. Otherwise, make sure + // we have executed a minimum number of iterations depending on the + // quality. + if ((best_length == MAX_LENGTH) || + (curr_length >= length_max && iter < iter_min)) { + break; + } + } } - - curr_length = FindMatchLength(argb + pos, argb_start, best_length, max_len); - if (best_length < curr_length) { - distance = base_position - pos; - best_length = curr_length; - best_distance = distance; - if (curr_length >= length_max) { + // We have the best match but in case the two intervals continue matching + // to the left, we have the best matches for the left-extended pixels. + max_base_position = base_position; + while (1) { + assert(best_length <= MAX_LENGTH); + assert(best_distance <= WINDOW_SIZE); + p->offset_length_[base_position] = + (best_distance << MAX_LENGTH_BITS) | (uint32_t)best_length; + --base_position; + // Stop if we don't have a match or if we are out of bounds. + if (best_distance == 0 || base_position == 0) break; + // Stop if we cannot extend the matching intervals to the left. + if (base_position < best_distance || + argb[base_position - best_distance] != argb[base_position]) { + break; + } + // Stop if we are matching at its limit because there could be a closer + // matching interval with the same maximum length. Then again, if the + // matching interval is as close as possible (best_distance == 1), we will + // never find anything better so let's continue. + if (best_length == MAX_LENGTH && best_distance != 1 && + base_position + MAX_LENGTH < max_base_position) { break; } + if (best_length < MAX_LENGTH) { + ++best_length; + max_base_position = base_position; + } } } - *distance_ptr = best_distance; - *length_ptr = best_length; - return (best_length >= MIN_LENGTH); + return 1; +} + +static WEBP_INLINE int HashChainFindOffset(const VP8LHashChain* const p, + const int base_position) { + return p->offset_length_[base_position] >> MAX_LENGTH_BITS; +} + +static WEBP_INLINE int HashChainFindLength(const VP8LHashChain* const p, + const int base_position) { + return p->offset_length_[base_position] & ((1U << MAX_LENGTH_BITS) - 1); +} + +static WEBP_INLINE void HashChainFindCopy(const VP8LHashChain* const p, + int base_position, + int* const offset_ptr, + int* const length_ptr) { + *offset_ptr = HashChainFindOffset(p, base_position); + *length_ptr = HashChainFindLength(p, base_position); } static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache, @@ -400,84 +421,62 @@ static int BackwardReferencesRle(int xsize, int ysize, static int BackwardReferencesLz77(int xsize, int ysize, const uint32_t* const argb, int cache_bits, - int quality, int low_effort, - VP8LHashChain* const hash_chain, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) { int i; + int i_last_check = -1; int ok = 0; int cc_init = 0; const int use_color_cache = (cache_bits > 0); const int pix_count = xsize * ysize; VP8LColorCache hashers; - int iter_max = GetMaxItersForQuality(quality, low_effort); - const int window_size = GetWindowSizeForHashChain(quality, xsize); - int min_matches = 32; if (use_color_cache) { cc_init = VP8LColorCacheInit(&hashers, cache_bits); if (!cc_init) goto Error; } ClearBackwardRefs(refs); - HashChainReset(hash_chain); - for (i = 0; i < pix_count - 2; ) { + for (i = 0; i < pix_count;) { // Alternative#1: Code the pixels starting at 'i' using backward reference. int offset = 0; int len = 0; - const int max_len = MaxFindCopyLength(pix_count - i); - HashChainFindCopy(hash_chain, i, argb, max_len, window_size, - iter_max, &offset, &len); - if (len > MIN_LENGTH || (len == MIN_LENGTH && offset <= 512)) { - int offset2 = 0; - int len2 = 0; - int k; - min_matches = 8; - HashChainInsert(hash_chain, &argb[i], i); - if ((len < (max_len >> 2)) && !low_effort) { - // Evaluate Alternative#2: Insert the pixel at 'i' as literal, and code - // the pixels starting at 'i + 1' using backward reference. - HashChainFindCopy(hash_chain, i + 1, argb, max_len - 1, - window_size, iter_max, &offset2, - &len2); - if (len2 > len + 1) { - AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); - i++; // Backward reference to be done for next pixel. - len = len2; - offset = offset2; - } - } - BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); - if (use_color_cache) { - for (k = 0; k < len; ++k) { - VP8LColorCacheInsert(&hashers, argb[i + k]); - } - } - // Add to the hash_chain (but cannot add the last pixel). - if (offset >= 3 && offset != xsize) { - const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i; - for (k = 2; k < last - 8; k += 2) { - HashChainInsert(hash_chain, &argb[i + k], i + k); - } - for (; k < last; ++k) { - HashChainInsert(hash_chain, &argb[i + k], i + k); + int j; + HashChainFindCopy(hash_chain, i, &offset, &len); + if (len > MIN_LENGTH + 1) { + const int len_ini = len; + int max_reach = 0; + assert(i + len < pix_count); + // Only start from what we have not checked already. + i_last_check = (i > i_last_check) ? i : i_last_check; + // We know the best match for the current pixel but we try to find the + // best matches for the current pixel AND the next one combined. + // The naive method would use the intervals: + // [i,i+len) + [i+len, length of best match at i+len) + // while we check if we can use: + // [i,j) (where j<=i+len) + [j, length of best match at j) + for (j = i_last_check + 1; j <= i + len_ini; ++j) { + const int len_j = HashChainFindLength(hash_chain, j); + const int reach = + j + (len_j > MIN_LENGTH + 1 ? len_j : 1); // 1 for single literal. + if (reach > max_reach) { + len = j - i; + max_reach = reach; } } - i += len; } else { + len = 1; + } + // Go with literal or backward reference. + assert(len > 0); + if (len == 1) { AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); - HashChainInsert(hash_chain, &argb[i], i); - ++i; - --min_matches; - if (min_matches <= 0) { - AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); - HashChainInsert(hash_chain, &argb[i], i); - ++i; + } else { + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); + if (use_color_cache) { + for (j = i; j < i + len; ++j) VP8LColorCacheInsert(&hashers, argb[j]); } } - } - while (i < pix_count) { - // Handle the last pixel(s). - AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); - ++i; + i += len; } ok = !refs->error_; @@ -498,7 +497,7 @@ typedef struct { static int BackwardReferencesTraceBackwards( int xsize, int ysize, const uint32_t* const argb, int quality, - int cache_bits, VP8LHashChain* const hash_chain, + int cache_bits, const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs); static void ConvertPopulationCountTableToBitEstimates( @@ -574,16 +573,14 @@ static WEBP_INLINE double GetDistanceCost(const CostModel* const m, return m->distance_[code] + extra_bits; } -static void AddSingleLiteralWithCostModel( - const uint32_t* const argb, VP8LHashChain* const hash_chain, - VP8LColorCache* const hashers, const CostModel* const cost_model, int idx, - int is_last, int use_color_cache, double prev_cost, float* const cost, - uint16_t* const dist_array) { +static void AddSingleLiteralWithCostModel(const uint32_t* const argb, + VP8LColorCache* const hashers, + const CostModel* const cost_model, + int idx, int use_color_cache, + double prev_cost, float* const cost, + uint16_t* const dist_array) { double cost_val = prev_cost; const uint32_t color = argb[0]; - if (!is_last) { - HashChainInsert(hash_chain, argb, idx); - } if (use_color_cache && VP8LColorCacheContains(hashers, color)) { const double mul0 = 0.68; const int ix = VP8LColorCacheGetIndex(hashers, color); @@ -599,30 +596,598 @@ static void AddSingleLiteralWithCostModel( } } +// ----------------------------------------------------------------------------- +// CostManager and interval handling + +// Empirical value to avoid high memory consumption but good for performance. +#define COST_CACHE_INTERVAL_SIZE_MAX 100 + +// To perform backward reference every pixel at index index_ is considered and +// the cost for the MAX_LENGTH following pixels computed. Those following pixels +// at index index_ + k (k from 0 to MAX_LENGTH) have a cost of: +// distance_cost_ at index_ + GetLengthCost(cost_model, k) +// (named cost) (named cached cost) +// and the minimum value is kept. GetLengthCost(cost_model, k) is cached in an +// array of size MAX_LENGTH. +// Instead of performing MAX_LENGTH comparisons per pixel, we keep track of the +// minimal values using intervals, for which lower_ and upper_ bounds are kept. +// An interval is defined by the index_ of the pixel that generated it and +// is only useful in a range of indices from start_ to end_ (exclusive), i.e. +// it contains the minimum value for pixels between start_ and end_. +// Intervals are stored in a linked list and ordered by start_. When a new +// interval has a better minimum, old intervals are split or removed. +typedef struct CostInterval CostInterval; +struct CostInterval { + double lower_; + double upper_; + int start_; + int end_; + double distance_cost_; + int index_; + CostInterval* previous_; + CostInterval* next_; +}; + +// The GetLengthCost(cost_model, k) part of the costs is also bounded for +// efficiency in a set of intervals of a different type. +// If those intervals are small enough, they are not used for comparison and +// written into the costs right away. +typedef struct { + double lower_; // Lower bound of the interval. + double upper_; // Upper bound of the interval. + int start_; + int end_; // Exclusive. + int do_write_; // If !=0, the interval is saved to cost instead of being kept + // for comparison. +} CostCacheInterval; + +// This structure is in charge of managing intervals and costs. +// It caches the different CostCacheInterval, caches the different +// GetLengthCost(cost_model, k) in cost_cache_ and the CostInterval's (whose +// count_ is limited by COST_CACHE_INTERVAL_SIZE_MAX). +#define COST_MANAGER_MAX_FREE_LIST 10 +typedef struct { + CostInterval* head_; + int count_; // The number of stored intervals. + CostCacheInterval* cache_intervals_; + size_t cache_intervals_size_; + double cost_cache_[MAX_LENGTH]; // Contains the GetLengthCost(cost_model, k). + double min_cost_cache_; // The minimum value in cost_cache_[1:]. + double max_cost_cache_; // The maximum value in cost_cache_[1:]. + float* costs_; + uint16_t* dist_array_; + // Most of the time, we only need few intervals -> use a free-list, to avoid + // fragmentation with small allocs in most common cases. + CostInterval intervals_[COST_MANAGER_MAX_FREE_LIST]; + CostInterval* free_intervals_; + // These are regularly malloc'd remains. This list can't grow larger than than + // size COST_CACHE_INTERVAL_SIZE_MAX - COST_MANAGER_MAX_FREE_LIST, note. + CostInterval* recycled_intervals_; + // Buffer used in BackwardReferencesHashChainDistanceOnly to store the ends + // of the intervals that can have impacted the cost at a pixel. + int* interval_ends_; + int interval_ends_size_; +} CostManager; + +static int IsCostCacheIntervalWritable(int start, int end) { + // 100 is the length for which we consider an interval for comparison, and not + // for writing. + // The first intervals are very small and go in increasing size. This constant + // helps merging them into one big interval (up to index 150/200 usually from + // which intervals start getting much bigger). + // This value is empirical. + return (end - start + 1 < 100); +} + +static void CostIntervalAddToFreeList(CostManager* const manager, + CostInterval* const interval) { + interval->next_ = manager->free_intervals_; + manager->free_intervals_ = interval; +} + +static int CostIntervalIsInFreeList(const CostManager* const manager, + const CostInterval* const interval) { + return (interval >= &manager->intervals_[0] && + interval <= &manager->intervals_[COST_MANAGER_MAX_FREE_LIST - 1]); +} + +static void CostManagerInitFreeList(CostManager* const manager) { + int i; + manager->free_intervals_ = NULL; + for (i = 0; i < COST_MANAGER_MAX_FREE_LIST; ++i) { + CostIntervalAddToFreeList(manager, &manager->intervals_[i]); + } +} + +static void DeleteIntervalList(CostManager* const manager, + const CostInterval* interval) { + while (interval != NULL) { + const CostInterval* const next = interval->next_; + if (!CostIntervalIsInFreeList(manager, interval)) { + WebPSafeFree((void*)interval); + } // else: do nothing + interval = next; + } +} + +static void CostManagerClear(CostManager* const manager) { + if (manager == NULL) return; + + WebPSafeFree(manager->costs_); + WebPSafeFree(manager->cache_intervals_); + WebPSafeFree(manager->interval_ends_); + + // Clear the interval lists. + DeleteIntervalList(manager, manager->head_); + manager->head_ = NULL; + DeleteIntervalList(manager, manager->recycled_intervals_); + manager->recycled_intervals_ = NULL; + + // Reset pointers, count_ and cache_intervals_size_. + memset(manager, 0, sizeof(*manager)); + CostManagerInitFreeList(manager); +} + +static int CostManagerInit(CostManager* const manager, + uint16_t* const dist_array, int pix_count, + const CostModel* const cost_model) { + int i; + const int cost_cache_size = (pix_count > MAX_LENGTH) ? MAX_LENGTH : pix_count; + // This constant is tied to the cost_model we use. + // Empirically, differences between intervals is usually of more than 1. + const double min_cost_diff = 0.1; + + manager->costs_ = NULL; + manager->cache_intervals_ = NULL; + manager->interval_ends_ = NULL; + manager->head_ = NULL; + manager->recycled_intervals_ = NULL; + manager->count_ = 0; + manager->dist_array_ = dist_array; + CostManagerInitFreeList(manager); + + // Fill in the cost_cache_. + manager->cache_intervals_size_ = 1; + manager->cost_cache_[0] = 0; + for (i = 1; i < cost_cache_size; ++i) { + manager->cost_cache_[i] = GetLengthCost(cost_model, i); + // Get an approximation of the number of bound intervals. + if (fabs(manager->cost_cache_[i] - manager->cost_cache_[i - 1]) > + min_cost_diff) { + ++manager->cache_intervals_size_; + } + // Compute the minimum of cost_cache_. + if (i == 1) { + manager->min_cost_cache_ = manager->cost_cache_[1]; + manager->max_cost_cache_ = manager->cost_cache_[1]; + } else if (manager->cost_cache_[i] < manager->min_cost_cache_) { + manager->min_cost_cache_ = manager->cost_cache_[i]; + } else if (manager->cost_cache_[i] > manager->max_cost_cache_) { + manager->max_cost_cache_ = manager->cost_cache_[i]; + } + } + + // With the current cost models, we have 15 intervals, so we are safe by + // setting a maximum of COST_CACHE_INTERVAL_SIZE_MAX. + if (manager->cache_intervals_size_ > COST_CACHE_INTERVAL_SIZE_MAX) { + manager->cache_intervals_size_ = COST_CACHE_INTERVAL_SIZE_MAX; + } + manager->cache_intervals_ = (CostCacheInterval*)WebPSafeMalloc( + manager->cache_intervals_size_, sizeof(*manager->cache_intervals_)); + if (manager->cache_intervals_ == NULL) { + CostManagerClear(manager); + return 0; + } + + // Fill in the cache_intervals_. + { + double cost_prev = -1e38f; // unprobably low initial value + CostCacheInterval* prev = NULL; + CostCacheInterval* cur = manager->cache_intervals_; + const CostCacheInterval* const end = + manager->cache_intervals_ + manager->cache_intervals_size_; + + // Consecutive values in cost_cache_ are compared and if a big enough + // difference is found, a new interval is created and bounded. + for (i = 0; i < cost_cache_size; ++i) { + const double cost_val = manager->cost_cache_[i]; + if (i == 0 || + (fabs(cost_val - cost_prev) > min_cost_diff && cur + 1 < end)) { + if (i > 1) { + const int is_writable = + IsCostCacheIntervalWritable(cur->start_, cur->end_); + // Merge with the previous interval if both are writable. + if (is_writable && cur != manager->cache_intervals_ && + prev->do_write_) { + // Update the previous interval. + prev->end_ = cur->end_; + if (cur->lower_ < prev->lower_) { + prev->lower_ = cur->lower_; + } else if (cur->upper_ > prev->upper_) { + prev->upper_ = cur->upper_; + } + } else { + cur->do_write_ = is_writable; + prev = cur; + ++cur; + } + } + // Initialize an interval. + cur->start_ = i; + cur->do_write_ = 0; + cur->lower_ = cost_val; + cur->upper_ = cost_val; + } else { + // Update the current interval bounds. + if (cost_val < cur->lower_) { + cur->lower_ = cost_val; + } else if (cost_val > cur->upper_) { + cur->upper_ = cost_val; + } + } + cur->end_ = i + 1; + cost_prev = cost_val; + } + manager->cache_intervals_size_ = cur + 1 - manager->cache_intervals_; + } + + manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_)); + if (manager->costs_ == NULL) { + CostManagerClear(manager); + return 0; + } + // Set the initial costs_ high for every pixel as we will keep the minimum. + for (i = 0; i < pix_count; ++i) manager->costs_[i] = 1e38f; + + // The cost at pixel is influenced by the cost intervals from previous pixels. + // Let us take the specific case where the offset is the same (which actually + // happens a lot in case of uniform regions). + // pixel i contributes to j>i a cost of: offset cost + cost_cache_[j-i] + // pixel i+1 contributes to j>i a cost of: 2*offset cost + cost_cache_[j-i-1] + // pixel i+2 contributes to j>i a cost of: 3*offset cost + cost_cache_[j-i-2] + // and so on. + // A pixel i influences the following length(j) < MAX_LENGTH pixels. What is + // the value of j such that pixel i + j cannot influence any of those pixels? + // This value is such that: + // max of cost_cache_ < j*offset cost + min of cost_cache_ + // (pixel i + j 's cost cannot beat the worst cost given by pixel i). + // This value will be used to optimize the cost computation in + // BackwardReferencesHashChainDistanceOnly. + { + // The offset cost is computed in GetDistanceCost and has a minimum value of + // the minimum in cost_model->distance_. The case where the offset cost is 0 + // will be dealt with differently later so we are only interested in the + // minimum non-zero offset cost. + double offset_cost_min = 0.; + int size; + for (i = 0; i < NUM_DISTANCE_CODES; ++i) { + if (cost_model->distance_[i] != 0) { + if (offset_cost_min == 0.) { + offset_cost_min = cost_model->distance_[i]; + } else if (cost_model->distance_[i] < offset_cost_min) { + offset_cost_min = cost_model->distance_[i]; + } + } + } + // In case all the cost_model->distance_ is 0, the next non-zero cost we + // can have is from the extra bit in GetDistanceCost, hence 1. + if (offset_cost_min < 1.) offset_cost_min = 1.; + + size = 1 + (int)ceil((manager->max_cost_cache_ - manager->min_cost_cache_) / + offset_cost_min); + // Empirically, we usually end up with a value below 100. + if (size > MAX_LENGTH) size = MAX_LENGTH; + + manager->interval_ends_ = + (int*)WebPSafeMalloc(size, sizeof(*manager->interval_ends_)); + if (manager->interval_ends_ == NULL) { + CostManagerClear(manager); + return 0; + } + manager->interval_ends_size_ = size; + } + + return 1; +} + +// Given the distance_cost for pixel 'index', update the cost at pixel 'i' if it +// is smaller than the previously computed value. +static WEBP_INLINE void UpdateCost(CostManager* const manager, int i, int index, + double distance_cost) { + int k = i - index; + double cost_tmp; + assert(k >= 0 && k < MAX_LENGTH); + cost_tmp = distance_cost + manager->cost_cache_[k]; + + if (manager->costs_[i] > cost_tmp) { + manager->costs_[i] = (float)cost_tmp; + manager->dist_array_[i] = k + 1; + } +} + +// Given the distance_cost for pixel 'index', update the cost for all the pixels +// between 'start' and 'end' excluded. +static WEBP_INLINE void UpdateCostPerInterval(CostManager* const manager, + int start, int end, int index, + double distance_cost) { + int i; + for (i = start; i < end; ++i) UpdateCost(manager, i, index, distance_cost); +} + +// Given two intervals, make 'prev' be the previous one of 'next' in 'manager'. +static WEBP_INLINE void ConnectIntervals(CostManager* const manager, + CostInterval* const prev, + CostInterval* const next) { + if (prev != NULL) { + prev->next_ = next; + } else { + manager->head_ = next; + } + + if (next != NULL) next->previous_ = prev; +} + +// Pop an interval in the manager. +static WEBP_INLINE void PopInterval(CostManager* const manager, + CostInterval* const interval) { + CostInterval* const next = interval->next_; + + if (interval == NULL) return; + + ConnectIntervals(manager, interval->previous_, next); + if (CostIntervalIsInFreeList(manager, interval)) { + CostIntervalAddToFreeList(manager, interval); + } else { // recycle regularly malloc'd intervals too + interval->next_ = manager->recycled_intervals_; + manager->recycled_intervals_ = interval; + } + --manager->count_; + assert(manager->count_ >= 0); +} + +// Update the cost at index i by going over all the stored intervals that +// overlap with i. +static WEBP_INLINE void UpdateCostPerIndex(CostManager* const manager, int i) { + CostInterval* current = manager->head_; + + while (current != NULL && current->start_ <= i) { + if (current->end_ <= i) { + // We have an outdated interval, remove it. + CostInterval* next = current->next_; + PopInterval(manager, current); + current = next; + } else { + UpdateCost(manager, i, current->index_, current->distance_cost_); + current = current->next_; + } + } +} + +// Given a current orphan interval and its previous interval, before +// it was orphaned (which can be NULL), set it at the right place in the list +// of intervals using the start_ ordering and the previous interval as a hint. +static WEBP_INLINE void PositionOrphanInterval(CostManager* const manager, + CostInterval* const current, + CostInterval* previous) { + assert(current != NULL); + + if (previous == NULL) previous = manager->head_; + while (previous != NULL && current->start_ < previous->start_) { + previous = previous->previous_; + } + while (previous != NULL && previous->next_ != NULL && + previous->next_->start_ < current->start_) { + previous = previous->next_; + } + + if (previous != NULL) { + ConnectIntervals(manager, current, previous->next_); + } else { + ConnectIntervals(manager, current, manager->head_); + } + ConnectIntervals(manager, previous, current); +} + +// Insert an interval in the list contained in the manager by starting at +// interval_in as a hint. The intervals are sorted by start_ value. +static WEBP_INLINE void InsertInterval(CostManager* const manager, + CostInterval* const interval_in, + double distance_cost, double lower, + double upper, int index, int start, + int end) { + CostInterval* interval_new; + + if (IsCostCacheIntervalWritable(start, end) || + manager->count_ >= COST_CACHE_INTERVAL_SIZE_MAX) { + // Write down the interval if it is too small. + UpdateCostPerInterval(manager, start, end, index, distance_cost); + return; + } + if (manager->free_intervals_ != NULL) { + interval_new = manager->free_intervals_; + manager->free_intervals_ = interval_new->next_; + } else if (manager->recycled_intervals_ != NULL) { + interval_new = manager->recycled_intervals_; + manager->recycled_intervals_ = interval_new->next_; + } else { // malloc for good + interval_new = (CostInterval*)WebPSafeMalloc(1, sizeof(*interval_new)); + if (interval_new == NULL) { + // Write down the interval if we cannot create it. + UpdateCostPerInterval(manager, start, end, index, distance_cost); + return; + } + } + + interval_new->distance_cost_ = distance_cost; + interval_new->lower_ = lower; + interval_new->upper_ = upper; + interval_new->index_ = index; + interval_new->start_ = start; + interval_new->end_ = end; + PositionOrphanInterval(manager, interval_new, interval_in); + + ++manager->count_; +} + +// When an interval has its start_ or end_ modified, it needs to be +// repositioned in the linked list. +static WEBP_INLINE void RepositionInterval(CostManager* const manager, + CostInterval* const interval) { + if (IsCostCacheIntervalWritable(interval->start_, interval->end_)) { + // Maybe interval has been resized and is small enough to be removed. + UpdateCostPerInterval(manager, interval->start_, interval->end_, + interval->index_, interval->distance_cost_); + PopInterval(manager, interval); + return; + } + + // Early exit if interval is at the right spot. + if ((interval->previous_ == NULL || + interval->previous_->start_ <= interval->start_) && + (interval->next_ == NULL || + interval->start_ <= interval->next_->start_)) { + return; + } + + ConnectIntervals(manager, interval->previous_, interval->next_); + PositionOrphanInterval(manager, interval, interval->previous_); +} + +// Given a new cost interval defined by its start at index, its last value and +// distance_cost, add its contributions to the previous intervals and costs. +// If handling the interval or one of its subintervals becomes to heavy, its +// contribution is added to the costs right away. +static WEBP_INLINE void PushInterval(CostManager* const manager, + double distance_cost, int index, + int last) { + size_t i; + CostInterval* interval = manager->head_; + CostInterval* interval_next; + const CostCacheInterval* const cost_cache_intervals = + manager->cache_intervals_; + + for (i = 0; i < manager->cache_intervals_size_ && + cost_cache_intervals[i].start_ < last; + ++i) { + // Define the intersection of the ith interval with the new one. + int start = index + cost_cache_intervals[i].start_; + const int end = index + (cost_cache_intervals[i].end_ > last + ? last + : cost_cache_intervals[i].end_); + const double lower_in = cost_cache_intervals[i].lower_; + const double upper_in = cost_cache_intervals[i].upper_; + const double lower_full_in = distance_cost + lower_in; + const double upper_full_in = distance_cost + upper_in; + + if (cost_cache_intervals[i].do_write_) { + UpdateCostPerInterval(manager, start, end, index, distance_cost); + continue; + } + + for (; interval != NULL && interval->start_ < end && start < end; + interval = interval_next) { + const double lower_full_interval = + interval->distance_cost_ + interval->lower_; + const double upper_full_interval = + interval->distance_cost_ + interval->upper_; + + interval_next = interval->next_; + + // Make sure we have some overlap + if (start >= interval->end_) continue; + + if (lower_full_in >= upper_full_interval) { + // When intervals are represented, the lower, the better. + // [**********************************************************] + // start end + // [----------------------------------] + // interval->start_ interval->end_ + // If we are worse than what we already have, add whatever we have so + // far up to interval. + const int start_new = interval->end_; + InsertInterval(manager, interval, distance_cost, lower_in, upper_in, + index, start, interval->start_); + start = start_new; + continue; + } + + // We know the two intervals intersect. + if (upper_full_in >= lower_full_interval) { + // There is no clear cut on which is best, so let's keep both. + // [*********[*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*]***********] + // start interval->start_ interval->end_ end + // OR + // [*********[*-*-*-*-*-*-*-*-*-*-*-]----------------------] + // start interval->start_ end interval->end_ + const int end_new = (interval->end_ <= end) ? interval->end_ : end; + InsertInterval(manager, interval, distance_cost, lower_in, upper_in, + index, start, end_new); + start = end_new; + } else if (start <= interval->start_ && interval->end_ <= end) { + // [----------------------------------] + // interval->start_ interval->end_ + // [**************************************************************] + // start end + // We can safely remove the old interval as it is fully included. + PopInterval(manager, interval); + } else { + if (interval->start_ <= start && end <= interval->end_) { + // [--------------------------------------------------------------] + // interval->start_ interval->end_ + // [*****************************] + // start end + // We have to split the old interval as it fully contains the new one. + const int end_original = interval->end_; + interval->end_ = start; + InsertInterval(manager, interval, interval->distance_cost_, + interval->lower_, interval->upper_, interval->index_, + end, end_original); + } else if (interval->start_ < start) { + // [------------------------------------] + // interval->start_ interval->end_ + // [*****************************] + // start end + interval->end_ = start; + } else { + // [------------------------------------] + // interval->start_ interval->end_ + // [*****************************] + // start end + interval->start_ = end; + } + + // The interval has been modified, we need to reposition it or write it. + RepositionInterval(manager, interval); + } + } + // Insert the remaining interval from start to end. + InsertInterval(manager, interval, distance_cost, lower_in, upper_in, index, + start, end); + } +} + static int BackwardReferencesHashChainDistanceOnly( - int xsize, int ysize, const uint32_t* const argb, - int quality, int cache_bits, VP8LHashChain* const hash_chain, + int xsize, int ysize, const uint32_t* const argb, int quality, + int cache_bits, const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, uint16_t* const dist_array) { int i; int ok = 0; int cc_init = 0; const int pix_count = xsize * ysize; const int use_color_cache = (cache_bits > 0); - float* const cost = - (float*)WebPSafeMalloc(pix_count, sizeof(*cost)); const size_t literal_array_size = sizeof(double) * (NUM_LITERAL_CODES + NUM_LENGTH_CODES + ((cache_bits > 0) ? (1 << cache_bits) : 0)); const size_t cost_model_size = sizeof(CostModel) + literal_array_size; CostModel* const cost_model = - (CostModel*)WebPSafeMalloc(1ULL, cost_model_size); + (CostModel*)WebPSafeCalloc(1ULL, cost_model_size); VP8LColorCache hashers; const int skip_length = 32 + quality; const int skip_min_distance_code = 2; - int iter_max = GetMaxItersForQuality(quality, 0); - const int window_size = GetWindowSizeForHashChain(quality, xsize); + CostManager* cost_manager = + (CostManager*)WebPSafeMalloc(1ULL, sizeof(*cost_manager)); - if (cost == NULL || cost_model == NULL) goto Error; + if (cost_model == NULL || cost_manager == NULL) goto Error; cost_model->literal_ = (double*)(cost_model + 1); if (use_color_cache) { @@ -634,34 +1199,91 @@ static int BackwardReferencesHashChainDistanceOnly( goto Error; } - for (i = 0; i < pix_count; ++i) cost[i] = 1e38f; + if (!CostManagerInit(cost_manager, dist_array, pix_count, cost_model)) { + goto Error; + } // We loop one pixel at a time, but store all currently best points to // non-processed locations from this point. dist_array[0] = 0; - HashChainReset(hash_chain); // Add first pixel as literal. - AddSingleLiteralWithCostModel(argb + 0, hash_chain, &hashers, cost_model, 0, - 0, use_color_cache, 0.0, cost, dist_array); + AddSingleLiteralWithCostModel(argb + 0, &hashers, cost_model, 0, + use_color_cache, 0.0, cost_manager->costs_, + dist_array); + for (i = 1; i < pix_count - 1; ++i) { - int offset = 0; - int len = 0; - double prev_cost = cost[i - 1]; - const int max_len = MaxFindCopyLength(pix_count - i); - HashChainFindCopy(hash_chain, i, argb, max_len, window_size, - iter_max, &offset, &len); + int offset = 0, len = 0; + double prev_cost = cost_manager->costs_[i - 1]; + HashChainFindCopy(hash_chain, i, &offset, &len); if (len >= MIN_LENGTH) { const int code = DistanceToPlaneCode(xsize, offset); - const double distance_cost = - prev_cost + GetDistanceCost(cost_model, code); - int k; - for (k = 1; k < len; ++k) { - const double cost_val = distance_cost + GetLengthCost(cost_model, k); - if (cost[i + k] > cost_val) { - cost[i + k] = (float)cost_val; - dist_array[i + k] = k + 1; + const double offset_cost = GetDistanceCost(cost_model, code); + const int first_i = i; + int j_max = 0, interval_ends_index = 0; + const int is_offset_zero = (offset_cost == 0.); + + if (!is_offset_zero) { + j_max = (int)ceil( + (cost_manager->max_cost_cache_ - cost_manager->min_cost_cache_) / + offset_cost); + if (j_max < 1) { + j_max = 1; + } else if (j_max > cost_manager->interval_ends_size_ - 1) { + // This could only happen in the case of MAX_LENGTH. + j_max = cost_manager->interval_ends_size_ - 1; + } + } // else j_max is unused anyway. + + // Instead of considering all contributions from a pixel i by calling: + // PushInterval(cost_manager, prev_cost + offset_cost, i, len); + // we optimize these contributions in case offset_cost stays the same for + // consecutive pixels. This describes a set of pixels similar to a + // previous set (e.g. constant color regions). + for (; i < pix_count - 1; ++i) { + int offset_next, len_next; + prev_cost = cost_manager->costs_[i - 1]; + + if (is_offset_zero) { + // No optimization can be made so we just push all of the + // contributions from i. + PushInterval(cost_manager, prev_cost, i, len); + } else { + // j_max is chosen as the smallest j such that: + // max of cost_cache_ < j*offset cost + min of cost_cache_ + // Therefore, the pixel influenced by i-j_max, cannot be influenced + // by i. Only the costs after the end of what i contributed need to be + // updated. cost_manager->interval_ends_ is a circular buffer that + // stores those ends. + const double distance_cost = prev_cost + offset_cost; + int j = cost_manager->interval_ends_[interval_ends_index]; + if (i - first_i <= j_max || + !IsCostCacheIntervalWritable(j, i + len)) { + PushInterval(cost_manager, distance_cost, i, len); + } else { + for (; j < i + len; ++j) { + UpdateCost(cost_manager, j, i, distance_cost); + } + } + // Store the new end in the circular buffer. + assert(interval_ends_index < cost_manager->interval_ends_size_); + cost_manager->interval_ends_[interval_ends_index] = i + len; + if (++interval_ends_index > j_max) interval_ends_index = 0; } + + // Check whether i is the last pixel to consider, as it is handled + // differently. + if (i + 1 >= pix_count - 1) break; + HashChainFindCopy(hash_chain, i + 1, &offset_next, &len_next); + if (offset_next != offset) break; + len = len_next; + UpdateCostPerIndex(cost_manager, i); + AddSingleLiteralWithCostModel(argb + i, &hashers, cost_model, i, + use_color_cache, prev_cost, + cost_manager->costs_, dist_array); } + // Submit the last pixel. + UpdateCostPerIndex(cost_manager, i + 1); + // This if is for speedup only. It roughly doubles the speed, and // makes compression worse by .1 %. if (len >= skip_length && code <= skip_min_distance_code) { @@ -669,53 +1291,55 @@ static int BackwardReferencesHashChainDistanceOnly( // lookups for better copies. // 1) insert the hashes. if (use_color_cache) { + int k; for (k = 0; k < len; ++k) { VP8LColorCacheInsert(&hashers, argb[i + k]); } } - // 2) Add to the hash_chain (but cannot add the last pixel) + // 2) jump. { - const int last = (len + i < pix_count - 1) ? len + i - : pix_count - 1; - for (k = i; k < last; ++k) { - HashChainInsert(hash_chain, &argb[k], k); - } + const int i_next = i + len - 1; // for loop does ++i, thus -1 here. + for (; i <= i_next; ++i) UpdateCostPerIndex(cost_manager, i + 1); + i = i_next; } - // 3) jump. - i += len - 1; // for loop does ++i, thus -1 here. goto next_symbol; } - if (len != MIN_LENGTH) { + if (len > MIN_LENGTH) { int code_min_length; double cost_total; - HashChainFindOffset(hash_chain, i, argb, MIN_LENGTH, window_size, - &offset); + offset = HashChainFindOffset(hash_chain, i); code_min_length = DistanceToPlaneCode(xsize, offset); cost_total = prev_cost + GetDistanceCost(cost_model, code_min_length) + GetLengthCost(cost_model, 1); - if (cost[i + 1] > cost_total) { - cost[i + 1] = (float)cost_total; + if (cost_manager->costs_[i + 1] > cost_total) { + cost_manager->costs_[i + 1] = (float)cost_total; dist_array[i + 1] = 2; } } + } else { // len < MIN_LENGTH + UpdateCostPerIndex(cost_manager, i + 1); } - AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i, - 0, use_color_cache, prev_cost, cost, - dist_array); + + AddSingleLiteralWithCostModel(argb + i, &hashers, cost_model, i, + use_color_cache, prev_cost, + cost_manager->costs_, dist_array); + next_symbol: ; } // Handle the last pixel. if (i == (pix_count - 1)) { - AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i, - 1, use_color_cache, cost[pix_count - 2], cost, - dist_array); + AddSingleLiteralWithCostModel( + argb + i, &hashers, cost_model, i, use_color_cache, + cost_manager->costs_[pix_count - 2], cost_manager->costs_, dist_array); } + ok = !refs->error_; Error: if (cc_init) VP8LColorCacheClear(&hashers); + CostManagerClear(cost_manager); WebPSafeFree(cost_model); - WebPSafeFree(cost); + WebPSafeFree(cost_manager); return ok; } @@ -739,18 +1363,14 @@ static void TraceBackwards(uint16_t* const dist_array, } static int BackwardReferencesHashChainFollowChosenPath( - int xsize, int ysize, const uint32_t* const argb, - int quality, int cache_bits, + const uint32_t* const argb, int cache_bits, const uint16_t* const chosen_path, int chosen_path_size, - VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const refs) { - const int pix_count = xsize * ysize; + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) { const int use_color_cache = (cache_bits > 0); int ix; int i = 0; int ok = 0; int cc_init = 0; - const int window_size = GetWindowSizeForHashChain(quality, xsize); VP8LColorCache hashers; if (use_color_cache) { @@ -759,25 +1379,17 @@ static int BackwardReferencesHashChainFollowChosenPath( } ClearBackwardRefs(refs); - HashChainReset(hash_chain); for (ix = 0; ix < chosen_path_size; ++ix) { - int offset = 0; const int len = chosen_path[ix]; if (len != 1) { int k; - HashChainFindOffset(hash_chain, i, argb, len, window_size, &offset); + const int offset = HashChainFindOffset(hash_chain, i); BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); if (use_color_cache) { for (k = 0; k < len; ++k) { VP8LColorCacheInsert(&hashers, argb[i + k]); } } - { - const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i; - for (k = 0; k < last; ++k) { - HashChainInsert(hash_chain, &argb[i + k], i + k); - } - } i += len; } else { PixOrCopy v; @@ -790,9 +1402,6 @@ static int BackwardReferencesHashChainFollowChosenPath( v = PixOrCopyCreateLiteral(argb[i]); } BackwardRefsCursorAdd(refs, v); - if (i + 1 < pix_count) { - HashChainInsert(hash_chain, &argb[i], i); - } ++i; } } @@ -803,11 +1412,10 @@ static int BackwardReferencesHashChainFollowChosenPath( } // Returns 1 on success. -static int BackwardReferencesTraceBackwards(int xsize, int ysize, - const uint32_t* const argb, - int quality, int cache_bits, - VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const refs) { +static int BackwardReferencesTraceBackwards( + int xsize, int ysize, const uint32_t* const argb, int quality, + int cache_bits, const VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs) { int ok = 0; const int dist_array_size = xsize * ysize; uint16_t* chosen_path = NULL; @@ -824,8 +1432,7 @@ static int BackwardReferencesTraceBackwards(int xsize, int ysize, } TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size); if (!BackwardReferencesHashChainFollowChosenPath( - xsize, ysize, argb, quality, cache_bits, chosen_path, chosen_path_size, - hash_chain, refs)) { + argb, cache_bits, chosen_path, chosen_path_size, hash_chain, refs)) { goto Error; } ok = 1; @@ -913,7 +1520,7 @@ static double ComputeCacheEntropy(const uint32_t* argb, // Returns 0 in case of memory error. static int CalculateBestCacheSize(const uint32_t* const argb, int xsize, int ysize, int quality, - VP8LHashChain* const hash_chain, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, int* const lz77_computed, int* const best_cache_bits) { @@ -933,8 +1540,8 @@ static int CalculateBestCacheSize(const uint32_t* const argb, // Local color cache is disabled. return 1; } - if (!BackwardReferencesLz77(xsize, ysize, argb, cache_bits_low, quality, 0, - hash_chain, refs)) { + if (!BackwardReferencesLz77(xsize, ysize, argb, cache_bits_low, hash_chain, + refs)) { return 0; } // Do a binary search to find the optimal entropy for cache_bits. @@ -999,13 +1606,12 @@ static int BackwardRefsWithLocalCache(const uint32_t* const argb, } static VP8LBackwardRefs* GetBackwardReferencesLowEffort( - int width, int height, const uint32_t* const argb, int quality, - int* const cache_bits, VP8LHashChain* const hash_chain, + int width, int height, const uint32_t* const argb, + int* const cache_bits, const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2]) { VP8LBackwardRefs* refs_lz77 = &refs_array[0]; *cache_bits = 0; - if (!BackwardReferencesLz77(width, height, argb, 0, quality, - 1 /* Low effort. */, hash_chain, refs_lz77)) { + if (!BackwardReferencesLz77(width, height, argb, 0, hash_chain, refs_lz77)) { return NULL; } BackwardReferences2DLocality(width, refs_lz77); @@ -1014,7 +1620,7 @@ static VP8LBackwardRefs* GetBackwardReferencesLowEffort( static VP8LBackwardRefs* GetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int* const cache_bits, VP8LHashChain* const hash_chain, + int* const cache_bits, const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2]) { int lz77_is_useful; int lz77_computed; @@ -1037,8 +1643,8 @@ static VP8LBackwardRefs* GetBackwardReferences( } } } else { - if (!BackwardReferencesLz77(width, height, argb, *cache_bits, quality, - 0 /* Low effort. */, hash_chain, refs_lz77)) { + if (!BackwardReferencesLz77(width, height, argb, *cache_bits, hash_chain, + refs_lz77)) { goto Error; } } @@ -1097,11 +1703,11 @@ static VP8LBackwardRefs* GetBackwardReferences( VP8LBackwardRefs* VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int low_effort, int* const cache_bits, VP8LHashChain* const hash_chain, - VP8LBackwardRefs refs_array[2]) { + int low_effort, int* const cache_bits, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2]) { if (low_effort) { - return GetBackwardReferencesLowEffort(width, height, argb, quality, - cache_bits, hash_chain, refs_array); + return GetBackwardReferencesLowEffort(width, height, argb, cache_bits, + hash_chain, refs_array); } else { return GetBackwardReferences(width, height, argb, quality, cache_bits, hash_chain, refs_array); diff --git a/drivers/webp/enc/backward_references.h b/drivers/webp/enc/backward_references.h index e410b06f7d..b72a01fb0e 100644 --- a/drivers/webp/enc/backward_references.h +++ b/drivers/webp/enc/backward_references.h @@ -115,11 +115,12 @@ static WEBP_INLINE uint32_t PixOrCopyDistance(const PixOrCopy* const p) { typedef struct VP8LHashChain VP8LHashChain; struct VP8LHashChain { - // Stores the most recently added position with the given hash value. - int32_t hash_to_first_index_[HASH_SIZE]; - // chain_[pos] stores the previous position with the same hash value - // for every pixel in the image. - int32_t* chain_; + // The 20 most significant bits contain the offset at which the best match + // is found. These 20 bits are the limit defined by GetWindowSizeForHashChain + // (through WINDOW_SIZE = 1<<20). + // The lower 12 bits contain the length of the match. The 12 bit limit is + // defined in MaxFindCopyLength with MAX_LENGTH=4096. + uint32_t* offset_length_; // This is the maximum size of the hash_chain that can be constructed. // Typically this is the pixel count (width x height) for a given image. int size_; @@ -127,6 +128,9 @@ struct VP8LHashChain { // Must be called first, to set size. int VP8LHashChainInit(VP8LHashChain* const p, int size); +// Pre-compute the best matches for argb. +int VP8LHashChainFill(VP8LHashChain* const p, int quality, + const uint32_t* const argb, int xsize, int ysize); void VP8LHashChainClear(VP8LHashChain* const p); // release memory // ----------------------------------------------------------------------------- @@ -192,8 +196,8 @@ static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) { // refs[0] or refs[1]. VP8LBackwardRefs* VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int low_effort, int* const cache_bits, VP8LHashChain* const hash_chain, - VP8LBackwardRefs refs[2]); + int low_effort, int* const cache_bits, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs[2]); #ifdef __cplusplus } diff --git a/drivers/webp/enc/filter.c b/drivers/webp/enc/filter.c index 1a4dd947fb..e8ea8b4ff2 100644 --- a/drivers/webp/enc/filter.c +++ b/drivers/webp/enc/filter.c @@ -107,10 +107,9 @@ static void DoFilter(const VP8EncIterator* const it, int level) { //------------------------------------------------------------------------------ // SSIM metric -enum { KERNEL = 3 }; static const double kMinValue = 1.e-10; // minimal threshold -void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst) { +void VP8SSIMAddStats(const VP8DistoStats* const src, VP8DistoStats* const dst) { dst->w += src->w; dst->xm += src->xm; dst->ym += src->ym; @@ -119,32 +118,7 @@ void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst) { dst->yym += src->yym; } -static void VP8SSIMAccumulate(const uint8_t* src1, int stride1, - const uint8_t* src2, int stride2, - int xo, int yo, int W, int H, - DistoStats* const stats) { - const int ymin = (yo - KERNEL < 0) ? 0 : yo - KERNEL; - const int ymax = (yo + KERNEL > H - 1) ? H - 1 : yo + KERNEL; - const int xmin = (xo - KERNEL < 0) ? 0 : xo - KERNEL; - const int xmax = (xo + KERNEL > W - 1) ? W - 1 : xo + KERNEL; - int x, y; - src1 += ymin * stride1; - src2 += ymin * stride2; - for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) { - for (x = xmin; x <= xmax; ++x) { - const int s1 = src1[x]; - const int s2 = src2[x]; - stats->w += 1; - stats->xm += s1; - stats->ym += s2; - stats->xxm += s1 * s1; - stats->xym += s1 * s2; - stats->yym += s2 * s2; - } - } -} - -double VP8SSIMGet(const DistoStats* const stats) { +double VP8SSIMGet(const VP8DistoStats* const stats) { const double xmxm = stats->xm * stats->xm; const double ymym = stats->ym * stats->ym; const double xmym = stats->xm * stats->ym; @@ -165,7 +139,7 @@ double VP8SSIMGet(const DistoStats* const stats) { return (fden != 0.) ? fnum / fden : kMinValue; } -double VP8SSIMGetSquaredError(const DistoStats* const s) { +double VP8SSIMGetSquaredError(const VP8DistoStats* const s) { if (s->w > 0.) { const double iw2 = 1. / (s->w * s->w); const double sxx = s->xxm * s->w - s->xm * s->xm; @@ -177,34 +151,66 @@ double VP8SSIMGetSquaredError(const DistoStats* const s) { return kMinValue; } +#define LIMIT(A, M) ((A) > (M) ? (M) : (A)) +static void VP8SSIMAccumulateRow(const uint8_t* src1, int stride1, + const uint8_t* src2, int stride2, + int y, int W, int H, + VP8DistoStats* const stats) { + int x = 0; + const int w0 = LIMIT(VP8_SSIM_KERNEL, W); + for (x = 0; x < w0; ++x) { + VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); + } + for (; x <= W - 8 + VP8_SSIM_KERNEL; ++x) { + VP8SSIMAccumulate( + src1 + (y - VP8_SSIM_KERNEL) * stride1 + (x - VP8_SSIM_KERNEL), stride1, + src2 + (y - VP8_SSIM_KERNEL) * stride2 + (x - VP8_SSIM_KERNEL), stride2, + stats); + } + for (; x < W; ++x) { + VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); + } +} + void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1, const uint8_t* src2, int stride2, - int W, int H, DistoStats* const stats) { + int W, int H, VP8DistoStats* const stats) { int x, y; - for (y = 0; y < H; ++y) { + const int h0 = LIMIT(VP8_SSIM_KERNEL, H); + const int h1 = LIMIT(VP8_SSIM_KERNEL, H - VP8_SSIM_KERNEL); + for (y = 0; y < h0; ++y) { + for (x = 0; x < W; ++x) { + VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); + } + } + for (; y < h1; ++y) { + VP8SSIMAccumulateRow(src1, stride1, src2, stride2, y, W, H, stats); + } + for (; y < H; ++y) { for (x = 0; x < W; ++x) { - VP8SSIMAccumulate(src1, stride1, src2, stride2, x, y, W, H, stats); + VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); } } } +#undef LIMIT static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) { int x, y; - DistoStats s = { .0, .0, .0, .0, .0, .0 }; + VP8DistoStats s = { .0, .0, .0, .0, .0, .0 }; // compute SSIM in a 10 x 10 window - for (x = 3; x < 13; x++) { - for (y = 3; y < 13; y++) { - VP8SSIMAccumulate(yuv1 + Y_OFF_ENC, BPS, yuv2 + Y_OFF_ENC, BPS, - x, y, 16, 16, &s); + for (y = VP8_SSIM_KERNEL; y < 16 - VP8_SSIM_KERNEL; y++) { + for (x = VP8_SSIM_KERNEL; x < 16 - VP8_SSIM_KERNEL; x++) { + VP8SSIMAccumulateClipped(yuv1 + Y_OFF_ENC, BPS, yuv2 + Y_OFF_ENC, BPS, + x, y, 16, 16, &s); } } for (x = 1; x < 7; x++) { for (y = 1; y < 7; y++) { - VP8SSIMAccumulate(yuv1 + U_OFF_ENC, BPS, yuv2 + U_OFF_ENC, BPS, - x, y, 8, 8, &s); - VP8SSIMAccumulate(yuv1 + V_OFF_ENC, BPS, yuv2 + V_OFF_ENC, BPS, - x, y, 8, 8, &s); + VP8SSIMAccumulateClipped(yuv1 + U_OFF_ENC, BPS, yuv2 + U_OFF_ENC, BPS, + x, y, 8, 8, &s); + VP8SSIMAccumulateClipped(yuv1 + V_OFF_ENC, BPS, yuv2 + V_OFF_ENC, BPS, + x, y, 8, 8, &s); } } return VP8SSIMGet(&s); @@ -222,6 +228,7 @@ void VP8InitFilter(VP8EncIterator* const it) { (*it->lf_stats_)[s][i] = 0; } } + VP8SSIMDspInit(); } } @@ -229,7 +236,7 @@ void VP8StoreFilterStats(VP8EncIterator* const it) { int d; VP8Encoder* const enc = it->enc_; const int s = it->mb_->segment_; - const int level0 = enc->dqm_[s].fstrength_; // TODO: ref_lf_delta[] + const int level0 = enc->dqm_[s].fstrength_; // explore +/-quant range of values around level0 const int delta_min = -enc->dqm_[s].quant_; diff --git a/drivers/webp/enc/histogram.c b/drivers/webp/enc/histogram.c index 68c27fb1db..61544f4ccd 100644 --- a/drivers/webp/enc/histogram.c +++ b/drivers/webp/enc/histogram.c @@ -157,6 +157,109 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, } // ----------------------------------------------------------------------------- +// Entropy-related functions. + +static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) { + double mix; + if (entropy->nonzeros < 5) { + if (entropy->nonzeros <= 1) { + return 0; + } + // Two symbols, they will be 0 and 1 in a Huffman code. + // Let's mix in a bit of entropy to favor good clustering when + // distributions of these are combined. + if (entropy->nonzeros == 2) { + return 0.99 * entropy->sum + 0.01 * entropy->entropy; + } + // No matter what the entropy says, we cannot be better than min_limit + // with Huffman coding. I am mixing a bit of entropy into the + // min_limit since it produces much better (~0.5 %) compression results + // perhaps because of better entropy clustering. + if (entropy->nonzeros == 3) { + mix = 0.95; + } else { + mix = 0.7; // nonzeros == 4. + } + } else { + mix = 0.627; + } + + { + double min_limit = 2 * entropy->sum - entropy->max_val; + min_limit = mix * min_limit + (1.0 - mix) * entropy->entropy; + return (entropy->entropy < min_limit) ? min_limit : entropy->entropy; + } +} + +double VP8LBitsEntropy(const uint32_t* const array, int n, + uint32_t* const trivial_symbol) { + VP8LBitEntropy entropy; + VP8LBitsEntropyUnrefined(array, n, &entropy); + if (trivial_symbol != NULL) { + *trivial_symbol = + (entropy.nonzeros == 1) ? entropy.nonzero_code : VP8L_NON_TRIVIAL_SYM; + } + + return BitsEntropyRefine(&entropy); +} + +static double InitialHuffmanCost(void) { + // Small bias because Huffman code length is typically not stored in + // full length. + static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3; + static const double kSmallBias = 9.1; + return kHuffmanCodeOfHuffmanCodeSize - kSmallBias; +} + +// Finalize the Huffman cost based on streak numbers and length type (<3 or >=3) +static double FinalHuffmanCost(const VP8LStreaks* const stats) { + double retval = InitialHuffmanCost(); + retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1]; + retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1]; + retval += 1.796875 * stats->streaks[0][0]; + retval += 3.28125 * stats->streaks[1][0]; + return retval; +} + +// Get the symbol entropy for the distribution 'population'. +// Set 'trivial_sym', if there's only one symbol present in the distribution. +static double PopulationCost(const uint32_t* const population, int length, + uint32_t* const trivial_sym) { + VP8LBitEntropy bit_entropy; + VP8LStreaks stats; + VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats); + if (trivial_sym != NULL) { + *trivial_sym = (bit_entropy.nonzeros == 1) ? bit_entropy.nonzero_code + : VP8L_NON_TRIVIAL_SYM; + } + + return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats); +} + +static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X, + const uint32_t* const Y, + int length) { + VP8LBitEntropy bit_entropy; + VP8LStreaks stats; + VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats); + + return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats); +} + +// Estimates the Entropy + Huffman + other block overhead size cost. +double VP8LHistogramEstimateBits(const VP8LHistogram* const p) { + return + PopulationCost( + p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL) + + PopulationCost(p->red_, NUM_LITERAL_CODES, NULL) + + PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL) + + PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL) + + PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL) + + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES) + + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES); +} + +// ----------------------------------------------------------------------------- // Various histogram combine/cost-eval functions static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, @@ -165,26 +268,25 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, double* cost) { const int palette_code_bits = a->palette_code_bits_; assert(a->palette_code_bits_ == b->palette_code_bits_); - *cost += VP8LGetCombinedEntropy(a->literal_, b->literal_, - VP8LHistogramNumCodes(palette_code_bits)); + *cost += GetCombinedEntropy(a->literal_, b->literal_, + VP8LHistogramNumCodes(palette_code_bits)); *cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES, b->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES); if (*cost > cost_threshold) return 0; - *cost += VP8LGetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES); + *cost += GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES); if (*cost > cost_threshold) return 0; - *cost += VP8LGetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES); + *cost += GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES); if (*cost > cost_threshold) return 0; - *cost += VP8LGetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES); + *cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES); if (*cost > cost_threshold) return 0; - *cost += VP8LGetCombinedEntropy(a->distance_, b->distance_, - NUM_DISTANCE_CODES); - *cost += VP8LExtraCostCombined(a->distance_, b->distance_, - NUM_DISTANCE_CODES); + *cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES); + *cost += + VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES); if (*cost > cost_threshold) return 0; return 1; @@ -262,17 +364,17 @@ static void UpdateDominantCostRange( static void UpdateHistogramCost(VP8LHistogram* const h) { uint32_t alpha_sym, red_sym, blue_sym; - const double alpha_cost = VP8LPopulationCost(h->alpha_, NUM_LITERAL_CODES, - &alpha_sym); + const double alpha_cost = + PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym); const double distance_cost = - VP8LPopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) + + PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) + VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES); const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_); - h->literal_cost_ = VP8LPopulationCost(h->literal_, num_codes, NULL) + + h->literal_cost_ = PopulationCost(h->literal_, num_codes, NULL) + VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES); - h->red_cost_ = VP8LPopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym); - h->blue_cost_ = VP8LPopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym); + h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym); + h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym); h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ + alpha_cost + distance_cost; if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) { @@ -284,29 +386,27 @@ static void UpdateHistogramCost(VP8LHistogram* const h) { } static int GetBinIdForEntropy(double min, double max, double val) { - const double range = max - min + 1e-6; - const double delta = val - min; - return (int)(NUM_PARTITIONS * delta / range); + const double range = max - min; + if (range > 0.) { + const double delta = val - min; + return (int)((NUM_PARTITIONS - 1e-6) * delta / range); + } else { + return 0; + } } -static int GetHistoBinIndexLowEffort( - const VP8LHistogram* const h, const DominantCostRange* const c) { - const int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_, - h->literal_cost_); +static int GetHistoBinIndex(const VP8LHistogram* const h, + const DominantCostRange* const c, int low_effort) { + int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_, + h->literal_cost_); assert(bin_id < NUM_PARTITIONS); - return bin_id; -} - -static int GetHistoBinIndex( - const VP8LHistogram* const h, const DominantCostRange* const c) { - const int bin_id = - GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_) + - NUM_PARTITIONS * GetBinIdForEntropy(c->red_min_, c->red_max_, - h->red_cost_) + - NUM_PARTITIONS * NUM_PARTITIONS * GetBinIdForEntropy(c->literal_min_, - c->literal_max_, - h->literal_cost_); - assert(bin_id < BIN_SIZE); + if (!low_effort) { + bin_id = bin_id * NUM_PARTITIONS + + GetBinIdForEntropy(c->red_min_, c->red_max_, h->red_cost_); + bin_id = bin_id * NUM_PARTITIONS + + GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_); + assert(bin_id < BIN_SIZE); + } return bin_id; } @@ -367,16 +467,13 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo, // bin-hash histograms on three of the dominant (literal, red and blue) // symbol costs. for (i = 0; i < histo_size; ++i) { - int num_histos; - VP8LHistogram* const histo = histograms[i]; - const int16_t bin_id = low_effort ? - (int16_t)GetHistoBinIndexLowEffort(histo, &cost_range) : - (int16_t)GetHistoBinIndex(histo, &cost_range); + const VP8LHistogram* const histo = histograms[i]; + const int bin_id = GetHistoBinIndex(histo, &cost_range, low_effort); const int bin_offset = bin_id * bin_depth; // bin_map[n][0] for every bin 'n' maintains the counter for the number of // histograms in that bin. // Get and increment the num_histos in that bin. - num_histos = ++bin_map[bin_offset]; + const int num_histos = ++bin_map[bin_offset]; assert(bin_offset + num_histos < bin_depth * BIN_SIZE); // Add histogram i'th index at num_histos (last) position in the bin_map. bin_map[bin_offset + num_histos] = i; @@ -478,26 +575,31 @@ typedef struct { } HistogramPair; typedef struct { - HistogramPair* heap; - int* positions; + HistogramPair* queue; int size; - int max_index; -} HistoHeap; - -static int HistoHeapInit(HistoHeap* const histo_heap, const int max_index) { - histo_heap->size = 0; - histo_heap->max_index = max_index; - histo_heap->heap = WebPSafeMalloc(max_index * max_index, - sizeof(*histo_heap->heap)); - histo_heap->positions = WebPSafeMalloc(max_index * max_index, - sizeof(*histo_heap->positions)); - return histo_heap->heap != NULL && histo_heap->positions != NULL; -} - -static void HistoHeapClear(HistoHeap* const histo_heap) { - assert(histo_heap != NULL); - WebPSafeFree(histo_heap->heap); - WebPSafeFree(histo_heap->positions); + int max_size; +} HistoQueue; + +static int HistoQueueInit(HistoQueue* const histo_queue, const int max_index) { + histo_queue->size = 0; + // max_index^2 for the queue size is safe. If you look at + // HistogramCombineGreedy, and imagine that UpdateQueueFront always pushes + // data to the queue, you insert at most: + // - max_index*(max_index-1)/2 (the first two for loops) + // - max_index - 1 in the last for loop at the first iteration of the while + // loop, max_index - 2 at the second iteration ... therefore + // max_index*(max_index-1)/2 overall too + histo_queue->max_size = max_index * max_index; + // We allocate max_size + 1 because the last element at index "size" is + // used as temporary data (and it could be up to max_size). + histo_queue->queue = WebPSafeMalloc(histo_queue->max_size + 1, + sizeof(*histo_queue->queue)); + return histo_queue->queue != NULL; +} + +static void HistoQueueClear(HistoQueue* const histo_queue) { + assert(histo_queue != NULL); + WebPSafeFree(histo_queue->queue); } static void SwapHistogramPairs(HistogramPair *p1, @@ -507,66 +609,33 @@ static void SwapHistogramPairs(HistogramPair *p1, *p2 = tmp; } -// Given a valid min-heap in range [0, heap_size-1) this function places value -// heap[heap_size-1] into right location within heap and sets its position in -// positions array. -static void HeapPush(HistoHeap* const histo_heap) { - HistogramPair* const heap = histo_heap->heap - 1; - int* const positions = histo_heap->positions; - const int max_index = histo_heap->max_index; - int v; - ++histo_heap->size; - v = histo_heap->size; - while (v > 1 && heap[v].cost_diff < heap[v >> 1].cost_diff) { - SwapHistogramPairs(&heap[v], &heap[v >> 1]); - // Change position of moved pair in heap. - if (heap[v].idx1 >= 0) { - const int pos = heap[v].idx1 * max_index + heap[v].idx2; - assert(pos >= 0 && pos < max_index * max_index); - positions[pos] = v; - } - v >>= 1; - } - positions[heap[v].idx1 * max_index + heap[v].idx2] = v; -} - -// Given a valid min-heap in range [0, heap_size) this function shortens heap -// range by one and places element with the lowest value to (heap_size-1). -static void HeapPop(HistoHeap* const histo_heap) { - HistogramPair* const heap = histo_heap->heap - 1; - int* const positions = histo_heap->positions; - const int heap_size = histo_heap->size; - const int max_index = histo_heap->max_index; - int v = 1; - if (heap[v].idx1 >= 0) { - positions[heap[v].idx1 * max_index + heap[v].idx2] = -1; - } - SwapHistogramPairs(&heap[v], &heap[heap_size]); - while ((v << 1) < heap_size) { - int son = (heap[v << 1].cost_diff < heap[v].cost_diff) ? (v << 1) : v; - if (((v << 1) + 1) < heap_size && - heap[(v << 1) + 1].cost_diff < heap[son].cost_diff) { - son = (v << 1) + 1; - } - if (son == v) break; - SwapHistogramPairs(&heap[v], &heap[son]); - // Change position of moved pair in heap. - if (heap[v].idx1 >= 0) { - positions[heap[v].idx1 * max_index + heap[v].idx2] = v; - } - v = son; - } - if (heap[v].idx1 >= 0) { - positions[heap[v].idx1 * max_index + heap[v].idx2] = v; +// Given a valid priority queue in range [0, queue_size) this function checks +// whether histo_queue[queue_size] should be accepted and swaps it with the +// front if it is smaller. Otherwise, it leaves it as is. +static void UpdateQueueFront(HistoQueue* const histo_queue) { + if (histo_queue->queue[histo_queue->size].cost_diff >= 0) return; + + if (histo_queue->queue[histo_queue->size].cost_diff < + histo_queue->queue[0].cost_diff) { + SwapHistogramPairs(histo_queue->queue, + histo_queue->queue + histo_queue->size); } - --histo_heap->size; + ++histo_queue->size; + + // We cannot add more elements than the capacity. + // The allocation adds an extra element to the official capacity so that + // histo_queue->queue[histo_queue->max_size] is read/written within bound. + assert(histo_queue->size <= histo_queue->max_size); } // ----------------------------------------------------------------------------- static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2, - HistogramPair* const pair, - VP8LHistogram* const histos) { + HistogramPair* const pair) { + VP8LHistogram* h1; + VP8LHistogram* h2; + double sum_cost; + if (idx1 > idx2) { const int tmp = idx2; idx2 = idx1; @@ -574,60 +643,27 @@ static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2, } pair->idx1 = idx1; pair->idx2 = idx2; - pair->cost_diff = - HistogramAddEval(histograms[idx1], histograms[idx2], histos, 0); - pair->cost_combo = histos->bit_cost_; -} - -#define POSITION_INVALID (-1) - -// Invalidates pairs intersecting (idx1, idx2) in heap. -static void InvalidatePairs(int idx1, int idx2, - const HistoHeap* const histo_heap) { - HistogramPair* const heap = histo_heap->heap - 1; - int* const positions = histo_heap->positions; - const int max_index = histo_heap->max_index; - int i; - for (i = 0; i < idx1; ++i) { - const int pos = positions[i * max_index + idx1]; - if (pos >= 0) { - heap[pos].idx1 = POSITION_INVALID; - } - } - for (i = idx1 + 1; i < max_index; ++i) { - const int pos = positions[idx1 * max_index + i]; - if (pos >= 0) { - heap[pos].idx1 = POSITION_INVALID; - } - } - for (i = 0; i < idx2; ++i) { - const int pos = positions[i * max_index + idx2]; - if (pos >= 0) { - heap[pos].idx1 = POSITION_INVALID; - } - } - for (i = idx2 + 1; i < max_index; ++i) { - const int pos = positions[idx2 * max_index + i]; - if (pos >= 0) { - heap[pos].idx1 = POSITION_INVALID; - } - } + h1 = histograms[idx1]; + h2 = histograms[idx2]; + sum_cost = h1->bit_cost_ + h2->bit_cost_; + pair->cost_combo = 0.; + GetCombinedHistogramEntropy(h1, h2, sum_cost, &pair->cost_combo); + pair->cost_diff = pair->cost_combo - sum_cost; } // Combines histograms by continuously choosing the one with the highest cost // reduction. -static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo, - VP8LHistogram* const histos) { +static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) { int ok = 0; int image_histo_size = image_histo->size; int i, j; VP8LHistogram** const histograms = image_histo->histograms; // Indexes of remaining histograms. int* const clusters = WebPSafeMalloc(image_histo_size, sizeof(*clusters)); - // Heap of histogram pairs. - HistoHeap histo_heap; + // Priority queue of histogram pairs. + HistoQueue histo_queue; - if (!HistoHeapInit(&histo_heap, image_histo_size) || clusters == NULL) { + if (!HistoQueueInit(&histo_queue, image_histo_size) || clusters == NULL) { goto End; } @@ -636,19 +672,17 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo, clusters[i] = i; for (j = i + 1; j < image_histo_size; ++j) { // Initialize positions array. - histo_heap.positions[i * histo_heap.max_index + j] = POSITION_INVALID; - PreparePair(histograms, i, j, &histo_heap.heap[histo_heap.size], histos); - if (histo_heap.heap[histo_heap.size].cost_diff < 0) { - HeapPush(&histo_heap); - } + PreparePair(histograms, i, j, &histo_queue.queue[histo_queue.size]); + UpdateQueueFront(&histo_queue); } } - while (image_histo_size > 1 && histo_heap.size > 0) { - const int idx1 = histo_heap.heap[0].idx1; - const int idx2 = histo_heap.heap[0].idx2; + while (image_histo_size > 1 && histo_queue.size > 0) { + HistogramPair* copy_to; + const int idx1 = histo_queue.queue[0].idx1; + const int idx2 = histo_queue.queue[0].idx2; VP8LHistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]); - histograms[idx1]->bit_cost_ = histo_heap.heap[0].cost_combo; + histograms[idx1]->bit_cost_ = histo_queue.queue[0].cost_combo; // Remove merged histogram. for (i = 0; i + 1 < image_histo_size; ++i) { if (clusters[i] >= idx2) { @@ -657,22 +691,31 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo, } --image_histo_size; - // Invalidate pairs intersecting the just combined best pair. - InvalidatePairs(idx1, idx2, &histo_heap); - - // Pop invalid pairs from the top of the heap. - while (histo_heap.size > 0 && histo_heap.heap[0].idx1 < 0) { - HeapPop(&histo_heap); + // Remove pairs intersecting the just combined best pair. This will + // therefore pop the head of the queue. + copy_to = histo_queue.queue; + for (i = 0; i < histo_queue.size; ++i) { + HistogramPair* const p = histo_queue.queue + i; + if (p->idx1 == idx1 || p->idx2 == idx1 || + p->idx1 == idx2 || p->idx2 == idx2) { + // Do not copy the invalid pair. + continue; + } + if (p->cost_diff < histo_queue.queue[0].cost_diff) { + // Replace the top of the queue if we found better. + SwapHistogramPairs(histo_queue.queue, p); + } + SwapHistogramPairs(copy_to, p); + ++copy_to; } + histo_queue.size = (int)(copy_to - histo_queue.queue); - // Push new pairs formed with combined histogram to the heap. + // Push new pairs formed with combined histogram to the queue. for (i = 0; i < image_histo_size; ++i) { if (clusters[i] != idx1) { PreparePair(histograms, idx1, clusters[i], - &histo_heap.heap[histo_heap.size], histos); - if (histo_heap.heap[histo_heap.size].cost_diff < 0) { - HeapPush(&histo_heap); - } + &histo_queue.queue[histo_queue.size]); + UpdateQueueFront(&histo_queue); } } } @@ -688,15 +731,14 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo, End: WebPSafeFree(clusters); - HistoHeapClear(&histo_heap); + HistoQueueClear(&histo_queue); return ok; } -static VP8LHistogram* HistogramCombineStochastic( - VP8LHistogramSet* const image_histo, - VP8LHistogram* tmp_histo, - VP8LHistogram* best_combo, - int quality, int min_cluster_size) { +static void HistogramCombineStochastic(VP8LHistogramSet* const image_histo, + VP8LHistogram* tmp_histo, + VP8LHistogram* best_combo, + int quality, int min_cluster_size) { int iter; uint32_t seed = 0; int tries_with_no_success = 0; @@ -756,7 +798,6 @@ static VP8LHistogram* HistogramCombineStochastic( } } image_histo->size = image_histo_size; - return best_combo; } // ----------------------------------------------------------------------------- @@ -764,24 +805,23 @@ static VP8LHistogram* HistogramCombineStochastic( // Find the best 'out' histogram for each of the 'in' histograms. // Note: we assume that out[]->bit_cost_ is already up-to-date. -static void HistogramRemap(const VP8LHistogramSet* const orig_histo, - const VP8LHistogramSet* const image_histo, +static void HistogramRemap(const VP8LHistogramSet* const in, + const VP8LHistogramSet* const out, uint16_t* const symbols) { int i; - VP8LHistogram** const orig_histograms = orig_histo->histograms; - VP8LHistogram** const histograms = image_histo->histograms; - const int orig_histo_size = orig_histo->size; - const int image_histo_size = image_histo->size; - if (image_histo_size > 1) { - for (i = 0; i < orig_histo_size; ++i) { + VP8LHistogram** const in_histo = in->histograms; + VP8LHistogram** const out_histo = out->histograms; + const int in_size = in->size; + const int out_size = out->size; + if (out_size > 1) { + for (i = 0; i < in_size; ++i) { int best_out = 0; - double best_bits = - HistogramAddThresh(histograms[0], orig_histograms[i], MAX_COST); + double best_bits = MAX_COST; int k; - for (k = 1; k < image_histo_size; ++k) { + for (k = 0; k < out_size; ++k) { const double cur_bits = - HistogramAddThresh(histograms[k], orig_histograms[i], best_bits); - if (cur_bits < best_bits) { + HistogramAddThresh(out_histo[k], in_histo[i], best_bits); + if (k == 0 || cur_bits < best_bits) { best_bits = cur_bits; best_out = k; } @@ -789,20 +829,20 @@ static void HistogramRemap(const VP8LHistogramSet* const orig_histo, symbols[i] = best_out; } } else { - assert(image_histo_size == 1); - for (i = 0; i < orig_histo_size; ++i) { + assert(out_size == 1); + for (i = 0; i < in_size; ++i) { symbols[i] = 0; } } // Recompute each out based on raw and symbols. - for (i = 0; i < image_histo_size; ++i) { - HistogramClear(histograms[i]); + for (i = 0; i < out_size; ++i) { + HistogramClear(out_histo[i]); } - for (i = 0; i < orig_histo_size; ++i) { + for (i = 0; i < in_size; ++i) { const int idx = symbols[i]; - VP8LHistogramAdd(orig_histograms[i], histograms[idx], histograms[idx]); + VP8LHistogramAdd(in_histo[i], out_histo[idx], out_histo[idx]); } } @@ -876,11 +916,10 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, const float x = quality / 100.f; // cubic ramp between 1 and MAX_HISTO_GREEDY: const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1)); - cur_combo = HistogramCombineStochastic(image_histo, - tmp_histos->histograms[0], - cur_combo, quality, threshold_size); + HistogramCombineStochastic(image_histo, tmp_histos->histograms[0], + cur_combo, quality, threshold_size); if ((image_histo->size <= threshold_size) && - !HistogramCombineGreedy(image_histo, cur_combo)) { + !HistogramCombineGreedy(image_histo)) { goto Error; } } diff --git a/drivers/webp/enc/histogram.h b/drivers/webp/enc/histogram.h index 72f045793a..59de42b33e 100644 --- a/drivers/webp/enc/histogram.h +++ b/drivers/webp/enc/histogram.h @@ -24,6 +24,9 @@ extern "C" { #endif +// Not a trivial literal symbol. +#define VP8L_NON_TRIVIAL_SYM (0xffffffff) + // A simple container for histograms of data. typedef struct { // literal_ contains green literal, palette-code and @@ -103,6 +106,16 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, VP8LHistogramSet* const tmp_histos, uint16_t* const histogram_symbols); +// Returns the entropy for the symbols in the input array. +// Also sets trivial_symbol to the code value, if the array has only one code +// value. Otherwise, set it to VP8L_NON_TRIVIAL_SYM. +double VP8LBitsEntropy(const uint32_t* const array, int n, + uint32_t* const trivial_symbol); + +// Estimate how many bits the combined entropy of literals and distance +// approximately maps to. +double VP8LHistogramEstimateBits(const VP8LHistogram* const p); + #ifdef __cplusplus } #endif diff --git a/drivers/webp/enc/near_lossless.c b/drivers/webp/enc/near_lossless.c index 9bc0f0e786..f4ab91f571 100644 --- a/drivers/webp/enc/near_lossless.c +++ b/drivers/webp/enc/near_lossless.c @@ -14,6 +14,7 @@ // Author: Jyrki Alakuijala (jyrki@google.com) // Converted to C by Aleksander Kramarz (akramarz@google.com) +#include <assert.h> #include <stdlib.h> #include "../dsp/lossless.h" @@ -23,42 +24,14 @@ #define MIN_DIM_FOR_NEAR_LOSSLESS 64 #define MAX_LIMIT_BITS 5 -// Computes quantized pixel value and distance from original value. -static void GetValAndDistance(int a, int initial, int bits, - int* const val, int* const distance) { - const int mask = ~((1 << bits) - 1); - *val = (initial & mask) | (initial >> (8 - bits)); - *distance = 2 * abs(a - *val); -} - -// Clamps the value to range [0, 255]. -static int Clamp8b(int val) { - const int min_val = 0; - const int max_val = 0xff; - return (val < min_val) ? min_val : (val > max_val) ? max_val : val; -} - -// Quantizes values {a, a+(1<<bits), a-(1<<bits)} and returns the nearest one. +// Quantizes the value up or down to a multiple of 1<<bits (or to 255), +// choosing the closer one, resolving ties using bankers' rounding. static int FindClosestDiscretized(int a, int bits) { - int best_val = a, i; - int min_distance = 256; - - for (i = -1; i <= 1; ++i) { - int candidate, distance; - const int val = Clamp8b(a + i * (1 << bits)); - GetValAndDistance(a, val, bits, &candidate, &distance); - if (i != 0) { - ++distance; - } - // Smallest distance but favor i == 0 over i == -1 and i == 1 - // since that keeps the overall intensity more constant in the - // images. - if (distance < min_distance) { - min_distance = distance; - best_val = candidate; - } - } - return best_val; + const int mask = (1 << bits) - 1; + const int biased = a + (mask >> 1) + ((a >> bits) & 1); + assert(bits > 0); + if (biased > 0xff) return 0xff; + return biased & ~mask; } // Applies FindClosestDiscretized to all channels of pixel. @@ -124,22 +97,11 @@ static void NearLossless(int xsize, int ysize, uint32_t* argb, } } -static int QualityToLimitBits(int quality) { - // quality mapping: - // 0..19 -> 5 - // 0..39 -> 4 - // 0..59 -> 3 - // 0..79 -> 2 - // 0..99 -> 1 - // 100 -> 0 - return MAX_LIMIT_BITS - quality / 20; -} - int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality) { int i; uint32_t* const copy_buffer = (uint32_t*)WebPSafeMalloc(xsize * 3, sizeof(*copy_buffer)); - const int limit_bits = QualityToLimitBits(quality); + const int limit_bits = VP8LNearLosslessBits(quality); assert(argb != NULL); assert(limit_bits >= 0); assert(limit_bits <= MAX_LIMIT_BITS); diff --git a/drivers/webp/enc/picture.c b/drivers/webp/enc/picture.c index 26679a72e4..d9befbc47d 100644 --- a/drivers/webp/enc/picture.c +++ b/drivers/webp/enc/picture.c @@ -237,6 +237,8 @@ static size_t Encode(const uint8_t* rgba, int width, int height, int stride, WebPMemoryWriter wrt; int ok; + if (output == NULL) return 0; + if (!WebPConfigPreset(&config, WEBP_PRESET_DEFAULT, quality_factor) || !WebPPictureInit(&pic)) { return 0; // shouldn't happen, except if system installation is broken diff --git a/drivers/webp/enc/picture_csp.c b/drivers/webp/enc/picture_csp.c index 0ef5f9eee2..607a6240b0 100644 --- a/drivers/webp/enc/picture_csp.c +++ b/drivers/webp/enc/picture_csp.c @@ -1125,32 +1125,44 @@ static int Import(WebPPicture* const picture, int WebPPictureImportRGB(WebPPicture* picture, const uint8_t* rgb, int rgb_stride) { - return (picture != NULL) ? Import(picture, rgb, rgb_stride, 3, 0, 0) : 0; + return (picture != NULL && rgb != NULL) + ? Import(picture, rgb, rgb_stride, 3, 0, 0) + : 0; } int WebPPictureImportBGR(WebPPicture* picture, const uint8_t* rgb, int rgb_stride) { - return (picture != NULL) ? Import(picture, rgb, rgb_stride, 3, 1, 0) : 0; + return (picture != NULL && rgb != NULL) + ? Import(picture, rgb, rgb_stride, 3, 1, 0) + : 0; } int WebPPictureImportRGBA(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) { - return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 0, 1) : 0; + return (picture != NULL && rgba != NULL) + ? Import(picture, rgba, rgba_stride, 4, 0, 1) + : 0; } int WebPPictureImportBGRA(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) { - return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 1, 1) : 0; + return (picture != NULL && rgba != NULL) + ? Import(picture, rgba, rgba_stride, 4, 1, 1) + : 0; } int WebPPictureImportRGBX(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) { - return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 0, 0) : 0; + return (picture != NULL && rgba != NULL) + ? Import(picture, rgba, rgba_stride, 4, 0, 0) + : 0; } int WebPPictureImportBGRX(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) { - return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 1, 0) : 0; + return (picture != NULL && rgba != NULL) + ? Import(picture, rgba, rgba_stride, 4, 1, 0) + : 0; } //------------------------------------------------------------------------------ diff --git a/drivers/webp/enc/picture_psnr.c b/drivers/webp/enc/picture_psnr.c index 40214efc95..81ab1b5ca1 100644 --- a/drivers/webp/enc/picture_psnr.c +++ b/drivers/webp/enc/picture_psnr.c @@ -27,7 +27,7 @@ static void AccumulateLSIM(const uint8_t* src, int src_stride, const uint8_t* ref, int ref_stride, - int w, int h, DistoStats* stats) { + int w, int h, VP8DistoStats* stats) { int x, y; double total_sse = 0.; for (y = 0; y < h; ++y) { @@ -71,11 +71,13 @@ static float GetPSNR(const double v) { int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, int type, float result[5]) { - DistoStats stats[5]; + VP8DistoStats stats[5]; int w, h; memset(stats, 0, sizeof(stats)); + VP8SSIMDspInit(); + if (src == NULL || ref == NULL || src->width != ref->width || src->height != ref->height || src->use_argb != ref->use_argb || result == NULL) { diff --git a/drivers/webp/enc/picture_tools.c b/drivers/webp/enc/picture_tools.c index 7c73646397..bf97af8408 100644 --- a/drivers/webp/enc/picture_tools.c +++ b/drivers/webp/enc/picture_tools.c @@ -11,6 +11,8 @@ // // Author: Skal (pascal.massimino@gmail.com) +#include <assert.h> + #include "./vp8enci.h" #include "../dsp/yuv.h" @@ -120,6 +122,24 @@ void WebPCleanupTransparentArea(WebPPicture* pic) { #undef SIZE #undef SIZE2 +void WebPCleanupTransparentAreaLossless(WebPPicture* const pic) { + int x, y, w, h; + uint32_t* argb; + assert(pic != NULL && pic->use_argb); + w = pic->width; + h = pic->height; + argb = pic->argb; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + if ((argb[x] & 0xff000000) == 0) { + argb[x] = 0x00000000; + } + } + argb += pic->argb_stride; + } +} + //------------------------------------------------------------------------------ // Blend color and remove transparency info diff --git a/drivers/webp/enc/quant.c b/drivers/webp/enc/quant.c index 002c326b82..549ad26f93 100644 --- a/drivers/webp/enc/quant.c +++ b/drivers/webp/enc/quant.c @@ -30,8 +30,6 @@ #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP // power-law modulation. Must be strictly less than 1. -#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision - // number of non-zero coeffs below which we consider the block very flat // (and apply a penalty to complex predictions) #define FLATNESS_LIMIT_I16 10 // I16 mode @@ -41,6 +39,8 @@ #define MULT_8B(a, b) (((a) * (b) + 128) >> 8) +#define RD_DISTO_MULT 256 // distortion multiplier (equivalent of lambda) + // #define DEBUG_BLOCK //------------------------------------------------------------------------------ @@ -54,15 +54,37 @@ static void PrintBlockInfo(const VP8EncIterator* const it, const VP8ModeScore* const rd) { int i, j; const int is_i16 = (it->mb_->type_ == 1); + const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC; + const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC; + const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC; + const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC; printf("SOURCE / OUTPUT / ABS DELTA\n"); - for (j = 0; j < 24; ++j) { - if (j == 16) printf("\n"); // newline before the U/V block - for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]); + for (j = 0; j < 16; ++j) { + for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]); printf(" "); - for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]); + for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]); printf(" "); for (i = 0; i < 16; ++i) { - printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS])); + printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS])); + } + printf("\n"); + } + printf("\n"); // newline before the U/V block + for (j = 0; j < 8; ++j) { + for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]); + printf(" "); + for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]); + printf(" "); + for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]); + printf(" "); + for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]); + printf(" "); + for (i = 0; i < 8; ++i) { + printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS])); + } + printf(" "); + for (i = 8; i < 16; ++i) { + printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS])); } printf("\n"); } @@ -212,6 +234,8 @@ static int ExpandMatrix(VP8Matrix* const m, int type) { return (sum + 8) >> 4; } +static void CheckLambdaValue(int* const v) { if (*v < 1) *v = 1; } + static void SetupMatrices(VP8Encoder* enc) { int i; const int tlambda_scale = @@ -221,7 +245,7 @@ static void SetupMatrices(VP8Encoder* enc) { for (i = 0; i < num_segments; ++i) { VP8SegmentInfo* const m = &enc->dqm_[i]; const int q = m->quant_; - int q4, q16, quv; + int q_i4, q_i16, q_uv; m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)]; m->y1_.q_[1] = kAcTable[clip(q, 0, 127)]; @@ -231,21 +255,33 @@ static void SetupMatrices(VP8Encoder* enc) { m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)]; m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)]; - q4 = ExpandMatrix(&m->y1_, 0); - q16 = ExpandMatrix(&m->y2_, 1); - quv = ExpandMatrix(&m->uv_, 2); - - m->lambda_i4_ = (3 * q4 * q4) >> 7; - m->lambda_i16_ = (3 * q16 * q16); - m->lambda_uv_ = (3 * quv * quv) >> 6; - m->lambda_mode_ = (1 * q4 * q4) >> 7; - m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3; - m->lambda_trellis_i16_ = (q16 * q16) >> 2; - m->lambda_trellis_uv_ = (quv *quv) << 1; - m->tlambda_ = (tlambda_scale * q4) >> 5; + q_i4 = ExpandMatrix(&m->y1_, 0); + q_i16 = ExpandMatrix(&m->y2_, 1); + q_uv = ExpandMatrix(&m->uv_, 2); + + m->lambda_i4_ = (3 * q_i4 * q_i4) >> 7; + m->lambda_i16_ = (3 * q_i16 * q_i16); + m->lambda_uv_ = (3 * q_uv * q_uv) >> 6; + m->lambda_mode_ = (1 * q_i4 * q_i4) >> 7; + m->lambda_trellis_i4_ = (7 * q_i4 * q_i4) >> 3; + m->lambda_trellis_i16_ = (q_i16 * q_i16) >> 2; + m->lambda_trellis_uv_ = (q_uv * q_uv) << 1; + m->tlambda_ = (tlambda_scale * q_i4) >> 5; + + // none of these constants should be < 1 + CheckLambdaValue(&m->lambda_i4_); + CheckLambdaValue(&m->lambda_i16_); + CheckLambdaValue(&m->lambda_uv_); + CheckLambdaValue(&m->lambda_mode_); + CheckLambdaValue(&m->lambda_trellis_i4_); + CheckLambdaValue(&m->lambda_trellis_i16_); + CheckLambdaValue(&m->lambda_trellis_uv_); + CheckLambdaValue(&m->tlambda_); m->min_disto_ = 10 * m->y1_.q_[0]; // quantization-aware min disto m->max_edge_ = 0; + + m->i4_penalty_ = 1000 * q_i4 * q_i4; } } @@ -324,7 +360,12 @@ static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1, static void SimplifySegments(VP8Encoder* const enc) { int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 }; - const int num_segments = enc->segment_hdr_.num_segments_; + // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an + // explicit check is needed to avoid a spurious warning about 'i' exceeding + // array bounds of 'dqm_' with some compilers (noticed with gcc-4.9). + const int num_segments = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) + ? enc->segment_hdr_.num_segments_ + : NUM_MB_SEGMENTS; int num_final_segments = 1; int s1, s2; for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments @@ -535,13 +576,12 @@ typedef struct { #define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA]) static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) { - // TODO: incorporate the "* 256" in the tables? - rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD); + rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD); } static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, score_t distortion) { - return rate * lambda + 256 * distortion; + return rate * lambda + RD_DISTO_MULT * distortion; } static int TrellisQuantizeBlock(const VP8Encoder* const enc, @@ -1050,7 +1090,7 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { // Compute RD-score rd_uv.D = VP8SSE16x8(src, tmp_dst); - rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas. + rd_uv.SD = 0; // not calling TDisto here: it tends to flatten areas. rd_uv.H = VP8FixedCostsUV[mode]; rd_uv.R = VP8GetCostUV(it, &rd_uv); if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) { @@ -1100,56 +1140,108 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { } // Refine intra16/intra4 sub-modes based on distortion only (not rate). -static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) { - const int is_i16 = (it->mb_->type_ == 1); +static void RefineUsingDistortion(VP8EncIterator* const it, + int try_both_modes, int refine_uv_mode, + VP8ModeScore* const rd) { score_t best_score = MAX_COST; + int nz = 0; + int mode; + int is_i16 = try_both_modes || (it->mb_->type_ == 1); - if (try_both_i4_i16 || is_i16) { - int mode; + const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; + // Some empiric constants, of approximate order of magnitude. + const int lambda_d_i16 = 106; + const int lambda_d_i4 = 11; + const int lambda_d_uv = 120; + score_t score_i4 = dqm->i4_penalty_; + score_t i4_bit_sum = 0; + const score_t bit_limit = it->enc_->mb_header_limit_; + + if (is_i16) { // First, evaluate Intra16 distortion int best_mode = -1; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; for (mode = 0; mode < NUM_PRED_MODES; ++mode) { const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; - const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; - const score_t score = VP8SSE16x16(src, ref); + const score_t score = VP8SSE16x16(src, ref) * RD_DISTO_MULT + + VP8FixedCostsI16[mode] * lambda_d_i16; + if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) { + continue; + } if (score < best_score) { best_mode = mode; best_score = score; } } VP8SetIntra16Mode(it, best_mode); + // we'll reconstruct later, if i16 mode actually gets selected } - if (try_both_i4_i16 || !is_i16) { - uint8_t modes_i4[16]; + + // Next, evaluate Intra4 + if (try_both_modes || !is_i16) { // We don't evaluate the rate here, but just account for it through a // constant penalty (i4 mode usually needs more bits compared to i16). - score_t score_i4 = (score_t)I4_PENALTY; - + is_i16 = 0; VP8IteratorStartI4(it); do { - int mode; - int best_sub_mode = -1; - score_t best_sub_score = MAX_COST; + int best_i4_mode = -1; + score_t best_i4_score = MAX_COST; const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; + const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4); - // TODO(skal): we don't really need the prediction pixels here, - // but just the distortion against 'src'. VP8MakeIntra4Preds(it); for (mode = 0; mode < NUM_BMODES; ++mode) { const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; - const score_t score = VP8SSE4x4(src, ref); - if (score < best_sub_score) { - best_sub_mode = mode; - best_sub_score = score; + const score_t score = VP8SSE4x4(src, ref) * RD_DISTO_MULT + + mode_costs[mode] * lambda_d_i4; + if (score < best_i4_score) { + best_i4_mode = mode; + best_i4_score = score; } } - modes_i4[it->i4_] = best_sub_mode; - score_i4 += best_sub_score; - if (score_i4 >= best_score) break; - } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF_ENC)); - if (score_i4 < best_score) { - VP8SetIntra4Mode(it, modes_i4); + i4_bit_sum += mode_costs[best_i4_mode]; + rd->modes_i4[it->i4_] = best_i4_mode; + score_i4 += best_i4_score; + if (score_i4 >= best_score || i4_bit_sum > bit_limit) { + // Intra4 won't be better than Intra16. Bail out and pick Intra16. + is_i16 = 1; + break; + } else { // reconstruct partial block inside yuv_out2_ buffer + uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_]; + nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_], + src, tmp_dst, best_i4_mode) << it->i4_; + } + } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC)); + } + + // Final reconstruction, depending on which mode is selected. + if (!is_i16) { + VP8SetIntra4Mode(it, rd->modes_i4); + SwapOut(it); + best_score = score_i4; + } else { + nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); + } + + // ... and UV! + if (refine_uv_mode) { + int best_mode = -1; + score_t best_uv_score = MAX_COST; + const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; + for (mode = 0; mode < NUM_PRED_MODES; ++mode) { + const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; + const score_t score = VP8SSE16x8(src, ref) * RD_DISTO_MULT + + VP8FixedCostsUV[mode] * lambda_d_uv; + if (score < best_uv_score) { + best_mode = mode; + best_uv_score = score; + } } + VP8SetIntraUVMode(it, best_mode); } + nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); + + rd->nz = nz; + rd->score = best_score; } //------------------------------------------------------------------------------ @@ -1179,13 +1271,13 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, SimpleQuantize(it, rd); } } else { - // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower). - // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode). - DistoRefine(it, (method >= 2)); - SimpleQuantize(it, rd); + // At this point we have heuristically decided intra16 / intra4. + // For method >= 2, pick the best intra4/intra16 based on SSE (~tad slower). + // For method <= 1, we don't re-examine the decision but just go ahead with + // quantization/reconstruction. + RefineUsingDistortion(it, (method >= 2), (method >= 1), rd); } is_skipped = (rd->nz == 0); VP8SetSkip(it, is_skipped); return is_skipped; } - diff --git a/drivers/webp/enc/vp8enci.h b/drivers/webp/enc/vp8enci.h index 0cb2ccc353..efd2f19a9b 100644 --- a/drivers/webp/enc/vp8enci.h +++ b/drivers/webp/enc/vp8enci.h @@ -22,10 +22,6 @@ #include "../utils/utils.h" #include "webp/encode.h" -#ifdef WEBP_EXPERIMENTAL_FEATURES -#include "./vp8li.h" -#endif // WEBP_EXPERIMENTAL_FEATURES - #ifdef __cplusplus extern "C" { #endif @@ -35,8 +31,8 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 0 -#define ENC_MIN_VERSION 4 -#define ENC_REV_VERSION 4 +#define ENC_MIN_VERSION 5 +#define ENC_REV_VERSION 1 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost @@ -200,6 +196,9 @@ typedef struct { int lambda_i16_, lambda_i4_, lambda_uv_; int lambda_mode_, lambda_trellis_, tlambda_; int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_; + + // lambda values for distortion-based evaluation + score_t i4_penalty_; // penalty for using Intra4 } VP8SegmentInfo; // Handy transient struct to accumulate score and info during RD-optimization @@ -395,6 +394,7 @@ struct VP8Encoder { int method_; // 0=fastest, 6=best/slowest. VP8RDLevel rd_opt_level_; // Deduced from method_. int max_i4_header_bits_; // partition #0 safeness factor + int mb_header_limit_; // rough limit for header bits per MB int thread_level_; // derived from config->thread_level int do_search_; // derived from config->target_XXX int use_tokens_; // if true, use token buffer @@ -477,17 +477,12 @@ int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data int VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data // in filter.c - -// SSIM utils -typedef struct { - double w, xm, ym, xxm, xym, yym; -} DistoStats; -void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst); +void VP8SSIMAddStats(const VP8DistoStats* const src, VP8DistoStats* const dst); void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1, const uint8_t* src2, int stride2, - int W, int H, DistoStats* const stats); -double VP8SSIMGet(const DistoStats* const stats); -double VP8SSIMGetSquaredError(const DistoStats* const stats); + int W, int H, VP8DistoStats* const stats); +double VP8SSIMGet(const VP8DistoStats* const stats); +double VP8SSIMGetSquaredError(const VP8DistoStats* const stats); // autofilter void VP8InitFilter(VP8EncIterator* const it); @@ -514,6 +509,10 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height); // Returns false in case of error (invalid param, out-of-memory). int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height); +// Clean-up the RGB samples under fully transparent area, to help lossless +// compressibility (no guarantee, though). Assumes that pic->use_argb is true. +void WebPCleanupTransparentAreaLossless(WebPPicture* const pic); + // in near_lossless.c // Near lossless preprocessing in RGB color-space. int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality); diff --git a/drivers/webp/enc/vp8l.c b/drivers/webp/enc/vp8l.c index 284995e830..1f2d41ea91 100644 --- a/drivers/webp/enc/vp8l.c +++ b/drivers/webp/enc/vp8l.c @@ -16,6 +16,7 @@ #include <stdlib.h> #include "./backward_references.h" +#include "./histogram.h" #include "./vp8enci.h" #include "./vp8li.h" #include "../dsp/lossless.h" @@ -33,8 +34,8 @@ // Palette reordering for smaller sum of deltas (and for smaller storage). static int PaletteCompareColorsForQsort(const void* p1, const void* p2) { - const uint32_t a = *(const uint32_t*)p1; - const uint32_t b = *(const uint32_t*)p2; + const uint32_t a = WebPMemToUint32(p1); + const uint32_t b = WebPMemToUint32(p2); assert(a != b); return (a < b) ? -1 : 1; } @@ -125,54 +126,8 @@ static int AnalyzeAndCreatePalette(const WebPPicture* const pic, int low_effort, uint32_t palette[MAX_PALETTE_SIZE], int* const palette_size) { - int i, x, y, key; - int num_colors = 0; - uint8_t in_use[MAX_PALETTE_SIZE * 4] = { 0 }; - uint32_t colors[MAX_PALETTE_SIZE * 4]; - static const uint32_t kHashMul = 0x1e35a7bd; - const uint32_t* argb = pic->argb; - const int width = pic->width; - const int height = pic->height; - uint32_t last_pix = ~argb[0]; // so we're sure that last_pix != argb[0] - - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - if (argb[x] == last_pix) { - continue; - } - last_pix = argb[x]; - key = (kHashMul * last_pix) >> PALETTE_KEY_RIGHT_SHIFT; - while (1) { - if (!in_use[key]) { - colors[key] = last_pix; - in_use[key] = 1; - ++num_colors; - if (num_colors > MAX_PALETTE_SIZE) { - return 0; - } - break; - } else if (colors[key] == last_pix) { - // The color is already there. - break; - } else { - // Some other color sits there. - // Do linear conflict resolution. - ++key; - key &= (MAX_PALETTE_SIZE * 4 - 1); // key mask for 1K buffer. - } - } - } - argb += pic->argb_stride; - } - - // TODO(skal): could we reuse in_use[] to speed up EncodePalette()? - num_colors = 0; - for (i = 0; i < (int)(sizeof(in_use) / sizeof(in_use[0])); ++i) { - if (in_use[i]) { - palette[num_colors] = colors[i]; - ++num_colors; - } - } + const int num_colors = WebPGetColorPalette(pic, palette); + if (num_colors > MAX_PALETTE_SIZE) return 0; *palette_size = num_colors; qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort); if (!low_effort && PaletteHasNonMonotonousDeltas(palette, num_colors)) { @@ -335,7 +290,7 @@ static int AnalyzeEntropy(const uint32_t* argb, } } } - free(histo); + WebPSafeFree(histo); return 1; } else { return 0; @@ -760,6 +715,10 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, } // Calculate backward references from ARGB image. + if (VP8LHashChainFill(hash_chain, quality, argb, width, height) == 0) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, &cache_bits, hash_chain, refs_array); if (refs == NULL) { @@ -823,7 +782,8 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2], int width, int height, int quality, - int low_effort, int* cache_bits, + int low_effort, + int use_cache, int* cache_bits, int histogram_bits, size_t init_byte_position, int* const hdr_size, @@ -855,10 +815,14 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, goto Error; } - *cache_bits = MAX_COLOR_CACHE_BITS; + *cache_bits = use_cache ? MAX_COLOR_CACHE_BITS : 0; // 'best_refs' is the reference to the best backward refs and points to one // of refs_array[0] or refs_array[1]. // Calculate backward references from ARGB image. + if (VP8LHashChainFill(hash_chain, quality, argb, width, height) == 0) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } best_refs = VP8LGetBackwardReferences(width, height, argb, quality, low_effort, cache_bits, hash_chain, refs_array); @@ -1006,13 +970,19 @@ static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height, static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, int width, int height, int quality, int low_effort, + int used_subtract_green, VP8LBitWriter* const bw) { const int pred_bits = enc->transform_bits_; const int transform_width = VP8LSubSampleSize(width, pred_bits); const int transform_height = VP8LSubSampleSize(height, pred_bits); + // we disable near-lossless quantization if palette is used. + const int near_lossless_strength = enc->use_palette_ ? 100 + : enc->config_->near_lossless; VP8LResidualImage(width, height, pred_bits, low_effort, enc->argb_, - enc->argb_scratch_, enc->transform_data_); + enc->argb_scratch_, enc->transform_data_, + near_lossless_strength, enc->config_->exact, + used_subtract_green); VP8LPutBits(bw, TRANSFORM_PRESENT, 1); VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2); assert(pred_bits >= 2); @@ -1112,6 +1082,12 @@ static WebPEncodingError WriteImage(const WebPPicture* const pic, // ----------------------------------------------------------------------------- +static void ClearTransformBuffer(VP8LEncoder* const enc) { + WebPSafeFree(enc->transform_mem_); + enc->transform_mem_ = NULL; + enc->transform_mem_size_ = 0; +} + // Allocates the memory for argb (W x H) buffer, 2 rows of context for // prediction and transform data. // Flags influencing the memory allocated: @@ -1120,43 +1096,48 @@ static WebPEncodingError WriteImage(const WebPPicture* const pic, static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, int width, int height) { WebPEncodingError err = VP8_ENC_OK; - if (enc->argb_ == NULL) { - const int tile_size = 1 << enc->transform_bits_; - const uint64_t image_size = width * height; - // Ensure enough size for tiles, as well as for two scanlines and two - // extra pixels for CopyImageWithPrediction. - const uint64_t argb_scratch_size = - enc->use_predict_ ? tile_size * width + width + 2 : 0; - const int transform_data_size = - (enc->use_predict_ || enc->use_cross_color_) - ? VP8LSubSampleSize(width, enc->transform_bits_) * - VP8LSubSampleSize(height, enc->transform_bits_) - : 0; - const uint64_t total_size = - image_size + WEBP_ALIGN_CST + - argb_scratch_size + WEBP_ALIGN_CST + - (uint64_t)transform_data_size; - uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem)); + const uint64_t image_size = width * height; + // VP8LResidualImage needs room for 2 scanlines of uint32 pixels with an extra + // pixel in each, plus 2 regular scanlines of bytes. + // TODO(skal): Clean up by using arithmetic in bytes instead of words. + const uint64_t argb_scratch_size = + enc->use_predict_ + ? (width + 1) * 2 + + (width * 2 + sizeof(uint32_t) - 1) / sizeof(uint32_t) + : 0; + const uint64_t transform_data_size = + (enc->use_predict_ || enc->use_cross_color_) + ? VP8LSubSampleSize(width, enc->transform_bits_) * + VP8LSubSampleSize(height, enc->transform_bits_) + : 0; + const uint64_t max_alignment_in_words = + (WEBP_ALIGN_CST + sizeof(uint32_t) - 1) / sizeof(uint32_t); + const uint64_t mem_size = + image_size + max_alignment_in_words + + argb_scratch_size + max_alignment_in_words + + transform_data_size; + uint32_t* mem = enc->transform_mem_; + if (mem == NULL || mem_size > enc->transform_mem_size_) { + ClearTransformBuffer(enc); + mem = (uint32_t*)WebPSafeMalloc(mem_size, sizeof(*mem)); if (mem == NULL) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } - enc->argb_ = mem; - mem = (uint32_t*)WEBP_ALIGN(mem + image_size); - enc->argb_scratch_ = mem; - mem = (uint32_t*)WEBP_ALIGN(mem + argb_scratch_size); - enc->transform_data_ = mem; - enc->current_width_ = width; + enc->transform_mem_ = mem; + enc->transform_mem_size_ = (size_t)mem_size; } + enc->argb_ = mem; + mem = (uint32_t*)WEBP_ALIGN(mem + image_size); + enc->argb_scratch_ = mem; + mem = (uint32_t*)WEBP_ALIGN(mem + argb_scratch_size); + enc->transform_data_ = mem; + + enc->current_width_ = width; Error: return err; } -static void ClearTransformBuffer(VP8LEncoder* const enc) { - WebPSafeFree(enc->argb_); - enc->argb_ = NULL; -} - static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { WebPEncodingError err = VP8_ENC_OK; const WebPPicture* const picture = enc->pic_; @@ -1176,8 +1157,35 @@ static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { // ----------------------------------------------------------------------------- -static void MapToPalette(const uint32_t palette[], int num_colors, +static int SearchColor(const uint32_t sorted[], uint32_t color, int hi) { + int low = 0; + if (sorted[low] == color) return low; // loop invariant: sorted[low] != color + while (1) { + const int mid = (low + hi) >> 1; + if (sorted[mid] == color) { + return mid; + } else if (sorted[mid] < color) { + low = mid; + } else { + hi = mid; + } + } +} + +// Sort palette in increasing order and prepare an inverse mapping array. +static void PrepareMapToPalette(const uint32_t palette[], int num_colors, + uint32_t sorted[], int idx_map[]) { + int i; + memcpy(sorted, palette, num_colors * sizeof(*sorted)); + qsort(sorted, num_colors, sizeof(*sorted), PaletteCompareColorsForQsort); + for (i = 0; i < num_colors; ++i) { + idx_map[SearchColor(sorted, palette[i], num_colors)] = i; + } +} + +static void MapToPalette(const uint32_t sorted_palette[], int num_colors, uint32_t* const last_pix, int* const last_idx, + const int idx_map[], const uint32_t* src, uint8_t* dst, int width) { int x; int prev_idx = *last_idx; @@ -1185,14 +1193,8 @@ static void MapToPalette(const uint32_t palette[], int num_colors, for (x = 0; x < width; ++x) { const uint32_t pix = src[x]; if (pix != prev_pix) { - int i; - for (i = 0; i < num_colors; ++i) { - if (pix == palette[i]) { - prev_idx = i; - prev_pix = pix; - break; - } - } + prev_idx = idx_map[SearchColor(sorted_palette, pix, num_colors)]; + prev_pix = pix; } dst[x] = prev_idx; } @@ -1239,11 +1241,16 @@ static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, } } else { // Use 1 pixel cache for ARGB pixels. - uint32_t last_pix = palette[0]; - int last_idx = 0; + uint32_t last_pix; + int last_idx; + uint32_t sorted[MAX_PALETTE_SIZE]; + int idx_map[MAX_PALETTE_SIZE]; + PrepareMapToPalette(palette, palette_size, sorted, idx_map); + last_pix = palette[0]; + last_idx = 0; for (y = 0; y < height; ++y) { - MapToPalette(palette, palette_size, &last_pix, &last_idx, - src, tmp_row, width); + MapToPalette(sorted, palette_size, &last_pix, &last_idx, + idx_map, src, tmp_row, width); VP8LBundleColorMap(tmp_row, width, xbits, dst); src += src_stride; dst += dst_stride; @@ -1376,7 +1383,7 @@ static void VP8LEncoderDelete(VP8LEncoder* enc) { WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, const WebPPicture* const picture, - VP8LBitWriter* const bw) { + VP8LBitWriter* const bw, int use_cache) { WebPEncodingError err = VP8_ENC_OK; const int quality = (int)config->quality; const int low_effort = (config->method == 0); @@ -1403,7 +1410,8 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, } // Apply near-lossless preprocessing. - use_near_lossless = !enc->use_palette_ && (config->near_lossless < 100); + use_near_lossless = + (config->near_lossless < 100) && !enc->use_palette_ && !enc->use_predict_; if (use_near_lossless) { if (!VP8ApplyNearLossless(width, height, picture->argb, config->near_lossless)) { @@ -1455,7 +1463,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, if (enc->use_predict_) { err = ApplyPredictFilter(enc, enc->current_width_, height, quality, - low_effort, bw); + low_effort, enc->use_subtract_green_, bw); if (err != VP8_ENC_OK) goto Error; } @@ -1472,8 +1480,8 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, // Encode and write the transformed image. err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_, enc->current_width_, height, quality, low_effort, - &enc->cache_bits_, enc->histo_bits_, byte_position, - &hdr_size, &data_size); + use_cache, &enc->cache_bits_, enc->histo_bits_, + byte_position, &hdr_size, &data_size); if (err != VP8_ENC_OK) goto Error; if (picture->stats != NULL) { @@ -1558,7 +1566,7 @@ int VP8LEncodeImage(const WebPConfig* const config, if (!WebPReportProgress(picture, 5, &percent)) goto UserAbort; // Encode main image stream. - err = VP8LEncodeStream(config, picture, &bw); + err = VP8LEncodeStream(config, picture, &bw, 1 /*use_cache*/); if (err != VP8_ENC_OK) goto Error; // TODO(skal): have a fine-grained progress report in VP8LEncodeStream(). diff --git a/drivers/webp/enc/vp8li.h b/drivers/webp/enc/vp8li.h index 4543c3b260..8e6b360d81 100644 --- a/drivers/webp/enc/vp8li.h +++ b/drivers/webp/enc/vp8li.h @@ -25,14 +25,17 @@ extern "C" { #endif typedef struct { - const WebPConfig* config_; // user configuration and parameters - const WebPPicture* pic_; // input picture. + const WebPConfig* config_; // user configuration and parameters + const WebPPicture* pic_; // input picture. - uint32_t* argb_; // Transformed argb image data. - uint32_t* argb_scratch_; // Scratch memory for argb rows - // (used for prediction). - uint32_t* transform_data_; // Scratch memory for transform data. - int current_width_; // Corresponds to packed image width. + uint32_t* argb_; // Transformed argb image data. + uint32_t* argb_scratch_; // Scratch memory for argb rows + // (used for prediction). + uint32_t* transform_data_; // Scratch memory for transform data. + uint32_t* transform_mem_; // Currently allocated memory. + size_t transform_mem_size_; // Currently allocated memory size. + + int current_width_; // Corresponds to packed image width. // Encoding parameters derived from quality parameter. int histo_bits_; @@ -64,9 +67,10 @@ int VP8LEncodeImage(const WebPConfig* const config, const WebPPicture* const picture); // Encodes the main image stream using the supplied bit writer. +// If 'use_cache' is false, disables the use of color cache. WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, const WebPPicture* const picture, - VP8LBitWriter* const bw); + VP8LBitWriter* const bw, int use_cache); //------------------------------------------------------------------------------ diff --git a/drivers/webp/enc/webpenc.c b/drivers/webp/enc/webpenc.c index 8ced07a2a3..a7d04ea2ce 100644 --- a/drivers/webp/enc/webpenc.c +++ b/drivers/webp/enc/webpenc.c @@ -79,7 +79,9 @@ static void ResetBoundaryPredictions(VP8Encoder* const enc) { //-------------------+---+---+---+---+---+---+---+ // basic rd-opt | | | | x | x | x | x | //-------------------+---+---+---+---+---+---+---+ -// disto-score i4/16 | | | x | | | | | +// disto-refine i4/16| x | x | x | | | | | +//-------------------+---+---+---+---+---+---+---+ +// disto-refine uv | | x | x | | | | | //-------------------+---+---+---+---+---+---+---+ // rd-opt i4/16 | | | ~ | x | x | x | x | //-------------------+---+---+---+---+---+---+---+ @@ -103,6 +105,10 @@ static void MapConfigToTools(VP8Encoder* const enc) { 256 * 16 * 16 * // upper bound: up to 16bit per 4x4 block (limit * limit) / (100 * 100); // ... modulated with a quadratic curve. + // partition0 = 512k max. + enc->mb_header_limit_ = + (score_t)256 * 510 * 8 * 1024 / (enc->mb_w_ * enc->mb_h_); + enc->thread_level_ = config->thread_level; enc->do_search_ = (config->target_size > 0 || config->target_PSNR > 0); @@ -137,29 +143,30 @@ static void MapConfigToTools(VP8Encoder* const enc) { static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, WebPPicture* const picture) { + VP8Encoder* enc; const int use_filter = (config->filter_strength > 0) || (config->autofilter > 0); const int mb_w = (picture->width + 15) >> 4; const int mb_h = (picture->height + 15) >> 4; const int preds_w = 4 * mb_w + 1; const int preds_h = 4 * mb_h + 1; - const size_t preds_size = preds_w * preds_h * sizeof(uint8_t); + const size_t preds_size = preds_w * preds_h * sizeof(*enc->preds_); const int top_stride = mb_w * 16; - const size_t nz_size = (mb_w + 1) * sizeof(uint32_t) + WEBP_ALIGN_CST; - const size_t info_size = mb_w * mb_h * sizeof(VP8MBInfo); - const size_t samples_size = 2 * top_stride * sizeof(uint8_t) // top-luma/u/v - + WEBP_ALIGN_CST; // align all + const size_t nz_size = (mb_w + 1) * sizeof(*enc->nz_) + WEBP_ALIGN_CST; + const size_t info_size = mb_w * mb_h * sizeof(*enc->mb_info_); + const size_t samples_size = + 2 * top_stride * sizeof(*enc->y_top_) // top-luma/u/v + + WEBP_ALIGN_CST; // align all const size_t lf_stats_size = - config->autofilter ? sizeof(LFStats) + WEBP_ALIGN_CST : 0; - VP8Encoder* enc; + config->autofilter ? sizeof(*enc->lf_stats_) + WEBP_ALIGN_CST : 0; uint8_t* mem; - const uint64_t size = (uint64_t)sizeof(VP8Encoder) // main struct - + WEBP_ALIGN_CST // cache alignment - + info_size // modes info - + preds_size // prediction modes - + samples_size // top/left samples - + nz_size // coeff context bits - + lf_stats_size; // autofilter stats + const uint64_t size = (uint64_t)sizeof(*enc) // main struct + + WEBP_ALIGN_CST // cache alignment + + info_size // modes info + + preds_size // prediction modes + + samples_size // top/left samples + + nz_size // coeff context bits + + lf_stats_size; // autofilter stats #ifdef PRINT_MEMORY_INFO printf("===================================\n"); @@ -171,7 +178,7 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, " non-zero: %ld\n" " lf-stats: %ld\n" " total: %ld\n", - sizeof(VP8Encoder) + WEBP_ALIGN_CST, info_size, + sizeof(*enc) + WEBP_ALIGN_CST, info_size, preds_size, samples_size, nz_size, lf_stats_size, size); printf("Transient object sizes:\n" " VP8EncIterator: %ld\n" @@ -201,7 +208,7 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, enc->mb_info_ = (VP8MBInfo*)mem; mem += info_size; enc->preds_ = ((uint8_t*)mem) + 1 + enc->preds_w_; - mem += preds_w * preds_h * sizeof(uint8_t); + mem += preds_size; enc->nz_ = 1 + (uint32_t*)WEBP_ALIGN(mem); mem += nz_size; enc->lf_stats_ = lf_stats_size ? (LFStats*)WEBP_ALIGN(mem) : NULL; @@ -321,14 +328,15 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION) return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION); - if (!config->exact) { - WebPCleanupTransparentArea(pic); - } - if (pic->stats != NULL) memset(pic->stats, 0, sizeof(*pic->stats)); if (!config->lossless) { VP8Encoder* enc = NULL; + + if (!config->exact) { + WebPCleanupTransparentArea(pic); + } + if (pic->use_argb || pic->y == NULL || pic->u == NULL || pic->v == NULL) { // Make sure we have YUVA samples. if (config->preprocessing & 4) { @@ -376,6 +384,10 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { return 0; } + if (!config->exact) { + WebPCleanupTransparentAreaLossless(pic); + } + ok = VP8LEncodeImage(config, pic); // Sets pic->error in case of problem. } |