diff options
Diffstat (limited to 'thirdparty/libwebp/enc')
-rw-r--r-- | thirdparty/libwebp/enc/alpha_enc.c (renamed from thirdparty/libwebp/enc/alpha.c) | 8 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/analysis_enc.c (renamed from thirdparty/libwebp/enc/analysis.c) | 47 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/backward_references_enc.c (renamed from thirdparty/libwebp/enc/backward_references.c) | 327 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/backward_references_enc.h (renamed from thirdparty/libwebp/enc/backward_references.h) | 3 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/config_enc.c (renamed from thirdparty/libwebp/enc/config.c) | 91 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/cost_enc.c (renamed from thirdparty/libwebp/enc/cost.c) | 2 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/cost_enc.h (renamed from thirdparty/libwebp/enc/cost.h) | 2 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/delta_palettization_enc.c (renamed from thirdparty/libwebp/enc/delta_palettization.c) | 2 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/delta_palettization_enc.h (renamed from thirdparty/libwebp/enc/delta_palettization.h) | 2 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/filter_enc.c (renamed from thirdparty/libwebp/enc/filter.c) | 107 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/frame_enc.c (renamed from thirdparty/libwebp/enc/frame.c) | 10 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/histogram_enc.c (renamed from thirdparty/libwebp/enc/histogram.c) | 304 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/histogram_enc.h (renamed from thirdparty/libwebp/enc/histogram.h) | 2 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/iterator_enc.c (renamed from thirdparty/libwebp/enc/iterator.c) | 19 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/near_lossless_enc.c (renamed from thirdparty/libwebp/enc/near_lossless.c) | 4 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/picture_csp_enc.c (renamed from thirdparty/libwebp/enc/picture_csp.c) | 269 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/picture_enc.c (renamed from thirdparty/libwebp/enc/picture.c) | 2 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/picture_psnr.c | 177 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/picture_psnr_enc.c | 213 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/picture_rescale_enc.c (renamed from thirdparty/libwebp/enc/picture_rescale.c) | 4 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/picture_tools_enc.c (renamed from thirdparty/libwebp/enc/picture_tools.c) | 2 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/predictor_enc.c | 750 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/quant_enc.c (renamed from thirdparty/libwebp/enc/quant.c) | 21 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/syntax_enc.c (renamed from thirdparty/libwebp/enc/syntax.c) | 5 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/token_enc.c (renamed from thirdparty/libwebp/enc/token.c) | 7 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/tree_enc.c (renamed from thirdparty/libwebp/enc/tree.c) | 2 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/vp8i_enc.h (renamed from thirdparty/libwebp/enc/vp8enci.h) | 19 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/vp8l_enc.c (renamed from thirdparty/libwebp/enc/vp8l.c) | 253 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/vp8li_enc.h (renamed from thirdparty/libwebp/enc/vp8li.h) | 22 | ||||
-rw-r--r-- | thirdparty/libwebp/enc/webp_enc.c (renamed from thirdparty/libwebp/enc/webpenc.c) | 27 |
30 files changed, 1800 insertions, 903 deletions
diff --git a/thirdparty/libwebp/enc/alpha.c b/thirdparty/libwebp/enc/alpha_enc.c index 03e3ad07f5..5a2c931f92 100644 --- a/thirdparty/libwebp/enc/alpha.c +++ b/thirdparty/libwebp/enc/alpha_enc.c @@ -14,10 +14,10 @@ #include <assert.h> #include <stdlib.h> -#include "./vp8enci.h" +#include "./vp8i_enc.h" #include "../dsp/dsp.h" -#include "../utils/filters.h" -#include "../utils/quant_levels.h" +#include "../utils/filters_utils.h" +#include "../utils/quant_levels_utils.h" #include "../utils/utils.h" #include "../webp/format_constants.h" @@ -44,7 +44,7 @@ // invalid quality or method, or // memory allocation for the compressed data fails. -#include "../enc/vp8li.h" +#include "../enc/vp8li_enc.h" static int EncodeLossless(const uint8_t* const data, int width, int height, int effort_level, // in [0..6] range diff --git a/thirdparty/libwebp/enc/analysis.c b/thirdparty/libwebp/enc/analysis_enc.c index 136c331289..dce159b316 100644 --- a/thirdparty/libwebp/enc/analysis.c +++ b/thirdparty/libwebp/enc/analysis_enc.c @@ -15,8 +15,8 @@ #include <string.h> #include <assert.h> -#include "./vp8enci.h" -#include "./cost.h" +#include "./vp8i_enc.h" +#include "./cost_enc.h" #include "../utils/utils.h" #define MAX_ITERS_K_MEANS 6 @@ -262,6 +262,29 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) { return best_alpha; } +static int FastMBAnalyze(VP8EncIterator* const it) { + // Empirical cut-off value, should be around 16 (~=block size). We use the + // [8-17] range and favor intra4 at high quality, intra16 for low quality. + const int q = (int)it->enc_->config_->quality; + const uint32_t kThreshold = 8 + (17 - 8) * q / 100; + int k; + uint32_t dc[16], m, m2; + for (k = 0; k < 16; k += 4) { + VP8Mean16x4(it->yuv_in_ + Y_OFF_ENC + k * BPS, &dc[k]); + } + for (m = 0, m2 = 0, k = 0; k < 16; ++k) { + m += dc[k]; + m2 += dc[k] * dc[k]; + } + if (kThreshold * m2 < m * m) { + VP8SetIntra16Mode(it, 0); // DC16 + } else { + const uint8_t modes[16] = { 0 }; // DC4 + VP8SetIntra4Mode(it, modes); + } + return 0; +} + static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it, int best_alpha) { uint8_t modes[16]; @@ -344,13 +367,17 @@ static void MBAnalyze(VP8EncIterator* const it, VP8SetSkip(it, 0); // not skipped VP8SetSegment(it, 0); // default segment, spec-wise. - best_alpha = MBAnalyzeBestIntra16Mode(it); - if (enc->method_ >= 5) { - // We go and make a fast decision for intra4/intra16. - // It's usually not a good and definitive pick, but helps seeding the stats - // about level bit-cost. - // TODO(skal): improve criterion. - best_alpha = MBAnalyzeBestIntra4Mode(it, best_alpha); + if (enc->method_ <= 1) { + best_alpha = FastMBAnalyze(it); + } else { + best_alpha = MBAnalyzeBestIntra16Mode(it); + if (enc->method_ >= 5) { + // We go and make a fast decision for intra4/intra16. + // It's usually not a good and definitive pick, but helps seeding the + // stats about level bit-cost. + // TODO(skal): improve criterion. + best_alpha = MBAnalyzeBestIntra4Mode(it, best_alpha); + } } best_uv_alpha = MBAnalyzeBestUVMode(it); @@ -453,7 +480,7 @@ int VP8EncAnalyze(VP8Encoder* const enc) { const int do_segments = enc->config_->emulate_jpeg_size || // We need the complexity evaluation. (enc->segment_hdr_.num_segments_ > 1) || - (enc->method_ == 0); // for method 0, we need preds_[] to be filled. + (enc->method_ <= 1); // for method 0 - 1, we need preds_[] to be filled. if (do_segments) { const int last_row = enc->mb_h_; // We give a little more than a half work to the main thread. diff --git a/thirdparty/libwebp/enc/backward_references.c b/thirdparty/libwebp/enc/backward_references_enc.c index 136a24a8c3..7c0559ff1e 100644 --- a/thirdparty/libwebp/enc/backward_references.c +++ b/thirdparty/libwebp/enc/backward_references_enc.c @@ -13,11 +13,12 @@ #include <assert.h> #include <math.h> -#include "./backward_references.h" -#include "./histogram.h" +#include "./backward_references_enc.h" +#include "./histogram_enc.h" #include "../dsp/lossless.h" +#include "../dsp/lossless_common.h" #include "../dsp/dsp.h" -#include "../utils/color_cache.h" +#include "../utils/color_cache_utils.h" #include "../utils/utils.h" #define VALUES_IN_BYTE 256 @@ -30,8 +31,9 @@ #define WINDOW_SIZE_BITS 20 #define WINDOW_SIZE ((1 << WINDOW_SIZE_BITS) - 120) -// Bounds for the match length. -#define MIN_LENGTH 2 +// Minimum number of pixels for which it is cheaper to encode a +// distance + length instead of each pixel as a literal. +#define MIN_LENGTH 4 // If you change this, you need MAX_LENGTH_BITS + WINDOW_SIZE_BITS <= 32 as it // is used in VP8LHashChain. #define MAX_LENGTH_BITS 12 @@ -211,13 +213,13 @@ void VP8LHashChainClear(VP8LHashChain* const p) { // ----------------------------------------------------------------------------- -#define HASH_MULTIPLIER_HI (0xc6a4a793U) -#define HASH_MULTIPLIER_LO (0x5bd1e996U) +#define HASH_MULTIPLIER_HI (0xc6a4a793ULL) +#define HASH_MULTIPLIER_LO (0x5bd1e996ULL) static WEBP_INLINE uint32_t GetPixPairHash64(const uint32_t* const argb) { uint32_t key; - key = argb[1] * HASH_MULTIPLIER_HI; - key += argb[0] * HASH_MULTIPLIER_LO; + key = (argb[1] * HASH_MULTIPLIER_HI) & 0xffffffffu; + key += (argb[0] * HASH_MULTIPLIER_LO) & 0xffffffffu; key = key >> (32 - HASH_BITS); return key; } @@ -242,19 +244,26 @@ static WEBP_INLINE int MaxFindCopyLength(int len) { } int VP8LHashChainFill(VP8LHashChain* const p, int quality, - const uint32_t* const argb, int xsize, int ysize) { + const uint32_t* const argb, int xsize, int ysize, + int low_effort) { const int size = xsize * ysize; const int iter_max = GetMaxItersForQuality(quality); - const int iter_min = iter_max - quality / 10; const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize); int pos; + int argb_comp; uint32_t base_position; int32_t* hash_to_first_index; // Temporarily use the p->offset_length_ as a hash chain. int32_t* chain = (int32_t*)p->offset_length_; + assert(size > 0); assert(p->size_ != 0); assert(p->offset_length_ != NULL); + if (size <= 2) { + p->offset_length_[0] = p->offset_length_[size - 1] = 0; + return 1; + } + hash_to_first_index = (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index)); if (hash_to_first_index == NULL) return 0; @@ -262,48 +271,111 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality, // Set the int32_t array to -1. memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index)); // Fill the chain linking pixels with the same hash. - for (pos = 0; pos < size - 1; ++pos) { - const uint32_t hash_code = GetPixPairHash64(argb + pos); - chain[pos] = hash_to_first_index[hash_code]; - hash_to_first_index[hash_code] = pos; + argb_comp = (argb[0] == argb[1]); + for (pos = 0; pos < size - 2;) { + uint32_t hash_code; + const int argb_comp_next = (argb[pos + 1] == argb[pos + 2]); + if (argb_comp && argb_comp_next) { + // Consecutive pixels with the same color will share the same hash. + // We therefore use a different hash: the color and its repetition + // length. + uint32_t tmp[2]; + uint32_t len = 1; + tmp[0] = argb[pos]; + // Figure out how far the pixels are the same. + // The last pixel has a different 64 bit hash, as its next pixel does + // not have the same color, so we just need to get to the last pixel equal + // to its follower. + while (pos + (int)len + 2 < size && argb[pos + len + 2] == argb[pos]) { + ++len; + } + if (len > MAX_LENGTH) { + // Skip the pixels that match for distance=1 and length>MAX_LENGTH + // because they are linked to their predecessor and we automatically + // check that in the main for loop below. Skipping means setting no + // predecessor in the chain, hence -1. + memset(chain + pos, 0xff, (len - MAX_LENGTH) * sizeof(*chain)); + pos += len - MAX_LENGTH; + len = MAX_LENGTH; + } + // Process the rest of the hash chain. + while (len) { + tmp[1] = len--; + hash_code = GetPixPairHash64(tmp); + chain[pos] = hash_to_first_index[hash_code]; + hash_to_first_index[hash_code] = pos++; + } + argb_comp = 0; + } else { + // Just move one pixel forward. + hash_code = GetPixPairHash64(argb + pos); + chain[pos] = hash_to_first_index[hash_code]; + hash_to_first_index[hash_code] = pos++; + argb_comp = argb_comp_next; + } } + // Process the penultimate pixel. + chain[pos] = hash_to_first_index[GetPixPairHash64(argb + pos)]; + WebPSafeFree(hash_to_first_index); // Find the best match interval at each pixel, defined by an offset to the // pixel and a length. The right-most pixel cannot match anything to the right // (hence a best length of 0) and the left-most pixel nothing to the left // (hence an offset of 0). + assert(size > 2); p->offset_length_[0] = p->offset_length_[size - 1] = 0; - for (base_position = size - 2 < 0 ? 0 : size - 2; base_position > 0;) { + for (base_position = size - 2; base_position > 0;) { const int max_len = MaxFindCopyLength(size - 1 - base_position); const uint32_t* const argb_start = argb + base_position; int iter = iter_max; int best_length = 0; uint32_t best_distance = 0; + uint32_t best_argb; const int min_pos = (base_position > window_size) ? base_position - window_size : 0; const int length_max = (max_len < 256) ? max_len : 256; uint32_t max_base_position; - for (pos = chain[base_position]; pos >= min_pos; pos = chain[pos]) { + pos = chain[base_position]; + if (!low_effort) { int curr_length; - if (--iter < 0) { - break; + // Heuristic: use the comparison with the above line as an initialization. + if (base_position >= (uint32_t)xsize) { + curr_length = FindMatchLength(argb_start - xsize, argb_start, + best_length, max_len); + if (curr_length > best_length) { + best_length = curr_length; + best_distance = xsize; + } + --iter; + } + // Heuristic: compare to the previous pixel. + curr_length = + FindMatchLength(argb_start - 1, argb_start, best_length, max_len); + if (curr_length > best_length) { + best_length = curr_length; + best_distance = 1; } + --iter; + // Skip the for loop if we already have the maximum. + if (best_length == MAX_LENGTH) pos = min_pos - 1; + } + best_argb = argb_start[best_length]; + + for (; pos >= min_pos && --iter; pos = chain[pos]) { + int curr_length; assert(base_position > (uint32_t)pos); - curr_length = - FindMatchLength(argb + pos, argb_start, best_length, max_len); + if (argb[pos + best_length] != best_argb) continue; + + curr_length = VP8LVectorMismatch(argb + pos, argb_start, max_len); if (best_length < curr_length) { best_length = curr_length; best_distance = base_position - pos; - // Stop if we have reached the maximum length. Otherwise, make sure - // we have executed a minimum number of iterations depending on the - // quality. - if ((best_length == MAX_LENGTH) || - (curr_length >= length_max && iter < iter_min)) { - break; - } + best_argb = argb_start[best_length]; + // Stop if we have reached a good enough length. + if (best_length >= length_max) break; } } // We have the best match but in case the two intervals continue matching @@ -392,17 +464,16 @@ static int BackwardReferencesRle(int xsize, int ysize, i = 1; while (i < pix_count) { const int max_len = MaxFindCopyLength(pix_count - i); - const int kMinLength = 4; const int rle_len = FindMatchLength(argb + i, argb + i - 1, 0, max_len); const int prev_row_len = (i < xsize) ? 0 : FindMatchLength(argb + i, argb + i - xsize, 0, max_len); - if (rle_len >= prev_row_len && rle_len >= kMinLength) { + if (rle_len >= prev_row_len && rle_len >= MIN_LENGTH) { BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, rle_len)); // We don't need to update the color cache here since it is always the // same pixel being copied, and that does not change the color cache // state. i += rle_len; - } else if (prev_row_len >= kMinLength) { + } else if (prev_row_len >= MIN_LENGTH) { BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, prev_row_len)); if (use_color_cache) { for (k = 0; k < prev_row_len; ++k) { @@ -442,7 +513,7 @@ static int BackwardReferencesLz77(int xsize, int ysize, int len = 0; int j; HashChainFindCopy(hash_chain, i, &offset, &len); - if (len > MIN_LENGTH + 1) { + if (len >= MIN_LENGTH) { const int len_ini = len; int max_reach = 0; assert(i + len < pix_count); @@ -457,7 +528,7 @@ static int BackwardReferencesLz77(int xsize, int ysize, for (j = i_last_check + 1; j <= i + len_ini; ++j) { const int len_j = HashChainFindLength(hash_chain, j); const int reach = - j + (len_j > MIN_LENGTH + 1 ? len_j : 1); // 1 for single literal. + j + (len_j >= MIN_LENGTH ? len_j : 1); // 1 for single literal. if (reach > max_reach) { len = j - i; max_reach = reach; @@ -581,9 +652,10 @@ static void AddSingleLiteralWithCostModel(const uint32_t* const argb, uint16_t* const dist_array) { double cost_val = prev_cost; const uint32_t color = argb[0]; - if (use_color_cache && VP8LColorCacheContains(hashers, color)) { + const int ix = use_color_cache ? VP8LColorCacheContains(hashers, color) : -1; + if (ix >= 0) { + // use_color_cache is true and hashers contains color const double mul0 = 0.68; - const int ix = VP8LColorCacheGetIndex(hashers, color); cost_val += GetCacheCost(cost_model, ix) * mul0; } else { const double mul1 = 0.82; @@ -1215,7 +1287,8 @@ static int BackwardReferencesHashChainDistanceOnly( int offset = 0, len = 0; double prev_cost = cost_manager->costs_[i - 1]; HashChainFindCopy(hash_chain, i, &offset, &len); - if (len >= MIN_LENGTH) { + if (len >= 2) { + // If we are dealing with a non-literal. const int code = DistanceToPlaneCode(xsize, offset); const double offset_cost = GetDistanceCost(cost_model, code); const int first_i = i; @@ -1304,20 +1377,17 @@ static int BackwardReferencesHashChainDistanceOnly( } goto next_symbol; } - if (len > MIN_LENGTH) { - int code_min_length; - double cost_total; - offset = HashChainFindOffset(hash_chain, i); - code_min_length = DistanceToPlaneCode(xsize, offset); - cost_total = prev_cost + - GetDistanceCost(cost_model, code_min_length) + - GetLengthCost(cost_model, 1); + if (len > 2) { + // Also try the smallest interval possible (size 2). + double cost_total = + prev_cost + offset_cost + GetLengthCost(cost_model, 1); if (cost_manager->costs_[i + 1] > cost_total) { cost_manager->costs_[i + 1] = (float)cost_total; dist_array[i + 1] = 2; } } - } else { // len < MIN_LENGTH + } else { + // The pixel is added as a single literal so just update the costs. UpdateCostPerIndex(cost_manager, i + 1); } @@ -1393,9 +1463,11 @@ static int BackwardReferencesHashChainFollowChosenPath( i += len; } else { PixOrCopy v; - if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) { + const int idx = + use_color_cache ? VP8LColorCacheContains(&hashers, argb[i]) : -1; + if (idx >= 0) { + // use_color_cache is true and hashers contains argb[i] // push pixel as a color cache index - const int idx = VP8LColorCacheGetIndex(&hashers, argb[i]); v = PixOrCopyCreateCacheIdx(idx); } else { if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]); @@ -1454,63 +1526,89 @@ static void BackwardReferences2DLocality(int xsize, } } -// Returns entropy for the given cache bits. -static double ComputeCacheEntropy(const uint32_t* argb, - const VP8LBackwardRefs* const refs, - int cache_bits) { - const int use_color_cache = (cache_bits > 0); - int cc_init = 0; - double entropy = MAX_ENTROPY; - const double kSmallPenaltyForLargeCache = 4.0; - VP8LColorCache hashers; +// Computes the entropies for a color cache size (in bits) between 0 (unused) +// and cache_bits_max (inclusive). +// Returns 1 on success, 0 in case of allocation error. +static int ComputeCacheEntropies(const uint32_t* argb, + const VP8LBackwardRefs* const refs, + int cache_bits_max, double entropies[]) { + int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 }; + VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1]; VP8LRefsCursor c = VP8LRefsCursorInit(refs); - VP8LHistogram* histo = VP8LAllocateHistogram(cache_bits); - if (histo == NULL) goto Error; + VP8LHistogram* histos[MAX_COLOR_CACHE_BITS + 1] = { NULL }; + int ok = 0; + int i; - if (use_color_cache) { - cc_init = VP8LColorCacheInit(&hashers, cache_bits); - if (!cc_init) goto Error; + for (i = 0; i <= cache_bits_max; ++i) { + histos[i] = VP8LAllocateHistogram(i); + if (histos[i] == NULL) goto Error; + if (i == 0) continue; + cc_init[i] = VP8LColorCacheInit(&hashers[i], i); + if (!cc_init[i]) goto Error; } - if (!use_color_cache) { - while (VP8LRefsCursorOk(&c)) { - VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos); - VP8LRefsCursorNext(&c); - } - } else { + + assert(cache_bits_max >= 0); + // Do not use the color cache for cache_bits=0. + while (VP8LRefsCursorOk(&c)) { + VP8LHistogramAddSinglePixOrCopy(histos[0], c.cur_pos); + VP8LRefsCursorNext(&c); + } + if (cache_bits_max > 0) { + c = VP8LRefsCursorInit(refs); while (VP8LRefsCursorOk(&c)) { const PixOrCopy* const v = c.cur_pos; if (PixOrCopyIsLiteral(v)) { const uint32_t pix = *argb++; - const uint32_t key = VP8LColorCacheGetIndex(&hashers, pix); - if (VP8LColorCacheLookup(&hashers, key) == pix) { - ++histo->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key]; - } else { - VP8LColorCacheSet(&hashers, key, pix); - ++histo->blue_[pix & 0xff]; - ++histo->literal_[(pix >> 8) & 0xff]; - ++histo->red_[(pix >> 16) & 0xff]; - ++histo->alpha_[pix >> 24]; + // The keys of the caches can be derived from the longest one. + int key = HashPix(pix, 32 - cache_bits_max); + for (i = cache_bits_max; i >= 1; --i, key >>= 1) { + if (VP8LColorCacheLookup(&hashers[i], key) == pix) { + ++histos[i]->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key]; + } else { + VP8LColorCacheSet(&hashers[i], key, pix); + ++histos[i]->blue_[pix & 0xff]; + ++histos[i]->literal_[(pix >> 8) & 0xff]; + ++histos[i]->red_[(pix >> 16) & 0xff]; + ++histos[i]->alpha_[pix >> 24]; + } } } else { + // Update the histograms for distance/length. int len = PixOrCopyLength(v); - int code, extra_bits; - VP8LPrefixEncodeBits(len, &code, &extra_bits); - ++histo->literal_[NUM_LITERAL_CODES + code]; - VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits); - ++histo->distance_[code]; + int code_dist, code_len, extra_bits; + uint32_t argb_prev = *argb ^ 0xffffffffu; + VP8LPrefixEncodeBits(len, &code_len, &extra_bits); + VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code_dist, &extra_bits); + for (i = 1; i <= cache_bits_max; ++i) { + ++histos[i]->literal_[NUM_LITERAL_CODES + code_len]; + ++histos[i]->distance_[code_dist]; + } + // Update the colors caches. do { - VP8LColorCacheInsert(&hashers, *argb++); - } while(--len != 0); + if (*argb != argb_prev) { + // Efficiency: insert only if the color changes. + int key = HashPix(*argb, 32 - cache_bits_max); + for (i = cache_bits_max; i >= 1; --i, key >>= 1) { + hashers[i].colors_[key] = *argb; + } + argb_prev = *argb; + } + argb++; + } while (--len != 0); } VP8LRefsCursorNext(&c); } } - entropy = VP8LHistogramEstimateBits(histo) + - kSmallPenaltyForLargeCache * cache_bits; - Error: - if (cc_init) VP8LColorCacheClear(&hashers); - VP8LFreeHistogram(histo); - return entropy; + for (i = 0; i <= cache_bits_max; ++i) { + entropies[i] = VP8LHistogramEstimateBits(histos[i]); + } + ok = 1; +Error: + for (i = 0; i <= cache_bits_max; ++i) { + if (cc_init[i]) VP8LColorCacheClear(&hashers[i]); + VP8LFreeHistogram(histos[i]); + } + return ok; } // Evaluate optimal cache bits for the local color cache. @@ -1524,13 +1622,10 @@ static int CalculateBestCacheSize(const uint32_t* const argb, VP8LBackwardRefs* const refs, int* const lz77_computed, int* const best_cache_bits) { - int eval_low = 1; - int eval_high = 1; - double entropy_low = MAX_ENTROPY; - double entropy_high = MAX_ENTROPY; - const double cost_mul = 5e-4; - int cache_bits_low = 0; + int i; int cache_bits_high = (quality <= 25) ? 0 : *best_cache_bits; + double entropy_min = MAX_ENTROPY; + double entropies[MAX_COLOR_CACHE_BITS + 1]; assert(cache_bits_high <= MAX_COLOR_CACHE_BITS); @@ -1540,34 +1635,23 @@ static int CalculateBestCacheSize(const uint32_t* const argb, // Local color cache is disabled. return 1; } - if (!BackwardReferencesLz77(xsize, ysize, argb, cache_bits_low, hash_chain, - refs)) { + // Compute LZ77 with no cache (0 bits), as the ideal LZ77 with a color cache + // is not that different in practice. + if (!BackwardReferencesLz77(xsize, ysize, argb, 0, hash_chain, refs)) { return 0; } - // Do a binary search to find the optimal entropy for cache_bits. - while (eval_low || eval_high) { - if (eval_low) { - entropy_low = ComputeCacheEntropy(argb, refs, cache_bits_low); - entropy_low += entropy_low * cache_bits_low * cost_mul; - eval_low = 0; - } - if (eval_high) { - entropy_high = ComputeCacheEntropy(argb, refs, cache_bits_high); - entropy_high += entropy_high * cache_bits_high * cost_mul; - eval_high = 0; - } - if (entropy_high < entropy_low) { - const int prev_cache_bits_low = cache_bits_low; - *best_cache_bits = cache_bits_high; - cache_bits_low = (cache_bits_low + cache_bits_high) / 2; - if (cache_bits_low != prev_cache_bits_low) eval_low = 1; - } else { - *best_cache_bits = cache_bits_low; - cache_bits_high = (cache_bits_low + cache_bits_high) / 2; - if (cache_bits_high != cache_bits_low) eval_high = 1; + // Find the cache_bits giving the lowest entropy. The search is done in a + // brute-force way as the function (entropy w.r.t cache_bits) can be + // anything in practice. + if (!ComputeCacheEntropies(argb, refs, cache_bits_high, entropies)) { + return 0; + } + for (i = 0; i <= cache_bits_high; ++i) { + if (i == 0 || entropies[i] < entropy_min) { + entropy_min = entropies[i]; + *best_cache_bits = i; } } - *lz77_computed = 1; return 1; } @@ -1584,8 +1668,9 @@ static int BackwardRefsWithLocalCache(const uint32_t* const argb, PixOrCopy* const v = c.cur_pos; if (PixOrCopyIsLiteral(v)) { const uint32_t argb_literal = v->argb_or_distance; - if (VP8LColorCacheContains(&hashers, argb_literal)) { - const int ix = VP8LColorCacheGetIndex(&hashers, argb_literal); + const int ix = VP8LColorCacheContains(&hashers, argb_literal); + if (ix >= 0) { + // hashers contains argb_literal *v = PixOrCopyCreateCacheIdx(ix); } else { VP8LColorCacheInsert(&hashers, argb_literal); diff --git a/thirdparty/libwebp/enc/backward_references.h b/thirdparty/libwebp/enc/backward_references_enc.h index 0cadb11e11..3a19aa763e 100644 --- a/thirdparty/libwebp/enc/backward_references.h +++ b/thirdparty/libwebp/enc/backward_references_enc.h @@ -130,7 +130,8 @@ struct VP8LHashChain { int VP8LHashChainInit(VP8LHashChain* const p, int size); // Pre-compute the best matches for argb. int VP8LHashChainFill(VP8LHashChain* const p, int quality, - const uint32_t* const argb, int xsize, int ysize); + const uint32_t* const argb, int xsize, int ysize, + int low_effort); void VP8LHashChainClear(VP8LHashChain* const p); // release memory // ----------------------------------------------------------------------------- diff --git a/thirdparty/libwebp/enc/config.c b/thirdparty/libwebp/enc/config_enc.c index f9f7961d58..4589dc0619 100644 --- a/thirdparty/libwebp/enc/config.c +++ b/thirdparty/libwebp/enc/config_enc.c @@ -11,6 +11,10 @@ // // Author: Skal (pascal.massimino@gmail.com) +#ifdef HAVE_CONFIG_H +#include "../webp/config.h" +#endif + #include "../webp/encode.h" //------------------------------------------------------------------------------ @@ -49,9 +53,8 @@ int WebPConfigInitInternal(WebPConfig* config, config->thread_level = 0; config->low_memory = 0; config->near_lossless = 100; -#ifdef WEBP_EXPERIMENTAL_FEATURES - config->delta_palettization = 0; -#endif // WEBP_EXPERIMENTAL_FEATURES + config->use_delta_palette = 0; + config->use_sharp_yuv = 0; // TODO(skal): tune. switch (preset) { @@ -92,60 +95,36 @@ int WebPConfigInitInternal(WebPConfig* config, int WebPValidateConfig(const WebPConfig* config) { if (config == NULL) return 0; - if (config->quality < 0 || config->quality > 100) - return 0; - if (config->target_size < 0) - return 0; - if (config->target_PSNR < 0) - return 0; - if (config->method < 0 || config->method > 6) - return 0; - if (config->segments < 1 || config->segments > 4) - return 0; - if (config->sns_strength < 0 || config->sns_strength > 100) - return 0; - if (config->filter_strength < 0 || config->filter_strength > 100) - return 0; - if (config->filter_sharpness < 0 || config->filter_sharpness > 7) - return 0; - if (config->filter_type < 0 || config->filter_type > 1) - return 0; - if (config->autofilter < 0 || config->autofilter > 1) - return 0; - if (config->pass < 1 || config->pass > 10) - return 0; - if (config->show_compressed < 0 || config->show_compressed > 1) - return 0; - if (config->preprocessing < 0 || config->preprocessing > 7) - return 0; - if (config->partitions < 0 || config->partitions > 3) + if (config->quality < 0 || config->quality > 100) return 0; + if (config->target_size < 0) return 0; + if (config->target_PSNR < 0) return 0; + if (config->method < 0 || config->method > 6) return 0; + if (config->segments < 1 || config->segments > 4) return 0; + if (config->sns_strength < 0 || config->sns_strength > 100) return 0; + if (config->filter_strength < 0 || config->filter_strength > 100) return 0; + if (config->filter_sharpness < 0 || config->filter_sharpness > 7) return 0; + if (config->filter_type < 0 || config->filter_type > 1) return 0; + if (config->autofilter < 0 || config->autofilter > 1) return 0; + if (config->pass < 1 || config->pass > 10) return 0; + if (config->show_compressed < 0 || config->show_compressed > 1) return 0; + if (config->preprocessing < 0 || config->preprocessing > 7) return 0; + if (config->partitions < 0 || config->partitions > 3) return 0; + if (config->partition_limit < 0 || config->partition_limit > 100) return 0; + if (config->alpha_compression < 0) return 0; + if (config->alpha_filtering < 0) return 0; + if (config->alpha_quality < 0 || config->alpha_quality > 100) return 0; + if (config->lossless < 0 || config->lossless > 1) return 0; + if (config->near_lossless < 0 || config->near_lossless > 100) return 0; + if (config->image_hint >= WEBP_HINT_LAST) return 0; + if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1) return 0; + if (config->thread_level < 0 || config->thread_level > 1) return 0; + if (config->low_memory < 0 || config->low_memory > 1) return 0; + if (config->exact < 0 || config->exact > 1) return 0; + if (config->use_delta_palette < 0 || config->use_delta_palette > 1) { return 0; - if (config->partition_limit < 0 || config->partition_limit > 100) - return 0; - if (config->alpha_compression < 0) - return 0; - if (config->alpha_filtering < 0) - return 0; - if (config->alpha_quality < 0 || config->alpha_quality > 100) - return 0; - if (config->lossless < 0 || config->lossless > 1) - return 0; - if (config->near_lossless < 0 || config->near_lossless > 100) - return 0; - if (config->image_hint >= WEBP_HINT_LAST) - return 0; - if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1) - return 0; - if (config->thread_level < 0 || config->thread_level > 1) - return 0; - if (config->low_memory < 0 || config->low_memory > 1) - return 0; - if (config->exact < 0 || config->exact > 1) - return 0; -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (config->delta_palettization < 0 || config->delta_palettization > 1) - return 0; -#endif // WEBP_EXPERIMENTAL_FEATURES + } + if (config->use_sharp_yuv < 0 || config->use_sharp_yuv > 1) return 0; + return 1; } diff --git a/thirdparty/libwebp/enc/cost.c b/thirdparty/libwebp/enc/cost_enc.c index 87f89378a7..c823f5a664 100644 --- a/thirdparty/libwebp/enc/cost.c +++ b/thirdparty/libwebp/enc/cost_enc.c @@ -11,7 +11,7 @@ // // Author: Skal (pascal.massimino@gmail.com) -#include "./cost.h" +#include "./cost_enc.h" //------------------------------------------------------------------------------ // Level cost tables diff --git a/thirdparty/libwebp/enc/cost.h b/thirdparty/libwebp/enc/cost_enc.h index ad7959feb4..99e4b37aa3 100644 --- a/thirdparty/libwebp/enc/cost.h +++ b/thirdparty/libwebp/enc/cost_enc.h @@ -16,7 +16,7 @@ #include <assert.h> #include <stdlib.h> -#include "./vp8enci.h" +#include "./vp8i_enc.h" #ifdef __cplusplus extern "C" { diff --git a/thirdparty/libwebp/enc/delta_palettization.c b/thirdparty/libwebp/enc/delta_palettization_enc.c index 062e588d79..eaf0f050ea 100644 --- a/thirdparty/libwebp/enc/delta_palettization.c +++ b/thirdparty/libwebp/enc/delta_palettization_enc.c @@ -10,7 +10,7 @@ // Author: Mislav Bradac (mislavm@google.com) // -#include "./delta_palettization.h" +#include "./delta_palettization_enc.h" #ifdef WEBP_EXPERIMENTAL_FEATURES #include "../webp/types.h" diff --git a/thirdparty/libwebp/enc/delta_palettization.h b/thirdparty/libwebp/enc/delta_palettization_enc.h index e41c0c5ab5..63048ec6e8 100644 --- a/thirdparty/libwebp/enc/delta_palettization.h +++ b/thirdparty/libwebp/enc/delta_palettization_enc.h @@ -14,7 +14,7 @@ #define WEBP_ENC_DELTA_PALETTIZATION_H_ #include "../webp/encode.h" -#include "../enc/vp8li.h" +#include "../enc/vp8li_enc.h" // Replaces enc->argb_[] input by a palettizable approximation of it, // and generates optimal enc->palette_[]. diff --git a/thirdparty/libwebp/enc/filter.c b/thirdparty/libwebp/enc/filter_enc.c index e8ea8b4ff2..4bc367274c 100644 --- a/thirdparty/libwebp/enc/filter.c +++ b/thirdparty/libwebp/enc/filter_enc.c @@ -12,7 +12,7 @@ // Author: somnath@google.com (Somnath Banerjee) #include <assert.h> -#include "./vp8enci.h" +#include "./vp8i_enc.h" #include "../dsp/dsp.h" // This table gives, for a given sharpness, the filtering strength to be @@ -105,115 +105,28 @@ static void DoFilter(const VP8EncIterator* const it, int level) { } //------------------------------------------------------------------------------ -// SSIM metric - -static const double kMinValue = 1.e-10; // minimal threshold - -void VP8SSIMAddStats(const VP8DistoStats* const src, VP8DistoStats* const dst) { - dst->w += src->w; - dst->xm += src->xm; - dst->ym += src->ym; - dst->xxm += src->xxm; - dst->xym += src->xym; - dst->yym += src->yym; -} - -double VP8SSIMGet(const VP8DistoStats* const stats) { - const double xmxm = stats->xm * stats->xm; - const double ymym = stats->ym * stats->ym; - const double xmym = stats->xm * stats->ym; - const double w2 = stats->w * stats->w; - double sxx = stats->xxm * stats->w - xmxm; - double syy = stats->yym * stats->w - ymym; - double sxy = stats->xym * stats->w - xmym; - double C1, C2; - double fnum; - double fden; - // small errors are possible, due to rounding. Clamp to zero. - if (sxx < 0.) sxx = 0.; - if (syy < 0.) syy = 0.; - C1 = 6.5025 * w2; - C2 = 58.5225 * w2; - fnum = (2 * xmym + C1) * (2 * sxy + C2); - fden = (xmxm + ymym + C1) * (sxx + syy + C2); - return (fden != 0.) ? fnum / fden : kMinValue; -} - -double VP8SSIMGetSquaredError(const VP8DistoStats* const s) { - if (s->w > 0.) { - const double iw2 = 1. / (s->w * s->w); - const double sxx = s->xxm * s->w - s->xm * s->xm; - const double syy = s->yym * s->w - s->ym * s->ym; - const double sxy = s->xym * s->w - s->xm * s->ym; - const double SSE = iw2 * (sxx + syy - 2. * sxy); - if (SSE > kMinValue) return SSE; - } - return kMinValue; -} - -#define LIMIT(A, M) ((A) > (M) ? (M) : (A)) -static void VP8SSIMAccumulateRow(const uint8_t* src1, int stride1, - const uint8_t* src2, int stride2, - int y, int W, int H, - VP8DistoStats* const stats) { - int x = 0; - const int w0 = LIMIT(VP8_SSIM_KERNEL, W); - for (x = 0; x < w0; ++x) { - VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); - } - for (; x <= W - 8 + VP8_SSIM_KERNEL; ++x) { - VP8SSIMAccumulate( - src1 + (y - VP8_SSIM_KERNEL) * stride1 + (x - VP8_SSIM_KERNEL), stride1, - src2 + (y - VP8_SSIM_KERNEL) * stride2 + (x - VP8_SSIM_KERNEL), stride2, - stats); - } - for (; x < W; ++x) { - VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); - } -} - -void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1, - const uint8_t* src2, int stride2, - int W, int H, VP8DistoStats* const stats) { - int x, y; - const int h0 = LIMIT(VP8_SSIM_KERNEL, H); - const int h1 = LIMIT(VP8_SSIM_KERNEL, H - VP8_SSIM_KERNEL); - for (y = 0; y < h0; ++y) { - for (x = 0; x < W; ++x) { - VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); - } - } - for (; y < h1; ++y) { - VP8SSIMAccumulateRow(src1, stride1, src2, stride2, y, W, H, stats); - } - for (; y < H; ++y) { - for (x = 0; x < W; ++x) { - VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); - } - } -} -#undef LIMIT +// SSIM metric for one macroblock static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) { int x, y; - VP8DistoStats s = { .0, .0, .0, .0, .0, .0 }; + double sum = 0.; // compute SSIM in a 10 x 10 window for (y = VP8_SSIM_KERNEL; y < 16 - VP8_SSIM_KERNEL; y++) { for (x = VP8_SSIM_KERNEL; x < 16 - VP8_SSIM_KERNEL; x++) { - VP8SSIMAccumulateClipped(yuv1 + Y_OFF_ENC, BPS, yuv2 + Y_OFF_ENC, BPS, - x, y, 16, 16, &s); + sum += VP8SSIMGetClipped(yuv1 + Y_OFF_ENC, BPS, yuv2 + Y_OFF_ENC, BPS, + x, y, 16, 16); } } for (x = 1; x < 7; x++) { for (y = 1; y < 7; y++) { - VP8SSIMAccumulateClipped(yuv1 + U_OFF_ENC, BPS, yuv2 + U_OFF_ENC, BPS, - x, y, 8, 8, &s); - VP8SSIMAccumulateClipped(yuv1 + V_OFF_ENC, BPS, yuv2 + V_OFF_ENC, BPS, - x, y, 8, 8, &s); + sum += VP8SSIMGetClipped(yuv1 + U_OFF_ENC, BPS, yuv2 + U_OFF_ENC, BPS, + x, y, 8, 8); + sum += VP8SSIMGetClipped(yuv1 + V_OFF_ENC, BPS, yuv2 + V_OFF_ENC, BPS, + x, y, 8, 8); } } - return VP8SSIMGet(&s); + return sum; } //------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/enc/frame.c b/thirdparty/libwebp/enc/frame_enc.c index 57fc471d17..abef523bbf 100644 --- a/thirdparty/libwebp/enc/frame.c +++ b/thirdparty/libwebp/enc/frame_enc.c @@ -14,8 +14,8 @@ #include <string.h> #include <math.h> -#include "./cost.h" -#include "./vp8enci.h" +#include "./cost_enc.h" +#include "./vp8i_enc.h" #include "../dsp/dsp.h" #include "../webp/format_constants.h" // RIFF constants @@ -248,8 +248,9 @@ static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) { p = res->prob[VP8EncBands[n]][1]; } else { if (!VP8PutBit(bw, v > 4, p[3])) { - if (VP8PutBit(bw, v != 2, p[4])) + if (VP8PutBit(bw, v != 2, p[4])) { VP8PutBit(bw, v == 4, p[5]); + } } else if (!VP8PutBit(bw, v > 10, p[6])) { if (!VP8PutBit(bw, v > 6, p[7])) { VP8PutBit(bw, v == 6, 159); @@ -557,8 +558,9 @@ static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt, size += info.R + info.H; size_p0 += info.H; distortion += info.D; - if (percent_delta && !VP8IteratorProgress(&it, percent_delta)) + if (percent_delta && !VP8IteratorProgress(&it, percent_delta)) { return 0; + } VP8IteratorSaveBoundary(&it); } while (VP8IteratorNext(&it) && --nb_mbs > 0); diff --git a/thirdparty/libwebp/enc/histogram.c b/thirdparty/libwebp/enc/histogram_enc.c index 36b7f22625..808b6f78ab 100644 --- a/thirdparty/libwebp/enc/histogram.c +++ b/thirdparty/libwebp/enc/histogram_enc.c @@ -15,9 +15,10 @@ #include <math.h> -#include "./backward_references.h" -#include "./histogram.h" +#include "./backward_references_enc.h" +#include "./histogram_enc.h" #include "../dsp/lossless.h" +#include "../dsp/lossless_common.h" #include "../utils/utils.h" #define MAX_COST 1.e38 @@ -213,10 +214,19 @@ static double InitialHuffmanCost(void) { // Finalize the Huffman cost based on streak numbers and length type (<3 or >=3) static double FinalHuffmanCost(const VP8LStreaks* const stats) { + // The constants in this function are experimental and got rounded from + // their original values in 1/8 when switched to 1/1024. double retval = InitialHuffmanCost(); + // Second coefficient: Many zeros in the histogram are covered efficiently + // by a run-length encode. Originally 2/8. retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1]; + // Second coefficient: Constant values are encoded less efficiently, but still + // RLE'ed. Originally 6/8. retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1]; + // 0s are usually encoded more efficiently than non-0s. + // Originally 15/8. retval += 1.796875 * stats->streaks[0][0]; + // Originally 26/8. retval += 3.28125 * stats->streaks[1][0]; return retval; } @@ -236,14 +246,30 @@ static double PopulationCost(const uint32_t* const population, int length, return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats); } +// trivial_at_end is 1 if the two histograms only have one element that is +// non-zero: both the zero-th one, or both the last one. static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X, const uint32_t* const Y, - int length) { - VP8LBitEntropy bit_entropy; + int length, int trivial_at_end) { VP8LStreaks stats; - VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats); + if (trivial_at_end) { + // This configuration is due to palettization that transforms an indexed + // pixel into 0xff000000 | (pixel << 8) in VP8LBundleColorMap. + // BitsEntropyRefine is 0 for histograms with only one non-zero value. + // Only FinalHuffmanCost needs to be evaluated. + memset(&stats, 0, sizeof(stats)); + // Deal with the non-zero value at index 0 or length-1. + stats.streaks[1][0] += 1; + // Deal with the following/previous zero streak. + stats.counts[0] += 1; + stats.streaks[0][1] += length - 1; + return FinalHuffmanCost(&stats); + } else { + VP8LBitEntropy bit_entropy; + VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats); - return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats); + return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats); + } } // Estimates the Entropy + Huffman + other block overhead size cost. @@ -267,24 +293,42 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, double cost_threshold, double* cost) { const int palette_code_bits = a->palette_code_bits_; + int trivial_at_end = 0; assert(a->palette_code_bits_ == b->palette_code_bits_); *cost += GetCombinedEntropy(a->literal_, b->literal_, - VP8LHistogramNumCodes(palette_code_bits)); + VP8LHistogramNumCodes(palette_code_bits), 0); *cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES, b->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES); if (*cost > cost_threshold) return 0; - *cost += GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES); + if (a->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM && + a->trivial_symbol_ == b->trivial_symbol_) { + // A, R and B are all 0 or 0xff. + const uint32_t color_a = (a->trivial_symbol_ >> 24) & 0xff; + const uint32_t color_r = (a->trivial_symbol_ >> 16) & 0xff; + const uint32_t color_b = (a->trivial_symbol_ >> 0) & 0xff; + if ((color_a == 0 || color_a == 0xff) && + (color_r == 0 || color_r == 0xff) && + (color_b == 0 || color_b == 0xff)) { + trivial_at_end = 1; + } + } + + *cost += + GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, trivial_at_end); if (*cost > cost_threshold) return 0; - *cost += GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES); + *cost += + GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, trivial_at_end); if (*cost > cost_threshold) return 0; - *cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES); + *cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES, + trivial_at_end); if (*cost > cost_threshold) return 0; - *cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES); + *cost += + GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES, 0); *cost += VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES); if (*cost > cost_threshold) return 0; @@ -292,6 +336,15 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, return 1; } +static WEBP_INLINE void HistogramAdd(const VP8LHistogram* const a, + const VP8LHistogram* const b, + VP8LHistogram* const out) { + VP8LHistogramAdd(a, b, out); + out->trivial_symbol_ = (a->trivial_symbol_ == b->trivial_symbol_) + ? a->trivial_symbol_ + : VP8L_NON_TRIVIAL_SYM; +} + // Performs out = a + b, computing the cost C(a+b) - C(a) - C(b) while comparing // to the threshold value 'cost_threshold'. The score returned is // Score = C(a+b) - C(a) - C(b), where C(a) + C(b) is known and fixed. @@ -307,11 +360,9 @@ static double HistogramAddEval(const VP8LHistogram* const a, cost_threshold += sum_cost; if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) { - VP8LHistogramAdd(a, b, out); + HistogramAdd(a, b, out); out->bit_cost_ = cost; out->palette_code_bits_ = a->palette_code_bits_; - out->trivial_symbol_ = (a->trivial_symbol_ == b->trivial_symbol_) ? - a->trivial_symbol_ : VP8L_NON_TRIVIAL_SYM; } return cost - sum_cost; @@ -450,113 +501,103 @@ static void HistogramCopyAndAnalyze( // Partition histograms to different entropy bins for three dominant (literal, // red and blue) symbol costs and compute the histogram aggregate bit_cost. static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo, - int16_t* const bin_map, int low_effort) { + uint16_t* const bin_map, + int low_effort) { int i; VP8LHistogram** const histograms = image_histo->histograms; const int histo_size = image_histo->size; - const int bin_depth = histo_size + 1; DominantCostRange cost_range; DominantCostRangeInit(&cost_range); // Analyze the dominant (literal, red and blue) entropy costs. for (i = 0; i < histo_size; ++i) { - VP8LHistogram* const histo = histograms[i]; - UpdateDominantCostRange(histo, &cost_range); + UpdateDominantCostRange(histograms[i], &cost_range); } // bin-hash histograms on three of the dominant (literal, red and blue) - // symbol costs. + // symbol costs and store the resulting bin_id for each histogram. for (i = 0; i < histo_size; ++i) { - const VP8LHistogram* const histo = histograms[i]; - const int bin_id = GetHistoBinIndex(histo, &cost_range, low_effort); - const int bin_offset = bin_id * bin_depth; - // bin_map[n][0] for every bin 'n' maintains the counter for the number of - // histograms in that bin. - // Get and increment the num_histos in that bin. - const int num_histos = ++bin_map[bin_offset]; - assert(bin_offset + num_histos < bin_depth * BIN_SIZE); - // Add histogram i'th index at num_histos (last) position in the bin_map. - bin_map[bin_offset + num_histos] = i; - } -} - -// Compact the histogram set by removing unused entries. -static void HistogramCompactBins(VP8LHistogramSet* const image_histo) { - VP8LHistogram** const histograms = image_histo->histograms; - int i, j; - - for (i = 0, j = 0; i < image_histo->size; ++i) { - if (histograms[i] != NULL && histograms[i]->bit_cost_ != 0.) { - if (j < i) { - histograms[j] = histograms[i]; - histograms[i] = NULL; - } - ++j; - } + bin_map[i] = GetHistoBinIndex(histograms[i], &cost_range, low_effort); } - image_histo->size = j; } +// Compact image_histo[] by merging some histograms with same bin_id together if +// it's advantageous. static VP8LHistogram* HistogramCombineEntropyBin( VP8LHistogramSet* const image_histo, VP8LHistogram* cur_combo, - int16_t* const bin_map, int bin_depth, int num_bins, + const uint16_t* const bin_map, int bin_map_size, int num_bins, double combine_cost_factor, int low_effort) { - int bin_id; VP8LHistogram** const histograms = image_histo->histograms; - - for (bin_id = 0; bin_id < num_bins; ++bin_id) { - const int bin_offset = bin_id * bin_depth; - const int num_histos = bin_map[bin_offset]; - const int idx1 = bin_map[bin_offset + 1]; - int num_combine_failures = 0; - int n; - for (n = 2; n <= num_histos; ++n) { - const int idx2 = bin_map[bin_offset + n]; - if (low_effort) { - // Merge all histograms with the same bin index, irrespective of cost of - // the merged histograms. - VP8LHistogramAdd(histograms[idx1], histograms[idx2], histograms[idx1]); - histograms[idx2]->bit_cost_ = 0.; - } else { - const double bit_cost_idx2 = histograms[idx2]->bit_cost_; - if (bit_cost_idx2 > 0.) { - const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor; - const double curr_cost_diff = - HistogramAddEval(histograms[idx1], histograms[idx2], - cur_combo, bit_cost_thresh); - if (curr_cost_diff < bit_cost_thresh) { - // Try to merge two histograms only if the combo is a trivial one or - // the two candidate histograms are already non-trivial. - // For some images, 'try_combine' turns out to be false for a lot of - // histogram pairs. In that case, we fallback to combining - // histograms as usual to avoid increasing the header size. - const int try_combine = - (cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) || - ((histograms[idx1]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) && - (histograms[idx2]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM)); - const int max_combine_failures = 32; - if (try_combine || (num_combine_failures >= max_combine_failures)) { - HistogramSwap(&cur_combo, &histograms[idx1]); - histograms[idx2]->bit_cost_ = 0.; - } else { - ++num_combine_failures; - } - } + int idx; + // Work in-place: processed histograms are put at the beginning of + // image_histo[]. At the end, we just have to truncate the array. + int size = 0; + struct { + int16_t first; // position of the histogram that accumulates all + // histograms with the same bin_id + uint16_t num_combine_failures; // number of combine failures per bin_id + } bin_info[BIN_SIZE]; + + assert(num_bins <= BIN_SIZE); + for (idx = 0; idx < num_bins; ++idx) { + bin_info[idx].first = -1; + bin_info[idx].num_combine_failures = 0; + } + + for (idx = 0; idx < bin_map_size; ++idx) { + const int bin_id = bin_map[idx]; + const int first = bin_info[bin_id].first; + assert(size <= idx); + if (first == -1) { + // just move histogram #idx to its final position + histograms[size] = histograms[idx]; + bin_info[bin_id].first = size++; + } else if (low_effort) { + HistogramAdd(histograms[idx], histograms[first], histograms[first]); + } else { + // try to merge #idx into #first (both share the same bin_id) + const double bit_cost = histograms[idx]->bit_cost_; + const double bit_cost_thresh = -bit_cost * combine_cost_factor; + const double curr_cost_diff = + HistogramAddEval(histograms[first], histograms[idx], + cur_combo, bit_cost_thresh); + if (curr_cost_diff < bit_cost_thresh) { + // Try to merge two histograms only if the combo is a trivial one or + // the two candidate histograms are already non-trivial. + // For some images, 'try_combine' turns out to be false for a lot of + // histogram pairs. In that case, we fallback to combining + // histograms as usual to avoid increasing the header size. + const int try_combine = + (cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) || + ((histograms[idx]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) && + (histograms[first]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM)); + const int max_combine_failures = 32; + if (try_combine || + bin_info[bin_id].num_combine_failures >= max_combine_failures) { + // move the (better) merged histogram to its final slot + HistogramSwap(&cur_combo, &histograms[first]); + } else { + histograms[size++] = histograms[idx]; + ++bin_info[bin_id].num_combine_failures; } + } else { + histograms[size++] = histograms[idx]; } } - if (low_effort) { - // Update the bit_cost for the merged histograms (per bin index). - UpdateHistogramCost(histograms[idx1]); + } + image_histo->size = size; + if (low_effort) { + // for low_effort case, update the final cost when everything is merged + for (idx = 0; idx < size; ++idx) { + UpdateHistogramCost(histograms[idx]); } } - HistogramCompactBins(image_histo); return cur_combo; } -static uint32_t MyRand(uint32_t *seed) { - *seed *= 16807U; +static uint32_t MyRand(uint32_t* const seed) { + *seed = (*seed * 16807ull) & 0xffffffffu; if (*seed == 0) { *seed = 1; } @@ -682,7 +723,7 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) { HistogramPair* copy_to; const int idx1 = histo_queue.queue[0].idx1; const int idx2 = histo_queue.queue[0].idx2; - VP8LHistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]); + HistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]); histograms[idx1]->bit_cost_ = histo_queue.queue[0].cost_combo; // Remove merged histogram. for (i = 0; i + 1 < image_histo_size; ++i) { @@ -748,6 +789,8 @@ static void HistogramCombineStochastic(VP8LHistogramSet* const image_histo, const int outer_iters = image_histo_size * iter_mult; const int num_pairs = image_histo_size / 2; const int num_tries_no_success = outer_iters / 2; + int idx2_max = image_histo_size - 1; + int do_brute_dorce = 0; VP8LHistogram** const histograms = image_histo->histograms; // Collapse similar histograms in 'image_histo'. @@ -758,43 +801,62 @@ static void HistogramCombineStochastic(VP8LHistogramSet* const image_histo, double best_cost_diff = 0.; int best_idx1 = -1, best_idx2 = 1; int j; - const int num_tries = + int num_tries = (num_pairs < image_histo_size) ? num_pairs : image_histo_size; + // Use a brute force approach if: + // - stochastic has not worked for a while and + // - if the number of iterations for brute force is less than the number of + // iterations if we never find a match ever again stochastically (hence + // num_tries times the number of remaining outer iterations). + do_brute_dorce = + (tries_with_no_success > 10) && + (idx2_max * (idx2_max + 1) < 2 * num_tries * (outer_iters - iter)); + if (do_brute_dorce) num_tries = idx2_max; + seed += iter; for (j = 0; j < num_tries; ++j) { double curr_cost_diff; // Choose two histograms at random and try to combine them. - const uint32_t idx1 = MyRand(&seed) % image_histo_size; - const uint32_t tmp = (j & 7) + 1; - const uint32_t diff = - (tmp < 3) ? tmp : MyRand(&seed) % (image_histo_size - 1); - const uint32_t idx2 = (idx1 + diff + 1) % image_histo_size; - if (idx1 == idx2) { - continue; + uint32_t idx1, idx2; + if (do_brute_dorce) { + // Use a brute force approach. + idx1 = (uint32_t)j; + idx2 = (uint32_t)idx2_max; + } else { + const uint32_t tmp = (j & 7) + 1; + const uint32_t diff = + (tmp < 3) ? tmp : MyRand(&seed) % (image_histo_size - 1); + idx1 = MyRand(&seed) % image_histo_size; + idx2 = (idx1 + diff + 1) % image_histo_size; + if (idx1 == idx2) { + continue; + } } // Calculate cost reduction on combining. curr_cost_diff = HistogramAddEval(histograms[idx1], histograms[idx2], tmp_histo, best_cost_diff); - if (curr_cost_diff < best_cost_diff) { // found a better pair? + if (curr_cost_diff < best_cost_diff) { // found a better pair? HistogramSwap(&best_combo, &tmp_histo); best_cost_diff = curr_cost_diff; best_idx1 = idx1; best_idx2 = idx2; } } + if (do_brute_dorce) --idx2_max; if (best_idx1 >= 0) { HistogramSwap(&best_combo, &histograms[best_idx1]); // swap best_idx2 slot with last one (which is now unused) --image_histo_size; + if (idx2_max >= image_histo_size) idx2_max = image_histo_size - 1; if (best_idx2 != image_histo_size) { HistogramSwap(&histograms[image_histo_size], &histograms[best_idx2]); histograms[image_histo_size] = NULL; } tries_with_no_success = 0; } - if (++tries_with_no_success >= num_tries_no_success) { + if (++tries_with_no_success >= num_tries_no_success || idx2_max == 0) { break; } } @@ -843,7 +905,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in, for (i = 0; i < in_size; ++i) { const int idx = symbols[i]; - VP8LHistogramAdd(in_histo[i], out_histo[idx], out_histo[idx]); + HistogramAdd(in_histo[i], out_histo[idx], out_histo[idx]); } } @@ -869,32 +931,18 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1; const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1; const int image_histo_raw_size = histo_xsize * histo_ysize; - const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE; - - // The bin_map for every bin follows following semantics: - // bin_map[n][0] = num_histo; // The number of histograms in that bin. - // bin_map[n][1] = index of first histogram in that bin; - // bin_map[n][num_histo] = index of last histogram in that bin; - // bin_map[n][num_histo + 1] ... bin_map[n][bin_depth - 1] = unused indices. - const int bin_depth = image_histo_raw_size + 1; - int16_t* bin_map = NULL; VP8LHistogramSet* const orig_histo = VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits); VP8LHistogram* cur_combo; + // Don't attempt linear bin-partition heuristic for + // histograms of small sizes (as bin_map will be very sparse) and + // maximum quality q==100 (to preserve the compression gains at that level). + const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE; const int entropy_combine = (orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100); if (orig_histo == NULL) goto Error; - // Don't attempt linear bin-partition heuristic for: - // histograms of small sizes, as bin_map will be very sparse and; - // Maximum quality (q==100), to preserve the compression gains at that level. - if (entropy_combine) { - const int bin_map_size = bin_depth * entropy_combine_num_bins; - bin_map = (int16_t*)WebPSafeCalloc(bin_map_size, sizeof(*bin_map)); - if (bin_map == NULL) goto Error; - } - // Construct the histograms from backward references. HistogramBuild(xsize, histo_bits, refs, orig_histo); // Copies the histograms and computes its bit_cost. @@ -902,12 +950,17 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, cur_combo = tmp_histos->histograms[1]; // pick up working slot if (entropy_combine) { + const int bin_map_size = orig_histo->size; + // Reuse histogram_symbols storage. By definition, it's guaranteed to be ok. + uint16_t* const bin_map = histogram_symbols; const double combine_cost_factor = GetCombineCostFactor(image_histo_raw_size, quality); + HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort); // Collapse histograms with similar entropy. - cur_combo = HistogramCombineEntropyBin(image_histo, cur_combo, bin_map, - bin_depth, entropy_combine_num_bins, + cur_combo = HistogramCombineEntropyBin(image_histo, cur_combo, + bin_map, bin_map_size, + entropy_combine_num_bins, combine_cost_factor, low_effort); } @@ -932,7 +985,6 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, ok = 1; Error: - WebPSafeFree(bin_map); VP8LFreeHistogramSet(orig_histo); return ok; } diff --git a/thirdparty/libwebp/enc/histogram.h b/thirdparty/libwebp/enc/histogram_enc.h index d303d1d58b..a9d258a166 100644 --- a/thirdparty/libwebp/enc/histogram.h +++ b/thirdparty/libwebp/enc/histogram_enc.h @@ -16,7 +16,7 @@ #include <string.h> -#include "./backward_references.h" +#include "./backward_references_enc.h" #include "../webp/format_constants.h" #include "../webp/types.h" diff --git a/thirdparty/libwebp/enc/iterator.c b/thirdparty/libwebp/enc/iterator_enc.c index 99d960a547..e48d30bd31 100644 --- a/thirdparty/libwebp/enc/iterator.c +++ b/thirdparty/libwebp/enc/iterator_enc.c @@ -13,7 +13,7 @@ #include <string.h> -#include "./vp8enci.h" +#include "./vp8i_enc.h" //------------------------------------------------------------------------------ // VP8Iterator @@ -53,7 +53,6 @@ void VP8IteratorReset(VP8EncIterator* const it) { VP8IteratorSetRow(it, 0); VP8IteratorSetCountDown(it, enc->mb_w_ * enc->mb_h_); // default InitTop(it); - InitLeft(it); memset(it->bit_count_, 0, sizeof(it->bit_count_)); it->do_trellis_ = 0; } @@ -68,8 +67,6 @@ int VP8IteratorIsDone(const VP8EncIterator* const it) { void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) { it->enc_ = enc; - it->y_stride_ = enc->pic_->y_stride; - it->uv_stride_ = enc->pic_->uv_stride; it->yuv_in_ = (uint8_t*)WEBP_ALIGN(it->yuv_mem_); it->yuv_out_ = it->yuv_in_ + YUV_SIZE_ENC; it->yuv_out2_ = it->yuv_out_ + YUV_SIZE_ENC; @@ -309,14 +306,14 @@ void VP8IteratorSaveBoundary(VP8EncIterator* const it) { } int VP8IteratorNext(VP8EncIterator* const it) { - it->preds_ += 4; - it->mb_ += 1; - it->nz_ += 1; - it->y_top_ += 16; - it->uv_top_ += 16; - it->x_ += 1; - if (it->x_ == it->enc_->mb_w_) { + if (++it->x_ == it->enc_->mb_w_) { VP8IteratorSetRow(it, ++it->y_); + } else { + it->preds_ += 4; + it->mb_ += 1; + it->nz_ += 1; + it->y_top_ += 16; + it->uv_top_ += 16; } return (0 < --it->count_down_); } diff --git a/thirdparty/libwebp/enc/near_lossless.c b/thirdparty/libwebp/enc/near_lossless_enc.c index f4ab91f571..2bd03ab20d 100644 --- a/thirdparty/libwebp/enc/near_lossless.c +++ b/thirdparty/libwebp/enc/near_lossless_enc.c @@ -17,9 +17,9 @@ #include <assert.h> #include <stdlib.h> -#include "../dsp/lossless.h" +#include "../dsp/lossless_common.h" #include "../utils/utils.h" -#include "./vp8enci.h" +#include "./vp8i_enc.h" #define MIN_DIM_FOR_NEAR_LOSSLESS 64 #define MAX_LIMIT_BITS 5 diff --git a/thirdparty/libwebp/enc/picture_csp.c b/thirdparty/libwebp/enc/picture_csp_enc.c index 188a3ca55b..e5d1c75a66 100644 --- a/thirdparty/libwebp/enc/picture_csp.c +++ b/thirdparty/libwebp/enc/picture_csp_enc.c @@ -15,8 +15,8 @@ #include <stdlib.h> #include <math.h> -#include "./vp8enci.h" -#include "../utils/random.h" +#include "./vp8i_enc.h" +#include "../utils/random_utils.h" #include "../utils/utils.h" #include "../dsp/yuv.h" @@ -153,9 +153,9 @@ static int RGBToV(int r, int g, int b, VP8Random* const rg) { } //------------------------------------------------------------------------------ -// Smart RGB->YUV conversion +// Sharp RGB->YUV conversion -static const int kNumIterations = 6; +static const int kNumIterations = 4; static const int kMinDimensionIterativeConversion = 4; // We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some @@ -171,9 +171,9 @@ typedef uint16_t fixed_y_t; // unsigned type with extra SFIX precision for W #if defined(USE_GAMMA_COMPRESSION) // float variant of gamma-correction -// We use tables of different size and precision, along with a 'real-world' -// Gamma value close to ~2. -#define kGammaF 2.2 +// We use tables of different size and precision for the Rec709 +// transfer function. +#define kGammaF (1./0.45) static float kGammaToLinearTabF[MAX_Y_T + 1]; // size scales with Y_FIX static float kLinearToGammaTabF[kGammaTabSize + 2]; static volatile int kGammaTablesFOk = 0; @@ -183,11 +183,26 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) { int v; const double norm = 1. / MAX_Y_T; const double scale = 1. / kGammaTabSize; + const double a = 0.099; + const double thresh = 0.018; for (v = 0; v <= MAX_Y_T; ++v) { - kGammaToLinearTabF[v] = (float)pow(norm * v, kGammaF); + const double g = norm * v; + if (g <= thresh * 4.5) { + kGammaToLinearTabF[v] = (float)(g / 4.5); + } else { + const double a_rec = 1. / (1. + a); + kGammaToLinearTabF[v] = (float)pow(a_rec * (g + a), kGammaF); + } } for (v = 0; v <= kGammaTabSize; ++v) { - kLinearToGammaTabF[v] = (float)(MAX_Y_T * pow(scale * v, 1. / kGammaF)); + const double g = scale * v; + double value; + if (g <= thresh) { + value = 4.5 * g; + } else { + value = (1. + a) * pow(g, 1. / kGammaF) - a; + } + kLinearToGammaTabF[v] = (float)(MAX_Y_T * value); } // to prevent small rounding errors to cause read-overflow: kLinearToGammaTabF[kGammaTabSize + 1] = kLinearToGammaTabF[kGammaTabSize]; @@ -235,12 +250,12 @@ static fixed_y_t clip_y(int y) { //------------------------------------------------------------------------------ static int RGBToGray(int r, int g, int b) { - const int luma = 19595 * r + 38470 * g + 7471 * b + YUV_HALF; + const int luma = 13933 * r + 46871 * g + 4732 * b + YUV_HALF; return (luma >> YUV_FIX); } static float RGBToGrayF(float r, float g, float b) { - return 0.299f * r + 0.587f * g + 0.114f * b; + return (float)(0.2126 * r + 0.7152 * g + 0.0722 * b); } static int ScaleDown(int a, int b, int c, int d) { @@ -251,58 +266,50 @@ static int ScaleDown(int a, int b, int c, int d) { return LinearToGammaF(0.25f * (A + B + C + D)); } -static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int len) { - while (len-- > 0) { - const float R = GammaToLinearF(src[0]); - const float G = GammaToLinearF(src[1]); - const float B = GammaToLinearF(src[2]); +static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) { + int i; + for (i = 0; i < w; ++i) { + const float R = GammaToLinearF(src[0 * w + i]); + const float G = GammaToLinearF(src[1 * w + i]); + const float B = GammaToLinearF(src[2 * w + i]); const float Y = RGBToGrayF(R, G, B); - *dst++ = (fixed_y_t)LinearToGammaF(Y); - src += 3; + dst[i] = (fixed_y_t)LinearToGammaF(Y); } } -static int UpdateChroma(const fixed_y_t* src1, - const fixed_y_t* src2, - fixed_t* dst, fixed_y_t* tmp, int len) { - int diff = 0; - while (len--> 0) { - const int r = ScaleDown(src1[0], src1[3], src2[0], src2[3]); - const int g = ScaleDown(src1[1], src1[4], src2[1], src2[4]); - const int b = ScaleDown(src1[2], src1[5], src2[2], src2[5]); +static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2, + fixed_t* dst, int uv_w) { + int i; + for (i = 0; i < uv_w; ++i) { + const int r = ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], + src2[0 * uv_w + 0], src2[0 * uv_w + 1]); + const int g = ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], + src2[2 * uv_w + 0], src2[2 * uv_w + 1]); + const int b = ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], + src2[4 * uv_w + 0], src2[4 * uv_w + 1]); const int W = RGBToGray(r, g, b); - const int r_avg = (src1[0] + src1[3] + src2[0] + src2[3] + 2) >> 2; - const int g_avg = (src1[1] + src1[4] + src2[1] + src2[4] + 2) >> 2; - const int b_avg = (src1[2] + src1[5] + src2[2] + src2[5] + 2) >> 2; - dst[0] = (fixed_t)(r - W); - dst[1] = (fixed_t)(g - W); - dst[2] = (fixed_t)(b - W); - dst += 3; - src1 += 6; - src2 += 6; - if (tmp != NULL) { - tmp[0] = tmp[1] = clip_y(W); - tmp += 2; - } - diff += abs(RGBToGray(r_avg, g_avg, b_avg) - W); + dst[0 * uv_w] = (fixed_t)(r - W); + dst[1 * uv_w] = (fixed_t)(g - W); + dst[2 * uv_w] = (fixed_t)(b - W); + dst += 1; + src1 += 2; + src2 += 2; } - return diff; } -//------------------------------------------------------------------------------ - -static WEBP_INLINE int Filter(const fixed_t* const A, const fixed_t* const B, - int rightwise) { - int v; - if (!rightwise) { - v = (A[0] * 9 + A[-3] * 3 + B[0] * 3 + B[-3]); - } else { - v = (A[0] * 9 + A[+3] * 3 + B[0] * 3 + B[+3]); +static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) { + int i; + for (i = 0; i < w; ++i) { + y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]); } - return (v + 8) >> 4; } -static WEBP_INLINE int Filter2(int A, int B) { return (A * 3 + B + 2) >> 2; } +//------------------------------------------------------------------------------ + +static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) { + const int v0 = (A * 3 + B + 2) >> 2; + return clip_y(v0 + W0); +} //------------------------------------------------------------------------------ @@ -317,52 +324,50 @@ static void ImportOneRow(const uint8_t* const r_ptr, int pic_width, fixed_y_t* const dst) { int i; + const int w = (pic_width + 1) & ~1; for (i = 0; i < pic_width; ++i) { const int off = i * step; - dst[3 * i + 0] = UpLift(r_ptr[off]); - dst[3 * i + 1] = UpLift(g_ptr[off]); - dst[3 * i + 2] = UpLift(b_ptr[off]); + dst[i + 0 * w] = UpLift(r_ptr[off]); + dst[i + 1 * w] = UpLift(g_ptr[off]); + dst[i + 2 * w] = UpLift(b_ptr[off]); } if (pic_width & 1) { // replicate rightmost pixel - memcpy(dst + 3 * pic_width, dst + 3 * (pic_width - 1), 3 * sizeof(*dst)); + dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1]; + dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1]; + dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1]; } } static void InterpolateTwoRows(const fixed_y_t* const best_y, - const fixed_t* const prev_uv, - const fixed_t* const cur_uv, - const fixed_t* const next_uv, + const fixed_t* prev_uv, + const fixed_t* cur_uv, + const fixed_t* next_uv, int w, - fixed_y_t* const out1, - fixed_y_t* const out2) { - int i, k; - { // special boundary case for i==0 - const int W0 = best_y[0]; - const int W1 = best_y[w]; - for (k = 0; k <= 2; ++k) { - out1[k] = clip_y(Filter2(cur_uv[k], prev_uv[k]) + W0); - out2[k] = clip_y(Filter2(cur_uv[k], next_uv[k]) + W1); - } - } - for (i = 1; i < w - 1; ++i) { - const int W0 = best_y[i + 0]; - const int W1 = best_y[i + w]; - const int off = 3 * (i >> 1); - for (k = 0; k <= 2; ++k) { - const int tmp0 = Filter(cur_uv + off + k, prev_uv + off + k, i & 1); - const int tmp1 = Filter(cur_uv + off + k, next_uv + off + k, i & 1); - out1[3 * i + k] = clip_y(tmp0 + W0); - out2[3 * i + k] = clip_y(tmp1 + W1); - } - } - { // special boundary case for i == w - 1 - const int W0 = best_y[i + 0]; - const int W1 = best_y[i + w]; - const int off = 3 * (i >> 1); - for (k = 0; k <= 2; ++k) { - out1[3 * i + k] = clip_y(Filter2(cur_uv[off + k], prev_uv[off + k]) + W0); - out2[3 * i + k] = clip_y(Filter2(cur_uv[off + k], next_uv[off + k]) + W1); + fixed_y_t* out1, + fixed_y_t* out2) { + const int uv_w = w >> 1; + const int len = (w - 1) >> 1; // length to filter + int k = 3; + while (k-- > 0) { // process each R/G/B segments in turn + // special boundary case for i==0 + out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]); + out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w]); + + WebPSharpYUVFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1); + WebPSharpYUVFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1); + + // special boundary case for i == w - 1 when w is even + if (!(w & 1)) { + out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1], + best_y[w - 1 + 0]); + out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1], + best_y[w - 1 + w]); } + out1 += w; + out2 += w; + prev_uv += uv_w; + cur_uv += uv_w; + next_uv += uv_w; } } @@ -394,11 +399,11 @@ static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, const int uv_h = h >> 1; for (best_uv = best_uv_base, j = 0; j < picture->height; ++j) { for (i = 0; i < picture->width; ++i) { - const int off = 3 * (i >> 1); + const int off = (i >> 1); const int W = best_y[i]; - const int r = best_uv[off + 0] + W; - const int g = best_uv[off + 1] + W; - const int b = best_uv[off + 2] + W; + const int r = best_uv[off + 0 * uv_w] + W; + const int g = best_uv[off + 1 * uv_w] + W; + const int b = best_uv[off + 2 * uv_w] + W; dst_y[i] = ConvertRGBToY(r, g, b); } best_y += w; @@ -407,10 +412,10 @@ static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, } for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) { for (i = 0; i < uv_w; ++i) { - const int off = 3 * i; - const int r = best_uv[off + 0]; - const int g = best_uv[off + 1]; - const int b = best_uv[off + 2]; + const int off = i; + const int r = best_uv[off + 0 * uv_w]; + const int g = best_uv[off + 1 * uv_w]; + const int b = best_uv[off + 2 * uv_w]; dst_u[i] = ConvertRGBToU(r, g, b); dst_v[i] = ConvertRGBToV(r, g, b); } @@ -436,7 +441,8 @@ static int PreprocessARGB(const uint8_t* r_ptr, const int h = (picture->height + 1) & ~1; const int uv_w = w >> 1; const int uv_h = h >> 1; - int i, j, iter; + uint64_t prev_diff_y_sum = ~0; + int j, iter; // TODO(skal): allocate one big memory chunk. But for now, it's easier // for valgrind debugging to have several chunks. @@ -451,11 +457,8 @@ static int PreprocessARGB(const uint8_t* r_ptr, fixed_y_t* target_y = target_y_base; fixed_t* best_uv = best_uv_base; fixed_t* target_uv = target_uv_base; + const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h); int ok; - int diff_sum = 0; - const int first_diff_threshold = (int)(2.5 * w * h); - const int min_improvement = 5; // stop if improvement is below this % - const int min_first_improvement = 80; if (best_y_base == NULL || best_uv_base == NULL || target_y_base == NULL || target_uv_base == NULL || @@ -467,10 +470,12 @@ static int PreprocessARGB(const uint8_t* r_ptr, assert(picture->width >= kMinDimensionIterativeConversion); assert(picture->height >= kMinDimensionIterativeConversion); + WebPInitConvertARGBToYUV(); + // Import RGB samples to W/RGB representation. for (j = 0; j < picture->height; j += 2) { const int is_last_row = (j == picture->height - 1); - fixed_y_t* const src1 = tmp_buffer; + fixed_y_t* const src1 = tmp_buffer + 0 * w; fixed_y_t* const src2 = tmp_buffer + 3 * w; // prepare two rows of input @@ -481,11 +486,13 @@ static int PreprocessARGB(const uint8_t* r_ptr, } else { memcpy(src2, src1, 3 * w * sizeof(*src2)); } + StoreGray(src1, best_y + 0, w); + StoreGray(src2, best_y + w, w); + UpdateW(src1, target_y, w); UpdateW(src2, target_y + w, w); - diff_sum += UpdateChroma(src1, src2, target_uv, best_y, uv_w); + UpdateChroma(src1, src2, target_uv, uv_w); memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv)); - memcpy(best_y + w, best_y, w * sizeof(*best_y)); best_y += 2 * w; best_uv += 3 * uv_w; target_y += 2 * w; @@ -497,18 +504,16 @@ static int PreprocessARGB(const uint8_t* r_ptr, // Iterate and resolve clipping conflicts. for (iter = 0; iter < kNumIterations; ++iter) { - int k; const fixed_t* cur_uv = best_uv_base; const fixed_t* prev_uv = best_uv_base; - const int old_diff_sum = diff_sum; - diff_sum = 0; + uint64_t diff_y_sum = 0; best_y = best_y_base; best_uv = best_uv_base; target_y = target_y_base; target_uv = target_uv_base; for (j = 0; j < h; j += 2) { - fixed_y_t* const src1 = tmp_buffer; + fixed_y_t* const src1 = tmp_buffer + 0 * w; fixed_y_t* const src2 = tmp_buffer + 3 * w; { const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0); @@ -519,50 +524,24 @@ static int PreprocessARGB(const uint8_t* r_ptr, UpdateW(src1, best_rgb_y + 0 * w, w); UpdateW(src2, best_rgb_y + 1 * w, w); - diff_sum += UpdateChroma(src1, src2, best_rgb_uv, NULL, uv_w); + UpdateChroma(src1, src2, best_rgb_uv, uv_w); // update two rows of Y and one row of RGB - for (i = 0; i < 2 * w; ++i) { - const int diff_y = target_y[i] - best_rgb_y[i]; - const int new_y = (int)best_y[i] + diff_y; - best_y[i] = clip_y(new_y); - } - for (i = 0; i < uv_w; ++i) { - const int off = 3 * i; - int W; - for (k = 0; k <= 2; ++k) { - const int diff_uv = (int)target_uv[off + k] - best_rgb_uv[off + k]; - best_uv[off + k] += diff_uv; - } - W = RGBToGray(best_uv[off + 0], best_uv[off + 1], best_uv[off + 2]); - for (k = 0; k <= 2; ++k) { - best_uv[off + k] -= W; - } - } + diff_y_sum += WebPSharpYUVUpdateY(target_y, best_rgb_y, best_y, 2 * w); + WebPSharpYUVUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w); + best_y += 2 * w; best_uv += 3 * uv_w; target_y += 2 * w; target_uv += 3 * uv_w; } // test exit condition - if (diff_sum > 0) { - const int improvement = 100 * abs(diff_sum - old_diff_sum) / diff_sum; - // Check if first iteration gave good result already, without a large - // jump of improvement (otherwise it means we need to try few extra - // iterations, just to be sure). - if (iter == 0 && diff_sum < first_diff_threshold && - improvement < min_first_improvement) { - break; - } - // then, check if improvement is stalling. - if (improvement < min_improvement) { - break; - } - } else { - break; + if (iter > 0) { + if (diff_y_sum < diff_y_threshold) break; + if (diff_y_sum > prev_diff_y_sum) break; } + prev_diff_y_sum = diff_y_sum; } - // final reconstruction ok = ConvertWRGBToYUV(best_y_base, best_uv_base, picture); @@ -1032,9 +1011,13 @@ int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) { return PictureARGBToYUVA(picture, colorspace, 0.f, 0); } -int WebPPictureSmartARGBToYUVA(WebPPicture* picture) { +int WebPPictureSharpARGBToYUVA(WebPPicture* picture) { return PictureARGBToYUVA(picture, WEBP_YUV420, 0.f, 1); } +// for backward compatibility +int WebPPictureSmartARGBToYUVA(WebPPicture* picture) { + return WebPPictureSharpARGBToYUVA(picture); +} //------------------------------------------------------------------------------ // call for YUVA -> ARGB conversion diff --git a/thirdparty/libwebp/enc/picture.c b/thirdparty/libwebp/enc/picture_enc.c index 28c56cd6e5..dfa66510fb 100644 --- a/thirdparty/libwebp/enc/picture.c +++ b/thirdparty/libwebp/enc/picture_enc.c @@ -14,7 +14,7 @@ #include <assert.h> #include <stdlib.h> -#include "./vp8enci.h" +#include "./vp8i_enc.h" #include "../dsp/dsp.h" #include "../utils/utils.h" diff --git a/thirdparty/libwebp/enc/picture_psnr.c b/thirdparty/libwebp/enc/picture_psnr.c deleted file mode 100644 index 329757deb1..0000000000 --- a/thirdparty/libwebp/enc/picture_psnr.c +++ /dev/null @@ -1,177 +0,0 @@ -// Copyright 2014 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -// ----------------------------------------------------------------------------- -// -// WebPPicture tools for measuring distortion -// -// Author: Skal (pascal.massimino@gmail.com) - -#include <math.h> -#include <stdlib.h> - -#include "./vp8enci.h" -#include "../utils/utils.h" - -//------------------------------------------------------------------------------ -// local-min distortion -// -// For every pixel in the *reference* picture, we search for the local best -// match in the compressed image. This is not a symmetrical measure. - -#define RADIUS 2 // search radius. Shouldn't be too large. - -static void AccumulateLSIM(const uint8_t* src, int src_stride, - const uint8_t* ref, int ref_stride, - int w, int h, VP8DistoStats* stats) { - int x, y; - double total_sse = 0.; - for (y = 0; y < h; ++y) { - const int y_0 = (y - RADIUS < 0) ? 0 : y - RADIUS; - const int y_1 = (y + RADIUS + 1 >= h) ? h : y + RADIUS + 1; - for (x = 0; x < w; ++x) { - const int x_0 = (x - RADIUS < 0) ? 0 : x - RADIUS; - const int x_1 = (x + RADIUS + 1 >= w) ? w : x + RADIUS + 1; - double best_sse = 255. * 255.; - const double value = (double)ref[y * ref_stride + x]; - int i, j; - for (j = y_0; j < y_1; ++j) { - const uint8_t* const s = src + j * src_stride; - for (i = x_0; i < x_1; ++i) { - const double diff = s[i] - value; - const double sse = diff * diff; - if (sse < best_sse) best_sse = sse; - } - } - total_sse += best_sse; - } - } - stats->w = w * h; - stats->xm = 0; - stats->ym = 0; - stats->xxm = total_sse; - stats->yym = 0; - stats->xxm = 0; -} -#undef RADIUS - -//------------------------------------------------------------------------------ -// Distortion - -// Max value returned in case of exact similarity. -static const double kMinDistortion_dB = 99.; -static float GetPSNR(const double v) { - return (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.)) - : kMinDistortion_dB); -} - -int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, - int type, float result[5]) { - VP8DistoStats stats[5]; - int w, h; - - memset(stats, 0, sizeof(stats)); - - VP8SSIMDspInit(); - - if (src == NULL || ref == NULL || - src->width != ref->width || src->height != ref->height || - src->use_argb != ref->use_argb || result == NULL) { - return 0; - } - w = src->width; - h = src->height; - - if (src->use_argb == 1) { - if (src->argb == NULL || ref->argb == NULL) { - return 0; - } else { - int i, j, c; - uint8_t* tmp1, *tmp2; - uint8_t* const tmp_plane = - (uint8_t*)WebPSafeMalloc(2ULL * w * h, sizeof(*tmp_plane)); - if (tmp_plane == NULL) return 0; - tmp1 = tmp_plane; - tmp2 = tmp_plane + w * h; - for (c = 0; c < 4; ++c) { - for (j = 0; j < h; ++j) { - for (i = 0; i < w; ++i) { - tmp1[j * w + i] = src->argb[i + j * src->argb_stride] >> (c * 8); - tmp2[j * w + i] = ref->argb[i + j * ref->argb_stride] >> (c * 8); - } - } - if (type >= 2) { - AccumulateLSIM(tmp1, w, tmp2, w, w, h, &stats[c]); - } else { - VP8SSIMAccumulatePlane(tmp1, w, tmp2, w, w, h, &stats[c]); - } - } - WebPSafeFree(tmp_plane); - } - } else { - int has_alpha, uv_w, uv_h; - if (src->y == NULL || ref->y == NULL || - src->u == NULL || ref->u == NULL || - src->v == NULL || ref->v == NULL) { - return 0; - } - has_alpha = !!(src->colorspace & WEBP_CSP_ALPHA_BIT); - if (has_alpha != !!(ref->colorspace & WEBP_CSP_ALPHA_BIT) || - (has_alpha && (src->a == NULL || ref->a == NULL))) { - return 0; - } - - uv_w = (src->width + 1) >> 1; - uv_h = (src->height + 1) >> 1; - if (type >= 2) { - AccumulateLSIM(src->y, src->y_stride, ref->y, ref->y_stride, - w, h, &stats[0]); - AccumulateLSIM(src->u, src->uv_stride, ref->u, ref->uv_stride, - uv_w, uv_h, &stats[1]); - AccumulateLSIM(src->v, src->uv_stride, ref->v, ref->uv_stride, - uv_w, uv_h, &stats[2]); - if (has_alpha) { - AccumulateLSIM(src->a, src->a_stride, ref->a, ref->a_stride, - w, h, &stats[3]); - } - } else { - VP8SSIMAccumulatePlane(src->y, src->y_stride, - ref->y, ref->y_stride, - w, h, &stats[0]); - VP8SSIMAccumulatePlane(src->u, src->uv_stride, - ref->u, ref->uv_stride, - uv_w, uv_h, &stats[1]); - VP8SSIMAccumulatePlane(src->v, src->uv_stride, - ref->v, ref->uv_stride, - uv_w, uv_h, &stats[2]); - if (has_alpha) { - VP8SSIMAccumulatePlane(src->a, src->a_stride, - ref->a, ref->a_stride, - w, h, &stats[3]); - } - } - } - // Final stat calculations. - { - int c; - for (c = 0; c <= 4; ++c) { - if (type == 1) { - const double v = VP8SSIMGet(&stats[c]); - result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v) - : kMinDistortion_dB); - } else { - const double v = VP8SSIMGetSquaredError(&stats[c]); - result[c] = GetPSNR(v); - } - // Accumulate forward - if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]); - } - } - return 1; -} - -//------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/enc/picture_psnr_enc.c b/thirdparty/libwebp/enc/picture_psnr_enc.c new file mode 100644 index 0000000000..9c0b229507 --- /dev/null +++ b/thirdparty/libwebp/enc/picture_psnr_enc.c @@ -0,0 +1,213 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// WebPPicture tools for measuring distortion +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <math.h> +#include <stdlib.h> + +#include "./vp8i_enc.h" +#include "../utils/utils.h" + +typedef double (*AccumulateFunc)(const uint8_t* src, int src_stride, + const uint8_t* ref, int ref_stride, + int w, int h); + +//------------------------------------------------------------------------------ +// local-min distortion +// +// For every pixel in the *reference* picture, we search for the local best +// match in the compressed image. This is not a symmetrical measure. + +#define RADIUS 2 // search radius. Shouldn't be too large. + +static double AccumulateLSIM(const uint8_t* src, int src_stride, + const uint8_t* ref, int ref_stride, + int w, int h) { + int x, y; + double total_sse = 0.; + for (y = 0; y < h; ++y) { + const int y_0 = (y - RADIUS < 0) ? 0 : y - RADIUS; + const int y_1 = (y + RADIUS + 1 >= h) ? h : y + RADIUS + 1; + for (x = 0; x < w; ++x) { + const int x_0 = (x - RADIUS < 0) ? 0 : x - RADIUS; + const int x_1 = (x + RADIUS + 1 >= w) ? w : x + RADIUS + 1; + double best_sse = 255. * 255.; + const double value = (double)ref[y * ref_stride + x]; + int i, j; + for (j = y_0; j < y_1; ++j) { + const uint8_t* const s = src + j * src_stride; + for (i = x_0; i < x_1; ++i) { + const double diff = s[i] - value; + const double sse = diff * diff; + if (sse < best_sse) best_sse = sse; + } + } + total_sse += best_sse; + } + } + return total_sse; +} +#undef RADIUS + +static double AccumulateSSE(const uint8_t* src, int src_stride, + const uint8_t* ref, int ref_stride, + int w, int h) { + int y; + double total_sse = 0.; + for (y = 0; y < h; ++y) { + total_sse += VP8AccumulateSSE(src, ref, w); + src += src_stride; + ref += ref_stride; + } + return total_sse; +} + +//------------------------------------------------------------------------------ + +static double AccumulateSSIM(const uint8_t* src, int src_stride, + const uint8_t* ref, int ref_stride, + int w, int h) { + const int w0 = (w < VP8_SSIM_KERNEL) ? w : VP8_SSIM_KERNEL; + const int w1 = w - VP8_SSIM_KERNEL - 1; + const int h0 = (h < VP8_SSIM_KERNEL) ? h : VP8_SSIM_KERNEL; + const int h1 = h - VP8_SSIM_KERNEL - 1; + int x, y; + double sum = 0.; + for (y = 0; y < h0; ++y) { + for (x = 0; x < w; ++x) { + sum += VP8SSIMGetClipped(src, src_stride, ref, ref_stride, x, y, w, h); + } + } + for (; y < h1; ++y) { + for (x = 0; x < w0; ++x) { + sum += VP8SSIMGetClipped(src, src_stride, ref, ref_stride, x, y, w, h); + } + for (; x < w1; ++x) { + const int off1 = x - VP8_SSIM_KERNEL + (y - VP8_SSIM_KERNEL) * src_stride; + const int off2 = x - VP8_SSIM_KERNEL + (y - VP8_SSIM_KERNEL) * ref_stride; + sum += VP8SSIMGet(src + off1, src_stride, ref + off2, ref_stride); + } + for (; x < w; ++x) { + sum += VP8SSIMGetClipped(src, src_stride, ref, ref_stride, x, y, w, h); + } + } + for (; y < h; ++y) { + for (x = 0; x < w; ++x) { + sum += VP8SSIMGetClipped(src, src_stride, ref, ref_stride, x, y, w, h); + } + } + return sum; +} + +//------------------------------------------------------------------------------ +// Distortion + +// Max value returned in case of exact similarity. +static const double kMinDistortion_dB = 99.; + +static double GetPSNR(double v, double size) { + return (v > 0. && size > 0.) ? -4.3429448 * log(v / (size * 255 * 255.)) + : kMinDistortion_dB; +} + +static double GetLogSSIM(double v, double size) { + v = (size > 0.) ? v / size : 1.; + return (v < 1.) ? -10.0 * log10(1. - v) : kMinDistortion_dB; +} + +int WebPPlaneDistortion(const uint8_t* src, size_t src_stride, + const uint8_t* ref, size_t ref_stride, + int width, int height, size_t x_step, + int type, float* distortion, float* result) { + uint8_t* allocated = NULL; + const AccumulateFunc metric = (type == 0) ? AccumulateSSE : + (type == 1) ? AccumulateSSIM : + AccumulateLSIM; + if (src == NULL || ref == NULL || + src_stride < x_step * width || ref_stride < x_step * width || + result == NULL || distortion == NULL) { + return 0; + } + + VP8SSIMDspInit(); + if (x_step != 1) { // extract a packed plane if needed + int x, y; + uint8_t* tmp1; + uint8_t* tmp2; + allocated = + (uint8_t*)WebPSafeMalloc(2ULL * width * height, sizeof(*allocated)); + if (allocated == NULL) return 0; + tmp1 = allocated; + tmp2 = tmp1 + (size_t)width * height; + for (y = 0; y < height; ++y) { + for (x = 0; x < width; ++x) { + tmp1[x + y * width] = src[x * x_step + y * src_stride]; + tmp2[x + y * width] = ref[x * x_step + y * ref_stride]; + } + } + src = tmp1; + ref = tmp2; + } + *distortion = (float)metric(src, width, ref, width, width, height); + WebPSafeFree(allocated); + + *result = (type == 1) ? (float)GetLogSSIM(*distortion, (double)width * height) + : (float)GetPSNR(*distortion, (double)width * height); + return 1; +} + +int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, + int type, float results[5]) { + int w, h, c; + int ok = 0; + WebPPicture p0, p1; + double total_size = 0., total_distortion = 0.; + if (src == NULL || ref == NULL || + src->width != ref->width || src->height != ref->height || + results == NULL) { + return 0; + } + + VP8SSIMDspInit(); + if (!WebPPictureInit(&p0) || !WebPPictureInit(&p1)) return 0; + w = src->width; + h = src->height; + if (!WebPPictureView(src, 0, 0, w, h, &p0)) goto Error; + if (!WebPPictureView(ref, 0, 0, w, h, &p1)) goto Error; + + // We always measure distortion in ARGB space. + if (p0.use_argb == 0 && !WebPPictureYUVAToARGB(&p0)) goto Error; + if (p1.use_argb == 0 && !WebPPictureYUVAToARGB(&p1)) goto Error; + for (c = 0; c < 4; ++c) { + float distortion; + const size_t stride0 = 4 * (size_t)p0.argb_stride; + const size_t stride1 = 4 * (size_t)p1.argb_stride; + if (!WebPPlaneDistortion((const uint8_t*)p0.argb + c, stride0, + (const uint8_t*)p1.argb + c, stride1, + w, h, 4, type, &distortion, results + c)) { + goto Error; + } + total_distortion += distortion; + total_size += w * h; + } + + results[4] = (type == 1) ? (float)GetLogSSIM(total_distortion, total_size) + : (float)GetPSNR(total_distortion, total_size); + ok = 1; + + Error: + WebPPictureFree(&p0); + WebPPictureFree(&p1); + return ok; +} + +//------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/enc/picture_rescale.c b/thirdparty/libwebp/enc/picture_rescale_enc.c index 9f19e8e80f..0b7181c0d7 100644 --- a/thirdparty/libwebp/enc/picture_rescale.c +++ b/thirdparty/libwebp/enc/picture_rescale_enc.c @@ -14,8 +14,8 @@ #include <assert.h> #include <stdlib.h> -#include "./vp8enci.h" -#include "../utils/rescaler.h" +#include "./vp8i_enc.h" +#include "../utils/rescaler_utils.h" #include "../utils/utils.h" #define HALVE(x) (((x) + 1) >> 1) diff --git a/thirdparty/libwebp/enc/picture_tools.c b/thirdparty/libwebp/enc/picture_tools_enc.c index bf97af8408..895df51156 100644 --- a/thirdparty/libwebp/enc/picture_tools.c +++ b/thirdparty/libwebp/enc/picture_tools_enc.c @@ -13,7 +13,7 @@ #include <assert.h> -#include "./vp8enci.h" +#include "./vp8i_enc.h" #include "../dsp/yuv.h" static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) { diff --git a/thirdparty/libwebp/enc/predictor_enc.c b/thirdparty/libwebp/enc/predictor_enc.c new file mode 100644 index 0000000000..0639b74f1c --- /dev/null +++ b/thirdparty/libwebp/enc/predictor_enc.c @@ -0,0 +1,750 @@ +// Copyright 2016 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Image transform methods for lossless encoder. +// +// Authors: Vikas Arora (vikaas.arora@gmail.com) +// Jyrki Alakuijala (jyrki@google.com) +// Urvang Joshi (urvang@google.com) +// Vincent Rabaud (vrabaud@google.com) + +#include "../dsp/lossless.h" +#include "../dsp/lossless_common.h" +#include "./vp8li_enc.h" + +#define MAX_DIFF_COST (1e30f) + +static const float kSpatialPredictorBias = 15.f; +static const int kPredLowEffort = 11; +static const uint32_t kMaskAlpha = 0xff000000; + +// Mostly used to reduce code size + readability +static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; } +static WEBP_INLINE int GetMax(int a, int b) { return (a < b) ? b : a; } + +//------------------------------------------------------------------------------ +// Methods to calculate Entropy (Shannon). + +static float PredictionCostSpatial(const int counts[256], int weight_0, + double exp_val) { + const int significant_symbols = 256 >> 4; + const double exp_decay_factor = 0.6; + double bits = weight_0 * counts[0]; + int i; + for (i = 1; i < significant_symbols; ++i) { + bits += exp_val * (counts[i] + counts[256 - i]); + exp_val *= exp_decay_factor; + } + return (float)(-0.1 * bits); +} + +static float PredictionCostSpatialHistogram(const int accumulated[4][256], + const int tile[4][256]) { + int i; + double retval = 0; + for (i = 0; i < 4; ++i) { + const double kExpValue = 0.94; + retval += PredictionCostSpatial(tile[i], 1, kExpValue); + retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]); + } + return (float)retval; +} + +static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) { + ++histo_argb[0][argb >> 24]; + ++histo_argb[1][(argb >> 16) & 0xff]; + ++histo_argb[2][(argb >> 8) & 0xff]; + ++histo_argb[3][argb & 0xff]; +} + +//------------------------------------------------------------------------------ +// Spatial transform functions. + +static WEBP_INLINE void PredictBatch(int mode, int x_start, int y, + int num_pixels, const uint32_t* current, + const uint32_t* upper, uint32_t* out) { + if (x_start == 0) { + if (y == 0) { + // ARGB_BLACK. + VP8LPredictorsSub[0](current, NULL, 1, out); + } else { + // Top one. + VP8LPredictorsSub[2](current, upper, 1, out); + } + ++x_start; + ++out; + --num_pixels; + } + if (y == 0) { + // Left one. + VP8LPredictorsSub[1](current + x_start, NULL, num_pixels, out); + } else { + VP8LPredictorsSub[mode](current + x_start, upper + x_start, num_pixels, + out); + } +} + +static int MaxDiffBetweenPixels(uint32_t p1, uint32_t p2) { + const int diff_a = abs((int)(p1 >> 24) - (int)(p2 >> 24)); + const int diff_r = abs((int)((p1 >> 16) & 0xff) - (int)((p2 >> 16) & 0xff)); + const int diff_g = abs((int)((p1 >> 8) & 0xff) - (int)((p2 >> 8) & 0xff)); + const int diff_b = abs((int)(p1 & 0xff) - (int)(p2 & 0xff)); + return GetMax(GetMax(diff_a, diff_r), GetMax(diff_g, diff_b)); +} + +static int MaxDiffAroundPixel(uint32_t current, uint32_t up, uint32_t down, + uint32_t left, uint32_t right) { + const int diff_up = MaxDiffBetweenPixels(current, up); + const int diff_down = MaxDiffBetweenPixels(current, down); + const int diff_left = MaxDiffBetweenPixels(current, left); + const int diff_right = MaxDiffBetweenPixels(current, right); + return GetMax(GetMax(diff_up, diff_down), GetMax(diff_left, diff_right)); +} + +static uint32_t AddGreenToBlueAndRed(uint32_t argb) { + const uint32_t green = (argb >> 8) & 0xff; + uint32_t red_blue = argb & 0x00ff00ffu; + red_blue += (green << 16) | green; + red_blue &= 0x00ff00ffu; + return (argb & 0xff00ff00u) | red_blue; +} + +static void MaxDiffsForRow(int width, int stride, const uint32_t* const argb, + uint8_t* const max_diffs, int used_subtract_green) { + uint32_t current, up, down, left, right; + int x; + if (width <= 2) return; + current = argb[0]; + right = argb[1]; + if (used_subtract_green) { + current = AddGreenToBlueAndRed(current); + right = AddGreenToBlueAndRed(right); + } + // max_diffs[0] and max_diffs[width - 1] are never used. + for (x = 1; x < width - 1; ++x) { + up = argb[-stride + x]; + down = argb[stride + x]; + left = current; + current = right; + right = argb[x + 1]; + if (used_subtract_green) { + up = AddGreenToBlueAndRed(up); + down = AddGreenToBlueAndRed(down); + right = AddGreenToBlueAndRed(right); + } + max_diffs[x] = MaxDiffAroundPixel(current, up, down, left, right); + } +} + +// Quantize the difference between the actual component value and its prediction +// to a multiple of quantization, working modulo 256, taking care not to cross +// a boundary (inclusive upper limit). +static uint8_t NearLosslessComponent(uint8_t value, uint8_t predict, + uint8_t boundary, int quantization) { + const int residual = (value - predict) & 0xff; + const int boundary_residual = (boundary - predict) & 0xff; + const int lower = residual & ~(quantization - 1); + const int upper = lower + quantization; + // Resolve ties towards a value closer to the prediction (i.e. towards lower + // if value comes after prediction and towards upper otherwise). + const int bias = ((boundary - value) & 0xff) < boundary_residual; + if (residual - lower < upper - residual + bias) { + // lower is closer to residual than upper. + if (residual > boundary_residual && lower <= boundary_residual) { + // Halve quantization step to avoid crossing boundary. This midpoint is + // on the same side of boundary as residual because midpoint >= residual + // (since lower is closer than upper) and residual is above the boundary. + return lower + (quantization >> 1); + } + return lower; + } else { + // upper is closer to residual than lower. + if (residual <= boundary_residual && upper > boundary_residual) { + // Halve quantization step to avoid crossing boundary. This midpoint is + // on the same side of boundary as residual because midpoint <= residual + // (since upper is closer than lower) and residual is below the boundary. + return lower + (quantization >> 1); + } + return upper & 0xff; + } +} + +// Quantize every component of the difference between the actual pixel value and +// its prediction to a multiple of a quantization (a power of 2, not larger than +// max_quantization which is a power of 2, smaller than max_diff). Take care if +// value and predict have undergone subtract green, which means that red and +// blue are represented as offsets from green. +static uint32_t NearLossless(uint32_t value, uint32_t predict, + int max_quantization, int max_diff, + int used_subtract_green) { + int quantization; + uint8_t new_green = 0; + uint8_t green_diff = 0; + uint8_t a, r, g, b; + if (max_diff <= 2) { + return VP8LSubPixels(value, predict); + } + quantization = max_quantization; + while (quantization >= max_diff) { + quantization >>= 1; + } + if ((value >> 24) == 0 || (value >> 24) == 0xff) { + // Preserve transparency of fully transparent or fully opaque pixels. + a = ((value >> 24) - (predict >> 24)) & 0xff; + } else { + a = NearLosslessComponent(value >> 24, predict >> 24, 0xff, quantization); + } + g = NearLosslessComponent((value >> 8) & 0xff, (predict >> 8) & 0xff, 0xff, + quantization); + if (used_subtract_green) { + // The green offset will be added to red and blue components during decoding + // to obtain the actual red and blue values. + new_green = ((predict >> 8) + g) & 0xff; + // The amount by which green has been adjusted during quantization. It is + // subtracted from red and blue for compensation, to avoid accumulating two + // quantization errors in them. + green_diff = (new_green - (value >> 8)) & 0xff; + } + r = NearLosslessComponent(((value >> 16) - green_diff) & 0xff, + (predict >> 16) & 0xff, 0xff - new_green, + quantization); + b = NearLosslessComponent((value - green_diff) & 0xff, predict & 0xff, + 0xff - new_green, quantization); + return ((uint32_t)a << 24) | ((uint32_t)r << 16) | ((uint32_t)g << 8) | b; +} + +// Stores the difference between the pixel and its prediction in "out". +// In case of a lossy encoding, updates the source image to avoid propagating +// the deviation further to pixels which depend on the current pixel for their +// predictions. +static WEBP_INLINE void GetResidual( + int width, int height, uint32_t* const upper_row, + uint32_t* const current_row, const uint8_t* const max_diffs, int mode, + int x_start, int x_end, int y, int max_quantization, int exact, + int used_subtract_green, uint32_t* const out) { + if (exact) { + PredictBatch(mode, x_start, y, x_end - x_start, current_row, upper_row, + out); + } else { + const VP8LPredictorFunc pred_func = VP8LPredictors[mode]; + int x; + for (x = x_start; x < x_end; ++x) { + uint32_t predict; + uint32_t residual; + if (y == 0) { + predict = (x == 0) ? ARGB_BLACK : current_row[x - 1]; // Left. + } else if (x == 0) { + predict = upper_row[x]; // Top. + } else { + predict = pred_func(current_row[x - 1], upper_row + x); + } + if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 || + x == 0 || x == width - 1) { + residual = VP8LSubPixels(current_row[x], predict); + } else { + residual = NearLossless(current_row[x], predict, max_quantization, + max_diffs[x], used_subtract_green); + // Update the source image. + current_row[x] = VP8LAddPixels(predict, residual); + // x is never 0 here so we do not need to update upper_row like below. + } + if ((current_row[x] & kMaskAlpha) == 0) { + // If alpha is 0, cleanup RGB. We can choose the RGB values of the + // residual for best compression. The prediction of alpha itself can be + // non-zero and must be kept though. We choose RGB of the residual to be + // 0. + residual &= kMaskAlpha; + // Update the source image. + current_row[x] = predict & ~kMaskAlpha; + // The prediction for the rightmost pixel in a row uses the leftmost + // pixel + // in that row as its top-right context pixel. Hence if we change the + // leftmost pixel of current_row, the corresponding change must be + // applied + // to upper_row as well where top-right context is being read from. + if (x == 0 && y != 0) upper_row[width] = current_row[0]; + } + out[x - x_start] = residual; + } + } +} + +// Returns best predictor and updates the accumulated histogram. +// If max_quantization > 1, assumes that near lossless processing will be +// applied, quantizing residuals to multiples of quantization levels up to +// max_quantization (the actual quantization level depends on smoothness near +// the given pixel). +static int GetBestPredictorForTile(int width, int height, + int tile_x, int tile_y, int bits, + int accumulated[4][256], + uint32_t* const argb_scratch, + const uint32_t* const argb, + int max_quantization, + int exact, int used_subtract_green, + const uint32_t* const modes) { + const int kNumPredModes = 14; + const int start_x = tile_x << bits; + const int start_y = tile_y << bits; + const int tile_size = 1 << bits; + const int max_y = GetMin(tile_size, height - start_y); + const int max_x = GetMin(tile_size, width - start_x); + // Whether there exist columns just outside the tile. + const int have_left = (start_x > 0); + const int have_right = (max_x < width - start_x); + // Position and size of the strip covering the tile and adjacent columns if + // they exist. + const int context_start_x = start_x - have_left; + const int context_width = max_x + have_left + have_right; + const int tiles_per_row = VP8LSubSampleSize(width, bits); + // Prediction modes of the left and above neighbor tiles. + const int left_mode = (tile_x > 0) ? + (modes[tile_y * tiles_per_row + tile_x - 1] >> 8) & 0xff : 0xff; + const int above_mode = (tile_y > 0) ? + (modes[(tile_y - 1) * tiles_per_row + tile_x] >> 8) & 0xff : 0xff; + // The width of upper_row and current_row is one pixel larger than image width + // to allow the top right pixel to point to the leftmost pixel of the next row + // when at the right edge. + uint32_t* upper_row = argb_scratch; + uint32_t* current_row = upper_row + width + 1; + uint8_t* const max_diffs = (uint8_t*)(current_row + width + 1); + float best_diff = MAX_DIFF_COST; + int best_mode = 0; + int mode; + int histo_stack_1[4][256]; + int histo_stack_2[4][256]; + // Need pointers to be able to swap arrays. + int (*histo_argb)[256] = histo_stack_1; + int (*best_histo)[256] = histo_stack_2; + int i, j; + uint32_t residuals[1 << MAX_TRANSFORM_BITS]; + assert(bits <= MAX_TRANSFORM_BITS); + assert(max_x <= (1 << MAX_TRANSFORM_BITS)); + + for (mode = 0; mode < kNumPredModes; ++mode) { + float cur_diff; + int relative_y; + memset(histo_argb, 0, sizeof(histo_stack_1)); + if (start_y > 0) { + // Read the row above the tile which will become the first upper_row. + // Include a pixel to the left if it exists; include a pixel to the right + // in all cases (wrapping to the leftmost pixel of the next row if it does + // not exist). + memcpy(current_row + context_start_x, + argb + (start_y - 1) * width + context_start_x, + sizeof(*argb) * (max_x + have_left + 1)); + } + for (relative_y = 0; relative_y < max_y; ++relative_y) { + const int y = start_y + relative_y; + int relative_x; + uint32_t* tmp = upper_row; + upper_row = current_row; + current_row = tmp; + // Read current_row. Include a pixel to the left if it exists; include a + // pixel to the right in all cases except at the bottom right corner of + // the image (wrapping to the leftmost pixel of the next row if it does + // not exist in the current row). + memcpy(current_row + context_start_x, + argb + y * width + context_start_x, + sizeof(*argb) * (max_x + have_left + (y + 1 < height))); + if (max_quantization > 1 && y >= 1 && y + 1 < height) { + MaxDiffsForRow(context_width, width, argb + y * width + context_start_x, + max_diffs + context_start_x, used_subtract_green); + } + + GetResidual(width, height, upper_row, current_row, max_diffs, mode, + start_x, start_x + max_x, y, max_quantization, exact, + used_subtract_green, residuals); + for (relative_x = 0; relative_x < max_x; ++relative_x) { + UpdateHisto(histo_argb, residuals[relative_x]); + } + } + cur_diff = PredictionCostSpatialHistogram( + (const int (*)[256])accumulated, (const int (*)[256])histo_argb); + // Favor keeping the areas locally similar. + if (mode == left_mode) cur_diff -= kSpatialPredictorBias; + if (mode == above_mode) cur_diff -= kSpatialPredictorBias; + + if (cur_diff < best_diff) { + int (*tmp)[256] = histo_argb; + histo_argb = best_histo; + best_histo = tmp; + best_diff = cur_diff; + best_mode = mode; + } + } + + for (i = 0; i < 4; i++) { + for (j = 0; j < 256; j++) { + accumulated[i][j] += best_histo[i][j]; + } + } + + return best_mode; +} + +// Converts pixels of the image to residuals with respect to predictions. +// If max_quantization > 1, applies near lossless processing, quantizing +// residuals to multiples of quantization levels up to max_quantization +// (the actual quantization level depends on smoothness near the given pixel). +static void CopyImageWithPrediction(int width, int height, + int bits, uint32_t* const modes, + uint32_t* const argb_scratch, + uint32_t* const argb, + int low_effort, int max_quantization, + int exact, int used_subtract_green) { + const int tiles_per_row = VP8LSubSampleSize(width, bits); + // The width of upper_row and current_row is one pixel larger than image width + // to allow the top right pixel to point to the leftmost pixel of the next row + // when at the right edge. + uint32_t* upper_row = argb_scratch; + uint32_t* current_row = upper_row + width + 1; + uint8_t* current_max_diffs = (uint8_t*)(current_row + width + 1); + uint8_t* lower_max_diffs = current_max_diffs + width; + int y; + + for (y = 0; y < height; ++y) { + int x; + uint32_t* const tmp32 = upper_row; + upper_row = current_row; + current_row = tmp32; + memcpy(current_row, argb + y * width, + sizeof(*argb) * (width + (y + 1 < height))); + + if (low_effort) { + PredictBatch(kPredLowEffort, 0, y, width, current_row, upper_row, + argb + y * width); + } else { + if (max_quantization > 1) { + // Compute max_diffs for the lower row now, because that needs the + // contents of argb for the current row, which we will overwrite with + // residuals before proceeding with the next row. + uint8_t* const tmp8 = current_max_diffs; + current_max_diffs = lower_max_diffs; + lower_max_diffs = tmp8; + if (y + 2 < height) { + MaxDiffsForRow(width, width, argb + (y + 1) * width, lower_max_diffs, + used_subtract_green); + } + } + for (x = 0; x < width;) { + const int mode = + (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff; + int x_end = x + (1 << bits); + if (x_end > width) x_end = width; + GetResidual(width, height, upper_row, current_row, current_max_diffs, + mode, x, x_end, y, max_quantization, exact, + used_subtract_green, argb + y * width + x); + x = x_end; + } + } + } +} + +// Finds the best predictor for each tile, and converts the image to residuals +// with respect to predictions. If near_lossless_quality < 100, applies +// near lossless processing, shaving off more bits of residuals for lower +// qualities. +void VP8LResidualImage(int width, int height, int bits, int low_effort, + uint32_t* const argb, uint32_t* const argb_scratch, + uint32_t* const image, int near_lossless_quality, + int exact, int used_subtract_green) { + const int tiles_per_row = VP8LSubSampleSize(width, bits); + const int tiles_per_col = VP8LSubSampleSize(height, bits); + int tile_y; + int histo[4][256]; + const int max_quantization = 1 << VP8LNearLosslessBits(near_lossless_quality); + if (low_effort) { + int i; + for (i = 0; i < tiles_per_row * tiles_per_col; ++i) { + image[i] = ARGB_BLACK | (kPredLowEffort << 8); + } + } else { + memset(histo, 0, sizeof(histo)); + for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { + int tile_x; + for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { + const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y, + bits, histo, argb_scratch, argb, max_quantization, exact, + used_subtract_green, image); + image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8); + } + } + } + + CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb, + low_effort, max_quantization, exact, + used_subtract_green); +} + +//------------------------------------------------------------------------------ +// Color transform functions. + +static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) { + m->green_to_red_ = 0; + m->green_to_blue_ = 0; + m->red_to_blue_ = 0; +} + +static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code, + VP8LMultipliers* const m) { + m->green_to_red_ = (color_code >> 0) & 0xff; + m->green_to_blue_ = (color_code >> 8) & 0xff; + m->red_to_blue_ = (color_code >> 16) & 0xff; +} + +static WEBP_INLINE uint32_t MultipliersToColorCode( + const VP8LMultipliers* const m) { + return 0xff000000u | + ((uint32_t)(m->red_to_blue_) << 16) | + ((uint32_t)(m->green_to_blue_) << 8) | + m->green_to_red_; +} + +static float PredictionCostCrossColor(const int accumulated[256], + const int counts[256]) { + // Favor low entropy, locally and globally. + // Favor small absolute values for PredictionCostSpatial + static const double kExpValue = 2.4; + return VP8LCombinedShannonEntropy(counts, accumulated) + + PredictionCostSpatial(counts, 3, kExpValue); +} + +static float GetPredictionCostCrossColorRed( + const uint32_t* argb, int stride, int tile_width, int tile_height, + VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red, + const int accumulated_red_histo[256]) { + int histo[256] = { 0 }; + float cur_diff; + + VP8LCollectColorRedTransforms(argb, stride, tile_width, tile_height, + green_to_red, histo); + + cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo); + if ((uint8_t)green_to_red == prev_x.green_to_red_) { + cur_diff -= 3; // favor keeping the areas locally similar + } + if ((uint8_t)green_to_red == prev_y.green_to_red_) { + cur_diff -= 3; // favor keeping the areas locally similar + } + if (green_to_red == 0) { + cur_diff -= 3; + } + return cur_diff; +} + +static void GetBestGreenToRed( + const uint32_t* argb, int stride, int tile_width, int tile_height, + VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality, + const int accumulated_red_histo[256], VP8LMultipliers* const best_tx) { + const int kMaxIters = 4 + ((7 * quality) >> 8); // in range [4..6] + int green_to_red_best = 0; + int iter, offset; + float best_diff = GetPredictionCostCrossColorRed( + argb, stride, tile_width, tile_height, prev_x, prev_y, + green_to_red_best, accumulated_red_histo); + for (iter = 0; iter < kMaxIters; ++iter) { + // ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to + // one in color computation. Having initial delta here as 1 is sufficient + // to explore the range of (-2, 2). + const int delta = 32 >> iter; + // Try a negative and a positive delta from the best known value. + for (offset = -delta; offset <= delta; offset += 2 * delta) { + const int green_to_red_cur = offset + green_to_red_best; + const float cur_diff = GetPredictionCostCrossColorRed( + argb, stride, tile_width, tile_height, prev_x, prev_y, + green_to_red_cur, accumulated_red_histo); + if (cur_diff < best_diff) { + best_diff = cur_diff; + green_to_red_best = green_to_red_cur; + } + } + } + best_tx->green_to_red_ = green_to_red_best; +} + +static float GetPredictionCostCrossColorBlue( + const uint32_t* argb, int stride, int tile_width, int tile_height, + VP8LMultipliers prev_x, VP8LMultipliers prev_y, + int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256]) { + int histo[256] = { 0 }; + float cur_diff; + + VP8LCollectColorBlueTransforms(argb, stride, tile_width, tile_height, + green_to_blue, red_to_blue, histo); + + cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo); + if ((uint8_t)green_to_blue == prev_x.green_to_blue_) { + cur_diff -= 3; // favor keeping the areas locally similar + } + if ((uint8_t)green_to_blue == prev_y.green_to_blue_) { + cur_diff -= 3; // favor keeping the areas locally similar + } + if ((uint8_t)red_to_blue == prev_x.red_to_blue_) { + cur_diff -= 3; // favor keeping the areas locally similar + } + if ((uint8_t)red_to_blue == prev_y.red_to_blue_) { + cur_diff -= 3; // favor keeping the areas locally similar + } + if (green_to_blue == 0) { + cur_diff -= 3; + } + if (red_to_blue == 0) { + cur_diff -= 3; + } + return cur_diff; +} + +#define kGreenRedToBlueNumAxis 8 +#define kGreenRedToBlueMaxIters 7 +static void GetBestGreenRedToBlue( + const uint32_t* argb, int stride, int tile_width, int tile_height, + VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality, + const int accumulated_blue_histo[256], + VP8LMultipliers* const best_tx) { + const int8_t offset[kGreenRedToBlueNumAxis][2] = + {{0, -1}, {0, 1}, {-1, 0}, {1, 0}, {-1, -1}, {-1, 1}, {1, -1}, {1, 1}}; + const int8_t delta_lut[kGreenRedToBlueMaxIters] = { 16, 16, 8, 4, 2, 2, 2 }; + const int iters = + (quality < 25) ? 1 : (quality > 50) ? kGreenRedToBlueMaxIters : 4; + int green_to_blue_best = 0; + int red_to_blue_best = 0; + int iter; + // Initial value at origin: + float best_diff = GetPredictionCostCrossColorBlue( + argb, stride, tile_width, tile_height, prev_x, prev_y, + green_to_blue_best, red_to_blue_best, accumulated_blue_histo); + for (iter = 0; iter < iters; ++iter) { + const int delta = delta_lut[iter]; + int axis; + for (axis = 0; axis < kGreenRedToBlueNumAxis; ++axis) { + const int green_to_blue_cur = + offset[axis][0] * delta + green_to_blue_best; + const int red_to_blue_cur = offset[axis][1] * delta + red_to_blue_best; + const float cur_diff = GetPredictionCostCrossColorBlue( + argb, stride, tile_width, tile_height, prev_x, prev_y, + green_to_blue_cur, red_to_blue_cur, accumulated_blue_histo); + if (cur_diff < best_diff) { + best_diff = cur_diff; + green_to_blue_best = green_to_blue_cur; + red_to_blue_best = red_to_blue_cur; + } + if (quality < 25 && iter == 4) { + // Only axis aligned diffs for lower quality. + break; // next iter. + } + } + if (delta == 2 && green_to_blue_best == 0 && red_to_blue_best == 0) { + // Further iterations would not help. + break; // out of iter-loop. + } + } + best_tx->green_to_blue_ = green_to_blue_best; + best_tx->red_to_blue_ = red_to_blue_best; +} +#undef kGreenRedToBlueMaxIters +#undef kGreenRedToBlueNumAxis + +static VP8LMultipliers GetBestColorTransformForTile( + int tile_x, int tile_y, int bits, + VP8LMultipliers prev_x, + VP8LMultipliers prev_y, + int quality, int xsize, int ysize, + const int accumulated_red_histo[256], + const int accumulated_blue_histo[256], + const uint32_t* const argb) { + const int max_tile_size = 1 << bits; + const int tile_y_offset = tile_y * max_tile_size; + const int tile_x_offset = tile_x * max_tile_size; + const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize); + const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize); + const int tile_width = all_x_max - tile_x_offset; + const int tile_height = all_y_max - tile_y_offset; + const uint32_t* const tile_argb = argb + tile_y_offset * xsize + + tile_x_offset; + VP8LMultipliers best_tx; + MultipliersClear(&best_tx); + + GetBestGreenToRed(tile_argb, xsize, tile_width, tile_height, + prev_x, prev_y, quality, accumulated_red_histo, &best_tx); + GetBestGreenRedToBlue(tile_argb, xsize, tile_width, tile_height, + prev_x, prev_y, quality, accumulated_blue_histo, + &best_tx); + return best_tx; +} + +static void CopyTileWithColorTransform(int xsize, int ysize, + int tile_x, int tile_y, + int max_tile_size, + VP8LMultipliers color_transform, + uint32_t* argb) { + const int xscan = GetMin(max_tile_size, xsize - tile_x); + int yscan = GetMin(max_tile_size, ysize - tile_y); + argb += tile_y * xsize + tile_x; + while (yscan-- > 0) { + VP8LTransformColor(&color_transform, argb, xscan); + argb += xsize; + } +} + +void VP8LColorSpaceTransform(int width, int height, int bits, int quality, + uint32_t* const argb, uint32_t* image) { + const int max_tile_size = 1 << bits; + const int tile_xsize = VP8LSubSampleSize(width, bits); + const int tile_ysize = VP8LSubSampleSize(height, bits); + int accumulated_red_histo[256] = { 0 }; + int accumulated_blue_histo[256] = { 0 }; + int tile_x, tile_y; + VP8LMultipliers prev_x, prev_y; + MultipliersClear(&prev_y); + MultipliersClear(&prev_x); + for (tile_y = 0; tile_y < tile_ysize; ++tile_y) { + for (tile_x = 0; tile_x < tile_xsize; ++tile_x) { + int y; + const int tile_x_offset = tile_x * max_tile_size; + const int tile_y_offset = tile_y * max_tile_size; + const int all_x_max = GetMin(tile_x_offset + max_tile_size, width); + const int all_y_max = GetMin(tile_y_offset + max_tile_size, height); + const int offset = tile_y * tile_xsize + tile_x; + if (tile_y != 0) { + ColorCodeToMultipliers(image[offset - tile_xsize], &prev_y); + } + prev_x = GetBestColorTransformForTile(tile_x, tile_y, bits, + prev_x, prev_y, + quality, width, height, + accumulated_red_histo, + accumulated_blue_histo, + argb); + image[offset] = MultipliersToColorCode(&prev_x); + CopyTileWithColorTransform(width, height, tile_x_offset, tile_y_offset, + max_tile_size, prev_x, argb); + + // Gather accumulated histogram data. + for (y = tile_y_offset; y < all_y_max; ++y) { + int ix = y * width + tile_x_offset; + const int ix_end = ix + all_x_max - tile_x_offset; + for (; ix < ix_end; ++ix) { + const uint32_t pix = argb[ix]; + if (ix >= 2 && + pix == argb[ix - 2] && + pix == argb[ix - 1]) { + continue; // repeated pixels are handled by backward references + } + if (ix >= width + 2 && + argb[ix - 2] == argb[ix - width - 2] && + argb[ix - 1] == argb[ix - width - 1] && + pix == argb[ix - width]) { + continue; // repeated pixels are handled by backward references + } + ++accumulated_red_histo[(pix >> 16) & 0xff]; + ++accumulated_blue_histo[(pix >> 0) & 0xff]; + } + } + } + } +} diff --git a/thirdparty/libwebp/enc/quant.c b/thirdparty/libwebp/enc/quant_enc.c index 07ffaf0aeb..b118fb2a13 100644 --- a/thirdparty/libwebp/enc/quant.c +++ b/thirdparty/libwebp/enc/quant_enc.c @@ -15,8 +15,8 @@ #include <math.h> #include <stdlib.h> // for abs() -#include "./vp8enci.h" -#include "./cost.h" +#include "./vp8i_enc.h" +#include "./cost_enc.h" #define DO_TRELLIS_I4 1 #define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate. @@ -643,6 +643,8 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, const int sign = (in[j] < 0); const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j]; int level0 = QUANTDIV(coeff0, iQ, B); + int thresh_level = QUANTDIV(coeff0, iQ, BIAS(0x80)); + if (thresh_level > MAX_LEVEL) thresh_level = MAX_LEVEL; if (level0 > MAX_LEVEL) level0 = MAX_LEVEL; { // Swap current and previous score states @@ -657,23 +659,17 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, int level = level0 + m; const int ctx = (level > 2) ? 2 : level; const int band = VP8EncBands[n + 1]; - score_t base_score, last_pos_score; + score_t base_score; score_t best_cur_score = MAX_COST; int best_prev = 0; // default, in case ss_cur[m].score = MAX_COST; ss_cur[m].costs = costs[n + 1][ctx]; - if (level > MAX_LEVEL || level < 0) { // node is dead? + if (level < 0 || level > thresh_level) { + // Node is dead. continue; } - // Compute extra rate cost if last coeff's position is < 15 - { - const score_t last_pos_cost = - (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0; - last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0); - } - { // Compute delta_error = how much coding this level will // subtract to max_error as distortion. @@ -705,6 +701,9 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, // Now, record best terminal node (and thus best entry in the graph). if (level != 0) { + const score_t last_pos_cost = + (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0; + const score_t last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0); const score_t score = best_cur_score + last_pos_score; if (score < best_score) { best_score = score; diff --git a/thirdparty/libwebp/enc/syntax.c b/thirdparty/libwebp/enc/syntax_enc.c index a0e79ef404..90665bd7e5 100644 --- a/thirdparty/libwebp/enc/syntax.c +++ b/thirdparty/libwebp/enc/syntax_enc.c @@ -16,7 +16,7 @@ #include "../utils/utils.h" #include "../webp/format_constants.h" // RIFF constants #include "../webp/mux_types.h" // ALPHA_FLAG -#include "./vp8enci.h" +#include "./vp8i_enc.h" //------------------------------------------------------------------------------ // Helper functions @@ -362,8 +362,7 @@ int VP8EncWrite(VP8Encoder* const enc) { for (p = 0; p < enc->num_parts_; ++p) { const uint8_t* const buf = VP8BitWriterBuf(enc->parts_ + p); const size_t size = VP8BitWriterSize(enc->parts_ + p); - if (size) - ok = ok && pic->writer(buf, size, pic); + if (size) ok = ok && pic->writer(buf, size, pic); VP8BitWriterWipeOut(enc->parts_ + p); // will free the internal buffer. ok = ok && WebPReportProgress(pic, enc->percent_ + percent_per_part, &enc->percent_); diff --git a/thirdparty/libwebp/enc/token.c b/thirdparty/libwebp/enc/token_enc.c index 087940e5ff..02a0d72cc6 100644 --- a/thirdparty/libwebp/enc/token.c +++ b/thirdparty/libwebp/enc/token_enc.c @@ -20,8 +20,8 @@ #include <stdlib.h> #include <string.h> -#include "./cost.h" -#include "./vp8enci.h" +#include "./cost_enc.h" +#include "./vp8i_enc.h" #include "../utils/utils.h" #if !defined(DISABLE_TOKEN_BUFFER) @@ -137,8 +137,9 @@ int VP8RecordCoeffTokens(int ctx, const struct VP8Residual* const res, s = res->stats[VP8EncBands[n]][1]; } else { if (!AddToken(tokens, v > 4, base_id + 3, s + 3)) { - if (AddToken(tokens, v != 2, base_id + 4, s + 4)) + if (AddToken(tokens, v != 2, base_id + 4, s + 4)) { AddToken(tokens, v == 4, base_id + 5, s + 5); + } } else if (!AddToken(tokens, v > 10, base_id + 6, s + 6)) { if (!AddToken(tokens, v > 6, base_id + 7, s + 7)) { AddConstantToken(tokens, v == 6, 159); diff --git a/thirdparty/libwebp/enc/tree.c b/thirdparty/libwebp/enc/tree_enc.c index f141006d19..2c40fe7f3d 100644 --- a/thirdparty/libwebp/enc/tree.c +++ b/thirdparty/libwebp/enc/tree_enc.c @@ -11,7 +11,7 @@ // // Author: Skal (pascal.massimino@gmail.com) -#include "./vp8enci.h" +#include "./vp8i_enc.h" //------------------------------------------------------------------------------ // Default probabilities diff --git a/thirdparty/libwebp/enc/vp8enci.h b/thirdparty/libwebp/enc/vp8i_enc.h index 5b4e162a58..93c95ecbfb 100644 --- a/thirdparty/libwebp/enc/vp8enci.h +++ b/thirdparty/libwebp/enc/vp8i_enc.h @@ -15,10 +15,10 @@ #define WEBP_ENC_VP8ENCI_H_ #include <string.h> // for memcpy() -#include "../dec/common.h" +#include "../dec/common_dec.h" #include "../dsp/dsp.h" -#include "../utils/bit_writer.h" -#include "../utils/thread.h" +#include "../utils/bit_writer_utils.h" +#include "../utils/thread_utils.h" #include "../utils/utils.h" #include "../webp/encode.h" @@ -31,8 +31,8 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 0 -#define ENC_MIN_VERSION 5 -#define ENC_REV_VERSION 2 +#define ENC_MIN_VERSION 6 +#define ENC_REV_VERSION 0 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost @@ -219,7 +219,6 @@ typedef struct { // right neighbouring data (samples, predictions, contexts, ...) typedef struct { int x_, y_; // current macroblock - int y_stride_, uv_stride_; // respective strides uint8_t* yuv_in_; // input samples uint8_t* yuv_out_; // output samples uint8_t* yuv_out2_; // secondary buffer swapped with yuv_out_. @@ -474,14 +473,6 @@ int VP8EncStartAlpha(VP8Encoder* const enc); // start alpha coding process int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data int VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data - // in filter.c -void VP8SSIMAddStats(const VP8DistoStats* const src, VP8DistoStats* const dst); -void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1, - const uint8_t* src2, int stride2, - int W, int H, VP8DistoStats* const stats); -double VP8SSIMGet(const VP8DistoStats* const stats); -double VP8SSIMGetSquaredError(const VP8DistoStats* const stats); - // autofilter void VP8InitFilter(VP8EncIterator* const it); void VP8StoreFilterStats(VP8EncIterator* const it); diff --git a/thirdparty/libwebp/enc/vp8l.c b/thirdparty/libwebp/enc/vp8l_enc.c index e4ad2959b8..b1a793d956 100644 --- a/thirdparty/libwebp/enc/vp8l.c +++ b/thirdparty/libwebp/enc/vp8l_enc.c @@ -15,17 +15,18 @@ #include <assert.h> #include <stdlib.h> -#include "./backward_references.h" -#include "./histogram.h" -#include "./vp8enci.h" -#include "./vp8li.h" +#include "./backward_references_enc.h" +#include "./histogram_enc.h" +#include "./vp8i_enc.h" +#include "./vp8li_enc.h" #include "../dsp/lossless.h" -#include "../utils/bit_writer.h" -#include "../utils/huffman_encode.h" +#include "../dsp/lossless_common.h" +#include "../utils/bit_writer_utils.h" +#include "../utils/huffman_encode_utils.h" #include "../utils/utils.h" #include "../webp/format_constants.h" -#include "./delta_palettization.h" +#include "./delta_palettization_enc.h" #define PALETTE_KEY_RIGHT_SHIFT 22 // Key for 1K buffer. // Maximum number of histogram images (sub-blocks). @@ -163,18 +164,25 @@ typedef enum { kHistoTotal // Must be last. } HistoIx; -static void AddSingleSubGreen(uint32_t p, uint32_t* r, uint32_t* b) { - const uint32_t green = p >> 8; // The upper bits are masked away later. +static void AddSingleSubGreen(int p, uint32_t* const r, uint32_t* const b) { + const int green = p >> 8; // The upper bits are masked away later. ++r[((p >> 16) - green) & 0xff]; - ++b[(p - green) & 0xff]; + ++b[((p >> 0) - green) & 0xff]; } static void AddSingle(uint32_t p, - uint32_t* a, uint32_t* r, uint32_t* g, uint32_t* b) { - ++a[p >> 24]; + uint32_t* const a, uint32_t* const r, + uint32_t* const g, uint32_t* const b) { + ++a[(p >> 24) & 0xff]; ++r[(p >> 16) & 0xff]; - ++g[(p >> 8) & 0xff]; - ++b[(p & 0xff)]; + ++g[(p >> 8) & 0xff]; + ++b[(p >> 0) & 0xff]; +} + +static WEBP_INLINE uint32_t HashPix(uint32_t pix) { + // Note that masking with 0xffffffffu is for preventing an + // 'unsigned int overflow' warning. Doesn't impact the compiled code. + return ((((uint64_t)pix + (pix >> 19)) * 0x39c5fba7ull) & 0xffffffffu) >> 24; } static int AnalyzeEntropy(const uint32_t* argb, @@ -214,8 +222,8 @@ static int AnalyzeEntropy(const uint32_t* argb, &histo[kHistoBluePredSubGreen * 256]); { // Approximate the palette by the entropy of the multiplicative hash. - const int hash = ((pix + (pix >> 19)) * 0x39c5fba7) >> 24; - ++histo[kHistoPalette * 256 + (hash & 0xff)]; + const uint32_t hash = HashPix(pix); + ++histo[kHistoPalette * 256 + hash]; } } prev_row = curr_row; @@ -311,7 +319,10 @@ static int GetHistoBits(int method, int use_palette, int width, int height) { static int GetTransformBits(int method, int histo_bits) { const int max_transform_bits = (method < 4) ? 6 : (method > 4) ? 4 : 5; - return (histo_bits > max_transform_bits) ? max_transform_bits : histo_bits; + const int res = + (histo_bits > max_transform_bits) ? max_transform_bits : histo_bits; + assert(res <= MAX_TRANSFORM_BITS); + return res; } static int AnalyzeAndInit(VP8LEncoder* const enc) { @@ -696,7 +707,7 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2], int width, int height, - int quality) { + int quality, int low_effort) { int i; int max_tokens = 0; WebPEncodingError err = VP8_ENC_OK; @@ -714,7 +725,8 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, } // Calculate backward references from ARGB image. - if (VP8LHashChainFill(hash_chain, quality, argb, width, height) == 0) { + if (!VP8LHashChainFill(hash_chain, quality, argb, width, height, + low_effort)) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } @@ -814,11 +826,18 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, goto Error; } - *cache_bits = use_cache ? MAX_COLOR_CACHE_BITS : 0; + if (use_cache) { + // If the value is different from zero, it has been set during the + // palette analysis. + if (*cache_bits == 0) *cache_bits = MAX_COLOR_CACHE_BITS; + } else { + *cache_bits = 0; + } // 'best_refs' is the reference to the best backward refs and points to one // of refs_array[0] or refs_array[1]. // Calculate backward references from ARGB image. - if (VP8LHashChainFill(hash_chain, quality, argb, width, height) == 0) { + if (!VP8LHashChainFill(hash_chain, quality, argb, width, height, + low_effort)) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } @@ -899,7 +918,7 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, err = EncodeImageNoHuffman(bw, histogram_argb, hash_chain, refs_array, VP8LSubSampleSize(width, histogram_bits), VP8LSubSampleSize(height, histogram_bits), - quality); + quality, low_effort); WebPSafeFree(histogram_argb); if (err != VP8_ENC_OK) goto Error; } @@ -990,12 +1009,12 @@ static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, (VP8LHashChain*)&enc->hash_chain_, (VP8LBackwardRefs*)enc->refs_, // cast const away transform_width, transform_height, - quality); + quality, low_effort); } static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, int width, int height, - int quality, + int quality, int low_effort, VP8LBitWriter* const bw) { const int ccolor_transform_bits = enc->transform_bits_; const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits); @@ -1011,7 +1030,7 @@ static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, (VP8LHashChain*)&enc->hash_chain_, (VP8LBackwardRefs*)enc->refs_, // cast const away transform_width, transform_height, - quality); + quality, low_effort); } // ----------------------------------------------------------------------------- @@ -1156,7 +1175,8 @@ static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { // ----------------------------------------------------------------------------- -static int SearchColor(const uint32_t sorted[], uint32_t color, int hi) { +static WEBP_INLINE int SearchColorNoIdx(const uint32_t sorted[], uint32_t color, + int hi) { int low = 0; if (sorted[low] == color) return low; // loop invariant: sorted[low] != color while (1) { @@ -1171,35 +1191,68 @@ static int SearchColor(const uint32_t sorted[], uint32_t color, int hi) { } } +#define APPLY_PALETTE_GREEDY_MAX 4 + +static WEBP_INLINE uint32_t SearchColorGreedy(const uint32_t palette[], + int palette_size, + uint32_t color) { + (void)palette_size; + assert(palette_size < APPLY_PALETTE_GREEDY_MAX); + assert(3 == APPLY_PALETTE_GREEDY_MAX - 1); + if (color == palette[0]) return 0; + if (color == palette[1]) return 1; + if (color == palette[2]) return 2; + return 3; +} + +static WEBP_INLINE uint32_t ApplyPaletteHash0(uint32_t color) { + // Focus on the green color. + return (color >> 8) & 0xff; +} + +#define PALETTE_INV_SIZE_BITS 11 +#define PALETTE_INV_SIZE (1 << PALETTE_INV_SIZE_BITS) + +static WEBP_INLINE uint32_t ApplyPaletteHash1(uint32_t color) { + // Forget about alpha. + return ((color & 0x00ffffffu) * 4222244071u) >> (32 - PALETTE_INV_SIZE_BITS); +} + +static WEBP_INLINE uint32_t ApplyPaletteHash2(uint32_t color) { + // Forget about alpha. + return (color & 0x00ffffffu) * ((1u << 31) - 1) >> + (32 - PALETTE_INV_SIZE_BITS); +} + // Sort palette in increasing order and prepare an inverse mapping array. static void PrepareMapToPalette(const uint32_t palette[], int num_colors, - uint32_t sorted[], int idx_map[]) { + uint32_t sorted[], uint32_t idx_map[]) { int i; memcpy(sorted, palette, num_colors * sizeof(*sorted)); qsort(sorted, num_colors, sizeof(*sorted), PaletteCompareColorsForQsort); for (i = 0; i < num_colors; ++i) { - idx_map[SearchColor(sorted, palette[i], num_colors)] = i; + idx_map[SearchColorNoIdx(sorted, palette[i], num_colors)] = i; } } -static void MapToPalette(const uint32_t sorted_palette[], int num_colors, - uint32_t* const last_pix, int* const last_idx, - const int idx_map[], - const uint32_t* src, uint8_t* dst, int width) { - int x; - int prev_idx = *last_idx; - uint32_t prev_pix = *last_pix; - for (x = 0; x < width; ++x) { - const uint32_t pix = src[x]; - if (pix != prev_pix) { - prev_idx = idx_map[SearchColor(sorted_palette, pix, num_colors)]; - prev_pix = pix; - } - dst[x] = prev_idx; - } - *last_idx = prev_idx; - *last_pix = prev_pix; -} +// Use 1 pixel cache for ARGB pixels. +#define APPLY_PALETTE_FOR(COLOR_INDEX) do { \ + uint32_t prev_pix = palette[0]; \ + uint32_t prev_idx = 0; \ + for (y = 0; y < height; ++y) { \ + for (x = 0; x < width; ++x) { \ + const uint32_t pix = src[x]; \ + if (pix != prev_pix) { \ + prev_idx = COLOR_INDEX; \ + prev_pix = pix; \ + } \ + tmp_row[x] = prev_idx; \ + } \ + VP8LBundleColorMap(tmp_row, width, xbits, dst); \ + src += src_stride; \ + dst += dst_stride; \ + } \ +} while (0) // Remap argb values in src[] to packed palettes entries in dst[] // using 'row' as a temporary buffer of size 'width'. @@ -1212,52 +1265,59 @@ static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, // TODO(skal): this tmp buffer is not needed if VP8LBundleColorMap() can be // made to work in-place. uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row)); - int i, x, y; - int use_LUT = 1; + int x, y; if (tmp_row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; - for (i = 0; i < palette_size; ++i) { - if ((palette[i] & 0xffff00ffu) != 0) { - use_LUT = 0; - break; - } - } - if (use_LUT) { - uint8_t inv_palette[MAX_PALETTE_SIZE] = { 0 }; - for (i = 0; i < palette_size; ++i) { - const int color = (palette[i] >> 8) & 0xff; - inv_palette[color] = i; - } - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - const int color = (src[x] >> 8) & 0xff; - tmp_row[x] = inv_palette[color]; + if (palette_size < APPLY_PALETTE_GREEDY_MAX) { + APPLY_PALETTE_FOR(SearchColorGreedy(palette, palette_size, pix)); + } else { + int i, j; + uint16_t buffer[PALETTE_INV_SIZE]; + uint32_t (*const hash_functions[])(uint32_t) = { + ApplyPaletteHash0, ApplyPaletteHash1, ApplyPaletteHash2 + }; + + // Try to find a perfect hash function able to go from a color to an index + // within 1 << PALETTE_INV_SIZE_BITS in order to build a hash map to go + // from color to index in palette. + for (i = 0; i < 3; ++i) { + int use_LUT = 1; + // Set each element in buffer to max uint16_t. + memset(buffer, 0xff, sizeof(buffer)); + for (j = 0; j < palette_size; ++j) { + const uint32_t ind = hash_functions[i](palette[j]); + if (buffer[ind] != 0xffffu) { + use_LUT = 0; + break; + } else { + buffer[ind] = j; + } } - VP8LBundleColorMap(tmp_row, width, xbits, dst); - src += src_stride; - dst += dst_stride; + if (use_LUT) break; } - } else { - // Use 1 pixel cache for ARGB pixels. - uint32_t last_pix; - int last_idx; - uint32_t sorted[MAX_PALETTE_SIZE]; - int idx_map[MAX_PALETTE_SIZE]; - PrepareMapToPalette(palette, palette_size, sorted, idx_map); - last_pix = palette[0]; - last_idx = 0; - for (y = 0; y < height; ++y) { - MapToPalette(sorted, palette_size, &last_pix, &last_idx, - idx_map, src, tmp_row, width); - VP8LBundleColorMap(tmp_row, width, xbits, dst); - src += src_stride; - dst += dst_stride; + + if (i == 0) { + APPLY_PALETTE_FOR(buffer[ApplyPaletteHash0(pix)]); + } else if (i == 1) { + APPLY_PALETTE_FOR(buffer[ApplyPaletteHash1(pix)]); + } else if (i == 2) { + APPLY_PALETTE_FOR(buffer[ApplyPaletteHash2(pix)]); + } else { + uint32_t idx_map[MAX_PALETTE_SIZE]; + uint32_t palette_sorted[MAX_PALETTE_SIZE]; + PrepareMapToPalette(palette, palette_size, palette_sorted, idx_map); + APPLY_PALETTE_FOR( + idx_map[SearchColorNoIdx(palette_sorted, pix, palette_size)]); } } WebPSafeFree(tmp_row); return VP8_ENC_OK; } +#undef APPLY_PALETTE_FOR +#undef PALETTE_INV_SIZE_BITS +#undef PALETTE_INV_SIZE +#undef APPLY_PALETTE_GREEDY_MAX // Note: Expects "enc->palette_" to be set properly. static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc, @@ -1290,7 +1350,7 @@ static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc, } // Save palette_[] to bitstream. -static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, +static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort, VP8LEncoder* const enc) { int i; uint32_t tmp_palette[MAX_PALETTE_SIZE]; @@ -1305,13 +1365,14 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, } tmp_palette[0] = palette[0]; return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_, enc->refs_, - palette_size, 1, 20 /* quality */); + palette_size, 1, 20 /* quality */, low_effort); } #ifdef WEBP_EXPERIMENTAL_FEATURES static WebPEncodingError EncodeDeltaPalettePredictorImage( - VP8LBitWriter* const bw, VP8LEncoder* const enc, int quality) { + VP8LBitWriter* const bw, VP8LEncoder* const enc, int quality, + int low_effort) { const WebPPicture* const pic = enc->pic_; const int width = pic->width; const int height = pic->height; @@ -1342,7 +1403,7 @@ static WebPEncodingError EncodeDeltaPalettePredictorImage( err = EncodeImageNoHuffman(bw, predictors, &enc->hash_chain_, (VP8LBackwardRefs*)enc->refs_, // cast const away transform_width, transform_height, - quality); + quality, low_effort); WebPSafeFree(predictors); return err; } @@ -1393,7 +1454,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, int use_near_lossless = 0; int hdr_size = 0; int data_size = 0; - int use_delta_palettization = 0; + int use_delta_palette = 0; if (enc == NULL) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; @@ -1420,7 +1481,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, } #ifdef WEBP_EXPERIMENTAL_FEATURES - if (config->delta_palettization) { + if (config->use_delta_palette) { enc->use_predict_ = 1; enc->use_cross_color_ = 0; enc->use_subtract_green_ = 0; @@ -1432,21 +1493,25 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, if (enc->use_palette_) { err = AllocateTransformBuffer(enc, width, height); if (err != VP8_ENC_OK) goto Error; - err = EncodeDeltaPalettePredictorImage(bw, enc, quality); + err = EncodeDeltaPalettePredictorImage(bw, enc, quality, low_effort); if (err != VP8_ENC_OK) goto Error; - use_delta_palettization = 1; + use_delta_palette = 1; } } #endif // WEBP_EXPERIMENTAL_FEATURES // Encode palette if (enc->use_palette_) { - err = EncodePalette(bw, enc); + err = EncodePalette(bw, low_effort, enc); if (err != VP8_ENC_OK) goto Error; - err = MapImageFromPalette(enc, use_delta_palettization); + err = MapImageFromPalette(enc, use_delta_palette); if (err != VP8_ENC_OK) goto Error; + // If using a color cache, do not have it bigger than the number of colors. + if (use_cache && enc->palette_size_ < (1 << MAX_COLOR_CACHE_BITS)) { + enc->cache_bits_ = BitsLog2Floor(enc->palette_size_) + 1; + } } - if (!use_delta_palettization) { + if (!use_delta_palette) { // In case image is not packed. if (enc->argb_ == NULL) { err = MakeInputImageCopy(enc); @@ -1468,7 +1533,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, if (enc->use_cross_color_) { err = ApplyCrossColorFilter(enc, enc->current_width_, - height, quality, bw); + height, quality, low_effort, bw); if (err != VP8_ENC_OK) goto Error; } } diff --git a/thirdparty/libwebp/enc/vp8li.h b/thirdparty/libwebp/enc/vp8li_enc.h index 371e276ee0..8c5fbcbb2e 100644 --- a/thirdparty/libwebp/enc/vp8li.h +++ b/thirdparty/libwebp/enc/vp8li_enc.h @@ -14,9 +14,9 @@ #ifndef WEBP_ENC_VP8LI_H_ #define WEBP_ENC_VP8LI_H_ -#include "./backward_references.h" -#include "./histogram.h" -#include "../utils/bit_writer.h" +#include "./backward_references_enc.h" +#include "./histogram_enc.h" +#include "../utils/bit_writer_utils.h" #include "../webp/encode.h" #include "../webp/format_constants.h" @@ -24,6 +24,9 @@ extern "C" { #endif +// maximum value of transform_bits_ in VP8LEncoder. +#define MAX_TRANSFORM_BITS 6 + typedef struct { const WebPConfig* config_; // user configuration and parameters const WebPPicture* pic_; // input picture. @@ -39,7 +42,7 @@ typedef struct { // Encoding parameters derived from quality parameter. int histo_bits_; - int transform_bits_; + int transform_bits_; // <= MAX_TRANSFORM_BITS. int cache_bits_; // If equal to 0, don't use color cache. // Encoding parameters derived from image characteristics. @@ -73,6 +76,17 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, VP8LBitWriter* const bw, int use_cache); //------------------------------------------------------------------------------ +// Image transforms in predictor.c. + +void VP8LResidualImage(int width, int height, int bits, int low_effort, + uint32_t* const argb, uint32_t* const argb_scratch, + uint32_t* const image, int near_lossless, int exact, + int used_subtract_green); + +void VP8LColorSpaceTransform(int width, int height, int bits, int quality, + uint32_t* const argb, uint32_t* image); + +//------------------------------------------------------------------------------ #ifdef __cplusplus } // extern "C" diff --git a/thirdparty/libwebp/enc/webpenc.c b/thirdparty/libwebp/enc/webp_enc.c index a7d04ea2ce..f18461ef92 100644 --- a/thirdparty/libwebp/enc/webpenc.c +++ b/thirdparty/libwebp/enc/webp_enc.c @@ -16,9 +16,9 @@ #include <string.h> #include <math.h> -#include "./cost.h" -#include "./vp8enci.h" -#include "./vp8li.h" +#include "./cost_enc.h" +#include "./vp8i_enc.h" +#include "./vp8li_enc.h" #include "../utils/utils.h" // #define PRINT_MEMORY_INFO @@ -75,7 +75,7 @@ static void ResetBoundaryPredictions(VP8Encoder* const enc) { //-------------------+---+---+---+---+---+---+---+ // dynamic proba | ~ | x | x | x | x | x | x | //-------------------+---+---+---+---+---+---+---+ -// fast mode analysis| | | | | x | x | x | +// fast mode analysis|[x]|[x]| | | x | x | x | //-------------------+---+---+---+---+---+---+---+ // basic rd-opt | | | | x | x | x | x | //-------------------+---+---+---+---+---+---+---+ @@ -315,18 +315,21 @@ int WebPReportProgress(const WebPPicture* const pic, int WebPEncode(const WebPConfig* config, WebPPicture* pic) { int ok = 0; + if (pic == NULL) return 0; - if (pic == NULL) - return 0; WebPEncodingSetError(pic, VP8_ENC_OK); // all ok so far - if (config == NULL) // bad params + if (config == NULL) { // bad params return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER); - if (!WebPValidateConfig(config)) + } + if (!WebPValidateConfig(config)) { return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION); - if (pic->width <= 0 || pic->height <= 0) + } + if (pic->width <= 0 || pic->height <= 0) { return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION); - if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION) + } + if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION) { return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION); + } if (pic->stats != NULL) memset(pic->stats, 0, sizeof(*pic->stats)); @@ -339,8 +342,8 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { if (pic->use_argb || pic->y == NULL || pic->u == NULL || pic->v == NULL) { // Make sure we have YUVA samples. - if (config->preprocessing & 4) { - if (!WebPPictureSmartARGBToYUVA(pic)) { + if (config->use_sharp_yuv || (config->preprocessing & 4)) { + if (!WebPPictureSharpARGBToYUVA(pic)) { return 0; } } else { |