diff options
author | Rémi Verschelde <rverschelde@gmail.com> | 2016-10-11 23:35:58 +0200 |
---|---|---|
committer | Rémi Verschelde <rverschelde@gmail.com> | 2016-10-15 11:50:39 +0200 |
commit | ee3cf211c6fd4d1e30617467cdbbe945798a68b3 (patch) | |
tree | d770150c48c806df4daca66770cde8d5b665a3ff /thirdparty/libwebp/enc | |
parent | b1e8889d969f5f88539c47c2afac6c9ea2a2dc11 (diff) |
webp: Make it a module and unbundle libwebp thirdparty files
Note that there are two Godot-specific changes made to libwebp
for the javascript/HTML5 platform. They are documented in the
README.md.
Diffstat (limited to 'thirdparty/libwebp/enc')
28 files changed, 13916 insertions, 0 deletions
diff --git a/thirdparty/libwebp/enc/alpha.c b/thirdparty/libwebp/enc/alpha.c new file mode 100644 index 0000000000..03e3ad07f5 --- /dev/null +++ b/thirdparty/libwebp/enc/alpha.c @@ -0,0 +1,433 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Alpha-plane compression. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <assert.h> +#include <stdlib.h> + +#include "./vp8enci.h" +#include "../dsp/dsp.h" +#include "../utils/filters.h" +#include "../utils/quant_levels.h" +#include "../utils/utils.h" +#include "../webp/format_constants.h" + +// ----------------------------------------------------------------------------- +// Encodes the given alpha data via specified compression method 'method'. +// The pre-processing (quantization) is performed if 'quality' is less than 100. +// For such cases, the encoding is lossy. The valid range is [0, 100] for +// 'quality' and [0, 1] for 'method': +// 'method = 0' - No compression; +// 'method = 1' - Use lossless coder on the alpha plane only +// 'filter' values [0, 4] correspond to prediction modes none, horizontal, +// vertical & gradient filters. The prediction mode 4 will try all the +// prediction modes 0 to 3 and pick the best one. +// 'effort_level': specifies how much effort must be spent to try and reduce +// the compressed output size. In range 0 (quick) to 6 (slow). +// +// 'output' corresponds to the buffer containing compressed alpha data. +// This buffer is allocated by this method and caller should call +// WebPSafeFree(*output) when done. +// 'output_size' corresponds to size of this compressed alpha buffer. +// +// Returns 1 on successfully encoding the alpha and +// 0 if either: +// invalid quality or method, or +// memory allocation for the compressed data fails. + +#include "../enc/vp8li.h" + +static int EncodeLossless(const uint8_t* const data, int width, int height, + int effort_level, // in [0..6] range + VP8LBitWriter* const bw, + WebPAuxStats* const stats) { + int ok = 0; + WebPConfig config; + WebPPicture picture; + + WebPPictureInit(&picture); + picture.width = width; + picture.height = height; + picture.use_argb = 1; + picture.stats = stats; + if (!WebPPictureAlloc(&picture)) return 0; + + // Transfer the alpha values to the green channel. + WebPDispatchAlphaToGreen(data, width, picture.width, picture.height, + picture.argb, picture.argb_stride); + + WebPConfigInit(&config); + config.lossless = 1; + // Enable exact, or it would alter RGB values of transparent alpha, which is + // normally OK but not here since we are not encoding the input image but an + // internal encoding-related image containing necessary exact information in + // RGB channels. + config.exact = 1; + config.method = effort_level; // impact is very small + // Set a low default quality for encoding alpha. Ensure that Alpha quality at + // lower methods (3 and below) is less than the threshold for triggering + // costly 'BackwardReferencesTraceBackwards'. + config.quality = 8.f * effort_level; + assert(config.quality >= 0 && config.quality <= 100.f); + + // TODO(urvang): Temporary fix to avoid generating images that trigger + // a decoder bug related to alpha with color cache. + // See: https://code.google.com/p/webp/issues/detail?id=239 + // Need to re-enable this later. + ok = (VP8LEncodeStream(&config, &picture, bw, 0 /*use_cache*/) == VP8_ENC_OK); + WebPPictureFree(&picture); + ok = ok && !bw->error_; + if (!ok) { + VP8LBitWriterWipeOut(bw); + return 0; + } + return 1; +} + +// ----------------------------------------------------------------------------- + +// Small struct to hold the result of a filter mode compression attempt. +typedef struct { + size_t score; + VP8BitWriter bw; + WebPAuxStats stats; +} FilterTrial; + +// This function always returns an initialized 'bw' object, even upon error. +static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, + int method, int filter, int reduce_levels, + int effort_level, // in [0..6] range + uint8_t* const tmp_alpha, + FilterTrial* result) { + int ok = 0; + const uint8_t* alpha_src; + WebPFilterFunc filter_func; + uint8_t header; + const size_t data_size = width * height; + const uint8_t* output = NULL; + size_t output_size = 0; + VP8LBitWriter tmp_bw; + + assert((uint64_t)data_size == (uint64_t)width * height); // as per spec + assert(filter >= 0 && filter < WEBP_FILTER_LAST); + assert(method >= ALPHA_NO_COMPRESSION); + assert(method <= ALPHA_LOSSLESS_COMPRESSION); + assert(sizeof(header) == ALPHA_HEADER_LEN); + + filter_func = WebPFilters[filter]; + if (filter_func != NULL) { + filter_func(data, width, height, width, tmp_alpha); + alpha_src = tmp_alpha; + } else { + alpha_src = data; + } + + if (method != ALPHA_NO_COMPRESSION) { + ok = VP8LBitWriterInit(&tmp_bw, data_size >> 3); + ok = ok && EncodeLossless(alpha_src, width, height, effort_level, + &tmp_bw, &result->stats); + if (ok) { + output = VP8LBitWriterFinish(&tmp_bw); + output_size = VP8LBitWriterNumBytes(&tmp_bw); + if (output_size > data_size) { + // compressed size is larger than source! Revert to uncompressed mode. + method = ALPHA_NO_COMPRESSION; + VP8LBitWriterWipeOut(&tmp_bw); + } + } else { + VP8LBitWriterWipeOut(&tmp_bw); + return 0; + } + } + + if (method == ALPHA_NO_COMPRESSION) { + output = alpha_src; + output_size = data_size; + ok = 1; + } + + // Emit final result. + header = method | (filter << 2); + if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4; + + VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size); + ok = ok && VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN); + ok = ok && VP8BitWriterAppend(&result->bw, output, output_size); + + if (method != ALPHA_NO_COMPRESSION) { + VP8LBitWriterWipeOut(&tmp_bw); + } + ok = ok && !result->bw.error_; + result->score = VP8BitWriterSize(&result->bw); + return ok; +} + +// ----------------------------------------------------------------------------- + +static int GetNumColors(const uint8_t* data, int width, int height, + int stride) { + int j; + int colors = 0; + uint8_t color[256] = { 0 }; + + for (j = 0; j < height; ++j) { + int i; + const uint8_t* const p = data + j * stride; + for (i = 0; i < width; ++i) { + color[p[i]] = 1; + } + } + for (j = 0; j < 256; ++j) { + if (color[j] > 0) ++colors; + } + return colors; +} + +#define FILTER_TRY_NONE (1 << WEBP_FILTER_NONE) +#define FILTER_TRY_ALL ((1 << WEBP_FILTER_LAST) - 1) + +// Given the input 'filter' option, return an OR'd bit-set of filters to try. +static uint32_t GetFilterMap(const uint8_t* alpha, int width, int height, + int filter, int effort_level) { + uint32_t bit_map = 0U; + if (filter == WEBP_FILTER_FAST) { + // Quick estimate of the best candidate. + int try_filter_none = (effort_level > 3); + const int kMinColorsForFilterNone = 16; + const int kMaxColorsForFilterNone = 192; + const int num_colors = GetNumColors(alpha, width, height, width); + // For low number of colors, NONE yields better compression. + filter = (num_colors <= kMinColorsForFilterNone) + ? WEBP_FILTER_NONE + : WebPEstimateBestFilter(alpha, width, height, width); + bit_map |= 1 << filter; + // For large number of colors, try FILTER_NONE in addition to the best + // filter as well. + if (try_filter_none || num_colors > kMaxColorsForFilterNone) { + bit_map |= FILTER_TRY_NONE; + } + } else if (filter == WEBP_FILTER_NONE) { + bit_map = FILTER_TRY_NONE; + } else { // WEBP_FILTER_BEST -> try all + bit_map = FILTER_TRY_ALL; + } + return bit_map; +} + +static void InitFilterTrial(FilterTrial* const score) { + score->score = (size_t)~0U; + VP8BitWriterInit(&score->bw, 0); +} + +static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height, + size_t data_size, int method, int filter, + int reduce_levels, int effort_level, + uint8_t** const output, + size_t* const output_size, + WebPAuxStats* const stats) { + int ok = 1; + FilterTrial best; + uint32_t try_map = + GetFilterMap(alpha, width, height, filter, effort_level); + InitFilterTrial(&best); + + if (try_map != FILTER_TRY_NONE) { + uint8_t* filtered_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size); + if (filtered_alpha == NULL) return 0; + + for (filter = WEBP_FILTER_NONE; ok && try_map; ++filter, try_map >>= 1) { + if (try_map & 1) { + FilterTrial trial; + ok = EncodeAlphaInternal(alpha, width, height, method, filter, + reduce_levels, effort_level, filtered_alpha, + &trial); + if (ok && trial.score < best.score) { + VP8BitWriterWipeOut(&best.bw); + best = trial; + } else { + VP8BitWriterWipeOut(&trial.bw); + } + } + } + WebPSafeFree(filtered_alpha); + } else { + ok = EncodeAlphaInternal(alpha, width, height, method, WEBP_FILTER_NONE, + reduce_levels, effort_level, NULL, &best); + } + if (ok) { + if (stats != NULL) { + stats->lossless_features = best.stats.lossless_features; + stats->histogram_bits = best.stats.histogram_bits; + stats->transform_bits = best.stats.transform_bits; + stats->cache_bits = best.stats.cache_bits; + stats->palette_size = best.stats.palette_size; + stats->lossless_size = best.stats.lossless_size; + stats->lossless_hdr_size = best.stats.lossless_hdr_size; + stats->lossless_data_size = best.stats.lossless_data_size; + } + *output_size = VP8BitWriterSize(&best.bw); + *output = VP8BitWriterBuf(&best.bw); + } else { + VP8BitWriterWipeOut(&best.bw); + } + return ok; +} + +static int EncodeAlpha(VP8Encoder* const enc, + int quality, int method, int filter, + int effort_level, + uint8_t** const output, size_t* const output_size) { + const WebPPicture* const pic = enc->pic_; + const int width = pic->width; + const int height = pic->height; + + uint8_t* quant_alpha = NULL; + const size_t data_size = width * height; + uint64_t sse = 0; + int ok = 1; + const int reduce_levels = (quality < 100); + + // quick sanity checks + assert((uint64_t)data_size == (uint64_t)width * height); // as per spec + assert(enc != NULL && pic != NULL && pic->a != NULL); + assert(output != NULL && output_size != NULL); + assert(width > 0 && height > 0); + assert(pic->a_stride >= width); + assert(filter >= WEBP_FILTER_NONE && filter <= WEBP_FILTER_FAST); + + if (quality < 0 || quality > 100) { + return 0; + } + + if (method < ALPHA_NO_COMPRESSION || method > ALPHA_LOSSLESS_COMPRESSION) { + return 0; + } + + if (method == ALPHA_NO_COMPRESSION) { + // Don't filter, as filtering will make no impact on compressed size. + filter = WEBP_FILTER_NONE; + } + + quant_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size); + if (quant_alpha == NULL) { + return 0; + } + + // Extract alpha data (width x height) from raw_data (stride x height). + WebPCopyPlane(pic->a, pic->a_stride, quant_alpha, width, width, height); + + if (reduce_levels) { // No Quantization required for 'quality = 100'. + // 16 alpha levels gives quite a low MSE w.r.t original alpha plane hence + // mapped to moderate quality 70. Hence Quality:[0, 70] -> Levels:[2, 16] + // and Quality:]70, 100] -> Levels:]16, 256]. + const int alpha_levels = (quality <= 70) ? (2 + quality / 5) + : (16 + (quality - 70) * 8); + ok = QuantizeLevels(quant_alpha, width, height, alpha_levels, &sse); + } + + if (ok) { + VP8FiltersInit(); + ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method, + filter, reduce_levels, effort_level, output, + output_size, pic->stats); + if (pic->stats != NULL) { // need stats? + pic->stats->coded_size += (int)(*output_size); + enc->sse_[3] = sse; + } + } + + WebPSafeFree(quant_alpha); + return ok; +} + +//------------------------------------------------------------------------------ +// Main calls + +static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) { + const WebPConfig* config = enc->config_; + uint8_t* alpha_data = NULL; + size_t alpha_size = 0; + const int effort_level = config->method; // maps to [0..6] + const WEBP_FILTER_TYPE filter = + (config->alpha_filtering == 0) ? WEBP_FILTER_NONE : + (config->alpha_filtering == 1) ? WEBP_FILTER_FAST : + WEBP_FILTER_BEST; + if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression, + filter, effort_level, &alpha_data, &alpha_size)) { + return 0; + } + if (alpha_size != (uint32_t)alpha_size) { // Sanity check. + WebPSafeFree(alpha_data); + return 0; + } + enc->alpha_data_size_ = (uint32_t)alpha_size; + enc->alpha_data_ = alpha_data; + (void)dummy; + return 1; +} + +void VP8EncInitAlpha(VP8Encoder* const enc) { + WebPInitAlphaProcessing(); + enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_); + enc->alpha_data_ = NULL; + enc->alpha_data_size_ = 0; + if (enc->thread_level_ > 0) { + WebPWorker* const worker = &enc->alpha_worker_; + WebPGetWorkerInterface()->Init(worker); + worker->data1 = enc; + worker->data2 = NULL; + worker->hook = (WebPWorkerHook)CompressAlphaJob; + } +} + +int VP8EncStartAlpha(VP8Encoder* const enc) { + if (enc->has_alpha_) { + if (enc->thread_level_ > 0) { + WebPWorker* const worker = &enc->alpha_worker_; + // Makes sure worker is good to go. + if (!WebPGetWorkerInterface()->Reset(worker)) { + return 0; + } + WebPGetWorkerInterface()->Launch(worker); + return 1; + } else { + return CompressAlphaJob(enc, NULL); // just do the job right away + } + } + return 1; +} + +int VP8EncFinishAlpha(VP8Encoder* const enc) { + if (enc->has_alpha_) { + if (enc->thread_level_ > 0) { + WebPWorker* const worker = &enc->alpha_worker_; + if (!WebPGetWorkerInterface()->Sync(worker)) return 0; // error + } + } + return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_); +} + +int VP8EncDeleteAlpha(VP8Encoder* const enc) { + int ok = 1; + if (enc->thread_level_ > 0) { + WebPWorker* const worker = &enc->alpha_worker_; + // finish anything left in flight + ok = WebPGetWorkerInterface()->Sync(worker); + // still need to end the worker, even if !ok + WebPGetWorkerInterface()->End(worker); + } + WebPSafeFree(enc->alpha_data_); + enc->alpha_data_ = NULL; + enc->alpha_data_size_ = 0; + enc->has_alpha_ = 0; + return ok; +} diff --git a/thirdparty/libwebp/enc/analysis.c b/thirdparty/libwebp/enc/analysis.c new file mode 100644 index 0000000000..b55128fd48 --- /dev/null +++ b/thirdparty/libwebp/enc/analysis.c @@ -0,0 +1,501 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Macroblock analysis +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include "./vp8enci.h" +#include "./cost.h" +#include "../utils/utils.h" + +#define MAX_ITERS_K_MEANS 6 + +//------------------------------------------------------------------------------ +// Smooth the segment map by replacing isolated block by the majority of its +// neighbours. + +static void SmoothSegmentMap(VP8Encoder* const enc) { + int n, x, y; + const int w = enc->mb_w_; + const int h = enc->mb_h_; + const int majority_cnt_3_x_3_grid = 5; + uint8_t* const tmp = (uint8_t*)WebPSafeMalloc(w * h, sizeof(*tmp)); + assert((uint64_t)(w * h) == (uint64_t)w * h); // no overflow, as per spec + + if (tmp == NULL) return; + for (y = 1; y < h - 1; ++y) { + for (x = 1; x < w - 1; ++x) { + int cnt[NUM_MB_SEGMENTS] = { 0 }; + const VP8MBInfo* const mb = &enc->mb_info_[x + w * y]; + int majority_seg = mb->segment_; + // Check the 8 neighbouring segment values. + cnt[mb[-w - 1].segment_]++; // top-left + cnt[mb[-w + 0].segment_]++; // top + cnt[mb[-w + 1].segment_]++; // top-right + cnt[mb[ - 1].segment_]++; // left + cnt[mb[ + 1].segment_]++; // right + cnt[mb[ w - 1].segment_]++; // bottom-left + cnt[mb[ w + 0].segment_]++; // bottom + cnt[mb[ w + 1].segment_]++; // bottom-right + for (n = 0; n < NUM_MB_SEGMENTS; ++n) { + if (cnt[n] >= majority_cnt_3_x_3_grid) { + majority_seg = n; + break; + } + } + tmp[x + y * w] = majority_seg; + } + } + for (y = 1; y < h - 1; ++y) { + for (x = 1; x < w - 1; ++x) { + VP8MBInfo* const mb = &enc->mb_info_[x + w * y]; + mb->segment_ = tmp[x + y * w]; + } + } + WebPSafeFree(tmp); +} + +//------------------------------------------------------------------------------ +// set segment susceptibility alpha_ / beta_ + +static WEBP_INLINE int clip(int v, int m, int M) { + return (v < m) ? m : (v > M) ? M : v; +} + +static void SetSegmentAlphas(VP8Encoder* const enc, + const int centers[NUM_MB_SEGMENTS], + int mid) { + const int nb = enc->segment_hdr_.num_segments_; + int min = centers[0], max = centers[0]; + int n; + + if (nb > 1) { + for (n = 0; n < nb; ++n) { + if (min > centers[n]) min = centers[n]; + if (max < centers[n]) max = centers[n]; + } + } + if (max == min) max = min + 1; + assert(mid <= max && mid >= min); + for (n = 0; n < nb; ++n) { + const int alpha = 255 * (centers[n] - mid) / (max - min); + const int beta = 255 * (centers[n] - min) / (max - min); + enc->dqm_[n].alpha_ = clip(alpha, -127, 127); + enc->dqm_[n].beta_ = clip(beta, 0, 255); + } +} + +//------------------------------------------------------------------------------ +// Compute susceptibility based on DCT-coeff histograms: +// the higher, the "easier" the macroblock is to compress. + +#define MAX_ALPHA 255 // 8b of precision for susceptibilities. +#define ALPHA_SCALE (2 * MAX_ALPHA) // scaling factor for alpha. +#define DEFAULT_ALPHA (-1) +#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha)) + +static int FinalAlphaValue(int alpha) { + alpha = MAX_ALPHA - alpha; + return clip(alpha, 0, MAX_ALPHA); +} + +static int GetAlpha(const VP8Histogram* const histo) { + // 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer + // values which happen to be mostly noise. This leaves the maximum precision + // for handling the useful small values which contribute most. + const int max_value = histo->max_value; + const int last_non_zero = histo->last_non_zero; + const int alpha = + (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0; + return alpha; +} + +static void InitHistogram(VP8Histogram* const histo) { + histo->max_value = 0; + histo->last_non_zero = 1; +} + +static void MergeHistograms(const VP8Histogram* const in, + VP8Histogram* const out) { + if (in->max_value > out->max_value) { + out->max_value = in->max_value; + } + if (in->last_non_zero > out->last_non_zero) { + out->last_non_zero = in->last_non_zero; + } +} + +//------------------------------------------------------------------------------ +// Simplified k-Means, to assign Nb segments based on alpha-histogram + +static void AssignSegments(VP8Encoder* const enc, + const int alphas[MAX_ALPHA + 1]) { + // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an + // explicit check is needed to avoid spurious warning about 'n + 1' exceeding + // array bounds of 'centers' with some compilers (noticed with gcc-4.9). + const int nb = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) ? + enc->segment_hdr_.num_segments_ : NUM_MB_SEGMENTS; + int centers[NUM_MB_SEGMENTS]; + int weighted_average = 0; + int map[MAX_ALPHA + 1]; + int a, n, k; + int min_a = 0, max_a = MAX_ALPHA, range_a; + // 'int' type is ok for histo, and won't overflow + int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS]; + + assert(nb >= 1); + assert(nb <= NUM_MB_SEGMENTS); + + // bracket the input + for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {} + min_a = n; + for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {} + max_a = n; + range_a = max_a - min_a; + + // Spread initial centers evenly + for (k = 0, n = 1; k < nb; ++k, n += 2) { + assert(n < 2 * nb); + centers[k] = min_a + (n * range_a) / (2 * nb); + } + + for (k = 0; k < MAX_ITERS_K_MEANS; ++k) { // few iters are enough + int total_weight; + int displaced; + // Reset stats + for (n = 0; n < nb; ++n) { + accum[n] = 0; + dist_accum[n] = 0; + } + // Assign nearest center for each 'a' + n = 0; // track the nearest center for current 'a' + for (a = min_a; a <= max_a; ++a) { + if (alphas[a]) { + while (n + 1 < nb && abs(a - centers[n + 1]) < abs(a - centers[n])) { + n++; + } + map[a] = n; + // accumulate contribution into best centroid + dist_accum[n] += a * alphas[a]; + accum[n] += alphas[a]; + } + } + // All point are classified. Move the centroids to the + // center of their respective cloud. + displaced = 0; + weighted_average = 0; + total_weight = 0; + for (n = 0; n < nb; ++n) { + if (accum[n]) { + const int new_center = (dist_accum[n] + accum[n] / 2) / accum[n]; + displaced += abs(centers[n] - new_center); + centers[n] = new_center; + weighted_average += new_center * accum[n]; + total_weight += accum[n]; + } + } + weighted_average = (weighted_average + total_weight / 2) / total_weight; + if (displaced < 5) break; // no need to keep on looping... + } + + // Map each original value to the closest centroid + for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) { + VP8MBInfo* const mb = &enc->mb_info_[n]; + const int alpha = mb->alpha_; + mb->segment_ = map[alpha]; + mb->alpha_ = centers[map[alpha]]; // for the record. + } + + if (nb > 1) { + const int smooth = (enc->config_->preprocessing & 1); + if (smooth) SmoothSegmentMap(enc); + } + + SetSegmentAlphas(enc, centers, weighted_average); // pick some alphas. +} + +//------------------------------------------------------------------------------ +// Macroblock analysis: collect histogram for each mode, deduce the maximal +// susceptibility and set best modes for this macroblock. +// Segment assignment is done later. + +// Number of modes to inspect for alpha_ evaluation. We don't need to test all +// the possible modes during the analysis phase: we risk falling into a local +// optimum, or be subject to boundary effect +#define MAX_INTRA16_MODE 2 +#define MAX_INTRA4_MODE 2 +#define MAX_UV_MODE 2 + +static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) { + const int max_mode = MAX_INTRA16_MODE; + int mode; + int best_alpha = DEFAULT_ALPHA; + int best_mode = 0; + + VP8MakeLuma16Preds(it); + for (mode = 0; mode < max_mode; ++mode) { + VP8Histogram histo; + int alpha; + + InitHistogram(&histo); + VP8CollectHistogram(it->yuv_in_ + Y_OFF_ENC, + it->yuv_p_ + VP8I16ModeOffsets[mode], + 0, 16, &histo); + alpha = GetAlpha(&histo); + if (IS_BETTER_ALPHA(alpha, best_alpha)) { + best_alpha = alpha; + best_mode = mode; + } + } + VP8SetIntra16Mode(it, best_mode); + return best_alpha; +} + +static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it, + int best_alpha) { + uint8_t modes[16]; + const int max_mode = MAX_INTRA4_MODE; + int i4_alpha; + VP8Histogram total_histo; + int cur_histo = 0; + InitHistogram(&total_histo); + + VP8IteratorStartI4(it); + do { + int mode; + int best_mode_alpha = DEFAULT_ALPHA; + VP8Histogram histos[2]; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; + + VP8MakeIntra4Preds(it); + for (mode = 0; mode < max_mode; ++mode) { + int alpha; + + InitHistogram(&histos[cur_histo]); + VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode], + 0, 1, &histos[cur_histo]); + alpha = GetAlpha(&histos[cur_histo]); + if (IS_BETTER_ALPHA(alpha, best_mode_alpha)) { + best_mode_alpha = alpha; + modes[it->i4_] = mode; + cur_histo ^= 1; // keep track of best histo so far. + } + } + // accumulate best histogram + MergeHistograms(&histos[cur_histo ^ 1], &total_histo); + // Note: we reuse the original samples for predictors + } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF_ENC)); + + i4_alpha = GetAlpha(&total_histo); + if (IS_BETTER_ALPHA(i4_alpha, best_alpha)) { + VP8SetIntra4Mode(it, modes); + best_alpha = i4_alpha; + } + return best_alpha; +} + +static int MBAnalyzeBestUVMode(VP8EncIterator* const it) { + int best_alpha = DEFAULT_ALPHA; + int best_mode = 0; + const int max_mode = MAX_UV_MODE; + int mode; + + VP8MakeChroma8Preds(it); + for (mode = 0; mode < max_mode; ++mode) { + VP8Histogram histo; + int alpha; + InitHistogram(&histo); + VP8CollectHistogram(it->yuv_in_ + U_OFF_ENC, + it->yuv_p_ + VP8UVModeOffsets[mode], + 16, 16 + 4 + 4, &histo); + alpha = GetAlpha(&histo); + if (IS_BETTER_ALPHA(alpha, best_alpha)) { + best_alpha = alpha; + best_mode = mode; + } + } + VP8SetIntraUVMode(it, best_mode); + return best_alpha; +} + +static void MBAnalyze(VP8EncIterator* const it, + int alphas[MAX_ALPHA + 1], + int* const alpha, int* const uv_alpha) { + const VP8Encoder* const enc = it->enc_; + int best_alpha, best_uv_alpha; + + VP8SetIntra16Mode(it, 0); // default: Intra16, DC_PRED + VP8SetSkip(it, 0); // not skipped + VP8SetSegment(it, 0); // default segment, spec-wise. + + best_alpha = MBAnalyzeBestIntra16Mode(it); + if (enc->method_ >= 5) { + // We go and make a fast decision for intra4/intra16. + // It's usually not a good and definitive pick, but helps seeding the stats + // about level bit-cost. + // TODO(skal): improve criterion. + best_alpha = MBAnalyzeBestIntra4Mode(it, best_alpha); + } + best_uv_alpha = MBAnalyzeBestUVMode(it); + + // Final susceptibility mix + best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2; + best_alpha = FinalAlphaValue(best_alpha); + alphas[best_alpha]++; + it->mb_->alpha_ = best_alpha; // for later remapping. + + // Accumulate for later complexity analysis. + *alpha += best_alpha; // mixed susceptibility (not just luma) + *uv_alpha += best_uv_alpha; +} + +static void DefaultMBInfo(VP8MBInfo* const mb) { + mb->type_ = 1; // I16x16 + mb->uv_mode_ = 0; + mb->skip_ = 0; // not skipped + mb->segment_ = 0; // default segment + mb->alpha_ = 0; +} + +//------------------------------------------------------------------------------ +// Main analysis loop: +// Collect all susceptibilities for each macroblock and record their +// distribution in alphas[]. Segments is assigned a-posteriori, based on +// this histogram. +// We also pick an intra16 prediction mode, which shouldn't be considered +// final except for fast-encode settings. We can also pick some intra4 modes +// and decide intra4/intra16, but that's usually almost always a bad choice at +// this stage. + +static void ResetAllMBInfo(VP8Encoder* const enc) { + int n; + for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) { + DefaultMBInfo(&enc->mb_info_[n]); + } + // Default susceptibilities. + enc->dqm_[0].alpha_ = 0; + enc->dqm_[0].beta_ = 0; + // Note: we can't compute this alpha_ / uv_alpha_ -> set to default value. + enc->alpha_ = 0; + enc->uv_alpha_ = 0; + WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_); +} + +// struct used to collect job result +typedef struct { + WebPWorker worker; + int alphas[MAX_ALPHA + 1]; + int alpha, uv_alpha; + VP8EncIterator it; + int delta_progress; +} SegmentJob; + +// main work call +static int DoSegmentsJob(SegmentJob* const job, VP8EncIterator* const it) { + int ok = 1; + if (!VP8IteratorIsDone(it)) { + uint8_t tmp[32 + WEBP_ALIGN_CST]; + uint8_t* const scratch = (uint8_t*)WEBP_ALIGN(tmp); + do { + // Let's pretend we have perfect lossless reconstruction. + VP8IteratorImport(it, scratch); + MBAnalyze(it, job->alphas, &job->alpha, &job->uv_alpha); + ok = VP8IteratorProgress(it, job->delta_progress); + } while (ok && VP8IteratorNext(it)); + } + return ok; +} + +static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) { + int i; + for (i = 0; i <= MAX_ALPHA; ++i) dst->alphas[i] += src->alphas[i]; + dst->alpha += src->alpha; + dst->uv_alpha += src->uv_alpha; +} + +// initialize the job struct with some TODOs +static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job, + int start_row, int end_row) { + WebPGetWorkerInterface()->Init(&job->worker); + job->worker.data1 = job; + job->worker.data2 = &job->it; + job->worker.hook = (WebPWorkerHook)DoSegmentsJob; + VP8IteratorInit(enc, &job->it); + VP8IteratorSetRow(&job->it, start_row); + VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w_); + memset(job->alphas, 0, sizeof(job->alphas)); + job->alpha = 0; + job->uv_alpha = 0; + // only one of both jobs can record the progress, since we don't + // expect the user's hook to be multi-thread safe + job->delta_progress = (start_row == 0) ? 20 : 0; +} + +// main entry point +int VP8EncAnalyze(VP8Encoder* const enc) { + int ok = 1; + const int do_segments = + enc->config_->emulate_jpeg_size || // We need the complexity evaluation. + (enc->segment_hdr_.num_segments_ > 1) || + (enc->method_ == 0); // for method 0, we need preds_[] to be filled. + if (do_segments) { + const int last_row = enc->mb_h_; + // We give a little more than a half work to the main thread. + const int split_row = (9 * last_row + 15) >> 4; + const int total_mb = last_row * enc->mb_w_; +#ifdef WEBP_USE_THREAD + const int kMinSplitRow = 2; // minimal rows needed for mt to be worth it + const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow); +#else + const int do_mt = 0; +#endif + const WebPWorkerInterface* const worker_interface = + WebPGetWorkerInterface(); + SegmentJob main_job; + if (do_mt) { + SegmentJob side_job; + // Note the use of '&' instead of '&&' because we must call the functions + // no matter what. + InitSegmentJob(enc, &main_job, 0, split_row); + InitSegmentJob(enc, &side_job, split_row, last_row); + // we don't need to call Reset() on main_job.worker, since we're calling + // WebPWorkerExecute() on it + ok &= worker_interface->Reset(&side_job.worker); + // launch the two jobs in parallel + if (ok) { + worker_interface->Launch(&side_job.worker); + worker_interface->Execute(&main_job.worker); + ok &= worker_interface->Sync(&side_job.worker); + ok &= worker_interface->Sync(&main_job.worker); + } + worker_interface->End(&side_job.worker); + if (ok) MergeJobs(&side_job, &main_job); // merge results together + } else { + // Even for single-thread case, we use the generic Worker tools. + InitSegmentJob(enc, &main_job, 0, last_row); + worker_interface->Execute(&main_job.worker); + ok &= worker_interface->Sync(&main_job.worker); + } + worker_interface->End(&main_job.worker); + if (ok) { + enc->alpha_ = main_job.alpha / total_mb; + enc->uv_alpha_ = main_job.uv_alpha / total_mb; + AssignSegments(enc, main_job.alphas); + } + } else { // Use only one default segment. + ResetAllMBInfo(enc); + } + return ok; +} + diff --git a/thirdparty/libwebp/enc/backward_references.c b/thirdparty/libwebp/enc/backward_references.c new file mode 100644 index 0000000000..136a24a8c3 --- /dev/null +++ b/thirdparty/libwebp/enc/backward_references.c @@ -0,0 +1,1715 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Author: Jyrki Alakuijala (jyrki@google.com) +// + +#include <assert.h> +#include <math.h> + +#include "./backward_references.h" +#include "./histogram.h" +#include "../dsp/lossless.h" +#include "../dsp/dsp.h" +#include "../utils/color_cache.h" +#include "../utils/utils.h" + +#define VALUES_IN_BYTE 256 + +#define MIN_BLOCK_SIZE 256 // minimum block size for backward references + +#define MAX_ENTROPY (1e30f) + +// 1M window (4M bytes) minus 120 special codes for short distances. +#define WINDOW_SIZE_BITS 20 +#define WINDOW_SIZE ((1 << WINDOW_SIZE_BITS) - 120) + +// Bounds for the match length. +#define MIN_LENGTH 2 +// If you change this, you need MAX_LENGTH_BITS + WINDOW_SIZE_BITS <= 32 as it +// is used in VP8LHashChain. +#define MAX_LENGTH_BITS 12 +// We want the max value to be attainable and stored in MAX_LENGTH_BITS bits. +#define MAX_LENGTH ((1 << MAX_LENGTH_BITS) - 1) +#if MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32 +#error "MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32" +#endif + +// ----------------------------------------------------------------------------- + +static const uint8_t plane_to_code_lut[128] = { + 96, 73, 55, 39, 23, 13, 5, 1, 255, 255, 255, 255, 255, 255, 255, 255, + 101, 78, 58, 42, 26, 16, 8, 2, 0, 3, 9, 17, 27, 43, 59, 79, + 102, 86, 62, 46, 32, 20, 10, 6, 4, 7, 11, 21, 33, 47, 63, 87, + 105, 90, 70, 52, 37, 28, 18, 14, 12, 15, 19, 29, 38, 53, 71, 91, + 110, 99, 82, 66, 48, 35, 30, 24, 22, 25, 31, 36, 49, 67, 83, 100, + 115, 108, 94, 76, 64, 50, 44, 40, 34, 41, 45, 51, 65, 77, 95, 109, + 118, 113, 103, 92, 80, 68, 60, 56, 54, 57, 61, 69, 81, 93, 104, 114, + 119, 116, 111, 106, 97, 88, 84, 74, 72, 75, 85, 89, 98, 107, 112, 117 +}; + +static int DistanceToPlaneCode(int xsize, int dist) { + const int yoffset = dist / xsize; + const int xoffset = dist - yoffset * xsize; + if (xoffset <= 8 && yoffset < 8) { + return plane_to_code_lut[yoffset * 16 + 8 - xoffset] + 1; + } else if (xoffset > xsize - 8 && yoffset < 7) { + return plane_to_code_lut[(yoffset + 1) * 16 + 8 + (xsize - xoffset)] + 1; + } + return dist + 120; +} + +// Returns the exact index where array1 and array2 are different. For an index +// inferior or equal to best_len_match, the return value just has to be strictly +// inferior to best_len_match. The current behavior is to return 0 if this index +// is best_len_match, and the index itself otherwise. +// If no two elements are the same, it returns max_limit. +static WEBP_INLINE int FindMatchLength(const uint32_t* const array1, + const uint32_t* const array2, + int best_len_match, int max_limit) { + // Before 'expensive' linear match, check if the two arrays match at the + // current best length index. + if (array1[best_len_match] != array2[best_len_match]) return 0; + + return VP8LVectorMismatch(array1, array2, max_limit); +} + +// ----------------------------------------------------------------------------- +// VP8LBackwardRefs + +struct PixOrCopyBlock { + PixOrCopyBlock* next_; // next block (or NULL) + PixOrCopy* start_; // data start + int size_; // currently used size +}; + +static void ClearBackwardRefs(VP8LBackwardRefs* const refs) { + assert(refs != NULL); + if (refs->tail_ != NULL) { + *refs->tail_ = refs->free_blocks_; // recycle all blocks at once + } + refs->free_blocks_ = refs->refs_; + refs->tail_ = &refs->refs_; + refs->last_block_ = NULL; + refs->refs_ = NULL; +} + +void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) { + assert(refs != NULL); + ClearBackwardRefs(refs); + while (refs->free_blocks_ != NULL) { + PixOrCopyBlock* const next = refs->free_blocks_->next_; + WebPSafeFree(refs->free_blocks_); + refs->free_blocks_ = next; + } +} + +void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) { + assert(refs != NULL); + memset(refs, 0, sizeof(*refs)); + refs->tail_ = &refs->refs_; + refs->block_size_ = + (block_size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : block_size; +} + +VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) { + VP8LRefsCursor c; + c.cur_block_ = refs->refs_; + if (refs->refs_ != NULL) { + c.cur_pos = c.cur_block_->start_; + c.last_pos_ = c.cur_pos + c.cur_block_->size_; + } else { + c.cur_pos = NULL; + c.last_pos_ = NULL; + } + return c; +} + +void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) { + PixOrCopyBlock* const b = c->cur_block_->next_; + c->cur_pos = (b == NULL) ? NULL : b->start_; + c->last_pos_ = (b == NULL) ? NULL : b->start_ + b->size_; + c->cur_block_ = b; +} + +// Create a new block, either from the free list or allocated +static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) { + PixOrCopyBlock* b = refs->free_blocks_; + if (b == NULL) { // allocate new memory chunk + const size_t total_size = + sizeof(*b) + refs->block_size_ * sizeof(*b->start_); + b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size); + if (b == NULL) { + refs->error_ |= 1; + return NULL; + } + b->start_ = (PixOrCopy*)((uint8_t*)b + sizeof(*b)); // not always aligned + } else { // recycle from free-list + refs->free_blocks_ = b->next_; + } + *refs->tail_ = b; + refs->tail_ = &b->next_; + refs->last_block_ = b; + b->next_ = NULL; + b->size_ = 0; + return b; +} + +static WEBP_INLINE void BackwardRefsCursorAdd(VP8LBackwardRefs* const refs, + const PixOrCopy v) { + PixOrCopyBlock* b = refs->last_block_; + if (b == NULL || b->size_ == refs->block_size_) { + b = BackwardRefsNewBlock(refs); + if (b == NULL) return; // refs->error_ is set + } + b->start_[b->size_++] = v; +} + +int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src, + VP8LBackwardRefs* const dst) { + const PixOrCopyBlock* b = src->refs_; + ClearBackwardRefs(dst); + assert(src->block_size_ == dst->block_size_); + while (b != NULL) { + PixOrCopyBlock* const new_b = BackwardRefsNewBlock(dst); + if (new_b == NULL) return 0; // dst->error_ is set + memcpy(new_b->start_, b->start_, b->size_ * sizeof(*b->start_)); + new_b->size_ = b->size_; + b = b->next_; + } + return 1; +} + +// ----------------------------------------------------------------------------- +// Hash chains + +int VP8LHashChainInit(VP8LHashChain* const p, int size) { + assert(p->size_ == 0); + assert(p->offset_length_ == NULL); + assert(size > 0); + p->offset_length_ = + (uint32_t*)WebPSafeMalloc(size, sizeof(*p->offset_length_)); + if (p->offset_length_ == NULL) return 0; + p->size_ = size; + + return 1; +} + +void VP8LHashChainClear(VP8LHashChain* const p) { + assert(p != NULL); + WebPSafeFree(p->offset_length_); + + p->size_ = 0; + p->offset_length_ = NULL; +} + +// ----------------------------------------------------------------------------- + +#define HASH_MULTIPLIER_HI (0xc6a4a793U) +#define HASH_MULTIPLIER_LO (0x5bd1e996U) + +static WEBP_INLINE uint32_t GetPixPairHash64(const uint32_t* const argb) { + uint32_t key; + key = argb[1] * HASH_MULTIPLIER_HI; + key += argb[0] * HASH_MULTIPLIER_LO; + key = key >> (32 - HASH_BITS); + return key; +} + +// Returns the maximum number of hash chain lookups to do for a +// given compression quality. Return value in range [8, 86]. +static int GetMaxItersForQuality(int quality) { + return 8 + (quality * quality) / 128; +} + +static int GetWindowSizeForHashChain(int quality, int xsize) { + const int max_window_size = (quality > 75) ? WINDOW_SIZE + : (quality > 50) ? (xsize << 8) + : (quality > 25) ? (xsize << 6) + : (xsize << 4); + assert(xsize > 0); + return (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE : max_window_size; +} + +static WEBP_INLINE int MaxFindCopyLength(int len) { + return (len < MAX_LENGTH) ? len : MAX_LENGTH; +} + +int VP8LHashChainFill(VP8LHashChain* const p, int quality, + const uint32_t* const argb, int xsize, int ysize) { + const int size = xsize * ysize; + const int iter_max = GetMaxItersForQuality(quality); + const int iter_min = iter_max - quality / 10; + const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize); + int pos; + uint32_t base_position; + int32_t* hash_to_first_index; + // Temporarily use the p->offset_length_ as a hash chain. + int32_t* chain = (int32_t*)p->offset_length_; + assert(p->size_ != 0); + assert(p->offset_length_ != NULL); + + hash_to_first_index = + (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index)); + if (hash_to_first_index == NULL) return 0; + + // Set the int32_t array to -1. + memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index)); + // Fill the chain linking pixels with the same hash. + for (pos = 0; pos < size - 1; ++pos) { + const uint32_t hash_code = GetPixPairHash64(argb + pos); + chain[pos] = hash_to_first_index[hash_code]; + hash_to_first_index[hash_code] = pos; + } + WebPSafeFree(hash_to_first_index); + + // Find the best match interval at each pixel, defined by an offset to the + // pixel and a length. The right-most pixel cannot match anything to the right + // (hence a best length of 0) and the left-most pixel nothing to the left + // (hence an offset of 0). + p->offset_length_[0] = p->offset_length_[size - 1] = 0; + for (base_position = size - 2 < 0 ? 0 : size - 2; base_position > 0;) { + const int max_len = MaxFindCopyLength(size - 1 - base_position); + const uint32_t* const argb_start = argb + base_position; + int iter = iter_max; + int best_length = 0; + uint32_t best_distance = 0; + const int min_pos = + (base_position > window_size) ? base_position - window_size : 0; + const int length_max = (max_len < 256) ? max_len : 256; + uint32_t max_base_position; + + for (pos = chain[base_position]; pos >= min_pos; pos = chain[pos]) { + int curr_length; + if (--iter < 0) { + break; + } + assert(base_position > (uint32_t)pos); + + curr_length = + FindMatchLength(argb + pos, argb_start, best_length, max_len); + if (best_length < curr_length) { + best_length = curr_length; + best_distance = base_position - pos; + // Stop if we have reached the maximum length. Otherwise, make sure + // we have executed a minimum number of iterations depending on the + // quality. + if ((best_length == MAX_LENGTH) || + (curr_length >= length_max && iter < iter_min)) { + break; + } + } + } + // We have the best match but in case the two intervals continue matching + // to the left, we have the best matches for the left-extended pixels. + max_base_position = base_position; + while (1) { + assert(best_length <= MAX_LENGTH); + assert(best_distance <= WINDOW_SIZE); + p->offset_length_[base_position] = + (best_distance << MAX_LENGTH_BITS) | (uint32_t)best_length; + --base_position; + // Stop if we don't have a match or if we are out of bounds. + if (best_distance == 0 || base_position == 0) break; + // Stop if we cannot extend the matching intervals to the left. + if (base_position < best_distance || + argb[base_position - best_distance] != argb[base_position]) { + break; + } + // Stop if we are matching at its limit because there could be a closer + // matching interval with the same maximum length. Then again, if the + // matching interval is as close as possible (best_distance == 1), we will + // never find anything better so let's continue. + if (best_length == MAX_LENGTH && best_distance != 1 && + base_position + MAX_LENGTH < max_base_position) { + break; + } + if (best_length < MAX_LENGTH) { + ++best_length; + max_base_position = base_position; + } + } + } + return 1; +} + +static WEBP_INLINE int HashChainFindOffset(const VP8LHashChain* const p, + const int base_position) { + return p->offset_length_[base_position] >> MAX_LENGTH_BITS; +} + +static WEBP_INLINE int HashChainFindLength(const VP8LHashChain* const p, + const int base_position) { + return p->offset_length_[base_position] & ((1U << MAX_LENGTH_BITS) - 1); +} + +static WEBP_INLINE void HashChainFindCopy(const VP8LHashChain* const p, + int base_position, + int* const offset_ptr, + int* const length_ptr) { + *offset_ptr = HashChainFindOffset(p, base_position); + *length_ptr = HashChainFindLength(p, base_position); +} + +static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache, + VP8LColorCache* const hashers, + VP8LBackwardRefs* const refs) { + PixOrCopy v; + if (use_color_cache) { + const uint32_t key = VP8LColorCacheGetIndex(hashers, pixel); + if (VP8LColorCacheLookup(hashers, key) == pixel) { + v = PixOrCopyCreateCacheIdx(key); + } else { + v = PixOrCopyCreateLiteral(pixel); + VP8LColorCacheSet(hashers, key, pixel); + } + } else { + v = PixOrCopyCreateLiteral(pixel); + } + BackwardRefsCursorAdd(refs, v); +} + +static int BackwardReferencesRle(int xsize, int ysize, + const uint32_t* const argb, + int cache_bits, VP8LBackwardRefs* const refs) { + const int pix_count = xsize * ysize; + int i, k; + const int use_color_cache = (cache_bits > 0); + VP8LColorCache hashers; + + if (use_color_cache && !VP8LColorCacheInit(&hashers, cache_bits)) { + return 0; + } + ClearBackwardRefs(refs); + // Add first pixel as literal. + AddSingleLiteral(argb[0], use_color_cache, &hashers, refs); + i = 1; + while (i < pix_count) { + const int max_len = MaxFindCopyLength(pix_count - i); + const int kMinLength = 4; + const int rle_len = FindMatchLength(argb + i, argb + i - 1, 0, max_len); + const int prev_row_len = (i < xsize) ? 0 : + FindMatchLength(argb + i, argb + i - xsize, 0, max_len); + if (rle_len >= prev_row_len && rle_len >= kMinLength) { + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, rle_len)); + // We don't need to update the color cache here since it is always the + // same pixel being copied, and that does not change the color cache + // state. + i += rle_len; + } else if (prev_row_len >= kMinLength) { + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, prev_row_len)); + if (use_color_cache) { + for (k = 0; k < prev_row_len; ++k) { + VP8LColorCacheInsert(&hashers, argb[i + k]); + } + } + i += prev_row_len; + } else { + AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); + i++; + } + } + if (use_color_cache) VP8LColorCacheClear(&hashers); + return !refs->error_; +} + +static int BackwardReferencesLz77(int xsize, int ysize, + const uint32_t* const argb, int cache_bits, + const VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs) { + int i; + int i_last_check = -1; + int ok = 0; + int cc_init = 0; + const int use_color_cache = (cache_bits > 0); + const int pix_count = xsize * ysize; + VP8LColorCache hashers; + + if (use_color_cache) { + cc_init = VP8LColorCacheInit(&hashers, cache_bits); + if (!cc_init) goto Error; + } + ClearBackwardRefs(refs); + for (i = 0; i < pix_count;) { + // Alternative#1: Code the pixels starting at 'i' using backward reference. + int offset = 0; + int len = 0; + int j; + HashChainFindCopy(hash_chain, i, &offset, &len); + if (len > MIN_LENGTH + 1) { + const int len_ini = len; + int max_reach = 0; + assert(i + len < pix_count); + // Only start from what we have not checked already. + i_last_check = (i > i_last_check) ? i : i_last_check; + // We know the best match for the current pixel but we try to find the + // best matches for the current pixel AND the next one combined. + // The naive method would use the intervals: + // [i,i+len) + [i+len, length of best match at i+len) + // while we check if we can use: + // [i,j) (where j<=i+len) + [j, length of best match at j) + for (j = i_last_check + 1; j <= i + len_ini; ++j) { + const int len_j = HashChainFindLength(hash_chain, j); + const int reach = + j + (len_j > MIN_LENGTH + 1 ? len_j : 1); // 1 for single literal. + if (reach > max_reach) { + len = j - i; + max_reach = reach; + } + } + } else { + len = 1; + } + // Go with literal or backward reference. + assert(len > 0); + if (len == 1) { + AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); + } else { + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); + if (use_color_cache) { + for (j = i; j < i + len; ++j) VP8LColorCacheInsert(&hashers, argb[j]); + } + } + i += len; + } + + ok = !refs->error_; + Error: + if (cc_init) VP8LColorCacheClear(&hashers); + return ok; +} + +// ----------------------------------------------------------------------------- + +typedef struct { + double alpha_[VALUES_IN_BYTE]; + double red_[VALUES_IN_BYTE]; + double blue_[VALUES_IN_BYTE]; + double distance_[NUM_DISTANCE_CODES]; + double* literal_; +} CostModel; + +static int BackwardReferencesTraceBackwards( + int xsize, int ysize, const uint32_t* const argb, int quality, + int cache_bits, const VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs); + +static void ConvertPopulationCountTableToBitEstimates( + int num_symbols, const uint32_t population_counts[], double output[]) { + uint32_t sum = 0; + int nonzeros = 0; + int i; + for (i = 0; i < num_symbols; ++i) { + sum += population_counts[i]; + if (population_counts[i] > 0) { + ++nonzeros; + } + } + if (nonzeros <= 1) { + memset(output, 0, num_symbols * sizeof(*output)); + } else { + const double logsum = VP8LFastLog2(sum); + for (i = 0; i < num_symbols; ++i) { + output[i] = logsum - VP8LFastLog2(population_counts[i]); + } + } +} + +static int CostModelBuild(CostModel* const m, int cache_bits, + VP8LBackwardRefs* const refs) { + int ok = 0; + VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits); + if (histo == NULL) goto Error; + + VP8LHistogramCreate(histo, refs, cache_bits); + + ConvertPopulationCountTableToBitEstimates( + VP8LHistogramNumCodes(histo->palette_code_bits_), + histo->literal_, m->literal_); + ConvertPopulationCountTableToBitEstimates( + VALUES_IN_BYTE, histo->red_, m->red_); + ConvertPopulationCountTableToBitEstimates( + VALUES_IN_BYTE, histo->blue_, m->blue_); + ConvertPopulationCountTableToBitEstimates( + VALUES_IN_BYTE, histo->alpha_, m->alpha_); + ConvertPopulationCountTableToBitEstimates( + NUM_DISTANCE_CODES, histo->distance_, m->distance_); + ok = 1; + + Error: + VP8LFreeHistogram(histo); + return ok; +} + +static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) { + return m->alpha_[v >> 24] + + m->red_[(v >> 16) & 0xff] + + m->literal_[(v >> 8) & 0xff] + + m->blue_[v & 0xff]; +} + +static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) { + const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx; + return m->literal_[literal_idx]; +} + +static WEBP_INLINE double GetLengthCost(const CostModel* const m, + uint32_t length) { + int code, extra_bits; + VP8LPrefixEncodeBits(length, &code, &extra_bits); + return m->literal_[VALUES_IN_BYTE + code] + extra_bits; +} + +static WEBP_INLINE double GetDistanceCost(const CostModel* const m, + uint32_t distance) { + int code, extra_bits; + VP8LPrefixEncodeBits(distance, &code, &extra_bits); + return m->distance_[code] + extra_bits; +} + +static void AddSingleLiteralWithCostModel(const uint32_t* const argb, + VP8LColorCache* const hashers, + const CostModel* const cost_model, + int idx, int use_color_cache, + double prev_cost, float* const cost, + uint16_t* const dist_array) { + double cost_val = prev_cost; + const uint32_t color = argb[0]; + if (use_color_cache && VP8LColorCacheContains(hashers, color)) { + const double mul0 = 0.68; + const int ix = VP8LColorCacheGetIndex(hashers, color); + cost_val += GetCacheCost(cost_model, ix) * mul0; + } else { + const double mul1 = 0.82; + if (use_color_cache) VP8LColorCacheInsert(hashers, color); + cost_val += GetLiteralCost(cost_model, color) * mul1; + } + if (cost[idx] > cost_val) { + cost[idx] = (float)cost_val; + dist_array[idx] = 1; // only one is inserted. + } +} + +// ----------------------------------------------------------------------------- +// CostManager and interval handling + +// Empirical value to avoid high memory consumption but good for performance. +#define COST_CACHE_INTERVAL_SIZE_MAX 100 + +// To perform backward reference every pixel at index index_ is considered and +// the cost for the MAX_LENGTH following pixels computed. Those following pixels +// at index index_ + k (k from 0 to MAX_LENGTH) have a cost of: +// distance_cost_ at index_ + GetLengthCost(cost_model, k) +// (named cost) (named cached cost) +// and the minimum value is kept. GetLengthCost(cost_model, k) is cached in an +// array of size MAX_LENGTH. +// Instead of performing MAX_LENGTH comparisons per pixel, we keep track of the +// minimal values using intervals, for which lower_ and upper_ bounds are kept. +// An interval is defined by the index_ of the pixel that generated it and +// is only useful in a range of indices from start_ to end_ (exclusive), i.e. +// it contains the minimum value for pixels between start_ and end_. +// Intervals are stored in a linked list and ordered by start_. When a new +// interval has a better minimum, old intervals are split or removed. +typedef struct CostInterval CostInterval; +struct CostInterval { + double lower_; + double upper_; + int start_; + int end_; + double distance_cost_; + int index_; + CostInterval* previous_; + CostInterval* next_; +}; + +// The GetLengthCost(cost_model, k) part of the costs is also bounded for +// efficiency in a set of intervals of a different type. +// If those intervals are small enough, they are not used for comparison and +// written into the costs right away. +typedef struct { + double lower_; // Lower bound of the interval. + double upper_; // Upper bound of the interval. + int start_; + int end_; // Exclusive. + int do_write_; // If !=0, the interval is saved to cost instead of being kept + // for comparison. +} CostCacheInterval; + +// This structure is in charge of managing intervals and costs. +// It caches the different CostCacheInterval, caches the different +// GetLengthCost(cost_model, k) in cost_cache_ and the CostInterval's (whose +// count_ is limited by COST_CACHE_INTERVAL_SIZE_MAX). +#define COST_MANAGER_MAX_FREE_LIST 10 +typedef struct { + CostInterval* head_; + int count_; // The number of stored intervals. + CostCacheInterval* cache_intervals_; + size_t cache_intervals_size_; + double cost_cache_[MAX_LENGTH]; // Contains the GetLengthCost(cost_model, k). + double min_cost_cache_; // The minimum value in cost_cache_[1:]. + double max_cost_cache_; // The maximum value in cost_cache_[1:]. + float* costs_; + uint16_t* dist_array_; + // Most of the time, we only need few intervals -> use a free-list, to avoid + // fragmentation with small allocs in most common cases. + CostInterval intervals_[COST_MANAGER_MAX_FREE_LIST]; + CostInterval* free_intervals_; + // These are regularly malloc'd remains. This list can't grow larger than than + // size COST_CACHE_INTERVAL_SIZE_MAX - COST_MANAGER_MAX_FREE_LIST, note. + CostInterval* recycled_intervals_; + // Buffer used in BackwardReferencesHashChainDistanceOnly to store the ends + // of the intervals that can have impacted the cost at a pixel. + int* interval_ends_; + int interval_ends_size_; +} CostManager; + +static int IsCostCacheIntervalWritable(int start, int end) { + // 100 is the length for which we consider an interval for comparison, and not + // for writing. + // The first intervals are very small and go in increasing size. This constant + // helps merging them into one big interval (up to index 150/200 usually from + // which intervals start getting much bigger). + // This value is empirical. + return (end - start + 1 < 100); +} + +static void CostIntervalAddToFreeList(CostManager* const manager, + CostInterval* const interval) { + interval->next_ = manager->free_intervals_; + manager->free_intervals_ = interval; +} + +static int CostIntervalIsInFreeList(const CostManager* const manager, + const CostInterval* const interval) { + return (interval >= &manager->intervals_[0] && + interval <= &manager->intervals_[COST_MANAGER_MAX_FREE_LIST - 1]); +} + +static void CostManagerInitFreeList(CostManager* const manager) { + int i; + manager->free_intervals_ = NULL; + for (i = 0; i < COST_MANAGER_MAX_FREE_LIST; ++i) { + CostIntervalAddToFreeList(manager, &manager->intervals_[i]); + } +} + +static void DeleteIntervalList(CostManager* const manager, + const CostInterval* interval) { + while (interval != NULL) { + const CostInterval* const next = interval->next_; + if (!CostIntervalIsInFreeList(manager, interval)) { + WebPSafeFree((void*)interval); + } // else: do nothing + interval = next; + } +} + +static void CostManagerClear(CostManager* const manager) { + if (manager == NULL) return; + + WebPSafeFree(manager->costs_); + WebPSafeFree(manager->cache_intervals_); + WebPSafeFree(manager->interval_ends_); + + // Clear the interval lists. + DeleteIntervalList(manager, manager->head_); + manager->head_ = NULL; + DeleteIntervalList(manager, manager->recycled_intervals_); + manager->recycled_intervals_ = NULL; + + // Reset pointers, count_ and cache_intervals_size_. + memset(manager, 0, sizeof(*manager)); + CostManagerInitFreeList(manager); +} + +static int CostManagerInit(CostManager* const manager, + uint16_t* const dist_array, int pix_count, + const CostModel* const cost_model) { + int i; + const int cost_cache_size = (pix_count > MAX_LENGTH) ? MAX_LENGTH : pix_count; + // This constant is tied to the cost_model we use. + // Empirically, differences between intervals is usually of more than 1. + const double min_cost_diff = 0.1; + + manager->costs_ = NULL; + manager->cache_intervals_ = NULL; + manager->interval_ends_ = NULL; + manager->head_ = NULL; + manager->recycled_intervals_ = NULL; + manager->count_ = 0; + manager->dist_array_ = dist_array; + CostManagerInitFreeList(manager); + + // Fill in the cost_cache_. + manager->cache_intervals_size_ = 1; + manager->cost_cache_[0] = 0; + for (i = 1; i < cost_cache_size; ++i) { + manager->cost_cache_[i] = GetLengthCost(cost_model, i); + // Get an approximation of the number of bound intervals. + if (fabs(manager->cost_cache_[i] - manager->cost_cache_[i - 1]) > + min_cost_diff) { + ++manager->cache_intervals_size_; + } + // Compute the minimum of cost_cache_. + if (i == 1) { + manager->min_cost_cache_ = manager->cost_cache_[1]; + manager->max_cost_cache_ = manager->cost_cache_[1]; + } else if (manager->cost_cache_[i] < manager->min_cost_cache_) { + manager->min_cost_cache_ = manager->cost_cache_[i]; + } else if (manager->cost_cache_[i] > manager->max_cost_cache_) { + manager->max_cost_cache_ = manager->cost_cache_[i]; + } + } + + // With the current cost models, we have 15 intervals, so we are safe by + // setting a maximum of COST_CACHE_INTERVAL_SIZE_MAX. + if (manager->cache_intervals_size_ > COST_CACHE_INTERVAL_SIZE_MAX) { + manager->cache_intervals_size_ = COST_CACHE_INTERVAL_SIZE_MAX; + } + manager->cache_intervals_ = (CostCacheInterval*)WebPSafeMalloc( + manager->cache_intervals_size_, sizeof(*manager->cache_intervals_)); + if (manager->cache_intervals_ == NULL) { + CostManagerClear(manager); + return 0; + } + + // Fill in the cache_intervals_. + { + double cost_prev = -1e38f; // unprobably low initial value + CostCacheInterval* prev = NULL; + CostCacheInterval* cur = manager->cache_intervals_; + const CostCacheInterval* const end = + manager->cache_intervals_ + manager->cache_intervals_size_; + + // Consecutive values in cost_cache_ are compared and if a big enough + // difference is found, a new interval is created and bounded. + for (i = 0; i < cost_cache_size; ++i) { + const double cost_val = manager->cost_cache_[i]; + if (i == 0 || + (fabs(cost_val - cost_prev) > min_cost_diff && cur + 1 < end)) { + if (i > 1) { + const int is_writable = + IsCostCacheIntervalWritable(cur->start_, cur->end_); + // Merge with the previous interval if both are writable. + if (is_writable && cur != manager->cache_intervals_ && + prev->do_write_) { + // Update the previous interval. + prev->end_ = cur->end_; + if (cur->lower_ < prev->lower_) { + prev->lower_ = cur->lower_; + } else if (cur->upper_ > prev->upper_) { + prev->upper_ = cur->upper_; + } + } else { + cur->do_write_ = is_writable; + prev = cur; + ++cur; + } + } + // Initialize an interval. + cur->start_ = i; + cur->do_write_ = 0; + cur->lower_ = cost_val; + cur->upper_ = cost_val; + } else { + // Update the current interval bounds. + if (cost_val < cur->lower_) { + cur->lower_ = cost_val; + } else if (cost_val > cur->upper_) { + cur->upper_ = cost_val; + } + } + cur->end_ = i + 1; + cost_prev = cost_val; + } + manager->cache_intervals_size_ = cur + 1 - manager->cache_intervals_; + } + + manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_)); + if (manager->costs_ == NULL) { + CostManagerClear(manager); + return 0; + } + // Set the initial costs_ high for every pixel as we will keep the minimum. + for (i = 0; i < pix_count; ++i) manager->costs_[i] = 1e38f; + + // The cost at pixel is influenced by the cost intervals from previous pixels. + // Let us take the specific case where the offset is the same (which actually + // happens a lot in case of uniform regions). + // pixel i contributes to j>i a cost of: offset cost + cost_cache_[j-i] + // pixel i+1 contributes to j>i a cost of: 2*offset cost + cost_cache_[j-i-1] + // pixel i+2 contributes to j>i a cost of: 3*offset cost + cost_cache_[j-i-2] + // and so on. + // A pixel i influences the following length(j) < MAX_LENGTH pixels. What is + // the value of j such that pixel i + j cannot influence any of those pixels? + // This value is such that: + // max of cost_cache_ < j*offset cost + min of cost_cache_ + // (pixel i + j 's cost cannot beat the worst cost given by pixel i). + // This value will be used to optimize the cost computation in + // BackwardReferencesHashChainDistanceOnly. + { + // The offset cost is computed in GetDistanceCost and has a minimum value of + // the minimum in cost_model->distance_. The case where the offset cost is 0 + // will be dealt with differently later so we are only interested in the + // minimum non-zero offset cost. + double offset_cost_min = 0.; + int size; + for (i = 0; i < NUM_DISTANCE_CODES; ++i) { + if (cost_model->distance_[i] != 0) { + if (offset_cost_min == 0.) { + offset_cost_min = cost_model->distance_[i]; + } else if (cost_model->distance_[i] < offset_cost_min) { + offset_cost_min = cost_model->distance_[i]; + } + } + } + // In case all the cost_model->distance_ is 0, the next non-zero cost we + // can have is from the extra bit in GetDistanceCost, hence 1. + if (offset_cost_min < 1.) offset_cost_min = 1.; + + size = 1 + (int)ceil((manager->max_cost_cache_ - manager->min_cost_cache_) / + offset_cost_min); + // Empirically, we usually end up with a value below 100. + if (size > MAX_LENGTH) size = MAX_LENGTH; + + manager->interval_ends_ = + (int*)WebPSafeMalloc(size, sizeof(*manager->interval_ends_)); + if (manager->interval_ends_ == NULL) { + CostManagerClear(manager); + return 0; + } + manager->interval_ends_size_ = size; + } + + return 1; +} + +// Given the distance_cost for pixel 'index', update the cost at pixel 'i' if it +// is smaller than the previously computed value. +static WEBP_INLINE void UpdateCost(CostManager* const manager, int i, int index, + double distance_cost) { + int k = i - index; + double cost_tmp; + assert(k >= 0 && k < MAX_LENGTH); + cost_tmp = distance_cost + manager->cost_cache_[k]; + + if (manager->costs_[i] > cost_tmp) { + manager->costs_[i] = (float)cost_tmp; + manager->dist_array_[i] = k + 1; + } +} + +// Given the distance_cost for pixel 'index', update the cost for all the pixels +// between 'start' and 'end' excluded. +static WEBP_INLINE void UpdateCostPerInterval(CostManager* const manager, + int start, int end, int index, + double distance_cost) { + int i; + for (i = start; i < end; ++i) UpdateCost(manager, i, index, distance_cost); +} + +// Given two intervals, make 'prev' be the previous one of 'next' in 'manager'. +static WEBP_INLINE void ConnectIntervals(CostManager* const manager, + CostInterval* const prev, + CostInterval* const next) { + if (prev != NULL) { + prev->next_ = next; + } else { + manager->head_ = next; + } + + if (next != NULL) next->previous_ = prev; +} + +// Pop an interval in the manager. +static WEBP_INLINE void PopInterval(CostManager* const manager, + CostInterval* const interval) { + CostInterval* const next = interval->next_; + + if (interval == NULL) return; + + ConnectIntervals(manager, interval->previous_, next); + if (CostIntervalIsInFreeList(manager, interval)) { + CostIntervalAddToFreeList(manager, interval); + } else { // recycle regularly malloc'd intervals too + interval->next_ = manager->recycled_intervals_; + manager->recycled_intervals_ = interval; + } + --manager->count_; + assert(manager->count_ >= 0); +} + +// Update the cost at index i by going over all the stored intervals that +// overlap with i. +static WEBP_INLINE void UpdateCostPerIndex(CostManager* const manager, int i) { + CostInterval* current = manager->head_; + + while (current != NULL && current->start_ <= i) { + if (current->end_ <= i) { + // We have an outdated interval, remove it. + CostInterval* next = current->next_; + PopInterval(manager, current); + current = next; + } else { + UpdateCost(manager, i, current->index_, current->distance_cost_); + current = current->next_; + } + } +} + +// Given a current orphan interval and its previous interval, before +// it was orphaned (which can be NULL), set it at the right place in the list +// of intervals using the start_ ordering and the previous interval as a hint. +static WEBP_INLINE void PositionOrphanInterval(CostManager* const manager, + CostInterval* const current, + CostInterval* previous) { + assert(current != NULL); + + if (previous == NULL) previous = manager->head_; + while (previous != NULL && current->start_ < previous->start_) { + previous = previous->previous_; + } + while (previous != NULL && previous->next_ != NULL && + previous->next_->start_ < current->start_) { + previous = previous->next_; + } + + if (previous != NULL) { + ConnectIntervals(manager, current, previous->next_); + } else { + ConnectIntervals(manager, current, manager->head_); + } + ConnectIntervals(manager, previous, current); +} + +// Insert an interval in the list contained in the manager by starting at +// interval_in as a hint. The intervals are sorted by start_ value. +static WEBP_INLINE void InsertInterval(CostManager* const manager, + CostInterval* const interval_in, + double distance_cost, double lower, + double upper, int index, int start, + int end) { + CostInterval* interval_new; + + if (IsCostCacheIntervalWritable(start, end) || + manager->count_ >= COST_CACHE_INTERVAL_SIZE_MAX) { + // Write down the interval if it is too small. + UpdateCostPerInterval(manager, start, end, index, distance_cost); + return; + } + if (manager->free_intervals_ != NULL) { + interval_new = manager->free_intervals_; + manager->free_intervals_ = interval_new->next_; + } else if (manager->recycled_intervals_ != NULL) { + interval_new = manager->recycled_intervals_; + manager->recycled_intervals_ = interval_new->next_; + } else { // malloc for good + interval_new = (CostInterval*)WebPSafeMalloc(1, sizeof(*interval_new)); + if (interval_new == NULL) { + // Write down the interval if we cannot create it. + UpdateCostPerInterval(manager, start, end, index, distance_cost); + return; + } + } + + interval_new->distance_cost_ = distance_cost; + interval_new->lower_ = lower; + interval_new->upper_ = upper; + interval_new->index_ = index; + interval_new->start_ = start; + interval_new->end_ = end; + PositionOrphanInterval(manager, interval_new, interval_in); + + ++manager->count_; +} + +// When an interval has its start_ or end_ modified, it needs to be +// repositioned in the linked list. +static WEBP_INLINE void RepositionInterval(CostManager* const manager, + CostInterval* const interval) { + if (IsCostCacheIntervalWritable(interval->start_, interval->end_)) { + // Maybe interval has been resized and is small enough to be removed. + UpdateCostPerInterval(manager, interval->start_, interval->end_, + interval->index_, interval->distance_cost_); + PopInterval(manager, interval); + return; + } + + // Early exit if interval is at the right spot. + if ((interval->previous_ == NULL || + interval->previous_->start_ <= interval->start_) && + (interval->next_ == NULL || + interval->start_ <= interval->next_->start_)) { + return; + } + + ConnectIntervals(manager, interval->previous_, interval->next_); + PositionOrphanInterval(manager, interval, interval->previous_); +} + +// Given a new cost interval defined by its start at index, its last value and +// distance_cost, add its contributions to the previous intervals and costs. +// If handling the interval or one of its subintervals becomes to heavy, its +// contribution is added to the costs right away. +static WEBP_INLINE void PushInterval(CostManager* const manager, + double distance_cost, int index, + int last) { + size_t i; + CostInterval* interval = manager->head_; + CostInterval* interval_next; + const CostCacheInterval* const cost_cache_intervals = + manager->cache_intervals_; + + for (i = 0; i < manager->cache_intervals_size_ && + cost_cache_intervals[i].start_ < last; + ++i) { + // Define the intersection of the ith interval with the new one. + int start = index + cost_cache_intervals[i].start_; + const int end = index + (cost_cache_intervals[i].end_ > last + ? last + : cost_cache_intervals[i].end_); + const double lower_in = cost_cache_intervals[i].lower_; + const double upper_in = cost_cache_intervals[i].upper_; + const double lower_full_in = distance_cost + lower_in; + const double upper_full_in = distance_cost + upper_in; + + if (cost_cache_intervals[i].do_write_) { + UpdateCostPerInterval(manager, start, end, index, distance_cost); + continue; + } + + for (; interval != NULL && interval->start_ < end && start < end; + interval = interval_next) { + const double lower_full_interval = + interval->distance_cost_ + interval->lower_; + const double upper_full_interval = + interval->distance_cost_ + interval->upper_; + + interval_next = interval->next_; + + // Make sure we have some overlap + if (start >= interval->end_) continue; + + if (lower_full_in >= upper_full_interval) { + // When intervals are represented, the lower, the better. + // [**********************************************************] + // start end + // [----------------------------------] + // interval->start_ interval->end_ + // If we are worse than what we already have, add whatever we have so + // far up to interval. + const int start_new = interval->end_; + InsertInterval(manager, interval, distance_cost, lower_in, upper_in, + index, start, interval->start_); + start = start_new; + continue; + } + + // We know the two intervals intersect. + if (upper_full_in >= lower_full_interval) { + // There is no clear cut on which is best, so let's keep both. + // [*********[*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*]***********] + // start interval->start_ interval->end_ end + // OR + // [*********[*-*-*-*-*-*-*-*-*-*-*-]----------------------] + // start interval->start_ end interval->end_ + const int end_new = (interval->end_ <= end) ? interval->end_ : end; + InsertInterval(manager, interval, distance_cost, lower_in, upper_in, + index, start, end_new); + start = end_new; + } else if (start <= interval->start_ && interval->end_ <= end) { + // [----------------------------------] + // interval->start_ interval->end_ + // [**************************************************************] + // start end + // We can safely remove the old interval as it is fully included. + PopInterval(manager, interval); + } else { + if (interval->start_ <= start && end <= interval->end_) { + // [--------------------------------------------------------------] + // interval->start_ interval->end_ + // [*****************************] + // start end + // We have to split the old interval as it fully contains the new one. + const int end_original = interval->end_; + interval->end_ = start; + InsertInterval(manager, interval, interval->distance_cost_, + interval->lower_, interval->upper_, interval->index_, + end, end_original); + } else if (interval->start_ < start) { + // [------------------------------------] + // interval->start_ interval->end_ + // [*****************************] + // start end + interval->end_ = start; + } else { + // [------------------------------------] + // interval->start_ interval->end_ + // [*****************************] + // start end + interval->start_ = end; + } + + // The interval has been modified, we need to reposition it or write it. + RepositionInterval(manager, interval); + } + } + // Insert the remaining interval from start to end. + InsertInterval(manager, interval, distance_cost, lower_in, upper_in, index, + start, end); + } +} + +static int BackwardReferencesHashChainDistanceOnly( + int xsize, int ysize, const uint32_t* const argb, int quality, + int cache_bits, const VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs, uint16_t* const dist_array) { + int i; + int ok = 0; + int cc_init = 0; + const int pix_count = xsize * ysize; + const int use_color_cache = (cache_bits > 0); + const size_t literal_array_size = sizeof(double) * + (NUM_LITERAL_CODES + NUM_LENGTH_CODES + + ((cache_bits > 0) ? (1 << cache_bits) : 0)); + const size_t cost_model_size = sizeof(CostModel) + literal_array_size; + CostModel* const cost_model = + (CostModel*)WebPSafeCalloc(1ULL, cost_model_size); + VP8LColorCache hashers; + const int skip_length = 32 + quality; + const int skip_min_distance_code = 2; + CostManager* cost_manager = + (CostManager*)WebPSafeMalloc(1ULL, sizeof(*cost_manager)); + + if (cost_model == NULL || cost_manager == NULL) goto Error; + + cost_model->literal_ = (double*)(cost_model + 1); + if (use_color_cache) { + cc_init = VP8LColorCacheInit(&hashers, cache_bits); + if (!cc_init) goto Error; + } + + if (!CostModelBuild(cost_model, cache_bits, refs)) { + goto Error; + } + + if (!CostManagerInit(cost_manager, dist_array, pix_count, cost_model)) { + goto Error; + } + + // We loop one pixel at a time, but store all currently best points to + // non-processed locations from this point. + dist_array[0] = 0; + // Add first pixel as literal. + AddSingleLiteralWithCostModel(argb + 0, &hashers, cost_model, 0, + use_color_cache, 0.0, cost_manager->costs_, + dist_array); + + for (i = 1; i < pix_count - 1; ++i) { + int offset = 0, len = 0; + double prev_cost = cost_manager->costs_[i - 1]; + HashChainFindCopy(hash_chain, i, &offset, &len); + if (len >= MIN_LENGTH) { + const int code = DistanceToPlaneCode(xsize, offset); + const double offset_cost = GetDistanceCost(cost_model, code); + const int first_i = i; + int j_max = 0, interval_ends_index = 0; + const int is_offset_zero = (offset_cost == 0.); + + if (!is_offset_zero) { + j_max = (int)ceil( + (cost_manager->max_cost_cache_ - cost_manager->min_cost_cache_) / + offset_cost); + if (j_max < 1) { + j_max = 1; + } else if (j_max > cost_manager->interval_ends_size_ - 1) { + // This could only happen in the case of MAX_LENGTH. + j_max = cost_manager->interval_ends_size_ - 1; + } + } // else j_max is unused anyway. + + // Instead of considering all contributions from a pixel i by calling: + // PushInterval(cost_manager, prev_cost + offset_cost, i, len); + // we optimize these contributions in case offset_cost stays the same for + // consecutive pixels. This describes a set of pixels similar to a + // previous set (e.g. constant color regions). + for (; i < pix_count - 1; ++i) { + int offset_next, len_next; + prev_cost = cost_manager->costs_[i - 1]; + + if (is_offset_zero) { + // No optimization can be made so we just push all of the + // contributions from i. + PushInterval(cost_manager, prev_cost, i, len); + } else { + // j_max is chosen as the smallest j such that: + // max of cost_cache_ < j*offset cost + min of cost_cache_ + // Therefore, the pixel influenced by i-j_max, cannot be influenced + // by i. Only the costs after the end of what i contributed need to be + // updated. cost_manager->interval_ends_ is a circular buffer that + // stores those ends. + const double distance_cost = prev_cost + offset_cost; + int j = cost_manager->interval_ends_[interval_ends_index]; + if (i - first_i <= j_max || + !IsCostCacheIntervalWritable(j, i + len)) { + PushInterval(cost_manager, distance_cost, i, len); + } else { + for (; j < i + len; ++j) { + UpdateCost(cost_manager, j, i, distance_cost); + } + } + // Store the new end in the circular buffer. + assert(interval_ends_index < cost_manager->interval_ends_size_); + cost_manager->interval_ends_[interval_ends_index] = i + len; + if (++interval_ends_index > j_max) interval_ends_index = 0; + } + + // Check whether i is the last pixel to consider, as it is handled + // differently. + if (i + 1 >= pix_count - 1) break; + HashChainFindCopy(hash_chain, i + 1, &offset_next, &len_next); + if (offset_next != offset) break; + len = len_next; + UpdateCostPerIndex(cost_manager, i); + AddSingleLiteralWithCostModel(argb + i, &hashers, cost_model, i, + use_color_cache, prev_cost, + cost_manager->costs_, dist_array); + } + // Submit the last pixel. + UpdateCostPerIndex(cost_manager, i + 1); + + // This if is for speedup only. It roughly doubles the speed, and + // makes compression worse by .1 %. + if (len >= skip_length && code <= skip_min_distance_code) { + // Long copy for short distances, let's skip the middle + // lookups for better copies. + // 1) insert the hashes. + if (use_color_cache) { + int k; + for (k = 0; k < len; ++k) { + VP8LColorCacheInsert(&hashers, argb[i + k]); + } + } + // 2) jump. + { + const int i_next = i + len - 1; // for loop does ++i, thus -1 here. + for (; i <= i_next; ++i) UpdateCostPerIndex(cost_manager, i + 1); + i = i_next; + } + goto next_symbol; + } + if (len > MIN_LENGTH) { + int code_min_length; + double cost_total; + offset = HashChainFindOffset(hash_chain, i); + code_min_length = DistanceToPlaneCode(xsize, offset); + cost_total = prev_cost + + GetDistanceCost(cost_model, code_min_length) + + GetLengthCost(cost_model, 1); + if (cost_manager->costs_[i + 1] > cost_total) { + cost_manager->costs_[i + 1] = (float)cost_total; + dist_array[i + 1] = 2; + } + } + } else { // len < MIN_LENGTH + UpdateCostPerIndex(cost_manager, i + 1); + } + + AddSingleLiteralWithCostModel(argb + i, &hashers, cost_model, i, + use_color_cache, prev_cost, + cost_manager->costs_, dist_array); + + next_symbol: ; + } + // Handle the last pixel. + if (i == (pix_count - 1)) { + AddSingleLiteralWithCostModel( + argb + i, &hashers, cost_model, i, use_color_cache, + cost_manager->costs_[pix_count - 2], cost_manager->costs_, dist_array); + } + + ok = !refs->error_; + Error: + if (cc_init) VP8LColorCacheClear(&hashers); + CostManagerClear(cost_manager); + WebPSafeFree(cost_model); + WebPSafeFree(cost_manager); + return ok; +} + +// We pack the path at the end of *dist_array and return +// a pointer to this part of the array. Example: +// dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232] +static void TraceBackwards(uint16_t* const dist_array, + int dist_array_size, + uint16_t** const chosen_path, + int* const chosen_path_size) { + uint16_t* path = dist_array + dist_array_size; + uint16_t* cur = dist_array + dist_array_size - 1; + while (cur >= dist_array) { + const int k = *cur; + --path; + *path = k; + cur -= k; + } + *chosen_path = path; + *chosen_path_size = (int)(dist_array + dist_array_size - path); +} + +static int BackwardReferencesHashChainFollowChosenPath( + const uint32_t* const argb, int cache_bits, + const uint16_t* const chosen_path, int chosen_path_size, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) { + const int use_color_cache = (cache_bits > 0); + int ix; + int i = 0; + int ok = 0; + int cc_init = 0; + VP8LColorCache hashers; + + if (use_color_cache) { + cc_init = VP8LColorCacheInit(&hashers, cache_bits); + if (!cc_init) goto Error; + } + + ClearBackwardRefs(refs); + for (ix = 0; ix < chosen_path_size; ++ix) { + const int len = chosen_path[ix]; + if (len != 1) { + int k; + const int offset = HashChainFindOffset(hash_chain, i); + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); + if (use_color_cache) { + for (k = 0; k < len; ++k) { + VP8LColorCacheInsert(&hashers, argb[i + k]); + } + } + i += len; + } else { + PixOrCopy v; + if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) { + // push pixel as a color cache index + const int idx = VP8LColorCacheGetIndex(&hashers, argb[i]); + v = PixOrCopyCreateCacheIdx(idx); + } else { + if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]); + v = PixOrCopyCreateLiteral(argb[i]); + } + BackwardRefsCursorAdd(refs, v); + ++i; + } + } + ok = !refs->error_; + Error: + if (cc_init) VP8LColorCacheClear(&hashers); + return ok; +} + +// Returns 1 on success. +static int BackwardReferencesTraceBackwards( + int xsize, int ysize, const uint32_t* const argb, int quality, + int cache_bits, const VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs) { + int ok = 0; + const int dist_array_size = xsize * ysize; + uint16_t* chosen_path = NULL; + int chosen_path_size = 0; + uint16_t* dist_array = + (uint16_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array)); + + if (dist_array == NULL) goto Error; + + if (!BackwardReferencesHashChainDistanceOnly( + xsize, ysize, argb, quality, cache_bits, hash_chain, + refs, dist_array)) { + goto Error; + } + TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size); + if (!BackwardReferencesHashChainFollowChosenPath( + argb, cache_bits, chosen_path, chosen_path_size, hash_chain, refs)) { + goto Error; + } + ok = 1; + Error: + WebPSafeFree(dist_array); + return ok; +} + +static void BackwardReferences2DLocality(int xsize, + const VP8LBackwardRefs* const refs) { + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + while (VP8LRefsCursorOk(&c)) { + if (PixOrCopyIsCopy(c.cur_pos)) { + const int dist = c.cur_pos->argb_or_distance; + const int transformed_dist = DistanceToPlaneCode(xsize, dist); + c.cur_pos->argb_or_distance = transformed_dist; + } + VP8LRefsCursorNext(&c); + } +} + +// Returns entropy for the given cache bits. +static double ComputeCacheEntropy(const uint32_t* argb, + const VP8LBackwardRefs* const refs, + int cache_bits) { + const int use_color_cache = (cache_bits > 0); + int cc_init = 0; + double entropy = MAX_ENTROPY; + const double kSmallPenaltyForLargeCache = 4.0; + VP8LColorCache hashers; + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + VP8LHistogram* histo = VP8LAllocateHistogram(cache_bits); + if (histo == NULL) goto Error; + + if (use_color_cache) { + cc_init = VP8LColorCacheInit(&hashers, cache_bits); + if (!cc_init) goto Error; + } + if (!use_color_cache) { + while (VP8LRefsCursorOk(&c)) { + VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos); + VP8LRefsCursorNext(&c); + } + } else { + while (VP8LRefsCursorOk(&c)) { + const PixOrCopy* const v = c.cur_pos; + if (PixOrCopyIsLiteral(v)) { + const uint32_t pix = *argb++; + const uint32_t key = VP8LColorCacheGetIndex(&hashers, pix); + if (VP8LColorCacheLookup(&hashers, key) == pix) { + ++histo->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key]; + } else { + VP8LColorCacheSet(&hashers, key, pix); + ++histo->blue_[pix & 0xff]; + ++histo->literal_[(pix >> 8) & 0xff]; + ++histo->red_[(pix >> 16) & 0xff]; + ++histo->alpha_[pix >> 24]; + } + } else { + int len = PixOrCopyLength(v); + int code, extra_bits; + VP8LPrefixEncodeBits(len, &code, &extra_bits); + ++histo->literal_[NUM_LITERAL_CODES + code]; + VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits); + ++histo->distance_[code]; + do { + VP8LColorCacheInsert(&hashers, *argb++); + } while(--len != 0); + } + VP8LRefsCursorNext(&c); + } + } + entropy = VP8LHistogramEstimateBits(histo) + + kSmallPenaltyForLargeCache * cache_bits; + Error: + if (cc_init) VP8LColorCacheClear(&hashers); + VP8LFreeHistogram(histo); + return entropy; +} + +// Evaluate optimal cache bits for the local color cache. +// The input *best_cache_bits sets the maximum cache bits to use (passing 0 +// implies disabling the local color cache). The local color cache is also +// disabled for the lower (<= 25) quality. +// Returns 0 in case of memory error. +static int CalculateBestCacheSize(const uint32_t* const argb, + int xsize, int ysize, int quality, + const VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs, + int* const lz77_computed, + int* const best_cache_bits) { + int eval_low = 1; + int eval_high = 1; + double entropy_low = MAX_ENTROPY; + double entropy_high = MAX_ENTROPY; + const double cost_mul = 5e-4; + int cache_bits_low = 0; + int cache_bits_high = (quality <= 25) ? 0 : *best_cache_bits; + + assert(cache_bits_high <= MAX_COLOR_CACHE_BITS); + + *lz77_computed = 0; + if (cache_bits_high == 0) { + *best_cache_bits = 0; + // Local color cache is disabled. + return 1; + } + if (!BackwardReferencesLz77(xsize, ysize, argb, cache_bits_low, hash_chain, + refs)) { + return 0; + } + // Do a binary search to find the optimal entropy for cache_bits. + while (eval_low || eval_high) { + if (eval_low) { + entropy_low = ComputeCacheEntropy(argb, refs, cache_bits_low); + entropy_low += entropy_low * cache_bits_low * cost_mul; + eval_low = 0; + } + if (eval_high) { + entropy_high = ComputeCacheEntropy(argb, refs, cache_bits_high); + entropy_high += entropy_high * cache_bits_high * cost_mul; + eval_high = 0; + } + if (entropy_high < entropy_low) { + const int prev_cache_bits_low = cache_bits_low; + *best_cache_bits = cache_bits_high; + cache_bits_low = (cache_bits_low + cache_bits_high) / 2; + if (cache_bits_low != prev_cache_bits_low) eval_low = 1; + } else { + *best_cache_bits = cache_bits_low; + cache_bits_high = (cache_bits_low + cache_bits_high) / 2; + if (cache_bits_high != cache_bits_low) eval_high = 1; + } + } + *lz77_computed = 1; + return 1; +} + +// Update (in-place) backward references for specified cache_bits. +static int BackwardRefsWithLocalCache(const uint32_t* const argb, + int cache_bits, + VP8LBackwardRefs* const refs) { + int pixel_index = 0; + VP8LColorCache hashers; + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + if (!VP8LColorCacheInit(&hashers, cache_bits)) return 0; + + while (VP8LRefsCursorOk(&c)) { + PixOrCopy* const v = c.cur_pos; + if (PixOrCopyIsLiteral(v)) { + const uint32_t argb_literal = v->argb_or_distance; + if (VP8LColorCacheContains(&hashers, argb_literal)) { + const int ix = VP8LColorCacheGetIndex(&hashers, argb_literal); + *v = PixOrCopyCreateCacheIdx(ix); + } else { + VP8LColorCacheInsert(&hashers, argb_literal); + } + ++pixel_index; + } else { + // refs was created without local cache, so it can not have cache indexes. + int k; + assert(PixOrCopyIsCopy(v)); + for (k = 0; k < v->len; ++k) { + VP8LColorCacheInsert(&hashers, argb[pixel_index++]); + } + } + VP8LRefsCursorNext(&c); + } + VP8LColorCacheClear(&hashers); + return 1; +} + +static VP8LBackwardRefs* GetBackwardReferencesLowEffort( + int width, int height, const uint32_t* const argb, + int* const cache_bits, const VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2]) { + VP8LBackwardRefs* refs_lz77 = &refs_array[0]; + *cache_bits = 0; + if (!BackwardReferencesLz77(width, height, argb, 0, hash_chain, refs_lz77)) { + return NULL; + } + BackwardReferences2DLocality(width, refs_lz77); + return refs_lz77; +} + +static VP8LBackwardRefs* GetBackwardReferences( + int width, int height, const uint32_t* const argb, int quality, + int* const cache_bits, const VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2]) { + int lz77_is_useful; + int lz77_computed; + double bit_cost_lz77, bit_cost_rle; + VP8LBackwardRefs* best = NULL; + VP8LBackwardRefs* refs_lz77 = &refs_array[0]; + VP8LBackwardRefs* refs_rle = &refs_array[1]; + VP8LHistogram* histo = NULL; + + if (!CalculateBestCacheSize(argb, width, height, quality, hash_chain, + refs_lz77, &lz77_computed, cache_bits)) { + goto Error; + } + + if (lz77_computed) { + // Transform refs_lz77 for the optimized cache_bits. + if (*cache_bits > 0) { + if (!BackwardRefsWithLocalCache(argb, *cache_bits, refs_lz77)) { + goto Error; + } + } + } else { + if (!BackwardReferencesLz77(width, height, argb, *cache_bits, hash_chain, + refs_lz77)) { + goto Error; + } + } + + if (!BackwardReferencesRle(width, height, argb, *cache_bits, refs_rle)) { + goto Error; + } + + histo = VP8LAllocateHistogram(*cache_bits); + if (histo == NULL) goto Error; + + { + // Evaluate LZ77 coding. + VP8LHistogramCreate(histo, refs_lz77, *cache_bits); + bit_cost_lz77 = VP8LHistogramEstimateBits(histo); + // Evaluate RLE coding. + VP8LHistogramCreate(histo, refs_rle, *cache_bits); + bit_cost_rle = VP8LHistogramEstimateBits(histo); + // Decide if LZ77 is useful. + lz77_is_useful = (bit_cost_lz77 < bit_cost_rle); + } + + // Choose appropriate backward reference. + if (lz77_is_useful) { + // TraceBackwards is costly. Don't execute it at lower quality. + const int try_lz77_trace_backwards = (quality >= 25); + best = refs_lz77; // default guess: lz77 is better + if (try_lz77_trace_backwards) { + VP8LBackwardRefs* const refs_trace = refs_rle; + if (!VP8LBackwardRefsCopy(refs_lz77, refs_trace)) { + best = NULL; + goto Error; + } + if (BackwardReferencesTraceBackwards(width, height, argb, quality, + *cache_bits, hash_chain, + refs_trace)) { + double bit_cost_trace; + // Evaluate LZ77 coding. + VP8LHistogramCreate(histo, refs_trace, *cache_bits); + bit_cost_trace = VP8LHistogramEstimateBits(histo); + if (bit_cost_trace < bit_cost_lz77) { + best = refs_trace; + } + } + } + } else { + best = refs_rle; + } + + BackwardReferences2DLocality(width, best); + + Error: + VP8LFreeHistogram(histo); + return best; +} + +VP8LBackwardRefs* VP8LGetBackwardReferences( + int width, int height, const uint32_t* const argb, int quality, + int low_effort, int* const cache_bits, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2]) { + if (low_effort) { + return GetBackwardReferencesLowEffort(width, height, argb, cache_bits, + hash_chain, refs_array); + } else { + return GetBackwardReferences(width, height, argb, quality, cache_bits, + hash_chain, refs_array); + } +} diff --git a/thirdparty/libwebp/enc/backward_references.h b/thirdparty/libwebp/enc/backward_references.h new file mode 100644 index 0000000000..0cadb11e11 --- /dev/null +++ b/thirdparty/libwebp/enc/backward_references.h @@ -0,0 +1,206 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Author: Jyrki Alakuijala (jyrki@google.com) +// + +#ifndef WEBP_ENC_BACKWARD_REFERENCES_H_ +#define WEBP_ENC_BACKWARD_REFERENCES_H_ + +#include <assert.h> +#include <stdlib.h> +#include "../webp/types.h" +#include "../webp/format_constants.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// The maximum allowed limit is 11. +#define MAX_COLOR_CACHE_BITS 10 + +// ----------------------------------------------------------------------------- +// PixOrCopy + +enum Mode { + kLiteral, + kCacheIdx, + kCopy, + kNone +}; + +typedef struct { + // mode as uint8_t to make the memory layout to be exactly 8 bytes. + uint8_t mode; + uint16_t len; + uint32_t argb_or_distance; +} PixOrCopy; + +static WEBP_INLINE PixOrCopy PixOrCopyCreateCopy(uint32_t distance, + uint16_t len) { + PixOrCopy retval; + retval.mode = kCopy; + retval.argb_or_distance = distance; + retval.len = len; + return retval; +} + +static WEBP_INLINE PixOrCopy PixOrCopyCreateCacheIdx(int idx) { + PixOrCopy retval; + assert(idx >= 0); + assert(idx < (1 << MAX_COLOR_CACHE_BITS)); + retval.mode = kCacheIdx; + retval.argb_or_distance = idx; + retval.len = 1; + return retval; +} + +static WEBP_INLINE PixOrCopy PixOrCopyCreateLiteral(uint32_t argb) { + PixOrCopy retval; + retval.mode = kLiteral; + retval.argb_or_distance = argb; + retval.len = 1; + return retval; +} + +static WEBP_INLINE int PixOrCopyIsLiteral(const PixOrCopy* const p) { + return (p->mode == kLiteral); +} + +static WEBP_INLINE int PixOrCopyIsCacheIdx(const PixOrCopy* const p) { + return (p->mode == kCacheIdx); +} + +static WEBP_INLINE int PixOrCopyIsCopy(const PixOrCopy* const p) { + return (p->mode == kCopy); +} + +static WEBP_INLINE uint32_t PixOrCopyLiteral(const PixOrCopy* const p, + int component) { + assert(p->mode == kLiteral); + return (p->argb_or_distance >> (component * 8)) & 0xff; +} + +static WEBP_INLINE uint32_t PixOrCopyLength(const PixOrCopy* const p) { + return p->len; +} + +static WEBP_INLINE uint32_t PixOrCopyArgb(const PixOrCopy* const p) { + assert(p->mode == kLiteral); + return p->argb_or_distance; +} + +static WEBP_INLINE uint32_t PixOrCopyCacheIdx(const PixOrCopy* const p) { + assert(p->mode == kCacheIdx); + assert(p->argb_or_distance < (1U << MAX_COLOR_CACHE_BITS)); + return p->argb_or_distance; +} + +static WEBP_INLINE uint32_t PixOrCopyDistance(const PixOrCopy* const p) { + assert(p->mode == kCopy); + return p->argb_or_distance; +} + +// ----------------------------------------------------------------------------- +// VP8LHashChain + +#define HASH_BITS 18 +#define HASH_SIZE (1 << HASH_BITS) + +typedef struct VP8LHashChain VP8LHashChain; +struct VP8LHashChain { + // The 20 most significant bits contain the offset at which the best match + // is found. These 20 bits are the limit defined by GetWindowSizeForHashChain + // (through WINDOW_SIZE = 1<<20). + // The lower 12 bits contain the length of the match. The 12 bit limit is + // defined in MaxFindCopyLength with MAX_LENGTH=4096. + uint32_t* offset_length_; + // This is the maximum size of the hash_chain that can be constructed. + // Typically this is the pixel count (width x height) for a given image. + int size_; +}; + +// Must be called first, to set size. +int VP8LHashChainInit(VP8LHashChain* const p, int size); +// Pre-compute the best matches for argb. +int VP8LHashChainFill(VP8LHashChain* const p, int quality, + const uint32_t* const argb, int xsize, int ysize); +void VP8LHashChainClear(VP8LHashChain* const p); // release memory + +// ----------------------------------------------------------------------------- +// VP8LBackwardRefs (block-based backward-references storage) + +// maximum number of reference blocks the image will be segmented into +#define MAX_REFS_BLOCK_PER_IMAGE 16 + +typedef struct PixOrCopyBlock PixOrCopyBlock; // forward declaration +typedef struct VP8LBackwardRefs VP8LBackwardRefs; + +// Container for blocks chain +struct VP8LBackwardRefs { + int block_size_; // common block-size + int error_; // set to true if some memory error occurred + PixOrCopyBlock* refs_; // list of currently used blocks + PixOrCopyBlock** tail_; // for list recycling + PixOrCopyBlock* free_blocks_; // free-list + PixOrCopyBlock* last_block_; // used for adding new refs (internal) +}; + +// Initialize the object. 'block_size' is the common block size to store +// references (typically, width * height / MAX_REFS_BLOCK_PER_IMAGE). +void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size); +// Release memory for backward references. +void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs); +// Copies the 'src' backward refs to the 'dst'. Returns 0 in case of error. +int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src, + VP8LBackwardRefs* const dst); + +// Cursor for iterating on references content +typedef struct { + // public: + PixOrCopy* cur_pos; // current position + // private: + PixOrCopyBlock* cur_block_; // current block in the refs list + const PixOrCopy* last_pos_; // sentinel for switching to next block +} VP8LRefsCursor; + +// Returns a cursor positioned at the beginning of the references list. +VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs); +// Returns true if cursor is pointing at a valid position. +static WEBP_INLINE int VP8LRefsCursorOk(const VP8LRefsCursor* const c) { + return (c->cur_pos != NULL); +} +// Move to next block of references. Internal, not to be called directly. +void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c); +// Move to next position, or NULL. Should not be called if !VP8LRefsCursorOk(). +static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) { + assert(c != NULL); + assert(VP8LRefsCursorOk(c)); + if (++c->cur_pos == c->last_pos_) VP8LRefsCursorNextBlock(c); +} + +// ----------------------------------------------------------------------------- +// Main entry points + +// Evaluates best possible backward references for specified quality. +// The input cache_bits to 'VP8LGetBackwardReferences' sets the maximum cache +// bits to use (passing 0 implies disabling the local color cache). +// The optimal cache bits is evaluated and set for the *cache_bits parameter. +// The return value is the pointer to the best of the two backward refs viz, +// refs[0] or refs[1]. +VP8LBackwardRefs* VP8LGetBackwardReferences( + int width, int height, const uint32_t* const argb, int quality, + int low_effort, int* const cache_bits, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs[2]); + +#ifdef __cplusplus +} +#endif + +#endif // WEBP_ENC_BACKWARD_REFERENCES_H_ diff --git a/thirdparty/libwebp/enc/config.c b/thirdparty/libwebp/enc/config.c new file mode 100644 index 0000000000..f9f7961d58 --- /dev/null +++ b/thirdparty/libwebp/enc/config.c @@ -0,0 +1,173 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Coding tools configuration +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "../webp/encode.h" + +//------------------------------------------------------------------------------ +// WebPConfig +//------------------------------------------------------------------------------ + +int WebPConfigInitInternal(WebPConfig* config, + WebPPreset preset, float quality, int version) { + if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) { + return 0; // caller/system version mismatch! + } + if (config == NULL) return 0; + + config->quality = quality; + config->target_size = 0; + config->target_PSNR = 0.; + config->method = 4; + config->sns_strength = 50; + config->filter_strength = 60; // mid-filtering + config->filter_sharpness = 0; + config->filter_type = 1; // default: strong (so U/V is filtered too) + config->partitions = 0; + config->segments = 4; + config->pass = 1; + config->show_compressed = 0; + config->preprocessing = 0; + config->autofilter = 0; + config->partition_limit = 0; + config->alpha_compression = 1; + config->alpha_filtering = 1; + config->alpha_quality = 100; + config->lossless = 0; + config->exact = 0; + config->image_hint = WEBP_HINT_DEFAULT; + config->emulate_jpeg_size = 0; + config->thread_level = 0; + config->low_memory = 0; + config->near_lossless = 100; +#ifdef WEBP_EXPERIMENTAL_FEATURES + config->delta_palettization = 0; +#endif // WEBP_EXPERIMENTAL_FEATURES + + // TODO(skal): tune. + switch (preset) { + case WEBP_PRESET_PICTURE: + config->sns_strength = 80; + config->filter_sharpness = 4; + config->filter_strength = 35; + config->preprocessing &= ~2; // no dithering + break; + case WEBP_PRESET_PHOTO: + config->sns_strength = 80; + config->filter_sharpness = 3; + config->filter_strength = 30; + config->preprocessing |= 2; + break; + case WEBP_PRESET_DRAWING: + config->sns_strength = 25; + config->filter_sharpness = 6; + config->filter_strength = 10; + break; + case WEBP_PRESET_ICON: + config->sns_strength = 0; + config->filter_strength = 0; // disable filtering to retain sharpness + config->preprocessing &= ~2; // no dithering + break; + case WEBP_PRESET_TEXT: + config->sns_strength = 0; + config->filter_strength = 0; // disable filtering to retain sharpness + config->preprocessing &= ~2; // no dithering + config->segments = 2; + break; + case WEBP_PRESET_DEFAULT: + default: + break; + } + return WebPValidateConfig(config); +} + +int WebPValidateConfig(const WebPConfig* config) { + if (config == NULL) return 0; + if (config->quality < 0 || config->quality > 100) + return 0; + if (config->target_size < 0) + return 0; + if (config->target_PSNR < 0) + return 0; + if (config->method < 0 || config->method > 6) + return 0; + if (config->segments < 1 || config->segments > 4) + return 0; + if (config->sns_strength < 0 || config->sns_strength > 100) + return 0; + if (config->filter_strength < 0 || config->filter_strength > 100) + return 0; + if (config->filter_sharpness < 0 || config->filter_sharpness > 7) + return 0; + if (config->filter_type < 0 || config->filter_type > 1) + return 0; + if (config->autofilter < 0 || config->autofilter > 1) + return 0; + if (config->pass < 1 || config->pass > 10) + return 0; + if (config->show_compressed < 0 || config->show_compressed > 1) + return 0; + if (config->preprocessing < 0 || config->preprocessing > 7) + return 0; + if (config->partitions < 0 || config->partitions > 3) + return 0; + if (config->partition_limit < 0 || config->partition_limit > 100) + return 0; + if (config->alpha_compression < 0) + return 0; + if (config->alpha_filtering < 0) + return 0; + if (config->alpha_quality < 0 || config->alpha_quality > 100) + return 0; + if (config->lossless < 0 || config->lossless > 1) + return 0; + if (config->near_lossless < 0 || config->near_lossless > 100) + return 0; + if (config->image_hint >= WEBP_HINT_LAST) + return 0; + if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1) + return 0; + if (config->thread_level < 0 || config->thread_level > 1) + return 0; + if (config->low_memory < 0 || config->low_memory > 1) + return 0; + if (config->exact < 0 || config->exact > 1) + return 0; +#ifdef WEBP_EXPERIMENTAL_FEATURES + if (config->delta_palettization < 0 || config->delta_palettization > 1) + return 0; +#endif // WEBP_EXPERIMENTAL_FEATURES + return 1; +} + +//------------------------------------------------------------------------------ + +#define MAX_LEVEL 9 + +// Mapping between -z level and -m / -q parameter settings. +static const struct { + uint8_t method_; + uint8_t quality_; +} kLosslessPresets[MAX_LEVEL + 1] = { + { 0, 0 }, { 1, 20 }, { 2, 25 }, { 3, 30 }, { 3, 50 }, + { 4, 50 }, { 4, 75 }, { 4, 90 }, { 5, 90 }, { 6, 100 } +}; + +int WebPConfigLosslessPreset(WebPConfig* config, int level) { + if (config == NULL || level < 0 || level > MAX_LEVEL) return 0; + config->lossless = 1; + config->method = kLosslessPresets[level].method_; + config->quality = kLosslessPresets[level].quality_; + return 1; +} + +//------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/enc/cost.c b/thirdparty/libwebp/enc/cost.c new file mode 100644 index 0000000000..ae7fe01388 --- /dev/null +++ b/thirdparty/libwebp/enc/cost.c @@ -0,0 +1,354 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Cost tables for level and modes +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "./cost.h" + +//------------------------------------------------------------------------------ +// Level cost tables + +// For each given level, the following table gives the pattern of contexts to +// use for coding it (in [][0]) as well as the bit value to use for each +// context (in [][1]). +const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = { + {0x001, 0x000}, {0x007, 0x001}, {0x00f, 0x005}, + {0x00f, 0x00d}, {0x033, 0x003}, {0x033, 0x003}, {0x033, 0x023}, + {0x033, 0x023}, {0x033, 0x023}, {0x033, 0x023}, {0x0d3, 0x013}, + {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013}, + {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x093}, + {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, + {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, + {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, + {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x153, 0x053}, + {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, + {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, + {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, + {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, + {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, + {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, + {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, + {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x153} +}; + +static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) { + int pattern = VP8LevelCodes[level - 1][0]; + int bits = VP8LevelCodes[level - 1][1]; + int cost = 0; + int i; + for (i = 2; pattern; ++i) { + if (pattern & 1) { + cost += VP8BitCost(bits & 1, probas[i]); + } + bits >>= 1; + pattern >>= 1; + } + return cost; +} + +//------------------------------------------------------------------------------ +// Pre-calc level costs once for all + +void VP8CalculateLevelCosts(VP8EncProba* const proba) { + int ctype, band, ctx; + + if (!proba->dirty_) return; // nothing to do. + + for (ctype = 0; ctype < NUM_TYPES; ++ctype) { + int n; + for (band = 0; band < NUM_BANDS; ++band) { + for (ctx = 0; ctx < NUM_CTX; ++ctx) { + const uint8_t* const p = proba->coeffs_[ctype][band][ctx]; + uint16_t* const table = proba->level_cost_[ctype][band][ctx]; + const int cost0 = (ctx > 0) ? VP8BitCost(1, p[0]) : 0; + const int cost_base = VP8BitCost(1, p[1]) + cost0; + int v; + table[0] = VP8BitCost(0, p[1]) + cost0; + for (v = 1; v <= MAX_VARIABLE_LEVEL; ++v) { + table[v] = cost_base + VariableLevelCost(v, p); + } + // Starting at level 67 and up, the variable part of the cost is + // actually constant. + } + } + for (n = 0; n < 16; ++n) { // replicate bands. We don't need to sentinel. + for (ctx = 0; ctx < NUM_CTX; ++ctx) { + proba->remapped_costs_[ctype][n][ctx] = + proba->level_cost_[ctype][VP8EncBands[n]][ctx]; + } + } + } + proba->dirty_ = 0; +} + +//------------------------------------------------------------------------------ +// Mode cost tables. + +// These are the fixed probabilities (in the coding trees) turned into bit-cost +// by calling VP8BitCost(). +const uint16_t VP8FixedCostsUV[4] = { 302, 984, 439, 642 }; +// note: these values include the fixed VP8BitCost(1, 145) mode selection cost. +const uint16_t VP8FixedCostsI16[4] = { 663, 919, 872, 919 }; +const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = { + { { 40, 1151, 1723, 1874, 2103, 2019, 1628, 1777, 2226, 2137 }, + { 192, 469, 1296, 1308, 1849, 1794, 1781, 1703, 1713, 1522 }, + { 142, 910, 762, 1684, 1849, 1576, 1460, 1305, 1801, 1657 }, + { 559, 641, 1370, 421, 1182, 1569, 1612, 1725, 863, 1007 }, + { 299, 1059, 1256, 1108, 636, 1068, 1581, 1883, 869, 1142 }, + { 277, 1111, 707, 1362, 1089, 672, 1603, 1541, 1545, 1291 }, + { 214, 781, 1609, 1303, 1632, 2229, 726, 1560, 1713, 918 }, + { 152, 1037, 1046, 1759, 1983, 2174, 1358, 742, 1740, 1390 }, + { 512, 1046, 1420, 753, 752, 1297, 1486, 1613, 460, 1207 }, + { 424, 827, 1362, 719, 1462, 1202, 1199, 1476, 1199, 538 } }, + { { 240, 402, 1134, 1491, 1659, 1505, 1517, 1555, 1979, 2099 }, + { 467, 242, 960, 1232, 1714, 1620, 1834, 1570, 1676, 1391 }, + { 500, 455, 463, 1507, 1699, 1282, 1564, 982, 2114, 2114 }, + { 672, 643, 1372, 331, 1589, 1667, 1453, 1938, 996, 876 }, + { 458, 783, 1037, 911, 738, 968, 1165, 1518, 859, 1033 }, + { 504, 815, 504, 1139, 1219, 719, 1506, 1085, 1268, 1268 }, + { 333, 630, 1445, 1239, 1883, 3672, 799, 1548, 1865, 598 }, + { 399, 644, 746, 1342, 1856, 1350, 1493, 613, 1855, 1015 }, + { 622, 749, 1205, 608, 1066, 1408, 1290, 1406, 546, 971 }, + { 500, 753, 1041, 668, 1230, 1617, 1297, 1425, 1383, 523 } }, + { { 394, 553, 523, 1502, 1536, 981, 1608, 1142, 1666, 2181 }, + { 655, 430, 375, 1411, 1861, 1220, 1677, 1135, 1978, 1553 }, + { 690, 640, 245, 1954, 2070, 1194, 1528, 982, 1972, 2232 }, + { 559, 834, 741, 867, 1131, 980, 1225, 852, 1092, 784 }, + { 690, 875, 516, 959, 673, 894, 1056, 1190, 1528, 1126 }, + { 740, 951, 384, 1277, 1177, 492, 1579, 1155, 1846, 1513 }, + { 323, 775, 1062, 1776, 3062, 1274, 813, 1188, 1372, 655 }, + { 488, 971, 484, 1767, 1515, 1775, 1115, 503, 1539, 1461 }, + { 740, 1006, 998, 709, 851, 1230, 1337, 788, 741, 721 }, + { 522, 1073, 573, 1045, 1346, 887, 1046, 1146, 1203, 697 } }, + { { 105, 864, 1442, 1009, 1934, 1840, 1519, 1920, 1673, 1579 }, + { 534, 305, 1193, 683, 1388, 2164, 1802, 1894, 1264, 1170 }, + { 305, 518, 877, 1108, 1426, 3215, 1425, 1064, 1320, 1242 }, + { 683, 732, 1927, 257, 1493, 2048, 1858, 1552, 1055, 947 }, + { 394, 814, 1024, 660, 959, 1556, 1282, 1289, 893, 1047 }, + { 528, 615, 996, 940, 1201, 635, 1094, 2515, 803, 1358 }, + { 347, 614, 1609, 1187, 3133, 1345, 1007, 1339, 1017, 667 }, + { 218, 740, 878, 1605, 3650, 3650, 1345, 758, 1357, 1617 }, + { 672, 750, 1541, 558, 1257, 1599, 1870, 2135, 402, 1087 }, + { 592, 684, 1161, 430, 1092, 1497, 1475, 1489, 1095, 822 } }, + { { 228, 1056, 1059, 1368, 752, 982, 1512, 1518, 987, 1782 }, + { 494, 514, 818, 942, 965, 892, 1610, 1356, 1048, 1363 }, + { 512, 648, 591, 1042, 761, 991, 1196, 1454, 1309, 1463 }, + { 683, 749, 1043, 676, 841, 1396, 1133, 1138, 654, 939 }, + { 622, 1101, 1126, 994, 361, 1077, 1203, 1318, 877, 1219 }, + { 631, 1068, 857, 1650, 651, 477, 1650, 1419, 828, 1170 }, + { 555, 727, 1068, 1335, 3127, 1339, 820, 1331, 1077, 429 }, + { 504, 879, 624, 1398, 889, 889, 1392, 808, 891, 1406 }, + { 683, 1602, 1289, 977, 578, 983, 1280, 1708, 406, 1122 }, + { 399, 865, 1433, 1070, 1072, 764, 968, 1477, 1223, 678 } }, + { { 333, 760, 935, 1638, 1010, 529, 1646, 1410, 1472, 2219 }, + { 512, 494, 750, 1160, 1215, 610, 1870, 1868, 1628, 1169 }, + { 572, 646, 492, 1934, 1208, 603, 1580, 1099, 1398, 1995 }, + { 786, 789, 942, 581, 1018, 951, 1599, 1207, 731, 768 }, + { 690, 1015, 672, 1078, 582, 504, 1693, 1438, 1108, 2897 }, + { 768, 1267, 571, 2005, 1243, 244, 2881, 1380, 1786, 1453 }, + { 452, 899, 1293, 903, 1311, 3100, 465, 1311, 1319, 813 }, + { 394, 927, 942, 1103, 1358, 1104, 946, 593, 1363, 1109 }, + { 559, 1005, 1007, 1016, 658, 1173, 1021, 1164, 623, 1028 }, + { 564, 796, 632, 1005, 1014, 863, 2316, 1268, 938, 764 } }, + { { 266, 606, 1098, 1228, 1497, 1243, 948, 1030, 1734, 1461 }, + { 366, 585, 901, 1060, 1407, 1247, 876, 1134, 1620, 1054 }, + { 452, 565, 542, 1729, 1479, 1479, 1016, 886, 2938, 1150 }, + { 555, 1088, 1533, 950, 1354, 895, 834, 1019, 1021, 496 }, + { 704, 815, 1193, 971, 973, 640, 1217, 2214, 832, 578 }, + { 672, 1245, 579, 871, 875, 774, 872, 1273, 1027, 949 }, + { 296, 1134, 2050, 1784, 1636, 3425, 442, 1550, 2076, 722 }, + { 342, 982, 1259, 1846, 1848, 1848, 622, 568, 1847, 1052 }, + { 555, 1064, 1304, 828, 746, 1343, 1075, 1329, 1078, 494 }, + { 288, 1167, 1285, 1174, 1639, 1639, 833, 2254, 1304, 509 } }, + { { 342, 719, 767, 1866, 1757, 1270, 1246, 550, 1746, 2151 }, + { 483, 653, 694, 1509, 1459, 1410, 1218, 507, 1914, 1266 }, + { 488, 757, 447, 2979, 1813, 1268, 1654, 539, 1849, 2109 }, + { 522, 1097, 1085, 851, 1365, 1111, 851, 901, 961, 605 }, + { 709, 716, 841, 728, 736, 945, 941, 862, 2845, 1057 }, + { 512, 1323, 500, 1336, 1083, 681, 1342, 717, 1604, 1350 }, + { 452, 1155, 1372, 1900, 1501, 3290, 311, 944, 1919, 922 }, + { 403, 1520, 977, 2132, 1733, 3522, 1076, 276, 3335, 1547 }, + { 559, 1374, 1101, 615, 673, 2462, 974, 795, 984, 984 }, + { 547, 1122, 1062, 812, 1410, 951, 1140, 622, 1268, 651 } }, + { { 165, 982, 1235, 938, 1334, 1366, 1659, 1578, 964, 1612 }, + { 592, 422, 925, 847, 1139, 1112, 1387, 2036, 861, 1041 }, + { 403, 837, 732, 770, 941, 1658, 1250, 809, 1407, 1407 }, + { 896, 874, 1071, 381, 1568, 1722, 1437, 2192, 480, 1035 }, + { 640, 1098, 1012, 1032, 684, 1382, 1581, 2106, 416, 865 }, + { 559, 1005, 819, 914, 710, 770, 1418, 920, 838, 1435 }, + { 415, 1258, 1245, 870, 1278, 3067, 770, 1021, 1287, 522 }, + { 406, 990, 601, 1009, 1265, 1265, 1267, 759, 1017, 1277 }, + { 968, 1182, 1329, 788, 1032, 1292, 1705, 1714, 203, 1403 }, + { 732, 877, 1279, 471, 901, 1161, 1545, 1294, 755, 755 } }, + { { 111, 931, 1378, 1185, 1933, 1648, 1148, 1714, 1873, 1307 }, + { 406, 414, 1030, 1023, 1910, 1404, 1313, 1647, 1509, 793 }, + { 342, 640, 575, 1088, 1241, 1349, 1161, 1350, 1756, 1502 }, + { 559, 766, 1185, 357, 1682, 1428, 1329, 1897, 1219, 802 }, + { 473, 909, 1164, 771, 719, 2508, 1427, 1432, 722, 782 }, + { 342, 892, 785, 1145, 1150, 794, 1296, 1550, 973, 1057 }, + { 208, 1036, 1326, 1343, 1606, 3395, 815, 1455, 1618, 712 }, + { 228, 928, 890, 1046, 3499, 1711, 994, 829, 1720, 1318 }, + { 768, 724, 1058, 636, 991, 1075, 1319, 1324, 616, 825 }, + { 305, 1167, 1358, 899, 1587, 1587, 987, 1988, 1332, 501 } } +}; + +//------------------------------------------------------------------------------ +// helper functions for residuals struct VP8Residual. + +void VP8InitResidual(int first, int coeff_type, + VP8Encoder* const enc, VP8Residual* const res) { + res->coeff_type = coeff_type; + res->prob = enc->proba_.coeffs_[coeff_type]; + res->stats = enc->proba_.stats_[coeff_type]; + res->costs = enc->proba_.remapped_costs_[coeff_type]; + res->first = first; +} + +//------------------------------------------------------------------------------ +// Mode costs + +int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) { + const int x = (it->i4_ & 3), y = (it->i4_ >> 2); + VP8Residual res; + VP8Encoder* const enc = it->enc_; + int R = 0; + int ctx; + + VP8InitResidual(0, 3, enc, &res); + ctx = it->top_nz_[x] + it->left_nz_[y]; + VP8SetResidualCoeffs(levels, &res); + R += VP8GetResidualCost(ctx, &res); + return R; +} + +int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) { + VP8Residual res; + VP8Encoder* const enc = it->enc_; + int x, y; + int R = 0; + + VP8IteratorNzToBytes(it); // re-import the non-zero context + + // DC + VP8InitResidual(0, 1, enc, &res); + VP8SetResidualCoeffs(rd->y_dc_levels, &res); + R += VP8GetResidualCost(it->top_nz_[8] + it->left_nz_[8], &res); + + // AC + VP8InitResidual(1, 0, enc, &res); + for (y = 0; y < 4; ++y) { + for (x = 0; x < 4; ++x) { + const int ctx = it->top_nz_[x] + it->left_nz_[y]; + VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); + R += VP8GetResidualCost(ctx, &res); + it->top_nz_[x] = it->left_nz_[y] = (res.last >= 0); + } + } + return R; +} + +int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) { + VP8Residual res; + VP8Encoder* const enc = it->enc_; + int ch, x, y; + int R = 0; + + VP8IteratorNzToBytes(it); // re-import the non-zero context + + VP8InitResidual(0, 2, enc, &res); + for (ch = 0; ch <= 2; ch += 2) { + for (y = 0; y < 2; ++y) { + for (x = 0; x < 2; ++x) { + const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; + VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); + R += VP8GetResidualCost(ctx, &res); + it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = (res.last >= 0); + } + } + } + return R; +} + + +//------------------------------------------------------------------------------ +// Recording of token probabilities. + +// Record proba context used +static int Record(int bit, proba_t* const stats) { + proba_t p = *stats; + if (p >= 0xffff0000u) { // an overflow is inbound. + p = ((p + 1u) >> 1) & 0x7fff7fffu; // -> divide the stats by 2. + } + // record bit count (lower 16 bits) and increment total count (upper 16 bits). + p += 0x00010000u + bit; + *stats = p; + return bit; +} + +// We keep the table-free variant around for reference, in case. +#define USE_LEVEL_CODE_TABLE + +// Simulate block coding, but only record statistics. +// Note: no need to record the fixed probas. +int VP8RecordCoeffs(int ctx, const VP8Residual* const res) { + int n = res->first; + // should be stats[VP8EncBands[n]], but it's equivalent for n=0 or 1 + proba_t* s = res->stats[n][ctx]; + if (res->last < 0) { + Record(0, s + 0); + return 0; + } + while (n <= res->last) { + int v; + Record(1, s + 0); // order of record doesn't matter + while ((v = res->coeffs[n++]) == 0) { + Record(0, s + 1); + s = res->stats[VP8EncBands[n]][0]; + } + Record(1, s + 1); + if (!Record(2u < (unsigned int)(v + 1), s + 2)) { // v = -1 or 1 + s = res->stats[VP8EncBands[n]][1]; + } else { + v = abs(v); +#if !defined(USE_LEVEL_CODE_TABLE) + if (!Record(v > 4, s + 3)) { + if (Record(v != 2, s + 4)) + Record(v == 4, s + 5); + } else if (!Record(v > 10, s + 6)) { + Record(v > 6, s + 7); + } else if (!Record((v >= 3 + (8 << 2)), s + 8)) { + Record((v >= 3 + (8 << 1)), s + 9); + } else { + Record((v >= 3 + (8 << 3)), s + 10); + } +#else + if (v > MAX_VARIABLE_LEVEL) { + v = MAX_VARIABLE_LEVEL; + } + + { + const int bits = VP8LevelCodes[v - 1][1]; + int pattern = VP8LevelCodes[v - 1][0]; + int i; + for (i = 0; (pattern >>= 1) != 0; ++i) { + const int mask = 2 << i; + if (pattern & 1) Record(!!(bits & mask), s + 3 + i); + } + } +#endif + s = res->stats[VP8EncBands[n]][2]; + } + } + if (n < 16) Record(0, s + 0); + return 1; +} + +//------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/enc/cost.h b/thirdparty/libwebp/enc/cost.h new file mode 100644 index 0000000000..20960d6d74 --- /dev/null +++ b/thirdparty/libwebp/enc/cost.h @@ -0,0 +1,68 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Cost tables for level and modes. +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_ENC_COST_H_ +#define WEBP_ENC_COST_H_ + +#include <assert.h> +#include <stdlib.h> +#include "./vp8enci.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// On-the-fly info about the current set of residuals. Handy to avoid +// passing zillions of params. +typedef struct VP8Residual VP8Residual; +struct VP8Residual { + int first; + int last; + const int16_t* coeffs; + + int coeff_type; + ProbaArray* prob; + StatsArray* stats; + CostArrayPtr costs; +}; + +void VP8InitResidual(int first, int coeff_type, + VP8Encoder* const enc, VP8Residual* const res); + +int VP8RecordCoeffs(int ctx, const VP8Residual* const res); + +// Cost of coding one event with probability 'proba'. +static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) { + return !bit ? VP8EntropyCost[proba] : VP8EntropyCost[255 - proba]; +} + +// Level cost calculations +extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2]; +void VP8CalculateLevelCosts(VP8EncProba* const proba); +static WEBP_INLINE int VP8LevelCost(const uint16_t* const table, int level) { + return VP8LevelFixedCosts[level] + + table[(level > MAX_VARIABLE_LEVEL) ? MAX_VARIABLE_LEVEL : level]; +} + +// Mode costs +extern const uint16_t VP8FixedCostsUV[4]; +extern const uint16_t VP8FixedCostsI16[4]; +extern const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES]; + +//------------------------------------------------------------------------------ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_ENC_COST_H_ */ diff --git a/thirdparty/libwebp/enc/delta_palettization.c b/thirdparty/libwebp/enc/delta_palettization.c new file mode 100644 index 0000000000..062e588d79 --- /dev/null +++ b/thirdparty/libwebp/enc/delta_palettization.c @@ -0,0 +1,455 @@ +// Copyright 2015 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Author: Mislav Bradac (mislavm@google.com) +// + +#include "./delta_palettization.h" + +#ifdef WEBP_EXPERIMENTAL_FEATURES +#include "../webp/types.h" +#include "../dsp/lossless.h" + +#define MK_COL(r, g, b) (((r) << 16) + ((g) << 8) + (b)) + +// Format allows palette up to 256 entries, but more palette entries produce +// bigger entropy. In the future it will probably be useful to add more entries +// that are far from the origin of the palette or choose remaining entries +// dynamically. +#define DELTA_PALETTE_SIZE 226 + +// Palette used for delta_palettization. Entries are roughly sorted by distance +// of their signed equivalents from the origin. +static const uint32_t kDeltaPalette[DELTA_PALETTE_SIZE] = { + MK_COL(0u, 0u, 0u), + MK_COL(255u, 255u, 255u), + MK_COL(1u, 1u, 1u), + MK_COL(254u, 254u, 254u), + MK_COL(2u, 2u, 2u), + MK_COL(4u, 4u, 4u), + MK_COL(252u, 252u, 252u), + MK_COL(250u, 0u, 0u), + MK_COL(0u, 250u, 0u), + MK_COL(0u, 0u, 250u), + MK_COL(6u, 0u, 0u), + MK_COL(0u, 6u, 0u), + MK_COL(0u, 0u, 6u), + MK_COL(0u, 0u, 248u), + MK_COL(0u, 0u, 8u), + MK_COL(0u, 248u, 0u), + MK_COL(0u, 248u, 248u), + MK_COL(0u, 248u, 8u), + MK_COL(0u, 8u, 0u), + MK_COL(0u, 8u, 248u), + MK_COL(0u, 8u, 8u), + MK_COL(8u, 8u, 8u), + MK_COL(248u, 0u, 0u), + MK_COL(248u, 0u, 248u), + MK_COL(248u, 0u, 8u), + MK_COL(248u, 248u, 0u), + MK_COL(248u, 8u, 0u), + MK_COL(8u, 0u, 0u), + MK_COL(8u, 0u, 248u), + MK_COL(8u, 0u, 8u), + MK_COL(8u, 248u, 0u), + MK_COL(8u, 8u, 0u), + MK_COL(23u, 23u, 23u), + MK_COL(13u, 13u, 13u), + MK_COL(232u, 232u, 232u), + MK_COL(244u, 244u, 244u), + MK_COL(245u, 245u, 250u), + MK_COL(50u, 50u, 50u), + MK_COL(204u, 204u, 204u), + MK_COL(236u, 236u, 236u), + MK_COL(16u, 16u, 16u), + MK_COL(240u, 16u, 16u), + MK_COL(16u, 240u, 16u), + MK_COL(240u, 240u, 16u), + MK_COL(16u, 16u, 240u), + MK_COL(240u, 16u, 240u), + MK_COL(16u, 240u, 240u), + MK_COL(240u, 240u, 240u), + MK_COL(0u, 0u, 232u), + MK_COL(0u, 232u, 0u), + MK_COL(232u, 0u, 0u), + MK_COL(0u, 0u, 24u), + MK_COL(0u, 24u, 0u), + MK_COL(24u, 0u, 0u), + MK_COL(32u, 32u, 32u), + MK_COL(224u, 32u, 32u), + MK_COL(32u, 224u, 32u), + MK_COL(224u, 224u, 32u), + MK_COL(32u, 32u, 224u), + MK_COL(224u, 32u, 224u), + MK_COL(32u, 224u, 224u), + MK_COL(224u, 224u, 224u), + MK_COL(0u, 0u, 176u), + MK_COL(0u, 0u, 80u), + MK_COL(0u, 176u, 0u), + MK_COL(0u, 176u, 176u), + MK_COL(0u, 176u, 80u), + MK_COL(0u, 80u, 0u), + MK_COL(0u, 80u, 176u), + MK_COL(0u, 80u, 80u), + MK_COL(176u, 0u, 0u), + MK_COL(176u, 0u, 176u), + MK_COL(176u, 0u, 80u), + MK_COL(176u, 176u, 0u), + MK_COL(176u, 80u, 0u), + MK_COL(80u, 0u, 0u), + MK_COL(80u, 0u, 176u), + MK_COL(80u, 0u, 80u), + MK_COL(80u, 176u, 0u), + MK_COL(80u, 80u, 0u), + MK_COL(0u, 0u, 152u), + MK_COL(0u, 0u, 104u), + MK_COL(0u, 152u, 0u), + MK_COL(0u, 152u, 152u), + MK_COL(0u, 152u, 104u), + MK_COL(0u, 104u, 0u), + MK_COL(0u, 104u, 152u), + MK_COL(0u, 104u, 104u), + MK_COL(152u, 0u, 0u), + MK_COL(152u, 0u, 152u), + MK_COL(152u, 0u, 104u), + MK_COL(152u, 152u, 0u), + MK_COL(152u, 104u, 0u), + MK_COL(104u, 0u, 0u), + MK_COL(104u, 0u, 152u), + MK_COL(104u, 0u, 104u), + MK_COL(104u, 152u, 0u), + MK_COL(104u, 104u, 0u), + MK_COL(216u, 216u, 216u), + MK_COL(216u, 216u, 40u), + MK_COL(216u, 216u, 176u), + MK_COL(216u, 216u, 80u), + MK_COL(216u, 40u, 216u), + MK_COL(216u, 40u, 40u), + MK_COL(216u, 40u, 176u), + MK_COL(216u, 40u, 80u), + MK_COL(216u, 176u, 216u), + MK_COL(216u, 176u, 40u), + MK_COL(216u, 176u, 176u), + MK_COL(216u, 176u, 80u), + MK_COL(216u, 80u, 216u), + MK_COL(216u, 80u, 40u), + MK_COL(216u, 80u, 176u), + MK_COL(216u, 80u, 80u), + MK_COL(40u, 216u, 216u), + MK_COL(40u, 216u, 40u), + MK_COL(40u, 216u, 176u), + MK_COL(40u, 216u, 80u), + MK_COL(40u, 40u, 216u), + MK_COL(40u, 40u, 40u), + MK_COL(40u, 40u, 176u), + MK_COL(40u, 40u, 80u), + MK_COL(40u, 176u, 216u), + MK_COL(40u, 176u, 40u), + MK_COL(40u, 176u, 176u), + MK_COL(40u, 176u, 80u), + MK_COL(40u, 80u, 216u), + MK_COL(40u, 80u, 40u), + MK_COL(40u, 80u, 176u), + MK_COL(40u, 80u, 80u), + MK_COL(80u, 216u, 216u), + MK_COL(80u, 216u, 40u), + MK_COL(80u, 216u, 176u), + MK_COL(80u, 216u, 80u), + MK_COL(80u, 40u, 216u), + MK_COL(80u, 40u, 40u), + MK_COL(80u, 40u, 176u), + MK_COL(80u, 40u, 80u), + MK_COL(80u, 176u, 216u), + MK_COL(80u, 176u, 40u), + MK_COL(80u, 176u, 176u), + MK_COL(80u, 176u, 80u), + MK_COL(80u, 80u, 216u), + MK_COL(80u, 80u, 40u), + MK_COL(80u, 80u, 176u), + MK_COL(80u, 80u, 80u), + MK_COL(0u, 0u, 192u), + MK_COL(0u, 0u, 64u), + MK_COL(0u, 0u, 128u), + MK_COL(0u, 192u, 0u), + MK_COL(0u, 192u, 192u), + MK_COL(0u, 192u, 64u), + MK_COL(0u, 192u, 128u), + MK_COL(0u, 64u, 0u), + MK_COL(0u, 64u, 192u), + MK_COL(0u, 64u, 64u), + MK_COL(0u, 64u, 128u), + MK_COL(0u, 128u, 0u), + MK_COL(0u, 128u, 192u), + MK_COL(0u, 128u, 64u), + MK_COL(0u, 128u, 128u), + MK_COL(176u, 216u, 216u), + MK_COL(176u, 216u, 40u), + MK_COL(176u, 216u, 176u), + MK_COL(176u, 216u, 80u), + MK_COL(176u, 40u, 216u), + MK_COL(176u, 40u, 40u), + MK_COL(176u, 40u, 176u), + MK_COL(176u, 40u, 80u), + MK_COL(176u, 176u, 216u), + MK_COL(176u, 176u, 40u), + MK_COL(176u, 176u, 176u), + MK_COL(176u, 176u, 80u), + MK_COL(176u, 80u, 216u), + MK_COL(176u, 80u, 40u), + MK_COL(176u, 80u, 176u), + MK_COL(176u, 80u, 80u), + MK_COL(192u, 0u, 0u), + MK_COL(192u, 0u, 192u), + MK_COL(192u, 0u, 64u), + MK_COL(192u, 0u, 128u), + MK_COL(192u, 192u, 0u), + MK_COL(192u, 192u, 192u), + MK_COL(192u, 192u, 64u), + MK_COL(192u, 192u, 128u), + MK_COL(192u, 64u, 0u), + MK_COL(192u, 64u, 192u), + MK_COL(192u, 64u, 64u), + MK_COL(192u, 64u, 128u), + MK_COL(192u, 128u, 0u), + MK_COL(192u, 128u, 192u), + MK_COL(192u, 128u, 64u), + MK_COL(192u, 128u, 128u), + MK_COL(64u, 0u, 0u), + MK_COL(64u, 0u, 192u), + MK_COL(64u, 0u, 64u), + MK_COL(64u, 0u, 128u), + MK_COL(64u, 192u, 0u), + MK_COL(64u, 192u, 192u), + MK_COL(64u, 192u, 64u), + MK_COL(64u, 192u, 128u), + MK_COL(64u, 64u, 0u), + MK_COL(64u, 64u, 192u), + MK_COL(64u, 64u, 64u), + MK_COL(64u, 64u, 128u), + MK_COL(64u, 128u, 0u), + MK_COL(64u, 128u, 192u), + MK_COL(64u, 128u, 64u), + MK_COL(64u, 128u, 128u), + MK_COL(128u, 0u, 0u), + MK_COL(128u, 0u, 192u), + MK_COL(128u, 0u, 64u), + MK_COL(128u, 0u, 128u), + MK_COL(128u, 192u, 0u), + MK_COL(128u, 192u, 192u), + MK_COL(128u, 192u, 64u), + MK_COL(128u, 192u, 128u), + MK_COL(128u, 64u, 0u), + MK_COL(128u, 64u, 192u), + MK_COL(128u, 64u, 64u), + MK_COL(128u, 64u, 128u), + MK_COL(128u, 128u, 0u), + MK_COL(128u, 128u, 192u), + MK_COL(128u, 128u, 64u), + MK_COL(128u, 128u, 128u), +}; + +#undef MK_COL + +//------------------------------------------------------------------------------ +// TODO(skal): move the functions to dsp/lossless.c when the correct +// granularity is found. For now, we'll just copy-paste some useful bits +// here instead. + +// In-place sum of each component with mod 256. +static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) { + const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u); + const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu); + *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu); +} + +static WEBP_INLINE uint32_t Clip255(uint32_t a) { + if (a < 256) { + return a; + } + // return 0, when a is a negative integer. + // return 255, when a is positive. + return ~a >> 24; +} + +// Delta palettization functions. +static WEBP_INLINE int Square(int x) { + return x * x; +} + +static WEBP_INLINE uint32_t Intensity(uint32_t a) { + return + 30 * ((a >> 16) & 0xff) + + 59 * ((a >> 8) & 0xff) + + 11 * ((a >> 0) & 0xff); +} + +static uint32_t CalcDist(uint32_t predicted_value, uint32_t actual_value, + uint32_t palette_entry) { + int i; + uint32_t distance = 0; + AddPixelsEq(&predicted_value, palette_entry); + for (i = 0; i < 32; i += 8) { + const int32_t av = (actual_value >> i) & 0xff; + const int32_t pv = (predicted_value >> i) & 0xff; + distance += Square(pv - av); + } + // We sum square of intensity difference with factor 10, but because Intensity + // returns 100 times real intensity we need to multiply differences of colors + // by 1000. + distance *= 1000u; + distance += Square(Intensity(predicted_value) + - Intensity(actual_value)); + return distance; +} + +static uint32_t Predict(int x, int y, uint32_t* image) { + const uint32_t t = (y == 0) ? ARGB_BLACK : image[x]; + const uint32_t l = (x == 0) ? ARGB_BLACK : image[x - 1]; + const uint32_t p = + (((((t >> 24) & 0xff) + ((l >> 24) & 0xff)) / 2) << 24) + + (((((t >> 16) & 0xff) + ((l >> 16) & 0xff)) / 2) << 16) + + (((((t >> 8) & 0xff) + ((l >> 8) & 0xff)) / 2) << 8) + + (((((t >> 0) & 0xff) + ((l >> 0) & 0xff)) / 2) << 0); + if (x == 0 && y == 0) return ARGB_BLACK; + if (x == 0) return t; + if (y == 0) return l; + return p; +} + +static WEBP_INLINE int AddSubtractComponentFullWithCoefficient( + int a, int b, int c) { + return Clip255(a + ((b - c) >> 2)); +} + +static WEBP_INLINE uint32_t ClampedAddSubtractFullWithCoefficient( + uint32_t c0, uint32_t c1, uint32_t c2) { + const int a = AddSubtractComponentFullWithCoefficient( + c0 >> 24, c1 >> 24, c2 >> 24); + const int r = AddSubtractComponentFullWithCoefficient((c0 >> 16) & 0xff, + (c1 >> 16) & 0xff, + (c2 >> 16) & 0xff); + const int g = AddSubtractComponentFullWithCoefficient((c0 >> 8) & 0xff, + (c1 >> 8) & 0xff, + (c2 >> 8) & 0xff); + const int b = AddSubtractComponentFullWithCoefficient( + c0 & 0xff, c1 & 0xff, c2 & 0xff); + return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; +} + +//------------------------------------------------------------------------------ + +// Find palette entry with minimum error from difference of actual pixel value +// and predicted pixel value. Propagate error of pixel to its top and left pixel +// in src array. Write predicted_value + palette_entry to new_image. Return +// index of best palette entry. +static int FindBestPaletteEntry(uint32_t src, uint32_t predicted_value, + const uint32_t palette[], int palette_size) { + int i; + int idx = 0; + uint32_t best_distance = CalcDist(predicted_value, src, palette[0]); + for (i = 1; i < palette_size; ++i) { + const uint32_t distance = CalcDist(predicted_value, src, palette[i]); + if (distance < best_distance) { + best_distance = distance; + idx = i; + } + } + return idx; +} + +static void ApplyBestPaletteEntry(int x, int y, + uint32_t new_value, uint32_t palette_value, + uint32_t* src, int src_stride, + uint32_t* new_image) { + AddPixelsEq(&new_value, palette_value); + if (x > 0) { + src[x - 1] = ClampedAddSubtractFullWithCoefficient(src[x - 1], + new_value, src[x]); + } + if (y > 0) { + src[x - src_stride] = + ClampedAddSubtractFullWithCoefficient(src[x - src_stride], + new_value, src[x]); + } + new_image[x] = new_value; +} + +//------------------------------------------------------------------------------ +// Main entry point + +static WebPEncodingError ApplyDeltaPalette(uint32_t* src, uint32_t* dst, + uint32_t src_stride, + uint32_t dst_stride, + const uint32_t* palette, + int palette_size, + int width, int height, + int num_passes) { + int x, y; + WebPEncodingError err = VP8_ENC_OK; + uint32_t* new_image = (uint32_t*)WebPSafeMalloc(width, sizeof(*new_image)); + uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row)); + if (new_image == NULL || tmp_row == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + while (num_passes--) { + uint32_t* cur_src = src; + uint32_t* cur_dst = dst; + for (y = 0; y < height; ++y) { + for (x = 0; x < width; ++x) { + const uint32_t predicted_value = Predict(x, y, new_image); + tmp_row[x] = FindBestPaletteEntry(cur_src[x], predicted_value, + palette, palette_size); + ApplyBestPaletteEntry(x, y, predicted_value, palette[tmp_row[x]], + cur_src, src_stride, new_image); + } + for (x = 0; x < width; ++x) { + cur_dst[x] = palette[tmp_row[x]]; + } + cur_src += src_stride; + cur_dst += dst_stride; + } + } + Error: + WebPSafeFree(new_image); + WebPSafeFree(tmp_row); + return err; +} + +// replaces enc->argb_ by a palettizable approximation of it, +// and generates optimal enc->palette_[] +WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc) { + const WebPPicture* const pic = enc->pic_; + uint32_t* src = pic->argb; + uint32_t* dst = enc->argb_; + const int width = pic->width; + const int height = pic->height; + + WebPEncodingError err = VP8_ENC_OK; + memcpy(enc->palette_, kDeltaPalette, sizeof(kDeltaPalette)); + enc->palette_[DELTA_PALETTE_SIZE - 1] = src[0] - 0xff000000u; + enc->palette_size_ = DELTA_PALETTE_SIZE; + err = ApplyDeltaPalette(src, dst, pic->argb_stride, enc->current_width_, + enc->palette_, enc->palette_size_, + width, height, 2); + if (err != VP8_ENC_OK) goto Error; + + Error: + return err; +} + +#else // !WEBP_EXPERIMENTAL_FEATURES + +WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc) { + (void)enc; + return VP8_ENC_ERROR_INVALID_CONFIGURATION; +} + +#endif // WEBP_EXPERIMENTAL_FEATURES diff --git a/thirdparty/libwebp/enc/delta_palettization.h b/thirdparty/libwebp/enc/delta_palettization.h new file mode 100644 index 0000000000..e41c0c5ab5 --- /dev/null +++ b/thirdparty/libwebp/enc/delta_palettization.h @@ -0,0 +1,25 @@ +// Copyright 2015 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Author: Mislav Bradac (mislavm@google.com) +// + +#ifndef WEBP_ENC_DELTA_PALETTIZATION_H_ +#define WEBP_ENC_DELTA_PALETTIZATION_H_ + +#include "../webp/encode.h" +#include "../enc/vp8li.h" + +// Replaces enc->argb_[] input by a palettizable approximation of it, +// and generates optimal enc->palette_[]. +// This function can revert enc->use_palette_ / enc->use_predict_ flag +// if delta-palettization is not producing expected saving. +WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc); + +#endif // WEBP_ENC_DELTA_PALETTIZATION_H_ diff --git a/thirdparty/libwebp/enc/filter.c b/thirdparty/libwebp/enc/filter.c new file mode 100644 index 0000000000..e8ea8b4ff2 --- /dev/null +++ b/thirdparty/libwebp/enc/filter.c @@ -0,0 +1,306 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Selecting filter level +// +// Author: somnath@google.com (Somnath Banerjee) + +#include <assert.h> +#include "./vp8enci.h" +#include "../dsp/dsp.h" + +// This table gives, for a given sharpness, the filtering strength to be +// used (at least) in order to filter a given edge step delta. +// This is constructed by brute force inspection: for all delta, we iterate +// over all possible filtering strength / thresh until needs_filter() returns +// true. +#define MAX_DELTA_SIZE 64 +static const uint8_t kLevelsFromDelta[8][MAX_DELTA_SIZE] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 }, + { 0, 1, 2, 3, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 17, 18, + 20, 21, 23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42, + 44, 45, 47, 48, 50, 51, 53, 54, 56, 57, 59, 60, 62, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }, + { 0, 1, 2, 3, 5, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 19, + 20, 22, 23, 25, 26, 28, 29, 31, 32, 34, 35, 37, 38, 40, 41, 43, + 44, 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }, + { 0, 1, 2, 3, 5, 6, 7, 8, 9, 11, 12, 13, 15, 16, 18, 19, + 21, 22, 24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, + 45, 46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }, + { 0, 1, 2, 3, 5, 6, 7, 8, 9, 11, 12, 14, 15, 17, 18, 20, + 21, 23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42, 44, + 45, 47, 48, 50, 51, 53, 54, 56, 57, 59, 60, 62, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }, + { 0, 1, 2, 4, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 19, 20, + 22, 23, 25, 26, 28, 29, 31, 32, 34, 35, 37, 38, 40, 41, 43, 44, + 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }, + { 0, 1, 2, 4, 5, 7, 8, 9, 11, 12, 13, 15, 16, 18, 19, 21, + 22, 24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, 45, + 46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }, + { 0, 1, 2, 4, 5, 7, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, + 23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42, 44, 45, + 47, 48, 50, 51, 53, 54, 56, 57, 59, 60, 62, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 } +}; + +int VP8FilterStrengthFromDelta(int sharpness, int delta) { + const int pos = (delta < MAX_DELTA_SIZE) ? delta : MAX_DELTA_SIZE - 1; + assert(sharpness >= 0 && sharpness <= 7); + return kLevelsFromDelta[sharpness][pos]; +} + +//------------------------------------------------------------------------------ +// Paragraph 15.4: compute the inner-edge filtering strength + +static int GetILevel(int sharpness, int level) { + if (sharpness > 0) { + if (sharpness > 4) { + level >>= 2; + } else { + level >>= 1; + } + if (level > 9 - sharpness) { + level = 9 - sharpness; + } + } + if (level < 1) level = 1; + return level; +} + +static void DoFilter(const VP8EncIterator* const it, int level) { + const VP8Encoder* const enc = it->enc_; + const int ilevel = GetILevel(enc->config_->filter_sharpness, level); + const int limit = 2 * level + ilevel; + + uint8_t* const y_dst = it->yuv_out2_ + Y_OFF_ENC; + uint8_t* const u_dst = it->yuv_out2_ + U_OFF_ENC; + uint8_t* const v_dst = it->yuv_out2_ + V_OFF_ENC; + + // copy current block to yuv_out2_ + memcpy(y_dst, it->yuv_out_, YUV_SIZE_ENC * sizeof(uint8_t)); + + if (enc->filter_hdr_.simple_ == 1) { // simple + VP8SimpleHFilter16i(y_dst, BPS, limit); + VP8SimpleVFilter16i(y_dst, BPS, limit); + } else { // complex + const int hev_thresh = (level >= 40) ? 2 : (level >= 15) ? 1 : 0; + VP8HFilter16i(y_dst, BPS, limit, ilevel, hev_thresh); + VP8HFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh); + VP8VFilter16i(y_dst, BPS, limit, ilevel, hev_thresh); + VP8VFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh); + } +} + +//------------------------------------------------------------------------------ +// SSIM metric + +static const double kMinValue = 1.e-10; // minimal threshold + +void VP8SSIMAddStats(const VP8DistoStats* const src, VP8DistoStats* const dst) { + dst->w += src->w; + dst->xm += src->xm; + dst->ym += src->ym; + dst->xxm += src->xxm; + dst->xym += src->xym; + dst->yym += src->yym; +} + +double VP8SSIMGet(const VP8DistoStats* const stats) { + const double xmxm = stats->xm * stats->xm; + const double ymym = stats->ym * stats->ym; + const double xmym = stats->xm * stats->ym; + const double w2 = stats->w * stats->w; + double sxx = stats->xxm * stats->w - xmxm; + double syy = stats->yym * stats->w - ymym; + double sxy = stats->xym * stats->w - xmym; + double C1, C2; + double fnum; + double fden; + // small errors are possible, due to rounding. Clamp to zero. + if (sxx < 0.) sxx = 0.; + if (syy < 0.) syy = 0.; + C1 = 6.5025 * w2; + C2 = 58.5225 * w2; + fnum = (2 * xmym + C1) * (2 * sxy + C2); + fden = (xmxm + ymym + C1) * (sxx + syy + C2); + return (fden != 0.) ? fnum / fden : kMinValue; +} + +double VP8SSIMGetSquaredError(const VP8DistoStats* const s) { + if (s->w > 0.) { + const double iw2 = 1. / (s->w * s->w); + const double sxx = s->xxm * s->w - s->xm * s->xm; + const double syy = s->yym * s->w - s->ym * s->ym; + const double sxy = s->xym * s->w - s->xm * s->ym; + const double SSE = iw2 * (sxx + syy - 2. * sxy); + if (SSE > kMinValue) return SSE; + } + return kMinValue; +} + +#define LIMIT(A, M) ((A) > (M) ? (M) : (A)) +static void VP8SSIMAccumulateRow(const uint8_t* src1, int stride1, + const uint8_t* src2, int stride2, + int y, int W, int H, + VP8DistoStats* const stats) { + int x = 0; + const int w0 = LIMIT(VP8_SSIM_KERNEL, W); + for (x = 0; x < w0; ++x) { + VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); + } + for (; x <= W - 8 + VP8_SSIM_KERNEL; ++x) { + VP8SSIMAccumulate( + src1 + (y - VP8_SSIM_KERNEL) * stride1 + (x - VP8_SSIM_KERNEL), stride1, + src2 + (y - VP8_SSIM_KERNEL) * stride2 + (x - VP8_SSIM_KERNEL), stride2, + stats); + } + for (; x < W; ++x) { + VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); + } +} + +void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1, + const uint8_t* src2, int stride2, + int W, int H, VP8DistoStats* const stats) { + int x, y; + const int h0 = LIMIT(VP8_SSIM_KERNEL, H); + const int h1 = LIMIT(VP8_SSIM_KERNEL, H - VP8_SSIM_KERNEL); + for (y = 0; y < h0; ++y) { + for (x = 0; x < W; ++x) { + VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); + } + } + for (; y < h1; ++y) { + VP8SSIMAccumulateRow(src1, stride1, src2, stride2, y, W, H, stats); + } + for (; y < H; ++y) { + for (x = 0; x < W; ++x) { + VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); + } + } +} +#undef LIMIT + +static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) { + int x, y; + VP8DistoStats s = { .0, .0, .0, .0, .0, .0 }; + + // compute SSIM in a 10 x 10 window + for (y = VP8_SSIM_KERNEL; y < 16 - VP8_SSIM_KERNEL; y++) { + for (x = VP8_SSIM_KERNEL; x < 16 - VP8_SSIM_KERNEL; x++) { + VP8SSIMAccumulateClipped(yuv1 + Y_OFF_ENC, BPS, yuv2 + Y_OFF_ENC, BPS, + x, y, 16, 16, &s); + } + } + for (x = 1; x < 7; x++) { + for (y = 1; y < 7; y++) { + VP8SSIMAccumulateClipped(yuv1 + U_OFF_ENC, BPS, yuv2 + U_OFF_ENC, BPS, + x, y, 8, 8, &s); + VP8SSIMAccumulateClipped(yuv1 + V_OFF_ENC, BPS, yuv2 + V_OFF_ENC, BPS, + x, y, 8, 8, &s); + } + } + return VP8SSIMGet(&s); +} + +//------------------------------------------------------------------------------ +// Exposed APIs: Encoder should call the following 3 functions to adjust +// loop filter strength + +void VP8InitFilter(VP8EncIterator* const it) { + if (it->lf_stats_ != NULL) { + int s, i; + for (s = 0; s < NUM_MB_SEGMENTS; s++) { + for (i = 0; i < MAX_LF_LEVELS; i++) { + (*it->lf_stats_)[s][i] = 0; + } + } + VP8SSIMDspInit(); + } +} + +void VP8StoreFilterStats(VP8EncIterator* const it) { + int d; + VP8Encoder* const enc = it->enc_; + const int s = it->mb_->segment_; + const int level0 = enc->dqm_[s].fstrength_; + + // explore +/-quant range of values around level0 + const int delta_min = -enc->dqm_[s].quant_; + const int delta_max = enc->dqm_[s].quant_; + const int step_size = (delta_max - delta_min >= 4) ? 4 : 1; + + if (it->lf_stats_ == NULL) return; + + // NOTE: Currently we are applying filter only across the sublock edges + // There are two reasons for that. + // 1. Applying filter on macro block edges will change the pixels in + // the left and top macro blocks. That will be hard to restore + // 2. Macro Blocks on the bottom and right are not yet compressed. So we + // cannot apply filter on the right and bottom macro block edges. + if (it->mb_->type_ == 1 && it->mb_->skip_) return; + + // Always try filter level zero + (*it->lf_stats_)[s][0] += GetMBSSIM(it->yuv_in_, it->yuv_out_); + + for (d = delta_min; d <= delta_max; d += step_size) { + const int level = level0 + d; + if (level <= 0 || level >= MAX_LF_LEVELS) { + continue; + } + DoFilter(it, level); + (*it->lf_stats_)[s][level] += GetMBSSIM(it->yuv_in_, it->yuv_out2_); + } +} + +void VP8AdjustFilterStrength(VP8EncIterator* const it) { + VP8Encoder* const enc = it->enc_; + if (it->lf_stats_ != NULL) { + int s; + for (s = 0; s < NUM_MB_SEGMENTS; s++) { + int i, best_level = 0; + // Improvement over filter level 0 should be at least 1e-5 (relatively) + double best_v = 1.00001 * (*it->lf_stats_)[s][0]; + for (i = 1; i < MAX_LF_LEVELS; i++) { + const double v = (*it->lf_stats_)[s][i]; + if (v > best_v) { + best_v = v; + best_level = i; + } + } + enc->dqm_[s].fstrength_ = best_level; + } + } else if (enc->config_->filter_strength > 0) { + int max_level = 0; + int s; + for (s = 0; s < NUM_MB_SEGMENTS; s++) { + VP8SegmentInfo* const dqm = &enc->dqm_[s]; + // this '>> 3' accounts for some inverse WHT scaling + const int delta = (dqm->max_edge_ * dqm->y2_.q_[1]) >> 3; + const int level = + VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, delta); + if (level > dqm->fstrength_) { + dqm->fstrength_ = level; + } + if (max_level < dqm->fstrength_) { + max_level = dqm->fstrength_; + } + } + enc->filter_hdr_.level_ = max_level; + } +} + +// ----------------------------------------------------------------------------- diff --git a/thirdparty/libwebp/enc/frame.c b/thirdparty/libwebp/enc/frame.c new file mode 100644 index 0000000000..5b7a40b9ad --- /dev/null +++ b/thirdparty/libwebp/enc/frame.c @@ -0,0 +1,850 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// frame coding and analysis +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <string.h> +#include <math.h> + +#include "./cost.h" +#include "./vp8enci.h" +#include "../dsp/dsp.h" +#include "../webp/format_constants.h" // RIFF constants + +#define SEGMENT_VISU 0 +#define DEBUG_SEARCH 0 // useful to track search convergence + +//------------------------------------------------------------------------------ +// multi-pass convergence + +#define HEADER_SIZE_ESTIMATE (RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + \ + VP8_FRAME_HEADER_SIZE) +#define DQ_LIMIT 0.4 // convergence is considered reached if dq < DQ_LIMIT +// we allow 2k of extra head-room in PARTITION0 limit. +#define PARTITION0_SIZE_LIMIT ((VP8_MAX_PARTITION0_SIZE - 2048ULL) << 11) + +typedef struct { // struct for organizing convergence in either size or PSNR + int is_first; + float dq; + float q, last_q; + double value, last_value; // PSNR or size + double target; + int do_size_search; +} PassStats; + +static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) { + const uint64_t target_size = (uint64_t)enc->config_->target_size; + const int do_size_search = (target_size != 0); + const float target_PSNR = enc->config_->target_PSNR; + + s->is_first = 1; + s->dq = 10.f; + s->q = s->last_q = enc->config_->quality; + s->target = do_size_search ? (double)target_size + : (target_PSNR > 0.) ? target_PSNR + : 40.; // default, just in case + s->value = s->last_value = 0.; + s->do_size_search = do_size_search; + return do_size_search; +} + +static float Clamp(float v, float min, float max) { + return (v < min) ? min : (v > max) ? max : v; +} + +static float ComputeNextQ(PassStats* const s) { + float dq; + if (s->is_first) { + dq = (s->value > s->target) ? -s->dq : s->dq; + s->is_first = 0; + } else if (s->value != s->last_value) { + const double slope = (s->target - s->value) / (s->last_value - s->value); + dq = (float)(slope * (s->last_q - s->q)); + } else { + dq = 0.; // we're done?! + } + // Limit variable to avoid large swings. + s->dq = Clamp(dq, -30.f, 30.f); + s->last_q = s->q; + s->last_value = s->value; + s->q = Clamp(s->q + s->dq, 0.f, 100.f); + return s->q; +} + +//------------------------------------------------------------------------------ +// Tables for level coding + +const uint8_t VP8Cat3[] = { 173, 148, 140 }; +const uint8_t VP8Cat4[] = { 176, 155, 140, 135 }; +const uint8_t VP8Cat5[] = { 180, 157, 141, 134, 130 }; +const uint8_t VP8Cat6[] = + { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129 }; + +//------------------------------------------------------------------------------ +// Reset the statistics about: number of skips, token proba, level cost,... + +static void ResetStats(VP8Encoder* const enc) { + VP8EncProba* const proba = &enc->proba_; + VP8CalculateLevelCosts(proba); + proba->nb_skip_ = 0; +} + +//------------------------------------------------------------------------------ +// Skip decision probability + +#define SKIP_PROBA_THRESHOLD 250 // value below which using skip_proba is OK. + +static int CalcSkipProba(uint64_t nb, uint64_t total) { + return (int)(total ? (total - nb) * 255 / total : 255); +} + +// Returns the bit-cost for coding the skip probability. +static int FinalizeSkipProba(VP8Encoder* const enc) { + VP8EncProba* const proba = &enc->proba_; + const int nb_mbs = enc->mb_w_ * enc->mb_h_; + const int nb_events = proba->nb_skip_; + int size; + proba->skip_proba_ = CalcSkipProba(nb_events, nb_mbs); + proba->use_skip_proba_ = (proba->skip_proba_ < SKIP_PROBA_THRESHOLD); + size = 256; // 'use_skip_proba' bit + if (proba->use_skip_proba_) { + size += nb_events * VP8BitCost(1, proba->skip_proba_) + + (nb_mbs - nb_events) * VP8BitCost(0, proba->skip_proba_); + size += 8 * 256; // cost of signaling the skip_proba_ itself. + } + return size; +} + +// Collect statistics and deduce probabilities for next coding pass. +// Return the total bit-cost for coding the probability updates. +static int CalcTokenProba(int nb, int total) { + assert(nb <= total); + return nb ? (255 - nb * 255 / total) : 255; +} + +// Cost of coding 'nb' 1's and 'total-nb' 0's using 'proba' probability. +static int BranchCost(int nb, int total, int proba) { + return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba); +} + +static void ResetTokenStats(VP8Encoder* const enc) { + VP8EncProba* const proba = &enc->proba_; + memset(proba->stats_, 0, sizeof(proba->stats_)); +} + +static int FinalizeTokenProbas(VP8EncProba* const proba) { + int has_changed = 0; + int size = 0; + int t, b, c, p; + for (t = 0; t < NUM_TYPES; ++t) { + for (b = 0; b < NUM_BANDS; ++b) { + for (c = 0; c < NUM_CTX; ++c) { + for (p = 0; p < NUM_PROBAS; ++p) { + const proba_t stats = proba->stats_[t][b][c][p]; + const int nb = (stats >> 0) & 0xffff; + const int total = (stats >> 16) & 0xffff; + const int update_proba = VP8CoeffsUpdateProba[t][b][c][p]; + const int old_p = VP8CoeffsProba0[t][b][c][p]; + const int new_p = CalcTokenProba(nb, total); + const int old_cost = BranchCost(nb, total, old_p) + + VP8BitCost(0, update_proba); + const int new_cost = BranchCost(nb, total, new_p) + + VP8BitCost(1, update_proba) + + 8 * 256; + const int use_new_p = (old_cost > new_cost); + size += VP8BitCost(use_new_p, update_proba); + if (use_new_p) { // only use proba that seem meaningful enough. + proba->coeffs_[t][b][c][p] = new_p; + has_changed |= (new_p != old_p); + size += 8 * 256; + } else { + proba->coeffs_[t][b][c][p] = old_p; + } + } + } + } + } + proba->dirty_ = has_changed; + return size; +} + +//------------------------------------------------------------------------------ +// Finalize Segment probability based on the coding tree + +static int GetProba(int a, int b) { + const int total = a + b; + return (total == 0) ? 255 // that's the default probability. + : (255 * a + total / 2) / total; // rounded proba +} + +static void SetSegmentProbas(VP8Encoder* const enc) { + int p[NUM_MB_SEGMENTS] = { 0 }; + int n; + + for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) { + const VP8MBInfo* const mb = &enc->mb_info_[n]; + p[mb->segment_]++; + } + if (enc->pic_->stats != NULL) { + for (n = 0; n < NUM_MB_SEGMENTS; ++n) { + enc->pic_->stats->segment_size[n] = p[n]; + } + } + if (enc->segment_hdr_.num_segments_ > 1) { + uint8_t* const probas = enc->proba_.segments_; + probas[0] = GetProba(p[0] + p[1], p[2] + p[3]); + probas[1] = GetProba(p[0], p[1]); + probas[2] = GetProba(p[2], p[3]); + + enc->segment_hdr_.update_map_ = + (probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255); + enc->segment_hdr_.size_ = + p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) + + p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) + + p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) + + p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2])); + } else { + enc->segment_hdr_.update_map_ = 0; + enc->segment_hdr_.size_ = 0; + } +} + +//------------------------------------------------------------------------------ +// Coefficient coding + +static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) { + int n = res->first; + // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1 + const uint8_t* p = res->prob[n][ctx]; + if (!VP8PutBit(bw, res->last >= 0, p[0])) { + return 0; + } + + while (n < 16) { + const int c = res->coeffs[n++]; + const int sign = c < 0; + int v = sign ? -c : c; + if (!VP8PutBit(bw, v != 0, p[1])) { + p = res->prob[VP8EncBands[n]][0]; + continue; + } + if (!VP8PutBit(bw, v > 1, p[2])) { + p = res->prob[VP8EncBands[n]][1]; + } else { + if (!VP8PutBit(bw, v > 4, p[3])) { + if (VP8PutBit(bw, v != 2, p[4])) + VP8PutBit(bw, v == 4, p[5]); + } else if (!VP8PutBit(bw, v > 10, p[6])) { + if (!VP8PutBit(bw, v > 6, p[7])) { + VP8PutBit(bw, v == 6, 159); + } else { + VP8PutBit(bw, v >= 9, 165); + VP8PutBit(bw, !(v & 1), 145); + } + } else { + int mask; + const uint8_t* tab; + if (v < 3 + (8 << 1)) { // VP8Cat3 (3b) + VP8PutBit(bw, 0, p[8]); + VP8PutBit(bw, 0, p[9]); + v -= 3 + (8 << 0); + mask = 1 << 2; + tab = VP8Cat3; + } else if (v < 3 + (8 << 2)) { // VP8Cat4 (4b) + VP8PutBit(bw, 0, p[8]); + VP8PutBit(bw, 1, p[9]); + v -= 3 + (8 << 1); + mask = 1 << 3; + tab = VP8Cat4; + } else if (v < 3 + (8 << 3)) { // VP8Cat5 (5b) + VP8PutBit(bw, 1, p[8]); + VP8PutBit(bw, 0, p[10]); + v -= 3 + (8 << 2); + mask = 1 << 4; + tab = VP8Cat5; + } else { // VP8Cat6 (11b) + VP8PutBit(bw, 1, p[8]); + VP8PutBit(bw, 1, p[10]); + v -= 3 + (8 << 3); + mask = 1 << 10; + tab = VP8Cat6; + } + while (mask) { + VP8PutBit(bw, !!(v & mask), *tab++); + mask >>= 1; + } + } + p = res->prob[VP8EncBands[n]][2]; + } + VP8PutBitUniform(bw, sign); + if (n == 16 || !VP8PutBit(bw, n <= res->last, p[0])) { + return 1; // EOB + } + } + return 1; +} + +static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it, + const VP8ModeScore* const rd) { + int x, y, ch; + VP8Residual res; + uint64_t pos1, pos2, pos3; + const int i16 = (it->mb_->type_ == 1); + const int segment = it->mb_->segment_; + VP8Encoder* const enc = it->enc_; + + VP8IteratorNzToBytes(it); + + pos1 = VP8BitWriterPos(bw); + if (i16) { + VP8InitResidual(0, 1, enc, &res); + VP8SetResidualCoeffs(rd->y_dc_levels, &res); + it->top_nz_[8] = it->left_nz_[8] = + PutCoeffs(bw, it->top_nz_[8] + it->left_nz_[8], &res); + VP8InitResidual(1, 0, enc, &res); + } else { + VP8InitResidual(0, 3, enc, &res); + } + + // luma-AC + for (y = 0; y < 4; ++y) { + for (x = 0; x < 4; ++x) { + const int ctx = it->top_nz_[x] + it->left_nz_[y]; + VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); + it->top_nz_[x] = it->left_nz_[y] = PutCoeffs(bw, ctx, &res); + } + } + pos2 = VP8BitWriterPos(bw); + + // U/V + VP8InitResidual(0, 2, enc, &res); + for (ch = 0; ch <= 2; ch += 2) { + for (y = 0; y < 2; ++y) { + for (x = 0; x < 2; ++x) { + const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; + VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); + it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = + PutCoeffs(bw, ctx, &res); + } + } + } + pos3 = VP8BitWriterPos(bw); + it->luma_bits_ = pos2 - pos1; + it->uv_bits_ = pos3 - pos2; + it->bit_count_[segment][i16] += it->luma_bits_; + it->bit_count_[segment][2] += it->uv_bits_; + VP8IteratorBytesToNz(it); +} + +// Same as CodeResiduals, but doesn't actually write anything. +// Instead, it just records the event distribution. +static void RecordResiduals(VP8EncIterator* const it, + const VP8ModeScore* const rd) { + int x, y, ch; + VP8Residual res; + VP8Encoder* const enc = it->enc_; + + VP8IteratorNzToBytes(it); + + if (it->mb_->type_ == 1) { // i16x16 + VP8InitResidual(0, 1, enc, &res); + VP8SetResidualCoeffs(rd->y_dc_levels, &res); + it->top_nz_[8] = it->left_nz_[8] = + VP8RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res); + VP8InitResidual(1, 0, enc, &res); + } else { + VP8InitResidual(0, 3, enc, &res); + } + + // luma-AC + for (y = 0; y < 4; ++y) { + for (x = 0; x < 4; ++x) { + const int ctx = it->top_nz_[x] + it->left_nz_[y]; + VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); + it->top_nz_[x] = it->left_nz_[y] = VP8RecordCoeffs(ctx, &res); + } + } + + // U/V + VP8InitResidual(0, 2, enc, &res); + for (ch = 0; ch <= 2; ch += 2) { + for (y = 0; y < 2; ++y) { + for (x = 0; x < 2; ++x) { + const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; + VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); + it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = + VP8RecordCoeffs(ctx, &res); + } + } + } + + VP8IteratorBytesToNz(it); +} + +//------------------------------------------------------------------------------ +// Token buffer + +#if !defined(DISABLE_TOKEN_BUFFER) + +static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd, + VP8TBuffer* const tokens) { + int x, y, ch; + VP8Residual res; + VP8Encoder* const enc = it->enc_; + + VP8IteratorNzToBytes(it); + if (it->mb_->type_ == 1) { // i16x16 + const int ctx = it->top_nz_[8] + it->left_nz_[8]; + VP8InitResidual(0, 1, enc, &res); + VP8SetResidualCoeffs(rd->y_dc_levels, &res); + it->top_nz_[8] = it->left_nz_[8] = + VP8RecordCoeffTokens(ctx, 1, + res.first, res.last, res.coeffs, tokens); + VP8RecordCoeffs(ctx, &res); + VP8InitResidual(1, 0, enc, &res); + } else { + VP8InitResidual(0, 3, enc, &res); + } + + // luma-AC + for (y = 0; y < 4; ++y) { + for (x = 0; x < 4; ++x) { + const int ctx = it->top_nz_[x] + it->left_nz_[y]; + VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); + it->top_nz_[x] = it->left_nz_[y] = + VP8RecordCoeffTokens(ctx, res.coeff_type, + res.first, res.last, res.coeffs, tokens); + VP8RecordCoeffs(ctx, &res); + } + } + + // U/V + VP8InitResidual(0, 2, enc, &res); + for (ch = 0; ch <= 2; ch += 2) { + for (y = 0; y < 2; ++y) { + for (x = 0; x < 2; ++x) { + const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; + VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); + it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = + VP8RecordCoeffTokens(ctx, 2, + res.first, res.last, res.coeffs, tokens); + VP8RecordCoeffs(ctx, &res); + } + } + } + VP8IteratorBytesToNz(it); + return !tokens->error_; +} + +#endif // !DISABLE_TOKEN_BUFFER + +//------------------------------------------------------------------------------ +// ExtraInfo map / Debug function + +#if SEGMENT_VISU +static void SetBlock(uint8_t* p, int value, int size) { + int y; + for (y = 0; y < size; ++y) { + memset(p, value, size); + p += BPS; + } +} +#endif + +static void ResetSSE(VP8Encoder* const enc) { + enc->sse_[0] = 0; + enc->sse_[1] = 0; + enc->sse_[2] = 0; + // Note: enc->sse_[3] is managed by alpha.c + enc->sse_count_ = 0; +} + +static void StoreSSE(const VP8EncIterator* const it) { + VP8Encoder* const enc = it->enc_; + const uint8_t* const in = it->yuv_in_; + const uint8_t* const out = it->yuv_out_; + // Note: not totally accurate at boundary. And doesn't include in-loop filter. + enc->sse_[0] += VP8SSE16x16(in + Y_OFF_ENC, out + Y_OFF_ENC); + enc->sse_[1] += VP8SSE8x8(in + U_OFF_ENC, out + U_OFF_ENC); + enc->sse_[2] += VP8SSE8x8(in + V_OFF_ENC, out + V_OFF_ENC); + enc->sse_count_ += 16 * 16; +} + +static void StoreSideInfo(const VP8EncIterator* const it) { + VP8Encoder* const enc = it->enc_; + const VP8MBInfo* const mb = it->mb_; + WebPPicture* const pic = enc->pic_; + + if (pic->stats != NULL) { + StoreSSE(it); + enc->block_count_[0] += (mb->type_ == 0); + enc->block_count_[1] += (mb->type_ == 1); + enc->block_count_[2] += (mb->skip_ != 0); + } + + if (pic->extra_info != NULL) { + uint8_t* const info = &pic->extra_info[it->x_ + it->y_ * enc->mb_w_]; + switch (pic->extra_info_type) { + case 1: *info = mb->type_; break; + case 2: *info = mb->segment_; break; + case 3: *info = enc->dqm_[mb->segment_].quant_; break; + case 4: *info = (mb->type_ == 1) ? it->preds_[0] : 0xff; break; + case 5: *info = mb->uv_mode_; break; + case 6: { + const int b = (int)((it->luma_bits_ + it->uv_bits_ + 7) >> 3); + *info = (b > 255) ? 255 : b; break; + } + case 7: *info = mb->alpha_; break; + default: *info = 0; break; + } + } +#if SEGMENT_VISU // visualize segments and prediction modes + SetBlock(it->yuv_out_ + Y_OFF_ENC, mb->segment_ * 64, 16); + SetBlock(it->yuv_out_ + U_OFF_ENC, it->preds_[0] * 64, 8); + SetBlock(it->yuv_out_ + V_OFF_ENC, mb->uv_mode_ * 64, 8); +#endif +} + +static double GetPSNR(uint64_t mse, uint64_t size) { + return (mse > 0 && size > 0) ? 10. * log10(255. * 255. * size / mse) : 99; +} + +//------------------------------------------------------------------------------ +// StatLoop(): only collect statistics (number of skips, token usage, ...). +// This is used for deciding optimal probabilities. It also modifies the +// quantizer value if some target (size, PSNR) was specified. + +static void SetLoopParams(VP8Encoder* const enc, float q) { + // Make sure the quality parameter is inside valid bounds + q = Clamp(q, 0.f, 100.f); + + VP8SetSegmentParams(enc, q); // setup segment quantizations and filters + SetSegmentProbas(enc); // compute segment probabilities + + ResetStats(enc); + ResetSSE(enc); +} + +static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt, + int nb_mbs, int percent_delta, + PassStats* const s) { + VP8EncIterator it; + uint64_t size = 0; + uint64_t size_p0 = 0; + uint64_t distortion = 0; + const uint64_t pixel_count = nb_mbs * 384; + + VP8IteratorInit(enc, &it); + SetLoopParams(enc, s->q); + do { + VP8ModeScore info; + VP8IteratorImport(&it, NULL); + if (VP8Decimate(&it, &info, rd_opt)) { + // Just record the number of skips and act like skip_proba is not used. + enc->proba_.nb_skip_++; + } + RecordResiduals(&it, &info); + size += info.R + info.H; + size_p0 += info.H; + distortion += info.D; + if (percent_delta && !VP8IteratorProgress(&it, percent_delta)) + return 0; + VP8IteratorSaveBoundary(&it); + } while (VP8IteratorNext(&it) && --nb_mbs > 0); + + size_p0 += enc->segment_hdr_.size_; + if (s->do_size_search) { + size += FinalizeSkipProba(enc); + size += FinalizeTokenProbas(&enc->proba_); + size = ((size + size_p0 + 1024) >> 11) + HEADER_SIZE_ESTIMATE; + s->value = (double)size; + } else { + s->value = GetPSNR(distortion, pixel_count); + } + return size_p0; +} + +static int StatLoop(VP8Encoder* const enc) { + const int method = enc->method_; + const int do_search = enc->do_search_; + const int fast_probe = ((method == 0 || method == 3) && !do_search); + int num_pass_left = enc->config_->pass; + const int task_percent = 20; + const int percent_per_pass = + (task_percent + num_pass_left / 2) / num_pass_left; + const int final_percent = enc->percent_ + task_percent; + const VP8RDLevel rd_opt = + (method >= 3 || do_search) ? RD_OPT_BASIC : RD_OPT_NONE; + int nb_mbs = enc->mb_w_ * enc->mb_h_; + PassStats stats; + + InitPassStats(enc, &stats); + ResetTokenStats(enc); + + // Fast mode: quick analysis pass over few mbs. Better than nothing. + if (fast_probe) { + if (method == 3) { // we need more stats for method 3 to be reliable. + nb_mbs = (nb_mbs > 200) ? nb_mbs >> 1 : 100; + } else { + nb_mbs = (nb_mbs > 200) ? nb_mbs >> 2 : 50; + } + } + + while (num_pass_left-- > 0) { + const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) || + (num_pass_left == 0) || + (enc->max_i4_header_bits_ == 0); + const uint64_t size_p0 = + OneStatPass(enc, rd_opt, nb_mbs, percent_per_pass, &stats); + if (size_p0 == 0) return 0; +#if (DEBUG_SEARCH > 0) + printf("#%d value:%.1lf -> %.1lf q:%.2f -> %.2f\n", + num_pass_left, stats.last_value, stats.value, stats.last_q, stats.q); +#endif + if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) { + ++num_pass_left; + enc->max_i4_header_bits_ >>= 1; // strengthen header bit limitation... + continue; // ...and start over + } + if (is_last_pass) { + break; + } + // If no target size: just do several pass without changing 'q' + if (do_search) { + ComputeNextQ(&stats); + if (fabs(stats.dq) <= DQ_LIMIT) break; + } + } + if (!do_search || !stats.do_size_search) { + // Need to finalize probas now, since it wasn't done during the search. + FinalizeSkipProba(enc); + FinalizeTokenProbas(&enc->proba_); + } + VP8CalculateLevelCosts(&enc->proba_); // finalize costs + return WebPReportProgress(enc->pic_, final_percent, &enc->percent_); +} + +//------------------------------------------------------------------------------ +// Main loops +// + +static const int kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 }; + +static int PreLoopInitialize(VP8Encoder* const enc) { + int p; + int ok = 1; + const int average_bytes_per_MB = kAverageBytesPerMB[enc->base_quant_ >> 4]; + const int bytes_per_parts = + enc->mb_w_ * enc->mb_h_ * average_bytes_per_MB / enc->num_parts_; + // Initialize the bit-writers + for (p = 0; ok && p < enc->num_parts_; ++p) { + ok = VP8BitWriterInit(enc->parts_ + p, bytes_per_parts); + } + if (!ok) { + VP8EncFreeBitWriters(enc); // malloc error occurred + WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + } + return ok; +} + +static int PostLoopFinalize(VP8EncIterator* const it, int ok) { + VP8Encoder* const enc = it->enc_; + if (ok) { // Finalize the partitions, check for extra errors. + int p; + for (p = 0; p < enc->num_parts_; ++p) { + VP8BitWriterFinish(enc->parts_ + p); + ok &= !enc->parts_[p].error_; + } + } + + if (ok) { // All good. Finish up. + if (enc->pic_->stats != NULL) { // finalize byte counters... + int i, s; + for (i = 0; i <= 2; ++i) { + for (s = 0; s < NUM_MB_SEGMENTS; ++s) { + enc->residual_bytes_[i][s] = (int)((it->bit_count_[s][i] + 7) >> 3); + } + } + } + VP8AdjustFilterStrength(it); // ...and store filter stats. + } else { + // Something bad happened -> need to do some memory cleanup. + VP8EncFreeBitWriters(enc); + } + return ok; +} + +//------------------------------------------------------------------------------ +// VP8EncLoop(): does the final bitstream coding. + +static void ResetAfterSkip(VP8EncIterator* const it) { + if (it->mb_->type_ == 1) { + *it->nz_ = 0; // reset all predictors + it->left_nz_[8] = 0; + } else { + *it->nz_ &= (1 << 24); // preserve the dc_nz bit + } +} + +int VP8EncLoop(VP8Encoder* const enc) { + VP8EncIterator it; + int ok = PreLoopInitialize(enc); + if (!ok) return 0; + + StatLoop(enc); // stats-collection loop + + VP8IteratorInit(enc, &it); + VP8InitFilter(&it); + do { + VP8ModeScore info; + const int dont_use_skip = !enc->proba_.use_skip_proba_; + const VP8RDLevel rd_opt = enc->rd_opt_level_; + + VP8IteratorImport(&it, NULL); + // Warning! order is important: first call VP8Decimate() and + // *then* decide how to code the skip decision if there's one. + if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) { + CodeResiduals(it.bw_, &it, &info); + } else { // reset predictors after a skip + ResetAfterSkip(&it); + } + StoreSideInfo(&it); + VP8StoreFilterStats(&it); + VP8IteratorExport(&it); + ok = VP8IteratorProgress(&it, 20); + VP8IteratorSaveBoundary(&it); + } while (ok && VP8IteratorNext(&it)); + + return PostLoopFinalize(&it, ok); +} + +//------------------------------------------------------------------------------ +// Single pass using Token Buffer. + +#if !defined(DISABLE_TOKEN_BUFFER) + +#define MIN_COUNT 96 // minimum number of macroblocks before updating stats + +int VP8EncTokenLoop(VP8Encoder* const enc) { + // Roughly refresh the proba eight times per pass + int max_count = (enc->mb_w_ * enc->mb_h_) >> 3; + int num_pass_left = enc->config_->pass; + const int do_search = enc->do_search_; + VP8EncIterator it; + VP8EncProba* const proba = &enc->proba_; + const VP8RDLevel rd_opt = enc->rd_opt_level_; + const uint64_t pixel_count = enc->mb_w_ * enc->mb_h_ * 384; + PassStats stats; + int ok; + + InitPassStats(enc, &stats); + ok = PreLoopInitialize(enc); + if (!ok) return 0; + + if (max_count < MIN_COUNT) max_count = MIN_COUNT; + + assert(enc->num_parts_ == 1); + assert(enc->use_tokens_); + assert(proba->use_skip_proba_ == 0); + assert(rd_opt >= RD_OPT_BASIC); // otherwise, token-buffer won't be useful + assert(num_pass_left > 0); + + while (ok && num_pass_left-- > 0) { + const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) || + (num_pass_left == 0) || + (enc->max_i4_header_bits_ == 0); + uint64_t size_p0 = 0; + uint64_t distortion = 0; + int cnt = max_count; + VP8IteratorInit(enc, &it); + SetLoopParams(enc, stats.q); + if (is_last_pass) { + ResetTokenStats(enc); + VP8InitFilter(&it); // don't collect stats until last pass (too costly) + } + VP8TBufferClear(&enc->tokens_); + do { + VP8ModeScore info; + VP8IteratorImport(&it, NULL); + if (--cnt < 0) { + FinalizeTokenProbas(proba); + VP8CalculateLevelCosts(proba); // refresh cost tables for rd-opt + cnt = max_count; + } + VP8Decimate(&it, &info, rd_opt); + ok = RecordTokens(&it, &info, &enc->tokens_); + if (!ok) { + WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + break; + } + size_p0 += info.H; + distortion += info.D; + if (is_last_pass) { + StoreSideInfo(&it); + VP8StoreFilterStats(&it); + VP8IteratorExport(&it); + ok = VP8IteratorProgress(&it, 20); + } + VP8IteratorSaveBoundary(&it); + } while (ok && VP8IteratorNext(&it)); + if (!ok) break; + + size_p0 += enc->segment_hdr_.size_; + if (stats.do_size_search) { + uint64_t size = FinalizeTokenProbas(&enc->proba_); + size += VP8EstimateTokenSize(&enc->tokens_, + (const uint8_t*)proba->coeffs_); + size = (size + size_p0 + 1024) >> 11; // -> size in bytes + size += HEADER_SIZE_ESTIMATE; + stats.value = (double)size; + } else { // compute and store PSNR + stats.value = GetPSNR(distortion, pixel_count); + } + +#if (DEBUG_SEARCH > 0) + printf("#%2d metric:%.1lf -> %.1lf last_q=%.2lf q=%.2lf dq=%.2lf\n", + num_pass_left, stats.last_value, stats.value, + stats.last_q, stats.q, stats.dq); +#endif + if (size_p0 > PARTITION0_SIZE_LIMIT) { + ++num_pass_left; + enc->max_i4_header_bits_ >>= 1; // strengthen header bit limitation... + continue; // ...and start over + } + if (is_last_pass) { + break; // done + } + if (do_search) { + ComputeNextQ(&stats); // Adjust q + } + } + if (ok) { + if (!stats.do_size_search) { + FinalizeTokenProbas(&enc->proba_); + } + ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0, + (const uint8_t*)proba->coeffs_, 1); + } + ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_); + return PostLoopFinalize(&it, ok); +} + +#else + +int VP8EncTokenLoop(VP8Encoder* const enc) { + (void)enc; + return 0; // we shouldn't be here. +} + +#endif // DISABLE_TOKEN_BUFFER + +//------------------------------------------------------------------------------ + diff --git a/thirdparty/libwebp/enc/histogram.c b/thirdparty/libwebp/enc/histogram.c new file mode 100644 index 0000000000..395372b245 --- /dev/null +++ b/thirdparty/libwebp/enc/histogram.c @@ -0,0 +1,937 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Author: Jyrki Alakuijala (jyrki@google.com) +// +#ifdef HAVE_CONFIG_H +#include "../webp/config.h" +#endif + +#include <math.h> + +#include "./backward_references.h" +#include "./histogram.h" +#include "../dsp/lossless.h" +#include "../utils/utils.h" + +#define MAX_COST 1.e38 + +// Number of partitions for the three dominant (literal, red and blue) symbol +// costs. +#define NUM_PARTITIONS 4 +// The size of the bin-hash corresponding to the three dominant costs. +#define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS) +// Maximum number of histograms allowed in greedy combining algorithm. +#define MAX_HISTO_GREEDY 100 + +static void HistogramClear(VP8LHistogram* const p) { + uint32_t* const literal = p->literal_; + const int cache_bits = p->palette_code_bits_; + const int histo_size = VP8LGetHistogramSize(cache_bits); + memset(p, 0, histo_size); + p->palette_code_bits_ = cache_bits; + p->literal_ = literal; +} + +// Swap two histogram pointers. +static void HistogramSwap(VP8LHistogram** const A, VP8LHistogram** const B) { + VP8LHistogram* const tmp = *A; + *A = *B; + *B = tmp; +} + +static void HistogramCopy(const VP8LHistogram* const src, + VP8LHistogram* const dst) { + uint32_t* const dst_literal = dst->literal_; + const int dst_cache_bits = dst->palette_code_bits_; + const int histo_size = VP8LGetHistogramSize(dst_cache_bits); + assert(src->palette_code_bits_ == dst_cache_bits); + memcpy(dst, src, histo_size); + dst->literal_ = dst_literal; +} + +int VP8LGetHistogramSize(int cache_bits) { + const int literal_size = VP8LHistogramNumCodes(cache_bits); + const size_t total_size = sizeof(VP8LHistogram) + sizeof(int) * literal_size; + assert(total_size <= (size_t)0x7fffffff); + return (int)total_size; +} + +void VP8LFreeHistogram(VP8LHistogram* const histo) { + WebPSafeFree(histo); +} + +void VP8LFreeHistogramSet(VP8LHistogramSet* const histo) { + WebPSafeFree(histo); +} + +void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs, + VP8LHistogram* const histo) { + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + while (VP8LRefsCursorOk(&c)) { + VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos); + VP8LRefsCursorNext(&c); + } +} + +void VP8LHistogramCreate(VP8LHistogram* const p, + const VP8LBackwardRefs* const refs, + int palette_code_bits) { + if (palette_code_bits >= 0) { + p->palette_code_bits_ = palette_code_bits; + } + HistogramClear(p); + VP8LHistogramStoreRefs(refs, p); +} + +void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) { + p->palette_code_bits_ = palette_code_bits; + HistogramClear(p); +} + +VP8LHistogram* VP8LAllocateHistogram(int cache_bits) { + VP8LHistogram* histo = NULL; + const int total_size = VP8LGetHistogramSize(cache_bits); + uint8_t* const memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory)); + if (memory == NULL) return NULL; + histo = (VP8LHistogram*)memory; + // literal_ won't necessary be aligned. + histo->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram)); + VP8LHistogramInit(histo, cache_bits); + return histo; +} + +VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) { + int i; + VP8LHistogramSet* set; + const int histo_size = VP8LGetHistogramSize(cache_bits); + const size_t total_size = + sizeof(*set) + size * (sizeof(*set->histograms) + + histo_size + WEBP_ALIGN_CST); + uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory)); + if (memory == NULL) return NULL; + + set = (VP8LHistogramSet*)memory; + memory += sizeof(*set); + set->histograms = (VP8LHistogram**)memory; + memory += size * sizeof(*set->histograms); + set->max_size = size; + set->size = size; + for (i = 0; i < size; ++i) { + memory = (uint8_t*)WEBP_ALIGN(memory); + set->histograms[i] = (VP8LHistogram*)memory; + // literal_ won't necessary be aligned. + set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram)); + VP8LHistogramInit(set->histograms[i], cache_bits); + memory += histo_size; + } + return set; +} + +// ----------------------------------------------------------------------------- + +void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, + const PixOrCopy* const v) { + if (PixOrCopyIsLiteral(v)) { + ++histo->alpha_[PixOrCopyLiteral(v, 3)]; + ++histo->red_[PixOrCopyLiteral(v, 2)]; + ++histo->literal_[PixOrCopyLiteral(v, 1)]; + ++histo->blue_[PixOrCopyLiteral(v, 0)]; + } else if (PixOrCopyIsCacheIdx(v)) { + const int literal_ix = + NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v); + ++histo->literal_[literal_ix]; + } else { + int code, extra_bits; + VP8LPrefixEncodeBits(PixOrCopyLength(v), &code, &extra_bits); + ++histo->literal_[NUM_LITERAL_CODES + code]; + VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits); + ++histo->distance_[code]; + } +} + +// ----------------------------------------------------------------------------- +// Entropy-related functions. + +static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) { + double mix; + if (entropy->nonzeros < 5) { + if (entropy->nonzeros <= 1) { + return 0; + } + // Two symbols, they will be 0 and 1 in a Huffman code. + // Let's mix in a bit of entropy to favor good clustering when + // distributions of these are combined. + if (entropy->nonzeros == 2) { + return 0.99 * entropy->sum + 0.01 * entropy->entropy; + } + // No matter what the entropy says, we cannot be better than min_limit + // with Huffman coding. I am mixing a bit of entropy into the + // min_limit since it produces much better (~0.5 %) compression results + // perhaps because of better entropy clustering. + if (entropy->nonzeros == 3) { + mix = 0.95; + } else { + mix = 0.7; // nonzeros == 4. + } + } else { + mix = 0.627; + } + + { + double min_limit = 2 * entropy->sum - entropy->max_val; + min_limit = mix * min_limit + (1.0 - mix) * entropy->entropy; + return (entropy->entropy < min_limit) ? min_limit : entropy->entropy; + } +} + +double VP8LBitsEntropy(const uint32_t* const array, int n, + uint32_t* const trivial_symbol) { + VP8LBitEntropy entropy; + VP8LBitsEntropyUnrefined(array, n, &entropy); + if (trivial_symbol != NULL) { + *trivial_symbol = + (entropy.nonzeros == 1) ? entropy.nonzero_code : VP8L_NON_TRIVIAL_SYM; + } + + return BitsEntropyRefine(&entropy); +} + +static double InitialHuffmanCost(void) { + // Small bias because Huffman code length is typically not stored in + // full length. + static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3; + static const double kSmallBias = 9.1; + return kHuffmanCodeOfHuffmanCodeSize - kSmallBias; +} + +// Finalize the Huffman cost based on streak numbers and length type (<3 or >=3) +static double FinalHuffmanCost(const VP8LStreaks* const stats) { + double retval = InitialHuffmanCost(); + retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1]; + retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1]; + retval += 1.796875 * stats->streaks[0][0]; + retval += 3.28125 * stats->streaks[1][0]; + return retval; +} + +// Get the symbol entropy for the distribution 'population'. +// Set 'trivial_sym', if there's only one symbol present in the distribution. +static double PopulationCost(const uint32_t* const population, int length, + uint32_t* const trivial_sym) { + VP8LBitEntropy bit_entropy; + VP8LStreaks stats; + VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats); + if (trivial_sym != NULL) { + *trivial_sym = (bit_entropy.nonzeros == 1) ? bit_entropy.nonzero_code + : VP8L_NON_TRIVIAL_SYM; + } + + return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats); +} + +static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X, + const uint32_t* const Y, + int length) { + VP8LBitEntropy bit_entropy; + VP8LStreaks stats; + VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats); + + return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats); +} + +// Estimates the Entropy + Huffman + other block overhead size cost. +double VP8LHistogramEstimateBits(const VP8LHistogram* const p) { + return + PopulationCost( + p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL) + + PopulationCost(p->red_, NUM_LITERAL_CODES, NULL) + + PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL) + + PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL) + + PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL) + + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES) + + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES); +} + +// ----------------------------------------------------------------------------- +// Various histogram combine/cost-eval functions + +static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, + const VP8LHistogram* const b, + double cost_threshold, + double* cost) { + const int palette_code_bits = a->palette_code_bits_; + assert(a->palette_code_bits_ == b->palette_code_bits_); + *cost += GetCombinedEntropy(a->literal_, b->literal_, + VP8LHistogramNumCodes(palette_code_bits)); + *cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES, + b->literal_ + NUM_LITERAL_CODES, + NUM_LENGTH_CODES); + if (*cost > cost_threshold) return 0; + + *cost += GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES); + if (*cost > cost_threshold) return 0; + + *cost += GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES); + if (*cost > cost_threshold) return 0; + + *cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES); + if (*cost > cost_threshold) return 0; + + *cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES); + *cost += + VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES); + if (*cost > cost_threshold) return 0; + + return 1; +} + +// Performs out = a + b, computing the cost C(a+b) - C(a) - C(b) while comparing +// to the threshold value 'cost_threshold'. The score returned is +// Score = C(a+b) - C(a) - C(b), where C(a) + C(b) is known and fixed. +// Since the previous score passed is 'cost_threshold', we only need to compare +// the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out +// early. +static double HistogramAddEval(const VP8LHistogram* const a, + const VP8LHistogram* const b, + VP8LHistogram* const out, + double cost_threshold) { + double cost = 0; + const double sum_cost = a->bit_cost_ + b->bit_cost_; + cost_threshold += sum_cost; + + if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) { + VP8LHistogramAdd(a, b, out); + out->bit_cost_ = cost; + out->palette_code_bits_ = a->palette_code_bits_; + out->trivial_symbol_ = (a->trivial_symbol_ == b->trivial_symbol_) ? + a->trivial_symbol_ : VP8L_NON_TRIVIAL_SYM; + } + + return cost - sum_cost; +} + +// Same as HistogramAddEval(), except that the resulting histogram +// is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit +// the term C(b) which is constant over all the evaluations. +static double HistogramAddThresh(const VP8LHistogram* const a, + const VP8LHistogram* const b, + double cost_threshold) { + double cost = -a->bit_cost_; + GetCombinedHistogramEntropy(a, b, cost_threshold, &cost); + return cost; +} + +// ----------------------------------------------------------------------------- + +// The structure to keep track of cost range for the three dominant entropy +// symbols. +// TODO(skal): Evaluate if float can be used here instead of double for +// representing the entropy costs. +typedef struct { + double literal_max_; + double literal_min_; + double red_max_; + double red_min_; + double blue_max_; + double blue_min_; +} DominantCostRange; + +static void DominantCostRangeInit(DominantCostRange* const c) { + c->literal_max_ = 0.; + c->literal_min_ = MAX_COST; + c->red_max_ = 0.; + c->red_min_ = MAX_COST; + c->blue_max_ = 0.; + c->blue_min_ = MAX_COST; +} + +static void UpdateDominantCostRange( + const VP8LHistogram* const h, DominantCostRange* const c) { + if (c->literal_max_ < h->literal_cost_) c->literal_max_ = h->literal_cost_; + if (c->literal_min_ > h->literal_cost_) c->literal_min_ = h->literal_cost_; + if (c->red_max_ < h->red_cost_) c->red_max_ = h->red_cost_; + if (c->red_min_ > h->red_cost_) c->red_min_ = h->red_cost_; + if (c->blue_max_ < h->blue_cost_) c->blue_max_ = h->blue_cost_; + if (c->blue_min_ > h->blue_cost_) c->blue_min_ = h->blue_cost_; +} + +static void UpdateHistogramCost(VP8LHistogram* const h) { + uint32_t alpha_sym, red_sym, blue_sym; + const double alpha_cost = + PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym); + const double distance_cost = + PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) + + VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES); + const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_); + h->literal_cost_ = PopulationCost(h->literal_, num_codes, NULL) + + VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, + NUM_LENGTH_CODES); + h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym); + h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym); + h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ + + alpha_cost + distance_cost; + if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) { + h->trivial_symbol_ = VP8L_NON_TRIVIAL_SYM; + } else { + h->trivial_symbol_ = + ((uint32_t)alpha_sym << 24) | (red_sym << 16) | (blue_sym << 0); + } +} + +static int GetBinIdForEntropy(double min, double max, double val) { + const double range = max - min; + if (range > 0.) { + const double delta = val - min; + return (int)((NUM_PARTITIONS - 1e-6) * delta / range); + } else { + return 0; + } +} + +static int GetHistoBinIndex(const VP8LHistogram* const h, + const DominantCostRange* const c, int low_effort) { + int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_, + h->literal_cost_); + assert(bin_id < NUM_PARTITIONS); + if (!low_effort) { + bin_id = bin_id * NUM_PARTITIONS + + GetBinIdForEntropy(c->red_min_, c->red_max_, h->red_cost_); + bin_id = bin_id * NUM_PARTITIONS + + GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_); + assert(bin_id < BIN_SIZE); + } + return bin_id; +} + +// Construct the histograms from backward references. +static void HistogramBuild( + int xsize, int histo_bits, const VP8LBackwardRefs* const backward_refs, + VP8LHistogramSet* const image_histo) { + int x = 0, y = 0; + const int histo_xsize = VP8LSubSampleSize(xsize, histo_bits); + VP8LHistogram** const histograms = image_histo->histograms; + VP8LRefsCursor c = VP8LRefsCursorInit(backward_refs); + assert(histo_bits > 0); + while (VP8LRefsCursorOk(&c)) { + const PixOrCopy* const v = c.cur_pos; + const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits); + VP8LHistogramAddSinglePixOrCopy(histograms[ix], v); + x += PixOrCopyLength(v); + while (x >= xsize) { + x -= xsize; + ++y; + } + VP8LRefsCursorNext(&c); + } +} + +// Copies the histograms and computes its bit_cost. +static void HistogramCopyAndAnalyze( + VP8LHistogramSet* const orig_histo, VP8LHistogramSet* const image_histo) { + int i; + const int histo_size = orig_histo->size; + VP8LHistogram** const orig_histograms = orig_histo->histograms; + VP8LHistogram** const histograms = image_histo->histograms; + for (i = 0; i < histo_size; ++i) { + VP8LHistogram* const histo = orig_histograms[i]; + UpdateHistogramCost(histo); + // Copy histograms from orig_histo[] to image_histo[]. + HistogramCopy(histo, histograms[i]); + } +} + +// Partition histograms to different entropy bins for three dominant (literal, +// red and blue) symbol costs and compute the histogram aggregate bit_cost. +static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo, + int16_t* const bin_map, int low_effort) { + int i; + VP8LHistogram** const histograms = image_histo->histograms; + const int histo_size = image_histo->size; + const int bin_depth = histo_size + 1; + DominantCostRange cost_range; + DominantCostRangeInit(&cost_range); + + // Analyze the dominant (literal, red and blue) entropy costs. + for (i = 0; i < histo_size; ++i) { + VP8LHistogram* const histo = histograms[i]; + UpdateDominantCostRange(histo, &cost_range); + } + + // bin-hash histograms on three of the dominant (literal, red and blue) + // symbol costs. + for (i = 0; i < histo_size; ++i) { + const VP8LHistogram* const histo = histograms[i]; + const int bin_id = GetHistoBinIndex(histo, &cost_range, low_effort); + const int bin_offset = bin_id * bin_depth; + // bin_map[n][0] for every bin 'n' maintains the counter for the number of + // histograms in that bin. + // Get and increment the num_histos in that bin. + const int num_histos = ++bin_map[bin_offset]; + assert(bin_offset + num_histos < bin_depth * BIN_SIZE); + // Add histogram i'th index at num_histos (last) position in the bin_map. + bin_map[bin_offset + num_histos] = i; + } +} + +// Compact the histogram set by removing unused entries. +static void HistogramCompactBins(VP8LHistogramSet* const image_histo) { + VP8LHistogram** const histograms = image_histo->histograms; + int i, j; + + for (i = 0, j = 0; i < image_histo->size; ++i) { + if (histograms[i] != NULL && histograms[i]->bit_cost_ != 0.) { + if (j < i) { + histograms[j] = histograms[i]; + histograms[i] = NULL; + } + ++j; + } + } + image_histo->size = j; +} + +static VP8LHistogram* HistogramCombineEntropyBin( + VP8LHistogramSet* const image_histo, + VP8LHistogram* cur_combo, + int16_t* const bin_map, int bin_depth, int num_bins, + double combine_cost_factor, int low_effort) { + int bin_id; + VP8LHistogram** const histograms = image_histo->histograms; + + for (bin_id = 0; bin_id < num_bins; ++bin_id) { + const int bin_offset = bin_id * bin_depth; + const int num_histos = bin_map[bin_offset]; + const int idx1 = bin_map[bin_offset + 1]; + int num_combine_failures = 0; + int n; + for (n = 2; n <= num_histos; ++n) { + const int idx2 = bin_map[bin_offset + n]; + if (low_effort) { + // Merge all histograms with the same bin index, irrespective of cost of + // the merged histograms. + VP8LHistogramAdd(histograms[idx1], histograms[idx2], histograms[idx1]); + histograms[idx2]->bit_cost_ = 0.; + } else { + const double bit_cost_idx2 = histograms[idx2]->bit_cost_; + if (bit_cost_idx2 > 0.) { + const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor; + const double curr_cost_diff = + HistogramAddEval(histograms[idx1], histograms[idx2], + cur_combo, bit_cost_thresh); + if (curr_cost_diff < bit_cost_thresh) { + // Try to merge two histograms only if the combo is a trivial one or + // the two candidate histograms are already non-trivial. + // For some images, 'try_combine' turns out to be false for a lot of + // histogram pairs. In that case, we fallback to combining + // histograms as usual to avoid increasing the header size. + const int try_combine = + (cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) || + ((histograms[idx1]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) && + (histograms[idx2]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM)); + const int max_combine_failures = 32; + if (try_combine || (num_combine_failures >= max_combine_failures)) { + HistogramSwap(&cur_combo, &histograms[idx1]); + histograms[idx2]->bit_cost_ = 0.; + } else { + ++num_combine_failures; + } + } + } + } + } + if (low_effort) { + // Update the bit_cost for the merged histograms (per bin index). + UpdateHistogramCost(histograms[idx1]); + } + } + HistogramCompactBins(image_histo); + return cur_combo; +} + +static uint32_t MyRand(uint32_t *seed) { + *seed *= 16807U; + if (*seed == 0) { + *seed = 1; + } + return *seed; +} + +// ----------------------------------------------------------------------------- +// Histogram pairs priority queue + +// Pair of histograms. Negative idx1 value means that pair is out-of-date. +typedef struct { + int idx1; + int idx2; + double cost_diff; + double cost_combo; +} HistogramPair; + +typedef struct { + HistogramPair* queue; + int size; + int max_size; +} HistoQueue; + +static int HistoQueueInit(HistoQueue* const histo_queue, const int max_index) { + histo_queue->size = 0; + // max_index^2 for the queue size is safe. If you look at + // HistogramCombineGreedy, and imagine that UpdateQueueFront always pushes + // data to the queue, you insert at most: + // - max_index*(max_index-1)/2 (the first two for loops) + // - max_index - 1 in the last for loop at the first iteration of the while + // loop, max_index - 2 at the second iteration ... therefore + // max_index*(max_index-1)/2 overall too + histo_queue->max_size = max_index * max_index; + // We allocate max_size + 1 because the last element at index "size" is + // used as temporary data (and it could be up to max_size). + histo_queue->queue = WebPSafeMalloc(histo_queue->max_size + 1, + sizeof(*histo_queue->queue)); + return histo_queue->queue != NULL; +} + +static void HistoQueueClear(HistoQueue* const histo_queue) { + assert(histo_queue != NULL); + WebPSafeFree(histo_queue->queue); +} + +static void SwapHistogramPairs(HistogramPair *p1, + HistogramPair *p2) { + const HistogramPair tmp = *p1; + *p1 = *p2; + *p2 = tmp; +} + +// Given a valid priority queue in range [0, queue_size) this function checks +// whether histo_queue[queue_size] should be accepted and swaps it with the +// front if it is smaller. Otherwise, it leaves it as is. +static void UpdateQueueFront(HistoQueue* const histo_queue) { + if (histo_queue->queue[histo_queue->size].cost_diff >= 0) return; + + if (histo_queue->queue[histo_queue->size].cost_diff < + histo_queue->queue[0].cost_diff) { + SwapHistogramPairs(histo_queue->queue, + histo_queue->queue + histo_queue->size); + } + ++histo_queue->size; + + // We cannot add more elements than the capacity. + // The allocation adds an extra element to the official capacity so that + // histo_queue->queue[histo_queue->max_size] is read/written within bound. + assert(histo_queue->size <= histo_queue->max_size); +} + +// ----------------------------------------------------------------------------- + +static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2, + HistogramPair* const pair) { + VP8LHistogram* h1; + VP8LHistogram* h2; + double sum_cost; + + if (idx1 > idx2) { + const int tmp = idx2; + idx2 = idx1; + idx1 = tmp; + } + pair->idx1 = idx1; + pair->idx2 = idx2; + h1 = histograms[idx1]; + h2 = histograms[idx2]; + sum_cost = h1->bit_cost_ + h2->bit_cost_; + pair->cost_combo = 0.; + GetCombinedHistogramEntropy(h1, h2, sum_cost, &pair->cost_combo); + pair->cost_diff = pair->cost_combo - sum_cost; +} + +// Combines histograms by continuously choosing the one with the highest cost +// reduction. +static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) { + int ok = 0; + int image_histo_size = image_histo->size; + int i, j; + VP8LHistogram** const histograms = image_histo->histograms; + // Indexes of remaining histograms. + int* const clusters = WebPSafeMalloc(image_histo_size, sizeof(*clusters)); + // Priority queue of histogram pairs. + HistoQueue histo_queue; + + if (!HistoQueueInit(&histo_queue, image_histo_size) || clusters == NULL) { + goto End; + } + + for (i = 0; i < image_histo_size; ++i) { + // Initialize clusters indexes. + clusters[i] = i; + for (j = i + 1; j < image_histo_size; ++j) { + // Initialize positions array. + PreparePair(histograms, i, j, &histo_queue.queue[histo_queue.size]); + UpdateQueueFront(&histo_queue); + } + } + + while (image_histo_size > 1 && histo_queue.size > 0) { + HistogramPair* copy_to; + const int idx1 = histo_queue.queue[0].idx1; + const int idx2 = histo_queue.queue[0].idx2; + VP8LHistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]); + histograms[idx1]->bit_cost_ = histo_queue.queue[0].cost_combo; + // Remove merged histogram. + for (i = 0; i + 1 < image_histo_size; ++i) { + if (clusters[i] >= idx2) { + clusters[i] = clusters[i + 1]; + } + } + --image_histo_size; + + // Remove pairs intersecting the just combined best pair. This will + // therefore pop the head of the queue. + copy_to = histo_queue.queue; + for (i = 0; i < histo_queue.size; ++i) { + HistogramPair* const p = histo_queue.queue + i; + if (p->idx1 == idx1 || p->idx2 == idx1 || + p->idx1 == idx2 || p->idx2 == idx2) { + // Do not copy the invalid pair. + continue; + } + if (p->cost_diff < histo_queue.queue[0].cost_diff) { + // Replace the top of the queue if we found better. + SwapHistogramPairs(histo_queue.queue, p); + } + SwapHistogramPairs(copy_to, p); + ++copy_to; + } + histo_queue.size = (int)(copy_to - histo_queue.queue); + + // Push new pairs formed with combined histogram to the queue. + for (i = 0; i < image_histo_size; ++i) { + if (clusters[i] != idx1) { + PreparePair(histograms, idx1, clusters[i], + &histo_queue.queue[histo_queue.size]); + UpdateQueueFront(&histo_queue); + } + } + } + // Move remaining histograms to the beginning of the array. + for (i = 0; i < image_histo_size; ++i) { + if (i != clusters[i]) { // swap the two histograms + HistogramSwap(&histograms[i], &histograms[clusters[i]]); + } + } + + image_histo->size = image_histo_size; + ok = 1; + + End: + WebPSafeFree(clusters); + HistoQueueClear(&histo_queue); + return ok; +} + +static void HistogramCombineStochastic(VP8LHistogramSet* const image_histo, + VP8LHistogram* tmp_histo, + VP8LHistogram* best_combo, + int quality, int min_cluster_size) { + int iter; + uint32_t seed = 0; + int tries_with_no_success = 0; + int image_histo_size = image_histo->size; + const int iter_mult = (quality < 25) ? 2 : 2 + (quality - 25) / 8; + const int outer_iters = image_histo_size * iter_mult; + const int num_pairs = image_histo_size / 2; + const int num_tries_no_success = outer_iters / 2; + VP8LHistogram** const histograms = image_histo->histograms; + + // Collapse similar histograms in 'image_histo'. + ++min_cluster_size; + for (iter = 0; + iter < outer_iters && image_histo_size >= min_cluster_size; + ++iter) { + double best_cost_diff = 0.; + int best_idx1 = -1, best_idx2 = 1; + int j; + const int num_tries = + (num_pairs < image_histo_size) ? num_pairs : image_histo_size; + seed += iter; + for (j = 0; j < num_tries; ++j) { + double curr_cost_diff; + // Choose two histograms at random and try to combine them. + const uint32_t idx1 = MyRand(&seed) % image_histo_size; + const uint32_t tmp = (j & 7) + 1; + const uint32_t diff = + (tmp < 3) ? tmp : MyRand(&seed) % (image_histo_size - 1); + const uint32_t idx2 = (idx1 + diff + 1) % image_histo_size; + if (idx1 == idx2) { + continue; + } + + // Calculate cost reduction on combining. + curr_cost_diff = HistogramAddEval(histograms[idx1], histograms[idx2], + tmp_histo, best_cost_diff); + if (curr_cost_diff < best_cost_diff) { // found a better pair? + HistogramSwap(&best_combo, &tmp_histo); + best_cost_diff = curr_cost_diff; + best_idx1 = idx1; + best_idx2 = idx2; + } + } + + if (best_idx1 >= 0) { + HistogramSwap(&best_combo, &histograms[best_idx1]); + // swap best_idx2 slot with last one (which is now unused) + --image_histo_size; + if (best_idx2 != image_histo_size) { + HistogramSwap(&histograms[image_histo_size], &histograms[best_idx2]); + histograms[image_histo_size] = NULL; + } + tries_with_no_success = 0; + } + if (++tries_with_no_success >= num_tries_no_success) { + break; + } + } + image_histo->size = image_histo_size; +} + +// ----------------------------------------------------------------------------- +// Histogram refinement + +// Find the best 'out' histogram for each of the 'in' histograms. +// Note: we assume that out[]->bit_cost_ is already up-to-date. +static void HistogramRemap(const VP8LHistogramSet* const in, + const VP8LHistogramSet* const out, + uint16_t* const symbols) { + int i; + VP8LHistogram** const in_histo = in->histograms; + VP8LHistogram** const out_histo = out->histograms; + const int in_size = in->size; + const int out_size = out->size; + if (out_size > 1) { + for (i = 0; i < in_size; ++i) { + int best_out = 0; + double best_bits = MAX_COST; + int k; + for (k = 0; k < out_size; ++k) { + const double cur_bits = + HistogramAddThresh(out_histo[k], in_histo[i], best_bits); + if (k == 0 || cur_bits < best_bits) { + best_bits = cur_bits; + best_out = k; + } + } + symbols[i] = best_out; + } + } else { + assert(out_size == 1); + for (i = 0; i < in_size; ++i) { + symbols[i] = 0; + } + } + + // Recompute each out based on raw and symbols. + for (i = 0; i < out_size; ++i) { + HistogramClear(out_histo[i]); + } + + for (i = 0; i < in_size; ++i) { + const int idx = symbols[i]; + VP8LHistogramAdd(in_histo[i], out_histo[idx], out_histo[idx]); + } +} + +static double GetCombineCostFactor(int histo_size, int quality) { + double combine_cost_factor = 0.16; + if (quality < 90) { + if (histo_size > 256) combine_cost_factor /= 2.; + if (histo_size > 512) combine_cost_factor /= 2.; + if (histo_size > 1024) combine_cost_factor /= 2.; + if (quality <= 50) combine_cost_factor /= 2.; + } + return combine_cost_factor; +} + +int VP8LGetHistoImageSymbols(int xsize, int ysize, + const VP8LBackwardRefs* const refs, + int quality, int low_effort, + int histo_bits, int cache_bits, + VP8LHistogramSet* const image_histo, + VP8LHistogramSet* const tmp_histos, + uint16_t* const histogram_symbols) { + int ok = 0; + const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1; + const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1; + const int image_histo_raw_size = histo_xsize * histo_ysize; + const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE; + + // The bin_map for every bin follows following semantics: + // bin_map[n][0] = num_histo; // The number of histograms in that bin. + // bin_map[n][1] = index of first histogram in that bin; + // bin_map[n][num_histo] = index of last histogram in that bin; + // bin_map[n][num_histo + 1] ... bin_map[n][bin_depth - 1] = unused indices. + const int bin_depth = image_histo_raw_size + 1; + int16_t* bin_map = NULL; + VP8LHistogramSet* const orig_histo = + VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits); + VP8LHistogram* cur_combo; + const int entropy_combine = + (orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100); + + if (orig_histo == NULL) goto Error; + + // Don't attempt linear bin-partition heuristic for: + // histograms of small sizes, as bin_map will be very sparse and; + // Maximum quality (q==100), to preserve the compression gains at that level. + if (entropy_combine) { + const int bin_map_size = bin_depth * entropy_combine_num_bins; + bin_map = (int16_t*)WebPSafeCalloc(bin_map_size, sizeof(*bin_map)); + if (bin_map == NULL) goto Error; + } + + // Construct the histograms from backward references. + HistogramBuild(xsize, histo_bits, refs, orig_histo); + // Copies the histograms and computes its bit_cost. + HistogramCopyAndAnalyze(orig_histo, image_histo); + + cur_combo = tmp_histos->histograms[1]; // pick up working slot + if (entropy_combine) { + const double combine_cost_factor = + GetCombineCostFactor(image_histo_raw_size, quality); + HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort); + // Collapse histograms with similar entropy. + cur_combo = HistogramCombineEntropyBin(image_histo, cur_combo, bin_map, + bin_depth, entropy_combine_num_bins, + combine_cost_factor, low_effort); + } + + // Don't combine the histograms using stochastic and greedy heuristics for + // low-effort compression mode. + if (!low_effort || !entropy_combine) { + const float x = quality / 100.f; + // cubic ramp between 1 and MAX_HISTO_GREEDY: + const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1)); + HistogramCombineStochastic(image_histo, tmp_histos->histograms[0], + cur_combo, quality, threshold_size); + if ((image_histo->size <= threshold_size) && + !HistogramCombineGreedy(image_histo)) { + goto Error; + } + } + + // TODO(vikasa): Optimize HistogramRemap for low-effort compression mode also. + // Find the optimal map from original histograms to the final ones. + HistogramRemap(orig_histo, image_histo, histogram_symbols); + + ok = 1; + + Error: + WebPSafeFree(bin_map); + VP8LFreeHistogramSet(orig_histo); + return ok; +} diff --git a/thirdparty/libwebp/enc/histogram.h b/thirdparty/libwebp/enc/histogram.h new file mode 100644 index 0000000000..d303d1d58b --- /dev/null +++ b/thirdparty/libwebp/enc/histogram.h @@ -0,0 +1,123 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Author: Jyrki Alakuijala (jyrki@google.com) +// +// Models the histograms of literal and distance codes. + +#ifndef WEBP_ENC_HISTOGRAM_H_ +#define WEBP_ENC_HISTOGRAM_H_ + +#include <string.h> + +#include "./backward_references.h" +#include "../webp/format_constants.h" +#include "../webp/types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Not a trivial literal symbol. +#define VP8L_NON_TRIVIAL_SYM (0xffffffff) + +// A simple container for histograms of data. +typedef struct { + // literal_ contains green literal, palette-code and + // copy-length-prefix histogram + uint32_t* literal_; // Pointer to the allocated buffer for literal. + uint32_t red_[NUM_LITERAL_CODES]; + uint32_t blue_[NUM_LITERAL_CODES]; + uint32_t alpha_[NUM_LITERAL_CODES]; + // Backward reference prefix-code histogram. + uint32_t distance_[NUM_DISTANCE_CODES]; + int palette_code_bits_; + uint32_t trivial_symbol_; // True, if histograms for Red, Blue & Alpha + // literal symbols are single valued. + double bit_cost_; // cached value of bit cost. + double literal_cost_; // Cached values of dominant entropy costs: + double red_cost_; // literal, red & blue. + double blue_cost_; +} VP8LHistogram; + +// Collection of histograms with fixed capacity, allocated as one +// big memory chunk. Can be destroyed by calling WebPSafeFree(). +typedef struct { + int size; // number of slots currently in use + int max_size; // maximum capacity + VP8LHistogram** histograms; +} VP8LHistogramSet; + +// Create the histogram. +// +// The input data is the PixOrCopy data, which models the literals, stop +// codes and backward references (both distances and lengths). Also: if +// palette_code_bits is >= 0, initialize the histogram with this value. +void VP8LHistogramCreate(VP8LHistogram* const p, + const VP8LBackwardRefs* const refs, + int palette_code_bits); + +// Return the size of the histogram for a given palette_code_bits. +int VP8LGetHistogramSize(int palette_code_bits); + +// Set the palette_code_bits and reset the stats. +void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits); + +// Collect all the references into a histogram (without reset) +void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs, + VP8LHistogram* const histo); + +// Free the memory allocated for the histogram. +void VP8LFreeHistogram(VP8LHistogram* const histo); + +// Free the memory allocated for the histogram set. +void VP8LFreeHistogramSet(VP8LHistogramSet* const histo); + +// Allocate an array of pointer to histograms, allocated and initialized +// using 'cache_bits'. Return NULL in case of memory error. +VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits); + +// Allocate and initialize histogram object with specified 'cache_bits'. +// Returns NULL in case of memory error. +// Special case of VP8LAllocateHistogramSet, with size equals 1. +VP8LHistogram* VP8LAllocateHistogram(int cache_bits); + +// Accumulate a token 'v' into a histogram. +void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, + const PixOrCopy* const v); + +static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) { + return NUM_LITERAL_CODES + NUM_LENGTH_CODES + + ((palette_code_bits > 0) ? (1 << palette_code_bits) : 0); +} + +// Builds the histogram image. +int VP8LGetHistoImageSymbols(int xsize, int ysize, + const VP8LBackwardRefs* const refs, + int quality, int low_effort, + int histogram_bits, int cache_bits, + VP8LHistogramSet* const image_in, + VP8LHistogramSet* const tmp_histos, + uint16_t* const histogram_symbols); + +// Returns the entropy for the symbols in the input array. +// Also sets trivial_symbol to the code value, if the array has only one code +// value. Otherwise, set it to VP8L_NON_TRIVIAL_SYM. +double VP8LBitsEntropy(const uint32_t* const array, int n, + uint32_t* const trivial_symbol); + +// Estimate how many bits the combined entropy of literals and distance +// approximately maps to. +double VP8LHistogramEstimateBits(const VP8LHistogram* const p); + +#ifdef __cplusplus +} +#endif + +#endif // WEBP_ENC_HISTOGRAM_H_ diff --git a/thirdparty/libwebp/enc/iterator.c b/thirdparty/libwebp/enc/iterator.c new file mode 100644 index 0000000000..99d960a547 --- /dev/null +++ b/thirdparty/libwebp/enc/iterator.c @@ -0,0 +1,456 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// VP8Iterator: block iterator +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <string.h> + +#include "./vp8enci.h" + +//------------------------------------------------------------------------------ +// VP8Iterator +//------------------------------------------------------------------------------ + +static void InitLeft(VP8EncIterator* const it) { + it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = + (it->y_ > 0) ? 129 : 127; + memset(it->y_left_, 129, 16); + memset(it->u_left_, 129, 8); + memset(it->v_left_, 129, 8); + it->left_nz_[8] = 0; +} + +static void InitTop(VP8EncIterator* const it) { + const VP8Encoder* const enc = it->enc_; + const size_t top_size = enc->mb_w_ * 16; + memset(enc->y_top_, 127, 2 * top_size); + memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_)); +} + +void VP8IteratorSetRow(VP8EncIterator* const it, int y) { + VP8Encoder* const enc = it->enc_; + it->x_ = 0; + it->y_ = y; + it->bw_ = &enc->parts_[y & (enc->num_parts_ - 1)]; + it->preds_ = enc->preds_ + y * 4 * enc->preds_w_; + it->nz_ = enc->nz_; + it->mb_ = enc->mb_info_ + y * enc->mb_w_; + it->y_top_ = enc->y_top_; + it->uv_top_ = enc->uv_top_; + InitLeft(it); +} + +void VP8IteratorReset(VP8EncIterator* const it) { + VP8Encoder* const enc = it->enc_; + VP8IteratorSetRow(it, 0); + VP8IteratorSetCountDown(it, enc->mb_w_ * enc->mb_h_); // default + InitTop(it); + InitLeft(it); + memset(it->bit_count_, 0, sizeof(it->bit_count_)); + it->do_trellis_ = 0; +} + +void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down) { + it->count_down_ = it->count_down0_ = count_down; +} + +int VP8IteratorIsDone(const VP8EncIterator* const it) { + return (it->count_down_ <= 0); +} + +void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) { + it->enc_ = enc; + it->y_stride_ = enc->pic_->y_stride; + it->uv_stride_ = enc->pic_->uv_stride; + it->yuv_in_ = (uint8_t*)WEBP_ALIGN(it->yuv_mem_); + it->yuv_out_ = it->yuv_in_ + YUV_SIZE_ENC; + it->yuv_out2_ = it->yuv_out_ + YUV_SIZE_ENC; + it->yuv_p_ = it->yuv_out2_ + YUV_SIZE_ENC; + it->lf_stats_ = enc->lf_stats_; + it->percent0_ = enc->percent_; + it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1); + it->u_left_ = it->y_left_ + 16 + 16; + it->v_left_ = it->u_left_ + 16; + VP8IteratorReset(it); +} + +int VP8IteratorProgress(const VP8EncIterator* const it, int delta) { + VP8Encoder* const enc = it->enc_; + if (delta && enc->pic_->progress_hook != NULL) { + const int done = it->count_down0_ - it->count_down_; + const int percent = (it->count_down0_ <= 0) + ? it->percent0_ + : it->percent0_ + delta * done / it->count_down0_; + return WebPReportProgress(enc->pic_, percent, &enc->percent_); + } + return 1; +} + +//------------------------------------------------------------------------------ +// Import the source samples into the cache. Takes care of replicating +// boundary pixels if necessary. + +static WEBP_INLINE int MinSize(int a, int b) { return (a < b) ? a : b; } + +static void ImportBlock(const uint8_t* src, int src_stride, + uint8_t* dst, int w, int h, int size) { + int i; + for (i = 0; i < h; ++i) { + memcpy(dst, src, w); + if (w < size) { + memset(dst + w, dst[w - 1], size - w); + } + dst += BPS; + src += src_stride; + } + for (i = h; i < size; ++i) { + memcpy(dst, dst - BPS, size); + dst += BPS; + } +} + +static void ImportLine(const uint8_t* src, int src_stride, + uint8_t* dst, int len, int total_len) { + int i; + for (i = 0; i < len; ++i, src += src_stride) dst[i] = *src; + for (; i < total_len; ++i) dst[i] = dst[len - 1]; +} + +void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32) { + const VP8Encoder* const enc = it->enc_; + const int x = it->x_, y = it->y_; + const WebPPicture* const pic = enc->pic_; + const uint8_t* const ysrc = pic->y + (y * pic->y_stride + x) * 16; + const uint8_t* const usrc = pic->u + (y * pic->uv_stride + x) * 8; + const uint8_t* const vsrc = pic->v + (y * pic->uv_stride + x) * 8; + const int w = MinSize(pic->width - x * 16, 16); + const int h = MinSize(pic->height - y * 16, 16); + const int uv_w = (w + 1) >> 1; + const int uv_h = (h + 1) >> 1; + + ImportBlock(ysrc, pic->y_stride, it->yuv_in_ + Y_OFF_ENC, w, h, 16); + ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF_ENC, uv_w, uv_h, 8); + ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF_ENC, uv_w, uv_h, 8); + + if (tmp_32 == NULL) return; + + // Import source (uncompressed) samples into boundary. + if (x == 0) { + InitLeft(it); + } else { + if (y == 0) { + it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = 127; + } else { + it->y_left_[-1] = ysrc[- 1 - pic->y_stride]; + it->u_left_[-1] = usrc[- 1 - pic->uv_stride]; + it->v_left_[-1] = vsrc[- 1 - pic->uv_stride]; + } + ImportLine(ysrc - 1, pic->y_stride, it->y_left_, h, 16); + ImportLine(usrc - 1, pic->uv_stride, it->u_left_, uv_h, 8); + ImportLine(vsrc - 1, pic->uv_stride, it->v_left_, uv_h, 8); + } + + it->y_top_ = tmp_32 + 0; + it->uv_top_ = tmp_32 + 16; + if (y == 0) { + memset(tmp_32, 127, 32 * sizeof(*tmp_32)); + } else { + ImportLine(ysrc - pic->y_stride, 1, tmp_32, w, 16); + ImportLine(usrc - pic->uv_stride, 1, tmp_32 + 16, uv_w, 8); + ImportLine(vsrc - pic->uv_stride, 1, tmp_32 + 16 + 8, uv_w, 8); + } +} + +//------------------------------------------------------------------------------ +// Copy back the compressed samples into user space if requested. + +static void ExportBlock(const uint8_t* src, uint8_t* dst, int dst_stride, + int w, int h) { + while (h-- > 0) { + memcpy(dst, src, w); + dst += dst_stride; + src += BPS; + } +} + +void VP8IteratorExport(const VP8EncIterator* const it) { + const VP8Encoder* const enc = it->enc_; + if (enc->config_->show_compressed) { + const int x = it->x_, y = it->y_; + const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC; + const uint8_t* const usrc = it->yuv_out_ + U_OFF_ENC; + const uint8_t* const vsrc = it->yuv_out_ + V_OFF_ENC; + const WebPPicture* const pic = enc->pic_; + uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16; + uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8; + uint8_t* const vdst = pic->v + (y * pic->uv_stride + x) * 8; + int w = (pic->width - x * 16); + int h = (pic->height - y * 16); + + if (w > 16) w = 16; + if (h > 16) h = 16; + + // Luma plane + ExportBlock(ysrc, ydst, pic->y_stride, w, h); + + { // U/V planes + const int uv_w = (w + 1) >> 1; + const int uv_h = (h + 1) >> 1; + ExportBlock(usrc, udst, pic->uv_stride, uv_w, uv_h); + ExportBlock(vsrc, vdst, pic->uv_stride, uv_w, uv_h); + } + } +} + +//------------------------------------------------------------------------------ +// Non-zero contexts setup/teardown + +// Nz bits: +// 0 1 2 3 Y +// 4 5 6 7 +// 8 9 10 11 +// 12 13 14 15 +// 16 17 U +// 18 19 +// 20 21 V +// 22 23 +// 24 DC-intra16 + +// Convert packed context to byte array +#define BIT(nz, n) (!!((nz) & (1 << (n)))) + +void VP8IteratorNzToBytes(VP8EncIterator* const it) { + const int tnz = it->nz_[0], lnz = it->nz_[-1]; + int* const top_nz = it->top_nz_; + int* const left_nz = it->left_nz_; + + // Top-Y + top_nz[0] = BIT(tnz, 12); + top_nz[1] = BIT(tnz, 13); + top_nz[2] = BIT(tnz, 14); + top_nz[3] = BIT(tnz, 15); + // Top-U + top_nz[4] = BIT(tnz, 18); + top_nz[5] = BIT(tnz, 19); + // Top-V + top_nz[6] = BIT(tnz, 22); + top_nz[7] = BIT(tnz, 23); + // DC + top_nz[8] = BIT(tnz, 24); + + // left-Y + left_nz[0] = BIT(lnz, 3); + left_nz[1] = BIT(lnz, 7); + left_nz[2] = BIT(lnz, 11); + left_nz[3] = BIT(lnz, 15); + // left-U + left_nz[4] = BIT(lnz, 17); + left_nz[5] = BIT(lnz, 19); + // left-V + left_nz[6] = BIT(lnz, 21); + left_nz[7] = BIT(lnz, 23); + // left-DC is special, iterated separately +} + +void VP8IteratorBytesToNz(VP8EncIterator* const it) { + uint32_t nz = 0; + const int* const top_nz = it->top_nz_; + const int* const left_nz = it->left_nz_; + // top + nz |= (top_nz[0] << 12) | (top_nz[1] << 13); + nz |= (top_nz[2] << 14) | (top_nz[3] << 15); + nz |= (top_nz[4] << 18) | (top_nz[5] << 19); + nz |= (top_nz[6] << 22) | (top_nz[7] << 23); + nz |= (top_nz[8] << 24); // we propagate the _top_ bit, esp. for intra4 + // left + nz |= (left_nz[0] << 3) | (left_nz[1] << 7); + nz |= (left_nz[2] << 11); + nz |= (left_nz[4] << 17) | (left_nz[6] << 21); + + *it->nz_ = nz; +} + +#undef BIT + +//------------------------------------------------------------------------------ +// Advance to the next position, doing the bookkeeping. + +void VP8IteratorSaveBoundary(VP8EncIterator* const it) { + VP8Encoder* const enc = it->enc_; + const int x = it->x_, y = it->y_; + const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC; + const uint8_t* const uvsrc = it->yuv_out_ + U_OFF_ENC; + if (x < enc->mb_w_ - 1) { // left + int i; + for (i = 0; i < 16; ++i) { + it->y_left_[i] = ysrc[15 + i * BPS]; + } + for (i = 0; i < 8; ++i) { + it->u_left_[i] = uvsrc[7 + i * BPS]; + it->v_left_[i] = uvsrc[15 + i * BPS]; + } + // top-left (before 'top'!) + it->y_left_[-1] = it->y_top_[15]; + it->u_left_[-1] = it->uv_top_[0 + 7]; + it->v_left_[-1] = it->uv_top_[8 + 7]; + } + if (y < enc->mb_h_ - 1) { // top + memcpy(it->y_top_, ysrc + 15 * BPS, 16); + memcpy(it->uv_top_, uvsrc + 7 * BPS, 8 + 8); + } +} + +int VP8IteratorNext(VP8EncIterator* const it) { + it->preds_ += 4; + it->mb_ += 1; + it->nz_ += 1; + it->y_top_ += 16; + it->uv_top_ += 16; + it->x_ += 1; + if (it->x_ == it->enc_->mb_w_) { + VP8IteratorSetRow(it, ++it->y_); + } + return (0 < --it->count_down_); +} + +//------------------------------------------------------------------------------ +// Helper function to set mode properties + +void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) { + uint8_t* preds = it->preds_; + int y; + for (y = 0; y < 4; ++y) { + memset(preds, mode, 4); + preds += it->enc_->preds_w_; + } + it->mb_->type_ = 1; +} + +void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes) { + uint8_t* preds = it->preds_; + int y; + for (y = 4; y > 0; --y) { + memcpy(preds, modes, 4 * sizeof(*modes)); + preds += it->enc_->preds_w_; + modes += 4; + } + it->mb_->type_ = 0; +} + +void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode) { + it->mb_->uv_mode_ = mode; +} + +void VP8SetSkip(const VP8EncIterator* const it, int skip) { + it->mb_->skip_ = skip; +} + +void VP8SetSegment(const VP8EncIterator* const it, int segment) { + it->mb_->segment_ = segment; +} + +//------------------------------------------------------------------------------ +// Intra4x4 sub-blocks iteration +// +// We store and update the boundary samples into an array of 37 pixels. They +// are updated as we iterate and reconstructs each intra4x4 blocks in turn. +// The position of the samples has the following snake pattern: +// +// 16|17 18 19 20|21 22 23 24|25 26 27 28|29 30 31 32|33 34 35 36 <- Top-right +// --+-----------+-----------+-----------+-----------+ +// 15| 19| 23| 27| 31| +// 14| 18| 22| 26| 30| +// 13| 17| 21| 25| 29| +// 12|13 14 15 16|17 18 19 20|21 22 23 24|25 26 27 28| +// --+-----------+-----------+-----------+-----------+ +// 11| 15| 19| 23| 27| +// 10| 14| 18| 22| 26| +// 9| 13| 17| 21| 25| +// 8| 9 10 11 12|13 14 15 16|17 18 19 20|21 22 23 24| +// --+-----------+-----------+-----------+-----------+ +// 7| 11| 15| 19| 23| +// 6| 10| 14| 18| 22| +// 5| 9| 13| 17| 21| +// 4| 5 6 7 8| 9 10 11 12|13 14 15 16|17 18 19 20| +// --+-----------+-----------+-----------+-----------+ +// 3| 7| 11| 15| 19| +// 2| 6| 10| 14| 18| +// 1| 5| 9| 13| 17| +// 0| 1 2 3 4| 5 6 7 8| 9 10 11 12|13 14 15 16| +// --+-----------+-----------+-----------+-----------+ + +// Array to record the position of the top sample to pass to the prediction +// functions in dsp.c. +static const uint8_t VP8TopLeftI4[16] = { + 17, 21, 25, 29, + 13, 17, 21, 25, + 9, 13, 17, 21, + 5, 9, 13, 17 +}; + +void VP8IteratorStartI4(VP8EncIterator* const it) { + const VP8Encoder* const enc = it->enc_; + int i; + + it->i4_ = 0; // first 4x4 sub-block + it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[0]; + + // Import the boundary samples + for (i = 0; i < 17; ++i) { // left + it->i4_boundary_[i] = it->y_left_[15 - i]; + } + for (i = 0; i < 16; ++i) { // top + it->i4_boundary_[17 + i] = it->y_top_[i]; + } + // top-right samples have a special case on the far right of the picture + if (it->x_ < enc->mb_w_ - 1) { + for (i = 16; i < 16 + 4; ++i) { + it->i4_boundary_[17 + i] = it->y_top_[i]; + } + } else { // else, replicate the last valid pixel four times + for (i = 16; i < 16 + 4; ++i) { + it->i4_boundary_[17 + i] = it->i4_boundary_[17 + 15]; + } + } + VP8IteratorNzToBytes(it); // import the non-zero context +} + +int VP8IteratorRotateI4(VP8EncIterator* const it, + const uint8_t* const yuv_out) { + const uint8_t* const blk = yuv_out + VP8Scan[it->i4_]; + uint8_t* const top = it->i4_top_; + int i; + + // Update the cache with 7 fresh samples + for (i = 0; i <= 3; ++i) { + top[-4 + i] = blk[i + 3 * BPS]; // store future top samples + } + if ((it->i4_ & 3) != 3) { // if not on the right sub-blocks #3, #7, #11, #15 + for (i = 0; i <= 2; ++i) { // store future left samples + top[i] = blk[3 + (2 - i) * BPS]; + } + } else { // else replicate top-right samples, as says the specs. + for (i = 0; i <= 3; ++i) { + top[i] = top[i + 4]; + } + } + // move pointers to next sub-block + ++it->i4_; + if (it->i4_ == 16) { // we're done + return 0; + } + + it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[it->i4_]; + return 1; +} + +//------------------------------------------------------------------------------ + diff --git a/thirdparty/libwebp/enc/near_lossless.c b/thirdparty/libwebp/enc/near_lossless.c new file mode 100644 index 0000000000..f4ab91f571 --- /dev/null +++ b/thirdparty/libwebp/enc/near_lossless.c @@ -0,0 +1,122 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Near-lossless image preprocessing adjusts pixel values to help +// compressibility with a guarantee of maximum deviation between original and +// resulting pixel values. +// +// Author: Jyrki Alakuijala (jyrki@google.com) +// Converted to C by Aleksander Kramarz (akramarz@google.com) + +#include <assert.h> +#include <stdlib.h> + +#include "../dsp/lossless.h" +#include "../utils/utils.h" +#include "./vp8enci.h" + +#define MIN_DIM_FOR_NEAR_LOSSLESS 64 +#define MAX_LIMIT_BITS 5 + +// Quantizes the value up or down to a multiple of 1<<bits (or to 255), +// choosing the closer one, resolving ties using bankers' rounding. +static int FindClosestDiscretized(int a, int bits) { + const int mask = (1 << bits) - 1; + const int biased = a + (mask >> 1) + ((a >> bits) & 1); + assert(bits > 0); + if (biased > 0xff) return 0xff; + return biased & ~mask; +} + +// Applies FindClosestDiscretized to all channels of pixel. +static uint32_t ClosestDiscretizedArgb(uint32_t a, int bits) { + return + (FindClosestDiscretized(a >> 24, bits) << 24) | + (FindClosestDiscretized((a >> 16) & 0xff, bits) << 16) | + (FindClosestDiscretized((a >> 8) & 0xff, bits) << 8) | + (FindClosestDiscretized(a & 0xff, bits)); +} + +// Checks if distance between corresponding channel values of pixels a and b +// is within the given limit. +static int IsNear(uint32_t a, uint32_t b, int limit) { + int k; + for (k = 0; k < 4; ++k) { + const int delta = + (int)((a >> (k * 8)) & 0xff) - (int)((b >> (k * 8)) & 0xff); + if (delta >= limit || delta <= -limit) { + return 0; + } + } + return 1; +} + +static int IsSmooth(const uint32_t* const prev_row, + const uint32_t* const curr_row, + const uint32_t* const next_row, + int ix, int limit) { + // Check that all pixels in 4-connected neighborhood are smooth. + return (IsNear(curr_row[ix], curr_row[ix - 1], limit) && + IsNear(curr_row[ix], curr_row[ix + 1], limit) && + IsNear(curr_row[ix], prev_row[ix], limit) && + IsNear(curr_row[ix], next_row[ix], limit)); +} + +// Adjusts pixel values of image with given maximum error. +static void NearLossless(int xsize, int ysize, uint32_t* argb, + int limit_bits, uint32_t* copy_buffer) { + int x, y; + const int limit = 1 << limit_bits; + uint32_t* prev_row = copy_buffer; + uint32_t* curr_row = prev_row + xsize; + uint32_t* next_row = curr_row + xsize; + memcpy(copy_buffer, argb, xsize * 2 * sizeof(argb[0])); + + for (y = 1; y < ysize - 1; ++y) { + uint32_t* const curr_argb_row = argb + y * xsize; + uint32_t* const next_argb_row = curr_argb_row + xsize; + memcpy(next_row, next_argb_row, xsize * sizeof(argb[0])); + for (x = 1; x < xsize - 1; ++x) { + if (!IsSmooth(prev_row, curr_row, next_row, x, limit)) { + curr_argb_row[x] = ClosestDiscretizedArgb(curr_row[x], limit_bits); + } + } + { + // Three-way swap. + uint32_t* const temp = prev_row; + prev_row = curr_row; + curr_row = next_row; + next_row = temp; + } + } +} + +int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality) { + int i; + uint32_t* const copy_buffer = + (uint32_t*)WebPSafeMalloc(xsize * 3, sizeof(*copy_buffer)); + const int limit_bits = VP8LNearLosslessBits(quality); + assert(argb != NULL); + assert(limit_bits >= 0); + assert(limit_bits <= MAX_LIMIT_BITS); + if (copy_buffer == NULL) { + return 0; + } + // For small icon images, don't attempt to apply near-lossless compression. + if (xsize < MIN_DIM_FOR_NEAR_LOSSLESS && ysize < MIN_DIM_FOR_NEAR_LOSSLESS) { + WebPSafeFree(copy_buffer); + return 1; + } + + for (i = limit_bits; i != 0; --i) { + NearLossless(xsize, ysize, argb, i, copy_buffer); + } + WebPSafeFree(copy_buffer); + return 1; +} diff --git a/thirdparty/libwebp/enc/picture.c b/thirdparty/libwebp/enc/picture.c new file mode 100644 index 0000000000..d9befbc47d --- /dev/null +++ b/thirdparty/libwebp/enc/picture.c @@ -0,0 +1,292 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// WebPPicture class basis +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <assert.h> +#include <stdlib.h> + +#include "./vp8enci.h" +#include "../dsp/dsp.h" +#include "../utils/utils.h" + +//------------------------------------------------------------------------------ +// WebPPicture +//------------------------------------------------------------------------------ + +static int DummyWriter(const uint8_t* data, size_t data_size, + const WebPPicture* const picture) { + // The following are to prevent 'unused variable' error message. + (void)data; + (void)data_size; + (void)picture; + return 1; +} + +int WebPPictureInitInternal(WebPPicture* picture, int version) { + if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) { + return 0; // caller/system version mismatch! + } + if (picture != NULL) { + memset(picture, 0, sizeof(*picture)); + picture->writer = DummyWriter; + WebPEncodingSetError(picture, VP8_ENC_OK); + } + return 1; +} + +//------------------------------------------------------------------------------ + +static void WebPPictureResetBufferARGB(WebPPicture* const picture) { + picture->memory_argb_ = NULL; + picture->argb = NULL; + picture->argb_stride = 0; +} + +static void WebPPictureResetBufferYUVA(WebPPicture* const picture) { + picture->memory_ = NULL; + picture->y = picture->u = picture->v = picture->a = NULL; + picture->y_stride = picture->uv_stride = 0; + picture->a_stride = 0; +} + +void WebPPictureResetBuffers(WebPPicture* const picture) { + WebPPictureResetBufferARGB(picture); + WebPPictureResetBufferYUVA(picture); +} + +int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) { + void* memory; + const uint64_t argb_size = (uint64_t)width * height; + + assert(picture != NULL); + + WebPSafeFree(picture->memory_argb_); + WebPPictureResetBufferARGB(picture); + + if (width <= 0 || height <= 0) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); + } + // allocate a new buffer. + memory = WebPSafeMalloc(argb_size, sizeof(*picture->argb)); + if (memory == NULL) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); + } + // TODO(skal): align plane to cache line? + picture->memory_argb_ = memory; + picture->argb = (uint32_t*)memory; + picture->argb_stride = width; + return 1; +} + +int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) { + const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK; + const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT; + const int y_stride = width; + const int uv_width = (width + 1) >> 1; + const int uv_height = (height + 1) >> 1; + const int uv_stride = uv_width; + int a_width, a_stride; + uint64_t y_size, uv_size, a_size, total_size; + uint8_t* mem; + + assert(picture != NULL); + + WebPSafeFree(picture->memory_); + WebPPictureResetBufferYUVA(picture); + + if (uv_csp != WEBP_YUV420) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); + } + + // alpha + a_width = has_alpha ? width : 0; + a_stride = a_width; + y_size = (uint64_t)y_stride * height; + uv_size = (uint64_t)uv_stride * uv_height; + a_size = (uint64_t)a_stride * height; + + total_size = y_size + a_size + 2 * uv_size; + + // Security and validation checks + if (width <= 0 || height <= 0 || // luma/alpha param error + uv_width < 0 || uv_height < 0) { // u/v param error + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); + } + // allocate a new buffer. + mem = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*mem)); + if (mem == NULL) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); + } + + // From now on, we're in the clear, we can no longer fail... + picture->memory_ = (void*)mem; + picture->y_stride = y_stride; + picture->uv_stride = uv_stride; + picture->a_stride = a_stride; + + // TODO(skal): we could align the y/u/v planes and adjust stride. + picture->y = mem; + mem += y_size; + + picture->u = mem; + mem += uv_size; + picture->v = mem; + mem += uv_size; + + if (a_size > 0) { + picture->a = mem; + mem += a_size; + } + (void)mem; // makes the static analyzer happy + return 1; +} + +int WebPPictureAlloc(WebPPicture* picture) { + if (picture != NULL) { + const int width = picture->width; + const int height = picture->height; + + WebPPictureFree(picture); // erase previous buffer + + if (!picture->use_argb) { + return WebPPictureAllocYUVA(picture, width, height); + } else { + return WebPPictureAllocARGB(picture, width, height); + } + } + return 1; +} + +void WebPPictureFree(WebPPicture* picture) { + if (picture != NULL) { + WebPSafeFree(picture->memory_); + WebPSafeFree(picture->memory_argb_); + WebPPictureResetBuffers(picture); + } +} + +//------------------------------------------------------------------------------ +// WebPMemoryWriter: Write-to-memory + +void WebPMemoryWriterInit(WebPMemoryWriter* writer) { + writer->mem = NULL; + writer->size = 0; + writer->max_size = 0; +} + +int WebPMemoryWrite(const uint8_t* data, size_t data_size, + const WebPPicture* picture) { + WebPMemoryWriter* const w = (WebPMemoryWriter*)picture->custom_ptr; + uint64_t next_size; + if (w == NULL) { + return 1; + } + next_size = (uint64_t)w->size + data_size; + if (next_size > w->max_size) { + uint8_t* new_mem; + uint64_t next_max_size = 2ULL * w->max_size; + if (next_max_size < next_size) next_max_size = next_size; + if (next_max_size < 8192ULL) next_max_size = 8192ULL; + new_mem = (uint8_t*)WebPSafeMalloc(next_max_size, 1); + if (new_mem == NULL) { + return 0; + } + if (w->size > 0) { + memcpy(new_mem, w->mem, w->size); + } + WebPSafeFree(w->mem); + w->mem = new_mem; + // down-cast is ok, thanks to WebPSafeMalloc + w->max_size = (size_t)next_max_size; + } + if (data_size > 0) { + memcpy(w->mem + w->size, data, data_size); + w->size += data_size; + } + return 1; +} + +void WebPMemoryWriterClear(WebPMemoryWriter* writer) { + if (writer != NULL) { + WebPSafeFree(writer->mem); + writer->mem = NULL; + writer->size = 0; + writer->max_size = 0; + } +} + +//------------------------------------------------------------------------------ +// Simplest high-level calls: + +typedef int (*Importer)(WebPPicture* const, const uint8_t* const, int); + +static size_t Encode(const uint8_t* rgba, int width, int height, int stride, + Importer import, float quality_factor, int lossless, + uint8_t** output) { + WebPPicture pic; + WebPConfig config; + WebPMemoryWriter wrt; + int ok; + + if (output == NULL) return 0; + + if (!WebPConfigPreset(&config, WEBP_PRESET_DEFAULT, quality_factor) || + !WebPPictureInit(&pic)) { + return 0; // shouldn't happen, except if system installation is broken + } + + config.lossless = !!lossless; + pic.use_argb = !!lossless; + pic.width = width; + pic.height = height; + pic.writer = WebPMemoryWrite; + pic.custom_ptr = &wrt; + WebPMemoryWriterInit(&wrt); + + ok = import(&pic, rgba, stride) && WebPEncode(&config, &pic); + WebPPictureFree(&pic); + if (!ok) { + WebPMemoryWriterClear(&wrt); + *output = NULL; + return 0; + } + *output = wrt.mem; + return wrt.size; +} + +#define ENCODE_FUNC(NAME, IMPORTER) \ +size_t NAME(const uint8_t* in, int w, int h, int bps, float q, \ + uint8_t** out) { \ + return Encode(in, w, h, bps, IMPORTER, q, 0, out); \ +} + +ENCODE_FUNC(WebPEncodeRGB, WebPPictureImportRGB) +ENCODE_FUNC(WebPEncodeBGR, WebPPictureImportBGR) +ENCODE_FUNC(WebPEncodeRGBA, WebPPictureImportRGBA) +ENCODE_FUNC(WebPEncodeBGRA, WebPPictureImportBGRA) + +#undef ENCODE_FUNC + +#define LOSSLESS_DEFAULT_QUALITY 70. +#define LOSSLESS_ENCODE_FUNC(NAME, IMPORTER) \ +size_t NAME(const uint8_t* in, int w, int h, int bps, uint8_t** out) { \ + return Encode(in, w, h, bps, IMPORTER, LOSSLESS_DEFAULT_QUALITY, 1, out); \ +} + +LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGB, WebPPictureImportRGB) +LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR) +LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGBA, WebPPictureImportRGBA) +LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGRA, WebPPictureImportBGRA) + +#undef LOSSLESS_ENCODE_FUNC + +//------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/enc/picture_csp.c b/thirdparty/libwebp/enc/picture_csp.c new file mode 100644 index 0000000000..607a6240b0 --- /dev/null +++ b/thirdparty/libwebp/enc/picture_csp.c @@ -0,0 +1,1168 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// WebPPicture utils for colorspace conversion +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <assert.h> +#include <stdlib.h> +#include <math.h> + +#include "./vp8enci.h" +#include "../utils/random.h" +#include "../utils/utils.h" +#include "../dsp/yuv.h" + +// Uncomment to disable gamma-compression during RGB->U/V averaging +#define USE_GAMMA_COMPRESSION + +// If defined, use table to compute x / alpha. +#define USE_INVERSE_ALPHA_TABLE + +static const union { + uint32_t argb; + uint8_t bytes[4]; +} test_endian = { 0xff000000u }; +#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff) + +//------------------------------------------------------------------------------ +// Detection of non-trivial transparency + +// Returns true if alpha[] has non-0xff values. +static int CheckNonOpaque(const uint8_t* alpha, int width, int height, + int x_step, int y_step) { + if (alpha == NULL) return 0; + while (height-- > 0) { + int x; + for (x = 0; x < width * x_step; x += x_step) { + if (alpha[x] != 0xff) return 1; // TODO(skal): check 4/8 bytes at a time. + } + alpha += y_step; + } + return 0; +} + +// Checking for the presence of non-opaque alpha. +int WebPPictureHasTransparency(const WebPPicture* picture) { + if (picture == NULL) return 0; + if (!picture->use_argb) { + return CheckNonOpaque(picture->a, picture->width, picture->height, + 1, picture->a_stride); + } else { + int x, y; + const uint32_t* argb = picture->argb; + if (argb == NULL) return 0; + for (y = 0; y < picture->height; ++y) { + for (x = 0; x < picture->width; ++x) { + if (argb[x] < 0xff000000u) return 1; // test any alpha values != 0xff + } + argb += picture->argb_stride; + } + } + return 0; +} + +//------------------------------------------------------------------------------ +// Code for gamma correction + +#if defined(USE_GAMMA_COMPRESSION) + +// gamma-compensates loss of resolution during chroma subsampling +#define kGamma 0.80 // for now we use a different gamma value than kGammaF +#define kGammaFix 12 // fixed-point precision for linear values +#define kGammaScale ((1 << kGammaFix) - 1) +#define kGammaTabFix 7 // fixed-point fractional bits precision +#define kGammaTabScale (1 << kGammaTabFix) +#define kGammaTabRounder (kGammaTabScale >> 1) +#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix)) + +static int kLinearToGammaTab[kGammaTabSize + 1]; +static uint16_t kGammaToLinearTab[256]; +static volatile int kGammaTablesOk = 0; + +static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) { + if (!kGammaTablesOk) { + int v; + const double scale = (double)(1 << kGammaTabFix) / kGammaScale; + const double norm = 1. / 255.; + for (v = 0; v <= 255; ++v) { + kGammaToLinearTab[v] = + (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5); + } + for (v = 0; v <= kGammaTabSize; ++v) { + kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5); + } + kGammaTablesOk = 1; + } +} + +static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { + return kGammaToLinearTab[v]; +} + +static WEBP_INLINE int Interpolate(int v) { + const int tab_pos = v >> (kGammaTabFix + 2); // integer part + const int x = v & ((kGammaTabScale << 2) - 1); // fractional part + const int v0 = kLinearToGammaTab[tab_pos]; + const int v1 = kLinearToGammaTab[tab_pos + 1]; + const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate + assert(tab_pos + 1 < kGammaTabSize + 1); + return y; +} + +// Convert a linear value 'v' to YUV_FIX+2 fixed-point precision +// U/V value, suitable for RGBToU/V calls. +static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { + const int y = Interpolate(base_value << shift); // final uplifted value + return (y + kGammaTabRounder) >> kGammaTabFix; // descale +} + +#else + +static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {} +static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; } +static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { + return (int)(base_value << shift); +} + +#endif // USE_GAMMA_COMPRESSION + +//------------------------------------------------------------------------------ +// RGB -> YUV conversion + +static int RGBToY(int r, int g, int b, VP8Random* const rg) { + return (rg == NULL) ? VP8RGBToY(r, g, b, YUV_HALF) + : VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX)); +} + +static int RGBToU(int r, int g, int b, VP8Random* const rg) { + return (rg == NULL) ? VP8RGBToU(r, g, b, YUV_HALF << 2) + : VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2)); +} + +static int RGBToV(int r, int g, int b, VP8Random* const rg) { + return (rg == NULL) ? VP8RGBToV(r, g, b, YUV_HALF << 2) + : VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2)); +} + +//------------------------------------------------------------------------------ +// Smart RGB->YUV conversion + +static const int kNumIterations = 6; +static const int kMinDimensionIterativeConversion = 4; + +// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some +// banding sometimes. Better use extra precision. +#define SFIX 2 // fixed-point precision of RGB and Y/W +typedef int16_t fixed_t; // signed type with extra SFIX precision for UV +typedef uint16_t fixed_y_t; // unsigned type with extra SFIX precision for W + +#define SHALF (1 << SFIX >> 1) +#define MAX_Y_T ((256 << SFIX) - 1) +#define SROUNDER (1 << (YUV_FIX + SFIX - 1)) + +#if defined(USE_GAMMA_COMPRESSION) + +// float variant of gamma-correction +// We use tables of different size and precision, along with a 'real-world' +// Gamma value close to ~2. +#define kGammaF 2.2 +static float kGammaToLinearTabF[MAX_Y_T + 1]; // size scales with Y_FIX +static float kLinearToGammaTabF[kGammaTabSize + 2]; +static volatile int kGammaTablesFOk = 0; + +static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) { + if (!kGammaTablesFOk) { + int v; + const double norm = 1. / MAX_Y_T; + const double scale = 1. / kGammaTabSize; + for (v = 0; v <= MAX_Y_T; ++v) { + kGammaToLinearTabF[v] = (float)pow(norm * v, kGammaF); + } + for (v = 0; v <= kGammaTabSize; ++v) { + kLinearToGammaTabF[v] = (float)(MAX_Y_T * pow(scale * v, 1. / kGammaF)); + } + // to prevent small rounding errors to cause read-overflow: + kLinearToGammaTabF[kGammaTabSize + 1] = kLinearToGammaTabF[kGammaTabSize]; + kGammaTablesFOk = 1; + } +} + +static WEBP_INLINE float GammaToLinearF(int v) { + return kGammaToLinearTabF[v]; +} + +static WEBP_INLINE int LinearToGammaF(float value) { + const float v = value * kGammaTabSize; + const int tab_pos = (int)v; + const float x = v - (float)tab_pos; // fractional part + const float v0 = kLinearToGammaTabF[tab_pos + 0]; + const float v1 = kLinearToGammaTabF[tab_pos + 1]; + const float y = v1 * x + v0 * (1.f - x); // interpolate + return (int)(y + .5); +} + +#else + +static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) {} +static WEBP_INLINE float GammaToLinearF(int v) { + const float norm = 1.f / MAX_Y_T; + return norm * v; +} +static WEBP_INLINE int LinearToGammaF(float value) { + return (int)(MAX_Y_T * value + .5); +} + +#endif // USE_GAMMA_COMPRESSION + +//------------------------------------------------------------------------------ + +static uint8_t clip_8b(fixed_t v) { + return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u; +} + +static fixed_y_t clip_y(int y) { + return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T; +} + +//------------------------------------------------------------------------------ + +static int RGBToGray(int r, int g, int b) { + const int luma = 19595 * r + 38470 * g + 7471 * b + YUV_HALF; + return (luma >> YUV_FIX); +} + +static float RGBToGrayF(float r, float g, float b) { + return 0.299f * r + 0.587f * g + 0.114f * b; +} + +static int ScaleDown(int a, int b, int c, int d) { + const float A = GammaToLinearF(a); + const float B = GammaToLinearF(b); + const float C = GammaToLinearF(c); + const float D = GammaToLinearF(d); + return LinearToGammaF(0.25f * (A + B + C + D)); +} + +static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int len) { + while (len-- > 0) { + const float R = GammaToLinearF(src[0]); + const float G = GammaToLinearF(src[1]); + const float B = GammaToLinearF(src[2]); + const float Y = RGBToGrayF(R, G, B); + *dst++ = (fixed_y_t)LinearToGammaF(Y); + src += 3; + } +} + +static int UpdateChroma(const fixed_y_t* src1, + const fixed_y_t* src2, + fixed_t* dst, fixed_y_t* tmp, int len) { + int diff = 0; + while (len--> 0) { + const int r = ScaleDown(src1[0], src1[3], src2[0], src2[3]); + const int g = ScaleDown(src1[1], src1[4], src2[1], src2[4]); + const int b = ScaleDown(src1[2], src1[5], src2[2], src2[5]); + const int W = RGBToGray(r, g, b); + const int r_avg = (src1[0] + src1[3] + src2[0] + src2[3] + 2) >> 2; + const int g_avg = (src1[1] + src1[4] + src2[1] + src2[4] + 2) >> 2; + const int b_avg = (src1[2] + src1[5] + src2[2] + src2[5] + 2) >> 2; + dst[0] = (fixed_t)(r - W); + dst[1] = (fixed_t)(g - W); + dst[2] = (fixed_t)(b - W); + dst += 3; + src1 += 6; + src2 += 6; + if (tmp != NULL) { + tmp[0] = tmp[1] = clip_y(W); + tmp += 2; + } + diff += abs(RGBToGray(r_avg, g_avg, b_avg) - W); + } + return diff; +} + +//------------------------------------------------------------------------------ + +static WEBP_INLINE int Filter(const fixed_t* const A, const fixed_t* const B, + int rightwise) { + int v; + if (!rightwise) { + v = (A[0] * 9 + A[-3] * 3 + B[0] * 3 + B[-3]); + } else { + v = (A[0] * 9 + A[+3] * 3 + B[0] * 3 + B[+3]); + } + return (v + 8) >> 4; +} + +static WEBP_INLINE int Filter2(int A, int B) { return (A * 3 + B + 2) >> 2; } + +//------------------------------------------------------------------------------ + +static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { // 8bit -> SFIX + return ((fixed_y_t)a << SFIX) | SHALF; +} + +static void ImportOneRow(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + int step, + int pic_width, + fixed_y_t* const dst) { + int i; + for (i = 0; i < pic_width; ++i) { + const int off = i * step; + dst[3 * i + 0] = UpLift(r_ptr[off]); + dst[3 * i + 1] = UpLift(g_ptr[off]); + dst[3 * i + 2] = UpLift(b_ptr[off]); + } + if (pic_width & 1) { // replicate rightmost pixel + memcpy(dst + 3 * pic_width, dst + 3 * (pic_width - 1), 3 * sizeof(*dst)); + } +} + +static void InterpolateTwoRows(const fixed_y_t* const best_y, + const fixed_t* const prev_uv, + const fixed_t* const cur_uv, + const fixed_t* const next_uv, + int w, + fixed_y_t* const out1, + fixed_y_t* const out2) { + int i, k; + { // special boundary case for i==0 + const int W0 = best_y[0]; + const int W1 = best_y[w]; + for (k = 0; k <= 2; ++k) { + out1[k] = clip_y(Filter2(cur_uv[k], prev_uv[k]) + W0); + out2[k] = clip_y(Filter2(cur_uv[k], next_uv[k]) + W1); + } + } + for (i = 1; i < w - 1; ++i) { + const int W0 = best_y[i + 0]; + const int W1 = best_y[i + w]; + const int off = 3 * (i >> 1); + for (k = 0; k <= 2; ++k) { + const int tmp0 = Filter(cur_uv + off + k, prev_uv + off + k, i & 1); + const int tmp1 = Filter(cur_uv + off + k, next_uv + off + k, i & 1); + out1[3 * i + k] = clip_y(tmp0 + W0); + out2[3 * i + k] = clip_y(tmp1 + W1); + } + } + { // special boundary case for i == w - 1 + const int W0 = best_y[i + 0]; + const int W1 = best_y[i + w]; + const int off = 3 * (i >> 1); + for (k = 0; k <= 2; ++k) { + out1[3 * i + k] = clip_y(Filter2(cur_uv[off + k], prev_uv[off + k]) + W0); + out2[3 * i + k] = clip_y(Filter2(cur_uv[off + k], next_uv[off + k]) + W1); + } + } +} + +static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) { + const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER; + return clip_8b(16 + (luma >> (YUV_FIX + SFIX))); +} + +static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) { + const int u = -9719 * r - 19081 * g + 28800 * b + SROUNDER; + return clip_8b(128 + (u >> (YUV_FIX + SFIX))); +} + +static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) { + const int v = +28800 * r - 24116 * g - 4684 * b + SROUNDER; + return clip_8b(128 + (v >> (YUV_FIX + SFIX))); +} + +static int ConvertWRGBToYUV(const fixed_y_t* const best_y, + const fixed_t* const best_uv, + WebPPicture* const picture) { + int i, j; + const int w = (picture->width + 1) & ~1; + const int h = (picture->height + 1) & ~1; + const int uv_w = w >> 1; + const int uv_h = h >> 1; + for (j = 0; j < picture->height; ++j) { + for (i = 0; i < picture->width; ++i) { + const int off = 3 * ((i >> 1) + (j >> 1) * uv_w); + const int off2 = i + j * picture->y_stride; + const int W = best_y[i + j * w]; + const int r = best_uv[off + 0] + W; + const int g = best_uv[off + 1] + W; + const int b = best_uv[off + 2] + W; + picture->y[off2] = ConvertRGBToY(r, g, b); + } + } + for (j = 0; j < uv_h; ++j) { + uint8_t* const dst_u = picture->u + j * picture->uv_stride; + uint8_t* const dst_v = picture->v + j * picture->uv_stride; + for (i = 0; i < uv_w; ++i) { + const int off = 3 * (i + j * uv_w); + const int r = best_uv[off + 0]; + const int g = best_uv[off + 1]; + const int b = best_uv[off + 2]; + dst_u[i] = ConvertRGBToU(r, g, b); + dst_v[i] = ConvertRGBToV(r, g, b); + } + } + return 1; +} + +//------------------------------------------------------------------------------ +// Main function + +#define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T))) + +static int PreprocessARGB(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + int step, int rgb_stride, + WebPPicture* const picture) { + // we expand the right/bottom border if needed + const int w = (picture->width + 1) & ~1; + const int h = (picture->height + 1) & ~1; + const int uv_w = w >> 1; + const int uv_h = h >> 1; + int i, j, iter; + + // TODO(skal): allocate one big memory chunk. But for now, it's easier + // for valgrind debugging to have several chunks. + fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch + fixed_y_t* const best_y = SAFE_ALLOC(w, h, fixed_y_t); + fixed_y_t* const target_y = SAFE_ALLOC(w, h, fixed_y_t); + fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t); + fixed_t* const best_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); + fixed_t* const target_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); + fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t); + int ok; + int diff_sum = 0; + const int first_diff_threshold = (int)(2.5 * w * h); + const int min_improvement = 5; // stop if improvement is below this % + const int min_first_improvement = 80; + + if (best_y == NULL || best_uv == NULL || + target_y == NULL || target_uv == NULL || + best_rgb_y == NULL || best_rgb_uv == NULL || + tmp_buffer == NULL) { + ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); + goto End; + } + assert(picture->width >= kMinDimensionIterativeConversion); + assert(picture->height >= kMinDimensionIterativeConversion); + + // Import RGB samples to W/RGB representation. + for (j = 0; j < picture->height; j += 2) { + const int is_last_row = (j == picture->height - 1); + fixed_y_t* const src1 = tmp_buffer; + fixed_y_t* const src2 = tmp_buffer + 3 * w; + const int off1 = j * rgb_stride; + const int off2 = off1 + rgb_stride; + const int uv_off = (j >> 1) * 3 * uv_w; + fixed_y_t* const dst_y = best_y + j * w; + + // prepare two rows of input + ImportOneRow(r_ptr + off1, g_ptr + off1, b_ptr + off1, + step, picture->width, src1); + if (!is_last_row) { + ImportOneRow(r_ptr + off2, g_ptr + off2, b_ptr + off2, + step, picture->width, src2); + } else { + memcpy(src2, src1, 3 * w * sizeof(*src2)); + } + UpdateW(src1, target_y + (j + 0) * w, w); + UpdateW(src2, target_y + (j + 1) * w, w); + diff_sum += UpdateChroma(src1, src2, target_uv + uv_off, dst_y, uv_w); + memcpy(best_uv + uv_off, target_uv + uv_off, 3 * uv_w * sizeof(*best_uv)); + memcpy(dst_y + w, dst_y, w * sizeof(*dst_y)); + } + + // Iterate and resolve clipping conflicts. + for (iter = 0; iter < kNumIterations; ++iter) { + int k; + const fixed_t* cur_uv = best_uv; + const fixed_t* prev_uv = best_uv; + const int old_diff_sum = diff_sum; + diff_sum = 0; + for (j = 0; j < h; j += 2) { + fixed_y_t* const src1 = tmp_buffer; + fixed_y_t* const src2 = tmp_buffer + 3 * w; + { + const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0); + InterpolateTwoRows(best_y + j * w, prev_uv, cur_uv, next_uv, + w, src1, src2); + prev_uv = cur_uv; + cur_uv = next_uv; + } + + UpdateW(src1, best_rgb_y + 0 * w, w); + UpdateW(src2, best_rgb_y + 1 * w, w); + diff_sum += UpdateChroma(src1, src2, best_rgb_uv, NULL, uv_w); + + // update two rows of Y and one row of RGB + for (i = 0; i < 2 * w; ++i) { + const int off = i + j * w; + const int diff_y = target_y[off] - best_rgb_y[i]; + const int new_y = (int)best_y[off] + diff_y; + best_y[off] = clip_y(new_y); + } + for (i = 0; i < uv_w; ++i) { + const int off = 3 * (i + (j >> 1) * uv_w); + int W; + for (k = 0; k <= 2; ++k) { + const int diff_uv = (int)target_uv[off + k] - best_rgb_uv[3 * i + k]; + best_uv[off + k] += diff_uv; + } + W = RGBToGray(best_uv[off + 0], best_uv[off + 1], best_uv[off + 2]); + for (k = 0; k <= 2; ++k) { + best_uv[off + k] -= W; + } + } + } + // test exit condition + if (diff_sum > 0) { + const int improvement = 100 * abs(diff_sum - old_diff_sum) / diff_sum; + // Check if first iteration gave good result already, without a large + // jump of improvement (otherwise it means we need to try few extra + // iterations, just to be sure). + if (iter == 0 && diff_sum < first_diff_threshold && + improvement < min_first_improvement) { + break; + } + // then, check if improvement is stalling. + if (improvement < min_improvement) { + break; + } + } else { + break; + } + } + + // final reconstruction + ok = ConvertWRGBToYUV(best_y, best_uv, picture); + + End: + WebPSafeFree(best_y); + WebPSafeFree(best_uv); + WebPSafeFree(target_y); + WebPSafeFree(target_uv); + WebPSafeFree(best_rgb_y); + WebPSafeFree(best_rgb_uv); + WebPSafeFree(tmp_buffer); + return ok; +} +#undef SAFE_ALLOC + +//------------------------------------------------------------------------------ +// "Fast" regular RGB->YUV + +#define SUM4(ptr, step) LinearToGamma( \ + GammaToLinear((ptr)[0]) + \ + GammaToLinear((ptr)[(step)]) + \ + GammaToLinear((ptr)[rgb_stride]) + \ + GammaToLinear((ptr)[rgb_stride + (step)]), 0) \ + +#define SUM2(ptr) \ + LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1) + +#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride]) +#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4)) + +#if defined(USE_INVERSE_ALPHA_TABLE) + +static const int kAlphaFix = 19; +// Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix +// formula is then equal to v / a in most (99.6%) cases. Note that this table +// and constant are adjusted very tightly to fit 32b arithmetic. +// In particular, they use the fact that the operands for 'v / a' are actually +// derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3 +// with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid +// overflow is: kGammaFix + kAlphaFix <= 31. +static const uint32_t kInvAlpha[4 * 0xff + 1] = { + 0, /* alpha = 0 */ + 524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536, + 58254, 52428, 47662, 43690, 40329, 37449, 34952, 32768, + 30840, 29127, 27594, 26214, 24966, 23831, 22795, 21845, + 20971, 20164, 19418, 18724, 18078, 17476, 16912, 16384, + 15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107, + 12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922, + 10699, 10485, 10280, 10082, 9892, 9709, 9532, 9362, + 9198, 9039, 8886, 8738, 8594, 8456, 8322, 8192, + 8065, 7943, 7825, 7710, 7598, 7489, 7384, 7281, + 7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553, + 6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957, + 5890, 5825, 5761, 5698, 5637, 5577, 5518, 5461, + 5405, 5349, 5295, 5242, 5190, 5140, 5090, 5041, + 4993, 4946, 4899, 4854, 4809, 4766, 4723, 4681, + 4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369, + 4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096, + 4064, 4032, 4002, 3971, 3942, 3912, 3883, 3855, + 3826, 3799, 3771, 3744, 3718, 3692, 3666, 3640, + 3615, 3591, 3566, 3542, 3518, 3495, 3472, 3449, + 3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276, + 3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120, + 3102, 3084, 3066, 3048, 3030, 3013, 2995, 2978, + 2962, 2945, 2928, 2912, 2896, 2880, 2864, 2849, + 2833, 2818, 2803, 2788, 2774, 2759, 2744, 2730, + 2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621, + 2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520, + 2508, 2496, 2484, 2473, 2461, 2449, 2438, 2427, + 2416, 2404, 2394, 2383, 2372, 2361, 2351, 2340, + 2330, 2319, 2309, 2299, 2289, 2279, 2269, 2259, + 2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184, + 2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114, + 2105, 2097, 2088, 2080, 2072, 2064, 2056, 2048, + 2040, 2032, 2024, 2016, 2008, 2001, 1993, 1985, + 1978, 1971, 1963, 1956, 1949, 1941, 1934, 1927, + 1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872, + 1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820, + 1814, 1807, 1801, 1795, 1789, 1783, 1777, 1771, + 1765, 1759, 1753, 1747, 1741, 1736, 1730, 1724, + 1718, 1713, 1707, 1702, 1696, 1691, 1685, 1680, + 1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638, + 1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598, + 1593, 1588, 1583, 1579, 1574, 1569, 1565, 1560, + 1555, 1551, 1546, 1542, 1537, 1533, 1528, 1524, + 1519, 1515, 1510, 1506, 1502, 1497, 1493, 1489, + 1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456, + 1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424, + 1420, 1416, 1413, 1409, 1405, 1401, 1398, 1394, + 1390, 1387, 1383, 1379, 1376, 1372, 1368, 1365, + 1361, 1358, 1354, 1351, 1347, 1344, 1340, 1337, + 1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310, + 1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285, + 1281, 1278, 1275, 1272, 1269, 1266, 1263, 1260, + 1257, 1254, 1251, 1248, 1245, 1242, 1239, 1236, + 1233, 1230, 1227, 1224, 1222, 1219, 1216, 1213, + 1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191, + 1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170, + 1167, 1165, 1162, 1159, 1157, 1154, 1152, 1149, + 1147, 1144, 1142, 1139, 1137, 1134, 1132, 1129, + 1127, 1125, 1122, 1120, 1117, 1115, 1113, 1110, + 1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092, + 1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074, + 1072, 1069, 1067, 1065, 1063, 1061, 1059, 1057, + 1054, 1052, 1050, 1048, 1046, 1044, 1042, 1040, + 1038, 1036, 1034, 1032, 1030, 1028, 1026, 1024, + 1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008, + 1006, 1004, 1002, 1000, 998, 996, 994, 992, + 991, 989, 987, 985, 983, 981, 979, 978, + 976, 974, 972, 970, 969, 967, 965, 963, + 961, 960, 958, 956, 954, 953, 951, 949, + 948, 946, 944, 942, 941, 939, 937, 936, + 934, 932, 931, 929, 927, 926, 924, 923, + 921, 919, 918, 916, 914, 913, 911, 910, + 908, 907, 905, 903, 902, 900, 899, 897, + 896, 894, 893, 891, 890, 888, 887, 885, + 884, 882, 881, 879, 878, 876, 875, 873, + 872, 870, 869, 868, 866, 865, 863, 862, + 860, 859, 858, 856, 855, 853, 852, 851, + 849, 848, 846, 845, 844, 842, 841, 840, + 838, 837, 836, 834, 833, 832, 830, 829, + 828, 826, 825, 824, 823, 821, 820, 819, + 817, 816, 815, 814, 812, 811, 810, 809, + 807, 806, 805, 804, 802, 801, 800, 799, + 798, 796, 795, 794, 793, 791, 790, 789, + 788, 787, 786, 784, 783, 782, 781, 780, + 779, 777, 776, 775, 774, 773, 772, 771, + 769, 768, 767, 766, 765, 764, 763, 762, + 760, 759, 758, 757, 756, 755, 754, 753, + 752, 751, 750, 748, 747, 746, 745, 744, + 743, 742, 741, 740, 739, 738, 737, 736, + 735, 734, 733, 732, 731, 730, 729, 728, + 727, 726, 725, 724, 723, 722, 721, 720, + 719, 718, 717, 716, 715, 714, 713, 712, + 711, 710, 709, 708, 707, 706, 705, 704, + 703, 702, 701, 700, 699, 699, 698, 697, + 696, 695, 694, 693, 692, 691, 690, 689, + 688, 688, 687, 686, 685, 684, 683, 682, + 681, 680, 680, 679, 678, 677, 676, 675, + 674, 673, 673, 672, 671, 670, 669, 668, + 667, 667, 666, 665, 664, 663, 662, 661, + 661, 660, 659, 658, 657, 657, 656, 655, + 654, 653, 652, 652, 651, 650, 649, 648, + 648, 647, 646, 645, 644, 644, 643, 642, + 641, 640, 640, 639, 638, 637, 637, 636, + 635, 634, 633, 633, 632, 631, 630, 630, + 629, 628, 627, 627, 626, 625, 624, 624, + 623, 622, 621, 621, 620, 619, 618, 618, + 617, 616, 616, 615, 614, 613, 613, 612, + 611, 611, 610, 609, 608, 608, 607, 606, + 606, 605, 604, 604, 603, 602, 601, 601, + 600, 599, 599, 598, 597, 597, 596, 595, + 595, 594, 593, 593, 592, 591, 591, 590, + 589, 589, 588, 587, 587, 586, 585, 585, + 584, 583, 583, 582, 581, 581, 580, 579, + 579, 578, 578, 577, 576, 576, 575, 574, + 574, 573, 572, 572, 571, 571, 570, 569, + 569, 568, 568, 567, 566, 566, 565, 564, + 564, 563, 563, 562, 561, 561, 560, 560, + 559, 558, 558, 557, 557, 556, 555, 555, + 554, 554, 553, 553, 552, 551, 551, 550, + 550, 549, 548, 548, 547, 547, 546, 546, + 545, 544, 544, 543, 543, 542, 542, 541, + 541, 540, 539, 539, 538, 538, 537, 537, + 536, 536, 535, 534, 534, 533, 533, 532, + 532, 531, 531, 530, 530, 529, 529, 528, + 527, 527, 526, 526, 525, 525, 524, 524, + 523, 523, 522, 522, 521, 521, 520, 520, + 519, 519, 518, 518, 517, 517, 516, 516, + 515, 515, 514, 514 +}; + +// Note that LinearToGamma() expects the values to be premultiplied by 4, +// so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly. +#define DIVIDE_BY_ALPHA(sum, a) (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2)) + +#else + +#define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a)) + +#endif // USE_INVERSE_ALPHA_TABLE + +static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src, + const uint8_t* a_ptr, + uint32_t total_a, int step, + int rgb_stride) { + const uint32_t sum = + a_ptr[0] * GammaToLinear(src[0]) + + a_ptr[step] * GammaToLinear(src[step]) + + a_ptr[rgb_stride] * GammaToLinear(src[rgb_stride]) + + a_ptr[rgb_stride + step] * GammaToLinear(src[rgb_stride + step]); + assert(total_a > 0 && total_a <= 4 * 0xff); +#if defined(USE_INVERSE_ALPHA_TABLE) + assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32)); +#endif + return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0); +} + +static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + int step, + uint8_t* const dst_y, + int width, + VP8Random* const rg) { + int i, j; + for (i = 0, j = 0; i < width; i += 1, j += step) { + dst_y[i] = RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], rg); + } +} + +static WEBP_INLINE void AccumulateRGBA(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + const uint8_t* const a_ptr, + int rgb_stride, + uint16_t* dst, int width) { + int i, j; + // we loop over 2x2 blocks and produce one R/G/B/A value for each. + for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * 4, dst += 4) { + const uint32_t a = SUM4ALPHA(a_ptr + j); + int r, g, b; + if (a == 4 * 0xff || a == 0) { + r = SUM4(r_ptr + j, 4); + g = SUM4(g_ptr + j, 4); + b = SUM4(b_ptr + j, 4); + } else { + r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 4, rgb_stride); + g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 4, rgb_stride); + b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 4, rgb_stride); + } + dst[0] = r; + dst[1] = g; + dst[2] = b; + dst[3] = a; + } + if (width & 1) { + const uint32_t a = 2u * SUM2ALPHA(a_ptr + j); + int r, g, b; + if (a == 4 * 0xff || a == 0) { + r = SUM2(r_ptr + j); + g = SUM2(g_ptr + j); + b = SUM2(b_ptr + j); + } else { + r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 0, rgb_stride); + g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 0, rgb_stride); + b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 0, rgb_stride); + } + dst[0] = r; + dst[1] = g; + dst[2] = b; + dst[3] = a; + } +} + +static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + int step, int rgb_stride, + uint16_t* dst, int width) { + int i, j; + for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * step, dst += 4) { + dst[0] = SUM4(r_ptr + j, step); + dst[1] = SUM4(g_ptr + j, step); + dst[2] = SUM4(b_ptr + j, step); + } + if (width & 1) { + dst[0] = SUM2(r_ptr + j); + dst[1] = SUM2(g_ptr + j); + dst[2] = SUM2(b_ptr + j); + } +} + +static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb, + uint8_t* const dst_u, + uint8_t* const dst_v, + int width, + VP8Random* const rg) { + int i; + for (i = 0; i < width; i += 1, rgb += 4) { + const int r = rgb[0], g = rgb[1], b = rgb[2]; + dst_u[i] = RGBToU(r, g, b, rg); + dst_v[i] = RGBToV(r, g, b, rg); + } +} + +static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + const uint8_t* const a_ptr, + int step, // bytes per pixel + int rgb_stride, // bytes per scanline + float dithering, + int use_iterative_conversion, + WebPPicture* const picture) { + int y; + const int width = picture->width; + const int height = picture->height; + const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride); + const int is_rgb = (r_ptr < b_ptr); // otherwise it's bgr + + picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420; + picture->use_argb = 0; + + // disable smart conversion if source is too small (overkill). + if (width < kMinDimensionIterativeConversion || + height < kMinDimensionIterativeConversion) { + use_iterative_conversion = 0; + } + + if (!WebPPictureAllocYUVA(picture, width, height)) { + return 0; + } + if (has_alpha) { + WebPInitAlphaProcessing(); + assert(step == 4); +#if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE) + assert(kAlphaFix + kGammaFix <= 31); +#endif + } + + if (use_iterative_conversion) { + InitGammaTablesF(); + if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) { + return 0; + } + if (has_alpha) { + WebPExtractAlpha(a_ptr, rgb_stride, width, height, + picture->a, picture->a_stride); + } + } else { + const int uv_width = (width + 1) >> 1; + int use_dsp = (step == 3); // use special function in this case + // temporary storage for accumulated R/G/B values during conversion to U/V + uint16_t* const tmp_rgb = + (uint16_t*)WebPSafeMalloc(4 * uv_width, sizeof(*tmp_rgb)); + uint8_t* dst_y = picture->y; + uint8_t* dst_u = picture->u; + uint8_t* dst_v = picture->v; + uint8_t* dst_a = picture->a; + + VP8Random base_rg; + VP8Random* rg = NULL; + if (dithering > 0.) { + VP8InitRandom(&base_rg, dithering); + rg = &base_rg; + use_dsp = 0; // can't use dsp in this case + } + WebPInitConvertARGBToYUV(); + InitGammaTables(); + + if (tmp_rgb == NULL) return 0; // malloc error + + // Downsample Y/U/V planes, two rows at a time + for (y = 0; y < (height >> 1); ++y) { + int rows_have_alpha = has_alpha; + const int off1 = (2 * y + 0) * rgb_stride; + const int off2 = (2 * y + 1) * rgb_stride; + if (use_dsp) { + if (is_rgb) { + WebPConvertRGB24ToY(r_ptr + off1, dst_y, width); + WebPConvertRGB24ToY(r_ptr + off2, dst_y + picture->y_stride, width); + } else { + WebPConvertBGR24ToY(b_ptr + off1, dst_y, width); + WebPConvertBGR24ToY(b_ptr + off2, dst_y + picture->y_stride, width); + } + } else { + ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step, + dst_y, width, rg); + ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step, + dst_y + picture->y_stride, width, rg); + } + dst_y += 2 * picture->y_stride; + if (has_alpha) { + rows_have_alpha &= !WebPExtractAlpha(a_ptr + off1, rgb_stride, + width, 2, + dst_a, picture->a_stride); + dst_a += 2 * picture->a_stride; + } + // Collect averaged R/G/B(/A) + if (!rows_have_alpha) { + AccumulateRGB(r_ptr + off1, g_ptr + off1, b_ptr + off1, + step, rgb_stride, tmp_rgb, width); + } else { + AccumulateRGBA(r_ptr + off1, g_ptr + off1, b_ptr + off1, a_ptr + off1, + rgb_stride, tmp_rgb, width); + } + // Convert to U/V + if (rg == NULL) { + WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width); + } else { + ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg); + } + dst_u += picture->uv_stride; + dst_v += picture->uv_stride; + } + if (height & 1) { // extra last row + const int off = 2 * y * rgb_stride; + int row_has_alpha = has_alpha; + if (use_dsp) { + if (r_ptr < b_ptr) { + WebPConvertRGB24ToY(r_ptr + off, dst_y, width); + } else { + WebPConvertBGR24ToY(b_ptr + off, dst_y, width); + } + } else { + ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step, + dst_y, width, rg); + } + if (row_has_alpha) { + row_has_alpha &= !WebPExtractAlpha(a_ptr + off, 0, width, 1, dst_a, 0); + } + // Collect averaged R/G/B(/A) + if (!row_has_alpha) { + // Collect averaged R/G/B + AccumulateRGB(r_ptr + off, g_ptr + off, b_ptr + off, + step, /* rgb_stride = */ 0, tmp_rgb, width); + } else { + AccumulateRGBA(r_ptr + off, g_ptr + off, b_ptr + off, a_ptr + off, + /* rgb_stride = */ 0, tmp_rgb, width); + } + if (rg == NULL) { + WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width); + } else { + ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg); + } + } + WebPSafeFree(tmp_rgb); + } + return 1; +} + +#undef SUM4 +#undef SUM2 +#undef SUM4ALPHA +#undef SUM2ALPHA + +//------------------------------------------------------------------------------ +// call for ARGB->YUVA conversion + +static int PictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace, + float dithering, int use_iterative_conversion) { + if (picture == NULL) return 0; + if (picture->argb == NULL) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); + } else if ((colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); + } else { + const uint8_t* const argb = (const uint8_t*)picture->argb; + const uint8_t* const r = ALPHA_IS_LAST ? argb + 2 : argb + 1; + const uint8_t* const g = ALPHA_IS_LAST ? argb + 1 : argb + 2; + const uint8_t* const b = ALPHA_IS_LAST ? argb + 0 : argb + 3; + const uint8_t* const a = ALPHA_IS_LAST ? argb + 3 : argb + 0; + + picture->colorspace = WEBP_YUV420; + return ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride, + dithering, use_iterative_conversion, picture); + } +} + +int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace, + float dithering) { + return PictureARGBToYUVA(picture, colorspace, dithering, 0); +} + +int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) { + return PictureARGBToYUVA(picture, colorspace, 0.f, 0); +} + +int WebPPictureSmartARGBToYUVA(WebPPicture* picture) { + return PictureARGBToYUVA(picture, WEBP_YUV420, 0.f, 1); +} + +//------------------------------------------------------------------------------ +// call for YUVA -> ARGB conversion + +int WebPPictureYUVAToARGB(WebPPicture* picture) { + if (picture == NULL) return 0; + if (picture->y == NULL || picture->u == NULL || picture->v == NULL) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); + } + if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); + } + if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); + } + // Allocate a new argb buffer (discarding the previous one). + if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0; + picture->use_argb = 1; + + // Convert + { + int y; + const int width = picture->width; + const int height = picture->height; + const int argb_stride = 4 * picture->argb_stride; + uint8_t* dst = (uint8_t*)picture->argb; + const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y; + WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST); + + // First row, with replicated top samples. + upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width); + cur_y += picture->y_stride; + dst += argb_stride; + // Center rows. + for (y = 1; y + 1 < height; y += 2) { + const uint8_t* const top_u = cur_u; + const uint8_t* const top_v = cur_v; + cur_u += picture->uv_stride; + cur_v += picture->uv_stride; + upsample(cur_y, cur_y + picture->y_stride, top_u, top_v, cur_u, cur_v, + dst, dst + argb_stride, width); + cur_y += 2 * picture->y_stride; + dst += 2 * argb_stride; + } + // Last row (if needed), with replicated bottom samples. + if (height > 1 && !(height & 1)) { + upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width); + } + // Insert alpha values if needed, in replacement for the default 0xff ones. + if (picture->colorspace & WEBP_CSP_ALPHA_BIT) { + for (y = 0; y < height; ++y) { + uint32_t* const argb_dst = picture->argb + y * picture->argb_stride; + const uint8_t* const src = picture->a + y * picture->a_stride; + int x; + for (x = 0; x < width; ++x) { + argb_dst[x] = (argb_dst[x] & 0x00ffffffu) | ((uint32_t)src[x] << 24); + } + } + } + } + return 1; +} + +//------------------------------------------------------------------------------ +// automatic import / conversion + +static int Import(WebPPicture* const picture, + const uint8_t* const rgb, int rgb_stride, + int step, int swap_rb, int import_alpha) { + int y; + const uint8_t* const r_ptr = rgb + (swap_rb ? 2 : 0); + const uint8_t* const g_ptr = rgb + 1; + const uint8_t* const b_ptr = rgb + (swap_rb ? 0 : 2); + const uint8_t* const a_ptr = import_alpha ? rgb + 3 : NULL; + const int width = picture->width; + const int height = picture->height; + + if (!picture->use_argb) { + return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride, + 0.f /* no dithering */, 0, picture); + } + if (!WebPPictureAlloc(picture)) return 0; + + VP8EncDspARGBInit(); + + if (import_alpha) { + assert(step == 4); + for (y = 0; y < height; ++y) { + uint32_t* const dst = &picture->argb[y * picture->argb_stride]; + const int offset = y * rgb_stride; + VP8PackARGB(a_ptr + offset, r_ptr + offset, g_ptr + offset, + b_ptr + offset, width, dst); + } + } else { + assert(step >= 3); + for (y = 0; y < height; ++y) { + uint32_t* const dst = &picture->argb[y * picture->argb_stride]; + const int offset = y * rgb_stride; + VP8PackRGB(r_ptr + offset, g_ptr + offset, b_ptr + offset, + width, step, dst); + } + } + return 1; +} + +// Public API + +int WebPPictureImportRGB(WebPPicture* picture, + const uint8_t* rgb, int rgb_stride) { + return (picture != NULL && rgb != NULL) + ? Import(picture, rgb, rgb_stride, 3, 0, 0) + : 0; +} + +int WebPPictureImportBGR(WebPPicture* picture, + const uint8_t* rgb, int rgb_stride) { + return (picture != NULL && rgb != NULL) + ? Import(picture, rgb, rgb_stride, 3, 1, 0) + : 0; +} + +int WebPPictureImportRGBA(WebPPicture* picture, + const uint8_t* rgba, int rgba_stride) { + return (picture != NULL && rgba != NULL) + ? Import(picture, rgba, rgba_stride, 4, 0, 1) + : 0; +} + +int WebPPictureImportBGRA(WebPPicture* picture, + const uint8_t* rgba, int rgba_stride) { + return (picture != NULL && rgba != NULL) + ? Import(picture, rgba, rgba_stride, 4, 1, 1) + : 0; +} + +int WebPPictureImportRGBX(WebPPicture* picture, + const uint8_t* rgba, int rgba_stride) { + return (picture != NULL && rgba != NULL) + ? Import(picture, rgba, rgba_stride, 4, 0, 0) + : 0; +} + +int WebPPictureImportBGRX(WebPPicture* picture, + const uint8_t* rgba, int rgba_stride) { + return (picture != NULL && rgba != NULL) + ? Import(picture, rgba, rgba_stride, 4, 1, 0) + : 0; +} + +//------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/enc/picture_psnr.c b/thirdparty/libwebp/enc/picture_psnr.c new file mode 100644 index 0000000000..81ab1b5ca1 --- /dev/null +++ b/thirdparty/libwebp/enc/picture_psnr.c @@ -0,0 +1,177 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// WebPPicture tools for measuring distortion +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <math.h> +#include <stdlib.h> + +#include "./vp8enci.h" +#include "../utils/utils.h" + +//------------------------------------------------------------------------------ +// local-min distortion +// +// For every pixel in the *reference* picture, we search for the local best +// match in the compressed image. This is not a symmetrical measure. + +#define RADIUS 2 // search radius. Shouldn't be too large. + +static void AccumulateLSIM(const uint8_t* src, int src_stride, + const uint8_t* ref, int ref_stride, + int w, int h, VP8DistoStats* stats) { + int x, y; + double total_sse = 0.; + for (y = 0; y < h; ++y) { + const int y_0 = (y - RADIUS < 0) ? 0 : y - RADIUS; + const int y_1 = (y + RADIUS + 1 >= h) ? h : y + RADIUS + 1; + for (x = 0; x < w; ++x) { + const int x_0 = (x - RADIUS < 0) ? 0 : x - RADIUS; + const int x_1 = (x + RADIUS + 1 >= w) ? w : x + RADIUS + 1; + double best_sse = 255. * 255.; + const double value = (double)ref[y * ref_stride + x]; + int i, j; + for (j = y_0; j < y_1; ++j) { + const uint8_t* const s = src + j * src_stride; + for (i = x_0; i < x_1; ++i) { + const double diff = s[i] - value; + const double sse = diff * diff; + if (sse < best_sse) best_sse = sse; + } + } + total_sse += best_sse; + } + } + stats->w = w * h; + stats->xm = 0; + stats->ym = 0; + stats->xxm = total_sse; + stats->yym = 0; + stats->xxm = 0; +} +#undef RADIUS + +//------------------------------------------------------------------------------ +// Distortion + +// Max value returned in case of exact similarity. +static const double kMinDistortion_dB = 99.; +static float GetPSNR(const double v) { + return (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.)) + : kMinDistortion_dB); +} + +int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, + int type, float result[5]) { + VP8DistoStats stats[5]; + int w, h; + + memset(stats, 0, sizeof(stats)); + + VP8SSIMDspInit(); + + if (src == NULL || ref == NULL || + src->width != ref->width || src->height != ref->height || + src->use_argb != ref->use_argb || result == NULL) { + return 0; + } + w = src->width; + h = src->height; + + if (src->use_argb == 1) { + if (src->argb == NULL || ref->argb == NULL) { + return 0; + } else { + int i, j, c; + uint8_t* tmp1, *tmp2; + uint8_t* const tmp_plane = + (uint8_t*)WebPSafeMalloc(2ULL * w * h, sizeof(*tmp_plane)); + if (tmp_plane == NULL) return 0; + tmp1 = tmp_plane; + tmp2 = tmp_plane + w * h; + for (c = 0; c < 4; ++c) { + for (j = 0; j < h; ++j) { + for (i = 0; i < w; ++i) { + tmp1[j * w + i] = src->argb[i + j * src->argb_stride] >> (c * 8); + tmp2[j * w + i] = ref->argb[i + j * ref->argb_stride] >> (c * 8); + } + } + if (type >= 2) { + AccumulateLSIM(tmp1, w, tmp2, w, w, h, &stats[c]); + } else { + VP8SSIMAccumulatePlane(tmp1, w, tmp2, w, w, h, &stats[c]); + } + } + free(tmp_plane); + } + } else { + int has_alpha, uv_w, uv_h; + if (src->y == NULL || ref->y == NULL || + src->u == NULL || ref->u == NULL || + src->v == NULL || ref->v == NULL) { + return 0; + } + has_alpha = !!(src->colorspace & WEBP_CSP_ALPHA_BIT); + if (has_alpha != !!(ref->colorspace & WEBP_CSP_ALPHA_BIT) || + (has_alpha && (src->a == NULL || ref->a == NULL))) { + return 0; + } + + uv_w = (src->width + 1) >> 1; + uv_h = (src->height + 1) >> 1; + if (type >= 2) { + AccumulateLSIM(src->y, src->y_stride, ref->y, ref->y_stride, + w, h, &stats[0]); + AccumulateLSIM(src->u, src->uv_stride, ref->u, ref->uv_stride, + uv_w, uv_h, &stats[1]); + AccumulateLSIM(src->v, src->uv_stride, ref->v, ref->uv_stride, + uv_w, uv_h, &stats[2]); + if (has_alpha) { + AccumulateLSIM(src->a, src->a_stride, ref->a, ref->a_stride, + w, h, &stats[3]); + } + } else { + VP8SSIMAccumulatePlane(src->y, src->y_stride, + ref->y, ref->y_stride, + w, h, &stats[0]); + VP8SSIMAccumulatePlane(src->u, src->uv_stride, + ref->u, ref->uv_stride, + uv_w, uv_h, &stats[1]); + VP8SSIMAccumulatePlane(src->v, src->uv_stride, + ref->v, ref->uv_stride, + uv_w, uv_h, &stats[2]); + if (has_alpha) { + VP8SSIMAccumulatePlane(src->a, src->a_stride, + ref->a, ref->a_stride, + w, h, &stats[3]); + } + } + } + // Final stat calculations. + { + int c; + for (c = 0; c <= 4; ++c) { + if (type == 1) { + const double v = VP8SSIMGet(&stats[c]); + result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v) + : kMinDistortion_dB); + } else { + const double v = VP8SSIMGetSquaredError(&stats[c]); + result[c] = GetPSNR(v); + } + // Accumulate forward + if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]); + } + } + return 1; +} + +//------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/enc/picture_rescale.c b/thirdparty/libwebp/enc/picture_rescale.c new file mode 100644 index 0000000000..9f19e8e80f --- /dev/null +++ b/thirdparty/libwebp/enc/picture_rescale.c @@ -0,0 +1,264 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// WebPPicture tools: copy, crop, rescaling and view. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <assert.h> +#include <stdlib.h> + +#include "./vp8enci.h" +#include "../utils/rescaler.h" +#include "../utils/utils.h" + +#define HALVE(x) (((x) + 1) >> 1) + +// Grab the 'specs' (writer, *opaque, width, height...) from 'src' and copy them +// into 'dst'. Mark 'dst' as not owning any memory. +static void PictureGrabSpecs(const WebPPicture* const src, + WebPPicture* const dst) { + assert(src != NULL && dst != NULL); + *dst = *src; + WebPPictureResetBuffers(dst); +} + +//------------------------------------------------------------------------------ + +// Adjust top-left corner to chroma sample position. +static void SnapTopLeftPosition(const WebPPicture* const pic, + int* const left, int* const top) { + if (!pic->use_argb) { + *left &= ~1; + *top &= ~1; + } +} + +// Adjust top-left corner and verify that the sub-rectangle is valid. +static int AdjustAndCheckRectangle(const WebPPicture* const pic, + int* const left, int* const top, + int width, int height) { + SnapTopLeftPosition(pic, left, top); + if ((*left) < 0 || (*top) < 0) return 0; + if (width <= 0 || height <= 0) return 0; + if ((*left) + width > pic->width) return 0; + if ((*top) + height > pic->height) return 0; + return 1; +} + +int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { + if (src == NULL || dst == NULL) return 0; + if (src == dst) return 1; + + PictureGrabSpecs(src, dst); + if (!WebPPictureAlloc(dst)) return 0; + + if (!src->use_argb) { + WebPCopyPlane(src->y, src->y_stride, + dst->y, dst->y_stride, dst->width, dst->height); + WebPCopyPlane(src->u, src->uv_stride, dst->u, dst->uv_stride, + HALVE(dst->width), HALVE(dst->height)); + WebPCopyPlane(src->v, src->uv_stride, dst->v, dst->uv_stride, + HALVE(dst->width), HALVE(dst->height)); + if (dst->a != NULL) { + WebPCopyPlane(src->a, src->a_stride, + dst->a, dst->a_stride, dst->width, dst->height); + } + } else { + WebPCopyPlane((const uint8_t*)src->argb, 4 * src->argb_stride, + (uint8_t*)dst->argb, 4 * dst->argb_stride, + 4 * dst->width, dst->height); + } + return 1; +} + +int WebPPictureIsView(const WebPPicture* picture) { + if (picture == NULL) return 0; + if (picture->use_argb) { + return (picture->memory_argb_ == NULL); + } + return (picture->memory_ == NULL); +} + +int WebPPictureView(const WebPPicture* src, + int left, int top, int width, int height, + WebPPicture* dst) { + if (src == NULL || dst == NULL) return 0; + + // verify rectangle position. + if (!AdjustAndCheckRectangle(src, &left, &top, width, height)) return 0; + + if (src != dst) { // beware of aliasing! We don't want to leak 'memory_'. + PictureGrabSpecs(src, dst); + } + dst->width = width; + dst->height = height; + if (!src->use_argb) { + dst->y = src->y + top * src->y_stride + left; + dst->u = src->u + (top >> 1) * src->uv_stride + (left >> 1); + dst->v = src->v + (top >> 1) * src->uv_stride + (left >> 1); + dst->y_stride = src->y_stride; + dst->uv_stride = src->uv_stride; + if (src->a != NULL) { + dst->a = src->a + top * src->a_stride + left; + dst->a_stride = src->a_stride; + } + } else { + dst->argb = src->argb + top * src->argb_stride + left; + dst->argb_stride = src->argb_stride; + } + return 1; +} + +//------------------------------------------------------------------------------ +// Picture cropping + +int WebPPictureCrop(WebPPicture* pic, + int left, int top, int width, int height) { + WebPPicture tmp; + + if (pic == NULL) return 0; + if (!AdjustAndCheckRectangle(pic, &left, &top, width, height)) return 0; + + PictureGrabSpecs(pic, &tmp); + tmp.width = width; + tmp.height = height; + if (!WebPPictureAlloc(&tmp)) return 0; + + if (!pic->use_argb) { + const int y_offset = top * pic->y_stride + left; + const int uv_offset = (top / 2) * pic->uv_stride + left / 2; + WebPCopyPlane(pic->y + y_offset, pic->y_stride, + tmp.y, tmp.y_stride, width, height); + WebPCopyPlane(pic->u + uv_offset, pic->uv_stride, + tmp.u, tmp.uv_stride, HALVE(width), HALVE(height)); + WebPCopyPlane(pic->v + uv_offset, pic->uv_stride, + tmp.v, tmp.uv_stride, HALVE(width), HALVE(height)); + + if (tmp.a != NULL) { + const int a_offset = top * pic->a_stride + left; + WebPCopyPlane(pic->a + a_offset, pic->a_stride, + tmp.a, tmp.a_stride, width, height); + } + } else { + const uint8_t* const src = + (const uint8_t*)(pic->argb + top * pic->argb_stride + left); + WebPCopyPlane(src, pic->argb_stride * 4, (uint8_t*)tmp.argb, + tmp.argb_stride * 4, width * 4, height); + } + WebPPictureFree(pic); + *pic = tmp; + return 1; +} + +//------------------------------------------------------------------------------ +// Simple picture rescaler + +static void RescalePlane(const uint8_t* src, + int src_width, int src_height, int src_stride, + uint8_t* dst, + int dst_width, int dst_height, int dst_stride, + rescaler_t* const work, + int num_channels) { + WebPRescaler rescaler; + int y = 0; + WebPRescalerInit(&rescaler, src_width, src_height, + dst, dst_width, dst_height, dst_stride, + num_channels, work); + while (y < src_height) { + y += WebPRescalerImport(&rescaler, src_height - y, + src + y * src_stride, src_stride); + WebPRescalerExport(&rescaler); + } +} + +static void AlphaMultiplyARGB(WebPPicture* const pic, int inverse) { + assert(pic->argb != NULL); + WebPMultARGBRows((uint8_t*)pic->argb, pic->argb_stride * sizeof(*pic->argb), + pic->width, pic->height, inverse); +} + +static void AlphaMultiplyY(WebPPicture* const pic, int inverse) { + if (pic->a != NULL) { + WebPMultRows(pic->y, pic->y_stride, pic->a, pic->a_stride, + pic->width, pic->height, inverse); + } +} + +int WebPPictureRescale(WebPPicture* pic, int width, int height) { + WebPPicture tmp; + int prev_width, prev_height; + rescaler_t* work; + + if (pic == NULL) return 0; + prev_width = pic->width; + prev_height = pic->height; + if (!WebPRescalerGetScaledDimensions( + prev_width, prev_height, &width, &height)) { + return 0; + } + + PictureGrabSpecs(pic, &tmp); + tmp.width = width; + tmp.height = height; + if (!WebPPictureAlloc(&tmp)) return 0; + + if (!pic->use_argb) { + work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work)); + if (work == NULL) { + WebPPictureFree(&tmp); + return 0; + } + // If present, we need to rescale alpha first (for AlphaMultiplyY). + if (pic->a != NULL) { + WebPInitAlphaProcessing(); + RescalePlane(pic->a, prev_width, prev_height, pic->a_stride, + tmp.a, width, height, tmp.a_stride, work, 1); + } + + // We take transparency into account on the luma plane only. That's not + // totally exact blending, but still is a good approximation. + AlphaMultiplyY(pic, 0); + RescalePlane(pic->y, prev_width, prev_height, pic->y_stride, + tmp.y, width, height, tmp.y_stride, work, 1); + AlphaMultiplyY(&tmp, 1); + + RescalePlane(pic->u, + HALVE(prev_width), HALVE(prev_height), pic->uv_stride, + tmp.u, + HALVE(width), HALVE(height), tmp.uv_stride, work, 1); + RescalePlane(pic->v, + HALVE(prev_width), HALVE(prev_height), pic->uv_stride, + tmp.v, + HALVE(width), HALVE(height), tmp.uv_stride, work, 1); + } else { + work = (rescaler_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work)); + if (work == NULL) { + WebPPictureFree(&tmp); + return 0; + } + // In order to correctly interpolate colors, we need to apply the alpha + // weighting first (black-matting), scale the RGB values, and remove + // the premultiplication afterward (while preserving the alpha channel). + WebPInitAlphaProcessing(); + AlphaMultiplyARGB(pic, 0); + RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height, + pic->argb_stride * 4, + (uint8_t*)tmp.argb, width, height, + tmp.argb_stride * 4, + work, 4); + AlphaMultiplyARGB(&tmp, 1); + } + WebPPictureFree(pic); + WebPSafeFree(work); + *pic = tmp; + return 1; +} + +//------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/enc/picture_tools.c b/thirdparty/libwebp/enc/picture_tools.c new file mode 100644 index 0000000000..bf97af8408 --- /dev/null +++ b/thirdparty/libwebp/enc/picture_tools.c @@ -0,0 +1,226 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// WebPPicture tools: alpha handling, etc. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <assert.h> + +#include "./vp8enci.h" +#include "../dsp/yuv.h" + +static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) { + return (0xff000000u | (r << 16) | (g << 8) | b); +} + +//------------------------------------------------------------------------------ +// Helper: clean up fully transparent area to help compressibility. + +#define SIZE 8 +#define SIZE2 (SIZE / 2) +static int is_transparent_area(const uint8_t* ptr, int stride, int size) { + int y, x; + for (y = 0; y < size; ++y) { + for (x = 0; x < size; ++x) { + if (ptr[x]) { + return 0; + } + } + ptr += stride; + } + return 1; +} + +static int is_transparent_argb_area(const uint32_t* ptr, int stride, int size) { + int y, x; + for (y = 0; y < size; ++y) { + for (x = 0; x < size; ++x) { + if (ptr[x] & 0xff000000u) { + return 0; + } + } + ptr += stride; + } + return 1; +} + +static void flatten(uint8_t* ptr, int v, int stride, int size) { + int y; + for (y = 0; y < size; ++y) { + memset(ptr, v, size); + ptr += stride; + } +} + +static void flatten_argb(uint32_t* ptr, uint32_t v, int stride, int size) { + int x, y; + for (y = 0; y < size; ++y) { + for (x = 0; x < size; ++x) ptr[x] = v; + ptr += stride; + } +} + +void WebPCleanupTransparentArea(WebPPicture* pic) { + int x, y, w, h; + if (pic == NULL) return; + w = pic->width / SIZE; + h = pic->height / SIZE; + + // note: we ignore the left-overs on right/bottom + if (pic->use_argb) { + uint32_t argb_value = 0; + for (y = 0; y < h; ++y) { + int need_reset = 1; + for (x = 0; x < w; ++x) { + const int off = (y * pic->argb_stride + x) * SIZE; + if (is_transparent_argb_area(pic->argb + off, pic->argb_stride, SIZE)) { + if (need_reset) { + argb_value = pic->argb[off]; + need_reset = 0; + } + flatten_argb(pic->argb + off, argb_value, pic->argb_stride, SIZE); + } else { + need_reset = 1; + } + } + } + } else { + const uint8_t* const a_ptr = pic->a; + int values[3] = { 0 }; + if (a_ptr == NULL) return; // nothing to do + for (y = 0; y < h; ++y) { + int need_reset = 1; + for (x = 0; x < w; ++x) { + const int off_a = (y * pic->a_stride + x) * SIZE; + const int off_y = (y * pic->y_stride + x) * SIZE; + const int off_uv = (y * pic->uv_stride + x) * SIZE2; + if (is_transparent_area(a_ptr + off_a, pic->a_stride, SIZE)) { + if (need_reset) { + values[0] = pic->y[off_y]; + values[1] = pic->u[off_uv]; + values[2] = pic->v[off_uv]; + need_reset = 0; + } + flatten(pic->y + off_y, values[0], pic->y_stride, SIZE); + flatten(pic->u + off_uv, values[1], pic->uv_stride, SIZE2); + flatten(pic->v + off_uv, values[2], pic->uv_stride, SIZE2); + } else { + need_reset = 1; + } + } + } + } +} + +#undef SIZE +#undef SIZE2 + +void WebPCleanupTransparentAreaLossless(WebPPicture* const pic) { + int x, y, w, h; + uint32_t* argb; + assert(pic != NULL && pic->use_argb); + w = pic->width; + h = pic->height; + argb = pic->argb; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + if ((argb[x] & 0xff000000) == 0) { + argb[x] = 0x00000000; + } + } + argb += pic->argb_stride; + } +} + +//------------------------------------------------------------------------------ +// Blend color and remove transparency info + +#define BLEND(V0, V1, ALPHA) \ + ((((V0) * (255 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 16) +#define BLEND_10BIT(V0, V1, ALPHA) \ + ((((V0) * (1020 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 18) + +void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { + const int red = (background_rgb >> 16) & 0xff; + const int green = (background_rgb >> 8) & 0xff; + const int blue = (background_rgb >> 0) & 0xff; + int x, y; + if (pic == NULL) return; + if (!pic->use_argb) { + const int uv_width = (pic->width >> 1); // omit last pixel during u/v loop + const int Y0 = VP8RGBToY(red, green, blue, YUV_HALF); + // VP8RGBToU/V expects the u/v values summed over four pixels + const int U0 = VP8RGBToU(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF); + const int V0 = VP8RGBToV(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF); + const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT; + if (!has_alpha || pic->a == NULL) return; // nothing to do + for (y = 0; y < pic->height; ++y) { + // Luma blending + uint8_t* const y_ptr = pic->y + y * pic->y_stride; + uint8_t* const a_ptr = pic->a + y * pic->a_stride; + for (x = 0; x < pic->width; ++x) { + const int alpha = a_ptr[x]; + if (alpha < 0xff) { + y_ptr[x] = BLEND(Y0, y_ptr[x], a_ptr[x]); + } + } + // Chroma blending every even line + if ((y & 1) == 0) { + uint8_t* const u = pic->u + (y >> 1) * pic->uv_stride; + uint8_t* const v = pic->v + (y >> 1) * pic->uv_stride; + uint8_t* const a_ptr2 = + (y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride; + for (x = 0; x < uv_width; ++x) { + // Average four alpha values into a single blending weight. + // TODO(skal): might lead to visible contouring. Can we do better? + const int alpha = + a_ptr[2 * x + 0] + a_ptr[2 * x + 1] + + a_ptr2[2 * x + 0] + a_ptr2[2 * x + 1]; + u[x] = BLEND_10BIT(U0, u[x], alpha); + v[x] = BLEND_10BIT(V0, v[x], alpha); + } + if (pic->width & 1) { // rightmost pixel + const int alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]); + u[x] = BLEND_10BIT(U0, u[x], alpha); + v[x] = BLEND_10BIT(V0, v[x], alpha); + } + } + memset(a_ptr, 0xff, pic->width); + } + } else { + uint32_t* argb = pic->argb; + const uint32_t background = MakeARGB32(red, green, blue); + for (y = 0; y < pic->height; ++y) { + for (x = 0; x < pic->width; ++x) { + const int alpha = (argb[x] >> 24) & 0xff; + if (alpha != 0xff) { + if (alpha > 0) { + int r = (argb[x] >> 16) & 0xff; + int g = (argb[x] >> 8) & 0xff; + int b = (argb[x] >> 0) & 0xff; + r = BLEND(red, r, alpha); + g = BLEND(green, g, alpha); + b = BLEND(blue, b, alpha); + argb[x] = MakeARGB32(r, g, b); + } else { + argb[x] = background; + } + } + } + argb += pic->argb_stride; + } + } +} + +#undef BLEND +#undef BLEND_10BIT + +//------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/enc/quant.c b/thirdparty/libwebp/enc/quant.c new file mode 100644 index 0000000000..549ad26f93 --- /dev/null +++ b/thirdparty/libwebp/enc/quant.c @@ -0,0 +1,1283 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Quantization +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <assert.h> +#include <math.h> +#include <stdlib.h> // for abs() + +#include "./vp8enci.h" +#include "./cost.h" + +#define DO_TRELLIS_I4 1 +#define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate. +#define DO_TRELLIS_UV 0 // disable trellis for UV. Risky. Not worth. +#define USE_TDISTO 1 + +#define MID_ALPHA 64 // neutral value for susceptibility +#define MIN_ALPHA 30 // lowest usable value for susceptibility +#define MAX_ALPHA 100 // higher meaningful value for susceptibility + +#define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP + // power-law modulation. Must be strictly less than 1. + +// number of non-zero coeffs below which we consider the block very flat +// (and apply a penalty to complex predictions) +#define FLATNESS_LIMIT_I16 10 // I16 mode +#define FLATNESS_LIMIT_I4 3 // I4 mode +#define FLATNESS_LIMIT_UV 2 // UV mode +#define FLATNESS_PENALTY 140 // roughly ~1bit per block + +#define MULT_8B(a, b) (((a) * (b) + 128) >> 8) + +#define RD_DISTO_MULT 256 // distortion multiplier (equivalent of lambda) + +// #define DEBUG_BLOCK + +//------------------------------------------------------------------------------ + +#if defined(DEBUG_BLOCK) + +#include <stdio.h> +#include <stdlib.h> + +static void PrintBlockInfo(const VP8EncIterator* const it, + const VP8ModeScore* const rd) { + int i, j; + const int is_i16 = (it->mb_->type_ == 1); + const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC; + const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC; + const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC; + const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC; + printf("SOURCE / OUTPUT / ABS DELTA\n"); + for (j = 0; j < 16; ++j) { + for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]); + printf(" "); + for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]); + printf(" "); + for (i = 0; i < 16; ++i) { + printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS])); + } + printf("\n"); + } + printf("\n"); // newline before the U/V block + for (j = 0; j < 8; ++j) { + for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]); + printf(" "); + for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]); + printf(" "); + for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]); + printf(" "); + for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]); + printf(" "); + for (i = 0; i < 8; ++i) { + printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS])); + } + printf(" "); + for (i = 8; i < 16; ++i) { + printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS])); + } + printf("\n"); + } + printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n", + (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz, + (int)rd->score); + if (is_i16) { + printf("Mode: %d\n", rd->mode_i16); + printf("y_dc_levels:"); + for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]); + printf("\n"); + } else { + printf("Modes[16]: "); + for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]); + printf("\n"); + } + printf("y_ac_levels:\n"); + for (j = 0; j < 16; ++j) { + for (i = is_i16 ? 1 : 0; i < 16; ++i) { + printf("%4d ", rd->y_ac_levels[j][i]); + } + printf("\n"); + } + printf("\n"); + printf("uv_levels (mode=%d):\n", rd->mode_uv); + for (j = 0; j < 8; ++j) { + for (i = 0; i < 16; ++i) { + printf("%4d ", rd->uv_levels[j][i]); + } + printf("\n"); + } +} + +#endif // DEBUG_BLOCK + +//------------------------------------------------------------------------------ + +static WEBP_INLINE int clip(int v, int m, int M) { + return v < m ? m : v > M ? M : v; +} + +static const uint8_t kZigzag[16] = { + 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 +}; + +static const uint8_t kDcTable[128] = { + 4, 5, 6, 7, 8, 9, 10, 10, + 11, 12, 13, 14, 15, 16, 17, 17, + 18, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 24, 25, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, + 37, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, + 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 76, 77, 78, 79, 80, 81, + 82, 83, 84, 85, 86, 87, 88, 89, + 91, 93, 95, 96, 98, 100, 101, 102, + 104, 106, 108, 110, 112, 114, 116, 118, + 122, 124, 126, 128, 130, 132, 134, 136, + 138, 140, 143, 145, 148, 151, 154, 157 +}; + +static const uint16_t kAcTable[128] = { + 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 60, + 62, 64, 66, 68, 70, 72, 74, 76, + 78, 80, 82, 84, 86, 88, 90, 92, + 94, 96, 98, 100, 102, 104, 106, 108, + 110, 112, 114, 116, 119, 122, 125, 128, + 131, 134, 137, 140, 143, 146, 149, 152, + 155, 158, 161, 164, 167, 170, 173, 177, + 181, 185, 189, 193, 197, 201, 205, 209, + 213, 217, 221, 225, 229, 234, 239, 245, + 249, 254, 259, 264, 269, 274, 279, 284 +}; + +static const uint16_t kAcTable2[128] = { + 8, 8, 9, 10, 12, 13, 15, 17, + 18, 20, 21, 23, 24, 26, 27, 29, + 31, 32, 34, 35, 37, 38, 40, 41, + 43, 44, 46, 48, 49, 51, 52, 54, + 55, 57, 58, 60, 62, 63, 65, 66, + 68, 69, 71, 72, 74, 75, 77, 79, + 80, 82, 83, 85, 86, 88, 89, 93, + 96, 99, 102, 105, 108, 111, 114, 117, + 120, 124, 127, 130, 133, 136, 139, 142, + 145, 148, 151, 155, 158, 161, 164, 167, + 170, 173, 176, 179, 184, 189, 193, 198, + 203, 207, 212, 217, 221, 226, 230, 235, + 240, 244, 249, 254, 258, 263, 268, 274, + 280, 286, 292, 299, 305, 311, 317, 323, + 330, 336, 342, 348, 354, 362, 370, 379, + 385, 393, 401, 409, 416, 424, 432, 440 +}; + +static const uint8_t kBiasMatrices[3][2] = { // [luma-ac,luma-dc,chroma][dc,ac] + { 96, 110 }, { 96, 108 }, { 110, 115 } +}; + +// Sharpening by (slightly) raising the hi-frequency coeffs. +// Hack-ish but helpful for mid-bitrate range. Use with care. +#define SHARPEN_BITS 11 // number of descaling bits for sharpening bias +static const uint8_t kFreqSharpening[16] = { + 0, 30, 60, 90, + 30, 60, 90, 90, + 60, 90, 90, 90, + 90, 90, 90, 90 +}; + +//------------------------------------------------------------------------------ +// Initialize quantization parameters in VP8Matrix + +// Returns the average quantizer +static int ExpandMatrix(VP8Matrix* const m, int type) { + int i, sum; + for (i = 0; i < 2; ++i) { + const int is_ac_coeff = (i > 0); + const int bias = kBiasMatrices[type][is_ac_coeff]; + m->iq_[i] = (1 << QFIX) / m->q_[i]; + m->bias_[i] = BIAS(bias); + // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is: + // * zero if coeff <= zthresh + // * non-zero if coeff > zthresh + m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i]; + } + for (i = 2; i < 16; ++i) { + m->q_[i] = m->q_[1]; + m->iq_[i] = m->iq_[1]; + m->bias_[i] = m->bias_[1]; + m->zthresh_[i] = m->zthresh_[1]; + } + for (sum = 0, i = 0; i < 16; ++i) { + if (type == 0) { // we only use sharpening for AC luma coeffs + m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS; + } else { + m->sharpen_[i] = 0; + } + sum += m->q_[i]; + } + return (sum + 8) >> 4; +} + +static void CheckLambdaValue(int* const v) { if (*v < 1) *v = 1; } + +static void SetupMatrices(VP8Encoder* enc) { + int i; + const int tlambda_scale = + (enc->method_ >= 4) ? enc->config_->sns_strength + : 0; + const int num_segments = enc->segment_hdr_.num_segments_; + for (i = 0; i < num_segments; ++i) { + VP8SegmentInfo* const m = &enc->dqm_[i]; + const int q = m->quant_; + int q_i4, q_i16, q_uv; + m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)]; + m->y1_.q_[1] = kAcTable[clip(q, 0, 127)]; + + m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2; + m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)]; + + m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)]; + m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)]; + + q_i4 = ExpandMatrix(&m->y1_, 0); + q_i16 = ExpandMatrix(&m->y2_, 1); + q_uv = ExpandMatrix(&m->uv_, 2); + + m->lambda_i4_ = (3 * q_i4 * q_i4) >> 7; + m->lambda_i16_ = (3 * q_i16 * q_i16); + m->lambda_uv_ = (3 * q_uv * q_uv) >> 6; + m->lambda_mode_ = (1 * q_i4 * q_i4) >> 7; + m->lambda_trellis_i4_ = (7 * q_i4 * q_i4) >> 3; + m->lambda_trellis_i16_ = (q_i16 * q_i16) >> 2; + m->lambda_trellis_uv_ = (q_uv * q_uv) << 1; + m->tlambda_ = (tlambda_scale * q_i4) >> 5; + + // none of these constants should be < 1 + CheckLambdaValue(&m->lambda_i4_); + CheckLambdaValue(&m->lambda_i16_); + CheckLambdaValue(&m->lambda_uv_); + CheckLambdaValue(&m->lambda_mode_); + CheckLambdaValue(&m->lambda_trellis_i4_); + CheckLambdaValue(&m->lambda_trellis_i16_); + CheckLambdaValue(&m->lambda_trellis_uv_); + CheckLambdaValue(&m->tlambda_); + + m->min_disto_ = 10 * m->y1_.q_[0]; // quantization-aware min disto + m->max_edge_ = 0; + + m->i4_penalty_ = 1000 * q_i4 * q_i4; + } +} + +//------------------------------------------------------------------------------ +// Initialize filtering parameters + +// Very small filter-strength values have close to no visual effect. So we can +// save a little decoding-CPU by turning filtering off for these. +#define FSTRENGTH_CUTOFF 2 + +static void SetupFilterStrength(VP8Encoder* const enc) { + int i; + // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering. + const int level0 = 5 * enc->config_->filter_strength; + for (i = 0; i < NUM_MB_SEGMENTS; ++i) { + VP8SegmentInfo* const m = &enc->dqm_[i]; + // We focus on the quantization of AC coeffs. + const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2; + const int base_strength = + VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep); + // Segments with lower complexity ('beta') will be less filtered. + const int f = base_strength * level0 / (256 + m->beta_); + m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f; + } + // We record the initial strength (mainly for the case of 1-segment only). + enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_; + enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0); + enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness; +} + +//------------------------------------------------------------------------------ + +// Note: if you change the values below, remember that the max range +// allowed by the syntax for DQ_UV is [-16,16]. +#define MAX_DQ_UV (6) +#define MIN_DQ_UV (-4) + +// We want to emulate jpeg-like behaviour where the expected "good" quality +// is around q=75. Internally, our "good" middle is around c=50. So we +// map accordingly using linear piece-wise function +static double QualityToCompression(double c) { + const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.; + // The file size roughly scales as pow(quantizer, 3.). Actually, the + // exponent is somewhere between 2.8 and 3.2, but we're mostly interested + // in the mid-quant range. So we scale the compressibility inversely to + // this power-law: quant ~= compression ^ 1/3. This law holds well for + // low quant. Finer modeling for high-quant would make use of kAcTable[] + // more explicitly. + const double v = pow(linear_c, 1 / 3.); + return v; +} + +static double QualityToJPEGCompression(double c, double alpha) { + // We map the complexity 'alpha' and quality setting 'c' to a compression + // exponent empirically matched to the compression curve of libjpeg6b. + // On average, the WebP output size will be roughly similar to that of a + // JPEG file compressed with same quality factor. + const double amin = 0.30; + const double amax = 0.85; + const double exp_min = 0.4; + const double exp_max = 0.9; + const double slope = (exp_min - exp_max) / (amax - amin); + // Linearly interpolate 'expn' from exp_min to exp_max + // in the [amin, amax] range. + const double expn = (alpha > amax) ? exp_min + : (alpha < amin) ? exp_max + : exp_max + slope * (alpha - amin); + const double v = pow(c, expn); + return v; +} + +static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1, + const VP8SegmentInfo* const S2) { + return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_); +} + +static void SimplifySegments(VP8Encoder* const enc) { + int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 }; + // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an + // explicit check is needed to avoid a spurious warning about 'i' exceeding + // array bounds of 'dqm_' with some compilers (noticed with gcc-4.9). + const int num_segments = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) + ? enc->segment_hdr_.num_segments_ + : NUM_MB_SEGMENTS; + int num_final_segments = 1; + int s1, s2; + for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments + const VP8SegmentInfo* const S1 = &enc->dqm_[s1]; + int found = 0; + // check if we already have similar segment + for (s2 = 0; s2 < num_final_segments; ++s2) { + const VP8SegmentInfo* const S2 = &enc->dqm_[s2]; + if (SegmentsAreEquivalent(S1, S2)) { + found = 1; + break; + } + } + map[s1] = s2; + if (!found) { + if (num_final_segments != s1) { + enc->dqm_[num_final_segments] = enc->dqm_[s1]; + } + ++num_final_segments; + } + } + if (num_final_segments < num_segments) { // Remap + int i = enc->mb_w_ * enc->mb_h_; + while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_]; + enc->segment_hdr_.num_segments_ = num_final_segments; + // Replicate the trailing segment infos (it's mostly cosmetics) + for (i = num_final_segments; i < num_segments; ++i) { + enc->dqm_[i] = enc->dqm_[num_final_segments - 1]; + } + } +} + +void VP8SetSegmentParams(VP8Encoder* const enc, float quality) { + int i; + int dq_uv_ac, dq_uv_dc; + const int num_segments = enc->segment_hdr_.num_segments_; + const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.; + const double Q = quality / 100.; + const double c_base = enc->config_->emulate_jpeg_size ? + QualityToJPEGCompression(Q, enc->alpha_ / 255.) : + QualityToCompression(Q); + for (i = 0; i < num_segments; ++i) { + // We modulate the base coefficient to accommodate for the quantization + // susceptibility and allow denser segments to be quantized more. + const double expn = 1. - amp * enc->dqm_[i].alpha_; + const double c = pow(c_base, expn); + const int q = (int)(127. * (1. - c)); + assert(expn > 0.); + enc->dqm_[i].quant_ = clip(q, 0, 127); + } + + // purely indicative in the bitstream (except for the 1-segment case) + enc->base_quant_ = enc->dqm_[0].quant_; + + // fill-in values for the unused segments (required by the syntax) + for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) { + enc->dqm_[i].quant_ = enc->base_quant_; + } + + // uv_alpha_ is normally spread around ~60. The useful range is + // typically ~30 (quite bad) to ~100 (ok to decimate UV more). + // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv. + dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV) + / (MAX_ALPHA - MIN_ALPHA); + // we rescale by the user-defined strength of adaptation + dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100; + // and make it safe. + dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV); + // We also boost the dc-uv-quant a little, based on sns-strength, since + // U/V channels are quite more reactive to high quants (flat DC-blocks + // tend to appear, and are unpleasant). + dq_uv_dc = -4 * enc->config_->sns_strength / 100; + dq_uv_dc = clip(dq_uv_dc, -15, 15); // 4bit-signed max allowed + + enc->dq_y1_dc_ = 0; // TODO(skal): dq-lum + enc->dq_y2_dc_ = 0; + enc->dq_y2_ac_ = 0; + enc->dq_uv_dc_ = dq_uv_dc; + enc->dq_uv_ac_ = dq_uv_ac; + + SetupFilterStrength(enc); // initialize segments' filtering, eventually + + if (num_segments > 1) SimplifySegments(enc); + + SetupMatrices(enc); // finalize quantization matrices +} + +//------------------------------------------------------------------------------ +// Form the predictions in cache + +// Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index +const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 }; +const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 }; + +// Must be indexed using {B_DC_PRED -> B_HU_PRED} as index +const int VP8I4ModeOffsets[NUM_BMODES] = { + I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4 +}; + +void VP8MakeLuma16Preds(const VP8EncIterator* const it) { + const uint8_t* const left = it->x_ ? it->y_left_ : NULL; + const uint8_t* const top = it->y_ ? it->y_top_ : NULL; + VP8EncPredLuma16(it->yuv_p_, left, top); +} + +void VP8MakeChroma8Preds(const VP8EncIterator* const it) { + const uint8_t* const left = it->x_ ? it->u_left_ : NULL; + const uint8_t* const top = it->y_ ? it->uv_top_ : NULL; + VP8EncPredChroma8(it->yuv_p_, left, top); +} + +void VP8MakeIntra4Preds(const VP8EncIterator* const it) { + VP8EncPredLuma4(it->yuv_p_, it->i4_top_); +} + +//------------------------------------------------------------------------------ +// Quantize + +// Layout: +// +----+----+ +// |YYYY|UUVV| 0 +// |YYYY|UUVV| 4 +// |YYYY|....| 8 +// |YYYY|....| 12 +// +----+----+ + +const int VP8Scan[16] = { // Luma + 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, + 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, + 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, + 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, +}; + +static const int VP8ScanUV[4 + 4] = { + 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U + 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V +}; + +//------------------------------------------------------------------------------ +// Distortion measurement + +static const uint16_t kWeightY[16] = { + 38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2 +}; + +static const uint16_t kWeightTrellis[16] = { +#if USE_TDISTO == 0 + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 +#else + 30, 27, 19, 11, + 27, 24, 17, 10, + 19, 17, 12, 8, + 11, 10, 8, 6 +#endif +}; + +// Init/Copy the common fields in score. +static void InitScore(VP8ModeScore* const rd) { + rd->D = 0; + rd->SD = 0; + rd->R = 0; + rd->H = 0; + rd->nz = 0; + rd->score = MAX_COST; +} + +static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { + dst->D = src->D; + dst->SD = src->SD; + dst->R = src->R; + dst->H = src->H; + dst->nz = src->nz; // note that nz is not accumulated, but just copied. + dst->score = src->score; +} + +static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { + dst->D += src->D; + dst->SD += src->SD; + dst->R += src->R; + dst->H += src->H; + dst->nz |= src->nz; // here, new nz bits are accumulated. + dst->score += src->score; +} + +//------------------------------------------------------------------------------ +// Performs trellis-optimized quantization. + +// Trellis node +typedef struct { + int8_t prev; // best previous node + int8_t sign; // sign of coeff_i + int16_t level; // level +} Node; + +// Score state +typedef struct { + score_t score; // partial RD score + const uint16_t* costs; // shortcut to cost tables +} ScoreState; + +// If a coefficient was quantized to a value Q (using a neutral bias), +// we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA] +// We don't test negative values though. +#define MIN_DELTA 0 // how much lower level to try +#define MAX_DELTA 1 // how much higher +#define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA) +#define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA]) +#define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA]) + +static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) { + rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD); +} + +static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, + score_t distortion) { + return rate * lambda + RD_DISTO_MULT * distortion; +} + +static int TrellisQuantizeBlock(const VP8Encoder* const enc, + int16_t in[16], int16_t out[16], + int ctx0, int coeff_type, + const VP8Matrix* const mtx, + int lambda) { + const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; + CostArrayPtr const costs = + (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type]; + const int first = (coeff_type == 0) ? 1 : 0; + Node nodes[16][NUM_NODES]; + ScoreState score_states[2][NUM_NODES]; + ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA); + ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA); + int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous + score_t best_score; + int n, m, p, last; + + { + score_t cost; + const int thresh = mtx->q_[1] * mtx->q_[1] / 4; + const int last_proba = probas[VP8EncBands[first]][ctx0][0]; + + // compute the position of the last interesting coefficient + last = first - 1; + for (n = 15; n >= first; --n) { + const int j = kZigzag[n]; + const int err = in[j] * in[j]; + if (err > thresh) { + last = n; + break; + } + } + // we don't need to go inspect up to n = 16 coeffs. We can just go up + // to last + 1 (inclusive) without losing much. + if (last < 15) ++last; + + // compute 'skip' score. This is the max score one can do. + cost = VP8BitCost(0, last_proba); + best_score = RDScoreTrellis(lambda, cost, 0); + + // initialize source node. + for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { + const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0; + ss_cur[m].score = RDScoreTrellis(lambda, rate, 0); + ss_cur[m].costs = costs[first][ctx0]; + } + } + + // traverse trellis. + for (n = first; n <= last; ++n) { + const int j = kZigzag[n]; + const uint32_t Q = mtx->q_[j]; + const uint32_t iQ = mtx->iq_[j]; + const uint32_t B = BIAS(0x00); // neutral bias + // note: it's important to take sign of the _original_ coeff, + // so we don't have to consider level < 0 afterward. + const int sign = (in[j] < 0); + const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j]; + int level0 = QUANTDIV(coeff0, iQ, B); + if (level0 > MAX_LEVEL) level0 = MAX_LEVEL; + + { // Swap current and previous score states + ScoreState* const tmp = ss_cur; + ss_cur = ss_prev; + ss_prev = tmp; + } + + // test all alternate level values around level0. + for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { + Node* const cur = &NODE(n, m); + int level = level0 + m; + const int ctx = (level > 2) ? 2 : level; + const int band = VP8EncBands[n + 1]; + score_t base_score, last_pos_score; + score_t best_cur_score = MAX_COST; + int best_prev = 0; // default, in case + + ss_cur[m].score = MAX_COST; + ss_cur[m].costs = costs[n + 1][ctx]; + if (level > MAX_LEVEL || level < 0) { // node is dead? + continue; + } + + // Compute extra rate cost if last coeff's position is < 15 + { + const score_t last_pos_cost = + (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0; + last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0); + } + + { + // Compute delta_error = how much coding this level will + // subtract to max_error as distortion. + // Here, distortion = sum of (|coeff_i| - level_i * Q_i)^2 + const int new_error = coeff0 - level * Q; + const int delta_error = + kWeightTrellis[j] * (new_error * new_error - coeff0 * coeff0); + base_score = RDScoreTrellis(lambda, 0, delta_error); + } + + // Inspect all possible non-dead predecessors. Retain only the best one. + for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) { + // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically + // eliminated since their score can't be better than the current best. + const score_t cost = VP8LevelCost(ss_prev[p].costs, level); + // Examine node assuming it's a non-terminal one. + const score_t score = + base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0); + if (score < best_cur_score) { + best_cur_score = score; + best_prev = p; + } + } + // Store best finding in current node. + cur->sign = sign; + cur->level = level; + cur->prev = best_prev; + ss_cur[m].score = best_cur_score; + + // Now, record best terminal node (and thus best entry in the graph). + if (level != 0) { + const score_t score = best_cur_score + last_pos_score; + if (score < best_score) { + best_score = score; + best_path[0] = n; // best eob position + best_path[1] = m; // best node index + best_path[2] = best_prev; // best predecessor + } + } + } + } + + // Fresh start + memset(in + first, 0, (16 - first) * sizeof(*in)); + memset(out + first, 0, (16 - first) * sizeof(*out)); + if (best_path[0] == -1) { + return 0; // skip! + } + + { + // Unwind the best path. + // Note: best-prev on terminal node is not necessarily equal to the + // best_prev for non-terminal. So we patch best_path[2] in. + int nz = 0; + int best_node = best_path[1]; + n = best_path[0]; + NODE(n, best_node).prev = best_path[2]; // force best-prev for terminal + + for (; n >= first; --n) { + const Node* const node = &NODE(n, best_node); + const int j = kZigzag[n]; + out[n] = node->sign ? -node->level : node->level; + nz |= node->level; + in[j] = out[n] * mtx->q_[j]; + best_node = node->prev; + } + return (nz != 0); + } +} + +#undef NODE + +//------------------------------------------------------------------------------ +// Performs: difference, transform, quantize, back-transform, add +// all at once. Output is the reconstructed block in *yuv_out, and the +// quantized levels in *levels. + +static int ReconstructIntra16(VP8EncIterator* const it, + VP8ModeScore* const rd, + uint8_t* const yuv_out, + int mode) { + const VP8Encoder* const enc = it->enc_; + const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; + const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; + int nz = 0; + int n; + int16_t tmp[16][16], dc_tmp[16]; + + for (n = 0; n < 16; n += 2) { + VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]); + } + VP8FTransformWHT(tmp[0], dc_tmp); + nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24; + + if (DO_TRELLIS_I16 && it->do_trellis_) { + int x, y; + VP8IteratorNzToBytes(it); + for (y = 0, n = 0; y < 4; ++y) { + for (x = 0; x < 4; ++x, ++n) { + const int ctx = it->top_nz_[x] + it->left_nz_[y]; + const int non_zero = + TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0, + &dqm->y1_, dqm->lambda_trellis_i16_); + it->top_nz_[x] = it->left_nz_[y] = non_zero; + rd->y_ac_levels[n][0] = 0; + nz |= non_zero << n; + } + } + } else { + for (n = 0; n < 16; n += 2) { + // Zero-out the first coeff, so that: a) nz is correct below, and + // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. + tmp[n][0] = tmp[n + 1][0] = 0; + nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; + assert(rd->y_ac_levels[n + 0][0] == 0); + assert(rd->y_ac_levels[n + 1][0] == 0); + } + } + + // Transform back + VP8TransformWHT(dc_tmp, tmp[0]); + for (n = 0; n < 16; n += 2) { + VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1); + } + + return nz; +} + +static int ReconstructIntra4(VP8EncIterator* const it, + int16_t levels[16], + const uint8_t* const src, + uint8_t* const yuv_out, + int mode) { + const VP8Encoder* const enc = it->enc_; + const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; + const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; + int nz = 0; + int16_t tmp[16]; + + VP8FTransform(src, ref, tmp); + if (DO_TRELLIS_I4 && it->do_trellis_) { + const int x = it->i4_ & 3, y = it->i4_ >> 2; + const int ctx = it->top_nz_[x] + it->left_nz_[y]; + nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_, + dqm->lambda_trellis_i4_); + } else { + nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_); + } + VP8ITransform(ref, tmp, yuv_out, 0); + return nz; +} + +static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, + uint8_t* const yuv_out, int mode) { + const VP8Encoder* const enc = it->enc_; + const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; + const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; + const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; + int nz = 0; + int n; + int16_t tmp[8][16]; + + for (n = 0; n < 8; n += 2) { + VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); + } + if (DO_TRELLIS_UV && it->do_trellis_) { + int ch, x, y; + for (ch = 0, n = 0; ch <= 2; ch += 2) { + for (y = 0; y < 2; ++y) { + for (x = 0; x < 2; ++x, ++n) { + const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; + const int non_zero = + TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2, + &dqm->uv_, dqm->lambda_trellis_uv_); + it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero; + nz |= non_zero << n; + } + } + } + } else { + for (n = 0; n < 8; n += 2) { + nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n; + } + } + + for (n = 0; n < 8; n += 2) { + VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1); + } + return (nz << 16); +} + +//------------------------------------------------------------------------------ +// RD-opt decision. Reconstruct each modes, evalue distortion and bit-cost. +// Pick the mode is lower RD-cost = Rate + lambda * Distortion. + +static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) { + // We look at the first three AC coefficients to determine what is the average + // delta between each sub-4x4 block. + const int v0 = abs(DCs[1]); + const int v1 = abs(DCs[4]); + const int v2 = abs(DCs[5]); + int max_v = (v0 > v1) ? v1 : v0; + max_v = (v2 > max_v) ? v2 : max_v; + if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v; +} + +static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) { + VP8ModeScore* const tmp = *a; + *a = *b; + *b = tmp; +} + +static void SwapPtr(uint8_t** a, uint8_t** b) { + uint8_t* const tmp = *a; + *a = *b; + *b = tmp; +} + +static void SwapOut(VP8EncIterator* const it) { + SwapPtr(&it->yuv_out_, &it->yuv_out2_); +} + +static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) { + score_t score = 0; + while (num_blocks-- > 0) { // TODO(skal): refine positional scoring? + int i; + for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC + score += (levels[i] != 0); + if (score > thresh) return 0; + } + levels += 16; + } + return 1; +} + +static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { + const int kNumBlocks = 16; + VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; + const int lambda = dqm->lambda_i16_; + const int tlambda = dqm->tlambda_; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; + VP8ModeScore rd_tmp; + VP8ModeScore* rd_cur = &rd_tmp; + VP8ModeScore* rd_best = rd; + int mode; + + rd->mode_i16 = -1; + for (mode = 0; mode < NUM_PRED_MODES; ++mode) { + uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC; // scratch buffer + rd_cur->mode_i16 = mode; + + // Reconstruct + rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode); + + // Measure RD-score + rd_cur->D = VP8SSE16x16(src, tmp_dst); + rd_cur->SD = + tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0; + rd_cur->H = VP8FixedCostsI16[mode]; + rd_cur->R = VP8GetCostLuma16(it, rd_cur); + if (mode > 0 && + IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) { + // penalty to avoid flat area to be mispredicted by complex mode + rd_cur->R += FLATNESS_PENALTY * kNumBlocks; + } + + // Since we always examine Intra16 first, we can overwrite *rd directly. + SetRDScore(lambda, rd_cur); + if (mode == 0 || rd_cur->score < rd_best->score) { + SwapModeScore(&rd_cur, &rd_best); + SwapOut(it); + } + } + if (rd_best != rd) { + memcpy(rd, rd_best, sizeof(*rd)); + } + SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision. + VP8SetIntra16Mode(it, rd->mode_i16); + + // we have a blocky macroblock (only DCs are non-zero) with fairly high + // distortion, record max delta so we can later adjust the minimal filtering + // strength needed to smooth these blocks out. + if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) { + StoreMaxDelta(dqm, rd->y_dc_levels); + } +} + +//------------------------------------------------------------------------------ + +// return the cost array corresponding to the surrounding prediction modes. +static const uint16_t* GetCostModeI4(VP8EncIterator* const it, + const uint8_t modes[16]) { + const int preds_w = it->enc_->preds_w_; + const int x = (it->i4_ & 3), y = it->i4_ >> 2; + const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1]; + const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4]; + return VP8FixedCostsI4[top][left]; +} + +static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { + const VP8Encoder* const enc = it->enc_; + const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; + const int lambda = dqm->lambda_i4_; + const int tlambda = dqm->tlambda_; + const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC; + uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC; + int total_header_bits = 0; + VP8ModeScore rd_best; + + if (enc->max_i4_header_bits_ == 0) { + return 0; + } + + InitScore(&rd_best); + rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145) + SetRDScore(dqm->lambda_mode_, &rd_best); + VP8IteratorStartI4(it); + do { + const int kNumBlocks = 1; + VP8ModeScore rd_i4; + int mode; + int best_mode = -1; + const uint8_t* const src = src0 + VP8Scan[it->i4_]; + const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4); + uint8_t* best_block = best_blocks + VP8Scan[it->i4_]; + uint8_t* tmp_dst = it->yuv_p_ + I4TMP; // scratch buffer. + + InitScore(&rd_i4); + VP8MakeIntra4Preds(it); + for (mode = 0; mode < NUM_BMODES; ++mode) { + VP8ModeScore rd_tmp; + int16_t tmp_levels[16]; + + // Reconstruct + rd_tmp.nz = + ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_; + + // Compute RD-score + rd_tmp.D = VP8SSE4x4(src, tmp_dst); + rd_tmp.SD = + tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY)) + : 0; + rd_tmp.H = mode_costs[mode]; + + // Add flatness penalty + if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) { + rd_tmp.R = FLATNESS_PENALTY * kNumBlocks; + } else { + rd_tmp.R = 0; + } + + // early-out check + SetRDScore(lambda, &rd_tmp); + if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue; + + // finish computing score + rd_tmp.R += VP8GetCostLuma4(it, tmp_levels); + SetRDScore(lambda, &rd_tmp); + + if (best_mode < 0 || rd_tmp.score < rd_i4.score) { + CopyScore(&rd_i4, &rd_tmp); + best_mode = mode; + SwapPtr(&tmp_dst, &best_block); + memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, + sizeof(rd_best.y_ac_levels[it->i4_])); + } + } + SetRDScore(dqm->lambda_mode_, &rd_i4); + AddScore(&rd_best, &rd_i4); + if (rd_best.score >= rd->score) { + return 0; + } + total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode]; + if (total_header_bits > enc->max_i4_header_bits_) { + return 0; + } + // Copy selected samples if not in the right place already. + if (best_block != best_blocks + VP8Scan[it->i4_]) { + VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]); + } + rd->modes_i4[it->i4_] = best_mode; + it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0); + } while (VP8IteratorRotateI4(it, best_blocks)); + + // finalize state + CopyScore(rd, &rd_best); + VP8SetIntra4Mode(it, rd->modes_i4); + SwapOut(it); + memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels)); + return 1; // select intra4x4 over intra16x16 +} + +//------------------------------------------------------------------------------ + +static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { + const int kNumBlocks = 8; + const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; + const int lambda = dqm->lambda_uv_; + const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; + uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC; // scratch buffer + uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC; + uint8_t* dst = dst0; + VP8ModeScore rd_best; + int mode; + + rd->mode_uv = -1; + InitScore(&rd_best); + for (mode = 0; mode < NUM_PRED_MODES; ++mode) { + VP8ModeScore rd_uv; + + // Reconstruct + rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode); + + // Compute RD-score + rd_uv.D = VP8SSE16x8(src, tmp_dst); + rd_uv.SD = 0; // not calling TDisto here: it tends to flatten areas. + rd_uv.H = VP8FixedCostsUV[mode]; + rd_uv.R = VP8GetCostUV(it, &rd_uv); + if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) { + rd_uv.R += FLATNESS_PENALTY * kNumBlocks; + } + + SetRDScore(lambda, &rd_uv); + if (mode == 0 || rd_uv.score < rd_best.score) { + CopyScore(&rd_best, &rd_uv); + rd->mode_uv = mode; + memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels)); + SwapPtr(&dst, &tmp_dst); + } + } + VP8SetIntraUVMode(it, rd->mode_uv); + AddScore(rd, &rd_best); + if (dst != dst0) { // copy 16x8 block if needed + VP8Copy16x8(dst, dst0); + } +} + +//------------------------------------------------------------------------------ +// Final reconstruction and quantization. + +static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { + const VP8Encoder* const enc = it->enc_; + const int is_i16 = (it->mb_->type_ == 1); + int nz = 0; + + if (is_i16) { + nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); + } else { + VP8IteratorStartI4(it); + do { + const int mode = + it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_]; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; + uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_]; + VP8MakeIntra4Preds(it); + nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_], + src, dst, mode) << it->i4_; + } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC)); + } + + nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); + rd->nz = nz; +} + +// Refine intra16/intra4 sub-modes based on distortion only (not rate). +static void RefineUsingDistortion(VP8EncIterator* const it, + int try_both_modes, int refine_uv_mode, + VP8ModeScore* const rd) { + score_t best_score = MAX_COST; + int nz = 0; + int mode; + int is_i16 = try_both_modes || (it->mb_->type_ == 1); + + const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; + // Some empiric constants, of approximate order of magnitude. + const int lambda_d_i16 = 106; + const int lambda_d_i4 = 11; + const int lambda_d_uv = 120; + score_t score_i4 = dqm->i4_penalty_; + score_t i4_bit_sum = 0; + const score_t bit_limit = it->enc_->mb_header_limit_; + + if (is_i16) { // First, evaluate Intra16 distortion + int best_mode = -1; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; + for (mode = 0; mode < NUM_PRED_MODES; ++mode) { + const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; + const score_t score = VP8SSE16x16(src, ref) * RD_DISTO_MULT + + VP8FixedCostsI16[mode] * lambda_d_i16; + if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) { + continue; + } + if (score < best_score) { + best_mode = mode; + best_score = score; + } + } + VP8SetIntra16Mode(it, best_mode); + // we'll reconstruct later, if i16 mode actually gets selected + } + + // Next, evaluate Intra4 + if (try_both_modes || !is_i16) { + // We don't evaluate the rate here, but just account for it through a + // constant penalty (i4 mode usually needs more bits compared to i16). + is_i16 = 0; + VP8IteratorStartI4(it); + do { + int best_i4_mode = -1; + score_t best_i4_score = MAX_COST; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; + const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4); + + VP8MakeIntra4Preds(it); + for (mode = 0; mode < NUM_BMODES; ++mode) { + const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; + const score_t score = VP8SSE4x4(src, ref) * RD_DISTO_MULT + + mode_costs[mode] * lambda_d_i4; + if (score < best_i4_score) { + best_i4_mode = mode; + best_i4_score = score; + } + } + i4_bit_sum += mode_costs[best_i4_mode]; + rd->modes_i4[it->i4_] = best_i4_mode; + score_i4 += best_i4_score; + if (score_i4 >= best_score || i4_bit_sum > bit_limit) { + // Intra4 won't be better than Intra16. Bail out and pick Intra16. + is_i16 = 1; + break; + } else { // reconstruct partial block inside yuv_out2_ buffer + uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_]; + nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_], + src, tmp_dst, best_i4_mode) << it->i4_; + } + } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC)); + } + + // Final reconstruction, depending on which mode is selected. + if (!is_i16) { + VP8SetIntra4Mode(it, rd->modes_i4); + SwapOut(it); + best_score = score_i4; + } else { + nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); + } + + // ... and UV! + if (refine_uv_mode) { + int best_mode = -1; + score_t best_uv_score = MAX_COST; + const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; + for (mode = 0; mode < NUM_PRED_MODES; ++mode) { + const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; + const score_t score = VP8SSE16x8(src, ref) * RD_DISTO_MULT + + VP8FixedCostsUV[mode] * lambda_d_uv; + if (score < best_uv_score) { + best_mode = mode; + best_uv_score = score; + } + } + VP8SetIntraUVMode(it, best_mode); + } + nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); + + rd->nz = nz; + rd->score = best_score; +} + +//------------------------------------------------------------------------------ +// Entry point + +int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, + VP8RDLevel rd_opt) { + int is_skipped; + const int method = it->enc_->method_; + + InitScore(rd); + + // We can perform predictions for Luma16x16 and Chroma8x8 already. + // Luma4x4 predictions needs to be done as-we-go. + VP8MakeLuma16Preds(it); + VP8MakeChroma8Preds(it); + + if (rd_opt > RD_OPT_NONE) { + it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL); + PickBestIntra16(it, rd); + if (method >= 2) { + PickBestIntra4(it, rd); + } + PickBestUV(it, rd); + if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now + it->do_trellis_ = 1; + SimpleQuantize(it, rd); + } + } else { + // At this point we have heuristically decided intra16 / intra4. + // For method >= 2, pick the best intra4/intra16 based on SSE (~tad slower). + // For method <= 1, we don't re-examine the decision but just go ahead with + // quantization/reconstruction. + RefineUsingDistortion(it, (method >= 2), (method >= 1), rd); + } + is_skipped = (rd->nz == 0); + VP8SetSkip(it, is_skipped); + return is_skipped; +} diff --git a/thirdparty/libwebp/enc/syntax.c b/thirdparty/libwebp/enc/syntax.c new file mode 100644 index 0000000000..a0e79ef404 --- /dev/null +++ b/thirdparty/libwebp/enc/syntax.c @@ -0,0 +1,383 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Header syntax writing +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <assert.h> + +#include "../utils/utils.h" +#include "../webp/format_constants.h" // RIFF constants +#include "../webp/mux_types.h" // ALPHA_FLAG +#include "./vp8enci.h" + +//------------------------------------------------------------------------------ +// Helper functions + +static int IsVP8XNeeded(const VP8Encoder* const enc) { + return !!enc->has_alpha_; // Currently the only case when VP8X is needed. + // This could change in the future. +} + +static int PutPaddingByte(const WebPPicture* const pic) { + const uint8_t pad_byte[1] = { 0 }; + return !!pic->writer(pad_byte, 1, pic); +} + +//------------------------------------------------------------------------------ +// Writers for header's various pieces (in order of appearance) + +static WebPEncodingError PutRIFFHeader(const VP8Encoder* const enc, + size_t riff_size) { + const WebPPicture* const pic = enc->pic_; + uint8_t riff[RIFF_HEADER_SIZE] = { + 'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P' + }; + assert(riff_size == (uint32_t)riff_size); + PutLE32(riff + TAG_SIZE, (uint32_t)riff_size); + if (!pic->writer(riff, sizeof(riff), pic)) { + return VP8_ENC_ERROR_BAD_WRITE; + } + return VP8_ENC_OK; +} + +static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) { + const WebPPicture* const pic = enc->pic_; + uint8_t vp8x[CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE] = { + 'V', 'P', '8', 'X' + }; + uint32_t flags = 0; + + assert(IsVP8XNeeded(enc)); + assert(pic->width >= 1 && pic->height >= 1); + assert(pic->width <= MAX_CANVAS_SIZE && pic->height <= MAX_CANVAS_SIZE); + + if (enc->has_alpha_) { + flags |= ALPHA_FLAG; + } + + PutLE32(vp8x + TAG_SIZE, VP8X_CHUNK_SIZE); + PutLE32(vp8x + CHUNK_HEADER_SIZE, flags); + PutLE24(vp8x + CHUNK_HEADER_SIZE + 4, pic->width - 1); + PutLE24(vp8x + CHUNK_HEADER_SIZE + 7, pic->height - 1); + if (!pic->writer(vp8x, sizeof(vp8x), pic)) { + return VP8_ENC_ERROR_BAD_WRITE; + } + return VP8_ENC_OK; +} + +static WebPEncodingError PutAlphaChunk(const VP8Encoder* const enc) { + const WebPPicture* const pic = enc->pic_; + uint8_t alpha_chunk_hdr[CHUNK_HEADER_SIZE] = { + 'A', 'L', 'P', 'H' + }; + + assert(enc->has_alpha_); + + // Alpha chunk header. + PutLE32(alpha_chunk_hdr + TAG_SIZE, enc->alpha_data_size_); + if (!pic->writer(alpha_chunk_hdr, sizeof(alpha_chunk_hdr), pic)) { + return VP8_ENC_ERROR_BAD_WRITE; + } + + // Alpha chunk data. + if (!pic->writer(enc->alpha_data_, enc->alpha_data_size_, pic)) { + return VP8_ENC_ERROR_BAD_WRITE; + } + + // Padding. + if ((enc->alpha_data_size_ & 1) && !PutPaddingByte(pic)) { + return VP8_ENC_ERROR_BAD_WRITE; + } + return VP8_ENC_OK; +} + +static WebPEncodingError PutVP8Header(const WebPPicture* const pic, + size_t vp8_size) { + uint8_t vp8_chunk_hdr[CHUNK_HEADER_SIZE] = { + 'V', 'P', '8', ' ' + }; + assert(vp8_size == (uint32_t)vp8_size); + PutLE32(vp8_chunk_hdr + TAG_SIZE, (uint32_t)vp8_size); + if (!pic->writer(vp8_chunk_hdr, sizeof(vp8_chunk_hdr), pic)) { + return VP8_ENC_ERROR_BAD_WRITE; + } + return VP8_ENC_OK; +} + +static WebPEncodingError PutVP8FrameHeader(const WebPPicture* const pic, + int profile, size_t size0) { + uint8_t vp8_frm_hdr[VP8_FRAME_HEADER_SIZE]; + uint32_t bits; + + if (size0 >= VP8_MAX_PARTITION0_SIZE) { // partition #0 is too big to fit + return VP8_ENC_ERROR_PARTITION0_OVERFLOW; + } + + // Paragraph 9.1. + bits = 0 // keyframe (1b) + | (profile << 1) // profile (3b) + | (1 << 4) // visible (1b) + | ((uint32_t)size0 << 5); // partition length (19b) + vp8_frm_hdr[0] = (bits >> 0) & 0xff; + vp8_frm_hdr[1] = (bits >> 8) & 0xff; + vp8_frm_hdr[2] = (bits >> 16) & 0xff; + // signature + vp8_frm_hdr[3] = (VP8_SIGNATURE >> 16) & 0xff; + vp8_frm_hdr[4] = (VP8_SIGNATURE >> 8) & 0xff; + vp8_frm_hdr[5] = (VP8_SIGNATURE >> 0) & 0xff; + // dimensions + vp8_frm_hdr[6] = pic->width & 0xff; + vp8_frm_hdr[7] = pic->width >> 8; + vp8_frm_hdr[8] = pic->height & 0xff; + vp8_frm_hdr[9] = pic->height >> 8; + + if (!pic->writer(vp8_frm_hdr, sizeof(vp8_frm_hdr), pic)) { + return VP8_ENC_ERROR_BAD_WRITE; + } + return VP8_ENC_OK; +} + +// WebP Headers. +static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0, + size_t vp8_size, size_t riff_size) { + WebPPicture* const pic = enc->pic_; + WebPEncodingError err = VP8_ENC_OK; + + // RIFF header. + err = PutRIFFHeader(enc, riff_size); + if (err != VP8_ENC_OK) goto Error; + + // VP8X. + if (IsVP8XNeeded(enc)) { + err = PutVP8XHeader(enc); + if (err != VP8_ENC_OK) goto Error; + } + + // Alpha. + if (enc->has_alpha_) { + err = PutAlphaChunk(enc); + if (err != VP8_ENC_OK) goto Error; + } + + // VP8 header. + err = PutVP8Header(pic, vp8_size); + if (err != VP8_ENC_OK) goto Error; + + // VP8 frame header. + err = PutVP8FrameHeader(pic, enc->profile_, size0); + if (err != VP8_ENC_OK) goto Error; + + // All OK. + return 1; + + // Error. + Error: + return WebPEncodingSetError(pic, err); +} + +// Segmentation header +static void PutSegmentHeader(VP8BitWriter* const bw, + const VP8Encoder* const enc) { + const VP8EncSegmentHeader* const hdr = &enc->segment_hdr_; + const VP8EncProba* const proba = &enc->proba_; + if (VP8PutBitUniform(bw, (hdr->num_segments_ > 1))) { + // We always 'update' the quant and filter strength values + const int update_data = 1; + int s; + VP8PutBitUniform(bw, hdr->update_map_); + if (VP8PutBitUniform(bw, update_data)) { + // we always use absolute values, not relative ones + VP8PutBitUniform(bw, 1); // (segment_feature_mode = 1. Paragraph 9.3.) + for (s = 0; s < NUM_MB_SEGMENTS; ++s) { + VP8PutSignedBits(bw, enc->dqm_[s].quant_, 7); + } + for (s = 0; s < NUM_MB_SEGMENTS; ++s) { + VP8PutSignedBits(bw, enc->dqm_[s].fstrength_, 6); + } + } + if (hdr->update_map_) { + for (s = 0; s < 3; ++s) { + if (VP8PutBitUniform(bw, (proba->segments_[s] != 255u))) { + VP8PutBits(bw, proba->segments_[s], 8); + } + } + } + } +} + +// Filtering parameters header +static void PutFilterHeader(VP8BitWriter* const bw, + const VP8EncFilterHeader* const hdr) { + const int use_lf_delta = (hdr->i4x4_lf_delta_ != 0); + VP8PutBitUniform(bw, hdr->simple_); + VP8PutBits(bw, hdr->level_, 6); + VP8PutBits(bw, hdr->sharpness_, 3); + if (VP8PutBitUniform(bw, use_lf_delta)) { + // '0' is the default value for i4x4_lf_delta_ at frame #0. + const int need_update = (hdr->i4x4_lf_delta_ != 0); + if (VP8PutBitUniform(bw, need_update)) { + // we don't use ref_lf_delta => emit four 0 bits + VP8PutBits(bw, 0, 4); + // we use mode_lf_delta for i4x4 + VP8PutSignedBits(bw, hdr->i4x4_lf_delta_, 6); + VP8PutBits(bw, 0, 3); // all others unused + } + } +} + +// Nominal quantization parameters +static void PutQuant(VP8BitWriter* const bw, + const VP8Encoder* const enc) { + VP8PutBits(bw, enc->base_quant_, 7); + VP8PutSignedBits(bw, enc->dq_y1_dc_, 4); + VP8PutSignedBits(bw, enc->dq_y2_dc_, 4); + VP8PutSignedBits(bw, enc->dq_y2_ac_, 4); + VP8PutSignedBits(bw, enc->dq_uv_dc_, 4); + VP8PutSignedBits(bw, enc->dq_uv_ac_, 4); +} + +// Partition sizes +static int EmitPartitionsSize(const VP8Encoder* const enc, + WebPPicture* const pic) { + uint8_t buf[3 * (MAX_NUM_PARTITIONS - 1)]; + int p; + for (p = 0; p < enc->num_parts_ - 1; ++p) { + const size_t part_size = VP8BitWriterSize(enc->parts_ + p); + if (part_size >= VP8_MAX_PARTITION_SIZE) { + return WebPEncodingSetError(pic, VP8_ENC_ERROR_PARTITION_OVERFLOW); + } + buf[3 * p + 0] = (part_size >> 0) & 0xff; + buf[3 * p + 1] = (part_size >> 8) & 0xff; + buf[3 * p + 2] = (part_size >> 16) & 0xff; + } + return p ? pic->writer(buf, 3 * p, pic) : 1; +} + +//------------------------------------------------------------------------------ + +static int GeneratePartition0(VP8Encoder* const enc) { + VP8BitWriter* const bw = &enc->bw_; + const int mb_size = enc->mb_w_ * enc->mb_h_; + uint64_t pos1, pos2, pos3; + + pos1 = VP8BitWriterPos(bw); + if (!VP8BitWriterInit(bw, mb_size * 7 / 8)) { // ~7 bits per macroblock + return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + } + VP8PutBitUniform(bw, 0); // colorspace + VP8PutBitUniform(bw, 0); // clamp type + + PutSegmentHeader(bw, enc); + PutFilterHeader(bw, &enc->filter_hdr_); + VP8PutBits(bw, enc->num_parts_ == 8 ? 3 : + enc->num_parts_ == 4 ? 2 : + enc->num_parts_ == 2 ? 1 : 0, 2); + PutQuant(bw, enc); + VP8PutBitUniform(bw, 0); // no proba update + VP8WriteProbas(bw, &enc->proba_); + pos2 = VP8BitWriterPos(bw); + VP8CodeIntraModes(enc); + VP8BitWriterFinish(bw); + + pos3 = VP8BitWriterPos(bw); + + if (enc->pic_->stats) { + enc->pic_->stats->header_bytes[0] = (int)((pos2 - pos1 + 7) >> 3); + enc->pic_->stats->header_bytes[1] = (int)((pos3 - pos2 + 7) >> 3); + enc->pic_->stats->alpha_data_size = (int)enc->alpha_data_size_; + } + if (bw->error_) { + return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + } + return 1; +} + +void VP8EncFreeBitWriters(VP8Encoder* const enc) { + int p; + VP8BitWriterWipeOut(&enc->bw_); + for (p = 0; p < enc->num_parts_; ++p) { + VP8BitWriterWipeOut(enc->parts_ + p); + } +} + +int VP8EncWrite(VP8Encoder* const enc) { + WebPPicture* const pic = enc->pic_; + VP8BitWriter* const bw = &enc->bw_; + const int task_percent = 19; + const int percent_per_part = task_percent / enc->num_parts_; + const int final_percent = enc->percent_ + task_percent; + int ok = 0; + size_t vp8_size, pad, riff_size; + int p; + + // Partition #0 with header and partition sizes + ok = GeneratePartition0(enc); + if (!ok) return 0; + + // Compute VP8 size + vp8_size = VP8_FRAME_HEADER_SIZE + + VP8BitWriterSize(bw) + + 3 * (enc->num_parts_ - 1); + for (p = 0; p < enc->num_parts_; ++p) { + vp8_size += VP8BitWriterSize(enc->parts_ + p); + } + pad = vp8_size & 1; + vp8_size += pad; + + // Compute RIFF size + // At the minimum it is: "WEBPVP8 nnnn" + VP8 data size. + riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8_size; + if (IsVP8XNeeded(enc)) { // Add size for: VP8X header + data. + riff_size += CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE; + } + if (enc->has_alpha_) { // Add size for: ALPH header + data. + const uint32_t padded_alpha_size = enc->alpha_data_size_ + + (enc->alpha_data_size_ & 1); + riff_size += CHUNK_HEADER_SIZE + padded_alpha_size; + } + // Sanity check. + if (riff_size > 0xfffffffeU) { + return WebPEncodingSetError(pic, VP8_ENC_ERROR_FILE_TOO_BIG); + } + + // Emit headers and partition #0 + { + const uint8_t* const part0 = VP8BitWriterBuf(bw); + const size_t size0 = VP8BitWriterSize(bw); + ok = ok && PutWebPHeaders(enc, size0, vp8_size, riff_size) + && pic->writer(part0, size0, pic) + && EmitPartitionsSize(enc, pic); + VP8BitWriterWipeOut(bw); // will free the internal buffer. + } + + // Token partitions + for (p = 0; p < enc->num_parts_; ++p) { + const uint8_t* const buf = VP8BitWriterBuf(enc->parts_ + p); + const size_t size = VP8BitWriterSize(enc->parts_ + p); + if (size) + ok = ok && pic->writer(buf, size, pic); + VP8BitWriterWipeOut(enc->parts_ + p); // will free the internal buffer. + ok = ok && WebPReportProgress(pic, enc->percent_ + percent_per_part, + &enc->percent_); + } + + // Padding byte + if (ok && pad) { + ok = PutPaddingByte(pic); + } + + enc->coded_size_ = (int)(CHUNK_HEADER_SIZE + riff_size); + ok = ok && WebPReportProgress(pic, final_percent, &enc->percent_); + return ok; +} + +//------------------------------------------------------------------------------ + diff --git a/thirdparty/libwebp/enc/token.c b/thirdparty/libwebp/enc/token.c new file mode 100644 index 0000000000..e73256b37e --- /dev/null +++ b/thirdparty/libwebp/enc/token.c @@ -0,0 +1,285 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Paginated token buffer +// +// A 'token' is a bit value associated with a probability, either fixed +// or a later-to-be-determined after statistics have been collected. +// For dynamic probability, we just record the slot id (idx) for the probability +// value in the final probability array (uint8_t* probas in VP8EmitTokens). +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "./cost.h" +#include "./vp8enci.h" +#include "../utils/utils.h" + +#if !defined(DISABLE_TOKEN_BUFFER) + +// we use pages to reduce the number of memcpy() +#define MIN_PAGE_SIZE 8192 // minimum number of token per page +#define FIXED_PROBA_BIT (1u << 14) + +typedef uint16_t token_t; // bit #15: bit value + // bit #14: flags for constant proba or idx + // bits #0..13: slot or constant proba +struct VP8Tokens { + VP8Tokens* next_; // pointer to next page +}; +// Token data is located in memory just after the next_ field. +// This macro is used to return their address and hide the trick. +#define TOKEN_DATA(p) ((const token_t*)&(p)[1]) + +//------------------------------------------------------------------------------ + +void VP8TBufferInit(VP8TBuffer* const b, int page_size) { + b->tokens_ = NULL; + b->pages_ = NULL; + b->last_page_ = &b->pages_; + b->left_ = 0; + b->page_size_ = (page_size < MIN_PAGE_SIZE) ? MIN_PAGE_SIZE : page_size; + b->error_ = 0; +} + +void VP8TBufferClear(VP8TBuffer* const b) { + if (b != NULL) { + VP8Tokens* p = b->pages_; + while (p != NULL) { + VP8Tokens* const next = p->next_; + WebPSafeFree(p); + p = next; + } + VP8TBufferInit(b, b->page_size_); + } +} + +static int TBufferNewPage(VP8TBuffer* const b) { + VP8Tokens* page = NULL; + if (!b->error_) { + const size_t size = sizeof(*page) + b->page_size_ * sizeof(token_t); + page = (VP8Tokens*)WebPSafeMalloc(1ULL, size); + } + if (page == NULL) { + b->error_ = 1; + return 0; + } + page->next_ = NULL; + + *b->last_page_ = page; + b->last_page_ = &page->next_; + b->left_ = b->page_size_; + b->tokens_ = (token_t*)TOKEN_DATA(page); + return 1; +} + +//------------------------------------------------------------------------------ + +#define TOKEN_ID(t, b, ctx) \ + (NUM_PROBAS * ((ctx) + NUM_CTX * ((b) + NUM_BANDS * (t)))) + +static WEBP_INLINE uint32_t AddToken(VP8TBuffer* const b, + uint32_t bit, uint32_t proba_idx) { + assert(proba_idx < FIXED_PROBA_BIT); + assert(bit <= 1); + if (b->left_ > 0 || TBufferNewPage(b)) { + const int slot = --b->left_; + b->tokens_[slot] = (bit << 15) | proba_idx; + } + return bit; +} + +static WEBP_INLINE void AddConstantToken(VP8TBuffer* const b, + uint32_t bit, uint32_t proba) { + assert(proba < 256); + assert(bit <= 1); + if (b->left_ > 0 || TBufferNewPage(b)) { + const int slot = --b->left_; + b->tokens_[slot] = (bit << 15) | FIXED_PROBA_BIT | proba; + } +} + +int VP8RecordCoeffTokens(const int ctx, const int coeff_type, + int first, int last, + const int16_t* const coeffs, + VP8TBuffer* const tokens) { + int n = first; + uint32_t base_id = TOKEN_ID(coeff_type, n, ctx); + if (!AddToken(tokens, last >= 0, base_id + 0)) { + return 0; + } + + while (n < 16) { + const int c = coeffs[n++]; + const int sign = c < 0; + const uint32_t v = sign ? -c : c; + if (!AddToken(tokens, v != 0, base_id + 1)) { + base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 0); // ctx=0 + continue; + } + if (!AddToken(tokens, v > 1, base_id + 2)) { + base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 1); // ctx=1 + } else { + if (!AddToken(tokens, v > 4, base_id + 3)) { + if (AddToken(tokens, v != 2, base_id + 4)) + AddToken(tokens, v == 4, base_id + 5); + } else if (!AddToken(tokens, v > 10, base_id + 6)) { + if (!AddToken(tokens, v > 6, base_id + 7)) { + AddConstantToken(tokens, v == 6, 159); + } else { + AddConstantToken(tokens, v >= 9, 165); + AddConstantToken(tokens, !(v & 1), 145); + } + } else { + int mask; + const uint8_t* tab; + uint32_t residue = v - 3; + if (residue < (8 << 1)) { // VP8Cat3 (3b) + AddToken(tokens, 0, base_id + 8); + AddToken(tokens, 0, base_id + 9); + residue -= (8 << 0); + mask = 1 << 2; + tab = VP8Cat3; + } else if (residue < (8 << 2)) { // VP8Cat4 (4b) + AddToken(tokens, 0, base_id + 8); + AddToken(tokens, 1, base_id + 9); + residue -= (8 << 1); + mask = 1 << 3; + tab = VP8Cat4; + } else if (residue < (8 << 3)) { // VP8Cat5 (5b) + AddToken(tokens, 1, base_id + 8); + AddToken(tokens, 0, base_id + 10); + residue -= (8 << 2); + mask = 1 << 4; + tab = VP8Cat5; + } else { // VP8Cat6 (11b) + AddToken(tokens, 1, base_id + 8); + AddToken(tokens, 1, base_id + 10); + residue -= (8 << 3); + mask = 1 << 10; + tab = VP8Cat6; + } + while (mask) { + AddConstantToken(tokens, !!(residue & mask), *tab++); + mask >>= 1; + } + } + base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 2); // ctx=2 + } + AddConstantToken(tokens, sign, 128); + if (n == 16 || !AddToken(tokens, n <= last, base_id + 0)) { + return 1; // EOB + } + } + return 1; +} + +#undef TOKEN_ID + +//------------------------------------------------------------------------------ +// This function works, but isn't currently used. Saved for later. + +#if 0 + +static void Record(int bit, proba_t* const stats) { + proba_t p = *stats; + if (p >= 0xffff0000u) { // an overflow is inbound. + p = ((p + 1u) >> 1) & 0x7fff7fffu; // -> divide the stats by 2. + } + // record bit count (lower 16 bits) and increment total count (upper 16 bits). + p += 0x00010000u + bit; + *stats = p; +} + +void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats) { + const VP8Tokens* p = b->pages_; + while (p != NULL) { + const int N = (p->next_ == NULL) ? b->left_ : 0; + int n = MAX_NUM_TOKEN; + const token_t* const tokens = TOKEN_DATA(p); + while (n-- > N) { + const token_t token = tokens[n]; + if (!(token & FIXED_PROBA_BIT)) { + Record((token >> 15) & 1, stats + (token & 0x3fffu)); + } + } + p = p->next_; + } +} + +#endif // 0 + +//------------------------------------------------------------------------------ +// Final coding pass, with known probabilities + +int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw, + const uint8_t* const probas, int final_pass) { + const VP8Tokens* p = b->pages_; + assert(!b->error_); + while (p != NULL) { + const VP8Tokens* const next = p->next_; + const int N = (next == NULL) ? b->left_ : 0; + int n = b->page_size_; + const token_t* const tokens = TOKEN_DATA(p); + while (n-- > N) { + const token_t token = tokens[n]; + const int bit = (token >> 15) & 1; + if (token & FIXED_PROBA_BIT) { + VP8PutBit(bw, bit, token & 0xffu); // constant proba + } else { + VP8PutBit(bw, bit, probas[token & 0x3fffu]); + } + } + if (final_pass) WebPSafeFree((void*)p); + p = next; + } + if (final_pass) b->pages_ = NULL; + return 1; +} + +// Size estimation +size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas) { + size_t size = 0; + const VP8Tokens* p = b->pages_; + assert(!b->error_); + while (p != NULL) { + const VP8Tokens* const next = p->next_; + const int N = (next == NULL) ? b->left_ : 0; + int n = b->page_size_; + const token_t* const tokens = TOKEN_DATA(p); + while (n-- > N) { + const token_t token = tokens[n]; + const int bit = token & (1 << 15); + if (token & FIXED_PROBA_BIT) { + size += VP8BitCost(bit, token & 0xffu); + } else { + size += VP8BitCost(bit, probas[token & 0x3fffu]); + } + } + p = next; + } + return size; +} + +//------------------------------------------------------------------------------ + +#else // DISABLE_TOKEN_BUFFER + +void VP8TBufferInit(VP8TBuffer* const b) { + (void)b; +} +void VP8TBufferClear(VP8TBuffer* const b) { + (void)b; +} + +#endif // !DISABLE_TOKEN_BUFFER + diff --git a/thirdparty/libwebp/enc/tree.c b/thirdparty/libwebp/enc/tree.c new file mode 100644 index 0000000000..f141006d19 --- /dev/null +++ b/thirdparty/libwebp/enc/tree.c @@ -0,0 +1,504 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Coding of token probabilities, intra modes and segments. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "./vp8enci.h" + +//------------------------------------------------------------------------------ +// Default probabilities + +// Paragraph 13.5 +const uint8_t + VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = { + { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 }, + { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 }, + { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 } + }, + { { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 }, + { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 }, + { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }, + }, + { { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 }, + { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 }, + { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }, + }, + { { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 }, + { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 }, + { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 } + }, + { { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 }, + { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 }, + { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 } + }, + { { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 }, + { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 }, + { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 } + }, + { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, + { { { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 }, + { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 }, + { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 } + }, + { { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 }, + { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 }, + { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 } + }, + { { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 }, + { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 }, + { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 } + }, + { { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 }, + { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 }, + { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 } + }, + { { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 }, + { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 }, + { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 } + }, + { { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 }, + { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 }, + { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 } + }, + { { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 }, + { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 }, + { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 } + }, + { { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 } + } + }, + { { { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 }, + { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 }, + { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 } + }, + { { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 }, + { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 }, + { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 } + }, + { { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 }, + { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 }, + { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 } + }, + { { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 } + }, + { { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 }, + { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, + { { { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 }, + { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 }, + { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 } + }, + { { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 }, + { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 }, + { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 } + }, + { { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 }, + { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 }, + { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 } + }, + { { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 }, + { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 }, + { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 } + }, + { { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 }, + { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 }, + { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 } + }, + { { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 }, + { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 }, + { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 } + }, + { { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 }, + { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 }, + { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 } + }, + { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + } + } +}; + +void VP8DefaultProbas(VP8Encoder* const enc) { + VP8EncProba* const probas = &enc->proba_; + probas->use_skip_proba_ = 0; + memset(probas->segments_, 255u, sizeof(probas->segments_)); + memcpy(probas->coeffs_, VP8CoeffsProba0, sizeof(VP8CoeffsProba0)); + // Note: we could hard-code the level_costs_ corresponding to VP8CoeffsProba0, + // but that's ~11k of static data. Better call VP8CalculateLevelCosts() later. + probas->dirty_ = 1; +} + +// Paragraph 11.5. 900bytes. +static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = { + { { 231, 120, 48, 89, 115, 113, 120, 152, 112 }, + { 152, 179, 64, 126, 170, 118, 46, 70, 95 }, + { 175, 69, 143, 80, 85, 82, 72, 155, 103 }, + { 56, 58, 10, 171, 218, 189, 17, 13, 152 }, + { 114, 26, 17, 163, 44, 195, 21, 10, 173 }, + { 121, 24, 80, 195, 26, 62, 44, 64, 85 }, + { 144, 71, 10, 38, 171, 213, 144, 34, 26 }, + { 170, 46, 55, 19, 136, 160, 33, 206, 71 }, + { 63, 20, 8, 114, 114, 208, 12, 9, 226 }, + { 81, 40, 11, 96, 182, 84, 29, 16, 36 } }, + { { 134, 183, 89, 137, 98, 101, 106, 165, 148 }, + { 72, 187, 100, 130, 157, 111, 32, 75, 80 }, + { 66, 102, 167, 99, 74, 62, 40, 234, 128 }, + { 41, 53, 9, 178, 241, 141, 26, 8, 107 }, + { 74, 43, 26, 146, 73, 166, 49, 23, 157 }, + { 65, 38, 105, 160, 51, 52, 31, 115, 128 }, + { 104, 79, 12, 27, 217, 255, 87, 17, 7 }, + { 87, 68, 71, 44, 114, 51, 15, 186, 23 }, + { 47, 41, 14, 110, 182, 183, 21, 17, 194 }, + { 66, 45, 25, 102, 197, 189, 23, 18, 22 } }, + { { 88, 88, 147, 150, 42, 46, 45, 196, 205 }, + { 43, 97, 183, 117, 85, 38, 35, 179, 61 }, + { 39, 53, 200, 87, 26, 21, 43, 232, 171 }, + { 56, 34, 51, 104, 114, 102, 29, 93, 77 }, + { 39, 28, 85, 171, 58, 165, 90, 98, 64 }, + { 34, 22, 116, 206, 23, 34, 43, 166, 73 }, + { 107, 54, 32, 26, 51, 1, 81, 43, 31 }, + { 68, 25, 106, 22, 64, 171, 36, 225, 114 }, + { 34, 19, 21, 102, 132, 188, 16, 76, 124 }, + { 62, 18, 78, 95, 85, 57, 50, 48, 51 } }, + { { 193, 101, 35, 159, 215, 111, 89, 46, 111 }, + { 60, 148, 31, 172, 219, 228, 21, 18, 111 }, + { 112, 113, 77, 85, 179, 255, 38, 120, 114 }, + { 40, 42, 1, 196, 245, 209, 10, 25, 109 }, + { 88, 43, 29, 140, 166, 213, 37, 43, 154 }, + { 61, 63, 30, 155, 67, 45, 68, 1, 209 }, + { 100, 80, 8, 43, 154, 1, 51, 26, 71 }, + { 142, 78, 78, 16, 255, 128, 34, 197, 171 }, + { 41, 40, 5, 102, 211, 183, 4, 1, 221 }, + { 51, 50, 17, 168, 209, 192, 23, 25, 82 } }, + { { 138, 31, 36, 171, 27, 166, 38, 44, 229 }, + { 67, 87, 58, 169, 82, 115, 26, 59, 179 }, + { 63, 59, 90, 180, 59, 166, 93, 73, 154 }, + { 40, 40, 21, 116, 143, 209, 34, 39, 175 }, + { 47, 15, 16, 183, 34, 223, 49, 45, 183 }, + { 46, 17, 33, 183, 6, 98, 15, 32, 183 }, + { 57, 46, 22, 24, 128, 1, 54, 17, 37 }, + { 65, 32, 73, 115, 28, 128, 23, 128, 205 }, + { 40, 3, 9, 115, 51, 192, 18, 6, 223 }, + { 87, 37, 9, 115, 59, 77, 64, 21, 47 } }, + { { 104, 55, 44, 218, 9, 54, 53, 130, 226 }, + { 64, 90, 70, 205, 40, 41, 23, 26, 57 }, + { 54, 57, 112, 184, 5, 41, 38, 166, 213 }, + { 30, 34, 26, 133, 152, 116, 10, 32, 134 }, + { 39, 19, 53, 221, 26, 114, 32, 73, 255 }, + { 31, 9, 65, 234, 2, 15, 1, 118, 73 }, + { 75, 32, 12, 51, 192, 255, 160, 43, 51 }, + { 88, 31, 35, 67, 102, 85, 55, 186, 85 }, + { 56, 21, 23, 111, 59, 205, 45, 37, 192 }, + { 55, 38, 70, 124, 73, 102, 1, 34, 98 } }, + { { 125, 98, 42, 88, 104, 85, 117, 175, 82 }, + { 95, 84, 53, 89, 128, 100, 113, 101, 45 }, + { 75, 79, 123, 47, 51, 128, 81, 171, 1 }, + { 57, 17, 5, 71, 102, 57, 53, 41, 49 }, + { 38, 33, 13, 121, 57, 73, 26, 1, 85 }, + { 41, 10, 67, 138, 77, 110, 90, 47, 114 }, + { 115, 21, 2, 10, 102, 255, 166, 23, 6 }, + { 101, 29, 16, 10, 85, 128, 101, 196, 26 }, + { 57, 18, 10, 102, 102, 213, 34, 20, 43 }, + { 117, 20, 15, 36, 163, 128, 68, 1, 26 } }, + { { 102, 61, 71, 37, 34, 53, 31, 243, 192 }, + { 69, 60, 71, 38, 73, 119, 28, 222, 37 }, + { 68, 45, 128, 34, 1, 47, 11, 245, 171 }, + { 62, 17, 19, 70, 146, 85, 55, 62, 70 }, + { 37, 43, 37, 154, 100, 163, 85, 160, 1 }, + { 63, 9, 92, 136, 28, 64, 32, 201, 85 }, + { 75, 15, 9, 9, 64, 255, 184, 119, 16 }, + { 86, 6, 28, 5, 64, 255, 25, 248, 1 }, + { 56, 8, 17, 132, 137, 255, 55, 116, 128 }, + { 58, 15, 20, 82, 135, 57, 26, 121, 40 } }, + { { 164, 50, 31, 137, 154, 133, 25, 35, 218 }, + { 51, 103, 44, 131, 131, 123, 31, 6, 158 }, + { 86, 40, 64, 135, 148, 224, 45, 183, 128 }, + { 22, 26, 17, 131, 240, 154, 14, 1, 209 }, + { 45, 16, 21, 91, 64, 222, 7, 1, 197 }, + { 56, 21, 39, 155, 60, 138, 23, 102, 213 }, + { 83, 12, 13, 54, 192, 255, 68, 47, 28 }, + { 85, 26, 85, 85, 128, 128, 32, 146, 171 }, + { 18, 11, 7, 63, 144, 171, 4, 4, 246 }, + { 35, 27, 10, 146, 174, 171, 12, 26, 128 } }, + { { 190, 80, 35, 99, 180, 80, 126, 54, 45 }, + { 85, 126, 47, 87, 176, 51, 41, 20, 32 }, + { 101, 75, 128, 139, 118, 146, 116, 128, 85 }, + { 56, 41, 15, 176, 236, 85, 37, 9, 62 }, + { 71, 30, 17, 119, 118, 255, 17, 18, 138 }, + { 101, 38, 60, 138, 55, 70, 43, 26, 142 }, + { 146, 36, 19, 30, 171, 255, 97, 27, 20 }, + { 138, 45, 61, 62, 219, 1, 81, 188, 64 }, + { 32, 41, 20, 117, 151, 142, 20, 21, 163 }, + { 112, 19, 12, 61, 195, 128, 48, 4, 24 } } +}; + +static int PutI4Mode(VP8BitWriter* const bw, int mode, + const uint8_t* const prob) { + if (VP8PutBit(bw, mode != B_DC_PRED, prob[0])) { + if (VP8PutBit(bw, mode != B_TM_PRED, prob[1])) { + if (VP8PutBit(bw, mode != B_VE_PRED, prob[2])) { + if (!VP8PutBit(bw, mode >= B_LD_PRED, prob[3])) { + if (VP8PutBit(bw, mode != B_HE_PRED, prob[4])) { + VP8PutBit(bw, mode != B_RD_PRED, prob[5]); + } + } else { + if (VP8PutBit(bw, mode != B_LD_PRED, prob[6])) { + if (VP8PutBit(bw, mode != B_VL_PRED, prob[7])) { + VP8PutBit(bw, mode != B_HD_PRED, prob[8]); + } + } + } + } + } + } + return mode; +} + +static void PutI16Mode(VP8BitWriter* const bw, int mode) { + if (VP8PutBit(bw, (mode == TM_PRED || mode == H_PRED), 156)) { + VP8PutBit(bw, mode == TM_PRED, 128); // TM or HE + } else { + VP8PutBit(bw, mode == V_PRED, 163); // VE or DC + } +} + +static void PutUVMode(VP8BitWriter* const bw, int uv_mode) { + if (VP8PutBit(bw, uv_mode != DC_PRED, 142)) { + if (VP8PutBit(bw, uv_mode != V_PRED, 114)) { + VP8PutBit(bw, uv_mode != H_PRED, 183); // else: TM_PRED + } + } +} + +static void PutSegment(VP8BitWriter* const bw, int s, const uint8_t* p) { + if (VP8PutBit(bw, s >= 2, p[0])) p += 1; + VP8PutBit(bw, s & 1, p[1]); +} + +void VP8CodeIntraModes(VP8Encoder* const enc) { + VP8BitWriter* const bw = &enc->bw_; + VP8EncIterator it; + VP8IteratorInit(enc, &it); + do { + const VP8MBInfo* const mb = it.mb_; + const uint8_t* preds = it.preds_; + if (enc->segment_hdr_.update_map_) { + PutSegment(bw, mb->segment_, enc->proba_.segments_); + } + if (enc->proba_.use_skip_proba_) { + VP8PutBit(bw, mb->skip_, enc->proba_.skip_proba_); + } + if (VP8PutBit(bw, (mb->type_ != 0), 145)) { // i16x16 + PutI16Mode(bw, preds[0]); + } else { + const int preds_w = enc->preds_w_; + const uint8_t* top_pred = preds - preds_w; + int x, y; + for (y = 0; y < 4; ++y) { + int left = preds[-1]; + for (x = 0; x < 4; ++x) { + const uint8_t* const probas = kBModesProba[top_pred[x]][left]; + left = PutI4Mode(bw, preds[x], probas); + } + top_pred = preds; + preds += preds_w; + } + } + PutUVMode(bw, mb->uv_mode_); + } while (VP8IteratorNext(&it)); +} + +//------------------------------------------------------------------------------ +// Paragraph 13 + +const uint8_t + VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = { + { { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 }, + { 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + } + }, + { { { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 }, + { 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 } + }, + { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + } + }, + { { { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 } + }, + { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + } + }, + { { { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 }, + { 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + } + } +}; + +void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas) { + int t, b, c, p; + for (t = 0; t < NUM_TYPES; ++t) { + for (b = 0; b < NUM_BANDS; ++b) { + for (c = 0; c < NUM_CTX; ++c) { + for (p = 0; p < NUM_PROBAS; ++p) { + const uint8_t p0 = probas->coeffs_[t][b][c][p]; + const int update = (p0 != VP8CoeffsProba0[t][b][c][p]); + if (VP8PutBit(bw, update, VP8CoeffsUpdateProba[t][b][c][p])) { + VP8PutBits(bw, p0, 8); + } + } + } + } + } + if (VP8PutBitUniform(bw, probas->use_skip_proba_)) { + VP8PutBits(bw, probas->skip_proba_, 8); + } +} + diff --git a/thirdparty/libwebp/enc/vp8enci.h b/thirdparty/libwebp/enc/vp8enci.h new file mode 100644 index 0000000000..c1fbd7644e --- /dev/null +++ b/thirdparty/libwebp/enc/vp8enci.h @@ -0,0 +1,531 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// WebP encoder: internal header. +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_ENC_VP8ENCI_H_ +#define WEBP_ENC_VP8ENCI_H_ + +#include <string.h> // for memcpy() +#include "../dec/common.h" +#include "../dsp/dsp.h" +#include "../utils/bit_writer.h" +#include "../utils/thread.h" +#include "../utils/utils.h" +#include "../webp/encode.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//------------------------------------------------------------------------------ +// Various defines and enums + +// version numbers +#define ENC_MAJ_VERSION 0 +#define ENC_MIN_VERSION 5 +#define ENC_REV_VERSION 1 + +enum { MAX_LF_LEVELS = 64, // Maximum loop filter level + MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost + MAX_LEVEL = 2047 // max level (note: max codable is 2047 + 67) + }; + +typedef enum { // Rate-distortion optimization levels + RD_OPT_NONE = 0, // no rd-opt + RD_OPT_BASIC = 1, // basic scoring (no trellis) + RD_OPT_TRELLIS = 2, // perform trellis-quant on the final decision only + RD_OPT_TRELLIS_ALL = 3 // trellis-quant for every scoring (much slower) +} VP8RDLevel; + +// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). +// The original or reconstructed samples can be accessed using VP8Scan[]. +// The predicted blocks can be accessed using offsets to yuv_p_ and +// the arrays VP8*ModeOffsets[]. +// * YUV Samples area (yuv_in_/yuv_out_/yuv_out2_) +// (see VP8Scan[] for accessing the blocks, along with +// Y_OFF_ENC/U_OFF_ENC/V_OFF_ENC): +// +----+----+ +// Y_OFF_ENC |YYYY|UUVV| +// U_OFF_ENC |YYYY|UUVV| +// V_OFF_ENC |YYYY|....| <- 25% wasted U/V area +// |YYYY|....| +// +----+----+ +// * Prediction area ('yuv_p_', size = PRED_SIZE_ENC) +// Intra16 predictions (16x16 block each, two per row): +// |I16DC16|I16TM16| +// |I16VE16|I16HE16| +// Chroma U/V predictions (16x8 block each, two per row): +// |C8DC8|C8TM8| +// |C8VE8|C8HE8| +// Intra 4x4 predictions (4x4 block each) +// |I4DC4 I4TM4 I4VE4 I4HE4|I4RD4 I4VR4 I4LD4 I4VL4| +// |I4HD4 I4HU4 I4TMP .....|.......................| <- ~31% wasted +#define YUV_SIZE_ENC (BPS * 16) +#define PRED_SIZE_ENC (32 * BPS + 16 * BPS + 8 * BPS) // I16+Chroma+I4 preds +#define Y_OFF_ENC (0) +#define U_OFF_ENC (16) +#define V_OFF_ENC (16 + 8) + +extern const int VP8Scan[16]; // in quant.c +extern const int VP8UVModeOffsets[4]; // in analyze.c +extern const int VP8I16ModeOffsets[4]; +extern const int VP8I4ModeOffsets[NUM_BMODES]; + +// Layout of prediction blocks +// intra 16x16 +#define I16DC16 (0 * 16 * BPS) +#define I16TM16 (I16DC16 + 16) +#define I16VE16 (1 * 16 * BPS) +#define I16HE16 (I16VE16 + 16) +// chroma 8x8, two U/V blocks side by side (hence: 16x8 each) +#define C8DC8 (2 * 16 * BPS) +#define C8TM8 (C8DC8 + 1 * 16) +#define C8VE8 (2 * 16 * BPS + 8 * BPS) +#define C8HE8 (C8VE8 + 1 * 16) +// intra 4x4 +#define I4DC4 (3 * 16 * BPS + 0) +#define I4TM4 (I4DC4 + 4) +#define I4VE4 (I4DC4 + 8) +#define I4HE4 (I4DC4 + 12) +#define I4RD4 (I4DC4 + 16) +#define I4VR4 (I4DC4 + 20) +#define I4LD4 (I4DC4 + 24) +#define I4VL4 (I4DC4 + 28) +#define I4HD4 (3 * 16 * BPS + 4 * BPS) +#define I4HU4 (I4HD4 + 4) +#define I4TMP (I4HD4 + 8) + +typedef int64_t score_t; // type used for scores, rate, distortion +// Note that MAX_COST is not the maximum allowed by sizeof(score_t), +// in order to allow overflowing computations. +#define MAX_COST ((score_t)0x7fffffffffffffLL) + +#define QFIX 17 +#define BIAS(b) ((b) << (QFIX - 8)) +// Fun fact: this is the _only_ line where we're actually being lossy and +// discarding bits. +static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) { + return (int)((n * iQ + B) >> QFIX); +} + +// Uncomment the following to remove token-buffer code: +// #define DISABLE_TOKEN_BUFFER + +//------------------------------------------------------------------------------ +// Headers + +typedef uint32_t proba_t; // 16b + 16b +typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS]; +typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS]; +typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1]; +typedef const uint16_t* (*CostArrayPtr)[NUM_CTX]; // for easy casting +typedef const uint16_t* CostArrayMap[16][NUM_CTX]; +typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats + +typedef struct VP8Encoder VP8Encoder; + +// segment features +typedef struct { + int num_segments_; // Actual number of segments. 1 segment only = unused. + int update_map_; // whether to update the segment map or not. + // must be 0 if there's only 1 segment. + int size_; // bit-cost for transmitting the segment map +} VP8EncSegmentHeader; + +// Struct collecting all frame-persistent probabilities. +typedef struct { + uint8_t segments_[3]; // probabilities for segment tree + uint8_t skip_proba_; // final probability of being skipped. + ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 1056 bytes + StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes + CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 13056 bytes + CostArrayMap remapped_costs_[NUM_TYPES]; // 1536 bytes + int dirty_; // if true, need to call VP8CalculateLevelCosts() + int use_skip_proba_; // Note: we always use skip_proba for now. + int nb_skip_; // number of skipped blocks +} VP8EncProba; + +// Filter parameters. Not actually used in the code (we don't perform +// the in-loop filtering), but filled from user's config +typedef struct { + int simple_; // filtering type: 0=complex, 1=simple + int level_; // base filter level [0..63] + int sharpness_; // [0..7] + int i4x4_lf_delta_; // delta filter level for i4x4 relative to i16x16 +} VP8EncFilterHeader; + +//------------------------------------------------------------------------------ +// Informations about the macroblocks. + +typedef struct { + // block type + unsigned int type_:2; // 0=i4x4, 1=i16x16 + unsigned int uv_mode_:2; + unsigned int skip_:1; + unsigned int segment_:2; + uint8_t alpha_; // quantization-susceptibility +} VP8MBInfo; + +typedef struct VP8Matrix { + uint16_t q_[16]; // quantizer steps + uint16_t iq_[16]; // reciprocals, fixed point. + uint32_t bias_[16]; // rounding bias + uint32_t zthresh_[16]; // value below which a coefficient is zeroed + uint16_t sharpen_[16]; // frequency boosters for slight sharpening +} VP8Matrix; + +typedef struct { + VP8Matrix y1_, y2_, uv_; // quantization matrices + int alpha_; // quant-susceptibility, range [-127,127]. Zero is neutral. + // Lower values indicate a lower risk of blurriness. + int beta_; // filter-susceptibility, range [0,255]. + int quant_; // final segment quantizer. + int fstrength_; // final in-loop filtering strength + int max_edge_; // max edge delta (for filtering strength) + int min_disto_; // minimum distortion required to trigger filtering record + // reactivities + int lambda_i16_, lambda_i4_, lambda_uv_; + int lambda_mode_, lambda_trellis_, tlambda_; + int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_; + + // lambda values for distortion-based evaluation + score_t i4_penalty_; // penalty for using Intra4 +} VP8SegmentInfo; + +// Handy transient struct to accumulate score and info during RD-optimization +// and mode evaluation. +typedef struct { + score_t D, SD; // Distortion, spectral distortion + score_t H, R, score; // header bits, rate, score. + int16_t y_dc_levels[16]; // Quantized levels for luma-DC, luma-AC, chroma. + int16_t y_ac_levels[16][16]; + int16_t uv_levels[4 + 4][16]; + int mode_i16; // mode number for intra16 prediction + uint8_t modes_i4[16]; // mode numbers for intra4 predictions + int mode_uv; // mode number of chroma prediction + uint32_t nz; // non-zero blocks +} VP8ModeScore; + +// Iterator structure to iterate through macroblocks, pointing to the +// right neighbouring data (samples, predictions, contexts, ...) +typedef struct { + int x_, y_; // current macroblock + int y_stride_, uv_stride_; // respective strides + uint8_t* yuv_in_; // input samples + uint8_t* yuv_out_; // output samples + uint8_t* yuv_out2_; // secondary buffer swapped with yuv_out_. + uint8_t* yuv_p_; // scratch buffer for prediction + VP8Encoder* enc_; // back-pointer + VP8MBInfo* mb_; // current macroblock + VP8BitWriter* bw_; // current bit-writer + uint8_t* preds_; // intra mode predictors (4x4 blocks) + uint32_t* nz_; // non-zero pattern + uint8_t i4_boundary_[37]; // 32+5 boundary samples needed by intra4x4 + uint8_t* i4_top_; // pointer to the current top boundary sample + int i4_; // current intra4x4 mode being tested + int top_nz_[9]; // top-non-zero context. + int left_nz_[9]; // left-non-zero. left_nz[8] is independent. + uint64_t bit_count_[4][3]; // bit counters for coded levels. + uint64_t luma_bits_; // macroblock bit-cost for luma + uint64_t uv_bits_; // macroblock bit-cost for chroma + LFStats* lf_stats_; // filter stats (borrowed from enc_) + int do_trellis_; // if true, perform extra level optimisation + int count_down_; // number of mb still to be processed + int count_down0_; // starting counter value (for progress) + int percent0_; // saved initial progress percent + + uint8_t* y_left_; // left luma samples (addressable from index -1 to 15). + uint8_t* u_left_; // left u samples (addressable from index -1 to 7) + uint8_t* v_left_; // left v samples (addressable from index -1 to 7) + + uint8_t* y_top_; // top luma samples at position 'x_' + uint8_t* uv_top_; // top u/v samples at position 'x_', packed as 16 bytes + + // memory for storing y/u/v_left_ + uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + WEBP_ALIGN_CST]; + // memory for yuv_* + uint8_t yuv_mem_[3 * YUV_SIZE_ENC + PRED_SIZE_ENC + WEBP_ALIGN_CST]; +} VP8EncIterator; + + // in iterator.c +// must be called first +void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it); +// restart a scan +void VP8IteratorReset(VP8EncIterator* const it); +// reset iterator position to row 'y' +void VP8IteratorSetRow(VP8EncIterator* const it, int y); +// set count down (=number of iterations to go) +void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down); +// return true if iteration is finished +int VP8IteratorIsDone(const VP8EncIterator* const it); +// Import uncompressed samples from source. +// If tmp_32 is not NULL, import boundary samples too. +// tmp_32 is a 32-bytes scratch buffer that must be aligned in memory. +void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32); +// export decimated samples +void VP8IteratorExport(const VP8EncIterator* const it); +// go to next macroblock. Returns false if not finished. +int VP8IteratorNext(VP8EncIterator* const it); +// save the yuv_out_ boundary values to top_/left_ arrays for next iterations. +void VP8IteratorSaveBoundary(VP8EncIterator* const it); +// Report progression based on macroblock rows. Return 0 for user-abort request. +int VP8IteratorProgress(const VP8EncIterator* const it, + int final_delta_percent); +// Intra4x4 iterations +void VP8IteratorStartI4(VP8EncIterator* const it); +// returns true if not done. +int VP8IteratorRotateI4(VP8EncIterator* const it, + const uint8_t* const yuv_out); + +// Non-zero context setup/teardown +void VP8IteratorNzToBytes(VP8EncIterator* const it); +void VP8IteratorBytesToNz(VP8EncIterator* const it); + +// Helper functions to set mode properties +void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode); +void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes); +void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode); +void VP8SetSkip(const VP8EncIterator* const it, int skip); +void VP8SetSegment(const VP8EncIterator* const it, int segment); + +//------------------------------------------------------------------------------ +// Paginated token buffer + +typedef struct VP8Tokens VP8Tokens; // struct details in token.c + +typedef struct { +#if !defined(DISABLE_TOKEN_BUFFER) + VP8Tokens* pages_; // first page + VP8Tokens** last_page_; // last page + uint16_t* tokens_; // set to (*last_page_)->tokens_ + int left_; // how many free tokens left before the page is full + int page_size_; // number of tokens per page +#endif + int error_; // true in case of malloc error +} VP8TBuffer; + +// initialize an empty buffer +void VP8TBufferInit(VP8TBuffer* const b, int page_size); +void VP8TBufferClear(VP8TBuffer* const b); // de-allocate pages memory + +#if !defined(DISABLE_TOKEN_BUFFER) + +// Finalizes bitstream when probabilities are known. +// Deletes the allocated token memory if final_pass is true. +int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw, + const uint8_t* const probas, int final_pass); + +// record the coding of coefficients without knowing the probabilities yet +int VP8RecordCoeffTokens(const int ctx, const int coeff_type, + int first, int last, + const int16_t* const coeffs, + VP8TBuffer* const tokens); + +// Estimate the final coded size given a set of 'probas'. +size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas); + +// unused for now +void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats); + +#endif // !DISABLE_TOKEN_BUFFER + +//------------------------------------------------------------------------------ +// VP8Encoder + +struct VP8Encoder { + const WebPConfig* config_; // user configuration and parameters + WebPPicture* pic_; // input / output picture + + // headers + VP8EncFilterHeader filter_hdr_; // filtering information + VP8EncSegmentHeader segment_hdr_; // segment information + + int profile_; // VP8's profile, deduced from Config. + + // dimension, in macroblock units. + int mb_w_, mb_h_; + int preds_w_; // stride of the *preds_ prediction plane (=4*mb_w + 1) + + // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS) + int num_parts_; + + // per-partition boolean decoders. + VP8BitWriter bw_; // part0 + VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions + VP8TBuffer tokens_; // token buffer + + int percent_; // for progress + + // transparency blob + int has_alpha_; + uint8_t* alpha_data_; // non-NULL if transparency is present + uint32_t alpha_data_size_; + WebPWorker alpha_worker_; + + // quantization info (one set of DC/AC dequant factor per segment) + VP8SegmentInfo dqm_[NUM_MB_SEGMENTS]; + int base_quant_; // nominal quantizer value. Only used + // for relative coding of segments' quant. + int alpha_; // global susceptibility (<=> complexity) + int uv_alpha_; // U/V quantization susceptibility + // global offset of quantizers, shared by all segments + int dq_y1_dc_; + int dq_y2_dc_, dq_y2_ac_; + int dq_uv_dc_, dq_uv_ac_; + + // probabilities and statistics + VP8EncProba proba_; + uint64_t sse_[4]; // sum of Y/U/V/A squared errors for all macroblocks + uint64_t sse_count_; // pixel count for the sse_[] stats + int coded_size_; + int residual_bytes_[3][4]; + int block_count_[3]; + + // quality/speed settings + int method_; // 0=fastest, 6=best/slowest. + VP8RDLevel rd_opt_level_; // Deduced from method_. + int max_i4_header_bits_; // partition #0 safeness factor + int mb_header_limit_; // rough limit for header bits per MB + int thread_level_; // derived from config->thread_level + int do_search_; // derived from config->target_XXX + int use_tokens_; // if true, use token buffer + + // Memory + VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1) + uint8_t* preds_; // predictions modes: (4*mb_w+1) * (4*mb_h+1) + uint32_t* nz_; // non-zero bit context: mb_w+1 + uint8_t* y_top_; // top luma samples. + uint8_t* uv_top_; // top u/v samples. + // U and V are packed into 16 bytes (8 U + 8 V) + LFStats* lf_stats_; // autofilter stats (if NULL, autofilter is off) +}; + +//------------------------------------------------------------------------------ +// internal functions. Not public. + + // in tree.c +extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS]; +extern const uint8_t + VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS]; +// Reset the token probabilities to their initial (default) values +void VP8DefaultProbas(VP8Encoder* const enc); +// Write the token probabilities +void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas); +// Writes the partition #0 modes (that is: all intra modes) +void VP8CodeIntraModes(VP8Encoder* const enc); + + // in syntax.c +// Generates the final bitstream by coding the partition0 and headers, +// and appending an assembly of all the pre-coded token partitions. +// Return true if everything is ok. +int VP8EncWrite(VP8Encoder* const enc); +// Release memory allocated for bit-writing in VP8EncLoop & seq. +void VP8EncFreeBitWriters(VP8Encoder* const enc); + + // in frame.c +extern const uint8_t VP8Cat3[]; +extern const uint8_t VP8Cat4[]; +extern const uint8_t VP8Cat5[]; +extern const uint8_t VP8Cat6[]; + +// Form all the four Intra16x16 predictions in the yuv_p_ cache +void VP8MakeLuma16Preds(const VP8EncIterator* const it); +// Form all the four Chroma8x8 predictions in the yuv_p_ cache +void VP8MakeChroma8Preds(const VP8EncIterator* const it); +// Form all the ten Intra4x4 predictions in the yuv_p_ cache +// for the 4x4 block it->i4_ +void VP8MakeIntra4Preds(const VP8EncIterator* const it); +// Rate calculation +int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd); +int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]); +int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd); +// Main coding calls +int VP8EncLoop(VP8Encoder* const enc); +int VP8EncTokenLoop(VP8Encoder* const enc); + + // in webpenc.c +// Assign an error code to a picture. Return false for convenience. +int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error); +int WebPReportProgress(const WebPPicture* const pic, + int percent, int* const percent_store); + + // in analysis.c +// Main analysis loop. Decides the segmentations and complexity. +// Assigns a first guess for Intra16 and uvmode_ prediction modes. +int VP8EncAnalyze(VP8Encoder* const enc); + + // in quant.c +// Sets up segment's quantization values, base_quant_ and filter strengths. +void VP8SetSegmentParams(VP8Encoder* const enc, float quality); +// Pick best modes and fills the levels. Returns true if skipped. +int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, + VP8RDLevel rd_opt); + + // in alpha.c +void VP8EncInitAlpha(VP8Encoder* const enc); // initialize alpha compression +int VP8EncStartAlpha(VP8Encoder* const enc); // start alpha coding process +int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data +int VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data + + // in filter.c +void VP8SSIMAddStats(const VP8DistoStats* const src, VP8DistoStats* const dst); +void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1, + const uint8_t* src2, int stride2, + int W, int H, VP8DistoStats* const stats); +double VP8SSIMGet(const VP8DistoStats* const stats); +double VP8SSIMGetSquaredError(const VP8DistoStats* const stats); + +// autofilter +void VP8InitFilter(VP8EncIterator* const it); +void VP8StoreFilterStats(VP8EncIterator* const it); +void VP8AdjustFilterStrength(VP8EncIterator* const it); + +// returns the approximate filtering strength needed to smooth a edge +// step of 'delta', given a sharpness parameter 'sharpness'. +int VP8FilterStrengthFromDelta(int sharpness, int delta); + + // misc utils for picture_*.c: + +// Remove reference to the ARGB/YUVA buffer (doesn't free anything). +void WebPPictureResetBuffers(WebPPicture* const picture); + +// Allocates ARGB buffer of given dimension (previous one is always free'd). +// Preserves the YUV(A) buffer. Returns false in case of error (invalid param, +// out-of-memory). +int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height); + +// Allocates YUVA buffer of given dimension (previous one is always free'd). +// Uses picture->csp to determine whether an alpha buffer is needed. +// Preserves the ARGB buffer. +// Returns false in case of error (invalid param, out-of-memory). +int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height); + +// Clean-up the RGB samples under fully transparent area, to help lossless +// compressibility (no guarantee, though). Assumes that pic->use_argb is true. +void WebPCleanupTransparentAreaLossless(WebPPicture* const pic); + + // in near_lossless.c +// Near lossless preprocessing in RGB color-space. +int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality); +// Near lossless adjustment for predictors. +void VP8ApplyNearLosslessPredict(int xsize, int ysize, int pred_bits, + const uint32_t* argb_orig, + uint32_t* argb, uint32_t* argb_scratch, + const uint32_t* const transform_data, + int quality, int subtract_green); +//------------------------------------------------------------------------------ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_ENC_VP8ENCI_H_ */ diff --git a/thirdparty/libwebp/enc/vp8l.c b/thirdparty/libwebp/enc/vp8l.c new file mode 100644 index 0000000000..c16e2560ec --- /dev/null +++ b/thirdparty/libwebp/enc/vp8l.c @@ -0,0 +1,1603 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// main entry for the lossless encoder. +// +// Author: Vikas Arora (vikaas.arora@gmail.com) +// + +#include <assert.h> +#include <stdlib.h> + +#include "./backward_references.h" +#include "./histogram.h" +#include "./vp8enci.h" +#include "./vp8li.h" +#include "../dsp/lossless.h" +#include "../utils/bit_writer.h" +#include "../utils/huffman_encode.h" +#include "../utils/utils.h" +#include "../webp/format_constants.h" + +#include "./delta_palettization.h" + +#define PALETTE_KEY_RIGHT_SHIFT 22 // Key for 1K buffer. +// Maximum number of histogram images (sub-blocks). +#define MAX_HUFF_IMAGE_SIZE 2600 + +// Palette reordering for smaller sum of deltas (and for smaller storage). + +static int PaletteCompareColorsForQsort(const void* p1, const void* p2) { + const uint32_t a = WebPMemToUint32(p1); + const uint32_t b = WebPMemToUint32(p2); + assert(a != b); + return (a < b) ? -1 : 1; +} + +static WEBP_INLINE uint32_t PaletteComponentDistance(uint32_t v) { + return (v <= 128) ? v : (256 - v); +} + +// Computes a value that is related to the entropy created by the +// palette entry diff. +// +// Note that the last & 0xff is a no-operation in the next statement, but +// removed by most compilers and is here only for regularity of the code. +static WEBP_INLINE uint32_t PaletteColorDistance(uint32_t col1, uint32_t col2) { + const uint32_t diff = VP8LSubPixels(col1, col2); + const int kMoreWeightForRGBThanForAlpha = 9; + uint32_t score; + score = PaletteComponentDistance((diff >> 0) & 0xff); + score += PaletteComponentDistance((diff >> 8) & 0xff); + score += PaletteComponentDistance((diff >> 16) & 0xff); + score *= kMoreWeightForRGBThanForAlpha; + score += PaletteComponentDistance((diff >> 24) & 0xff); + return score; +} + +static WEBP_INLINE void SwapColor(uint32_t* const col1, uint32_t* const col2) { + const uint32_t tmp = *col1; + *col1 = *col2; + *col2 = tmp; +} + +static void GreedyMinimizeDeltas(uint32_t palette[], int num_colors) { + // Find greedily always the closest color of the predicted color to minimize + // deltas in the palette. This reduces storage needs since the + // palette is stored with delta encoding. + uint32_t predict = 0x00000000; + int i, k; + for (i = 0; i < num_colors; ++i) { + int best_ix = i; + uint32_t best_score = ~0U; + for (k = i; k < num_colors; ++k) { + const uint32_t cur_score = PaletteColorDistance(palette[k], predict); + if (best_score > cur_score) { + best_score = cur_score; + best_ix = k; + } + } + SwapColor(&palette[best_ix], &palette[i]); + predict = palette[i]; + } +} + +// The palette has been sorted by alpha. This function checks if the other +// components of the palette have a monotonic development with regards to +// position in the palette. If all have monotonic development, there is +// no benefit to re-organize them greedily. A monotonic development +// would be spotted in green-only situations (like lossy alpha) or gray-scale +// images. +static int PaletteHasNonMonotonousDeltas(uint32_t palette[], int num_colors) { + uint32_t predict = 0x000000; + int i; + uint8_t sign_found = 0x00; + for (i = 0; i < num_colors; ++i) { + const uint32_t diff = VP8LSubPixels(palette[i], predict); + const uint8_t rd = (diff >> 16) & 0xff; + const uint8_t gd = (diff >> 8) & 0xff; + const uint8_t bd = (diff >> 0) & 0xff; + if (rd != 0x00) { + sign_found |= (rd < 0x80) ? 1 : 2; + } + if (gd != 0x00) { + sign_found |= (gd < 0x80) ? 8 : 16; + } + if (bd != 0x00) { + sign_found |= (bd < 0x80) ? 64 : 128; + } + predict = palette[i]; + } + return (sign_found & (sign_found << 1)) != 0; // two consequent signs. +} + +// ----------------------------------------------------------------------------- +// Palette + +// If number of colors in the image is less than or equal to MAX_PALETTE_SIZE, +// creates a palette and returns true, else returns false. +static int AnalyzeAndCreatePalette(const WebPPicture* const pic, + int low_effort, + uint32_t palette[MAX_PALETTE_SIZE], + int* const palette_size) { + const int num_colors = WebPGetColorPalette(pic, palette); + if (num_colors > MAX_PALETTE_SIZE) return 0; + *palette_size = num_colors; + qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort); + if (!low_effort && PaletteHasNonMonotonousDeltas(palette, num_colors)) { + GreedyMinimizeDeltas(palette, num_colors); + } + return 1; +} + +// These five modes are evaluated and their respective entropy is computed. +typedef enum { + kDirect = 0, + kSpatial = 1, + kSubGreen = 2, + kSpatialSubGreen = 3, + kPalette = 4, + kNumEntropyIx = 5 +} EntropyIx; + +typedef enum { + kHistoAlpha = 0, + kHistoAlphaPred, + kHistoGreen, + kHistoGreenPred, + kHistoRed, + kHistoRedPred, + kHistoBlue, + kHistoBluePred, + kHistoRedSubGreen, + kHistoRedPredSubGreen, + kHistoBlueSubGreen, + kHistoBluePredSubGreen, + kHistoPalette, + kHistoTotal // Must be last. +} HistoIx; + +static void AddSingleSubGreen(uint32_t p, uint32_t* r, uint32_t* b) { + const uint32_t green = p >> 8; // The upper bits are masked away later. + ++r[((p >> 16) - green) & 0xff]; + ++b[(p - green) & 0xff]; +} + +static void AddSingle(uint32_t p, + uint32_t* a, uint32_t* r, uint32_t* g, uint32_t* b) { + ++a[p >> 24]; + ++r[(p >> 16) & 0xff]; + ++g[(p >> 8) & 0xff]; + ++b[(p & 0xff)]; +} + +static int AnalyzeEntropy(const uint32_t* argb, + int width, int height, int argb_stride, + int use_palette, + EntropyIx* const min_entropy_ix, + int* const red_and_blue_always_zero) { + // Allocate histogram set with cache_bits = 0. + uint32_t* const histo = + (uint32_t*)WebPSafeCalloc(kHistoTotal, sizeof(*histo) * 256); + if (histo != NULL) { + int i, x, y; + const uint32_t* prev_row = argb; + const uint32_t* curr_row = argb + argb_stride; + for (y = 1; y < height; ++y) { + uint32_t prev_pix = curr_row[0]; + for (x = 1; x < width; ++x) { + const uint32_t pix = curr_row[x]; + const uint32_t pix_diff = VP8LSubPixels(pix, prev_pix); + if ((pix_diff == 0) || (pix == prev_row[x])) continue; + prev_pix = pix; + AddSingle(pix, + &histo[kHistoAlpha * 256], + &histo[kHistoRed * 256], + &histo[kHistoGreen * 256], + &histo[kHistoBlue * 256]); + AddSingle(pix_diff, + &histo[kHistoAlphaPred * 256], + &histo[kHistoRedPred * 256], + &histo[kHistoGreenPred * 256], + &histo[kHistoBluePred * 256]); + AddSingleSubGreen(pix, + &histo[kHistoRedSubGreen * 256], + &histo[kHistoBlueSubGreen * 256]); + AddSingleSubGreen(pix_diff, + &histo[kHistoRedPredSubGreen * 256], + &histo[kHistoBluePredSubGreen * 256]); + { + // Approximate the palette by the entropy of the multiplicative hash. + const int hash = ((pix + (pix >> 19)) * 0x39c5fba7) >> 24; + ++histo[kHistoPalette * 256 + (hash & 0xff)]; + } + } + prev_row = curr_row; + curr_row += argb_stride; + } + { + double entropy_comp[kHistoTotal]; + double entropy[kNumEntropyIx]; + EntropyIx k; + EntropyIx last_mode_to_analyze = + use_palette ? kPalette : kSpatialSubGreen; + int j; + // Let's add one zero to the predicted histograms. The zeros are removed + // too efficiently by the pix_diff == 0 comparison, at least one of the + // zeros is likely to exist. + ++histo[kHistoRedPredSubGreen * 256]; + ++histo[kHistoBluePredSubGreen * 256]; + ++histo[kHistoRedPred * 256]; + ++histo[kHistoGreenPred * 256]; + ++histo[kHistoBluePred * 256]; + ++histo[kHistoAlphaPred * 256]; + + for (j = 0; j < kHistoTotal; ++j) { + entropy_comp[j] = VP8LBitsEntropy(&histo[j * 256], 256, NULL); + } + entropy[kDirect] = entropy_comp[kHistoAlpha] + + entropy_comp[kHistoRed] + + entropy_comp[kHistoGreen] + + entropy_comp[kHistoBlue]; + entropy[kSpatial] = entropy_comp[kHistoAlphaPred] + + entropy_comp[kHistoRedPred] + + entropy_comp[kHistoGreenPred] + + entropy_comp[kHistoBluePred]; + entropy[kSubGreen] = entropy_comp[kHistoAlpha] + + entropy_comp[kHistoRedSubGreen] + + entropy_comp[kHistoGreen] + + entropy_comp[kHistoBlueSubGreen]; + entropy[kSpatialSubGreen] = entropy_comp[kHistoAlphaPred] + + entropy_comp[kHistoRedPredSubGreen] + + entropy_comp[kHistoGreenPred] + + entropy_comp[kHistoBluePredSubGreen]; + // Palette mode seems more efficient in a breakeven case. Bias with 1.0. + entropy[kPalette] = entropy_comp[kHistoPalette] - 1.0; + + *min_entropy_ix = kDirect; + for (k = kDirect + 1; k <= last_mode_to_analyze; ++k) { + if (entropy[*min_entropy_ix] > entropy[k]) { + *min_entropy_ix = k; + } + } + *red_and_blue_always_zero = 1; + // Let's check if the histogram of the chosen entropy mode has + // non-zero red and blue values. If all are zero, we can later skip + // the cross color optimization. + { + static const uint8_t kHistoPairs[5][2] = { + { kHistoRed, kHistoBlue }, + { kHistoRedPred, kHistoBluePred }, + { kHistoRedSubGreen, kHistoBlueSubGreen }, + { kHistoRedPredSubGreen, kHistoBluePredSubGreen }, + { kHistoRed, kHistoBlue } + }; + const uint32_t* const red_histo = + &histo[256 * kHistoPairs[*min_entropy_ix][0]]; + const uint32_t* const blue_histo = + &histo[256 * kHistoPairs[*min_entropy_ix][1]]; + for (i = 1; i < 256; ++i) { + if ((red_histo[i] | blue_histo[i]) != 0) { + *red_and_blue_always_zero = 0; + break; + } + } + } + } + WebPSafeFree(histo); + return 1; + } else { + return 0; + } +} + +static int GetHistoBits(int method, int use_palette, int width, int height) { + // Make tile size a function of encoding method (Range: 0 to 6). + int histo_bits = (use_palette ? 9 : 7) - method; + while (1) { + const int huff_image_size = VP8LSubSampleSize(width, histo_bits) * + VP8LSubSampleSize(height, histo_bits); + if (huff_image_size <= MAX_HUFF_IMAGE_SIZE) break; + ++histo_bits; + } + return (histo_bits < MIN_HUFFMAN_BITS) ? MIN_HUFFMAN_BITS : + (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits; +} + +static int GetTransformBits(int method, int histo_bits) { + const int max_transform_bits = (method < 4) ? 6 : (method > 4) ? 4 : 5; + return (histo_bits > max_transform_bits) ? max_transform_bits : histo_bits; +} + +static int AnalyzeAndInit(VP8LEncoder* const enc) { + const WebPPicture* const pic = enc->pic_; + const int width = pic->width; + const int height = pic->height; + const int pix_cnt = width * height; + const WebPConfig* const config = enc->config_; + const int method = config->method; + const int low_effort = (config->method == 0); + // we round the block size up, so we're guaranteed to have + // at max MAX_REFS_BLOCK_PER_IMAGE blocks used: + int refs_block_size = (pix_cnt - 1) / MAX_REFS_BLOCK_PER_IMAGE + 1; + assert(pic != NULL && pic->argb != NULL); + + enc->use_cross_color_ = 0; + enc->use_predict_ = 0; + enc->use_subtract_green_ = 0; + enc->use_palette_ = + AnalyzeAndCreatePalette(pic, low_effort, + enc->palette_, &enc->palette_size_); + + // TODO(jyrki): replace the decision to be based on an actual estimate + // of entropy, or even spatial variance of entropy. + enc->histo_bits_ = GetHistoBits(method, enc->use_palette_, + pic->width, pic->height); + enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_); + + if (low_effort) { + // AnalyzeEntropy is somewhat slow. + enc->use_predict_ = !enc->use_palette_; + enc->use_subtract_green_ = !enc->use_palette_; + enc->use_cross_color_ = 0; + } else { + int red_and_blue_always_zero; + EntropyIx min_entropy_ix; + if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride, + enc->use_palette_, &min_entropy_ix, + &red_and_blue_always_zero)) { + return 0; + } + enc->use_palette_ = (min_entropy_ix == kPalette); + enc->use_subtract_green_ = + (min_entropy_ix == kSubGreen) || (min_entropy_ix == kSpatialSubGreen); + enc->use_predict_ = + (min_entropy_ix == kSpatial) || (min_entropy_ix == kSpatialSubGreen); + enc->use_cross_color_ = red_and_blue_always_zero ? 0 : enc->use_predict_; + } + + if (!VP8LHashChainInit(&enc->hash_chain_, pix_cnt)) return 0; + + // palette-friendly input typically uses less literals + // -> reduce block size a bit + if (enc->use_palette_) refs_block_size /= 2; + VP8LBackwardRefsInit(&enc->refs_[0], refs_block_size); + VP8LBackwardRefsInit(&enc->refs_[1], refs_block_size); + + return 1; +} + +// Returns false in case of memory error. +static int GetHuffBitLengthsAndCodes( + const VP8LHistogramSet* const histogram_image, + HuffmanTreeCode* const huffman_codes) { + int i, k; + int ok = 0; + uint64_t total_length_size = 0; + uint8_t* mem_buf = NULL; + const int histogram_image_size = histogram_image->size; + int max_num_symbols = 0; + uint8_t* buf_rle = NULL; + HuffmanTree* huff_tree = NULL; + + // Iterate over all histograms and get the aggregate number of codes used. + for (i = 0; i < histogram_image_size; ++i) { + const VP8LHistogram* const histo = histogram_image->histograms[i]; + HuffmanTreeCode* const codes = &huffman_codes[5 * i]; + for (k = 0; k < 5; ++k) { + const int num_symbols = + (k == 0) ? VP8LHistogramNumCodes(histo->palette_code_bits_) : + (k == 4) ? NUM_DISTANCE_CODES : 256; + codes[k].num_symbols = num_symbols; + total_length_size += num_symbols; + } + } + + // Allocate and Set Huffman codes. + { + uint16_t* codes; + uint8_t* lengths; + mem_buf = (uint8_t*)WebPSafeCalloc(total_length_size, + sizeof(*lengths) + sizeof(*codes)); + if (mem_buf == NULL) goto End; + + codes = (uint16_t*)mem_buf; + lengths = (uint8_t*)&codes[total_length_size]; + for (i = 0; i < 5 * histogram_image_size; ++i) { + const int bit_length = huffman_codes[i].num_symbols; + huffman_codes[i].codes = codes; + huffman_codes[i].code_lengths = lengths; + codes += bit_length; + lengths += bit_length; + if (max_num_symbols < bit_length) { + max_num_symbols = bit_length; + } + } + } + + buf_rle = (uint8_t*)WebPSafeMalloc(1ULL, max_num_symbols); + huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * max_num_symbols, + sizeof(*huff_tree)); + if (buf_rle == NULL || huff_tree == NULL) goto End; + + // Create Huffman trees. + for (i = 0; i < histogram_image_size; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[5 * i]; + VP8LHistogram* const histo = histogram_image->histograms[i]; + VP8LCreateHuffmanTree(histo->literal_, 15, buf_rle, huff_tree, codes + 0); + VP8LCreateHuffmanTree(histo->red_, 15, buf_rle, huff_tree, codes + 1); + VP8LCreateHuffmanTree(histo->blue_, 15, buf_rle, huff_tree, codes + 2); + VP8LCreateHuffmanTree(histo->alpha_, 15, buf_rle, huff_tree, codes + 3); + VP8LCreateHuffmanTree(histo->distance_, 15, buf_rle, huff_tree, codes + 4); + } + ok = 1; + End: + WebPSafeFree(huff_tree); + WebPSafeFree(buf_rle); + if (!ok) { + WebPSafeFree(mem_buf); + memset(huffman_codes, 0, 5 * histogram_image_size * sizeof(*huffman_codes)); + } + return ok; +} + +static void StoreHuffmanTreeOfHuffmanTreeToBitMask( + VP8LBitWriter* const bw, const uint8_t* code_length_bitdepth) { + // RFC 1951 will calm you down if you are worried about this funny sequence. + // This sequence is tuned from that, but more weighted for lower symbol count, + // and more spiking histograms. + static const uint8_t kStorageOrder[CODE_LENGTH_CODES] = { + 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + }; + int i; + // Throw away trailing zeros: + int codes_to_store = CODE_LENGTH_CODES; + for (; codes_to_store > 4; --codes_to_store) { + if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) { + break; + } + } + VP8LPutBits(bw, codes_to_store - 4, 4); + for (i = 0; i < codes_to_store; ++i) { + VP8LPutBits(bw, code_length_bitdepth[kStorageOrder[i]], 3); + } +} + +static void ClearHuffmanTreeIfOnlyOneSymbol( + HuffmanTreeCode* const huffman_code) { + int k; + int count = 0; + for (k = 0; k < huffman_code->num_symbols; ++k) { + if (huffman_code->code_lengths[k] != 0) { + ++count; + if (count > 1) return; + } + } + for (k = 0; k < huffman_code->num_symbols; ++k) { + huffman_code->code_lengths[k] = 0; + huffman_code->codes[k] = 0; + } +} + +static void StoreHuffmanTreeToBitMask( + VP8LBitWriter* const bw, + const HuffmanTreeToken* const tokens, const int num_tokens, + const HuffmanTreeCode* const huffman_code) { + int i; + for (i = 0; i < num_tokens; ++i) { + const int ix = tokens[i].code; + const int extra_bits = tokens[i].extra_bits; + VP8LPutBits(bw, huffman_code->codes[ix], huffman_code->code_lengths[ix]); + switch (ix) { + case 16: + VP8LPutBits(bw, extra_bits, 2); + break; + case 17: + VP8LPutBits(bw, extra_bits, 3); + break; + case 18: + VP8LPutBits(bw, extra_bits, 7); + break; + } + } +} + +// 'huff_tree' and 'tokens' are pre-alloacted buffers. +static void StoreFullHuffmanCode(VP8LBitWriter* const bw, + HuffmanTree* const huff_tree, + HuffmanTreeToken* const tokens, + const HuffmanTreeCode* const tree) { + uint8_t code_length_bitdepth[CODE_LENGTH_CODES] = { 0 }; + uint16_t code_length_bitdepth_symbols[CODE_LENGTH_CODES] = { 0 }; + const int max_tokens = tree->num_symbols; + int num_tokens; + HuffmanTreeCode huffman_code; + huffman_code.num_symbols = CODE_LENGTH_CODES; + huffman_code.code_lengths = code_length_bitdepth; + huffman_code.codes = code_length_bitdepth_symbols; + + VP8LPutBits(bw, 0, 1); + num_tokens = VP8LCreateCompressedHuffmanTree(tree, tokens, max_tokens); + { + uint32_t histogram[CODE_LENGTH_CODES] = { 0 }; + uint8_t buf_rle[CODE_LENGTH_CODES] = { 0 }; + int i; + for (i = 0; i < num_tokens; ++i) { + ++histogram[tokens[i].code]; + } + + VP8LCreateHuffmanTree(histogram, 7, buf_rle, huff_tree, &huffman_code); + } + + StoreHuffmanTreeOfHuffmanTreeToBitMask(bw, code_length_bitdepth); + ClearHuffmanTreeIfOnlyOneSymbol(&huffman_code); + { + int trailing_zero_bits = 0; + int trimmed_length = num_tokens; + int write_trimmed_length; + int length; + int i = num_tokens; + while (i-- > 0) { + const int ix = tokens[i].code; + if (ix == 0 || ix == 17 || ix == 18) { + --trimmed_length; // discount trailing zeros + trailing_zero_bits += code_length_bitdepth[ix]; + if (ix == 17) { + trailing_zero_bits += 3; + } else if (ix == 18) { + trailing_zero_bits += 7; + } + } else { + break; + } + } + write_trimmed_length = (trimmed_length > 1 && trailing_zero_bits > 12); + length = write_trimmed_length ? trimmed_length : num_tokens; + VP8LPutBits(bw, write_trimmed_length, 1); + if (write_trimmed_length) { + const int nbits = VP8LBitsLog2Ceiling(trimmed_length - 1); + const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2; + VP8LPutBits(bw, nbitpairs - 1, 3); + assert(trimmed_length >= 2); + VP8LPutBits(bw, trimmed_length - 2, nbitpairs * 2); + } + StoreHuffmanTreeToBitMask(bw, tokens, length, &huffman_code); + } +} + +// 'huff_tree' and 'tokens' are pre-alloacted buffers. +static void StoreHuffmanCode(VP8LBitWriter* const bw, + HuffmanTree* const huff_tree, + HuffmanTreeToken* const tokens, + const HuffmanTreeCode* const huffman_code) { + int i; + int count = 0; + int symbols[2] = { 0, 0 }; + const int kMaxBits = 8; + const int kMaxSymbol = 1 << kMaxBits; + + // Check whether it's a small tree. + for (i = 0; i < huffman_code->num_symbols && count < 3; ++i) { + if (huffman_code->code_lengths[i] != 0) { + if (count < 2) symbols[count] = i; + ++count; + } + } + + if (count == 0) { // emit minimal tree for empty cases + // bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0 + VP8LPutBits(bw, 0x01, 4); + } else if (count <= 2 && symbols[0] < kMaxSymbol && symbols[1] < kMaxSymbol) { + VP8LPutBits(bw, 1, 1); // Small tree marker to encode 1 or 2 symbols. + VP8LPutBits(bw, count - 1, 1); + if (symbols[0] <= 1) { + VP8LPutBits(bw, 0, 1); // Code bit for small (1 bit) symbol value. + VP8LPutBits(bw, symbols[0], 1); + } else { + VP8LPutBits(bw, 1, 1); + VP8LPutBits(bw, symbols[0], 8); + } + if (count == 2) { + VP8LPutBits(bw, symbols[1], 8); + } + } else { + StoreFullHuffmanCode(bw, huff_tree, tokens, huffman_code); + } +} + +static WEBP_INLINE void WriteHuffmanCode(VP8LBitWriter* const bw, + const HuffmanTreeCode* const code, + int code_index) { + const int depth = code->code_lengths[code_index]; + const int symbol = code->codes[code_index]; + VP8LPutBits(bw, symbol, depth); +} + +static WEBP_INLINE void WriteHuffmanCodeWithExtraBits( + VP8LBitWriter* const bw, + const HuffmanTreeCode* const code, + int code_index, + int bits, + int n_bits) { + const int depth = code->code_lengths[code_index]; + const int symbol = code->codes[code_index]; + VP8LPutBits(bw, (bits << depth) | symbol, depth + n_bits); +} + +static WebPEncodingError StoreImageToBitMask( + VP8LBitWriter* const bw, int width, int histo_bits, + VP8LBackwardRefs* const refs, + const uint16_t* histogram_symbols, + const HuffmanTreeCode* const huffman_codes) { + const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1; + const int tile_mask = (histo_bits == 0) ? 0 : -(1 << histo_bits); + // x and y trace the position in the image. + int x = 0; + int y = 0; + int tile_x = x & tile_mask; + int tile_y = y & tile_mask; + int histogram_ix = histogram_symbols[0]; + const HuffmanTreeCode* codes = huffman_codes + 5 * histogram_ix; + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + while (VP8LRefsCursorOk(&c)) { + const PixOrCopy* const v = c.cur_pos; + if ((tile_x != (x & tile_mask)) || (tile_y != (y & tile_mask))) { + tile_x = x & tile_mask; + tile_y = y & tile_mask; + histogram_ix = histogram_symbols[(y >> histo_bits) * histo_xsize + + (x >> histo_bits)]; + codes = huffman_codes + 5 * histogram_ix; + } + if (PixOrCopyIsLiteral(v)) { + static const int order[] = { 1, 2, 0, 3 }; + int k; + for (k = 0; k < 4; ++k) { + const int code = PixOrCopyLiteral(v, order[k]); + WriteHuffmanCode(bw, codes + k, code); + } + } else if (PixOrCopyIsCacheIdx(v)) { + const int code = PixOrCopyCacheIdx(v); + const int literal_ix = 256 + NUM_LENGTH_CODES + code; + WriteHuffmanCode(bw, codes, literal_ix); + } else { + int bits, n_bits; + int code; + + const int distance = PixOrCopyDistance(v); + VP8LPrefixEncode(v->len, &code, &n_bits, &bits); + WriteHuffmanCodeWithExtraBits(bw, codes, 256 + code, bits, n_bits); + + // Don't write the distance with the extra bits code since + // the distance can be up to 18 bits of extra bits, and the prefix + // 15 bits, totaling to 33, and our PutBits only supports up to 32 bits. + // TODO(jyrki): optimize this further. + VP8LPrefixEncode(distance, &code, &n_bits, &bits); + WriteHuffmanCode(bw, codes + 4, code); + VP8LPutBits(bw, bits, n_bits); + } + x += PixOrCopyLength(v); + while (x >= width) { + x -= width; + ++y; + } + VP8LRefsCursorNext(&c); + } + return bw->error_ ? VP8_ENC_ERROR_OUT_OF_MEMORY : VP8_ENC_OK; +} + +// Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31 +static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, + const uint32_t* const argb, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2], + int width, int height, + int quality) { + int i; + int max_tokens = 0; + WebPEncodingError err = VP8_ENC_OK; + VP8LBackwardRefs* refs; + HuffmanTreeToken* tokens = NULL; + HuffmanTreeCode huffman_codes[5] = { { 0, NULL, NULL } }; + const uint16_t histogram_symbols[1] = { 0 }; // only one tree, one symbol + int cache_bits = 0; + VP8LHistogramSet* histogram_image = NULL; + HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc( + 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); + if (huff_tree == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + // Calculate backward references from ARGB image. + if (VP8LHashChainFill(hash_chain, quality, argb, width, height) == 0) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, &cache_bits, + hash_chain, refs_array); + if (refs == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + histogram_image = VP8LAllocateHistogramSet(1, cache_bits); + if (histogram_image == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + // Build histogram image and symbols from backward references. + VP8LHistogramStoreRefs(refs, histogram_image->histograms[0]); + + // Create Huffman bit lengths and codes for each histogram image. + assert(histogram_image->size == 1); + if (!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + // No color cache, no Huffman image. + VP8LPutBits(bw, 0, 1); + + // Find maximum number of symbols for the huffman tree-set. + for (i = 0; i < 5; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + if (max_tokens < codes->num_symbols) { + max_tokens = codes->num_symbols; + } + } + + tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); + if (tokens == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + // Store Huffman codes. + for (i = 0; i < 5; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + StoreHuffmanCode(bw, huff_tree, tokens, codes); + ClearHuffmanTreeIfOnlyOneSymbol(codes); + } + + // Store actual literals. + err = StoreImageToBitMask(bw, width, 0, refs, histogram_symbols, + huffman_codes); + + Error: + WebPSafeFree(tokens); + WebPSafeFree(huff_tree); + VP8LFreeHistogramSet(histogram_image); + WebPSafeFree(huffman_codes[0].codes); + return err; +} + +static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, + const uint32_t* const argb, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2], + int width, int height, int quality, + int low_effort, + int use_cache, int* cache_bits, + int histogram_bits, + size_t init_byte_position, + int* const hdr_size, + int* const data_size) { + WebPEncodingError err = VP8_ENC_OK; + const uint32_t histogram_image_xysize = + VP8LSubSampleSize(width, histogram_bits) * + VP8LSubSampleSize(height, histogram_bits); + VP8LHistogramSet* histogram_image = NULL; + VP8LHistogramSet* tmp_histos = NULL; + int histogram_image_size = 0; + size_t bit_array_size = 0; + HuffmanTree* huff_tree = NULL; + HuffmanTreeToken* tokens = NULL; + HuffmanTreeCode* huffman_codes = NULL; + VP8LBackwardRefs refs; + VP8LBackwardRefs* best_refs; + uint16_t* const histogram_symbols = + (uint16_t*)WebPSafeMalloc(histogram_image_xysize, + sizeof(*histogram_symbols)); + assert(histogram_bits >= MIN_HUFFMAN_BITS); + assert(histogram_bits <= MAX_HUFFMAN_BITS); + assert(hdr_size != NULL); + assert(data_size != NULL); + + VP8LBackwardRefsInit(&refs, refs_array[0].block_size_); + if (histogram_symbols == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + *cache_bits = use_cache ? MAX_COLOR_CACHE_BITS : 0; + // 'best_refs' is the reference to the best backward refs and points to one + // of refs_array[0] or refs_array[1]. + // Calculate backward references from ARGB image. + if (VP8LHashChainFill(hash_chain, quality, argb, width, height) == 0) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + best_refs = VP8LGetBackwardReferences(width, height, argb, quality, + low_effort, cache_bits, hash_chain, + refs_array); + if (best_refs == NULL || !VP8LBackwardRefsCopy(best_refs, &refs)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + histogram_image = + VP8LAllocateHistogramSet(histogram_image_xysize, *cache_bits); + tmp_histos = VP8LAllocateHistogramSet(2, *cache_bits); + if (histogram_image == NULL || tmp_histos == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + // Build histogram image and symbols from backward references. + if (!VP8LGetHistoImageSymbols(width, height, &refs, quality, low_effort, + histogram_bits, *cache_bits, histogram_image, + tmp_histos, histogram_symbols)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + // Create Huffman bit lengths and codes for each histogram image. + histogram_image_size = histogram_image->size; + bit_array_size = 5 * histogram_image_size; + huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size, + sizeof(*huffman_codes)); + // Note: some histogram_image entries may point to tmp_histos[], so the latter + // need to outlive the following call to GetHuffBitLengthsAndCodes(). + if (huffman_codes == NULL || + !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + // Free combined histograms. + VP8LFreeHistogramSet(histogram_image); + histogram_image = NULL; + + // Free scratch histograms. + VP8LFreeHistogramSet(tmp_histos); + tmp_histos = NULL; + + // Color Cache parameters. + if (*cache_bits > 0) { + VP8LPutBits(bw, 1, 1); + VP8LPutBits(bw, *cache_bits, 4); + } else { + VP8LPutBits(bw, 0, 1); + } + + // Huffman image + meta huffman. + { + const int write_histogram_image = (histogram_image_size > 1); + VP8LPutBits(bw, write_histogram_image, 1); + if (write_histogram_image) { + uint32_t* const histogram_argb = + (uint32_t*)WebPSafeMalloc(histogram_image_xysize, + sizeof(*histogram_argb)); + int max_index = 0; + uint32_t i; + if (histogram_argb == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + for (i = 0; i < histogram_image_xysize; ++i) { + const int symbol_index = histogram_symbols[i] & 0xffff; + histogram_argb[i] = (symbol_index << 8); + if (symbol_index >= max_index) { + max_index = symbol_index + 1; + } + } + histogram_image_size = max_index; + + VP8LPutBits(bw, histogram_bits - 2, 3); + err = EncodeImageNoHuffman(bw, histogram_argb, hash_chain, refs_array, + VP8LSubSampleSize(width, histogram_bits), + VP8LSubSampleSize(height, histogram_bits), + quality); + WebPSafeFree(histogram_argb); + if (err != VP8_ENC_OK) goto Error; + } + } + + // Store Huffman codes. + { + int i; + int max_tokens = 0; + huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * CODE_LENGTH_CODES, + sizeof(*huff_tree)); + if (huff_tree == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + // Find maximum number of symbols for the huffman tree-set. + for (i = 0; i < 5 * histogram_image_size; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + if (max_tokens < codes->num_symbols) { + max_tokens = codes->num_symbols; + } + } + tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, + sizeof(*tokens)); + if (tokens == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + for (i = 0; i < 5 * histogram_image_size; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + StoreHuffmanCode(bw, huff_tree, tokens, codes); + ClearHuffmanTreeIfOnlyOneSymbol(codes); + } + } + + *hdr_size = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position); + // Store actual literals. + err = StoreImageToBitMask(bw, width, histogram_bits, &refs, + histogram_symbols, huffman_codes); + *data_size = + (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size); + + Error: + WebPSafeFree(tokens); + WebPSafeFree(huff_tree); + VP8LFreeHistogramSet(histogram_image); + VP8LFreeHistogramSet(tmp_histos); + VP8LBackwardRefsClear(&refs); + if (huffman_codes != NULL) { + WebPSafeFree(huffman_codes->codes); + WebPSafeFree(huffman_codes); + } + WebPSafeFree(histogram_symbols); + return err; +} + +// ----------------------------------------------------------------------------- +// Transforms + +static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height, + VP8LBitWriter* const bw) { + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, SUBTRACT_GREEN, 2); + VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height); +} + +static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, + int width, int height, + int quality, int low_effort, + int used_subtract_green, + VP8LBitWriter* const bw) { + const int pred_bits = enc->transform_bits_; + const int transform_width = VP8LSubSampleSize(width, pred_bits); + const int transform_height = VP8LSubSampleSize(height, pred_bits); + // we disable near-lossless quantization if palette is used. + const int near_lossless_strength = enc->use_palette_ ? 100 + : enc->config_->near_lossless; + + VP8LResidualImage(width, height, pred_bits, low_effort, enc->argb_, + enc->argb_scratch_, enc->transform_data_, + near_lossless_strength, enc->config_->exact, + used_subtract_green); + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2); + assert(pred_bits >= 2); + VP8LPutBits(bw, pred_bits - 2, 3); + return EncodeImageNoHuffman(bw, enc->transform_data_, + (VP8LHashChain*)&enc->hash_chain_, + (VP8LBackwardRefs*)enc->refs_, // cast const away + transform_width, transform_height, + quality); +} + +static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, + int width, int height, + int quality, + VP8LBitWriter* const bw) { + const int ccolor_transform_bits = enc->transform_bits_; + const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits); + const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits); + + VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality, + enc->argb_, enc->transform_data_); + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, CROSS_COLOR_TRANSFORM, 2); + assert(ccolor_transform_bits >= 2); + VP8LPutBits(bw, ccolor_transform_bits - 2, 3); + return EncodeImageNoHuffman(bw, enc->transform_data_, + (VP8LHashChain*)&enc->hash_chain_, + (VP8LBackwardRefs*)enc->refs_, // cast const away + transform_width, transform_height, + quality); +} + +// ----------------------------------------------------------------------------- + +static WebPEncodingError WriteRiffHeader(const WebPPicture* const pic, + size_t riff_size, size_t vp8l_size) { + uint8_t riff[RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + VP8L_SIGNATURE_SIZE] = { + 'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P', + 'V', 'P', '8', 'L', 0, 0, 0, 0, VP8L_MAGIC_BYTE, + }; + PutLE32(riff + TAG_SIZE, (uint32_t)riff_size); + PutLE32(riff + RIFF_HEADER_SIZE + TAG_SIZE, (uint32_t)vp8l_size); + if (!pic->writer(riff, sizeof(riff), pic)) { + return VP8_ENC_ERROR_BAD_WRITE; + } + return VP8_ENC_OK; +} + +static int WriteImageSize(const WebPPicture* const pic, + VP8LBitWriter* const bw) { + const int width = pic->width - 1; + const int height = pic->height - 1; + assert(width < WEBP_MAX_DIMENSION && height < WEBP_MAX_DIMENSION); + + VP8LPutBits(bw, width, VP8L_IMAGE_SIZE_BITS); + VP8LPutBits(bw, height, VP8L_IMAGE_SIZE_BITS); + return !bw->error_; +} + +static int WriteRealAlphaAndVersion(VP8LBitWriter* const bw, int has_alpha) { + VP8LPutBits(bw, has_alpha, 1); + VP8LPutBits(bw, VP8L_VERSION, VP8L_VERSION_BITS); + return !bw->error_; +} + +static WebPEncodingError WriteImage(const WebPPicture* const pic, + VP8LBitWriter* const bw, + size_t* const coded_size) { + WebPEncodingError err = VP8_ENC_OK; + const uint8_t* const webpll_data = VP8LBitWriterFinish(bw); + const size_t webpll_size = VP8LBitWriterNumBytes(bw); + const size_t vp8l_size = VP8L_SIGNATURE_SIZE + webpll_size; + const size_t pad = vp8l_size & 1; + const size_t riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8l_size + pad; + + err = WriteRiffHeader(pic, riff_size, vp8l_size); + if (err != VP8_ENC_OK) goto Error; + + if (!pic->writer(webpll_data, webpll_size, pic)) { + err = VP8_ENC_ERROR_BAD_WRITE; + goto Error; + } + + if (pad) { + const uint8_t pad_byte[1] = { 0 }; + if (!pic->writer(pad_byte, 1, pic)) { + err = VP8_ENC_ERROR_BAD_WRITE; + goto Error; + } + } + *coded_size = CHUNK_HEADER_SIZE + riff_size; + return VP8_ENC_OK; + + Error: + return err; +} + +// ----------------------------------------------------------------------------- + +static void ClearTransformBuffer(VP8LEncoder* const enc) { + WebPSafeFree(enc->transform_mem_); + enc->transform_mem_ = NULL; + enc->transform_mem_size_ = 0; +} + +// Allocates the memory for argb (W x H) buffer, 2 rows of context for +// prediction and transform data. +// Flags influencing the memory allocated: +// enc->transform_bits_ +// enc->use_predict_, enc->use_cross_color_ +static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, + int width, int height) { + WebPEncodingError err = VP8_ENC_OK; + const uint64_t image_size = width * height; + // VP8LResidualImage needs room for 2 scanlines of uint32 pixels with an extra + // pixel in each, plus 2 regular scanlines of bytes. + // TODO(skal): Clean up by using arithmetic in bytes instead of words. + const uint64_t argb_scratch_size = + enc->use_predict_ + ? (width + 1) * 2 + + (width * 2 + sizeof(uint32_t) - 1) / sizeof(uint32_t) + : 0; + const uint64_t transform_data_size = + (enc->use_predict_ || enc->use_cross_color_) + ? VP8LSubSampleSize(width, enc->transform_bits_) * + VP8LSubSampleSize(height, enc->transform_bits_) + : 0; + const uint64_t max_alignment_in_words = + (WEBP_ALIGN_CST + sizeof(uint32_t) - 1) / sizeof(uint32_t); + const uint64_t mem_size = + image_size + max_alignment_in_words + + argb_scratch_size + max_alignment_in_words + + transform_data_size; + uint32_t* mem = enc->transform_mem_; + if (mem == NULL || mem_size > enc->transform_mem_size_) { + ClearTransformBuffer(enc); + mem = (uint32_t*)WebPSafeMalloc(mem_size, sizeof(*mem)); + if (mem == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + enc->transform_mem_ = mem; + enc->transform_mem_size_ = (size_t)mem_size; + } + enc->argb_ = mem; + mem = (uint32_t*)WEBP_ALIGN(mem + image_size); + enc->argb_scratch_ = mem; + mem = (uint32_t*)WEBP_ALIGN(mem + argb_scratch_size); + enc->transform_data_ = mem; + + enc->current_width_ = width; + Error: + return err; +} + +static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { + WebPEncodingError err = VP8_ENC_OK; + const WebPPicture* const picture = enc->pic_; + const int width = picture->width; + const int height = picture->height; + int y; + err = AllocateTransformBuffer(enc, width, height); + if (err != VP8_ENC_OK) return err; + for (y = 0; y < height; ++y) { + memcpy(enc->argb_ + y * width, + picture->argb + y * picture->argb_stride, + width * sizeof(*enc->argb_)); + } + assert(enc->current_width_ == width); + return VP8_ENC_OK; +} + +// ----------------------------------------------------------------------------- + +static int SearchColor(const uint32_t sorted[], uint32_t color, int hi) { + int low = 0; + if (sorted[low] == color) return low; // loop invariant: sorted[low] != color + while (1) { + const int mid = (low + hi) >> 1; + if (sorted[mid] == color) { + return mid; + } else if (sorted[mid] < color) { + low = mid; + } else { + hi = mid; + } + } +} + +// Sort palette in increasing order and prepare an inverse mapping array. +static void PrepareMapToPalette(const uint32_t palette[], int num_colors, + uint32_t sorted[], int idx_map[]) { + int i; + memcpy(sorted, palette, num_colors * sizeof(*sorted)); + qsort(sorted, num_colors, sizeof(*sorted), PaletteCompareColorsForQsort); + for (i = 0; i < num_colors; ++i) { + idx_map[SearchColor(sorted, palette[i], num_colors)] = i; + } +} + +static void MapToPalette(const uint32_t sorted_palette[], int num_colors, + uint32_t* const last_pix, int* const last_idx, + const int idx_map[], + const uint32_t* src, uint8_t* dst, int width) { + int x; + int prev_idx = *last_idx; + uint32_t prev_pix = *last_pix; + for (x = 0; x < width; ++x) { + const uint32_t pix = src[x]; + if (pix != prev_pix) { + prev_idx = idx_map[SearchColor(sorted_palette, pix, num_colors)]; + prev_pix = pix; + } + dst[x] = prev_idx; + } + *last_idx = prev_idx; + *last_pix = prev_pix; +} + +// Remap argb values in src[] to packed palettes entries in dst[] +// using 'row' as a temporary buffer of size 'width'. +// We assume that all src[] values have a corresponding entry in the palette. +// Note: src[] can be the same as dst[] +static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, + uint32_t* dst, uint32_t dst_stride, + const uint32_t* palette, int palette_size, + int width, int height, int xbits) { + // TODO(skal): this tmp buffer is not needed if VP8LBundleColorMap() can be + // made to work in-place. + uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row)); + int i, x, y; + int use_LUT = 1; + + if (tmp_row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; + for (i = 0; i < palette_size; ++i) { + if ((palette[i] & 0xffff00ffu) != 0) { + use_LUT = 0; + break; + } + } + + if (use_LUT) { + uint8_t inv_palette[MAX_PALETTE_SIZE] = { 0 }; + for (i = 0; i < palette_size; ++i) { + const int color = (palette[i] >> 8) & 0xff; + inv_palette[color] = i; + } + for (y = 0; y < height; ++y) { + for (x = 0; x < width; ++x) { + const int color = (src[x] >> 8) & 0xff; + tmp_row[x] = inv_palette[color]; + } + VP8LBundleColorMap(tmp_row, width, xbits, dst); + src += src_stride; + dst += dst_stride; + } + } else { + // Use 1 pixel cache for ARGB pixels. + uint32_t last_pix; + int last_idx; + uint32_t sorted[MAX_PALETTE_SIZE]; + int idx_map[MAX_PALETTE_SIZE]; + PrepareMapToPalette(palette, palette_size, sorted, idx_map); + last_pix = palette[0]; + last_idx = 0; + for (y = 0; y < height; ++y) { + MapToPalette(sorted, palette_size, &last_pix, &last_idx, + idx_map, src, tmp_row, width); + VP8LBundleColorMap(tmp_row, width, xbits, dst); + src += src_stride; + dst += dst_stride; + } + } + WebPSafeFree(tmp_row); + return VP8_ENC_OK; +} + +// Note: Expects "enc->palette_" to be set properly. +static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc, + int in_place) { + WebPEncodingError err = VP8_ENC_OK; + const WebPPicture* const pic = enc->pic_; + const int width = pic->width; + const int height = pic->height; + const uint32_t* const palette = enc->palette_; + const uint32_t* src = in_place ? enc->argb_ : pic->argb; + const int src_stride = in_place ? enc->current_width_ : pic->argb_stride; + const int palette_size = enc->palette_size_; + int xbits; + + // Replace each input pixel by corresponding palette index. + // This is done line by line. + if (palette_size <= 4) { + xbits = (palette_size <= 2) ? 3 : 2; + } else { + xbits = (palette_size <= 16) ? 1 : 0; + } + + err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height); + if (err != VP8_ENC_OK) return err; + + err = ApplyPalette(src, src_stride, + enc->argb_, enc->current_width_, + palette, palette_size, width, height, xbits); + return err; +} + +// Save palette_[] to bitstream. +static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, + VP8LEncoder* const enc) { + int i; + uint32_t tmp_palette[MAX_PALETTE_SIZE]; + const int palette_size = enc->palette_size_; + const uint32_t* const palette = enc->palette_; + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, COLOR_INDEXING_TRANSFORM, 2); + assert(palette_size >= 1 && palette_size <= MAX_PALETTE_SIZE); + VP8LPutBits(bw, palette_size - 1, 8); + for (i = palette_size - 1; i >= 1; --i) { + tmp_palette[i] = VP8LSubPixels(palette[i], palette[i - 1]); + } + tmp_palette[0] = palette[0]; + return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_, enc->refs_, + palette_size, 1, 20 /* quality */); +} + +#ifdef WEBP_EXPERIMENTAL_FEATURES + +static WebPEncodingError EncodeDeltaPalettePredictorImage( + VP8LBitWriter* const bw, VP8LEncoder* const enc, int quality) { + const WebPPicture* const pic = enc->pic_; + const int width = pic->width; + const int height = pic->height; + + const int pred_bits = 5; + const int transform_width = VP8LSubSampleSize(width, pred_bits); + const int transform_height = VP8LSubSampleSize(height, pred_bits); + const int pred = 7; // default is Predictor7 (Top/Left Average) + const int tiles_per_row = VP8LSubSampleSize(width, pred_bits); + const int tiles_per_col = VP8LSubSampleSize(height, pred_bits); + uint32_t* predictors; + int tile_x, tile_y; + WebPEncodingError err = VP8_ENC_OK; + + predictors = (uint32_t*)WebPSafeMalloc(tiles_per_col * tiles_per_row, + sizeof(*predictors)); + if (predictors == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; + + for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { + for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { + predictors[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8); + } + } + + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2); + VP8LPutBits(bw, pred_bits - 2, 3); + err = EncodeImageNoHuffman(bw, predictors, &enc->hash_chain_, + (VP8LBackwardRefs*)enc->refs_, // cast const away + transform_width, transform_height, + quality); + WebPSafeFree(predictors); + return err; +} + +#endif // WEBP_EXPERIMENTAL_FEATURES + +// ----------------------------------------------------------------------------- +// VP8LEncoder + +static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config, + const WebPPicture* const picture) { + VP8LEncoder* const enc = (VP8LEncoder*)WebPSafeCalloc(1ULL, sizeof(*enc)); + if (enc == NULL) { + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); + return NULL; + } + enc->config_ = config; + enc->pic_ = picture; + + VP8LEncDspInit(); + + return enc; +} + +static void VP8LEncoderDelete(VP8LEncoder* enc) { + if (enc != NULL) { + VP8LHashChainClear(&enc->hash_chain_); + VP8LBackwardRefsClear(&enc->refs_[0]); + VP8LBackwardRefsClear(&enc->refs_[1]); + ClearTransformBuffer(enc); + WebPSafeFree(enc); + } +} + +// ----------------------------------------------------------------------------- +// Main call + +WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, + const WebPPicture* const picture, + VP8LBitWriter* const bw, int use_cache) { + WebPEncodingError err = VP8_ENC_OK; + const int quality = (int)config->quality; + const int low_effort = (config->method == 0); + const int width = picture->width; + const int height = picture->height; + VP8LEncoder* const enc = VP8LEncoderNew(config, picture); + const size_t byte_position = VP8LBitWriterNumBytes(bw); + int use_near_lossless = 0; + int hdr_size = 0; + int data_size = 0; + int use_delta_palettization = 0; + + if (enc == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + // --------------------------------------------------------------------------- + // Analyze image (entropy, num_palettes etc) + + if (!AnalyzeAndInit(enc)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + // Apply near-lossless preprocessing. + use_near_lossless = + (config->near_lossless < 100) && !enc->use_palette_ && !enc->use_predict_; + if (use_near_lossless) { + if (!VP8ApplyNearLossless(width, height, picture->argb, + config->near_lossless)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + } + +#ifdef WEBP_EXPERIMENTAL_FEATURES + if (config->delta_palettization) { + enc->use_predict_ = 1; + enc->use_cross_color_ = 0; + enc->use_subtract_green_ = 0; + enc->use_palette_ = 1; + err = MakeInputImageCopy(enc); + if (err != VP8_ENC_OK) goto Error; + err = WebPSearchOptimalDeltaPalette(enc); + if (err != VP8_ENC_OK) goto Error; + if (enc->use_palette_) { + err = AllocateTransformBuffer(enc, width, height); + if (err != VP8_ENC_OK) goto Error; + err = EncodeDeltaPalettePredictorImage(bw, enc, quality); + if (err != VP8_ENC_OK) goto Error; + use_delta_palettization = 1; + } + } +#endif // WEBP_EXPERIMENTAL_FEATURES + + // Encode palette + if (enc->use_palette_) { + err = EncodePalette(bw, enc); + if (err != VP8_ENC_OK) goto Error; + err = MapImageFromPalette(enc, use_delta_palettization); + if (err != VP8_ENC_OK) goto Error; + } + if (!use_delta_palettization) { + // In case image is not packed. + if (enc->argb_ == NULL) { + err = MakeInputImageCopy(enc); + if (err != VP8_ENC_OK) goto Error; + } + + // ------------------------------------------------------------------------- + // Apply transforms and write transform data. + + if (enc->use_subtract_green_) { + ApplySubtractGreen(enc, enc->current_width_, height, bw); + } + + if (enc->use_predict_) { + err = ApplyPredictFilter(enc, enc->current_width_, height, quality, + low_effort, enc->use_subtract_green_, bw); + if (err != VP8_ENC_OK) goto Error; + } + + if (enc->use_cross_color_) { + err = ApplyCrossColorFilter(enc, enc->current_width_, + height, quality, bw); + if (err != VP8_ENC_OK) goto Error; + } + } + + VP8LPutBits(bw, !TRANSFORM_PRESENT, 1); // No more transforms. + + // --------------------------------------------------------------------------- + // Encode and write the transformed image. + err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_, + enc->current_width_, height, quality, low_effort, + use_cache, &enc->cache_bits_, enc->histo_bits_, + byte_position, &hdr_size, &data_size); + if (err != VP8_ENC_OK) goto Error; + + if (picture->stats != NULL) { + WebPAuxStats* const stats = picture->stats; + stats->lossless_features = 0; + if (enc->use_predict_) stats->lossless_features |= 1; + if (enc->use_cross_color_) stats->lossless_features |= 2; + if (enc->use_subtract_green_) stats->lossless_features |= 4; + if (enc->use_palette_) stats->lossless_features |= 8; + stats->histogram_bits = enc->histo_bits_; + stats->transform_bits = enc->transform_bits_; + stats->cache_bits = enc->cache_bits_; + stats->palette_size = enc->palette_size_; + stats->lossless_size = (int)(VP8LBitWriterNumBytes(bw) - byte_position); + stats->lossless_hdr_size = hdr_size; + stats->lossless_data_size = data_size; + } + + Error: + VP8LEncoderDelete(enc); + return err; +} + +int VP8LEncodeImage(const WebPConfig* const config, + const WebPPicture* const picture) { + int width, height; + int has_alpha; + size_t coded_size; + int percent = 0; + int initial_size; + WebPEncodingError err = VP8_ENC_OK; + VP8LBitWriter bw; + + if (picture == NULL) return 0; + + if (config == NULL || picture->argb == NULL) { + err = VP8_ENC_ERROR_NULL_PARAMETER; + WebPEncodingSetError(picture, err); + return 0; + } + + width = picture->width; + height = picture->height; + // Initialize BitWriter with size corresponding to 16 bpp to photo images and + // 8 bpp for graphical images. + initial_size = (config->image_hint == WEBP_HINT_GRAPH) ? + width * height : width * height * 2; + if (!VP8LBitWriterInit(&bw, initial_size)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + if (!WebPReportProgress(picture, 1, &percent)) { + UserAbort: + err = VP8_ENC_ERROR_USER_ABORT; + goto Error; + } + // Reset stats (for pure lossless coding) + if (picture->stats != NULL) { + WebPAuxStats* const stats = picture->stats; + memset(stats, 0, sizeof(*stats)); + stats->PSNR[0] = 99.f; + stats->PSNR[1] = 99.f; + stats->PSNR[2] = 99.f; + stats->PSNR[3] = 99.f; + stats->PSNR[4] = 99.f; + } + + // Write image size. + if (!WriteImageSize(picture, &bw)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + has_alpha = WebPPictureHasTransparency(picture); + // Write the non-trivial Alpha flag and lossless version. + if (!WriteRealAlphaAndVersion(&bw, has_alpha)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + if (!WebPReportProgress(picture, 5, &percent)) goto UserAbort; + + // Encode main image stream. + err = VP8LEncodeStream(config, picture, &bw, 1 /*use_cache*/); + if (err != VP8_ENC_OK) goto Error; + + // TODO(skal): have a fine-grained progress report in VP8LEncodeStream(). + if (!WebPReportProgress(picture, 90, &percent)) goto UserAbort; + + // Finish the RIFF chunk. + err = WriteImage(picture, &bw, &coded_size); + if (err != VP8_ENC_OK) goto Error; + + if (!WebPReportProgress(picture, 100, &percent)) goto UserAbort; + + // Save size. + if (picture->stats != NULL) { + picture->stats->coded_size += (int)coded_size; + picture->stats->lossless_size = (int)coded_size; + } + + if (picture->extra_info != NULL) { + const int mb_w = (width + 15) >> 4; + const int mb_h = (height + 15) >> 4; + memset(picture->extra_info, 0, mb_w * mb_h * sizeof(*picture->extra_info)); + } + + Error: + if (bw.error_) err = VP8_ENC_ERROR_OUT_OF_MEMORY; + VP8LBitWriterWipeOut(&bw); + if (err != VP8_ENC_OK) { + WebPEncodingSetError(picture, err); + return 0; + } + return 1; +} + +//------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/enc/vp8li.h b/thirdparty/libwebp/enc/vp8li.h new file mode 100644 index 0000000000..371e276ee0 --- /dev/null +++ b/thirdparty/libwebp/enc/vp8li.h @@ -0,0 +1,81 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Lossless encoder: internal header. +// +// Author: Vikas Arora (vikaas.arora@gmail.com) + +#ifndef WEBP_ENC_VP8LI_H_ +#define WEBP_ENC_VP8LI_H_ + +#include "./backward_references.h" +#include "./histogram.h" +#include "../utils/bit_writer.h" +#include "../webp/encode.h" +#include "../webp/format_constants.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + const WebPConfig* config_; // user configuration and parameters + const WebPPicture* pic_; // input picture. + + uint32_t* argb_; // Transformed argb image data. + uint32_t* argb_scratch_; // Scratch memory for argb rows + // (used for prediction). + uint32_t* transform_data_; // Scratch memory for transform data. + uint32_t* transform_mem_; // Currently allocated memory. + size_t transform_mem_size_; // Currently allocated memory size. + + int current_width_; // Corresponds to packed image width. + + // Encoding parameters derived from quality parameter. + int histo_bits_; + int transform_bits_; + int cache_bits_; // If equal to 0, don't use color cache. + + // Encoding parameters derived from image characteristics. + int use_cross_color_; + int use_subtract_green_; + int use_predict_; + int use_palette_; + int palette_size_; + uint32_t palette_[MAX_PALETTE_SIZE]; + + // Some 'scratch' (potentially large) objects. + struct VP8LBackwardRefs refs_[2]; // Backward Refs array corresponding to + // LZ77 & RLE coding. + VP8LHashChain hash_chain_; // HashChain data for constructing + // backward references. +} VP8LEncoder; + +//------------------------------------------------------------------------------ +// internal functions. Not public. + +// Encodes the picture. +// Returns 0 if config or picture is NULL or picture doesn't have valid argb +// input. +int VP8LEncodeImage(const WebPConfig* const config, + const WebPPicture* const picture); + +// Encodes the main image stream using the supplied bit writer. +// If 'use_cache' is false, disables the use of color cache. +WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, + const WebPPicture* const picture, + VP8LBitWriter* const bw, int use_cache); + +//------------------------------------------------------------------------------ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_ENC_VP8LI_H_ */ diff --git a/thirdparty/libwebp/enc/webpenc.c b/thirdparty/libwebp/enc/webpenc.c new file mode 100644 index 0000000000..a7d04ea2ce --- /dev/null +++ b/thirdparty/libwebp/enc/webpenc.c @@ -0,0 +1,395 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// WebP encoder: main entry point +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include "./cost.h" +#include "./vp8enci.h" +#include "./vp8li.h" +#include "../utils/utils.h" + +// #define PRINT_MEMORY_INFO + +#ifdef PRINT_MEMORY_INFO +#include <stdio.h> +#endif + +//------------------------------------------------------------------------------ + +int WebPGetEncoderVersion(void) { + return (ENC_MAJ_VERSION << 16) | (ENC_MIN_VERSION << 8) | ENC_REV_VERSION; +} + +//------------------------------------------------------------------------------ +// VP8Encoder +//------------------------------------------------------------------------------ + +static void ResetSegmentHeader(VP8Encoder* const enc) { + VP8EncSegmentHeader* const hdr = &enc->segment_hdr_; + hdr->num_segments_ = enc->config_->segments; + hdr->update_map_ = (hdr->num_segments_ > 1); + hdr->size_ = 0; +} + +static void ResetFilterHeader(VP8Encoder* const enc) { + VP8EncFilterHeader* const hdr = &enc->filter_hdr_; + hdr->simple_ = 1; + hdr->level_ = 0; + hdr->sharpness_ = 0; + hdr->i4x4_lf_delta_ = 0; +} + +static void ResetBoundaryPredictions(VP8Encoder* const enc) { + // init boundary values once for all + // Note: actually, initializing the preds_[] is only needed for intra4. + int i; + uint8_t* const top = enc->preds_ - enc->preds_w_; + uint8_t* const left = enc->preds_ - 1; + for (i = -1; i < 4 * enc->mb_w_; ++i) { + top[i] = B_DC_PRED; + } + for (i = 0; i < 4 * enc->mb_h_; ++i) { + left[i * enc->preds_w_] = B_DC_PRED; + } + enc->nz_[-1] = 0; // constant +} + +// Mapping from config->method_ to coding tools used. +//-------------------+---+---+---+---+---+---+---+ +// Method | 0 | 1 | 2 | 3 |(4)| 5 | 6 | +//-------------------+---+---+---+---+---+---+---+ +// fast probe | x | | | x | | | | +//-------------------+---+---+---+---+---+---+---+ +// dynamic proba | ~ | x | x | x | x | x | x | +//-------------------+---+---+---+---+---+---+---+ +// fast mode analysis| | | | | x | x | x | +//-------------------+---+---+---+---+---+---+---+ +// basic rd-opt | | | | x | x | x | x | +//-------------------+---+---+---+---+---+---+---+ +// disto-refine i4/16| x | x | x | | | | | +//-------------------+---+---+---+---+---+---+---+ +// disto-refine uv | | x | x | | | | | +//-------------------+---+---+---+---+---+---+---+ +// rd-opt i4/16 | | | ~ | x | x | x | x | +//-------------------+---+---+---+---+---+---+---+ +// token buffer (opt)| | | | x | x | x | x | +//-------------------+---+---+---+---+---+---+---+ +// Trellis | | | | | | x |Ful| +//-------------------+---+---+---+---+---+---+---+ +// full-SNS | | | | | x | x | x | +//-------------------+---+---+---+---+---+---+---+ + +static void MapConfigToTools(VP8Encoder* const enc) { + const WebPConfig* const config = enc->config_; + const int method = config->method; + const int limit = 100 - config->partition_limit; + enc->method_ = method; + enc->rd_opt_level_ = (method >= 6) ? RD_OPT_TRELLIS_ALL + : (method >= 5) ? RD_OPT_TRELLIS + : (method >= 3) ? RD_OPT_BASIC + : RD_OPT_NONE; + enc->max_i4_header_bits_ = + 256 * 16 * 16 * // upper bound: up to 16bit per 4x4 block + (limit * limit) / (100 * 100); // ... modulated with a quadratic curve. + + // partition0 = 512k max. + enc->mb_header_limit_ = + (score_t)256 * 510 * 8 * 1024 / (enc->mb_w_ * enc->mb_h_); + + enc->thread_level_ = config->thread_level; + + enc->do_search_ = (config->target_size > 0 || config->target_PSNR > 0); + if (!config->low_memory) { +#if !defined(DISABLE_TOKEN_BUFFER) + enc->use_tokens_ = (enc->rd_opt_level_ >= RD_OPT_BASIC); // need rd stats +#endif + if (enc->use_tokens_) { + enc->num_parts_ = 1; // doesn't work with multi-partition + } + } +} + +// Memory scaling with dimensions: +// memory (bytes) ~= 2.25 * w + 0.0625 * w * h +// +// Typical memory footprint (614x440 picture) +// encoder: 22111 +// info: 4368 +// preds: 17741 +// top samples: 1263 +// non-zero: 175 +// lf-stats: 0 +// total: 45658 +// Transient object sizes: +// VP8EncIterator: 3360 +// VP8ModeScore: 872 +// VP8SegmentInfo: 732 +// VP8EncProba: 18352 +// LFStats: 2048 +// Picture size (yuv): 419328 + +static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, + WebPPicture* const picture) { + VP8Encoder* enc; + const int use_filter = + (config->filter_strength > 0) || (config->autofilter > 0); + const int mb_w = (picture->width + 15) >> 4; + const int mb_h = (picture->height + 15) >> 4; + const int preds_w = 4 * mb_w + 1; + const int preds_h = 4 * mb_h + 1; + const size_t preds_size = preds_w * preds_h * sizeof(*enc->preds_); + const int top_stride = mb_w * 16; + const size_t nz_size = (mb_w + 1) * sizeof(*enc->nz_) + WEBP_ALIGN_CST; + const size_t info_size = mb_w * mb_h * sizeof(*enc->mb_info_); + const size_t samples_size = + 2 * top_stride * sizeof(*enc->y_top_) // top-luma/u/v + + WEBP_ALIGN_CST; // align all + const size_t lf_stats_size = + config->autofilter ? sizeof(*enc->lf_stats_) + WEBP_ALIGN_CST : 0; + uint8_t* mem; + const uint64_t size = (uint64_t)sizeof(*enc) // main struct + + WEBP_ALIGN_CST // cache alignment + + info_size // modes info + + preds_size // prediction modes + + samples_size // top/left samples + + nz_size // coeff context bits + + lf_stats_size; // autofilter stats + +#ifdef PRINT_MEMORY_INFO + printf("===================================\n"); + printf("Memory used:\n" + " encoder: %ld\n" + " info: %ld\n" + " preds: %ld\n" + " top samples: %ld\n" + " non-zero: %ld\n" + " lf-stats: %ld\n" + " total: %ld\n", + sizeof(*enc) + WEBP_ALIGN_CST, info_size, + preds_size, samples_size, nz_size, lf_stats_size, size); + printf("Transient object sizes:\n" + " VP8EncIterator: %ld\n" + " VP8ModeScore: %ld\n" + " VP8SegmentInfo: %ld\n" + " VP8EncProba: %ld\n" + " LFStats: %ld\n", + sizeof(VP8EncIterator), sizeof(VP8ModeScore), + sizeof(VP8SegmentInfo), sizeof(VP8EncProba), + sizeof(LFStats)); + printf("Picture size (yuv): %ld\n", + mb_w * mb_h * 384 * sizeof(uint8_t)); + printf("===================================\n"); +#endif + mem = (uint8_t*)WebPSafeMalloc(size, sizeof(*mem)); + if (mem == NULL) { + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); + return NULL; + } + enc = (VP8Encoder*)mem; + mem = (uint8_t*)WEBP_ALIGN(mem + sizeof(*enc)); + memset(enc, 0, sizeof(*enc)); + enc->num_parts_ = 1 << config->partitions; + enc->mb_w_ = mb_w; + enc->mb_h_ = mb_h; + enc->preds_w_ = preds_w; + enc->mb_info_ = (VP8MBInfo*)mem; + mem += info_size; + enc->preds_ = ((uint8_t*)mem) + 1 + enc->preds_w_; + mem += preds_size; + enc->nz_ = 1 + (uint32_t*)WEBP_ALIGN(mem); + mem += nz_size; + enc->lf_stats_ = lf_stats_size ? (LFStats*)WEBP_ALIGN(mem) : NULL; + mem += lf_stats_size; + + // top samples (all 16-aligned) + mem = (uint8_t*)WEBP_ALIGN(mem); + enc->y_top_ = (uint8_t*)mem; + enc->uv_top_ = enc->y_top_ + top_stride; + mem += 2 * top_stride; + assert(mem <= (uint8_t*)enc + size); + + enc->config_ = config; + enc->profile_ = use_filter ? ((config->filter_type == 1) ? 0 : 1) : 2; + enc->pic_ = picture; + enc->percent_ = 0; + + MapConfigToTools(enc); + VP8EncDspInit(); + VP8DefaultProbas(enc); + ResetSegmentHeader(enc); + ResetFilterHeader(enc); + ResetBoundaryPredictions(enc); + VP8EncDspCostInit(); + VP8EncInitAlpha(enc); + + // lower quality means smaller output -> we modulate a little the page + // size based on quality. This is just a crude 1rst-order prediction. + { + const float scale = 1.f + config->quality * 5.f / 100.f; // in [1,6] + VP8TBufferInit(&enc->tokens_, (int)(mb_w * mb_h * 4 * scale)); + } + return enc; +} + +static int DeleteVP8Encoder(VP8Encoder* enc) { + int ok = 1; + if (enc != NULL) { + ok = VP8EncDeleteAlpha(enc); + VP8TBufferClear(&enc->tokens_); + WebPSafeFree(enc); + } + return ok; +} + +//------------------------------------------------------------------------------ + +static double GetPSNR(uint64_t err, uint64_t size) { + return (err > 0 && size > 0) ? 10. * log10(255. * 255. * size / err) : 99.; +} + +static void FinalizePSNR(const VP8Encoder* const enc) { + WebPAuxStats* stats = enc->pic_->stats; + const uint64_t size = enc->sse_count_; + const uint64_t* const sse = enc->sse_; + stats->PSNR[0] = (float)GetPSNR(sse[0], size); + stats->PSNR[1] = (float)GetPSNR(sse[1], size / 4); + stats->PSNR[2] = (float)GetPSNR(sse[2], size / 4); + stats->PSNR[3] = (float)GetPSNR(sse[0] + sse[1] + sse[2], size * 3 / 2); + stats->PSNR[4] = (float)GetPSNR(sse[3], size); +} + +static void StoreStats(VP8Encoder* const enc) { + WebPAuxStats* const stats = enc->pic_->stats; + if (stats != NULL) { + int i, s; + for (i = 0; i < NUM_MB_SEGMENTS; ++i) { + stats->segment_level[i] = enc->dqm_[i].fstrength_; + stats->segment_quant[i] = enc->dqm_[i].quant_; + for (s = 0; s <= 2; ++s) { + stats->residual_bytes[s][i] = enc->residual_bytes_[s][i]; + } + } + FinalizePSNR(enc); + stats->coded_size = enc->coded_size_; + for (i = 0; i < 3; ++i) { + stats->block_count[i] = enc->block_count_[i]; + } + } + WebPReportProgress(enc->pic_, 100, &enc->percent_); // done! +} + +int WebPEncodingSetError(const WebPPicture* const pic, + WebPEncodingError error) { + assert((int)error < VP8_ENC_ERROR_LAST); + assert((int)error >= VP8_ENC_OK); + ((WebPPicture*)pic)->error_code = error; + return 0; +} + +int WebPReportProgress(const WebPPicture* const pic, + int percent, int* const percent_store) { + if (percent_store != NULL && percent != *percent_store) { + *percent_store = percent; + if (pic->progress_hook && !pic->progress_hook(percent, pic)) { + // user abort requested + WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT); + return 0; + } + } + return 1; // ok +} +//------------------------------------------------------------------------------ + +int WebPEncode(const WebPConfig* config, WebPPicture* pic) { + int ok = 0; + + if (pic == NULL) + return 0; + WebPEncodingSetError(pic, VP8_ENC_OK); // all ok so far + if (config == NULL) // bad params + return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER); + if (!WebPValidateConfig(config)) + return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION); + if (pic->width <= 0 || pic->height <= 0) + return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION); + if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION) + return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION); + + if (pic->stats != NULL) memset(pic->stats, 0, sizeof(*pic->stats)); + + if (!config->lossless) { + VP8Encoder* enc = NULL; + + if (!config->exact) { + WebPCleanupTransparentArea(pic); + } + + if (pic->use_argb || pic->y == NULL || pic->u == NULL || pic->v == NULL) { + // Make sure we have YUVA samples. + if (config->preprocessing & 4) { + if (!WebPPictureSmartARGBToYUVA(pic)) { + return 0; + } + } else { + float dithering = 0.f; + if (config->preprocessing & 2) { + const float x = config->quality / 100.f; + const float x2 = x * x; + // slowly decreasing from max dithering at low quality (q->0) + // to 0.5 dithering amplitude at high quality (q->100) + dithering = 1.0f + (0.5f - 1.0f) * x2 * x2; + } + if (!WebPPictureARGBToYUVADithered(pic, WEBP_YUV420, dithering)) { + return 0; + } + } + } + + enc = InitVP8Encoder(config, pic); + if (enc == NULL) return 0; // pic->error is already set. + // Note: each of the tasks below account for 20% in the progress report. + ok = VP8EncAnalyze(enc); + + // Analysis is done, proceed to actual coding. + ok = ok && VP8EncStartAlpha(enc); // possibly done in parallel + if (!enc->use_tokens_) { + ok = ok && VP8EncLoop(enc); + } else { + ok = ok && VP8EncTokenLoop(enc); + } + ok = ok && VP8EncFinishAlpha(enc); + + ok = ok && VP8EncWrite(enc); + StoreStats(enc); + if (!ok) { + VP8EncFreeBitWriters(enc); + } + ok &= DeleteVP8Encoder(enc); // must always be called, even if !ok + } else { + // Make sure we have ARGB samples. + if (pic->argb == NULL && !WebPPictureYUVAToARGB(pic)) { + return 0; + } + + if (!config->exact) { + WebPCleanupTransparentAreaLossless(pic); + } + + ok = VP8LEncodeImage(config, pic); // Sets pic->error in case of problem. + } + + return ok; +} |