diff options
author | Juan Linietsky <reduzio@gmail.com> | 2015-12-04 10:18:28 -0300 |
---|---|---|
committer | Juan Linietsky <reduzio@gmail.com> | 2015-12-04 10:18:28 -0300 |
commit | da113fe40d0a9410859912473d53e43903dc6c8e (patch) | |
tree | 23c6019a28a11d67241789721d1feecdd19410e6 /drivers/webp/enc | |
parent | 064fd762fae75371658e773a3acf39616e813b08 (diff) |
-Upgraded webp to a MUCH newer version. Hoping it fixes some bugs in the process. Keeping old version just in case for now.
-Added ability to convert xml and tscn scenes to binary on export, makes loading of larger scenes faster
Diffstat (limited to 'drivers/webp/enc')
-rw-r--r-- | drivers/webp/enc/alpha.c | 417 | ||||
-rw-r--r-- | drivers/webp/enc/analysis.c | 361 | ||||
-rw-r--r-- | drivers/webp/enc/backward_references.c | 1253 | ||||
-rw-r--r-- | drivers/webp/enc/backward_references.h | 188 | ||||
-rw-r--r-- | drivers/webp/enc/config.c | 69 | ||||
-rw-r--r-- | drivers/webp/enc/cost.c | 672 | ||||
-rw-r--r-- | drivers/webp/enc/cost.h | 36 | ||||
-rw-r--r-- | drivers/webp/enc/filter.c | 332 | ||||
-rw-r--r-- | drivers/webp/enc/frame.c | 987 | ||||
-rw-r--r-- | drivers/webp/enc/histogram.c | 980 | ||||
-rw-r--r-- | drivers/webp/enc/histogram.h | 87 | ||||
-rw-r--r-- | drivers/webp/enc/iterator.c | 214 | ||||
-rw-r--r-- | drivers/webp/enc/layer.c | 49 | ||||
-rw-r--r-- | drivers/webp/enc/picture.c | 1001 | ||||
-rw-r--r-- | drivers/webp/enc/quant.c | 811 | ||||
-rw-r--r-- | drivers/webp/enc/syntax.c | 134 | ||||
-rw-r--r-- | drivers/webp/enc/tree.c | 30 | ||||
-rw-r--r-- | drivers/webp/enc/vp8enci.h | 437 | ||||
-rw-r--r-- | drivers/webp/enc/vp8l.c | 1367 | ||||
-rw-r--r-- | drivers/webp/enc/vp8li.h | 23 | ||||
-rw-r--r-- | drivers/webp/enc/webpenc.c | 266 |
21 files changed, 5135 insertions, 4579 deletions
diff --git a/drivers/webp/enc/alpha.c b/drivers/webp/enc/alpha.c index e554eb7f30..fad6346e43 100644 --- a/drivers/webp/enc/alpha.c +++ b/drivers/webp/enc/alpha.c @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Alpha-plane compression. @@ -13,13 +15,11 @@ #include <stdlib.h> #include "./vp8enci.h" +#include "../dsp/dsp.h" #include "../utils/filters.h" #include "../utils/quant_levels.h" -#include "../format_constants.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif +#include "../utils/utils.h" +#include "../webp/format_constants.h" // ----------------------------------------------------------------------------- // Encodes the given alpha data via specified compression method 'method'. @@ -36,7 +36,7 @@ extern "C" { // // 'output' corresponds to the buffer containing compressed alpha data. // This buffer is allocated by this method and caller should call -// free(*output) when done. +// WebPSafeFree(*output) when done. // 'output_size' corresponds to size of this compressed alpha buffer. // // Returns 1 on successfully encoding the alpha and @@ -48,12 +48,11 @@ extern "C" { static int EncodeLossless(const uint8_t* const data, int width, int height, int effort_level, // in [0..6] range - VP8BitWriter* const bw, + VP8LBitWriter* const bw, WebPAuxStats* const stats) { int ok = 0; WebPConfig config; WebPPicture picture; - VP8LBitWriter tmp_bw; WebPPictureInit(&picture); picture.width = width; @@ -63,53 +62,51 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, if (!WebPPictureAlloc(&picture)) return 0; // Transfer the alpha values to the green channel. - { - int i, j; - uint32_t* dst = picture.argb; - const uint8_t* src = data; - for (j = 0; j < picture.height; ++j) { - for (i = 0; i < picture.width; ++i) { - dst[i] = (src[i] << 8) | 0xff000000u; - } - src += width; - dst += picture.argb_stride; - } - } + WebPDispatchAlphaToGreen(data, width, picture.width, picture.height, + picture.argb, picture.argb_stride); WebPConfigInit(&config); config.lossless = 1; config.method = effort_level; // impact is very small - // Set moderate default quality setting for alpha. Higher qualities (80 and - // above) could be very slow. - config.quality = 10.f + 15.f * effort_level; - if (config.quality > 100.f) config.quality = 100.f; + // Set a low default quality for encoding alpha. Ensure that Alpha quality at + // lower methods (3 and below) is less than the threshold for triggering + // costly 'BackwardReferencesTraceBackwards'. + config.quality = 8.f * effort_level; + assert(config.quality >= 0 && config.quality <= 100.f); - ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3); - ok = ok && (VP8LEncodeStream(&config, &picture, &tmp_bw) == VP8_ENC_OK); + ok = (VP8LEncodeStream(&config, &picture, bw) == VP8_ENC_OK); WebPPictureFree(&picture); - if (ok) { - const uint8_t* const data = VP8LBitWriterFinish(&tmp_bw); - const size_t data_size = VP8LBitWriterNumBytes(&tmp_bw); - VP8BitWriterAppend(bw, data, data_size); + ok = ok && !bw->error_; + if (!ok) { + VP8LBitWriterWipeOut(bw); + return 0; } - VP8LBitWriterDestroy(&tmp_bw); - return ok && !bw->error_; + return 1; } // ----------------------------------------------------------------------------- +// Small struct to hold the result of a filter mode compression attempt. +typedef struct { + size_t score; + VP8BitWriter bw; + WebPAuxStats stats; +} FilterTrial; + +// This function always returns an initialized 'bw' object, even upon error. static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, int method, int filter, int reduce_levels, int effort_level, // in [0..6] range uint8_t* const tmp_alpha, - VP8BitWriter* const bw, - WebPAuxStats* const stats) { + FilterTrial* result) { int ok = 0; const uint8_t* alpha_src; WebPFilterFunc filter_func; uint8_t header; - size_t expected_size; const size_t data_size = width * height; + const uint8_t* output = NULL; + size_t output_size = 0; + VP8LBitWriter tmp_bw; assert((uint64_t)data_size == (uint64_t)width * height); // as per spec assert(filter >= 0 && filter < WEBP_FILTER_LAST); @@ -118,43 +115,163 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, assert(sizeof(header) == ALPHA_HEADER_LEN); // TODO(skal): have a common function and #define's to validate alpha params. - expected_size = - (method == ALPHA_NO_COMPRESSION) ? (ALPHA_HEADER_LEN + data_size) - : (data_size >> 5); - header = method | (filter << 2); - if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4; - - VP8BitWriterInit(bw, expected_size); - VP8BitWriterAppend(bw, &header, ALPHA_HEADER_LEN); - filter_func = WebPFilters[filter]; - if (filter_func) { - filter_func(data, width, height, 1, width, tmp_alpha); + if (filter_func != NULL) { + filter_func(data, width, height, width, tmp_alpha); alpha_src = tmp_alpha; } else { alpha_src = data; } + if (method != ALPHA_NO_COMPRESSION) { + ok = VP8LBitWriterInit(&tmp_bw, data_size >> 3); + ok = ok && EncodeLossless(alpha_src, width, height, effort_level, + &tmp_bw, &result->stats); + if (ok) { + output = VP8LBitWriterFinish(&tmp_bw); + output_size = VP8LBitWriterNumBytes(&tmp_bw); + if (output_size > data_size) { + // compressed size is larger than source! Revert to uncompressed mode. + method = ALPHA_NO_COMPRESSION; + VP8LBitWriterWipeOut(&tmp_bw); + } + } else { + VP8LBitWriterWipeOut(&tmp_bw); + return 0; + } + } + if (method == ALPHA_NO_COMPRESSION) { - ok = VP8BitWriterAppend(bw, alpha_src, width * height); - ok = ok && !bw->error_; - } else { - ok = EncodeLossless(alpha_src, width, height, effort_level, bw, stats); - VP8BitWriterFinish(bw); + output = alpha_src; + output_size = data_size; + ok = 1; + } + + // Emit final result. + header = method | (filter << 2); + if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4; + + VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size); + ok = ok && VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN); + ok = ok && VP8BitWriterAppend(&result->bw, output, output_size); + + if (method != ALPHA_NO_COMPRESSION) { + VP8LBitWriterWipeOut(&tmp_bw); } + ok = ok && !result->bw.error_; + result->score = VP8BitWriterSize(&result->bw); return ok; } // ----------------------------------------------------------------------------- -// TODO(skal): move to dsp/ ? -static void CopyPlane(const uint8_t* src, int src_stride, - uint8_t* dst, int dst_stride, int width, int height) { - while (height-- > 0) { - memcpy(dst, src, width); - src += src_stride; - dst += dst_stride; +static int GetNumColors(const uint8_t* data, int width, int height, + int stride) { + int j; + int colors = 0; + uint8_t color[256] = { 0 }; + + for (j = 0; j < height; ++j) { + int i; + const uint8_t* const p = data + j * stride; + for (i = 0; i < width; ++i) { + color[p[i]] = 1; + } + } + for (j = 0; j < 256; ++j) { + if (color[j] > 0) ++colors; } + return colors; +} + +#define FILTER_TRY_NONE (1 << WEBP_FILTER_NONE) +#define FILTER_TRY_ALL ((1 << WEBP_FILTER_LAST) - 1) + +// Given the input 'filter' option, return an OR'd bit-set of filters to try. +static uint32_t GetFilterMap(const uint8_t* alpha, int width, int height, + int filter, int effort_level) { + uint32_t bit_map = 0U; + if (filter == WEBP_FILTER_FAST) { + // Quick estimate of the best candidate. + int try_filter_none = (effort_level > 3); + const int kMinColorsForFilterNone = 16; + const int kMaxColorsForFilterNone = 192; + const int num_colors = GetNumColors(alpha, width, height, width); + // For low number of colors, NONE yields better compression. + filter = (num_colors <= kMinColorsForFilterNone) + ? WEBP_FILTER_NONE + : WebPEstimateBestFilter(alpha, width, height, width); + bit_map |= 1 << filter; + // For large number of colors, try FILTER_NONE in addition to the best + // filter as well. + if (try_filter_none || num_colors > kMaxColorsForFilterNone) { + bit_map |= FILTER_TRY_NONE; + } + } else if (filter == WEBP_FILTER_NONE) { + bit_map = FILTER_TRY_NONE; + } else { // WEBP_FILTER_BEST -> try all + bit_map = FILTER_TRY_ALL; + } + return bit_map; +} + +static void InitFilterTrial(FilterTrial* const score) { + score->score = (size_t)~0U; + VP8BitWriterInit(&score->bw, 0); +} + +static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height, + size_t data_size, int method, int filter, + int reduce_levels, int effort_level, + uint8_t** const output, + size_t* const output_size, + WebPAuxStats* const stats) { + int ok = 1; + FilterTrial best; + uint32_t try_map = + GetFilterMap(alpha, width, height, filter, effort_level); + InitFilterTrial(&best); + + if (try_map != FILTER_TRY_NONE) { + uint8_t* filtered_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size); + if (filtered_alpha == NULL) return 0; + + for (filter = WEBP_FILTER_NONE; ok && try_map; ++filter, try_map >>= 1) { + if (try_map & 1) { + FilterTrial trial; + ok = EncodeAlphaInternal(alpha, width, height, method, filter, + reduce_levels, effort_level, filtered_alpha, + &trial); + if (ok && trial.score < best.score) { + VP8BitWriterWipeOut(&best.bw); + best = trial; + } else { + VP8BitWriterWipeOut(&trial.bw); + } + } + } + WebPSafeFree(filtered_alpha); + } else { + ok = EncodeAlphaInternal(alpha, width, height, method, WEBP_FILTER_NONE, + reduce_levels, effort_level, NULL, &best); + } + if (ok) { + if (stats != NULL) { + stats->lossless_features = best.stats.lossless_features; + stats->histogram_bits = best.stats.histogram_bits; + stats->transform_bits = best.stats.transform_bits; + stats->cache_bits = best.stats.cache_bits; + stats->palette_size = best.stats.palette_size; + stats->lossless_size = best.stats.lossless_size; + stats->lossless_hdr_size = best.stats.lossless_hdr_size; + stats->lossless_data_size = best.stats.lossless_data_size; + } + *output_size = VP8BitWriterSize(&best.bw); + *output = VP8BitWriterBuf(&best.bw); + } else { + VP8BitWriterWipeOut(&best.bw); + } + return ok; } static int EncodeAlpha(VP8Encoder* const enc, @@ -187,13 +304,18 @@ static int EncodeAlpha(VP8Encoder* const enc, return 0; } - quant_alpha = (uint8_t*)malloc(data_size); + if (method == ALPHA_NO_COMPRESSION) { + // Don't filter, as filtering will make no impact on compressed size. + filter = WEBP_FILTER_NONE; + } + + quant_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size); if (quant_alpha == NULL) { return 0; } // Extract alpha data (width x height) from raw_data (stride x height). - CopyPlane(pic->a, pic->a_stride, quant_alpha, width, width, height); + WebPCopyPlane(pic->a, pic->a_stride, quant_alpha, width, width, height); if (reduce_levels) { // No Quantization required for 'quality = 100'. // 16 alpha levels gives quite a low MSE w.r.t original alpha plane hence @@ -205,126 +327,99 @@ static int EncodeAlpha(VP8Encoder* const enc, } if (ok) { - VP8BitWriter bw; - int test_filter; - uint8_t* filtered_alpha = NULL; - - // We always test WEBP_FILTER_NONE first. - ok = EncodeAlphaInternal(quant_alpha, width, height, - method, WEBP_FILTER_NONE, reduce_levels, - effort_level, NULL, &bw, pic->stats); - if (!ok) { - VP8BitWriterWipeOut(&bw); - goto End; - } - - if (filter == WEBP_FILTER_FAST) { // Quick estimate of a second candidate? - filter = EstimateBestFilter(quant_alpha, width, height, width); - } - // Stop? - if (filter == WEBP_FILTER_NONE) { - goto Ok; - } - - filtered_alpha = (uint8_t*)malloc(data_size); - ok = (filtered_alpha != NULL); - if (!ok) { - goto End; + VP8FiltersInit(); + ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method, + filter, reduce_levels, effort_level, output, + output_size, pic->stats); + if (pic->stats != NULL) { // need stats? + pic->stats->coded_size += (int)(*output_size); + enc->sse_[3] = sse; } - - // Try the other mode(s). - { - WebPAuxStats best_stats; - size_t best_score = VP8BitWriterSize(&bw); - - memset(&best_stats, 0, sizeof(best_stats)); // prevent spurious warning - if (pic->stats != NULL) best_stats = *pic->stats; - for (test_filter = WEBP_FILTER_HORIZONTAL; - ok && (test_filter <= WEBP_FILTER_GRADIENT); - ++test_filter) { - VP8BitWriter tmp_bw; - if (filter != WEBP_FILTER_BEST && test_filter != filter) { - continue; - } - ok = EncodeAlphaInternal(quant_alpha, width, height, - method, test_filter, reduce_levels, - effort_level, filtered_alpha, &tmp_bw, - pic->stats); - if (ok) { - const size_t score = VP8BitWriterSize(&tmp_bw); - if (score < best_score) { - // swap bitwriter objects. - VP8BitWriter tmp = tmp_bw; - tmp_bw = bw; - bw = tmp; - best_score = score; - if (pic->stats != NULL) best_stats = *pic->stats; - } - } else { - VP8BitWriterWipeOut(&bw); - } - VP8BitWriterWipeOut(&tmp_bw); - } - if (pic->stats != NULL) *pic->stats = best_stats; - } - Ok: - if (ok) { - *output_size = VP8BitWriterSize(&bw); - *output = VP8BitWriterBuf(&bw); - if (pic->stats != NULL) { // need stats? - pic->stats->coded_size += (int)(*output_size); - enc->sse_[3] = sse; - } - } - free(filtered_alpha); } - End: - free(quant_alpha); + + WebPSafeFree(quant_alpha); return ok; } - //------------------------------------------------------------------------------ // Main calls +static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) { + const WebPConfig* config = enc->config_; + uint8_t* alpha_data = NULL; + size_t alpha_size = 0; + const int effort_level = config->method; // maps to [0..6] + const WEBP_FILTER_TYPE filter = + (config->alpha_filtering == 0) ? WEBP_FILTER_NONE : + (config->alpha_filtering == 1) ? WEBP_FILTER_FAST : + WEBP_FILTER_BEST; + if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression, + filter, effort_level, &alpha_data, &alpha_size)) { + return 0; + } + if (alpha_size != (uint32_t)alpha_size) { // Sanity check. + WebPSafeFree(alpha_data); + return 0; + } + enc->alpha_data_size_ = (uint32_t)alpha_size; + enc->alpha_data_ = alpha_data; + (void)dummy; + return 1; +} + void VP8EncInitAlpha(VP8Encoder* const enc) { + WebPInitAlphaProcessing(); enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_); enc->alpha_data_ = NULL; enc->alpha_data_size_ = 0; + if (enc->thread_level_ > 0) { + WebPWorker* const worker = &enc->alpha_worker_; + WebPGetWorkerInterface()->Init(worker); + worker->data1 = enc; + worker->data2 = NULL; + worker->hook = (WebPWorkerHook)CompressAlphaJob; + } } -int VP8EncFinishAlpha(VP8Encoder* const enc) { +int VP8EncStartAlpha(VP8Encoder* const enc) { if (enc->has_alpha_) { - const WebPConfig* config = enc->config_; - uint8_t* tmp_data = NULL; - size_t tmp_size = 0; - const int effort_level = config->method; // maps to [0..6] - const WEBP_FILTER_TYPE filter = - (config->alpha_filtering == 0) ? WEBP_FILTER_NONE : - (config->alpha_filtering == 1) ? WEBP_FILTER_FAST : - WEBP_FILTER_BEST; - - if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression, - filter, effort_level, &tmp_data, &tmp_size)) { - return 0; + if (enc->thread_level_ > 0) { + WebPWorker* const worker = &enc->alpha_worker_; + // Makes sure worker is good to go. + if (!WebPGetWorkerInterface()->Reset(worker)) { + return 0; + } + WebPGetWorkerInterface()->Launch(worker); + return 1; + } else { + return CompressAlphaJob(enc, NULL); // just do the job right away } - if (tmp_size != (uint32_t)tmp_size) { // Sanity check. - free(tmp_data); - return 0; + } + return 1; +} + +int VP8EncFinishAlpha(VP8Encoder* const enc) { + if (enc->has_alpha_) { + if (enc->thread_level_ > 0) { + WebPWorker* const worker = &enc->alpha_worker_; + if (!WebPGetWorkerInterface()->Sync(worker)) return 0; // error } - enc->alpha_data_size_ = (uint32_t)tmp_size; - enc->alpha_data_ = tmp_data; } return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_); } -void VP8EncDeleteAlpha(VP8Encoder* const enc) { - free(enc->alpha_data_); +int VP8EncDeleteAlpha(VP8Encoder* const enc) { + int ok = 1; + if (enc->thread_level_ > 0) { + WebPWorker* const worker = &enc->alpha_worker_; + // finish anything left in flight + ok = WebPGetWorkerInterface()->Sync(worker); + // still need to end the worker, even if !ok + WebPGetWorkerInterface()->End(worker); + } + WebPSafeFree(enc->alpha_data_); enc->alpha_data_ = NULL; enc->alpha_data_size_ = 0; enc->has_alpha_ = 0; + return ok; } - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/enc/analysis.c b/drivers/webp/enc/analysis.c index 22cfb492e7..b55128fd48 100644 --- a/drivers/webp/enc/analysis.c +++ b/drivers/webp/enc/analysis.c @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Macroblock analysis @@ -17,16 +19,8 @@ #include "./cost.h" #include "../utils/utils.h" -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - #define MAX_ITERS_K_MEANS 6 -static int ClipAlpha(int alpha) { - return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha; -} - //------------------------------------------------------------------------------ // Smooth the segment map by replacing isolated block by the majority of its // neighbours. @@ -36,7 +30,7 @@ static void SmoothSegmentMap(VP8Encoder* const enc) { const int w = enc->mb_w_; const int h = enc->mb_h_; const int majority_cnt_3_x_3_grid = 5; - uint8_t* const tmp = (uint8_t*)WebPSafeMalloc((uint64_t)w * h, sizeof(*tmp)); + uint8_t* const tmp = (uint8_t*)WebPSafeMalloc(w * h, sizeof(*tmp)); assert((uint64_t)(w * h) == (uint64_t)w * h); // no overflow, as per spec if (tmp == NULL) return; @@ -57,6 +51,7 @@ static void SmoothSegmentMap(VP8Encoder* const enc) { for (n = 0; n < NUM_MB_SEGMENTS; ++n) { if (cnt[n] >= majority_cnt_3_x_3_grid) { majority_seg = n; + break; } } tmp[x + y * w] = majority_seg; @@ -68,54 +63,14 @@ static void SmoothSegmentMap(VP8Encoder* const enc) { mb->segment_ = tmp[x + y * w]; } } - free(tmp); + WebPSafeFree(tmp); } //------------------------------------------------------------------------------ -// Finalize Segment probability based on the coding tree - -static int GetProba(int a, int b) { - int proba; - const int total = a + b; - if (total == 0) return 255; // that's the default probability. - proba = (255 * a + total / 2) / total; - return proba; -} - -static void SetSegmentProbas(VP8Encoder* const enc) { - int p[NUM_MB_SEGMENTS] = { 0 }; - int n; - - for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) { - const VP8MBInfo* const mb = &enc->mb_info_[n]; - p[mb->segment_]++; - } - if (enc->pic_->stats) { - for (n = 0; n < NUM_MB_SEGMENTS; ++n) { - enc->pic_->stats->segment_size[n] = p[n]; - } - } - if (enc->segment_hdr_.num_segments_ > 1) { - uint8_t* const probas = enc->proba_.segments_; - probas[0] = GetProba(p[0] + p[1], p[2] + p[3]); - probas[1] = GetProba(p[0], p[1]); - probas[2] = GetProba(p[2], p[3]); - - enc->segment_hdr_.update_map_ = - (probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255); - enc->segment_hdr_.size_ = - p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) + - p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) + - p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) + - p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2])); - } else { - enc->segment_hdr_.update_map_ = 0; - enc->segment_hdr_.size_ = 0; - } -} +// set segment susceptibility alpha_ / beta_ static WEBP_INLINE int clip(int v, int m, int M) { - return v < m ? m : v > M ? M : v; + return (v < m) ? m : (v > M) ? M : v; } static void SetSegmentAlphas(VP8Encoder* const enc, @@ -142,28 +97,77 @@ static void SetSegmentAlphas(VP8Encoder* const enc, } //------------------------------------------------------------------------------ +// Compute susceptibility based on DCT-coeff histograms: +// the higher, the "easier" the macroblock is to compress. + +#define MAX_ALPHA 255 // 8b of precision for susceptibilities. +#define ALPHA_SCALE (2 * MAX_ALPHA) // scaling factor for alpha. +#define DEFAULT_ALPHA (-1) +#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha)) + +static int FinalAlphaValue(int alpha) { + alpha = MAX_ALPHA - alpha; + return clip(alpha, 0, MAX_ALPHA); +} + +static int GetAlpha(const VP8Histogram* const histo) { + // 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer + // values which happen to be mostly noise. This leaves the maximum precision + // for handling the useful small values which contribute most. + const int max_value = histo->max_value; + const int last_non_zero = histo->last_non_zero; + const int alpha = + (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0; + return alpha; +} + +static void InitHistogram(VP8Histogram* const histo) { + histo->max_value = 0; + histo->last_non_zero = 1; +} + +static void MergeHistograms(const VP8Histogram* const in, + VP8Histogram* const out) { + if (in->max_value > out->max_value) { + out->max_value = in->max_value; + } + if (in->last_non_zero > out->last_non_zero) { + out->last_non_zero = in->last_non_zero; + } +} + +//------------------------------------------------------------------------------ // Simplified k-Means, to assign Nb segments based on alpha-histogram -static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) { - const int nb = enc->segment_hdr_.num_segments_; +static void AssignSegments(VP8Encoder* const enc, + const int alphas[MAX_ALPHA + 1]) { + // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an + // explicit check is needed to avoid spurious warning about 'n + 1' exceeding + // array bounds of 'centers' with some compilers (noticed with gcc-4.9). + const int nb = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) ? + enc->segment_hdr_.num_segments_ : NUM_MB_SEGMENTS; int centers[NUM_MB_SEGMENTS]; int weighted_average = 0; - int map[256]; + int map[MAX_ALPHA + 1]; int a, n, k; - int min_a = 0, max_a = 255, range_a; + int min_a = 0, max_a = MAX_ALPHA, range_a; // 'int' type is ok for histo, and won't overflow int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS]; + assert(nb >= 1); + assert(nb <= NUM_MB_SEGMENTS); + // bracket the input - for (n = 0; n < 256 && alphas[n] == 0; ++n) {} + for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {} min_a = n; - for (n = 255; n > min_a && alphas[n] == 0; --n) {} + for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {} max_a = n; range_a = max_a - min_a; // Spread initial centers evenly - for (n = 1, k = 0; n < 2 * nb; n += 2) { - centers[k++] = min_a + (n * range_a) / (2 * nb); + for (k = 0, n = 1; k < nb; ++k, n += 2) { + assert(n < 2 * nb); + centers[k] = min_a + (n * range_a) / (2 * nb); } for (k = 0; k < MAX_ITERS_K_MEANS; ++k) { // few iters are enough @@ -178,7 +182,7 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) { n = 0; // track the nearest center for current 'a' for (a = min_a; a <= max_a; ++a) { if (alphas[a]) { - while (n < nb - 1 && abs(a - centers[n + 1]) < abs(a - centers[n])) { + while (n + 1 < nb && abs(a - centers[n + 1]) < abs(a - centers[n])) { n++; } map[a] = n; @@ -210,7 +214,7 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) { VP8MBInfo* const mb = &enc->mb_info_[n]; const int alpha = mb->alpha_; mb->segment_ = map[alpha]; - mb->alpha_ = centers[map[alpha]]; // just for the record. + mb->alpha_ = centers[map[alpha]]; // for the record. } if (nb > 1) { @@ -218,7 +222,6 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) { if (smooth) SmoothSegmentMap(enc); } - SetSegmentProbas(enc); // Assign final proba SetSegmentAlphas(enc, centers, weighted_average); // pick some alphas. } @@ -227,24 +230,30 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) { // susceptibility and set best modes for this macroblock. // Segment assignment is done later. -// Number of modes to inspect for alpha_ evaluation. For high-quality settings, -// we don't need to test all the possible modes during the analysis phase. +// Number of modes to inspect for alpha_ evaluation. We don't need to test all +// the possible modes during the analysis phase: we risk falling into a local +// optimum, or be subject to boundary effect #define MAX_INTRA16_MODE 2 #define MAX_INTRA4_MODE 2 #define MAX_UV_MODE 2 static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) { - const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA16_MODE : 4; + const int max_mode = MAX_INTRA16_MODE; int mode; - int best_alpha = -1; + int best_alpha = DEFAULT_ALPHA; int best_mode = 0; VP8MakeLuma16Preds(it); for (mode = 0; mode < max_mode; ++mode) { - const int alpha = VP8CollectHistogram(it->yuv_in_ + Y_OFF, - it->yuv_p_ + VP8I16ModeOffsets[mode], - 0, 16); - if (alpha > best_alpha) { + VP8Histogram histo; + int alpha; + + InitHistogram(&histo); + VP8CollectHistogram(it->yuv_in_ + Y_OFF_ENC, + it->yuv_p_ + VP8I16ModeOffsets[mode], + 0, 16, &histo); + alpha = GetAlpha(&histo); + if (IS_BETTER_ALPHA(alpha, best_alpha)) { best_alpha = alpha; best_mode = mode; } @@ -256,46 +265,62 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) { static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it, int best_alpha) { uint8_t modes[16]; - const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA4_MODE : NUM_BMODES; - int i4_alpha = 0; + const int max_mode = MAX_INTRA4_MODE; + int i4_alpha; + VP8Histogram total_histo; + int cur_histo = 0; + InitHistogram(&total_histo); + VP8IteratorStartI4(it); do { int mode; - int best_mode_alpha = -1; - const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; + int best_mode_alpha = DEFAULT_ALPHA; + VP8Histogram histos[2]; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; VP8MakeIntra4Preds(it); for (mode = 0; mode < max_mode; ++mode) { - const int alpha = VP8CollectHistogram(src, - it->yuv_p_ + VP8I4ModeOffsets[mode], - 0, 1); - if (alpha > best_mode_alpha) { + int alpha; + + InitHistogram(&histos[cur_histo]); + VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode], + 0, 1, &histos[cur_histo]); + alpha = GetAlpha(&histos[cur_histo]); + if (IS_BETTER_ALPHA(alpha, best_mode_alpha)) { best_mode_alpha = alpha; modes[it->i4_] = mode; + cur_histo ^= 1; // keep track of best histo so far. } } - i4_alpha += best_mode_alpha; + // accumulate best histogram + MergeHistograms(&histos[cur_histo ^ 1], &total_histo); // Note: we reuse the original samples for predictors - } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF)); + } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF_ENC)); - if (i4_alpha > best_alpha) { + i4_alpha = GetAlpha(&total_histo); + if (IS_BETTER_ALPHA(i4_alpha, best_alpha)) { VP8SetIntra4Mode(it, modes); - best_alpha = ClipAlpha(i4_alpha); + best_alpha = i4_alpha; } return best_alpha; } static int MBAnalyzeBestUVMode(VP8EncIterator* const it) { - int best_alpha = -1; + int best_alpha = DEFAULT_ALPHA; int best_mode = 0; - const int max_mode = (it->enc_->method_ >= 3) ? MAX_UV_MODE : 4; + const int max_mode = MAX_UV_MODE; int mode; + VP8MakeChroma8Preds(it); for (mode = 0; mode < max_mode; ++mode) { - const int alpha = VP8CollectHistogram(it->yuv_in_ + U_OFF, - it->yuv_p_ + VP8UVModeOffsets[mode], - 16, 16 + 4 + 4); - if (alpha > best_alpha) { + VP8Histogram histo; + int alpha; + InitHistogram(&histo); + VP8CollectHistogram(it->yuv_in_ + U_OFF_ENC, + it->yuv_p_ + VP8UVModeOffsets[mode], + 16, 16 + 4 + 4, &histo); + alpha = GetAlpha(&histo); + if (IS_BETTER_ALPHA(alpha, best_alpha)) { best_alpha = alpha; best_mode = mode; } @@ -305,7 +330,8 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) { } static void MBAnalyze(VP8EncIterator* const it, - int alphas[256], int* const uv_alpha) { + int alphas[MAX_ALPHA + 1], + int* const alpha, int* const uv_alpha) { const VP8Encoder* const enc = it->enc_; int best_alpha, best_uv_alpha; @@ -314,7 +340,7 @@ static void MBAnalyze(VP8EncIterator* const it, VP8SetSegment(it, 0); // default segment, spec-wise. best_alpha = MBAnalyzeBestIntra16Mode(it); - if (enc->method_ != 3) { + if (enc->method_ >= 5) { // We go and make a fast decision for intra4/intra16. // It's usually not a good and definitive pick, but helps seeding the stats // about level bit-cost. @@ -324,10 +350,22 @@ static void MBAnalyze(VP8EncIterator* const it, best_uv_alpha = MBAnalyzeBestUVMode(it); // Final susceptibility mix - best_alpha = (best_alpha + best_uv_alpha + 1) / 2; + best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2; + best_alpha = FinalAlphaValue(best_alpha); alphas[best_alpha]++; + it->mb_->alpha_ = best_alpha; // for later remapping. + + // Accumulate for later complexity analysis. + *alpha += best_alpha; // mixed susceptibility (not just luma) *uv_alpha += best_uv_alpha; - it->mb_->alpha_ = best_alpha; // Informative only. +} + +static void DefaultMBInfo(VP8MBInfo* const mb) { + mb->type_ = 1; // I16x16 + mb->uv_mode_ = 0; + mb->skip_ = 0; // not skipped + mb->segment_ = 0; // default segment + mb->alpha_ = 0; } //------------------------------------------------------------------------------ @@ -340,25 +378,124 @@ static void MBAnalyze(VP8EncIterator* const it, // and decide intra4/intra16, but that's usually almost always a bad choice at // this stage. -int VP8EncAnalyze(VP8Encoder* const enc) { - int ok = 1; - int alphas[256] = { 0 }; - VP8EncIterator it; - - VP8IteratorInit(enc, &it); +static void ResetAllMBInfo(VP8Encoder* const enc) { + int n; + for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) { + DefaultMBInfo(&enc->mb_info_[n]); + } + // Default susceptibilities. + enc->dqm_[0].alpha_ = 0; + enc->dqm_[0].beta_ = 0; + // Note: we can't compute this alpha_ / uv_alpha_ -> set to default value. + enc->alpha_ = 0; enc->uv_alpha_ = 0; - do { - VP8IteratorImport(&it); - MBAnalyze(&it, alphas, &enc->uv_alpha_); - ok = VP8IteratorProgress(&it, 20); - // Let's pretend we have perfect lossless reconstruction. - } while (ok && VP8IteratorNext(&it, it.yuv_in_)); - enc->uv_alpha_ /= enc->mb_w_ * enc->mb_h_; - if (ok) AssignSegments(enc, alphas); + WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_); +} + +// struct used to collect job result +typedef struct { + WebPWorker worker; + int alphas[MAX_ALPHA + 1]; + int alpha, uv_alpha; + VP8EncIterator it; + int delta_progress; +} SegmentJob; +// main work call +static int DoSegmentsJob(SegmentJob* const job, VP8EncIterator* const it) { + int ok = 1; + if (!VP8IteratorIsDone(it)) { + uint8_t tmp[32 + WEBP_ALIGN_CST]; + uint8_t* const scratch = (uint8_t*)WEBP_ALIGN(tmp); + do { + // Let's pretend we have perfect lossless reconstruction. + VP8IteratorImport(it, scratch); + MBAnalyze(it, job->alphas, &job->alpha, &job->uv_alpha); + ok = VP8IteratorProgress(it, job->delta_progress); + } while (ok && VP8IteratorNext(it)); + } return ok; } -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" +static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) { + int i; + for (i = 0; i <= MAX_ALPHA; ++i) dst->alphas[i] += src->alphas[i]; + dst->alpha += src->alpha; + dst->uv_alpha += src->uv_alpha; +} + +// initialize the job struct with some TODOs +static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job, + int start_row, int end_row) { + WebPGetWorkerInterface()->Init(&job->worker); + job->worker.data1 = job; + job->worker.data2 = &job->it; + job->worker.hook = (WebPWorkerHook)DoSegmentsJob; + VP8IteratorInit(enc, &job->it); + VP8IteratorSetRow(&job->it, start_row); + VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w_); + memset(job->alphas, 0, sizeof(job->alphas)); + job->alpha = 0; + job->uv_alpha = 0; + // only one of both jobs can record the progress, since we don't + // expect the user's hook to be multi-thread safe + job->delta_progress = (start_row == 0) ? 20 : 0; +} + +// main entry point +int VP8EncAnalyze(VP8Encoder* const enc) { + int ok = 1; + const int do_segments = + enc->config_->emulate_jpeg_size || // We need the complexity evaluation. + (enc->segment_hdr_.num_segments_ > 1) || + (enc->method_ == 0); // for method 0, we need preds_[] to be filled. + if (do_segments) { + const int last_row = enc->mb_h_; + // We give a little more than a half work to the main thread. + const int split_row = (9 * last_row + 15) >> 4; + const int total_mb = last_row * enc->mb_w_; +#ifdef WEBP_USE_THREAD + const int kMinSplitRow = 2; // minimal rows needed for mt to be worth it + const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow); +#else + const int do_mt = 0; #endif + const WebPWorkerInterface* const worker_interface = + WebPGetWorkerInterface(); + SegmentJob main_job; + if (do_mt) { + SegmentJob side_job; + // Note the use of '&' instead of '&&' because we must call the functions + // no matter what. + InitSegmentJob(enc, &main_job, 0, split_row); + InitSegmentJob(enc, &side_job, split_row, last_row); + // we don't need to call Reset() on main_job.worker, since we're calling + // WebPWorkerExecute() on it + ok &= worker_interface->Reset(&side_job.worker); + // launch the two jobs in parallel + if (ok) { + worker_interface->Launch(&side_job.worker); + worker_interface->Execute(&main_job.worker); + ok &= worker_interface->Sync(&side_job.worker); + ok &= worker_interface->Sync(&main_job.worker); + } + worker_interface->End(&side_job.worker); + if (ok) MergeJobs(&side_job, &main_job); // merge results together + } else { + // Even for single-thread case, we use the generic Worker tools. + InitSegmentJob(enc, &main_job, 0, last_row); + worker_interface->Execute(&main_job.worker); + ok &= worker_interface->Sync(&main_job.worker); + } + worker_interface->End(&main_job.worker); + if (ok) { + enc->alpha_ = main_job.alpha / total_mb; + enc->uv_alpha_ = main_job.uv_alpha / total_mb; + AssignSegments(enc, main_job.alphas); + } + } else { // Use only one default segment. + ResetAllMBInfo(enc); + } + return ok; +} + diff --git a/drivers/webp/enc/backward_references.c b/drivers/webp/enc/backward_references.c index b8c8ece806..049125e521 100644 --- a/drivers/webp/enc/backward_references.c +++ b/drivers/webp/enc/backward_references.c @@ -1,8 +1,10 @@ // Copyright 2012 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Author: Jyrki Alakuijala (jyrki@google.com) @@ -10,7 +12,6 @@ #include <assert.h> #include <math.h> -#include <stdio.h> #include "./backward_references.h" #include "./histogram.h" @@ -20,9 +21,9 @@ #define VALUES_IN_BYTE 256 -#define HASH_BITS 18 -#define HASH_SIZE (1 << HASH_BITS) -#define HASH_MULTIPLIER (0xc6a4a7935bd1e995ULL) +#define MIN_BLOCK_SIZE 256 // minimum block size for backward references + +#define MAX_ENTROPY (1e30f) // 1M window (4M bytes) minus 120 special codes for short distances. #define WINDOW_SIZE ((1 << 20) - 120) @@ -31,14 +32,6 @@ #define MIN_LENGTH 2 #define MAX_LENGTH 4096 -typedef struct { - // Stores the most recently added position with the given hash value. - int32_t hash_to_first_index_[HASH_SIZE]; - // chain_[pos] stores the previous position with the same hash value - // for every pixel in the image. - int32_t* chain_; -} HashChain; - // ----------------------------------------------------------------------------- static const uint8_t plane_to_code_lut[128] = { @@ -65,145 +58,275 @@ static int DistanceToPlaneCode(int xsize, int dist) { static WEBP_INLINE int FindMatchLength(const uint32_t* const array1, const uint32_t* const array2, - const int max_limit) { + int best_len_match, + int max_limit) { +#if !defined(__x86_64__) + // TODO(vrabaud): Compare on other architectures. int match_len = 0; + // Before 'expensive' linear match, check if the two arrays match at the + // current best length index. + if (array1[best_len_match] != array2[best_len_match]) return 0; while (match_len < max_limit && array1[match_len] == array2[match_len]) { ++match_len; } return match_len; +#else + const uint32_t* array1_32 = array1; + const uint32_t* array2_32 = array2; + // max value is aligned to (uint64_t*) array1 + const uint32_t* const array1_32_max = array1 + (max_limit & ~1); + + // Before 'expensive' linear match, check if the two arrays match at the + // current best length index. + if (array1[best_len_match] != array2[best_len_match]) return 0; + + // TODO(vrabaud): add __predict_true on bound checking? + while (array1_32 < array1_32_max) { + if (*(uint64_t*)array1_32 == *(uint64_t*)array2_32) { + array1_32 += 2; + array2_32 += 2; + } else { + // if the uint32_t pointed to are the same, then the following ones have + // to be different + return (array1_32 - array1) + (*array1_32 == *array2_32); + } + } + + // Deal with the potential last uint32_t. + if ((max_limit & 1) && (*array1_32 != *array2_32)) return max_limit - 1; + return max_limit; +#endif } // ----------------------------------------------------------------------------- // VP8LBackwardRefs -void VP8LInitBackwardRefs(VP8LBackwardRefs* const refs) { - if (refs != NULL) { - refs->refs = NULL; - refs->size = 0; - refs->max_size = 0; +struct PixOrCopyBlock { + PixOrCopyBlock* next_; // next block (or NULL) + PixOrCopy* start_; // data start + int size_; // currently used size +}; + +static void ClearBackwardRefs(VP8LBackwardRefs* const refs) { + assert(refs != NULL); + if (refs->tail_ != NULL) { + *refs->tail_ = refs->free_blocks_; // recycle all blocks at once } + refs->free_blocks_ = refs->refs_; + refs->tail_ = &refs->refs_; + refs->last_block_ = NULL; + refs->refs_ = NULL; } -void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs) { - if (refs != NULL) { - free(refs->refs); - VP8LInitBackwardRefs(refs); +void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) { + assert(refs != NULL); + ClearBackwardRefs(refs); + while (refs->free_blocks_ != NULL) { + PixOrCopyBlock* const next = refs->free_blocks_->next_; + WebPSafeFree(refs->free_blocks_); + refs->free_blocks_ = next; } } -int VP8LBackwardRefsAlloc(VP8LBackwardRefs* const refs, int max_size) { +void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) { assert(refs != NULL); - refs->size = 0; - refs->max_size = 0; - refs->refs = (PixOrCopy*)WebPSafeMalloc((uint64_t)max_size, - sizeof(*refs->refs)); - if (refs->refs == NULL) return 0; - refs->max_size = max_size; + memset(refs, 0, sizeof(*refs)); + refs->tail_ = &refs->refs_; + refs->block_size_ = + (block_size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : block_size; +} + +VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) { + VP8LRefsCursor c; + c.cur_block_ = refs->refs_; + if (refs->refs_ != NULL) { + c.cur_pos = c.cur_block_->start_; + c.last_pos_ = c.cur_pos + c.cur_block_->size_; + } else { + c.cur_pos = NULL; + c.last_pos_ = NULL; + } + return c; +} + +void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) { + PixOrCopyBlock* const b = c->cur_block_->next_; + c->cur_pos = (b == NULL) ? NULL : b->start_; + c->last_pos_ = (b == NULL) ? NULL : b->start_ + b->size_; + c->cur_block_ = b; +} + +// Create a new block, either from the free list or allocated +static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) { + PixOrCopyBlock* b = refs->free_blocks_; + if (b == NULL) { // allocate new memory chunk + const size_t total_size = + sizeof(*b) + refs->block_size_ * sizeof(*b->start_); + b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size); + if (b == NULL) { + refs->error_ |= 1; + return NULL; + } + b->start_ = (PixOrCopy*)((uint8_t*)b + sizeof(*b)); // not always aligned + } else { // recycle from free-list + refs->free_blocks_ = b->next_; + } + *refs->tail_ = b; + refs->tail_ = &b->next_; + refs->last_block_ = b; + b->next_ = NULL; + b->size_ = 0; + return b; +} + +static WEBP_INLINE void BackwardRefsCursorAdd(VP8LBackwardRefs* const refs, + const PixOrCopy v) { + PixOrCopyBlock* b = refs->last_block_; + if (b == NULL || b->size_ == refs->block_size_) { + b = BackwardRefsNewBlock(refs); + if (b == NULL) return; // refs->error_ is set + } + b->start_[b->size_++] = v; +} + +int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src, + VP8LBackwardRefs* const dst) { + const PixOrCopyBlock* b = src->refs_; + ClearBackwardRefs(dst); + assert(src->block_size_ == dst->block_size_); + while (b != NULL) { + PixOrCopyBlock* const new_b = BackwardRefsNewBlock(dst); + if (new_b == NULL) return 0; // dst->error_ is set + memcpy(new_b->start_, b->start_, b->size_ * sizeof(*b->start_)); + new_b->size_ = b->size_; + b = b->next_; + } return 1; } // ----------------------------------------------------------------------------- // Hash chains -static WEBP_INLINE uint64_t GetPixPairHash64(const uint32_t* const argb) { - uint64_t key = ((uint64_t)(argb[1]) << 32) | argb[0]; - key = (key * HASH_MULTIPLIER) >> (64 - HASH_BITS); - return key; -} - -static int HashChainInit(HashChain* const p, int size) { +// initialize as empty +static void HashChainReset(VP8LHashChain* const p) { int i; - p->chain_ = (int*)WebPSafeMalloc((uint64_t)size, sizeof(*p->chain_)); - if (p->chain_ == NULL) { - return 0; - } - for (i = 0; i < size; ++i) { + assert(p != NULL); + for (i = 0; i < p->size_; ++i) { p->chain_[i] = -1; } for (i = 0; i < HASH_SIZE; ++i) { p->hash_to_first_index_[i] = -1; } +} + +int VP8LHashChainInit(VP8LHashChain* const p, int size) { + assert(p->size_ == 0); + assert(p->chain_ == NULL); + assert(size > 0); + p->chain_ = (int*)WebPSafeMalloc(size, sizeof(*p->chain_)); + if (p->chain_ == NULL) return 0; + p->size_ = size; + HashChainReset(p); return 1; } -static void HashChainDelete(HashChain* const p) { - if (p != NULL) { - free(p->chain_); - free(p); - } +void VP8LHashChainClear(VP8LHashChain* const p) { + assert(p != NULL); + WebPSafeFree(p->chain_); + p->size_ = 0; + p->chain_ = NULL; +} + +// ----------------------------------------------------------------------------- + +#define HASH_MULTIPLIER_HI (0xc6a4a793U) +#define HASH_MULTIPLIER_LO (0x5bd1e996U) + +static WEBP_INLINE uint32_t GetPixPairHash64(const uint32_t* const argb) { + uint32_t key; + key = argb[1] * HASH_MULTIPLIER_HI; + key += argb[0] * HASH_MULTIPLIER_LO; + key = key >> (32 - HASH_BITS); + return key; } // Insertion of two pixels at a time. -static void HashChainInsert(HashChain* const p, +static void HashChainInsert(VP8LHashChain* const p, const uint32_t* const argb, int pos) { - const uint64_t hash_code = GetPixPairHash64(argb); + const uint32_t hash_code = GetPixPairHash64(argb); p->chain_[pos] = p->hash_to_first_index_[hash_code]; p->hash_to_first_index_[hash_code] = pos; } -static int HashChainFindCopy(const HashChain* const p, - int quality, int index, int xsize, - const uint32_t* const argb, int maxlen, +// Returns the maximum number of hash chain lookups to do for a +// given compression quality. Return value in range [6, 86]. +static int GetMaxItersForQuality(int quality, int low_effort) { + return (low_effort ? 6 : 8) + (quality * quality) / 128; +} + +static int GetWindowSizeForHashChain(int quality, int xsize) { + const int max_window_size = (quality > 75) ? WINDOW_SIZE + : (quality > 50) ? (xsize << 8) + : (quality > 25) ? (xsize << 6) + : (xsize << 4); + assert(xsize > 0); + return (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE : max_window_size; +} + +static WEBP_INLINE int MaxFindCopyLength(int len) { + return (len < MAX_LENGTH) ? len : MAX_LENGTH; +} + +static void HashChainFindOffset(const VP8LHashChain* const p, int base_position, + const uint32_t* const argb, int len, + int window_size, int* const distance_ptr) { + const uint32_t* const argb_start = argb + base_position; + const int min_pos = + (base_position > window_size) ? base_position - window_size : 0; + int pos; + assert(len <= MAX_LENGTH); + for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)]; + pos >= min_pos; + pos = p->chain_[pos]) { + const int curr_length = + FindMatchLength(argb + pos, argb_start, len - 1, len); + if (curr_length == len) break; + } + *distance_ptr = base_position - pos; +} + +static int HashChainFindCopy(const VP8LHashChain* const p, + int base_position, + const uint32_t* const argb, int max_len, + int window_size, int iter_max, int* const distance_ptr, int* const length_ptr) { - const uint64_t hash_code = GetPixPairHash64(&argb[index]); - int prev_length = 0; - int64_t best_val = 0; + const uint32_t* const argb_start = argb + base_position; + int iter = iter_max; int best_length = 0; int best_distance = 0; - const uint32_t* const argb_start = argb + index; - const int iter_min_mult = (quality < 50) ? 2 : (quality < 75) ? 4 : 8; - const int iter_min = -quality * iter_min_mult; - int iter_cnt = 10 + (quality >> 1); - const int min_pos = (index > WINDOW_SIZE) ? index - WINDOW_SIZE : 0; + const int min_pos = + (base_position > window_size) ? base_position - window_size : 0; int pos; - - assert(xsize > 0); - for (pos = p->hash_to_first_index_[hash_code]; + int length_max = 256; + if (max_len < length_max) { + length_max = max_len; + } + for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)]; pos >= min_pos; pos = p->chain_[pos]) { - int64_t val; int curr_length; - if (iter_cnt < 0) { - if (iter_cnt < iter_min || best_val >= 0xff0000) { - break; - } - } - --iter_cnt; - if (best_length != 0 && - argb[pos + best_length - 1] != argb_start[best_length - 1]) { - continue; - } - curr_length = FindMatchLength(argb + pos, argb_start, maxlen); - if (curr_length < prev_length) { - continue; + int distance; + if (--iter < 0) { + break; } - val = 65536 * curr_length; - // Favoring 2d locality here gives savings for certain images. - if (index - pos < 9 * xsize) { - const int y = (index - pos) / xsize; - int x = (index - pos) % xsize; - if (x > xsize / 2) { - x = xsize - x; - } - if (x <= 7 && x >= -8) { - val -= y * y + x * x; - } else { - val -= 9 * 9 + 9 * 9; - } - } else { - val -= 9 * 9 + 9 * 9; - } - if (best_val < val) { - prev_length = curr_length; - best_val = val; + + curr_length = FindMatchLength(argb + pos, argb_start, best_length, max_len); + if (best_length < curr_length) { + distance = base_position - pos; best_length = curr_length; - best_distance = index - pos; - if (curr_length >= MAX_LENGTH) { - break; - } - if ((best_distance == 1 || best_distance == xsize) && - best_length >= 128) { + best_distance = distance; + if (curr_length >= length_max) { break; } } @@ -213,140 +336,153 @@ static int HashChainFindCopy(const HashChain* const p, return (best_length >= MIN_LENGTH); } -static WEBP_INLINE void PushBackCopy(VP8LBackwardRefs* const refs, int length) { - int size = refs->size; - while (length >= MAX_LENGTH) { - refs->refs[size++] = PixOrCopyCreateCopy(1, MAX_LENGTH); - length -= MAX_LENGTH; - } - if (length > 0) { - refs->refs[size++] = PixOrCopyCreateCopy(1, length); +static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache, + VP8LColorCache* const hashers, + VP8LBackwardRefs* const refs) { + PixOrCopy v; + if (use_color_cache) { + const uint32_t key = VP8LColorCacheGetIndex(hashers, pixel); + if (VP8LColorCacheLookup(hashers, key) == pixel) { + v = PixOrCopyCreateCacheIdx(key); + } else { + v = PixOrCopyCreateLiteral(pixel); + VP8LColorCacheSet(hashers, key, pixel); + } + } else { + v = PixOrCopyCreateLiteral(pixel); } - refs->size = size; + BackwardRefsCursorAdd(refs, v); } -static void BackwardReferencesRle(int xsize, int ysize, - const uint32_t* const argb, - VP8LBackwardRefs* const refs) { +static int BackwardReferencesRle(int xsize, int ysize, + const uint32_t* const argb, + int cache_bits, VP8LBackwardRefs* const refs) { const int pix_count = xsize * ysize; - int match_len = 0; - int i; - refs->size = 0; - PushBackCopy(refs, match_len); // i=0 case - refs->refs[refs->size++] = PixOrCopyCreateLiteral(argb[0]); - for (i = 1; i < pix_count; ++i) { - if (argb[i] == argb[i - 1]) { - ++match_len; + int i, k; + const int use_color_cache = (cache_bits > 0); + VP8LColorCache hashers; + + if (use_color_cache && !VP8LColorCacheInit(&hashers, cache_bits)) { + return 0; + } + ClearBackwardRefs(refs); + // Add first pixel as literal. + AddSingleLiteral(argb[0], use_color_cache, &hashers, refs); + i = 1; + while (i < pix_count) { + const int max_len = MaxFindCopyLength(pix_count - i); + const int kMinLength = 4; + const int rle_len = FindMatchLength(argb + i, argb + i - 1, 0, max_len); + const int prev_row_len = (i < xsize) ? 0 : + FindMatchLength(argb + i, argb + i - xsize, 0, max_len); + if (rle_len >= prev_row_len && rle_len >= kMinLength) { + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, rle_len)); + // We don't need to update the color cache here since it is always the + // same pixel being copied, and that does not change the color cache + // state. + i += rle_len; + } else if (prev_row_len >= kMinLength) { + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, prev_row_len)); + if (use_color_cache) { + for (k = 0; k < prev_row_len; ++k) { + VP8LColorCacheInsert(&hashers, argb[i + k]); + } + } + i += prev_row_len; } else { - PushBackCopy(refs, match_len); - match_len = 0; - refs->refs[refs->size++] = PixOrCopyCreateLiteral(argb[i]); + AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); + i++; } } - PushBackCopy(refs, match_len); + if (use_color_cache) VP8LColorCacheClear(&hashers); + return !refs->error_; } -static int BackwardReferencesHashChain(int xsize, int ysize, - const uint32_t* const argb, - int cache_bits, int quality, - VP8LBackwardRefs* const refs) { +static int BackwardReferencesLz77(int xsize, int ysize, + const uint32_t* const argb, int cache_bits, + int quality, int low_effort, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs) { int i; int ok = 0; int cc_init = 0; const int use_color_cache = (cache_bits > 0); const int pix_count = xsize * ysize; - HashChain* const hash_chain = (HashChain*)malloc(sizeof(*hash_chain)); VP8LColorCache hashers; + int iter_max = GetMaxItersForQuality(quality, low_effort); + const int window_size = GetWindowSizeForHashChain(quality, xsize); + int min_matches = 32; - if (hash_chain == NULL) return 0; if (use_color_cache) { cc_init = VP8LColorCacheInit(&hashers, cache_bits); if (!cc_init) goto Error; } - - if (!HashChainInit(hash_chain, pix_count)) goto Error; - - refs->size = 0; - for (i = 0; i < pix_count; ) { + ClearBackwardRefs(refs); + HashChainReset(hash_chain); + for (i = 0; i < pix_count - 2; ) { // Alternative#1: Code the pixels starting at 'i' using backward reference. int offset = 0; int len = 0; - if (i < pix_count - 1) { // FindCopy(i,..) reads pixels at [i] and [i + 1]. - int maxlen = pix_count - i; - if (maxlen > MAX_LENGTH) { - maxlen = MAX_LENGTH; - } - HashChainFindCopy(hash_chain, quality, i, xsize, argb, maxlen, - &offset, &len); - } - if (len >= MIN_LENGTH) { - // Alternative#2: Insert the pixel at 'i' as literal, and code the - // pixels starting at 'i + 1' using backward reference. + const int max_len = MaxFindCopyLength(pix_count - i); + HashChainFindCopy(hash_chain, i, argb, max_len, window_size, + iter_max, &offset, &len); + if (len > MIN_LENGTH || (len == MIN_LENGTH && offset <= 512)) { int offset2 = 0; int len2 = 0; int k; + min_matches = 8; HashChainInsert(hash_chain, &argb[i], i); - if (i < pix_count - 2) { // FindCopy(i+1,..) reads [i + 1] and [i + 2]. - int maxlen = pix_count - (i + 1); - if (maxlen > MAX_LENGTH) { - maxlen = MAX_LENGTH; - } - HashChainFindCopy(hash_chain, quality, - i + 1, xsize, argb, maxlen, &offset2, &len2); + if ((len < (max_len >> 2)) && !low_effort) { + // Evaluate Alternative#2: Insert the pixel at 'i' as literal, and code + // the pixels starting at 'i + 1' using backward reference. + HashChainFindCopy(hash_chain, i + 1, argb, max_len - 1, + window_size, iter_max, &offset2, + &len2); if (len2 > len + 1) { - const uint32_t pixel = argb[i]; - // Alternative#2 is a better match. So push pixel at 'i' as literal. - if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) { - const int ix = VP8LColorCacheGetIndex(&hashers, pixel); - refs->refs[refs->size] = PixOrCopyCreateCacheIdx(ix); - } else { - refs->refs[refs->size] = PixOrCopyCreateLiteral(pixel); - } - ++refs->size; - if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel); + AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); i++; // Backward reference to be done for next pixel. len = len2; offset = offset2; } } - if (len >= MAX_LENGTH) { - len = MAX_LENGTH - 1; - } - refs->refs[refs->size++] = PixOrCopyCreateCopy(offset, len); + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); if (use_color_cache) { for (k = 0; k < len; ++k) { VP8LColorCacheInsert(&hashers, argb[i + k]); } } // Add to the hash_chain (but cannot add the last pixel). - { + if (offset >= 3 && offset != xsize) { const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i; - for (k = 1; k < last; ++k) { + for (k = 2; k < last - 8; k += 2) { + HashChainInsert(hash_chain, &argb[i + k], i + k); + } + for (; k < last; ++k) { HashChainInsert(hash_chain, &argb[i + k], i + k); } } i += len; } else { - const uint32_t pixel = argb[i]; - if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) { - // push pixel as a PixOrCopyCreateCacheIdx pixel - const int ix = VP8LColorCacheGetIndex(&hashers, pixel); - refs->refs[refs->size] = PixOrCopyCreateCacheIdx(ix); - } else { - refs->refs[refs->size] = PixOrCopyCreateLiteral(pixel); - } - ++refs->size; - if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel); - if (i + 1 < pix_count) { + AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); + HashChainInsert(hash_chain, &argb[i], i); + ++i; + --min_matches; + if (min_matches <= 0) { + AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); HashChainInsert(hash_chain, &argb[i], i); + ++i; } - ++i; } } - ok = 1; -Error: + while (i < pix_count) { + // Handle the last pixel(s). + AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); + ++i; + } + + ok = !refs->error_; + Error: if (cc_init) VP8LColorCacheClear(&hashers); - HashChainDelete(hash_chain); return ok; } @@ -355,18 +491,19 @@ Error: typedef struct { double alpha_[VALUES_IN_BYTE]; double red_[VALUES_IN_BYTE]; - double literal_[PIX_OR_COPY_CODES_MAX]; double blue_[VALUES_IN_BYTE]; double distance_[NUM_DISTANCE_CODES]; + double* literal_; } CostModel; static int BackwardReferencesTraceBackwards( - int xsize, int ysize, int recursive_cost_model, - const uint32_t* const argb, int cache_bits, VP8LBackwardRefs* const refs); + int xsize, int ysize, const uint32_t* const argb, int quality, + int cache_bits, VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs); static void ConvertPopulationCountTableToBitEstimates( - int num_symbols, const int population_counts[], double output[]) { - int sum = 0; + int num_symbols, const uint32_t population_counts[], double output[]) { + uint32_t sum = 0; int nonzeros = 0; int i; for (i = 0; i < num_symbols; ++i) { @@ -385,42 +522,29 @@ static void ConvertPopulationCountTableToBitEstimates( } } -static int CostModelBuild(CostModel* const m, int xsize, int ysize, - int recursion_level, const uint32_t* const argb, - int cache_bits) { +static int CostModelBuild(CostModel* const m, int cache_bits, + VP8LBackwardRefs* const refs) { int ok = 0; - VP8LHistogram histo; - VP8LBackwardRefs refs; - const int quality = 100; + VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits); + if (histo == NULL) goto Error; - if (!VP8LBackwardRefsAlloc(&refs, xsize * ysize)) goto Error; + VP8LHistogramCreate(histo, refs, cache_bits); - if (recursion_level > 0) { - if (!BackwardReferencesTraceBackwards(xsize, ysize, recursion_level - 1, - argb, cache_bits, &refs)) { - goto Error; - } - } else { - if (!BackwardReferencesHashChain(xsize, ysize, argb, cache_bits, quality, - &refs)) { - goto Error; - } - } - VP8LHistogramCreate(&histo, &refs, cache_bits); ConvertPopulationCountTableToBitEstimates( - VP8LHistogramNumCodes(&histo), histo.literal_, m->literal_); + VP8LHistogramNumCodes(histo->palette_code_bits_), + histo->literal_, m->literal_); ConvertPopulationCountTableToBitEstimates( - VALUES_IN_BYTE, histo.red_, m->red_); + VALUES_IN_BYTE, histo->red_, m->red_); ConvertPopulationCountTableToBitEstimates( - VALUES_IN_BYTE, histo.blue_, m->blue_); + VALUES_IN_BYTE, histo->blue_, m->blue_); ConvertPopulationCountTableToBitEstimates( - VALUES_IN_BYTE, histo.alpha_, m->alpha_); + VALUES_IN_BYTE, histo->alpha_, m->alpha_); ConvertPopulationCountTableToBitEstimates( - NUM_DISTANCE_CODES, histo.distance_, m->distance_); + NUM_DISTANCE_CODES, histo->distance_, m->distance_); ok = 1; Error: - VP8LClearBackwardRefs(&refs); + VP8LFreeHistogram(histo); return ok; } @@ -438,203 +562,211 @@ static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) { static WEBP_INLINE double GetLengthCost(const CostModel* const m, uint32_t length) { - int code, extra_bits_count, extra_bits_value; - PrefixEncode(length, &code, &extra_bits_count, &extra_bits_value); - return m->literal_[VALUES_IN_BYTE + code] + extra_bits_count; + int code, extra_bits; + VP8LPrefixEncodeBits(length, &code, &extra_bits); + return m->literal_[VALUES_IN_BYTE + code] + extra_bits; } static WEBP_INLINE double GetDistanceCost(const CostModel* const m, uint32_t distance) { - int code, extra_bits_count, extra_bits_value; - PrefixEncode(distance, &code, &extra_bits_count, &extra_bits_value); - return m->distance_[code] + extra_bits_count; + int code, extra_bits; + VP8LPrefixEncodeBits(distance, &code, &extra_bits); + return m->distance_[code] + extra_bits; +} + +static void AddSingleLiteralWithCostModel( + const uint32_t* const argb, VP8LHashChain* const hash_chain, + VP8LColorCache* const hashers, const CostModel* const cost_model, int idx, + int is_last, int use_color_cache, double prev_cost, float* const cost, + uint16_t* const dist_array) { + double cost_val = prev_cost; + const uint32_t color = argb[0]; + if (!is_last) { + HashChainInsert(hash_chain, argb, idx); + } + if (use_color_cache && VP8LColorCacheContains(hashers, color)) { + const double mul0 = 0.68; + const int ix = VP8LColorCacheGetIndex(hashers, color); + cost_val += GetCacheCost(cost_model, ix) * mul0; + } else { + const double mul1 = 0.82; + if (use_color_cache) VP8LColorCacheInsert(hashers, color); + cost_val += GetLiteralCost(cost_model, color) * mul1; + } + if (cost[idx] > cost_val) { + cost[idx] = (float)cost_val; + dist_array[idx] = 1; // only one is inserted. + } } static int BackwardReferencesHashChainDistanceOnly( - int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb, - int cache_bits, uint32_t* const dist_array) { + int xsize, int ysize, const uint32_t* const argb, + int quality, int cache_bits, VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs, uint16_t* const dist_array) { int i; int ok = 0; int cc_init = 0; - const int quality = 100; const int pix_count = xsize * ysize; const int use_color_cache = (cache_bits > 0); - double* const cost = - (double*)WebPSafeMalloc((uint64_t)pix_count, sizeof(*cost)); - CostModel* cost_model = (CostModel*)malloc(sizeof(*cost_model)); - HashChain* hash_chain = (HashChain*)malloc(sizeof(*hash_chain)); + float* const cost = + (float*)WebPSafeMalloc(pix_count, sizeof(*cost)); + const size_t literal_array_size = sizeof(double) * + (NUM_LITERAL_CODES + NUM_LENGTH_CODES + + ((cache_bits > 0) ? (1 << cache_bits) : 0)); + const size_t cost_model_size = sizeof(CostModel) + literal_array_size; + CostModel* const cost_model = + (CostModel*)WebPSafeMalloc(1ULL, cost_model_size); VP8LColorCache hashers; - const double mul0 = (recursive_cost_model != 0) ? 1.0 : 0.68; - const double mul1 = (recursive_cost_model != 0) ? 1.0 : 0.82; - - if (cost == NULL || cost_model == NULL || hash_chain == NULL) goto Error; + const int skip_length = 32 + quality; + const int skip_min_distance_code = 2; + int iter_max = GetMaxItersForQuality(quality, 0); + const int window_size = GetWindowSizeForHashChain(quality, xsize); - if (!HashChainInit(hash_chain, pix_count)) goto Error; + if (cost == NULL || cost_model == NULL) goto Error; + cost_model->literal_ = (double*)(cost_model + 1); if (use_color_cache) { cc_init = VP8LColorCacheInit(&hashers, cache_bits); if (!cc_init) goto Error; } - if (!CostModelBuild(cost_model, xsize, ysize, recursive_cost_model, argb, - cache_bits)) { + if (!CostModelBuild(cost_model, cache_bits, refs)) { goto Error; } - for (i = 0; i < pix_count; ++i) cost[i] = 1e100; + for (i = 0; i < pix_count; ++i) cost[i] = 1e38f; // We loop one pixel at a time, but store all currently best points to // non-processed locations from this point. dist_array[0] = 0; - for (i = 0; i < pix_count; ++i) { - double prev_cost = 0.0; - int shortmax; - if (i > 0) { - prev_cost = cost[i - 1]; - } - for (shortmax = 0; shortmax < 2; ++shortmax) { - int offset = 0; - int len = 0; - if (i < pix_count - 1) { // FindCopy reads pixels at [i] and [i + 1]. - int maxlen = shortmax ? 2 : MAX_LENGTH; - if (maxlen > pix_count - i) { - maxlen = pix_count - i; + HashChainReset(hash_chain); + // Add first pixel as literal. + AddSingleLiteralWithCostModel(argb + 0, hash_chain, &hashers, cost_model, 0, + 0, use_color_cache, 0.0, cost, dist_array); + for (i = 1; i < pix_count - 1; ++i) { + int offset = 0; + int len = 0; + double prev_cost = cost[i - 1]; + const int max_len = MaxFindCopyLength(pix_count - i); + HashChainFindCopy(hash_chain, i, argb, max_len, window_size, + iter_max, &offset, &len); + if (len >= MIN_LENGTH) { + const int code = DistanceToPlaneCode(xsize, offset); + const double distance_cost = + prev_cost + GetDistanceCost(cost_model, code); + int k; + for (k = 1; k < len; ++k) { + const double cost_val = distance_cost + GetLengthCost(cost_model, k); + if (cost[i + k] > cost_val) { + cost[i + k] = (float)cost_val; + dist_array[i + k] = k + 1; } - HashChainFindCopy(hash_chain, quality, i, xsize, argb, maxlen, - &offset, &len); } - if (len >= MIN_LENGTH) { - const int code = DistanceToPlaneCode(xsize, offset); - const double distance_cost = - prev_cost + GetDistanceCost(cost_model, code); - int k; - for (k = 1; k < len; ++k) { - const double cost_val = - distance_cost + GetLengthCost(cost_model, k); - if (cost[i + k] > cost_val) { - cost[i + k] = cost_val; - dist_array[i + k] = k + 1; + // This if is for speedup only. It roughly doubles the speed, and + // makes compression worse by .1 %. + if (len >= skip_length && code <= skip_min_distance_code) { + // Long copy for short distances, let's skip the middle + // lookups for better copies. + // 1) insert the hashes. + if (use_color_cache) { + for (k = 0; k < len; ++k) { + VP8LColorCacheInsert(&hashers, argb[i + k]); } } - // This if is for speedup only. It roughly doubles the speed, and - // makes compression worse by .1 %. - if (len >= 128 && code < 2) { - // Long copy for short distances, let's skip the middle - // lookups for better copies. - // 1) insert the hashes. - if (use_color_cache) { - for (k = 0; k < len; ++k) { - VP8LColorCacheInsert(&hashers, argb[i + k]); - } - } - // 2) Add to the hash_chain (but cannot add the last pixel) - { - const int last = (len < pix_count - 1 - i) ? len - : pix_count - 1 - i; - for (k = 0; k < last; ++k) { - HashChainInsert(hash_chain, &argb[i + k], i + k); - } + // 2) Add to the hash_chain (but cannot add the last pixel) + { + const int last = (len + i < pix_count - 1) ? len + i + : pix_count - 1; + for (k = i; k < last; ++k) { + HashChainInsert(hash_chain, &argb[k], k); } - // 3) jump. - i += len - 1; // for loop does ++i, thus -1 here. - goto next_symbol; } + // 3) jump. + i += len - 1; // for loop does ++i, thus -1 here. + goto next_symbol; } - } - if (i < pix_count - 1) { - HashChainInsert(hash_chain, &argb[i], i); - } - { - // inserting a literal pixel - double cost_val = prev_cost; - if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) { - const int ix = VP8LColorCacheGetIndex(&hashers, argb[i]); - cost_val += GetCacheCost(cost_model, ix) * mul0; - } else { - cost_val += GetLiteralCost(cost_model, argb[i]) * mul1; - } - if (cost[i] > cost_val) { - cost[i] = cost_val; - dist_array[i] = 1; // only one is inserted. + if (len != MIN_LENGTH) { + int code_min_length; + double cost_total; + HashChainFindOffset(hash_chain, i, argb, MIN_LENGTH, window_size, + &offset); + code_min_length = DistanceToPlaneCode(xsize, offset); + cost_total = prev_cost + + GetDistanceCost(cost_model, code_min_length) + + GetLengthCost(cost_model, 1); + if (cost[i + 1] > cost_total) { + cost[i + 1] = (float)cost_total; + dist_array[i + 1] = 2; + } } - if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]); } + AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i, + 0, use_color_cache, prev_cost, cost, + dist_array); next_symbol: ; } - // Last pixel still to do, it can only be a single step if not reached - // through cheaper means already. - ok = 1; -Error: + // Handle the last pixel. + if (i == (pix_count - 1)) { + AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i, + 1, use_color_cache, cost[pix_count - 2], cost, + dist_array); + } + ok = !refs->error_; + Error: if (cc_init) VP8LColorCacheClear(&hashers); - HashChainDelete(hash_chain); - free(cost_model); - free(cost); + WebPSafeFree(cost_model); + WebPSafeFree(cost); return ok; } -static int TraceBackwards(const uint32_t* const dist_array, - int dist_array_size, - uint32_t** const chosen_path, - int* const chosen_path_size) { - int i; - // Count how many. - int count = 0; - for (i = dist_array_size - 1; i >= 0; ) { - int k = dist_array[i]; - assert(k >= 1); - ++count; - i -= k; - } - // Allocate. - *chosen_path_size = count; - *chosen_path = - (uint32_t*)WebPSafeMalloc((uint64_t)count, sizeof(**chosen_path)); - if (*chosen_path == NULL) return 0; - - // Write in reverse order. - for (i = dist_array_size - 1; i >= 0; ) { - int k = dist_array[i]; - assert(k >= 1); - (*chosen_path)[--count] = k; - i -= k; +// We pack the path at the end of *dist_array and return +// a pointer to this part of the array. Example: +// dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232] +static void TraceBackwards(uint16_t* const dist_array, + int dist_array_size, + uint16_t** const chosen_path, + int* const chosen_path_size) { + uint16_t* path = dist_array + dist_array_size; + uint16_t* cur = dist_array + dist_array_size - 1; + while (cur >= dist_array) { + const int k = *cur; + --path; + *path = k; + cur -= k; } - return 1; + *chosen_path = path; + *chosen_path_size = (int)(dist_array + dist_array_size - path); } static int BackwardReferencesHashChainFollowChosenPath( - int xsize, int ysize, const uint32_t* const argb, int cache_bits, - const uint32_t* const chosen_path, int chosen_path_size, + int xsize, int ysize, const uint32_t* const argb, + int quality, int cache_bits, + const uint16_t* const chosen_path, int chosen_path_size, + VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) { - const int quality = 100; const int pix_count = xsize * ysize; const int use_color_cache = (cache_bits > 0); - int size = 0; - int i = 0; - int k; int ix; + int i = 0; int ok = 0; int cc_init = 0; - HashChain* hash_chain = (HashChain*)malloc(sizeof(*hash_chain)); + const int window_size = GetWindowSizeForHashChain(quality, xsize); VP8LColorCache hashers; - if (hash_chain == NULL || !HashChainInit(hash_chain, pix_count)) { - goto Error; - } if (use_color_cache) { cc_init = VP8LColorCacheInit(&hashers, cache_bits); if (!cc_init) goto Error; } - refs->size = 0; - for (ix = 0; ix < chosen_path_size; ++ix, ++size) { + ClearBackwardRefs(refs); + HashChainReset(hash_chain); + for (ix = 0; ix < chosen_path_size; ++ix) { int offset = 0; - int len = 0; - int maxlen = chosen_path[ix]; - if (maxlen != 1) { - HashChainFindCopy(hash_chain, quality, - i, xsize, argb, maxlen, &offset, &len); - assert(len == maxlen); - refs->refs[size] = PixOrCopyCreateCopy(offset, len); + const int len = chosen_path[ix]; + if (len != 1) { + int k; + HashChainFindOffset(hash_chain, i, argb, len, window_size, &offset); + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); if (use_color_cache) { for (k = 0; k < len; ++k) { VP8LColorCacheInsert(&hashers, argb[i + k]); @@ -648,227 +780,330 @@ static int BackwardReferencesHashChainFollowChosenPath( } i += len; } else { + PixOrCopy v; if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) { // push pixel as a color cache index const int idx = VP8LColorCacheGetIndex(&hashers, argb[i]); - refs->refs[size] = PixOrCopyCreateCacheIdx(idx); + v = PixOrCopyCreateCacheIdx(idx); } else { - refs->refs[size] = PixOrCopyCreateLiteral(argb[i]); + if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]); + v = PixOrCopyCreateLiteral(argb[i]); } - if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]); + BackwardRefsCursorAdd(refs, v); if (i + 1 < pix_count) { HashChainInsert(hash_chain, &argb[i], i); } ++i; } } - assert(size <= refs->max_size); - refs->size = size; - ok = 1; -Error: + ok = !refs->error_; + Error: if (cc_init) VP8LColorCacheClear(&hashers); - HashChainDelete(hash_chain); return ok; } // Returns 1 on success. static int BackwardReferencesTraceBackwards(int xsize, int ysize, - int recursive_cost_model, const uint32_t* const argb, - int cache_bits, + int quality, int cache_bits, + VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) { int ok = 0; const int dist_array_size = xsize * ysize; - uint32_t* chosen_path = NULL; + uint16_t* chosen_path = NULL; int chosen_path_size = 0; - uint32_t* dist_array = - (uint32_t*)WebPSafeMalloc((uint64_t)dist_array_size, sizeof(*dist_array)); + uint16_t* dist_array = + (uint16_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array)); if (dist_array == NULL) goto Error; if (!BackwardReferencesHashChainDistanceOnly( - xsize, ysize, recursive_cost_model, argb, cache_bits, dist_array)) { - goto Error; - } - if (!TraceBackwards(dist_array, dist_array_size, - &chosen_path, &chosen_path_size)) { + xsize, ysize, argb, quality, cache_bits, hash_chain, + refs, dist_array)) { goto Error; } - free(dist_array); // no need to retain this memory any longer - dist_array = NULL; + TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size); if (!BackwardReferencesHashChainFollowChosenPath( - xsize, ysize, argb, cache_bits, chosen_path, chosen_path_size, refs)) { + xsize, ysize, argb, quality, cache_bits, chosen_path, chosen_path_size, + hash_chain, refs)) { goto Error; } ok = 1; Error: - free(chosen_path); - free(dist_array); + WebPSafeFree(dist_array); return ok; } static void BackwardReferences2DLocality(int xsize, - VP8LBackwardRefs* const refs) { - int i; - for (i = 0; i < refs->size; ++i) { - if (PixOrCopyIsCopy(&refs->refs[i])) { - const int dist = refs->refs[i].argb_or_distance; + const VP8LBackwardRefs* const refs) { + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + while (VP8LRefsCursorOk(&c)) { + if (PixOrCopyIsCopy(c.cur_pos)) { + const int dist = c.cur_pos->argb_or_distance; const int transformed_dist = DistanceToPlaneCode(xsize, dist); - refs->refs[i].argb_or_distance = transformed_dist; + c.cur_pos->argb_or_distance = transformed_dist; } + VP8LRefsCursorNext(&c); } } -int VP8LGetBackwardReferences(int width, int height, - const uint32_t* const argb, - int quality, int cache_bits, int use_2d_locality, - VP8LBackwardRefs* const best) { - int ok = 0; - int lz77_is_useful; - VP8LBackwardRefs refs_rle, refs_lz77; - const int num_pix = width * height; - - VP8LBackwardRefsAlloc(&refs_rle, num_pix); - VP8LBackwardRefsAlloc(&refs_lz77, num_pix); - VP8LInitBackwardRefs(best); - if (refs_rle.refs == NULL || refs_lz77.refs == NULL) { - Error1: - VP8LClearBackwardRefs(&refs_rle); - VP8LClearBackwardRefs(&refs_lz77); - goto End; - } - - if (!BackwardReferencesHashChain(width, height, argb, cache_bits, quality, - &refs_lz77)) { - goto End; - } - // Backward Reference using RLE only. - BackwardReferencesRle(width, height, argb, &refs_rle); +// Returns entropy for the given cache bits. +static double ComputeCacheEntropy(const uint32_t* argb, + const VP8LBackwardRefs* const refs, + int cache_bits) { + const int use_color_cache = (cache_bits > 0); + int cc_init = 0; + double entropy = MAX_ENTROPY; + const double kSmallPenaltyForLargeCache = 4.0; + VP8LColorCache hashers; + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + VP8LHistogram* histo = VP8LAllocateHistogram(cache_bits); + if (histo == NULL) goto Error; - { - double bit_cost_lz77, bit_cost_rle; - VP8LHistogram* const histo = (VP8LHistogram*)malloc(sizeof(*histo)); - if (histo == NULL) goto Error1; - // Evaluate lz77 coding - VP8LHistogramCreate(histo, &refs_lz77, cache_bits); - bit_cost_lz77 = VP8LHistogramEstimateBits(histo); - // Evaluate RLE coding - VP8LHistogramCreate(histo, &refs_rle, cache_bits); - bit_cost_rle = VP8LHistogramEstimateBits(histo); - // Decide if LZ77 is useful. - lz77_is_useful = (bit_cost_lz77 < bit_cost_rle); - free(histo); + if (use_color_cache) { + cc_init = VP8LColorCacheInit(&hashers, cache_bits); + if (!cc_init) goto Error; } - - // Choose appropriate backward reference. - if (lz77_is_useful) { - // TraceBackwards is costly. Run it for higher qualities. - const int try_lz77_trace_backwards = (quality >= 75); - *best = refs_lz77; // default guess: lz77 is better - VP8LClearBackwardRefs(&refs_rle); - if (try_lz77_trace_backwards) { - const int recursion_level = (num_pix < 320 * 200) ? 1 : 0; - VP8LBackwardRefs refs_trace; - if (!VP8LBackwardRefsAlloc(&refs_trace, num_pix)) { - goto End; - } - if (BackwardReferencesTraceBackwards( - width, height, recursion_level, argb, cache_bits, &refs_trace)) { - VP8LClearBackwardRefs(&refs_lz77); - *best = refs_trace; - } + if (!use_color_cache) { + while (VP8LRefsCursorOk(&c)) { + VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos); + VP8LRefsCursorNext(&c); } } else { - VP8LClearBackwardRefs(&refs_lz77); - *best = refs_rle; + while (VP8LRefsCursorOk(&c)) { + const PixOrCopy* const v = c.cur_pos; + if (PixOrCopyIsLiteral(v)) { + const uint32_t pix = *argb++; + const uint32_t key = VP8LColorCacheGetIndex(&hashers, pix); + if (VP8LColorCacheLookup(&hashers, key) == pix) { + ++histo->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key]; + } else { + VP8LColorCacheSet(&hashers, key, pix); + ++histo->blue_[pix & 0xff]; + ++histo->literal_[(pix >> 8) & 0xff]; + ++histo->red_[(pix >> 16) & 0xff]; + ++histo->alpha_[pix >> 24]; + } + } else { + int len = PixOrCopyLength(v); + int code, extra_bits; + VP8LPrefixEncodeBits(len, &code, &extra_bits); + ++histo->literal_[NUM_LITERAL_CODES + code]; + VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits); + ++histo->distance_[code]; + do { + VP8LColorCacheInsert(&hashers, *argb++); + } while(--len != 0); + } + VP8LRefsCursorNext(&c); + } } + entropy = VP8LHistogramEstimateBits(histo) + + kSmallPenaltyForLargeCache * cache_bits; + Error: + if (cc_init) VP8LColorCacheClear(&hashers); + VP8LFreeHistogram(histo); + return entropy; +} - if (use_2d_locality) BackwardReferences2DLocality(width, best); - - ok = 1; - - End: - if (!ok) { - VP8LClearBackwardRefs(best); +// Evaluate optimal cache bits for the local color cache. +// The input *best_cache_bits sets the maximum cache bits to use (passing 0 +// implies disabling the local color cache). The local color cache is also +// disabled for the lower (<= 25) quality. +// Returns 0 in case of memory error. +static int CalculateBestCacheSize(const uint32_t* const argb, + int xsize, int ysize, int quality, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs, + int* const lz77_computed, + int* const best_cache_bits) { + int eval_low = 1; + int eval_high = 1; + double entropy_low = MAX_ENTROPY; + double entropy_high = MAX_ENTROPY; + const double cost_mul = 5e-4; + int cache_bits_low = 0; + int cache_bits_high = (quality <= 25) ? 0 : *best_cache_bits; + + assert(cache_bits_high <= MAX_COLOR_CACHE_BITS); + + *lz77_computed = 0; + if (cache_bits_high == 0) { + *best_cache_bits = 0; + // Local color cache is disabled. + return 1; } - return ok; + if (!BackwardReferencesLz77(xsize, ysize, argb, cache_bits_low, quality, 0, + hash_chain, refs)) { + return 0; + } + // Do a binary search to find the optimal entropy for cache_bits. + while (eval_low || eval_high) { + if (eval_low) { + entropy_low = ComputeCacheEntropy(argb, refs, cache_bits_low); + entropy_low += entropy_low * cache_bits_low * cost_mul; + eval_low = 0; + } + if (eval_high) { + entropy_high = ComputeCacheEntropy(argb, refs, cache_bits_high); + entropy_high += entropy_high * cache_bits_high * cost_mul; + eval_high = 0; + } + if (entropy_high < entropy_low) { + const int prev_cache_bits_low = cache_bits_low; + *best_cache_bits = cache_bits_high; + cache_bits_low = (cache_bits_low + cache_bits_high) / 2; + if (cache_bits_low != prev_cache_bits_low) eval_low = 1; + } else { + *best_cache_bits = cache_bits_low; + cache_bits_high = (cache_bits_low + cache_bits_high) / 2; + if (cache_bits_high != cache_bits_low) eval_high = 1; + } + } + *lz77_computed = 1; + return 1; } -// Returns 1 on success. -static int ComputeCacheHistogram(const uint32_t* const argb, - int xsize, int ysize, - const VP8LBackwardRefs* const refs, - int cache_bits, - VP8LHistogram* const histo) { +// Update (in-place) backward references for specified cache_bits. +static int BackwardRefsWithLocalCache(const uint32_t* const argb, + int cache_bits, + VP8LBackwardRefs* const refs) { int pixel_index = 0; - int i; - uint32_t k; VP8LColorCache hashers; - const int use_color_cache = (cache_bits > 0); - int cc_init = 0; + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + if (!VP8LColorCacheInit(&hashers, cache_bits)) return 0; - if (use_color_cache) { - cc_init = VP8LColorCacheInit(&hashers, cache_bits); - if (!cc_init) return 0; - } - - for (i = 0; i < refs->size; ++i) { - const PixOrCopy* const v = &refs->refs[i]; + while (VP8LRefsCursorOk(&c)) { + PixOrCopy* const v = c.cur_pos; if (PixOrCopyIsLiteral(v)) { - if (use_color_cache && - VP8LColorCacheContains(&hashers, argb[pixel_index])) { - // push pixel as a cache index - const int ix = VP8LColorCacheGetIndex(&hashers, argb[pixel_index]); - const PixOrCopy token = PixOrCopyCreateCacheIdx(ix); - VP8LHistogramAddSinglePixOrCopy(histo, &token); + const uint32_t argb_literal = v->argb_or_distance; + if (VP8LColorCacheContains(&hashers, argb_literal)) { + const int ix = VP8LColorCacheGetIndex(&hashers, argb_literal); + *v = PixOrCopyCreateCacheIdx(ix); } else { - VP8LHistogramAddSinglePixOrCopy(histo, v); + VP8LColorCacheInsert(&hashers, argb_literal); } + ++pixel_index; } else { - VP8LHistogramAddSinglePixOrCopy(histo, v); - } - if (use_color_cache) { - for (k = 0; k < PixOrCopyLength(v); ++k) { - VP8LColorCacheInsert(&hashers, argb[pixel_index + k]); + // refs was created without local cache, so it can not have cache indexes. + int k; + assert(PixOrCopyIsCopy(v)); + for (k = 0; k < v->len; ++k) { + VP8LColorCacheInsert(&hashers, argb[pixel_index++]); } } - pixel_index += PixOrCopyLength(v); + VP8LRefsCursorNext(&c); } - assert(pixel_index == xsize * ysize); - (void)xsize; // xsize is not used in non-debug compilations otherwise. - (void)ysize; // ysize is not used in non-debug compilations otherwise. - if (cc_init) VP8LColorCacheClear(&hashers); + VP8LColorCacheClear(&hashers); return 1; } -// Returns how many bits are to be used for a color cache. -int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb, - int xsize, int ysize, - int* const best_cache_bits) { - int ok = 0; - int cache_bits; - double lowest_entropy = 1e99; - VP8LBackwardRefs refs; - static const double kSmallPenaltyForLargeCache = 4.0; - static const int quality = 30; - if (!VP8LBackwardRefsAlloc(&refs, xsize * ysize) || - !BackwardReferencesHashChain(xsize, ysize, argb, 0, quality, &refs)) { +static VP8LBackwardRefs* GetBackwardReferencesLowEffort( + int width, int height, const uint32_t* const argb, int quality, + int* const cache_bits, VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2]) { + VP8LBackwardRefs* refs_lz77 = &refs_array[0]; + *cache_bits = 0; + if (!BackwardReferencesLz77(width, height, argb, 0, quality, + 1 /* Low effort. */, hash_chain, refs_lz77)) { + return NULL; + } + BackwardReferences2DLocality(width, refs_lz77); + return refs_lz77; +} + +static VP8LBackwardRefs* GetBackwardReferences( + int width, int height, const uint32_t* const argb, int quality, + int* const cache_bits, VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2]) { + int lz77_is_useful; + int lz77_computed; + double bit_cost_lz77, bit_cost_rle; + VP8LBackwardRefs* best = NULL; + VP8LBackwardRefs* refs_lz77 = &refs_array[0]; + VP8LBackwardRefs* refs_rle = &refs_array[1]; + VP8LHistogram* histo = NULL; + + if (!CalculateBestCacheSize(argb, width, height, quality, hash_chain, + refs_lz77, &lz77_computed, cache_bits)) { goto Error; } - for (cache_bits = 0; cache_bits <= MAX_COLOR_CACHE_BITS; ++cache_bits) { - double cur_entropy; - VP8LHistogram histo; - VP8LHistogramInit(&histo, cache_bits); - ComputeCacheHistogram(argb, xsize, ysize, &refs, cache_bits, &histo); - cur_entropy = VP8LHistogramEstimateBits(&histo) + - kSmallPenaltyForLargeCache * cache_bits; - if (cache_bits == 0 || cur_entropy < lowest_entropy) { - *best_cache_bits = cache_bits; - lowest_entropy = cur_entropy; + + if (lz77_computed) { + // Transform refs_lz77 for the optimized cache_bits. + if (*cache_bits > 0) { + if (!BackwardRefsWithLocalCache(argb, *cache_bits, refs_lz77)) { + goto Error; + } + } + } else { + if (!BackwardReferencesLz77(width, height, argb, *cache_bits, quality, + 0 /* Low effort. */, hash_chain, refs_lz77)) { + goto Error; } } - ok = 1; + + if (!BackwardReferencesRle(width, height, argb, *cache_bits, refs_rle)) { + goto Error; + } + + histo = VP8LAllocateHistogram(*cache_bits); + if (histo == NULL) goto Error; + + { + // Evaluate LZ77 coding. + VP8LHistogramCreate(histo, refs_lz77, *cache_bits); + bit_cost_lz77 = VP8LHistogramEstimateBits(histo); + // Evaluate RLE coding. + VP8LHistogramCreate(histo, refs_rle, *cache_bits); + bit_cost_rle = VP8LHistogramEstimateBits(histo); + // Decide if LZ77 is useful. + lz77_is_useful = (bit_cost_lz77 < bit_cost_rle); + } + + // Choose appropriate backward reference. + if (lz77_is_useful) { + // TraceBackwards is costly. Don't execute it at lower quality. + const int try_lz77_trace_backwards = (quality >= 25); + best = refs_lz77; // default guess: lz77 is better + if (try_lz77_trace_backwards) { + VP8LBackwardRefs* const refs_trace = refs_rle; + if (!VP8LBackwardRefsCopy(refs_lz77, refs_trace)) { + best = NULL; + goto Error; + } + if (BackwardReferencesTraceBackwards(width, height, argb, quality, + *cache_bits, hash_chain, + refs_trace)) { + double bit_cost_trace; + // Evaluate LZ77 coding. + VP8LHistogramCreate(histo, refs_trace, *cache_bits); + bit_cost_trace = VP8LHistogramEstimateBits(histo); + if (bit_cost_trace < bit_cost_lz77) { + best = refs_trace; + } + } + } + } else { + best = refs_rle; + } + + BackwardReferences2DLocality(width, best); + Error: - VP8LClearBackwardRefs(&refs); - return ok; + VP8LFreeHistogram(histo); + return best; +} + +VP8LBackwardRefs* VP8LGetBackwardReferences( + int width, int height, const uint32_t* const argb, int quality, + int low_effort, int* const cache_bits, VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2]) { + if (low_effort) { + return GetBackwardReferencesLowEffort(width, height, argb, quality, + cache_bits, hash_chain, refs_array); + } else { + return GetBackwardReferences(width, height, argb, quality, cache_bits, + hash_chain, refs_array); + } } diff --git a/drivers/webp/enc/backward_references.h b/drivers/webp/enc/backward_references.h index 8006a56ba1..daa084d846 100644 --- a/drivers/webp/enc/backward_references.h +++ b/drivers/webp/enc/backward_references.h @@ -1,8 +1,10 @@ // Copyright 2012 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Author: Jyrki Alakuijala (jyrki@google.com) @@ -13,82 +15,15 @@ #include <assert.h> #include <stdlib.h> -#include "../types.h" -#include "../format_constants.h" +#include "../webp/types.h" +#include "../webp/format_constants.h" -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus extern "C" { #endif -// The spec allows 11, we use 9 bits to reduce memory consumption in encoding. -// Having 9 instead of 11 only removes about 0.25 % of compression density. -#define MAX_COLOR_CACHE_BITS 9 - -// Max ever number of codes we'll use: -#define PIX_OR_COPY_CODES_MAX \ - (NUM_LITERAL_CODES + NUM_LENGTH_CODES + (1 << MAX_COLOR_CACHE_BITS)) - -// ----------------------------------------------------------------------------- -// PrefixEncode() - -// use GNU builtins where available. -#if defined(__GNUC__) && \ - ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) -static WEBP_INLINE int BitsLog2Floor(uint32_t n) { - return n == 0 ? -1 : 31 ^ __builtin_clz(n); -} -#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) -#include <intrin.h> -#pragma intrinsic(_BitScanReverse) - -static WEBP_INLINE int BitsLog2Floor(uint32_t n) { - unsigned long first_set_bit; - return _BitScanReverse(&first_set_bit, n) ? first_set_bit : -1; -} -#else -static WEBP_INLINE int BitsLog2Floor(uint32_t n) { - int log = 0; - uint32_t value = n; - int i; - - if (value == 0) return -1; - for (i = 4; i >= 0; --i) { - const int shift = (1 << i); - const uint32_t x = value >> shift; - if (x != 0) { - value = x; - log += shift; - } - } - return log; -} -#endif - -static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) { - const int floor = BitsLog2Floor(n); - if (n == (n & ~(n - 1))) // zero or a power of two. - return floor; - else - return floor + 1; -} - -// Splitting of distance and length codes into prefixes and -// extra bits. The prefixes are encoded with an entropy code -// while the extra bits are stored just as normal bits. -static WEBP_INLINE void PrefixEncode(int distance, int* const code, - int* const extra_bits_count, - int* const extra_bits_value) { - // Collect the two most significant bits where the highest bit is 1. - const int highest_bit = BitsLog2Floor(--distance); - // & 0x3f is to make behavior well defined when highest_bit - // does not exist or is the least significant bit. - const int second_highest_bit = - (distance >> ((highest_bit - 1) & 0x3f)) & 1; - *extra_bits_count = (highest_bit > 0) ? (highest_bit - 1) : 0; - *extra_bits_value = distance & ((1 << *extra_bits_count) - 1); - *code = (highest_bit > 0) ? (2 * highest_bit + second_highest_bit) - : (highest_bit == 0) ? 1 : 0; -} +// The maximum allowed limit is 11. +#define MAX_COLOR_CACHE_BITS 10 // ----------------------------------------------------------------------------- // PixOrCopy @@ -173,39 +108,94 @@ static WEBP_INLINE uint32_t PixOrCopyDistance(const PixOrCopy* const p) { } // ----------------------------------------------------------------------------- -// VP8LBackwardRefs +// VP8LHashChain + +#define HASH_BITS 18 +#define HASH_SIZE (1 << HASH_BITS) + +typedef struct VP8LHashChain VP8LHashChain; +struct VP8LHashChain { + // Stores the most recently added position with the given hash value. + int32_t hash_to_first_index_[HASH_SIZE]; + // chain_[pos] stores the previous position with the same hash value + // for every pixel in the image. + int32_t* chain_; + // This is the maximum size of the hash_chain that can be constructed. + // Typically this is the pixel count (width x height) for a given image. + int size_; +}; -typedef struct { - PixOrCopy* refs; - int size; // currently used - int max_size; // maximum capacity -} VP8LBackwardRefs; +// Must be called first, to set size. +int VP8LHashChainInit(VP8LHashChain* const p, int size); +void VP8LHashChainClear(VP8LHashChain* const p); // release memory -// Initialize the object. Must be called first. 'refs' can be NULL. -void VP8LInitBackwardRefs(VP8LBackwardRefs* const refs); +// ----------------------------------------------------------------------------- +// VP8LBackwardRefs (block-based backward-references storage) + +// maximum number of reference blocks the image will be segmented into +#define MAX_REFS_BLOCK_PER_IMAGE 16 + +typedef struct PixOrCopyBlock PixOrCopyBlock; // forward declaration +typedef struct VP8LBackwardRefs VP8LBackwardRefs; + +// Container for blocks chain +struct VP8LBackwardRefs { + int block_size_; // common block-size + int error_; // set to true if some memory error occurred + PixOrCopyBlock* refs_; // list of currently used blocks + PixOrCopyBlock** tail_; // for list recycling + PixOrCopyBlock* free_blocks_; // free-list + PixOrCopyBlock* last_block_; // used for adding new refs (internal) +}; -// Release memory and re-initialize the object. 'refs' can be NULL. -void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs); +// Initialize the object. 'block_size' is the common block size to store +// references (typically, width * height / MAX_REFS_BLOCK_PER_IMAGE). +void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size); +// Release memory for backward references. +void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs); +// Copies the 'src' backward refs to the 'dst'. Returns 0 in case of error. +int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src, + VP8LBackwardRefs* const dst); -// Allocate 'max_size' references. Returns false in case of memory error. -int VP8LBackwardRefsAlloc(VP8LBackwardRefs* const refs, int max_size); +// Cursor for iterating on references content +typedef struct { + // public: + PixOrCopy* cur_pos; // current position + // private: + PixOrCopyBlock* cur_block_; // current block in the refs list + const PixOrCopy* last_pos_; // sentinel for switching to next block +} VP8LRefsCursor; + +// Returns a cursor positioned at the beginning of the references list. +VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs); +// Returns true if cursor is pointing at a valid position. +static WEBP_INLINE int VP8LRefsCursorOk(const VP8LRefsCursor* const c) { + return (c->cur_pos != NULL); +} +// Move to next block of references. Internal, not to be called directly. +void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c); +// Move to next position, or NULL. Should not be called if !VP8LRefsCursorOk(). +static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) { + assert(c != NULL); + assert(VP8LRefsCursorOk(c)); + if (++c->cur_pos == c->last_pos_) VP8LRefsCursorNextBlock(c); +} // ----------------------------------------------------------------------------- // Main entry points // Evaluates best possible backward references for specified quality. -// Further optimize for 2D locality if use_2d_locality flag is set. -int VP8LGetBackwardReferences(int width, int height, - const uint32_t* const argb, - int quality, int cache_bits, int use_2d_locality, - VP8LBackwardRefs* const best); - -// Produce an estimate for a good color cache size for the image. -int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb, - int xsize, int ysize, - int* const best_cache_bits); - -#if defined(__cplusplus) || defined(c_plusplus) +// The input cache_bits to 'VP8LGetBackwardReferences' sets the maximum cache +// bits to use (passing 0 implies disabling the local color cache). +// The optimal cache bits is evaluated and set for the *cache_bits parameter. +// The return value is the pointer to the best of the two backward refs viz, +// refs[0] or refs[1]. +VP8LBackwardRefs* VP8LGetBackwardReferences( + int width, int height, const uint32_t* const argb, int quality, + int low_effort, int* const cache_bits, VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs[2]); + +#ifdef __cplusplus } #endif diff --git a/drivers/webp/enc/config.c b/drivers/webp/enc/config.c index 4136f6c227..f9f7961d58 100644 --- a/drivers/webp/enc/config.c +++ b/drivers/webp/enc/config.c @@ -1,19 +1,17 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Coding tools configuration // // Author: Skal (pascal.massimino@gmail.com) -#include "../encode.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif +#include "../webp/encode.h" //------------------------------------------------------------------------------ // WebPConfig @@ -31,9 +29,9 @@ int WebPConfigInitInternal(WebPConfig* config, config->target_PSNR = 0.; config->method = 4; config->sns_strength = 50; - config->filter_strength = 20; // default: light filtering + config->filter_strength = 60; // mid-filtering config->filter_sharpness = 0; - config->filter_type = 0; // default: simple + config->filter_type = 1; // default: strong (so U/V is filtered too) config->partitions = 0; config->segments = 4; config->pass = 1; @@ -45,7 +43,15 @@ int WebPConfigInitInternal(WebPConfig* config, config->alpha_filtering = 1; config->alpha_quality = 100; config->lossless = 0; + config->exact = 0; config->image_hint = WEBP_HINT_DEFAULT; + config->emulate_jpeg_size = 0; + config->thread_level = 0; + config->low_memory = 0; + config->near_lossless = 100; +#ifdef WEBP_EXPERIMENTAL_FEATURES + config->delta_palettization = 0; +#endif // WEBP_EXPERIMENTAL_FEATURES // TODO(skal): tune. switch (preset) { @@ -53,11 +59,13 @@ int WebPConfigInitInternal(WebPConfig* config, config->sns_strength = 80; config->filter_sharpness = 4; config->filter_strength = 35; + config->preprocessing &= ~2; // no dithering break; case WEBP_PRESET_PHOTO: config->sns_strength = 80; config->filter_sharpness = 3; config->filter_strength = 30; + config->preprocessing |= 2; break; case WEBP_PRESET_DRAWING: config->sns_strength = 25; @@ -67,10 +75,12 @@ int WebPConfigInitInternal(WebPConfig* config, case WEBP_PRESET_ICON: config->sns_strength = 0; config->filter_strength = 0; // disable filtering to retain sharpness + config->preprocessing &= ~2; // no dithering break; case WEBP_PRESET_TEXT: config->sns_strength = 0; config->filter_strength = 0; // disable filtering to retain sharpness + config->preprocessing &= ~2; // no dithering config->segments = 2; break; case WEBP_PRESET_DEFAULT: @@ -106,7 +116,7 @@ int WebPValidateConfig(const WebPConfig* config) { return 0; if (config->show_compressed < 0 || config->show_compressed > 1) return 0; - if (config->preprocessing < 0 || config->preprocessing > 1) + if (config->preprocessing < 0 || config->preprocessing > 7) return 0; if (config->partitions < 0 || config->partitions > 3) return 0; @@ -120,13 +130,44 @@ int WebPValidateConfig(const WebPConfig* config) { return 0; if (config->lossless < 0 || config->lossless > 1) return 0; + if (config->near_lossless < 0 || config->near_lossless > 100) + return 0; if (config->image_hint >= WEBP_HINT_LAST) return 0; + if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1) + return 0; + if (config->thread_level < 0 || config->thread_level > 1) + return 0; + if (config->low_memory < 0 || config->low_memory > 1) + return 0; + if (config->exact < 0 || config->exact > 1) + return 0; +#ifdef WEBP_EXPERIMENTAL_FEATURES + if (config->delta_palettization < 0 || config->delta_palettization > 1) + return 0; +#endif // WEBP_EXPERIMENTAL_FEATURES return 1; } //------------------------------------------------------------------------------ -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif +#define MAX_LEVEL 9 + +// Mapping between -z level and -m / -q parameter settings. +static const struct { + uint8_t method_; + uint8_t quality_; +} kLosslessPresets[MAX_LEVEL + 1] = { + { 0, 0 }, { 1, 20 }, { 2, 25 }, { 3, 30 }, { 3, 50 }, + { 4, 50 }, { 4, 75 }, { 4, 90 }, { 5, 90 }, { 6, 100 } +}; + +int WebPConfigLosslessPreset(WebPConfig* config, int level) { + if (config == NULL || level < 0 || level > MAX_LEVEL) return 0; + config->lossless = 1; + config->method = kLosslessPresets[level].method_; + config->quality = kLosslessPresets[level].quality_; + return 1; +} + +//------------------------------------------------------------------------------ diff --git a/drivers/webp/enc/cost.c b/drivers/webp/enc/cost.c index 92e0cc713c..ae7fe01388 100644 --- a/drivers/webp/enc/cost.c +++ b/drivers/webp/enc/cost.c @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Cost tables for level and modes @@ -11,42 +13,6 @@ #include "./cost.h" -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -//------------------------------------------------------------------------------ -// Boolean-cost cost table - -const uint16_t VP8EntropyCost[256] = { - 1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216, - 1178, 1152, 1110, 1076, 1061, 1024, 1024, 992, 968, 951, - 939, 911, 896, 878, 871, 854, 838, 820, 811, 794, - 786, 768, 768, 752, 740, 732, 720, 709, 704, 690, - 683, 672, 666, 655, 647, 640, 631, 622, 615, 607, - 598, 592, 586, 576, 572, 564, 559, 555, 547, 541, - 534, 528, 522, 512, 512, 504, 500, 494, 488, 483, - 477, 473, 467, 461, 458, 452, 448, 443, 438, 434, - 427, 424, 419, 415, 410, 406, 403, 399, 394, 390, - 384, 384, 377, 374, 370, 366, 362, 359, 355, 351, - 347, 342, 342, 336, 333, 330, 326, 323, 320, 316, - 312, 308, 305, 302, 299, 296, 293, 288, 287, 283, - 280, 277, 274, 272, 268, 266, 262, 256, 256, 256, - 251, 248, 245, 242, 240, 237, 234, 232, 228, 226, - 223, 221, 218, 216, 214, 211, 208, 205, 203, 201, - 198, 196, 192, 191, 188, 187, 183, 181, 179, 176, - 175, 171, 171, 168, 165, 163, 160, 159, 156, 154, - 152, 150, 148, 146, 144, 142, 139, 138, 135, 133, - 131, 128, 128, 125, 123, 121, 119, 117, 115, 113, - 111, 110, 107, 105, 103, 102, 100, 98, 96, 94, - 92, 91, 89, 86, 86, 83, 82, 80, 77, 76, - 74, 73, 71, 69, 67, 66, 64, 63, 61, 59, - 57, 55, 54, 52, 51, 49, 47, 46, 44, 43, - 41, 40, 38, 36, 35, 33, 32, 30, 29, 27, - 25, 24, 22, 21, 19, 18, 16, 15, 13, 12, - 10, 9, 7, 6, 4, 3 -}; - //------------------------------------------------------------------------------ // Level cost tables @@ -73,267 +39,6 @@ const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = { {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x153} }; -// fixed costs for coding levels, deduce from the coding tree. -// This is only the part that doesn't depend on the probability state. -const uint16_t VP8LevelFixedCosts[2048] = { - 0, 256, 256, 256, 256, 432, 618, 630, - 731, 640, 640, 828, 901, 948, 1021, 1101, - 1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202, - 1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497, - 1540, 1570, 1613, 1280, 1295, 1317, 1332, 1358, - 1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532, - 1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679, - 1694, 1716, 1731, 1775, 1790, 1812, 1827, 1853, - 1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759, - 1765, 1774, 1780, 1800, 1806, 1815, 1821, 1832, - 1838, 1847, 1853, 1878, 1884, 1893, 1899, 1910, - 1916, 1925, 1931, 1951, 1957, 1966, 1972, 1983, - 1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059, - 2065, 2074, 2080, 2100, 2106, 2115, 2121, 2132, - 2138, 2147, 2153, 2178, 2184, 2193, 2199, 2210, - 2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283, - 2289, 2298, 2304, 2168, 2174, 2183, 2189, 2200, - 2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273, - 2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351, - 2357, 2366, 2372, 2392, 2398, 2407, 2413, 2424, - 2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500, - 2506, 2515, 2521, 2541, 2547, 2556, 2562, 2573, - 2579, 2588, 2594, 2619, 2625, 2634, 2640, 2651, - 2657, 2666, 2672, 2692, 2698, 2707, 2713, 2724, - 2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572, - 2578, 2587, 2593, 2613, 2619, 2628, 2634, 2645, - 2651, 2660, 2666, 2691, 2697, 2706, 2712, 2723, - 2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796, - 2802, 2811, 2817, 2840, 2846, 2855, 2861, 2872, - 2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945, - 2951, 2960, 2966, 2991, 2997, 3006, 3012, 3023, - 3029, 3038, 3044, 3064, 3070, 3079, 3085, 3096, - 3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013, - 3019, 3028, 3034, 3054, 3060, 3069, 3075, 3086, - 3092, 3101, 3107, 3132, 3138, 3147, 3153, 3164, - 3170, 3179, 3185, 3205, 3211, 3220, 3226, 3237, - 3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313, - 3319, 3328, 3334, 3354, 3360, 3369, 3375, 3386, - 3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464, - 3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537, - 3543, 3552, 3558, 2816, 2822, 2831, 2837, 2848, - 2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921, - 2927, 2936, 2942, 2967, 2973, 2982, 2988, 2999, - 3005, 3014, 3020, 3040, 3046, 3055, 3061, 3072, - 3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148, - 3154, 3163, 3169, 3189, 3195, 3204, 3210, 3221, - 3227, 3236, 3242, 3267, 3273, 3282, 3288, 3299, - 3305, 3314, 3320, 3340, 3346, 3355, 3361, 3372, - 3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289, - 3295, 3304, 3310, 3330, 3336, 3345, 3351, 3362, - 3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440, - 3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513, - 3519, 3528, 3534, 3557, 3563, 3572, 3578, 3589, - 3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662, - 3668, 3677, 3683, 3708, 3714, 3723, 3729, 3740, - 3746, 3755, 3761, 3781, 3787, 3796, 3802, 3813, - 3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661, - 3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734, - 3740, 3749, 3755, 3780, 3786, 3795, 3801, 3812, - 3818, 3827, 3833, 3853, 3859, 3868, 3874, 3885, - 3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961, - 3967, 3976, 3982, 4002, 4008, 4017, 4023, 4034, - 4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112, - 4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185, - 4191, 4200, 4206, 4070, 4076, 4085, 4091, 4102, - 4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175, - 4181, 4190, 4196, 4221, 4227, 4236, 4242, 4253, - 4259, 4268, 4274, 4294, 4300, 4309, 4315, 4326, - 4332, 4341, 4347, 4370, 4376, 4385, 4391, 4402, - 4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475, - 4481, 4490, 4496, 4521, 4527, 4536, 4542, 4553, - 4559, 4568, 4574, 4594, 4600, 4609, 4615, 4626, - 4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547, - 3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620, - 3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698, - 3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771, - 3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847, - 3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920, - 3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998, - 4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071, - 4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988, - 3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061, - 4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139, - 4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212, - 4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288, - 4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361, - 4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439, - 4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512, - 4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360, - 4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433, - 4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511, - 4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584, - 4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660, - 4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733, - 4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811, - 4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884, - 4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801, - 4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874, - 4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952, - 4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025, - 5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101, - 5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174, - 5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252, - 5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325, - 5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636, - 4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709, - 4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787, - 4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860, - 4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936, - 4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009, - 5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087, - 5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160, - 5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077, - 5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150, - 5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228, - 5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301, - 5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377, - 5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450, - 5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528, - 5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601, - 5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449, - 5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522, - 5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600, - 5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673, - 5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749, - 5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822, - 5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900, - 5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973, - 5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890, - 5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963, - 5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041, - 6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114, - 6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190, - 6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263, - 6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341, - 6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414, - 6420, 6429, 6435, 3515, 3521, 3530, 3536, 3547, - 3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620, - 3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698, - 3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771, - 3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847, - 3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920, - 3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998, - 4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071, - 4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988, - 3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061, - 4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139, - 4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212, - 4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288, - 4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361, - 4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439, - 4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512, - 4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360, - 4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433, - 4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511, - 4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584, - 4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660, - 4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733, - 4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811, - 4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884, - 4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801, - 4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874, - 4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952, - 4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025, - 5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101, - 5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174, - 5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252, - 5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325, - 5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636, - 4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709, - 4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787, - 4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860, - 4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936, - 4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009, - 5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087, - 5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160, - 5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077, - 5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150, - 5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228, - 5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301, - 5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377, - 5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450, - 5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528, - 5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601, - 5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449, - 5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522, - 5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600, - 5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673, - 5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749, - 5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822, - 5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900, - 5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973, - 5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890, - 5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963, - 5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041, - 6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114, - 6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190, - 6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263, - 6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341, - 6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414, - 6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335, - 5341, 5350, 5356, 5376, 5382, 5391, 5397, 5408, - 5414, 5423, 5429, 5454, 5460, 5469, 5475, 5486, - 5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559, - 5565, 5574, 5580, 5603, 5609, 5618, 5624, 5635, - 5641, 5650, 5656, 5676, 5682, 5691, 5697, 5708, - 5714, 5723, 5729, 5754, 5760, 5769, 5775, 5786, - 5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859, - 5865, 5874, 5880, 5744, 5750, 5759, 5765, 5776, - 5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849, - 5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927, - 5933, 5942, 5948, 5968, 5974, 5983, 5989, 6000, - 6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076, - 6082, 6091, 6097, 6117, 6123, 6132, 6138, 6149, - 6155, 6164, 6170, 6195, 6201, 6210, 6216, 6227, - 6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300, - 6306, 6315, 6321, 6116, 6122, 6131, 6137, 6148, - 6154, 6163, 6169, 6189, 6195, 6204, 6210, 6221, - 6227, 6236, 6242, 6267, 6273, 6282, 6288, 6299, - 6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372, - 6378, 6387, 6393, 6416, 6422, 6431, 6437, 6448, - 6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521, - 6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599, - 6605, 6614, 6620, 6640, 6646, 6655, 6661, 6672, - 6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589, - 6595, 6604, 6610, 6630, 6636, 6645, 6651, 6662, - 6668, 6677, 6683, 6708, 6714, 6723, 6729, 6740, - 6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813, - 6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889, - 6895, 6904, 6910, 6930, 6936, 6945, 6951, 6962, - 6968, 6977, 6983, 7008, 7014, 7023, 7029, 7040, - 7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113, - 7119, 7128, 7134, 6392, 6398, 6407, 6413, 6424, - 6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497, - 6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575, - 6581, 6590, 6596, 6616, 6622, 6631, 6637, 6648, - 6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724, - 6730, 6739, 6745, 6765, 6771, 6780, 6786, 6797, - 6803, 6812, 6818, 6843, 6849, 6858, 6864, 6875, - 6881, 6890, 6896, 6916, 6922, 6931, 6937, 6948, - 6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865, - 6871, 6880, 6886, 6906, 6912, 6921, 6927, 6938, - 6944, 6953, 6959, 6984, 6990, 6999, 7005, 7016, - 7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089, - 7095, 7104, 7110, 7133, 7139, 7148, 7154, 7165, - 7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238, - 7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316, - 7322, 7331, 7337, 7357, 7363, 7372, 7378, 7389, - 7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237, - 7243, 7252, 7258, 7278, 7284, 7293, 7299, 7310, - 7316, 7325, 7331, 7356, 7362, 7371, 7377, 7388, - 7394, 7403, 7409, 7429, 7435, 7444, 7450, 7461, - 7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537, - 7543, 7552, 7558, 7578, 7584, 7593, 7599, 7610, - 7616, 7625, 7631, 7656, 7662, 7671, 7677, 7688, - 7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761 -}; - static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) { int pattern = VP8LevelCodes[level - 1][0]; int bits = VP8LevelCodes[level - 1][1]; @@ -352,19 +57,21 @@ static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) { //------------------------------------------------------------------------------ // Pre-calc level costs once for all -void VP8CalculateLevelCosts(VP8Proba* const proba) { +void VP8CalculateLevelCosts(VP8EncProba* const proba) { int ctype, band, ctx; if (!proba->dirty_) return; // nothing to do. for (ctype = 0; ctype < NUM_TYPES; ++ctype) { + int n; for (band = 0; band < NUM_BANDS; ++band) { - for(ctx = 0; ctx < NUM_CTX; ++ctx) { + for (ctx = 0; ctx < NUM_CTX; ++ctx) { const uint8_t* const p = proba->coeffs_[ctype][band][ctx]; uint16_t* const table = proba->level_cost_[ctype][band][ctx]; - const int cost_base = VP8BitCost(1, p[1]); + const int cost0 = (ctx > 0) ? VP8BitCost(1, p[0]) : 0; + const int cost_base = VP8BitCost(1, p[1]) + cost0; int v; - table[0] = VP8BitCost(0, p[1]); + table[0] = VP8BitCost(0, p[1]) + cost0; for (v = 1; v <= MAX_VARIABLE_LEVEL; ++v) { table[v] = cost_base + VariableLevelCost(v, p); } @@ -372,6 +79,12 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) { // actually constant. } } + for (n = 0; n < 16; ++n) { // replicate bands. We don't need to sentinel. + for (ctx = 0; ctx < NUM_CTX; ++ctx) { + proba->remapped_costs_[ctype][n][ctx] = + proba->level_cost_[ctype][VP8EncBands[n]][ctx]; + } + } } proba->dirty_ = 0; } @@ -385,110 +98,257 @@ const uint16_t VP8FixedCostsUV[4] = { 302, 984, 439, 642 }; // note: these values include the fixed VP8BitCost(1, 145) mode selection cost. const uint16_t VP8FixedCostsI16[4] = { 663, 919, 872, 919 }; const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = { - { { 251, 1362, 1934, 2085, 2314, 2230, 1839, 1988, 2437, 2348 }, - { 403, 680, 1507, 1519, 2060, 2005, 1992, 1914, 1924, 1733 }, - { 353, 1121, 973, 1895, 2060, 1787, 1671, 1516, 2012, 1868 }, - { 770, 852, 1581, 632, 1393, 1780, 1823, 1936, 1074, 1218 }, - { 510, 1270, 1467, 1319, 847, 1279, 1792, 2094, 1080, 1353 }, - { 488, 1322, 918, 1573, 1300, 883, 1814, 1752, 1756, 1502 }, - { 425, 992, 1820, 1514, 1843, 2440, 937, 1771, 1924, 1129 }, - { 363, 1248, 1257, 1970, 2194, 2385, 1569, 953, 1951, 1601 }, - { 723, 1257, 1631, 964, 963, 1508, 1697, 1824, 671, 1418 }, - { 635, 1038, 1573, 930, 1673, 1413, 1410, 1687, 1410, 749 } }, - { { 451, 613, 1345, 1702, 1870, 1716, 1728, 1766, 2190, 2310 }, - { 678, 453, 1171, 1443, 1925, 1831, 2045, 1781, 1887, 1602 }, - { 711, 666, 674, 1718, 1910, 1493, 1775, 1193, 2325, 2325 }, - { 883, 854, 1583, 542, 1800, 1878, 1664, 2149, 1207, 1087 }, - { 669, 994, 1248, 1122, 949, 1179, 1376, 1729, 1070, 1244 }, - { 715, 1026, 715, 1350, 1430, 930, 1717, 1296, 1479, 1479 }, - { 544, 841, 1656, 1450, 2094, 3883, 1010, 1759, 2076, 809 }, - { 610, 855, 957, 1553, 2067, 1561, 1704, 824, 2066, 1226 }, - { 833, 960, 1416, 819, 1277, 1619, 1501, 1617, 757, 1182 }, - { 711, 964, 1252, 879, 1441, 1828, 1508, 1636, 1594, 734 } }, - { { 605, 764, 734, 1713, 1747, 1192, 1819, 1353, 1877, 2392 }, - { 866, 641, 586, 1622, 2072, 1431, 1888, 1346, 2189, 1764 }, - { 901, 851, 456, 2165, 2281, 1405, 1739, 1193, 2183, 2443 }, - { 770, 1045, 952, 1078, 1342, 1191, 1436, 1063, 1303, 995 }, - { 901, 1086, 727, 1170, 884, 1105, 1267, 1401, 1739, 1337 }, - { 951, 1162, 595, 1488, 1388, 703, 1790, 1366, 2057, 1724 }, - { 534, 986, 1273, 1987, 3273, 1485, 1024, 1399, 1583, 866 }, - { 699, 1182, 695, 1978, 1726, 1986, 1326, 714, 1750, 1672 }, - { 951, 1217, 1209, 920, 1062, 1441, 1548, 999, 952, 932 }, - { 733, 1284, 784, 1256, 1557, 1098, 1257, 1357, 1414, 908 } }, - { { 316, 1075, 1653, 1220, 2145, 2051, 1730, 2131, 1884, 1790 }, - { 745, 516, 1404, 894, 1599, 2375, 2013, 2105, 1475, 1381 }, - { 516, 729, 1088, 1319, 1637, 3426, 1636, 1275, 1531, 1453 }, - { 894, 943, 2138, 468, 1704, 2259, 2069, 1763, 1266, 1158 }, - { 605, 1025, 1235, 871, 1170, 1767, 1493, 1500, 1104, 1258 }, - { 739, 826, 1207, 1151, 1412, 846, 1305, 2726, 1014, 1569 }, - { 558, 825, 1820, 1398, 3344, 1556, 1218, 1550, 1228, 878 }, - { 429, 951, 1089, 1816, 3861, 3861, 1556, 969, 1568, 1828 }, - { 883, 961, 1752, 769, 1468, 1810, 2081, 2346, 613, 1298 }, - { 803, 895, 1372, 641, 1303, 1708, 1686, 1700, 1306, 1033 } }, - { { 439, 1267, 1270, 1579, 963, 1193, 1723, 1729, 1198, 1993 }, - { 705, 725, 1029, 1153, 1176, 1103, 1821, 1567, 1259, 1574 }, - { 723, 859, 802, 1253, 972, 1202, 1407, 1665, 1520, 1674 }, - { 894, 960, 1254, 887, 1052, 1607, 1344, 1349, 865, 1150 }, - { 833, 1312, 1337, 1205, 572, 1288, 1414, 1529, 1088, 1430 }, - { 842, 1279, 1068, 1861, 862, 688, 1861, 1630, 1039, 1381 }, - { 766, 938, 1279, 1546, 3338, 1550, 1031, 1542, 1288, 640 }, - { 715, 1090, 835, 1609, 1100, 1100, 1603, 1019, 1102, 1617 }, - { 894, 1813, 1500, 1188, 789, 1194, 1491, 1919, 617, 1333 }, - { 610, 1076, 1644, 1281, 1283, 975, 1179, 1688, 1434, 889 } }, - { { 544, 971, 1146, 1849, 1221, 740, 1857, 1621, 1683, 2430 }, - { 723, 705, 961, 1371, 1426, 821, 2081, 2079, 1839, 1380 }, - { 783, 857, 703, 2145, 1419, 814, 1791, 1310, 1609, 2206 }, - { 997, 1000, 1153, 792, 1229, 1162, 1810, 1418, 942, 979 }, - { 901, 1226, 883, 1289, 793, 715, 1904, 1649, 1319, 3108 }, - { 979, 1478, 782, 2216, 1454, 455, 3092, 1591, 1997, 1664 }, - { 663, 1110, 1504, 1114, 1522, 3311, 676, 1522, 1530, 1024 }, - { 605, 1138, 1153, 1314, 1569, 1315, 1157, 804, 1574, 1320 }, - { 770, 1216, 1218, 1227, 869, 1384, 1232, 1375, 834, 1239 }, - { 775, 1007, 843, 1216, 1225, 1074, 2527, 1479, 1149, 975 } }, - { { 477, 817, 1309, 1439, 1708, 1454, 1159, 1241, 1945, 1672 }, - { 577, 796, 1112, 1271, 1618, 1458, 1087, 1345, 1831, 1265 }, - { 663, 776, 753, 1940, 1690, 1690, 1227, 1097, 3149, 1361 }, - { 766, 1299, 1744, 1161, 1565, 1106, 1045, 1230, 1232, 707 }, - { 915, 1026, 1404, 1182, 1184, 851, 1428, 2425, 1043, 789 }, - { 883, 1456, 790, 1082, 1086, 985, 1083, 1484, 1238, 1160 }, - { 507, 1345, 2261, 1995, 1847, 3636, 653, 1761, 2287, 933 }, - { 553, 1193, 1470, 2057, 2059, 2059, 833, 779, 2058, 1263 }, - { 766, 1275, 1515, 1039, 957, 1554, 1286, 1540, 1289, 705 }, - { 499, 1378, 1496, 1385, 1850, 1850, 1044, 2465, 1515, 720 } }, - { { 553, 930, 978, 2077, 1968, 1481, 1457, 761, 1957, 2362 }, - { 694, 864, 905, 1720, 1670, 1621, 1429, 718, 2125, 1477 }, - { 699, 968, 658, 3190, 2024, 1479, 1865, 750, 2060, 2320 }, - { 733, 1308, 1296, 1062, 1576, 1322, 1062, 1112, 1172, 816 }, - { 920, 927, 1052, 939, 947, 1156, 1152, 1073, 3056, 1268 }, - { 723, 1534, 711, 1547, 1294, 892, 1553, 928, 1815, 1561 }, - { 663, 1366, 1583, 2111, 1712, 3501, 522, 1155, 2130, 1133 }, - { 614, 1731, 1188, 2343, 1944, 3733, 1287, 487, 3546, 1758 }, - { 770, 1585, 1312, 826, 884, 2673, 1185, 1006, 1195, 1195 }, - { 758, 1333, 1273, 1023, 1621, 1162, 1351, 833, 1479, 862 } }, - { { 376, 1193, 1446, 1149, 1545, 1577, 1870, 1789, 1175, 1823 }, - { 803, 633, 1136, 1058, 1350, 1323, 1598, 2247, 1072, 1252 }, - { 614, 1048, 943, 981, 1152, 1869, 1461, 1020, 1618, 1618 }, - { 1107, 1085, 1282, 592, 1779, 1933, 1648, 2403, 691, 1246 }, - { 851, 1309, 1223, 1243, 895, 1593, 1792, 2317, 627, 1076 }, - { 770, 1216, 1030, 1125, 921, 981, 1629, 1131, 1049, 1646 }, - { 626, 1469, 1456, 1081, 1489, 3278, 981, 1232, 1498, 733 }, - { 617, 1201, 812, 1220, 1476, 1476, 1478, 970, 1228, 1488 }, - { 1179, 1393, 1540, 999, 1243, 1503, 1916, 1925, 414, 1614 }, - { 943, 1088, 1490, 682, 1112, 1372, 1756, 1505, 966, 966 } }, - { { 322, 1142, 1589, 1396, 2144, 1859, 1359, 1925, 2084, 1518 }, - { 617, 625, 1241, 1234, 2121, 1615, 1524, 1858, 1720, 1004 }, - { 553, 851, 786, 1299, 1452, 1560, 1372, 1561, 1967, 1713 }, - { 770, 977, 1396, 568, 1893, 1639, 1540, 2108, 1430, 1013 }, - { 684, 1120, 1375, 982, 930, 2719, 1638, 1643, 933, 993 }, - { 553, 1103, 996, 1356, 1361, 1005, 1507, 1761, 1184, 1268 }, - { 419, 1247, 1537, 1554, 1817, 3606, 1026, 1666, 1829, 923 }, - { 439, 1139, 1101, 1257, 3710, 1922, 1205, 1040, 1931, 1529 }, - { 979, 935, 1269, 847, 1202, 1286, 1530, 1535, 827, 1036 }, - { 516, 1378, 1569, 1110, 1798, 1798, 1198, 2199, 1543, 712 } }, + { { 40, 1151, 1723, 1874, 2103, 2019, 1628, 1777, 2226, 2137 }, + { 192, 469, 1296, 1308, 1849, 1794, 1781, 1703, 1713, 1522 }, + { 142, 910, 762, 1684, 1849, 1576, 1460, 1305, 1801, 1657 }, + { 559, 641, 1370, 421, 1182, 1569, 1612, 1725, 863, 1007 }, + { 299, 1059, 1256, 1108, 636, 1068, 1581, 1883, 869, 1142 }, + { 277, 1111, 707, 1362, 1089, 672, 1603, 1541, 1545, 1291 }, + { 214, 781, 1609, 1303, 1632, 2229, 726, 1560, 1713, 918 }, + { 152, 1037, 1046, 1759, 1983, 2174, 1358, 742, 1740, 1390 }, + { 512, 1046, 1420, 753, 752, 1297, 1486, 1613, 460, 1207 }, + { 424, 827, 1362, 719, 1462, 1202, 1199, 1476, 1199, 538 } }, + { { 240, 402, 1134, 1491, 1659, 1505, 1517, 1555, 1979, 2099 }, + { 467, 242, 960, 1232, 1714, 1620, 1834, 1570, 1676, 1391 }, + { 500, 455, 463, 1507, 1699, 1282, 1564, 982, 2114, 2114 }, + { 672, 643, 1372, 331, 1589, 1667, 1453, 1938, 996, 876 }, + { 458, 783, 1037, 911, 738, 968, 1165, 1518, 859, 1033 }, + { 504, 815, 504, 1139, 1219, 719, 1506, 1085, 1268, 1268 }, + { 333, 630, 1445, 1239, 1883, 3672, 799, 1548, 1865, 598 }, + { 399, 644, 746, 1342, 1856, 1350, 1493, 613, 1855, 1015 }, + { 622, 749, 1205, 608, 1066, 1408, 1290, 1406, 546, 971 }, + { 500, 753, 1041, 668, 1230, 1617, 1297, 1425, 1383, 523 } }, + { { 394, 553, 523, 1502, 1536, 981, 1608, 1142, 1666, 2181 }, + { 655, 430, 375, 1411, 1861, 1220, 1677, 1135, 1978, 1553 }, + { 690, 640, 245, 1954, 2070, 1194, 1528, 982, 1972, 2232 }, + { 559, 834, 741, 867, 1131, 980, 1225, 852, 1092, 784 }, + { 690, 875, 516, 959, 673, 894, 1056, 1190, 1528, 1126 }, + { 740, 951, 384, 1277, 1177, 492, 1579, 1155, 1846, 1513 }, + { 323, 775, 1062, 1776, 3062, 1274, 813, 1188, 1372, 655 }, + { 488, 971, 484, 1767, 1515, 1775, 1115, 503, 1539, 1461 }, + { 740, 1006, 998, 709, 851, 1230, 1337, 788, 741, 721 }, + { 522, 1073, 573, 1045, 1346, 887, 1046, 1146, 1203, 697 } }, + { { 105, 864, 1442, 1009, 1934, 1840, 1519, 1920, 1673, 1579 }, + { 534, 305, 1193, 683, 1388, 2164, 1802, 1894, 1264, 1170 }, + { 305, 518, 877, 1108, 1426, 3215, 1425, 1064, 1320, 1242 }, + { 683, 732, 1927, 257, 1493, 2048, 1858, 1552, 1055, 947 }, + { 394, 814, 1024, 660, 959, 1556, 1282, 1289, 893, 1047 }, + { 528, 615, 996, 940, 1201, 635, 1094, 2515, 803, 1358 }, + { 347, 614, 1609, 1187, 3133, 1345, 1007, 1339, 1017, 667 }, + { 218, 740, 878, 1605, 3650, 3650, 1345, 758, 1357, 1617 }, + { 672, 750, 1541, 558, 1257, 1599, 1870, 2135, 402, 1087 }, + { 592, 684, 1161, 430, 1092, 1497, 1475, 1489, 1095, 822 } }, + { { 228, 1056, 1059, 1368, 752, 982, 1512, 1518, 987, 1782 }, + { 494, 514, 818, 942, 965, 892, 1610, 1356, 1048, 1363 }, + { 512, 648, 591, 1042, 761, 991, 1196, 1454, 1309, 1463 }, + { 683, 749, 1043, 676, 841, 1396, 1133, 1138, 654, 939 }, + { 622, 1101, 1126, 994, 361, 1077, 1203, 1318, 877, 1219 }, + { 631, 1068, 857, 1650, 651, 477, 1650, 1419, 828, 1170 }, + { 555, 727, 1068, 1335, 3127, 1339, 820, 1331, 1077, 429 }, + { 504, 879, 624, 1398, 889, 889, 1392, 808, 891, 1406 }, + { 683, 1602, 1289, 977, 578, 983, 1280, 1708, 406, 1122 }, + { 399, 865, 1433, 1070, 1072, 764, 968, 1477, 1223, 678 } }, + { { 333, 760, 935, 1638, 1010, 529, 1646, 1410, 1472, 2219 }, + { 512, 494, 750, 1160, 1215, 610, 1870, 1868, 1628, 1169 }, + { 572, 646, 492, 1934, 1208, 603, 1580, 1099, 1398, 1995 }, + { 786, 789, 942, 581, 1018, 951, 1599, 1207, 731, 768 }, + { 690, 1015, 672, 1078, 582, 504, 1693, 1438, 1108, 2897 }, + { 768, 1267, 571, 2005, 1243, 244, 2881, 1380, 1786, 1453 }, + { 452, 899, 1293, 903, 1311, 3100, 465, 1311, 1319, 813 }, + { 394, 927, 942, 1103, 1358, 1104, 946, 593, 1363, 1109 }, + { 559, 1005, 1007, 1016, 658, 1173, 1021, 1164, 623, 1028 }, + { 564, 796, 632, 1005, 1014, 863, 2316, 1268, 938, 764 } }, + { { 266, 606, 1098, 1228, 1497, 1243, 948, 1030, 1734, 1461 }, + { 366, 585, 901, 1060, 1407, 1247, 876, 1134, 1620, 1054 }, + { 452, 565, 542, 1729, 1479, 1479, 1016, 886, 2938, 1150 }, + { 555, 1088, 1533, 950, 1354, 895, 834, 1019, 1021, 496 }, + { 704, 815, 1193, 971, 973, 640, 1217, 2214, 832, 578 }, + { 672, 1245, 579, 871, 875, 774, 872, 1273, 1027, 949 }, + { 296, 1134, 2050, 1784, 1636, 3425, 442, 1550, 2076, 722 }, + { 342, 982, 1259, 1846, 1848, 1848, 622, 568, 1847, 1052 }, + { 555, 1064, 1304, 828, 746, 1343, 1075, 1329, 1078, 494 }, + { 288, 1167, 1285, 1174, 1639, 1639, 833, 2254, 1304, 509 } }, + { { 342, 719, 767, 1866, 1757, 1270, 1246, 550, 1746, 2151 }, + { 483, 653, 694, 1509, 1459, 1410, 1218, 507, 1914, 1266 }, + { 488, 757, 447, 2979, 1813, 1268, 1654, 539, 1849, 2109 }, + { 522, 1097, 1085, 851, 1365, 1111, 851, 901, 961, 605 }, + { 709, 716, 841, 728, 736, 945, 941, 862, 2845, 1057 }, + { 512, 1323, 500, 1336, 1083, 681, 1342, 717, 1604, 1350 }, + { 452, 1155, 1372, 1900, 1501, 3290, 311, 944, 1919, 922 }, + { 403, 1520, 977, 2132, 1733, 3522, 1076, 276, 3335, 1547 }, + { 559, 1374, 1101, 615, 673, 2462, 974, 795, 984, 984 }, + { 547, 1122, 1062, 812, 1410, 951, 1140, 622, 1268, 651 } }, + { { 165, 982, 1235, 938, 1334, 1366, 1659, 1578, 964, 1612 }, + { 592, 422, 925, 847, 1139, 1112, 1387, 2036, 861, 1041 }, + { 403, 837, 732, 770, 941, 1658, 1250, 809, 1407, 1407 }, + { 896, 874, 1071, 381, 1568, 1722, 1437, 2192, 480, 1035 }, + { 640, 1098, 1012, 1032, 684, 1382, 1581, 2106, 416, 865 }, + { 559, 1005, 819, 914, 710, 770, 1418, 920, 838, 1435 }, + { 415, 1258, 1245, 870, 1278, 3067, 770, 1021, 1287, 522 }, + { 406, 990, 601, 1009, 1265, 1265, 1267, 759, 1017, 1277 }, + { 968, 1182, 1329, 788, 1032, 1292, 1705, 1714, 203, 1403 }, + { 732, 877, 1279, 471, 901, 1161, 1545, 1294, 755, 755 } }, + { { 111, 931, 1378, 1185, 1933, 1648, 1148, 1714, 1873, 1307 }, + { 406, 414, 1030, 1023, 1910, 1404, 1313, 1647, 1509, 793 }, + { 342, 640, 575, 1088, 1241, 1349, 1161, 1350, 1756, 1502 }, + { 559, 766, 1185, 357, 1682, 1428, 1329, 1897, 1219, 802 }, + { 473, 909, 1164, 771, 719, 2508, 1427, 1432, 722, 782 }, + { 342, 892, 785, 1145, 1150, 794, 1296, 1550, 973, 1057 }, + { 208, 1036, 1326, 1343, 1606, 3395, 815, 1455, 1618, 712 }, + { 228, 928, 890, 1046, 3499, 1711, 994, 829, 1720, 1318 }, + { 768, 724, 1058, 636, 991, 1075, 1319, 1324, 616, 825 }, + { 305, 1167, 1358, 899, 1587, 1587, 987, 1988, 1332, 501 } } }; //------------------------------------------------------------------------------ +// helper functions for residuals struct VP8Residual. + +void VP8InitResidual(int first, int coeff_type, + VP8Encoder* const enc, VP8Residual* const res) { + res->coeff_type = coeff_type; + res->prob = enc->proba_.coeffs_[coeff_type]; + res->stats = enc->proba_.stats_[coeff_type]; + res->costs = enc->proba_.remapped_costs_[coeff_type]; + res->first = first; +} + +//------------------------------------------------------------------------------ +// Mode costs + +int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) { + const int x = (it->i4_ & 3), y = (it->i4_ >> 2); + VP8Residual res; + VP8Encoder* const enc = it->enc_; + int R = 0; + int ctx; + + VP8InitResidual(0, 3, enc, &res); + ctx = it->top_nz_[x] + it->left_nz_[y]; + VP8SetResidualCoeffs(levels, &res); + R += VP8GetResidualCost(ctx, &res); + return R; +} + +int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) { + VP8Residual res; + VP8Encoder* const enc = it->enc_; + int x, y; + int R = 0; + + VP8IteratorNzToBytes(it); // re-import the non-zero context + + // DC + VP8InitResidual(0, 1, enc, &res); + VP8SetResidualCoeffs(rd->y_dc_levels, &res); + R += VP8GetResidualCost(it->top_nz_[8] + it->left_nz_[8], &res); + + // AC + VP8InitResidual(1, 0, enc, &res); + for (y = 0; y < 4; ++y) { + for (x = 0; x < 4; ++x) { + const int ctx = it->top_nz_[x] + it->left_nz_[y]; + VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); + R += VP8GetResidualCost(ctx, &res); + it->top_nz_[x] = it->left_nz_[y] = (res.last >= 0); + } + } + return R; +} + +int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) { + VP8Residual res; + VP8Encoder* const enc = it->enc_; + int ch, x, y; + int R = 0; + + VP8IteratorNzToBytes(it); // re-import the non-zero context + + VP8InitResidual(0, 2, enc, &res); + for (ch = 0; ch <= 2; ch += 2) { + for (y = 0; y < 2; ++y) { + for (x = 0; x < 2; ++x) { + const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; + VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); + R += VP8GetResidualCost(ctx, &res); + it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = (res.last >= 0); + } + } + } + return R; +} + + +//------------------------------------------------------------------------------ +// Recording of token probabilities. + +// Record proba context used +static int Record(int bit, proba_t* const stats) { + proba_t p = *stats; + if (p >= 0xffff0000u) { // an overflow is inbound. + p = ((p + 1u) >> 1) & 0x7fff7fffu; // -> divide the stats by 2. + } + // record bit count (lower 16 bits) and increment total count (upper 16 bits). + p += 0x00010000u + bit; + *stats = p; + return bit; +} + +// We keep the table-free variant around for reference, in case. +#define USE_LEVEL_CODE_TABLE -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" +// Simulate block coding, but only record statistics. +// Note: no need to record the fixed probas. +int VP8RecordCoeffs(int ctx, const VP8Residual* const res) { + int n = res->first; + // should be stats[VP8EncBands[n]], but it's equivalent for n=0 or 1 + proba_t* s = res->stats[n][ctx]; + if (res->last < 0) { + Record(0, s + 0); + return 0; + } + while (n <= res->last) { + int v; + Record(1, s + 0); // order of record doesn't matter + while ((v = res->coeffs[n++]) == 0) { + Record(0, s + 1); + s = res->stats[VP8EncBands[n]][0]; + } + Record(1, s + 1); + if (!Record(2u < (unsigned int)(v + 1), s + 2)) { // v = -1 or 1 + s = res->stats[VP8EncBands[n]][1]; + } else { + v = abs(v); +#if !defined(USE_LEVEL_CODE_TABLE) + if (!Record(v > 4, s + 3)) { + if (Record(v != 2, s + 4)) + Record(v == 4, s + 5); + } else if (!Record(v > 10, s + 6)) { + Record(v > 6, s + 7); + } else if (!Record((v >= 3 + (8 << 2)), s + 8)) { + Record((v >= 3 + (8 << 1)), s + 9); + } else { + Record((v >= 3 + (8 << 3)), s + 10); + } +#else + if (v > MAX_VARIABLE_LEVEL) { + v = MAX_VARIABLE_LEVEL; + } + + { + const int bits = VP8LevelCodes[v - 1][1]; + int pattern = VP8LevelCodes[v - 1][0]; + int i; + for (i = 0; (pattern >>= 1) != 0; ++i) { + const int mask = 2 << i; + if (pattern & 1) Record(!!(bits & mask), s + 3 + i); + } + } #endif + s = res->stats[VP8EncBands[n]][2]; + } + } + if (n < 16) Record(0, s + 0); + return 1; +} + +//------------------------------------------------------------------------------ diff --git a/drivers/webp/enc/cost.h b/drivers/webp/enc/cost.h index 09b75b699d..20960d6d74 100644 --- a/drivers/webp/enc/cost.h +++ b/drivers/webp/enc/cost.h @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Cost tables for level and modes. @@ -12,14 +14,32 @@ #ifndef WEBP_ENC_COST_H_ #define WEBP_ENC_COST_H_ +#include <assert.h> +#include <stdlib.h> #include "./vp8enci.h" -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus extern "C" { #endif -extern const uint16_t VP8LevelFixedCosts[2048]; // approximate cost per level -extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p) +// On-the-fly info about the current set of residuals. Handy to avoid +// passing zillions of params. +typedef struct VP8Residual VP8Residual; +struct VP8Residual { + int first; + int last; + const int16_t* coeffs; + + int coeff_type; + ProbaArray* prob; + StatsArray* stats; + CostArrayPtr costs; +}; + +void VP8InitResidual(int first, int coeff_type, + VP8Encoder* const enc, VP8Residual* const res); + +int VP8RecordCoeffs(int ctx, const VP8Residual* const res); // Cost of coding one event with probability 'proba'. static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) { @@ -28,7 +48,7 @@ static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) { // Level cost calculations extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2]; -void VP8CalculateLevelCosts(VP8Proba* const proba); +void VP8CalculateLevelCosts(VP8EncProba* const proba); static WEBP_INLINE int VP8LevelCost(const uint16_t* const table, int level) { return VP8LevelFixedCosts[level] + table[(level > MAX_VARIABLE_LEVEL) ? MAX_VARIABLE_LEVEL : level]; @@ -41,7 +61,7 @@ extern const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES]; //------------------------------------------------------------------------------ -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus } // extern "C" #endif diff --git a/drivers/webp/enc/filter.c b/drivers/webp/enc/filter.c index 7fb78a3949..1a4dd947fb 100644 --- a/drivers/webp/enc/filter.c +++ b/drivers/webp/enc/filter.c @@ -1,194 +1,68 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Selecting filter level // // Author: somnath@google.com (Somnath Banerjee) +#include <assert.h> #include "./vp8enci.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -// NOTE: clip1, tables and InitTables are repeated entries of dsp.c -static uint8_t abs0[255 + 255 + 1]; // abs(i) -static uint8_t abs1[255 + 255 + 1]; // abs(i)>>1 -static int8_t sclip1[1020 + 1020 + 1]; // clips [-1020, 1020] to [-128, 127] -static int8_t sclip2[112 + 112 + 1]; // clips [-112, 112] to [-16, 15] -static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] - -static int tables_ok = 0; - -static void InitTables(void) { - if (!tables_ok) { - int i; - for (i = -255; i <= 255; ++i) { - abs0[255 + i] = (i < 0) ? -i : i; - abs1[255 + i] = abs0[255 + i] >> 1; - } - for (i = -1020; i <= 1020; ++i) { - sclip1[1020 + i] = (i < -128) ? -128 : (i > 127) ? 127 : i; - } - for (i = -112; i <= 112; ++i) { - sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i; - } - for (i = -255; i <= 255 + 255; ++i) { - clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; - } - tables_ok = 1; - } -} - -//------------------------------------------------------------------------------ -// Edge filtering functions - -// 4 pixels in, 2 pixels out -static WEBP_INLINE void do_filter2(uint8_t* p, int step) { - const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; - const int a = 3 * (q0 - p0) + sclip1[1020 + p1 - q1]; - const int a1 = sclip2[112 + ((a + 4) >> 3)]; - const int a2 = sclip2[112 + ((a + 3) >> 3)]; - p[-step] = clip1[255 + p0 + a2]; - p[ 0] = clip1[255 + q0 - a1]; -} - -// 4 pixels in, 4 pixels out -static WEBP_INLINE void do_filter4(uint8_t* p, int step) { - const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; - const int a = 3 * (q0 - p0); - const int a1 = sclip2[112 + ((a + 4) >> 3)]; - const int a2 = sclip2[112 + ((a + 3) >> 3)]; - const int a3 = (a1 + 1) >> 1; - p[-2*step] = clip1[255 + p1 + a3]; - p[- step] = clip1[255 + p0 + a2]; - p[ 0] = clip1[255 + q0 - a1]; - p[ step] = clip1[255 + q1 - a3]; -} - -// high edge-variance -static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) { - const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; - return (abs0[255 + p1 - p0] > thresh) || (abs0[255 + q1 - q0] > thresh); -} - -static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) { - const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; - return (2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) <= thresh; -} - -static WEBP_INLINE int needs_filter2(const uint8_t* p, - int step, int t, int it) { - const int p3 = p[-4*step], p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; - const int q0 = p[0], q1 = p[step], q2 = p[2*step], q3 = p[3*step]; - if ((2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) > t) - return 0; - return abs0[255 + p3 - p2] <= it && abs0[255 + p2 - p1] <= it && - abs0[255 + p1 - p0] <= it && abs0[255 + q3 - q2] <= it && - abs0[255 + q2 - q1] <= it && abs0[255 + q1 - q0] <= it; +#include "../dsp/dsp.h" + +// This table gives, for a given sharpness, the filtering strength to be +// used (at least) in order to filter a given edge step delta. +// This is constructed by brute force inspection: for all delta, we iterate +// over all possible filtering strength / thresh until needs_filter() returns +// true. +#define MAX_DELTA_SIZE 64 +static const uint8_t kLevelsFromDelta[8][MAX_DELTA_SIZE] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 }, + { 0, 1, 2, 3, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 17, 18, + 20, 21, 23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42, + 44, 45, 47, 48, 50, 51, 53, 54, 56, 57, 59, 60, 62, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }, + { 0, 1, 2, 3, 5, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 19, + 20, 22, 23, 25, 26, 28, 29, 31, 32, 34, 35, 37, 38, 40, 41, 43, + 44, 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }, + { 0, 1, 2, 3, 5, 6, 7, 8, 9, 11, 12, 13, 15, 16, 18, 19, + 21, 22, 24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, + 45, 46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }, + { 0, 1, 2, 3, 5, 6, 7, 8, 9, 11, 12, 14, 15, 17, 18, 20, + 21, 23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42, 44, + 45, 47, 48, 50, 51, 53, 54, 56, 57, 59, 60, 62, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }, + { 0, 1, 2, 4, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 19, 20, + 22, 23, 25, 26, 28, 29, 31, 32, 34, 35, 37, 38, 40, 41, 43, 44, + 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }, + { 0, 1, 2, 4, 5, 7, 8, 9, 11, 12, 13, 15, 16, 18, 19, 21, + 22, 24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, 45, + 46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }, + { 0, 1, 2, 4, 5, 7, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, + 23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42, 44, 45, + 47, 48, 50, 51, 53, 54, 56, 57, 59, 60, 62, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 } +}; + +int VP8FilterStrengthFromDelta(int sharpness, int delta) { + const int pos = (delta < MAX_DELTA_SIZE) ? delta : MAX_DELTA_SIZE - 1; + assert(sharpness >= 0 && sharpness <= 7); + return kLevelsFromDelta[sharpness][pos]; } //------------------------------------------------------------------------------ -// Simple In-loop filtering (Paragraph 15.2) - -static void SimpleVFilter16(uint8_t* p, int stride, int thresh) { - int i; - for (i = 0; i < 16; ++i) { - if (needs_filter(p + i, stride, thresh)) { - do_filter2(p + i, stride); - } - } -} - -static void SimpleHFilter16(uint8_t* p, int stride, int thresh) { - int i; - for (i = 0; i < 16; ++i) { - if (needs_filter(p + i * stride, 1, thresh)) { - do_filter2(p + i * stride, 1); - } - } -} - -static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) { - int k; - for (k = 3; k > 0; --k) { - p += 4 * stride; - SimpleVFilter16(p, stride, thresh); - } -} - -static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) { - int k; - for (k = 3; k > 0; --k) { - p += 4; - SimpleHFilter16(p, stride, thresh); - } -} - -//------------------------------------------------------------------------------ -// Complex In-loop filtering (Paragraph 15.3) - -static WEBP_INLINE void FilterLoop24(uint8_t* p, - int hstride, int vstride, int size, - int thresh, int ithresh, int hev_thresh) { - while (size-- > 0) { - if (needs_filter2(p, hstride, thresh, ithresh)) { - if (hev(p, hstride, hev_thresh)) { - do_filter2(p, hstride); - } else { - do_filter4(p, hstride); - } - } - p += vstride; - } -} - -// on three inner edges -static void VFilter16i(uint8_t* p, int stride, - int thresh, int ithresh, int hev_thresh) { - int k; - for (k = 3; k > 0; --k) { - p += 4 * stride; - FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh); - } -} - -static void HFilter16i(uint8_t* p, int stride, - int thresh, int ithresh, int hev_thresh) { - int k; - for (k = 3; k > 0; --k) { - p += 4; - FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh); - } -} - -static void VFilter8i(uint8_t* u, uint8_t* v, int stride, - int thresh, int ithresh, int hev_thresh) { - FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); - FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); -} - -static void HFilter8i(uint8_t* u, uint8_t* v, int stride, - int thresh, int ithresh, int hev_thresh) { - FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh); - FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh); -} - -//------------------------------------------------------------------------------ - -void (*VP8EncVFilter16i)(uint8_t*, int, int, int, int) = VFilter16i; -void (*VP8EncHFilter16i)(uint8_t*, int, int, int, int) = HFilter16i; -void (*VP8EncVFilter8i)(uint8_t*, uint8_t*, int, int, int, int) = VFilter8i; -void (*VP8EncHFilter8i)(uint8_t*, uint8_t*, int, int, int, int) = HFilter8i; - -void (*VP8EncSimpleVFilter16i)(uint8_t*, int, int) = SimpleVFilter16i; -void (*VP8EncSimpleHFilter16i)(uint8_t*, int, int) = SimpleHFilter16i; - -//------------------------------------------------------------------------------ // Paragraph 15.4: compute the inner-edge filtering strength static int GetILevel(int sharpness, int level) { @@ -211,22 +85,22 @@ static void DoFilter(const VP8EncIterator* const it, int level) { const int ilevel = GetILevel(enc->config_->filter_sharpness, level); const int limit = 2 * level + ilevel; - uint8_t* const y_dst = it->yuv_out2_ + Y_OFF; - uint8_t* const u_dst = it->yuv_out2_ + U_OFF; - uint8_t* const v_dst = it->yuv_out2_ + V_OFF; + uint8_t* const y_dst = it->yuv_out2_ + Y_OFF_ENC; + uint8_t* const u_dst = it->yuv_out2_ + U_OFF_ENC; + uint8_t* const v_dst = it->yuv_out2_ + V_OFF_ENC; // copy current block to yuv_out2_ - memcpy(y_dst, it->yuv_out_, YUV_SIZE * sizeof(uint8_t)); + memcpy(y_dst, it->yuv_out_, YUV_SIZE_ENC * sizeof(uint8_t)); if (enc->filter_hdr_.simple_ == 1) { // simple - VP8EncSimpleHFilter16i(y_dst, BPS, limit); - VP8EncSimpleVFilter16i(y_dst, BPS, limit); + VP8SimpleHFilter16i(y_dst, BPS, limit); + VP8SimpleVFilter16i(y_dst, BPS, limit); } else { // complex const int hev_thresh = (level >= 40) ? 2 : (level >= 15) ? 1 : 0; - VP8EncHFilter16i(y_dst, BPS, limit, ilevel, hev_thresh); - VP8EncHFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh); - VP8EncVFilter16i(y_dst, BPS, limit, ilevel, hev_thresh); - VP8EncVFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh); + VP8HFilter16i(y_dst, BPS, limit, ilevel, hev_thresh); + VP8HFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh); + VP8VFilter16i(y_dst, BPS, limit, ilevel, hev_thresh); + VP8VFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh); } } @@ -321,13 +195,16 @@ static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) { // compute SSIM in a 10 x 10 window for (x = 3; x < 13; x++) { for (y = 3; y < 13; y++) { - VP8SSIMAccumulate(yuv1 + Y_OFF, BPS, yuv2 + Y_OFF, BPS, x, y, 16, 16, &s); + VP8SSIMAccumulate(yuv1 + Y_OFF_ENC, BPS, yuv2 + Y_OFF_ENC, BPS, + x, y, 16, 16, &s); } } for (x = 1; x < 7; x++) { for (y = 1; y < 7; y++) { - VP8SSIMAccumulate(yuv1 + U_OFF, BPS, yuv2 + U_OFF, BPS, x, y, 8, 8, &s); - VP8SSIMAccumulate(yuv1 + V_OFF, BPS, yuv2 + V_OFF, BPS, x, y, 8, 8, &s); + VP8SSIMAccumulate(yuv1 + U_OFF_ENC, BPS, yuv2 + U_OFF_ENC, BPS, + x, y, 8, 8, &s); + VP8SSIMAccumulate(yuv1 + V_OFF_ENC, BPS, yuv2 + V_OFF_ENC, BPS, + x, y, 8, 8, &s); } } return VP8SSIMGet(&s); @@ -338,28 +215,28 @@ static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) { // loop filter strength void VP8InitFilter(VP8EncIterator* const it) { - int s, i; - if (!it->lf_stats_) return; - - InitTables(); - for (s = 0; s < NUM_MB_SEGMENTS; s++) { - for (i = 0; i < MAX_LF_LEVELS; i++) { - (*it->lf_stats_)[s][i] = 0; + if (it->lf_stats_ != NULL) { + int s, i; + for (s = 0; s < NUM_MB_SEGMENTS; s++) { + for (i = 0; i < MAX_LF_LEVELS; i++) { + (*it->lf_stats_)[s][i] = 0; + } } } } void VP8StoreFilterStats(VP8EncIterator* const it) { int d; + VP8Encoder* const enc = it->enc_; const int s = it->mb_->segment_; - const int level0 = it->enc_->dqm_[s].fstrength_; // TODO: ref_lf_delta[] + const int level0 = enc->dqm_[s].fstrength_; // TODO: ref_lf_delta[] // explore +/-quant range of values around level0 - const int delta_min = -it->enc_->dqm_[s].quant_; - const int delta_max = it->enc_->dqm_[s].quant_; + const int delta_min = -enc->dqm_[s].quant_; + const int delta_max = enc->dqm_[s].quant_; const int step_size = (delta_max - delta_min >= 4) ? 4 : 1; - if (!it->lf_stats_) return; + if (it->lf_stats_ == NULL) return; // NOTE: Currently we are applying filter only across the sublock edges // There are two reasons for that. @@ -383,27 +260,40 @@ void VP8StoreFilterStats(VP8EncIterator* const it) { } void VP8AdjustFilterStrength(VP8EncIterator* const it) { - int s; VP8Encoder* const enc = it->enc_; - - if (!it->lf_stats_) { - return; - } - for (s = 0; s < NUM_MB_SEGMENTS; s++) { - int i, best_level = 0; - // Improvement over filter level 0 should be at least 1e-5 (relatively) - double best_v = 1.00001 * (*it->lf_stats_)[s][0]; - for (i = 1; i < MAX_LF_LEVELS; i++) { - const double v = (*it->lf_stats_)[s][i]; - if (v > best_v) { - best_v = v; - best_level = i; + if (it->lf_stats_ != NULL) { + int s; + for (s = 0; s < NUM_MB_SEGMENTS; s++) { + int i, best_level = 0; + // Improvement over filter level 0 should be at least 1e-5 (relatively) + double best_v = 1.00001 * (*it->lf_stats_)[s][0]; + for (i = 1; i < MAX_LF_LEVELS; i++) { + const double v = (*it->lf_stats_)[s][i]; + if (v > best_v) { + best_v = v; + best_level = i; + } } + enc->dqm_[s].fstrength_ = best_level; } - enc->dqm_[s].fstrength_ = best_level; + } else if (enc->config_->filter_strength > 0) { + int max_level = 0; + int s; + for (s = 0; s < NUM_MB_SEGMENTS; s++) { + VP8SegmentInfo* const dqm = &enc->dqm_[s]; + // this '>> 3' accounts for some inverse WHT scaling + const int delta = (dqm->max_edge_ * dqm->y2_.q_[1]) >> 3; + const int level = + VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, delta); + if (level > dqm->fstrength_) { + dqm->fstrength_ = level; + } + if (max_level < dqm->fstrength_) { + max_level = dqm->fstrength_; + } + } + enc->filter_hdr_.level_ = max_level; } } -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif +// ----------------------------------------------------------------------------- diff --git a/drivers/webp/enc/frame.c b/drivers/webp/enc/frame.c index bdd360069b..5b7a40b9ad 100644 --- a/drivers/webp/enc/frame.c +++ b/drivers/webp/enc/frame.c @@ -1,61 +1,98 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // frame coding and analysis // // Author: Skal (pascal.massimino@gmail.com) -#include <assert.h> -#include <stdlib.h> #include <string.h> #include <math.h> -#include "./vp8enci.h" #include "./cost.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif +#include "./vp8enci.h" +#include "../dsp/dsp.h" +#include "../webp/format_constants.h" // RIFF constants #define SEGMENT_VISU 0 #define DEBUG_SEARCH 0 // useful to track search convergence -// On-the-fly info about the current set of residuals. Handy to avoid -// passing zillions of params. -typedef struct { - int first; - int last; - const int16_t* coeffs; - - int coeff_type; - ProbaArray* prob; - StatsArray* stats; - CostArray* cost; -} VP8Residual; +//------------------------------------------------------------------------------ +// multi-pass convergence + +#define HEADER_SIZE_ESTIMATE (RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + \ + VP8_FRAME_HEADER_SIZE) +#define DQ_LIMIT 0.4 // convergence is considered reached if dq < DQ_LIMIT +// we allow 2k of extra head-room in PARTITION0 limit. +#define PARTITION0_SIZE_LIMIT ((VP8_MAX_PARTITION0_SIZE - 2048ULL) << 11) + +typedef struct { // struct for organizing convergence in either size or PSNR + int is_first; + float dq; + float q, last_q; + double value, last_value; // PSNR or size + double target; + int do_size_search; +} PassStats; + +static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) { + const uint64_t target_size = (uint64_t)enc->config_->target_size; + const int do_size_search = (target_size != 0); + const float target_PSNR = enc->config_->target_PSNR; + + s->is_first = 1; + s->dq = 10.f; + s->q = s->last_q = enc->config_->quality; + s->target = do_size_search ? (double)target_size + : (target_PSNR > 0.) ? target_PSNR + : 40.; // default, just in case + s->value = s->last_value = 0.; + s->do_size_search = do_size_search; + return do_size_search; +} + +static float Clamp(float v, float min, float max) { + return (v < min) ? min : (v > max) ? max : v; +} + +static float ComputeNextQ(PassStats* const s) { + float dq; + if (s->is_first) { + dq = (s->value > s->target) ? -s->dq : s->dq; + s->is_first = 0; + } else if (s->value != s->last_value) { + const double slope = (s->target - s->value) / (s->last_value - s->value); + dq = (float)(slope * (s->last_q - s->q)); + } else { + dq = 0.; // we're done?! + } + // Limit variable to avoid large swings. + s->dq = Clamp(dq, -30.f, 30.f); + s->last_q = s->q; + s->last_value = s->value; + s->q = Clamp(s->q + s->dq, 0.f, 100.f); + return s->q; +} //------------------------------------------------------------------------------ // Tables for level coding -const uint8_t VP8EncBands[16 + 1] = { - 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, - 0 // sentinel -}; - -static const uint8_t kCat3[] = { 173, 148, 140 }; -static const uint8_t kCat4[] = { 176, 155, 140, 135 }; -static const uint8_t kCat5[] = { 180, 157, 141, 134, 130 }; -static const uint8_t kCat6[] = +const uint8_t VP8Cat3[] = { 173, 148, 140 }; +const uint8_t VP8Cat4[] = { 176, 155, 140, 135 }; +const uint8_t VP8Cat5[] = { 180, 157, 141, 134, 130 }; +const uint8_t VP8Cat6[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129 }; //------------------------------------------------------------------------------ // Reset the statistics about: number of skips, token proba, level cost,... static void ResetStats(VP8Encoder* const enc) { - VP8Proba* const proba = &enc->proba_; + VP8EncProba* const proba = &enc->proba_; VP8CalculateLevelCosts(proba); proba->nb_skip_ = 0; } @@ -71,7 +108,7 @@ static int CalcSkipProba(uint64_t nb, uint64_t total) { // Returns the bit-cost for coding the skip probability. static int FinalizeSkipProba(VP8Encoder* const enc) { - VP8Proba* const proba = &enc->proba_; + VP8EncProba* const proba = &enc->proba_; const int nb_mbs = enc->mb_w_ * enc->mb_h_; const int nb_events = proba->nb_skip_; int size; @@ -86,82 +123,6 @@ static int FinalizeSkipProba(VP8Encoder* const enc) { return size; } -//------------------------------------------------------------------------------ -// Recording of token probabilities. - -static void ResetTokenStats(VP8Encoder* const enc) { - VP8Proba* const proba = &enc->proba_; - memset(proba->stats_, 0, sizeof(proba->stats_)); -} - -// Record proba context used -static int Record(int bit, proba_t* const stats) { - proba_t p = *stats; - if (p >= 0xffff0000u) { // an overflow is inbound. - p = ((p + 1u) >> 1) & 0x7fff7fffu; // -> divide the stats by 2. - } - // record bit count (lower 16 bits) and increment total count (upper 16 bits). - p += 0x00010000u + bit; - *stats = p; - return bit; -} - -// We keep the table free variant around for reference, in case. -#define USE_LEVEL_CODE_TABLE - -// Simulate block coding, but only record statistics. -// Note: no need to record the fixed probas. -static int RecordCoeffs(int ctx, const VP8Residual* const res) { - int n = res->first; - proba_t* s = res->stats[VP8EncBands[n]][ctx]; - if (res->last < 0) { - Record(0, s + 0); - return 0; - } - while (n <= res->last) { - int v; - Record(1, s + 0); - while ((v = res->coeffs[n++]) == 0) { - Record(0, s + 1); - s = res->stats[VP8EncBands[n]][0]; - } - Record(1, s + 1); - if (!Record(2u < (unsigned int)(v + 1), s + 2)) { // v = -1 or 1 - s = res->stats[VP8EncBands[n]][1]; - } else { - v = abs(v); -#if !defined(USE_LEVEL_CODE_TABLE) - if (!Record(v > 4, s + 3)) { - if (Record(v != 2, s + 4)) - Record(v == 4, s + 5); - } else if (!Record(v > 10, s + 6)) { - Record(v > 6, s + 7); - } else if (!Record((v >= 3 + (8 << 2)), s + 8)) { - Record((v >= 3 + (8 << 1)), s + 9); - } else { - Record((v >= 3 + (8 << 3)), s + 10); - } -#else - if (v > MAX_VARIABLE_LEVEL) - v = MAX_VARIABLE_LEVEL; - - { - const int bits = VP8LevelCodes[v - 1][1]; - int pattern = VP8LevelCodes[v - 1][0]; - int i; - for (i = 0; (pattern >>= 1) != 0; ++i) { - const int mask = 2 << i; - if (pattern & 1) Record(!!(bits & mask), s + 3 + i); - } - } -#endif - s = res->stats[VP8EncBands[n]][2]; - } - } - if (n < 16) Record(0, s + 0); - return 1; -} - // Collect statistics and deduce probabilities for next coding pass. // Return the total bit-cost for coding the probability updates. static int CalcTokenProba(int nb, int total) { @@ -174,8 +135,12 @@ static int BranchCost(int nb, int total, int proba) { return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba); } -static int FinalizeTokenProbas(VP8Encoder* const enc) { - VP8Proba* const proba = &enc->proba_; +static void ResetTokenStats(VP8Encoder* const enc) { + VP8EncProba* const proba = &enc->proba_; + memset(proba->stats_, 0, sizeof(proba->stats_)); +} + +static int FinalizeTokenProbas(VP8EncProba* const proba) { int has_changed = 0; int size = 0; int t, b, c, p; @@ -212,129 +177,44 @@ static int FinalizeTokenProbas(VP8Encoder* const enc) { } //------------------------------------------------------------------------------ -// helper functions for residuals struct VP8Residual. - -static void InitResidual(int first, int coeff_type, - VP8Encoder* const enc, VP8Residual* const res) { - res->coeff_type = coeff_type; - res->prob = enc->proba_.coeffs_[coeff_type]; - res->stats = enc->proba_.stats_[coeff_type]; - res->cost = enc->proba_.level_cost_[coeff_type]; - res->first = first; -} +// Finalize Segment probability based on the coding tree -static void SetResidualCoeffs(const int16_t* const coeffs, - VP8Residual* const res) { - int n; - res->last = -1; - for (n = 15; n >= res->first; --n) { - if (coeffs[n]) { - res->last = n; - break; - } - } - res->coeffs = coeffs; +static int GetProba(int a, int b) { + const int total = a + b; + return (total == 0) ? 255 // that's the default probability. + : (255 * a + total / 2) / total; // rounded proba } -//------------------------------------------------------------------------------ -// Mode costs - -static int GetResidualCost(int ctx, const VP8Residual* const res) { - int n = res->first; - int p0 = res->prob[VP8EncBands[n]][ctx][0]; - const uint16_t* t = res->cost[VP8EncBands[n]][ctx]; - int cost; +static void SetSegmentProbas(VP8Encoder* const enc) { + int p[NUM_MB_SEGMENTS] = { 0 }; + int n; - if (res->last < 0) { - return VP8BitCost(0, p0); - } - cost = 0; - while (n <= res->last) { - const int v = res->coeffs[n]; - const int b = VP8EncBands[n + 1]; - ++n; - if (v == 0) { - // short-case for VP8LevelCost(t, 0) (note: VP8LevelFixedCosts[0] == 0): - cost += t[0]; - t = res->cost[b][0]; - continue; - } - cost += VP8BitCost(1, p0); - if (2u >= (unsigned int)(v + 1)) { // v = -1 or 1 - // short-case for "VP8LevelCost(t, 1)" (256 is VP8LevelFixedCosts[1]): - cost += 256 + t[1]; - p0 = res->prob[b][1][0]; - t = res->cost[b][1]; - } else { - cost += VP8LevelCost(t, abs(v)); - p0 = res->prob[b][2][0]; - t = res->cost[b][2]; - } + for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) { + const VP8MBInfo* const mb = &enc->mb_info_[n]; + p[mb->segment_]++; } - if (n < 16) cost += VP8BitCost(0, p0); - return cost; -} - -int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) { - const int x = (it->i4_ & 3), y = (it->i4_ >> 2); - VP8Residual res; - VP8Encoder* const enc = it->enc_; - int R = 0; - int ctx; - - InitResidual(0, 3, enc, &res); - ctx = it->top_nz_[x] + it->left_nz_[y]; - SetResidualCoeffs(levels, &res); - R += GetResidualCost(ctx, &res); - return R; -} - -int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) { - VP8Residual res; - VP8Encoder* const enc = it->enc_; - int x, y; - int R = 0; - - VP8IteratorNzToBytes(it); // re-import the non-zero context - - // DC - InitResidual(0, 1, enc, &res); - SetResidualCoeffs(rd->y_dc_levels, &res); - R += GetResidualCost(it->top_nz_[8] + it->left_nz_[8], &res); - - // AC - InitResidual(1, 0, enc, &res); - for (y = 0; y < 4; ++y) { - for (x = 0; x < 4; ++x) { - const int ctx = it->top_nz_[x] + it->left_nz_[y]; - SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); - R += GetResidualCost(ctx, &res); - it->top_nz_[x] = it->left_nz_[y] = (res.last >= 0); + if (enc->pic_->stats != NULL) { + for (n = 0; n < NUM_MB_SEGMENTS; ++n) { + enc->pic_->stats->segment_size[n] = p[n]; } } - return R; -} - -int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) { - VP8Residual res; - VP8Encoder* const enc = it->enc_; - int ch, x, y; - int R = 0; - - VP8IteratorNzToBytes(it); // re-import the non-zero context - - InitResidual(0, 2, enc, &res); - for (ch = 0; ch <= 2; ch += 2) { - for (y = 0; y < 2; ++y) { - for (x = 0; x < 2; ++x) { - const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; - SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); - R += GetResidualCost(ctx, &res); - it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = (res.last >= 0); - } - } + if (enc->segment_hdr_.num_segments_ > 1) { + uint8_t* const probas = enc->proba_.segments_; + probas[0] = GetProba(p[0] + p[1], p[2] + p[3]); + probas[1] = GetProba(p[0], p[1]); + probas[2] = GetProba(p[2], p[3]); + + enc->segment_hdr_.update_map_ = + (probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255); + enc->segment_hdr_.size_ = + p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) + + p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) + + p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) + + p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2])); + } else { + enc->segment_hdr_.update_map_ = 0; + enc->segment_hdr_.size_ = 0; } - return R; } //------------------------------------------------------------------------------ @@ -342,7 +222,8 @@ int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) { static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) { int n = res->first; - const uint8_t* p = res->prob[VP8EncBands[n]][ctx]; + // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1 + const uint8_t* p = res->prob[n][ctx]; if (!VP8PutBit(bw, res->last >= 0, p[0])) { return 0; } @@ -371,30 +252,30 @@ static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) { } else { int mask; const uint8_t* tab; - if (v < 3 + (8 << 1)) { // kCat3 (3b) + if (v < 3 + (8 << 1)) { // VP8Cat3 (3b) VP8PutBit(bw, 0, p[8]); VP8PutBit(bw, 0, p[9]); v -= 3 + (8 << 0); mask = 1 << 2; - tab = kCat3; - } else if (v < 3 + (8 << 2)) { // kCat4 (4b) + tab = VP8Cat3; + } else if (v < 3 + (8 << 2)) { // VP8Cat4 (4b) VP8PutBit(bw, 0, p[8]); VP8PutBit(bw, 1, p[9]); v -= 3 + (8 << 1); mask = 1 << 3; - tab = kCat4; - } else if (v < 3 + (8 << 3)) { // kCat5 (5b) + tab = VP8Cat4; + } else if (v < 3 + (8 << 3)) { // VP8Cat5 (5b) VP8PutBit(bw, 1, p[8]); VP8PutBit(bw, 0, p[10]); v -= 3 + (8 << 2); mask = 1 << 4; - tab = kCat5; - } else { // kCat6 (11b) + tab = VP8Cat5; + } else { // VP8Cat6 (11b) VP8PutBit(bw, 1, p[8]); VP8PutBit(bw, 1, p[10]); v -= 3 + (8 << 3); mask = 1 << 10; - tab = kCat6; + tab = VP8Cat6; } while (mask) { VP8PutBit(bw, !!(v & mask), *tab++); @@ -411,8 +292,7 @@ static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) { return 1; } -static void CodeResiduals(VP8BitWriter* const bw, - VP8EncIterator* const it, +static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it, const VP8ModeScore* const rd) { int x, y, ch; VP8Residual res; @@ -425,32 +305,32 @@ static void CodeResiduals(VP8BitWriter* const bw, pos1 = VP8BitWriterPos(bw); if (i16) { - InitResidual(0, 1, enc, &res); - SetResidualCoeffs(rd->y_dc_levels, &res); + VP8InitResidual(0, 1, enc, &res); + VP8SetResidualCoeffs(rd->y_dc_levels, &res); it->top_nz_[8] = it->left_nz_[8] = PutCoeffs(bw, it->top_nz_[8] + it->left_nz_[8], &res); - InitResidual(1, 0, enc, &res); + VP8InitResidual(1, 0, enc, &res); } else { - InitResidual(0, 3, enc, &res); + VP8InitResidual(0, 3, enc, &res); } // luma-AC for (y = 0; y < 4; ++y) { for (x = 0; x < 4; ++x) { const int ctx = it->top_nz_[x] + it->left_nz_[y]; - SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); + VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); it->top_nz_[x] = it->left_nz_[y] = PutCoeffs(bw, ctx, &res); } } pos2 = VP8BitWriterPos(bw); // U/V - InitResidual(0, 2, enc, &res); + VP8InitResidual(0, 2, enc, &res); for (ch = 0; ch <= 2; ch += 2) { for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x) { const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; - SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); + VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = PutCoeffs(bw, ctx, &res); } @@ -475,33 +355,33 @@ static void RecordResiduals(VP8EncIterator* const it, VP8IteratorNzToBytes(it); if (it->mb_->type_ == 1) { // i16x16 - InitResidual(0, 1, enc, &res); - SetResidualCoeffs(rd->y_dc_levels, &res); + VP8InitResidual(0, 1, enc, &res); + VP8SetResidualCoeffs(rd->y_dc_levels, &res); it->top_nz_[8] = it->left_nz_[8] = - RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res); - InitResidual(1, 0, enc, &res); + VP8RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res); + VP8InitResidual(1, 0, enc, &res); } else { - InitResidual(0, 3, enc, &res); + VP8InitResidual(0, 3, enc, &res); } // luma-AC for (y = 0; y < 4; ++y) { for (x = 0; x < 4; ++x) { const int ctx = it->top_nz_[x] + it->left_nz_[y]; - SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); - it->top_nz_[x] = it->left_nz_[y] = RecordCoeffs(ctx, &res); + VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); + it->top_nz_[x] = it->left_nz_[y] = VP8RecordCoeffs(ctx, &res); } } // U/V - InitResidual(0, 2, enc, &res); + VP8InitResidual(0, 2, enc, &res); for (ch = 0; ch <= 2; ch += 2) { for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x) { const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; - SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); + VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = - RecordCoeffs(ctx, &res); + VP8RecordCoeffs(ctx, &res); } } } @@ -512,176 +392,59 @@ static void RecordResiduals(VP8EncIterator* const it, //------------------------------------------------------------------------------ // Token buffer -#ifdef USE_TOKEN_BUFFER - -void VP8TBufferInit(VP8TBuffer* const b) { - b->rows_ = NULL; - b->tokens_ = NULL; - b->last_ = &b->rows_; - b->left_ = 0; - b->error_ = 0; -} - -int VP8TBufferNewPage(VP8TBuffer* const b) { - VP8Tokens* const page = b->error_ ? NULL : (VP8Tokens*)malloc(sizeof(*page)); - if (page == NULL) { - b->error_ = 1; - return 0; - } - *b->last_ = page; - b->last_ = &page->next_; - b->left_ = MAX_NUM_TOKEN; - b->tokens_ = page->tokens_; - return 1; -} - -void VP8TBufferClear(VP8TBuffer* const b) { - if (b != NULL) { - const VP8Tokens* p = b->rows_; - while (p != NULL) { - const VP8Tokens* const next = p->next_; - free((void*)p); - p = next; - } - VP8TBufferInit(b); - } -} - -int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw, - const uint8_t* const probas) { - VP8Tokens* p = b->rows_; - if (b->error_) return 0; - while (p != NULL) { - const int N = (p->next_ == NULL) ? b->left_ : 0; - int n = MAX_NUM_TOKEN; - while (n-- > N) { - VP8PutBit(bw, (p->tokens_[n] >> 15) & 1, probas[p->tokens_[n] & 0x7fff]); - } - p = p->next_; - } - return 1; -} - -#define TOKEN_ID(b, ctx, p) ((p) + NUM_PROBAS * ((ctx) + (b) * NUM_CTX)) - -static int RecordCoeffTokens(int ctx, const VP8Residual* const res, - VP8TBuffer* tokens) { - int n = res->first; - int b = VP8EncBands[n]; - if (!VP8AddToken(tokens, res->last >= 0, TOKEN_ID(b, ctx, 0))) { - return 0; - } - - while (n < 16) { - const int c = res->coeffs[n++]; - const int sign = c < 0; - int v = sign ? -c : c; - const int base_id = TOKEN_ID(b, ctx, 0); - if (!VP8AddToken(tokens, v != 0, base_id + 1)) { - b = VP8EncBands[n]; - ctx = 0; - continue; - } - if (!VP8AddToken(tokens, v > 1, base_id + 2)) { - b = VP8EncBands[n]; - ctx = 1; - } else { - if (!VP8AddToken(tokens, v > 4, base_id + 3)) { - if (VP8AddToken(tokens, v != 2, base_id + 4)) - VP8AddToken(tokens, v == 4, base_id + 5); - } else if (!VP8AddToken(tokens, v > 10, base_id + 6)) { - if (!VP8AddToken(tokens, v > 6, base_id + 7)) { -// VP8AddToken(tokens, v == 6, 159); - } else { -// VP8AddToken(tokens, v >= 9, 165); -// VP8AddToken(tokens, !(v & 1), 145); - } - } else { - int mask; - const uint8_t* tab; - if (v < 3 + (8 << 1)) { // kCat3 (3b) - VP8AddToken(tokens, 0, base_id + 8); - VP8AddToken(tokens, 0, base_id + 9); - v -= 3 + (8 << 0); - mask = 1 << 2; - tab = kCat3; - } else if (v < 3 + (8 << 2)) { // kCat4 (4b) - VP8AddToken(tokens, 0, base_id + 8); - VP8AddToken(tokens, 1, base_id + 9); - v -= 3 + (8 << 1); - mask = 1 << 3; - tab = kCat4; - } else if (v < 3 + (8 << 3)) { // kCat5 (5b) - VP8AddToken(tokens, 1, base_id + 8); - VP8AddToken(tokens, 0, base_id + 10); - v -= 3 + (8 << 2); - mask = 1 << 4; - tab = kCat5; - } else { // kCat6 (11b) - VP8AddToken(tokens, 1, base_id + 8); - VP8AddToken(tokens, 1, base_id + 10); - v -= 3 + (8 << 3); - mask = 1 << 10; - tab = kCat6; - } - while (mask) { - // VP8AddToken(tokens, !!(v & mask), *tab++); - mask >>= 1; - } - } - ctx = 2; - } - b = VP8EncBands[n]; - // VP8PutBitUniform(bw, sign); - if (n == 16 || !VP8AddToken(tokens, n <= res->last, TOKEN_ID(b, ctx, 0))) { - return 1; // EOB - } - } - return 1; -} +#if !defined(DISABLE_TOKEN_BUFFER) -static void RecordTokens(VP8EncIterator* const it, - const VP8ModeScore* const rd, VP8TBuffer tokens[2]) { +static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd, + VP8TBuffer* const tokens) { int x, y, ch; VP8Residual res; VP8Encoder* const enc = it->enc_; VP8IteratorNzToBytes(it); if (it->mb_->type_ == 1) { // i16x16 - InitResidual(0, 1, enc, &res); - SetResidualCoeffs(rd->y_dc_levels, &res); -// TODO(skal): FIX -> it->top_nz_[8] = it->left_nz_[8] = - RecordCoeffTokens(it->top_nz_[8] + it->left_nz_[8], &res, &tokens[0]); - InitResidual(1, 0, enc, &res); + const int ctx = it->top_nz_[8] + it->left_nz_[8]; + VP8InitResidual(0, 1, enc, &res); + VP8SetResidualCoeffs(rd->y_dc_levels, &res); + it->top_nz_[8] = it->left_nz_[8] = + VP8RecordCoeffTokens(ctx, 1, + res.first, res.last, res.coeffs, tokens); + VP8RecordCoeffs(ctx, &res); + VP8InitResidual(1, 0, enc, &res); } else { - InitResidual(0, 3, enc, &res); + VP8InitResidual(0, 3, enc, &res); } // luma-AC for (y = 0; y < 4; ++y) { for (x = 0; x < 4; ++x) { const int ctx = it->top_nz_[x] + it->left_nz_[y]; - SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); + VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); it->top_nz_[x] = it->left_nz_[y] = - RecordCoeffTokens(ctx, &res, &tokens[0]); + VP8RecordCoeffTokens(ctx, res.coeff_type, + res.first, res.last, res.coeffs, tokens); + VP8RecordCoeffs(ctx, &res); } } // U/V - InitResidual(0, 2, enc, &res); + VP8InitResidual(0, 2, enc, &res); for (ch = 0; ch <= 2; ch += 2) { for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x) { const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; - SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); + VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = - RecordCoeffTokens(ctx, &res, &tokens[1]); + VP8RecordCoeffTokens(ctx, 2, + res.first, res.last, res.coeffs, tokens); + VP8RecordCoeffs(ctx, &res); } } } + VP8IteratorBytesToNz(it); + return !tokens->error_; } -#endif // USE_TOKEN_BUFFER +#endif // !DISABLE_TOKEN_BUFFER //------------------------------------------------------------------------------ // ExtraInfo map / Debug function @@ -697,7 +460,10 @@ static void SetBlock(uint8_t* p, int value, int size) { #endif static void ResetSSE(VP8Encoder* const enc) { - memset(enc->sse_, 0, sizeof(enc->sse_)); + enc->sse_[0] = 0; + enc->sse_[1] = 0; + enc->sse_[2] = 0; + // Note: enc->sse_[3] is managed by alpha.c enc->sse_count_ = 0; } @@ -706,9 +472,9 @@ static void StoreSSE(const VP8EncIterator* const it) { const uint8_t* const in = it->yuv_in_; const uint8_t* const out = it->yuv_out_; // Note: not totally accurate at boundary. And doesn't include in-loop filter. - enc->sse_[0] += VP8SSE16x16(in + Y_OFF, out + Y_OFF); - enc->sse_[1] += VP8SSE8x8(in + U_OFF, out + U_OFF); - enc->sse_[2] += VP8SSE8x8(in + V_OFF, out + V_OFF); + enc->sse_[0] += VP8SSE16x16(in + Y_OFF_ENC, out + Y_OFF_ENC); + enc->sse_[1] += VP8SSE8x8(in + U_OFF_ENC, out + U_OFF_ENC); + enc->sse_[2] += VP8SSE8x8(in + V_OFF_ENC, out + V_OFF_ENC); enc->sse_count_ += 16 * 16; } @@ -736,72 +502,163 @@ static void StoreSideInfo(const VP8EncIterator* const it) { const int b = (int)((it->luma_bits_ + it->uv_bits_ + 7) >> 3); *info = (b > 255) ? 255 : b; break; } + case 7: *info = mb->alpha_; break; default: *info = 0; break; - }; + } } #if SEGMENT_VISU // visualize segments and prediction modes - SetBlock(it->yuv_out_ + Y_OFF, mb->segment_ * 64, 16); - SetBlock(it->yuv_out_ + U_OFF, it->preds_[0] * 64, 8); - SetBlock(it->yuv_out_ + V_OFF, mb->uv_mode_ * 64, 8); + SetBlock(it->yuv_out_ + Y_OFF_ENC, mb->segment_ * 64, 16); + SetBlock(it->yuv_out_ + U_OFF_ENC, it->preds_[0] * 64, 8); + SetBlock(it->yuv_out_ + V_OFF_ENC, mb->uv_mode_ * 64, 8); #endif } -//------------------------------------------------------------------------------ -// Main loops -// -// VP8EncLoop(): does the final bitstream coding. - -static void ResetAfterSkip(VP8EncIterator* const it) { - if (it->mb_->type_ == 1) { - *it->nz_ = 0; // reset all predictors - it->left_nz_[8] = 0; - } else { - *it->nz_ &= (1 << 24); // preserve the dc_nz bit - } +static double GetPSNR(uint64_t mse, uint64_t size) { + return (mse > 0 && size > 0) ? 10. * log10(255. * 255. * size / mse) : 99; } -int VP8EncLoop(VP8Encoder* const enc) { - int i, s, p; - int ok = 1; - VP8EncIterator it; - VP8ModeScore info; - const int dont_use_skip = !enc->proba_.use_skip_proba_; - const int rd_opt = enc->rd_opt_level_; - const int kAverageBytesPerMB = 5; // TODO: have a kTable[quality/10] - const int bytes_per_parts = - enc->mb_w_ * enc->mb_h_ * kAverageBytesPerMB / enc->num_parts_; +//------------------------------------------------------------------------------ +// StatLoop(): only collect statistics (number of skips, token usage, ...). +// This is used for deciding optimal probabilities. It also modifies the +// quantizer value if some target (size, PSNR) was specified. - // Initialize the bit-writers - for (p = 0; p < enc->num_parts_; ++p) { - VP8BitWriterInit(enc->parts_ + p, bytes_per_parts); - } +static void SetLoopParams(VP8Encoder* const enc, float q) { + // Make sure the quality parameter is inside valid bounds + q = Clamp(q, 0.f, 100.f); + + VP8SetSegmentParams(enc, q); // setup segment quantizations and filters + SetSegmentProbas(enc); // compute segment probabilities ResetStats(enc); ResetSSE(enc); +} + +static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt, + int nb_mbs, int percent_delta, + PassStats* const s) { + VP8EncIterator it; + uint64_t size = 0; + uint64_t size_p0 = 0; + uint64_t distortion = 0; + const uint64_t pixel_count = nb_mbs * 384; VP8IteratorInit(enc, &it); - VP8InitFilter(&it); + SetLoopParams(enc, s->q); do { - VP8IteratorImport(&it); - // Warning! order is important: first call VP8Decimate() and - // *then* decide how to code the skip decision if there's one. - if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) { - CodeResiduals(it.bw_, &it, &info); - } else { // reset predictors after a skip - ResetAfterSkip(&it); + VP8ModeScore info; + VP8IteratorImport(&it, NULL); + if (VP8Decimate(&it, &info, rd_opt)) { + // Just record the number of skips and act like skip_proba is not used. + enc->proba_.nb_skip_++; } -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (enc->use_layer_) { - VP8EncCodeLayerBlock(&it); + RecordResiduals(&it, &info); + size += info.R + info.H; + size_p0 += info.H; + distortion += info.D; + if (percent_delta && !VP8IteratorProgress(&it, percent_delta)) + return 0; + VP8IteratorSaveBoundary(&it); + } while (VP8IteratorNext(&it) && --nb_mbs > 0); + + size_p0 += enc->segment_hdr_.size_; + if (s->do_size_search) { + size += FinalizeSkipProba(enc); + size += FinalizeTokenProbas(&enc->proba_); + size = ((size + size_p0 + 1024) >> 11) + HEADER_SIZE_ESTIMATE; + s->value = (double)size; + } else { + s->value = GetPSNR(distortion, pixel_count); + } + return size_p0; +} + +static int StatLoop(VP8Encoder* const enc) { + const int method = enc->method_; + const int do_search = enc->do_search_; + const int fast_probe = ((method == 0 || method == 3) && !do_search); + int num_pass_left = enc->config_->pass; + const int task_percent = 20; + const int percent_per_pass = + (task_percent + num_pass_left / 2) / num_pass_left; + const int final_percent = enc->percent_ + task_percent; + const VP8RDLevel rd_opt = + (method >= 3 || do_search) ? RD_OPT_BASIC : RD_OPT_NONE; + int nb_mbs = enc->mb_w_ * enc->mb_h_; + PassStats stats; + + InitPassStats(enc, &stats); + ResetTokenStats(enc); + + // Fast mode: quick analysis pass over few mbs. Better than nothing. + if (fast_probe) { + if (method == 3) { // we need more stats for method 3 to be reliable. + nb_mbs = (nb_mbs > 200) ? nb_mbs >> 1 : 100; + } else { + nb_mbs = (nb_mbs > 200) ? nb_mbs >> 2 : 50; } + } + + while (num_pass_left-- > 0) { + const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) || + (num_pass_left == 0) || + (enc->max_i4_header_bits_ == 0); + const uint64_t size_p0 = + OneStatPass(enc, rd_opt, nb_mbs, percent_per_pass, &stats); + if (size_p0 == 0) return 0; +#if (DEBUG_SEARCH > 0) + printf("#%d value:%.1lf -> %.1lf q:%.2f -> %.2f\n", + num_pass_left, stats.last_value, stats.value, stats.last_q, stats.q); #endif - StoreSideInfo(&it); - VP8StoreFilterStats(&it); - VP8IteratorExport(&it); - ok = VP8IteratorProgress(&it, 20); - } while (ok && VP8IteratorNext(&it, it.yuv_out_)); + if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) { + ++num_pass_left; + enc->max_i4_header_bits_ >>= 1; // strengthen header bit limitation... + continue; // ...and start over + } + if (is_last_pass) { + break; + } + // If no target size: just do several pass without changing 'q' + if (do_search) { + ComputeNextQ(&stats); + if (fabs(stats.dq) <= DQ_LIMIT) break; + } + } + if (!do_search || !stats.do_size_search) { + // Need to finalize probas now, since it wasn't done during the search. + FinalizeSkipProba(enc); + FinalizeTokenProbas(&enc->proba_); + } + VP8CalculateLevelCosts(&enc->proba_); // finalize costs + return WebPReportProgress(enc->pic_, final_percent, &enc->percent_); +} + +//------------------------------------------------------------------------------ +// Main loops +// + +static const int kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 }; +static int PreLoopInitialize(VP8Encoder* const enc) { + int p; + int ok = 1; + const int average_bytes_per_MB = kAverageBytesPerMB[enc->base_quant_ >> 4]; + const int bytes_per_parts = + enc->mb_w_ * enc->mb_h_ * average_bytes_per_MB / enc->num_parts_; + // Initialize the bit-writers + for (p = 0; ok && p < enc->num_parts_; ++p) { + ok = VP8BitWriterInit(enc->parts_ + p, bytes_per_parts); + } + if (!ok) { + VP8EncFreeBitWriters(enc); // malloc error occurred + WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + } + return ok; +} + +static int PostLoopFinalize(VP8EncIterator* const it, int ok) { + VP8Encoder* const enc = it->enc_; if (ok) { // Finalize the partitions, check for extra errors. + int p; for (p = 0; p < enc->num_parts_; ++p) { VP8BitWriterFinish(enc->parts_ + p); ok &= !enc->parts_[p].error_; @@ -809,131 +666,185 @@ int VP8EncLoop(VP8Encoder* const enc) { } if (ok) { // All good. Finish up. - if (enc->pic_->stats) { // finalize byte counters... + if (enc->pic_->stats != NULL) { // finalize byte counters... + int i, s; for (i = 0; i <= 2; ++i) { for (s = 0; s < NUM_MB_SEGMENTS; ++s) { - enc->residual_bytes_[i][s] = (int)((it.bit_count_[s][i] + 7) >> 3); + enc->residual_bytes_[i][s] = (int)((it->bit_count_[s][i] + 7) >> 3); } } } - VP8AdjustFilterStrength(&it); // ...and store filter stats. + VP8AdjustFilterStrength(it); // ...and store filter stats. } else { // Something bad happened -> need to do some memory cleanup. VP8EncFreeBitWriters(enc); } - return ok; } //------------------------------------------------------------------------------ -// VP8StatLoop(): only collect statistics (number of skips, token usage, ...) -// This is used for deciding optimal probabilities. It also -// modifies the quantizer value if some target (size, PNSR) -// was specified. - -#define kHeaderSizeEstimate (15 + 20 + 10) // TODO: fix better - -static int OneStatPass(VP8Encoder* const enc, float q, int rd_opt, int nb_mbs, - float* const PSNR, int percent_delta) { - VP8EncIterator it; - uint64_t size = 0; - uint64_t distortion = 0; - const uint64_t pixel_count = nb_mbs * 384; +// VP8EncLoop(): does the final bitstream coding. - // Make sure the quality parameter is inside valid bounds - if (q < 0.) { - q = 0; - } else if (q > 100.) { - q = 100; +static void ResetAfterSkip(VP8EncIterator* const it) { + if (it->mb_->type_ == 1) { + *it->nz_ = 0; // reset all predictors + it->left_nz_[8] = 0; + } else { + *it->nz_ &= (1 << 24); // preserve the dc_nz bit } +} - VP8SetSegmentParams(enc, q); // setup segment quantizations and filters +int VP8EncLoop(VP8Encoder* const enc) { + VP8EncIterator it; + int ok = PreLoopInitialize(enc); + if (!ok) return 0; - ResetStats(enc); - ResetTokenStats(enc); + StatLoop(enc); // stats-collection loop VP8IteratorInit(enc, &it); + VP8InitFilter(&it); do { VP8ModeScore info; - VP8IteratorImport(&it); - if (VP8Decimate(&it, &info, rd_opt)) { - // Just record the number of skips and act like skip_proba is not used. - enc->proba_.nb_skip_++; + const int dont_use_skip = !enc->proba_.use_skip_proba_; + const VP8RDLevel rd_opt = enc->rd_opt_level_; + + VP8IteratorImport(&it, NULL); + // Warning! order is important: first call VP8Decimate() and + // *then* decide how to code the skip decision if there's one. + if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) { + CodeResiduals(it.bw_, &it, &info); + } else { // reset predictors after a skip + ResetAfterSkip(&it); } - RecordResiduals(&it, &info); - size += info.R; - distortion += info.D; - if (percent_delta && !VP8IteratorProgress(&it, percent_delta)) - return 0; - } while (VP8IteratorNext(&it, it.yuv_out_) && --nb_mbs > 0); - size += FinalizeSkipProba(enc); - size += FinalizeTokenProbas(enc); - size += enc->segment_hdr_.size_; - size = ((size + 1024) >> 11) + kHeaderSizeEstimate; - - if (PSNR) { - *PSNR = (float)(10.* log10(255. * 255. * pixel_count / distortion)); - } - return (int)size; + StoreSideInfo(&it); + VP8StoreFilterStats(&it); + VP8IteratorExport(&it); + ok = VP8IteratorProgress(&it, 20); + VP8IteratorSaveBoundary(&it); + } while (ok && VP8IteratorNext(&it)); + + return PostLoopFinalize(&it, ok); } -// successive refinement increments. -static const int dqs[] = { 20, 15, 10, 8, 6, 4, 2, 1, 0 }; +//------------------------------------------------------------------------------ +// Single pass using Token Buffer. -int VP8StatLoop(VP8Encoder* const enc) { - const int do_search = - (enc->config_->target_size > 0 || enc->config_->target_PSNR > 0); - const int fast_probe = (enc->method_ < 2 && !do_search); - float q = enc->config_->quality; - const int max_passes = enc->config_->pass; - const int task_percent = 20; - const int percent_per_pass = (task_percent + max_passes / 2) / max_passes; - const int final_percent = enc->percent_ + task_percent; - int pass; - int nb_mbs; +#if !defined(DISABLE_TOKEN_BUFFER) - // Fast mode: quick analysis pass over few mbs. Better than nothing. - nb_mbs = enc->mb_w_ * enc->mb_h_; - if (fast_probe && nb_mbs > 100) nb_mbs = 100; - - // No target size: just do several pass without changing 'q' - if (!do_search) { - for (pass = 0; pass < max_passes; ++pass) { - const int rd_opt = (enc->method_ > 2); - if (!OneStatPass(enc, q, rd_opt, nb_mbs, NULL, percent_per_pass)) { - return 0; - } +#define MIN_COUNT 96 // minimum number of macroblocks before updating stats + +int VP8EncTokenLoop(VP8Encoder* const enc) { + // Roughly refresh the proba eight times per pass + int max_count = (enc->mb_w_ * enc->mb_h_) >> 3; + int num_pass_left = enc->config_->pass; + const int do_search = enc->do_search_; + VP8EncIterator it; + VP8EncProba* const proba = &enc->proba_; + const VP8RDLevel rd_opt = enc->rd_opt_level_; + const uint64_t pixel_count = enc->mb_w_ * enc->mb_h_ * 384; + PassStats stats; + int ok; + + InitPassStats(enc, &stats); + ok = PreLoopInitialize(enc); + if (!ok) return 0; + + if (max_count < MIN_COUNT) max_count = MIN_COUNT; + + assert(enc->num_parts_ == 1); + assert(enc->use_tokens_); + assert(proba->use_skip_proba_ == 0); + assert(rd_opt >= RD_OPT_BASIC); // otherwise, token-buffer won't be useful + assert(num_pass_left > 0); + + while (ok && num_pass_left-- > 0) { + const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) || + (num_pass_left == 0) || + (enc->max_i4_header_bits_ == 0); + uint64_t size_p0 = 0; + uint64_t distortion = 0; + int cnt = max_count; + VP8IteratorInit(enc, &it); + SetLoopParams(enc, stats.q); + if (is_last_pass) { + ResetTokenStats(enc); + VP8InitFilter(&it); // don't collect stats until last pass (too costly) } - } else { - // binary search for a size close to target - for (pass = 0; pass < max_passes && (dqs[pass] > 0); ++pass) { - const int rd_opt = 1; - float PSNR; - int criterion; - const int size = OneStatPass(enc, q, rd_opt, nb_mbs, &PSNR, - percent_per_pass); -#if DEBUG_SEARCH - printf("#%d size=%d PSNR=%.2f q=%.2f\n", pass, size, PSNR, q); -#endif - if (!size) return 0; - if (enc->config_->target_PSNR > 0) { - criterion = (PSNR < enc->config_->target_PSNR); - } else { - criterion = (size < enc->config_->target_size); + VP8TBufferClear(&enc->tokens_); + do { + VP8ModeScore info; + VP8IteratorImport(&it, NULL); + if (--cnt < 0) { + FinalizeTokenProbas(proba); + VP8CalculateLevelCosts(proba); // refresh cost tables for rd-opt + cnt = max_count; } - // dichotomize - if (criterion) { - q += dqs[pass]; - } else { - q -= dqs[pass]; + VP8Decimate(&it, &info, rd_opt); + ok = RecordTokens(&it, &info, &enc->tokens_); + if (!ok) { + WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + break; + } + size_p0 += info.H; + distortion += info.D; + if (is_last_pass) { + StoreSideInfo(&it); + VP8StoreFilterStats(&it); + VP8IteratorExport(&it); + ok = VP8IteratorProgress(&it, 20); } + VP8IteratorSaveBoundary(&it); + } while (ok && VP8IteratorNext(&it)); + if (!ok) break; + + size_p0 += enc->segment_hdr_.size_; + if (stats.do_size_search) { + uint64_t size = FinalizeTokenProbas(&enc->proba_); + size += VP8EstimateTokenSize(&enc->tokens_, + (const uint8_t*)proba->coeffs_); + size = (size + size_p0 + 1024) >> 11; // -> size in bytes + size += HEADER_SIZE_ESTIMATE; + stats.value = (double)size; + } else { // compute and store PSNR + stats.value = GetPSNR(distortion, pixel_count); + } + +#if (DEBUG_SEARCH > 0) + printf("#%2d metric:%.1lf -> %.1lf last_q=%.2lf q=%.2lf dq=%.2lf\n", + num_pass_left, stats.last_value, stats.value, + stats.last_q, stats.q, stats.dq); +#endif + if (size_p0 > PARTITION0_SIZE_LIMIT) { + ++num_pass_left; + enc->max_i4_header_bits_ >>= 1; // strengthen header bit limitation... + continue; // ...and start over + } + if (is_last_pass) { + break; // done + } + if (do_search) { + ComputeNextQ(&stats); // Adjust q } } - return WebPReportProgress(enc->pic_, final_percent, &enc->percent_); + if (ok) { + if (!stats.do_size_search) { + FinalizeTokenProbas(&enc->proba_); + } + ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0, + (const uint8_t*)proba->coeffs_, 1); + } + ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_); + return PostLoopFinalize(&it, ok); +} + +#else + +int VP8EncTokenLoop(VP8Encoder* const enc) { + (void)enc; + return 0; // we shouldn't be here. } +#endif // DISABLE_TOKEN_BUFFER + //------------------------------------------------------------------------------ -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/enc/histogram.c b/drivers/webp/enc/histogram.c index ca838e064d..62c320d809 100644 --- a/drivers/webp/enc/histogram.c +++ b/drivers/webp/enc/histogram.c @@ -1,38 +1,82 @@ // Copyright 2012 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Author: Jyrki Alakuijala (jyrki@google.com) // #ifdef HAVE_CONFIG_H -#include "config.h" +#include "../webp/config.h" #endif #include <math.h> -#include <stdio.h> #include "./backward_references.h" #include "./histogram.h" #include "../dsp/lossless.h" #include "../utils/utils.h" +#define MAX_COST 1.e38 + +// Number of partitions for the three dominant (literal, red and blue) symbol +// costs. +#define NUM_PARTITIONS 4 +// The size of the bin-hash corresponding to the three dominant costs. +#define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS) +// Maximum number of histograms allowed in greedy combining algorithm. +#define MAX_HISTO_GREEDY 100 + static void HistogramClear(VP8LHistogram* const p) { - memset(p->literal_, 0, sizeof(p->literal_)); - memset(p->red_, 0, sizeof(p->red_)); - memset(p->blue_, 0, sizeof(p->blue_)); - memset(p->alpha_, 0, sizeof(p->alpha_)); - memset(p->distance_, 0, sizeof(p->distance_)); - p->bit_cost_ = 0; + uint32_t* const literal = p->literal_; + const int cache_bits = p->palette_code_bits_; + const int histo_size = VP8LGetHistogramSize(cache_bits); + memset(p, 0, histo_size); + p->palette_code_bits_ = cache_bits; + p->literal_ = literal; +} + +// Swap two histogram pointers. +static void HistogramSwap(VP8LHistogram** const A, VP8LHistogram** const B) { + VP8LHistogram* const tmp = *A; + *A = *B; + *B = tmp; +} + +static void HistogramCopy(const VP8LHistogram* const src, + VP8LHistogram* const dst) { + uint32_t* const dst_literal = dst->literal_; + const int dst_cache_bits = dst->palette_code_bits_; + const int histo_size = VP8LGetHistogramSize(dst_cache_bits); + assert(src->palette_code_bits_ == dst_cache_bits); + memcpy(dst, src, histo_size); + dst->literal_ = dst_literal; +} + +int VP8LGetHistogramSize(int cache_bits) { + const int literal_size = VP8LHistogramNumCodes(cache_bits); + const size_t total_size = sizeof(VP8LHistogram) + sizeof(int) * literal_size; + assert(total_size <= (size_t)0x7fffffff); + return (int)total_size; +} + +void VP8LFreeHistogram(VP8LHistogram* const histo) { + WebPSafeFree(histo); +} + +void VP8LFreeHistogramSet(VP8LHistogramSet* const histo) { + WebPSafeFree(histo); } void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs, VP8LHistogram* const histo) { - int i; - for (i = 0; i < refs->size; ++i) { - VP8LHistogramAddSinglePixOrCopy(histo, &refs->refs[i]); + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + while (VP8LRefsCursorOk(&c)) { + VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos); + VP8LRefsCursorNext(&c); } } @@ -51,13 +95,25 @@ void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) { HistogramClear(p); } +VP8LHistogram* VP8LAllocateHistogram(int cache_bits) { + VP8LHistogram* histo = NULL; + const int total_size = VP8LGetHistogramSize(cache_bits); + uint8_t* const memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory)); + if (memory == NULL) return NULL; + histo = (VP8LHistogram*)memory; + // literal_ won't necessary be aligned. + histo->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram)); + VP8LHistogramInit(histo, cache_bits); + return histo; +} + VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) { int i; VP8LHistogramSet* set; - VP8LHistogram* bulk; - const uint64_t total_size = (uint64_t)sizeof(*set) - + size * sizeof(*set->histograms) - + size * sizeof(**set->histograms); + const int histo_size = VP8LGetHistogramSize(cache_bits); + const size_t total_size = + sizeof(*set) + size * (sizeof(*set->histograms) + + histo_size + WEBP_ALIGN_CST); uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory)); if (memory == NULL) return NULL; @@ -65,12 +121,15 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) { memory += sizeof(*set); set->histograms = (VP8LHistogram**)memory; memory += size * sizeof(*set->histograms); - bulk = (VP8LHistogram*)memory; set->max_size = size; set->size = size; for (i = 0; i < size; ++i) { - set->histograms[i] = bulk + i; + memory = (uint8_t*)WEBP_ALIGN(memory); + set->histograms[i] = (VP8LHistogram*)memory; + // literal_ won't necessary be aligned. + set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram)); VP8LHistogramInit(set->histograms[i], cache_bits); + memory += histo_size; } return set; } @@ -85,151 +144,183 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, ++histo->literal_[PixOrCopyLiteral(v, 1)]; ++histo->blue_[PixOrCopyLiteral(v, 0)]; } else if (PixOrCopyIsCacheIdx(v)) { - int literal_ix = 256 + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v); + const int literal_ix = + NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v); ++histo->literal_[literal_ix]; } else { - int code, extra_bits_count, extra_bits_value; - PrefixEncode(PixOrCopyLength(v), - &code, &extra_bits_count, &extra_bits_value); - ++histo->literal_[256 + code]; - PrefixEncode(PixOrCopyDistance(v), - &code, &extra_bits_count, &extra_bits_value); + int code, extra_bits; + VP8LPrefixEncodeBits(PixOrCopyLength(v), &code, &extra_bits); + ++histo->literal_[NUM_LITERAL_CODES + code]; + VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits); ++histo->distance_[code]; } } +// ----------------------------------------------------------------------------- +// Various histogram combine/cost-eval functions + +static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, + const VP8LHistogram* const b, + double cost_threshold, + double* cost) { + const int palette_code_bits = a->palette_code_bits_; + assert(a->palette_code_bits_ == b->palette_code_bits_); + *cost += VP8LGetCombinedEntropy(a->literal_, b->literal_, + VP8LHistogramNumCodes(palette_code_bits)); + *cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES, + b->literal_ + NUM_LITERAL_CODES, + NUM_LENGTH_CODES); + if (*cost > cost_threshold) return 0; + + *cost += VP8LGetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES); + if (*cost > cost_threshold) return 0; + + *cost += VP8LGetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES); + if (*cost > cost_threshold) return 0; + + *cost += VP8LGetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES); + if (*cost > cost_threshold) return 0; + + *cost += VP8LGetCombinedEntropy(a->distance_, b->distance_, + NUM_DISTANCE_CODES); + *cost += VP8LExtraCostCombined(a->distance_, b->distance_, + NUM_DISTANCE_CODES); + if (*cost > cost_threshold) return 0; + + return 1; +} - -static double BitsEntropy(const int* const array, int n) { - double retval = 0.; - int sum = 0; - int nonzeros = 0; - int max_val = 0; - int i; - double mix; - for (i = 0; i < n; ++i) { - if (array[i] != 0) { - sum += array[i]; - ++nonzeros; - retval -= VP8LFastSLog2(array[i]); - if (max_val < array[i]) { - max_val = array[i]; - } - } +// Performs out = a + b, computing the cost C(a+b) - C(a) - C(b) while comparing +// to the threshold value 'cost_threshold'. The score returned is +// Score = C(a+b) - C(a) - C(b), where C(a) + C(b) is known and fixed. +// Since the previous score passed is 'cost_threshold', we only need to compare +// the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out +// early. +static double HistogramAddEval(const VP8LHistogram* const a, + const VP8LHistogram* const b, + VP8LHistogram* const out, + double cost_threshold) { + double cost = 0; + const double sum_cost = a->bit_cost_ + b->bit_cost_; + cost_threshold += sum_cost; + + if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) { + VP8LHistogramAdd(a, b, out); + out->bit_cost_ = cost; + out->palette_code_bits_ = a->palette_code_bits_; + out->trivial_symbol_ = (a->trivial_symbol_ == b->trivial_symbol_) ? + a->trivial_symbol_ : VP8L_NON_TRIVIAL_SYM; } - retval += VP8LFastSLog2(sum); - if (nonzeros < 5) { - if (nonzeros <= 1) { - return 0; - } - // Two symbols, they will be 0 and 1 in a Huffman code. - // Let's mix in a bit of entropy to favor good clustering when - // distributions of these are combined. - if (nonzeros == 2) { - return 0.99 * sum + 0.01 * retval; - } - // No matter what the entropy says, we cannot be better than min_limit - // with Huffman coding. I am mixing a bit of entropy into the - // min_limit since it produces much better (~0.5 %) compression results - // perhaps because of better entropy clustering. - if (nonzeros == 3) { - mix = 0.95; - } else { - mix = 0.7; // nonzeros == 4. - } - } else { - mix = 0.627; - } + return cost - sum_cost; +} - { - double min_limit = 2 * sum - max_val; - min_limit = mix * min_limit + (1.0 - mix) * retval; - return (retval < min_limit) ? min_limit : retval; - } +// Same as HistogramAddEval(), except that the resulting histogram +// is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit +// the term C(b) which is constant over all the evaluations. +static double HistogramAddThresh(const VP8LHistogram* const a, + const VP8LHistogram* const b, + double cost_threshold) { + double cost = -a->bit_cost_; + GetCombinedHistogramEntropy(a, b, cost_threshold, &cost); + return cost; } -double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) { - double retval = BitsEntropy(&p->literal_[0], VP8LHistogramNumCodes(p)) - + BitsEntropy(&p->red_[0], 256) - + BitsEntropy(&p->blue_[0], 256) - + BitsEntropy(&p->alpha_[0], 256) - + BitsEntropy(&p->distance_[0], NUM_DISTANCE_CODES); - // Compute the extra bits cost. - int i; - for (i = 2; i < NUM_LENGTH_CODES - 2; ++i) { - retval += - (i >> 1) * p->literal_[256 + i + 2]; - } - for (i = 2; i < NUM_DISTANCE_CODES - 2; ++i) { - retval += (i >> 1) * p->distance_[i + 2]; - } - return retval; -} - - -// Returns the cost encode the rle-encoded entropy code. -// The constants in this function are experimental. -static double HuffmanCost(const int* const population, int length) { - // Small bias because Huffman code length is typically not stored in - // full length. - static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3; - static const double kSmallBias = 9.1; - double retval = kHuffmanCodeOfHuffmanCodeSize - kSmallBias; - int streak = 0; - int i = 0; - for (; i < length - 1; ++i) { - ++streak; - if (population[i] == population[i + 1]) { - continue; - } - last_streak_hack: - // population[i] points now to the symbol in the streak of same values. - if (streak > 3) { - if (population[i] == 0) { - retval += 1.5625 + 0.234375 * streak; - } else { - retval += 2.578125 + 0.703125 * streak; - } - } else { - if (population[i] == 0) { - retval += 1.796875 * streak; - } else { - retval += 3.28125 * streak; - } - } - streak = 0; - } - if (i == length - 1) { - ++streak; - goto last_streak_hack; +// ----------------------------------------------------------------------------- + +// The structure to keep track of cost range for the three dominant entropy +// symbols. +// TODO(skal): Evaluate if float can be used here instead of double for +// representing the entropy costs. +typedef struct { + double literal_max_; + double literal_min_; + double red_max_; + double red_min_; + double blue_max_; + double blue_min_; +} DominantCostRange; + +static void DominantCostRangeInit(DominantCostRange* const c) { + c->literal_max_ = 0.; + c->literal_min_ = MAX_COST; + c->red_max_ = 0.; + c->red_min_ = MAX_COST; + c->blue_max_ = 0.; + c->blue_min_ = MAX_COST; +} + +static void UpdateDominantCostRange( + const VP8LHistogram* const h, DominantCostRange* const c) { + if (c->literal_max_ < h->literal_cost_) c->literal_max_ = h->literal_cost_; + if (c->literal_min_ > h->literal_cost_) c->literal_min_ = h->literal_cost_; + if (c->red_max_ < h->red_cost_) c->red_max_ = h->red_cost_; + if (c->red_min_ > h->red_cost_) c->red_min_ = h->red_cost_; + if (c->blue_max_ < h->blue_cost_) c->blue_max_ = h->blue_cost_; + if (c->blue_min_ > h->blue_cost_) c->blue_min_ = h->blue_cost_; +} + +static void UpdateHistogramCost(VP8LHistogram* const h) { + uint32_t alpha_sym, red_sym, blue_sym; + const double alpha_cost = VP8LPopulationCost(h->alpha_, NUM_LITERAL_CODES, + &alpha_sym); + const double distance_cost = + VP8LPopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) + + VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES); + const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_); + h->literal_cost_ = VP8LPopulationCost(h->literal_, num_codes, NULL) + + VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, + NUM_LENGTH_CODES); + h->red_cost_ = VP8LPopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym); + h->blue_cost_ = VP8LPopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym); + h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ + + alpha_cost + distance_cost; + if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) { + h->trivial_symbol_ = VP8L_NON_TRIVIAL_SYM; + } else { + h->trivial_symbol_ = + ((uint32_t)alpha_sym << 24) | (red_sym << 16) | (blue_sym << 0); } - return retval; } -// Estimates the Huffman dictionary + other block overhead size. -static double HistogramEstimateBitsHeader(const VP8LHistogram* const p) { - return HuffmanCost(&p->alpha_[0], 256) + - HuffmanCost(&p->red_[0], 256) + - HuffmanCost(&p->literal_[0], VP8LHistogramNumCodes(p)) + - HuffmanCost(&p->blue_[0], 256) + - HuffmanCost(&p->distance_[0], NUM_DISTANCE_CODES); +static int GetBinIdForEntropy(double min, double max, double val) { + const double range = max - min + 1e-6; + const double delta = val - min; + return (int)(NUM_PARTITIONS * delta / range); } -double VP8LHistogramEstimateBits(const VP8LHistogram* const p) { - return HistogramEstimateBitsHeader(p) + VP8LHistogramEstimateBitsBulk(p); +static int GetHistoBinIndexLowEffort( + const VP8LHistogram* const h, const DominantCostRange* const c) { + const int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_, + h->literal_cost_); + assert(bin_id < NUM_PARTITIONS); + return bin_id; } -static void HistogramBuildImage(int xsize, int histo_bits, - const VP8LBackwardRefs* const backward_refs, - VP8LHistogramSet* const image) { - int i; +static int GetHistoBinIndex( + const VP8LHistogram* const h, const DominantCostRange* const c) { + const int bin_id = + GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_) + + NUM_PARTITIONS * GetBinIdForEntropy(c->red_min_, c->red_max_, + h->red_cost_) + + NUM_PARTITIONS * NUM_PARTITIONS * GetBinIdForEntropy(c->literal_min_, + c->literal_max_, + h->literal_cost_); + assert(bin_id < BIN_SIZE); + return bin_id; +} + +// Construct the histograms from backward references. +static void HistogramBuild( + int xsize, int histo_bits, const VP8LBackwardRefs* const backward_refs, + VP8LHistogramSet* const image_histo) { int x = 0, y = 0; const int histo_xsize = VP8LSubSampleSize(xsize, histo_bits); - VP8LHistogram** const histograms = image->histograms; + VP8LHistogram** const histograms = image_histo->histograms; + VP8LRefsCursor c = VP8LRefsCursorInit(backward_refs); assert(histo_bits > 0); - for (i = 0; i < backward_refs->size; ++i) { - const PixOrCopy* const v = &backward_refs->refs[i]; + while (VP8LRefsCursorOk(&c)) { + const PixOrCopy* const v = c.cur_pos; const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits); VP8LHistogramAddSinglePixOrCopy(histograms[ix], v); x += PixOrCopyLength(v); @@ -237,7 +328,134 @@ static void HistogramBuildImage(int xsize, int histo_bits, x -= xsize; ++y; } + VP8LRefsCursorNext(&c); + } +} + +// Copies the histograms and computes its bit_cost. +static void HistogramCopyAndAnalyze( + VP8LHistogramSet* const orig_histo, VP8LHistogramSet* const image_histo) { + int i; + const int histo_size = orig_histo->size; + VP8LHistogram** const orig_histograms = orig_histo->histograms; + VP8LHistogram** const histograms = image_histo->histograms; + for (i = 0; i < histo_size; ++i) { + VP8LHistogram* const histo = orig_histograms[i]; + UpdateHistogramCost(histo); + // Copy histograms from orig_histo[] to image_histo[]. + HistogramCopy(histo, histograms[i]); + } +} + +// Partition histograms to different entropy bins for three dominant (literal, +// red and blue) symbol costs and compute the histogram aggregate bit_cost. +static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo, + int16_t* const bin_map, int low_effort) { + int i; + VP8LHistogram** const histograms = image_histo->histograms; + const int histo_size = image_histo->size; + const int bin_depth = histo_size + 1; + DominantCostRange cost_range; + DominantCostRangeInit(&cost_range); + + // Analyze the dominant (literal, red and blue) entropy costs. + for (i = 0; i < histo_size; ++i) { + VP8LHistogram* const histo = histograms[i]; + UpdateDominantCostRange(histo, &cost_range); + } + + // bin-hash histograms on three of the dominant (literal, red and blue) + // symbol costs. + for (i = 0; i < histo_size; ++i) { + int num_histos; + VP8LHistogram* const histo = histograms[i]; + const int16_t bin_id = low_effort ? + (int16_t)GetHistoBinIndexLowEffort(histo, &cost_range) : + (int16_t)GetHistoBinIndex(histo, &cost_range); + const int bin_offset = bin_id * bin_depth; + // bin_map[n][0] for every bin 'n' maintains the counter for the number of + // histograms in that bin. + // Get and increment the num_histos in that bin. + num_histos = ++bin_map[bin_offset]; + assert(bin_offset + num_histos < bin_depth * BIN_SIZE); + // Add histogram i'th index at num_histos (last) position in the bin_map. + bin_map[bin_offset + num_histos] = i; + } +} + +// Compact the histogram set by removing unused entries. +static void HistogramCompactBins(VP8LHistogramSet* const image_histo) { + VP8LHistogram** const histograms = image_histo->histograms; + int i, j; + + for (i = 0, j = 0; i < image_histo->size; ++i) { + if (histograms[i] != NULL && histograms[i]->bit_cost_ != 0.) { + if (j < i) { + histograms[j] = histograms[i]; + histograms[i] = NULL; + } + ++j; + } } + image_histo->size = j; +} + +static VP8LHistogram* HistogramCombineEntropyBin( + VP8LHistogramSet* const image_histo, + VP8LHistogram* cur_combo, + int16_t* const bin_map, int bin_depth, int num_bins, + double combine_cost_factor, int low_effort) { + int bin_id; + VP8LHistogram** const histograms = image_histo->histograms; + + for (bin_id = 0; bin_id < num_bins; ++bin_id) { + const int bin_offset = bin_id * bin_depth; + const int num_histos = bin_map[bin_offset]; + const int idx1 = bin_map[bin_offset + 1]; + int num_combine_failures = 0; + int n; + for (n = 2; n <= num_histos; ++n) { + const int idx2 = bin_map[bin_offset + n]; + if (low_effort) { + // Merge all histograms with the same bin index, irrespective of cost of + // the merged histograms. + VP8LHistogramAdd(histograms[idx1], histograms[idx2], histograms[idx1]); + histograms[idx2]->bit_cost_ = 0.; + } else { + const double bit_cost_idx2 = histograms[idx2]->bit_cost_; + if (bit_cost_idx2 > 0.) { + const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor; + const double curr_cost_diff = + HistogramAddEval(histograms[idx1], histograms[idx2], + cur_combo, bit_cost_thresh); + if (curr_cost_diff < bit_cost_thresh) { + // Try to merge two histograms only if the combo is a trivial one or + // the two candidate histograms are already non-trivial. + // For some images, 'try_combine' turns out to be false for a lot of + // histogram pairs. In that case, we fallback to combining + // histograms as usual to avoid increasing the header size. + const int try_combine = + (cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) || + ((histograms[idx1]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) && + (histograms[idx2]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM)); + const int max_combine_failures = 32; + if (try_combine || (num_combine_failures >= max_combine_failures)) { + HistogramSwap(&cur_combo, &histograms[idx1]); + histograms[idx2]->bit_cost_ = 0.; + } else { + ++num_combine_failures; + } + } + } + } + } + if (low_effort) { + // Update the bit_cost for the merged histograms (per bin index). + UpdateHistogramCost(histograms[idx1]); + } + } + HistogramCompactBins(image_histo); + return cur_combo; } static uint32_t MyRand(uint32_t *seed) { @@ -248,159 +466,433 @@ static uint32_t MyRand(uint32_t *seed) { return *seed; } -static int HistogramCombine(const VP8LHistogramSet* const in, - VP8LHistogramSet* const out, int num_pairs) { +// ----------------------------------------------------------------------------- +// Histogram pairs priority queue + +// Pair of histograms. Negative idx1 value means that pair is out-of-date. +typedef struct { + int idx1; + int idx2; + double cost_diff; + double cost_combo; +} HistogramPair; + +typedef struct { + HistogramPair* heap; + int* positions; + int size; + int max_index; +} HistoHeap; + +static int HistoHeapInit(HistoHeap* const histo_heap, const int max_index) { + histo_heap->size = 0; + histo_heap->max_index = max_index; + histo_heap->heap = WebPSafeMalloc(max_index * max_index, + sizeof(*histo_heap->heap)); + histo_heap->positions = WebPSafeMalloc(max_index * max_index, + sizeof(*histo_heap->positions)); + return histo_heap->heap != NULL && histo_heap->positions != NULL; +} + +static void HistoHeapClear(HistoHeap* const histo_heap) { + assert(histo_heap != NULL); + WebPSafeFree(histo_heap->heap); + WebPSafeFree(histo_heap->positions); +} + +static void SwapHistogramPairs(HistogramPair *p1, + HistogramPair *p2) { + const HistogramPair tmp = *p1; + *p1 = *p2; + *p2 = tmp; +} + +// Given a valid min-heap in range [0, heap_size-1) this function places value +// heap[heap_size-1] into right location within heap and sets its position in +// positions array. +static void HeapPush(HistoHeap* const histo_heap) { + HistogramPair* const heap = histo_heap->heap - 1; + int* const positions = histo_heap->positions; + const int max_index = histo_heap->max_index; + int v; + ++histo_heap->size; + v = histo_heap->size; + while (v > 1 && heap[v].cost_diff < heap[v >> 1].cost_diff) { + SwapHistogramPairs(&heap[v], &heap[v >> 1]); + // Change position of moved pair in heap. + if (heap[v].idx1 >= 0) { + const int pos = heap[v].idx1 * max_index + heap[v].idx2; + assert(pos >= 0 && pos < max_index * max_index); + positions[pos] = v; + } + v >>= 1; + } + positions[heap[v].idx1 * max_index + heap[v].idx2] = v; +} + +// Given a valid min-heap in range [0, heap_size) this function shortens heap +// range by one and places element with the lowest value to (heap_size-1). +static void HeapPop(HistoHeap* const histo_heap) { + HistogramPair* const heap = histo_heap->heap - 1; + int* const positions = histo_heap->positions; + const int heap_size = histo_heap->size; + const int max_index = histo_heap->max_index; + int v = 1; + if (heap[v].idx1 >= 0) { + positions[heap[v].idx1 * max_index + heap[v].idx2] = -1; + } + SwapHistogramPairs(&heap[v], &heap[heap_size]); + while ((v << 1) < heap_size) { + int son = (heap[v << 1].cost_diff < heap[v].cost_diff) ? (v << 1) : v; + if (((v << 1) + 1) < heap_size && + heap[(v << 1) + 1].cost_diff < heap[son].cost_diff) { + son = (v << 1) + 1; + } + if (son == v) break; + SwapHistogramPairs(&heap[v], &heap[son]); + // Change position of moved pair in heap. + if (heap[v].idx1 >= 0) { + positions[heap[v].idx1 * max_index + heap[v].idx2] = v; + } + v = son; + } + if (heap[v].idx1 >= 0) { + positions[heap[v].idx1 * max_index + heap[v].idx2] = v; + } + --histo_heap->size; +} + +// ----------------------------------------------------------------------------- + +static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2, + HistogramPair* const pair, + VP8LHistogram* const histos) { + if (idx1 > idx2) { + const int tmp = idx2; + idx2 = idx1; + idx1 = tmp; + } + pair->idx1 = idx1; + pair->idx2 = idx2; + pair->cost_diff = + HistogramAddEval(histograms[idx1], histograms[idx2], histos, 0); + pair->cost_combo = histos->bit_cost_; +} + +#define POSITION_INVALID (-1) + +// Invalidates pairs intersecting (idx1, idx2) in heap. +static void InvalidatePairs(int idx1, int idx2, + const HistoHeap* const histo_heap) { + HistogramPair* const heap = histo_heap->heap - 1; + int* const positions = histo_heap->positions; + const int max_index = histo_heap->max_index; + int i; + for (i = 0; i < idx1; ++i) { + const int pos = positions[i * max_index + idx1]; + if (pos >= 0) { + heap[pos].idx1 = POSITION_INVALID; + } + } + for (i = idx1 + 1; i < max_index; ++i) { + const int pos = positions[idx1 * max_index + i]; + if (pos >= 0) { + heap[pos].idx1 = POSITION_INVALID; + } + } + for (i = 0; i < idx2; ++i) { + const int pos = positions[i * max_index + idx2]; + if (pos >= 0) { + heap[pos].idx1 = POSITION_INVALID; + } + } + for (i = idx2 + 1; i < max_index; ++i) { + const int pos = positions[idx2 * max_index + i]; + if (pos >= 0) { + heap[pos].idx1 = POSITION_INVALID; + } + } +} + +// Combines histograms by continuously choosing the one with the highest cost +// reduction. +static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo, + VP8LHistogram* const histos) { int ok = 0; - int i, iter; + int image_histo_size = image_histo->size; + int i, j; + VP8LHistogram** const histograms = image_histo->histograms; + // Indexes of remaining histograms. + int* const clusters = WebPSafeMalloc(image_histo_size, sizeof(*clusters)); + // Heap of histogram pairs. + HistoHeap histo_heap; + + if (!HistoHeapInit(&histo_heap, image_histo_size) || clusters == NULL) { + goto End; + } + + for (i = 0; i < image_histo_size; ++i) { + // Initialize clusters indexes. + clusters[i] = i; + for (j = i + 1; j < image_histo_size; ++j) { + // Initialize positions array. + histo_heap.positions[i * histo_heap.max_index + j] = POSITION_INVALID; + PreparePair(histograms, i, j, &histo_heap.heap[histo_heap.size], histos); + if (histo_heap.heap[histo_heap.size].cost_diff < 0) { + HeapPush(&histo_heap); + } + } + } + + while (image_histo_size > 1 && histo_heap.size > 0) { + const int idx1 = histo_heap.heap[0].idx1; + const int idx2 = histo_heap.heap[0].idx2; + VP8LHistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]); + histograms[idx1]->bit_cost_ = histo_heap.heap[0].cost_combo; + // Remove merged histogram. + for (i = 0; i + 1 < image_histo_size; ++i) { + if (clusters[i] >= idx2) { + clusters[i] = clusters[i + 1]; + } + } + --image_histo_size; + + // Invalidate pairs intersecting the just combined best pair. + InvalidatePairs(idx1, idx2, &histo_heap); + + // Pop invalid pairs from the top of the heap. + while (histo_heap.size > 0 && histo_heap.heap[0].idx1 < 0) { + HeapPop(&histo_heap); + } + + // Push new pairs formed with combined histogram to the heap. + for (i = 0; i < image_histo_size; ++i) { + if (clusters[i] != idx1) { + PreparePair(histograms, idx1, clusters[i], + &histo_heap.heap[histo_heap.size], histos); + if (histo_heap.heap[histo_heap.size].cost_diff < 0) { + HeapPush(&histo_heap); + } + } + } + } + // Move remaining histograms to the beginning of the array. + for (i = 0; i < image_histo_size; ++i) { + if (i != clusters[i]) { // swap the two histograms + HistogramSwap(&histograms[i], &histograms[clusters[i]]); + } + } + + image_histo->size = image_histo_size; + ok = 1; + + End: + WebPSafeFree(clusters); + HistoHeapClear(&histo_heap); + return ok; +} + +static VP8LHistogram* HistogramCombineStochastic( + VP8LHistogramSet* const image_histo, + VP8LHistogram* tmp_histo, + VP8LHistogram* best_combo, + int quality, int min_cluster_size) { + int iter; uint32_t seed = 0; int tries_with_no_success = 0; - const int min_cluster_size = 2; - int out_size = in->size; - const int outer_iters = in->size * 3; - VP8LHistogram* const histos = (VP8LHistogram*)malloc(2 * sizeof(*histos)); - VP8LHistogram* cur_combo = histos + 0; // trial merged histogram - VP8LHistogram* best_combo = histos + 1; // best merged histogram so far - if (histos == NULL) goto End; - - // Copy histograms from in[] to out[]. - assert(in->size <= out->size); - for (i = 0; i < in->size; ++i) { - in->histograms[i]->bit_cost_ = VP8LHistogramEstimateBits(in->histograms[i]); - *out->histograms[i] = *in->histograms[i]; - } - - // Collapse similar histograms in 'out'. - for (iter = 0; iter < outer_iters && out_size >= min_cluster_size; ++iter) { - // We pick the best pair to be combined out of 'inner_iters' pairs. + int image_histo_size = image_histo->size; + const int iter_mult = (quality < 25) ? 2 : 2 + (quality - 25) / 8; + const int outer_iters = image_histo_size * iter_mult; + const int num_pairs = image_histo_size / 2; + const int num_tries_no_success = outer_iters / 2; + VP8LHistogram** const histograms = image_histo->histograms; + + // Collapse similar histograms in 'image_histo'. + ++min_cluster_size; + for (iter = 0; + iter < outer_iters && image_histo_size >= min_cluster_size; + ++iter) { double best_cost_diff = 0.; - int best_idx1 = 0, best_idx2 = 1; + int best_idx1 = -1, best_idx2 = 1; int j; + const int num_tries = + (num_pairs < image_histo_size) ? num_pairs : image_histo_size; seed += iter; - for (j = 0; j < num_pairs; ++j) { + for (j = 0; j < num_tries; ++j) { double curr_cost_diff; // Choose two histograms at random and try to combine them. - const uint32_t idx1 = MyRand(&seed) % out_size; - const uint32_t tmp = ((j & 7) + 1) % (out_size - 1); - const uint32_t diff = (tmp < 3) ? tmp : MyRand(&seed) % (out_size - 1); - const uint32_t idx2 = (idx1 + diff + 1) % out_size; + const uint32_t idx1 = MyRand(&seed) % image_histo_size; + const uint32_t tmp = (j & 7) + 1; + const uint32_t diff = + (tmp < 3) ? tmp : MyRand(&seed) % (image_histo_size - 1); + const uint32_t idx2 = (idx1 + diff + 1) % image_histo_size; if (idx1 == idx2) { continue; } - *cur_combo = *out->histograms[idx1]; - VP8LHistogramAdd(cur_combo, out->histograms[idx2]); - cur_combo->bit_cost_ = VP8LHistogramEstimateBits(cur_combo); + // Calculate cost reduction on combining. - curr_cost_diff = cur_combo->bit_cost_ - - out->histograms[idx1]->bit_cost_ - - out->histograms[idx2]->bit_cost_; - if (best_cost_diff > curr_cost_diff) { // found a better pair? - { // swap cur/best combo histograms - VP8LHistogram* const tmp_histo = cur_combo; - cur_combo = best_combo; - best_combo = tmp_histo; - } + curr_cost_diff = HistogramAddEval(histograms[idx1], histograms[idx2], + tmp_histo, best_cost_diff); + if (curr_cost_diff < best_cost_diff) { // found a better pair? + HistogramSwap(&best_combo, &tmp_histo); best_cost_diff = curr_cost_diff; best_idx1 = idx1; best_idx2 = idx2; } } - if (best_cost_diff < 0.0) { - *out->histograms[best_idx1] = *best_combo; + if (best_idx1 >= 0) { + HistogramSwap(&best_combo, &histograms[best_idx1]); // swap best_idx2 slot with last one (which is now unused) - --out_size; - if (best_idx2 != out_size) { - out->histograms[best_idx2] = out->histograms[out_size]; - out->histograms[out_size] = NULL; // just for sanity check. + --image_histo_size; + if (best_idx2 != image_histo_size) { + HistogramSwap(&histograms[image_histo_size], &histograms[best_idx2]); + histograms[image_histo_size] = NULL; } tries_with_no_success = 0; } - if (++tries_with_no_success >= 50) { + if (++tries_with_no_success >= num_tries_no_success) { break; } } - out->size = out_size; - ok = 1; - - End: - free(histos); - return ok; + image_histo->size = image_histo_size; + return best_combo; } // ----------------------------------------------------------------------------- // Histogram refinement -// What is the bit cost of moving square_histogram from -// cur_symbol to candidate_symbol. -// TODO(skal): we don't really need to copy the histogram and Add(). Instead -// we just need VP8LDualHistogramEstimateBits(A, B) estimation function. -static double HistogramDistance(const VP8LHistogram* const square_histogram, - const VP8LHistogram* const candidate) { - const double previous_bit_cost = candidate->bit_cost_; - double new_bit_cost; - VP8LHistogram modified_histo; - modified_histo = *candidate; - VP8LHistogramAdd(&modified_histo, square_histogram); - new_bit_cost = VP8LHistogramEstimateBits(&modified_histo); - - return new_bit_cost - previous_bit_cost; -} - // Find the best 'out' histogram for each of the 'in' histograms. // Note: we assume that out[]->bit_cost_ is already up-to-date. -static void HistogramRemap(const VP8LHistogramSet* const in, - const VP8LHistogramSet* const out, +static void HistogramRemap(const VP8LHistogramSet* const orig_histo, + const VP8LHistogramSet* const image_histo, uint16_t* const symbols) { int i; - for (i = 0; i < in->size; ++i) { - int best_out = 0; - double best_bits = HistogramDistance(in->histograms[i], out->histograms[0]); - int k; - for (k = 1; k < out->size; ++k) { - const double cur_bits = - HistogramDistance(in->histograms[i], out->histograms[k]); - if (cur_bits < best_bits) { - best_bits = cur_bits; - best_out = k; + VP8LHistogram** const orig_histograms = orig_histo->histograms; + VP8LHistogram** const histograms = image_histo->histograms; + const int orig_histo_size = orig_histo->size; + const int image_histo_size = image_histo->size; + if (image_histo_size > 1) { + for (i = 0; i < orig_histo_size; ++i) { + int best_out = 0; + double best_bits = + HistogramAddThresh(histograms[0], orig_histograms[i], MAX_COST); + int k; + for (k = 1; k < image_histo_size; ++k) { + const double cur_bits = + HistogramAddThresh(histograms[k], orig_histograms[i], best_bits); + if (cur_bits < best_bits) { + best_bits = cur_bits; + best_out = k; + } } + symbols[i] = best_out; + } + } else { + assert(image_histo_size == 1); + for (i = 0; i < orig_histo_size; ++i) { + symbols[i] = 0; } - symbols[i] = best_out; } // Recompute each out based on raw and symbols. - for (i = 0; i < out->size; ++i) { - HistogramClear(out->histograms[i]); + for (i = 0; i < image_histo_size; ++i) { + HistogramClear(histograms[i]); } - for (i = 0; i < in->size; ++i) { - VP8LHistogramAdd(out->histograms[symbols[i]], in->histograms[i]); + + for (i = 0; i < orig_histo_size; ++i) { + const int idx = symbols[i]; + VP8LHistogramAdd(orig_histograms[i], histograms[idx], histograms[idx]); } } +static double GetCombineCostFactor(int histo_size, int quality) { + double combine_cost_factor = 0.16; + if (quality < 90) { + if (histo_size > 256) combine_cost_factor /= 2.; + if (histo_size > 512) combine_cost_factor /= 2.; + if (histo_size > 1024) combine_cost_factor /= 2.; + if (quality <= 50) combine_cost_factor /= 2.; + } + return combine_cost_factor; +} + int VP8LGetHistoImageSymbols(int xsize, int ysize, const VP8LBackwardRefs* const refs, - int quality, int histo_bits, int cache_bits, - VP8LHistogramSet* const image_in, + int quality, int low_effort, + int histo_bits, int cache_bits, + VP8LHistogramSet* const image_histo, + VP8LHistogramSet* const tmp_histos, uint16_t* const histogram_symbols) { int ok = 0; const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1; const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1; - const int num_histo_pairs = 10 + quality / 2; // For HistogramCombine(). - const int histo_image_raw_size = histo_xsize * histo_ysize; - VP8LHistogramSet* const image_out = - VP8LAllocateHistogramSet(histo_image_raw_size, cache_bits); - if (image_out == NULL) return 0; - - // Build histogram image. - HistogramBuildImage(xsize, histo_bits, refs, image_out); - // Collapse similar histograms. - if (!HistogramCombine(image_out, image_in, num_histo_pairs)) { - goto Error; + const int image_histo_raw_size = histo_xsize * histo_ysize; + const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE; + + // The bin_map for every bin follows following semantics: + // bin_map[n][0] = num_histo; // The number of histograms in that bin. + // bin_map[n][1] = index of first histogram in that bin; + // bin_map[n][num_histo] = index of last histogram in that bin; + // bin_map[n][num_histo + 1] ... bin_map[n][bin_depth - 1] = unused indices. + const int bin_depth = image_histo_raw_size + 1; + int16_t* bin_map = NULL; + VP8LHistogramSet* const orig_histo = + VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits); + VP8LHistogram* cur_combo; + const int entropy_combine = + (orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100); + + if (orig_histo == NULL) goto Error; + + // Don't attempt linear bin-partition heuristic for: + // histograms of small sizes, as bin_map will be very sparse and; + // Maximum quality (q==100), to preserve the compression gains at that level. + if (entropy_combine) { + const int bin_map_size = bin_depth * entropy_combine_num_bins; + bin_map = (int16_t*)WebPSafeCalloc(bin_map_size, sizeof(*bin_map)); + if (bin_map == NULL) goto Error; } + + // Construct the histograms from backward references. + HistogramBuild(xsize, histo_bits, refs, orig_histo); + // Copies the histograms and computes its bit_cost. + HistogramCopyAndAnalyze(orig_histo, image_histo); + + cur_combo = tmp_histos->histograms[1]; // pick up working slot + if (entropy_combine) { + const double combine_cost_factor = + GetCombineCostFactor(image_histo_raw_size, quality); + HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort); + // Collapse histograms with similar entropy. + cur_combo = HistogramCombineEntropyBin(image_histo, cur_combo, bin_map, + bin_depth, entropy_combine_num_bins, + combine_cost_factor, low_effort); + } + + // Don't combine the histograms using stochastic and greedy heuristics for + // low-effort compression mode. + if (!low_effort || !entropy_combine) { + const float x = quality / 100.f; + // cubic ramp between 1 and MAX_HISTO_GREEDY: + const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1)); + cur_combo = HistogramCombineStochastic(image_histo, + tmp_histos->histograms[0], + cur_combo, quality, threshold_size); + if ((image_histo->size <= threshold_size) && + !HistogramCombineGreedy(image_histo, cur_combo)) { + goto Error; + } + } + + // TODO(vikasa): Optimize HistogramRemap for low-effort compression mode also. // Find the optimal map from original histograms to the final ones. - HistogramRemap(image_out, image_in, histogram_symbols); + HistogramRemap(orig_histo, image_histo, histogram_symbols); + ok = 1; -Error: - free(image_out); + Error: + WebPSafeFree(bin_map); + VP8LFreeHistogramSet(orig_histo); return ok; } diff --git a/drivers/webp/enc/histogram.h b/drivers/webp/enc/histogram.h index 5b5de25539..adb16c01ca 100644 --- a/drivers/webp/enc/histogram.h +++ b/drivers/webp/enc/histogram.h @@ -1,8 +1,10 @@ // Copyright 2012 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Author: Jyrki Alakuijala (jyrki@google.com) @@ -12,17 +14,13 @@ #ifndef WEBP_ENC_HISTOGRAM_H_ #define WEBP_ENC_HISTOGRAM_H_ -#include <assert.h> -#include <stddef.h> -#include <stdlib.h> -#include <stdio.h> #include <string.h> #include "./backward_references.h" -#include "../format_constants.h" -#include "../types.h" +#include "../webp/format_constants.h" +#include "../webp/types.h" -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus extern "C" { #endif @@ -30,18 +28,23 @@ extern "C" { typedef struct { // literal_ contains green literal, palette-code and // copy-length-prefix histogram - int literal_[PIX_OR_COPY_CODES_MAX]; - int red_[256]; - int blue_[256]; - int alpha_[256]; + uint32_t* literal_; // Pointer to the allocated buffer for literal. + uint32_t red_[NUM_LITERAL_CODES]; + uint32_t blue_[NUM_LITERAL_CODES]; + uint32_t alpha_[NUM_LITERAL_CODES]; // Backward reference prefix-code histogram. - int distance_[NUM_DISTANCE_CODES]; + uint32_t distance_[NUM_DISTANCE_CODES]; int palette_code_bits_; - double bit_cost_; // cached value of VP8LHistogramEstimateBits(this) + uint32_t trivial_symbol_; // True, if histograms for Red, Blue & Alpha + // literal symbols are single valued. + double bit_cost_; // cached value of bit cost. + double literal_cost_; // Cached values of dominant entropy costs: + double red_cost_; // literal, red & blue. + double blue_cost_; } VP8LHistogram; // Collection of histograms with fixed capacity, allocated as one -// big memory chunk. Can be destroyed by simply calling 'free()'. +// big memory chunk. Can be destroyed by calling WebPSafeFree(). typedef struct { int size; // number of slots currently in use int max_size; // maximum capacity @@ -57,6 +60,9 @@ void VP8LHistogramCreate(VP8LHistogram* const p, const VP8LBackwardRefs* const refs, int palette_code_bits); +// Return the size of the histogram for a given palette_code_bits. +int VP8LGetHistogramSize(int palette_code_bits); + // Set the palette_code_bits and reset the stats. void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits); @@ -64,51 +70,40 @@ void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits); void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs, VP8LHistogram* const histo); +// Free the memory allocated for the histogram. +void VP8LFreeHistogram(VP8LHistogram* const histo); + +// Free the memory allocated for the histogram set. +void VP8LFreeHistogramSet(VP8LHistogramSet* const histo); + // Allocate an array of pointer to histograms, allocated and initialized // using 'cache_bits'. Return NULL in case of memory error. VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits); +// Allocate and initialize histogram object with specified 'cache_bits'. +// Returns NULL in case of memory error. +// Special case of VP8LAllocateHistogramSet, with size equals 1. +VP8LHistogram* VP8LAllocateHistogram(int cache_bits); + // Accumulate a token 'v' into a histogram. void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, const PixOrCopy* const v); -// Estimate how many bits the combined entropy of literals and distance -// approximately maps to. -double VP8LHistogramEstimateBits(const VP8LHistogram* const p); - -// This function estimates the cost in bits excluding the bits needed to -// represent the entropy code itself. -double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p); - -static WEBP_INLINE void VP8LHistogramAdd(VP8LHistogram* const p, - const VP8LHistogram* const a) { - int i; - for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) { - p->literal_[i] += a->literal_[i]; - } - for (i = 0; i < NUM_DISTANCE_CODES; ++i) { - p->distance_[i] += a->distance_[i]; - } - for (i = 0; i < 256; ++i) { - p->red_[i] += a->red_[i]; - p->blue_[i] += a->blue_[i]; - p->alpha_[i] += a->alpha_[i]; - } -} - -static WEBP_INLINE int VP8LHistogramNumCodes(const VP8LHistogram* const p) { - return 256 + NUM_LENGTH_CODES + - ((p->palette_code_bits_ > 0) ? (1 << p->palette_code_bits_) : 0); +static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) { + return NUM_LITERAL_CODES + NUM_LENGTH_CODES + + ((palette_code_bits > 0) ? (1 << palette_code_bits) : 0); } // Builds the histogram image. int VP8LGetHistoImageSymbols(int xsize, int ysize, const VP8LBackwardRefs* const refs, - int quality, int histogram_bits, int cache_bits, + int quality, int low_effort, + int histogram_bits, int cache_bits, VP8LHistogramSet* const image_in, + VP8LHistogramSet* const tmp_histos, uint16_t* const histogram_symbols); -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus } #endif diff --git a/drivers/webp/enc/iterator.c b/drivers/webp/enc/iterator.c index 86e473bcf0..99d960a547 100644 --- a/drivers/webp/enc/iterator.c +++ b/drivers/webp/enc/iterator.c @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // VP8Iterator: block iterator @@ -13,21 +15,16 @@ #include "./vp8enci.h" -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - //------------------------------------------------------------------------------ // VP8Iterator //------------------------------------------------------------------------------ static void InitLeft(VP8EncIterator* const it) { - const VP8Encoder* const enc = it->enc_; - enc->y_left_[-1] = enc->u_left_[-1] = enc->v_left_[-1] = + it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = (it->y_ > 0) ? 129 : 127; - memset(enc->y_left_, 129, 16); - memset(enc->u_left_, 129, 8); - memset(enc->v_left_, 129, 8); + memset(it->y_left_, 129, 16); + memset(it->u_left_, 129, 8); + memset(it->v_left_, 129, 8); it->left_nz_[8] = 0; } @@ -38,43 +35,60 @@ static void InitTop(VP8EncIterator* const it) { memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_)); } -void VP8IteratorReset(VP8EncIterator* const it) { +void VP8IteratorSetRow(VP8EncIterator* const it, int y) { VP8Encoder* const enc = it->enc_; it->x_ = 0; - it->y_ = 0; - it->y_offset_ = 0; - it->uv_offset_ = 0; - it->mb_ = enc->mb_info_; - it->preds_ = enc->preds_; + it->y_ = y; + it->bw_ = &enc->parts_[y & (enc->num_parts_ - 1)]; + it->preds_ = enc->preds_ + y * 4 * enc->preds_w_; it->nz_ = enc->nz_; - it->bw_ = &enc->parts_[0]; - it->done_ = enc->mb_w_* enc->mb_h_; + it->mb_ = enc->mb_info_ + y * enc->mb_w_; + it->y_top_ = enc->y_top_; + it->uv_top_ = enc->uv_top_; + InitLeft(it); +} + +void VP8IteratorReset(VP8EncIterator* const it) { + VP8Encoder* const enc = it->enc_; + VP8IteratorSetRow(it, 0); + VP8IteratorSetCountDown(it, enc->mb_w_ * enc->mb_h_); // default InitTop(it); InitLeft(it); memset(it->bit_count_, 0, sizeof(it->bit_count_)); it->do_trellis_ = 0; } +void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down) { + it->count_down_ = it->count_down0_ = count_down; +} + +int VP8IteratorIsDone(const VP8EncIterator* const it) { + return (it->count_down_ <= 0); +} + void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) { it->enc_ = enc; it->y_stride_ = enc->pic_->y_stride; it->uv_stride_ = enc->pic_->uv_stride; - // TODO(later): for multithreading, these should be owned by 'it'. - it->yuv_in_ = enc->yuv_in_; - it->yuv_out_ = enc->yuv_out_; - it->yuv_out2_ = enc->yuv_out2_; - it->yuv_p_ = enc->yuv_p_; + it->yuv_in_ = (uint8_t*)WEBP_ALIGN(it->yuv_mem_); + it->yuv_out_ = it->yuv_in_ + YUV_SIZE_ENC; + it->yuv_out2_ = it->yuv_out_ + YUV_SIZE_ENC; + it->yuv_p_ = it->yuv_out2_ + YUV_SIZE_ENC; it->lf_stats_ = enc->lf_stats_; it->percent0_ = enc->percent_; + it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1); + it->u_left_ = it->y_left_ + 16 + 16; + it->v_left_ = it->u_left_ + 16; VP8IteratorReset(it); } int VP8IteratorProgress(const VP8EncIterator* const it, int delta) { VP8Encoder* const enc = it->enc_; - if (delta && enc->pic_->progress_hook) { - const int percent = (enc->mb_h_ <= 1) + if (delta && enc->pic_->progress_hook != NULL) { + const int done = it->count_down0_ - it->count_down_; + const int percent = (it->count_down0_ <= 0) ? it->percent0_ - : it->percent0_ + delta * it->y_ / (enc->mb_h_ - 1); + : it->percent0_ + delta * done / it->count_down0_; return WebPReportProgress(enc->pic_, percent, &enc->percent_); } return 1; @@ -84,6 +98,8 @@ int VP8IteratorProgress(const VP8EncIterator* const it, int delta) { // Import the source samples into the cache. Takes care of replicating // boundary pixels if necessary. +static WEBP_INLINE int MinSize(int a, int b) { return (a < b) ? a : b; } + static void ImportBlock(const uint8_t* src, int src_stride, uint8_t* dst, int w, int h, int size) { int i; @@ -101,30 +117,55 @@ static void ImportBlock(const uint8_t* src, int src_stride, } } -void VP8IteratorImport(const VP8EncIterator* const it) { +static void ImportLine(const uint8_t* src, int src_stride, + uint8_t* dst, int len, int total_len) { + int i; + for (i = 0; i < len; ++i, src += src_stride) dst[i] = *src; + for (; i < total_len; ++i) dst[i] = dst[len - 1]; +} + +void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32) { const VP8Encoder* const enc = it->enc_; const int x = it->x_, y = it->y_; const WebPPicture* const pic = enc->pic_; - const uint8_t* const ysrc = pic->y + (y * pic->y_stride + x) * 16; + const uint8_t* const ysrc = pic->y + (y * pic->y_stride + x) * 16; const uint8_t* const usrc = pic->u + (y * pic->uv_stride + x) * 8; const uint8_t* const vsrc = pic->v + (y * pic->uv_stride + x) * 8; - uint8_t* const ydst = it->yuv_in_ + Y_OFF; - uint8_t* const udst = it->yuv_in_ + U_OFF; - uint8_t* const vdst = it->yuv_in_ + V_OFF; - int w = (pic->width - x * 16); - int h = (pic->height - y * 16); - - if (w > 16) w = 16; - if (h > 16) h = 16; - - // Luma plane - ImportBlock(ysrc, pic->y_stride, ydst, w, h, 16); - - { // U/V planes - const int uv_w = (w + 1) >> 1; - const int uv_h = (h + 1) >> 1; - ImportBlock(usrc, pic->uv_stride, udst, uv_w, uv_h, 8); - ImportBlock(vsrc, pic->uv_stride, vdst, uv_w, uv_h, 8); + const int w = MinSize(pic->width - x * 16, 16); + const int h = MinSize(pic->height - y * 16, 16); + const int uv_w = (w + 1) >> 1; + const int uv_h = (h + 1) >> 1; + + ImportBlock(ysrc, pic->y_stride, it->yuv_in_ + Y_OFF_ENC, w, h, 16); + ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF_ENC, uv_w, uv_h, 8); + ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF_ENC, uv_w, uv_h, 8); + + if (tmp_32 == NULL) return; + + // Import source (uncompressed) samples into boundary. + if (x == 0) { + InitLeft(it); + } else { + if (y == 0) { + it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = 127; + } else { + it->y_left_[-1] = ysrc[- 1 - pic->y_stride]; + it->u_left_[-1] = usrc[- 1 - pic->uv_stride]; + it->v_left_[-1] = vsrc[- 1 - pic->uv_stride]; + } + ImportLine(ysrc - 1, pic->y_stride, it->y_left_, h, 16); + ImportLine(usrc - 1, pic->uv_stride, it->u_left_, uv_h, 8); + ImportLine(vsrc - 1, pic->uv_stride, it->v_left_, uv_h, 8); + } + + it->y_top_ = tmp_32 + 0; + it->uv_top_ = tmp_32 + 16; + if (y == 0) { + memset(tmp_32, 127, 32 * sizeof(*tmp_32)); + } else { + ImportLine(ysrc - pic->y_stride, 1, tmp_32, w, 16); + ImportLine(usrc - pic->uv_stride, 1, tmp_32 + 16, uv_w, 8); + ImportLine(vsrc - pic->uv_stride, 1, tmp_32 + 16 + 8, uv_w, 8); } } @@ -144,9 +185,9 @@ void VP8IteratorExport(const VP8EncIterator* const it) { const VP8Encoder* const enc = it->enc_; if (enc->config_->show_compressed) { const int x = it->x_, y = it->y_; - const uint8_t* const ysrc = it->yuv_out_ + Y_OFF; - const uint8_t* const usrc = it->yuv_out_ + U_OFF; - const uint8_t* const vsrc = it->yuv_out_ + V_OFF; + const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC; + const uint8_t* const usrc = it->yuv_out_ + U_OFF_ENC; + const uint8_t* const vsrc = it->yuv_out_ + V_OFF_ENC; const WebPPicture* const pic = enc->pic_; uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16; uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8; @@ -240,48 +281,44 @@ void VP8IteratorBytesToNz(VP8EncIterator* const it) { #undef BIT //------------------------------------------------------------------------------ -// Advance to the next position, doing the bookeeping. +// Advance to the next position, doing the bookkeeping. -int VP8IteratorNext(VP8EncIterator* const it, - const uint8_t* const block_to_save) { +void VP8IteratorSaveBoundary(VP8EncIterator* const it) { VP8Encoder* const enc = it->enc_; - if (block_to_save) { - const int x = it->x_, y = it->y_; - const uint8_t* const ysrc = block_to_save + Y_OFF; - const uint8_t* const usrc = block_to_save + U_OFF; - if (x < enc->mb_w_ - 1) { // left - int i; - for (i = 0; i < 16; ++i) { - enc->y_left_[i] = ysrc[15 + i * BPS]; - } - for (i = 0; i < 8; ++i) { - enc->u_left_[i] = usrc[7 + i * BPS]; - enc->v_left_[i] = usrc[15 + i * BPS]; - } - // top-left (before 'top'!) - enc->y_left_[-1] = enc->y_top_[x * 16 + 15]; - enc->u_left_[-1] = enc->uv_top_[x * 16 + 0 + 7]; - enc->v_left_[-1] = enc->uv_top_[x * 16 + 8 + 7]; + const int x = it->x_, y = it->y_; + const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC; + const uint8_t* const uvsrc = it->yuv_out_ + U_OFF_ENC; + if (x < enc->mb_w_ - 1) { // left + int i; + for (i = 0; i < 16; ++i) { + it->y_left_[i] = ysrc[15 + i * BPS]; } - if (y < enc->mb_h_ - 1) { // top - memcpy(enc->y_top_ + x * 16, ysrc + 15 * BPS, 16); - memcpy(enc->uv_top_ + x * 16, usrc + 7 * BPS, 8 + 8); + for (i = 0; i < 8; ++i) { + it->u_left_[i] = uvsrc[7 + i * BPS]; + it->v_left_[i] = uvsrc[15 + i * BPS]; } + // top-left (before 'top'!) + it->y_left_[-1] = it->y_top_[15]; + it->u_left_[-1] = it->uv_top_[0 + 7]; + it->v_left_[-1] = it->uv_top_[8 + 7]; } + if (y < enc->mb_h_ - 1) { // top + memcpy(it->y_top_, ysrc + 15 * BPS, 16); + memcpy(it->uv_top_, uvsrc + 7 * BPS, 8 + 8); + } +} - it->mb_++; +int VP8IteratorNext(VP8EncIterator* const it) { it->preds_ += 4; - it->nz_++; - it->x_++; - if (it->x_ == enc->mb_w_) { - it->x_ = 0; - it->y_++; - it->bw_ = &enc->parts_[it->y_ & (enc->num_parts_ - 1)]; - it->preds_ = enc->preds_ + it->y_ * 4 * enc->preds_w_; - it->nz_ = enc->nz_; - InitLeft(it); + it->mb_ += 1; + it->nz_ += 1; + it->y_top_ += 16; + it->uv_top_ += 16; + it->x_ += 1; + if (it->x_ == it->enc_->mb_w_) { + VP8IteratorSetRow(it, ++it->y_); } - return (0 < --it->done_); + return (0 < --it->count_down_); } //------------------------------------------------------------------------------ @@ -368,15 +405,15 @@ void VP8IteratorStartI4(VP8EncIterator* const it) { // Import the boundary samples for (i = 0; i < 17; ++i) { // left - it->i4_boundary_[i] = enc->y_left_[15 - i]; + it->i4_boundary_[i] = it->y_left_[15 - i]; } for (i = 0; i < 16; ++i) { // top - it->i4_boundary_[17 + i] = enc->y_top_[it->x_ * 16 + i]; + it->i4_boundary_[17 + i] = it->y_top_[i]; } // top-right samples have a special case on the far right of the picture if (it->x_ < enc->mb_w_ - 1) { for (i = 16; i < 16 + 4; ++i) { - it->i4_boundary_[17 + i] = enc->y_top_[it->x_ * 16 + i]; + it->i4_boundary_[17 + i] = it->y_top_[i]; } } else { // else, replicate the last valid pixel four times for (i = 16; i < 16 + 4; ++i) { @@ -417,6 +454,3 @@ int VP8IteratorRotateI4(VP8EncIterator* const it, //------------------------------------------------------------------------------ -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/enc/layer.c b/drivers/webp/enc/layer.c deleted file mode 100644 index 423127df63..0000000000 --- a/drivers/webp/enc/layer.c +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ -// ----------------------------------------------------------------------------- -// -// Enhancement layer (for YUV444/422) -// -// Author: Skal (pascal.massimino@gmail.com) - -#include <stdlib.h> - -#include "./vp8enci.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -//------------------------------------------------------------------------------ - -void VP8EncInitLayer(VP8Encoder* const enc) { - enc->use_layer_ = (enc->pic_->u0 != NULL); - enc->layer_data_size_ = 0; - enc->layer_data_ = NULL; - if (enc->use_layer_) { - VP8BitWriterInit(&enc->layer_bw_, enc->mb_w_ * enc->mb_h_ * 3); - } -} - -void VP8EncCodeLayerBlock(VP8EncIterator* it) { - (void)it; // remove a warning -} - -int VP8EncFinishLayer(VP8Encoder* const enc) { - if (enc->use_layer_) { - enc->layer_data_ = VP8BitWriterFinish(&enc->layer_bw_); - enc->layer_data_size_ = VP8BitWriterSize(&enc->layer_bw_); - } - return 1; -} - -void VP8EncDeleteLayer(VP8Encoder* enc) { - free(enc->layer_data_); -} - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/enc/picture.c b/drivers/webp/enc/picture.c index 44eed06083..26679a72e4 100644 --- a/drivers/webp/enc/picture.c +++ b/drivers/webp/enc/picture.c @@ -1,470 +1,179 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // -// WebPPicture utils: colorspace conversion, crop, ... +// WebPPicture class basis // // Author: Skal (pascal.massimino@gmail.com) #include <assert.h> #include <stdlib.h> -#include <math.h> #include "./vp8enci.h" -#include "../utils/rescaler.h" -#include "../utils/utils.h" #include "../dsp/dsp.h" -#include "../dsp/yuv.h" - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -#define HALVE(x) (((x) + 1) >> 1) -#define IS_YUV_CSP(csp, YUV_CSP) (((csp) & WEBP_CSP_UV_MASK) == (YUV_CSP)) - -static const union { - uint32_t argb; - uint8_t bytes[4]; -} test_endian = { 0xff000000u }; -#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff) +#include "../utils/utils.h" //------------------------------------------------------------------------------ // WebPPicture //------------------------------------------------------------------------------ -int WebPPictureAlloc(WebPPicture* picture) { - if (picture != NULL) { - const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK; - const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT; - const int width = picture->width; - const int height = picture->height; - - if (!picture->use_argb) { - const int y_stride = width; - const int uv_width = HALVE(width); - const int uv_height = HALVE(height); - const int uv_stride = uv_width; - int uv0_stride = 0; - int a_width, a_stride; - uint64_t y_size, uv_size, uv0_size, a_size, total_size; - uint8_t* mem; - - // U/V - switch (uv_csp) { - case WEBP_YUV420: - break; -#ifdef WEBP_EXPERIMENTAL_FEATURES - case WEBP_YUV400: // for now, we'll just reset the U/V samples - break; - case WEBP_YUV422: - uv0_stride = uv_width; - break; - case WEBP_YUV444: - uv0_stride = width; - break; -#endif - default: - return 0; - } - uv0_size = height * uv0_stride; - - // alpha - a_width = has_alpha ? width : 0; - a_stride = a_width; - y_size = (uint64_t)y_stride * height; - uv_size = (uint64_t)uv_stride * uv_height; - a_size = (uint64_t)a_stride * height; - - total_size = y_size + a_size + 2 * uv_size + 2 * uv0_size; - - // Security and validation checks - if (width <= 0 || height <= 0 || // luma/alpha param error - uv_width < 0 || uv_height < 0) { // u/v param error - return 0; - } - // Clear previous buffer and allocate a new one. - WebPPictureFree(picture); // erase previous buffer - mem = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*mem)); - if (mem == NULL) return 0; - - // From now on, we're in the clear, we can no longer fail... - picture->memory_ = (void*)mem; - picture->y_stride = y_stride; - picture->uv_stride = uv_stride; - picture->a_stride = a_stride; - picture->uv0_stride = uv0_stride; - // TODO(skal): we could align the y/u/v planes and adjust stride. - picture->y = mem; - mem += y_size; - - picture->u = mem; - mem += uv_size; - picture->v = mem; - mem += uv_size; - - if (a_size) { - picture->a = mem; - mem += a_size; - } - if (uv0_size) { - picture->u0 = mem; - mem += uv0_size; - picture->v0 = mem; - mem += uv0_size; - } - } else { - void* memory; - const uint64_t argb_size = (uint64_t)width * height; - if (width <= 0 || height <= 0) { - return 0; - } - // Clear previous buffer and allocate a new one. - WebPPictureFree(picture); // erase previous buffer - memory = WebPSafeMalloc(argb_size, sizeof(*picture->argb)); - if (memory == NULL) return 0; +static int DummyWriter(const uint8_t* data, size_t data_size, + const WebPPicture* const picture) { + // The following are to prevent 'unused variable' error message. + (void)data; + (void)data_size; + (void)picture; + return 1; +} - // TODO(skal): align plane to cache line? - picture->memory_argb_ = memory; - picture->argb = (uint32_t*)memory; - picture->argb_stride = width; - } +int WebPPictureInitInternal(WebPPicture* picture, int version) { + if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) { + return 0; // caller/system version mismatch! + } + if (picture != NULL) { + memset(picture, 0, sizeof(*picture)); + picture->writer = DummyWriter; + WebPEncodingSetError(picture, VP8_ENC_OK); } return 1; } -// Remove reference to the ARGB buffer (doesn't free anything). -static void PictureResetARGB(WebPPicture* const picture) { +//------------------------------------------------------------------------------ + +static void WebPPictureResetBufferARGB(WebPPicture* const picture) { picture->memory_argb_ = NULL; picture->argb = NULL; picture->argb_stride = 0; } -// Remove reference to the YUVA buffer (doesn't free anything). -static void PictureResetYUVA(WebPPicture* const picture) { +static void WebPPictureResetBufferYUVA(WebPPicture* const picture) { picture->memory_ = NULL; picture->y = picture->u = picture->v = picture->a = NULL; - picture->u0 = picture->v0 = NULL; picture->y_stride = picture->uv_stride = 0; picture->a_stride = 0; - picture->uv0_stride = 0; } -// Grab the 'specs' (writer, *opaque, width, height...) from 'src' and copy them -// into 'dst'. Mark 'dst' as not owning any memory. -static void WebPPictureGrabSpecs(const WebPPicture* const src, - WebPPicture* const dst) { - assert(src != NULL && dst != NULL); - *dst = *src; - PictureResetYUVA(dst); - PictureResetARGB(dst); +void WebPPictureResetBuffers(WebPPicture* const picture) { + WebPPictureResetBufferARGB(picture); + WebPPictureResetBufferYUVA(picture); } -// Allocate a new argb buffer, discarding any existing one and preserving -// the other YUV(A) buffer. -static int PictureAllocARGB(WebPPicture* const picture) { - WebPPicture tmp; - free(picture->memory_argb_); - PictureResetARGB(picture); - picture->use_argb = 1; - WebPPictureGrabSpecs(picture, &tmp); - if (!WebPPictureAlloc(&tmp)) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - } - picture->memory_argb_ = tmp.memory_argb_; - picture->argb = tmp.argb; - picture->argb_stride = tmp.argb_stride; - return 1; -} +int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) { + void* memory; + const uint64_t argb_size = (uint64_t)width * height; -// Release memory owned by 'picture' (both YUV and ARGB buffers). -void WebPPictureFree(WebPPicture* picture) { - if (picture != NULL) { - free(picture->memory_); - free(picture->memory_argb_); - PictureResetYUVA(picture); - PictureResetARGB(picture); - } -} + assert(picture != NULL); -//------------------------------------------------------------------------------ -// Picture copying + WebPSafeFree(picture->memory_argb_); + WebPPictureResetBufferARGB(picture); -// Not worth moving to dsp/enc.c (only used here). -static void CopyPlane(const uint8_t* src, int src_stride, - uint8_t* dst, int dst_stride, int width, int height) { - while (height-- > 0) { - memcpy(dst, src, width); - src += src_stride; - dst += dst_stride; + if (width <= 0 || height <= 0) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); } -} - -// Adjust top-left corner to chroma sample position. -static void SnapTopLeftPosition(const WebPPicture* const pic, - int* const left, int* const top) { - if (!pic->use_argb) { - const int is_yuv422 = IS_YUV_CSP(pic->colorspace, WEBP_YUV422); - if (IS_YUV_CSP(pic->colorspace, WEBP_YUV420) || is_yuv422) { - *left &= ~1; - if (!is_yuv422) *top &= ~1; - } + // allocate a new buffer. + memory = WebPSafeMalloc(argb_size, sizeof(*picture->argb)); + if (memory == NULL) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } -} - -// Adjust top-left corner and verify that the sub-rectangle is valid. -static int AdjustAndCheckRectangle(const WebPPicture* const pic, - int* const left, int* const top, - int width, int height) { - SnapTopLeftPosition(pic, left, top); - if ((*left) < 0 || (*top) < 0) return 0; - if (width <= 0 || height <= 0) return 0; - if ((*left) + width > pic->width) return 0; - if ((*top) + height > pic->height) return 0; + // TODO(skal): align plane to cache line? + picture->memory_argb_ = memory; + picture->argb = (uint32_t*)memory; + picture->argb_stride = width; return 1; } -int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { - if (src == NULL || dst == NULL) return 0; - if (src == dst) return 1; +int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) { + const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK; + const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT; + const int y_stride = width; + const int uv_width = (width + 1) >> 1; + const int uv_height = (height + 1) >> 1; + const int uv_stride = uv_width; + int a_width, a_stride; + uint64_t y_size, uv_size, a_size, total_size; + uint8_t* mem; - WebPPictureGrabSpecs(src, dst); - if (!WebPPictureAlloc(dst)) return 0; + assert(picture != NULL); - if (!src->use_argb) { - CopyPlane(src->y, src->y_stride, - dst->y, dst->y_stride, dst->width, dst->height); - CopyPlane(src->u, src->uv_stride, - dst->u, dst->uv_stride, HALVE(dst->width), HALVE(dst->height)); - CopyPlane(src->v, src->uv_stride, - dst->v, dst->uv_stride, HALVE(dst->width), HALVE(dst->height)); - if (dst->a != NULL) { - CopyPlane(src->a, src->a_stride, - dst->a, dst->a_stride, dst->width, dst->height); - } -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (dst->u0 != NULL) { - int uv0_width = src->width; - if (IS_YUV_CSP(dst->colorspace, WEBP_YUV422)) { - uv0_width = HALVE(uv0_width); - } - CopyPlane(src->u0, src->uv0_stride, - dst->u0, dst->uv0_stride, uv0_width, dst->height); - CopyPlane(src->v0, src->uv0_stride, - dst->v0, dst->uv0_stride, uv0_width, dst->height); - } -#endif - } else { - CopyPlane((const uint8_t*)src->argb, 4 * src->argb_stride, - (uint8_t*)dst->argb, 4 * dst->argb_stride, - 4 * dst->width, dst->height); - } - return 1; -} + WebPSafeFree(picture->memory_); + WebPPictureResetBufferYUVA(picture); -int WebPPictureIsView(const WebPPicture* picture) { - if (picture == NULL) return 0; - if (picture->use_argb) { - return (picture->memory_argb_ == NULL); + if (uv_csp != WEBP_YUV420) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); } - return (picture->memory_ == NULL); -} -int WebPPictureView(const WebPPicture* src, - int left, int top, int width, int height, - WebPPicture* dst) { - if (src == NULL || dst == NULL) return 0; + // alpha + a_width = has_alpha ? width : 0; + a_stride = a_width; + y_size = (uint64_t)y_stride * height; + uv_size = (uint64_t)uv_stride * uv_height; + a_size = (uint64_t)a_stride * height; - // verify rectangle position. - if (!AdjustAndCheckRectangle(src, &left, &top, width, height)) return 0; + total_size = y_size + a_size + 2 * uv_size; - if (src != dst) { // beware of aliasing! We don't want to leak 'memory_'. - WebPPictureGrabSpecs(src, dst); + // Security and validation checks + if (width <= 0 || height <= 0 || // luma/alpha param error + uv_width < 0 || uv_height < 0) { // u/v param error + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); } - dst->width = width; - dst->height = height; - if (!src->use_argb) { - dst->y = src->y + top * src->y_stride + left; - dst->u = src->u + (top >> 1) * src->uv_stride + (left >> 1); - dst->v = src->v + (top >> 1) * src->uv_stride + (left >> 1); - if (src->a != NULL) { - dst->a = src->a + top * src->a_stride + left; - } -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (src->u0 != NULL) { - const int left_pos = - IS_YUV_CSP(dst->colorspace, WEBP_YUV422) ? (left >> 1) : left; - dst->u0 = src->u0 + top * src->uv0_stride + left_pos; - dst->v0 = src->v0 + top * src->uv0_stride + left_pos; - } -#endif - } else { - dst->argb = src->argb + top * src->argb_stride + left; + // allocate a new buffer. + mem = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*mem)); + if (mem == NULL) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } - return 1; -} -//------------------------------------------------------------------------------ -// Picture cropping + // From now on, we're in the clear, we can no longer fail... + picture->memory_ = (void*)mem; + picture->y_stride = y_stride; + picture->uv_stride = uv_stride; + picture->a_stride = a_stride; -int WebPPictureCrop(WebPPicture* pic, - int left, int top, int width, int height) { - WebPPicture tmp; + // TODO(skal): we could align the y/u/v planes and adjust stride. + picture->y = mem; + mem += y_size; - if (pic == NULL) return 0; - if (!AdjustAndCheckRectangle(pic, &left, &top, width, height)) return 0; + picture->u = mem; + mem += uv_size; + picture->v = mem; + mem += uv_size; - WebPPictureGrabSpecs(pic, &tmp); - tmp.width = width; - tmp.height = height; - if (!WebPPictureAlloc(&tmp)) return 0; - - if (!pic->use_argb) { - const int y_offset = top * pic->y_stride + left; - const int uv_offset = (top / 2) * pic->uv_stride + left / 2; - CopyPlane(pic->y + y_offset, pic->y_stride, - tmp.y, tmp.y_stride, width, height); - CopyPlane(pic->u + uv_offset, pic->uv_stride, - tmp.u, tmp.uv_stride, HALVE(width), HALVE(height)); - CopyPlane(pic->v + uv_offset, pic->uv_stride, - tmp.v, tmp.uv_stride, HALVE(width), HALVE(height)); - - if (tmp.a != NULL) { - const int a_offset = top * pic->a_stride + left; - CopyPlane(pic->a + a_offset, pic->a_stride, - tmp.a, tmp.a_stride, width, height); - } -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (tmp.u0 != NULL) { - int w = width; - int left_pos = left; - if (IS_YUV_CSP(tmp.colorspace, WEBP_YUV422)) { - w = HALVE(w); - left_pos = HALVE(left_pos); - } - CopyPlane(pic->u0 + top * pic->uv0_stride + left_pos, pic->uv0_stride, - tmp.u0, tmp.uv0_stride, w, height); - CopyPlane(pic->v0 + top * pic->uv0_stride + left_pos, pic->uv0_stride, - tmp.v0, tmp.uv0_stride, w, height); - } -#endif - } else { - const uint8_t* const src = - (const uint8_t*)(pic->argb + top * pic->argb_stride + left); - CopyPlane(src, pic->argb_stride * 4, - (uint8_t*)tmp.argb, tmp.argb_stride * 4, - width * 4, height); + if (a_size > 0) { + picture->a = mem; + mem += a_size; } - WebPPictureFree(pic); - *pic = tmp; + (void)mem; // makes the static analyzer happy return 1; } -//------------------------------------------------------------------------------ -// Simple picture rescaler - -static void RescalePlane(const uint8_t* src, - int src_width, int src_height, int src_stride, - uint8_t* dst, - int dst_width, int dst_height, int dst_stride, - int32_t* const work, - int num_channels) { - WebPRescaler rescaler; - int y = 0; - WebPRescalerInit(&rescaler, src_width, src_height, - dst, dst_width, dst_height, dst_stride, - num_channels, - src_width, dst_width, - src_height, dst_height, - work); - memset(work, 0, 2 * dst_width * num_channels * sizeof(*work)); - while (y < src_height) { - y += WebPRescalerImport(&rescaler, src_height - y, - src + y * src_stride, src_stride); - WebPRescalerExport(&rescaler); - } -} - -int WebPPictureRescale(WebPPicture* pic, int width, int height) { - WebPPicture tmp; - int prev_width, prev_height; - int32_t* work; - - if (pic == NULL) return 0; - prev_width = pic->width; - prev_height = pic->height; - // if width is unspecified, scale original proportionally to height ratio. - if (width == 0) { - width = (prev_width * height + prev_height / 2) / prev_height; - } - // if height is unspecified, scale original proportionally to width ratio. - if (height == 0) { - height = (prev_height * width + prev_width / 2) / prev_width; - } - // Check if the overall dimensions still make sense. - if (width <= 0 || height <= 0) return 0; - - WebPPictureGrabSpecs(pic, &tmp); - tmp.width = width; - tmp.height = height; - if (!WebPPictureAlloc(&tmp)) return 0; - - if (!pic->use_argb) { - work = (int32_t*)WebPSafeMalloc(2ULL * width, sizeof(*work)); - if (work == NULL) { - WebPPictureFree(&tmp); - return 0; - } +int WebPPictureAlloc(WebPPicture* picture) { + if (picture != NULL) { + const int width = picture->width; + const int height = picture->height; - RescalePlane(pic->y, prev_width, prev_height, pic->y_stride, - tmp.y, width, height, tmp.y_stride, work, 1); - RescalePlane(pic->u, - HALVE(prev_width), HALVE(prev_height), pic->uv_stride, - tmp.u, - HALVE(width), HALVE(height), tmp.uv_stride, work, 1); - RescalePlane(pic->v, - HALVE(prev_width), HALVE(prev_height), pic->uv_stride, - tmp.v, - HALVE(width), HALVE(height), tmp.uv_stride, work, 1); + WebPPictureFree(picture); // erase previous buffer - if (tmp.a != NULL) { - RescalePlane(pic->a, prev_width, prev_height, pic->a_stride, - tmp.a, width, height, tmp.a_stride, work, 1); - } -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (tmp.u0 != NULL) { - const int s = IS_YUV_CSP(tmp.colorspace, WEBP_YUV422) ? 2 : 1; - RescalePlane( - pic->u0, (prev_width + s / 2) / s, prev_height, pic->uv0_stride, - tmp.u0, (width + s / 2) / s, height, tmp.uv0_stride, work, 1); - RescalePlane( - pic->v0, (prev_width + s / 2) / s, prev_height, pic->uv0_stride, - tmp.v0, (width + s / 2) / s, height, tmp.uv0_stride, work, 1); - } -#endif - } else { - work = (int32_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work)); - if (work == NULL) { - WebPPictureFree(&tmp); - return 0; + if (!picture->use_argb) { + return WebPPictureAllocYUVA(picture, width, height); + } else { + return WebPPictureAllocARGB(picture, width, height); } - - RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height, - pic->argb_stride * 4, - (uint8_t*)tmp.argb, width, height, - tmp.argb_stride * 4, - work, 4); - } - WebPPictureFree(pic); - free(work); - *pic = tmp; return 1; } +void WebPPictureFree(WebPPicture* picture) { + if (picture != NULL) { + WebPSafeFree(picture->memory_); + WebPSafeFree(picture->memory_argb_); + WebPPictureResetBuffers(picture); + } +} + //------------------------------------------------------------------------------ // WebPMemoryWriter: Write-to-memory @@ -494,7 +203,7 @@ int WebPMemoryWrite(const uint8_t* data, size_t data_size, if (w->size > 0) { memcpy(new_mem, w->mem, w->size); } - free(w->mem); + WebPSafeFree(w->mem); w->mem = new_mem; // down-cast is ok, thanks to WebPSafeMalloc w->max_size = (size_t)next_max_size; @@ -506,469 +215,13 @@ int WebPMemoryWrite(const uint8_t* data, size_t data_size, return 1; } -//------------------------------------------------------------------------------ -// Detection of non-trivial transparency - -// Returns true if alpha[] has non-0xff values. -static int CheckNonOpaque(const uint8_t* alpha, int width, int height, - int x_step, int y_step) { - if (alpha == NULL) return 0; - while (height-- > 0) { - int x; - for (x = 0; x < width * x_step; x += x_step) { - if (alpha[x] != 0xff) return 1; // TODO(skal): check 4/8 bytes at a time. - } - alpha += y_step; - } - return 0; -} - -// Checking for the presence of non-opaque alpha. -int WebPPictureHasTransparency(const WebPPicture* picture) { - if (picture == NULL) return 0; - if (!picture->use_argb) { - return CheckNonOpaque(picture->a, picture->width, picture->height, - 1, picture->a_stride); - } else { - int x, y; - const uint32_t* argb = picture->argb; - if (argb == NULL) return 0; - for (y = 0; y < picture->height; ++y) { - for (x = 0; x < picture->width; ++x) { - if (argb[x] < 0xff000000u) return 1; // test any alpha values != 0xff - } - argb += picture->argb_stride; - } - } - return 0; -} - -//------------------------------------------------------------------------------ -// RGB -> YUV conversion - -// TODO: we can do better than simply 2x2 averaging on U/V samples. -#define SUM4(ptr) ((ptr)[0] + (ptr)[step] + \ - (ptr)[rgb_stride] + (ptr)[rgb_stride + step]) -#define SUM2H(ptr) (2 * (ptr)[0] + 2 * (ptr)[step]) -#define SUM2V(ptr) (2 * (ptr)[0] + 2 * (ptr)[rgb_stride]) -#define SUM1(ptr) (4 * (ptr)[0]) -#define RGB_TO_UV(x, y, SUM) { \ - const int src = (2 * (step * (x) + (y) * rgb_stride)); \ - const int dst = (x) + (y) * picture->uv_stride; \ - const int r = SUM(r_ptr + src); \ - const int g = SUM(g_ptr + src); \ - const int b = SUM(b_ptr + src); \ - picture->u[dst] = VP8RGBToU(r, g, b); \ - picture->v[dst] = VP8RGBToV(r, g, b); \ -} - -#define RGB_TO_UV0(x_in, x_out, y, SUM) { \ - const int src = (step * (x_in) + (y) * rgb_stride); \ - const int dst = (x_out) + (y) * picture->uv0_stride; \ - const int r = SUM(r_ptr + src); \ - const int g = SUM(g_ptr + src); \ - const int b = SUM(b_ptr + src); \ - picture->u0[dst] = VP8RGBToU(r, g, b); \ - picture->v0[dst] = VP8RGBToV(r, g, b); \ -} - -static void MakeGray(WebPPicture* const picture) { - int y; - const int uv_width = HALVE(picture->width); - const int uv_height = HALVE(picture->height); - for (y = 0; y < uv_height; ++y) { - memset(picture->u + y * picture->uv_stride, 128, uv_width); - memset(picture->v + y * picture->uv_stride, 128, uv_width); - } -} - -static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, - const uint8_t* const g_ptr, - const uint8_t* const b_ptr, - const uint8_t* const a_ptr, - int step, // bytes per pixel - int rgb_stride, // bytes per scanline - WebPPicture* const picture) { - const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK; - int x, y; - const int width = picture->width; - const int height = picture->height; - const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride); - - picture->colorspace = uv_csp; - picture->use_argb = 0; - if (has_alpha) { - picture->colorspace |= WEBP_CSP_ALPHA_BIT; - } - if (!WebPPictureAlloc(picture)) return 0; - - // Import luma plane - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - const int offset = step * x + y * rgb_stride; - picture->y[x + y * picture->y_stride] = - VP8RGBToY(r_ptr[offset], g_ptr[offset], b_ptr[offset]); - } - } - - // Downsample U/V plane - if (uv_csp != WEBP_YUV400) { - for (y = 0; y < (height >> 1); ++y) { - for (x = 0; x < (width >> 1); ++x) { - RGB_TO_UV(x, y, SUM4); - } - if (width & 1) { - RGB_TO_UV(x, y, SUM2V); - } - } - if (height & 1) { - for (x = 0; x < (width >> 1); ++x) { - RGB_TO_UV(x, y, SUM2H); - } - if (width & 1) { - RGB_TO_UV(x, y, SUM1); - } - } - -#ifdef WEBP_EXPERIMENTAL_FEATURES - // Store original U/V samples too - if (uv_csp == WEBP_YUV422) { - for (y = 0; y < height; ++y) { - for (x = 0; x < (width >> 1); ++x) { - RGB_TO_UV0(2 * x, x, y, SUM2H); - } - if (width & 1) { - RGB_TO_UV0(2 * x, x, y, SUM1); - } - } - } else if (uv_csp == WEBP_YUV444) { - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - RGB_TO_UV0(x, x, y, SUM1); - } - } - } -#endif - } else { - MakeGray(picture); - } - - if (has_alpha) { - assert(step >= 4); - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - picture->a[x + y * picture->a_stride] = - a_ptr[step * x + y * rgb_stride]; - } - } - } - return 1; -} - -static int Import(WebPPicture* const picture, - const uint8_t* const rgb, int rgb_stride, - int step, int swap_rb, int import_alpha) { - const uint8_t* const r_ptr = rgb + (swap_rb ? 2 : 0); - const uint8_t* const g_ptr = rgb + 1; - const uint8_t* const b_ptr = rgb + (swap_rb ? 0 : 2); - const uint8_t* const a_ptr = import_alpha ? rgb + 3 : NULL; - const int width = picture->width; - const int height = picture->height; - - if (!picture->use_argb) { - return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride, - picture); - } - if (import_alpha) { - picture->colorspace |= WEBP_CSP_ALPHA_BIT; - } else { - picture->colorspace &= ~WEBP_CSP_ALPHA_BIT; - } - if (!WebPPictureAlloc(picture)) return 0; - - if (!import_alpha) { - int x, y; - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - const int offset = step * x + y * rgb_stride; - const uint32_t argb = - 0xff000000u | - (r_ptr[offset] << 16) | - (g_ptr[offset] << 8) | - (b_ptr[offset]); - picture->argb[x + y * picture->argb_stride] = argb; - } - } - } else { - int x, y; - assert(step >= 4); - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - const int offset = step * x + y * rgb_stride; - const uint32_t argb = (a_ptr[offset] << 24) | - (r_ptr[offset] << 16) | - (g_ptr[offset] << 8) | - (b_ptr[offset]); - picture->argb[x + y * picture->argb_stride] = argb; - } - } - } - return 1; -} -#undef SUM4 -#undef SUM2V -#undef SUM2H -#undef SUM1 -#undef RGB_TO_UV - -int WebPPictureImportRGB(WebPPicture* picture, - const uint8_t* rgb, int rgb_stride) { - return Import(picture, rgb, rgb_stride, 3, 0, 0); -} - -int WebPPictureImportBGR(WebPPicture* picture, - const uint8_t* rgb, int rgb_stride) { - return Import(picture, rgb, rgb_stride, 3, 1, 0); -} - -int WebPPictureImportRGBA(WebPPicture* picture, - const uint8_t* rgba, int rgba_stride) { - return Import(picture, rgba, rgba_stride, 4, 0, 1); -} - -int WebPPictureImportBGRA(WebPPicture* picture, - const uint8_t* rgba, int rgba_stride) { - return Import(picture, rgba, rgba_stride, 4, 1, 1); -} - -int WebPPictureImportRGBX(WebPPicture* picture, - const uint8_t* rgba, int rgba_stride) { - return Import(picture, rgba, rgba_stride, 4, 0, 0); -} - -int WebPPictureImportBGRX(WebPPicture* picture, - const uint8_t* rgba, int rgba_stride) { - return Import(picture, rgba, rgba_stride, 4, 1, 0); -} - -//------------------------------------------------------------------------------ -// Automatic YUV <-> ARGB conversions. - -int WebPPictureYUVAToARGB(WebPPicture* picture) { - if (picture == NULL) return 0; - if (picture->memory_ == NULL || picture->y == NULL || - picture->u == NULL || picture->v == NULL) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); - } - if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); - } - if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); - } - // Allocate a new argb buffer (discarding the previous one). - if (!PictureAllocARGB(picture)) return 0; - - // Convert - { - int y; - const int width = picture->width; - const int height = picture->height; - const int argb_stride = 4 * picture->argb_stride; - uint8_t* dst = (uint8_t*)picture->argb; - const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y; - WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST); - - // First row, with replicated top samples. - upsample(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, NULL, dst, width); - cur_y += picture->y_stride; - dst += argb_stride; - // Center rows. - for (y = 1; y + 1 < height; y += 2) { - const uint8_t* const top_u = cur_u; - const uint8_t* const top_v = cur_v; - cur_u += picture->uv_stride; - cur_v += picture->uv_stride; - upsample(cur_y, cur_y + picture->y_stride, top_u, top_v, cur_u, cur_v, - dst, dst + argb_stride, width); - cur_y += 2 * picture->y_stride; - dst += 2 * argb_stride; - } - // Last row (if needed), with replicated bottom samples. - if (height > 1 && !(height & 1)) { - upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width); - } - // Insert alpha values if needed, in replacement for the default 0xff ones. - if (picture->colorspace & WEBP_CSP_ALPHA_BIT) { - for (y = 0; y < height; ++y) { - uint32_t* const dst = picture->argb + y * picture->argb_stride; - const uint8_t* const src = picture->a + y * picture->a_stride; - int x; - for (x = 0; x < width; ++x) { - dst[x] = (dst[x] & 0x00ffffffu) | (src[x] << 24); - } - } - } - } - return 1; -} - -int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) { - if (picture == NULL) return 0; - if (picture->argb == NULL) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); - } else { - const uint8_t* const argb = (const uint8_t*)picture->argb; - const uint8_t* const r = ALPHA_IS_LAST ? argb + 2 : argb + 1; - const uint8_t* const g = ALPHA_IS_LAST ? argb + 1 : argb + 2; - const uint8_t* const b = ALPHA_IS_LAST ? argb + 0 : argb + 3; - const uint8_t* const a = ALPHA_IS_LAST ? argb + 3 : argb + 0; - // We work on a tmp copy of 'picture', because ImportYUVAFromRGBA() - // would be calling WebPPictureFree(picture) otherwise. - WebPPicture tmp = *picture; - PictureResetARGB(&tmp); // reset ARGB buffer so that it's not free()'d. - tmp.use_argb = 0; - tmp.colorspace = colorspace & WEBP_CSP_UV_MASK; - if (!ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride, &tmp)) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - } - // Copy back the YUV specs into 'picture'. - tmp.argb = picture->argb; - tmp.argb_stride = picture->argb_stride; - tmp.memory_argb_ = picture->memory_argb_; - *picture = tmp; +void WebPMemoryWriterClear(WebPMemoryWriter* writer) { + if (writer != NULL) { + WebPSafeFree(writer->mem); + writer->mem = NULL; + writer->size = 0; + writer->max_size = 0; } - return 1; -} - -//------------------------------------------------------------------------------ -// Helper: clean up fully transparent area to help compressibility. - -#define SIZE 8 -#define SIZE2 (SIZE / 2) -static int is_transparent_area(const uint8_t* ptr, int stride, int size) { - int y, x; - for (y = 0; y < size; ++y) { - for (x = 0; x < size; ++x) { - if (ptr[x]) { - return 0; - } - } - ptr += stride; - } - return 1; -} - -static WEBP_INLINE void flatten(uint8_t* ptr, int v, int stride, int size) { - int y; - for (y = 0; y < size; ++y) { - memset(ptr, v, size); - ptr += stride; - } -} - -void WebPCleanupTransparentArea(WebPPicture* pic) { - int x, y, w, h; - const uint8_t* a_ptr; - int values[3] = { 0 }; - - if (pic == NULL) return; - - a_ptr = pic->a; - if (a_ptr == NULL) return; // nothing to do - - w = pic->width / SIZE; - h = pic->height / SIZE; - for (y = 0; y < h; ++y) { - int need_reset = 1; - for (x = 0; x < w; ++x) { - const int off_a = (y * pic->a_stride + x) * SIZE; - const int off_y = (y * pic->y_stride + x) * SIZE; - const int off_uv = (y * pic->uv_stride + x) * SIZE2; - if (is_transparent_area(a_ptr + off_a, pic->a_stride, SIZE)) { - if (need_reset) { - values[0] = pic->y[off_y]; - values[1] = pic->u[off_uv]; - values[2] = pic->v[off_uv]; - need_reset = 0; - } - flatten(pic->y + off_y, values[0], pic->y_stride, SIZE); - flatten(pic->u + off_uv, values[1], pic->uv_stride, SIZE2); - flatten(pic->v + off_uv, values[2], pic->uv_stride, SIZE2); - } else { - need_reset = 1; - } - } - // ignore the left-overs on right/bottom - } -} - -#undef SIZE -#undef SIZE2 - - -//------------------------------------------------------------------------------ -// Distortion - -// Max value returned in case of exact similarity. -static const double kMinDistortion_dB = 99.; - -int WebPPictureDistortion(const WebPPicture* pic1, const WebPPicture* pic2, - int type, float result[5]) { - int c; - DistoStats stats[5]; - int has_alpha; - - if (pic1 == NULL || pic2 == NULL || - pic1->width != pic2->width || pic1->height != pic2->height || - pic1->y == NULL || pic2->y == NULL || - pic1->u == NULL || pic2->u == NULL || - pic1->v == NULL || pic2->v == NULL || - result == NULL) { - return 0; - } - // TODO(skal): provide distortion for ARGB too. - if (pic1->use_argb == 1 || pic1->use_argb != pic2->use_argb) { - return 0; - } - - has_alpha = !!(pic1->colorspace & WEBP_CSP_ALPHA_BIT); - if (has_alpha != !!(pic2->colorspace & WEBP_CSP_ALPHA_BIT) || - (has_alpha && (pic1->a == NULL || pic2->a == NULL))) { - return 0; - } - - memset(stats, 0, sizeof(stats)); - VP8SSIMAccumulatePlane(pic1->y, pic1->y_stride, - pic2->y, pic2->y_stride, - pic1->width, pic1->height, &stats[0]); - VP8SSIMAccumulatePlane(pic1->u, pic1->uv_stride, - pic2->u, pic2->uv_stride, - (pic1->width + 1) >> 1, (pic1->height + 1) >> 1, - &stats[1]); - VP8SSIMAccumulatePlane(pic1->v, pic1->uv_stride, - pic2->v, pic2->uv_stride, - (pic1->width + 1) >> 1, (pic1->height + 1) >> 1, - &stats[2]); - if (has_alpha) { - VP8SSIMAccumulatePlane(pic1->a, pic1->a_stride, - pic2->a, pic2->a_stride, - pic1->width, pic1->height, &stats[3]); - } - for (c = 0; c <= 4; ++c) { - if (type == 1) { - const double v = VP8SSIMGet(&stats[c]); - result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v) - : kMinDistortion_dB); - } else { - const double v = VP8SSIMGetSquaredError(&stats[c]); - result[c] = (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.)) - : kMinDistortion_dB); - } - // Accumulate forward - if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]); - } - return 1; } //------------------------------------------------------------------------------ @@ -1000,7 +253,7 @@ static size_t Encode(const uint8_t* rgba, int width, int height, int stride, ok = import(&pic, rgba, stride) && WebPEncode(&config, &pic); WebPPictureFree(&pic); if (!ok) { - free(wrt.mem); + WebPMemoryWriterClear(&wrt); *output = NULL; return 0; } @@ -1014,10 +267,10 @@ size_t NAME(const uint8_t* in, int w, int h, int bps, float q, \ return Encode(in, w, h, bps, IMPORTER, q, 0, out); \ } -ENCODE_FUNC(WebPEncodeRGB, WebPPictureImportRGB); -ENCODE_FUNC(WebPEncodeBGR, WebPPictureImportBGR); -ENCODE_FUNC(WebPEncodeRGBA, WebPPictureImportRGBA); -ENCODE_FUNC(WebPEncodeBGRA, WebPPictureImportBGRA); +ENCODE_FUNC(WebPEncodeRGB, WebPPictureImportRGB) +ENCODE_FUNC(WebPEncodeBGR, WebPPictureImportBGR) +ENCODE_FUNC(WebPEncodeRGBA, WebPPictureImportRGBA) +ENCODE_FUNC(WebPEncodeBGRA, WebPPictureImportBGRA) #undef ENCODE_FUNC @@ -1027,15 +280,11 @@ size_t NAME(const uint8_t* in, int w, int h, int bps, uint8_t** out) { \ return Encode(in, w, h, bps, IMPORTER, LOSSLESS_DEFAULT_QUALITY, 1, out); \ } -LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGB, WebPPictureImportRGB); -LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR); -LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGBA, WebPPictureImportRGBA); -LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGRA, WebPPictureImportBGRA); +LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGB, WebPPictureImportRGB) +LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR) +LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGBA, WebPPictureImportRGBA) +LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGRA, WebPPictureImportBGRA) #undef LOSSLESS_ENCODE_FUNC //------------------------------------------------------------------------------ - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/enc/quant.c b/drivers/webp/enc/quant.c index ea153849c8..002c326b82 100644 --- a/drivers/webp/enc/quant.c +++ b/drivers/webp/enc/quant.c @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Quantization @@ -11,6 +13,7 @@ #include <assert.h> #include <math.h> +#include <stdlib.h> // for abs() #include "./vp8enci.h" #include "./cost.h" @@ -22,16 +25,78 @@ #define MID_ALPHA 64 // neutral value for susceptibility #define MIN_ALPHA 30 // lowest usable value for susceptibility -#define MAX_ALPHA 100 // higher meaninful value for susceptibility +#define MAX_ALPHA 100 // higher meaningful value for susceptibility #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP // power-law modulation. Must be strictly less than 1. +#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision + +// number of non-zero coeffs below which we consider the block very flat +// (and apply a penalty to complex predictions) +#define FLATNESS_LIMIT_I16 10 // I16 mode +#define FLATNESS_LIMIT_I4 3 // I4 mode +#define FLATNESS_LIMIT_UV 2 // UV mode +#define FLATNESS_PENALTY 140 // roughly ~1bit per block + #define MULT_8B(a, b) (((a) * (b) + 128) >> 8) -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif +// #define DEBUG_BLOCK + +//------------------------------------------------------------------------------ + +#if defined(DEBUG_BLOCK) + +#include <stdio.h> +#include <stdlib.h> + +static void PrintBlockInfo(const VP8EncIterator* const it, + const VP8ModeScore* const rd) { + int i, j; + const int is_i16 = (it->mb_->type_ == 1); + printf("SOURCE / OUTPUT / ABS DELTA\n"); + for (j = 0; j < 24; ++j) { + if (j == 16) printf("\n"); // newline before the U/V block + for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]); + printf(" "); + for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]); + printf(" "); + for (i = 0; i < 16; ++i) { + printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS])); + } + printf("\n"); + } + printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n", + (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz, + (int)rd->score); + if (is_i16) { + printf("Mode: %d\n", rd->mode_i16); + printf("y_dc_levels:"); + for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]); + printf("\n"); + } else { + printf("Modes[16]: "); + for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]); + printf("\n"); + } + printf("y_ac_levels:\n"); + for (j = 0; j < 16; ++j) { + for (i = is_i16 ? 1 : 0; i < 16; ++i) { + printf("%4d ", rd->y_ac_levels[j][i]); + } + printf("\n"); + } + printf("\n"); + printf("uv_levels (mode=%d):\n", rd->mode_uv); + for (j = 0; j < 8; ++j) { + for (i = 0; i < 16; ++i) { + printf("%4d ", rd->uv_levels[j][i]); + } + printf("\n"); + } +} + +#endif // DEBUG_BLOCK //------------------------------------------------------------------------------ @@ -100,31 +165,13 @@ static const uint16_t kAcTable2[128] = { 385, 393, 401, 409, 416, 424, 432, 440 }; -static const uint16_t kCoeffThresh[16] = { - 0, 10, 20, 30, - 10, 20, 30, 30, - 20, 30, 30, 30, - 30, 30, 30, 30 -}; - -// TODO(skal): tune more. Coeff thresholding? -static const uint8_t kBiasMatrices[3][16] = { // [3] = [luma-ac,luma-dc,chroma] - { 96, 96, 96, 96, - 96, 96, 96, 96, - 96, 96, 96, 96, - 96, 96, 96, 96 }, - { 96, 96, 96, 96, - 96, 96, 96, 96, - 96, 96, 96, 96, - 96, 96, 96, 96 }, - { 96, 96, 96, 96, - 96, 96, 96, 96, - 96, 96, 96, 96, - 96, 96, 96, 96 } +static const uint8_t kBiasMatrices[3][2] = { // [luma-ac,luma-dc,chroma][dc,ac] + { 96, 110 }, { 96, 108 }, { 110, 115 } }; -// Sharpening by (slightly) raising the hi-frequency coeffs (only for trellis). +// Sharpening by (slightly) raising the hi-frequency coeffs. // Hack-ish but helpful for mid-bitrate range. Use with care. +#define SHARPEN_BITS 11 // number of descaling bits for sharpening bias static const uint8_t kFreqSharpening[16] = { 0, 30, 60, 90, 30, 60, 90, 90, @@ -137,20 +184,30 @@ static const uint8_t kFreqSharpening[16] = { // Returns the average quantizer static int ExpandMatrix(VP8Matrix* const m, int type) { - int i; - int sum = 0; + int i, sum; + for (i = 0; i < 2; ++i) { + const int is_ac_coeff = (i > 0); + const int bias = kBiasMatrices[type][is_ac_coeff]; + m->iq_[i] = (1 << QFIX) / m->q_[i]; + m->bias_[i] = BIAS(bias); + // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is: + // * zero if coeff <= zthresh + // * non-zero if coeff > zthresh + m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i]; + } for (i = 2; i < 16; ++i) { m->q_[i] = m->q_[1]; + m->iq_[i] = m->iq_[1]; + m->bias_[i] = m->bias_[1]; + m->zthresh_[i] = m->zthresh_[1]; } - for (i = 0; i < 16; ++i) { - const int j = kZigzag[i]; - const int bias = kBiasMatrices[type][j]; - m->iq_[j] = (1 << QFIX) / m->q_[j]; - m->bias_[j] = BIAS(bias); - // TODO(skal): tune kCoeffThresh[] - m->zthresh_[j] = ((256 /*+ kCoeffThresh[j]*/ - bias) * m->q_[j] + 127) >> 8; - m->sharpen_[j] = (kFreqSharpening[j] * m->q_[j]) >> 11; - sum += m->q_[j]; + for (sum = 0, i = 0; i < 16; ++i) { + if (type == 0) { // we only use sharpening for AC luma coeffs + m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS; + } else { + m->sharpen_[i] = 0; + } + sum += m->q_[i]; } return (sum + 8) >> 4; } @@ -178,17 +235,17 @@ static void SetupMatrices(VP8Encoder* enc) { q16 = ExpandMatrix(&m->y2_, 1); quv = ExpandMatrix(&m->uv_, 2); - // TODO: Switch to kLambda*[] tables? - { - m->lambda_i4_ = (3 * q4 * q4) >> 7; - m->lambda_i16_ = (3 * q16 * q16); - m->lambda_uv_ = (3 * quv * quv) >> 6; - m->lambda_mode_ = (1 * q4 * q4) >> 7; - m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3; - m->lambda_trellis_i16_ = (q16 * q16) >> 2; - m->lambda_trellis_uv_ = (quv *quv) << 1; - m->tlambda_ = (tlambda_scale * q4) >> 5; - } + m->lambda_i4_ = (3 * q4 * q4) >> 7; + m->lambda_i16_ = (3 * q16 * q16); + m->lambda_uv_ = (3 * quv * quv) >> 6; + m->lambda_mode_ = (1 * q4 * q4) >> 7; + m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3; + m->lambda_trellis_i16_ = (q16 * q16) >> 2; + m->lambda_trellis_uv_ = (quv *quv) << 1; + m->tlambda_ = (tlambda_scale * q4) >> 5; + + m->min_disto_ = 10 * m->y1_.q_[0]; // quantization-aware min disto + m->max_edge_ = 0; } } @@ -197,16 +254,21 @@ static void SetupMatrices(VP8Encoder* enc) { // Very small filter-strength values have close to no visual effect. So we can // save a little decoding-CPU by turning filtering off for these. -#define FSTRENGTH_CUTOFF 3 +#define FSTRENGTH_CUTOFF 2 static void SetupFilterStrength(VP8Encoder* const enc) { int i; - const int level0 = enc->config_->filter_strength; + // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering. + const int level0 = 5 * enc->config_->filter_strength; for (i = 0; i < NUM_MB_SEGMENTS; ++i) { - // Segments with lower quantizer will be less filtered. TODO: tune (wrt SNS) - const int level = level0 * 256 * enc->dqm_[i].quant_ / 128; - const int f = level / (256 + enc->dqm_[i].beta_); - enc->dqm_[i].fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f; + VP8SegmentInfo* const m = &enc->dqm_[i]; + // We focus on the quantization of AC coeffs. + const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2; + const int base_strength = + VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep); + // Segments with lower complexity ('beta') will be less filtered. + const int f = base_strength * level0 / (256 + m->beta_); + m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f; } // We record the initial strength (mainly for the case of 1-segment only). enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_; @@ -224,28 +286,90 @@ static void SetupFilterStrength(VP8Encoder* const enc) { // We want to emulate jpeg-like behaviour where the expected "good" quality // is around q=75. Internally, our "good" middle is around c=50. So we // map accordingly using linear piece-wise function -static double QualityToCompression(double q) { - const double c = q / 100.; - return (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.; +static double QualityToCompression(double c) { + const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.; + // The file size roughly scales as pow(quantizer, 3.). Actually, the + // exponent is somewhere between 2.8 and 3.2, but we're mostly interested + // in the mid-quant range. So we scale the compressibility inversely to + // this power-law: quant ~= compression ^ 1/3. This law holds well for + // low quant. Finer modeling for high-quant would make use of kAcTable[] + // more explicitly. + const double v = pow(linear_c, 1 / 3.); + return v; +} + +static double QualityToJPEGCompression(double c, double alpha) { + // We map the complexity 'alpha' and quality setting 'c' to a compression + // exponent empirically matched to the compression curve of libjpeg6b. + // On average, the WebP output size will be roughly similar to that of a + // JPEG file compressed with same quality factor. + const double amin = 0.30; + const double amax = 0.85; + const double exp_min = 0.4; + const double exp_max = 0.9; + const double slope = (exp_min - exp_max) / (amax - amin); + // Linearly interpolate 'expn' from exp_min to exp_max + // in the [amin, amax] range. + const double expn = (alpha > amax) ? exp_min + : (alpha < amin) ? exp_max + : exp_max + slope * (alpha - amin); + const double v = pow(c, expn); + return v; +} + +static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1, + const VP8SegmentInfo* const S2) { + return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_); +} + +static void SimplifySegments(VP8Encoder* const enc) { + int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 }; + const int num_segments = enc->segment_hdr_.num_segments_; + int num_final_segments = 1; + int s1, s2; + for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments + const VP8SegmentInfo* const S1 = &enc->dqm_[s1]; + int found = 0; + // check if we already have similar segment + for (s2 = 0; s2 < num_final_segments; ++s2) { + const VP8SegmentInfo* const S2 = &enc->dqm_[s2]; + if (SegmentsAreEquivalent(S1, S2)) { + found = 1; + break; + } + } + map[s1] = s2; + if (!found) { + if (num_final_segments != s1) { + enc->dqm_[num_final_segments] = enc->dqm_[s1]; + } + ++num_final_segments; + } + } + if (num_final_segments < num_segments) { // Remap + int i = enc->mb_w_ * enc->mb_h_; + while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_]; + enc->segment_hdr_.num_segments_ = num_final_segments; + // Replicate the trailing segment infos (it's mostly cosmetics) + for (i = num_final_segments; i < num_segments; ++i) { + enc->dqm_[i] = enc->dqm_[num_final_segments - 1]; + } + } } void VP8SetSegmentParams(VP8Encoder* const enc, float quality) { int i; int dq_uv_ac, dq_uv_dc; - const int num_segments = enc->config_->segments; + const int num_segments = enc->segment_hdr_.num_segments_; const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.; - const double c_base = QualityToCompression(quality); + const double Q = quality / 100.; + const double c_base = enc->config_->emulate_jpeg_size ? + QualityToJPEGCompression(Q, enc->alpha_ / 255.) : + QualityToCompression(Q); for (i = 0; i < num_segments; ++i) { - // The file size roughly scales as pow(quantizer, 3.). Actually, the - // exponent is somewhere between 2.8 and 3.2, but we're mostly interested - // in the mid-quant range. So we scale the compressibility inversely to - // this power-law: quant ~= compression ^ 1/3. This law holds well for - // low quant. Finer modelling for high-quant would make use of kAcTable[] - // more explicitely. - // Additionally, we modulate the base exponent 1/3 to accommodate for the - // quantization susceptibility and allow denser segments to be quantized - // more. - const double expn = (1. - amp * enc->dqm_[i].alpha_) / 3.; + // We modulate the base coefficient to accommodate for the quantization + // susceptibility and allow denser segments to be quantized more. + const double expn = 1. - amp * enc->dqm_[i].alpha_; const double c = pow(c_base, expn); const int q = (int)(127. * (1. - c)); assert(expn > 0.); @@ -271,7 +395,7 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) { dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV); // We also boost the dc-uv-quant a little, based on sns-strength, since // U/V channels are quite more reactive to high quants (flat DC-blocks - // tend to appear, and are displeasant). + // tend to appear, and are unpleasant). dq_uv_dc = -4 * enc->config_->sns_strength / 100; dq_uv_dc = clip(dq_uv_dc, -15, 15); // 4bit-signed max allowed @@ -281,9 +405,11 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) { enc->dq_uv_dc_ = dq_uv_dc; enc->dq_uv_ac_ = dq_uv_ac; - SetupMatrices(enc); - SetupFilterStrength(enc); // initialize segments' filtering, eventually + + if (num_segments > 1) SimplifySegments(enc); + + SetupMatrices(enc); // finalize quantization matrices } //------------------------------------------------------------------------------ @@ -299,16 +425,14 @@ const int VP8I4ModeOffsets[NUM_BMODES] = { }; void VP8MakeLuma16Preds(const VP8EncIterator* const it) { - const VP8Encoder* const enc = it->enc_; - const uint8_t* const left = it->x_ ? enc->y_left_ : NULL; - const uint8_t* const top = it->y_ ? enc->y_top_ + it->x_ * 16 : NULL; + const uint8_t* const left = it->x_ ? it->y_left_ : NULL; + const uint8_t* const top = it->y_ ? it->y_top_ : NULL; VP8EncPredLuma16(it->yuv_p_, left, top); } void VP8MakeChroma8Preds(const VP8EncIterator* const it) { - const VP8Encoder* const enc = it->enc_; - const uint8_t* const left = it->x_ ? enc->u_left_ : NULL; - const uint8_t* const top = it->y_ ? enc->uv_top_ + it->x_ * 16 : NULL; + const uint8_t* const left = it->x_ ? it->u_left_ : NULL; + const uint8_t* const top = it->y_ ? it->uv_top_ : NULL; VP8EncPredChroma8(it->yuv_p_, left, top); } @@ -320,23 +444,21 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it) { // Quantize // Layout: -// +----+ -// |YYYY| 0 -// |YYYY| 4 -// |YYYY| 8 -// |YYYY| 12 -// +----+ -// |UUVV| 16 -// |UUVV| 20 -// +----+ - -const int VP8Scan[16 + 4 + 4] = { - // Luma +// +----+----+ +// |YYYY|UUVV| 0 +// |YYYY|UUVV| 4 +// |YYYY|....| 8 +// |YYYY|....| 12 +// +----+----+ + +const int VP8Scan[16] = { // Luma 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, +}; +static const int VP8ScanUV[4 + 4] = { 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V }; @@ -364,6 +486,7 @@ static void InitScore(VP8ModeScore* const rd) { rd->D = 0; rd->SD = 0; rd->R = 0; + rd->H = 0; rd->nz = 0; rd->score = MAX_COST; } @@ -372,6 +495,7 @@ static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { dst->D = src->D; dst->SD = src->SD; dst->R = src->R; + dst->H = src->H; dst->nz = src->nz; // note that nz is not accumulated, but just copied. dst->score = src->score; } @@ -380,6 +504,7 @@ static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { dst->D += src->D; dst->SD += src->SD; dst->R += src->R; + dst->H += src->H; dst->nz |= src->nz; // here, new nz bits are accumulated. dst->score += src->score; } @@ -387,28 +512,31 @@ static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { //------------------------------------------------------------------------------ // Performs trellis-optimized quantization. -// Trellis - +// Trellis node typedef struct { - int prev; // best previous - int level; // level - int sign; // sign of coeff_i - score_t cost; // bit cost - score_t error; // distortion = sum of (|coeff_i| - level_i * Q_i)^2 - int ctx; // context (only depends on 'level'. Could be spared.) + int8_t prev; // best previous node + int8_t sign; // sign of coeff_i + int16_t level; // level } Node; +// Score state +typedef struct { + score_t score; // partial RD score + const uint16_t* costs; // shortcut to cost tables +} ScoreState; + // If a coefficient was quantized to a value Q (using a neutral bias), // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA] // We don't test negative values though. #define MIN_DELTA 0 // how much lower level to try #define MAX_DELTA 1 // how much higher #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA) -#define NODE(n, l) (nodes[(n) + 1][(l) + MIN_DELTA]) +#define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA]) +#define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA]) static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) { // TODO: incorporate the "* 256" in the tables? - rd->score = rd->R * lambda + 256 * (rd->D + rd->SD); + rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD); } static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, @@ -416,34 +544,37 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, return rate * lambda + 256 * distortion; } -static int TrellisQuantizeBlock(const VP8EncIterator* const it, +static int TrellisQuantizeBlock(const VP8Encoder* const enc, int16_t in[16], int16_t out[16], int ctx0, int coeff_type, const VP8Matrix* const mtx, int lambda) { - ProbaArray* const last_costs = it->enc_->proba_.coeffs_[coeff_type]; - CostArray* const costs = it->enc_->proba_.level_cost_[coeff_type]; + const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; + CostArrayPtr const costs = + (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type]; const int first = (coeff_type == 0) ? 1 : 0; - Node nodes[17][NUM_NODES]; + Node nodes[16][NUM_NODES]; + ScoreState score_states[2][NUM_NODES]; + ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA); + ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA); int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous score_t best_score; - int best_node; - int last = first - 1; - int n, m, p, nz; + int n, m, p, last; { score_t cost; - score_t max_error; const int thresh = mtx->q_[1] * mtx->q_[1] / 4; - const int last_proba = last_costs[VP8EncBands[first]][ctx0][0]; + const int last_proba = probas[VP8EncBands[first]][ctx0][0]; - // compute maximal distortion. - max_error = 0; - for (n = first; n < 16; ++n) { - const int j = kZigzag[n]; + // compute the position of the last interesting coefficient + last = first - 1; + for (n = 15; n >= first; --n) { + const int j = kZigzag[n]; const int err = in[j] * in[j]; - max_error += kWeightTrellis[j] * err; - if (err > thresh) last = n; + if (err > thresh) { + last = n; + break; + } } // we don't need to go inspect up to n = 16 coeffs. We can just go up // to last + 1 (inclusive) without losing much. @@ -451,93 +582,95 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it, // compute 'skip' score. This is the max score one can do. cost = VP8BitCost(0, last_proba); - best_score = RDScoreTrellis(lambda, cost, max_error); + best_score = RDScoreTrellis(lambda, cost, 0); // initialize source node. - n = first - 1; for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { - NODE(n, m).cost = 0; - NODE(n, m).error = max_error; - NODE(n, m).ctx = ctx0; + const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0; + ss_cur[m].score = RDScoreTrellis(lambda, rate, 0); + ss_cur[m].costs = costs[first][ctx0]; } } // traverse trellis. for (n = first; n <= last; ++n) { - const int j = kZigzag[n]; - const int Q = mtx->q_[j]; - const int iQ = mtx->iq_[j]; - const int B = BIAS(0x00); // neutral bias + const int j = kZigzag[n]; + const uint32_t Q = mtx->q_[j]; + const uint32_t iQ = mtx->iq_[j]; + const uint32_t B = BIAS(0x00); // neutral bias // note: it's important to take sign of the _original_ coeff, // so we don't have to consider level < 0 afterward. const int sign = (in[j] < 0); - int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j]; - int level0; - if (coeff0 > 2047) coeff0 = 2047; + const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j]; + int level0 = QUANTDIV(coeff0, iQ, B); + if (level0 > MAX_LEVEL) level0 = MAX_LEVEL; + + { // Swap current and previous score states + ScoreState* const tmp = ss_cur; + ss_cur = ss_prev; + ss_prev = tmp; + } - level0 = QUANTDIV(coeff0, iQ, B); // test all alternate level values around level0. for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { Node* const cur = &NODE(n, m); - int delta_error, new_error; - score_t cur_score = MAX_COST; int level = level0 + m; - int last_proba; - - cur->sign = sign; - cur->level = level; - cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2; - if (level >= 2048 || level < 0) { // node is dead? - cur->cost = MAX_COST; + const int ctx = (level > 2) ? 2 : level; + const int band = VP8EncBands[n + 1]; + score_t base_score, last_pos_score; + score_t best_cur_score = MAX_COST; + int best_prev = 0; // default, in case + + ss_cur[m].score = MAX_COST; + ss_cur[m].costs = costs[n + 1][ctx]; + if (level > MAX_LEVEL || level < 0) { // node is dead? continue; } - last_proba = last_costs[VP8EncBands[n + 1]][cur->ctx][0]; - // Compute delta_error = how much coding this level will - // subtract as distortion to max_error - new_error = coeff0 - level * Q; - delta_error = - kWeightTrellis[j] * (coeff0 * coeff0 - new_error * new_error); + // Compute extra rate cost if last coeff's position is < 15 + { + const score_t last_pos_cost = + (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0; + last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0); + } + + { + // Compute delta_error = how much coding this level will + // subtract to max_error as distortion. + // Here, distortion = sum of (|coeff_i| - level_i * Q_i)^2 + const int new_error = coeff0 - level * Q; + const int delta_error = + kWeightTrellis[j] * (new_error * new_error - coeff0 * coeff0); + base_score = RDScoreTrellis(lambda, 0, delta_error); + } // Inspect all possible non-dead predecessors. Retain only the best one. for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) { - const Node* const prev = &NODE(n - 1, p); - const int prev_ctx = prev->ctx; - const uint16_t* const tcost = costs[VP8EncBands[n]][prev_ctx]; - const score_t total_error = prev->error - delta_error; - score_t cost, base_cost, score; - - if (prev->cost >= MAX_COST) { // dead node? - continue; - } - - // Base cost of both terminal/non-terminal - base_cost = prev->cost + VP8LevelCost(tcost, level); - + // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically + // eliminated since their score can't be better than the current best. + const score_t cost = VP8LevelCost(ss_prev[p].costs, level); // Examine node assuming it's a non-terminal one. - cost = base_cost; - if (level && n < 15) { - cost += VP8BitCost(1, last_proba); + const score_t score = + base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0); + if (score < best_cur_score) { + best_cur_score = score; + best_prev = p; } - score = RDScoreTrellis(lambda, cost, total_error); - if (score < cur_score) { - cur_score = score; - cur->cost = cost; - cur->error = total_error; - cur->prev = p; - } - - // Now, record best terminal node (and thus best entry in the graph). - if (level) { - cost = base_cost; - if (n < 15) cost += VP8BitCost(0, last_proba); - score = RDScoreTrellis(lambda, cost, total_error); - if (score < best_score) { - best_score = score; - best_path[0] = n; // best eob position - best_path[1] = m; // best level - best_path[2] = p; // best predecessor - } + } + // Store best finding in current node. + cur->sign = sign; + cur->level = level; + cur->prev = best_prev; + ss_cur[m].score = best_cur_score; + + // Now, record best terminal node (and thus best entry in the graph). + if (level != 0) { + const score_t score = best_cur_score + last_pos_score; + if (score < best_score) { + best_score = score; + best_path[0] = n; // best eob position + best_path[1] = m; // best node index + best_path[2] = best_prev; // best predecessor } } } @@ -550,23 +683,25 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it, return 0; // skip! } - // Unwind the best path. - // Note: best-prev on terminal node is not necessarily equal to the - // best_prev for non-terminal. So we patch best_path[2] in. - n = best_path[0]; - best_node = best_path[1]; - NODE(n, best_node).prev = best_path[2]; // force best-prev for terminal - nz = 0; - - for (; n >= first; --n) { - const Node* const node = &NODE(n, best_node); - const int j = kZigzag[n]; - out[n] = node->sign ? -node->level : node->level; - nz |= (node->level != 0); - in[j] = out[n] * mtx->q_[j]; - best_node = node->prev; + { + // Unwind the best path. + // Note: best-prev on terminal node is not necessarily equal to the + // best_prev for non-terminal. So we patch best_path[2] in. + int nz = 0; + int best_node = best_path[1]; + n = best_path[0]; + NODE(n, best_node).prev = best_path[2]; // force best-prev for terminal + + for (; n >= first; --n) { + const Node* const node = &NODE(n, best_node); + const int j = kZigzag[n]; + out[n] = node->sign ? -node->level : node->level; + nz |= node->level; + in[j] = out[n] * mtx->q_[j]; + best_node = node->prev; + } + return (nz != 0); } - return nz; } #undef NODE @@ -582,17 +717,17 @@ static int ReconstructIntra16(VP8EncIterator* const it, int mode) { const VP8Encoder* const enc = it->enc_; const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; - const uint8_t* const src = it->yuv_in_ + Y_OFF; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; int nz = 0; int n; int16_t tmp[16][16], dc_tmp[16]; - for (n = 0; n < 16; ++n) { - VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]); + for (n = 0; n < 16; n += 2) { + VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]); } VP8FTransformWHT(tmp[0], dc_tmp); - nz |= VP8EncQuantizeBlock(dc_tmp, rd->y_dc_levels, 0, &dqm->y2_) << 24; + nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24; if (DO_TRELLIS_I16 && it->do_trellis_) { int x, y; @@ -601,20 +736,26 @@ static int ReconstructIntra16(VP8EncIterator* const it, for (x = 0; x < 4; ++x, ++n) { const int ctx = it->top_nz_[x] + it->left_nz_[y]; const int non_zero = - TrellisQuantizeBlock(it, tmp[n], rd->y_ac_levels[n], ctx, 0, - &dqm->y1_, dqm->lambda_trellis_i16_); + TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0, + &dqm->y1_, dqm->lambda_trellis_i16_); it->top_nz_[x] = it->left_nz_[y] = non_zero; + rd->y_ac_levels[n][0] = 0; nz |= non_zero << n; } } } else { - for (n = 0; n < 16; ++n) { - nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], 1, &dqm->y1_) << n; + for (n = 0; n < 16; n += 2) { + // Zero-out the first coeff, so that: a) nz is correct below, and + // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. + tmp[n][0] = tmp[n + 1][0] = 0; + nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; + assert(rd->y_ac_levels[n + 0][0] == 0); + assert(rd->y_ac_levels[n + 1][0] == 0); } } // Transform back - VP8ITransformWHT(dc_tmp, tmp[0]); + VP8TransformWHT(dc_tmp, tmp[0]); for (n = 0; n < 16; n += 2) { VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1); } @@ -637,10 +778,10 @@ static int ReconstructIntra4(VP8EncIterator* const it, if (DO_TRELLIS_I4 && it->do_trellis_) { const int x = it->i4_ & 3, y = it->i4_ >> 2; const int ctx = it->top_nz_[x] + it->left_nz_[y]; - nz = TrellisQuantizeBlock(it, tmp, levels, ctx, 3, &dqm->y1_, + nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_, dqm->lambda_trellis_i4_); } else { - nz = VP8EncQuantizeBlock(tmp, levels, 0, &dqm->y1_); + nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_); } VP8ITransform(ref, tmp, yuv_out, 0); return nz; @@ -650,14 +791,14 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, uint8_t* const yuv_out, int mode) { const VP8Encoder* const enc = it->enc_; const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; - const uint8_t* const src = it->yuv_in_ + U_OFF; + const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; int nz = 0; int n; int16_t tmp[8][16]; - for (n = 0; n < 8; ++n) { - VP8FTransform(src + VP8Scan[16 + n], ref + VP8Scan[16 + n], tmp[n]); + for (n = 0; n < 8; n += 2) { + VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); } if (DO_TRELLIS_UV && it->do_trellis_) { int ch, x, y; @@ -666,28 +807,45 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, for (x = 0; x < 2; ++x, ++n) { const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; const int non_zero = - TrellisQuantizeBlock(it, tmp[n], rd->uv_levels[n], ctx, 2, - &dqm->uv_, dqm->lambda_trellis_uv_); + TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2, + &dqm->uv_, dqm->lambda_trellis_uv_); it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero; nz |= non_zero << n; } } } } else { - for (n = 0; n < 8; ++n) { - nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], 0, &dqm->uv_) << n; + for (n = 0; n < 8; n += 2) { + nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n; } } for (n = 0; n < 8; n += 2) { - VP8ITransform(ref + VP8Scan[16 + n], tmp[n], yuv_out + VP8Scan[16 + n], 1); + VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1); } return (nz << 16); } //------------------------------------------------------------------------------ // RD-opt decision. Reconstruct each modes, evalue distortion and bit-cost. -// Pick the mode is lower RD-cost = Rate + lamba * Distortion. +// Pick the mode is lower RD-cost = Rate + lambda * Distortion. + +static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) { + // We look at the first three AC coefficients to determine what is the average + // delta between each sub-4x4 block. + const int v0 = abs(DCs[1]); + const int v1 = abs(DCs[4]); + const int v2 = abs(DCs[5]); + int max_v = (v0 > v1) ? v1 : v0; + max_v = (v2 > max_v) ? v2 : max_v; + if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v; +} + +static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) { + VP8ModeScore* const tmp = *a; + *a = *b; + *b = tmp; +} static void SwapPtr(uint8_t** a, uint8_t** b) { uint8_t* const tmp = *a; @@ -699,43 +857,69 @@ static void SwapOut(VP8EncIterator* const it) { SwapPtr(&it->yuv_out_, &it->yuv_out2_); } -static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) { - const VP8Encoder* const enc = it->enc_; - const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; +static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) { + score_t score = 0; + while (num_blocks-- > 0) { // TODO(skal): refine positional scoring? + int i; + for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC + score += (levels[i] != 0); + if (score > thresh) return 0; + } + levels += 16; + } + return 1; +} + +static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { + const int kNumBlocks = 16; + VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; const int lambda = dqm->lambda_i16_; const int tlambda = dqm->tlambda_; - const uint8_t* const src = it->yuv_in_ + Y_OFF; - VP8ModeScore rd16; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; + VP8ModeScore rd_tmp; + VP8ModeScore* rd_cur = &rd_tmp; + VP8ModeScore* rd_best = rd; int mode; rd->mode_i16 = -1; - for (mode = 0; mode < 4; ++mode) { - uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer - int nz; + for (mode = 0; mode < NUM_PRED_MODES; ++mode) { + uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC; // scratch buffer + rd_cur->mode_i16 = mode; // Reconstruct - nz = ReconstructIntra16(it, &rd16, tmp_dst, mode); + rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode); // Measure RD-score - rd16.D = VP8SSE16x16(src, tmp_dst); - rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) - : 0; - rd16.R = VP8GetCostLuma16(it, &rd16); - rd16.R += VP8FixedCostsI16[mode]; + rd_cur->D = VP8SSE16x16(src, tmp_dst); + rd_cur->SD = + tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0; + rd_cur->H = VP8FixedCostsI16[mode]; + rd_cur->R = VP8GetCostLuma16(it, rd_cur); + if (mode > 0 && + IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) { + // penalty to avoid flat area to be mispredicted by complex mode + rd_cur->R += FLATNESS_PENALTY * kNumBlocks; + } // Since we always examine Intra16 first, we can overwrite *rd directly. - SetRDScore(lambda, &rd16); - if (mode == 0 || rd16.score < rd->score) { - CopyScore(rd, &rd16); - rd->mode_i16 = mode; - rd->nz = nz; - memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels)); - memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels)); + SetRDScore(lambda, rd_cur); + if (mode == 0 || rd_cur->score < rd_best->score) { + SwapModeScore(&rd_cur, &rd_best); SwapOut(it); } } + if (rd_best != rd) { + memcpy(rd, rd_best, sizeof(*rd)); + } SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision. VP8SetIntra16Mode(it, rd->mode_i16); + + // we have a blocky macroblock (only DCs are non-zero) with fairly high + // distortion, record max delta so we can later adjust the minimal filtering + // strength needed to smooth these blocks out. + if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) { + StoreMaxDelta(dqm, rd->y_dc_levels); + } } //------------------------------------------------------------------------------ @@ -755,8 +939,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; const int lambda = dqm->lambda_i4_; const int tlambda = dqm->tlambda_; - const uint8_t* const src0 = it->yuv_in_ + Y_OFF; - uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF; + const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC; + uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC; int total_header_bits = 0; VP8ModeScore rd_best; @@ -765,9 +949,11 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { } InitScore(&rd_best); - rd_best.score = 211; // '211' is the value of VP8BitCost(0, 145) + rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145) + SetRDScore(dqm->lambda_mode_, &rd_best); VP8IteratorStartI4(it); do { + const int kNumBlocks = 1; VP8ModeScore rd_i4; int mode; int best_mode = -1; @@ -791,27 +977,44 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { rd_tmp.SD = tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY)) : 0; - rd_tmp.R = VP8GetCostLuma4(it, tmp_levels); - rd_tmp.R += mode_costs[mode]; + rd_tmp.H = mode_costs[mode]; + + // Add flatness penalty + if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) { + rd_tmp.R = FLATNESS_PENALTY * kNumBlocks; + } else { + rd_tmp.R = 0; + } + // early-out check SetRDScore(lambda, &rd_tmp); + if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue; + + // finish computing score + rd_tmp.R += VP8GetCostLuma4(it, tmp_levels); + SetRDScore(lambda, &rd_tmp); + if (best_mode < 0 || rd_tmp.score < rd_i4.score) { CopyScore(&rd_i4, &rd_tmp); best_mode = mode; SwapPtr(&tmp_dst, &best_block); - memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels)); + memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, + sizeof(rd_best.y_ac_levels[it->i4_])); } } SetRDScore(dqm->lambda_mode_, &rd_i4); AddScore(&rd_best, &rd_i4); - total_header_bits += mode_costs[best_mode]; - if (rd_best.score >= rd->score || - total_header_bits > enc->max_i4_header_bits_) { + if (rd_best.score >= rd->score) { + return 0; + } + total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode]; + if (total_header_bits > enc->max_i4_header_bits_) { return 0; } // Copy selected samples if not in the right place already. - if (best_block != best_blocks + VP8Scan[it->i4_]) + if (best_block != best_blocks + VP8Scan[it->i4_]) { VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]); + } rd->modes_i4[it->i4_] = best_mode; it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0); } while (VP8IteratorRotateI4(it, best_blocks)); @@ -827,18 +1030,19 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { //------------------------------------------------------------------------------ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { - const VP8Encoder* const enc = it->enc_; - const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; + const int kNumBlocks = 8; + const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; const int lambda = dqm->lambda_uv_; - const uint8_t* const src = it->yuv_in_ + U_OFF; - uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer - uint8_t* const dst0 = it->yuv_out_ + U_OFF; + const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; + uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC; // scratch buffer + uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC; + uint8_t* dst = dst0; VP8ModeScore rd_best; int mode; rd->mode_uv = -1; InitScore(&rd_best); - for (mode = 0; mode < 4; ++mode) { + for (mode = 0; mode < NUM_PRED_MODES; ++mode) { VP8ModeScore rd_uv; // Reconstruct @@ -847,19 +1051,25 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { // Compute RD-score rd_uv.D = VP8SSE16x8(src, tmp_dst); rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas. + rd_uv.H = VP8FixedCostsUV[mode]; rd_uv.R = VP8GetCostUV(it, &rd_uv); - rd_uv.R += VP8FixedCostsUV[mode]; + if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) { + rd_uv.R += FLATNESS_PENALTY * kNumBlocks; + } SetRDScore(lambda, &rd_uv); if (mode == 0 || rd_uv.score < rd_best.score) { CopyScore(&rd_best, &rd_uv); rd->mode_uv = mode; memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels)); - memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ? + SwapPtr(&dst, &tmp_dst); } } VP8SetIntraUVMode(it, rd->mode_uv); AddScore(rd, &rd_best); + if (dst != dst0) { // copy 16x8 block if needed + VP8Copy16x8(dst, dst0); + } } //------------------------------------------------------------------------------ @@ -867,33 +1077,88 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { const VP8Encoder* const enc = it->enc_; - const int i16 = (it->mb_->type_ == 1); + const int is_i16 = (it->mb_->type_ == 1); int nz = 0; - if (i16) { - nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]); + if (is_i16) { + nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); } else { VP8IteratorStartI4(it); do { const int mode = it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_]; - const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; - uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_]; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; + uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_]; VP8MakeIntra4Preds(it); nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_], src, dst, mode) << it->i4_; - } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF)); + } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC)); } - nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_); + nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); rd->nz = nz; } +// Refine intra16/intra4 sub-modes based on distortion only (not rate). +static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) { + const int is_i16 = (it->mb_->type_ == 1); + score_t best_score = MAX_COST; + + if (try_both_i4_i16 || is_i16) { + int mode; + int best_mode = -1; + for (mode = 0; mode < NUM_PRED_MODES; ++mode) { + const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; + const score_t score = VP8SSE16x16(src, ref); + if (score < best_score) { + best_mode = mode; + best_score = score; + } + } + VP8SetIntra16Mode(it, best_mode); + } + if (try_both_i4_i16 || !is_i16) { + uint8_t modes_i4[16]; + // We don't evaluate the rate here, but just account for it through a + // constant penalty (i4 mode usually needs more bits compared to i16). + score_t score_i4 = (score_t)I4_PENALTY; + + VP8IteratorStartI4(it); + do { + int mode; + int best_sub_mode = -1; + score_t best_sub_score = MAX_COST; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; + + // TODO(skal): we don't really need the prediction pixels here, + // but just the distortion against 'src'. + VP8MakeIntra4Preds(it); + for (mode = 0; mode < NUM_BMODES; ++mode) { + const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; + const score_t score = VP8SSE4x4(src, ref); + if (score < best_sub_score) { + best_sub_mode = mode; + best_sub_score = score; + } + } + modes_i4[it->i4_] = best_sub_mode; + score_i4 += best_sub_score; + if (score_i4 >= best_score) break; + } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF_ENC)); + if (score_i4 < best_score) { + VP8SetIntra4Mode(it, modes_i4); + } + } +} + //------------------------------------------------------------------------------ // Entry point -int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) { +int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, + VP8RDLevel rd_opt) { int is_skipped; + const int method = it->enc_->method_; InitScore(rd); @@ -902,22 +1167,21 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) { VP8MakeLuma16Preds(it); VP8MakeChroma8Preds(it); - // for rd_opt = 2, we perform trellis-quant on the final decision only. - // for rd_opt > 2, we use it for every scoring (=much slower). - if (rd_opt > 0) { - it->do_trellis_ = (rd_opt > 2); + if (rd_opt > RD_OPT_NONE) { + it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL); PickBestIntra16(it, rd); - if (it->enc_->method_ >= 2) { + if (method >= 2) { PickBestIntra4(it, rd); } PickBestUV(it, rd); - if (rd_opt == 2) { + if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now it->do_trellis_ = 1; SimpleQuantize(it, rd); } } else { - // TODO: for method_ == 2, pick the best intra4/intra16 based on SSE - it->do_trellis_ = (it->enc_->method_ == 2); + // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower). + // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode). + DistoRefine(it, (method >= 2)); SimpleQuantize(it, rd); } is_skipped = (rd->nz == 0); @@ -925,6 +1189,3 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) { return is_skipped; } -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/enc/syntax.c b/drivers/webp/enc/syntax.c index 4221436ff9..a0e79ef404 100644 --- a/drivers/webp/enc/syntax.c +++ b/drivers/webp/enc/syntax.c @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Header syntax writing @@ -11,35 +13,20 @@ #include <assert.h> -#include "../format_constants.h" +#include "../utils/utils.h" +#include "../webp/format_constants.h" // RIFF constants +#include "../webp/mux_types.h" // ALPHA_FLAG #include "./vp8enci.h" -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - //------------------------------------------------------------------------------ // Helper functions -// TODO(later): Move to webp/format_constants.h? -static void PutLE24(uint8_t* const data, uint32_t val) { - data[0] = (val >> 0) & 0xff; - data[1] = (val >> 8) & 0xff; - data[2] = (val >> 16) & 0xff; -} - -static void PutLE32(uint8_t* const data, uint32_t val) { - PutLE24(data, val); - data[3] = (val >> 24) & 0xff; -} - static int IsVP8XNeeded(const VP8Encoder* const enc) { return !!enc->has_alpha_; // Currently the only case when VP8X is needed. // This could change in the future. } static int PutPaddingByte(const WebPPicture* const pic) { - const uint8_t pad_byte[1] = { 0 }; return !!pic->writer(pad_byte, 1, pic); } @@ -73,14 +60,14 @@ static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) { assert(pic->width <= MAX_CANVAS_SIZE && pic->height <= MAX_CANVAS_SIZE); if (enc->has_alpha_) { - flags |= ALPHA_FLAG_BIT; + flags |= ALPHA_FLAG; } PutLE32(vp8x + TAG_SIZE, VP8X_CHUNK_SIZE); PutLE32(vp8x + CHUNK_HEADER_SIZE, flags); PutLE24(vp8x + CHUNK_HEADER_SIZE + 4, pic->width - 1); PutLE24(vp8x + CHUNK_HEADER_SIZE + 7, pic->height - 1); - if(!pic->writer(vp8x, sizeof(vp8x), pic)) { + if (!pic->writer(vp8x, sizeof(vp8x), pic)) { return VP8_ENC_ERROR_BAD_WRITE; } return VP8_ENC_OK; @@ -199,8 +186,8 @@ static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0, // Segmentation header static void PutSegmentHeader(VP8BitWriter* const bw, const VP8Encoder* const enc) { - const VP8SegmentHeader* const hdr = &enc->segment_hdr_; - const VP8Proba* const proba = &enc->proba_; + const VP8EncSegmentHeader* const hdr = &enc->segment_hdr_; + const VP8EncProba* const proba = &enc->proba_; if (VP8PutBitUniform(bw, (hdr->num_segments_ > 1))) { // We always 'update' the quant and filter strength values const int update_data = 1; @@ -210,16 +197,16 @@ static void PutSegmentHeader(VP8BitWriter* const bw, // we always use absolute values, not relative ones VP8PutBitUniform(bw, 1); // (segment_feature_mode = 1. Paragraph 9.3.) for (s = 0; s < NUM_MB_SEGMENTS; ++s) { - VP8PutSignedValue(bw, enc->dqm_[s].quant_, 7); + VP8PutSignedBits(bw, enc->dqm_[s].quant_, 7); } for (s = 0; s < NUM_MB_SEGMENTS; ++s) { - VP8PutSignedValue(bw, enc->dqm_[s].fstrength_, 6); + VP8PutSignedBits(bw, enc->dqm_[s].fstrength_, 6); } } if (hdr->update_map_) { for (s = 0; s < 3; ++s) { if (VP8PutBitUniform(bw, (proba->segments_[s] != 255u))) { - VP8PutValue(bw, proba->segments_[s], 8); + VP8PutBits(bw, proba->segments_[s], 8); } } } @@ -228,20 +215,20 @@ static void PutSegmentHeader(VP8BitWriter* const bw, // Filtering parameters header static void PutFilterHeader(VP8BitWriter* const bw, - const VP8FilterHeader* const hdr) { + const VP8EncFilterHeader* const hdr) { const int use_lf_delta = (hdr->i4x4_lf_delta_ != 0); VP8PutBitUniform(bw, hdr->simple_); - VP8PutValue(bw, hdr->level_, 6); - VP8PutValue(bw, hdr->sharpness_, 3); + VP8PutBits(bw, hdr->level_, 6); + VP8PutBits(bw, hdr->sharpness_, 3); if (VP8PutBitUniform(bw, use_lf_delta)) { // '0' is the default value for i4x4_lf_delta_ at frame #0. const int need_update = (hdr->i4x4_lf_delta_ != 0); if (VP8PutBitUniform(bw, need_update)) { // we don't use ref_lf_delta => emit four 0 bits - VP8PutValue(bw, 0, 4); + VP8PutBits(bw, 0, 4); // we use mode_lf_delta for i4x4 - VP8PutSignedValue(bw, hdr->i4x4_lf_delta_, 6); - VP8PutValue(bw, 0, 3); // all others unused + VP8PutSignedBits(bw, hdr->i4x4_lf_delta_, 6); + VP8PutBits(bw, 0, 3); // all others unused } } } @@ -249,12 +236,12 @@ static void PutFilterHeader(VP8BitWriter* const bw, // Nominal quantization parameters static void PutQuant(VP8BitWriter* const bw, const VP8Encoder* const enc) { - VP8PutValue(bw, enc->base_quant_, 7); - VP8PutSignedValue(bw, enc->dq_y1_dc_, 4); - VP8PutSignedValue(bw, enc->dq_y2_dc_, 4); - VP8PutSignedValue(bw, enc->dq_y2_ac_, 4); - VP8PutSignedValue(bw, enc->dq_uv_dc_, 4); - VP8PutSignedValue(bw, enc->dq_uv_ac_, 4); + VP8PutBits(bw, enc->base_quant_, 7); + VP8PutSignedBits(bw, enc->dq_y1_dc_, 4); + VP8PutSignedBits(bw, enc->dq_y2_dc_, 4); + VP8PutSignedBits(bw, enc->dq_y2_ac_, 4); + VP8PutSignedBits(bw, enc->dq_uv_dc_, 4); + VP8PutSignedBits(bw, enc->dq_uv_ac_, 4); } // Partition sizes @@ -276,58 +263,23 @@ static int EmitPartitionsSize(const VP8Encoder* const enc, //------------------------------------------------------------------------------ -#ifdef WEBP_EXPERIMENTAL_FEATURES - -#define KTRAILER_SIZE 8 - -static int WriteExtensions(VP8Encoder* const enc) { - uint8_t buffer[KTRAILER_SIZE]; - VP8BitWriter* const bw = &enc->bw_; - WebPPicture* const pic = enc->pic_; - - // Layer (bytes 0..3) - PutLE24(buffer + 0, enc->layer_data_size_); - buffer[3] = enc->pic_->colorspace & WEBP_CSP_UV_MASK; - if (enc->layer_data_size_ > 0) { - assert(enc->use_layer_); - // append layer data to last partition - if (!VP8BitWriterAppend(&enc->parts_[enc->num_parts_ - 1], - enc->layer_data_, enc->layer_data_size_)) { - return WebPEncodingSetError(pic, VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY); - } - } - - buffer[KTRAILER_SIZE - 1] = 0x01; // marker - if (!VP8BitWriterAppend(bw, buffer, KTRAILER_SIZE)) { - return WebPEncodingSetError(pic, VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY); - } - return 1; -} - -#endif /* WEBP_EXPERIMENTAL_FEATURES */ - -//------------------------------------------------------------------------------ - -static size_t GeneratePartition0(VP8Encoder* const enc) { +static int GeneratePartition0(VP8Encoder* const enc) { VP8BitWriter* const bw = &enc->bw_; const int mb_size = enc->mb_w_ * enc->mb_h_; uint64_t pos1, pos2, pos3; -#ifdef WEBP_EXPERIMENTAL_FEATURES - const int need_extensions = enc->use_layer_; -#endif pos1 = VP8BitWriterPos(bw); - VP8BitWriterInit(bw, mb_size * 7 / 8); // ~7 bits per macroblock -#ifdef WEBP_EXPERIMENTAL_FEATURES - VP8PutBitUniform(bw, need_extensions); // extensions -#else + if (!VP8BitWriterInit(bw, mb_size * 7 / 8)) { // ~7 bits per macroblock + return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + } VP8PutBitUniform(bw, 0); // colorspace -#endif VP8PutBitUniform(bw, 0); // clamp type PutSegmentHeader(bw, enc); PutFilterHeader(bw, &enc->filter_hdr_); - VP8PutValue(bw, enc->config_->partitions, 2); + VP8PutBits(bw, enc->num_parts_ == 8 ? 3 : + enc->num_parts_ == 4 ? 2 : + enc->num_parts_ == 2 ? 1 : 0, 2); PutQuant(bw, enc); VP8PutBitUniform(bw, 0); // no proba update VP8WriteProbas(bw, &enc->proba_); @@ -335,21 +287,17 @@ static size_t GeneratePartition0(VP8Encoder* const enc) { VP8CodeIntraModes(enc); VP8BitWriterFinish(bw); -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (need_extensions && !WriteExtensions(enc)) { - return 0; - } -#endif - pos3 = VP8BitWriterPos(bw); if (enc->pic_->stats) { enc->pic_->stats->header_bytes[0] = (int)((pos2 - pos1 + 7) >> 3); enc->pic_->stats->header_bytes[1] = (int)((pos3 - pos2 + 7) >> 3); enc->pic_->stats->alpha_data_size = (int)enc->alpha_data_size_; - enc->pic_->stats->layer_data_size = (int)enc->layer_data_size_; } - return !bw->error_; + if (bw->error_) { + return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + } + return 1; } void VP8EncFreeBitWriters(VP8Encoder* const enc) { @@ -371,7 +319,8 @@ int VP8EncWrite(VP8Encoder* const enc) { int p; // Partition #0 with header and partition sizes - ok = !!GeneratePartition0(enc); + ok = GeneratePartition0(enc); + if (!ok) return 0; // Compute VP8 size vp8_size = VP8_FRAME_HEADER_SIZE + @@ -432,6 +381,3 @@ int VP8EncWrite(VP8Encoder* const enc) { //------------------------------------------------------------------------------ -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/enc/tree.c b/drivers/webp/enc/tree.c index 8b25e5e488..f141006d19 100644 --- a/drivers/webp/enc/tree.c +++ b/drivers/webp/enc/tree.c @@ -1,27 +1,24 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // -// Token probabilities +// Coding of token probabilities, intra modes and segments. // // Author: Skal (pascal.massimino@gmail.com) #include "./vp8enci.h" -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - //------------------------------------------------------------------------------ // Default probabilities // Paragraph 13.5 const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = { - // genereated using vp8_default_coef_probs() in entropy.c:129 { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } @@ -157,7 +154,7 @@ const uint8_t }; void VP8DefaultProbas(VP8Encoder* const enc) { - VP8Proba* const probas = &enc->proba_; + VP8EncProba* const probas = &enc->proba_; probas->use_skip_proba_ = 0; memset(probas->segments_, 255u, sizeof(probas->segments_)); memcpy(probas->coeffs_, VP8CoeffsProba0, sizeof(VP8CoeffsProba0)); @@ -318,7 +315,7 @@ void VP8CodeIntraModes(VP8Encoder* const enc) { VP8EncIterator it; VP8IteratorInit(enc, &it); do { - const VP8MBInfo* mb = it.mb_; + const VP8MBInfo* const mb = it.mb_; const uint8_t* preds = it.preds_; if (enc->segment_hdr_.update_map_) { PutSegment(bw, mb->segment_, enc->proba_.segments_); @@ -343,7 +340,7 @@ void VP8CodeIntraModes(VP8Encoder* const enc) { } } PutUVMode(bw, mb->uv_mode_); - } while (VP8IteratorNext(&it, 0)); + } while (VP8IteratorNext(&it)); } //------------------------------------------------------------------------------ @@ -485,7 +482,7 @@ const uint8_t } }; -void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas) { +void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas) { int t, b, c, p; for (t = 0; t < NUM_TYPES; ++t) { for (b = 0; b < NUM_BANDS; ++b) { @@ -494,17 +491,14 @@ void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas) { const uint8_t p0 = probas->coeffs_[t][b][c][p]; const int update = (p0 != VP8CoeffsProba0[t][b][c][p]); if (VP8PutBit(bw, update, VP8CoeffsUpdateProba[t][b][c][p])) { - VP8PutValue(bw, p0, 8); + VP8PutBits(bw, p0, 8); } } } } } if (VP8PutBitUniform(bw, probas->use_skip_proba_)) { - VP8PutValue(bw, probas->skip_proba_, 8); + VP8PutBits(bw, probas->skip_proba_, 8); } } -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/enc/vp8enci.h b/drivers/webp/enc/vp8enci.h index 936e1c18ce..1a7ebe5703 100644 --- a/drivers/webp/enc/vp8enci.h +++ b/drivers/webp/enc/vp8enci.h @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // WebP encoder: internal header. @@ -13,11 +15,18 @@ #define WEBP_ENC_VP8ENCI_H_ #include <string.h> // for memcpy() -#include "../encode.h" +#include "../dec/common.h" #include "../dsp/dsp.h" #include "../utils/bit_writer.h" +#include "../utils/thread.h" +#include "../utils/utils.h" +#include "../webp/encode.h" -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef WEBP_EXPERIMENTAL_FEATURES +#include "./vp8li.h" +#endif // WEBP_EXPERIMENTAL_FEATURES + +#ifdef __cplusplus extern "C" { #endif @@ -26,141 +35,94 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 0 -#define ENC_MIN_VERSION 2 -#define ENC_REV_VERSION 0 - -// size of histogram used by CollectHistogram. -#define MAX_COEFF_THRESH 64 - -// intra prediction modes -enum { B_DC_PRED = 0, // 4x4 modes - B_TM_PRED = 1, - B_VE_PRED = 2, - B_HE_PRED = 3, - B_RD_PRED = 4, - B_VR_PRED = 5, - B_LD_PRED = 6, - B_VL_PRED = 7, - B_HD_PRED = 8, - B_HU_PRED = 9, - NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10 - - // Luma16 or UV modes - DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED, - H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED - }; +#define ENC_MIN_VERSION 4 +#define ENC_REV_VERSION 4 -enum { NUM_MB_SEGMENTS = 4, - MAX_NUM_PARTITIONS = 8, - NUM_TYPES = 4, // 0: i16-AC, 1: i16-DC, 2:chroma-AC, 3:i4-AC - NUM_BANDS = 8, - NUM_CTX = 3, - NUM_PROBAS = 11, - MAX_LF_LEVELS = 64, // Maximum loop filter level - MAX_VARIABLE_LEVEL = 67 // last (inclusive) level with variable cost +enum { MAX_LF_LEVELS = 64, // Maximum loop filter level + MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost + MAX_LEVEL = 2047 // max level (note: max codable is 2047 + 67) }; -// YUV-cache parameters. Cache is 16-pixels wide. -// The original or reconstructed samples can be accessed using VP8Scan[] +typedef enum { // Rate-distortion optimization levels + RD_OPT_NONE = 0, // no rd-opt + RD_OPT_BASIC = 1, // basic scoring (no trellis) + RD_OPT_TRELLIS = 2, // perform trellis-quant on the final decision only + RD_OPT_TRELLIS_ALL = 3 // trellis-quant for every scoring (much slower) +} VP8RDLevel; + +// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). +// The original or reconstructed samples can be accessed using VP8Scan[]. // The predicted blocks can be accessed using offsets to yuv_p_ and -// the arrays VP8*ModeOffsets[]; -// +----+ YUV Samples area. See VP8Scan[] for accessing the blocks. -// Y_OFF |YYYY| <- original samples (enc->yuv_in_) -// |YYYY| -// |YYYY| -// |YYYY| -// U_OFF |UUVV| V_OFF (=U_OFF + 8) -// |UUVV| -// +----+ -// Y_OFF |YYYY| <- compressed/decoded samples ('yuv_out_') -// |YYYY| There are two buffers like this ('yuv_out_'/'yuv_out2_') -// |YYYY| -// |YYYY| -// U_OFF |UUVV| V_OFF -// |UUVV| -// x2 (for yuv_out2_) -// +----+ Prediction area ('yuv_p_', size = PRED_SIZE) -// I16DC16 |YYYY| Intra16 predictions (16x16 block each) -// |YYYY| -// |YYYY| -// |YYYY| -// I16TM16 |YYYY| -// |YYYY| -// |YYYY| -// |YYYY| -// I16VE16 |YYYY| -// |YYYY| -// |YYYY| -// |YYYY| -// I16HE16 |YYYY| -// |YYYY| -// |YYYY| -// |YYYY| -// +----+ Chroma U/V predictions (16x8 block each) -// C8DC8 |UUVV| -// |UUVV| -// C8TM8 |UUVV| -// |UUVV| -// C8VE8 |UUVV| -// |UUVV| -// C8HE8 |UUVV| -// |UUVV| -// +----+ Intra 4x4 predictions (4x4 block each) -// |YYYY| I4DC4 I4TM4 I4VE4 I4HE4 -// |YYYY| I4RD4 I4VR4 I4LD4 I4VL4 -// |YY..| I4HD4 I4HU4 I4TMP -// +----+ -#define BPS 16 // this is the common stride -#define Y_SIZE (BPS * 16) -#define UV_SIZE (BPS * 8) -#define YUV_SIZE (Y_SIZE + UV_SIZE) -#define PRED_SIZE (6 * 16 * BPS + 12 * BPS) -#define Y_OFF (0) -#define U_OFF (Y_SIZE) -#define V_OFF (U_OFF + 8) -#define ALIGN_CST 15 -#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST) - -extern const int VP8Scan[16 + 4 + 4]; // in quant.c -extern const int VP8UVModeOffsets[4]; // in analyze.c +// the arrays VP8*ModeOffsets[]. +// * YUV Samples area (yuv_in_/yuv_out_/yuv_out2_) +// (see VP8Scan[] for accessing the blocks, along with +// Y_OFF_ENC/U_OFF_ENC/V_OFF_ENC): +// +----+----+ +// Y_OFF_ENC |YYYY|UUVV| +// U_OFF_ENC |YYYY|UUVV| +// V_OFF_ENC |YYYY|....| <- 25% wasted U/V area +// |YYYY|....| +// +----+----+ +// * Prediction area ('yuv_p_', size = PRED_SIZE_ENC) +// Intra16 predictions (16x16 block each, two per row): +// |I16DC16|I16TM16| +// |I16VE16|I16HE16| +// Chroma U/V predictions (16x8 block each, two per row): +// |C8DC8|C8TM8| +// |C8VE8|C8HE8| +// Intra 4x4 predictions (4x4 block each) +// |I4DC4 I4TM4 I4VE4 I4HE4|I4RD4 I4VR4 I4LD4 I4VL4| +// |I4HD4 I4HU4 I4TMP .....|.......................| <- ~31% wasted +#define YUV_SIZE_ENC (BPS * 16) +#define PRED_SIZE_ENC (32 * BPS + 16 * BPS + 8 * BPS) // I16+Chroma+I4 preds +#define Y_OFF_ENC (0) +#define U_OFF_ENC (16) +#define V_OFF_ENC (16 + 8) + +extern const int VP8Scan[16]; // in quant.c +extern const int VP8UVModeOffsets[4]; // in analyze.c extern const int VP8I16ModeOffsets[4]; extern const int VP8I4ModeOffsets[NUM_BMODES]; // Layout of prediction blocks // intra 16x16 #define I16DC16 (0 * 16 * BPS) -#define I16TM16 (1 * 16 * BPS) -#define I16VE16 (2 * 16 * BPS) -#define I16HE16 (3 * 16 * BPS) +#define I16TM16 (I16DC16 + 16) +#define I16VE16 (1 * 16 * BPS) +#define I16HE16 (I16VE16 + 16) // chroma 8x8, two U/V blocks side by side (hence: 16x8 each) -#define C8DC8 (4 * 16 * BPS) -#define C8TM8 (4 * 16 * BPS + 8 * BPS) -#define C8VE8 (5 * 16 * BPS) -#define C8HE8 (5 * 16 * BPS + 8 * BPS) +#define C8DC8 (2 * 16 * BPS) +#define C8TM8 (C8DC8 + 1 * 16) +#define C8VE8 (2 * 16 * BPS + 8 * BPS) +#define C8HE8 (C8VE8 + 1 * 16) // intra 4x4 -#define I4DC4 (6 * 16 * BPS + 0) -#define I4TM4 (6 * 16 * BPS + 4) -#define I4VE4 (6 * 16 * BPS + 8) -#define I4HE4 (6 * 16 * BPS + 12) -#define I4RD4 (6 * 16 * BPS + 4 * BPS + 0) -#define I4VR4 (6 * 16 * BPS + 4 * BPS + 4) -#define I4LD4 (6 * 16 * BPS + 4 * BPS + 8) -#define I4VL4 (6 * 16 * BPS + 4 * BPS + 12) -#define I4HD4 (6 * 16 * BPS + 8 * BPS + 0) -#define I4HU4 (6 * 16 * BPS + 8 * BPS + 4) -#define I4TMP (6 * 16 * BPS + 8 * BPS + 8) +#define I4DC4 (3 * 16 * BPS + 0) +#define I4TM4 (I4DC4 + 4) +#define I4VE4 (I4DC4 + 8) +#define I4HE4 (I4DC4 + 12) +#define I4RD4 (I4DC4 + 16) +#define I4VR4 (I4DC4 + 20) +#define I4LD4 (I4DC4 + 24) +#define I4VL4 (I4DC4 + 28) +#define I4HD4 (3 * 16 * BPS + 4 * BPS) +#define I4HU4 (I4HD4 + 4) +#define I4TMP (I4HD4 + 8) typedef int64_t score_t; // type used for scores, rate, distortion +// Note that MAX_COST is not the maximum allowed by sizeof(score_t), +// in order to allow overflowing computations. #define MAX_COST ((score_t)0x7fffffffffffffLL) #define QFIX 17 #define BIAS(b) ((b) << (QFIX - 8)) // Fun fact: this is the _only_ line where we're actually being lossy and // discarding bits. -static WEBP_INLINE int QUANTDIV(int n, int iQ, int B) { - return (n * iQ + B) >> QFIX; +static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) { + return (int)((n * iQ + B) >> QFIX); } -extern const uint8_t VP8Zigzag[16]; + +// Uncomment the following to remove token-buffer code: +// #define DISABLE_TOKEN_BUFFER //------------------------------------------------------------------------------ // Headers @@ -169,6 +131,8 @@ typedef uint32_t proba_t; // 16b + 16b typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS]; typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS]; typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1]; +typedef const uint16_t* (*CostArrayPtr)[NUM_CTX]; // for easy casting +typedef const uint16_t* CostArrayMap[16][NUM_CTX]; typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats typedef struct VP8Encoder VP8Encoder; @@ -179,19 +143,20 @@ typedef struct { int update_map_; // whether to update the segment map or not. // must be 0 if there's only 1 segment. int size_; // bit-cost for transmitting the segment map -} VP8SegmentHeader; +} VP8EncSegmentHeader; // Struct collecting all frame-persistent probabilities. typedef struct { uint8_t segments_[3]; // probabilities for segment tree uint8_t skip_proba_; // final probability of being skipped. - ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 924 bytes + ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 1056 bytes StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes - CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 11.4k + CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 13056 bytes + CostArrayMap remapped_costs_[NUM_TYPES]; // 1536 bytes int dirty_; // if true, need to call VP8CalculateLevelCosts() int use_skip_proba_; // Note: we always use skip_proba for now. int nb_skip_; // number of skipped blocks -} VP8Proba; +} VP8EncProba; // Filter parameters. Not actually used in the code (we don't perform // the in-loop filtering), but filled from user's config @@ -200,7 +165,7 @@ typedef struct { int level_; // base filter level [0..63] int sharpness_; // [0..7] int i4x4_lf_delta_; // delta filter level for i4x4 relative to i16x16 -} VP8FilterHeader; +} VP8EncFilterHeader; //------------------------------------------------------------------------------ // Informations about the macroblocks. @@ -217,8 +182,8 @@ typedef struct { typedef struct VP8Matrix { uint16_t q_[16]; // quantizer steps uint16_t iq_[16]; // reciprocals, fixed point. - uint16_t bias_[16]; // rounding bias - uint16_t zthresh_[16]; // value under which a coefficient is zeroed + uint32_t bias_[16]; // rounding bias + uint32_t zthresh_[16]; // value below which a coefficient is zeroed uint16_t sharpen_[16]; // frequency boosters for slight sharpening } VP8Matrix; @@ -229,16 +194,19 @@ typedef struct { int beta_; // filter-susceptibility, range [0,255]. int quant_; // final segment quantizer. int fstrength_; // final in-loop filtering strength + int max_edge_; // max edge delta (for filtering strength) + int min_disto_; // minimum distortion required to trigger filtering record // reactivities int lambda_i16_, lambda_i4_, lambda_uv_; int lambda_mode_, lambda_trellis_, tlambda_; int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_; } VP8SegmentInfo; -// Handy transcient struct to accumulate score and info during RD-optimization +// Handy transient struct to accumulate score and info during RD-optimization // and mode evaluation. typedef struct { - score_t D, SD, R, score; // Distortion, spectral distortion, rate, score. + score_t D, SD; // Distortion, spectral distortion + score_t H, R, score; // header bits, rate, score. int16_t y_dc_levels[16]; // Quantized levels for luma-DC, luma-AC, chroma. int16_t y_ac_levels[16][16]; int16_t uv_levels[4 + 4][16]; @@ -252,12 +220,11 @@ typedef struct { // right neighbouring data (samples, predictions, contexts, ...) typedef struct { int x_, y_; // current macroblock - int y_offset_, uv_offset_; // offset to the luma / chroma planes int y_stride_, uv_stride_; // respective strides - uint8_t* yuv_in_; // borrowed from enc_ (for now) - uint8_t* yuv_out_; // '' - uint8_t* yuv_out2_; // '' - uint8_t* yuv_p_; // '' + uint8_t* yuv_in_; // input samples + uint8_t* yuv_out_; // output samples + uint8_t* yuv_out2_; // secondary buffer swapped with yuv_out_. + uint8_t* yuv_p_; // scratch buffer for prediction VP8Encoder* enc_; // back-pointer VP8MBInfo* mb_; // current macroblock VP8BitWriter* bw_; // current bit-writer @@ -273,24 +240,44 @@ typedef struct { uint64_t uv_bits_; // macroblock bit-cost for chroma LFStats* lf_stats_; // filter stats (borrowed from enc_) int do_trellis_; // if true, perform extra level optimisation - int done_; // true when scan is finished + int count_down_; // number of mb still to be processed + int count_down0_; // starting counter value (for progress) int percent0_; // saved initial progress percent + + uint8_t* y_left_; // left luma samples (addressable from index -1 to 15). + uint8_t* u_left_; // left u samples (addressable from index -1 to 7) + uint8_t* v_left_; // left v samples (addressable from index -1 to 7) + + uint8_t* y_top_; // top luma samples at position 'x_' + uint8_t* uv_top_; // top u/v samples at position 'x_', packed as 16 bytes + + // memory for storing y/u/v_left_ + uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + WEBP_ALIGN_CST]; + // memory for yuv_* + uint8_t yuv_mem_[3 * YUV_SIZE_ENC + PRED_SIZE_ENC + WEBP_ALIGN_CST]; } VP8EncIterator; // in iterator.c -// must be called first. +// must be called first void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it); -// restart a scan. +// restart a scan void VP8IteratorReset(VP8EncIterator* const it); -// import samples from source -void VP8IteratorImport(const VP8EncIterator* const it); +// reset iterator position to row 'y' +void VP8IteratorSetRow(VP8EncIterator* const it, int y); +// set count down (=number of iterations to go) +void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down); +// return true if iteration is finished +int VP8IteratorIsDone(const VP8EncIterator* const it); +// Import uncompressed samples from source. +// If tmp_32 is not NULL, import boundary samples too. +// tmp_32 is a 32-bytes scratch buffer that must be aligned in memory. +void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32); // export decimated samples void VP8IteratorExport(const VP8EncIterator* const it); -// go to next macroblock. Returns !done_. If *block_to_save is non-null, will -// save the boundary values to top_/left_ arrays. block_to_save can be -// it->yuv_out_ or it->yuv_in_. -int VP8IteratorNext(VP8EncIterator* const it, - const uint8_t* const block_to_save); +// go to next macroblock. Returns false if not finished. +int VP8IteratorNext(VP8EncIterator* const it); +// save the yuv_out_ boundary values to top_/left_ arrays for next iterations. +void VP8IteratorSaveBoundary(VP8EncIterator* const it); // Report progression based on macroblock rows. Return 0 for user-abort request. int VP8IteratorProgress(const VP8EncIterator* const it, int final_delta_percent); @@ -314,44 +301,43 @@ void VP8SetSegment(const VP8EncIterator* const it, int segment); //------------------------------------------------------------------------------ // Paginated token buffer -// WIP: #define USE_TOKEN_BUFFER +typedef struct VP8Tokens VP8Tokens; // struct details in token.c -#ifdef USE_TOKEN_BUFFER +typedef struct { +#if !defined(DISABLE_TOKEN_BUFFER) + VP8Tokens* pages_; // first page + VP8Tokens** last_page_; // last page + uint16_t* tokens_; // set to (*last_page_)->tokens_ + int left_; // how many free tokens left before the page is full + int page_size_; // number of tokens per page +#endif + int error_; // true in case of malloc error +} VP8TBuffer; -#define MAX_NUM_TOKEN 2048 +// initialize an empty buffer +void VP8TBufferInit(VP8TBuffer* const b, int page_size); +void VP8TBufferClear(VP8TBuffer* const b); // de-allocate pages memory -typedef struct VP8Tokens VP8Tokens; -struct VP8Tokens { - uint16_t tokens_[MAX_NUM_TOKEN]; // bit#15: bit, bits 0..14: slot - int left_; - VP8Tokens* next_; -}; +#if !defined(DISABLE_TOKEN_BUFFER) -typedef struct { - VP8Tokens* rows_; - uint16_t* tokens_; // set to (*last_)->tokens_ - VP8Tokens** last_; - int left_; - int error_; // true in case of malloc error -} VP8TBuffer; +// Finalizes bitstream when probabilities are known. +// Deletes the allocated token memory if final_pass is true. +int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw, + const uint8_t* const probas, int final_pass); -void VP8TBufferInit(VP8TBuffer* const b); // initialize an empty buffer -int VP8TBufferNewPage(VP8TBuffer* const b); // allocate a new page -void VP8TBufferClear(VP8TBuffer* const b); // de-allocate memory +// record the coding of coefficients without knowing the probabilities yet +int VP8RecordCoeffTokens(const int ctx, const int coeff_type, + int first, int last, + const int16_t* const coeffs, + VP8TBuffer* const tokens); -int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw, - const uint8_t* const probas); +// Estimate the final coded size given a set of 'probas'. +size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas); -static WEBP_INLINE int VP8AddToken(VP8TBuffer* const b, - int bit, int proba_idx) { - if (b->left_ > 0 || VP8TBufferNewPage(b)) { - const int slot = --b->left_; - b->tokens_[slot] = (bit << 15) | proba_idx; - } - return bit; -} +// unused for now +void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats); -#endif // USE_TOKEN_BUFFER +#endif // !DISABLE_TOKEN_BUFFER //------------------------------------------------------------------------------ // VP8Encoder @@ -361,8 +347,8 @@ struct VP8Encoder { WebPPicture* pic_; // input / output picture // headers - VP8FilterHeader filter_hdr_; // filtering information - VP8SegmentHeader segment_hdr_; // segment information + VP8EncFilterHeader filter_hdr_; // filtering information + VP8EncSegmentHeader segment_hdr_; // segment information int profile_; // VP8's profile, deduced from Config. @@ -376,6 +362,7 @@ struct VP8Encoder { // per-partition boolean decoders. VP8BitWriter bw_; // part0 VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions + VP8TBuffer tokens_; // token buffer int percent_; // for progress @@ -383,17 +370,13 @@ struct VP8Encoder { int has_alpha_; uint8_t* alpha_data_; // non-NULL if transparency is present uint32_t alpha_data_size_; - - // enhancement layer - int use_layer_; - VP8BitWriter layer_bw_; - uint8_t* layer_data_; - size_t layer_data_size_; + WebPWorker alpha_worker_; // quantization info (one set of DC/AC dequant factor per segment) VP8SegmentInfo dqm_[NUM_MB_SEGMENTS]; int base_quant_; // nominal quantizer value. Only used // for relative coding of segments' quant. + int alpha_; // global susceptibility (<=> complexity) int uv_alpha_; // U/V quantization susceptibility // global offset of quantizers, shared by all segments int dq_y1_dc_; @@ -401,34 +384,29 @@ struct VP8Encoder { int dq_uv_dc_, dq_uv_ac_; // probabilities and statistics - VP8Proba proba_; - uint64_t sse_[4]; // sum of Y/U/V/A squared errors for all macroblocks - uint64_t sse_count_; // pixel count for the sse_[] stats - int coded_size_; - int residual_bytes_[3][4]; - int block_count_[3]; + VP8EncProba proba_; + uint64_t sse_[4]; // sum of Y/U/V/A squared errors for all macroblocks + uint64_t sse_count_; // pixel count for the sse_[] stats + int coded_size_; + int residual_bytes_[3][4]; + int block_count_[3]; // quality/speed settings - int method_; // 0=fastest, 6=best/slowest. - int rd_opt_level_; // Deduced from method_. - int max_i4_header_bits_; // partition #0 safeness factor + int method_; // 0=fastest, 6=best/slowest. + VP8RDLevel rd_opt_level_; // Deduced from method_. + int max_i4_header_bits_; // partition #0 safeness factor + int thread_level_; // derived from config->thread_level + int do_search_; // derived from config->target_XXX + int use_tokens_; // if true, use token buffer // Memory VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1) uint8_t* preds_; // predictions modes: (4*mb_w+1) * (4*mb_h+1) uint32_t* nz_; // non-zero bit context: mb_w+1 - uint8_t* yuv_in_; // input samples - uint8_t* yuv_out_; // output samples - uint8_t* yuv_out2_; // secondary scratch out-buffer. swapped with yuv_out_. - uint8_t* yuv_p_; // scratch buffer for prediction - uint8_t *y_top_; // top luma samples. - uint8_t *uv_top_; // top u/v samples. - // U and V are packed into 16 pixels (8 U + 8 V) - uint8_t *y_left_; // left luma samples (adressable from index -1 to 15). - uint8_t *u_left_; // left u samples (adressable from index -1 to 7) - uint8_t *v_left_; // left v samples (adressable from index -1 to 7) - - LFStats *lf_stats_; // autofilter stats (if NULL, autofilter is off) + uint8_t* y_top_; // top luma samples. + uint8_t* uv_top_; // top u/v samples. + // U and V are packed into 16 bytes (8 U + 8 V) + LFStats* lf_stats_; // autofilter stats (if NULL, autofilter is off) }; //------------------------------------------------------------------------------ @@ -441,7 +419,7 @@ extern const uint8_t // Reset the token probabilities to their initial (default) values void VP8DefaultProbas(VP8Encoder* const enc); // Write the token probabilities -void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas); +void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas); // Writes the partition #0 modes (that is: all intra modes) void VP8CodeIntraModes(VP8Encoder* const enc); @@ -454,7 +432,11 @@ int VP8EncWrite(VP8Encoder* const enc); void VP8EncFreeBitWriters(VP8Encoder* const enc); // in frame.c -extern const uint8_t VP8EncBands[16 + 1]; +extern const uint8_t VP8Cat3[]; +extern const uint8_t VP8Cat4[]; +extern const uint8_t VP8Cat5[]; +extern const uint8_t VP8Cat6[]; + // Form all the four Intra16x16 predictions in the yuv_p_ cache void VP8MakeLuma16Preds(const VP8EncIterator* const it); // Form all the four Chroma8x8 predictions in the yuv_p_ cache @@ -466,9 +448,9 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it); int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd); int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]); int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd); -// Main stat / coding passes +// Main coding calls int VP8EncLoop(VP8Encoder* const enc); -int VP8StatLoop(VP8Encoder* const enc); +int VP8EncTokenLoop(VP8Encoder* const enc); // in webpenc.c // Assign an error code to a picture. Return false for convenience. @@ -485,18 +467,14 @@ int VP8EncAnalyze(VP8Encoder* const enc); // Sets up segment's quantization values, base_quant_ and filter strengths. void VP8SetSegmentParams(VP8Encoder* const enc, float quality); // Pick best modes and fills the levels. Returns true if skipped. -int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt); +int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, + VP8RDLevel rd_opt); // in alpha.c void VP8EncInitAlpha(VP8Encoder* const enc); // initialize alpha compression +int VP8EncStartAlpha(VP8Encoder* const enc); // start alpha coding process int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data -void VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data - - // in layer.c -void VP8EncInitLayer(VP8Encoder* const enc); // init everything -void VP8EncCodeLayerBlock(VP8EncIterator* it); // code one more macroblock -int VP8EncFinishLayer(VP8Encoder* const enc); // finalize coding -void VP8EncDeleteLayer(VP8Encoder* enc); // reclaim memory +int VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data // in filter.c @@ -516,9 +494,38 @@ void VP8InitFilter(VP8EncIterator* const it); void VP8StoreFilterStats(VP8EncIterator* const it); void VP8AdjustFilterStrength(VP8EncIterator* const it); +// returns the approximate filtering strength needed to smooth a edge +// step of 'delta', given a sharpness parameter 'sharpness'. +int VP8FilterStrengthFromDelta(int sharpness, int delta); + + // misc utils for picture_*.c: + +// Remove reference to the ARGB/YUVA buffer (doesn't free anything). +void WebPPictureResetBuffers(WebPPicture* const picture); + +// Allocates ARGB buffer of given dimension (previous one is always free'd). +// Preserves the YUV(A) buffer. Returns false in case of error (invalid param, +// out-of-memory). +int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height); + +// Allocates YUVA buffer of given dimension (previous one is always free'd). +// Uses picture->csp to determine whether an alpha buffer is needed. +// Preserves the ARGB buffer. +// Returns false in case of error (invalid param, out-of-memory). +int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height); + + // in near_lossless.c +// Near lossless preprocessing in RGB color-space. +int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality); +// Near lossless adjustment for predictors. +void VP8ApplyNearLosslessPredict(int xsize, int ysize, int pred_bits, + const uint32_t* argb_orig, + uint32_t* argb, uint32_t* argb_scratch, + const uint32_t* const transform_data, + int quality, int subtract_green); //------------------------------------------------------------------------------ -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus } // extern "C" #endif diff --git a/drivers/webp/enc/vp8l.c b/drivers/webp/enc/vp8l.c index f4eb6e783f..047c9032ac 100644 --- a/drivers/webp/enc/vp8l.c +++ b/drivers/webp/enc/vp8l.c @@ -1,8 +1,10 @@ // Copyright 2012 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // main entry for the lossless encoder. @@ -11,7 +13,6 @@ // #include <assert.h> -#include <stdio.h> #include <stdlib.h> #include "./backward_references.h" @@ -21,28 +22,107 @@ #include "../utils/bit_writer.h" #include "../utils/huffman_encode.h" #include "../utils/utils.h" -#include "../format_constants.h" +#include "../webp/format_constants.h" -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif +#include "./delta_palettization.h" #define PALETTE_KEY_RIGHT_SHIFT 22 // Key for 1K buffer. -#define MAX_HUFF_IMAGE_SIZE (16 * 1024 * 1024) -#define MAX_COLORS_FOR_GRAPH 64 +// Maximum number of histogram images (sub-blocks). +#define MAX_HUFF_IMAGE_SIZE 2600 -// ----------------------------------------------------------------------------- -// Palette +// Palette reordering for smaller sum of deltas (and for smaller storage). -static int CompareColors(const void* p1, const void* p2) { +static int PaletteCompareColorsForQsort(const void* p1, const void* p2) { const uint32_t a = *(const uint32_t*)p1; const uint32_t b = *(const uint32_t*)p2; - return (a < b) ? -1 : (a > b) ? 1 : 0; + assert(a != b); + return (a < b) ? -1 : 1; +} + +static WEBP_INLINE uint32_t PaletteComponentDistance(uint32_t v) { + return (v <= 128) ? v : (256 - v); +} + +// Computes a value that is related to the entropy created by the +// palette entry diff. +// +// Note that the last & 0xff is a no-operation in the next statement, but +// removed by most compilers and is here only for regularity of the code. +static WEBP_INLINE uint32_t PaletteColorDistance(uint32_t col1, uint32_t col2) { + const uint32_t diff = VP8LSubPixels(col1, col2); + const int kMoreWeightForRGBThanForAlpha = 9; + uint32_t score; + score = PaletteComponentDistance((diff >> 0) & 0xff); + score += PaletteComponentDistance((diff >> 8) & 0xff); + score += PaletteComponentDistance((diff >> 16) & 0xff); + score *= kMoreWeightForRGBThanForAlpha; + score += PaletteComponentDistance((diff >> 24) & 0xff); + return score; +} + +static WEBP_INLINE void SwapColor(uint32_t* const col1, uint32_t* const col2) { + const uint32_t tmp = *col1; + *col1 = *col2; + *col2 = tmp; +} + +static void GreedyMinimizeDeltas(uint32_t palette[], int num_colors) { + // Find greedily always the closest color of the predicted color to minimize + // deltas in the palette. This reduces storage needs since the + // palette is stored with delta encoding. + uint32_t predict = 0x00000000; + int i, k; + for (i = 0; i < num_colors; ++i) { + int best_ix = i; + uint32_t best_score = ~0U; + for (k = i; k < num_colors; ++k) { + const uint32_t cur_score = PaletteColorDistance(palette[k], predict); + if (best_score > cur_score) { + best_score = cur_score; + best_ix = k; + } + } + SwapColor(&palette[best_ix], &palette[i]); + predict = palette[i]; + } +} + +// The palette has been sorted by alpha. This function checks if the other +// components of the palette have a monotonic development with regards to +// position in the palette. If all have monotonic development, there is +// no benefit to re-organize them greedily. A monotonic development +// would be spotted in green-only situations (like lossy alpha) or gray-scale +// images. +static int PaletteHasNonMonotonousDeltas(uint32_t palette[], int num_colors) { + uint32_t predict = 0x000000; + int i; + uint8_t sign_found = 0x00; + for (i = 0; i < num_colors; ++i) { + const uint32_t diff = VP8LSubPixels(palette[i], predict); + const uint8_t rd = (diff >> 16) & 0xff; + const uint8_t gd = (diff >> 8) & 0xff; + const uint8_t bd = (diff >> 0) & 0xff; + if (rd != 0x00) { + sign_found |= (rd < 0x80) ? 1 : 2; + } + if (gd != 0x00) { + sign_found |= (gd < 0x80) ? 8 : 16; + } + if (bd != 0x00) { + sign_found |= (bd < 0x80) ? 64 : 128; + } + predict = palette[i]; + } + return (sign_found & (sign_found << 1)) != 0; // two consequent signs. } +// ----------------------------------------------------------------------------- +// Palette + // If number of colors in the image is less than or equal to MAX_PALETTE_SIZE, // creates a palette and returns true, else returns false. static int AnalyzeAndCreatePalette(const WebPPicture* const pic, + int low_effort, uint32_t palette[MAX_PALETTE_SIZE], int* const palette_size) { int i, x, y, key; @@ -85,7 +165,7 @@ static int AnalyzeAndCreatePalette(const WebPPicture* const pic, argb += pic->argb_stride; } - // TODO(skal): could we reuse in_use[] to speed up ApplyPalette()? + // TODO(skal): could we reuse in_use[] to speed up EncodePalette()? num_colors = 0; for (i = 0; i < (int)(sizeof(in_use) / sizeof(in_use[0])); ++i) { if (in_use[i]) { @@ -93,106 +173,272 @@ static int AnalyzeAndCreatePalette(const WebPPicture* const pic, ++num_colors; } } - - qsort(palette, num_colors, sizeof(*palette), CompareColors); *palette_size = num_colors; + qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort); + if (!low_effort && PaletteHasNonMonotonousDeltas(palette, num_colors)) { + GreedyMinimizeDeltas(palette, num_colors); + } return 1; } +// These five modes are evaluated and their respective entropy is computed. +typedef enum { + kDirect = 0, + kSpatial = 1, + kSubGreen = 2, + kSpatialSubGreen = 3, + kPalette = 4, + kNumEntropyIx = 5 +} EntropyIx; + +typedef enum { + kHistoAlpha = 0, + kHistoAlphaPred, + kHistoGreen, + kHistoGreenPred, + kHistoRed, + kHistoRedPred, + kHistoBlue, + kHistoBluePred, + kHistoRedSubGreen, + kHistoRedPredSubGreen, + kHistoBlueSubGreen, + kHistoBluePredSubGreen, + kHistoPalette, + kHistoTotal // Must be last. +} HistoIx; + +static void AddSingleSubGreen(uint32_t p, uint32_t* r, uint32_t* b) { + const uint32_t green = p >> 8; // The upper bits are masked away later. + ++r[((p >> 16) - green) & 0xff]; + ++b[(p - green) & 0xff]; +} + +static void AddSingle(uint32_t p, + uint32_t* a, uint32_t* r, uint32_t* g, uint32_t* b) { + ++a[p >> 24]; + ++r[(p >> 16) & 0xff]; + ++g[(p >> 8) & 0xff]; + ++b[(p & 0xff)]; +} + static int AnalyzeEntropy(const uint32_t* argb, int width, int height, int argb_stride, - double* const nonpredicted_bits, - double* const predicted_bits) { - int x, y; - const uint32_t* last_line = NULL; - uint32_t last_pix = argb[0]; // so we're sure that pix_diff == 0 - - VP8LHistogram* nonpredicted = NULL; - VP8LHistogram* predicted = - (VP8LHistogram*)malloc(2 * sizeof(*predicted)); - if (predicted == NULL) return 0; - nonpredicted = predicted + 1; - - VP8LHistogramInit(predicted, 0); - VP8LHistogramInit(nonpredicted, 0); - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - const uint32_t pix = argb[x]; - const uint32_t pix_diff = VP8LSubPixels(pix, last_pix); - if (pix_diff == 0) continue; - if (last_line != NULL && pix == last_line[x]) { - continue; + int use_palette, + EntropyIx* const min_entropy_ix, + int* const red_and_blue_always_zero) { + // Allocate histogram set with cache_bits = 0. + uint32_t* const histo = + (uint32_t*)WebPSafeCalloc(kHistoTotal, sizeof(*histo) * 256); + if (histo != NULL) { + int i, x, y; + const uint32_t* prev_row = argb; + const uint32_t* curr_row = argb + argb_stride; + for (y = 1; y < height; ++y) { + uint32_t prev_pix = curr_row[0]; + for (x = 1; x < width; ++x) { + const uint32_t pix = curr_row[x]; + const uint32_t pix_diff = VP8LSubPixels(pix, prev_pix); + if ((pix_diff == 0) || (pix == prev_row[x])) continue; + prev_pix = pix; + AddSingle(pix, + &histo[kHistoAlpha * 256], + &histo[kHistoRed * 256], + &histo[kHistoGreen * 256], + &histo[kHistoBlue * 256]); + AddSingle(pix_diff, + &histo[kHistoAlphaPred * 256], + &histo[kHistoRedPred * 256], + &histo[kHistoGreenPred * 256], + &histo[kHistoBluePred * 256]); + AddSingleSubGreen(pix, + &histo[kHistoRedSubGreen * 256], + &histo[kHistoBlueSubGreen * 256]); + AddSingleSubGreen(pix_diff, + &histo[kHistoRedPredSubGreen * 256], + &histo[kHistoBluePredSubGreen * 256]); + { + // Approximate the palette by the entropy of the multiplicative hash. + const int hash = ((pix + (pix >> 19)) * 0x39c5fba7) >> 24; + ++histo[kHistoPalette * 256 + (hash & 0xff)]; + } + } + prev_row = curr_row; + curr_row += argb_stride; + } + { + double entropy_comp[kHistoTotal]; + double entropy[kNumEntropyIx]; + EntropyIx k; + EntropyIx last_mode_to_analyze = + use_palette ? kPalette : kSpatialSubGreen; + int j; + // Let's add one zero to the predicted histograms. The zeros are removed + // too efficiently by the pix_diff == 0 comparison, at least one of the + // zeros is likely to exist. + ++histo[kHistoRedPredSubGreen * 256]; + ++histo[kHistoBluePredSubGreen * 256]; + ++histo[kHistoRedPred * 256]; + ++histo[kHistoGreenPred * 256]; + ++histo[kHistoBluePred * 256]; + ++histo[kHistoAlphaPred * 256]; + + for (j = 0; j < kHistoTotal; ++j) { + entropy_comp[j] = VP8LBitsEntropy(&histo[j * 256], 256, NULL); + } + entropy[kDirect] = entropy_comp[kHistoAlpha] + + entropy_comp[kHistoRed] + + entropy_comp[kHistoGreen] + + entropy_comp[kHistoBlue]; + entropy[kSpatial] = entropy_comp[kHistoAlphaPred] + + entropy_comp[kHistoRedPred] + + entropy_comp[kHistoGreenPred] + + entropy_comp[kHistoBluePred]; + entropy[kSubGreen] = entropy_comp[kHistoAlpha] + + entropy_comp[kHistoRedSubGreen] + + entropy_comp[kHistoGreen] + + entropy_comp[kHistoBlueSubGreen]; + entropy[kSpatialSubGreen] = entropy_comp[kHistoAlphaPred] + + entropy_comp[kHistoRedPredSubGreen] + + entropy_comp[kHistoGreenPred] + + entropy_comp[kHistoBluePredSubGreen]; + // Palette mode seems more efficient in a breakeven case. Bias with 1.0. + entropy[kPalette] = entropy_comp[kHistoPalette] - 1.0; + + *min_entropy_ix = kDirect; + for (k = kDirect + 1; k <= last_mode_to_analyze; ++k) { + if (entropy[*min_entropy_ix] > entropy[k]) { + *min_entropy_ix = k; + } } - last_pix = pix; + *red_and_blue_always_zero = 1; + // Let's check if the histogram of the chosen entropy mode has + // non-zero red and blue values. If all are zero, we can later skip + // the cross color optimization. { - const PixOrCopy pix_token = PixOrCopyCreateLiteral(pix); - const PixOrCopy pix_diff_token = PixOrCopyCreateLiteral(pix_diff); - VP8LHistogramAddSinglePixOrCopy(nonpredicted, &pix_token); - VP8LHistogramAddSinglePixOrCopy(predicted, &pix_diff_token); + static const uint8_t kHistoPairs[5][2] = { + { kHistoRed, kHistoBlue }, + { kHistoRedPred, kHistoBluePred }, + { kHistoRedSubGreen, kHistoBlueSubGreen }, + { kHistoRedPredSubGreen, kHistoBluePredSubGreen }, + { kHistoRed, kHistoBlue } + }; + const uint32_t* const red_histo = + &histo[256 * kHistoPairs[*min_entropy_ix][0]]; + const uint32_t* const blue_histo = + &histo[256 * kHistoPairs[*min_entropy_ix][1]]; + for (i = 1; i < 256; ++i) { + if ((red_histo[i] | blue_histo[i]) != 0) { + *red_and_blue_always_zero = 0; + break; + } + } } } - last_line = argb; - argb += argb_stride; + free(histo); + return 1; + } else { + return 0; } - *nonpredicted_bits = VP8LHistogramEstimateBitsBulk(nonpredicted); - *predicted_bits = VP8LHistogramEstimateBitsBulk(predicted); - free(predicted); - return 1; } -static int VP8LEncAnalyze(VP8LEncoder* const enc, WebPImageHint image_hint) { +static int GetHistoBits(int method, int use_palette, int width, int height) { + // Make tile size a function of encoding method (Range: 0 to 6). + int histo_bits = (use_palette ? 9 : 7) - method; + while (1) { + const int huff_image_size = VP8LSubSampleSize(width, histo_bits) * + VP8LSubSampleSize(height, histo_bits); + if (huff_image_size <= MAX_HUFF_IMAGE_SIZE) break; + ++histo_bits; + } + return (histo_bits < MIN_HUFFMAN_BITS) ? MIN_HUFFMAN_BITS : + (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits; +} + +static int GetTransformBits(int method, int histo_bits) { + const int max_transform_bits = (method < 4) ? 6 : (method > 4) ? 4 : 5; + return (histo_bits > max_transform_bits) ? max_transform_bits : histo_bits; +} + +static int AnalyzeAndInit(VP8LEncoder* const enc) { const WebPPicture* const pic = enc->pic_; + const int width = pic->width; + const int height = pic->height; + const int pix_cnt = width * height; + const WebPConfig* const config = enc->config_; + const int method = config->method; + const int low_effort = (config->method == 0); + // we round the block size up, so we're guaranteed to have + // at max MAX_REFS_BLOCK_PER_IMAGE blocks used: + int refs_block_size = (pix_cnt - 1) / MAX_REFS_BLOCK_PER_IMAGE + 1; assert(pic != NULL && pic->argb != NULL); + enc->use_cross_color_ = 0; + enc->use_predict_ = 0; + enc->use_subtract_green_ = 0; enc->use_palette_ = - AnalyzeAndCreatePalette(pic, enc->palette_, &enc->palette_size_); - - if (image_hint == WEBP_HINT_GRAPH) { - if (enc->use_palette_ && enc->palette_size_ < MAX_COLORS_FOR_GRAPH) { - enc->use_palette_ = 0; + AnalyzeAndCreatePalette(pic, low_effort, + enc->palette_, &enc->palette_size_); + + // TODO(jyrki): replace the decision to be based on an actual estimate + // of entropy, or even spatial variance of entropy. + enc->histo_bits_ = GetHistoBits(method, enc->use_palette_, + pic->width, pic->height); + enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_); + + if (low_effort) { + // AnalyzeEntropy is somewhat slow. + enc->use_predict_ = !enc->use_palette_; + enc->use_subtract_green_ = !enc->use_palette_; + enc->use_cross_color_ = 0; + } else { + int red_and_blue_always_zero; + EntropyIx min_entropy_ix; + if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride, + enc->use_palette_, &min_entropy_ix, + &red_and_blue_always_zero)) { + return 0; } + enc->use_palette_ = (min_entropy_ix == kPalette); + enc->use_subtract_green_ = + (min_entropy_ix == kSubGreen) || (min_entropy_ix == kSpatialSubGreen); + enc->use_predict_ = + (min_entropy_ix == kSpatial) || (min_entropy_ix == kSpatialSubGreen); + enc->use_cross_color_ = red_and_blue_always_zero ? 0 : enc->use_predict_; } - if (!enc->use_palette_) { - if (image_hint == WEBP_HINT_PHOTO) { - enc->use_predict_ = 1; - enc->use_cross_color_ = 1; - } else { - double non_pred_entropy, pred_entropy; - if (!AnalyzeEntropy(pic->argb, pic->width, pic->height, pic->argb_stride, - &non_pred_entropy, &pred_entropy)) { - return 0; - } - if (pred_entropy < 0.95 * non_pred_entropy) { - enc->use_predict_ = 1; - // TODO(vikasa): Observed some correlation of cross_color transform with - // predict. Need to investigate this further and add separate heuristic - // for setting use_cross_color flag. - enc->use_cross_color_ = 1; - } - } - } + if (!VP8LHashChainInit(&enc->hash_chain_, pix_cnt)) return 0; + + // palette-friendly input typically uses less literals + // -> reduce block size a bit + if (enc->use_palette_) refs_block_size /= 2; + VP8LBackwardRefsInit(&enc->refs_[0], refs_block_size); + VP8LBackwardRefsInit(&enc->refs_[1], refs_block_size); return 1; } +// Returns false in case of memory error. static int GetHuffBitLengthsAndCodes( const VP8LHistogramSet* const histogram_image, HuffmanTreeCode* const huffman_codes) { int i, k; - int ok = 1; + int ok = 0; uint64_t total_length_size = 0; uint8_t* mem_buf = NULL; const int histogram_image_size = histogram_image->size; + int max_num_symbols = 0; + uint8_t* buf_rle = NULL; + HuffmanTree* huff_tree = NULL; // Iterate over all histograms and get the aggregate number of codes used. for (i = 0; i < histogram_image_size; ++i) { const VP8LHistogram* const histo = histogram_image->histograms[i]; HuffmanTreeCode* const codes = &huffman_codes[5 * i]; for (k = 0; k < 5; ++k) { - const int num_symbols = (k == 0) ? VP8LHistogramNumCodes(histo) - : (k == 4) ? NUM_DISTANCE_CODES - : 256; + const int num_symbols = + (k == 0) ? VP8LHistogramNumCodes(histo->palette_code_bits_) : + (k == 4) ? NUM_DISTANCE_CODES : 256; codes[k].num_symbols = num_symbols; total_length_size += num_symbols; } @@ -204,10 +450,8 @@ static int GetHuffBitLengthsAndCodes( uint8_t* lengths; mem_buf = (uint8_t*)WebPSafeCalloc(total_length_size, sizeof(*lengths) + sizeof(*codes)); - if (mem_buf == NULL) { - ok = 0; - goto End; - } + if (mem_buf == NULL) goto End; + codes = (uint16_t*)mem_buf; lengths = (uint8_t*)&codes[total_length_size]; for (i = 0; i < 5 * histogram_image_size; ++i) { @@ -216,22 +460,35 @@ static int GetHuffBitLengthsAndCodes( huffman_codes[i].code_lengths = lengths; codes += bit_length; lengths += bit_length; + if (max_num_symbols < bit_length) { + max_num_symbols = bit_length; + } } } + buf_rle = (uint8_t*)WebPSafeMalloc(1ULL, max_num_symbols); + huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * max_num_symbols, + sizeof(*huff_tree)); + if (buf_rle == NULL || huff_tree == NULL) goto End; + // Create Huffman trees. for (i = 0; i < histogram_image_size; ++i) { HuffmanTreeCode* const codes = &huffman_codes[5 * i]; VP8LHistogram* const histo = histogram_image->histograms[i]; - ok = ok && VP8LCreateHuffmanTree(histo->literal_, 15, codes + 0); - ok = ok && VP8LCreateHuffmanTree(histo->red_, 15, codes + 1); - ok = ok && VP8LCreateHuffmanTree(histo->blue_, 15, codes + 2); - ok = ok && VP8LCreateHuffmanTree(histo->alpha_, 15, codes + 3); - ok = ok && VP8LCreateHuffmanTree(histo->distance_, 15, codes + 4); + VP8LCreateHuffmanTree(histo->literal_, 15, buf_rle, huff_tree, codes + 0); + VP8LCreateHuffmanTree(histo->red_, 15, buf_rle, huff_tree, codes + 1); + VP8LCreateHuffmanTree(histo->blue_, 15, buf_rle, huff_tree, codes + 2); + VP8LCreateHuffmanTree(histo->alpha_, 15, buf_rle, huff_tree, codes + 3); + VP8LCreateHuffmanTree(histo->distance_, 15, buf_rle, huff_tree, codes + 4); } - + ok = 1; End: - if (!ok) free(mem_buf); + WebPSafeFree(huff_tree); + WebPSafeFree(buf_rle); + if (!ok) { + WebPSafeFree(mem_buf); + memset(huffman_codes, 0, 5 * histogram_image_size * sizeof(*huffman_codes)); + } return ok; } @@ -251,9 +508,9 @@ static void StoreHuffmanTreeOfHuffmanTreeToBitMask( break; } } - VP8LWriteBits(bw, 4, codes_to_store - 4); + VP8LPutBits(bw, codes_to_store - 4, 4); for (i = 0; i < codes_to_store; ++i) { - VP8LWriteBits(bw, 3, code_length_bitdepth[kStorageOrder[i]]); + VP8LPutBits(bw, code_length_bitdepth[kStorageOrder[i]], 3); } } @@ -281,49 +538,46 @@ static void StoreHuffmanTreeToBitMask( for (i = 0; i < num_tokens; ++i) { const int ix = tokens[i].code; const int extra_bits = tokens[i].extra_bits; - VP8LWriteBits(bw, huffman_code->code_lengths[ix], huffman_code->codes[ix]); + VP8LPutBits(bw, huffman_code->codes[ix], huffman_code->code_lengths[ix]); switch (ix) { case 16: - VP8LWriteBits(bw, 2, extra_bits); + VP8LPutBits(bw, extra_bits, 2); break; case 17: - VP8LWriteBits(bw, 3, extra_bits); + VP8LPutBits(bw, extra_bits, 3); break; case 18: - VP8LWriteBits(bw, 7, extra_bits); + VP8LPutBits(bw, extra_bits, 7); break; } } } -static int StoreFullHuffmanCode(VP8LBitWriter* const bw, - const HuffmanTreeCode* const tree) { - int ok = 0; +// 'huff_tree' and 'tokens' are pre-alloacted buffers. +static void StoreFullHuffmanCode(VP8LBitWriter* const bw, + HuffmanTree* const huff_tree, + HuffmanTreeToken* const tokens, + const HuffmanTreeCode* const tree) { uint8_t code_length_bitdepth[CODE_LENGTH_CODES] = { 0 }; uint16_t code_length_bitdepth_symbols[CODE_LENGTH_CODES] = { 0 }; const int max_tokens = tree->num_symbols; int num_tokens; HuffmanTreeCode huffman_code; - HuffmanTreeToken* const tokens = - (HuffmanTreeToken*)WebPSafeMalloc((uint64_t)max_tokens, sizeof(*tokens)); - if (tokens == NULL) return 0; - huffman_code.num_symbols = CODE_LENGTH_CODES; huffman_code.code_lengths = code_length_bitdepth; huffman_code.codes = code_length_bitdepth_symbols; - VP8LWriteBits(bw, 1, 0); + VP8LPutBits(bw, 0, 1); num_tokens = VP8LCreateCompressedHuffmanTree(tree, tokens, max_tokens); { - int histogram[CODE_LENGTH_CODES] = { 0 }; + uint32_t histogram[CODE_LENGTH_CODES] = { 0 }; + uint8_t buf_rle[CODE_LENGTH_CODES] = { 0 }; int i; for (i = 0; i < num_tokens; ++i) { ++histogram[tokens[i].code]; } - if (!VP8LCreateHuffmanTree(histogram, 7, &huffman_code)) { - goto End; - } + VP8LCreateHuffmanTree(histogram, 7, buf_rle, huff_tree, &huffman_code); } StoreHuffmanTreeOfHuffmanTreeToBitMask(bw, code_length_bitdepth); @@ -350,24 +604,23 @@ static int StoreFullHuffmanCode(VP8LBitWriter* const bw, } write_trimmed_length = (trimmed_length > 1 && trailing_zero_bits > 12); length = write_trimmed_length ? trimmed_length : num_tokens; - VP8LWriteBits(bw, 1, write_trimmed_length); + VP8LPutBits(bw, write_trimmed_length, 1); if (write_trimmed_length) { const int nbits = VP8LBitsLog2Ceiling(trimmed_length - 1); const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2; - VP8LWriteBits(bw, 3, nbitpairs - 1); + VP8LPutBits(bw, nbitpairs - 1, 3); assert(trimmed_length >= 2); - VP8LWriteBits(bw, nbitpairs * 2, trimmed_length - 2); + VP8LPutBits(bw, trimmed_length - 2, nbitpairs * 2); } StoreHuffmanTreeToBitMask(bw, tokens, length, &huffman_code); } - ok = 1; - End: - free(tokens); - return ok; } -static int StoreHuffmanCode(VP8LBitWriter* const bw, - const HuffmanTreeCode* const huffman_code) { +// 'huff_tree' and 'tokens' are pre-alloacted buffers. +static void StoreHuffmanCode(VP8LBitWriter* const bw, + HuffmanTree* const huff_tree, + HuffmanTreeToken* const tokens, + const HuffmanTreeCode* const huffman_code) { int i; int count = 0; int symbols[2] = { 0, 0 }; @@ -384,163 +637,248 @@ static int StoreHuffmanCode(VP8LBitWriter* const bw, if (count == 0) { // emit minimal tree for empty cases // bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0 - VP8LWriteBits(bw, 4, 0x01); - return 1; + VP8LPutBits(bw, 0x01, 4); } else if (count <= 2 && symbols[0] < kMaxSymbol && symbols[1] < kMaxSymbol) { - VP8LWriteBits(bw, 1, 1); // Small tree marker to encode 1 or 2 symbols. - VP8LWriteBits(bw, 1, count - 1); + VP8LPutBits(bw, 1, 1); // Small tree marker to encode 1 or 2 symbols. + VP8LPutBits(bw, count - 1, 1); if (symbols[0] <= 1) { - VP8LWriteBits(bw, 1, 0); // Code bit for small (1 bit) symbol value. - VP8LWriteBits(bw, 1, symbols[0]); + VP8LPutBits(bw, 0, 1); // Code bit for small (1 bit) symbol value. + VP8LPutBits(bw, symbols[0], 1); } else { - VP8LWriteBits(bw, 1, 1); - VP8LWriteBits(bw, 8, symbols[0]); + VP8LPutBits(bw, 1, 1); + VP8LPutBits(bw, symbols[0], 8); } if (count == 2) { - VP8LWriteBits(bw, 8, symbols[1]); + VP8LPutBits(bw, symbols[1], 8); } - return 1; } else { - return StoreFullHuffmanCode(bw, huffman_code); + StoreFullHuffmanCode(bw, huff_tree, tokens, huffman_code); } } -static void WriteHuffmanCode(VP8LBitWriter* const bw, - const HuffmanTreeCode* const code, int index) { - const int depth = code->code_lengths[index]; - const int symbol = code->codes[index]; - VP8LWriteBits(bw, depth, symbol); +static WEBP_INLINE void WriteHuffmanCode(VP8LBitWriter* const bw, + const HuffmanTreeCode* const code, + int code_index) { + const int depth = code->code_lengths[code_index]; + const int symbol = code->codes[code_index]; + VP8LPutBits(bw, symbol, depth); } -static void StoreImageToBitMask( +static WEBP_INLINE void WriteHuffmanCodeWithExtraBits( + VP8LBitWriter* const bw, + const HuffmanTreeCode* const code, + int code_index, + int bits, + int n_bits) { + const int depth = code->code_lengths[code_index]; + const int symbol = code->codes[code_index]; + VP8LPutBits(bw, (bits << depth) | symbol, depth + n_bits); +} + +static WebPEncodingError StoreImageToBitMask( VP8LBitWriter* const bw, int width, int histo_bits, - const VP8LBackwardRefs* const refs, + VP8LBackwardRefs* const refs, const uint16_t* histogram_symbols, const HuffmanTreeCode* const huffman_codes) { + const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1; + const int tile_mask = (histo_bits == 0) ? 0 : -(1 << histo_bits); // x and y trace the position in the image. int x = 0; int y = 0; - const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1; - int i; - for (i = 0; i < refs->size; ++i) { - const PixOrCopy* const v = &refs->refs[i]; - const int histogram_ix = histogram_symbols[histo_bits ? - (y >> histo_bits) * histo_xsize + - (x >> histo_bits) : 0]; - const HuffmanTreeCode* const codes = huffman_codes + 5 * histogram_ix; - if (PixOrCopyIsCacheIdx(v)) { - const int code = PixOrCopyCacheIdx(v); - const int literal_ix = 256 + NUM_LENGTH_CODES + code; - WriteHuffmanCode(bw, codes, literal_ix); - } else if (PixOrCopyIsLiteral(v)) { + int tile_x = x & tile_mask; + int tile_y = y & tile_mask; + int histogram_ix = histogram_symbols[0]; + const HuffmanTreeCode* codes = huffman_codes + 5 * histogram_ix; + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + while (VP8LRefsCursorOk(&c)) { + const PixOrCopy* const v = c.cur_pos; + if ((tile_x != (x & tile_mask)) || (tile_y != (y & tile_mask))) { + tile_x = x & tile_mask; + tile_y = y & tile_mask; + histogram_ix = histogram_symbols[(y >> histo_bits) * histo_xsize + + (x >> histo_bits)]; + codes = huffman_codes + 5 * histogram_ix; + } + if (PixOrCopyIsLiteral(v)) { static const int order[] = { 1, 2, 0, 3 }; int k; for (k = 0; k < 4; ++k) { const int code = PixOrCopyLiteral(v, order[k]); WriteHuffmanCode(bw, codes + k, code); } + } else if (PixOrCopyIsCacheIdx(v)) { + const int code = PixOrCopyCacheIdx(v); + const int literal_ix = 256 + NUM_LENGTH_CODES + code; + WriteHuffmanCode(bw, codes, literal_ix); } else { int bits, n_bits; - int code, distance; + int code; - PrefixEncode(v->len, &code, &n_bits, &bits); - WriteHuffmanCode(bw, codes, 256 + code); - VP8LWriteBits(bw, n_bits, bits); + const int distance = PixOrCopyDistance(v); + VP8LPrefixEncode(v->len, &code, &n_bits, &bits); + WriteHuffmanCodeWithExtraBits(bw, codes, 256 + code, bits, n_bits); - distance = PixOrCopyDistance(v); - PrefixEncode(distance, &code, &n_bits, &bits); + // Don't write the distance with the extra bits code since + // the distance can be up to 18 bits of extra bits, and the prefix + // 15 bits, totaling to 33, and our PutBits only supports up to 32 bits. + // TODO(jyrki): optimize this further. + VP8LPrefixEncode(distance, &code, &n_bits, &bits); WriteHuffmanCode(bw, codes + 4, code); - VP8LWriteBits(bw, n_bits, bits); + VP8LPutBits(bw, bits, n_bits); } x += PixOrCopyLength(v); while (x >= width) { x -= width; ++y; } + VP8LRefsCursorNext(&c); } + return bw->error_ ? VP8_ENC_ERROR_OUT_OF_MEMORY : VP8_ENC_OK; } // Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31 -static int EncodeImageNoHuffman(VP8LBitWriter* const bw, - const uint32_t* const argb, - int width, int height, int quality) { +static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, + const uint32_t* const argb, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2], + int width, int height, + int quality) { int i; - int ok = 0; - VP8LBackwardRefs refs; + int max_tokens = 0; + WebPEncodingError err = VP8_ENC_OK; + VP8LBackwardRefs* refs; + HuffmanTreeToken* tokens = NULL; HuffmanTreeCode huffman_codes[5] = { { 0, NULL, NULL } }; const uint16_t histogram_symbols[1] = { 0 }; // only one tree, one symbol - VP8LHistogramSet* const histogram_image = VP8LAllocateHistogramSet(1, 0); - if (histogram_image == NULL) return 0; + int cache_bits = 0; + VP8LHistogramSet* histogram_image = NULL; + HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc( + 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); + if (huff_tree == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } // Calculate backward references from ARGB image. - if (!VP8LGetBackwardReferences(width, height, argb, quality, 0, 1, &refs)) { + refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, &cache_bits, + hash_chain, refs_array); + if (refs == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } + histogram_image = VP8LAllocateHistogramSet(1, cache_bits); + if (histogram_image == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + // Build histogram image and symbols from backward references. - VP8LHistogramStoreRefs(&refs, histogram_image->histograms[0]); + VP8LHistogramStoreRefs(refs, histogram_image->histograms[0]); // Create Huffman bit lengths and codes for each histogram image. assert(histogram_image->size == 1); if (!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } // No color cache, no Huffman image. - VP8LWriteBits(bw, 1, 0); + VP8LPutBits(bw, 0, 1); - // Store Huffman codes. + // Find maximum number of symbols for the huffman tree-set. for (i = 0; i < 5; ++i) { HuffmanTreeCode* const codes = &huffman_codes[i]; - if (!StoreHuffmanCode(bw, codes)) { - goto Error; + if (max_tokens < codes->num_symbols) { + max_tokens = codes->num_symbols; } + } + + tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); + if (tokens == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + // Store Huffman codes. + for (i = 0; i < 5; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + StoreHuffmanCode(bw, huff_tree, tokens, codes); ClearHuffmanTreeIfOnlyOneSymbol(codes); } // Store actual literals. - StoreImageToBitMask(bw, width, 0, &refs, histogram_symbols, huffman_codes); - ok = 1; + err = StoreImageToBitMask(bw, width, 0, refs, histogram_symbols, + huffman_codes); Error: - free(histogram_image); - VP8LClearBackwardRefs(&refs); - free(huffman_codes[0].codes); - return ok; + WebPSafeFree(tokens); + WebPSafeFree(huff_tree); + VP8LFreeHistogramSet(histogram_image); + WebPSafeFree(huffman_codes[0].codes); + return err; } -static int EncodeImageInternal(VP8LBitWriter* const bw, - const uint32_t* const argb, - int width, int height, int quality, - int cache_bits, int histogram_bits) { - int ok = 0; - const int use_2d_locality = 1; - const int use_color_cache = (cache_bits > 0); +static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, + const uint32_t* const argb, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2], + int width, int height, int quality, + int low_effort, int* cache_bits, + int histogram_bits, + size_t init_byte_position, + int* const hdr_size, + int* const data_size) { + WebPEncodingError err = VP8_ENC_OK; const uint32_t histogram_image_xysize = VP8LSubSampleSize(width, histogram_bits) * VP8LSubSampleSize(height, histogram_bits); - VP8LHistogramSet* histogram_image = - VP8LAllocateHistogramSet(histogram_image_xysize, 0); + VP8LHistogramSet* histogram_image = NULL; + VP8LHistogramSet* tmp_histos = NULL; int histogram_image_size = 0; size_t bit_array_size = 0; + HuffmanTree* huff_tree = NULL; + HuffmanTreeToken* tokens = NULL; HuffmanTreeCode* huffman_codes = NULL; VP8LBackwardRefs refs; + VP8LBackwardRefs* best_refs; uint16_t* const histogram_symbols = - (uint16_t*)WebPSafeMalloc((uint64_t)histogram_image_xysize, + (uint16_t*)WebPSafeMalloc(histogram_image_xysize, sizeof(*histogram_symbols)); assert(histogram_bits >= MIN_HUFFMAN_BITS); assert(histogram_bits <= MAX_HUFFMAN_BITS); - if (histogram_image == NULL || histogram_symbols == NULL) goto Error; + assert(hdr_size != NULL); + assert(data_size != NULL); + VP8LBackwardRefsInit(&refs, refs_array[0].block_size_); + if (histogram_symbols == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + *cache_bits = MAX_COLOR_CACHE_BITS; + // 'best_refs' is the reference to the best backward refs and points to one + // of refs_array[0] or refs_array[1]. // Calculate backward references from ARGB image. - if (!VP8LGetBackwardReferences(width, height, argb, quality, cache_bits, - use_2d_locality, &refs)) { + best_refs = VP8LGetBackwardReferences(width, height, argb, quality, + low_effort, cache_bits, hash_chain, + refs_array); + if (best_refs == NULL || !VP8LBackwardRefsCopy(best_refs, &refs)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } + histogram_image = + VP8LAllocateHistogramSet(histogram_image_xysize, *cache_bits); + tmp_histos = VP8LAllocateHistogramSet(2, *cache_bits); + if (histogram_image == NULL || tmp_histos == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + // Build histogram image and symbols from backward references. - if (!VP8LGetHistoImageSymbols(width, height, &refs, - quality, histogram_bits, cache_bits, - histogram_image, - histogram_symbols)) { + if (!VP8LGetHistoImageSymbols(width, height, &refs, quality, low_effort, + histogram_bits, *cache_bits, histogram_image, + tmp_histos, histogram_symbols)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } // Create Huffman bit lengths and codes for each histogram image. @@ -548,171 +886,167 @@ static int EncodeImageInternal(VP8LBitWriter* const bw, bit_array_size = 5 * histogram_image_size; huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size, sizeof(*huffman_codes)); + // Note: some histogram_image entries may point to tmp_histos[], so the latter + // need to outlive the following call to GetHuffBitLengthsAndCodes(). if (huffman_codes == NULL || !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } + // Free combined histograms. + VP8LFreeHistogramSet(histogram_image); + histogram_image = NULL; + + // Free scratch histograms. + VP8LFreeHistogramSet(tmp_histos); + tmp_histos = NULL; // Color Cache parameters. - VP8LWriteBits(bw, 1, use_color_cache); - if (use_color_cache) { - VP8LWriteBits(bw, 4, cache_bits); + if (*cache_bits > 0) { + VP8LPutBits(bw, 1, 1); + VP8LPutBits(bw, *cache_bits, 4); + } else { + VP8LPutBits(bw, 0, 1); } // Huffman image + meta huffman. { const int write_histogram_image = (histogram_image_size > 1); - VP8LWriteBits(bw, 1, write_histogram_image); + VP8LPutBits(bw, write_histogram_image, 1); if (write_histogram_image) { uint32_t* const histogram_argb = - (uint32_t*)WebPSafeMalloc((uint64_t)histogram_image_xysize, + (uint32_t*)WebPSafeMalloc(histogram_image_xysize, sizeof(*histogram_argb)); int max_index = 0; uint32_t i; - if (histogram_argb == NULL) goto Error; + if (histogram_argb == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } for (i = 0; i < histogram_image_xysize; ++i) { - const int index = histogram_symbols[i] & 0xffff; - histogram_argb[i] = 0xff000000 | (index << 8); - if (index >= max_index) { - max_index = index + 1; + const int symbol_index = histogram_symbols[i] & 0xffff; + histogram_argb[i] = (symbol_index << 8); + if (symbol_index >= max_index) { + max_index = symbol_index + 1; } } histogram_image_size = max_index; - VP8LWriteBits(bw, 3, histogram_bits - 2); - ok = EncodeImageNoHuffman(bw, histogram_argb, - VP8LSubSampleSize(width, histogram_bits), - VP8LSubSampleSize(height, histogram_bits), - quality); - free(histogram_argb); - if (!ok) goto Error; + VP8LPutBits(bw, histogram_bits - 2, 3); + err = EncodeImageNoHuffman(bw, histogram_argb, hash_chain, refs_array, + VP8LSubSampleSize(width, histogram_bits), + VP8LSubSampleSize(height, histogram_bits), + quality); + WebPSafeFree(histogram_argb); + if (err != VP8_ENC_OK) goto Error; } } // Store Huffman codes. { int i; + int max_tokens = 0; + huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * CODE_LENGTH_CODES, + sizeof(*huff_tree)); + if (huff_tree == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + // Find maximum number of symbols for the huffman tree-set. for (i = 0; i < 5 * histogram_image_size; ++i) { HuffmanTreeCode* const codes = &huffman_codes[i]; - if (!StoreHuffmanCode(bw, codes)) goto Error; + if (max_tokens < codes->num_symbols) { + max_tokens = codes->num_symbols; + } + } + tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, + sizeof(*tokens)); + if (tokens == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + for (i = 0; i < 5 * histogram_image_size; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + StoreHuffmanCode(bw, huff_tree, tokens, codes); ClearHuffmanTreeIfOnlyOneSymbol(codes); } } - // Free combined histograms. - free(histogram_image); - histogram_image = NULL; + *hdr_size = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position); // Store actual literals. - StoreImageToBitMask(bw, width, histogram_bits, &refs, - histogram_symbols, huffman_codes); - ok = 1; + err = StoreImageToBitMask(bw, width, histogram_bits, &refs, + histogram_symbols, huffman_codes); + *data_size = + (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size); Error: - if (!ok) free(histogram_image); - - VP8LClearBackwardRefs(&refs); + WebPSafeFree(tokens); + WebPSafeFree(huff_tree); + VP8LFreeHistogramSet(histogram_image); + VP8LFreeHistogramSet(tmp_histos); + VP8LBackwardRefsClear(&refs); if (huffman_codes != NULL) { - free(huffman_codes->codes); - free(huffman_codes); + WebPSafeFree(huffman_codes->codes); + WebPSafeFree(huffman_codes); } - free(histogram_symbols); - return ok; + WebPSafeFree(histogram_symbols); + return err; } // ----------------------------------------------------------------------------- // Transforms -// Check if it would be a good idea to subtract green from red and blue. We -// only impact entropy in red/blue components, don't bother to look at others. -static int EvalAndApplySubtractGreen(VP8LEncoder* const enc, - int width, int height, - VP8LBitWriter* const bw) { - if (!enc->use_palette_) { - int i; - const uint32_t* const argb = enc->argb_; - double bit_cost_before, bit_cost_after; - VP8LHistogram* const histo = (VP8LHistogram*)malloc(sizeof(*histo)); - if (histo == NULL) return 0; - - VP8LHistogramInit(histo, 1); - for (i = 0; i < width * height; ++i) { - const uint32_t c = argb[i]; - ++histo->red_[(c >> 16) & 0xff]; - ++histo->blue_[(c >> 0) & 0xff]; - } - bit_cost_before = VP8LHistogramEstimateBits(histo); - - VP8LHistogramInit(histo, 1); - for (i = 0; i < width * height; ++i) { - const uint32_t c = argb[i]; - const int green = (c >> 8) & 0xff; - ++histo->red_[((c >> 16) - green) & 0xff]; - ++histo->blue_[((c >> 0) - green) & 0xff]; - } - bit_cost_after = VP8LHistogramEstimateBits(histo); - free(histo); - - // Check if subtracting green yields low entropy. - enc->use_subtract_green_ = (bit_cost_after < bit_cost_before); - if (enc->use_subtract_green_) { - VP8LWriteBits(bw, 1, TRANSFORM_PRESENT); - VP8LWriteBits(bw, 2, SUBTRACT_GREEN); - VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height); - } - } - return 1; +static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height, + VP8LBitWriter* const bw) { + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, SUBTRACT_GREEN, 2); + VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height); } -static int ApplyPredictFilter(const VP8LEncoder* const enc, - int width, int height, int quality, - VP8LBitWriter* const bw) { +static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, + int width, int height, + int quality, int low_effort, + VP8LBitWriter* const bw) { const int pred_bits = enc->transform_bits_; const int transform_width = VP8LSubSampleSize(width, pred_bits); const int transform_height = VP8LSubSampleSize(height, pred_bits); - VP8LResidualImage(width, height, pred_bits, enc->argb_, enc->argb_scratch_, - enc->transform_data_); - VP8LWriteBits(bw, 1, TRANSFORM_PRESENT); - VP8LWriteBits(bw, 2, PREDICTOR_TRANSFORM); + VP8LResidualImage(width, height, pred_bits, low_effort, enc->argb_, + enc->argb_scratch_, enc->transform_data_); + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2); assert(pred_bits >= 2); - VP8LWriteBits(bw, 3, pred_bits - 2); - if (!EncodeImageNoHuffman(bw, enc->transform_data_, - transform_width, transform_height, quality)) { - return 0; - } - return 1; + VP8LPutBits(bw, pred_bits - 2, 3); + return EncodeImageNoHuffman(bw, enc->transform_data_, + (VP8LHashChain*)&enc->hash_chain_, + (VP8LBackwardRefs*)enc->refs_, // cast const away + transform_width, transform_height, + quality); } -static int ApplyCrossColorFilter(const VP8LEncoder* const enc, - int width, int height, int quality, - VP8LBitWriter* const bw) { +static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, + int width, int height, + int quality, + VP8LBitWriter* const bw) { const int ccolor_transform_bits = enc->transform_bits_; const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits); const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits); - const int step = (quality == 0) ? 32 : 8; - VP8LColorSpaceTransform(width, height, ccolor_transform_bits, step, + VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality, enc->argb_, enc->transform_data_); - VP8LWriteBits(bw, 1, TRANSFORM_PRESENT); - VP8LWriteBits(bw, 2, CROSS_COLOR_TRANSFORM); + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, CROSS_COLOR_TRANSFORM, 2); assert(ccolor_transform_bits >= 2); - VP8LWriteBits(bw, 3, ccolor_transform_bits - 2); - if (!EncodeImageNoHuffman(bw, enc->transform_data_, - transform_width, transform_height, quality)) { - return 0; - } - return 1; + VP8LPutBits(bw, ccolor_transform_bits - 2, 3); + return EncodeImageNoHuffman(bw, enc->transform_data_, + (VP8LHashChain*)&enc->hash_chain_, + (VP8LBackwardRefs*)enc->refs_, // cast const away + transform_width, transform_height, + quality); } // ----------------------------------------------------------------------------- -static void PutLE32(uint8_t* const data, uint32_t val) { - data[0] = (val >> 0) & 0xff; - data[1] = (val >> 8) & 0xff; - data[2] = (val >> 16) & 0xff; - data[3] = (val >> 24) & 0xff; -} - static WebPEncodingError WriteRiffHeader(const WebPPicture* const pic, size_t riff_size, size_t vp8l_size) { uint8_t riff[RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + VP8L_SIGNATURE_SIZE] = { @@ -733,14 +1067,14 @@ static int WriteImageSize(const WebPPicture* const pic, const int height = pic->height - 1; assert(width < WEBP_MAX_DIMENSION && height < WEBP_MAX_DIMENSION); - VP8LWriteBits(bw, VP8L_IMAGE_SIZE_BITS, width); - VP8LWriteBits(bw, VP8L_IMAGE_SIZE_BITS, height); + VP8LPutBits(bw, width, VP8L_IMAGE_SIZE_BITS); + VP8LPutBits(bw, height, VP8L_IMAGE_SIZE_BITS); return !bw->error_; } static int WriteRealAlphaAndVersion(VP8LBitWriter* const bw, int has_alpha) { - VP8LWriteBits(bw, 1, has_alpha); - VP8LWriteBits(bw, VP8L_VERSION_BITS, VP8L_VERSION); + VP8LPutBits(bw, has_alpha, 1); + VP8LPutBits(bw, VP8L_VERSION, VP8L_VERSION_BITS); return !bw->error_; } @@ -780,166 +1114,261 @@ static WebPEncodingError WriteImage(const WebPPicture* const pic, // Allocates the memory for argb (W x H) buffer, 2 rows of context for // prediction and transform data. +// Flags influencing the memory allocated: +// enc->transform_bits_ +// enc->use_predict_, enc->use_cross_color_ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, int width, int height) { WebPEncodingError err = VP8_ENC_OK; - const int tile_size = 1 << enc->transform_bits_; - const uint64_t image_size = width * height; - const uint64_t argb_scratch_size = tile_size * width + width; - const uint64_t transform_data_size = - (uint64_t)VP8LSubSampleSize(width, enc->transform_bits_) * - (uint64_t)VP8LSubSampleSize(height, enc->transform_bits_); - const uint64_t total_size = - image_size + argb_scratch_size + transform_data_size; - uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem)); - if (mem == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; + if (enc->argb_ == NULL) { + const int tile_size = 1 << enc->transform_bits_; + const uint64_t image_size = width * height; + // Ensure enough size for tiles, as well as for two scanlines and two + // extra pixels for CopyImageWithPrediction. + const uint64_t argb_scratch_size = + enc->use_predict_ ? tile_size * width + width + 2 : 0; + const int transform_data_size = + (enc->use_predict_ || enc->use_cross_color_) + ? VP8LSubSampleSize(width, enc->transform_bits_) * + VP8LSubSampleSize(height, enc->transform_bits_) + : 0; + const uint64_t total_size = + image_size + WEBP_ALIGN_CST + + argb_scratch_size + WEBP_ALIGN_CST + + (uint64_t)transform_data_size; + uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem)); + if (mem == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + enc->argb_ = mem; + mem = (uint32_t*)WEBP_ALIGN(mem + image_size); + enc->argb_scratch_ = mem; + mem = (uint32_t*)WEBP_ALIGN(mem + argb_scratch_size); + enc->transform_data_ = mem; + enc->current_width_ = width; } - enc->argb_ = mem; - mem += image_size; - enc->argb_scratch_ = mem; - mem += argb_scratch_size; - enc->transform_data_ = mem; - enc->current_width_ = width; - Error: return err; } -// Bundles multiple (2, 4 or 8) pixels into a single pixel. -// Returns the new xsize. -static void BundleColorMap(const WebPPicture* const pic, - int xbits, uint32_t* bundled_argb, int xs) { - int y; - const int bit_depth = 1 << (3 - xbits); - uint32_t code = 0; - const uint32_t* argb = pic->argb; - const int width = pic->width; - const int height = pic->height; +static void ClearTransformBuffer(VP8LEncoder* const enc) { + WebPSafeFree(enc->argb_); + enc->argb_ = NULL; +} +static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { + WebPEncodingError err = VP8_ENC_OK; + const WebPPicture* const picture = enc->pic_; + const int width = picture->width; + const int height = picture->height; + int y; + err = AllocateTransformBuffer(enc, width, height); + if (err != VP8_ENC_OK) return err; for (y = 0; y < height; ++y) { - int x; - for (x = 0; x < width; ++x) { - const int mask = (1 << xbits) - 1; - const int xsub = x & mask; - if (xsub == 0) { - code = 0; - } - // TODO(vikasa): simplify the bundling logic. - code |= (argb[x] & 0xff00) << (bit_depth * xsub); - bundled_argb[y * xs + (x >> xbits)] = 0xff000000 | code; - } - argb += pic->argb_stride; + memcpy(enc->argb_ + y * width, + picture->argb + y * picture->argb_stride, + width * sizeof(*enc->argb_)); } + assert(enc->current_width_ == width); + return VP8_ENC_OK; } -// Note: Expects "enc->palette_" to be set properly. -// Also, "enc->palette_" will be modified after this call and should not be used -// later. -static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw, - VP8LEncoder* const enc, int quality) { - WebPEncodingError err = VP8_ENC_OK; - int i, x, y; - const WebPPicture* const pic = enc->pic_; - uint32_t* argb = pic->argb; - const int width = pic->width; - const int height = pic->height; - uint32_t* const palette = enc->palette_; - const int palette_size = enc->palette_size_; +// ----------------------------------------------------------------------------- - // Replace each input pixel by corresponding palette index. - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - const uint32_t pix = argb[x]; - for (i = 0; i < palette_size; ++i) { +static void MapToPalette(const uint32_t palette[], int num_colors, + uint32_t* const last_pix, int* const last_idx, + const uint32_t* src, uint8_t* dst, int width) { + int x; + int prev_idx = *last_idx; + uint32_t prev_pix = *last_pix; + for (x = 0; x < width; ++x) { + const uint32_t pix = src[x]; + if (pix != prev_pix) { + int i; + for (i = 0; i < num_colors; ++i) { if (pix == palette[i]) { - argb[x] = 0xff000000u | (i << 8); + prev_idx = i; + prev_pix = pix; break; } } } - argb += pic->argb_stride; + dst[x] = prev_idx; } + *last_idx = prev_idx; + *last_pix = prev_pix; +} - // Save palette to bitstream. - VP8LWriteBits(bw, 1, TRANSFORM_PRESENT); - VP8LWriteBits(bw, 2, COLOR_INDEXING_TRANSFORM); - assert(palette_size >= 1); - VP8LWriteBits(bw, 8, palette_size - 1); - for (i = palette_size - 1; i >= 1; --i) { - palette[i] = VP8LSubPixels(palette[i], palette[i - 1]); - } - if (!EncodeImageNoHuffman(bw, palette, palette_size, 1, quality)) { - err = VP8_ENC_ERROR_INVALID_CONFIGURATION; - goto Error; +// Remap argb values in src[] to packed palettes entries in dst[] +// using 'row' as a temporary buffer of size 'width'. +// We assume that all src[] values have a corresponding entry in the palette. +// Note: src[] can be the same as dst[] +static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, + uint32_t* dst, uint32_t dst_stride, + const uint32_t* palette, int palette_size, + int width, int height, int xbits) { + // TODO(skal): this tmp buffer is not needed if VP8LBundleColorMap() can be + // made to work in-place. + uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row)); + int i, x, y; + int use_LUT = 1; + + if (tmp_row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; + for (i = 0; i < palette_size; ++i) { + if ((palette[i] & 0xffff00ffu) != 0) { + use_LUT = 0; + break; + } } - if (palette_size <= 16) { - // Image can be packed (multiple pixels per uint32_t). - int xbits = 1; - if (palette_size <= 2) { - xbits = 3; - } else if (palette_size <= 4) { - xbits = 2; + if (use_LUT) { + uint8_t inv_palette[MAX_PALETTE_SIZE] = { 0 }; + for (i = 0; i < palette_size; ++i) { + const int color = (palette[i] >> 8) & 0xff; + inv_palette[color] = i; + } + for (y = 0; y < height; ++y) { + for (x = 0; x < width; ++x) { + const int color = (src[x] >> 8) & 0xff; + tmp_row[x] = inv_palette[color]; + } + VP8LBundleColorMap(tmp_row, width, xbits, dst); + src += src_stride; + dst += dst_stride; + } + } else { + // Use 1 pixel cache for ARGB pixels. + uint32_t last_pix = palette[0]; + int last_idx = 0; + for (y = 0; y < height; ++y) { + MapToPalette(palette, palette_size, &last_pix, &last_idx, + src, tmp_row, width); + VP8LBundleColorMap(tmp_row, width, xbits, dst); + src += src_stride; + dst += dst_stride; } - err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height); - if (err != VP8_ENC_OK) goto Error; - BundleColorMap(pic, xbits, enc->argb_, enc->current_width_); } + WebPSafeFree(tmp_row); + return VP8_ENC_OK; +} - Error: +// Note: Expects "enc->palette_" to be set properly. +static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc, + int in_place) { + WebPEncodingError err = VP8_ENC_OK; + const WebPPicture* const pic = enc->pic_; + const int width = pic->width; + const int height = pic->height; + const uint32_t* const palette = enc->palette_; + const uint32_t* src = in_place ? enc->argb_ : pic->argb; + const int src_stride = in_place ? enc->current_width_ : pic->argb_stride; + const int palette_size = enc->palette_size_; + int xbits; + + // Replace each input pixel by corresponding palette index. + // This is done line by line. + if (palette_size <= 4) { + xbits = (palette_size <= 2) ? 3 : 2; + } else { + xbits = (palette_size <= 16) ? 1 : 0; + } + + err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height); + if (err != VP8_ENC_OK) return err; + + err = ApplyPalette(src, src_stride, + enc->argb_, enc->current_width_, + palette, palette_size, width, height, xbits); return err; } -// ----------------------------------------------------------------------------- +// Save palette_[] to bitstream. +static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, + VP8LEncoder* const enc) { + int i; + uint32_t tmp_palette[MAX_PALETTE_SIZE]; + const int palette_size = enc->palette_size_; + const uint32_t* const palette = enc->palette_; + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, COLOR_INDEXING_TRANSFORM, 2); + assert(palette_size >= 1 && palette_size <= MAX_PALETTE_SIZE); + VP8LPutBits(bw, palette_size - 1, 8); + for (i = palette_size - 1; i >= 1; --i) { + tmp_palette[i] = VP8LSubPixels(palette[i], palette[i - 1]); + } + tmp_palette[0] = palette[0]; + return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_, enc->refs_, + palette_size, 1, 20 /* quality */); +} -static int GetHistoBits(const WebPConfig* const config, - const WebPPicture* const pic) { +#ifdef WEBP_EXPERIMENTAL_FEATURES + +static WebPEncodingError EncodeDeltaPalettePredictorImage( + VP8LBitWriter* const bw, VP8LEncoder* const enc, int quality) { + const WebPPicture* const pic = enc->pic_; const int width = pic->width; const int height = pic->height; - const size_t hist_size = sizeof(VP8LHistogram); - // Make tile size a function of encoding method (Range: 0 to 6). - int histo_bits = 7 - config->method; - while (1) { - const size_t huff_image_size = VP8LSubSampleSize(width, histo_bits) * - VP8LSubSampleSize(height, histo_bits) * - hist_size; - if (huff_image_size <= MAX_HUFF_IMAGE_SIZE) break; - ++histo_bits; + + const int pred_bits = 5; + const int transform_width = VP8LSubSampleSize(width, pred_bits); + const int transform_height = VP8LSubSampleSize(height, pred_bits); + const int pred = 7; // default is Predictor7 (Top/Left Average) + const int tiles_per_row = VP8LSubSampleSize(width, pred_bits); + const int tiles_per_col = VP8LSubSampleSize(height, pred_bits); + uint32_t* predictors; + int tile_x, tile_y; + WebPEncodingError err = VP8_ENC_OK; + + predictors = (uint32_t*)WebPSafeMalloc(tiles_per_col * tiles_per_row, + sizeof(*predictors)); + if (predictors == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; + + for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { + for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { + predictors[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8); + } } - return (histo_bits < MIN_HUFFMAN_BITS) ? MIN_HUFFMAN_BITS : - (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits; -} -static void InitEncParams(VP8LEncoder* const enc) { - const WebPConfig* const config = enc->config_; - const WebPPicture* const picture = enc->pic_; - const int method = config->method; - const float quality = config->quality; - enc->transform_bits_ = (method < 4) ? 5 : (method > 4) ? 3 : 4; - enc->histo_bits_ = GetHistoBits(config, picture); - enc->cache_bits_ = (quality <= 25.f) ? 0 : 7; + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2); + VP8LPutBits(bw, pred_bits - 2, 3); + err = EncodeImageNoHuffman(bw, predictors, &enc->hash_chain_, + (VP8LBackwardRefs*)enc->refs_, // cast const away + transform_width, transform_height, + quality); + WebPSafeFree(predictors); + return err; } +#endif // WEBP_EXPERIMENTAL_FEATURES + // ----------------------------------------------------------------------------- // VP8LEncoder static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config, const WebPPicture* const picture) { - VP8LEncoder* const enc = (VP8LEncoder*)calloc(1, sizeof(*enc)); + VP8LEncoder* const enc = (VP8LEncoder*)WebPSafeCalloc(1ULL, sizeof(*enc)); if (enc == NULL) { WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); return NULL; } enc->config_ = config; enc->pic_ = picture; + + VP8LEncDspInit(); + return enc; } static void VP8LEncoderDelete(VP8LEncoder* enc) { - free(enc->argb_); - free(enc); + if (enc != NULL) { + VP8LHashChainClear(&enc->hash_chain_); + VP8LBackwardRefsClear(&enc->refs_[0]); + VP8LBackwardRefsClear(&enc->refs_[1]); + ClearTransformBuffer(enc); + WebPSafeFree(enc); + } } // ----------------------------------------------------------------------------- @@ -950,89 +1379,102 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, VP8LBitWriter* const bw) { WebPEncodingError err = VP8_ENC_OK; const int quality = (int)config->quality; + const int low_effort = (config->method == 0); const int width = picture->width; const int height = picture->height; VP8LEncoder* const enc = VP8LEncoderNew(config, picture); const size_t byte_position = VP8LBitWriterNumBytes(bw); + int use_near_lossless = 0; + int hdr_size = 0; + int data_size = 0; + int use_delta_palettization = 0; if (enc == NULL) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } - InitEncParams(enc); - // --------------------------------------------------------------------------- // Analyze image (entropy, num_palettes etc) - if (!VP8LEncAnalyze(enc, config->image_hint)) { + if (!AnalyzeAndInit(enc)) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } - if (enc->use_palette_) { - err = ApplyPalette(bw, enc, quality); - if (err != VP8_ENC_OK) goto Error; - // Color cache is disabled for palette. - enc->cache_bits_ = 0; + // Apply near-lossless preprocessing. + use_near_lossless = !enc->use_palette_ && (config->near_lossless < 100); + if (use_near_lossless) { + if (!VP8ApplyNearLossless(width, height, picture->argb, + config->near_lossless)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } } - // In case image is not packed. - if (enc->argb_ == NULL) { - int y; - err = AllocateTransformBuffer(enc, width, height); +#ifdef WEBP_EXPERIMENTAL_FEATURES + if (config->delta_palettization) { + enc->use_predict_ = 1; + enc->use_cross_color_ = 0; + enc->use_subtract_green_ = 0; + enc->use_palette_ = 1; + err = MakeInputImageCopy(enc); if (err != VP8_ENC_OK) goto Error; - for (y = 0; y < height; ++y) { - memcpy(enc->argb_ + y * width, - picture->argb + y * picture->argb_stride, - width * sizeof(*enc->argb_)); + err = WebPSearchOptimalDeltaPalette(enc); + if (err != VP8_ENC_OK) goto Error; + if (enc->use_palette_) { + err = AllocateTransformBuffer(enc, width, height); + if (err != VP8_ENC_OK) goto Error; + err = EncodeDeltaPalettePredictorImage(bw, enc, quality); + if (err != VP8_ENC_OK) goto Error; + use_delta_palettization = 1; } - enc->current_width_ = width; } +#endif // WEBP_EXPERIMENTAL_FEATURES - // --------------------------------------------------------------------------- - // Apply transforms and write transform data. - - if (!EvalAndApplySubtractGreen(enc, enc->current_width_, height, bw)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; + // Encode palette + if (enc->use_palette_) { + err = EncodePalette(bw, enc); + if (err != VP8_ENC_OK) goto Error; + err = MapImageFromPalette(enc, use_delta_palettization); + if (err != VP8_ENC_OK) goto Error; } - - if (enc->use_predict_) { - if (!ApplyPredictFilter(enc, enc->current_width_, height, quality, bw)) { - err = VP8_ENC_ERROR_INVALID_CONFIGURATION; - goto Error; + if (!use_delta_palettization) { + // In case image is not packed. + if (enc->argb_ == NULL) { + err = MakeInputImageCopy(enc); + if (err != VP8_ENC_OK) goto Error; } - } - if (enc->use_cross_color_) { - if (!ApplyCrossColorFilter(enc, enc->current_width_, height, quality, bw)) { - err = VP8_ENC_ERROR_INVALID_CONFIGURATION; - goto Error; - } - } + // ------------------------------------------------------------------------- + // Apply transforms and write transform data. - VP8LWriteBits(bw, 1, !TRANSFORM_PRESENT); // No more transforms. + if (enc->use_subtract_green_) { + ApplySubtractGreen(enc, enc->current_width_, height, bw); + } - // --------------------------------------------------------------------------- - // Estimate the color cache size. + if (enc->use_predict_) { + err = ApplyPredictFilter(enc, enc->current_width_, height, quality, + low_effort, bw); + if (err != VP8_ENC_OK) goto Error; + } - if (enc->cache_bits_ > 0) { - if (!VP8LCalculateEstimateForCacheSize(enc->argb_, enc->current_width_, - height, &enc->cache_bits_)) { - err = VP8_ENC_ERROR_INVALID_CONFIGURATION; - goto Error; + if (enc->use_cross_color_) { + err = ApplyCrossColorFilter(enc, enc->current_width_, + height, quality, bw); + if (err != VP8_ENC_OK) goto Error; } } + VP8LPutBits(bw, !TRANSFORM_PRESENT, 1); // No more transforms. + // --------------------------------------------------------------------------- // Encode and write the transformed image. - - if (!EncodeImageInternal(bw, enc->argb_, enc->current_width_, height, - quality, enc->cache_bits_, enc->histo_bits_)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } + err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_, + enc->current_width_, height, quality, low_effort, + &enc->cache_bits_, enc->histo_bits_, byte_position, + &hdr_size, &data_size); + if (err != VP8_ENC_OK) goto Error; if (picture->stats != NULL) { WebPAuxStats* const stats = picture->stats; @@ -1046,6 +1488,8 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, stats->cache_bits = enc->cache_bits_; stats->palette_size = enc->palette_size_; stats->lossless_size = (int)(VP8LBitWriterNumBytes(bw) - byte_position); + stats->lossless_hdr_size = hdr_size; + stats->lossless_data_size = data_size; } Error: @@ -1059,6 +1503,7 @@ int VP8LEncodeImage(const WebPConfig* const config, int has_alpha; size_t coded_size; int percent = 0; + int initial_size; WebPEncodingError err = VP8_ENC_OK; VP8LBitWriter bw; @@ -1072,7 +1517,11 @@ int VP8LEncodeImage(const WebPConfig* const config, width = picture->width; height = picture->height; - if (!VP8LBitWriterInit(&bw, (width * height) >> 1)) { + // Initialize BitWriter with size corresponding to 16 bpp to photo images and + // 8 bpp for graphical images. + initial_size = (config->image_hint == WEBP_HINT_GRAPH) ? + width * height : width * height * 2; + if (!VP8LBitWriterInit(&bw, initial_size)) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } @@ -1135,7 +1584,7 @@ int VP8LEncodeImage(const WebPConfig* const config, Error: if (bw.error_) err = VP8_ENC_ERROR_OUT_OF_MEMORY; - VP8LBitWriterDestroy(&bw); + VP8LBitWriterWipeOut(&bw); if (err != VP8_ENC_OK) { WebPEncodingSetError(picture, err); return 0; @@ -1144,7 +1593,3 @@ int VP8LEncodeImage(const WebPConfig* const config, } //------------------------------------------------------------------------------ - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif diff --git a/drivers/webp/enc/vp8li.h b/drivers/webp/enc/vp8li.h index bb111aec33..6b6db127db 100644 --- a/drivers/webp/enc/vp8li.h +++ b/drivers/webp/enc/vp8li.h @@ -1,8 +1,10 @@ // Copyright 2012 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Lossless encoder: internal header. @@ -12,12 +14,13 @@ #ifndef WEBP_ENC_VP8LI_H_ #define WEBP_ENC_VP8LI_H_ +#include "./backward_references.h" #include "./histogram.h" #include "../utils/bit_writer.h" -#include "../encode.h" -#include "../format_constants.h" +#include "../webp/encode.h" +#include "../webp/format_constants.h" -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus extern "C" { #endif @@ -43,6 +46,12 @@ typedef struct { int use_palette_; int palette_size_; uint32_t palette_[MAX_PALETTE_SIZE]; + + // Some 'scratch' (potentially large) objects. + struct VP8LBackwardRefs refs_[2]; // Backward Refs array corresponding to + // LZ77 & RLE coding. + VP8LHashChain hash_chain_; // HashChain data for constructing + // backward references. } VP8LEncoder; //------------------------------------------------------------------------------ @@ -61,7 +70,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, //------------------------------------------------------------------------------ -#if defined(__cplusplus) || defined(c_plusplus) +#ifdef __cplusplus } // extern "C" #endif diff --git a/drivers/webp/enc/webpenc.c b/drivers/webp/enc/webpenc.c index 3c275589fc..8ced07a2a3 100644 --- a/drivers/webp/enc/webpenc.c +++ b/drivers/webp/enc/webpenc.c @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // WebP encoder: main entry point @@ -14,16 +16,13 @@ #include <string.h> #include <math.h> +#include "./cost.h" #include "./vp8enci.h" #include "./vp8li.h" #include "../utils/utils.h" // #define PRINT_MEMORY_INFO -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - #ifdef PRINT_MEMORY_INFO #include <stdio.h> #endif @@ -35,43 +34,18 @@ int WebPGetEncoderVersion(void) { } //------------------------------------------------------------------------------ -// WebPPicture -//------------------------------------------------------------------------------ - -static int DummyWriter(const uint8_t* data, size_t data_size, - const WebPPicture* const picture) { - // The following are to prevent 'unused variable' error message. - (void)data; - (void)data_size; - (void)picture; - return 1; -} - -int WebPPictureInitInternal(WebPPicture* picture, int version) { - if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) { - return 0; // caller/system version mismatch! - } - if (picture != NULL) { - memset(picture, 0, sizeof(*picture)); - picture->writer = DummyWriter; - WebPEncodingSetError(picture, VP8_ENC_OK); - } - return 1; -} - -//------------------------------------------------------------------------------ // VP8Encoder //------------------------------------------------------------------------------ static void ResetSegmentHeader(VP8Encoder* const enc) { - VP8SegmentHeader* const hdr = &enc->segment_hdr_; + VP8EncSegmentHeader* const hdr = &enc->segment_hdr_; hdr->num_segments_ = enc->config_->segments; hdr->update_map_ = (hdr->num_segments_ > 1); hdr->size_ = 0; } static void ResetFilterHeader(VP8Encoder* const enc) { - VP8FilterHeader* const hdr = &enc->filter_hdr_; + VP8EncFilterHeader* const hdr = &enc->filter_hdr_; hdr->simple_ = 1; hdr->level_ = 0; hdr->sharpness_ = 0; @@ -93,56 +67,73 @@ static void ResetBoundaryPredictions(VP8Encoder* const enc) { enc->nz_[-1] = 0; // constant } -// Map configured quality level to coding tools used. -//-------------+---+---+---+---+---+---+ -// Quality | 0 | 1 | 2 | 3 | 4 | 5 + -//-------------+---+---+---+---+---+---+ -// dynamic prob| ~ | x | x | x | x | x | -//-------------+---+---+---+---+---+---+ -// rd-opt modes| | | x | x | x | x | -//-------------+---+---+---+---+---+---+ -// fast i4/i16 | x | x | | | | | -//-------------+---+---+---+---+---+---+ -// rd-opt i4/16| | | x | x | x | x | -//-------------+---+---+---+---+---+---+ -// Trellis | | x | | | x | x | -//-------------+---+---+---+---+---+---+ -// full-SNS | | | | | | x | -//-------------+---+---+---+---+---+---+ +// Mapping from config->method_ to coding tools used. +//-------------------+---+---+---+---+---+---+---+ +// Method | 0 | 1 | 2 | 3 |(4)| 5 | 6 | +//-------------------+---+---+---+---+---+---+---+ +// fast probe | x | | | x | | | | +//-------------------+---+---+---+---+---+---+---+ +// dynamic proba | ~ | x | x | x | x | x | x | +//-------------------+---+---+---+---+---+---+---+ +// fast mode analysis| | | | | x | x | x | +//-------------------+---+---+---+---+---+---+---+ +// basic rd-opt | | | | x | x | x | x | +//-------------------+---+---+---+---+---+---+---+ +// disto-score i4/16 | | | x | | | | | +//-------------------+---+---+---+---+---+---+---+ +// rd-opt i4/16 | | | ~ | x | x | x | x | +//-------------------+---+---+---+---+---+---+---+ +// token buffer (opt)| | | | x | x | x | x | +//-------------------+---+---+---+---+---+---+---+ +// Trellis | | | | | | x |Ful| +//-------------------+---+---+---+---+---+---+---+ +// full-SNS | | | | | x | x | x | +//-------------------+---+---+---+---+---+---+---+ static void MapConfigToTools(VP8Encoder* const enc) { - const int method = enc->config_->method; - const int limit = 100 - enc->config_->partition_limit; + const WebPConfig* const config = enc->config_; + const int method = config->method; + const int limit = 100 - config->partition_limit; enc->method_ = method; - enc->rd_opt_level_ = (method >= 6) ? 3 - : (method >= 5) ? 2 - : (method >= 3) ? 1 - : 0; + enc->rd_opt_level_ = (method >= 6) ? RD_OPT_TRELLIS_ALL + : (method >= 5) ? RD_OPT_TRELLIS + : (method >= 3) ? RD_OPT_BASIC + : RD_OPT_NONE; enc->max_i4_header_bits_ = 256 * 16 * 16 * // upper bound: up to 16bit per 4x4 block (limit * limit) / (100 * 100); // ... modulated with a quadratic curve. + + enc->thread_level_ = config->thread_level; + + enc->do_search_ = (config->target_size > 0 || config->target_PSNR > 0); + if (!config->low_memory) { +#if !defined(DISABLE_TOKEN_BUFFER) + enc->use_tokens_ = (enc->rd_opt_level_ >= RD_OPT_BASIC); // need rd stats +#endif + if (enc->use_tokens_) { + enc->num_parts_ = 1; // doesn't work with multi-partition + } + } } // Memory scaling with dimensions: // memory (bytes) ~= 2.25 * w + 0.0625 * w * h // -// Typical memory footprint (768x510 picture) -// Memory used: -// encoder: 33919 -// block cache: 2880 -// info: 3072 -// preds: 24897 -// top samples: 1623 -// non-zero: 196 -// lf-stats: 2048 -// total: 68635 -// Transcient object sizes: -// VP8EncIterator: 352 -// VP8ModeScore: 912 -// VP8SegmentInfo: 532 -// VP8Proba: 31032 +// Typical memory footprint (614x440 picture) +// encoder: 22111 +// info: 4368 +// preds: 17741 +// top samples: 1263 +// non-zero: 175 +// lf-stats: 0 +// total: 45658 +// Transient object sizes: +// VP8EncIterator: 3360 +// VP8ModeScore: 872 +// VP8SegmentInfo: 732 +// VP8EncProba: 18352 // LFStats: 2048 -// Picture size (yuv): 589824 +// Picture size (yuv): 419328 static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, WebPPicture* const picture) { @@ -154,20 +145,16 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, const int preds_h = 4 * mb_h + 1; const size_t preds_size = preds_w * preds_h * sizeof(uint8_t); const int top_stride = mb_w * 16; - const size_t nz_size = (mb_w + 1) * sizeof(uint32_t); - const size_t cache_size = (3 * YUV_SIZE + PRED_SIZE) * sizeof(uint8_t); + const size_t nz_size = (mb_w + 1) * sizeof(uint32_t) + WEBP_ALIGN_CST; const size_t info_size = mb_w * mb_h * sizeof(VP8MBInfo); - const size_t samples_size = (2 * top_stride + // top-luma/u/v - 16 + 16 + 16 + 8 + 1 + // left y/u/v - 2 * ALIGN_CST) // align all - * sizeof(uint8_t); + const size_t samples_size = 2 * top_stride * sizeof(uint8_t) // top-luma/u/v + + WEBP_ALIGN_CST; // align all const size_t lf_stats_size = - config->autofilter ? sizeof(LFStats) + ALIGN_CST : 0; + config->autofilter ? sizeof(LFStats) + WEBP_ALIGN_CST : 0; VP8Encoder* enc; uint8_t* mem; const uint64_t size = (uint64_t)sizeof(VP8Encoder) // main struct - + ALIGN_CST // cache alignment - + cache_size // working caches + + WEBP_ALIGN_CST // cache alignment + info_size // modes info + preds_size // prediction modes + samples_size // top/left samples @@ -178,23 +165,22 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, printf("===================================\n"); printf("Memory used:\n" " encoder: %ld\n" - " block cache: %ld\n" " info: %ld\n" " preds: %ld\n" " top samples: %ld\n" " non-zero: %ld\n" " lf-stats: %ld\n" " total: %ld\n", - sizeof(VP8Encoder) + ALIGN_CST, cache_size, info_size, + sizeof(VP8Encoder) + WEBP_ALIGN_CST, info_size, preds_size, samples_size, nz_size, lf_stats_size, size); - printf("Transcient object sizes:\n" + printf("Transient object sizes:\n" " VP8EncIterator: %ld\n" " VP8ModeScore: %ld\n" " VP8SegmentInfo: %ld\n" - " VP8Proba: %ld\n" + " VP8EncProba: %ld\n" " LFStats: %ld\n", sizeof(VP8EncIterator), sizeof(VP8ModeScore), - sizeof(VP8SegmentInfo), sizeof(VP8Proba), + sizeof(VP8SegmentInfo), sizeof(VP8EncProba), sizeof(LFStats)); printf("Picture size (yuv): %ld\n", mb_w * mb_h * 384 * sizeof(uint8_t)); @@ -206,41 +192,27 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, return NULL; } enc = (VP8Encoder*)mem; - mem = (uint8_t*)DO_ALIGN(mem + sizeof(*enc)); + mem = (uint8_t*)WEBP_ALIGN(mem + sizeof(*enc)); memset(enc, 0, sizeof(*enc)); enc->num_parts_ = 1 << config->partitions; enc->mb_w_ = mb_w; enc->mb_h_ = mb_h; enc->preds_w_ = preds_w; - enc->yuv_in_ = (uint8_t*)mem; - mem += YUV_SIZE; - enc->yuv_out_ = (uint8_t*)mem; - mem += YUV_SIZE; - enc->yuv_out2_ = (uint8_t*)mem; - mem += YUV_SIZE; - enc->yuv_p_ = (uint8_t*)mem; - mem += PRED_SIZE; enc->mb_info_ = (VP8MBInfo*)mem; mem += info_size; enc->preds_ = ((uint8_t*)mem) + 1 + enc->preds_w_; mem += preds_w * preds_h * sizeof(uint8_t); - enc->nz_ = 1 + (uint32_t*)mem; + enc->nz_ = 1 + (uint32_t*)WEBP_ALIGN(mem); mem += nz_size; - enc->lf_stats_ = lf_stats_size ? (LFStats*)DO_ALIGN(mem) : NULL; + enc->lf_stats_ = lf_stats_size ? (LFStats*)WEBP_ALIGN(mem) : NULL; mem += lf_stats_size; // top samples (all 16-aligned) - mem = (uint8_t*)DO_ALIGN(mem); + mem = (uint8_t*)WEBP_ALIGN(mem); enc->y_top_ = (uint8_t*)mem; enc->uv_top_ = enc->y_top_ + top_stride; mem += 2 * top_stride; - mem = (uint8_t*)DO_ALIGN(mem + 1); - enc->y_left_ = (uint8_t*)mem; - mem += 16 + 16; - enc->u_left_ = (uint8_t*)mem; - mem += 16; - enc->v_left_ = (uint8_t*)mem; - mem += 8; + assert(mem <= (uint8_t*)enc + size); enc->config_ = config; enc->profile_ = use_filter ? ((config->filter_type == 1) ? 0 : 1) : 2; @@ -253,29 +225,32 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, ResetSegmentHeader(enc); ResetFilterHeader(enc); ResetBoundaryPredictions(enc); - + VP8EncDspCostInit(); VP8EncInitAlpha(enc); -#ifdef WEBP_EXPERIMENTAL_FEATURES - VP8EncInitLayer(enc); -#endif + // lower quality means smaller output -> we modulate a little the page + // size based on quality. This is just a crude 1rst-order prediction. + { + const float scale = 1.f + config->quality * 5.f / 100.f; // in [1,6] + VP8TBufferInit(&enc->tokens_, (int)(mb_w * mb_h * 4 * scale)); + } return enc; } -static void DeleteVP8Encoder(VP8Encoder* enc) { +static int DeleteVP8Encoder(VP8Encoder* enc) { + int ok = 1; if (enc != NULL) { - VP8EncDeleteAlpha(enc); -#ifdef WEBP_EXPERIMENTAL_FEATURES - VP8EncDeleteLayer(enc); -#endif - free(enc); + ok = VP8EncDeleteAlpha(enc); + VP8TBufferClear(&enc->tokens_); + WebPSafeFree(enc); } + return ok; } //------------------------------------------------------------------------------ static double GetPSNR(uint64_t err, uint64_t size) { - return err ? 10. * log10(255. * 255. * size / err) : 99.; + return (err > 0 && size > 0) ? 10. * log10(255. * 255. * size / err) : 99.; } static void FinalizePSNR(const VP8Encoder* const enc) { @@ -332,7 +307,7 @@ int WebPReportProgress(const WebPPicture* const pic, //------------------------------------------------------------------------------ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { - int ok; + int ok = 0; if (pic == NULL) return 0; @@ -346,44 +321,63 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION) return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION); + if (!config->exact) { + WebPCleanupTransparentArea(pic); + } + if (pic->stats != NULL) memset(pic->stats, 0, sizeof(*pic->stats)); if (!config->lossless) { VP8Encoder* enc = NULL; - if (pic->y == NULL || pic->u == NULL || pic->v == NULL) { - if (pic->argb != NULL) { - if (!WebPPictureARGBToYUVA(pic, WEBP_YUV420)) return 0; + if (pic->use_argb || pic->y == NULL || pic->u == NULL || pic->v == NULL) { + // Make sure we have YUVA samples. + if (config->preprocessing & 4) { + if (!WebPPictureSmartARGBToYUVA(pic)) { + return 0; + } } else { - return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER); + float dithering = 0.f; + if (config->preprocessing & 2) { + const float x = config->quality / 100.f; + const float x2 = x * x; + // slowly decreasing from max dithering at low quality (q->0) + // to 0.5 dithering amplitude at high quality (q->100) + dithering = 1.0f + (0.5f - 1.0f) * x2 * x2; + } + if (!WebPPictureARGBToYUVADithered(pic, WEBP_YUV420, dithering)) { + return 0; + } } } enc = InitVP8Encoder(config, pic); if (enc == NULL) return 0; // pic->error is already set. // Note: each of the tasks below account for 20% in the progress report. - ok = VP8EncAnalyze(enc) - && VP8StatLoop(enc) - && VP8EncLoop(enc) - && VP8EncFinishAlpha(enc) -#ifdef WEBP_EXPERIMENTAL_FEATURES - && VP8EncFinishLayer(enc) -#endif - && VP8EncWrite(enc); + ok = VP8EncAnalyze(enc); + + // Analysis is done, proceed to actual coding. + ok = ok && VP8EncStartAlpha(enc); // possibly done in parallel + if (!enc->use_tokens_) { + ok = ok && VP8EncLoop(enc); + } else { + ok = ok && VP8EncTokenLoop(enc); + } + ok = ok && VP8EncFinishAlpha(enc); + + ok = ok && VP8EncWrite(enc); StoreStats(enc); if (!ok) { VP8EncFreeBitWriters(enc); } - DeleteVP8Encoder(enc); + ok &= DeleteVP8Encoder(enc); // must always be called, even if !ok } else { - if (pic->argb == NULL) - return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER); + // Make sure we have ARGB samples. + if (pic->argb == NULL && !WebPPictureYUVAToARGB(pic)) { + return 0; + } ok = VP8LEncodeImage(config, pic); // Sets pic->error in case of problem. } return ok; } - -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" -#endif |