summaryrefslogtreecommitdiff
path: root/drivers/webp/enc/quant.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/webp/enc/quant.c')
-rw-r--r--drivers/webp/enc/quant.c458
1 files changed, 116 insertions, 342 deletions
diff --git a/drivers/webp/enc/quant.c b/drivers/webp/enc/quant.c
index e1d202b5a3..ea153849c8 100644
--- a/drivers/webp/enc/quant.c
+++ b/drivers/webp/enc/quant.c
@@ -1,10 +1,8 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
+// This code is licensed under the same terms as WebM:
+// Software License Agreement: http://www.webmproject.org/license/software/
+// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// Quantization
@@ -13,7 +11,6 @@
#include <assert.h>
#include <math.h>
-#include <stdlib.h> // for abs()
#include "./vp8enci.h"
#include "./cost.h"
@@ -25,78 +22,16 @@
#define MID_ALPHA 64 // neutral value for susceptibility
#define MIN_ALPHA 30 // lowest usable value for susceptibility
-#define MAX_ALPHA 100 // higher meaningful value for susceptibility
+#define MAX_ALPHA 100 // higher meaninful value for susceptibility
#define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP
// power-law modulation. Must be strictly less than 1.
-#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision
-
-// number of non-zero coeffs below which we consider the block very flat
-// (and apply a penalty to complex predictions)
-#define FLATNESS_LIMIT_I16 10 // I16 mode
-#define FLATNESS_LIMIT_I4 3 // I4 mode
-#define FLATNESS_LIMIT_UV 2 // UV mode
-#define FLATNESS_PENALTY 140 // roughly ~1bit per block
-
#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
-// #define DEBUG_BLOCK
-
-//------------------------------------------------------------------------------
-
-#if defined(DEBUG_BLOCK)
-
-#include <stdio.h>
-#include <stdlib.h>
-
-static void PrintBlockInfo(const VP8EncIterator* const it,
- const VP8ModeScore* const rd) {
- int i, j;
- const int is_i16 = (it->mb_->type_ == 1);
- printf("SOURCE / OUTPUT / ABS DELTA\n");
- for (j = 0; j < 24; ++j) {
- if (j == 16) printf("\n"); // newline before the U/V block
- for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]);
- printf(" ");
- for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]);
- printf(" ");
- for (i = 0; i < 16; ++i) {
- printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS]));
- }
- printf("\n");
- }
- printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n",
- (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,
- (int)rd->score);
- if (is_i16) {
- printf("Mode: %d\n", rd->mode_i16);
- printf("y_dc_levels:");
- for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);
- printf("\n");
- } else {
- printf("Modes[16]: ");
- for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);
- printf("\n");
- }
- printf("y_ac_levels:\n");
- for (j = 0; j < 16; ++j) {
- for (i = is_i16 ? 1 : 0; i < 16; ++i) {
- printf("%4d ", rd->y_ac_levels[j][i]);
- }
- printf("\n");
- }
- printf("\n");
- printf("uv_levels (mode=%d):\n", rd->mode_uv);
- for (j = 0; j < 8; ++j) {
- for (i = 0; i < 16; ++i) {
- printf("%4d ", rd->uv_levels[j][i]);
- }
- printf("\n");
- }
-}
-
-#endif // DEBUG_BLOCK
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
//------------------------------------------------------------------------------
@@ -165,13 +100,31 @@ static const uint16_t kAcTable2[128] = {
385, 393, 401, 409, 416, 424, 432, 440
};
-static const uint8_t kBiasMatrices[3][2] = { // [luma-ac,luma-dc,chroma][dc,ac]
- { 96, 110 }, { 96, 108 }, { 110, 115 }
+static const uint16_t kCoeffThresh[16] = {
+ 0, 10, 20, 30,
+ 10, 20, 30, 30,
+ 20, 30, 30, 30,
+ 30, 30, 30, 30
+};
+
+// TODO(skal): tune more. Coeff thresholding?
+static const uint8_t kBiasMatrices[3][16] = { // [3] = [luma-ac,luma-dc,chroma]
+ { 96, 96, 96, 96,
+ 96, 96, 96, 96,
+ 96, 96, 96, 96,
+ 96, 96, 96, 96 },
+ { 96, 96, 96, 96,
+ 96, 96, 96, 96,
+ 96, 96, 96, 96,
+ 96, 96, 96, 96 },
+ { 96, 96, 96, 96,
+ 96, 96, 96, 96,
+ 96, 96, 96, 96,
+ 96, 96, 96, 96 }
};
-// Sharpening by (slightly) raising the hi-frequency coeffs.
+// Sharpening by (slightly) raising the hi-frequency coeffs (only for trellis).
// Hack-ish but helpful for mid-bitrate range. Use with care.
-#define SHARPEN_BITS 11 // number of descaling bits for sharpening bias
static const uint8_t kFreqSharpening[16] = {
0, 30, 60, 90,
30, 60, 90, 90,
@@ -184,30 +137,20 @@ static const uint8_t kFreqSharpening[16] = {
// Returns the average quantizer
static int ExpandMatrix(VP8Matrix* const m, int type) {
- int i, sum;
- for (i = 0; i < 2; ++i) {
- const int is_ac_coeff = (i > 0);
- const int bias = kBiasMatrices[type][is_ac_coeff];
- m->iq_[i] = (1 << QFIX) / m->q_[i];
- m->bias_[i] = BIAS(bias);
- // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:
- // * zero if coeff <= zthresh
- // * non-zero if coeff > zthresh
- m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];
- }
+ int i;
+ int sum = 0;
for (i = 2; i < 16; ++i) {
m->q_[i] = m->q_[1];
- m->iq_[i] = m->iq_[1];
- m->bias_[i] = m->bias_[1];
- m->zthresh_[i] = m->zthresh_[1];
}
- for (sum = 0, i = 0; i < 16; ++i) {
- if (type == 0) { // we only use sharpening for AC luma coeffs
- m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;
- } else {
- m->sharpen_[i] = 0;
- }
- sum += m->q_[i];
+ for (i = 0; i < 16; ++i) {
+ const int j = kZigzag[i];
+ const int bias = kBiasMatrices[type][j];
+ m->iq_[j] = (1 << QFIX) / m->q_[j];
+ m->bias_[j] = BIAS(bias);
+ // TODO(skal): tune kCoeffThresh[]
+ m->zthresh_[j] = ((256 /*+ kCoeffThresh[j]*/ - bias) * m->q_[j] + 127) >> 8;
+ m->sharpen_[j] = (kFreqSharpening[j] * m->q_[j]) >> 11;
+ sum += m->q_[j];
}
return (sum + 8) >> 4;
}
@@ -235,17 +178,17 @@ static void SetupMatrices(VP8Encoder* enc) {
q16 = ExpandMatrix(&m->y2_, 1);
quv = ExpandMatrix(&m->uv_, 2);
- m->lambda_i4_ = (3 * q4 * q4) >> 7;
- m->lambda_i16_ = (3 * q16 * q16);
- m->lambda_uv_ = (3 * quv * quv) >> 6;
- m->lambda_mode_ = (1 * q4 * q4) >> 7;
- m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3;
- m->lambda_trellis_i16_ = (q16 * q16) >> 2;
- m->lambda_trellis_uv_ = (quv *quv) << 1;
- m->tlambda_ = (tlambda_scale * q4) >> 5;
-
- m->min_disto_ = 10 * m->y1_.q_[0]; // quantization-aware min disto
- m->max_edge_ = 0;
+ // TODO: Switch to kLambda*[] tables?
+ {
+ m->lambda_i4_ = (3 * q4 * q4) >> 7;
+ m->lambda_i16_ = (3 * q16 * q16);
+ m->lambda_uv_ = (3 * quv * quv) >> 6;
+ m->lambda_mode_ = (1 * q4 * q4) >> 7;
+ m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3;
+ m->lambda_trellis_i16_ = (q16 * q16) >> 2;
+ m->lambda_trellis_uv_ = (quv *quv) << 1;
+ m->tlambda_ = (tlambda_scale * q4) >> 5;
+ }
}
}
@@ -254,21 +197,16 @@ static void SetupMatrices(VP8Encoder* enc) {
// Very small filter-strength values have close to no visual effect. So we can
// save a little decoding-CPU by turning filtering off for these.
-#define FSTRENGTH_CUTOFF 2
+#define FSTRENGTH_CUTOFF 3
static void SetupFilterStrength(VP8Encoder* const enc) {
int i;
- // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.
- const int level0 = 5 * enc->config_->filter_strength;
+ const int level0 = enc->config_->filter_strength;
for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
- VP8SegmentInfo* const m = &enc->dqm_[i];
- // We focus on the quantization of AC coeffs.
- const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;
- const int base_strength =
- VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);
- // Segments with lower complexity ('beta') will be less filtered.
- const int f = base_strength * level0 / (256 + m->beta_);
- m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
+ // Segments with lower quantizer will be less filtered. TODO: tune (wrt SNS)
+ const int level = level0 * 256 * enc->dqm_[i].quant_ / 128;
+ const int f = level / (256 + enc->dqm_[i].beta_);
+ enc->dqm_[i].fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
}
// We record the initial strength (mainly for the case of 1-segment only).
enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
@@ -286,90 +224,28 @@ static void SetupFilterStrength(VP8Encoder* const enc) {
// We want to emulate jpeg-like behaviour where the expected "good" quality
// is around q=75. Internally, our "good" middle is around c=50. So we
// map accordingly using linear piece-wise function
-static double QualityToCompression(double c) {
- const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
- // The file size roughly scales as pow(quantizer, 3.). Actually, the
- // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
- // in the mid-quant range. So we scale the compressibility inversely to
- // this power-law: quant ~= compression ^ 1/3. This law holds well for
- // low quant. Finer modeling for high-quant would make use of kAcTable[]
- // more explicitly.
- const double v = pow(linear_c, 1 / 3.);
- return v;
-}
-
-static double QualityToJPEGCompression(double c, double alpha) {
- // We map the complexity 'alpha' and quality setting 'c' to a compression
- // exponent empirically matched to the compression curve of libjpeg6b.
- // On average, the WebP output size will be roughly similar to that of a
- // JPEG file compressed with same quality factor.
- const double amin = 0.30;
- const double amax = 0.85;
- const double exp_min = 0.4;
- const double exp_max = 0.9;
- const double slope = (exp_min - exp_max) / (amax - amin);
- // Linearly interpolate 'expn' from exp_min to exp_max
- // in the [amin, amax] range.
- const double expn = (alpha > amax) ? exp_min
- : (alpha < amin) ? exp_max
- : exp_max + slope * (alpha - amin);
- const double v = pow(c, expn);
- return v;
-}
-
-static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
- const VP8SegmentInfo* const S2) {
- return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_);
-}
-
-static void SimplifySegments(VP8Encoder* const enc) {
- int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };
- const int num_segments = enc->segment_hdr_.num_segments_;
- int num_final_segments = 1;
- int s1, s2;
- for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments
- const VP8SegmentInfo* const S1 = &enc->dqm_[s1];
- int found = 0;
- // check if we already have similar segment
- for (s2 = 0; s2 < num_final_segments; ++s2) {
- const VP8SegmentInfo* const S2 = &enc->dqm_[s2];
- if (SegmentsAreEquivalent(S1, S2)) {
- found = 1;
- break;
- }
- }
- map[s1] = s2;
- if (!found) {
- if (num_final_segments != s1) {
- enc->dqm_[num_final_segments] = enc->dqm_[s1];
- }
- ++num_final_segments;
- }
- }
- if (num_final_segments < num_segments) { // Remap
- int i = enc->mb_w_ * enc->mb_h_;
- while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_];
- enc->segment_hdr_.num_segments_ = num_final_segments;
- // Replicate the trailing segment infos (it's mostly cosmetics)
- for (i = num_final_segments; i < num_segments; ++i) {
- enc->dqm_[i] = enc->dqm_[num_final_segments - 1];
- }
- }
+static double QualityToCompression(double q) {
+ const double c = q / 100.;
+ return (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
}
void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
int i;
int dq_uv_ac, dq_uv_dc;
- const int num_segments = enc->segment_hdr_.num_segments_;
+ const int num_segments = enc->config_->segments;
const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
- const double Q = quality / 100.;
- const double c_base = enc->config_->emulate_jpeg_size ?
- QualityToJPEGCompression(Q, enc->alpha_ / 255.) :
- QualityToCompression(Q);
+ const double c_base = QualityToCompression(quality);
for (i = 0; i < num_segments; ++i) {
- // We modulate the base coefficient to accommodate for the quantization
- // susceptibility and allow denser segments to be quantized more.
- const double expn = 1. - amp * enc->dqm_[i].alpha_;
+ // The file size roughly scales as pow(quantizer, 3.). Actually, the
+ // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
+ // in the mid-quant range. So we scale the compressibility inversely to
+ // this power-law: quant ~= compression ^ 1/3. This law holds well for
+ // low quant. Finer modelling for high-quant would make use of kAcTable[]
+ // more explicitely.
+ // Additionally, we modulate the base exponent 1/3 to accommodate for the
+ // quantization susceptibility and allow denser segments to be quantized
+ // more.
+ const double expn = (1. - amp * enc->dqm_[i].alpha_) / 3.;
const double c = pow(c_base, expn);
const int q = (int)(127. * (1. - c));
assert(expn > 0.);
@@ -405,11 +281,9 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
enc->dq_uv_dc_ = dq_uv_dc;
enc->dq_uv_ac_ = dq_uv_ac;
- SetupFilterStrength(enc); // initialize segments' filtering, eventually
-
- if (num_segments > 1) SimplifySegments(enc);
+ SetupMatrices(enc);
- SetupMatrices(enc); // finalize quantization matrices
+ SetupFilterStrength(enc); // initialize segments' filtering, eventually
}
//------------------------------------------------------------------------------
@@ -425,14 +299,16 @@ const int VP8I4ModeOffsets[NUM_BMODES] = {
};
void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
- const uint8_t* const left = it->x_ ? it->y_left_ : NULL;
- const uint8_t* const top = it->y_ ? it->y_top_ : NULL;
+ const VP8Encoder* const enc = it->enc_;
+ const uint8_t* const left = it->x_ ? enc->y_left_ : NULL;
+ const uint8_t* const top = it->y_ ? enc->y_top_ + it->x_ * 16 : NULL;
VP8EncPredLuma16(it->yuv_p_, left, top);
}
void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
- const uint8_t* const left = it->x_ ? it->u_left_ : NULL;
- const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;
+ const VP8Encoder* const enc = it->enc_;
+ const uint8_t* const left = it->x_ ? enc->u_left_ : NULL;
+ const uint8_t* const top = it->y_ ? enc->uv_top_ + it->x_ * 16 : NULL;
VP8EncPredChroma8(it->yuv_p_, left, top);
}
@@ -488,7 +364,6 @@ static void InitScore(VP8ModeScore* const rd) {
rd->D = 0;
rd->SD = 0;
rd->R = 0;
- rd->H = 0;
rd->nz = 0;
rd->score = MAX_COST;
}
@@ -497,7 +372,6 @@ static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
dst->D = src->D;
dst->SD = src->SD;
dst->R = src->R;
- dst->H = src->H;
dst->nz = src->nz; // note that nz is not accumulated, but just copied.
dst->score = src->score;
}
@@ -506,7 +380,6 @@ static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
dst->D += src->D;
dst->SD += src->SD;
dst->R += src->R;
- dst->H += src->H;
dst->nz |= src->nz; // here, new nz bits are accumulated.
dst->score += src->score;
}
@@ -535,7 +408,7 @@ typedef struct {
static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
// TODO: incorporate the "* 256" in the tables?
- rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD);
+ rd->score = rd->R * lambda + 256 * (rd->D + rd->SD);
}
static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
@@ -598,10 +471,11 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
// note: it's important to take sign of the _original_ coeff,
// so we don't have to consider level < 0 afterward.
const int sign = (in[j] < 0);
- const int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
- int level0 = QUANTDIV(coeff0, iQ, B);
- if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;
+ int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
+ int level0;
+ if (coeff0 > 2047) coeff0 = 2047;
+ level0 = QUANTDIV(coeff0, iQ, B);
// test all alternate level values around level0.
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
Node* const cur = &NODE(n, m);
@@ -613,7 +487,7 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
cur->sign = sign;
cur->level = level;
cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2;
- if (level > MAX_LEVEL || level < 0) { // node is dead?
+ if (level >= 2048 || level < 0) { // node is dead?
cur->cost = MAX_COST;
continue;
}
@@ -706,10 +580,10 @@ static int ReconstructIntra16(VP8EncIterator* const it,
VP8ModeScore* const rd,
uint8_t* const yuv_out,
int mode) {
- VP8Encoder* const enc = it->enc_;
+ const VP8Encoder* const enc = it->enc_;
const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
const uint8_t* const src = it->yuv_in_ + Y_OFF;
- VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+ const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
int nz = 0;
int n;
int16_t tmp[16][16], dc_tmp[16];
@@ -718,7 +592,7 @@ static int ReconstructIntra16(VP8EncIterator* const it,
VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
}
VP8FTransformWHT(tmp[0], dc_tmp);
- nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
+ nz |= VP8EncQuantizeBlock(dc_tmp, rd->y_dc_levels, 0, &dqm->y2_) << 24;
if (DO_TRELLIS_I16 && it->do_trellis_) {
int x, y;
@@ -813,18 +687,7 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
//------------------------------------------------------------------------------
// RD-opt decision. Reconstruct each modes, evalue distortion and bit-cost.
-// Pick the mode is lower RD-cost = Rate + lambda * Distortion.
-
-static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
- // We look at the first three AC coefficients to determine what is the average
- // delta between each sub-4x4 block.
- const int v0 = abs(DCs[1]);
- const int v1 = abs(DCs[4]);
- const int v2 = abs(DCs[5]);
- int max_v = (v0 > v1) ? v1 : v0;
- max_v = (v2 > max_v) ? v2 : max_v;
- if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
-}
+// Pick the mode is lower RD-cost = Rate + lamba * Distortion.
static void SwapPtr(uint8_t** a, uint8_t** b) {
uint8_t* const tmp = *a;
@@ -836,23 +699,9 @@ static void SwapOut(VP8EncIterator* const it) {
SwapPtr(&it->yuv_out_, &it->yuv_out2_);
}
-static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
- score_t score = 0;
- while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?
- int i;
- for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC
- score += (levels[i] != 0);
- if (score > thresh) return 0;
- }
- levels += 16;
- }
- return 1;
-}
-
static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
- const int kNumBlocks = 16;
- VP8Encoder* const enc = it->enc_;
- VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+ const VP8Encoder* const enc = it->enc_;
+ const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
const int lambda = dqm->lambda_i16_;
const int tlambda = dqm->tlambda_;
const uint8_t* const src = it->yuv_in_ + Y_OFF;
@@ -860,7 +709,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
int mode;
rd->mode_i16 = -1;
- for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
+ for (mode = 0; mode < 4; ++mode) {
uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer
int nz;
@@ -871,13 +720,8 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
rd16.D = VP8SSE16x16(src, tmp_dst);
rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))
: 0;
- rd16.H = VP8FixedCostsI16[mode];
rd16.R = VP8GetCostLuma16(it, &rd16);
- if (mode > 0 &&
- IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
- // penalty to avoid flat area to be mispredicted by complex mode
- rd16.R += FLATNESS_PENALTY * kNumBlocks;
- }
+ rd16.R += VP8FixedCostsI16[mode];
// Since we always examine Intra16 first, we can overwrite *rd directly.
SetRDScore(lambda, &rd16);
@@ -892,13 +736,6 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
}
SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.
VP8SetIntra16Mode(it, rd->mode_i16);
-
- // we have a blocky macroblock (only DCs are non-zero) with fairly high
- // distortion, record max delta so we can later adjust the minimal filtering
- // strength needed to smooth these blocks out.
- if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) {
- StoreMaxDelta(dqm, rd->y_dc_levels);
- }
}
//------------------------------------------------------------------------------
@@ -928,11 +765,9 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
}
InitScore(&rd_best);
- rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145)
- SetRDScore(dqm->lambda_mode_, &rd_best);
+ rd_best.score = 211; // '211' is the value of VP8BitCost(0, 145)
VP8IteratorStartI4(it);
do {
- const int kNumBlocks = 1;
VP8ModeScore rd_i4;
int mode;
int best_mode = -1;
@@ -956,11 +791,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
rd_tmp.SD =
tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
: 0;
- rd_tmp.H = mode_costs[mode];
rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);
- if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
- rd_tmp.R += FLATNESS_PENALTY * kNumBlocks;
- }
+ rd_tmp.R += mode_costs[mode];
SetRDScore(lambda, &rd_tmp);
if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
@@ -972,17 +804,14 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
}
SetRDScore(dqm->lambda_mode_, &rd_i4);
AddScore(&rd_best, &rd_i4);
- if (rd_best.score >= rd->score) {
- return 0;
- }
- total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode];
- if (total_header_bits > enc->max_i4_header_bits_) {
+ total_header_bits += mode_costs[best_mode];
+ if (rd_best.score >= rd->score ||
+ total_header_bits > enc->max_i4_header_bits_) {
return 0;
}
// Copy selected samples if not in the right place already.
- if (best_block != best_blocks + VP8Scan[it->i4_]) {
+ if (best_block != best_blocks + VP8Scan[it->i4_])
VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
- }
rd->modes_i4[it->i4_] = best_mode;
it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
} while (VP8IteratorRotateI4(it, best_blocks));
@@ -998,7 +827,6 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
//------------------------------------------------------------------------------
static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
- const int kNumBlocks = 8;
const VP8Encoder* const enc = it->enc_;
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
const int lambda = dqm->lambda_uv_;
@@ -1010,7 +838,7 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
rd->mode_uv = -1;
InitScore(&rd_best);
- for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
+ for (mode = 0; mode < 4; ++mode) {
VP8ModeScore rd_uv;
// Reconstruct
@@ -1019,11 +847,8 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
// Compute RD-score
rd_uv.D = VP8SSE16x8(src, tmp_dst);
rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas.
- rd_uv.H = VP8FixedCostsUV[mode];
rd_uv.R = VP8GetCostUV(it, &rd_uv);
- if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {
- rd_uv.R += FLATNESS_PENALTY * kNumBlocks;
- }
+ rd_uv.R += VP8FixedCostsUV[mode];
SetRDScore(lambda, &rd_uv);
if (mode == 0 || rd_uv.score < rd_best.score) {
@@ -1042,10 +867,10 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
const VP8Encoder* const enc = it->enc_;
- const int is_i16 = (it->mb_->type_ == 1);
+ const int i16 = (it->mb_->type_ == 1);
int nz = 0;
- if (is_i16) {
+ if (i16) {
nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
} else {
VP8IteratorStartI4(it);
@@ -1064,66 +889,11 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
rd->nz = nz;
}
-// Refine intra16/intra4 sub-modes based on distortion only (not rate).
-static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
- const int is_i16 = (it->mb_->type_ == 1);
- score_t best_score = MAX_COST;
-
- if (try_both_i4_i16 || is_i16) {
- int mode;
- int best_mode = -1;
- for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
- const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
- const uint8_t* const src = it->yuv_in_ + Y_OFF;
- const score_t score = VP8SSE16x16(src, ref);
- if (score < best_score) {
- best_mode = mode;
- best_score = score;
- }
- }
- VP8SetIntra16Mode(it, best_mode);
- }
- if (try_both_i4_i16 || !is_i16) {
- uint8_t modes_i4[16];
- // We don't evaluate the rate here, but just account for it through a
- // constant penalty (i4 mode usually needs more bits compared to i16).
- score_t score_i4 = (score_t)I4_PENALTY;
-
- VP8IteratorStartI4(it);
- do {
- int mode;
- int best_sub_mode = -1;
- score_t best_sub_score = MAX_COST;
- const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
-
- // TODO(skal): we don't really need the prediction pixels here,
- // but just the distortion against 'src'.
- VP8MakeIntra4Preds(it);
- for (mode = 0; mode < NUM_BMODES; ++mode) {
- const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
- const score_t score = VP8SSE4x4(src, ref);
- if (score < best_sub_score) {
- best_sub_mode = mode;
- best_sub_score = score;
- }
- }
- modes_i4[it->i4_] = best_sub_mode;
- score_i4 += best_sub_score;
- if (score_i4 >= best_score) break;
- } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
- if (score_i4 < best_score) {
- VP8SetIntra4Mode(it, modes_i4);
- }
- }
-}
-
//------------------------------------------------------------------------------
// Entry point
-int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
- VP8RDLevel rd_opt) {
+int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) {
int is_skipped;
- const int method = it->enc_->method_;
InitScore(rd);
@@ -1132,21 +902,22 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
VP8MakeLuma16Preds(it);
VP8MakeChroma8Preds(it);
- if (rd_opt > RD_OPT_NONE) {
- it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
+ // for rd_opt = 2, we perform trellis-quant on the final decision only.
+ // for rd_opt > 2, we use it for every scoring (=much slower).
+ if (rd_opt > 0) {
+ it->do_trellis_ = (rd_opt > 2);
PickBestIntra16(it, rd);
- if (method >= 2) {
+ if (it->enc_->method_ >= 2) {
PickBestIntra4(it, rd);
}
PickBestUV(it, rd);
- if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now
+ if (rd_opt == 2) {
it->do_trellis_ = 1;
SimpleQuantize(it, rd);
}
} else {
- // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).
- // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).
- DistoRefine(it, (method >= 2));
+ // TODO: for method_ == 2, pick the best intra4/intra16 based on SSE
+ it->do_trellis_ = (it->enc_->method_ == 2);
SimpleQuantize(it, rd);
}
is_skipped = (rd->nz == 0);
@@ -1154,3 +925,6 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
return is_skipped;
}
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif