diff options
Diffstat (limited to 'drivers/webp/enc/analysis.c')
-rw-r--r-- | drivers/webp/enc/analysis.c | 361 |
1 files changed, 249 insertions, 112 deletions
diff --git a/drivers/webp/enc/analysis.c b/drivers/webp/enc/analysis.c index 22cfb492e7..b55128fd48 100644 --- a/drivers/webp/enc/analysis.c +++ b/drivers/webp/enc/analysis.c @@ -1,8 +1,10 @@ // Copyright 2011 Google Inc. All Rights Reserved. // -// This code is licensed under the same terms as WebM: -// Software License Agreement: http://www.webmproject.org/license/software/ -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Macroblock analysis @@ -17,16 +19,8 @@ #include "./cost.h" #include "../utils/utils.h" -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - #define MAX_ITERS_K_MEANS 6 -static int ClipAlpha(int alpha) { - return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha; -} - //------------------------------------------------------------------------------ // Smooth the segment map by replacing isolated block by the majority of its // neighbours. @@ -36,7 +30,7 @@ static void SmoothSegmentMap(VP8Encoder* const enc) { const int w = enc->mb_w_; const int h = enc->mb_h_; const int majority_cnt_3_x_3_grid = 5; - uint8_t* const tmp = (uint8_t*)WebPSafeMalloc((uint64_t)w * h, sizeof(*tmp)); + uint8_t* const tmp = (uint8_t*)WebPSafeMalloc(w * h, sizeof(*tmp)); assert((uint64_t)(w * h) == (uint64_t)w * h); // no overflow, as per spec if (tmp == NULL) return; @@ -57,6 +51,7 @@ static void SmoothSegmentMap(VP8Encoder* const enc) { for (n = 0; n < NUM_MB_SEGMENTS; ++n) { if (cnt[n] >= majority_cnt_3_x_3_grid) { majority_seg = n; + break; } } tmp[x + y * w] = majority_seg; @@ -68,54 +63,14 @@ static void SmoothSegmentMap(VP8Encoder* const enc) { mb->segment_ = tmp[x + y * w]; } } - free(tmp); + WebPSafeFree(tmp); } //------------------------------------------------------------------------------ -// Finalize Segment probability based on the coding tree - -static int GetProba(int a, int b) { - int proba; - const int total = a + b; - if (total == 0) return 255; // that's the default probability. - proba = (255 * a + total / 2) / total; - return proba; -} - -static void SetSegmentProbas(VP8Encoder* const enc) { - int p[NUM_MB_SEGMENTS] = { 0 }; - int n; - - for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) { - const VP8MBInfo* const mb = &enc->mb_info_[n]; - p[mb->segment_]++; - } - if (enc->pic_->stats) { - for (n = 0; n < NUM_MB_SEGMENTS; ++n) { - enc->pic_->stats->segment_size[n] = p[n]; - } - } - if (enc->segment_hdr_.num_segments_ > 1) { - uint8_t* const probas = enc->proba_.segments_; - probas[0] = GetProba(p[0] + p[1], p[2] + p[3]); - probas[1] = GetProba(p[0], p[1]); - probas[2] = GetProba(p[2], p[3]); - - enc->segment_hdr_.update_map_ = - (probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255); - enc->segment_hdr_.size_ = - p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) + - p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) + - p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) + - p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2])); - } else { - enc->segment_hdr_.update_map_ = 0; - enc->segment_hdr_.size_ = 0; - } -} +// set segment susceptibility alpha_ / beta_ static WEBP_INLINE int clip(int v, int m, int M) { - return v < m ? m : v > M ? M : v; + return (v < m) ? m : (v > M) ? M : v; } static void SetSegmentAlphas(VP8Encoder* const enc, @@ -142,28 +97,77 @@ static void SetSegmentAlphas(VP8Encoder* const enc, } //------------------------------------------------------------------------------ +// Compute susceptibility based on DCT-coeff histograms: +// the higher, the "easier" the macroblock is to compress. + +#define MAX_ALPHA 255 // 8b of precision for susceptibilities. +#define ALPHA_SCALE (2 * MAX_ALPHA) // scaling factor for alpha. +#define DEFAULT_ALPHA (-1) +#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha)) + +static int FinalAlphaValue(int alpha) { + alpha = MAX_ALPHA - alpha; + return clip(alpha, 0, MAX_ALPHA); +} + +static int GetAlpha(const VP8Histogram* const histo) { + // 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer + // values which happen to be mostly noise. This leaves the maximum precision + // for handling the useful small values which contribute most. + const int max_value = histo->max_value; + const int last_non_zero = histo->last_non_zero; + const int alpha = + (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0; + return alpha; +} + +static void InitHistogram(VP8Histogram* const histo) { + histo->max_value = 0; + histo->last_non_zero = 1; +} + +static void MergeHistograms(const VP8Histogram* const in, + VP8Histogram* const out) { + if (in->max_value > out->max_value) { + out->max_value = in->max_value; + } + if (in->last_non_zero > out->last_non_zero) { + out->last_non_zero = in->last_non_zero; + } +} + +//------------------------------------------------------------------------------ // Simplified k-Means, to assign Nb segments based on alpha-histogram -static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) { - const int nb = enc->segment_hdr_.num_segments_; +static void AssignSegments(VP8Encoder* const enc, + const int alphas[MAX_ALPHA + 1]) { + // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an + // explicit check is needed to avoid spurious warning about 'n + 1' exceeding + // array bounds of 'centers' with some compilers (noticed with gcc-4.9). + const int nb = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) ? + enc->segment_hdr_.num_segments_ : NUM_MB_SEGMENTS; int centers[NUM_MB_SEGMENTS]; int weighted_average = 0; - int map[256]; + int map[MAX_ALPHA + 1]; int a, n, k; - int min_a = 0, max_a = 255, range_a; + int min_a = 0, max_a = MAX_ALPHA, range_a; // 'int' type is ok for histo, and won't overflow int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS]; + assert(nb >= 1); + assert(nb <= NUM_MB_SEGMENTS); + // bracket the input - for (n = 0; n < 256 && alphas[n] == 0; ++n) {} + for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {} min_a = n; - for (n = 255; n > min_a && alphas[n] == 0; --n) {} + for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {} max_a = n; range_a = max_a - min_a; // Spread initial centers evenly - for (n = 1, k = 0; n < 2 * nb; n += 2) { - centers[k++] = min_a + (n * range_a) / (2 * nb); + for (k = 0, n = 1; k < nb; ++k, n += 2) { + assert(n < 2 * nb); + centers[k] = min_a + (n * range_a) / (2 * nb); } for (k = 0; k < MAX_ITERS_K_MEANS; ++k) { // few iters are enough @@ -178,7 +182,7 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) { n = 0; // track the nearest center for current 'a' for (a = min_a; a <= max_a; ++a) { if (alphas[a]) { - while (n < nb - 1 && abs(a - centers[n + 1]) < abs(a - centers[n])) { + while (n + 1 < nb && abs(a - centers[n + 1]) < abs(a - centers[n])) { n++; } map[a] = n; @@ -210,7 +214,7 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) { VP8MBInfo* const mb = &enc->mb_info_[n]; const int alpha = mb->alpha_; mb->segment_ = map[alpha]; - mb->alpha_ = centers[map[alpha]]; // just for the record. + mb->alpha_ = centers[map[alpha]]; // for the record. } if (nb > 1) { @@ -218,7 +222,6 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) { if (smooth) SmoothSegmentMap(enc); } - SetSegmentProbas(enc); // Assign final proba SetSegmentAlphas(enc, centers, weighted_average); // pick some alphas. } @@ -227,24 +230,30 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) { // susceptibility and set best modes for this macroblock. // Segment assignment is done later. -// Number of modes to inspect for alpha_ evaluation. For high-quality settings, -// we don't need to test all the possible modes during the analysis phase. +// Number of modes to inspect for alpha_ evaluation. We don't need to test all +// the possible modes during the analysis phase: we risk falling into a local +// optimum, or be subject to boundary effect #define MAX_INTRA16_MODE 2 #define MAX_INTRA4_MODE 2 #define MAX_UV_MODE 2 static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) { - const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA16_MODE : 4; + const int max_mode = MAX_INTRA16_MODE; int mode; - int best_alpha = -1; + int best_alpha = DEFAULT_ALPHA; int best_mode = 0; VP8MakeLuma16Preds(it); for (mode = 0; mode < max_mode; ++mode) { - const int alpha = VP8CollectHistogram(it->yuv_in_ + Y_OFF, - it->yuv_p_ + VP8I16ModeOffsets[mode], - 0, 16); - if (alpha > best_alpha) { + VP8Histogram histo; + int alpha; + + InitHistogram(&histo); + VP8CollectHistogram(it->yuv_in_ + Y_OFF_ENC, + it->yuv_p_ + VP8I16ModeOffsets[mode], + 0, 16, &histo); + alpha = GetAlpha(&histo); + if (IS_BETTER_ALPHA(alpha, best_alpha)) { best_alpha = alpha; best_mode = mode; } @@ -256,46 +265,62 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) { static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it, int best_alpha) { uint8_t modes[16]; - const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA4_MODE : NUM_BMODES; - int i4_alpha = 0; + const int max_mode = MAX_INTRA4_MODE; + int i4_alpha; + VP8Histogram total_histo; + int cur_histo = 0; + InitHistogram(&total_histo); + VP8IteratorStartI4(it); do { int mode; - int best_mode_alpha = -1; - const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; + int best_mode_alpha = DEFAULT_ALPHA; + VP8Histogram histos[2]; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; VP8MakeIntra4Preds(it); for (mode = 0; mode < max_mode; ++mode) { - const int alpha = VP8CollectHistogram(src, - it->yuv_p_ + VP8I4ModeOffsets[mode], - 0, 1); - if (alpha > best_mode_alpha) { + int alpha; + + InitHistogram(&histos[cur_histo]); + VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode], + 0, 1, &histos[cur_histo]); + alpha = GetAlpha(&histos[cur_histo]); + if (IS_BETTER_ALPHA(alpha, best_mode_alpha)) { best_mode_alpha = alpha; modes[it->i4_] = mode; + cur_histo ^= 1; // keep track of best histo so far. } } - i4_alpha += best_mode_alpha; + // accumulate best histogram + MergeHistograms(&histos[cur_histo ^ 1], &total_histo); // Note: we reuse the original samples for predictors - } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF)); + } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF_ENC)); - if (i4_alpha > best_alpha) { + i4_alpha = GetAlpha(&total_histo); + if (IS_BETTER_ALPHA(i4_alpha, best_alpha)) { VP8SetIntra4Mode(it, modes); - best_alpha = ClipAlpha(i4_alpha); + best_alpha = i4_alpha; } return best_alpha; } static int MBAnalyzeBestUVMode(VP8EncIterator* const it) { - int best_alpha = -1; + int best_alpha = DEFAULT_ALPHA; int best_mode = 0; - const int max_mode = (it->enc_->method_ >= 3) ? MAX_UV_MODE : 4; + const int max_mode = MAX_UV_MODE; int mode; + VP8MakeChroma8Preds(it); for (mode = 0; mode < max_mode; ++mode) { - const int alpha = VP8CollectHistogram(it->yuv_in_ + U_OFF, - it->yuv_p_ + VP8UVModeOffsets[mode], - 16, 16 + 4 + 4); - if (alpha > best_alpha) { + VP8Histogram histo; + int alpha; + InitHistogram(&histo); + VP8CollectHistogram(it->yuv_in_ + U_OFF_ENC, + it->yuv_p_ + VP8UVModeOffsets[mode], + 16, 16 + 4 + 4, &histo); + alpha = GetAlpha(&histo); + if (IS_BETTER_ALPHA(alpha, best_alpha)) { best_alpha = alpha; best_mode = mode; } @@ -305,7 +330,8 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) { } static void MBAnalyze(VP8EncIterator* const it, - int alphas[256], int* const uv_alpha) { + int alphas[MAX_ALPHA + 1], + int* const alpha, int* const uv_alpha) { const VP8Encoder* const enc = it->enc_; int best_alpha, best_uv_alpha; @@ -314,7 +340,7 @@ static void MBAnalyze(VP8EncIterator* const it, VP8SetSegment(it, 0); // default segment, spec-wise. best_alpha = MBAnalyzeBestIntra16Mode(it); - if (enc->method_ != 3) { + if (enc->method_ >= 5) { // We go and make a fast decision for intra4/intra16. // It's usually not a good and definitive pick, but helps seeding the stats // about level bit-cost. @@ -324,10 +350,22 @@ static void MBAnalyze(VP8EncIterator* const it, best_uv_alpha = MBAnalyzeBestUVMode(it); // Final susceptibility mix - best_alpha = (best_alpha + best_uv_alpha + 1) / 2; + best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2; + best_alpha = FinalAlphaValue(best_alpha); alphas[best_alpha]++; + it->mb_->alpha_ = best_alpha; // for later remapping. + + // Accumulate for later complexity analysis. + *alpha += best_alpha; // mixed susceptibility (not just luma) *uv_alpha += best_uv_alpha; - it->mb_->alpha_ = best_alpha; // Informative only. +} + +static void DefaultMBInfo(VP8MBInfo* const mb) { + mb->type_ = 1; // I16x16 + mb->uv_mode_ = 0; + mb->skip_ = 0; // not skipped + mb->segment_ = 0; // default segment + mb->alpha_ = 0; } //------------------------------------------------------------------------------ @@ -340,25 +378,124 @@ static void MBAnalyze(VP8EncIterator* const it, // and decide intra4/intra16, but that's usually almost always a bad choice at // this stage. -int VP8EncAnalyze(VP8Encoder* const enc) { - int ok = 1; - int alphas[256] = { 0 }; - VP8EncIterator it; - - VP8IteratorInit(enc, &it); +static void ResetAllMBInfo(VP8Encoder* const enc) { + int n; + for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) { + DefaultMBInfo(&enc->mb_info_[n]); + } + // Default susceptibilities. + enc->dqm_[0].alpha_ = 0; + enc->dqm_[0].beta_ = 0; + // Note: we can't compute this alpha_ / uv_alpha_ -> set to default value. + enc->alpha_ = 0; enc->uv_alpha_ = 0; - do { - VP8IteratorImport(&it); - MBAnalyze(&it, alphas, &enc->uv_alpha_); - ok = VP8IteratorProgress(&it, 20); - // Let's pretend we have perfect lossless reconstruction. - } while (ok && VP8IteratorNext(&it, it.yuv_in_)); - enc->uv_alpha_ /= enc->mb_w_ * enc->mb_h_; - if (ok) AssignSegments(enc, alphas); + WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_); +} + +// struct used to collect job result +typedef struct { + WebPWorker worker; + int alphas[MAX_ALPHA + 1]; + int alpha, uv_alpha; + VP8EncIterator it; + int delta_progress; +} SegmentJob; +// main work call +static int DoSegmentsJob(SegmentJob* const job, VP8EncIterator* const it) { + int ok = 1; + if (!VP8IteratorIsDone(it)) { + uint8_t tmp[32 + WEBP_ALIGN_CST]; + uint8_t* const scratch = (uint8_t*)WEBP_ALIGN(tmp); + do { + // Let's pretend we have perfect lossless reconstruction. + VP8IteratorImport(it, scratch); + MBAnalyze(it, job->alphas, &job->alpha, &job->uv_alpha); + ok = VP8IteratorProgress(it, job->delta_progress); + } while (ok && VP8IteratorNext(it)); + } return ok; } -#if defined(__cplusplus) || defined(c_plusplus) -} // extern "C" +static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) { + int i; + for (i = 0; i <= MAX_ALPHA; ++i) dst->alphas[i] += src->alphas[i]; + dst->alpha += src->alpha; + dst->uv_alpha += src->uv_alpha; +} + +// initialize the job struct with some TODOs +static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job, + int start_row, int end_row) { + WebPGetWorkerInterface()->Init(&job->worker); + job->worker.data1 = job; + job->worker.data2 = &job->it; + job->worker.hook = (WebPWorkerHook)DoSegmentsJob; + VP8IteratorInit(enc, &job->it); + VP8IteratorSetRow(&job->it, start_row); + VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w_); + memset(job->alphas, 0, sizeof(job->alphas)); + job->alpha = 0; + job->uv_alpha = 0; + // only one of both jobs can record the progress, since we don't + // expect the user's hook to be multi-thread safe + job->delta_progress = (start_row == 0) ? 20 : 0; +} + +// main entry point +int VP8EncAnalyze(VP8Encoder* const enc) { + int ok = 1; + const int do_segments = + enc->config_->emulate_jpeg_size || // We need the complexity evaluation. + (enc->segment_hdr_.num_segments_ > 1) || + (enc->method_ == 0); // for method 0, we need preds_[] to be filled. + if (do_segments) { + const int last_row = enc->mb_h_; + // We give a little more than a half work to the main thread. + const int split_row = (9 * last_row + 15) >> 4; + const int total_mb = last_row * enc->mb_w_; +#ifdef WEBP_USE_THREAD + const int kMinSplitRow = 2; // minimal rows needed for mt to be worth it + const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow); +#else + const int do_mt = 0; #endif + const WebPWorkerInterface* const worker_interface = + WebPGetWorkerInterface(); + SegmentJob main_job; + if (do_mt) { + SegmentJob side_job; + // Note the use of '&' instead of '&&' because we must call the functions + // no matter what. + InitSegmentJob(enc, &main_job, 0, split_row); + InitSegmentJob(enc, &side_job, split_row, last_row); + // we don't need to call Reset() on main_job.worker, since we're calling + // WebPWorkerExecute() on it + ok &= worker_interface->Reset(&side_job.worker); + // launch the two jobs in parallel + if (ok) { + worker_interface->Launch(&side_job.worker); + worker_interface->Execute(&main_job.worker); + ok &= worker_interface->Sync(&side_job.worker); + ok &= worker_interface->Sync(&main_job.worker); + } + worker_interface->End(&side_job.worker); + if (ok) MergeJobs(&side_job, &main_job); // merge results together + } else { + // Even for single-thread case, we use the generic Worker tools. + InitSegmentJob(enc, &main_job, 0, last_row); + worker_interface->Execute(&main_job.worker); + ok &= worker_interface->Sync(&main_job.worker); + } + worker_interface->End(&main_job.worker); + if (ok) { + enc->alpha_ = main_job.alpha / total_mb; + enc->uv_alpha_ = main_job.uv_alpha / total_mb; + AssignSegments(enc, main_job.alphas); + } + } else { // Use only one default segment. + ResetAllMBInfo(enc); + } + return ok; +} + |