diff options
Diffstat (limited to 'thirdparty/libwebp/dsp/enc.c')
-rw-r--r-- | thirdparty/libwebp/dsp/enc.c | 178 |
1 files changed, 123 insertions, 55 deletions
diff --git a/thirdparty/libwebp/dsp/enc.c b/thirdparty/libwebp/dsp/enc.c index db0e9e70ae..f31bc6de18 100644 --- a/thirdparty/libwebp/dsp/enc.c +++ b/thirdparty/libwebp/dsp/enc.c @@ -15,7 +15,7 @@ #include <stdlib.h> // for abs() #include "./dsp.h" -#include "../enc/vp8enci.h" +#include "../enc/vp8i_enc.h" static WEBP_INLINE uint8_t clip_8b(int v) { return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; @@ -551,6 +551,20 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) { return GetSSE(a, b, 4, 4); } +static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) { + int k, x, y; + for (k = 0; k < 4; ++k) { + uint32_t avg = 0; + for (y = 0; y < 4; ++y) { + for (x = 0; x < 4; ++x) { + avg += ref[x + y * BPS]; + } + } + dc[k] = avg; + ref += 4; // go to next 4x4 block. + } +} + //------------------------------------------------------------------------------ // Texture distortion // @@ -656,32 +670,6 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32], return nz; } -static int QuantizeBlockWHT(int16_t in[16], int16_t out[16], - const VP8Matrix* const mtx) { - int n, last = -1; - for (n = 0; n < 16; ++n) { - const int j = kZigzag[n]; - const int sign = (in[j] < 0); - const uint32_t coeff = sign ? -in[j] : in[j]; - assert(mtx->sharpen_[j] == 0); - if (coeff > mtx->zthresh_[j]) { - const uint32_t Q = mtx->q_[j]; - const uint32_t iQ = mtx->iq_[j]; - const uint32_t B = mtx->bias_[j]; - int level = QUANTDIV(coeff, iQ, B); - if (level > MAX_LEVEL) level = MAX_LEVEL; - if (sign) level = -level; - in[j] = level * (int)Q; - out[n] = level; - if (level) last = n; - } else { - out[n] = 0; - in[j] = 0; - } - } - return (last >= 0); -} - //------------------------------------------------------------------------------ // Block copy @@ -703,11 +691,51 @@ static void Copy16x8(const uint8_t* src, uint8_t* dst) { } //------------------------------------------------------------------------------ +// SSIM / PSNR -static void SSIMAccumulateClipped(const uint8_t* src1, int stride1, - const uint8_t* src2, int stride2, - int xo, int yo, int W, int H, - VP8DistoStats* const stats) { +// hat-shaped filter. Sum of coefficients is equal to 16. +static const uint32_t kWeight[2 * VP8_SSIM_KERNEL + 1] = { + 1, 2, 3, 4, 3, 2, 1 +}; +static const uint32_t kWeightSum = 16 * 16; // sum{kWeight}^2 + +static WEBP_INLINE double SSIMCalculation( + const VP8DistoStats* const stats, uint32_t N /*num samples*/) { + const uint32_t w2 = N * N; + const uint32_t C1 = 20 * w2; + const uint32_t C2 = 60 * w2; + const uint32_t C3 = 8 * 8 * w2; // 'dark' limit ~= 6 + const uint64_t xmxm = (uint64_t)stats->xm * stats->xm; + const uint64_t ymym = (uint64_t)stats->ym * stats->ym; + if (xmxm + ymym >= C3) { + const int64_t xmym = (int64_t)stats->xm * stats->ym; + const int64_t sxy = (int64_t)stats->xym * N - xmym; // can be negative + const uint64_t sxx = (uint64_t)stats->xxm * N - xmxm; + const uint64_t syy = (uint64_t)stats->yym * N - ymym; + // we descale by 8 to prevent overflow during the fnum/fden multiply. + const uint64_t num_S = (2 * (uint64_t)(sxy < 0 ? 0 : sxy) + C2) >> 8; + const uint64_t den_S = (sxx + syy + C2) >> 8; + const uint64_t fnum = (2 * xmym + C1) * num_S; + const uint64_t fden = (xmxm + ymym + C1) * den_S; + const double r = (double)fnum / fden; + assert(r >= 0. && r <= 1.0); + return r; + } + return 1.; // area is too dark to contribute meaningfully +} + +double VP8SSIMFromStats(const VP8DistoStats* const stats) { + return SSIMCalculation(stats, kWeightSum); +} + +double VP8SSIMFromStatsClipped(const VP8DistoStats* const stats) { + return SSIMCalculation(stats, stats->w); +} + +static double SSIMGetClipped_C(const uint8_t* src1, int stride1, + const uint8_t* src2, int stride2, + int xo, int yo, int W, int H) { + VP8DistoStats stats = { 0, 0, 0, 0, 0, 0 }; const int ymin = (yo - VP8_SSIM_KERNEL < 0) ? 0 : yo - VP8_SSIM_KERNEL; const int ymax = (yo + VP8_SSIM_KERNEL > H - 1) ? H - 1 : yo + VP8_SSIM_KERNEL; @@ -719,38 +747,61 @@ static void SSIMAccumulateClipped(const uint8_t* src1, int stride1, src2 += ymin * stride2; for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) { for (x = xmin; x <= xmax; ++x) { - const int s1 = src1[x]; - const int s2 = src2[x]; - stats->w += 1; - stats->xm += s1; - stats->ym += s2; - stats->xxm += s1 * s1; - stats->xym += s1 * s2; - stats->yym += s2 * s2; + const uint32_t w = kWeight[VP8_SSIM_KERNEL + x - xo] + * kWeight[VP8_SSIM_KERNEL + y - yo]; + const uint32_t s1 = src1[x]; + const uint32_t s2 = src2[x]; + stats.w += w; + stats.xm += w * s1; + stats.ym += w * s2; + stats.xxm += w * s1 * s1; + stats.xym += w * s1 * s2; + stats.yym += w * s2 * s2; } } + return VP8SSIMFromStatsClipped(&stats); } -static void SSIMAccumulate(const uint8_t* src1, int stride1, - const uint8_t* src2, int stride2, - VP8DistoStats* const stats) { +static double SSIMGet_C(const uint8_t* src1, int stride1, + const uint8_t* src2, int stride2) { + VP8DistoStats stats = { 0, 0, 0, 0, 0, 0 }; int x, y; for (y = 0; y <= 2 * VP8_SSIM_KERNEL; ++y, src1 += stride1, src2 += stride2) { for (x = 0; x <= 2 * VP8_SSIM_KERNEL; ++x) { - const int s1 = src1[x]; - const int s2 = src2[x]; - stats->w += 1; - stats->xm += s1; - stats->ym += s2; - stats->xxm += s1 * s1; - stats->xym += s1 * s2; - stats->yym += s2 * s2; + const uint32_t w = kWeight[x] * kWeight[y]; + const uint32_t s1 = src1[x]; + const uint32_t s2 = src2[x]; + stats.xm += w * s1; + stats.ym += w * s2; + stats.xxm += w * s1 * s1; + stats.xym += w * s1 * s2; + stats.yym += w * s2 * s2; } } + return VP8SSIMFromStats(&stats); +} + +//------------------------------------------------------------------------------ + +static uint32_t AccumulateSSE(const uint8_t* src1, + const uint8_t* src2, int len) { + int i; + uint32_t sse2 = 0; + assert(len <= 65535); // to ensure that accumulation fits within uint32_t + for (i = 0; i < len; ++i) { + const int32_t diff = src1[i] - src2[i]; + sse2 += diff * diff; + } + return sse2; } -VP8SSIMAccumulateFunc VP8SSIMAccumulate; -VP8SSIMAccumulateClippedFunc VP8SSIMAccumulateClipped; +//------------------------------------------------------------------------------ + +VP8SSIMGetFunc VP8SSIMGet; +VP8SSIMGetClippedFunc VP8SSIMGetClipped; +VP8AccumulateSSEFunc VP8AccumulateSSE; + +extern void VP8SSIMDspInitSSE2(void); static volatile VP8CPUInfo ssim_last_cpuinfo_used = (VP8CPUInfo)&ssim_last_cpuinfo_used; @@ -758,8 +809,17 @@ static volatile VP8CPUInfo ssim_last_cpuinfo_used = WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInit(void) { if (ssim_last_cpuinfo_used == VP8GetCPUInfo) return; - VP8SSIMAccumulate = SSIMAccumulate; - VP8SSIMAccumulateClipped = SSIMAccumulateClipped; + VP8SSIMGetClipped = SSIMGetClipped_C; + VP8SSIMGet = SSIMGet_C; + + VP8AccumulateSSE = AccumulateSSE; + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_SSE2) + if (VP8GetCPUInfo(kSSE2)) { + VP8SSIMDspInitSSE2(); + } +#endif + } ssim_last_cpuinfo_used = VP8GetCPUInfo; } @@ -783,6 +843,7 @@ VP8Metric VP8SSE16x8; VP8Metric VP8SSE4x4; VP8WMetric VP8TDisto4x4; VP8WMetric VP8TDisto16x16; +VP8MeanMetric VP8Mean16x4; VP8QuantizeBlock VP8EncQuantizeBlock; VP8Quantize2Blocks VP8EncQuantize2Blocks; VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT; @@ -795,6 +856,7 @@ extern void VP8EncDspInitAVX2(void); extern void VP8EncDspInitNEON(void); extern void VP8EncDspInitMIPS32(void); extern void VP8EncDspInitMIPSdspR2(void); +extern void VP8EncDspInitMSA(void); static volatile VP8CPUInfo enc_last_cpuinfo_used = (VP8CPUInfo)&enc_last_cpuinfo_used; @@ -820,9 +882,10 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) { VP8SSE4x4 = SSE4x4; VP8TDisto4x4 = Disto4x4; VP8TDisto16x16 = Disto16x16; + VP8Mean16x4 = Mean16x4; VP8EncQuantizeBlock = QuantizeBlock; VP8EncQuantize2Blocks = Quantize2Blocks; - VP8EncQuantizeBlockWHT = QuantizeBlockWHT; + VP8EncQuantizeBlockWHT = QuantizeBlock; VP8Copy4x4 = Copy4x4; VP8Copy16x8 = Copy16x8; @@ -858,6 +921,11 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) { VP8EncDspInitMIPSdspR2(); } #endif +#if defined(WEBP_USE_MSA) + if (VP8GetCPUInfo(kMSA)) { + VP8EncDspInitMSA(); + } +#endif } enc_last_cpuinfo_used = VP8GetCPUInfo; } |