diff options
Diffstat (limited to 'thirdparty/libwebp/src/dsp/lossless_enc_sse2.c')
-rw-r--r-- | thirdparty/libwebp/src/dsp/lossless_enc_sse2.c | 44 |
1 files changed, 12 insertions, 32 deletions
diff --git a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c index f84a9909e1..36478c4912 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c @@ -170,12 +170,13 @@ static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride, //------------------------------------------------------------------------------ +// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But +// that's ok since the histogram values are less than 1<<28 (max picture size). #define LINE_SIZE 16 // 8 or 16 static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out, int size) { int i; - assert(size % LINE_SIZE == 0); - for (i = 0; i < size; i += LINE_SIZE) { + for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) { const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]); const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]); #if (LINE_SIZE == 16) @@ -195,12 +196,14 @@ static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out, _mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3)); #endif } + for (; i < size; ++i) { + out[i] = a[i] + b[i]; + } } static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) { int i; - assert(size % LINE_SIZE == 0); - for (i = 0; i < size; i += LINE_SIZE) { + for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) { const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]); const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]); #if (LINE_SIZE == 16) @@ -220,35 +223,11 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) { _mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3)); #endif } -} -#undef LINE_SIZE - -// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But -// that's ok since the histogram values are less than 1<<28 (max picture size). -static void HistogramAdd_SSE2(const VP8LHistogram* const a, - const VP8LHistogram* const b, - VP8LHistogram* const out) { - int i; - const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_); - assert(a->palette_code_bits_ == b->palette_code_bits_); - if (b != out) { - AddVector_SSE2(a->literal_, b->literal_, out->literal_, NUM_LITERAL_CODES); - AddVector_SSE2(a->red_, b->red_, out->red_, NUM_LITERAL_CODES); - AddVector_SSE2(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES); - AddVector_SSE2(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES); - } else { - AddVectorEq_SSE2(a->literal_, out->literal_, NUM_LITERAL_CODES); - AddVectorEq_SSE2(a->red_, out->red_, NUM_LITERAL_CODES); - AddVectorEq_SSE2(a->blue_, out->blue_, NUM_LITERAL_CODES); - AddVectorEq_SSE2(a->alpha_, out->alpha_, NUM_LITERAL_CODES); - } - for (i = NUM_LITERAL_CODES; i < literal_size; ++i) { - out->literal_[i] = a->literal_[i] + b->literal_[i]; - } - for (i = 0; i < NUM_DISTANCE_CODES; ++i) { - out->distance_[i] = a->distance_[i] + b->distance_[i]; + for (; i < size; ++i) { + out[i] += a[i]; } } +#undef LINE_SIZE //------------------------------------------------------------------------------ // Entropy @@ -675,7 +654,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) { VP8LTransformColor = TransformColor_SSE2; VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE2; VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE2; - VP8LHistogramAdd = HistogramAdd_SSE2; + VP8LAddVector = AddVector_SSE2; + VP8LAddVectorEq = AddVectorEq_SSE2; VP8LCombinedShannonEntropy = CombinedShannonEntropy_SSE2; VP8LVectorMismatch = VectorMismatch_SSE2; VP8LBundleColorMap = BundleColorMap_SSE2; |