diff options
author | K. S. Ernest (iFire) Lee <ernest.lee@chibifire.com> | 2022-01-18 04:39:55 -0800 |
---|---|---|
committer | K. S. Ernest (iFire) Lee <ernest.lee@chibifire.com> | 2022-02-04 15:15:26 -0800 |
commit | 419b342a9a716426159f7a51ae17390386ecc884 (patch) | |
tree | c42a9c3a41e94fa501ca09d0605ee44b88c3c9ef /thirdparty/cvtt/ConvectionKernels_IndexSelector.h | |
parent | 992794e44a47a7adddce589bd68ad71402d5ba66 (diff) |
Faster CVTT by reducing quality.
Make BC6 and BC7 CVTT faster while still having better quality than DXT5.
Diffstat (limited to 'thirdparty/cvtt/ConvectionKernels_IndexSelector.h')
-rw-r--r-- | thirdparty/cvtt/ConvectionKernels_IndexSelector.h | 147 |
1 files changed, 147 insertions, 0 deletions
diff --git a/thirdparty/cvtt/ConvectionKernels_IndexSelector.h b/thirdparty/cvtt/ConvectionKernels_IndexSelector.h new file mode 100644 index 0000000000..0f9d209183 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_IndexSelector.h @@ -0,0 +1,147 @@ +#pragma once +#ifndef __CVTT_INDEXSELECTOR_H__ +#define __CVTT_INDEXSELECTOR_H__ + +#include "ConvectionKernels_ParallelMath.h" + +namespace cvtt +{ + namespace Internal + { + extern const ParallelMath::UInt16 g_weightReciprocals[17]; + + template<int TVectorSize> + class IndexSelector + { + public: + typedef ParallelMath::Float MFloat; + typedef ParallelMath::UInt16 MUInt16; + typedef ParallelMath::UInt15 MUInt15; + typedef ParallelMath::SInt16 MSInt16; + typedef ParallelMath::AInt16 MAInt16; + typedef ParallelMath::SInt32 MSInt32; + typedef ParallelMath::UInt31 MUInt31; + + + template<class TInterpolationEPType, class TColorEPType> + void Init(const float *channelWeights, const TInterpolationEPType interpolationEndPoints[2][TVectorSize], const TColorEPType colorSpaceEndpoints[2][TVectorSize], int range) + { + // In BC6H, the interpolation endpoints are higher-precision than the endpoints in color space. + // We need to select indexes using the color-space endpoints. + + m_isUniform = true; + for (int ch = 1; ch < TVectorSize; ch++) + { + if (channelWeights[ch] != channelWeights[0]) + m_isUniform = false; + } + + // To work with channel weights, we need something where: + // pxDiff = px - ep[0] + // epDiff = ep[1] - ep[0] + // + // weightedEPDiff = epDiff * channelWeights + // normalizedWeightedAxis = weightedEPDiff / len(weightedEPDiff) + // normalizedIndex = dot(pxDiff * channelWeights, normalizedWeightedAxis) / len(weightedEPDiff) + // index = normalizedIndex * maxValue + // + // Equivalent to: + // axis = channelWeights * maxValue * epDiff * channelWeights / lenSquared(epDiff * channelWeights) + // index = dot(axis, pxDiff) + + for (int ep = 0; ep < 2; ep++) + for (int ch = 0; ch < TVectorSize; ch++) + m_endPoint[ep][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(interpolationEndPoints[ep][ch]); + + m_range = range; + m_maxValue = static_cast<float>(range - 1); + + MFloat epDiffWeighted[TVectorSize]; + for (int ch = 0; ch < TVectorSize; ch++) + { + m_origin[ch] = ParallelMath::ToFloat(colorSpaceEndpoints[0][ch]); + MFloat opposingOriginCh = ParallelMath::ToFloat(colorSpaceEndpoints[1][ch]); + epDiffWeighted[ch] = (opposingOriginCh - m_origin[ch]) * channelWeights[ch]; + } + + MFloat lenSquared = epDiffWeighted[0] * epDiffWeighted[0]; + for (int ch = 1; ch < TVectorSize; ch++) + lenSquared = lenSquared + epDiffWeighted[ch] * epDiffWeighted[ch]; + + ParallelMath::MakeSafeDenominator(lenSquared); + + MFloat maxValueDividedByLengthSquared = ParallelMath::MakeFloat(m_maxValue) / lenSquared; + + for (int ch = 0; ch < TVectorSize; ch++) + m_axis[ch] = epDiffWeighted[ch] * channelWeights[ch] * maxValueDividedByLengthSquared; + } + + template<bool TSigned> + void Init(const float channelWeights[TVectorSize], const MUInt15 endPoints[2][TVectorSize], int range) + { + MAInt16 converted[2][TVectorSize]; + for (int epi = 0; epi < 2; epi++) + for (int ch = 0; ch < TVectorSize; ch++) + converted[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(endPoints[epi][ch]); + + Init<MUInt15, MUInt15>(channelWeights, endPoints, endPoints, range); + } + + void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel, int numRealChannels) + { + MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); + + for (int ch = 0; ch < numRealChannels; ch++) + { + MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(64) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch]))); + MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch]))); + pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(32), 6)); + } + } + + void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel, int numRealChannels) + { + MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 64, 7)); + + for (int ch = 0; ch < numRealChannels; ch++) + { + MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(256) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch]))); + MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch]))); + pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(128), 8)); + } + } + + void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel) + { + ReconstructLDR_BC7(index, pixel, TVectorSize); + } + + void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel) + { + ReconstructLDRPrecise(index, pixel, TVectorSize); + } + + MUInt15 SelectIndexLDR(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const + { + MFloat dist = (pixel[0] - m_origin[0]) * m_axis[0]; + for (int ch = 1; ch < TVectorSize; ch++) + dist = dist + (pixel[ch] - m_origin[ch]) * m_axis[ch]; + + return ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(dist, 0.0f, m_maxValue), rtn); + } + + protected: + MAInt16 m_endPoint[2][TVectorSize]; + + private: + MFloat m_origin[TVectorSize]; + MFloat m_axis[TVectorSize]; + int m_range; + float m_maxValue; + bool m_isUniform; + }; + } +} + +#endif + |