summary | refs | log | tree | commit | diff
path: root/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h')
-rw-r--r-- thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h 181
1 files changed, 181 insertions, 0 deletions
diff --git a/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h b/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h
new file mode 100644
index 0000000000..c1276553b2
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h
@@ -0,0 +1,181 @@
+#pragma once
+#ifndef __CVTT_ENDPOINTREFINER_H__
+#define __CVTT_ENDPOINTREFINER_H__
+
+#include "ConvectionKernels_ParallelMath.h"
+
+namespace cvtt
+{
+ namespace Internal
+ {
+ // Solve for a, b where v = a*t + b
+ // This allows endpoints to be mapped to where T=0 and T=1
+ // Least squares from totals:
+ // a = (tv - t*v/w)/(tt - t*t/w)
+ // b = (v - a*t)/w
+        template<int TVectorSize>
+        class EndpointRefiner
+        {
+        public:
+            // Shorthand for the ParallelMath types used by this class.
+            typedef ParallelMath::Float MFloat;
+            typedef ParallelMath::UInt16 MUInt16;
+            typedef ParallelMath::UInt15 MUInt15;
+            typedef ParallelMath::AInt16 MAInt16;
+            typedef ParallelMath::SInt16 MSInt16;
+            typedef ParallelMath::SInt32 MSInt32;
+
+            // Running totals over every contributed pixel, where t is the
+            // pixel's index multiplied by m_rcpMaxIndex.
+            MFloat m_tv[TVectorSize];   // per channel: sum of t * (weighted) value
+            MFloat m_v[TVectorSize];    // per channel: sum of (weighted) value
+            MFloat m_tt;                // sum of weight * t * t
+            MFloat m_t;                 // sum of weight * t
+            MFloat m_w;                 // sum of the weights passed to ContributePW
+            int m_wu;                   // number of ContributeUnweightedPW calls
+
+            float m_rcpMaxIndex;                        // 1 / (indexRange - 1)
+            float m_channelWeights[TVectorSize];        // copy of the weights given to Init
+            float m_rcpChannelWeights[TVectorSize];     // 1 / weight, or 1 where the weight is 0
+
+            // Clears all totals and records the index scale and channel weights.
+            //   indexRange     - number of index values; indices are scaled by 1 / (indexRange - 1)
+            //   channelWeights - per-channel weights, later divided back out by GetRefinedEndpoints
+            void Init(int indexRange, const float channelWeights[TVectorSize])
+            {
+                m_wu = 0;
+                m_rcpMaxIndex = 1.0f / static_cast<float>(indexRange - 1);
+
+                m_w = ParallelMath::MakeFloatZero();
+                m_t = ParallelMath::MakeFloatZero();
+                m_tt = ParallelMath::MakeFloatZero();
+
+                for (int ch = 0; ch < TVectorSize; ch++)
+                {
+                    m_v[ch] = ParallelMath::MakeFloatZero();
+                    m_tv[ch] = ParallelMath::MakeFloatZero();
+
+                    float channelWeight = channelWeights[ch];
+                    m_channelWeights[ch] = channelWeight;
+
+                    // A zero weight cannot be inverted, so its reciprocal is stored as 1
+                    if (channelWeight != 0.0f)
+                        m_rcpChannelWeights[ch] = 1.0f / channelWeight;
+                    else
+                        m_rcpChannelWeights[ch] = 1.0f;
+                }
+            }
+
+            // Adds one pixel to the totals, scaled by an explicit weight.
+            //   pwFloatPixel - TVectorSize channel values for the pixel
+            //   index        - index assigned to the pixel
+            //   weight       - multiplier applied to this pixel's contribution
+            void ContributePW(const MFloat *pwFloatPixel, const MUInt15 &index, const MFloat &weight)
+            {
+                MFloat param = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
+                MFloat weightedParam = weight * param;
+
+                m_w = m_w + weight;
+                m_t = m_t + weightedParam;
+                m_tt = m_tt + weightedParam * param;
+
+                for (int ch = 0; ch < TVectorSize; ch++)
+                {
+                    MFloat weightedValue = pwFloatPixel[ch] * weight;
+
+                    m_v[ch] = m_v[ch] + weightedValue;
+                    m_tv[ch] = m_tv[ch] + param * weightedValue;
+                }
+            }
+
+            // Adds one pixel to the totals with an implicit weight of one.
+            // Only the first numRealChannels channels are read and accumulated;
+            // the pixel is counted in m_wu rather than m_w.
+            void ContributeUnweightedPW(const MFloat *pwFloatPixel, const MUInt15 &index, int numRealChannels)
+            {
+                MFloat param = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
+
+                m_wu++;
+                m_t = m_t + param;
+                m_tt = m_tt + param * param;
+
+                for (int ch = 0; ch < numRealChannels; ch++)
+                {
+                    MFloat value = pwFloatPixel[ch];
+
+                    m_v[ch] = m_v[ch] + value;
+                    m_tv[ch] = m_tv[ch] + param * value;
+                }
+            }
+
+            // Same as the three-argument overload, using every channel.
+            void ContributeUnweightedPW(const MFloat *floatPixel, const MUInt15 &index)
+            {
+                ContributeUnweightedPW(floatPixel, index, TVectorSize);
+            }
+
+            // Computes both endpoints from the accumulated totals.
+            //
+            // Least-squares fit of v = slope*t + intercept:
+            //   slope     = (tv - t*v/w) / (tt - t*t/w)
+            //   intercept = (v - slope*t) / w
+            // endPoint[0] receives the line at t=0 (intercept) and endPoint[1]
+            // the line at t=1 (slope + intercept), each multiplied by the
+            // channel's reciprocal weight.
+            void GetRefinedEndpoints(MFloat endPoint[2][TVectorSize])
+            {
+                // Weighted and unweighted contributions share one total weight
+                MFloat totalWeight = m_w + ParallelMath::MakeFloat(static_cast<float>(m_wu));
+
+                ParallelMath::MakeSafeDenominator(totalWeight);
+                MFloat rcpTotalWeight = ParallelMath::Reciprocal(totalWeight);
+
+                MFloat slopeDenom = (m_tt * totalWeight - m_t * m_t) * rcpTotalWeight;
+
+                // Where the slope denominator is zero, remember that and divide
+                // by one instead; those results are replaced further down.
+                ParallelMath::FloatCompFlag degenerate = ParallelMath::Equal(slopeDenom, ParallelMath::MakeFloatZero());
+                ParallelMath::ConditionalSet(slopeDenom, degenerate, ParallelMath::MakeFloat(1.0f));
+
+                for (int ch = 0; ch < TVectorSize; ch++)
+                {
+                    MFloat slope = (m_tv[ch] - m_t * m_v[ch] * rcpTotalWeight) / slopeDenom;
+                    MFloat intercept = (m_v[ch] - slope * m_t) * rcpTotalWeight;
+
+                    MFloat low = intercept;
+                    MFloat high = slope + intercept;
+
+                    // Degenerate case: both endpoints become v / w
+                    MFloat average = m_v[ch] * rcpTotalWeight;
+                    ParallelMath::ConditionalSet(low, degenerate, average);
+                    ParallelMath::ConditionalSet(high, degenerate, low);
+
+                    // Unweight
+                    float unweight = m_rcpChannelWeights[ch];
+
+                    endPoint[0][ch] = low * unweight;
+                    endPoint[1][ch] = high * unweight;
+                }
+            }
+
+            // Refined endpoints clamped to [0, 255] and rounded to integers.
+            // numRealChannels is not consulted here: all TVectorSize channels
+            // of both endpoints are written.
+            void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode)
+            {
+                MFloat refined[2][TVectorSize];
+                GetRefinedEndpoints(refined);
+
+                for (int epi = 0; epi < 2; epi++)
+                {
+                    for (int ch = 0; ch < TVectorSize; ch++)
+                    {
+                        MFloat clamped = ParallelMath::Clamp(refined[epi][ch], 0.0f, 255.0f);
+                        endPoint[epi][ch] = ParallelMath::RoundAndConvertToU15(clamped, roundingMode);
+                    }
+                }
+            }
+
+            // Same as the three-argument overload, passing TVectorSize as the channel count.
+            void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode)
+            {
+                GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode);
+            }
+
+            // Refined endpoints clamped and rounded to integers for HDR use.
+            // Signed mode clamps to [-31743, 31743]; unsigned mode clamps to
+            // [0, 31743]. Either way the result is stored as MSInt16.
+            void GetRefinedEndpointsHDR(MSInt16 endPoint[2][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode)
+            {
+                const float limit = 31743.0f;
+
+                MFloat refined[2][TVectorSize];
+                GetRefinedEndpoints(refined);
+
+                for (int epi = 0; epi < 2; epi++)
+                {
+                    for (int ch = 0; ch < TVectorSize; ch++)
+                    {
+                        MFloat value = refined[epi][ch];
+
+                        if (!isSigned)
+                        {
+                            MFloat clamped = ParallelMath::Clamp(value, 0.0f, limit);
+                            endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToU15(clamped, roundingMode));
+                        }
+                        else
+                        {
+                            MFloat clamped = ParallelMath::Clamp(value, -limit, limit);
+                            endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToS16(clamped, roundingMode));
+                        }
+                    }
+                }
+            }
+        };
+ }
+}
+
+#endif
+