summaryrefslogtreecommitdiff
path: root/thirdparty/cvtt/ConvectionKernels_IndexSelectorHDR.h
blob: 84795cd68940f9235b85a58cfb01156d8b9db8c7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#pragma once
#ifndef __CVTT_INDEXSELECTORHDR_H__
#define __CVTT_INDEXSELECTORHDR_H__

#include "ConvectionKernels_ParallelMath.h"
#include "ConvectionKernels_IndexSelector.h"

namespace cvtt
{
    namespace Internal
    {
        // Declaration only; the definition is not part of this header.
        // IndexSelectorHDR passes each interpolated signed channel value (narrowed to
        // SInt16) through this function before storing it as the reconstructed pixel.
        ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v);
        // Declaration only; the definition is not part of this header.
        // Unsigned counterpart: takes the interpolated value as UInt16 and returns a
        // UInt15, which IndexSelectorHDR then casts to SInt16 for the output pixel.
        ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v);

        // Index selector for HDR endpoint pairs, layered on IndexSelector<TVectorSize>.
        //
        // Adds three things to the base class:
        //   - reconstruction of a pixel from an index (signed and unsigned variants),
        //   - an optional per-lane index inversion (see ConditionalInvert),
        //   - an exhaustive ("slow") index search over a table of reconstructed values,
        //     next to a "fast" search that defers to the base class SelectIndexLDR.
        //
        // Endpoints are read from this->m_endPoint, which this class never writes.
        // NOTE(review): presumably they are set through the base class before InitHDR
        // is called - confirm against IndexSelector.
        template<int TVectorSize>
        class IndexSelectorHDR : public IndexSelector<TVectorSize>
        {
        public:
            typedef ParallelMath::UInt15 MUInt15;
            typedef ParallelMath::UInt16 MUInt16;
            typedef ParallelMath::UInt31 MUInt31;
            typedef ParallelMath::SInt16 MSInt16;
            typedef ParallelMath::SInt32 MSInt32;
            typedef ParallelMath::Float MFloat;

        private:

            // Mirrors an index around the index range (m_maxValueMinusOne - anIndex)
            // in every lane where m_isInverted is set; other lanes keep anIndex.
            MUInt15 InvertSingle(const MUInt15& anIndex) const
            {
                MUInt15 mirrored = m_maxValueMinusOne - anIndex;
                return ParallelMath::Select(m_isInverted, mirrored, anIndex);
            }

            // Interpolation weight for an (uninverted) index:
            //   (g_weightReciprocals[m_range] * index + 256) >> 9
            // Shared by the signed and unsigned reconstruction paths.
            MUInt15 InterpolationWeight(const MUInt15 &index) const
            {
                return ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));
            }

            // Reconstructs all TVectorSize channels for an index that is already in
            // uninverted order, treating the endpoints as signed 16-bit values.
            // Per channel: ((64 - w) * ep0 + w * ep1 + 32) >> 6, then UnscaleHDRValueSigned.
            void ReconstructHDRSignedUninverted(const MUInt15 &index, MSInt16* pixel) const
            {
                MUInt15 weight = InterpolationWeight(index);

                for (int ch = 0; ch < TVectorSize; ch++)
                {
                    MSInt16 low = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[0][ch]);
                    MSInt16 high = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[1][ch]);

                    // Weighted blend of the two endpoints in 32-bit precision.
                    MSInt32 blended = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), low) + ParallelMath::XMultiply(weight, high);

                    // Add half of the divisor before shifting so the result is rounded.
                    blended = ParallelMath::RightShift(blended + ParallelMath::MakeSInt32(32), 6);

                    pixel[ch] = UnscaleHDRValueSigned(ParallelMath::ToSInt16(blended));
                }
            }

            // Unsigned counterpart of ReconstructHDRSignedUninverted: endpoints are
            // treated as unsigned 16-bit values and the blend is done in UInt31.
            // The unscaled UInt15 result is cast to SInt16 for the output array.
            void ReconstructHDRUnsignedUninverted(const MUInt15 &index, MSInt16* pixel) const
            {
                MUInt15 weight = InterpolationWeight(index);

                for (int ch = 0; ch < TVectorSize; ch++)
                {
                    MUInt16 low = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[0][ch]);
                    MUInt16 high = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[1][ch]);

                    // Weighted blend of the two endpoints.
                    MUInt31 blended = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), low) + ParallelMath::XMultiply(weight, high);

                    // Add half of the divisor before shifting so the result is rounded.
                    blended = ParallelMath::RightShift(blended + ParallelMath::MakeUInt31(32), 6);

                    pixel[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::ToUInt16(blended)));
                }
            }

            // Squared difference between one channel of the pixel and the same channel
            // of table entry 'index'.
            MFloat ErrorForInterpolatorComponent(int index, int ch, const MFloat *pixel) const
            {
                MFloat delta = pixel[ch] - m_reconstructedInterpolators[index][ch];
                return delta * delta;
            }

            // Sum of the per-channel squared differences against table entry 'index',
            // accumulated in channel order starting from channel 0.
            MFloat ErrorForInterpolator(int index, const MFloat *pixel) const
            {
                MFloat total = ErrorForInterpolatorComponent(index, 0, pixel);
                for (int ch = 1; ch < TVectorSize; ch++)
                    total = total + ErrorForInterpolatorComponent(index, ch, pixel);
                return total;
            }

        public:

            // Prepares the selector for HDR use.
            //
            //   range          - number of index values; asserted to be at most 16, the
            //                    capacity of the reconstruction table.
            //   isSigned       - picks the signed or unsigned reconstruction when the
            //                    table is filled.
            //   fastIndexing   - when true the reconstruction table is NOT filled, so
            //                    SelectIndexHDRSlow (which reads that table) must not be
            //                    relied on afterwards.
            //   channelWeights - per-channel multipliers applied to the table entries;
            //                    read only when the table is filled.
            //
            // Also resets the inversion flag to false.
            void InitHDR(int range, bool isSigned, bool fastIndexing, const float *channelWeights)
            {
                assert(range <= 16);

                m_range = range;

                m_isInverted = ParallelMath::MakeBoolInt16(false);
                m_maxValueMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(range - 1));

                // The table is only consulted by the exhaustive search.
                if (fastIndexing)
                    return;

                for (int entry = 0; entry < range; entry++)
                {
                    const MUInt15 entryIndex = ParallelMath::MakeUInt15(static_cast<uint16_t>(entry));
                    MSInt16 reconstructed[TVectorSize];

                    if (isSigned)
                        ReconstructHDRSignedUninverted(entryIndex, reconstructed);
                    else
                        ReconstructHDRUnsignedUninverted(entryIndex, reconstructed);

                    // Store each channel converted to float and scaled by its weight.
                    for (int ch = 0; ch < TVectorSize; ch++)
                        m_reconstructedInterpolators[entry][ch] = ParallelMath::TwosCLHalfToFloat(reconstructed[ch]) * channelWeights[ch];
                }
            }

            // Reconstructs a pixel (signed endpoints) from an index expressed in the
            // current, possibly inverted, ordering.
            void ReconstructHDRSigned(const MUInt15 &index, MSInt16* pixel) const
            {
                ReconstructHDRSignedUninverted(InvertSingle(index), pixel);
            }

            // Reconstructs a pixel (unsigned endpoints) from an index expressed in the
            // current, possibly inverted, ordering.
            void ReconstructHDRUnsigned(const MUInt15 &index, MSInt16* pixel) const
            {
                ReconstructHDRUnsignedUninverted(InvertSingle(index), pixel);
            }

            // Replaces the per-lane inversion flag. Lanes where 'invert' is set have
            // their indices mirrored by every later reconstruct/select call.
            void ConditionalInvert(const ParallelMath::Int16CompFlag &invert)
            {
                m_isInverted = invert;
            }

            // Exhaustive search: evaluates every table entry in [0, m_range) and keeps,
            // per lane, the first entry with the strictly smallest error. The winning
            // index is passed through InvertSingle before being returned.
            // Reads the table filled by InitHDR with fastIndexing == false.
            MUInt15 SelectIndexHDRSlow(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope*) const
            {
                MUInt15 bestIndex = ParallelMath::MakeUInt15(0);
                MFloat lowestError = ErrorForInterpolator(0, pixel);

                for (int candidate = 1; candidate < m_range; candidate++)
                {
                    MFloat candidateError = ErrorForInterpolator(candidate, pixel);
                    ParallelMath::FloatCompFlag improves = ParallelMath::Less(candidateError, lowestError);
                    ParallelMath::ConditionalSet(bestIndex, ParallelMath::FloatFlagToInt16(improves), ParallelMath::MakeUInt15(static_cast<uint16_t>(candidate)));
                    lowestError = ParallelMath::Min(lowestError, candidateError);
                }

                return InvertSingle(bestIndex);
            }

            // Fast search: takes the index chosen by the base class SelectIndexLDR and
            // passes it through InvertSingle.
            MUInt15 SelectIndexHDRFast(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const
            {
                return InvertSingle(this->SelectIndexLDR(pixel, rtn));
            }

        private:
            // Reconstructed, weighted value of every index; filled by InitHDR only
            // when fastIndexing is false.
            MFloat m_reconstructedInterpolators[16][TVectorSize];
            // Per-lane flag: whether indices are mirrored.
            ParallelMath::Int16CompFlag m_isInverted;
            // Holds range - 1 (the largest index) as set by InitHDR.
            MUInt15 m_maxValueMinusOne;
            // Number of index values, as passed to InitHDR.
            int m_range;
        };
    }
}
#endif