summaryrefslogtreecommitdiff
path: root/thirdparty/cvtt/ConvectionKernels_AggregatedError.h
blob: 9f9356a3451d0c913f0a760ec041d17a4f537380 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#pragma once
#ifndef __CVTT_AGGREGATEDERROR_H__
#define __CVTT_AGGREGATEDERROR_H__

#include "ConvectionKernels_ParallelMath.h"

namespace cvtt
{
    namespace Internal
    {
        template<int TVectorSize>
        class AggregatedError
        {
        public:
            typedef ParallelMath::UInt16 MUInt16;
            typedef ParallelMath::UInt31 MUInt31;
            typedef ParallelMath::Float MFloat;

            AggregatedError()
            {
                for (int ch = 0; ch < TVectorSize; ch++)
                    m_errorUnweighted[ch] = ParallelMath::MakeUInt31(0);
            }

            void Add(const MUInt16 &channelErrorUnweighted, int ch)
            {
                m_errorUnweighted[ch] = m_errorUnweighted[ch] + ParallelMath::ToUInt31(channelErrorUnweighted);
            }

            MFloat Finalize(uint32_t flags, const float channelWeightsSq[TVectorSize]) const
            {
                if (flags & cvtt::Flags::Uniform)
                {
                    MUInt31 total = m_errorUnweighted[0];
                    for (int ch = 1; ch < TVectorSize; ch++)
                        total = total + m_errorUnweighted[ch];
                    return ParallelMath::ToFloat(total);
                }
                else
                {
                    MFloat total = ParallelMath::ToFloat(m_errorUnweighted[0]) * channelWeightsSq[0];
                    for (int ch = 1; ch < TVectorSize; ch++)
                        total = total + ParallelMath::ToFloat(m_errorUnweighted[ch]) * channelWeightsSq[ch];
                    return total;
                }
            }

        private:
            MUInt31 m_errorUnweighted[TVectorSize];
        };
    }
}

#endif