thirdparty/thekla_atlas/nvmath/nvmath.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337

// This code is in the public domain -- castanyo@yahoo.es

#pragma once
#ifndef NV_MATH_H
#define NV_MATH_H

#include "nvcore/nvcore.h"
#include "nvcore/Debug.h"   // nvDebugCheck
#include "nvcore/Utils.h"   // max, clamp

#include <math.h>

#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
#include <float.h>  // finite, isnan
#endif

#if NV_CPU_X86 || NV_CPU_X86_64
    //#include <intrin.h>
    #include <xmmintrin.h>
#endif


// Function linkage
#if NVMATH_SHARED
#ifdef NVMATH_EXPORTS
#define NVMATH_API DLL_EXPORT
#define NVMATH_CLASS DLL_EXPORT_CLASS
#else
#define NVMATH_API DLL_IMPORT
#define NVMATH_CLASS DLL_IMPORT
#endif
#else // NVMATH_SHARED
#define NVMATH_API
#define NVMATH_CLASS
#endif // NVMATH_SHARED

// Set some reasonable defaults.
#ifndef NV_USE_ALTIVEC
#   define NV_USE_ALTIVEC NV_CPU_PPC
//#   define NV_USE_ALTIVEC defined(__VEC__)
#endif

#ifndef NV_USE_SSE
#   if NV_CPU_X86_64
        // x64 always supports at least SSE2
#       define NV_USE_SSE 2
#   elif NV_CC_MSVC && defined(_M_IX86_FP)
        // Also on x86 with the /arch:SSE flag in MSVC.
#       define NV_USE_SSE _M_IX86_FP       // 1=SSE, 2=SS2
#   elif defined(__SSE__)
#       define NV_USE_SSE 1
#   elif defined(__SSE2__)
#       define NV_USE_SSE 2
#   else
        // Otherwise we assume no SSE.
#       define NV_USE_SSE 0
#   endif
#endif


// Internally set NV_USE_SIMD when either altivec or sse is available.
#if NV_USE_ALTIVEC && NV_USE_SSE
#	error "Cannot enable both altivec and sse!"
#endif


#ifndef PI
#define PI                  float(3.1415926535897932384626433833)
#endif

#define NV_EPSILON          (0.0001f)
#define NV_NORMAL_EPSILON   (0.001f)

/*
#define SQ(r)               ((r)*(r))

#define SIGN_BITMASK        0x80000000

/// Integer representation of a floating-point value.
#define IR(x)               ((uint32 &)(x))

/// Absolute integer representation of a floating-point value
#define AIR(x)              (IR(x) & 0x7fffffff)

/// Floating-point representation of an integer value.
#define FR(x)               ((float&)(x))

/// Integer-based comparison of a floating point value.
/// Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.
#define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK)
*/

extern "C" inline double sqrt_assert(const double f)
{
    nvDebugCheck(f >= 0.0f);
    return sqrt(f);
}

inline float sqrtf_assert(const float f)
{
    nvDebugCheck(f >= 0.0f);
    return sqrtf(f);
}

extern "C" inline double acos_assert(const double f) 
{
    nvDebugCheck(f >= -1.0f && f <= 1.0f);
    return acos(f);
}

inline float acosf_assert(const float f)
{
    nvDebugCheck(f >= -1.0f && f <= 1.0f);
    return acosf(f);
}

extern "C" inline double asin_assert(const double f)
{
    nvDebugCheck(f >= -1.0f && f <= 1.0f);
    return asin(f);
}

inline float asinf_assert(const float f)
{
    nvDebugCheck(f >= -1.0f && f <= 1.0f);
    return asinf(f);
}

// Replace default functions with asserting ones.
#if !NV_CC_MSVC || (NV_CC_MSVC && (_MSC_VER < 1700))    // IC: Apparently this was causing problems in Visual Studio 2012. See Issue 194: https://code.google.com/p/nvidia-texture-tools/issues/detail?id=194
#define sqrt sqrt_assert
#define sqrtf sqrtf_assert
#define acos acos_assert
#define acosf acosf_assert
#define asin asin_assert
#define asinf asinf_assert
#endif

#if NV_CC_MSVC
NV_FORCEINLINE float log2f(float x)
{
    nvCheck(x >= 0);
    return logf(x) / logf(2.0f);
}
NV_FORCEINLINE float exp2f(float x)
{
    return powf(2.0f, x);
}
#endif

namespace nv
{
    inline float toRadian(float degree) { return degree * (PI / 180.0f); }
    inline float toDegree(float radian) { return radian * (180.0f / PI); }

    // Robust floating point comparisons:
    // http://realtimecollisiondetection.net/blog/?p=89
    inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON)
    {
        //return fabs(f0-f1) <= epsilon;
        return fabs(f0-f1) <= epsilon * max3(1.0f, fabsf(f0), fabsf(f1));
    }

    inline bool isZero(const float f, const float epsilon = NV_EPSILON)
    {
        return fabs(f) <= epsilon;
    }

    inline bool isFinite(const float f)
    {
#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
        return _finite(f) != 0;
#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS
        return isfinite(f);
#elif NV_OS_LINUX
        return finitef(f);
#else
#   error "isFinite not supported"
#endif
        //return std::isfinite (f);
        //return finite (f);
    }

    inline bool isNan(const float f)
    {
#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
        return _isnan(f) != 0;
#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS
        return isnan(f);
#elif NV_OS_LINUX
        return isnanf(f);
#else
#   error "isNan not supported"
#endif
    }

    inline uint log2(uint32 i)
    {
        uint32 value = 0;
        while( i >>= 1 ) value++;
        return value;
    }

    inline uint log2(uint64 i)
    {
        uint64 value = 0;
        while (i >>= 1) value++;
        return U32(value);
    }

    inline float lerp(float f0, float f1, float t)
    {
        const float s = 1.0f - t;
        return f0 * s + f1 * t;
    }

    inline float square(float f) { return f * f; }
    inline int square(int i) { return i * i; }

    inline float cube(float f) { return f * f * f; }
    inline int cube(int i) { return i * i * i; }

    inline float frac(float f)
    {
        return f - floor(f);
    }

    inline float floatRound(float f)
    {
        return floorf(f + 0.5f);
    }

    // Eliminates negative zeros from a float array.
    inline void floatCleanup(float * fp, int n)
    {
        for (int i = 0; i < n; i++) {
            //nvDebugCheck(isFinite(fp[i]));
            union { float f; uint32 i; } x = { fp[i] };
            if (x.i == 0x80000000) fp[i] = 0.0f;
        }
    }

    inline float saturate(float f) {
        return clamp(f, 0.0f, 1.0f);
    }

    inline float linearstep(float edge0, float edge1, float x) {
        // Scale, bias and saturate x to 0..1 range
        return saturate((x - edge0) / (edge1 - edge0));
    }

    inline float smoothstep(float edge0, float edge1, float x) {
        x = linearstep(edge0, edge1, x); 

        // Evaluate polynomial
        return x*x*(3 - 2*x);
    }

    inline int sign(float a)
    {
        return (a > 0) - (a < 0);
        //if (a > 0.0f) return 1;
        //if (a < 0.0f) return -1;
        //return 0;
    }

    union Float754 {
        unsigned int raw;
        float value;
        struct {
        #if NV_BIG_ENDIAN
            unsigned int negative:1;
            unsigned int biasedexponent:8;
            unsigned int mantissa:23;
        #else
            unsigned int mantissa:23;
            unsigned int biasedexponent:8;
            unsigned int negative:1;
        #endif
        } field;
    };

    // Return the exponent of x ~ Floor(Log2(x))
    inline int floatExponent(float x)
    {
        Float754 f;
        f.value = x;
        return (f.field.biasedexponent - 127);
    }


    // FloatRGB9E5
    union Float3SE {
        uint32 v;
        struct {
        #if NV_BIG_ENDIAN
            uint32 e : 5;
            uint32 zm : 9;
            uint32 ym : 9;
            uint32 xm : 9;
        #else
            uint32 xm : 9;
            uint32 ym : 9;
            uint32 zm : 9;
            uint32 e : 5;
        #endif
        };
    };

    // FloatR11G11B10
    union Float3PK {
        uint32 v;
        struct {
        #if NV_BIG_ENDIAN
            uint32 ze : 5;
            uint32 zm : 5;
            uint32 ye : 5;
            uint32 ym : 6;
            uint32 xe : 5;
            uint32 xm : 6;
        #else
            uint32 xm : 6;
            uint32 xe : 5;
            uint32 ym : 6;
            uint32 ye : 5;
            uint32 zm : 5;
            uint32 ze : 5;
        #endif
        };
    };


} // nv

#endif // NV_MATH_H