From 83b630b8c27fc3307eba36fa2b6193690bd18e4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Mon, 11 May 2020 14:36:46 +0200 Subject: thirdparty: Cleanup after #38386, document provenance and copyright Also renamed `delaunay.h` to `delaunay_2d.h` to match the class name. --- thirdparty/README.md | 19 + thirdparty/misc/r128.c | 2 + thirdparty/misc/r128.h | 2123 +++++++++++++++++++++++++++++ thirdparty/misc/stb_rect_pack.h | 628 +++++++++ thirdparty/oidn/.gitignore | 1 - thirdparty/oidn/LICENSE.txt | 202 +++ thirdparty/r128/r128.h | 2124 ------------------------------ thirdparty/stb_rect_pack/stb_rect_pack.h | 629 --------- 8 files changed, 2974 insertions(+), 2754 deletions(-) create mode 100644 thirdparty/misc/r128.c create mode 100644 thirdparty/misc/r128.h create mode 100644 thirdparty/misc/stb_rect_pack.h delete mode 100644 thirdparty/oidn/.gitignore create mode 100644 thirdparty/oidn/LICENSE.txt delete mode 100644 thirdparty/r128/r128.h delete mode 100644 thirdparty/stb_rect_pack/stb_rect_pack.h (limited to 'thirdparty') diff --git a/thirdparty/README.md b/thirdparty/README.md index e51e7d7f24..c000133fe7 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -384,11 +384,19 @@ Collection of single-file libraries used in Godot components. 
* Upstream: http://www.pcg-random.org * Version: minimal C implementation, http://www.pcg-random.org/download.html * License: Apache 2.0 +- `r128.h` + * Upstream: https://github.com/fahickman/r128 + * Version: 1.4.3 (2019) + * License: Public Domain - `smaz.{c,h}` * Upstream: https://github.com/antirez/smaz * Version: git (150e125cbae2e8fd20dd332432776ce13395d4d4, 2009) * License: BSD-3-Clause * Modifications: use `const char*` instead of `char*` for input string +- `stb_rect_pack.h` + * Upstream: https://github.com/nothings/stb + * Version: 1.00 (2019) + * License: Public Domain (Unlicense) or MIT - `triangulator.{cpp,h}` * Upstream: https://github.com/ivanfratric/polypartition (`src/polypartition.cpp`) * Version: TBD, class was renamed @@ -437,6 +445,17 @@ Files extracted from the upstream source: - LICENSE.txt +## oidn + +- Upstream: https://github.com/OpenImageDenoise/oidn +- Version: TBD +- License: Apache 2.0 + +Files extracted from upstream source: + +- TBD + + ## opus - Upstream: https://opus-codec.org diff --git a/thirdparty/misc/r128.c b/thirdparty/misc/r128.c new file mode 100644 index 0000000000..6b981aa693 --- /dev/null +++ b/thirdparty/misc/r128.c @@ -0,0 +1,2 @@ +#define R128_IMPLEMENTATION +#include "r128.h" diff --git a/thirdparty/misc/r128.h b/thirdparty/misc/r128.h new file mode 100644 index 0000000000..be7cd3024d --- /dev/null +++ b/thirdparty/misc/r128.h @@ -0,0 +1,2123 @@ +/* +r128.h: 128-bit (64.64) signed fixed-point arithmetic. Version 1.4.3 + +COMPILATION +----------- +Drop this header file somewhere in your project and include it wherever it is +needed. There is no separate .c file for this library. To get the code, in ONE +file in your project, put: + +#define R128_IMPLEMENTATION + +before you include this file. You may also provide a definition for R128_ASSERT +to force the library to use a custom assert macro. + +COMPILER/LIBRARY SUPPORT +------------------------ +This library requires a C89 compiler with support for 64-bit integers. 
If your +compiler does not support the long long data type, the R128_U64, etc. macros +must be set appropriately. On x86 and x64 targets, Intel intrinsics are used +for speed. If your compiler does not support these intrinsics, you can add +#define R128_STDC_ONLY +in your implementation file before including r128.h. + +The only C runtime library functionality used by this library is . +This can be avoided by defining an R128_ASSERT macro in your implementation +file. Since this library uses 64-bit arithmetic, this may implicitly add a +runtime library dependency on 32-bit platforms. + +C++ SUPPORT +----------- +Operator overloads are supplied for C++ files that include this file. Since all +C++ functions are declared inline (or static inline), the R128_IMPLEMENTATION +file can be either C++ or C. + +LICENSE +------- +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +#ifndef H_R128_H +#define H_R128_H + +#include + +// 64-bit integer support +// If your compiler does not have stdint.h, add appropriate defines for these macros. +#if defined(_MSC_VER) && (_MSC_VER < 1600) +# define R128_S32 __int32 +# define R128_U32 unsigned __int32 +# define R128_S64 __int64 +# define R128_U64 unsigned __int64 +# define R128_LIT_S64(x) x##i64 +# define R128_LIT_U64(x) x##ui64 +#else +# include +# define R128_S32 int32_t +# define R128_U32 uint32_t +# define R128_S64 int64_t +# define R128_U64 uint64_t +# define R128_LIT_S64(x) x##ll +# define R128_LIT_U64(x) x##ull +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct R128 { + R128_U64 lo; + R128_U64 hi; + +#ifdef __cplusplus + R128(); + R128(double); + R128(int); + R128(R128_S64); + R128(R128_U64 low, R128_U64 high); + + operator double() const; + operator R128_S64() const; + operator int() const; + operator bool() const; + + bool operator!() const; + R128 operator~() const; + R128 operator-() const; + R128 &operator|=(const R128 &rhs); + R128 &operator&=(const R128 &rhs); + R128 &operator^=(const R128 &rhs); + R128 &operator+=(const R128 &rhs); + R128 &operator-=(const R128 &rhs); + R128 &operator*=(const R128 &rhs); + R128 &operator/=(const R128 &rhs); + R128 &operator%=(const R128 &rhs); + R128 &operator<<=(int amount); + R128 &operator>>=(int amount); +#endif //__cplusplus +} R128; + +// Type conversion +extern void r128FromInt(R128 *dst, R128_S64 v); +extern void r128FromFloat(R128 *dst, double v); +extern R128_S64 r128ToInt(const R128 *v); +extern double r128ToFloat(const R128 *v); + +// Copy +extern void r128Copy(R128 *dst, const R128 *src); + +// Negate +extern void r128Neg(R128 *dst, const R128 *src); + +// Bitwise operations +extern void r128Not(R128 *dst, const R128 *src); // ~a +extern void r128Or(R128 *dst, const R128 *a, const R128 *b); // a | b +extern void r128And(R128 *dst, const R128 *a, const R128 *b); // a & b +extern void r128Xor(R128 *dst, const 
R128 *a, const R128 *b); // a ^ b +extern void r128Shl(R128 *dst, const R128 *src, int amount); // shift left by amount mod 128 +extern void r128Shr(R128 *dst, const R128 *src, int amount); // shift right logical by amount mod 128 +extern void r128Sar(R128 *dst, const R128 *src, int amount); // shift right arithmetic by amount mod 128 + +// Arithmetic +extern void r128Add(R128 *dst, const R128 *a, const R128 *b); // a + b +extern void r128Sub(R128 *dst, const R128 *a, const R128 *b); // a - b +extern void r128Mul(R128 *dst, const R128 *a, const R128 *b); // a * b +extern void r128Div(R128 *dst, const R128 *a, const R128 *b); // a / b +extern void r128Mod(R128 *dst, const R128 *a, const R128 *b); // a - toInt(a / b) * b + +extern void r128Sqrt(R128 *dst, const R128 *v); // sqrt(v) +extern void r128Rsqrt(R128 *dst, const R128 *v); // 1 / sqrt(v) + +// Comparison +extern int r128Cmp(const R128 *a, const R128 *b); // sign of a-b +extern void r128Min(R128 *dst, const R128 *a, const R128 *b); +extern void r128Max(R128 *dst, const R128 *a, const R128 *b); +extern void r128Floor(R128 *dst, const R128 *v); +extern void r128Ceil(R128 *dst, const R128 *v); +extern int r128IsNeg(const R128 *v); // quick check for < 0 + +// String conversion +// +typedef enum R128ToStringSign { + R128ToStringSign_Default, // no sign character for positive values + R128ToStringSign_Space, // leading space for positive values + R128ToStringSign_Plus, // leading '+' for positive values +} R128ToStringSign; + +// Formatting options for use with r128ToStringOpt. The "defaults" correspond +// to a format string of "%f". +// +typedef struct R128ToStringFormat { + // sign character for positive values. Default is R128ToStringSign_Default. + R128ToStringSign sign; + + // minimum number of characters to write. Default is 0. + int width; + + // place to the right of the decimal at which rounding is performed. If negative, + // a maximum of 20 decimal places will be written, with no trailing zeroes. 
+ // (20 places is sufficient to ensure that r128FromString will convert back to the + // original value.) Default is -1. NOTE: This is not the same default that the C + // standard library uses for %f. + int precision; + + // If non-zero, pads the output string with leading zeroes if the final result is + // fewer than width characters. Otherwise, leading spaces are used. Default is 0. + int zeroPad; + + // Always print a decimal point, even if the value is an integer. Default is 0. + int decimal; + + // Left-align output if width specifier requires padding. + // Default is 0 (right align). + int leftAlign; +} R128ToStringFormat; + +// r128ToStringOpt: convert R128 to a decimal string, with formatting. +// +// dst and dstSize: specify the buffer to write into. At most dstSize bytes will be written +// (including null terminator). No additional rounding is performed if dstSize is not large +// enough to hold the entire string. +// +// opt: an R128ToStringFormat struct (q.v.) with formatting options. +// +// Uses the R128_decimal global as the decimal point character. +// Always writes a null terminator, even if the destination buffer is not large enough. +// +// Number of bytes that will be written (i.e. how big does dst need to be?): +// If width is specified: width + 1 bytes. +// If precision is specified: at most precision + 22 bytes. +// If neither is specified: at most 42 bytes. +// +// Returns the number of bytes that would have been written if dst was sufficiently large, +// not including the final null terminator. +// +extern int r128ToStringOpt(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *opt); + +// r128ToStringf: convert R128 to a decimal string, with formatting. +// +// dst and dstSize: specify the buffer to write into. At most dstSize bytes will be written +// (including null terminator). +// +// format: a printf-style format specifier, as one would use with floating point types. +// e.g. "%+5.2f". 
(The leading % and trailing f are optional.) +// NOTE: This is NOT a full replacement for sprintf. Any characters in the format string +// that do not correspond to a format placeholder are ignored. +// +// Uses the R128_decimal global as the decimal point character. +// Always writes a null terminator, even if the destination buffer is not large enough. +// +// Number of bytes that will be written (i.e. how big does dst need to be?): +// If the precision field is specified: at most max(width, precision + 21) + 1 bytes +// Otherwise: at most max(width, 41) + 1 bytes. +// +// Returns the number of bytes that would have been written if dst was sufficiently large, +// not including the final null terminator. +// +extern int r128ToStringf(char *dst, size_t dstSize, const char *format, const R128 *v); + +// r128ToString: convert R128 to a decimal string, with default formatting. +// Equivalent to r128ToStringf(dst, dstSize, "%f", v). +// +// Uses the R128_decimal global as the decimal point character. +// Always writes a null terminator, even if the destination buffer is not large enough. +// +// Will write at most 42 bytes (including NUL) to dst. +// +// Returns the number of bytes that would have been written if dst was sufficiently large, +// not including the final null terminator. +// +extern int r128ToString(char *dst, size_t dstSize, const R128 *v); + +// r128FromString: Convert string to R128. +// +// The string can be formatted either as a decimal number with optional sign +// or as hexadecimal with a prefix of 0x or 0X. +// +// endptr, if not NULL, is set to the character following the last character +// used in the conversion. 
+// +extern void r128FromString(R128 *dst, const char *s, char **endptr); + +// Constants +extern const R128 R128_min; // minimum (most negative) value +extern const R128 R128_max; // maximum (most positive) value +extern const R128 R128_smallest; // smallest positive value +extern const R128 R128_zero; // zero +extern const R128 R128_one; // 1.0 + +extern char R128_decimal; // decimal point character used by r128From/ToString. defaults to '.' + +#ifdef __cplusplus +} + +#include +namespace std { +template<> +struct numeric_limits +{ + static const bool is_specialized = true; + + static R128 min() throw() { return R128_min; } + static R128 max() throw() { return R128_max; } + + static const int digits = 127; + static const int digits10 = 38; + static const bool is_signed = true; + static const bool is_integer = false; + static const bool is_exact = false; + static const int radix = 2; + static R128 epsilon() throw() { return R128_smallest; } + static R128 round_error() throw() { return R128_one; } + + static const int min_exponent = 0; + static const int min_exponent10 = 0; + static const int max_exponent = 0; + static const int max_exponent10 = 0; + + static const bool has_infinity = false; + static const bool has_quiet_NaN = false; + static const bool has_signaling_NaN = false; + static const float_denorm_style has_denorm = denorm_absent; + static const bool has_denorm_loss = false; + + static R128 infinity() throw() { return R128_zero; } + static R128 quiet_NaN() throw() { return R128_zero; } + static R128 signaling_NaN() throw() { return R128_zero; } + static R128 denorm_min() throw() { return R128_zero; } + + static const bool is_iec559 = false; + static const bool is_bounded = true; + static const bool is_modulo = true; + + static const bool traps = numeric_limits::traps; + static const bool tinyness_before = false; + static const float_round_style round_style = round_toward_zero; +}; +} //namespace std + +inline R128::R128() {} + +inline R128::R128(double v) 
+{ + r128FromFloat(this, v); +} + +inline R128::R128(int v) +{ + r128FromInt(this, v); +} + +inline R128::R128(R128_S64 v) +{ + r128FromInt(this, v); +} + +inline R128::R128(R128_U64 low, R128_U64 high) +{ + lo = low; + hi = high; +} + +inline R128::operator double() const +{ + return r128ToFloat(this); +} + +inline R128::operator R128_S64() const +{ + return r128ToInt(this); +} + +inline R128::operator int() const +{ + return (int) r128ToInt(this); +} + +inline R128::operator bool() const +{ + return lo || hi; +} + +inline bool R128::operator!() const +{ + return !lo && !hi; +} + +inline R128 R128::operator~() const +{ + R128 r; + r128Not(&r, this); + return r; +} + +inline R128 R128::operator-() const +{ + R128 r; + r128Neg(&r, this); + return r; +} + +inline R128 &R128::operator|=(const R128 &rhs) +{ + r128Or(this, this, &rhs); + return *this; +} + +inline R128 &R128::operator&=(const R128 &rhs) +{ + r128And(this, this, &rhs); + return *this; +} + +inline R128 &R128::operator^=(const R128 &rhs) +{ + r128Xor(this, this, &rhs); + return *this; +} + +inline R128 &R128::operator+=(const R128 &rhs) +{ + r128Add(this, this, &rhs); + return *this; +} + +inline R128 &R128::operator-=(const R128 &rhs) +{ + r128Sub(this, this, &rhs); + return *this; +} + +inline R128 &R128::operator*=(const R128 &rhs) +{ + r128Mul(this, this, &rhs); + return *this; +} + +inline R128 &R128::operator/=(const R128 &rhs) +{ + r128Div(this, this, &rhs); + return *this; +} + +inline R128 &R128::operator%=(const R128 &rhs) +{ + r128Mod(this, this, &rhs); + return *this; +} + +inline R128 &R128::operator<<=(int amount) +{ + r128Shl(this, this, amount); + return *this; +} + +inline R128 &R128::operator>>=(int amount) +{ + r128Sar(this, this, amount); + return *this; +} + +static inline R128 operator|(const R128 &lhs, const R128 &rhs) +{ + R128 r(lhs); + return r |= rhs; +} + +static inline R128 operator&(const R128 &lhs, const R128 &rhs) +{ + R128 r(lhs); + return r &= rhs; +} + +static inline 
R128 operator^(const R128 &lhs, const R128 &rhs) +{ + R128 r(lhs); + return r ^= rhs; +} + +static inline R128 operator+(const R128 &lhs, const R128 &rhs) +{ + R128 r(lhs); + return r += rhs; +} + +static inline R128 operator-(const R128 &lhs, const R128 &rhs) +{ + R128 r(lhs); + return r -= rhs; +} + +static inline R128 operator*(const R128 &lhs, const R128 &rhs) +{ + R128 r(lhs); + return r *= rhs; +} + +static inline R128 operator/(const R128 &lhs, const R128 &rhs) +{ + R128 r(lhs); + return r /= rhs; +} + +static inline R128 operator%(const R128 &lhs, const R128 &rhs) +{ + R128 r(lhs); + return r %= rhs; +} + +static inline R128 operator<<(const R128 &lhs, int amount) +{ + R128 r(lhs); + return r <<= amount; +} + +static inline R128 operator>>(const R128 &lhs, int amount) +{ + R128 r(lhs); + return r >>= amount; +} + +static inline bool operator<(const R128 &lhs, const R128 &rhs) +{ + return r128Cmp(&lhs, &rhs) < 0; +} + +static inline bool operator>(const R128 &lhs, const R128 &rhs) +{ + return r128Cmp(&lhs, &rhs) > 0; +} + +static inline bool operator<=(const R128 &lhs, const R128 &rhs) +{ + return r128Cmp(&lhs, &rhs) <= 0; +} + +static inline bool operator>=(const R128 &lhs, const R128 &rhs) +{ + return r128Cmp(&lhs, &rhs) >= 0; +} + +static inline bool operator==(const R128 &lhs, const R128 &rhs) +{ + return lhs.lo == rhs.lo && lhs.hi == rhs.hi; +} + +static inline bool operator!=(const R128 &lhs, const R128 &rhs) +{ + return lhs.lo != rhs.lo || lhs.hi != rhs.hi; +} + +#endif //__cplusplus +#endif //H_R128_H + +#ifdef R128_IMPLEMENTATION + +#ifdef R128_DEBUG_VIS +# define R128_DEBUG_SET(x) r128ToString(R128_last, sizeof(R128_last), x) +#else +# define R128_DEBUG_SET(x) +#endif + +#define R128_SET2(x, l, h) do { (x)->lo = (R128_U64)(l); (x)->hi = (R128_U64)(h); } while(0) +#define R128_R0(x) ((R128_U32)(x)->lo) +#define R128_R2(x) ((R128_U32)(x)->hi) +#if defined(_M_IX86) +// workaround: MSVC x86's handling of 64-bit values is not great +# define 
R128_SET4(x, r0, r1, r2, r3) do { \ + ((R128_U32*)&(x)->lo)[0] = (R128_U32)(r0); \ + ((R128_U32*)&(x)->lo)[1] = (R128_U32)(r1); \ + ((R128_U32*)&(x)->hi)[0] = (R128_U32)(r2); \ + ((R128_U32*)&(x)->hi)[1] = (R128_U32)(r3); \ + } while(0) +# define R128_R1(x) (((R128_U32*)&(x)->lo)[1]) +# define R128_R3(x) (((R128_U32*)&(x)->hi)[1]) +#else +# define R128_SET4(x, r0, r1, r2, r3) do { (x)->lo = (R128_U64)(r0) | ((R128_U64)(r1) << 32); \ + (x)->hi = (R128_U64)(r2) | ((R128_U64)(r3) << 32); } while(0) +# define R128_R1(x) ((R128_U32)((x)->lo >> 32)) +# define R128_R3(x) ((R128_U32)((x)->hi >> 32)) +#endif + +#if defined(_M_X64) +# define R128_INTEL 1 +# define R128_64BIT 1 +# ifndef R128_STDC_ONLY +# include +# endif +#elif defined(__x86_64__) +# define R128_INTEL 1 +# define R128_64BIT 1 +# ifndef R128_STDC_ONLY +# include +# endif +#elif defined(_M_IX86) +# define R128_INTEL 1 +# ifndef R128_STDC_ONLY +# include +# endif +#elif defined(__i386__) +# define R128_INTEL 1 +# ifndef R128_STDC_ONLY +# include +# endif +#elif defined(_M_ARM) +# ifndef R128_STDC_ONLY +# include +# endif +#elif defined(_M_ARM64) +# define R128_64BIT 1 +# ifndef R128_STDC_ONLY +# include +# endif +#elif defined(__aarch64__) +# define R128_64BIT 1 +#endif + +#ifndef R128_INTEL +# define R128_INTEL 0 +#endif + +#ifndef R128_64BIT +# define R128_64BIT 0 +#endif + +#ifndef R128_ASSERT +# include +# define R128_ASSERT(x) assert(x) +#endif + +#include // for NULL + +static const R128ToStringFormat R128__defaultFormat = { + R128ToStringSign_Default, + 0, + -1, + 0, + 0, + 0 +}; + +const R128 R128_min = { 0, R128_LIT_U64(0x8000000000000000) }; +const R128 R128_max = { R128_LIT_U64(0xffffffffffffffff), R128_LIT_U64(0x7fffffffffffffff) }; +const R128 R128_smallest = { 1, 0 }; +const R128 R128_zero = { 0, 0 }; +const R128 R128_one = { 0, 1 }; +char R128_decimal = '.'; +#ifdef R128_DEBUG_VIS +char R128_last[42]; +#endif + +static int r128__clz64(R128_U64 x) +{ +#if defined(R128_STDC_ONLY) + R128_U64 n = 64, 
y; + y = x >> 32; if (y) { n -= 32; x = y; } + y = x >> 16; if (y) { n -= 16; x = y; } + y = x >> 8; if (y) { n -= 8; x = y; } + y = x >> 4; if (y) { n -= 4; x = y; } + y = x >> 2; if (y) { n -= 2; x = y; } + y = x >> 1; if (y) { n -= 1; x = y; } + return (int)(n - x); +#elif defined(_M_X64) || defined(_M_ARM64) + unsigned long idx; + if (_BitScanReverse64(&idx, x)) { + return 63 - (int)idx; + } else { + return 64; + } +#elif defined(_MSC_VER) + unsigned long idx; + if (_BitScanReverse(&idx, (R128_U32)(x >> 32))) { + return 31 - (int)idx; + } else if (_BitScanReverse(&idx, (R128_U32)x)) { + return 63 - (int)idx; + } else { + return 64; + } +#else + return x ? __builtin_clzll(x) : 64; +#endif +} + +#if !R128_64BIT +// 32*32->64 +static R128_U64 r128__umul64(R128_U32 a, R128_U32 b) +{ +# if defined(_M_IX86) && !defined(R128_STDC_ONLY) + return __emulu(a, b); +# elif defined(_M_ARM) && !defined(R128_STDC_ONLY) + return _arm_umull(a, b); +# else + return a * (R128_U64)b; +# endif +} + +// 64/32->32 +static R128_U32 r128__udiv64(R128_U32 nlo, R128_U32 nhi, R128_U32 d, R128_U32 *rem) +{ +# if defined(_M_IX86) && (_MSC_VER >= 1920) && !defined(R128_STDC_ONLY) + unsigned __int64 n = ((unsigned __int64)nhi << 32) | nlo; + return _udiv64(n, d, rem); +# elif defined(_M_IX86) && !defined(R128_STDC_ONLY) + __asm { + mov eax, nlo + mov edx, nhi + div d + mov ecx, rem + mov dword ptr [ecx], edx + } +# elif defined(__i386__) && !defined(R128_STDC_ONLY) + R128_U32 q, r; + __asm("divl %4" + : "=a"(q), "=d"(r) + : "a"(nlo), "d"(nhi), "X"(d)); + *rem = r; + return q; +# else + R128_U64 n64 = ((R128_U64)nhi << 32) | nlo; + *rem = (R128_U32)(n64 % d); + return (R128_U32)(n64 / d); +# endif +} +#elif !defined(_M_X64) || defined(R128_STDC_ONLY) +#define r128__umul64(a, b) ((a) * (R128_U64)(b)) +static R128_U32 r128__udiv64(R128_U32 nlo, R128_U32 nhi, R128_U32 d, R128_U32 *rem) +{ + R128_U64 n64 = ((R128_U64)nhi << 32) | nlo; + *rem = (R128_U32)(n64 % d); + return (R128_U32)(n64 / d); +} 
+#endif //!R128_64BIT + +static void r128__neg(R128 *dst, const R128 *src) +{ + R128_ASSERT(dst != NULL); + R128_ASSERT(src != NULL); + +#if R128_INTEL && !defined(R128_STDC_ONLY) + { + unsigned char carry = 0; +# if R128_64BIT + carry = _addcarry_u64(carry, ~src->lo, 1, &dst->lo); + carry = _addcarry_u64(carry, ~src->hi, 0, &dst->hi); +# else + R128_U32 r0, r1, r2, r3; + carry = _addcarry_u32(carry, ~R128_R0(src), 1, &r0); + carry = _addcarry_u32(carry, ~R128_R1(src), 0, &r1); + carry = _addcarry_u32(carry, ~R128_R2(src), 0, &r2); + carry = _addcarry_u32(carry, ~R128_R3(src), 0, &r3); + R128_SET4(dst, r0, r1, r2, r3); +# endif //R128_64BIT + } +#else + if (src->lo) { + dst->lo = ~src->lo + 1; + dst->hi = ~src->hi; + } else { + dst->lo = 0; + dst->hi = ~src->hi + 1; + } +#endif //R128_INTEL +} + +// 64*64->128 +static void r128__umul128(R128 *dst, R128_U64 a, R128_U64 b) +{ +#if defined(_M_X64) && !defined(R128_STDC_ONLY) + dst->lo = _umul128(a, b, &dst->hi); +#elif R128_64BIT && !defined(_MSC_VER) && !defined(R128_STDC_ONLY) + unsigned __int128 p0 = a * (unsigned __int128)b; + dst->hi = (R128_U64)(p0 >> 64); + dst->lo = (R128_U64)p0; +#else + R128_U32 alo = (R128_U32)a; + R128_U32 ahi = (R128_U32)(a >> 32); + R128_U32 blo = (R128_U32)b; + R128_U32 bhi = (R128_U32)(b >> 32); + R128_U64 p0, p1, p2, p3; + + p0 = r128__umul64(alo, blo); + p1 = r128__umul64(alo, bhi); + p2 = r128__umul64(ahi, blo); + p3 = r128__umul64(ahi, bhi); + + { +#if R128_INTEL && !defined(R128_STDC_ONLY) + R128_U32 r0, r1, r2, r3; + unsigned char carry; + + r0 = (R128_U32)(p0); + r1 = (R128_U32)(p0 >> 32); + r2 = (R128_U32)(p1 >> 32); + r3 = (R128_U32)(p3 >> 32); + + carry = _addcarry_u32(0, r1, (R128_U32)p1, &r1); + carry = _addcarry_u32(carry, r2, (R128_U32)(p2 >> 32), &r2); + _addcarry_u32(carry, r3, 0, &r3); + carry = _addcarry_u32(0, r1, (R128_U32)p2, &r1); + carry = _addcarry_u32(carry, r2, (R128_U32)p3, &r2); + _addcarry_u32(carry, r3, 0, &r3); + + R128_SET4(dst, r0, r1, r2, r3); +#else + 
R128_U64 carry, lo, hi; + carry = ((R128_U64)(R128_U32)p1 + (R128_U64)(R128_U32)p2 + (p0 >> 32)) >> 32; + + lo = p0 + ((p1 + p2) << 32); + hi = p3 + ((R128_U32)(p1 >> 32) + (R128_U32)(p2 >> 32)) + carry; + + R128_SET2(dst, lo, hi); +#endif + } +#endif +} + +// 128/64->64 +#if defined(_M_X64) && (_MSC_VER < 1920) && !defined(R128_STDC_ONLY) +// MSVC x64 provides neither inline assembly nor (pre-2019) a div intrinsic, so we do fake +// "inline assembly" to avoid long division or outline assembly. +#pragma code_seg(".text") +__declspec(allocate(".text")) static const unsigned char r128__udiv128Code[] = { + 0x48, 0x8B, 0xC1, //mov rax, rcx + 0x49, 0xF7, 0xF0, //div rax, r8 + 0x49, 0x89, 0x11, //mov qword ptr [r9], rdx + 0xC3 //ret +}; +typedef R128_U64 (*r128__udiv128Proc)(R128_U64 nlo, R128_U64 nhi, R128_U64 d, R128_U64 *rem); +static const r128__udiv128Proc r128__udiv128 = (r128__udiv128Proc)(void*)r128__udiv128Code; +#else +static R128_U64 r128__udiv128(R128_U64 nlo, R128_U64 nhi, R128_U64 d, R128_U64 *rem) +{ +#if defined(_M_X64) && !defined(R128_STDC_ONLY) + return _udiv128(nhi, nlo, d, rem); +#elif defined(__x86_64__) && !defined(R128_STDC_ONLY) + R128_U64 q, r; + __asm("divq %4" + : "=a"(q), "=d"(r) + : "a"(nlo), "d"(nhi), "X"(d)); + *rem = r; + return q; +#else + R128_U64 tmp; + R128_U32 d0, d1; + R128_U32 n3, n2, n1, n0; + R128_U32 q0, q1; + R128_U32 r; + int shift; + + R128_ASSERT(d != 0); //division by zero + R128_ASSERT(nhi < d); //overflow + + // normalize + shift = r128__clz64(d); + + if (shift) { + R128 tmp128; + R128_SET2(&tmp128, nlo, nhi); + r128Shl(&tmp128, &tmp128, shift); + n3 = R128_R3(&tmp128); + n2 = R128_R2(&tmp128); + n1 = R128_R1(&tmp128); + n0 = R128_R0(&tmp128); + d <<= shift; + } else { + n3 = (R128_U32)(nhi >> 32); + n2 = (R128_U32)nhi; + n1 = (R128_U32)(nlo >> 32); + n0 = (R128_U32)nlo; + } + + d1 = (R128_U32)(d >> 32); + d0 = (R128_U32)d; + + // first digit + R128_ASSERT(n3 <= d1); + if (n3 < d1) { + q1 = r128__udiv64(n2, n3, d1, &r); + 
} else { + q1 = 0xffffffffu; + r = n2 + d1; + } +refine1: + if (r128__umul64(q1, d0) > ((R128_U64)r << 32) + n1) { + --q1; + if (r < ~d1 + 1) { + r += d1; + goto refine1; + } + } + + tmp = ((R128_U64)n2 << 32) + n1 - (r128__umul64(q1, d0) + (r128__umul64(q1, d1) << 32)); + n2 = (R128_U32)(tmp >> 32); + n1 = (R128_U32)tmp; + + // second digit + R128_ASSERT(n2 <= d1); + if (n2 < d1) { + q0 = r128__udiv64(n1, n2, d1, &r); + } else { + q0 = 0xffffffffu; + r = n1 + d1; + } +refine0: + if (r128__umul64(q0, d0) > ((R128_U64)r << 32) + n0) { + --q0; + if (r < ~d1 + 1) { + r += d1; + goto refine0; + } + } + + tmp = ((R128_U64)n1 << 32) + n0 - (r128__umul64(q0, d0) + (r128__umul64(q0, d1) << 32)); + n1 = (R128_U32)(tmp >> 32); + n0 = (R128_U32)tmp; + + *rem = (((R128_U64)n1 << 32) + n0) >> shift; + return ((R128_U64)q1 << 32) + q0; +#endif +} +#endif + +static int r128__ucmp(const R128 *a, const R128 *b) +{ + if (a->hi != b->hi) { + if (a->hi > b->hi) { + return 1; + } else { + return -1; + } + } else { + if (a->lo == b->lo) { + return 0; + } else if (a->lo > b->lo) { + return 1; + } else { + return -1; + } + } +} + +static void r128__umul(R128 *dst, const R128 *a, const R128 *b) +{ +#if defined(_M_X64) && !defined(R128_STDC_ONLY) + R128_U64 t0, t1; + R128_U64 lo, hi = 0; + unsigned char carry; + + t0 = _umul128(a->lo, b->lo, &t1); + carry = _addcarry_u64(0, t1, t0 >> 63, &lo); + _addcarry_u64(carry, hi, hi, &hi); + + t0 = _umul128(a->lo, b->hi, &t1); + carry = _addcarry_u64(0, lo, t0, &lo); + _addcarry_u64(carry, hi, t1, &hi); + + t0 = _umul128(a->hi, b->lo, &t1); + carry = _addcarry_u64(0, lo, t0, &lo); + _addcarry_u64(carry, hi, t1, &hi); + + t0 = _umul128(a->hi, b->hi, &t1); + hi += t0; + + R128_SET2(dst, lo, hi); +#elif defined(__x86_64__) && !defined(R128_STDC_ONLY) + unsigned __int128 p0, p1, p2, p3; + p0 = a->lo * (unsigned __int128)b->lo; + p1 = a->lo * (unsigned __int128)b->hi; + p2 = a->hi * (unsigned __int128)b->lo; + p3 = a->hi * (unsigned __int128)b->hi; + + p0 
= (p3 << 64) + p2 + p1 + (p0 >> 64) + ((R128_U64)p0 >> 63); + dst->lo = (R128_U64)p0; + dst->hi = (R128_U64)(p0 >> 64); +#else + R128 p0, p1, p2, p3, round; + + r128__umul128(&p0, a->lo, b->lo); + round.hi = 0; round.lo = p0.lo >> 63; + p0.lo = p0.hi; p0.hi = 0; //r128Shr(&p0, &p0, 64); + r128Add(&p0, &p0, &round); + + r128__umul128(&p1, a->hi, b->lo); + r128Add(&p0, &p0, &p1); + + r128__umul128(&p2, a->lo, b->hi); + r128Add(&p0, &p0, &p2); + + r128__umul128(&p3, a->hi, b->hi); + p3.hi = p3.lo; p3.lo = 0; //r128Shl(&p3, &p3, 64); + r128Add(&p0, &p0, &p3); + + R128_SET2(dst, p0.lo, p0.hi); +#endif +} + +// Shift d left until the high bit is set, and shift n left by the same amount. +// returns non-zero on overflow. +static int r128__norm(R128 *n, R128 *d, R128_U64 *n2) +{ + R128_U64 d0, d1; + R128_U64 n0, n1; + int shift; + + d1 = d->hi; + d0 = d->lo; + n1 = n->hi; + n0 = n->lo; + + if (d1) { + shift = r128__clz64(d1); + if (shift) { + d1 = (d1 << shift) | (d0 >> (64 - shift)); + d0 = d0 << shift; + *n2 = n1 >> (64 - shift); + n1 = (n1 << shift) | (n0 >> (64 - shift)); + n0 = n0 << shift; + } else { + *n2 = 0; + } + } else { + shift = r128__clz64(d0); + if (r128__clz64(n1) <= shift) { + return 1; // overflow + } + + if (shift) { + d1 = d0 << shift; + d0 = 0; + *n2 = (n1 << shift) | (n0 >> (64 - shift)); + n1 = n0 << shift; + n0 = 0; + } else { + d1 = d0; + d0 = 0; + *n2 = n1; + n1 = n0; + n0 = 0; + } + } + + R128_SET2(n, n0, n1); + R128_SET2(d, d0, d1); + return 0; +} + +static void r128__udiv(R128 *quotient, const R128 *dividend, const R128 *divisor) +{ + R128 tmp; + R128_U64 d0, d1; + R128_U64 n1, n2, n3; + R128 q; + + R128_ASSERT(dividend != NULL); + R128_ASSERT(divisor != NULL); + R128_ASSERT(quotient != NULL); + R128_ASSERT(divisor->hi != 0 || divisor->lo != 0); // divide by zero + + // scale dividend and normalize + { + R128 n, d; + R128_SET2(&n, dividend->lo, dividend->hi); + R128_SET2(&d, divisor->lo, divisor->hi); + if (r128__norm(&n, &d, &n3)) { + 
R128_SET2(quotient, R128_max.lo, R128_max.hi); + return; + } + + d1 = d.hi; + d0 = d.lo; + n2 = n.hi; + n1 = n.lo; + } + + // first digit + R128_ASSERT(n3 <= d1); + { + R128 t0, t1; + t0.lo = n1; + if (n3 < d1) { + q.hi = r128__udiv128(n2, n3, d1, &t0.hi); + } else { + q.hi = R128_LIT_U64(0xffffffffffffffff); + t0.hi = n2 + d1; + } + +refine1: + r128__umul128(&t1, q.hi, d0); + if (r128__ucmp(&t1, &t0) > 0) { + --q.hi; + if (t0.hi < ~d1 + 1) { + t0.hi += d1; + goto refine1; + } + } + } + + { + R128 t0, t1, t2; + t0.hi = n2; + t0.lo = n1; + + r128__umul128(&t1, q.hi, d0); + r128__umul128(&t2, q.hi, d1); + + t2.hi = t2.lo; t2.lo = 0; //r128Shl(&t2, &t2, 64); + r128Add(&tmp, &t1, &t2); + r128Sub(&tmp, &t0, &tmp); + } + n2 = tmp.hi; + n1 = tmp.lo; + + // second digit + R128_ASSERT(n2 <= d1); + { + R128 t0, t1; + t0.lo = 0; + if (n2 < d1) { + q.lo = r128__udiv128(n1, n2, d1, &t0.hi); + } else { + q.lo = R128_LIT_U64(0xffffffffffffffff); + t0.hi = n1 + d1; + } + + refine0: + r128__umul128(&t1, q.lo, d0); + if (r128__ucmp(&t1, &t0) > 0) { + --q.lo; + if (t0.hi < ~d1 + 1) { + t0.hi += d1; + goto refine0; + } + } + } + + R128_SET2(quotient, q.lo, q.hi); +} + +static R128_U64 r128__umod(R128 *n, R128 *d) +{ + R128_U64 d0, d1; + R128_U64 n3, n2, n1; + R128_U64 q; + + R128_ASSERT(d != NULL); + R128_ASSERT(n != NULL); + R128_ASSERT(d->hi != 0 || d->lo != 0); // divide by zero + + if (r128__norm(n, d, &n3)) { + return R128_LIT_U64(0xffffffffffffffff); + } + + d1 = d->hi; + d0 = d->lo; + n2 = n->hi; + n1 = n->lo; + + R128_ASSERT(n3 < d1); + { + R128 t0, t1; + t0.lo = n1; + q = r128__udiv128(n2, n3, d1, &t0.hi); + + refine1: + r128__umul128(&t1, q, d0); + if (r128__ucmp(&t1, &t0) > 0) { + --q; + if (t0.hi < ~d1 + 1) { + t0.hi += d1; + goto refine1; + } + } + } + + return q; +} + +static int r128__format(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *format) +{ + char buf[128]; + R128 tmp; + R128_U64 whole; + char *cursor, *decimal, *dstp = dst; + int sign = 0; 
+ int fullPrecision = 1; + int width, precision; + int padCnt, trail = 0; + + R128_ASSERT(dst != NULL && dstSize > 0); + R128_ASSERT(v != NULL); + R128_ASSERT(format != NULL); + + --dstSize; + + R128_SET2(&tmp, v->lo, v->hi); + if (r128IsNeg(&tmp)) { + r128__neg(&tmp, &tmp); + sign = 1; + } + + width = format->width; + if (width < 0) { + width = 0; + } + + precision = format->precision; + if (precision < 0) { + // print a maximum of 20 digits + fullPrecision = 0; + precision = 20; + } else if (precision > sizeof(buf) - 21) { + trail = precision - (sizeof(buf) - 21); + precision -= trail; + } + + whole = tmp.hi; + decimal = cursor = buf; + + // fractional part first in case a carry into the whole part is required + if (tmp.lo || format->decimal) { + while (tmp.lo || (fullPrecision && precision)) { + if ((int)(cursor - buf) == precision) { + if ((R128_S64)tmp.lo < 0) { + // round up, propagate carry backwards + char *c; + for (c = cursor - 1; c >= buf; --c) { + char d = ++*c; + if (d <= '9') { + goto endfrac; + } else { + *c = '0'; + } + } + + // carry out into the whole part + whole++; + } + + break; + } + + r128__umul128(&tmp, tmp.lo, 10); + *cursor++ = (char)tmp.hi + '0'; + } + + endfrac: + if (format->decimal || precision) { + decimal = cursor; + *cursor++ = R128_decimal; + } + } + + // whole part + do { + char digit = (char)(whole % 10); + whole /= 10; + *cursor++ = digit + '0'; + } while (whole); + +#define R128__WRITE(c) do { if (dstp < dst + dstSize) *dstp = c; ++dstp; } while(0) + + padCnt = width - (int)(cursor - buf) - 1; + + // left padding + if (!format->leftAlign) { + char padChar = format->zeroPad ? 
'0' : ' '; + if (format->zeroPad) { + if (sign) { + R128__WRITE('-'); + } else if (format->sign == R128ToStringSign_Plus) { + R128__WRITE('+'); + } else if (format->sign == R128ToStringSign_Space) { + R128__WRITE(' '); + } else { + ++padCnt; + } + } + + for (; padCnt > 0; --padCnt) { + R128__WRITE(padChar); + } + } + + if (format->leftAlign || !format->zeroPad) { + if (sign) { + R128__WRITE('-'); + } else if (format->sign == R128ToStringSign_Plus) { + R128__WRITE('+'); + } else if (format->sign == R128ToStringSign_Space) { + R128__WRITE(' '); + } else { + ++padCnt; + } + } + + { + char *i; + + // reverse the whole part + for (i = cursor - 1; i >= decimal; --i) { + R128__WRITE(*i); + } + + // copy the fractional part + for (i = buf; i < decimal; ++i) { + R128__WRITE(*i); + } + } + + // right padding + if (format->leftAlign) { + char padChar = format->zeroPad ? '0' : ' '; + for (; padCnt > 0; --padCnt) { + R128__WRITE(padChar); + } + } + + // trailing zeroes for very large precision + while (trail--) { + R128__WRITE('0'); + } + +#undef R128__WRITE + + if (dstp <= dst + dstSize) { + *dstp = '\0'; + } else { + dst[dstSize] = '\0'; + } + return (int)(dstp - dst); +} + +void r128FromInt(R128 *dst, R128_S64 v) +{ + R128_ASSERT(dst != NULL); + dst->lo = 0; + dst->hi = (R128_U64)v; + R128_DEBUG_SET(dst); +} + +void r128FromFloat(R128 *dst, double v) +{ + R128_ASSERT(dst != NULL); + + if (v < -9223372036854775808.0) { + r128Copy(dst, &R128_min); + } else if (v >= 9223372036854775808.0) { + r128Copy(dst, &R128_max); + } else { + R128 r; + int sign = 0; + + if (v < 0) { + v = -v; + sign = 1; + } + + r.hi = (R128_U64)(R128_S64)v; + v -= (R128_S64)v; + r.lo = (R128_U64)(v * 18446744073709551616.0); + + if (sign) { + r128__neg(&r, &r); + } + + r128Copy(dst, &r); + } +} + +void r128FromString(R128 *dst, const char *s, char **endptr) +{ + R128_U64 lo = 0, hi = 0; + R128_U64 base = 10; + + int sign = 0; + + R128_ASSERT(dst != NULL); + R128_ASSERT(s != NULL); + + R128_SET2(dst, 0, 
0); + + // consume whitespace + for (;;) { + if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n' || *s == '\v') { + ++s; + } else { + break; + } + } + + // sign + if (*s == '-') { + sign = 1; + ++s; + } else if (*s == '+') { + ++s; + } + + // parse base prefix + if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { + base = 16; + s += 2; + } + + // whole part + for (;; ++s) { + R128_U64 digit; + + if ('0' <= *s && *s <= '9') { + digit = *s - '0'; + } else if (base == 16 && 'a' <= *s && *s <= 'f') { + digit = *s - 'a' + 10; + } else if (base == 16 && 'A' <= *s && *s <= 'F') { + digit = *s - 'A' + 10; + } else { + break; + } + + hi = hi * base + digit; + } + + // fractional part + if (*s == R128_decimal) { + const char *exp = ++s; + + // find the last digit and work backwards + for (;; ++s) { + if ('0' <= *s && *s <= '9') { + } else if (base == 16 && ('a' <= *s && *s <= 'f')) { + } else if (base == 16 && ('A' <= *s && *s <= 'F')) { + } else { + break; + } + } + + for (--s; s >= exp; --s) { + R128_U64 digit, unused; + + if ('0' <= *s && *s <= '9') { + digit = *s - '0'; + } else if ('a' <= *s && *s <= 'f') { + digit = *s - 'a' + 10; + } else { + digit = *s - 'A' + 10; + } + + lo = r128__udiv128(lo, digit, base, &unused); + } + } + + R128_SET2(dst, lo, hi); + if (sign) { + r128__neg(dst, dst); + } + + if (endptr) { + *endptr = (char *) s; + } +} + +R128_S64 r128ToInt(const R128 *v) +{ + R128_ASSERT(v != NULL); + return (R128_S64)v->hi; +} + +double r128ToFloat(const R128 *v) +{ + R128 tmp; + int sign = 0; + double d; + + R128_ASSERT(v != NULL); + + R128_SET2(&tmp, v->lo, v->hi); + if (r128IsNeg(&tmp)) { + r128__neg(&tmp, &tmp); + sign = 1; + } + + d = tmp.hi + tmp.lo * (1 / 18446744073709551616.0); + if (sign) { + d = -d; + } + + return d; +} + +int r128ToStringOpt(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *opt) +{ + return r128__format(dst, dstSize, v, opt); +} + +int r128ToStringf(char *dst, size_t dstSize, const char *format, const R128 *v) 
+{ + R128ToStringFormat opts; + + R128_ASSERT(dst != NULL && dstSize); + R128_ASSERT(format != NULL); + R128_ASSERT(v != NULL); + + opts.sign = R128__defaultFormat.sign; + opts.precision = R128__defaultFormat.precision; + opts.zeroPad = R128__defaultFormat.zeroPad; + opts.decimal = R128__defaultFormat.decimal; + opts.leftAlign = R128__defaultFormat.leftAlign; + + if (*format == '%') { + ++format; + } + + // flags field + for (;; ++format) { + if (*format == ' ' && opts.sign != R128ToStringSign_Plus) { + opts.sign = R128ToStringSign_Space; + } else if (*format == '+') { + opts.sign = R128ToStringSign_Plus; + } else if (*format == '0') { + opts.zeroPad = 1; + } else if (*format == '-') { + opts.leftAlign = 1; + } else if (*format == '#') { + opts.decimal = 1; + } else { + break; + } + } + + // width field + opts.width = 0; + for (;;) { + if ('0' <= *format && *format <= '9') { + opts.width = opts.width * 10 + *format++ - '0'; + } else { + break; + } + } + + // precision field + if (*format == '.') { + opts.precision = 0; + ++format; + for (;;) { + if ('0' <= *format && *format <= '9') { + opts.precision = opts.precision * 10 + *format++ - '0'; + } else { + break; + } + } + } + + return r128__format(dst, dstSize, v, &opts); +} + +int r128ToString(char *dst, size_t dstSize, const R128 *v) +{ + return r128__format(dst, dstSize, v, &R128__defaultFormat); +} + +void r128Copy(R128 *dst, const R128 *src) +{ + R128_ASSERT(dst != NULL); + R128_ASSERT(src != NULL); + dst->lo = src->lo; + dst->hi = src->hi; + R128_DEBUG_SET(dst); +} + +void r128Neg(R128 *dst, const R128 *src) +{ + r128__neg(dst, src); + R128_DEBUG_SET(dst); +} + +void r128Not(R128 *dst, const R128 *src) +{ + R128_ASSERT(dst != NULL); + R128_ASSERT(src != NULL); + + dst->lo = ~src->lo; + dst->hi = ~src->hi; + R128_DEBUG_SET(dst); +} + +void r128Or(R128 *dst, const R128 *a, const R128 *b) +{ + R128_ASSERT(dst != NULL); + R128_ASSERT(a != NULL); + R128_ASSERT(b != NULL); + + dst->lo = a->lo | b->lo; + dst->hi = 
a->hi | b->hi; + R128_DEBUG_SET(dst); +} + +void r128And(R128 *dst, const R128 *a, const R128 *b) +{ + R128_ASSERT(dst != NULL); + R128_ASSERT(a != NULL); + R128_ASSERT(b != NULL); + + dst->lo = a->lo & b->lo; + dst->hi = a->hi & b->hi; + R128_DEBUG_SET(dst); +} + +void r128Xor(R128 *dst, const R128 *a, const R128 *b) +{ + R128_ASSERT(dst != NULL); + R128_ASSERT(a != NULL); + R128_ASSERT(b != NULL); + + dst->lo = a->lo ^ b->lo; + dst->hi = a->hi ^ b->hi; + R128_DEBUG_SET(dst); +} + +void r128Shl(R128 *dst, const R128 *src, int amount) +{ + R128_U64 r[4]; + + R128_ASSERT(dst != NULL); + R128_ASSERT(src != NULL); + +#if defined(_M_IX86) && !defined(R128_STDC_ONLY) + __asm { + // load src + mov edx, dword ptr[src] + mov ecx, amount + + mov edi, dword ptr[edx] + mov esi, dword ptr[edx + 4] + mov ebx, dword ptr[edx + 8] + mov eax, dword ptr[edx + 12] + + // shift mod 32 + shld eax, ebx, cl + shld ebx, esi, cl + shld esi, edi, cl + shl edi, cl + + // clear out low 12 bytes of stack + xor edx, edx + mov dword ptr[r], edx + mov dword ptr[r + 4], edx + mov dword ptr[r + 8], edx + + // store shifted amount offset by count/32 bits + shr ecx, 5 + and ecx, 3 + mov dword ptr[r + ecx * 4 + 0], edi + mov dword ptr[r + ecx * 4 + 4], esi + mov dword ptr[r + ecx * 4 + 8], ebx + mov dword ptr[r + ecx * 4 + 12], eax + } +#else + + r[0] = src->lo; + r[1] = src->hi; + + amount &= 127; + if (amount >= 64) { + r[1] = r[0] << (amount - 64); + r[0] = 0; + } else if (amount) { +# ifdef _M_X64 + r[1] = __shiftleft128(r[0], r[1], (char) amount); +# else + r[1] = (r[1] << amount) | (r[0] >> (64 - amount)); +# endif + r[0] = r[0] << amount; + } +#endif //_M_IX86 + + dst->lo = r[0]; + dst->hi = r[1]; + R128_DEBUG_SET(dst); +} + +void r128Shr(R128 *dst, const R128 *src, int amount) +{ + R128_U64 r[4]; + + R128_ASSERT(dst != NULL); + R128_ASSERT(src != NULL); + +#if defined(_M_IX86) && !defined(R128_STDC_ONLY) + __asm { + // load src + mov edx, dword ptr[src] + mov ecx, amount + + mov edi, dword 
ptr[edx] + mov esi, dword ptr[edx + 4] + mov ebx, dword ptr[edx + 8] + mov eax, dword ptr[edx + 12] + + // shift mod 32 + shrd edi, esi, cl + shrd esi, ebx, cl + shrd ebx, eax, cl + shr eax, cl + + // clear out high 12 bytes of stack + xor edx, edx + mov dword ptr[r + 20], edx + mov dword ptr[r + 24], edx + mov dword ptr[r + 28], edx + + // store shifted amount offset by -count/32 bits + shr ecx, 5 + and ecx, 3 + neg ecx + mov dword ptr[r + ecx * 4 + 16], edi + mov dword ptr[r + ecx * 4 + 20], esi + mov dword ptr[r + ecx * 4 + 24], ebx + mov dword ptr[r + ecx * 4 + 28], eax + } +#else + r[2] = src->lo; + r[3] = src->hi; + + amount &= 127; + if (amount >= 64) { + r[2] = r[3] >> (amount - 64); + r[3] = 0; + } else if (amount) { +#ifdef _M_X64 + r[2] = __shiftright128(r[2], r[3], (char) amount); +#else + r[2] = (r[2] >> amount) | (r[3] << (64 - amount)); +#endif + r[3] = r[3] >> amount; + } +#endif + + dst->lo = r[2]; + dst->hi = r[3]; + R128_DEBUG_SET(dst); +} + +void r128Sar(R128 *dst, const R128 *src, int amount) +{ + R128_U64 r[4]; + + R128_ASSERT(dst != NULL); + R128_ASSERT(src != NULL); + +#if defined(_M_IX86) && !defined(R128_STDC_ONLY) + __asm { + // load src + mov edx, dword ptr[src] + mov ecx, amount + + mov edi, dword ptr[edx] + mov esi, dword ptr[edx + 4] + mov ebx, dword ptr[edx + 8] + mov eax, dword ptr[edx + 12] + + // shift mod 32 + shrd edi, esi, cl + shrd esi, ebx, cl + shrd ebx, eax, cl + sar eax, cl + + // copy sign to high 12 bytes of stack + cdq + mov dword ptr[r + 20], edx + mov dword ptr[r + 24], edx + mov dword ptr[r + 28], edx + + // store shifted amount offset by -count/32 bits + shr ecx, 5 + and ecx, 3 + neg ecx + mov dword ptr[r + ecx * 4 + 16], edi + mov dword ptr[r + ecx * 4 + 20], esi + mov dword ptr[r + ecx * 4 + 24], ebx + mov dword ptr[r + ecx * 4 + 28], eax + } +#else + r[2] = src->lo; + r[3] = src->hi; + + amount &= 127; + if (amount >= 64) { + r[2] = (R128_U64)((R128_S64)r[3] >> (amount - 64)); + r[3] = (R128_U64)((R128_S64)r[3] 
>> 63); + } else if (amount) { + r[2] = (r[2] >> amount) | (R128_U64)((R128_S64)r[3] << (64 - amount)); + r[3] = (R128_U64)((R128_S64)r[3] >> amount); + } +#endif + + dst->lo = r[2]; + dst->hi = r[3]; + R128_DEBUG_SET(dst); +} + +void r128Add(R128 *dst, const R128 *a, const R128 *b) +{ + unsigned char carry = 0; + R128_ASSERT(dst != NULL); + R128_ASSERT(a != NULL); + R128_ASSERT(b != NULL); + +#if R128_INTEL && !defined(R128_STDC_ONLY) +# if R128_64BIT + carry = _addcarry_u64(carry, a->lo, b->lo, &dst->lo); + carry = _addcarry_u64(carry, a->hi, b->hi, &dst->hi); +# else + R128_U32 r0, r1, r2, r3; + carry = _addcarry_u32(carry, R128_R0(a), R128_R0(b), &r0); + carry = _addcarry_u32(carry, R128_R1(a), R128_R1(b), &r1); + carry = _addcarry_u32(carry, R128_R2(a), R128_R2(b), &r2); + carry = _addcarry_u32(carry, R128_R3(a), R128_R3(b), &r3); + R128_SET4(dst, r0, r1, r2, r3); +# endif //R128_64BIT +#else + { + R128_U64 r = a->lo + b->lo; + carry = r < a->lo; + dst->lo = r; + dst->hi = a->hi + b->hi + carry; + } +#endif //R128_INTEL + + R128_DEBUG_SET(dst); +} + +void r128Sub(R128 *dst, const R128 *a, const R128 *b) +{ + unsigned char borrow = 0; + R128_ASSERT(dst != NULL); + R128_ASSERT(a != NULL); + R128_ASSERT(b != NULL); + +#if R128_INTEL && !defined(R128_STDC_ONLY) +# if R128_64BIT + borrow = _subborrow_u64(borrow, a->lo, b->lo, &dst->lo); + borrow = _subborrow_u64(borrow, a->hi, b->hi, &dst->hi); +# else + R128_U32 r0, r1, r2, r3; + borrow = _subborrow_u32(borrow, R128_R0(a), R128_R0(b), &r0); + borrow = _subborrow_u32(borrow, R128_R1(a), R128_R1(b), &r1); + borrow = _subborrow_u32(borrow, R128_R2(a), R128_R2(b), &r2); + borrow = _subborrow_u32(borrow, R128_R3(a), R128_R3(b), &r3); + R128_SET4(dst, r0, r1, r2, r3); +# endif //R128_64BIT +#else + { + R128_U64 r = a->lo - b->lo; + borrow = r > a->lo; + dst->lo = r; + dst->hi = a->hi - b->hi - borrow; + } +#endif //R128_INTEL + + R128_DEBUG_SET(dst); +} + +void r128Mul(R128 *dst, const R128 *a, const R128 *b) +{ + int 
sign = 0; + R128 ta, tb, tc; + + R128_ASSERT(dst != NULL); + R128_ASSERT(a != NULL); + R128_ASSERT(b != NULL); + + R128_SET2(&ta, a->lo, a->hi); + R128_SET2(&tb, b->lo, b->hi); + + if (r128IsNeg(&ta)) { + r128__neg(&ta, &ta); + sign = !sign; + } + if (r128IsNeg(&tb)) { + r128__neg(&tb, &tb); + sign = !sign; + } + + r128__umul(&tc, &ta, &tb); + if (sign) { + r128__neg(&tc, &tc); + } + + r128Copy(dst, &tc); +} + +void r128Div(R128 *dst, const R128 *a, const R128 *b) +{ + int sign = 0; + R128 tn, td, tq; + + R128_ASSERT(dst != NULL); + R128_ASSERT(a != NULL); + R128_ASSERT(b != NULL); + + R128_SET2(&tn, a->lo, a->hi); + R128_SET2(&td, b->lo, b->hi); + + if (r128IsNeg(&tn)) { + r128__neg(&tn, &tn); + sign = !sign; + } + + if (td.lo == 0 && td.hi == 0) { + // divide by zero + if (sign) { + r128Copy(dst, &R128_min); + } else { + r128Copy(dst, &R128_max); + } + return; + } else if (r128IsNeg(&td)) { + r128__neg(&td, &td); + sign = !sign; + } + + r128__udiv(&tq, &tn, &td); + + if (sign) { + r128__neg(&tq, &tq); + } + + r128Copy(dst, &tq); +} + +void r128Mod(R128 *dst, const R128 *a, const R128 *b) +{ + int sign = 0; + R128 tn, td, tq; + + R128_ASSERT(dst != NULL); + R128_ASSERT(a != NULL); + R128_ASSERT(b != NULL); + + R128_SET2(&tn, a->lo, a->hi); + R128_SET2(&td, b->lo, b->hi); + + if (r128IsNeg(&tn)) { + r128__neg(&tn, &tn); + sign = !sign; + } + + if (td.lo == 0 && td.hi == 0) { + // divide by zero + if (sign) { + r128Copy(dst, &R128_min); + } else { + r128Copy(dst, &R128_max); + } + return; + } else if (r128IsNeg(&td)) { + r128__neg(&td, &td); + sign = !sign; + } + + tq.hi = r128__umod(&tn, &td); + tq.lo = 0; + + if (sign) { + tq.hi = ~tq.hi + 1; + } + + r128Mul(&tq, &tq, b); + r128Sub(dst, a, &tq); +} + +void r128Rsqrt(R128 *dst, const R128 *v) +{ + static const R128 threeHalves = { R128_LIT_U64(0x8000000000000000), 1 }; + R128 x, est; + int i; + + if ((R128_S64)v->hi < 0) { + r128Copy(dst, &R128_min); + return; + } + + R128_SET2(&x, v->lo, v->hi); + + // get initial 
estimate + if (x.hi) { + int shift = (64 + r128__clz64(x.hi)) >> 1; + est.lo = R128_LIT_U64(1) << shift; + est.hi = 0; + } else if (x.lo) { + int shift = r128__clz64(x.lo) >> 1; + est.hi = R128_LIT_U64(1) << shift; + est.lo = 0; + } else { + R128_SET2(dst, 0, 0); + return; + } + + // x /= 2 + r128Shr(&x, &x, 1); + + // Newton-Raphson iterate + for (i = 0; i < 7; ++i) { + R128 newEst; + + // newEst = est * (threeHalves - (x / 2) * est * est); + r128__umul(&newEst, &est, &est); + r128__umul(&newEst, &newEst, &x); + r128Sub(&newEst, &threeHalves, &newEst); + r128__umul(&newEst, &est, &newEst); + + if (newEst.lo == est.lo && newEst.hi == est.hi) { + break; + } + R128_SET2(&est, newEst.lo, newEst.hi); + } + + r128Copy(dst, &est); +} + +void r128Sqrt(R128 *dst, const R128 *v) +{ + R128 x, est; + int i; + + if ((R128_S64)v->hi < 0) { + r128Copy(dst, &R128_min); + return; + } + + R128_SET2(&x, v->lo, v->hi); + + // get initial estimate + if (x.hi) { + int shift = (63 - r128__clz64(x.hi)) >> 1; + r128Shr(&est, &x, shift); + } else if (x.lo) { + int shift = (1 + r128__clz64(x.lo)) >> 1; + r128Shl(&est, &x, shift); + } else { + R128_SET2(dst, 0, 0); + return; + } + + // Newton-Raphson iterate + for (i = 0; i < 7; ++i) { + R128 newEst; + + // newEst = (est + x / est) / 2 + r128__udiv(&newEst, &x, &est); + r128Add(&newEst, &newEst, &est); + r128Shr(&newEst, &newEst, 1); + + if (newEst.lo == est.lo && newEst.hi == est.hi) { + break; + } + R128_SET2(&est, newEst.lo, newEst.hi); + } + + r128Copy(dst, &est); +} + +int r128Cmp(const R128 *a, const R128 *b) +{ + R128_ASSERT(a != NULL); + R128_ASSERT(b != NULL); + + if (a->hi == b->hi) { + if (a->lo == b->lo) { + return 0; + } else if (a->lo > b->lo) { + return 1; + } else { + return -1; + } + } else if ((R128_S64)a->hi > (R128_S64)b->hi) { + return 1; + } else { + return -1; + } +} + +int r128IsNeg(const R128 *v) +{ + R128_ASSERT(v != NULL); + + return (R128_S64)v->hi < 0; +} + +void r128Min(R128 *dst, const R128 *a, const R128 *b) 
+{ + R128_ASSERT(dst != NULL); + R128_ASSERT(a != NULL); + R128_ASSERT(b != NULL); + + if (r128Cmp(a, b) < 0) { + r128Copy(dst, a); + } else { + r128Copy(dst, b); + } +} + +void r128Max(R128 *dst, const R128 *a, const R128 *b) +{ + R128_ASSERT(dst != NULL); + R128_ASSERT(a != NULL); + R128_ASSERT(b != NULL); + + if (r128Cmp(a, b) > 0) { + r128Copy(dst, a); + } else { + r128Copy(dst, b); + } +} + +void r128Floor(R128 *dst, const R128 *v) +{ + R128_ASSERT(dst != NULL); + R128_ASSERT(v != NULL); + + if ((R128_S64)v->hi < 0) { + dst->hi = v->hi - (v->lo != 0); + } else { + dst->hi = v->hi; + } + dst->lo = 0; + R128_DEBUG_SET(dst); +} + +void r128Ceil(R128 *dst, const R128 *v) +{ + R128_ASSERT(dst != NULL); + R128_ASSERT(v != NULL); + + if ((R128_S64)v->hi > 0) { + dst->hi = v->hi + (v->lo != 0); + } else { + dst->hi = v->hi; + } + dst->lo = 0; + R128_DEBUG_SET(dst); +} + +#endif //R128_IMPLEMENTATION diff --git a/thirdparty/misc/stb_rect_pack.h b/thirdparty/misc/stb_rect_pack.h new file mode 100644 index 0000000000..5c848de0e7 --- /dev/null +++ b/thirdparty/misc/stb_rect_pack.h @@ -0,0 +1,628 @@ +// stb_rect_pack.h - v1.00 - public domain - rectangle packing +// Sean Barrett 2014 +// +// Useful for e.g. packing rectangular textures into an atlas. +// Does not do rotation. +// +// Not necessarily the awesomest packing method, but better than +// the totally naive one in stb_truetype (which is primarily what +// this is meant to replace). +// +// Has only had a few tests run, may have issues. +// +// More docs to come. +// +// No memory allocations; uses qsort() and assert() from stdlib. +// Can override those by defining STBRP_SORT and STBRP_ASSERT. +// +// This library currently uses the Skyline Bottom-Left algorithm. +// +// Please note: better rectangle packers are welcome! Please +// implement them to the same API, but with a different init +// function. 
+// +// Credits +// +// Library +// Sean Barrett +// Minor features +// Martins Mozeiko +// github:IntellectualKitty +// +// Bugfixes / warning fixes +// Jeremy Jaussaud +// Fabian Giesen +// +// Version history: +// +// 1.00 (2019-02-25) avoid small space waste; gracefully fail too-wide rectangles +// 0.99 (2019-02-07) warning fixes +// 0.11 (2017-03-03) return packing success/fail result +// 0.10 (2016-10-25) remove cast-away-const to avoid warnings +// 0.09 (2016-08-27) fix compiler warnings +// 0.08 (2015-09-13) really fix bug with empty rects (w=0 or h=0) +// 0.07 (2015-09-13) fix bug with empty rects (w=0 or h=0) +// 0.06 (2015-04-15) added STBRP_SORT to allow replacing qsort +// 0.05: added STBRP_ASSERT to allow replacing assert +// 0.04: fixed minor bug in STBRP_LARGE_RECTS support +// 0.01: initial release +// +// LICENSE +// +// See end of file for license information. + +////////////////////////////////////////////////////////////////////////////// +// +// INCLUDE SECTION +// + +#ifndef STB_INCLUDE_STB_RECT_PACK_H +#define STB_INCLUDE_STB_RECT_PACK_H + +#define STB_RECT_PACK_VERSION 1 + +#ifdef STBRP_STATIC +#define STBRP_DEF static +#else +#define STBRP_DEF extern +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct stbrp_context stbrp_context; +typedef struct stbrp_node stbrp_node; +typedef struct stbrp_rect stbrp_rect; + +#ifdef STBRP_LARGE_RECTS +typedef int stbrp_coord; +#else +typedef unsigned short stbrp_coord; +#endif + +STBRP_DEF int stbrp_pack_rects (stbrp_context *context, stbrp_rect *rects, int num_rects); +// Assign packed locations to rectangles. The rectangles are of type +// 'stbrp_rect' defined below, stored in the array 'rects', and there +// are 'num_rects' many of them. +// +// Rectangles which are successfully packed have the 'was_packed' flag +// set to a non-zero value and 'x' and 'y' store the minimum location +// on each axis (i.e. 
bottom-left in cartesian coordinates, top-left +// if you imagine y increasing downwards). Rectangles which do not fit +// have the 'was_packed' flag set to 0. +// +// You should not try to access the 'rects' array from another thread +// while this function is running, as the function temporarily reorders +// the array while it executes. +// +// To pack into another rectangle, you need to call stbrp_init_target +// again. To continue packing into the same rectangle, you can call +// this function again. Calling this multiple times with multiple rect +// arrays will probably produce worse packing results than calling it +// a single time with the full rectangle array, but the option is +// available. +// +// The function returns 1 if all of the rectangles were successfully +// packed and 0 otherwise. + +struct stbrp_rect +{ + // reserved for your use: + int id; + + // input: + stbrp_coord w, h; + + // output: + stbrp_coord x, y; + int was_packed; // non-zero if valid packing + +}; // 16 bytes, nominally + + +STBRP_DEF void stbrp_init_target (stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes); +// Initialize a rectangle packer to: +// pack a rectangle that is 'width' by 'height' in dimensions +// using temporary storage provided by the array 'nodes', which is 'num_nodes' long +// +// You must call this function every time you start packing into a new target. +// +// There is no "shutdown" function. The 'nodes' memory must stay valid for +// the following stbrp_pack_rects() call (or calls), but can be freed after +// the call (or calls) finish. +// +// Note: to guarantee best results, either: +// 1. make sure 'num_nodes' >= 'width' +// or 2. call stbrp_allow_out_of_mem() defined below with 'allow_out_of_mem = 1' +// +// If you don't do either of the above things, widths will be quantized to multiples +// of small integers to guarantee the algorithm doesn't run out of temporary storage. 
+#ifndef STBRP_SORT
+#include <stdlib.h>
+#define STBRP_SORT qsort
+#endif
+
+#ifndef STBRP_ASSERT
+#include <assert.h>
+#define STBRP_ASSERT assert
+#endif
STBRP__INIT_skyline = 1 +}; + +STBRP_DEF void stbrp_setup_heuristic(stbrp_context *context, int heuristic) +{ + switch (context->init_mode) { + case STBRP__INIT_skyline: + STBRP_ASSERT(heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight || heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight); + context->heuristic = heuristic; + break; + default: + STBRP_ASSERT(0); + } +} + +STBRP_DEF void stbrp_setup_allow_out_of_mem(stbrp_context *context, int allow_out_of_mem) +{ + if (allow_out_of_mem) + // if it's ok to run out of memory, then don't bother aligning them; + // this gives better packing, but may fail due to OOM (even though + // the rectangles easily fit). @TODO a smarter approach would be to only + // quantize once we've hit OOM, then we could get rid of this parameter. + context->align = 1; + else { + // if it's not ok to run out of memory, then quantize the widths + // so that num_nodes is always enough nodes. + // + // I.e. num_nodes * align >= width + // align >= width / num_nodes + // align = ceil(width/num_nodes) + + context->align = (context->width + context->num_nodes-1) / context->num_nodes; + } +} + +STBRP_DEF void stbrp_init_target(stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes) +{ + int i; +#ifndef STBRP_LARGE_RECTS + STBRP_ASSERT(width <= 0xffff && height <= 0xffff); +#endif + + for (i=0; i < num_nodes-1; ++i) + nodes[i].next = &nodes[i+1]; + nodes[i].next = NULL; + context->init_mode = STBRP__INIT_skyline; + context->heuristic = STBRP_HEURISTIC_Skyline_default; + context->free_head = &nodes[0]; + context->active_head = &context->extra[0]; + context->width = width; + context->height = height; + context->num_nodes = num_nodes; + stbrp_setup_allow_out_of_mem(context, 0); + + // node 0 is the full width, node 1 is the sentinel (lets us not store width explicitly) + context->extra[0].x = 0; + context->extra[0].y = 0; + context->extra[0].next = &context->extra[1]; + context->extra[1].x = (stbrp_coord) width; +#ifdef 
STBRP_LARGE_RECTS + context->extra[1].y = (1<<30); +#else + context->extra[1].y = 65535; +#endif + context->extra[1].next = NULL; +} + +// find minimum y position if it starts at x1 +static int stbrp__skyline_find_min_y(stbrp_context *c, stbrp_node *first, int x0, int width, int *pwaste) +{ + stbrp_node *node = first; + int x1 = x0 + width; + int min_y, visited_width, waste_area; + + STBRP__NOTUSED(c); + + STBRP_ASSERT(first->x <= x0); + + #if 0 + // skip in case we're past the node + while (node->next->x <= x0) + ++node; + #else + STBRP_ASSERT(node->next->x > x0); // we ended up handling this in the caller for efficiency + #endif + + STBRP_ASSERT(node->x <= x0); + + min_y = 0; + waste_area = 0; + visited_width = 0; + while (node->x < x1) { + if (node->y > min_y) { + // raise min_y higher. + // we've accounted for all waste up to min_y, + // but we'll now add more waste for everything we've visted + waste_area += visited_width * (node->y - min_y); + min_y = node->y; + // the first time through, visited_width might be reduced + if (node->x < x0) + visited_width += node->next->x - x0; + else + visited_width += node->next->x - node->x; + } else { + // add waste area + int under_width = node->next->x - node->x; + if (under_width + visited_width > width) + under_width = width - visited_width; + waste_area += under_width * (min_y - node->y); + visited_width += under_width; + } + node = node->next; + } + + *pwaste = waste_area; + return min_y; +} + +typedef struct +{ + int x,y; + stbrp_node **prev_link; +} stbrp__findresult; + +static stbrp__findresult stbrp__skyline_find_best_pos(stbrp_context *c, int width, int height) +{ + int best_waste = (1<<30), best_x, best_y = (1 << 30); + stbrp__findresult fr; + stbrp_node **prev, *node, *tail, **best = NULL; + + // align to multiple of c->align + width = (width + c->align - 1); + width -= width % c->align; + STBRP_ASSERT(width % c->align == 0); + + // if it can't possibly fit, bail immediately + if (width > c->width || height > 
c->height) { + fr.prev_link = NULL; + fr.x = fr.y = 0; + return fr; + } + + node = c->active_head; + prev = &c->active_head; + while (node->x + width <= c->width) { + int y,waste; + y = stbrp__skyline_find_min_y(c, node, node->x, width, &waste); + if (c->heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight) { // actually just want to test BL + // bottom left + if (y < best_y) { + best_y = y; + best = prev; + } + } else { + // best-fit + if (y + height <= c->height) { + // can only use it if it first vertically + if (y < best_y || (y == best_y && waste < best_waste)) { + best_y = y; + best_waste = waste; + best = prev; + } + } + } + prev = &node->next; + node = node->next; + } + + best_x = (best == NULL) ? 0 : (*best)->x; + + // if doing best-fit (BF), we also have to try aligning right edge to each node position + // + // e.g, if fitting + // + // ____________________ + // |____________________| + // + // into + // + // | | + // | ____________| + // |____________| + // + // then right-aligned reduces waste, but bottom-left BL is always chooses left-aligned + // + // This makes BF take about 2x the time + + if (c->heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight) { + tail = c->active_head; + node = c->active_head; + prev = &c->active_head; + // find first node that's admissible + while (tail->x < width) + tail = tail->next; + while (tail) { + int xpos = tail->x - width; + int y,waste; + STBRP_ASSERT(xpos >= 0); + // find the left position that matches this + while (node->next->x <= xpos) { + prev = &node->next; + node = node->next; + } + STBRP_ASSERT(node->next->x > xpos && node->x <= xpos); + y = stbrp__skyline_find_min_y(c, node, xpos, width, &waste); + if (y + height <= c->height) { + if (y <= best_y) { + if (y < best_y || waste < best_waste || (waste==best_waste && xpos < best_x)) { + best_x = xpos; + STBRP_ASSERT(y <= best_y); + best_y = y; + best_waste = waste; + best = prev; + } + } + } + tail = tail->next; + } + } + + fr.prev_link = best; + fr.x = best_x; + 
fr.y = best_y; + return fr; +} + +static stbrp__findresult stbrp__skyline_pack_rectangle(stbrp_context *context, int width, int height) +{ + // find best position according to heuristic + stbrp__findresult res = stbrp__skyline_find_best_pos(context, width, height); + stbrp_node *node, *cur; + + // bail if: + // 1. it failed + // 2. the best node doesn't fit (we don't always check this) + // 3. we're out of memory + if (res.prev_link == NULL || res.y + height > context->height || context->free_head == NULL) { + res.prev_link = NULL; + return res; + } + + // on success, create new node + node = context->free_head; + node->x = (stbrp_coord) res.x; + node->y = (stbrp_coord) (res.y + height); + + context->free_head = node->next; + + // insert the new node into the right starting point, and + // let 'cur' point to the remaining nodes needing to be + // stiched back in + + cur = *res.prev_link; + if (cur->x < res.x) { + // preserve the existing one, so start testing with the next one + stbrp_node *next = cur->next; + cur->next = node; + cur = next; + } else { + *res.prev_link = node; + } + + // from here, traverse cur and free the nodes, until we get to one + // that shouldn't be freed + while (cur->next && cur->next->x <= res.x + width) { + stbrp_node *next = cur->next; + // move the current node to the free list + cur->next = context->free_head; + context->free_head = cur; + cur = next; + } + + // stitch the list back in + node->next = cur; + + if (cur->x < res.x + width) + cur->x = (stbrp_coord) (res.x + width); + +#ifdef _DEBUG + cur = context->active_head; + while (cur->x < context->width) { + STBRP_ASSERT(cur->x < cur->next->x); + cur = cur->next; + } + STBRP_ASSERT(cur->next == NULL); + + { + int count=0; + cur = context->active_head; + while (cur) { + cur = cur->next; + ++count; + } + cur = context->free_head; + while (cur) { + cur = cur->next; + ++count; + } + STBRP_ASSERT(count == context->num_nodes+2); + } +#endif + + return res; +} + +static int 
rect_height_compare(const void *a, const void *b) +{ + const stbrp_rect *p = (const stbrp_rect *) a; + const stbrp_rect *q = (const stbrp_rect *) b; + if (p->h > q->h) + return -1; + if (p->h < q->h) + return 1; + return (p->w > q->w) ? -1 : (p->w < q->w); +} + +static int rect_original_order(const void *a, const void *b) +{ + const stbrp_rect *p = (const stbrp_rect *) a; + const stbrp_rect *q = (const stbrp_rect *) b; + return (p->was_packed < q->was_packed) ? -1 : (p->was_packed > q->was_packed); +} + +#ifdef STBRP_LARGE_RECTS +#define STBRP__MAXVAL 0xffffffff +#else +#define STBRP__MAXVAL 0xffff +#endif + +STBRP_DEF int stbrp_pack_rects(stbrp_context *context, stbrp_rect *rects, int num_rects) +{ + int i, all_rects_packed = 1; + + // we use the 'was_packed' field internally to allow sorting/unsorting + for (i=0; i < num_rects; ++i) { + rects[i].was_packed = i; + } + + // sort according to heuristic + STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_height_compare); + + for (i=0; i < num_rects; ++i) { + if (rects[i].w == 0 || rects[i].h == 0) { + rects[i].x = rects[i].y = 0; // empty rect needs no space + } else { + stbrp__findresult fr = stbrp__skyline_pack_rectangle(context, rects[i].w, rects[i].h); + if (fr.prev_link) { + rects[i].x = (stbrp_coord) fr.x; + rects[i].y = (stbrp_coord) fr.y; + } else { + rects[i].x = rects[i].y = STBRP__MAXVAL; + } + } + } + + // unsort + STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_original_order); + + // set was_packed flags and all_rects_packed status + for (i=0; i < num_rects; ++i) { + rects[i].was_packed = !(rects[i].x == STBRP__MAXVAL && rects[i].y == STBRP__MAXVAL); + if (!rects[i].was_packed) + all_rects_packed = 0; + } + + // return the all_rects_packed status + return all_rects_packed; +} +#endif + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/thirdparty/oidn/.gitignore b/thirdparty/oidn/.gitignore deleted file mode 100644 index 6be206fc29..0000000000 --- a/thirdparty/oidn/.gitignore +++ /dev/null @@ -1 +0,0 @@ -weights/rtlightmap_hdr.cpp diff --git a/thirdparty/oidn/LICENSE.txt b/thirdparty/oidn/LICENSE.txt new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/thirdparty/oidn/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/thirdparty/r128/r128.h b/thirdparty/r128/r128.h deleted file mode 100644 index 58933d7638..0000000000 --- a/thirdparty/r128/r128.h +++ /dev/null @@ -1,2124 +0,0 @@ -/* -r128.h: 128-bit (64.64) signed fixed-point arithmetic. Version 1.4.3 - -COMPILATION ------------ -Drop this header file somewhere in your project and include it wherever it is -needed. There is no separate .c file for this library. To get the code, in ONE -file in your project, put: - -#define R128_IMPLEMENTATION - -before you include this file. You may also provide a definition for R128_ASSERT -to force the library to use a custom assert macro. - -COMPILER/LIBRARY SUPPORT ------------------------- -This library requires a C89 compiler with support for 64-bit integers. If your -compiler does not support the long long data type, the R128_U64, etc. macros -must be set appropriately. On x86 and x64 targets, Intel intrinsics are used -for speed. If your compiler does not support these intrinsics, you can add -#define R128_STDC_ONLY -in your implementation file before including r128.h. - -The only C runtime library functionality used by this library is . -This can be avoided by defining an R128_ASSERT macro in your implementation -file. Since this library uses 64-bit arithmetic, this may implicitly add a -runtime library dependency on 32-bit platforms. 
- -C++ SUPPORT ------------ -Operator overloads are supplied for C++ files that include this file. Since all -C++ functions are declared inline (or static inline), the R128_IMPLEMENTATION -file can be either C++ or C. - -LICENSE -------- -This is free and unencumbered software released into the public domain. - -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef H_R128_H -#define H_R128_H - -#include - -// 64-bit integer support -// If your compiler does not have stdint.h, add appropriate defines for these macros. 
-#if defined(_MSC_VER) && (_MSC_VER < 1600) -# define R128_S32 __int32 -# define R128_U32 unsigned __int32 -# define R128_S64 __int64 -# define R128_U64 unsigned __int64 -# define R128_LIT_S64(x) x##i64 -# define R128_LIT_U64(x) x##ui64 -#else -# include -# define R128_S32 int32_t -# define R128_U32 uint32_t -# define R128_S64 int64_t -# define R128_U64 long long unsigned int -# define R128_LIT_S64(x) x##ll -# define R128_LIT_U64(x) x##ull -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct R128 { - R128_U64 lo; - R128_U64 hi; - -#ifdef __cplusplus - R128(); - R128(double); - R128(int); - R128(R128_S64); - R128(R128_U64 low, R128_U64 high); - - operator double() const; - operator R128_S64() const; - operator int() const; - operator bool() const; - - bool operator!() const; - R128 operator~() const; - R128 operator-() const; - R128 &operator|=(const R128 &rhs); - R128 &operator&=(const R128 &rhs); - R128 &operator^=(const R128 &rhs); - R128 &operator+=(const R128 &rhs); - R128 &operator-=(const R128 &rhs); - R128 &operator*=(const R128 &rhs); - R128 &operator/=(const R128 &rhs); - R128 &operator%=(const R128 &rhs); - R128 &operator<<=(int amount); - R128 &operator>>=(int amount); -#endif //__cplusplus -} R128; - -// Type conversion -extern void r128FromInt(R128 *dst, R128_S64 v); -extern void r128FromFloat(R128 *dst, double v); -extern R128_S64 r128ToInt(const R128 *v); -extern double r128ToFloat(const R128 *v); - -// Copy -extern void r128Copy(R128 *dst, const R128 *src); - -// Negate -extern void r128Neg(R128 *dst, const R128 *src); - -// Bitwise operations -extern void r128Not(R128 *dst, const R128 *src); // ~a -extern void r128Or(R128 *dst, const R128 *a, const R128 *b); // a | b -extern void r128And(R128 *dst, const R128 *a, const R128 *b); // a & b -extern void r128Xor(R128 *dst, const R128 *a, const R128 *b); // a ^ b -extern void r128Shl(R128 *dst, const R128 *src, int amount); // shift left by amount mod 128 -extern void r128Shr(R128 *dst, 
const R128 *src, int amount); // shift right logical by amount mod 128 -extern void r128Sar(R128 *dst, const R128 *src, int amount); // shift right arithmetic by amount mod 128 - -// Arithmetic -extern void r128Add(R128 *dst, const R128 *a, const R128 *b); // a + b -extern void r128Sub(R128 *dst, const R128 *a, const R128 *b); // a - b -extern void r128Mul(R128 *dst, const R128 *a, const R128 *b); // a * b -extern void r128Div(R128 *dst, const R128 *a, const R128 *b); // a / b -extern void r128Mod(R128 *dst, const R128 *a, const R128 *b); // a - toInt(a / b) * b - -extern void r128Sqrt(R128 *dst, const R128 *v); // sqrt(v) -extern void r128Rsqrt(R128 *dst, const R128 *v); // 1 / sqrt(v) - -// Comparison -extern int r128Cmp(const R128 *a, const R128 *b); // sign of a-b -extern void r128Min(R128 *dst, const R128 *a, const R128 *b); -extern void r128Max(R128 *dst, const R128 *a, const R128 *b); -extern void r128Floor(R128 *dst, const R128 *v); -extern void r128Ceil(R128 *dst, const R128 *v); -extern int r128IsNeg(const R128 *v); // quick check for < 0 - -// String conversion -// -typedef enum R128ToStringSign { - R128ToStringSign_Default, // no sign character for positive values - R128ToStringSign_Space, // leading space for positive values - R128ToStringSign_Plus, // leading '+' for positive values -} R128ToStringSign; - -// Formatting options for use with r128ToStringOpt. The "defaults" correspond -// to a format string of "%f". -// -typedef struct R128ToStringFormat { - // sign character for positive values. Default is R128ToStringSign_Default. - R128ToStringSign sign; - - // minimum number of characters to write. Default is 0. - int width; - - // place to the right of the decimal at which rounding is performed. If negative, - // a maximum of 20 decimal places will be written, with no trailing zeroes. - // (20 places is sufficient to ensure that r128FromString will convert back to the - // original value.) Default is -1. 
NOTE: This is not the same default that the C - // standard library uses for %f. - int precision; - - // If non-zero, pads the output string with leading zeroes if the final result is - // fewer than width characters. Otherwise, leading spaces are used. Default is 0. - int zeroPad; - - // Always print a decimal point, even if the value is an integer. Default is 0. - int decimal; - - // Left-align output if width specifier requires padding. - // Default is 0 (right align). - int leftAlign; -} R128ToStringFormat; - -// r128ToStringOpt: convert R128 to a decimal string, with formatting. -// -// dst and dstSize: specify the buffer to write into. At most dstSize bytes will be written -// (including null terminator). No additional rounding is performed if dstSize is not large -// enough to hold the entire string. -// -// opt: an R128ToStringFormat struct (q.v.) with formatting options. -// -// Uses the R128_decimal global as the decimal point character. -// Always writes a null terminator, even if the destination buffer is not large enough. -// -// Number of bytes that will be written (i.e. how big does dst need to be?): -// If width is specified: width + 1 bytes. -// If precision is specified: at most precision + 22 bytes. -// If neither is specified: at most 42 bytes. -// -// Returns the number of bytes that would have been written if dst was sufficiently large, -// not including the final null terminator. -// -extern int r128ToStringOpt(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *opt); - -// r128ToStringf: convert R128 to a decimal string, with formatting. -// -// dst and dstSize: specify the buffer to write into. At most dstSize bytes will be written -// (including null terminator). -// -// format: a printf-style format specifier, as one would use with floating point types. -// e.g. "%+5.2f". (The leading % and trailing f are optional.) -// NOTE: This is NOT a full replacement for sprintf. 
Any characters in the format string -// that do not correspond to a format placeholder are ignored. -// -// Uses the R128_decimal global as the decimal point character. -// Always writes a null terminator, even if the destination buffer is not large enough. -// -// Number of bytes that will be written (i.e. how big does dst need to be?): -// If the precision field is specified: at most max(width, precision + 21) + 1 bytes -// Otherwise: at most max(width, 41) + 1 bytes. -// -// Returns the number of bytes that would have been written if dst was sufficiently large, -// not including the final null terminator. -// -extern int r128ToStringf(char *dst, size_t dstSize, const char *format, const R128 *v); - -// r128ToString: convert R128 to a decimal string, with default formatting. -// Equivalent to r128ToStringf(dst, dstSize, "%f", v). -// -// Uses the R128_decimal global as the decimal point character. -// Always writes a null terminator, even if the destination buffer is not large enough. -// -// Will write at most 42 bytes (including NUL) to dst. -// -// Returns the number of bytes that would have been written if dst was sufficiently large, -// not including the final null terminator. -// -extern int r128ToString(char *dst, size_t dstSize, const R128 *v); - -// r128FromString: Convert string to R128. -// -// The string can be formatted either as a decimal number with optional sign -// or as hexadecimal with a prefix of 0x or 0X. -// -// endptr, if not NULL, is set to the character following the last character -// used in the conversion. -// -extern void r128FromString(R128 *dst, const char *s, char **endptr); - -// Constants -extern const R128 R128_min; // minimum (most negative) value -extern const R128 R128_max; // maximum (most positive) value -extern const R128 R128_smallest; // smallest positive value -extern const R128 R128_zero; // zero -extern const R128 R128_one; // 1.0 - -extern char R128_decimal; // decimal point character used by r128From/ToString. 
defaults to '.' - -#ifdef __cplusplus -} - -#include -namespace std { -template<> -struct numeric_limits -{ - static const bool is_specialized = true; - - static R128 min() throw() { return R128_min; } - static R128 max() throw() { return R128_max; } - - static const int digits = 127; - static const int digits10 = 38; - static const bool is_signed = true; - static const bool is_integer = false; - static const bool is_exact = false; - static const int radix = 2; - static R128 epsilon() throw() { return R128_smallest; } - static R128 round_error() throw() { return R128_one; } - - static const int min_exponent = 0; - static const int min_exponent10 = 0; - static const int max_exponent = 0; - static const int max_exponent10 = 0; - - static const bool has_infinity = false; - static const bool has_quiet_NaN = false; - static const bool has_signaling_NaN = false; - static const float_denorm_style has_denorm = denorm_absent; - static const bool has_denorm_loss = false; - - static R128 infinity() throw() { return R128_zero; } - static R128 quiet_NaN() throw() { return R128_zero; } - static R128 signaling_NaN() throw() { return R128_zero; } - static R128 denorm_min() throw() { return R128_zero; } - - static const bool is_iec559 = false; - static const bool is_bounded = true; - static const bool is_modulo = true; - - static const bool traps = numeric_limits::traps; - static const bool tinyness_before = false; - static const float_round_style round_style = round_toward_zero; -}; -} //namespace std - -inline R128::R128() {} - -inline R128::R128(double v) -{ - r128FromFloat(this, v); -} - -inline R128::R128(int v) -{ - r128FromInt(this, v); -} - -inline R128::R128(R128_S64 v) -{ - r128FromInt(this, v); -} - -inline R128::R128(R128_U64 low, R128_U64 high) -{ - lo = low; - hi = high; -} - -inline R128::operator double() const -{ - return r128ToFloat(this); -} - -inline R128::operator R128_S64() const -{ - return r128ToInt(this); -} - -inline R128::operator int() const -{ - return 
(int) r128ToInt(this); -} - -inline R128::operator bool() const -{ - return lo || hi; -} - -inline bool R128::operator!() const -{ - return !lo && !hi; -} - -inline R128 R128::operator~() const -{ - R128 r; - r128Not(&r, this); - return r; -} - -inline R128 R128::operator-() const -{ - R128 r; - r128Neg(&r, this); - return r; -} - -inline R128 &R128::operator|=(const R128 &rhs) -{ - r128Or(this, this, &rhs); - return *this; -} - -inline R128 &R128::operator&=(const R128 &rhs) -{ - r128And(this, this, &rhs); - return *this; -} - -inline R128 &R128::operator^=(const R128 &rhs) -{ - r128Xor(this, this, &rhs); - return *this; -} - -inline R128 &R128::operator+=(const R128 &rhs) -{ - r128Add(this, this, &rhs); - return *this; -} - -inline R128 &R128::operator-=(const R128 &rhs) -{ - r128Sub(this, this, &rhs); - return *this; -} - -inline R128 &R128::operator*=(const R128 &rhs) -{ - r128Mul(this, this, &rhs); - return *this; -} - -inline R128 &R128::operator/=(const R128 &rhs) -{ - r128Div(this, this, &rhs); - return *this; -} - -inline R128 &R128::operator%=(const R128 &rhs) -{ - r128Mod(this, this, &rhs); - return *this; -} - -inline R128 &R128::operator<<=(int amount) -{ - r128Shl(this, this, amount); - return *this; -} - -inline R128 &R128::operator>>=(int amount) -{ - r128Sar(this, this, amount); - return *this; -} - -static inline R128 operator|(const R128 &lhs, const R128 &rhs) -{ - R128 r(lhs); - return r |= rhs; -} - -static inline R128 operator&(const R128 &lhs, const R128 &rhs) -{ - R128 r(lhs); - return r &= rhs; -} - -static inline R128 operator^(const R128 &lhs, const R128 &rhs) -{ - R128 r(lhs); - return r ^= rhs; -} - -static inline R128 operator+(const R128 &lhs, const R128 &rhs) -{ - R128 r(lhs); - return r += rhs; -} - -static inline R128 operator-(const R128 &lhs, const R128 &rhs) -{ - R128 r(lhs); - return r -= rhs; -} - -static inline R128 operator*(const R128 &lhs, const R128 &rhs) -{ - R128 r(lhs); - return r *= rhs; -} - -static inline R128 
operator/(const R128 &lhs, const R128 &rhs) -{ - R128 r(lhs); - return r /= rhs; -} - -static inline R128 operator%(const R128 &lhs, const R128 &rhs) -{ - R128 r(lhs); - return r %= rhs; -} - -static inline R128 operator<<(const R128 &lhs, int amount) -{ - R128 r(lhs); - return r <<= amount; -} - -static inline R128 operator>>(const R128 &lhs, int amount) -{ - R128 r(lhs); - return r >>= amount; -} - -static inline bool operator<(const R128 &lhs, const R128 &rhs) -{ - return r128Cmp(&lhs, &rhs) < 0; -} - -static inline bool operator>(const R128 &lhs, const R128 &rhs) -{ - return r128Cmp(&lhs, &rhs) > 0; -} - -static inline bool operator<=(const R128 &lhs, const R128 &rhs) -{ - return r128Cmp(&lhs, &rhs) <= 0; -} - -static inline bool operator>=(const R128 &lhs, const R128 &rhs) -{ - return r128Cmp(&lhs, &rhs) >= 0; -} - -static inline bool operator==(const R128 &lhs, const R128 &rhs) -{ - return lhs.lo == rhs.lo && lhs.hi == rhs.hi; -} - -static inline bool operator!=(const R128 &lhs, const R128 &rhs) -{ - return lhs.lo != rhs.lo || lhs.hi != rhs.hi; -} - -#endif //__cplusplus -#endif //H_R128_H - -#ifdef R128_IMPLEMENTATION - -#ifdef R128_DEBUG_VIS -# define R128_DEBUG_SET(x) r128ToString(R128_last, sizeof(R128_last), x) -#else -# define R128_DEBUG_SET(x) -#endif - -#define R128_SET2(x, l, h) do { (x)->lo = (R128_U64)(l); (x)->hi = (R128_U64)(h); } while(0) -#define R128_R0(x) ((R128_U32)(x)->lo) -#define R128_R2(x) ((R128_U32)(x)->hi) -#if defined(_M_IX86) -// workaround: MSVC x86's handling of 64-bit values is not great -# define R128_SET4(x, r0, r1, r2, r3) do { \ - ((R128_U32*)&(x)->lo)[0] = (R128_U32)(r0); \ - ((R128_U32*)&(x)->lo)[1] = (R128_U32)(r1); \ - ((R128_U32*)&(x)->hi)[0] = (R128_U32)(r2); \ - ((R128_U32*)&(x)->hi)[1] = (R128_U32)(r3); \ - } while(0) -# define R128_R1(x) (((R128_U32*)&(x)->lo)[1]) -# define R128_R3(x) (((R128_U32*)&(x)->hi)[1]) -#else -# define R128_SET4(x, r0, r1, r2, r3) do { (x)->lo = (R128_U64)(r0) | ((R128_U64)(r1) << 32); \ - 
(x)->hi = (R128_U64)(r2) | ((R128_U64)(r3) << 32); } while(0) -# define R128_R1(x) ((R128_U32)((x)->lo >> 32)) -# define R128_R3(x) ((R128_U32)((x)->hi >> 32)) -#endif - -#if defined(_M_X64) -# define R128_INTEL 1 -# define R128_64BIT 1 -# ifndef R128_STDC_ONLY -# include -# endif -#elif defined(__x86_64__) -# define R128_INTEL 1 -# define R128_64BIT 1 -# ifndef R128_STDC_ONLY -# include -# endif -#elif defined(_M_IX86) -# define R128_INTEL 1 -# ifndef R128_STDC_ONLY -# include -# endif -#elif defined(__i386__) -# define R128_INTEL 1 -# ifndef R128_STDC_ONLY -# include -# endif -#elif defined(_M_ARM) -# ifndef R128_STDC_ONLY -# include -# endif -#elif defined(_M_ARM64) -# define R128_64BIT 1 -# ifndef R128_STDC_ONLY -# include -# endif -#elif defined(__aarch64__) -# define R128_64BIT 1 -#endif - -#ifndef R128_INTEL -# define R128_INTEL 0 -#endif - -#ifndef R128_64BIT -# define R128_64BIT 0 -#endif - -#ifndef R128_ASSERT -# include -# define R128_ASSERT(x) assert(x) -#endif - -#include // for NULL - -static const R128ToStringFormat R128__defaultFormat = { - R128ToStringSign_Default, - 0, - -1, - 0, - 0, - 0 -}; - -const R128 R128_min = { 0, R128_LIT_U64(0x8000000000000000) }; -const R128 R128_max = { R128_LIT_U64(0xffffffffffffffff), R128_LIT_U64(0x7fffffffffffffff) }; -const R128 R128_smallest = { 1, 0 }; -const R128 R128_zero = { 0, 0 }; -const R128 R128_one = { 0, 1 }; -char R128_decimal = '.'; -#ifdef R128_DEBUG_VIS -char R128_last[42]; -#endif - -static int r128__clz64(R128_U64 x) -{ -#if defined(R128_STDC_ONLY) - R128_U64 n = 64, y; - y = x >> 32; if (y) { n -= 32; x = y; } - y = x >> 16; if (y) { n -= 16; x = y; } - y = x >> 8; if (y) { n -= 8; x = y; } - y = x >> 4; if (y) { n -= 4; x = y; } - y = x >> 2; if (y) { n -= 2; x = y; } - y = x >> 1; if (y) { n -= 1; x = y; } - return (int)(n - x); -#elif defined(_M_X64) || defined(_M_ARM64) - unsigned long idx; - if (_BitScanReverse64(&idx, x)) { - return 63 - (int)idx; - } else { - return 64; - } -#elif 
defined(_MSC_VER) - unsigned long idx; - if (_BitScanReverse(&idx, (R128_U32)(x >> 32))) { - return 31 - (int)idx; - } else if (_BitScanReverse(&idx, (R128_U32)x)) { - return 63 - (int)idx; - } else { - return 64; - } -#else - return x ? __builtin_clzll(x) : 64; -#endif -} - -#if !R128_64BIT -// 32*32->64 -static R128_U64 r128__umul64(R128_U32 a, R128_U32 b) -{ -# if defined(_M_IX86) && !defined(R128_STDC_ONLY) - return __emulu(a, b); -# elif defined(_M_ARM) && !defined(R128_STDC_ONLY) - return _arm_umull(a, b); -# else - return a * (R128_U64)b; -# endif -} - -// 64/32->32 -static R128_U32 r128__udiv64(R128_U32 nlo, R128_U32 nhi, R128_U32 d, R128_U32 *rem) -{ -# if defined(_M_IX86) && (_MSC_VER >= 1920) && !defined(R128_STDC_ONLY) - unsigned __int64 n = ((unsigned __int64)nhi << 32) | nlo; - return _udiv64(n, d, rem); -# elif defined(_M_IX86) && !defined(R128_STDC_ONLY) - __asm { - mov eax, nlo - mov edx, nhi - div d - mov ecx, rem - mov dword ptr [ecx], edx - } -# elif defined(__i386__) && !defined(R128_STDC_ONLY) - R128_U32 q, r; - __asm("divl %4" - : "=a"(q), "=d"(r) - : "a"(nlo), "d"(nhi), "X"(d)); - *rem = r; - return q; -# else - R128_U64 n64 = ((R128_U64)nhi << 32) | nlo; - *rem = (R128_U32)(n64 % d); - return (R128_U32)(n64 / d); -# endif -} -#elif !defined(_M_X64) || defined(R128_STDC_ONLY) -#define r128__umul64(a, b) ((a) * (R128_U64)(b)) -/*static R128_U32 r128__udiv64(R128_U32 nlo, R128_U32 nhi, R128_U32 d, R128_U32 *rem) -{ - R128_U64 n64 = ((R128_U64)nhi << 32) | nlo; - *rem = (R128_U32)(n64 % d); - return (R128_U32)(n64 / d); -}*/ -#endif //!R128_64BIT - -static void r128__neg(R128 *dst, const R128 *src) -{ - R128_ASSERT(dst != NULL); - R128_ASSERT(src != NULL); - -#if R128_INTEL && !defined(R128_STDC_ONLY) - { - unsigned char carry = 0; -# if R128_64BIT - carry = _addcarry_u64(carry, ~src->lo, 1, &dst->lo); - carry = _addcarry_u64(carry, ~src->hi, 0, &dst->hi); -# else - R128_U32 r0, r1, r2, r3; - carry = _addcarry_u32(carry, ~R128_R0(src), 1, &r0); 
- carry = _addcarry_u32(carry, ~R128_R1(src), 0, &r1); - carry = _addcarry_u32(carry, ~R128_R2(src), 0, &r2); - carry = _addcarry_u32(carry, ~R128_R3(src), 0, &r3); - R128_SET4(dst, r0, r1, r2, r3); -# endif //R128_64BIT - } -#else - if (src->lo) { - dst->lo = ~src->lo + 1; - dst->hi = ~src->hi; - } else { - dst->lo = 0; - dst->hi = ~src->hi + 1; - } -#endif //R128_INTEL -} - -// 64*64->128 -static void r128__umul128(R128 *dst, R128_U64 a, R128_U64 b) -{ -#if defined(_M_X64) && !defined(R128_STDC_ONLY) - dst->lo = _umul128(a, b, &dst->hi); -#elif R128_64BIT && !defined(_MSC_VER) && !defined(R128_STDC_ONLY) - unsigned __int128 p0 = a * (unsigned __int128)b; - dst->hi = (R128_U64)(p0 >> 64); - dst->lo = (R128_U64)p0; -#else - R128_U32 alo = (R128_U32)a; - R128_U32 ahi = (R128_U32)(a >> 32); - R128_U32 blo = (R128_U32)b; - R128_U32 bhi = (R128_U32)(b >> 32); - R128_U64 p0, p1, p2, p3; - - p0 = r128__umul64(alo, blo); - p1 = r128__umul64(alo, bhi); - p2 = r128__umul64(ahi, blo); - p3 = r128__umul64(ahi, bhi); - - { -#if R128_INTEL && !defined(R128_STDC_ONLY) - R128_U32 r0, r1, r2, r3; - unsigned char carry; - - r0 = (R128_U32)(p0); - r1 = (R128_U32)(p0 >> 32); - r2 = (R128_U32)(p1 >> 32); - r3 = (R128_U32)(p3 >> 32); - - carry = _addcarry_u32(0, r1, (R128_U32)p1, &r1); - carry = _addcarry_u32(carry, r2, (R128_U32)(p2 >> 32), &r2); - _addcarry_u32(carry, r3, 0, &r3); - carry = _addcarry_u32(0, r1, (R128_U32)p2, &r1); - carry = _addcarry_u32(carry, r2, (R128_U32)p3, &r2); - _addcarry_u32(carry, r3, 0, &r3); - - R128_SET4(dst, r0, r1, r2, r3); -#else - R128_U64 carry, lo, hi; - carry = ((R128_U64)(R128_U32)p1 + (R128_U64)(R128_U32)p2 + (p0 >> 32)) >> 32; - - lo = p0 + ((p1 + p2) << 32); - hi = p3 + ((R128_U32)(p1 >> 32) + (R128_U32)(p2 >> 32)) + carry; - - R128_SET2(dst, lo, hi); -#endif - } -#endif -} - -// 128/64->64 -#if defined(_M_X64) && (_MSC_VER < 1920) && !defined(R128_STDC_ONLY) -// MSVC x64 provides neither inline assembly nor (pre-2019) a div intrinsic, so we 
do fake -// "inline assembly" to avoid long division or outline assembly. -#pragma code_seg(".text") -__declspec(allocate(".text")) static const unsigned char r128__udiv128Code[] = { - 0x48, 0x8B, 0xC1, //mov rax, rcx - 0x49, 0xF7, 0xF0, //div rax, r8 - 0x49, 0x89, 0x11, //mov qword ptr [r9], rdx - 0xC3 //ret -}; -typedef R128_U64 (*r128__udiv128Proc)(R128_U64 nlo, R128_U64 nhi, R128_U64 d, R128_U64 *rem); -static const r128__udiv128Proc r128__udiv128 = (r128__udiv128Proc)(void*)r128__udiv128Code; -#else -static R128_U64 r128__udiv128(R128_U64 nlo, R128_U64 nhi, R128_U64 d, R128_U64 *rem) -{ -#if defined(_M_X64) && !defined(R128_STDC_ONLY) - return _udiv128(nhi, nlo, d, rem); -#elif defined(__x86_64__) && !defined(R128_STDC_ONLY) - R128_U64 q, r; - __asm("divq %4" - : "=a"(q), "=d"(r) - : "a"(nlo), "d"(nhi), "X"(d)); - *rem = r; - return q; -#else - R128_U64 tmp; - R128_U32 d0, d1; - R128_U32 n3, n2, n1, n0; - R128_U32 q0, q1; - R128_U32 r; - int shift; - - R128_ASSERT(d != 0); //division by zero - R128_ASSERT(nhi < d); //overflow - - // normalize - shift = r128__clz64(d); - - if (shift) { - R128 tmp128; - R128_SET2(&tmp128, nlo, nhi); - r128Shl(&tmp128, &tmp128, shift); - n3 = R128_R3(&tmp128); - n2 = R128_R2(&tmp128); - n1 = R128_R1(&tmp128); - n0 = R128_R0(&tmp128); - d <<= shift; - } else { - n3 = (R128_U32)(nhi >> 32); - n2 = (R128_U32)nhi; - n1 = (R128_U32)(nlo >> 32); - n0 = (R128_U32)nlo; - } - - d1 = (R128_U32)(d >> 32); - d0 = (R128_U32)d; - - // first digit - R128_ASSERT(n3 <= d1); - if (n3 < d1) { - q1 = r128__udiv64(n2, n3, d1, &r); - } else { - q1 = 0xffffffffu; - r = n2 + d1; - } -refine1: - if (r128__umul64(q1, d0) > ((R128_U64)r << 32) + n1) { - --q1; - if (r < ~d1 + 1) { - r += d1; - goto refine1; - } - } - - tmp = ((R128_U64)n2 << 32) + n1 - (r128__umul64(q1, d0) + (r128__umul64(q1, d1) << 32)); - n2 = (R128_U32)(tmp >> 32); - n1 = (R128_U32)tmp; - - // second digit - R128_ASSERT(n2 <= d1); - if (n2 < d1) { - q0 = r128__udiv64(n1, n2, d1, &r); - 
} else { - q0 = 0xffffffffu; - r = n1 + d1; - } -refine0: - if (r128__umul64(q0, d0) > ((R128_U64)r << 32) + n0) { - --q0; - if (r < ~d1 + 1) { - r += d1; - goto refine0; - } - } - - tmp = ((R128_U64)n1 << 32) + n0 - (r128__umul64(q0, d0) + (r128__umul64(q0, d1) << 32)); - n1 = (R128_U32)(tmp >> 32); - n0 = (R128_U32)tmp; - - *rem = (((R128_U64)n1 << 32) + n0) >> shift; - return ((R128_U64)q1 << 32) + q0; -#endif -} -#endif - -static int r128__ucmp(const R128 *a, const R128 *b) -{ - if (a->hi != b->hi) { - if (a->hi > b->hi) { - return 1; - } else { - return -1; - } - } else { - if (a->lo == b->lo) { - return 0; - } else if (a->lo > b->lo) { - return 1; - } else { - return -1; - } - } -} - -static void r128__umul(R128 *dst, const R128 *a, const R128 *b) -{ -#if defined(_M_X64) && !defined(R128_STDC_ONLY) - R128_U64 t0, t1; - R128_U64 lo, hi = 0; - unsigned char carry; - - t0 = _umul128(a->lo, b->lo, &t1); - carry = _addcarry_u64(0, t1, t0 >> 63, &lo); - _addcarry_u64(carry, hi, hi, &hi); - - t0 = _umul128(a->lo, b->hi, &t1); - carry = _addcarry_u64(0, lo, t0, &lo); - _addcarry_u64(carry, hi, t1, &hi); - - t0 = _umul128(a->hi, b->lo, &t1); - carry = _addcarry_u64(0, lo, t0, &lo); - _addcarry_u64(carry, hi, t1, &hi); - - t0 = _umul128(a->hi, b->hi, &t1); - hi += t0; - - R128_SET2(dst, lo, hi); -#elif defined(__x86_64__) && !defined(R128_STDC_ONLY) - unsigned __int128 p0, p1, p2, p3; - p0 = a->lo * (unsigned __int128)b->lo; - p1 = a->lo * (unsigned __int128)b->hi; - p2 = a->hi * (unsigned __int128)b->lo; - p3 = a->hi * (unsigned __int128)b->hi; - - p0 = (p3 << 64) + p2 + p1 + (p0 >> 64) + ((R128_U64)p0 >> 63); - dst->lo = (R128_U64)p0; - dst->hi = (R128_U64)(p0 >> 64); -#else - R128 p0, p1, p2, p3, round; - - r128__umul128(&p0, a->lo, b->lo); - round.hi = 0; round.lo = p0.lo >> 63; - p0.lo = p0.hi; p0.hi = 0; //r128Shr(&p0, &p0, 64); - r128Add(&p0, &p0, &round); - - r128__umul128(&p1, a->hi, b->lo); - r128Add(&p0, &p0, &p1); - - r128__umul128(&p2, a->lo, b->hi); - 
r128Add(&p0, &p0, &p2); - - r128__umul128(&p3, a->hi, b->hi); - p3.hi = p3.lo; p3.lo = 0; //r128Shl(&p3, &p3, 64); - r128Add(&p0, &p0, &p3); - - R128_SET2(dst, p0.lo, p0.hi); -#endif -} - -// Shift d left until the high bit is set, and shift n left by the same amount. -// returns non-zero on overflow. -static int r128__norm(R128 *n, R128 *d, R128_U64 *n2) -{ - R128_U64 d0, d1; - R128_U64 n0, n1; - int shift; - - d1 = d->hi; - d0 = d->lo; - n1 = n->hi; - n0 = n->lo; - - if (d1) { - shift = r128__clz64(d1); - if (shift) { - d1 = (d1 << shift) | (d0 >> (64 - shift)); - d0 = d0 << shift; - *n2 = n1 >> (64 - shift); - n1 = (n1 << shift) | (n0 >> (64 - shift)); - n0 = n0 << shift; - } else { - *n2 = 0; - } - } else { - shift = r128__clz64(d0); - if (r128__clz64(n1) <= shift) { - return 1; // overflow - } - - if (shift) { - d1 = d0 << shift; - d0 = 0; - *n2 = (n1 << shift) | (n0 >> (64 - shift)); - n1 = n0 << shift; - n0 = 0; - } else { - d1 = d0; - d0 = 0; - *n2 = n1; - n1 = n0; - n0 = 0; - } - } - - R128_SET2(n, n0, n1); - R128_SET2(d, d0, d1); - return 0; -} - -static void r128__udiv(R128 *quotient, const R128 *dividend, const R128 *divisor) -{ - R128 tmp; - R128_U64 d0, d1; - R128_U64 n1, n2, n3; - R128 q; - - R128_ASSERT(dividend != NULL); - R128_ASSERT(divisor != NULL); - R128_ASSERT(quotient != NULL); - R128_ASSERT(divisor->hi != 0 || divisor->lo != 0); // divide by zero - - // scale dividend and normalize - { - R128 n, d; - R128_SET2(&n, dividend->lo, dividend->hi); - R128_SET2(&d, divisor->lo, divisor->hi); - if (r128__norm(&n, &d, &n3)) { - R128_SET2(quotient, R128_max.lo, R128_max.hi); - return; - } - - d1 = d.hi; - d0 = d.lo; - n2 = n.hi; - n1 = n.lo; - } - - // first digit - R128_ASSERT(n3 <= d1); - { - R128 t0, t1; - t0.lo = n1; - if (n3 < d1) { - q.hi = r128__udiv128(n2, n3, d1, &t0.hi); - } else { - q.hi = R128_LIT_U64(0xffffffffffffffff); - t0.hi = n2 + d1; - } - -refine1: - r128__umul128(&t1, q.hi, d0); - if (r128__ucmp(&t1, &t0) > 0) { - --q.hi; - if 
(t0.hi < ~d1 + 1) { - t0.hi += d1; - goto refine1; - } - } - } - - { - R128 t0, t1, t2; - t0.hi = n2; - t0.lo = n1; - - r128__umul128(&t1, q.hi, d0); - r128__umul128(&t2, q.hi, d1); - - t2.hi = t2.lo; t2.lo = 0; //r128Shl(&t2, &t2, 64); - r128Add(&tmp, &t1, &t2); - r128Sub(&tmp, &t0, &tmp); - } - n2 = tmp.hi; - n1 = tmp.lo; - - // second digit - R128_ASSERT(n2 <= d1); - { - R128 t0, t1; - t0.lo = 0; - if (n2 < d1) { - q.lo = r128__udiv128(n1, n2, d1, &t0.hi); - } else { - q.lo = R128_LIT_U64(0xffffffffffffffff); - t0.hi = n1 + d1; - } - - refine0: - r128__umul128(&t1, q.lo, d0); - if (r128__ucmp(&t1, &t0) > 0) { - --q.lo; - if (t0.hi < ~d1 + 1) { - t0.hi += d1; - goto refine0; - } - } - } - - R128_SET2(quotient, q.lo, q.hi); -} - -static R128_U64 r128__umod(R128 *n, R128 *d) -{ - R128_U64 d0, d1; - R128_U64 n3, n2, n1; - R128_U64 q; - - R128_ASSERT(d != NULL); - R128_ASSERT(n != NULL); - R128_ASSERT(d->hi != 0 || d->lo != 0); // divide by zero - - if (r128__norm(n, d, &n3)) { - return R128_LIT_U64(0xffffffffffffffff); - } - - d1 = d->hi; - d0 = d->lo; - n2 = n->hi; - n1 = n->lo; - - R128_ASSERT(n3 < d1); - { - R128 t0, t1; - t0.lo = n1; - q = r128__udiv128(n2, n3, d1, &t0.hi); - - refine1: - r128__umul128(&t1, q, d0); - if (r128__ucmp(&t1, &t0) > 0) { - --q; - if (t0.hi < ~d1 + 1) { - t0.hi += d1; - goto refine1; - } - } - } - - return q; -} - -static int r128__format(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *format) -{ - char buf[128]; - R128 tmp; - R128_U64 whole; - char *cursor, *decimal, *dstp = dst; - int sign = 0; - int fullPrecision = 1; - int width, precision; - int padCnt, trail = 0; - - R128_ASSERT(dst != NULL && dstSize > 0); - R128_ASSERT(v != NULL); - R128_ASSERT(format != NULL); - - --dstSize; - - R128_SET2(&tmp, v->lo, v->hi); - if (r128IsNeg(&tmp)) { - r128__neg(&tmp, &tmp); - sign = 1; - } - - width = format->width; - if (width < 0) { - width = 0; - } - - precision = format->precision; - if (precision < 0) { - // print a 
maximum of 20 digits - fullPrecision = 0; - precision = 20; - } else if (precision > (int)sizeof(buf) - 21) { - trail = precision - (sizeof(buf) - 21); - precision -= trail; - } - - whole = tmp.hi; - decimal = cursor = buf; - - // fractional part first in case a carry into the whole part is required - if (tmp.lo || format->decimal) { - while (tmp.lo || (fullPrecision && precision)) { - if ((int)(cursor - buf) == precision) { - if ((R128_S64)tmp.lo < 0) { - // round up, propagate carry backwards - char *c; - for (c = cursor - 1; c >= buf; --c) { - char d = ++*c; - if (d <= '9') { - goto endfrac; - } else { - *c = '0'; - } - } - - // carry out into the whole part - whole++; - } - - break; - } - - r128__umul128(&tmp, tmp.lo, 10); - *cursor++ = (char)tmp.hi + '0'; - } - - endfrac: - if (format->decimal || precision) { - decimal = cursor; - *cursor++ = R128_decimal; - } - } - - // whole part - do { - char digit = (char)(whole % 10); - whole /= 10; - *cursor++ = digit + '0'; - } while (whole); - -#define R128__WRITE(c) do { if (dstp < dst + dstSize) *dstp = c; ++dstp; } while(0) - - padCnt = width - (int)(cursor - buf) - 1; - - // left padding - if (!format->leftAlign) { - char padChar = format->zeroPad ? 
'0' : ' '; - if (format->zeroPad) { - if (sign) { - R128__WRITE('-'); - } else if (format->sign == R128ToStringSign_Plus) { - R128__WRITE('+'); - } else if (format->sign == R128ToStringSign_Space) { - R128__WRITE(' '); - } else { - ++padCnt; - } - } - - for (; padCnt > 0; --padCnt) { - R128__WRITE(padChar); - } - } - - if (format->leftAlign || !format->zeroPad) { - if (sign) { - R128__WRITE('-'); - } else if (format->sign == R128ToStringSign_Plus) { - R128__WRITE('+'); - } else if (format->sign == R128ToStringSign_Space) { - R128__WRITE(' '); - } else { - ++padCnt; - } - } - - { - char *i; - - // reverse the whole part - for (i = cursor - 1; i >= decimal; --i) { - R128__WRITE(*i); - } - - // copy the fractional part - for (i = buf; i < decimal; ++i) { - R128__WRITE(*i); - } - } - - // right padding - if (format->leftAlign) { - char padChar = format->zeroPad ? '0' : ' '; - for (; padCnt > 0; --padCnt) { - R128__WRITE(padChar); - } - } - - // trailing zeroes for very large precision - while (trail--) { - R128__WRITE('0'); - } - -#undef R128__WRITE - - if (dstp <= dst + dstSize) { - *dstp = '\0'; - } else { - dst[dstSize] = '\0'; - } - return (int)(dstp - dst); -} - -void r128FromInt(R128 *dst, R128_S64 v) -{ - R128_ASSERT(dst != NULL); - dst->lo = 0; - dst->hi = (R128_U64)v; - R128_DEBUG_SET(dst); -} - -void r128FromFloat(R128 *dst, double v) -{ - R128_ASSERT(dst != NULL); - - if (v < -9223372036854775808.0) { - r128Copy(dst, &R128_min); - } else if (v >= 9223372036854775808.0) { - r128Copy(dst, &R128_max); - } else { - R128 r; - int sign = 0; - - if (v < 0) { - v = -v; - sign = 1; - } - - r.hi = (R128_U64)(R128_S64)v; - v -= (R128_S64)v; - r.lo = (R128_U64)(v * 18446744073709551616.0); - - if (sign) { - r128__neg(&r, &r); - } - - r128Copy(dst, &r); - } -} - -void r128FromString(R128 *dst, const char *s, char **endptr) -{ - R128_U64 lo = 0, hi = 0; - R128_U64 base = 10; - - int sign = 0; - - R128_ASSERT(dst != NULL); - R128_ASSERT(s != NULL); - - R128_SET2(dst, 0, 
0); - - // consume whitespace - for (;;) { - if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n' || *s == '\v') { - ++s; - } else { - break; - } - } - - // sign - if (*s == '-') { - sign = 1; - ++s; - } else if (*s == '+') { - ++s; - } - - // parse base prefix - if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { - base = 16; - s += 2; - } - - // whole part - for (;; ++s) { - R128_U64 digit; - - if ('0' <= *s && *s <= '9') { - digit = *s - '0'; - } else if (base == 16 && 'a' <= *s && *s <= 'f') { - digit = *s - 'a' + 10; - } else if (base == 16 && 'A' <= *s && *s <= 'F') { - digit = *s - 'A' + 10; - } else { - break; - } - - hi = hi * base + digit; - } - - // fractional part - if (*s == R128_decimal) { - const char *exp = ++s; - - // find the last digit and work backwards - for (;; ++s) { - if ('0' <= *s && *s <= '9') { - } else if (base == 16 && ('a' <= *s && *s <= 'f')) { - } else if (base == 16 && ('A' <= *s && *s <= 'F')) { - } else { - break; - } - } - - for (--s; s >= exp; --s) { - R128_U64 digit, unused; - - if ('0' <= *s && *s <= '9') { - digit = *s - '0'; - } else if ('a' <= *s && *s <= 'f') { - digit = *s - 'a' + 10; - } else { - digit = *s - 'A' + 10; - } - - lo = r128__udiv128(lo, digit, base, &unused); - } - } - - R128_SET2(dst, lo, hi); - if (sign) { - r128__neg(dst, dst); - } - - if (endptr) { - *endptr = (char *) s; - } -} - -R128_S64 r128ToInt(const R128 *v) -{ - R128_ASSERT(v != NULL); - return (R128_S64)v->hi; -} - -double r128ToFloat(const R128 *v) -{ - R128 tmp; - int sign = 0; - double d; - - R128_ASSERT(v != NULL); - - R128_SET2(&tmp, v->lo, v->hi); - if (r128IsNeg(&tmp)) { - r128__neg(&tmp, &tmp); - sign = 1; - } - - d = tmp.hi + tmp.lo * (1 / 18446744073709551616.0); - if (sign) { - d = -d; - } - - return d; -} - -int r128ToStringOpt(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *opt) -{ - return r128__format(dst, dstSize, v, opt); -} - -int r128ToStringf(char *dst, size_t dstSize, const char *format, const R128 *v) 
-{ - R128ToStringFormat opts; - - R128_ASSERT(dst != NULL && dstSize); - R128_ASSERT(format != NULL); - R128_ASSERT(v != NULL); - - opts.sign = R128__defaultFormat.sign; - opts.precision = R128__defaultFormat.precision; - opts.zeroPad = R128__defaultFormat.zeroPad; - opts.decimal = R128__defaultFormat.decimal; - opts.leftAlign = R128__defaultFormat.leftAlign; - - if (*format == '%') { - ++format; - } - - // flags field - for (;; ++format) { - if (*format == ' ' && opts.sign != R128ToStringSign_Plus) { - opts.sign = R128ToStringSign_Space; - } else if (*format == '+') { - opts.sign = R128ToStringSign_Plus; - } else if (*format == '0') { - opts.zeroPad = 1; - } else if (*format == '-') { - opts.leftAlign = 1; - } else if (*format == '#') { - opts.decimal = 1; - } else { - break; - } - } - - // width field - opts.width = 0; - for (;;) { - if ('0' <= *format && *format <= '9') { - opts.width = opts.width * 10 + *format++ - '0'; - } else { - break; - } - } - - // precision field - if (*format == '.') { - opts.precision = 0; - ++format; - for (;;) { - if ('0' <= *format && *format <= '9') { - opts.precision = opts.precision * 10 + *format++ - '0'; - } else { - break; - } - } - } - - return r128__format(dst, dstSize, v, &opts); -} - -int r128ToString(char *dst, size_t dstSize, const R128 *v) -{ - return r128__format(dst, dstSize, v, &R128__defaultFormat); -} - -void r128Copy(R128 *dst, const R128 *src) -{ - R128_ASSERT(dst != NULL); - R128_ASSERT(src != NULL); - dst->lo = src->lo; - dst->hi = src->hi; - R128_DEBUG_SET(dst); -} - -void r128Neg(R128 *dst, const R128 *src) -{ - r128__neg(dst, src); - R128_DEBUG_SET(dst); -} - -void r128Not(R128 *dst, const R128 *src) -{ - R128_ASSERT(dst != NULL); - R128_ASSERT(src != NULL); - - dst->lo = ~src->lo; - dst->hi = ~src->hi; - R128_DEBUG_SET(dst); -} - -void r128Or(R128 *dst, const R128 *a, const R128 *b) -{ - R128_ASSERT(dst != NULL); - R128_ASSERT(a != NULL); - R128_ASSERT(b != NULL); - - dst->lo = a->lo | b->lo; - dst->hi = 
a->hi | b->hi; - R128_DEBUG_SET(dst); -} - -void r128And(R128 *dst, const R128 *a, const R128 *b) -{ - R128_ASSERT(dst != NULL); - R128_ASSERT(a != NULL); - R128_ASSERT(b != NULL); - - dst->lo = a->lo & b->lo; - dst->hi = a->hi & b->hi; - R128_DEBUG_SET(dst); -} - -void r128Xor(R128 *dst, const R128 *a, const R128 *b) -{ - R128_ASSERT(dst != NULL); - R128_ASSERT(a != NULL); - R128_ASSERT(b != NULL); - - dst->lo = a->lo ^ b->lo; - dst->hi = a->hi ^ b->hi; - R128_DEBUG_SET(dst); -} - -void r128Shl(R128 *dst, const R128 *src, int amount) -{ - R128_U64 r[4]; - - R128_ASSERT(dst != NULL); - R128_ASSERT(src != NULL); - -#if defined(_M_IX86) && !defined(R128_STDC_ONLY) - __asm { - // load src - mov edx, dword ptr[src] - mov ecx, amount - - mov edi, dword ptr[edx] - mov esi, dword ptr[edx + 4] - mov ebx, dword ptr[edx + 8] - mov eax, dword ptr[edx + 12] - - // shift mod 32 - shld eax, ebx, cl - shld ebx, esi, cl - shld esi, edi, cl - shl edi, cl - - // clear out low 12 bytes of stack - xor edx, edx - mov dword ptr[r], edx - mov dword ptr[r + 4], edx - mov dword ptr[r + 8], edx - - // store shifted amount offset by count/32 bits - shr ecx, 5 - and ecx, 3 - mov dword ptr[r + ecx * 4 + 0], edi - mov dword ptr[r + ecx * 4 + 4], esi - mov dword ptr[r + ecx * 4 + 8], ebx - mov dword ptr[r + ecx * 4 + 12], eax - } -#else - - r[0] = src->lo; - r[1] = src->hi; - - amount &= 127; - if (amount >= 64) { - r[1] = r[0] << (amount - 64); - r[0] = 0; - } else if (amount) { -# ifdef _M_X64 - r[1] = __shiftleft128(r[0], r[1], (char) amount); -# else - r[1] = (r[1] << amount) | (r[0] >> (64 - amount)); -# endif - r[0] = r[0] << amount; - } -#endif //_M_IX86 - - dst->lo = r[0]; - dst->hi = r[1]; - R128_DEBUG_SET(dst); -} - -void r128Shr(R128 *dst, const R128 *src, int amount) -{ - R128_U64 r[4]; - - R128_ASSERT(dst != NULL); - R128_ASSERT(src != NULL); - -#if defined(_M_IX86) && !defined(R128_STDC_ONLY) - __asm { - // load src - mov edx, dword ptr[src] - mov ecx, amount - - mov edi, dword 
ptr[edx] - mov esi, dword ptr[edx + 4] - mov ebx, dword ptr[edx + 8] - mov eax, dword ptr[edx + 12] - - // shift mod 32 - shrd edi, esi, cl - shrd esi, ebx, cl - shrd ebx, eax, cl - shr eax, cl - - // clear out high 12 bytes of stack - xor edx, edx - mov dword ptr[r + 20], edx - mov dword ptr[r + 24], edx - mov dword ptr[r + 28], edx - - // store shifted amount offset by -count/32 bits - shr ecx, 5 - and ecx, 3 - neg ecx - mov dword ptr[r + ecx * 4 + 16], edi - mov dword ptr[r + ecx * 4 + 20], esi - mov dword ptr[r + ecx * 4 + 24], ebx - mov dword ptr[r + ecx * 4 + 28], eax - } -#else - r[2] = src->lo; - r[3] = src->hi; - - amount &= 127; - if (amount >= 64) { - r[2] = r[3] >> (amount - 64); - r[3] = 0; - } else if (amount) { -#ifdef _M_X64 - r[2] = __shiftright128(r[2], r[3], (char) amount); -#else - r[2] = (r[2] >> amount) | (r[3] << (64 - amount)); -#endif - r[3] = r[3] >> amount; - } -#endif - - dst->lo = r[2]; - dst->hi = r[3]; - R128_DEBUG_SET(dst); -} - -void r128Sar(R128 *dst, const R128 *src, int amount) -{ - R128_U64 r[4]; - - R128_ASSERT(dst != NULL); - R128_ASSERT(src != NULL); - -#if defined(_M_IX86) && !defined(R128_STDC_ONLY) - __asm { - // load src - mov edx, dword ptr[src] - mov ecx, amount - - mov edi, dword ptr[edx] - mov esi, dword ptr[edx + 4] - mov ebx, dword ptr[edx + 8] - mov eax, dword ptr[edx + 12] - - // shift mod 32 - shrd edi, esi, cl - shrd esi, ebx, cl - shrd ebx, eax, cl - sar eax, cl - - // copy sign to high 12 bytes of stack - cdq - mov dword ptr[r + 20], edx - mov dword ptr[r + 24], edx - mov dword ptr[r + 28], edx - - // store shifted amount offset by -count/32 bits - shr ecx, 5 - and ecx, 3 - neg ecx - mov dword ptr[r + ecx * 4 + 16], edi - mov dword ptr[r + ecx * 4 + 20], esi - mov dword ptr[r + ecx * 4 + 24], ebx - mov dword ptr[r + ecx * 4 + 28], eax - } -#else - r[2] = src->lo; - r[3] = src->hi; - - amount &= 127; - if (amount >= 64) { - r[2] = (R128_U64)((R128_S64)r[3] >> (amount - 64)); - r[3] = (R128_U64)((R128_S64)r[3] 
>> 63); - } else if (amount) { - r[2] = (r[2] >> amount) | (R128_U64)((R128_S64)r[3] << (64 - amount)); - r[3] = (R128_U64)((R128_S64)r[3] >> amount); - } -#endif - - dst->lo = r[2]; - dst->hi = r[3]; - R128_DEBUG_SET(dst); -} - -void r128Add(R128 *dst, const R128 *a, const R128 *b) -{ - unsigned char carry = 0; - R128_ASSERT(dst != NULL); - R128_ASSERT(a != NULL); - R128_ASSERT(b != NULL); - -#if R128_INTEL && !defined(R128_STDC_ONLY) -# if R128_64BIT - carry = _addcarry_u64(carry, a->lo, b->lo, &dst->lo); - carry = _addcarry_u64(carry, a->hi, b->hi, &dst->hi); -# else - R128_U32 r0, r1, r2, r3; - carry = _addcarry_u32(carry, R128_R0(a), R128_R0(b), &r0); - carry = _addcarry_u32(carry, R128_R1(a), R128_R1(b), &r1); - carry = _addcarry_u32(carry, R128_R2(a), R128_R2(b), &r2); - carry = _addcarry_u32(carry, R128_R3(a), R128_R3(b), &r3); - R128_SET4(dst, r0, r1, r2, r3); -# endif //R128_64BIT -#else - { - R128_U64 r = a->lo + b->lo; - carry = r < a->lo; - dst->lo = r; - dst->hi = a->hi + b->hi + carry; - } -#endif //R128_INTEL - - R128_DEBUG_SET(dst); -} - -void r128Sub(R128 *dst, const R128 *a, const R128 *b) -{ - unsigned char borrow = 0; - R128_ASSERT(dst != NULL); - R128_ASSERT(a != NULL); - R128_ASSERT(b != NULL); - -#if R128_INTEL && !defined(R128_STDC_ONLY) -# if R128_64BIT - borrow = _subborrow_u64(borrow, a->lo, b->lo, &dst->lo); - borrow = _subborrow_u64(borrow, a->hi, b->hi, &dst->hi); -# else - R128_U32 r0, r1, r2, r3; - borrow = _subborrow_u32(borrow, R128_R0(a), R128_R0(b), &r0); - borrow = _subborrow_u32(borrow, R128_R1(a), R128_R1(b), &r1); - borrow = _subborrow_u32(borrow, R128_R2(a), R128_R2(b), &r2); - borrow = _subborrow_u32(borrow, R128_R3(a), R128_R3(b), &r3); - R128_SET4(dst, r0, r1, r2, r3); -# endif //R128_64BIT -#else - { - R128_U64 r = a->lo - b->lo; - borrow = r > a->lo; - dst->lo = r; - dst->hi = a->hi - b->hi - borrow; - } -#endif //R128_INTEL - - R128_DEBUG_SET(dst); -} - -void r128Mul(R128 *dst, const R128 *a, const R128 *b) -{ - int 
sign = 0; - R128 ta, tb, tc; - - R128_ASSERT(dst != NULL); - R128_ASSERT(a != NULL); - R128_ASSERT(b != NULL); - - R128_SET2(&ta, a->lo, a->hi); - R128_SET2(&tb, b->lo, b->hi); - - if (r128IsNeg(&ta)) { - r128__neg(&ta, &ta); - sign = !sign; - } - if (r128IsNeg(&tb)) { - r128__neg(&tb, &tb); - sign = !sign; - } - - r128__umul(&tc, &ta, &tb); - if (sign) { - r128__neg(&tc, &tc); - } - - r128Copy(dst, &tc); -} - -void r128Div(R128 *dst, const R128 *a, const R128 *b) -{ - int sign = 0; - R128 tn, td, tq; - - R128_ASSERT(dst != NULL); - R128_ASSERT(a != NULL); - R128_ASSERT(b != NULL); - - R128_SET2(&tn, a->lo, a->hi); - R128_SET2(&td, b->lo, b->hi); - - if (r128IsNeg(&tn)) { - r128__neg(&tn, &tn); - sign = !sign; - } - - if (td.lo == 0 && td.hi == 0) { - // divide by zero - if (sign) { - r128Copy(dst, &R128_min); - } else { - r128Copy(dst, &R128_max); - } - return; - } else if (r128IsNeg(&td)) { - r128__neg(&td, &td); - sign = !sign; - } - - r128__udiv(&tq, &tn, &td); - - if (sign) { - r128__neg(&tq, &tq); - } - - r128Copy(dst, &tq); -} - -void r128Mod(R128 *dst, const R128 *a, const R128 *b) -{ - int sign = 0; - R128 tn, td, tq; - - R128_ASSERT(dst != NULL); - R128_ASSERT(a != NULL); - R128_ASSERT(b != NULL); - - R128_SET2(&tn, a->lo, a->hi); - R128_SET2(&td, b->lo, b->hi); - - if (r128IsNeg(&tn)) { - r128__neg(&tn, &tn); - sign = !sign; - } - - if (td.lo == 0 && td.hi == 0) { - // divide by zero - if (sign) { - r128Copy(dst, &R128_min); - } else { - r128Copy(dst, &R128_max); - } - return; - } else if (r128IsNeg(&td)) { - r128__neg(&td, &td); - sign = !sign; - } - - tq.hi = r128__umod(&tn, &td); - tq.lo = 0; - - if (sign) { - tq.hi = ~tq.hi + 1; - } - - r128Mul(&tq, &tq, b); - r128Sub(dst, a, &tq); -} - -void r128Rsqrt(R128 *dst, const R128 *v) -{ - static const R128 threeHalves = { R128_LIT_U64(0x8000000000000000), 1 }; - R128 x, est; - int i; - - if ((R128_S64)v->hi < 0) { - r128Copy(dst, &R128_min); - return; - } - - R128_SET2(&x, v->lo, v->hi); - - // get initial 
estimate - if (x.hi) { - int shift = (64 + r128__clz64(x.hi)) >> 1; - est.lo = R128_LIT_U64(1) << shift; - est.hi = 0; - } else if (x.lo) { - int shift = r128__clz64(x.lo) >> 1; - est.hi = R128_LIT_U64(1) << shift; - est.lo = 0; - } else { - R128_SET2(dst, 0, 0); - return; - } - - // x /= 2 - r128Shr(&x, &x, 1); - - // Newton-Raphson iterate - for (i = 0; i < 7; ++i) { - R128 newEst; - - // newEst = est * (threeHalves - (x / 2) * est * est); - r128__umul(&newEst, &est, &est); - r128__umul(&newEst, &newEst, &x); - r128Sub(&newEst, &threeHalves, &newEst); - r128__umul(&newEst, &est, &newEst); - - if (newEst.lo == est.lo && newEst.hi == est.hi) { - break; - } - R128_SET2(&est, newEst.lo, newEst.hi); - } - - r128Copy(dst, &est); -} - -void r128Sqrt(R128 *dst, const R128 *v) -{ - R128 x, est; - int i; - - if ((R128_S64)v->hi < 0) { - r128Copy(dst, &R128_min); - return; - } - - R128_SET2(&x, v->lo, v->hi); - - // get initial estimate - if (x.hi) { - int shift = (63 - r128__clz64(x.hi)) >> 1; - r128Shr(&est, &x, shift); - } else if (x.lo) { - int shift = (1 + r128__clz64(x.lo)) >> 1; - r128Shl(&est, &x, shift); - } else { - R128_SET2(dst, 0, 0); - return; - } - - // Newton-Raphson iterate - for (i = 0; i < 7; ++i) { - R128 newEst; - - // newEst = (est + x / est) / 2 - r128__udiv(&newEst, &x, &est); - r128Add(&newEst, &newEst, &est); - r128Shr(&newEst, &newEst, 1); - - if (newEst.lo == est.lo && newEst.hi == est.hi) { - break; - } - R128_SET2(&est, newEst.lo, newEst.hi); - } - - r128Copy(dst, &est); -} - -int r128Cmp(const R128 *a, const R128 *b) -{ - R128_ASSERT(a != NULL); - R128_ASSERT(b != NULL); - - if (a->hi == b->hi) { - if (a->lo == b->lo) { - return 0; - } else if (a->lo > b->lo) { - return 1; - } else { - return -1; - } - } else if ((R128_S64)a->hi > (R128_S64)b->hi) { - return 1; - } else { - return -1; - } -} - -int r128IsNeg(const R128 *v) -{ - R128_ASSERT(v != NULL); - - return (R128_S64)v->hi < 0; -} - -void r128Min(R128 *dst, const R128 *a, const R128 *b) 
-{ - R128_ASSERT(dst != NULL); - R128_ASSERT(a != NULL); - R128_ASSERT(b != NULL); - - if (r128Cmp(a, b) < 0) { - r128Copy(dst, a); - } else { - r128Copy(dst, b); - } -} - -void r128Max(R128 *dst, const R128 *a, const R128 *b) -{ - R128_ASSERT(dst != NULL); - R128_ASSERT(a != NULL); - R128_ASSERT(b != NULL); - - if (r128Cmp(a, b) > 0) { - r128Copy(dst, a); - } else { - r128Copy(dst, b); - } -} - -void r128Floor(R128 *dst, const R128 *v) -{ - R128_ASSERT(dst != NULL); - R128_ASSERT(v != NULL); - - if ((R128_S64)v->hi < 0) { - dst->hi = v->hi - (v->lo != 0); - } else { - dst->hi = v->hi; - } - dst->lo = 0; - R128_DEBUG_SET(dst); -} - -void r128Ceil(R128 *dst, const R128 *v) -{ - R128_ASSERT(dst != NULL); - R128_ASSERT(v != NULL); - - if ((R128_S64)v->hi > 0) { - dst->hi = v->hi + (v->lo != 0); - } else { - dst->hi = v->hi; - } - dst->lo = 0; - R128_DEBUG_SET(dst); -} - -#endif //R128_IMPLEMENTATION - diff --git a/thirdparty/stb_rect_pack/stb_rect_pack.h b/thirdparty/stb_rect_pack/stb_rect_pack.h deleted file mode 100644 index 3336fe7395..0000000000 --- a/thirdparty/stb_rect_pack/stb_rect_pack.h +++ /dev/null @@ -1,629 +0,0 @@ -// stb_rect_pack.h - v1.00 - public domain - rectangle packing -// Sean Barrett 2014 -// -// Useful for e.g. packing rectangular textures into an atlas. -// Does not do rotation. -// -// Not necessarily the awesomest packing method, but better than -// the totally naive one in stb_truetype (which is primarily what -// this is meant to replace). -// -// Has only had a few tests run, may have issues. -// -// More docs to come. -// -// No memory allocations; uses qsort() and assert() from stdlib. -// Can override those by defining STBRP_SORT and STBRP_ASSERT. -// -// This library currently uses the Skyline Bottom-Left algorithm. -// -// Please note: better rectangle packers are welcome! Please -// implement them to the same API, but with a different init -// function. 
-// -// Credits -// -// Library -// Sean Barrett -// Minor features -// Martins Mozeiko -// github:IntellectualKitty -// -// Bugfixes / warning fixes -// Jeremy Jaussaud -// Fabian Giesen -// -// Version history: -// -// 1.00 (2019-02-25) avoid small space waste; gracefully fail too-wide rectangles -// 0.99 (2019-02-07) warning fixes -// 0.11 (2017-03-03) return packing success/fail result -// 0.10 (2016-10-25) remove cast-away-const to avoid warnings -// 0.09 (2016-08-27) fix compiler warnings -// 0.08 (2015-09-13) really fix bug with empty rects (w=0 or h=0) -// 0.07 (2015-09-13) fix bug with empty rects (w=0 or h=0) -// 0.06 (2015-04-15) added STBRP_SORT to allow replacing qsort -// 0.05: added STBRP_ASSERT to allow replacing assert -// 0.04: fixed minor bug in STBRP_LARGE_RECTS support -// 0.01: initial release -// -// LICENSE -// -// See end of file for license information. - -////////////////////////////////////////////////////////////////////////////// -// -// INCLUDE SECTION -// - -#ifndef STB_INCLUDE_STB_RECT_PACK_H -#define STB_INCLUDE_STB_RECT_PACK_H - -#define STB_RECT_PACK_VERSION 1 - -#ifdef STBRP_STATIC -#define STBRP_DEF static -#else -#define STBRP_DEF extern -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct stbrp_context stbrp_context; -typedef struct stbrp_node stbrp_node; -typedef struct stbrp_rect stbrp_rect; - -#ifdef STBRP_LARGE_RECTS -typedef int stbrp_coord; -#else -typedef unsigned short stbrp_coord; -#endif - -STBRP_DEF int stbrp_pack_rects (stbrp_context *context, stbrp_rect *rects, int num_rects); -// Assign packed locations to rectangles. The rectangles are of type -// 'stbrp_rect' defined below, stored in the array 'rects', and there -// are 'num_rects' many of them. -// -// Rectangles which are successfully packed have the 'was_packed' flag -// set to a non-zero value and 'x' and 'y' store the minimum location -// on each axis (i.e. 
bottom-left in cartesian coordinates, top-left -// if you imagine y increasing downwards). Rectangles which do not fit -// have the 'was_packed' flag set to 0. -// -// You should not try to access the 'rects' array from another thread -// while this function is running, as the function temporarily reorders -// the array while it executes. -// -// To pack into another rectangle, you need to call stbrp_init_target -// again. To continue packing into the same rectangle, you can call -// this function again. Calling this multiple times with multiple rect -// arrays will probably produce worse packing results than calling it -// a single time with the full rectangle array, but the option is -// available. -// -// The function returns 1 if all of the rectangles were successfully -// packed and 0 otherwise. - -struct stbrp_rect -{ - // reserved for your use: - int id; - - // input: - stbrp_coord w, h; - - // output: - stbrp_coord x, y; - int was_packed; // non-zero if valid packing - -}; // 16 bytes, nominally - - -STBRP_DEF void stbrp_init_target (stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes); -// Initialize a rectangle packer to: -// pack a rectangle that is 'width' by 'height' in dimensions -// using temporary storage provided by the array 'nodes', which is 'num_nodes' long -// -// You must call this function every time you start packing into a new target. -// -// There is no "shutdown" function. The 'nodes' memory must stay valid for -// the following stbrp_pack_rects() call (or calls), but can be freed after -// the call (or calls) finish. -// -// Note: to guarantee best results, either: -// 1. make sure 'num_nodes' >= 'width' -// or 2. call stbrp_allow_out_of_mem() defined below with 'allow_out_of_mem = 1' -// -// If you don't do either of the above things, widths will be quantized to multiples -// of small integers to guarantee the algorithm doesn't run out of temporary storage. 
-// -// If you do #2, then the non-quantized algorithm will be used, but the algorithm -// may run out of temporary storage and be unable to pack some rectangles. - -STBRP_DEF void stbrp_setup_allow_out_of_mem (stbrp_context *context, int allow_out_of_mem); -// Optionally call this function after init but before doing any packing to -// change the handling of the out-of-temp-memory scenario, described above. -// If you call init again, this will be reset to the default (false). - - -STBRP_DEF void stbrp_setup_heuristic (stbrp_context *context, int heuristic); -// Optionally select which packing heuristic the library should use. Different -// heuristics will produce better/worse results for different data sets. -// If you call init again, this will be reset to the default. - -enum -{ - STBRP_HEURISTIC_Skyline_default=0, - STBRP_HEURISTIC_Skyline_BL_sortHeight = STBRP_HEURISTIC_Skyline_default, - STBRP_HEURISTIC_Skyline_BF_sortHeight -}; - - -////////////////////////////////////////////////////////////////////////////// -// -// the details of the following structures don't matter to you, but they must -// be visible so you can handle the memory allocations for them - -struct stbrp_node -{ - stbrp_coord x,y; - stbrp_node *next; -}; - -struct stbrp_context -{ - int width; - int height; - int align; - int init_mode; - int heuristic; - int num_nodes; - stbrp_node *active_head; - stbrp_node *free_head; - stbrp_node extra[2]; // we allocate two extra nodes so optimal user-node-count is 'width' not 'width+2' -}; - -#ifdef __cplusplus -} -#endif - -#endif - -////////////////////////////////////////////////////////////////////////////// -// -// IMPLEMENTATION SECTION -// - -#ifdef STB_RECT_PACK_IMPLEMENTATION -#ifndef STBRP_SORT -#include -#define STBRP_SORT qsort -#endif - -#ifndef STBRP_ASSERT -#include -#define STBRP_ASSERT assert -#endif - -#ifdef _MSC_VER -#define STBRP__NOTUSED(v) (void)(v) -#else -#define STBRP__NOTUSED(v) (void)sizeof(v) -#endif - -enum -{ - 
STBRP__INIT_skyline = 1 -}; - -STBRP_DEF void stbrp_setup_heuristic(stbrp_context *context, int heuristic) -{ - switch (context->init_mode) { - case STBRP__INIT_skyline: - STBRP_ASSERT(heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight || heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight); - context->heuristic = heuristic; - break; - default: - STBRP_ASSERT(0); - } -} - -STBRP_DEF void stbrp_setup_allow_out_of_mem(stbrp_context *context, int allow_out_of_mem) -{ - if (allow_out_of_mem) - // if it's ok to run out of memory, then don't bother aligning them; - // this gives better packing, but may fail due to OOM (even though - // the rectangles easily fit). @TODO a smarter approach would be to only - // quantize once we've hit OOM, then we could get rid of this parameter. - context->align = 1; - else { - // if it's not ok to run out of memory, then quantize the widths - // so that num_nodes is always enough nodes. - // - // I.e. num_nodes * align >= width - // align >= width / num_nodes - // align = ceil(width/num_nodes) - - context->align = (context->width + context->num_nodes-1) / context->num_nodes; - } -} - -STBRP_DEF void stbrp_init_target(stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes) -{ - int i; -#ifndef STBRP_LARGE_RECTS - STBRP_ASSERT(width <= 0xffff && height <= 0xffff); -#endif - - for (i=0; i < num_nodes-1; ++i) - nodes[i].next = &nodes[i+1]; - nodes[i].next = NULL; - context->init_mode = STBRP__INIT_skyline; - context->heuristic = STBRP_HEURISTIC_Skyline_default; - context->free_head = &nodes[0]; - context->active_head = &context->extra[0]; - context->width = width; - context->height = height; - context->num_nodes = num_nodes; - stbrp_setup_allow_out_of_mem(context, 0); - - // node 0 is the full width, node 1 is the sentinel (lets us not store width explicitly) - context->extra[0].x = 0; - context->extra[0].y = 0; - context->extra[0].next = &context->extra[1]; - context->extra[1].x = (stbrp_coord) width; -#ifdef 
STBRP_LARGE_RECTS - context->extra[1].y = (1<<30); -#else - context->extra[1].y = 65535; -#endif - context->extra[1].next = NULL; -} - -// find minimum y position if it starts at x1 -static int stbrp__skyline_find_min_y(stbrp_context *c, stbrp_node *first, int x0, int width, int *pwaste) -{ - stbrp_node *node = first; - int x1 = x0 + width; - int min_y, visited_width, waste_area; - - STBRP__NOTUSED(c); - - STBRP_ASSERT(first->x <= x0); - - #if 0 - // skip in case we're past the node - while (node->next->x <= x0) - ++node; - #else - STBRP_ASSERT(node->next->x > x0); // we ended up handling this in the caller for efficiency - #endif - - STBRP_ASSERT(node->x <= x0); - - min_y = 0; - waste_area = 0; - visited_width = 0; - while (node->x < x1) { - if (node->y > min_y) { - // raise min_y higher. - // we've accounted for all waste up to min_y, - // but we'll now add more waste for everything we've visted - waste_area += visited_width * (node->y - min_y); - min_y = node->y; - // the first time through, visited_width might be reduced - if (node->x < x0) - visited_width += node->next->x - x0; - else - visited_width += node->next->x - node->x; - } else { - // add waste area - int under_width = node->next->x - node->x; - if (under_width + visited_width > width) - under_width = width - visited_width; - waste_area += under_width * (min_y - node->y); - visited_width += under_width; - } - node = node->next; - } - - *pwaste = waste_area; - return min_y; -} - -typedef struct -{ - int x,y; - stbrp_node **prev_link; -} stbrp__findresult; - -static stbrp__findresult stbrp__skyline_find_best_pos(stbrp_context *c, int width, int height) -{ - int best_waste = (1<<30), best_x, best_y = (1 << 30); - stbrp__findresult fr; - stbrp_node **prev, *node, *tail, **best = NULL; - - // align to multiple of c->align - width = (width + c->align - 1); - width -= width % c->align; - STBRP_ASSERT(width % c->align == 0); - - // if it can't possibly fit, bail immediately - if (width > c->width || height > 
c->height) { - fr.prev_link = NULL; - fr.x = fr.y = 0; - return fr; - } - - node = c->active_head; - prev = &c->active_head; - while (node->x + width <= c->width) { - int y,waste; - y = stbrp__skyline_find_min_y(c, node, node->x, width, &waste); - if (c->heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight) { // actually just want to test BL - // bottom left - if (y < best_y) { - best_y = y; - best = prev; - } - } else { - // best-fit - if (y + height <= c->height) { - // can only use it if it first vertically - if (y < best_y || (y == best_y && waste < best_waste)) { - best_y = y; - best_waste = waste; - best = prev; - } - } - } - prev = &node->next; - node = node->next; - } - - best_x = (best == NULL) ? 0 : (*best)->x; - - // if doing best-fit (BF), we also have to try aligning right edge to each node position - // - // e.g, if fitting - // - // ____________________ - // |____________________| - // - // into - // - // | | - // | ____________| - // |____________| - // - // then right-aligned reduces waste, but bottom-left BL is always chooses left-aligned - // - // This makes BF take about 2x the time - - if (c->heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight) { - tail = c->active_head; - node = c->active_head; - prev = &c->active_head; - // find first node that's admissible - while (tail->x < width) - tail = tail->next; - while (tail) { - int xpos = tail->x - width; - int y,waste; - STBRP_ASSERT(xpos >= 0); - // find the left position that matches this - while (node->next->x <= xpos) { - prev = &node->next; - node = node->next; - } - STBRP_ASSERT(node->next->x > xpos && node->x <= xpos); - y = stbrp__skyline_find_min_y(c, node, xpos, width, &waste); - if (y + height <= c->height) { - if (y <= best_y) { - if (y < best_y || waste < best_waste || (waste==best_waste && xpos < best_x)) { - best_x = xpos; - STBRP_ASSERT(y <= best_y); - best_y = y; - best_waste = waste; - best = prev; - } - } - } - tail = tail->next; - } - } - - fr.prev_link = best; - fr.x = best_x; - 
fr.y = best_y; - return fr; -} - -static stbrp__findresult stbrp__skyline_pack_rectangle(stbrp_context *context, int width, int height) -{ - // find best position according to heuristic - stbrp__findresult res = stbrp__skyline_find_best_pos(context, width, height); - stbrp_node *node, *cur; - - // bail if: - // 1. it failed - // 2. the best node doesn't fit (we don't always check this) - // 3. we're out of memory - if (res.prev_link == NULL || res.y + height > context->height || context->free_head == NULL) { - res.prev_link = NULL; - return res; - } - - // on success, create new node - node = context->free_head; - node->x = (stbrp_coord) res.x; - node->y = (stbrp_coord) (res.y + height); - - context->free_head = node->next; - - // insert the new node into the right starting point, and - // let 'cur' point to the remaining nodes needing to be - // stiched back in - - cur = *res.prev_link; - if (cur->x < res.x) { - // preserve the existing one, so start testing with the next one - stbrp_node *next = cur->next; - cur->next = node; - cur = next; - } else { - *res.prev_link = node; - } - - // from here, traverse cur and free the nodes, until we get to one - // that shouldn't be freed - while (cur->next && cur->next->x <= res.x + width) { - stbrp_node *next = cur->next; - // move the current node to the free list - cur->next = context->free_head; - context->free_head = cur; - cur = next; - } - - // stitch the list back in - node->next = cur; - - if (cur->x < res.x + width) - cur->x = (stbrp_coord) (res.x + width); - -#ifdef _DEBUG - cur = context->active_head; - while (cur->x < context->width) { - STBRP_ASSERT(cur->x < cur->next->x); - cur = cur->next; - } - STBRP_ASSERT(cur->next == NULL); - - { - int count=0; - cur = context->active_head; - while (cur) { - cur = cur->next; - ++count; - } - cur = context->free_head; - while (cur) { - cur = cur->next; - ++count; - } - STBRP_ASSERT(count == context->num_nodes+2); - } -#endif - - return res; -} - -static int 
rect_height_compare(const void *a, const void *b) -{ - const stbrp_rect *p = (const stbrp_rect *) a; - const stbrp_rect *q = (const stbrp_rect *) b; - if (p->h > q->h) - return -1; - if (p->h < q->h) - return 1; - return (p->w > q->w) ? -1 : (p->w < q->w); -} - -static int rect_original_order(const void *a, const void *b) -{ - const stbrp_rect *p = (const stbrp_rect *) a; - const stbrp_rect *q = (const stbrp_rect *) b; - return (p->was_packed < q->was_packed) ? -1 : (p->was_packed > q->was_packed); -} - -#ifdef STBRP_LARGE_RECTS -#define STBRP__MAXVAL 0xffffffff -#else -#define STBRP__MAXVAL 0xffff -#endif - -STBRP_DEF int stbrp_pack_rects(stbrp_context *context, stbrp_rect *rects, int num_rects) -{ - int i, all_rects_packed = 1; - - // we use the 'was_packed' field internally to allow sorting/unsorting - for (i=0; i < num_rects; ++i) { - rects[i].was_packed = i; - } - - // sort according to heuristic - STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_height_compare); - - for (i=0; i < num_rects; ++i) { - if (rects[i].w == 0 || rects[i].h == 0) { - rects[i].x = rects[i].y = 0; // empty rect needs no space - } else { - stbrp__findresult fr = stbrp__skyline_pack_rectangle(context, rects[i].w, rects[i].h); - if (fr.prev_link) { - rects[i].x = (stbrp_coord) fr.x; - rects[i].y = (stbrp_coord) fr.y; - } else { - rects[i].x = rects[i].y = STBRP__MAXVAL; - } - } - } - - // unsort - STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_original_order); - - // set was_packed flags and all_rects_packed status - for (i=0; i < num_rects; ++i) { - rects[i].was_packed = !(rects[i].x == STBRP__MAXVAL && rects[i].y == STBRP__MAXVAL); - if (!rects[i].was_packed) - all_rects_packed = 0; - } - - // return the all_rects_packed status - return all_rects_packed; -} -#endif - -/* ------------------------------------------------------------------------------- -This software is available under 2 licenses -- choose whichever you prefer. 
------------------------------------------------------------------------------- -ALTERNATIVE A - MIT License -Copyright (c) 2017 Sean Barrett -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. ------------------------------------------------------------------------------- -ALTERNATIVE B - Public Domain (www.unlicense.org) -This is free and unencumbered software released into the public domain. -Anyone is free to copy, modify, publish, use, compile, sell, or distribute this -software, either in source code form or as a compiled binary, for any purpose, -commercial or non-commercial, and by any means. -In jurisdictions that recognize copyright laws, the author or authors of this -software dedicate any and all copyright interest in the software to the public -domain. We make this dedication for the benefit of the public at large and to -the detriment of our heirs and successors. 
We intend this dedication to be an -overt act of relinquishment in perpetuity of all present and future rights to -this software under copyright law. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------- -*/ - -- cgit v1.2.3