diff options
Diffstat (limited to 'thirdparty/thekla_atlas/nvmath')
36 files changed, 9342 insertions, 0 deletions
diff --git a/thirdparty/thekla_atlas/nvmath/Basis.cpp b/thirdparty/thekla_atlas/nvmath/Basis.cpp new file mode 100644 index 0000000000..0824179633 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Basis.cpp @@ -0,0 +1,270 @@ +// This code is in the public domain -- Ignacio Castaņo <castano@gmail.com> + +#include "Basis.h" + +using namespace nv; + + +/// Normalize basis vectors. +void Basis::normalize(float epsilon /*= NV_EPSILON*/) +{ + normal = ::normalizeSafe(normal, Vector3(0.0f), epsilon); + tangent = ::normalizeSafe(tangent, Vector3(0.0f), epsilon); + bitangent = ::normalizeSafe(bitangent, Vector3(0.0f), epsilon); +} + + +/// Gram-Schmidt orthogonalization. +/// @note Works only if the vectors are close to orthogonal. +void Basis::orthonormalize(float epsilon /*= NV_EPSILON*/) +{ + // N' = |N| + // T' = |T - (N' dot T) N'| + // B' = |B - (N' dot B) N' - (T' dot B) T'| + + normal = ::normalize(normal, epsilon); + + tangent -= normal * dot(normal, tangent); + tangent = ::normalize(tangent, epsilon); + + bitangent -= normal * dot(normal, bitangent); + bitangent -= tangent * dot(tangent, bitangent); + bitangent = ::normalize(bitangent, epsilon); +} + + + + +/// Robust orthonormalization. +/// Returns an orthonormal basis even when the original is degenerate. +void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/) +{ + // Normalize all vectors. + normalize(epsilon); + + if (lengthSquared(normal) < epsilon*epsilon) + { + // Build normal from tangent and bitangent. + normal = cross(tangent, bitangent); + + if (lengthSquared(normal) < epsilon*epsilon) + { + // Arbitrary basis. + tangent = Vector3(1, 0, 0); + bitangent = Vector3(0, 1, 0); + normal = Vector3(0, 0, 1); + return; + } + + normal = nv::normalize(normal, epsilon); + } + + // Project tangents to normal plane. 
+ tangent -= normal * dot(normal, tangent); + bitangent -= normal * dot(normal, bitangent); + + if (lengthSquared(tangent) < epsilon*epsilon) + { + if (lengthSquared(bitangent) < epsilon*epsilon) + { + // Arbitrary basis. + buildFrameForDirection(normal); + } + else + { + // Build tangent from bitangent. + bitangent = nv::normalize(bitangent, epsilon); + + tangent = cross(bitangent, normal); + nvDebugCheck(isNormalized(tangent, epsilon)); + } + } + else + { + tangent = nv::normalize(tangent, epsilon); +#if 0 + bitangent -= tangent * dot(tangent, bitangent); + + if (lengthSquared(bitangent) < epsilon*epsilon) + { + bitangent = cross(tangent, normal); + nvDebugCheck(isNormalized(bitangent, epsilon)); + } + else + { + bitangent = nv::normalize(bitangent, epsilon); + } +#else + if (lengthSquared(bitangent) < epsilon*epsilon) + { + // Build bitangent from tangent. + bitangent = cross(tangent, normal); + nvDebugCheck(isNormalized(bitangent, epsilon)); + } + else + { + bitangent = nv::normalize(bitangent, epsilon); + + // At this point tangent and bitangent are orthogonal to normal, but we don't know whether their orientation. + + Vector3 bisector; + if (lengthSquared(tangent + bitangent) < epsilon*epsilon) + { + bisector = tangent; + } + else + { + bisector = nv::normalize(tangent + bitangent); + } + Vector3 axis = nv::normalize(cross(bisector, normal)); + + //nvDebugCheck(isNormalized(axis, epsilon)); + nvDebugCheck(equal(dot(axis, tangent), -dot(axis, bitangent), epsilon)); + + if (dot(axis, tangent) > 0) + { + tangent = bisector + axis; + bitangent = bisector - axis; + } + else + { + tangent = bisector - axis; + bitangent = bisector + axis; + } + + // Make sure the resulting tangents are still perpendicular to the normal. + tangent -= normal * dot(normal, tangent); + bitangent -= normal * dot(normal, bitangent); + + // Double check. 
+ nvDebugCheck(equal(dot(normal, tangent), 0.0f, epsilon)); + nvDebugCheck(equal(dot(normal, bitangent), 0.0f, epsilon)); + + // Normalize. + tangent = nv::normalize(tangent); + bitangent = nv::normalize(bitangent); + + // If tangent and bitangent are not orthogonal, then derive bitangent from tangent, just in case... + if (!equal(dot(tangent, bitangent), 0.0f, epsilon)) { + bitangent = cross(tangent, normal); + bitangent = nv::normalize(bitangent); + } + } +#endif + } + + /*// Check vector lengths. + if (!isNormalized(normal, epsilon)) + { + nvDebug("%f %f %f\n", normal.x, normal.y, normal.z); + nvDebug("%f %f %f\n", tangent.x, tangent.y, tangent.z); + nvDebug("%f %f %f\n", bitangent.x, bitangent.y, bitangent.z); + }*/ + + nvDebugCheck(isNormalized(normal, epsilon)); + nvDebugCheck(isNormalized(tangent, epsilon)); + nvDebugCheck(isNormalized(bitangent, epsilon)); + + // Check vector angles. + nvDebugCheck(equal(dot(normal, tangent), 0.0f, epsilon)); + nvDebugCheck(equal(dot(normal, bitangent), 0.0f, epsilon)); + nvDebugCheck(equal(dot(tangent, bitangent), 0.0f, epsilon)); + + // Check vector orientation. + const float det = dot(cross(normal, tangent), bitangent); + nvDebugCheck(equal(det, 1.0f, epsilon) || equal(det, -1.0f, epsilon)); +} + + +/// Build an arbitrary frame for the given direction. +void Basis::buildFrameForDirection(Vector3::Arg d, float angle/*= 0*/) +{ + nvCheck(isNormalized(d)); + normal = d; + + // Choose minimum axis. + if (fabsf(normal.x) < fabsf(normal.y) && fabsf(normal.x) < fabsf(normal.z)) + { + tangent = Vector3(1, 0, 0); + } + else if (fabsf(normal.y) < fabsf(normal.z)) + { + tangent = Vector3(0, 1, 0); + } + else + { + tangent = Vector3(0, 0, 1); + } + + // Ortogonalize + tangent -= normal * dot(normal, tangent); + tangent = ::normalize(tangent); + + bitangent = cross(normal, tangent); + + // Rotate frame around normal according to angle. 
+ if (angle != 0.0f) { + float c = cosf(angle); + float s = sinf(angle); + Vector3 tmp = c * tangent - s * bitangent; + bitangent = s * tangent + c * bitangent; + tangent = tmp; + } +} + +bool Basis::isValid() const +{ + if (equal(normal, Vector3(0.0f))) return false; + if (equal(tangent, Vector3(0.0f))) return false; + if (equal(bitangent, Vector3(0.0f))) return false; + + if (equal(determinant(), 0.0f)) return false; + + return true; +} + + +/// Transform by this basis. (From this basis to object space). +Vector3 Basis::transform(Vector3::Arg v) const +{ + Vector3 o = tangent * v.x; + o += bitangent * v.y; + o += normal * v.z; + return o; +} + +/// Transform by the transpose. (From object space to this basis). +Vector3 Basis::transformT(Vector3::Arg v) +{ + return Vector3(dot(tangent, v), dot(bitangent, v), dot(normal, v)); +} + +/// Transform by the inverse. (From object space to this basis). +/// @note Uses Cramer's rule so the inverse is not accurate if the basis is ill-conditioned. +Vector3 Basis::transformI(Vector3::Arg v) const +{ + const float det = determinant(); + nvDebugCheck(!equal(det, 0.0f, 0.0f)); + + const float idet = 1.0f / det; + + // Rows of the inverse matrix. 
+ Vector3 r0( + (bitangent.y * normal.z - bitangent.z * normal.y), + -(bitangent.x * normal.z - bitangent.z * normal.x), + (bitangent.x * normal.y - bitangent.y * normal.x)); + + Vector3 r1( + -(tangent.y * normal.z - tangent.z * normal.y), + (tangent.x * normal.z - tangent.z * normal.x), + -(tangent.x * normal.y - tangent.y * normal.x)); + + Vector3 r2( + (tangent.y * bitangent.z - tangent.z * bitangent.y), + -(tangent.x * bitangent.z - tangent.z * bitangent.x), + (tangent.x * bitangent.y - tangent.y * bitangent.x)); + + return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)) * idet; +} + + diff --git a/thirdparty/thekla_atlas/nvmath/Basis.h b/thirdparty/thekla_atlas/nvmath/Basis.h new file mode 100644 index 0000000000..e8146afdbe --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Basis.h @@ -0,0 +1,82 @@ +// This code is in the public domain -- Ignacio Castaņo <castano@gmail.com> + +#pragma once +#ifndef NV_MATH_BASIS_H +#define NV_MATH_BASIS_H + +#include "nvmath.h" +#include "Vector.inl" +#include "Matrix.h" + +namespace nv +{ + + /// Basis class to compute tangent space basis, ortogonalizations and to + /// transform vectors from one space to another. + class Basis + { + public: + + /// Create a null basis. + Basis() : tangent(0, 0, 0), bitangent(0, 0, 0), normal(0, 0, 0) {} + + /// Create a basis given three vectors. + Basis(Vector3::Arg n, Vector3::Arg t, Vector3::Arg b) : tangent(t), bitangent(b), normal(n) {} + + /// Create a basis with the given tangent vectors and the handness. + Basis(Vector3::Arg n, Vector3::Arg t, float sign) + { + build(n, t, sign); + } + + NVMATH_API void normalize(float epsilon = NV_EPSILON); + NVMATH_API void orthonormalize(float epsilon = NV_EPSILON); + NVMATH_API void robustOrthonormalize(float epsilon = NV_EPSILON); + NVMATH_API void buildFrameForDirection(Vector3::Arg d, float angle = 0); + + /// Calculate the determinant [ F G N ] to obtain the handness of the basis. + float handness() const + { + return determinant() > 0.0f ? 
1.0f : -1.0f; + } + + /// Build a basis from 2 vectors and a handness flag. + void build(Vector3::Arg n, Vector3::Arg t, float sign) + { + normal = n; + tangent = t; + bitangent = sign * cross(t, n); + } + + /// Compute the determinant of this basis. + float determinant() const + { + return + tangent.x * bitangent.y * normal.z - tangent.z * bitangent.y * normal.x + + tangent.y * bitangent.z * normal.x - tangent.y * bitangent.x * normal.z + + tangent.z * bitangent.x * normal.y - tangent.x * bitangent.z * normal.y; + } + + bool isValid() const; + + // Get transform matrix for this basis. + NVMATH_API Matrix matrix() const; + + // Transform by this basis. (From this basis to object space). + NVMATH_API Vector3 transform(Vector3::Arg v) const; + + // Transform by the transpose. (From object space to this basis). + NVMATH_API Vector3 transformT(Vector3::Arg v); + + // Transform by the inverse. (From object space to this basis). + NVMATH_API Vector3 transformI(Vector3::Arg v) const; + + + Vector3 tangent; + Vector3 bitangent; + Vector3 normal; + }; + +} // nv namespace + +#endif // NV_MATH_BASIS_H diff --git a/thirdparty/thekla_atlas/nvmath/Box.cpp b/thirdparty/thekla_atlas/nvmath/Box.cpp new file mode 100644 index 0000000000..8f2014a077 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Box.cpp @@ -0,0 +1,119 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include "Box.h" +#include "Box.inl" +#include "Sphere.h" + +using namespace nv; + + + + +// Clip the given segment against this box. +bool Box::clipSegment(const Vector3 & origin, const Vector3 & dir, float * t_near, float * t_far) const { + + // Avoid aliasing. 
+ float tnear = *t_near; + float tfar = *t_far; + + // clip ray segment to box + for (int i = 0; i < 3; i++) + { + const float pos = origin.component[i] + tfar * dir.component[i]; + const float dt = tfar - tnear; + + if (dir.component[i] < 0) { + + // clip end point + if (pos < minCorner.component[i]) { + tfar = tnear + dt * (origin.component[i] - minCorner.component[i]) / (origin.component[i] - pos); + } + + // clip start point + if (origin.component[i] > maxCorner.component[i]) { + tnear = tnear + dt * (origin.component[i] - maxCorner.component[i]) / (tfar * dir.component[i]); + } + } + else { + + // clip end point + if (pos > maxCorner.component[i]) { + tfar = tnear + dt * (maxCorner.component[i] - origin.component[i]) / (pos - origin.component[i]); + } + + // clip start point + if (origin.component[i] < minCorner.component[i]) { + tnear = tnear + dt * (minCorner.component[i] - origin.component[i]) / (tfar * dir.component[i]); + } + } + + if (tnear > tfar) { + // Clipped away. + return false; + } + } + + // Return result. + *t_near = tnear; + *t_far = tfar; + return true; +} + + +float nv::distanceSquared(const Box &box, const Vector3 &point) { + Vector3 closest; + + if (point.x < box.minCorner.x) closest.x = box.minCorner.x; + else if (point.x > box.maxCorner.x) closest.x = box.maxCorner.x; + else closest.x = point.x; + + if (point.y < box.minCorner.y) closest.y = box.minCorner.y; + else if (point.y > box.maxCorner.y) closest.y = box.maxCorner.y; + else closest.y = point.y; + + if (point.z < box.minCorner.z) closest.z = box.minCorner.z; + else if (point.z > box.maxCorner.z) closest.z = box.maxCorner.z; + else closest.z = point.z; + + return lengthSquared(point - closest); +} + +bool nv::overlap(const Box &box, const Sphere &sphere) { + return distanceSquared(box, sphere.center) < sphere.radius * sphere.radius; +} + + +bool nv::intersect(const Box & box, const Vector3 & p, const Vector3 & id, float * t /*= NULL*/) { + // Precompute these in ray structure? 
+ int sdx = (id.x < 0); + int sdy = (id.y < 0); + int sdz = (id.z < 0); + + float tmin = (box.corner( sdx).x - p.x) * id.x; + float tmax = (box.corner(1-sdx).x - p.x) * id.x; + float tymin = (box.corner( sdy).y - p.y) * id.y; + float tymax = (box.corner(1-sdy).y - p.y) * id.y; + + if ((tmin > tymax) || (tymin > tmax)) + return false; + + if (tymin > tmin) tmin = tymin; + if (tymax < tmax) tmax = tymax; + + float tzmin = (box.corner( sdz).z - p.z) * id.z; + float tzmax = (box.corner(1-sdz).z - p.z) * id.z; + + if ((tmin > tzmax) || (tzmin > tmax)) + return false; + + if (tzmin > tmin) tmin = tzmin; + if (tzmax < tmax) tmax = tzmax; + + if (tmax < 0) + return false; + + if (t != NULL) *t = tmin; + + return true; +} + diff --git a/thirdparty/thekla_atlas/nvmath/Box.h b/thirdparty/thekla_atlas/nvmath/Box.h new file mode 100644 index 0000000000..19b5f2a3a5 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Box.h @@ -0,0 +1,103 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_BOX_H +#define NV_MATH_BOX_H + +#include "Vector.h" + +#include <float.h> // FLT_MAX + +namespace nv +{ + class Vector; + class Stream; + class Sphere; + + // Axis Aligned Bounding Box. + class Box + { + public: + + inline Box() {} + inline Box(const Box & b) : minCorner(b.minCorner), maxCorner(b.maxCorner) {} + inline Box(const Vector3 & mins, const Vector3 & maxs) : minCorner(mins), maxCorner(maxs) {} + + Box & operator=(const Box & b); + + operator const float * () const { return reinterpret_cast<const float *>(this); } + + // Clear the bounds. + void clearBounds(); + + // min < max + bool isValid() const; + + // Build a cube centered on center and with edge = 2*dist + void cube(const Vector3 & center, float dist); + + // Build a box, given center and extents. + void setCenterExtents(const Vector3 & center, const Vector3 & extents); + + // Get box center. + Vector3 center() const; + + // Return extents of the box. 
+ Vector3 extents() const; + + // Return extents of the box. + float extents(uint axis) const; + + // Add a point to this box. + void addPointToBounds(const Vector3 & p); + + // Add a box to this box. + void addBoxToBounds(const Box & b); + + // Add sphere to this box. + void addSphereToBounds(const Vector3 & p, float r); + + // Translate box. + void translate(const Vector3 & v); + + // Scale the box. + void scale(float s); + + // Expand the box by a fixed amount. + void expand(float r); + + // Get the area of the box. + float area() const; + + // Get the volume of the box. + float volume() const; + + // Return true if the box contains the given point. + bool contains(const Vector3 & p) const; + + // Split the given box in 8 octants and assign the ith one to this box. + void setOctant(const Box & box, const Vector3 & center, int i); + + + // Clip the given segment against this box. + bool clipSegment(const Vector3 & origin, const Vector3 & dir, float * t_near, float * t_far) const; + + + friend Stream & operator<< (Stream & s, Box & box); + + const Vector3 & corner(int i) const { return (&minCorner)[i]; } + + Vector3 minCorner; + Vector3 maxCorner; + }; + + float distanceSquared(const Box &box, const Vector3 &point); + bool overlap(const Box &box, const Sphere &sphere); + + // p is ray origin, id is inverse ray direction. + bool intersect(const Box & box, const Vector3 & p, const Vector3 & id, float * t); + +} // nv namespace + + +#endif // NV_MATH_BOX_H diff --git a/thirdparty/thekla_atlas/nvmath/Box.inl b/thirdparty/thekla_atlas/nvmath/Box.inl new file mode 100644 index 0000000000..dcfa70ff96 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Box.inl @@ -0,0 +1,154 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_BOX_INL +#define NV_MATH_BOX_INL + +#include "Box.h" +#include "Vector.inl" + +#include <float.h> // FLT_MAX + +namespace nv +{ + // Default ctor. + //inline Box::Box() { }; + + // Copy ctor. 
+ //inline Box::Box(const Box & b) : minCorner(b.minCorner), maxCorner(b.maxCorner) { } + + // Init ctor. + //inline Box::Box(const Vector3 & mins, const Vector3 & maxs) : minCorner(mins), maxCorner(maxs) { } + + // Assignment operator. + inline Box & Box::operator=(const Box & b) { minCorner = b.minCorner; maxCorner = b.maxCorner; return *this; } + + // Clear the bounds. + inline void Box::clearBounds() + { + minCorner.set(FLT_MAX, FLT_MAX, FLT_MAX); + maxCorner.set(-FLT_MAX, -FLT_MAX, -FLT_MAX); + } + + // min < max + inline bool Box::isValid() const + { + return minCorner.x <= maxCorner.x && minCorner.y <= maxCorner.y && minCorner.z <= maxCorner.z; + } + + // Build a cube centered on center and with edge = 2*dist + inline void Box::cube(const Vector3 & center, float dist) + { + setCenterExtents(center, Vector3(dist)); + } + + // Build a box, given center and extents. + inline void Box::setCenterExtents(const Vector3 & center, const Vector3 & extents) + { + minCorner = center - extents; + maxCorner = center + extents; + } + + // Get box center. + inline Vector3 Box::center() const + { + return (minCorner + maxCorner) * 0.5f; + } + + // Return extents of the box. + inline Vector3 Box::extents() const + { + return (maxCorner - minCorner) * 0.5f; + } + + // Return extents of the box. + inline float Box::extents(uint axis) const + { + nvDebugCheck(axis < 3); + if (axis == 0) return (maxCorner.x - minCorner.x) * 0.5f; + if (axis == 1) return (maxCorner.y - minCorner.y) * 0.5f; + if (axis == 2) return (maxCorner.z - minCorner.z) * 0.5f; + nvUnreachable(); + return 0.0f; + } + + // Add a point to this box. + inline void Box::addPointToBounds(const Vector3 & p) + { + minCorner = min(minCorner, p); + maxCorner = max(maxCorner, p); + } + + // Add a box to this box. + inline void Box::addBoxToBounds(const Box & b) + { + minCorner = min(minCorner, b.minCorner); + maxCorner = max(maxCorner, b.maxCorner); + } + + // Add sphere to this box. 
+ inline void Box::addSphereToBounds(const Vector3 & p, float r) { + minCorner = min(minCorner, p - Vector3(r)); + maxCorner = min(maxCorner, p + Vector3(r)); + } + + // Translate box. + inline void Box::translate(const Vector3 & v) + { + minCorner += v; + maxCorner += v; + } + + // Scale the box. + inline void Box::scale(float s) + { + minCorner *= s; + maxCorner *= s; + } + + // Expand the box by a fixed amount. + inline void Box::expand(float r) { + minCorner -= Vector3(r,r,r); + maxCorner += Vector3(r,r,r); + } + + // Get the area of the box. + inline float Box::area() const + { + const Vector3 d = extents(); + return 8.0f * (d.x*d.y + d.x*d.z + d.y*d.z); + } + + // Get the volume of the box. + inline float Box::volume() const + { + Vector3 d = extents(); + return 8.0f * (d.x * d.y * d.z); + } + + // Return true if the box contains the given point. + inline bool Box::contains(const Vector3 & p) const + { + return + minCorner.x < p.x && minCorner.y < p.y && minCorner.z < p.z && + maxCorner.x > p.x && maxCorner.y > p.y && maxCorner.z > p.z; + } + + // Split the given box in 8 octants and assign the ith one to this box. + inline void Box::setOctant(const Box & box, const Vector3 & center, int i) + { + minCorner = box.minCorner; + maxCorner = box.maxCorner; + + if (i & 4) minCorner.x = center.x; + else maxCorner.x = center.x; + if (i & 2) minCorner.y = center.y; + else maxCorner.y = center.y; + if (i & 1) minCorner.z = center.z; + else maxCorner.z = center.z; + } + +} // nv namespace + + +#endif // NV_MATH_BOX_INL diff --git a/thirdparty/thekla_atlas/nvmath/Color.h b/thirdparty/thekla_atlas/nvmath/Color.h new file mode 100644 index 0000000000..5cdc374bd9 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Color.h @@ -0,0 +1,150 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_COLOR_H +#define NV_MATH_COLOR_H + +#include "nvmath.h" + +namespace nv +{ + + /// 64 bit color stored as BGRA. 
class NVMATH_CLASS Color64
{
public:
    // The default constructor leaves the channels uninitialized.
    Color64() { }
    Color64(const Color64 & c) : u(c.u) { }
    Color64(uint16 R, uint16 G, uint16 B, uint16 A) { setRGBA(R, G, B, A); }
    // Build from the packed 64 bit value.
    explicit Color64(uint64 U) : u(U) { }

    // Set the four 16 bit channels.
    void setRGBA(uint16 R, uint16 G, uint16 B, uint16 A)
    {
        r = R;
        g = G;
        b = B;
        a = A;
    }

    // Implicit conversion to the packed 64 bit value.
    operator uint64 () const {
        return u;
    }

    // The channels and the packed value share the same storage.
    union {
        struct {
#if NV_LITTLE_ENDIAN
            // NOTE(review): this order (r, a, b, g) does not match the "BGRA" description
            // nor the b, g, r, a order used by Color32 -- confirm it is intended.
            uint16 r, a, b, g;
#else
            uint16 a: 16;
            uint16 r: 16;
            uint16 g: 16;
            uint16 b: 16;
#endif
        };
        uint64 u;
    };
};

/// 32 bit color stored as BGRA.
class NVMATH_CLASS Color32
{
public:
    // The default constructor leaves the channels uninitialized.
    Color32() { }
    Color32(const Color32 & c) : u(c.u) { }
    // Opaque color: alpha is set to 0xFF.
    Color32(uint8 R, uint8 G, uint8 B) { setRGBA(R, G, B, 0xFF); }
    Color32(uint8 R, uint8 G, uint8 B, uint8 A) { setRGBA( R, G, B, A); }
    //Color32(uint8 c[4]) { setRGBA(c[0], c[1], c[2], c[3]); }
    //Color32(float R, float G, float B) { setRGBA(uint(R*255), uint(G*255), uint(B*255), 0xFF); }
    //Color32(float R, float G, float B, float A) { setRGBA(uint(R*255), uint(G*255), uint(B*255), uint(A*255)); }
    // Build from the packed 32 bit value.
    explicit Color32(uint32 U) : u(U) { }

    // Set the four 8 bit channels, arguments given in R, G, B, A order.
    void setRGBA(uint8 R, uint8 G, uint8 B, uint8 A)
    {
        r = R;
        g = G;
        b = B;
        a = A;
    }

    // Same as setRGBA but with the arguments given in B, G, R, A order; alpha defaults to 0xFF.
    void setBGRA(uint8 B, uint8 G, uint8 R, uint8 A = 0xFF)
    {
        r = R;
        g = G;
        b = B;
        a = A;
    }

    // Implicit conversion to the packed 32 bit value.
    operator uint32 () const {
        return u;
    }

    // The named channels, the component array and the packed value share the same storage.
    union {
        struct {
#if NV_LITTLE_ENDIAN
            uint8 b, g, r, a;
#else
            uint8 a: 8;
            uint8 r: 8;
            uint8 g: 8;
            uint8 b: 8;
#endif
        };
        uint8 component[4];
        uint32 u;
    };
};


/// 16 bit 565 BGR color.
class NVMATH_CLASS Color16
{
public:
    // The default constructor leaves the channels uninitialized.
    Color16() { }
    Color16(const Color16 & c) : u(c.u) { }
    // Build from the packed 16 bit value.
    explicit Color16(uint16 U) : u(U) { }

    // 5 bits of blue, 6 bits of green and 5 bits of red share storage with the packed value.
    union {
        struct {
#if NV_LITTLE_ENDIAN
            uint16 b : 5;
            uint16 g : 6;
            uint16 r : 5;
#else
            uint16 r : 5;
            uint16 g : 6;
            uint16 b : 5;
#endif
        };
        uint16 u;
    };
};

/// 16 bit 4444 BGRA color.
+ class NVMATH_CLASS Color16_4444 + { + public: + Color16_4444() { } + Color16_4444(const Color16_4444 & c) : u(c.u) { } + explicit Color16_4444(uint16 U) : u(U) { } + + union { + struct { +#if NV_LITTLE_ENDIAN + uint16 b : 4; + uint16 g : 4; + uint16 r : 4; + uint16 a : 4; +#else + uint16 a : 4; + uint16 r : 4; + uint16 g : 4; + uint16 b : 4; +#endif + }; + uint16 u; + }; + }; + +} // nv namespace + +#endif // NV_MATH_COLOR_H diff --git a/thirdparty/thekla_atlas/nvmath/ConvexHull.cpp b/thirdparty/thekla_atlas/nvmath/ConvexHull.cpp new file mode 100644 index 0000000000..a4a95dace4 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/ConvexHull.cpp @@ -0,0 +1,120 @@ +// This code is in the public domain -- Ignacio Castaņo <castano@gmail.com> + +#include "ConvexHull.h" + +#include "Vector.inl" + +#include "nvcore/RadixSort.h" +#include "nvcore/Array.inl" + +using namespace nv; + +inline static float triangleArea(Vector2::Arg v1, Vector2::Arg v2, Vector2::Arg v3) +{ + return 0.5f * (v3.x * v1.y + v1.x * v2.y + v2.x * v3.y - v2.x * v1.y - v3.x * v2.y - v1.x * v3.y); +} + + +// Compute the convex hull using Graham Scan. +void nv::convexHull(const Array<Vector2> & input, Array<Vector2> & output, float epsilon/*=0*/) +{ + const uint inputCount = input.count(); + + Array<float> coords; + coords.resize(inputCount); + + for (uint i = 0; i < inputCount; i++) { + coords[i] = input[i].x; + } + + RadixSort radix; + radix.sort(coords); + + const uint * ranks = radix.ranks(); + + Array<Vector2> top(inputCount); + Array<Vector2> bottom(inputCount); + + Vector2 P = input[ranks[0]]; + Vector2 Q = input[ranks[inputCount-1]]; + + float topy = max(P.y, Q.y); + float boty = min(P.y, Q.y); + + for (uint i = 0; i < inputCount; i++) { + Vector2 p = input[ranks[i]]; + if (p.y >= boty) top.append(p); + } + + for (uint i = 0; i < inputCount; i++) { + Vector2 p = input[ranks[inputCount-1-i]]; + if (p.y <= topy) bottom.append(p); + } + + // Filter top list. 
+ output.clear(); + output.append(top[0]); + output.append(top[1]); + + for (uint i = 2; i < top.count(); ) { + Vector2 a = output[output.count()-2]; + Vector2 b = output[output.count()-1]; + Vector2 c = top[i]; + + float area = triangleArea(a, b, c); + + if (area >= -epsilon) { + output.popBack(); + } + + if (area < -epsilon || output.count() == 1) { + output.append(c); + i++; + } + } + + uint top_count = output.count(); + output.append(bottom[1]); + + // Filter bottom list. + for (uint i = 2; i < bottom.count(); ) { + Vector2 a = output[output.count()-2]; + Vector2 b = output[output.count()-1]; + Vector2 c = bottom[i]; + + float area = triangleArea(a, b, c); + + if (area >= -epsilon) { + output.popBack(); + } + + if (area < -epsilon || output.count() == top_count) { + output.append(c); + i++; + } + } + + // Remove duplicate element. + nvDebugCheck(output.front() == output.back()); + output.popBack(); +} + +/* +void testConvexHull() { + + Array<Vector2> points; + points.append(Vector2(1.00, 1.00)); + points.append(Vector2(0.00, 0.00)); + points.append(Vector2(1.00, 1.00)); + points.append(Vector2(1.00, -1.00)); + points.append(Vector2(2.00, 5.00)); + points.append(Vector2(-5.00, 3.00)); + points.append(Vector2(-4.00, -3.00)); + points.append(Vector2(7.00, -4.00)); + + Array<Vector2> hull; + convexHull(points, hull); + +} +*/ + diff --git a/thirdparty/thekla_atlas/nvmath/ConvexHull.h b/thirdparty/thekla_atlas/nvmath/ConvexHull.h new file mode 100644 index 0000000000..6c2db5d73f --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/ConvexHull.h @@ -0,0 +1,17 @@ +// This code is in the public domain -- Ignacio Castaņo <castano@gmail.com> + +#pragma once +#ifndef NV_MATH_CONVEXHULL_H +#define NV_MATH_CONVEXHULL_H + +#include "nvmath.h" +#include "nvcore/Array.h" + +namespace nv { + class Vector2; + + void convexHull(const Array<Vector2> & input, Array<Vector2> & output, float epsilon = 0); + +} // namespace nv + +#endif // NV_MATH_CONVEXHULL_H diff --git 
a/thirdparty/thekla_atlas/nvmath/Fitting.cpp b/thirdparty/thekla_atlas/nvmath/Fitting.cpp new file mode 100644 index 0000000000..6cd5cb0f32 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Fitting.cpp @@ -0,0 +1,1205 @@ +// This code is in the public domain -- Ignacio CastaÃąo <castano@gmail.com> + +#include "Fitting.h" +#include "Vector.inl" +#include "Plane.inl" + +#include "nvcore/Array.inl" +#include "nvcore/Utils.h" // max, swap + +#include <float.h> // FLT_MAX +//#include <vector> +#include <string.h> + +using namespace nv; + +// @@ Move to EigenSolver.h + +// @@ We should be able to do something cheaper... +static Vector3 estimatePrincipalComponent(const float * __restrict matrix) +{ + const Vector3 row0(matrix[0], matrix[1], matrix[2]); + const Vector3 row1(matrix[1], matrix[3], matrix[4]); + const Vector3 row2(matrix[2], matrix[4], matrix[5]); + + float r0 = lengthSquared(row0); + float r1 = lengthSquared(row1); + float r2 = lengthSquared(row2); + + if (r0 > r1 && r0 > r2) return row0; + if (r1 > r2) return row1; + return row2; +} + + +static inline Vector3 firstEigenVector_PowerMethod(const float *__restrict matrix) +{ + if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0) + { + return Vector3(0.0f); + } + + Vector3 v = estimatePrincipalComponent(matrix); + + const int NUM = 8; + for (int i = 0; i < NUM; i++) + { + float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2]; + float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4]; + float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5]; + + float norm = max(max(x, y), z); + + v = Vector3(x, y, z) / norm; + } + + return v; +} + + +Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points) +{ + Vector3 centroid(0.0f); + + for (int i = 0; i < n; i++) + { + centroid += points[i]; + } + centroid /= float(n); + + return centroid; +} + +Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric) +{ + Vector3 
centroid(0.0f); + float total = 0.0f; + + for (int i = 0; i < n; i++) + { + total += weights[i]; + centroid += weights[i]*points[i]; + } + centroid /= total; + + return centroid; +} + +Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points) +{ + Vector4 centroid(0.0f); + + for (int i = 0; i < n; i++) + { + centroid += points[i]; + } + centroid /= float(n); + + return centroid; +} + +Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric) +{ + Vector4 centroid(0.0f); + float total = 0.0f; + + for (int i = 0; i < n; i++) + { + total += weights[i]; + centroid += weights[i]*points[i]; + } + centroid /= total; + + return centroid; +} + + + +Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, float *__restrict covariance) +{ + // compute the centroid + Vector3 centroid = computeCentroid(n, points); + + // compute covariance matrix + for (int i = 0; i < 6; i++) + { + covariance[i] = 0.0f; + } + + for (int i = 0; i < n; i++) + { + Vector3 v = points[i] - centroid; + + covariance[0] += v.x * v.x; + covariance[1] += v.x * v.y; + covariance[2] += v.x * v.z; + covariance[3] += v.y * v.y; + covariance[4] += v.y * v.z; + covariance[5] += v.z * v.z; + } + + return centroid; +} + +Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, float *__restrict covariance) +{ + // compute the centroid + Vector3 centroid = computeCentroid(n, points, weights, metric); + + // compute covariance matrix + for (int i = 0; i < 6; i++) + { + covariance[i] = 0.0f; + } + + for (int i = 0; i < n; i++) + { + Vector3 a = (points[i] - centroid) * metric; + Vector3 b = weights[i]*a; + + covariance[0] += a.x * b.x; + covariance[1] += a.x * b.y; + covariance[2] += a.x * b.z; + covariance[3] += a.y * b.y; + covariance[4] += a.y * b.z; + covariance[5] += a.z * b.z; + } + + return centroid; +} + +Vector4 
nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, float *__restrict covariance) +{ + // compute the centroid + Vector4 centroid = computeCentroid(n, points); + + // compute covariance matrix + for (int i = 0; i < 10; i++) + { + covariance[i] = 0.0f; + } + + for (int i = 0; i < n; i++) + { + Vector4 v = points[i] - centroid; + + covariance[0] += v.x * v.x; + covariance[1] += v.x * v.y; + covariance[2] += v.x * v.z; + covariance[3] += v.x * v.w; + + covariance[4] += v.y * v.y; + covariance[5] += v.y * v.z; + covariance[6] += v.y * v.w; + + covariance[7] += v.z * v.z; + covariance[8] += v.z * v.w; + + covariance[9] += v.w * v.w; + } + + return centroid; +} + +Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric, float *__restrict covariance) +{ + // compute the centroid + Vector4 centroid = computeCentroid(n, points, weights, metric); + + // compute covariance matrix + for (int i = 0; i < 10; i++) + { + covariance[i] = 0.0f; + } + + for (int i = 0; i < n; i++) + { + Vector4 a = (points[i] - centroid) * metric; + Vector4 b = weights[i]*a; + + covariance[0] += a.x * b.x; + covariance[1] += a.x * b.y; + covariance[2] += a.x * b.z; + covariance[3] += a.x * b.w; + + covariance[4] += a.y * b.y; + covariance[5] += a.y * b.z; + covariance[6] += a.y * b.w; + + covariance[7] += a.z * b.z; + covariance[8] += a.z * b.w; + + covariance[9] += a.w * b.w; + } + + return centroid; +} + + + +Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points) +{ + float matrix[6]; + computeCovariance(n, points, matrix); + + return firstEigenVector_PowerMethod(matrix); +} + +Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric) +{ + float matrix[6]; + computeCovariance(n, points, weights, metric, matrix); + + return firstEigenVector_PowerMethod(matrix); +} + + + +static 
inline Vector3 firstEigenVector_EigenSolver3(const float *__restrict matrix) +{ + if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0) + { + return Vector3(0.0f); + } + + float eigenValues[3]; + Vector3 eigenVectors[3]; + if (!nv::Fit::eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) + { + return Vector3(0.0f); + } + + return eigenVectors[0]; +} + +Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points) +{ + float matrix[6]; + computeCovariance(n, points, matrix); + + return firstEigenVector_EigenSolver3(matrix); +} + +Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric) +{ + float matrix[6]; + computeCovariance(n, points, weights, metric, matrix); + + return firstEigenVector_EigenSolver3(matrix); +} + + + +static inline Vector4 firstEigenVector_EigenSolver4(const float *__restrict matrix) +{ + if (matrix[0] == 0 && matrix[4] == 0 && matrix[7] == 0&& matrix[9] == 0) + { + return Vector4(0.0f); + } + + float eigenValues[4]; + Vector4 eigenVectors[4]; + if (!nv::Fit::eigenSolveSymmetric4(matrix, eigenValues, eigenVectors)) + { + return Vector4(0.0f); + } + + return eigenVectors[0]; +} + +Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points) +{ + float matrix[10]; + computeCovariance(n, points, matrix); + + return firstEigenVector_EigenSolver4(matrix); +} + +Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric) +{ + float matrix[10]; + computeCovariance(n, points, weights, metric, matrix); + + return firstEigenVector_EigenSolver4(matrix); +} + + + +void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R); + +Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict points) +{ + // Store the points in an n x n matrix + Array<float> Q; Q.resize(n*n, 0.0f); 
+ for (int i = 0; i < n; ++i) + { + Q[i*n+0] = points[i].x; + Q[i*n+1] = points[i].y; + Q[i*n+2] = points[i].z; + } + + // Alloc space for the SVD outputs + Array<float> diag; diag.resize(n, 0.0f); + Array<float> R; R.resize(n*n, 0.0f); + + ArvoSVD(n, n, &Q[0], &diag[0], &R[0]); + + // Get the principal component + return Vector3(R[0], R[1], R[2]); +} + +Vector4 nv::Fit::computePrincipalComponent_SVD(int n, const Vector4 *__restrict points) +{ + // Store the points in an n x n matrix + Array<float> Q; Q.resize(n*n, 0.0f); + for (int i = 0; i < n; ++i) + { + Q[i*n+0] = points[i].x; + Q[i*n+1] = points[i].y; + Q[i*n+2] = points[i].z; + Q[i*n+3] = points[i].w; + } + + // Alloc space for the SVD outputs + Array<float> diag; diag.resize(n, 0.0f); + Array<float> R; R.resize(n*n, 0.0f); + + ArvoSVD(n, n, &Q[0], &diag[0], &R[0]); + + // Get the principal component + return Vector4(R[0], R[1], R[2], R[3]); +} + + + +Plane nv::Fit::bestPlane(int n, const Vector3 *__restrict points) +{ + // compute the centroid and covariance + float matrix[6]; + Vector3 centroid = computeCovariance(n, points, matrix); + + if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0) + { + // If no plane defined, then return a horizontal plane. + return Plane(Vector3(0, 0, 1), centroid); + } + + float eigenValues[3]; + Vector3 eigenVectors[3]; + if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) { + // If no plane defined, then return a horizontal plane. + return Plane(Vector3(0, 0, 1), centroid); + } + + return Plane(eigenVectors[2], centroid); +} + +bool nv::Fit::isPlanar(int n, const Vector3 * points, float epsilon/*=NV_EPSILON*/) +{ + // compute the centroid and covariance + float matrix[6]; + computeCovariance(n, points, matrix); + + float eigenValues[3]; + Vector3 eigenVectors[3]; + if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) { + return false; + } + + return eigenValues[2] < epsilon; +} + + + +// Tridiagonal solver from Charles Bloom. 
+// Householder transforms followed by QL decomposition. +// Seems to be based on the code from Numerical Recipes in C. + +static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd); +static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd); + +bool nv::Fit::eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]) +{ + nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL); + + float subd[3]; + float diag[3]; + float work[3][3]; + + work[0][0] = matrix[0]; + work[0][1] = work[1][0] = matrix[1]; + work[0][2] = work[2][0] = matrix[2]; + work[1][1] = matrix[3]; + work[1][2] = work[2][1] = matrix[4]; + work[2][2] = matrix[5]; + + EigenSolver3_Tridiagonal(work, diag, subd); + if (!EigenSolver3_QLAlgorithm(work, diag, subd)) + { + for (int i = 0; i < 3; i++) { + eigenValues[i] = 0; + eigenVectors[i] = Vector3(0); + } + return false; + } + + for (int i = 0; i < 3; i++) { + eigenValues[i] = (float)diag[i]; + } + + // eigenvectors are the columns; make them the rows : + + for (int i=0; i < 3; i++) + { + for (int j = 0; j < 3; j++) + { + eigenVectors[j].component[i] = (float) work[i][j]; + } + } + + // shuffle to sort by singular value : + if (eigenValues[2] > eigenValues[0] && eigenValues[2] > eigenValues[1]) + { + swap(eigenValues[0], eigenValues[2]); + swap(eigenVectors[0], eigenVectors[2]); + } + if (eigenValues[1] > eigenValues[0]) + { + swap(eigenValues[0], eigenValues[1]); + swap(eigenVectors[0], eigenVectors[1]); + } + if (eigenValues[2] > eigenValues[1]) + { + swap(eigenValues[1], eigenValues[2]); + swap(eigenVectors[1], eigenVectors[2]); + } + + nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2]); + nvDebugCheck(eigenValues[1] >= eigenValues[2]); + + return true; +} + +static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd) +{ + // Householder reduction T = Q^t M Q + // Input: + // mat, symmetric 3x3 matrix 
M + // Output: + // mat, orthogonal matrix Q + // diag, diagonal entries of T + // subd, subdiagonal entries of T (T is symmetric) + const float epsilon = 1e-08f; + + float a = mat[0][0]; + float b = mat[0][1]; + float c = mat[0][2]; + float d = mat[1][1]; + float e = mat[1][2]; + float f = mat[2][2]; + + diag[0] = a; + subd[2] = 0.f; + if (fabsf(c) >= epsilon) + { + const float ell = sqrtf(b*b+c*c); + b /= ell; + c /= ell; + const float q = 2*b*e+c*(f-d); + diag[1] = d+c*q; + diag[2] = f-c*q; + subd[0] = ell; + subd[1] = e-b*q; + mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0; + mat[1][0] = 0; mat[1][1] = b; mat[1][2] = c; + mat[2][0] = 0; mat[2][1] = c; mat[2][2] = -b; + } + else + { + diag[1] = d; + diag[2] = f; + subd[0] = b; + subd[1] = e; + mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0; + mat[1][0] = 0; mat[1][1] = 1; mat[1][2] = 0; + mat[2][0] = 0; mat[2][1] = 0; mat[2][2] = 1; + } +} + +static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd) +{ + // QL iteration with implicit shifting to reduce matrix from tridiagonal + // to diagonal + const int maxiter = 32; + + for (int ell = 0; ell < 3; ell++) + { + int iter; + for (iter = 0; iter < maxiter; iter++) + { + int m; + for (m = ell; m <= 1; m++) + { + float dd = fabsf(diag[m]) + fabsf(diag[m+1]); + if ( fabsf(subd[m]) + dd == dd ) + break; + } + if ( m == ell ) + break; + + float g = (diag[ell+1]-diag[ell])/(2*subd[ell]); + float r = sqrtf(g*g+1); + if ( g < 0 ) + g = diag[m]-diag[ell]+subd[ell]/(g-r); + else + g = diag[m]-diag[ell]+subd[ell]/(g+r); + float s = 1, c = 1, p = 0; + for (int i = m-1; i >= ell; i--) + { + float f = s*subd[i], b = c*subd[i]; + if ( fabsf(f) >= fabsf(g) ) + { + c = g/f; + r = sqrtf(c*c+1); + subd[i+1] = f*r; + c *= (s = 1/r); + } + else + { + s = f/g; + r = sqrtf(s*s+1); + subd[i+1] = g*r; + s *= (c = 1/r); + } + g = diag[i+1]-p; + r = (diag[i]-g)*s+2*b*c; + p = s*r; + diag[i+1] = g+p; + g = c*r-b; + + for (int k = 0; k < 3; k++) + { + f = mat[k][i+1]; + 
mat[k][i+1] = s*mat[k][i]+c*f; + mat[k][i] = c*mat[k][i]-s*f; + } + } + diag[ell] -= p; + subd[ell] = g; + subd[m] = 0; + } + + if ( iter == maxiter ) + // should not get here under normal circumstances + return false; + } + + return true; +} + + + +// Tridiagonal solver for 4x4 symmetric matrices. + +static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd); +static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd); + +bool nv::Fit::eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4]) +{ + nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL); + + float subd[4]; + float diag[4]; + float work[4][4]; + + work[0][0] = matrix[0]; + work[0][1] = work[1][0] = matrix[1]; + work[0][2] = work[2][0] = matrix[2]; + work[0][3] = work[3][0] = matrix[3]; + work[1][1] = matrix[4]; + work[1][2] = work[2][1] = matrix[5]; + work[1][3] = work[3][1] = matrix[6]; + work[2][2] = matrix[7]; + work[2][3] = work[3][2] = matrix[8]; + work[3][3] = matrix[9]; + + EigenSolver4_Tridiagonal(work, diag, subd); + if (!EigenSolver4_QLAlgorithm(work, diag, subd)) + { + for (int i = 0; i < 4; i++) { + eigenValues[i] = 0; + eigenVectors[i] = Vector4(0); + } + return false; + } + + for (int i = 0; i < 4; i++) { + eigenValues[i] = (float)diag[i]; + } + + // eigenvectors are the columns; make them the rows + + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + eigenVectors[j].component[i] = (float) work[i][j]; + } + } + + // sort by singular value + + for (int i = 0; i < 3; ++i) + { + for (int j = i+1; j < 4; ++j) + { + if (eigenValues[j] > eigenValues[i]) + { + swap(eigenValues[i], eigenValues[j]); + swap(eigenVectors[i], eigenVectors[j]); + } + } + } + + nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2] && eigenValues[0] >= eigenValues[3]); + nvDebugCheck(eigenValues[1] >= eigenValues[2] && eigenValues[1] >= eigenValues[3]); + 
nvDebugCheck(eigenValues[2] >= eigenValues[2]); + + return true; +} + +#include "nvmath/Matrix.inl" + +inline float signNonzero(float x) +{ + return (x >= 0.0f) ? 1.0f : -1.0f; +} + +static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd) +{ + // Householder reduction T = Q^t M Q + // Input: + // mat, symmetric 3x3 matrix M + // Output: + // mat, orthogonal matrix Q + // diag, diagonal entries of T + // subd, subdiagonal entries of T (T is symmetric) + + static const int n = 4; + + // Set epsilon relative to size of elements in matrix + static const float relEpsilon = 1e-6f; + float maxElement = FLT_MAX; + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) + maxElement = max(maxElement, fabsf(mat[i][j])); + float epsilon = relEpsilon * maxElement; + + // Iterative algorithm, works for any size of matrix but might be slower than + // a closed-form solution for symmetric 4x4 matrices. Based on this article: + // http://en.wikipedia.org/wiki/Householder_transformation#Tridiagonalization + + Matrix A, Q(identity); + memcpy(&A, mat, sizeof(float)*n*n); + + // We proceed from left to right, making the off-tridiagonal entries zero in + // one column of the matrix at a time. 
+ for (int k = 0; k < n - 2; ++k) + { + float sum = 0.0f; + for (int j = k+1; j < n; ++j) + sum += A(j,k)*A(j,k); + float alpha = -signNonzero(A(k+1,k)) * sqrtf(sum); + float r = sqrtf(0.5f * (alpha*alpha - A(k+1,k)*alpha)); + + // If r is zero, skip this column - already in tridiagonal form + if (fabsf(r) < epsilon) + continue; + + float v[n] = {}; + v[k+1] = 0.5f * (A(k+1,k) - alpha) / r; + for (int j = k+2; j < n; ++j) + v[j] = 0.5f * A(j,k) / r; + + Matrix P(identity); + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) + P(i,j) -= 2.0f * v[i] * v[j]; + + A = mul(mul(P, A), P); + Q = mul(Q, P); + } + + nvDebugCheck(fabsf(A(2,0)) < epsilon); + nvDebugCheck(fabsf(A(0,2)) < epsilon); + nvDebugCheck(fabsf(A(3,0)) < epsilon); + nvDebugCheck(fabsf(A(0,3)) < epsilon); + nvDebugCheck(fabsf(A(3,1)) < epsilon); + nvDebugCheck(fabsf(A(1,3)) < epsilon); + + for (int i = 0; i < n; ++i) + diag[i] = A(i,i); + for (int i = 0; i < n - 1; ++i) + subd[i] = A(i+1,i); + subd[n-1] = 0.0f; + + memcpy(mat, &Q, sizeof(float)*n*n); +} + +static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd) +{ + // QL iteration with implicit shifting to reduce matrix from tridiagonal + // to diagonal + const int maxiter = 32; + + for (int ell = 0; ell < 4; ell++) + { + int iter; + for (iter = 0; iter < maxiter; iter++) + { + int m; + for (m = ell; m < 3; m++) + { + float dd = fabsf(diag[m]) + fabsf(diag[m+1]); + if ( fabsf(subd[m]) + dd == dd ) + break; + } + if ( m == ell ) + break; + + float g = (diag[ell+1]-diag[ell])/(2*subd[ell]); + float r = sqrtf(g*g+1); + if ( g < 0 ) + g = diag[m]-diag[ell]+subd[ell]/(g-r); + else + g = diag[m]-diag[ell]+subd[ell]/(g+r); + float s = 1, c = 1, p = 0; + for (int i = m-1; i >= ell; i--) + { + float f = s*subd[i], b = c*subd[i]; + if ( fabsf(f) >= fabsf(g) ) + { + c = g/f; + r = sqrtf(c*c+1); + subd[i+1] = f*r; + c *= (s = 1/r); + } + else + { + s = f/g; + r = sqrtf(s*s+1); + subd[i+1] = g*r; + s *= (c = 1/r); + } + g = 
diag[i+1]-p; + r = (diag[i]-g)*s+2*b*c; + p = s*r; + diag[i+1] = g+p; + g = c*r-b; + + for (int k = 0; k < 4; k++) + { + f = mat[k][i+1]; + mat[k][i+1] = s*mat[k][i]+c*f; + mat[k][i] = c*mat[k][i]-s*f; + } + } + diag[ell] -= p; + subd[ell] = g; + subd[m] = 0; + } + + if ( iter == maxiter ) + // should not get here under normal circumstances + return false; + } + + return true; +} + + + +int nv::Fit::compute4Means(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, Vector3 *__restrict cluster) +{ + // Compute principal component. + float matrix[6]; + Vector3 centroid = computeCovariance(n, points, weights, metric, matrix); + Vector3 principal = firstEigenVector_PowerMethod(matrix); + + // Pick initial solution. + int mini, maxi; + mini = maxi = 0; + + float mindps, maxdps; + mindps = maxdps = dot(points[0] - centroid, principal); + + for (int i = 1; i < n; ++i) + { + float dps = dot(points[i] - centroid, principal); + + if (dps < mindps) { + mindps = dps; + mini = i; + } + else { + maxdps = dps; + maxi = i; + } + } + + cluster[0] = centroid + mindps * principal; + cluster[1] = centroid + maxdps * principal; + cluster[2] = (2.0f * cluster[0] + cluster[1]) / 3.0f; + cluster[3] = (2.0f * cluster[1] + cluster[0]) / 3.0f; + + // Now we have to iteratively refine the clusters. + while (true) + { + Vector3 newCluster[4] = { Vector3(0.0f), Vector3(0.0f), Vector3(0.0f), Vector3(0.0f) }; + float total[4] = {0, 0, 0, 0}; + + for (int i = 0; i < n; ++i) + { + // Find nearest cluster. 
+ int nearest = 0; + float mindist = FLT_MAX; + for (int j = 0; j < 4; j++) + { + float dist = lengthSquared((cluster[j] - points[i]) * metric); + if (dist < mindist) + { + mindist = dist; + nearest = j; + } + } + + newCluster[nearest] += weights[i] * points[i]; + total[nearest] += weights[i]; + } + + for (int j = 0; j < 4; j++) + { + if (total[j] != 0) + newCluster[j] /= total[j]; + } + + if (equal(cluster[0], newCluster[0]) && equal(cluster[1], newCluster[1]) && + equal(cluster[2], newCluster[2]) && equal(cluster[3], newCluster[3])) + { + return (total[0] != 0) + (total[1] != 0) + (total[2] != 0) + (total[3] != 0); + } + + cluster[0] = newCluster[0]; + cluster[1] = newCluster[1]; + cluster[2] = newCluster[2]; + cluster[3] = newCluster[3]; + + // Sort clusters by weight. + for (int i = 0; i < 4; i++) + { + for (int j = i; j > 0 && total[j] > total[j - 1]; j--) + { + swap( total[j], total[j - 1] ); + swap( cluster[j], cluster[j - 1] ); + } + } + } +} + + + +// Adaptation of James Arvo's SVD code, as found in ZOH. 
+ +inline float Sqr(float x) { return x*x; } + +inline float svd_pythag( float a, float b ) +{ + float at = fabsf(a); + float bt = fabsf(b); + if( at > bt ) + return at * sqrtf( 1.0f + Sqr( bt / at ) ); + else if( bt > 0.0f ) + return bt * sqrtf( 1.0f + Sqr( at / bt ) ); + else return 0.0f; +} + +inline float SameSign( float a, float b ) +{ + float t; + if( b >= 0.0f ) t = fabsf( a ); + else t = -fabsf( a ); + return t; +} + +void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R) +{ + static const int MaxIterations = 30; + + int i, j, k, l, p, q, iter; + float c, f, h, s, x, y, z; + float norm = 0.0f; + float g = 0.0f; + float scale = 0.0f; + + Array<float> temp; temp.resize(cols, 0.0f); + + for( i = 0; i < cols; i++ ) + { + temp[i] = scale * g; + scale = 0.0f; + g = 0.0f; + s = 0.0f; + l = i + 1; + + if( i < rows ) + { + for( k = i; k < rows; k++ ) scale += fabsf( Q[k*cols+i] ); + if( scale != 0.0f ) + { + for( k = i; k < rows; k++ ) + { + Q[k*cols+i] /= scale; + s += Sqr( Q[k*cols+i] ); + } + f = Q[i*cols+i]; + g = -SameSign( sqrtf(s), f ); + h = f * g - s; + Q[i*cols+i] = f - g; + if( i != cols - 1 ) + { + for( j = l; j < cols; j++ ) + { + s = 0.0f; + for( k = i; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j]; + f = s / h; + for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i]; + } + } + for( k = i; k < rows; k++ ) Q[k*cols+i] *= scale; + } + } + + diag[i] = scale * g; + g = 0.0f; + s = 0.0f; + scale = 0.0f; + + if( i < rows && i != cols - 1 ) + { + for( k = l; k < cols; k++ ) scale += fabsf( Q[i*cols+k] ); + if( scale != 0.0f ) + { + for( k = l; k < cols; k++ ) + { + Q[i*cols+k] /= scale; + s += Sqr( Q[i*cols+k] ); + } + f = Q[i*cols+l]; + g = -SameSign( sqrtf(s), f ); + h = f * g - s; + Q[i*cols+l] = f - g; + for( k = l; k < cols; k++ ) temp[k] = Q[i*cols+k] / h; + if( i != rows - 1 ) + { + for( j = l; j < rows; j++ ) + { + s = 0.0f; + for( k = l; k < cols; k++ ) s += Q[j*cols+k] * Q[i*cols+k]; + for( k = l; k < cols; k++ ) Q[j*cols+k] += s 
* temp[k]; + } + } + for( k = l; k < cols; k++ ) Q[i*cols+k] *= scale; + } + } + norm = max( norm, fabsf( diag[i] ) + fabsf( temp[i] ) ); + } + + + for( i = cols - 1; i >= 0; i-- ) + { + if( i < cols - 1 ) + { + if( g != 0.0f ) + { + for( j = l; j < cols; j++ ) R[i*cols+j] = ( Q[i*cols+j] / Q[i*cols+l] ) / g; + for( j = l; j < cols; j++ ) + { + s = 0.0f; + for( k = l; k < cols; k++ ) s += Q[i*cols+k] * R[j*cols+k]; + for( k = l; k < cols; k++ ) R[j*cols+k] += s * R[i*cols+k]; + } + } + for( j = l; j < cols; j++ ) + { + R[i*cols+j] = 0.0f; + R[j*cols+i] = 0.0f; + } + } + R[i*cols+i] = 1.0f; + g = temp[i]; + l = i; + } + + + for( i = cols - 1; i >= 0; i-- ) + { + l = i + 1; + g = diag[i]; + if( i < cols - 1 ) for( j = l; j < cols; j++ ) Q[i*cols+j] = 0.0f; + if( g != 0.0f ) + { + g = 1.0f / g; + if( i != cols - 1 ) + { + for( j = l; j < cols; j++ ) + { + s = 0.0f; + for( k = l; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j]; + f = ( s / Q[i*cols+i] ) * g; + for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i]; + } + } + for( j = i; j < rows; j++ ) Q[j*cols+i] *= g; + } + else + { + for( j = i; j < rows; j++ ) Q[j*cols+i] = 0.0f; + } + Q[i*cols+i] += 1.0f; + } + + + for( k = cols - 1; k >= 0; k-- ) + { + for( iter = 1; iter <= MaxIterations; iter++ ) + { + int jump = 0; + + for( l = k; l >= 0; l-- ) + { + q = l - 1; + if( fabsf( temp[l] ) + norm == norm ) { jump = 1; break; } + if( fabsf( diag[q] ) + norm == norm ) { jump = 0; break; } + } + + if( !jump ) + { + c = 0.0f; + s = 1.0f; + for( i = l; i <= k; i++ ) + { + f = s * temp[i]; + temp[i] *= c; + if( fabsf( f ) + norm == norm ) break; + g = diag[i]; + h = svd_pythag( f, g ); + diag[i] = h; + h = 1.0f / h; + c = g * h; + s = -f * h; + for( j = 0; j < rows; j++ ) + { + y = Q[j*cols+q]; + z = Q[j*cols+i]; + Q[j*cols+q] = y * c + z * s; + Q[j*cols+i] = z * c - y * s; + } + } + } + + z = diag[k]; + if( l == k ) + { + if( z < 0.0f ) + { + diag[k] = -z; + for( j = 0; j < cols; j++ ) R[k*cols+j] *= -1.0f; + } + 
break; + } + if( iter >= MaxIterations ) return; + x = diag[l]; + q = k - 1; + y = diag[q]; + g = temp[q]; + h = temp[k]; + f = ( ( y - z ) * ( y + z ) + ( g - h ) * ( g + h ) ) / ( 2.0f * h * y ); + g = svd_pythag( f, 1.0f ); + f = ( ( x - z ) * ( x + z ) + h * ( ( y / ( f + SameSign( g, f ) ) ) - h ) ) / x; + c = 1.0f; + s = 1.0f; + for( j = l; j <= q; j++ ) + { + i = j + 1; + g = temp[i]; + y = diag[i]; + h = s * g; + g = c * g; + z = svd_pythag( f, h ); + temp[j] = z; + c = f / z; + s = h / z; + f = x * c + g * s; + g = g * c - x * s; + h = y * s; + y = y * c; + for( p = 0; p < cols; p++ ) + { + x = R[j*cols+p]; + z = R[i*cols+p]; + R[j*cols+p] = x * c + z * s; + R[i*cols+p] = z * c - x * s; + } + z = svd_pythag( f, h ); + diag[j] = z; + if( z != 0.0f ) + { + z = 1.0f / z; + c = f * z; + s = h * z; + } + f = c * g + s * y; + x = c * y - s * g; + for( p = 0; p < rows; p++ ) + { + y = Q[p*cols+j]; + z = Q[p*cols+i]; + Q[p*cols+j] = y * c + z * s; + Q[p*cols+i] = z * c - y * s; + } + } + temp[l] = 0.0f; + temp[k] = f; + diag[k] = x; + } + } + + // Sort the singular values into descending order. + + for( i = 0; i < cols - 1; i++ ) + { + float biggest = diag[i]; // Biggest singular value so far. + int bindex = i; // The row/col it occurred in. + for( j = i + 1; j < cols; j++ ) + { + if( diag[j] > biggest ) + { + biggest = diag[j]; + bindex = j; + } + } + if( bindex != i ) // Need to swap rows and columns. + { + // Swap columns in Q. + for (int j = 0; j < rows; ++j) + swap(Q[j*cols+i], Q[j*cols+bindex]); + + // Swap rows in R. + for (int j = 0; j < rows; ++j) + swap(R[i*cols+j], R[bindex*cols+j]); + + // Swap elements in diag. 
+ swap(diag[i], diag[bindex]); + } + } +} diff --git a/thirdparty/thekla_atlas/nvmath/Fitting.h b/thirdparty/thekla_atlas/nvmath/Fitting.h new file mode 100644 index 0000000000..7a88cd28fd --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Fitting.h @@ -0,0 +1,50 @@ +// This code is in the public domain -- Ignacio CastaÃąo <castano@gmail.com> + +#pragma once +#ifndef NV_MATH_FITTING_H +#define NV_MATH_FITTING_H + +#include "Vector.h" +#include "Plane.h" + +namespace nv +{ + namespace Fit + { + Vector3 computeCentroid(int n, const Vector3 * points); + Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, const Vector3 & metric); + + Vector4 computeCentroid(int n, const Vector4 * points); + Vector4 computeCentroid(int n, const Vector4 * points, const float * weights, const Vector4 & metric); + + Vector3 computeCovariance(int n, const Vector3 * points, float * covariance); + Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, const Vector3 & metric, float * covariance); + + Vector4 computeCovariance(int n, const Vector4 * points, float * covariance); + Vector4 computeCovariance(int n, const Vector4 * points, const float * weights, const Vector4 & metric, float * covariance); + + Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points); + Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric); + + Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points); + Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric); + + Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points); + Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points, const float * weights, const Vector4 & metric); + + Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points); + Vector4 computePrincipalComponent_SVD(int n, const 
Vector4 * points); + + Plane bestPlane(int n, const Vector3 * points); + bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON); + + bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]); + bool eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4]); + + // Returns number of clusters [1-4]. + int compute4Means(int n, const Vector3 * points, const float * weights, const Vector3 & metric, Vector3 * cluster); + } + +} // nv namespace + +#endif // NV_MATH_FITTING_H diff --git a/thirdparty/thekla_atlas/nvmath/KahanSum.h b/thirdparty/thekla_atlas/nvmath/KahanSum.h new file mode 100644 index 0000000000..18d475e7cb --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/KahanSum.h @@ -0,0 +1,39 @@ +// This code is in the public domain -- Ignacio Castaņo <castano@gmail.com> + +#pragma once +#ifndef NV_MATH_KAHANSUM_H +#define NV_MATH_KAHANSUM_H + +#include "nvmath.h" + +namespace nv +{ + + class KahanSum + { + public: + KahanSum() : accum(0.0f), err(0) {}; + + void add(float f) + { + float compensated = f + err; + float tmp = accum + compensated; + err = accum - tmp; + err += compensated; + accum = tmp; + } + + float sum() const + { + return accum; + } + + private: + float accum; + float err; + }; + +} // nv namespace + + +#endif // NV_MATH_KAHANSUM_H diff --git a/thirdparty/thekla_atlas/nvmath/Matrix.cpp b/thirdparty/thekla_atlas/nvmath/Matrix.cpp new file mode 100644 index 0000000000..29bd19f5f8 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Matrix.cpp @@ -0,0 +1,441 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include "Matrix.inl" +#include "Vector.inl" + +#include "nvcore/Array.inl" + +#include <float.h> + +#if !NV_CC_MSVC && !NV_OS_ORBIS +#include <alloca.h> +#endif + +using namespace nv; + + +// Given a matrix a[1..n][1..n], this routine replaces it by the LU decomposition of a rowwise +// permutation of itself. a and n are input. 
a is output, arranged as in equation (2.3.14) above; +// indx[1..n] is an output vector that records the row permutation effected by the partial +// pivoting; d is output as -1 depending on whether the number of row interchanges was even +// or odd, respectively. This routine is used in combination with lubksb to solve linear equations +// or invert a matrix. +static bool ludcmp(float **a, int n, int *indx, float *d) +{ + const float TINY = 1.0e-20f; + + float * vv = (float*)alloca(sizeof(float) * n); // vv stores the implicit scaling of each row. + + *d = 1.0; // No row interchanges yet. + for (int i = 0; i < n; i++) { // Loop over rows to get the implicit scaling information. + + float big = 0.0; + for (int j = 0; j < n; j++) { + big = max(big, fabsf(a[i][j])); + } + if (big == 0) { + return false; // Singular matrix + } + + // No nonzero largest element. + vv[i] = 1.0f / big; // Save the scaling. + } + + for (int j = 0; j < n; j++) { // This is the loop over columns of Crout's method. + for (int i = 0; i < j; i++) { // This is equation (2.3.12) except for i = j. + float sum = a[i][j]; + for (int k = 0; k < i; k++) sum -= a[i][k]*a[k][j]; + a[i][j] = sum; + } + + int imax = -1; + float big = 0.0; // Initialize for the search for largest pivot element. + for (int i = j; i < n; i++) { // This is i = j of equation (2.3.12) and i = j+ 1 : : : N + float sum = a[i][j]; // of equation (2.3.13). + for (int k = 0; k < j; k++) { + sum -= a[i][k]*a[k][j]; + } + a[i][j]=sum; + + float dum = vv[i]*fabs(sum); + if (dum >= big) { + // Is the figure of merit for the pivot better than the best so far? + big = dum; + imax = i; + } + } + nvDebugCheck(imax != -1); + + if (j != imax) { // Do we need to interchange rows? + for (int k = 0; k < n; k++) { // Yes, do so... + swap(a[imax][k], a[j][k]); + } + *d = -(*d); // ...and change the parity of d. + vv[imax]=vv[j]; // Also interchange the scale factor. 
+ } + + indx[j]=imax; + if (a[j][j] == 0.0) a[j][j] = TINY; + + // If the pivot element is zero the matrix is singular (at least to the precision of the + // algorithm). For some applications on singular matrices, it is desirable to substitute + // TINY for zero. + if (j != n-1) { // Now, finally, divide by the pivot element. + float dum = 1.0f / a[j][j]; + for (int i = j+1; i < n; i++) a[i][j] *= dum; + } + } // Go back for the next column in the reduction. + + return true; +} + + +// Solves the set of n linear equations Ax = b. Here a[1..n][1..n] is input, not as the matrix +// A but rather as its LU decomposition, determined by the routine ludcmp. indx[1..n] is input +// as the permutation vector returned by ludcmp. b[1..n] is input as the right-hand side vector +// B, and returns with the solution vector X. a, n, and indx are not modified by this routine +// and can be left in place for successive calls with different right-hand sides b. This routine takes +// into account the possibility that b will begin with many zero elements, so it is efficient for use +// in matrix inversion. +static void lubksb(float **a, int n, int *indx, float b[]) +{ + int ii = 0; + for (int i=0; i<n; i++) { // When ii is set to a positive value, it will become + int ip = indx[i]; // the index of the first nonvanishing element of b. We now + float sum = b[ip]; // do the forward substitution, equation (2.3.6). The + b[ip] = b[i]; // only new wrinkle is to unscramble the permutation as we go. + if (ii != 0) { + for (int j = ii-1; j < i; j++) sum -= a[i][j]*b[j]; + } + else if (sum != 0.0f) { + ii = i+1; // A nonzero element was encountered, so from now on we + } + b[i] = sum; // will have to do the sums in the loop above. + } + for (int i=n-1; i>=0; i--) { // Now we do the backsubstitution, equation (2.3.7). + float sum = b[i]; + for (int j = i+1; j < n; j++) { + sum -= a[i][j]*b[j]; + } + b[i] = sum/a[i][i]; // Store a component of the solution vector X. + } // All done! 
+} + + +bool nv::solveLU(const Matrix & A, const Vector4 & b, Vector4 * x) +{ + nvDebugCheck(x != NULL); + + float m[4][4]; + float *a[4] = {m[0], m[1], m[2], m[3]}; + int idx[4]; + float d; + + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + a[x][y] = A(x, y); + } + } + + // Create LU decomposition. + if (!ludcmp(a, 4, idx, &d)) { + // Singular matrix. + return false; + } + + // Init solution. + *x = b; + + // Do back substitution. + lubksb(a, 4, idx, x->component); + + return true; +} + +// @@ Not tested. +Matrix nv::inverseLU(const Matrix & A) +{ + Vector4 Ai[4]; + + solveLU(A, Vector4(1, 0, 0, 0), &Ai[0]); + solveLU(A, Vector4(0, 1, 0, 0), &Ai[1]); + solveLU(A, Vector4(0, 0, 1, 0), &Ai[2]); + solveLU(A, Vector4(0, 0, 0, 1), &Ai[3]); + + return Matrix(Ai[0], Ai[1], Ai[2], Ai[3]); +} + + + +bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x) +{ + nvDebugCheck(x != NULL); + + float m[3][3]; + float *a[3] = {m[0], m[1], m[2]}; + int idx[3]; + float d; + + for (int y = 0; y < 3; y++) { + for (int x = 0; x < 3; x++) { + a[x][y] = A(x, y); + } + } + + // Create LU decomposition. + if (!ludcmp(a, 3, idx, &d)) { + // Singular matrix. + return false; + } + + // Init solution. + *x = b; + + // Do back substitution. + lubksb(a, 3, idx, x->component); + + return true; +} + + +bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x) +{ + nvDebugCheck(x != NULL); + + *x = transform(inverseCramer(A), b); + + return true; // @@ Return false if determinant(A) == 0 ! +} + +bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x) +{ + nvDebugCheck(x != NULL); + + const float det = A.determinant(); + if (equal(det, 0.0f)) { // @@ Use input epsilon. + return false; + } + + Matrix3 Ai = inverseCramer(A); + + *x = transform(Ai, b); + + return true; +} + + + +// Inverse using gaussian elimination. From Jon's code. 
+Matrix nv::inverse(const Matrix & m) { + + Matrix A = m; + Matrix B(identity); + + int i, j, k; + float max, t, det, pivot; + + det = 1.0; + for (i=0; i<4; i++) { /* eliminate in column i, below diag */ + max = -1.; + for (k=i; k<4; k++) /* find pivot for column i */ + if (fabs(A(k, i)) > max) { + max = fabs(A(k, i)); + j = k; + } + if (max<=0.) return B; /* if no nonzero pivot, PUNT */ + if (j!=i) { /* swap rows i and j */ + for (k=i; k<4; k++) + swap(A(i, k), A(j, k)); + for (k=0; k<4; k++) + swap(B(i, k), B(j, k)); + det = -det; + } + pivot = A(i, i); + det *= pivot; + for (k=i+1; k<4; k++) /* only do elems to right of pivot */ + A(i, k) /= pivot; + for (k=0; k<4; k++) + B(i, k) /= pivot; + /* we know that A(i, i) will be set to 1, so don't bother to do it */ + + for (j=i+1; j<4; j++) { /* eliminate in rows below i */ + t = A(j, i); /* we're gonna zero this guy */ + for (k=i+1; k<4; k++) /* subtract scaled row i from row j */ + A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */ + for (k=0; k<4; k++) + B(j, k) -= B(i, k)*t; + } + } + + /*---------- backward elimination ----------*/ + + for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */ + for (j=0; j<i; j++) { /* eliminate in rows above i */ + t = A(j, i); /* we're gonna zero this guy */ + for (k=0; k<4; k++) /* subtract scaled row i from row j */ + B(j, k) -= B(i, k)*t; + } + } + + return B; +} + + +Matrix3 nv::inverse(const Matrix3 & m) { + + Matrix3 A = m; + Matrix3 B(identity); + + int i, j, k; + float max, t, det, pivot; + + det = 1.0; + for (i=0; i<3; i++) { /* eliminate in column i, below diag */ + max = -1.; + for (k=i; k<3; k++) /* find pivot for column i */ + if (fabs(A(k, i)) > max) { + max = fabs(A(k, i)); + j = k; + } + if (max<=0.) 
return B; /* if no nonzero pivot, PUNT */ + if (j!=i) { /* swap rows i and j */ + for (k=i; k<3; k++) + swap(A(i, k), A(j, k)); + for (k=0; k<3; k++) + swap(B(i, k), B(j, k)); + det = -det; + } + pivot = A(i, i); + det *= pivot; + for (k=i+1; k<3; k++) /* only do elems to right of pivot */ + A(i, k) /= pivot; + for (k=0; k<3; k++) + B(i, k) /= pivot; + /* we know that A(i, i) will be set to 1, so don't bother to do it */ + + for (j=i+1; j<3; j++) { /* eliminate in rows below i */ + t = A(j, i); /* we're gonna zero this guy */ + for (k=i+1; k<3; k++) /* subtract scaled row i from row j */ + A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */ + for (k=0; k<3; k++) + B(j, k) -= B(i, k)*t; + } + } + + /*---------- backward elimination ----------*/ + + for (i=3-1; i>0; i--) { /* eliminate in column i, above diag */ + for (j=0; j<i; j++) { /* eliminate in rows above i */ + t = A(j, i); /* we're gonna zero this guy */ + for (k=0; k<3; k++) /* subtract scaled row i from row j */ + B(j, k) -= B(i, k)*t; + } + } + + return B; +} + + + + + +#if 0 + +// Copyright (C) 1999-2004 Michael Garland. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, provided that the above +// copyright notice(s) and this permission notice appear in all copies of +// the Software and that both the above copyright notice(s) and this +// permission notice appear in supporting documentation. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +// OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL +// INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING +// FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, +// NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION +// WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// +// Except as contained in this notice, the name of a copyright holder +// shall not be used in advertising or otherwise to promote the sale, use +// or other dealings in this Software without prior written authorization +// of the copyright holder. + + +// Matrix inversion code for 4x4 matrices using Gaussian elimination +// with partial pivoting. This is a specialized version of a +// procedure originally due to Paul Heckbert <ph@cs.cmu.edu>. +// +// Returns determinant of A, and B=inverse(A) +// If matrix A is singular, returns 0 and leaves trash in B. +// +#define SWAP(a, b, t) {t = a; a = b; b = t;} +double invert(Mat4& B, const Mat4& m) +{ + Mat4 A = m; + int i, j, k; + double max, t, det, pivot; + + /*---------- forward elimination ----------*/ + + for (i=0; i<4; i++) /* put identity matrix in B */ + for (j=0; j<4; j++) + B(i, j) = (double)(i==j); + + det = 1.0; + for (i=0; i<4; i++) { /* eliminate in column i, below diag */ + max = -1.; + for (k=i; k<4; k++) /* find pivot for column i */ + if (fabs(A(k, i)) > max) { + max = fabs(A(k, i)); + j = k; + } + if (max<=0.) 
return 0.; /* if no nonzero pivot, PUNT */ + if (j!=i) { /* swap rows i and j */ + for (k=i; k<4; k++) + SWAP(A(i, k), A(j, k), t); + for (k=0; k<4; k++) + SWAP(B(i, k), B(j, k), t); + det = -det; + } + pivot = A(i, i); + det *= pivot; + for (k=i+1; k<4; k++) /* only do elems to right of pivot */ + A(i, k) /= pivot; + for (k=0; k<4; k++) + B(i, k) /= pivot; + /* we know that A(i, i) will be set to 1, so don't bother to do it */ + + for (j=i+1; j<4; j++) { /* eliminate in rows below i */ + t = A(j, i); /* we're gonna zero this guy */ + for (k=i+1; k<4; k++) /* subtract scaled row i from row j */ + A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */ + for (k=0; k<4; k++) + B(j, k) -= B(i, k)*t; + } + } + + /*---------- backward elimination ----------*/ + + for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */ + for (j=0; j<i; j++) { /* eliminate in rows above i */ + t = A(j, i); /* we're gonna zero this guy */ + for (k=0; k<4; k++) /* subtract scaled row i from row j */ + B(j, k) -= B(i, k)*t; + } + } + + return det; +} + +#endif // 0 + + + diff --git a/thirdparty/thekla_atlas/nvmath/Matrix.h b/thirdparty/thekla_atlas/nvmath/Matrix.h new file mode 100644 index 0000000000..506bdad1ca --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Matrix.h @@ -0,0 +1,113 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_MATRIX_H +#define NV_MATH_MATRIX_H + +#include "Vector.h" + +// - Matrices are stored in memory in *column major* order. +// - Points are to be though of as column vectors. +// - Transformation of a point p by a matrix M is: p' = M * p + +namespace nv +{ + enum identity_t { identity }; + + // 3x3 matrix. 
+ class NVMATH_CLASS Matrix3 + { + public: + Matrix3(); + explicit Matrix3(float f); + explicit Matrix3(identity_t); + Matrix3(const Matrix3 & m); + Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2); + + float data(uint idx) const; + float & data(uint idx); + float get(uint row, uint col) const; + float operator()(uint row, uint col) const; + float & operator()(uint row, uint col); + + Vector3 row(uint i) const; + Vector3 column(uint i) const; + + void operator*=(float s); + void operator/=(float s); + void operator+=(const Matrix3 & m); + void operator-=(const Matrix3 & m); + + void scale(float s); + void scale(Vector3::Arg s); + float determinant() const; + + private: + float m_data[9]; + }; + + // Solve equation system using LU decomposition and back-substitution. + extern bool solveLU(const Matrix3 & m, const Vector3 & b, Vector3 * x); + + // Solve equation system using Cramer's inverse. + extern bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x); + + + // 4x4 matrix. 
+ class NVMATH_CLASS Matrix + { + public: + typedef Matrix const & Arg; + + Matrix(); + explicit Matrix(float f); + explicit Matrix(identity_t); + Matrix(const Matrix3 & m); + Matrix(const Matrix & m); + Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3); + //explicit Matrix(const float m[]); // m is assumed to contain 16 elements + + float data(uint idx) const; + float & data(uint idx); + float get(uint row, uint col) const; + float operator()(uint row, uint col) const; + float & operator()(uint row, uint col); + const float * ptr() const; + + Vector4 row(uint i) const; + Vector4 column(uint i) const; + + void zero(); + void identity(); + + void scale(float s); + void scale(Vector3::Arg s); + void translate(Vector3::Arg t); + void rotate(float theta, float v0, float v1, float v2); + float determinant() const; + + void operator+=(const Matrix & m); + void operator-=(const Matrix & m); + + void apply(Matrix::Arg m); + + private: + float m_data[16]; + }; + + // Solve equation system using LU decomposition and back-substitution. + extern bool solveLU(const Matrix & A, const Vector4 & b, Vector4 * x); + + // Solve equation system using Cramer's inverse. + extern bool solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x); + + // Compute inverse using LU decomposition. + extern Matrix inverseLU(const Matrix & m); + + // Compute inverse using Gaussian elimination and partial pivoting. 
+ extern Matrix inverse(const Matrix & m); + extern Matrix3 inverse(const Matrix3 & m); + +} // nv namespace + +#endif // NV_MATH_MATRIX_H diff --git a/thirdparty/thekla_atlas/nvmath/Matrix.inl b/thirdparty/thekla_atlas/nvmath/Matrix.inl new file mode 100644 index 0000000000..c0d99d9fe0 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Matrix.inl @@ -0,0 +1,1274 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_MATRIX_INL +#define NV_MATH_MATRIX_INL + +#include "Matrix.h" + +namespace nv +{ + inline Matrix3::Matrix3() {} + + inline Matrix3::Matrix3(float f) + { + for(int i = 0; i < 9; i++) { + m_data[i] = f; + } + } + + inline Matrix3::Matrix3(identity_t) + { + for(int i = 0; i < 3; i++) { + for(int j = 0; j < 3; j++) { + m_data[3*j+i] = (i == j) ? 1.0f : 0.0f; + } + } + } + + inline Matrix3::Matrix3(const Matrix3 & m) + { + for(int i = 0; i < 9; i++) { + m_data[i] = m.m_data[i]; + } + } + + inline Matrix3::Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2) + { + m_data[0] = v0.x; m_data[1] = v0.y; m_data[2] = v0.z; + m_data[3] = v1.x; m_data[4] = v1.y; m_data[5] = v1.z; + m_data[6] = v2.x; m_data[7] = v2.y; m_data[8] = v2.z; + } + + inline float Matrix3::data(uint idx) const + { + nvDebugCheck(idx < 9); + return m_data[idx]; + } + inline float & Matrix3::data(uint idx) + { + nvDebugCheck(idx < 9); + return m_data[idx]; + } + inline float Matrix3::get(uint row, uint col) const + { + nvDebugCheck(row < 3 && col < 3); + return m_data[col * 3 + row]; + } + inline float Matrix3::operator()(uint row, uint col) const + { + nvDebugCheck(row < 3 && col < 3); + return m_data[col * 3 + row]; + } + inline float & Matrix3::operator()(uint row, uint col) + { + nvDebugCheck(row < 3 && col < 3); + return m_data[col * 3 + row]; + } + + inline Vector3 Matrix3::row(uint i) const + { + nvDebugCheck(i < 3); + return Vector3(get(i, 0), get(i, 1), get(i, 2)); + } + inline Vector3 Matrix3::column(uint i) const + { + nvDebugCheck(i < 
3); + return Vector3(get(0, i), get(1, i), get(2, i)); + } + + inline void Matrix3::operator*=(float s) + { + for(int i = 0; i < 9; i++) { + m_data[i] *= s; + } + } + + inline void Matrix3::operator/=(float s) + { + float is = 1.0f /s; + for(int i = 0; i < 9; i++) { + m_data[i] *= is; + } + } + + inline void Matrix3::operator+=(const Matrix3 & m) + { + for(int i = 0; i < 9; i++) { + m_data[i] += m.m_data[i]; + } + } + + inline void Matrix3::operator-=(const Matrix3 & m) + { + for(int i = 0; i < 9; i++) { + m_data[i] -= m.m_data[i]; + } + } + + inline Matrix3 operator+(const Matrix3 & a, const Matrix3 & b) + { + Matrix3 m = a; + m += b; + return m; + } + + inline Matrix3 operator-(const Matrix3 & a, const Matrix3 & b) + { + Matrix3 m = a; + m -= b; + return m; + } + + inline Matrix3 operator*(const Matrix3 & a, float s) + { + Matrix3 m = a; + m *= s; + return m; + } + + inline Matrix3 operator*(float s, const Matrix3 & a) + { + Matrix3 m = a; + m *= s; + return m; + } + + inline Matrix3 operator/(const Matrix3 & a, float s) + { + Matrix3 m = a; + m /= s; + return m; + } + + inline Matrix3 mul(const Matrix3 & a, const Matrix3 & b) + { + Matrix3 m; + + for(int i = 0; i < 3; i++) { + const float ai0 = a(i,0), ai1 = a(i,1), ai2 = a(i,2); + m(i, 0) = ai0 * b(0,0) + ai1 * b(1,0) + ai2 * b(2,0); + m(i, 1) = ai0 * b(0,1) + ai1 * b(1,1) + ai2 * b(2,1); + m(i, 2) = ai0 * b(0,2) + ai1 * b(1,2) + ai2 * b(2,2); + } + + return m; + } + + inline Matrix3 operator*(const Matrix3 & a, const Matrix3 & b) + { + return mul(a, b); + } + + // Transform the given 3d vector with the given matrix. 
+ inline Vector3 transform(const Matrix3 & m, const Vector3 & p) + { + return Vector3( + p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2), + p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2), + p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2)); + } + + inline void Matrix3::scale(float s) + { + for (int i = 0; i < 9; i++) { + m_data[i] *= s; + } + } + + inline void Matrix3::scale(Vector3::Arg s) + { + m_data[0] *= s.x; m_data[1] *= s.x; m_data[2] *= s.x; + m_data[3] *= s.y; m_data[4] *= s.y; m_data[5] *= s.y; + m_data[6] *= s.z; m_data[7] *= s.z; m_data[8] *= s.z; + } + + inline float Matrix3::determinant() const + { + return + get(0,0) * get(1,1) * get(2,2) + + get(0,1) * get(1,2) * get(2,0) + + get(0,2) * get(1,0) * get(2,1) - + get(0,2) * get(1,1) * get(2,0) - + get(0,1) * get(1,0) * get(2,2) - + get(0,0) * get(1,2) * get(2,1); + } + + // Inverse using Cramer's rule. + inline Matrix3 inverseCramer(const Matrix3 & m) + { + const float det = m.determinant(); + if (equal(det, 0.0f, 0.0f)) { + return Matrix3(0); + } + + Matrix3 r; + + r.data(0) = - m.data(5) * m.data(7) + m.data(4) * m.data(8); + r.data(1) = + m.data(5) * m.data(6) - m.data(3) * m.data(8); + r.data(2) = - m.data(4) * m.data(6) + m.data(3) * m.data(7); + + r.data(3) = + m.data(2) * m.data(7) - m.data(1) * m.data(8); + r.data(4) = - m.data(2) * m.data(6) + m.data(0) * m.data(8); + r.data(5) = + m.data(1) * m.data(6) - m.data(0) * m.data(7); + + r.data(6) = - m.data(2) * m.data(4) + m.data(1) * m.data(5); + r.data(7) = + m.data(2) * m.data(3) - m.data(0) * m.data(5); + r.data(8) = - m.data(1) * m.data(3) + m.data(0) * m.data(4); + + r.scale(1.0f / det); + + return r; + } + + + + inline Matrix::Matrix() + { + } + + inline Matrix::Matrix(float f) + { + for(int i = 0; i < 16; i++) { + m_data[i] = 0.0f; + } + } + + inline Matrix::Matrix(identity_t) + { + for(int i = 0; i < 4; i++) { + for(int j = 0; j < 4; j++) { + m_data[4*j+i] = (i == j) ? 
1.0f : 0.0f; + } + } + } + + inline Matrix::Matrix(const Matrix & m) + { + for(int i = 0; i < 16; i++) { + m_data[i] = m.m_data[i]; + } + } + + inline Matrix::Matrix(const Matrix3 & m) + { + for(int i = 0; i < 3; i++) { + for(int j = 0; j < 3; j++) { + operator()(i, j) = m.get(i, j); + } + } + for(int i = 0; i < 4; i++) { + operator()(3, i) = 0; + operator()(i, 3) = 0; + } + } + + inline Matrix::Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3) + { + m_data[ 0] = v0.x; m_data[ 1] = v0.y; m_data[ 2] = v0.z; m_data[ 3] = v0.w; + m_data[ 4] = v1.x; m_data[ 5] = v1.y; m_data[ 6] = v1.z; m_data[ 7] = v1.w; + m_data[ 8] = v2.x; m_data[ 9] = v2.y; m_data[10] = v2.z; m_data[11] = v2.w; + m_data[12] = v3.x; m_data[13] = v3.y; m_data[14] = v3.z; m_data[15] = v3.w; + } + + /*inline Matrix::Matrix(const float m[]) + { + for(int i = 0; i < 16; i++) { + m_data[i] = m[i]; + } + }*/ + + + // Accessors + inline float Matrix::data(uint idx) const + { + nvDebugCheck(idx < 16); + return m_data[idx]; + } + inline float & Matrix::data(uint idx) + { + nvDebugCheck(idx < 16); + return m_data[idx]; + } + inline float Matrix::get(uint row, uint col) const + { + nvDebugCheck(row < 4 && col < 4); + return m_data[col * 4 + row]; + } + inline float Matrix::operator()(uint row, uint col) const + { + nvDebugCheck(row < 4 && col < 4); + return m_data[col * 4 + row]; + } + inline float & Matrix::operator()(uint row, uint col) + { + nvDebugCheck(row < 4 && col < 4); + return m_data[col * 4 + row]; + } + + inline const float * Matrix::ptr() const + { + return m_data; + } + + inline Vector4 Matrix::row(uint i) const + { + nvDebugCheck(i < 4); + return Vector4(get(i, 0), get(i, 1), get(i, 2), get(i, 3)); + } + + inline Vector4 Matrix::column(uint i) const + { + nvDebugCheck(i < 4); + return Vector4(get(0, i), get(1, i), get(2, i), get(3, i)); + } + + inline void Matrix::zero() + { + m_data[0] = 0; m_data[1] = 0; m_data[2] = 0; m_data[3] = 0; + m_data[4] = 0; m_data[5] = 0; 
m_data[6] = 0; m_data[7] = 0; + m_data[8] = 0; m_data[9] = 0; m_data[10] = 0; m_data[11] = 0; + m_data[12] = 0; m_data[13] = 0; m_data[14] = 0; m_data[15] = 0; + } + + inline void Matrix::identity() + { + m_data[0] = 1; m_data[1] = 0; m_data[2] = 0; m_data[3] = 0; + m_data[4] = 0; m_data[5] = 1; m_data[6] = 0; m_data[7] = 0; + m_data[8] = 0; m_data[9] = 0; m_data[10] = 1; m_data[11] = 0; + m_data[12] = 0; m_data[13] = 0; m_data[14] = 0; m_data[15] = 1; + } + + // Apply scale. + inline void Matrix::scale(float s) + { + m_data[0] *= s; m_data[1] *= s; m_data[2] *= s; m_data[3] *= s; + m_data[4] *= s; m_data[5] *= s; m_data[6] *= s; m_data[7] *= s; + m_data[8] *= s; m_data[9] *= s; m_data[10] *= s; m_data[11] *= s; + m_data[12] *= s; m_data[13] *= s; m_data[14] *= s; m_data[15] *= s; + } + + // Apply scale. + inline void Matrix::scale(Vector3::Arg s) + { + m_data[0] *= s.x; m_data[1] *= s.x; m_data[2] *= s.x; m_data[3] *= s.x; + m_data[4] *= s.y; m_data[5] *= s.y; m_data[6] *= s.y; m_data[7] *= s.y; + m_data[8] *= s.z; m_data[9] *= s.z; m_data[10] *= s.z; m_data[11] *= s.z; + } + + // Apply translation. + inline void Matrix::translate(Vector3::Arg t) + { + m_data[12] = m_data[0] * t.x + m_data[4] * t.y + m_data[8] * t.z + m_data[12]; + m_data[13] = m_data[1] * t.x + m_data[5] * t.y + m_data[9] * t.z + m_data[13]; + m_data[14] = m_data[2] * t.x + m_data[6] * t.y + m_data[10] * t.z + m_data[14]; + m_data[15] = m_data[3] * t.x + m_data[7] * t.y + m_data[11] * t.z + m_data[15]; + } + + Matrix rotation(float theta, float v0, float v1, float v2); + + // Apply rotation. + inline void Matrix::rotate(float theta, float v0, float v1, float v2) + { + Matrix R(rotation(theta, v0, v1, v2)); + apply(R); + } + + // Apply transform. 
+ inline void Matrix::apply(Matrix::Arg m) + { + nvDebugCheck(this != &m); + + for(int i = 0; i < 4; i++) { + const float ai0 = get(i,0), ai1 = get(i,1), ai2 = get(i,2), ai3 = get(i,3); + m_data[0 + i] = ai0 * m(0,0) + ai1 * m(1,0) + ai2 * m(2,0) + ai3 * m(3,0); + m_data[4 + i] = ai0 * m(0,1) + ai1 * m(1,1) + ai2 * m(2,1) + ai3 * m(3,1); + m_data[8 + i] = ai0 * m(0,2) + ai1 * m(1,2) + ai2 * m(2,2) + ai3 * m(3,2); + m_data[12+ i] = ai0 * m(0,3) + ai1 * m(1,3) + ai2 * m(2,3) + ai3 * m(3,3); + } + } + + // Get scale matrix. + inline Matrix scale(Vector3::Arg s) + { + Matrix m(identity); + m(0,0) = s.x; + m(1,1) = s.y; + m(2,2) = s.z; + return m; + } + + // Get scale matrix. + inline Matrix scale(float s) + { + Matrix m(identity); + m(0,0) = m(1,1) = m(2,2) = s; + return m; + } + + // Get translation matrix. + inline Matrix translation(Vector3::Arg t) + { + Matrix m(identity); + m(0,3) = t.x; + m(1,3) = t.y; + m(2,3) = t.z; + return m; + } + + // Get rotation matrix. + inline Matrix rotation(float theta, float v0, float v1, float v2) + { + float cost = cosf(theta); + float sint = sinf(theta); + + Matrix m(identity); + + if( 1 == v0 && 0 == v1 && 0 == v2 ) { + m(1,1) = cost; m(2,1) = -sint; + m(1,2) = sint; m(2,2) = cost; + } + else if( 0 == v0 && 1 == v1 && 0 == v2 ) { + m(0,0) = cost; m(2,0) = sint; + m(1,2) = -sint; m(2,2) = cost; + } + else if( 0 == v0 && 0 == v1 && 1 == v2 ) { + m(0,0) = cost; m(1,0) = -sint; + m(0,1) = sint; m(1,1) = cost; + } + else { + float a2, b2, c2; + a2 = v0 * v0; + b2 = v1 * v1; + c2 = v2 * v2; + + float iscale = 1.0f / sqrtf(a2 + b2 + c2); + v0 *= iscale; + v1 *= iscale; + v2 *= iscale; + + float abm, acm, bcm; + float mcos, asin, bsin, csin; + mcos = 1.0f - cost; + abm = v0 * v1 * mcos; + acm = v0 * v2 * mcos; + bcm = v1 * v2 * mcos; + asin = v0 * sint; + bsin = v1 * sint; + csin = v2 * sint; + m(0,0) = a2 * mcos + cost; + m(1,0) = abm - csin; + m(2,0) = acm + bsin; + m(3,0) = abm + csin; + m(1,1) = b2 * mcos + cost; + m(2,1) = bcm - 
asin; + m(3,1) = acm - bsin; + m(1,2) = bcm + asin; + m(2,2) = c2 * mcos + cost; + } + return m; + } + + //Matrix rotation(float yaw, float pitch, float roll); + //Matrix skew(float angle, Vector3::Arg v1, Vector3::Arg v2); + + // Get frustum matrix. + inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar) + { + Matrix m(0.0f); + + float doubleznear = 2.0f * zNear; + float one_deltax = 1.0f / (xmax - xmin); + float one_deltay = 1.0f / (ymax - ymin); + float one_deltaz = 1.0f / (zFar - zNear); + + m(0,0) = doubleznear * one_deltax; + m(1,1) = doubleznear * one_deltay; + m(0,2) = (xmax + xmin) * one_deltax; + m(1,2) = (ymax + ymin) * one_deltay; + m(2,2) = -(zFar + zNear) * one_deltaz; + m(3,2) = -1.0f; + m(2,3) = -(zFar * doubleznear) * one_deltaz; + + return m; + } + + // Get inverse frustum matrix. + inline Matrix frustumInverse(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar) + { + Matrix m(0.0f); + + float one_doubleznear = 1.0f / (2.0f * zNear); + float one_doubleznearzfar = 1.0f / (2.0f * zNear * zFar); + + m(0,0) = (xmax - xmin) * one_doubleznear; + m(0,3) = (xmax + xmin) * one_doubleznear; + m(1,1) = (ymax - ymin) * one_doubleznear; + m(1,3) = (ymax + ymin) * one_doubleznear; + m(2,3) = -1; + m(3,2) = -(zFar - zNear) * one_doubleznearzfar; + m(3,3) = (zFar + zNear) * one_doubleznearzfar; + + return m; + } + + // Get infinite frustum matrix. + inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear) + { + Matrix m(0.0f); + + float doubleznear = 2.0f * zNear; + float one_deltax = 1.0f / (xmax - xmin); + float one_deltay = 1.0f / (ymax - ymin); + float nudge = 1.0; // 0.999; + + m(0,0) = doubleznear * one_deltax; + m(1,1) = doubleznear * one_deltay; + m(0,2) = (xmax + xmin) * one_deltax; + m(1,2) = (ymax + ymin) * one_deltay; + m(2,2) = -1.0f * nudge; + m(3,2) = -1.0f; + m(2,3) = -doubleznear * nudge; + + return m; + } + + // Get perspective matrix. 
+ inline Matrix perspective(float fovy, float aspect, float zNear, float zFar) + { + float xmax = zNear * tan(fovy / 2); + float xmin = -xmax; + + float ymax = xmax / aspect; + float ymin = -ymax; + + return frustum(xmin, xmax, ymin, ymax, zNear, zFar); + } + + // Get inverse perspective matrix. + inline Matrix perspectiveInverse(float fovy, float aspect, float zNear, float zFar) + { + float xmax = zNear * tan(fovy / 2); + float xmin = -xmax; + + float ymax = xmax / aspect; + float ymin = -ymax; + + return frustumInverse(xmin, xmax, ymin, ymax, zNear, zFar); + } + + // Get infinite perspective matrix. + inline Matrix perspective(float fovy, float aspect, float zNear) + { + float x = zNear * tan(fovy / 2); + float y = x / aspect; + return frustum( -x, x, -y, y, zNear ); + } + + // Get matrix determinant. + inline float Matrix::determinant() const + { + return + m_data[3] * m_data[6] * m_data[ 9] * m_data[12] - m_data[2] * m_data[7] * m_data[ 9] * m_data[12] - m_data[3] * m_data[5] * m_data[10] * m_data[12] + m_data[1] * m_data[7] * m_data[10] * m_data[12] + + m_data[2] * m_data[5] * m_data[11] * m_data[12] - m_data[1] * m_data[6] * m_data[11] * m_data[12] - m_data[3] * m_data[6] * m_data[ 8] * m_data[13] + m_data[2] * m_data[7] * m_data[ 8] * m_data[13] + + m_data[3] * m_data[4] * m_data[10] * m_data[13] - m_data[0] * m_data[7] * m_data[10] * m_data[13] - m_data[2] * m_data[4] * m_data[11] * m_data[13] + m_data[0] * m_data[6] * m_data[11] * m_data[13] + + m_data[3] * m_data[5] * m_data[ 8] * m_data[14] - m_data[1] * m_data[7] * m_data[ 8] * m_data[14] - m_data[3] * m_data[4] * m_data[ 9] * m_data[14] + m_data[0] * m_data[7] * m_data[ 9] * m_data[14] + + m_data[1] * m_data[4] * m_data[11] * m_data[14] - m_data[0] * m_data[5] * m_data[11] * m_data[14] - m_data[2] * m_data[5] * m_data[ 8] * m_data[15] + m_data[1] * m_data[6] * m_data[ 8] * m_data[15] + + m_data[2] * m_data[4] * m_data[ 9] * m_data[15] - m_data[0] * m_data[6] * m_data[ 9] * m_data[15] - m_data[1] * 
m_data[4] * m_data[10] * m_data[15] + m_data[0] * m_data[5] * m_data[10] * m_data[15]; + } + + inline Matrix transpose(Matrix::Arg m) + { + Matrix r; + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + r(i, j) = m(j, i); + } + } + return r; + } + + // Inverse using Cramer's rule. + inline Matrix inverseCramer(Matrix::Arg m) + { + Matrix r; + r.data( 0) = m.data(6)*m.data(11)*m.data(13) - m.data(7)*m.data(10)*m.data(13) + m.data(7)*m.data(9)*m.data(14) - m.data(5)*m.data(11)*m.data(14) - m.data(6)*m.data(9)*m.data(15) + m.data(5)*m.data(10)*m.data(15); + r.data( 1) = m.data(3)*m.data(10)*m.data(13) - m.data(2)*m.data(11)*m.data(13) - m.data(3)*m.data(9)*m.data(14) + m.data(1)*m.data(11)*m.data(14) + m.data(2)*m.data(9)*m.data(15) - m.data(1)*m.data(10)*m.data(15); + r.data( 2) = m.data(2)*m.data( 7)*m.data(13) - m.data(3)*m.data( 6)*m.data(13) + m.data(3)*m.data(5)*m.data(14) - m.data(1)*m.data( 7)*m.data(14) - m.data(2)*m.data(5)*m.data(15) + m.data(1)*m.data( 6)*m.data(15); + r.data( 3) = m.data(3)*m.data( 6)*m.data( 9) - m.data(2)*m.data( 7)*m.data( 9) - m.data(3)*m.data(5)*m.data(10) + m.data(1)*m.data( 7)*m.data(10) + m.data(2)*m.data(5)*m.data(11) - m.data(1)*m.data( 6)*m.data(11); + r.data( 4) = m.data(7)*m.data(10)*m.data(12) - m.data(6)*m.data(11)*m.data(12) - m.data(7)*m.data(8)*m.data(14) + m.data(4)*m.data(11)*m.data(14) + m.data(6)*m.data(8)*m.data(15) - m.data(4)*m.data(10)*m.data(15); + r.data( 5) = m.data(2)*m.data(11)*m.data(12) - m.data(3)*m.data(10)*m.data(12) + m.data(3)*m.data(8)*m.data(14) - m.data(0)*m.data(11)*m.data(14) - m.data(2)*m.data(8)*m.data(15) + m.data(0)*m.data(10)*m.data(15); + r.data( 6) = m.data(3)*m.data( 6)*m.data(12) - m.data(2)*m.data( 7)*m.data(12) - m.data(3)*m.data(4)*m.data(14) + m.data(0)*m.data( 7)*m.data(14) + m.data(2)*m.data(4)*m.data(15) - m.data(0)*m.data( 6)*m.data(15); + r.data( 7) = m.data(2)*m.data( 7)*m.data( 8) - m.data(3)*m.data( 6)*m.data( 8) + m.data(3)*m.data(4)*m.data(10) - 
m.data(0)*m.data( 7)*m.data(10) - m.data(2)*m.data(4)*m.data(11) + m.data(0)*m.data( 6)*m.data(11); + r.data( 8) = m.data(5)*m.data(11)*m.data(12) - m.data(7)*m.data( 9)*m.data(12) + m.data(7)*m.data(8)*m.data(13) - m.data(4)*m.data(11)*m.data(13) - m.data(5)*m.data(8)*m.data(15) + m.data(4)*m.data( 9)*m.data(15); + r.data( 9) = m.data(3)*m.data( 9)*m.data(12) - m.data(1)*m.data(11)*m.data(12) - m.data(3)*m.data(8)*m.data(13) + m.data(0)*m.data(11)*m.data(13) + m.data(1)*m.data(8)*m.data(15) - m.data(0)*m.data( 9)*m.data(15); + r.data(10) = m.data(1)*m.data( 7)*m.data(12) - m.data(3)*m.data( 5)*m.data(12) + m.data(3)*m.data(4)*m.data(13) - m.data(0)*m.data( 7)*m.data(13) - m.data(1)*m.data(4)*m.data(15) + m.data(0)*m.data( 5)*m.data(15); + r.data(11) = m.data(3)*m.data( 5)*m.data( 8) - m.data(1)*m.data( 7)*m.data( 8) - m.data(3)*m.data(4)*m.data( 9) + m.data(0)*m.data( 7)*m.data( 9) + m.data(1)*m.data(4)*m.data(11) - m.data(0)*m.data( 5)*m.data(11); + r.data(12) = m.data(6)*m.data( 9)*m.data(12) - m.data(5)*m.data(10)*m.data(12) - m.data(6)*m.data(8)*m.data(13) + m.data(4)*m.data(10)*m.data(13) + m.data(5)*m.data(8)*m.data(14) - m.data(4)*m.data( 9)*m.data(14); + r.data(13) = m.data(1)*m.data(10)*m.data(12) - m.data(2)*m.data( 9)*m.data(12) + m.data(2)*m.data(8)*m.data(13) - m.data(0)*m.data(10)*m.data(13) - m.data(1)*m.data(8)*m.data(14) + m.data(0)*m.data( 9)*m.data(14); + r.data(14) = m.data(2)*m.data( 5)*m.data(12) - m.data(1)*m.data( 6)*m.data(12) - m.data(2)*m.data(4)*m.data(13) + m.data(0)*m.data( 6)*m.data(13) + m.data(1)*m.data(4)*m.data(14) - m.data(0)*m.data( 5)*m.data(14); + r.data(15) = m.data(1)*m.data( 6)*m.data( 8) - m.data(2)*m.data( 5)*m.data( 8) + m.data(2)*m.data(4)*m.data( 9) - m.data(0)*m.data( 6)*m.data( 9) - m.data(1)*m.data(4)*m.data(10) + m.data(0)*m.data( 5)*m.data(10); + r.scale(1.0f / m.determinant()); + return r; + } + + inline Matrix isometryInverse(Matrix::Arg m) + { + Matrix r(identity); + + // transposed 3x3 upper left matrix + for 
(int i = 0; i < 3; i++) + { + for (int j = 0; j < 3; j++) + { + r(i, j) = m(j, i); + } + } + + // translate by the negative offsets + r.translate(-Vector3(m.data(12), m.data(13), m.data(14))); + + return r; + } + + // Transform the given 3d point with the given matrix. + inline Vector3 transformPoint(Matrix::Arg m, Vector3::Arg p) + { + return Vector3( + p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + m(0,3), + p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + m(1,3), + p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + m(2,3)); + } + + // Transform the given 3d vector with the given matrix. + inline Vector3 transformVector(Matrix::Arg m, Vector3::Arg p) + { + return Vector3( + p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2), + p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2), + p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2)); + } + + // Transform the given 4d vector with the given matrix. + inline Vector4 transform(Matrix::Arg m, Vector4::Arg p) + { + return Vector4( + p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + p.w * m(0,3), + p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + p.w * m(1,3), + p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + p.w * m(2,3), + p.x * m(3,0) + p.y * m(3,1) + p.z * m(3,2) + p.w * m(3,3)); + } + + inline Matrix mul(Matrix::Arg a, Matrix::Arg b) + { + // @@ Is this the right order? mul(a, b) = b * a + Matrix m = a; + m.apply(b); + return m; + } + + inline void Matrix::operator+=(const Matrix & m) + { + for(int i = 0; i < 16; i++) { + m_data[i] += m.m_data[i]; + } + } + + inline void Matrix::operator-=(const Matrix & m) + { + for(int i = 0; i < 16; i++) { + m_data[i] -= m.m_data[i]; + } + } + + inline Matrix operator+(const Matrix & a, const Matrix & b) + { + Matrix m = a; + m += b; + return m; + } + + inline Matrix operator-(const Matrix & a, const Matrix & b) + { + Matrix m = a; + m -= b; + return m; + } + + +} // nv namespace + + +#if 0 // old code. +/** @name Special matrices. */ +//@{ +/** Generate a translation matrix. 
*/ +void TranslationMatrix(const Vec3 & v) { + data[0] = 1; data[1] = 0; data[2] = 0; data[3] = 0; + data[4] = 0; data[5] = 1; data[6] = 0; data[7] = 0; + data[8] = 0; data[9] = 0; data[10] = 1; data[11] = 0; + data[12] = v.x; data[13] = v.y; data[14] = v.z; data[15] = 1; +} + +/** Rotate theta degrees around v. */ +void RotationMatrix( float theta, float v0, float v1, float v2 ) { + float cost = cos(theta); + float sint = sin(theta); + + if( 1 == v0 && 0 == v1 && 0 == v2 ) { + data[0] = 1.0f; data[1] = 0.0f; data[2] = 0.0f; data[3] = 0.0f; + data[4] = 0.0f; data[5] = cost; data[6] = -sint;data[7] = 0.0f; + data[8] = 0.0f; data[9] = sint; data[10] = cost;data[11] = 0.0f; + data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; + } + else if( 0 == v0 && 1 == v1 && 0 == v2 ) { + data[0] = cost; data[1] = 0.0f; data[2] = sint; data[3] = 0.0f; + data[4] = 0.0f; data[5] = 1.0f; data[6] = 0.0f; data[7] = 0.0f; + data[8] = -sint;data[9] = 0.0f;data[10] = cost; data[11] = 0.0f; + data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; + } + else if( 0 == v0 && 0 == v1 && 1 == v2 ) { + data[0] = cost; data[1] = -sint;data[2] = 0.0f; data[3] = 0.0f; + data[4] = sint; data[5] = cost; data[6] = 0.0f; data[7] = 0.0f; + data[8] = 0.0f; data[9] = 0.0f; data[10] = 1.0f;data[11] = 0.0f; + data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; + } + else { + //we need scale a,b,c to unit length. 
+ float a2, b2, c2; + a2 = v0 * v0; + b2 = v1 * v1; + c2 = v2 * v2; + + float iscale = 1.0f / sqrtf(a2 + b2 + c2); + v0 *= iscale; + v1 *= iscale; + v2 *= iscale; + + float abm, acm, bcm; + float mcos, asin, bsin, csin; + mcos = 1.0f - cost; + abm = v0 * v1 * mcos; + acm = v0 * v2 * mcos; + bcm = v1 * v2 * mcos; + asin = v0 * sint; + bsin = v1 * sint; + csin = v2 * sint; + data[0] = a2 * mcos + cost; + data[1] = abm - csin; + data[2] = acm + bsin; + data[3] = abm + csin; + data[4] = 0.0f; + data[5] = b2 * mcos + cost; + data[6] = bcm - asin; + data[7] = acm - bsin; + data[8] = 0.0f; + data[9] = bcm + asin; + data[10] = c2 * mcos + cost; + data[11] = 0.0f; + data[12] = 0.0f; + data[13] = 0.0f; + data[14] = 0.0f; + data[15] = 1.0f; + } +} + +/* +void SkewMatrix(float angle, const Vec3 & v1, const Vec3 & v2) { +v1.Normalize(); +v2.Normalize(); + +Vec3 v3; +v3.Cross(v1, v2); +v3.Normalize(); + +// Get skew factor. +float costheta = Vec3DotProduct(v1, v2); +float sintheta = Real.Sqrt(1 - costheta * costheta); +float skew = tan(Trig.DegreesToRadians(angle) + acos(sintheta)) * sintheta - costheta; + +// Build orthonormal matrix. +v1 = FXVector3.Cross(v3, v2); +v1.Normalize(); + +Matrix R = Matrix::Identity; +R[0, 0] = v3.X; // Not sure this is in the correct order... +R[1, 0] = v3.Y; +R[2, 0] = v3.Z; +R[0, 1] = v1.X; +R[1, 1] = v1.Y; +R[2, 1] = v1.Z; +R[0, 2] = v2.X; +R[1, 2] = v2.Y; +R[2, 2] = v2.Z; + +// Build skew matrix. +Matrix S = Matrix::Identity; +S[2, 1] = -skew; + +// Return skew transform. +return R * S * R.Transpose; // Not sure this is in the correct order... +} +*/ + +/** +* Generate rotation matrix for the euler angles. This is the same as computing +* 3 rotation matrices and multiplying them together in our custom order. +* +* @todo Have to recompute this code for our new convention. 
+**/ +void RotationMatrix( float yaw, float pitch, float roll ) { + float sy = sin(yaw+ToRadian(90)); + float cy = cos(yaw+ToRadian(90)); + float sp = sin(pitch-ToRadian(90)); + float cp = cos(pitch-ToRadian(90)); + float sr = sin(roll); + float cr = cos(roll); + + data[0] = cr*cy + sr*sp*sy; + data[1] = cp*sy; + data[2] = -sr*cy + cr*sp*sy; + data[3] = 0; + + data[4] = -cr*sy + sr*sp*cy; + data[5] = cp*cy; + data[6] = sr*sy + cr*sp*cy; + data[7] = 0; + + data[8] = sr*cp; + data[9] = -sp; + data[10] = cr*cp; + data[11] = 0; + + data[12] = 0; + data[13] = 0; + data[14] = 0; + data[15] = 1; +} + +/** Create a frustum matrix with the far plane at the infinity. */ +void Frustum( float xmin, float xmax, float ymin, float ymax, float zNear, float zFar ) { + float one_deltax, one_deltay, one_deltaz, doubleznear; + + doubleznear = 2.0f * zNear; + one_deltax = 1.0f / (xmax - xmin); + one_deltay = 1.0f / (ymax - ymin); + one_deltaz = 1.0f / (zFar - zNear); + + data[0] = (float)(doubleznear * one_deltax); + data[1] = 0.0f; + data[2] = 0.0f; + data[3] = 0.0f; + data[4] = 0.0f; + data[5] = (float)(doubleznear * one_deltay); + data[6] = 0.f; + data[7] = 0.f; + data[8] = (float)((xmax + xmin) * one_deltax); + data[9] = (float)((ymax + ymin) * one_deltay); + data[10] = (float)(-(zFar + zNear) * one_deltaz); + data[11] = -1.f; + data[12] = 0.f; + data[13] = 0.f; + data[14] = (float)(-(zFar * doubleznear) * one_deltaz); + data[15] = 0.f; +} + +/** Create a frustum matrix with the far plane at the infinity. 
*/ +void FrustumInf( float xmin, float xmax, float ymin, float ymax, float zNear ) { + float one_deltax, one_deltay, doubleznear, nudge; + + doubleznear = 2.0f * zNear; + one_deltax = 1.0f / (xmax - xmin); + one_deltay = 1.0f / (ymax - ymin); + nudge = 1.0; // 0.999; + + data[0] = doubleznear * one_deltax; + data[1] = 0.0f; + data[2] = 0.0f; + data[3] = 0.0f; + + data[4] = 0.0f; + data[5] = doubleznear * one_deltay; + data[6] = 0.f; + data[7] = 0.f; + + data[8] = (xmax + xmin) * one_deltax; + data[9] = (ymax + ymin) * one_deltay; + data[10] = -1.0f * nudge; + data[11] = -1.0f; + + data[12] = 0.f; + data[13] = 0.f; + data[14] = -doubleznear * nudge; + data[15] = 0.f; +} + +/** Create an inverse frustum matrix with the far plane at the infinity. */ +void FrustumInfInv( float left, float right, float bottom, float top, float zNear ) { + // this matrix is wrong (not tested floatly) I think it should be transposed. + data[0] = (right - left) / (2 * zNear); + data[1] = 0; + data[2] = 0; + data[3] = (right + left) / (2 * zNear); + data[4] = 0; + data[5] = (top - bottom) / (2 * zNear); + data[6] = 0; + data[7] = (top + bottom) / (2 * zNear); + data[8] = 0; + data[9] = 0; + data[10] = 0; + data[11] = -1; + data[12] = 0; + data[13] = 0; + data[14] = -1 / (2 * zNear); + data[15] = 1 / (2 * zNear); +} + +/** Create an homogeneous projection matrix. */ +void Perspective( float fov, float aspect, float zNear, float zFar ) { + float xmin, xmax, ymin, ymax; + + xmax = zNear * tan( fov/2 ); + xmin = -xmax; + + ymax = xmax / aspect; + ymin = -ymax; + + Frustum(xmin, xmax, ymin, ymax, zNear, zFar); +} + +/** Create a projection matrix with the far plane at the infinity. */ +void PerspectiveInf( float fov, float aspect, float zNear ) { + float x = zNear * tan( fov/2 ); + float y = x / aspect; + FrustumInf( -x, x, -y, y, zNear ); +} + +/** Create an inverse projection matrix with far plane at the infinity. 
*/ +void PerspectiveInfInv( float fov, float aspect, float zNear ) { + float x = zNear * tan( fov/2 ); + float y = x / aspect; + FrustumInfInv( -x, x, -y, y, zNear ); +} + +/** Build bone matrix from quatertion and offset. */ +void BoneMatrix(const Quat & q, const Vec3 & offset) { + float x2, y2, z2, xx, xy, xz, yy, yz, zz, wx, wy, wz; + + // calculate coefficients + x2 = q.x + q.x; + y2 = q.y + q.y; + z2 = q.z + q.z; + + xx = q.x * x2; xy = q.x * y2; xz = q.x * z2; + yy = q.y * y2; yz = q.y * z2; zz = q.z * z2; + wx = q.w * x2; wy = q.w * y2; wz = q.w * z2; + + data[0] = 1.0f - (yy + zz); + data[1] = xy - wz; + data[2] = xz + wy; + data[3] = 0.0f; + + data[4] = xy + wz; + data[5] = 1.0f - (xx + zz); + data[6] = yz - wx; + data[7] = 0.0f; + + data[8] = xz - wy; + data[9] = yz + wx; + data[10] = 1.0f - (xx + yy); + data[11] = 0.0f; + + data[12] = offset.x; + data[13] = offset.y; + data[14] = offset.z; + data[15] = 1.0f; +} + +//@} + + +/** @name Transformations: */ +//@{ + +/** Apply a general scale. */ +void Scale( float x, float y, float z ) { + data[0] *= x; data[4] *= y; data[8] *= z; + data[1] *= x; data[5] *= y; data[9] *= z; + data[2] *= x; data[6] *= y; data[10] *= z; + data[3] *= x; data[7] *= y; data[11] *= z; +} + +/** Apply a rotation of theta degrees around the axis v*/ +void Rotate( float theta, const Vec3 & v ) { + Matrix b; + b.RotationMatrix( theta, v[0], v[1], v[2] ); + Multiply4x3( b ); +} + +/** Apply a rotation of theta degrees around the axis v*/ +void Rotate( float theta, float v0, float v1, float v2 ) { + Matrix b; + b.RotationMatrix( theta, v0, v1, v2 ); + Multiply4x3( b ); +} + +/** +* Translate the matrix by t. This is the same as multiplying by a +* translation matrix with the given offset. 
+* this = T * this +*/ +void Translate( const Vec3 &t ) { + data[12] = data[0] * t.x + data[4] * t.y + data[8] * t.z + data[12]; + data[13] = data[1] * t.x + data[5] * t.y + data[9] * t.z + data[13]; + data[14] = data[2] * t.x + data[6] * t.y + data[10] * t.z + data[14]; + data[15] = data[3] * t.x + data[7] * t.y + data[11] * t.z + data[15]; +} + +/** +* Translate the matrix by x, y, z. This is the same as multiplying by a +* translation matrix with the given offsets. +*/ +void Translate( float x, float y, float z ) { + data[12] = data[0] * x + data[4] * y + data[8] * z + data[12]; + data[13] = data[1] * x + data[5] * y + data[9] * z + data[13]; + data[14] = data[2] * x + data[6] * y + data[10] * z + data[14]; + data[15] = data[3] * x + data[7] * y + data[11] * z + data[15]; +} + +/** Compute the transposed matrix. */ +void Transpose() { + piSwap(data[1], data[4]); + piSwap(data[2], data[8]); + piSwap(data[6], data[9]); + piSwap(data[3], data[12]); + piSwap(data[7], data[13]); + piSwap(data[11], data[14]); +} + +/** Compute the inverse of a rigid-body/isometry/orthonormal matrix. */ +void IsometryInverse() { + // transposed 3x3 upper left matrix + piSwap(data[1], data[4]); + piSwap(data[2], data[8]); + piSwap(data[6], data[9]); + + // translate by the negative offsets + Vec3 v(-data[12], -data[13], -data[14]); + data[12] = data[13] = data[14] = 0; + Translate(v); +} + +/** Compute the inverse of the affine portion of this matrix. */ +void AffineInverse() { + data[12] = data[13] = data[14] = 0; + Transpose(); +} +//@} + +/** @name Matrix operations: */ +//@{ + +/** Return the determinant of this matrix. 
*/ +float Determinant() const { + return data[0] * data[5] * data[10] * data[15] + + data[1] * data[6] * data[11] * data[12] + + data[2] * data[7] * data[ 8] * data[13] + + data[3] * data[4] * data[ 9] * data[14] - + data[3] * data[6] * data[ 9] * data[12] - + data[2] * data[5] * data[ 8] * data[15] - + data[1] * data[4] * data[11] * data[14] - + data[0] * data[7] * data[10] * data[12]; +} + + +/** Standard matrix product: this *= B. */ +void Multiply4x4( const Matrix & restrict B ) { + Multiply4x4(*this, B); +} + +/** Standard matrix product: this = A * B. this != B*/ +void Multiply4x4( const Matrix & A, const Matrix & restrict B ) { + piDebugCheck(this != &B); + + for(int i = 0; i < 4; i++) { + const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3); + GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0); + GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1); + GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2); + GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3); + } + + /* Unrolled but does not allow this == A + data[0] = A.data[0] * B.data[0] + A.data[4] * B.data[1] + A.data[8] * B.data[2] + A.data[12] * B.data[3]; + data[1] = A.data[1] * B.data[0] + A.data[5] * B.data[1] + A.data[9] * B.data[2] + A.data[13] * B.data[3]; + data[2] = A.data[2] * B.data[0] + A.data[6] * B.data[1] + A.data[10] * B.data[2] + A.data[14] * B.data[3]; + data[3] = A.data[3] * B.data[0] + A.data[7] * B.data[1] + A.data[11] * B.data[2] + A.data[15] * B.data[3]; + data[4] = A.data[0] * B.data[4] + A.data[4] * B.data[5] + A.data[8] * B.data[6] + A.data[12] * B.data[7]; + data[5] = A.data[1] * B.data[4] + A.data[5] * B.data[5] + A.data[9] * B.data[6] + A.data[13] * B.data[7]; + data[6] = A.data[2] * B.data[4] + A.data[6] * B.data[5] + A.data[10] * B.data[6] + A.data[14] * B.data[7]; + data[7] = A.data[3] * B.data[4] + A.data[7] * B.data[5] + A.data[11] * B.data[6] + A.data[15] * B.data[7]; + 
data[8] = A.data[0] * B.data[8] + A.data[4] * B.data[9] + A.data[8] * B.data[10] + A.data[12] * B.data[11]; + data[9] = A.data[1] * B.data[8] + A.data[5] * B.data[9] + A.data[9] * B.data[10] + A.data[13] * B.data[11]; + data[10]= A.data[2] * B.data[8] + A.data[6] * B.data[9] + A.data[10] * B.data[10] + A.data[14] * B.data[11]; + data[11]= A.data[3] * B.data[8] + A.data[7] * B.data[9] + A.data[11] * B.data[10] + A.data[15] * B.data[11]; + data[12]= A.data[0] * B.data[12] + A.data[4] * B.data[13] + A.data[8] * B.data[14] + A.data[12] * B.data[15]; + data[13]= A.data[1] * B.data[12] + A.data[5] * B.data[13] + A.data[9] * B.data[14] + A.data[13] * B.data[15]; + data[14]= A.data[2] * B.data[12] + A.data[6] * B.data[13] + A.data[10] * B.data[14] + A.data[14] * B.data[15]; + data[15]= A.data[3] * B.data[12] + A.data[7] * B.data[13] + A.data[11] * B.data[14] + A.data[15] * B.data[15]; + */ +} + +/** Standard matrix product: this *= B. */ +void Multiply4x3( const Matrix & restrict B ) { + Multiply4x3(*this, B); +} + +/** Standard product of matrices, where the last row is [0 0 0 1]. 
*/ +void Multiply4x3( const Matrix & A, const Matrix & restrict B ) { + piDebugCheck(this != &B); + + for(int i = 0; i < 3; i++) { + const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3); + GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0); + GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1); + GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2); + GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3); + } + data[3] = 0.0f; data[7] = 0.0f; data[11] = 0.0f; data[15] = 1.0f; + + /* Unrolled but does not allow this == A + data[0] = a.data[0] * b.data[0] + a.data[4] * b.data[1] + a.data[8] * b.data[2] + a.data[12] * b.data[3]; + data[1] = a.data[1] * b.data[0] + a.data[5] * b.data[1] + a.data[9] * b.data[2] + a.data[13] * b.data[3]; + data[2] = a.data[2] * b.data[0] + a.data[6] * b.data[1] + a.data[10] * b.data[2] + a.data[14] * b.data[3]; + data[3] = 0.0f; + data[4] = a.data[0] * b.data[4] + a.data[4] * b.data[5] + a.data[8] * b.data[6] + a.data[12] * b.data[7]; + data[5] = a.data[1] * b.data[4] + a.data[5] * b.data[5] + a.data[9] * b.data[6] + a.data[13] * b.data[7]; + data[6] = a.data[2] * b.data[4] + a.data[6] * b.data[5] + a.data[10] * b.data[6] + a.data[14] * b.data[7]; + data[7] = 0.0f; + data[8] = a.data[0] * b.data[8] + a.data[4] * b.data[9] + a.data[8] * b.data[10] + a.data[12] * b.data[11]; + data[9] = a.data[1] * b.data[8] + a.data[5] * b.data[9] + a.data[9] * b.data[10] + a.data[13] * b.data[11]; + data[10]= a.data[2] * b.data[8] + a.data[6] * b.data[9] + a.data[10] * b.data[10] + a.data[14] * b.data[11]; + data[11]= 0.0f; + data[12]= a.data[0] * b.data[12] + a.data[4] * b.data[13] + a.data[8] * b.data[14] + a.data[12] * b.data[15]; + data[13]= a.data[1] * b.data[12] + a.data[5] * b.data[13] + a.data[9] * b.data[14] + a.data[13] * b.data[15]; + data[14]= a.data[2] * b.data[12] + a.data[6] * b.data[13] + a.data[10] * b.data[14] + a.data[14] * b.data[15]; + 
data[15]= 1.0f; + */ +} +//@} + + +/** @name Vector operations: */ +//@{ + +/** Transform 3d vector (w=0). */ +void TransformVec3(const Vec3 & restrict orig, Vec3 * restrict dest) const { + piDebugCheck(&orig != dest); + dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10]; +} +/** Transform 3d vector by the transpose (w=0). */ +void TransformVec3T(const Vec3 & restrict orig, Vec3 * restrict dest) const { + piDebugCheck(&orig != dest); + dest->x = orig.x * data[0] + orig.y * data[1] + orig.z * data[2]; + dest->y = orig.x * data[4] + orig.y * data[5] + orig.z * data[6]; + dest->z = orig.x * data[8] + orig.y * data[9] + orig.z * data[10]; +} + +/** Transform a 3d homogeneous vector, where the fourth coordinate is assumed to be 1. */ +void TransformPoint(const Vec3 & restrict orig, Vec3 * restrict dest) const { + piDebugCheck(&orig != dest); + dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; +} + +/** Transform a point, normalize it, and return w. */ +float TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict dest) const { + piDebugCheck(&orig != dest); + float w; + dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; + w = 1 / (orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]); + *dest *= w; + return w; +} + +/** Transform a point and return w. 
*/ +float TransformPointReturnW(const Vec3 & restrict orig, Vec3 * restrict dest) const { + piDebugCheck(&orig != dest); + dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; + return orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]; +} + +/** Transform a normalized 3d point by a 4d matrix and return the resulting 4d vector. */ +void TransformVec4(const Vec3 & orig, Vec4 * dest) const { + dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; + dest->w = orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]; +} +//@} + +/** @name Matrix analysis. */ +//@{ + +/** Get the ZYZ euler angles from the matrix. Assumes the matrix is orthonormal. */ +void GetEulerAnglesZYZ(float * s, float * t, float * r) const { + if( GetElem(2,2) < 1.0f ) { + if( GetElem(2,2) > -1.0f ) { + // cs*ct*cr-ss*sr -ss*ct*cr-cs*sr st*cr + // cs*ct*sr+ss*cr -ss*ct*sr+cs*cr st*sr + // -cs*st ss*st ct + *s = atan2(GetElem(1,2), -GetElem(0,2)); + *t = acos(GetElem(2,2)); + *r = atan2(GetElem(2,1), GetElem(2,0)); + } + else { + // -c(s-r) s(s-r) 0 + // s(s-r) c(s-r) 0 + // 0 0 -1 + *s = atan2(GetElem(0, 1), -GetElem(0, 0)); // = s-r + *t = PI; + *r = 0; + } + } + else { + // c(s+r) -s(s+r) 0 + // s(s+r) c(s+r) 0 + // 0 0 1 + *s = atan2(GetElem(0, 1), GetElem(0, 0)); // = s+r + *t = 0; + *r = 0; + } +} + +//@} + +MATHLIB_API friend PiStream & operator<< ( PiStream & s, Matrix & m ); + +/** Print to debug output. 
*/ +void Print() const { + piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[0], data[4], data[8], data[12] ); + piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[1], data[5], data[9], data[13] ); + piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[2], data[6], data[10], data[14] ); + piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[3], data[7], data[11], data[15] ); +} + + +public: + + float data[16]; + +}; +#endif + + +#endif // NV_MATH_MATRIX_INL diff --git a/thirdparty/thekla_atlas/nvmath/Morton.h b/thirdparty/thekla_atlas/nvmath/Morton.h new file mode 100644 index 0000000000..10e0d8152a --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Morton.h @@ -0,0 +1,83 @@ + +// Code from ryg: +// http://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ + + +// "Insert" a 0 bit after each of the 16 low bits of x +inline uint32 part1By1(uint32 x) +{ + x &= 0x0000ffff; // x = ---- ---- ---- ---- fedc ba98 7654 3210 + x = (x ^ (x << 8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210 + x = (x ^ (x << 4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210 + x = (x ^ (x << 2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10 + x = (x ^ (x << 1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0 + return x; +} + +// "Insert" two 0 bits after each of the 10 low bits of x +inline uint32 part1By2(uint32 x) +{ + x &= 0x000003ff; // x = ---- ---- ---- ---- ---- --98 7654 3210 + x = (x ^ (x << 16)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210 + x = (x ^ (x << 8)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210 + x = (x ^ (x << 4)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10 + x = (x ^ (x << 2)) & 0x09249249; // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0 + return x; +} + +inline uint32 encodeMorton2(uint32 x, uint32 y) +{ + return (part1By1(y) << 1) + part1By1(x); +} + +inline uint32 encodeMorton3(uint32 x, uint32 y, uint32 z) +{ + return (part1By2(z) << 2) + (part1By2(y) << 1) + part1By2(x); +} + 
+// Inverse of part1By1 - "delete" all odd-indexed bits +inline uint32 compact1By1(uint32 x) +{ + x &= 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0 + x = (x ^ (x >> 1)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10 + x = (x ^ (x >> 2)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210 + x = (x ^ (x >> 4)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210 + x = (x ^ (x >> 8)) & 0x0000ffff; // x = ---- ---- ---- ---- fedc ba98 7654 3210 + return x; +} + +// Inverse of part1By2 - "delete" all bits not at positions divisible by 3 +inline uint32 compact1By2(uint32 x) +{ + x &= 0x09249249; // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0 + x = (x ^ (x >> 2)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10 + x = (x ^ (x >> 4)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210 + x = (x ^ (x >> 8)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210 + x = (x ^ (x >> 16)) & 0x000003ff; // x = ---- ---- ---- ---- ---- --98 7654 3210 + return x; +} + +inline uint32 decodeMorton2X(uint32 code) +{ + return compact1By1(code >> 0); +} + +inline uint32 decodeMorton2Y(uint32 code) +{ + return compact1By1(code >> 1); +} + +inline uint32 decodeMorton3X(uint32 code) +{ + return compact1By2(code >> 0); +} + +inline uint32 decodeMorton3Y(uint32 code) +{ + return compact1By2(code >> 1); +} + +inline uint32 decodeMorton3Z(uint32 code) +{ + return compact1By2(code >> 2); +}
\ No newline at end of file diff --git a/thirdparty/thekla_atlas/nvmath/Plane.cpp b/thirdparty/thekla_atlas/nvmath/Plane.cpp new file mode 100644 index 0000000000..8b54f829ad --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Plane.cpp @@ -0,0 +1,27 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include "Plane.h" +#include "Plane.inl" +#include "Matrix.inl" + +namespace nv +{ + Plane transformPlane(const Matrix & m, const Plane & p) + { + Vector3 newVec = transformVector(m, p.vector()); + + Vector3 ptInPlane = p.offset() * p.vector(); + ptInPlane = transformPoint(m, ptInPlane); + + return Plane(newVec, ptInPlane); + } + + Vector3 planeIntersection(const Plane & a, const Plane & b, const Plane & c) + { + return dot(a.vector(), cross(b.vector(), c.vector())) * ( + a.offset() * cross(b.vector(), c.vector()) + + c.offset() * cross(a.vector(), b.vector()) + + b.offset() * cross(c.vector(), a.vector())); + } + +} // nv namespace diff --git a/thirdparty/thekla_atlas/nvmath/Plane.h b/thirdparty/thekla_atlas/nvmath/Plane.h new file mode 100644 index 0000000000..dc468b28e2 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Plane.h @@ -0,0 +1,42 @@ +// This code is in the public domain -- Ignacio Castaņo <castano@gmail.com> + +#pragma once +#ifndef NV_MATH_PLANE_H +#define NV_MATH_PLANE_H + +#include "nvmath.h" +#include "Vector.h" + +namespace nv +{ + class Matrix; + + class NVMATH_CLASS Plane + { + public: + Plane(); + Plane(float x, float y, float z, float w); + Plane(const Vector4 & v); + Plane(const Vector3 & v, float d); + Plane(const Vector3 & normal, const Vector3 & point); + Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2); + + const Plane & operator=(const Plane & v); + + Vector3 vector() const; + float offset() const; + Vector3 normal() const; + + void operator*=(float s); + + Vector4 v; + }; + + Plane transformPlane(const Matrix &, const Plane &); + + Vector3 planeIntersection(const Plane & a, const Plane & b, const Plane & c); 
+ + +} // nv namespace + +#endif // NV_MATH_PLANE_H diff --git a/thirdparty/thekla_atlas/nvmath/Plane.inl b/thirdparty/thekla_atlas/nvmath/Plane.inl new file mode 100644 index 0000000000..2277e38cd5 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Plane.inl @@ -0,0 +1,50 @@ +// This code is in the public domain -- Ignacio Castaņo <castano@gmail.com> + +#pragma once +#ifndef NV_MATH_PLANE_INL +#define NV_MATH_PLANE_INL + +#include "Plane.h" +#include "Vector.inl" + +namespace nv +{ + inline Plane::Plane() {} + inline Plane::Plane(float x, float y, float z, float w) : v(x, y, z, w) {} + inline Plane::Plane(const Vector4 & v) : v(v) {} + inline Plane::Plane(const Vector3 & v, float d) : v(v, d) {} + inline Plane::Plane(const Vector3 & normal, const Vector3 & point) : v(normal, -dot(normal, point)) {} + inline Plane::Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2) { + Vector3 n = cross(v1-v0, v2-v0); + float d = -dot(n, v0); + v = Vector4(n, d); + } + + inline const Plane & Plane::operator=(const Plane & p) { v = p.v; return *this; } + + inline Vector3 Plane::vector() const { return v.xyz(); } + inline float Plane::offset() const { return v.w; } + inline Vector3 Plane::normal() const { return normalize(vector(), 0.0f); } + + // Normalize plane. + inline Plane normalize(const Plane & plane, float epsilon = NV_EPSILON) + { + const float len = length(plane.vector()); + const float inv = isZero(len, epsilon) ? 0 : 1.0f / len; + return Plane(plane.v * inv); + } + + // Get the signed distance from the given point to this plane. 
+ inline float distance(const Plane & plane, const Vector3 & point) + { + return dot(plane.vector(), point) + plane.offset(); + } + + inline void Plane::operator*=(float s) + { + v *= s; + } + +} // nv namespace + +#endif // NV_MATH_PLANE_H diff --git a/thirdparty/thekla_atlas/nvmath/ProximityGrid.cpp b/thirdparty/thekla_atlas/nvmath/ProximityGrid.cpp new file mode 100644 index 0000000000..3553e48f64 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/ProximityGrid.cpp @@ -0,0 +1,158 @@ +#include "ProximityGrid.h" + +#include "Box.inl" +#include "Morton.h" + + +using namespace nv; + +ProximityGrid::ProximityGrid() { +} + +void ProximityGrid::reset() { + cellArray.clear(); +} + +void ProximityGrid::init(const Array<Vector3> & pointArray) { + + // Compute bounding box. + Box box; + box.clearBounds(); + + const uint count = pointArray.count(); + + for (uint i = 0; i < count; i++) { + box.addPointToBounds(pointArray[i]); + } + + init(box, count); + + // Insert all points. + for (uint i = 0; i < count; i++) { + add(pointArray[i], i); + } +} + + +void ProximityGrid::init(const Box & box, uint count) { + reset(); + + // Determine grid size. + float cellWidth; + + Vector3 diagonal = box.extents() * 2.f; + float volume = box.volume(); + + if (equal(volume, 0)) { + // Degenerate box, treat like a quad. + Vector2 quad; + if (diagonal.x < diagonal.y && diagonal.x < diagonal.z) { + quad.x = diagonal.y; + quad.y = diagonal.z; + } + else if (diagonal.y < diagonal.x && diagonal.y < diagonal.z) { + quad.x = diagonal.x; + quad.y = diagonal.z; + } + else { + quad.x = diagonal.x; + quad.y = diagonal.y; + } + + float cellArea = quad.x * quad.y / count; + cellWidth = sqrtf(cellArea); // pow(cellArea, 1.0f / 2.0f); + } + else { + // Ideally we want one cell per point. 
+ float cellVolume = volume / count; + cellWidth = pow(cellVolume, 1.0f / 3.0f); + } + + nvDebugCheck(cellWidth != 0); + + sx = max(1, ftoi_ceil(diagonal.x / cellWidth)); + sy = max(1, ftoi_ceil(diagonal.y / cellWidth)); + sz = max(1, ftoi_ceil(diagonal.z / cellWidth)); + + invCellSize.x = float(sx) / diagonal.x; + invCellSize.y = float(sy) / diagonal.y; + invCellSize.z = float(sz) / diagonal.z; + + cellArray.resize(sx * sy * sz); + + corner = box.minCorner; // @@ Align grid better? +} + +// Gather all points inside the given sphere. +// Radius is assumed to be small, so we don't bother culling the cells. +void ProximityGrid::gather(const Vector3 & position, float radius, Array<uint> & indexArray) { + int x0 = index_x(position.x - radius); + int x1 = index_x(position.x + radius); + + int y0 = index_y(position.y - radius); + int y1 = index_y(position.y + radius); + + int z0 = index_z(position.z - radius); + int z1 = index_z(position.z + radius); + + for (int z = z0; z <= z1; z++) { + for (int y = y0; y <= y1; y++) { + for (int x = x0; x <= x1; x++) { + int idx = index(x, y, z); + indexArray.append(cellArray[idx].indexArray); + } + } + } +} + + +uint32 ProximityGrid::mortonCount() const { + uint64 s = U64(max3(sx, sy, sz)); + s = nextPowerOfTwo(s); + + if (s > 1024) { + return U32(s * s * min3(sx, sy, sz)); + } + + return U32(s * s * s); +} + +int ProximityGrid::mortonIndex(uint32 code) const { + uint32 x, y, z; + + uint s = U32(max3(sx, sy, sz)); + if (s > 1024) { + // Use layered two-dimensional morton order. 
+ s = nextPowerOfTwo(s); + uint layer = code / (s * s); + code = code % (s * s); + + uint layer_count = U32(min3(sx, sy, sz)); + if (sx == layer_count) { + x = layer; + y = decodeMorton2X(code); + z = decodeMorton2Y(code); + } + else if (sy == layer_count) { + x = decodeMorton2Y(code); + y = layer; + z = decodeMorton2X(code); + } + else /*if (sz == layer_count)*/ { + x = decodeMorton2X(code); + y = decodeMorton2Y(code); + z = layer; + } + } + else { + x = decodeMorton3X(code); + y = decodeMorton3Y(code); + z = decodeMorton3Z(code); + } + + if (x >= U32(sx) || y >= U32(sy) || z >= U32(sz)) { + return -1; + } + + return index(x, y, z); +} diff --git a/thirdparty/thekla_atlas/nvmath/ProximityGrid.h b/thirdparty/thekla_atlas/nvmath/ProximityGrid.h new file mode 100644 index 0000000000..a21bb3bd68 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/ProximityGrid.h @@ -0,0 +1,99 @@ +#pragma once +#ifndef NV_MATH_PROXIMITYGRID_H +#define NV_MATH_PROXIMITYGRID_H + +#include "Vector.h" +#include "ftoi.h" + +#include "nvcore/Array.inl" + + +// A simple, dynamic proximity grid based on Jon's code. +// Instead of storing pointers here I store indices. 
+ +namespace nv { + + class Box; + + struct Cell { + Array<uint> indexArray; + }; + + struct ProximityGrid { + ProximityGrid(); + + void reset(); + void init(const Array<Vector3> & pointArray); + void init(const Box & box, uint count); + + int index_x(float x) const; + int index_y(float y) const; + int index_z(float z) const; + int index(int x, int y, int z) const; + int index(const Vector3 & pos) const; + + uint32 mortonCount() const; + int mortonIndex(uint32 code) const; + + void add(const Vector3 & pos, uint key); + bool remove(const Vector3 & pos, uint key); + + void gather(const Vector3 & pos, float radius, Array<uint> & indices); + + Array<Cell> cellArray; + + Vector3 corner; + Vector3 invCellSize; + int sx, sy, sz; + }; + + // For morton traversal, do: + // for (int code = 0; code < mortonCount(); code++) { + // int idx = mortonIndex(code); + // if (idx < 0) continue; + // } + + + + inline int ProximityGrid::index_x(float x) const { + return clamp(ftoi_floor((x - corner.x) * invCellSize.x), 0, sx-1); + } + + inline int ProximityGrid::index_y(float y) const { + return clamp(ftoi_floor((y - corner.y) * invCellSize.y), 0, sy-1); + } + + inline int ProximityGrid::index_z(float z) const { + return clamp(ftoi_floor((z - corner.z) * invCellSize.z), 0, sz-1); + } + + inline int ProximityGrid::index(int x, int y, int z) const { + nvDebugCheck(x >= 0 && x < sx); + nvDebugCheck(y >= 0 && y < sy); + nvDebugCheck(z >= 0 && z < sz); + int idx = (z * sy + y) * sx + x; + nvDebugCheck(idx >= 0 && uint(idx) < cellArray.count()); + return idx; + } + + inline int ProximityGrid::index(const Vector3 & pos) const { + int x = index_x(pos.x); + int y = index_y(pos.y); + int z = index_z(pos.z); + return index(x, y, z); + } + + + inline void ProximityGrid::add(const Vector3 & pos, uint key) { + uint idx = index(pos); + cellArray[idx].indexArray.append(key); + } + + inline bool ProximityGrid::remove(const Vector3 & pos, uint key) { + uint idx = index(pos); + return 
cellArray[idx].indexArray.remove(key); + } + +} // nv namespace + +#endif // NV_MATH_PROXIMITYGRID_H diff --git a/thirdparty/thekla_atlas/nvmath/Quaternion.h b/thirdparty/thekla_atlas/nvmath/Quaternion.h new file mode 100644 index 0000000000..dc5219e5e4 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Quaternion.h @@ -0,0 +1,213 @@ +// This code is in the public domain -- castano@gmail.com + +#pragma once +#ifndef NV_MATH_QUATERNION_H +#define NV_MATH_QUATERNION_H + +#include "nvmath/nvmath.h" +#include "nvmath/Vector.inl" // @@ Do not include inl files from header files. +#include "nvmath/Matrix.h" + +namespace nv +{ + + class NVMATH_CLASS Quaternion + { + public: + typedef Quaternion const & Arg; + + Quaternion(); + explicit Quaternion(float f); + Quaternion(float x, float y, float z, float w); + Quaternion(Vector4::Arg v); + + const Quaternion & operator=(Quaternion::Arg v); + + Vector4 asVector() const; + + union { + struct { + float x, y, z, w; + }; + float component[4]; + }; + }; + + inline Quaternion::Quaternion() {} + inline Quaternion::Quaternion(float f) : x(f), y(f), z(f), w(f) {} + inline Quaternion::Quaternion(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {} + inline Quaternion::Quaternion(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {} + + // @@ Move all these to Quaternion.inl! 
+ + inline const Quaternion & Quaternion::operator=(Quaternion::Arg v) { + x = v.x; + y = v.y; + z = v.z; + w = v.w; + return *this; + } + + inline Vector4 Quaternion::asVector() const { return Vector4(x, y, z, w); } + + inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b) + { + return Quaternion( + + a.x*b.w + a.y*b.z - a.z*b.y + a.w*b.x, + - a.x*b.z + a.y*b.w + a.z*b.x + a.w*b.y, + + a.x*b.y - a.y*b.x + a.z*b.w + a.w*b.z, + - a.x*b.x - a.y*b.y - a.z*b.z + a.w*b.w); + } + + inline Quaternion mul(Quaternion::Arg a, Vector3::Arg b) + { + return Quaternion( + + a.y*b.z - a.z*b.y + a.w*b.x, + - a.x*b.z + a.z*b.x + a.w*b.y, + + a.x*b.y - a.y*b.x + a.w*b.z, + - a.x*b.x - a.y*b.y - a.z*b.z ); + } + + inline Quaternion mul(Vector3::Arg a, Quaternion::Arg b) + { + return Quaternion( + + a.x*b.w + a.y*b.z - a.z*b.y, + - a.x*b.z + a.y*b.w + a.z*b.x, + + a.x*b.y - a.y*b.x + a.z*b.w, + - a.x*b.x - a.y*b.y - a.z*b.z); + } + + inline Quaternion operator *(Quaternion::Arg a, Quaternion::Arg b) + { + return mul(a, b); + } + + inline Quaternion operator *(Quaternion::Arg a, Vector3::Arg b) + { + return mul(a, b); + } + + inline Quaternion operator *(Vector3::Arg a, Quaternion::Arg b) + { + return mul(a, b); + } + + + inline Quaternion scale(Quaternion::Arg q, float s) + { + return scale(q.asVector(), s); + } + inline Quaternion operator *(Quaternion::Arg q, float s) + { + return scale(q, s); + } + inline Quaternion operator *(float s, Quaternion::Arg q) + { + return scale(q, s); + } + + inline Quaternion scale(Quaternion::Arg q, Vector4::Arg s) + { + return scale(q.asVector(), s); + } + /*inline Quaternion operator *(Quaternion::Arg q, Vector4::Arg s) + { + return scale(q, s); + } + inline Quaternion operator *(Vector4::Arg s, Quaternion::Arg q) + { + return scale(q, s); + }*/ + + inline Quaternion conjugate(Quaternion::Arg q) + { + return scale(q, Vector4(-1, -1, -1, 1)); + } + + inline float length(Quaternion::Arg q) + { + return length(q.asVector()); + } + + inline bool 
isNormalized(Quaternion::Arg q, float epsilon = NV_NORMAL_EPSILON) + { + return equal(length(q), 1, epsilon); + } + + inline Quaternion normalize(Quaternion::Arg q, float epsilon = NV_EPSILON) + { + float l = length(q); + nvDebugCheck(!isZero(l, epsilon)); + Quaternion n = scale(q, 1.0f / l); + nvDebugCheck(isNormalized(n)); + return n; + } + + inline Quaternion inverse(Quaternion::Arg q) + { + return conjugate(normalize(q)); + } + + /// Create a rotation quaternion for @a angle alpha around normal vector @a v. + inline Quaternion axisAngle(Vector3::Arg v, float alpha) + { + float s = sinf(alpha * 0.5f); + float c = cosf(alpha * 0.5f); + return Quaternion(Vector4(v * s, c)); + } + + inline Vector3 imag(Quaternion::Arg q) + { + return q.asVector().xyz(); + } + + inline float real(Quaternion::Arg q) + { + return q.w; + } + + + /// Transform vector. + inline Vector3 transform(Quaternion::Arg q, Vector3::Arg v) + { + //Quaternion t = q * v * conjugate(q); + //return imag(t); + + // Faster method by Fabian Giesen and others: + // http://molecularmusings.wordpress.com/2013/05/24/a-faster-quaternion-vector-multiplication/ + // http://mollyrocket.com/forums/viewtopic.php?t=833&sid=3a84e00a70ccb046cfc87ac39881a3d0 + + Vector3 t = 2 * cross(imag(q), v); + return v + q.w * t + cross(imag(q), t); + } + + // @@ Not tested. 
+ // From Insomniac's Mike Day: + // http://www.insomniacgames.com/converting-a-rotation-matrix-to-a-quaternion/ + inline Quaternion fromMatrix(const Matrix & m) { + if (m(2, 2) < 0) { + if (m(0, 0) < m(1,1)) { + float t = 1 - m(0, 0) - m(1, 1) - m(2, 2); + return Quaternion(t, m(0,1)+m(1,0), m(2,0)+m(0,2), m(1,2)-m(2,1)); + } + else { + float t = 1 - m(0, 0) + m(1, 1) - m(2, 2); + return Quaternion(t, m(0,1) + m(1,0), m(1,2) + m(2,1), m(2,0) - m(0,2)); + } + } + else { + if (m(0, 0) < -m(1, 1)) { + float t = 1 - m(0, 0) - m(1, 1) + m(2, 2); + return Quaternion(t, m(2,0) + m(0,2), m(1,2) + m(2,1), m(0,1) - m(1,0)); + } + else { + float t = 1 + m(0, 0) + m(1, 1) + m(2, 2); + return Quaternion(t, m(1,2) - m(2,1), m(2,0) - m(0,2), m(0,1) - m(1,0)); + } + } + } + + +} // nv namespace + +#endif // NV_MATH_QUATERNION_H diff --git a/thirdparty/thekla_atlas/nvmath/Random.cpp b/thirdparty/thekla_atlas/nvmath/Random.cpp new file mode 100644 index 0000000000..1a60e7f5e7 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Random.cpp @@ -0,0 +1,54 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include <nvmath/Random.h> +#include <time.h> + +using namespace nv; + +// Statics +const uint16 Rand48::a0 = 0xE66D; +const uint16 Rand48::a1 = 0xDEEC; +const uint16 Rand48::a2 = 0x0005; +const uint16 Rand48::c0 = 0x000B; + + +/// Get a random seed based on the current time. +uint Rand::randomSeed() +{ + return (uint)time(NULL); +} + + +void MTRand::initialize( uint32 seed ) +{ + // Initialize generator state with seed + // See Knuth TAOCP Vol 2, 3rd Ed, p.106 for multiplier. + // In previous versions, most significant bits (MSBs) of the seed affect + // only MSBs of the state array. Modified 9 Jan 2002 by Makoto Matsumoto. 
+ uint32 *s = state; + uint32 *r = state; + int i = 1; + *s++ = seed & 0xffffffffUL; + for( ; i < N; ++i ) + { + *s++ = ( 1812433253UL * ( *r ^ (*r >> 30) ) + i ) & 0xffffffffUL; + r++; + } +} + + +void MTRand::reload() +{ + // Generate N new values in state + // Made clearer and faster by Matthew Bellew (matthew.bellew@home.com) + uint32 *p = state; + int i; + for( i = N - M; i--; ++p ) + *p = twist( p[M], p[0], p[1] ); + for( i = M; --i; ++p ) + *p = twist( p[M-N], p[0], p[1] ); + *p = twist( p[M-N], p[0], state[0] ); + + left = N, next = state; +} + diff --git a/thirdparty/thekla_atlas/nvmath/Random.h b/thirdparty/thekla_atlas/nvmath/Random.h new file mode 100644 index 0000000000..223292706a --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Random.h @@ -0,0 +1,376 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_RANDOM_H +#define NV_MATH_RANDOM_H + +#include "nvmath.h" +#include "nvcore/Utils.h" // nextPowerOfTwo + + +namespace nv +{ + + /// Interface of the random number generators. + class Rand + { + public: + + virtual ~Rand() {} + + enum time_e { Time }; + + /// Provide a new seed. + virtual void seed( uint s ) { /* empty */ }; + + /// Get an integer random number. + virtual uint get() = 0; + + /// Get a random number on [0, max] interval. + uint getRange( uint max ) + { + if (max == 0) return 0; + if (max == NV_UINT32_MAX) return get(); + + const uint np2 = nextPowerOfTwo( max+1 ); // @@ This fails if max == NV_UINT32_MAX + const uint mask = np2 - 1; + uint n; + do { n = get() & mask; } while( n > max ); + return n; + } + + /// Random number on [0.0, 1.0] interval. + float getFloat() + { + union + { + uint32 i; + float f; + } pun; + + pun.i = 0x3f800000UL | (get() & 0x007fffffUL); + return pun.f - 1.0f; + } + + float getFloatRange(float min, float max) { + return getFloat() * (max - min) + min; + } + + /* + /// Random number on [0.0, 1.0] interval. 
+ double getReal() + { + return double(get()) * (1.0/4294967295.0); // 2^32-1 + } + + /// Random number on [0.0, 1.0) interval. + double getRealExclusive() + { + return double(get()) * (1.0/4294967296.0); // 2^32 + } + */ + + /// Get the max value of the random number. + uint max() const { return NV_UINT32_MAX; } + + // Get a random seed. + static uint randomSeed(); + + }; + + + /// Very simple random number generator with low storage requirements. + class SimpleRand : public Rand + { + public: + + /// Constructor that uses the current time as the seed. + SimpleRand( time_e ) + { + seed(randomSeed()); + } + + /// Constructor that uses the given seed. + SimpleRand( uint s = 0 ) + { + seed(s); + } + + /// Set the given seed. + virtual void seed( uint s ) + { + current = s; + } + + /// Get a random number. + virtual uint get() + { + return current = current * 1103515245 + 12345; + } + + private: + + uint current; + + }; + + + /// Mersenne twister random number generator. + class MTRand : public Rand + { + public: + + enum { N = 624 }; // length of state vector + enum { M = 397 }; + + /// Constructor that uses the current time as the seed. + MTRand( time_e ) + { + seed(randomSeed()); + } + + /// Constructor that uses the given seed. + MTRand( uint s = 0 ) + { + seed(s); + } + + /// Constructor that uses the given seeds. + NVMATH_API MTRand( const uint * seed_array, uint length ); + + + /// Provide a new seed. + virtual void seed( uint s ) + { + initialize(s); + reload(); + } + + /// Get a random number between 0 - 65536. 
+ virtual uint get() + { + // Pull a 32-bit integer from the generator state + // Every other access function simply transforms the numbers extracted here + if( left == 0 ) { + reload(); + } + left--; + + uint s1; + s1 = *next++; + s1 ^= (s1 >> 11); + s1 ^= (s1 << 7) & 0x9d2c5680U; + s1 ^= (s1 << 15) & 0xefc60000U; + return ( s1 ^ (s1 >> 18) ); + }; + + + private: + + NVMATH_API void initialize( uint32 seed ); + NVMATH_API void reload(); + + uint hiBit( uint u ) const { return u & 0x80000000U; } + uint loBit( uint u ) const { return u & 0x00000001U; } + uint loBits( uint u ) const { return u & 0x7fffffffU; } + uint mixBits( uint u, uint v ) const { return hiBit(u) | loBits(v); } + uint twist( uint m, uint s0, uint s1 ) const { return m ^ (mixBits(s0,s1)>>1) ^ ((~loBit(s1)+1) & 0x9908b0dfU); } + + private: + + uint state[N]; // internal state + uint * next; // next value to get from state + int left; // number of values left before reload needed + + }; + + + + /** George Marsaglia's random number generator. + * Code based on Thatcher Ulrich public domain source code: + * http://cvs.sourceforge.net/viewcvs.py/tu-testbed/tu-testbed/base/tu_random.cpp?rev=1.7&view=auto + * + * PRNG code adapted from the complimentary-multiply-with-carry + * code in the article: George Marsaglia, "Seeds for Random Number + * Generators", Communications of the ACM, May 2003, Vol 46 No 5, + * pp90-93. + * + * The article says: + * + * "Any one of the choices for seed table size and multiplier will + * provide a RNG that has passed extensive tests of randomness, + * particularly those in [3], yet is simple and fast -- + * approximately 30 million random 32-bit integers per second on a + * 850MHz PC. The period is a*b^n, where a is the multiplier, n + * the size of the seed table and b=2^32-1. (a is chosen so that + * b is a primitive root of the prime a*b^n + 1.)" + * + * [3] Marsaglia, G., Zaman, A., and Tsang, W. Toward a universal + * random number generator. 
_Statistics and Probability Letters + * 8_ (1990), 35-39. + */ + class GMRand : public Rand + { + public: + + enum { SEED_COUNT = 8 }; + + // const uint64 a = 123471786; // for SEED_COUNT=1024 + // const uint64 a = 123554632; // for SEED_COUNT=512 + // const uint64 a = 8001634; // for SEED_COUNT=255 + // const uint64 a = 8007626; // for SEED_COUNT=128 + // const uint64 a = 647535442; // for SEED_COUNT=64 + // const uint64 a = 547416522; // for SEED_COUNT=32 + // const uint64 a = 487198574; // for SEED_COUNT=16 + // const uint64 a = 716514398U; // for SEED_COUNT=8 + enum { a = 716514398U }; + + + GMRand( time_e ) + { + seed(randomSeed()); + } + + GMRand(uint s = 987654321) + { + seed(s); + } + + + /// Provide a new seed. + virtual void seed( uint s ) + { + c = 362436; + i = SEED_COUNT - 1; + + for(int i = 0; i < SEED_COUNT; i++) { + s = s ^ (s << 13); + s = s ^ (s >> 17); + s = s ^ (s << 5); + Q[i] = s; + } + } + + /// Get a random number between 0 - 65536. + virtual uint get() + { + const uint32 r = 0xFFFFFFFE; + + uint64 t; + uint32 x; + + i = (i + 1) & (SEED_COUNT - 1); + t = a * Q[i] + c; + c = uint32(t >> 32); + x = uint32(t + c); + + if( x < c ) { + x++; + c++; + } + + uint32 val = r - x; + Q[i] = val; + return val; + }; + + + private: + + uint32 c; + uint32 i; + uint32 Q[8]; + + }; + + + /** Random number implementation from the GNU Sci. Lib. (GSL). + * Adapted from Nicholas Chapman version: + * + * Copyright (C) 1996, 1997, 1998, 1999, 2000 James Theiler, Brian Gough + * This is the Unix rand48() generator. The generator returns the + * upper 32 bits from each term of the sequence, + * + * x_{n+1} = (a x_n + c) mod m + * + * using 48-bit unsigned arithmetic, with a = 0x5DEECE66D , c = 0xB + * and m = 2^48. The seed specifies the upper 32 bits of the initial + * value, x_1, with the lower 16 bits set to 0x330E. + * + * The theoretical value of x_{10001} is 244131582646046. + * + * The period of this generator is ? FIXME (probably around 2^48). 
+ */ + class Rand48 : public Rand + { + public: + + Rand48( time_e ) + { + seed(randomSeed()); + } + + Rand48( uint s = 0x1234ABCD ) + { + seed(s); + } + + + /** Set the given seed. */ + virtual void seed( uint s ) { + vstate.x0 = 0x330E; + vstate.x1 = uint16(s & 0xFFFF); + vstate.x2 = uint16((s >> 16) & 0xFFFF); + } + + /** Get a random number. */ + virtual uint get() { + + advance(); + + uint x1 = vstate.x1; + uint x2 = vstate.x2; + return (x2 << 16) + x1; + } + + + private: + + void advance() + { + /* work with unsigned long ints throughout to get correct integer + promotions of any unsigned short ints */ + const uint32 x0 = vstate.x0; + const uint32 x1 = vstate.x1; + const uint32 x2 = vstate.x2; + + uint32 a; + a = a0 * x0 + c0; + + vstate.x0 = uint16(a & 0xFFFF); + a >>= 16; + + /* although the next line may overflow we only need the top 16 bits + in the following stage, so it does not matter */ + + a += a0 * x1 + a1 * x0; + vstate.x1 = uint16(a & 0xFFFF); + + a >>= 16; + a += a0 * x2 + a1 * x1 + a2 * x0; + vstate.x2 = uint16(a & 0xFFFF); + } + + + private: + NVMATH_API static const uint16 a0, a1, a2, c0; + + struct rand48_state_t { + uint16 x0, x1, x2; + } vstate; + + }; + +} // nv namespace + +#endif // NV_MATH_RANDOM_H diff --git a/thirdparty/thekla_atlas/nvmath/Solver.cpp b/thirdparty/thekla_atlas/nvmath/Solver.cpp new file mode 100644 index 0000000000..191793ee29 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Solver.cpp @@ -0,0 +1,744 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include "Solver.h" +#include "Sparse.h" + +#include "nvcore/Array.inl" + +using namespace nv; + +namespace +{ + class Preconditioner + { + public: + // Virtual dtor. + virtual ~Preconditioner() { } + + // Apply preconditioning step. + virtual void apply(const FullVector & x, FullVector & y) const = 0; + }; + + + // Jacobi preconditioner. 
+ class JacobiPreconditioner : public Preconditioner + { + public: + + JacobiPreconditioner(const SparseMatrix & M, bool symmetric) : m_inverseDiagonal(M.width()) + { + nvCheck(M.isSquare()); + + for(uint x = 0; x < M.width(); x++) + { + float elem = M.getCoefficient(x, x); + //nvDebugCheck( elem != 0.0f ); // This can be zero in the presence of zero area triangles. + + if (symmetric) + { + m_inverseDiagonal[x] = (elem != 0) ? 1.0f / sqrtf(fabsf(elem)) : 1.0f; + } + else + { + m_inverseDiagonal[x] = (elem != 0) ? 1.0f / elem : 1.0f; + } + } + } + + void apply(const FullVector & x, FullVector & y) const + { + nvDebugCheck(x.dimension() == m_inverseDiagonal.dimension()); + nvDebugCheck(y.dimension() == m_inverseDiagonal.dimension()); + + // @@ Wrap vector component-wise product into a separate function. + const uint D = x.dimension(); + for (uint i = 0; i < D; i++) + { + y[i] = m_inverseDiagonal[i] * x[i]; + } + } + + private: + + FullVector m_inverseDiagonal; + + }; + +} // namespace + + +static bool ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon); +static bool ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon); + + +// Solve the symmetric system: At·A·x = At·b +bool nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/) +{ + nvDebugCheck(A.width() == x.dimension()); + nvDebugCheck(A.height() == b.dimension()); + nvDebugCheck(A.height() >= A.width()); // @@ If height == width we could solve it directly... 
+ + const uint D = A.width(); + + SparseMatrix At(A.height(), A.width()); + transpose(A, At); + + FullVector Atb(D); + //mult(Transposed, A, b, Atb); + mult(At, b, Atb); + + SparseMatrix AtA(D); + //mult(Transposed, A, NoTransposed, A, AtA); + mult(At, A, AtA); + + return SymmetricSolver(AtA, Atb, x, epsilon); +} + + +// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007 +bool nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon/*= 1e-5f*/) +{ + nvDebugCheck(A.width() == x.dimension()); + nvDebugCheck(A.height() == b.dimension()); + nvDebugCheck(A.height() >= A.width() - lockedCount); + + // @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly. + + const uint D = A.width() - lockedCount; + nvDebugCheck(D > 0); + + // Compute: b - Al * xl + FullVector b_Alxl(b); + + for (uint y = 0; y < A.height(); y++) + { + const uint count = A.getRow(y).count(); + for (uint e = 0; e < count; e++) + { + uint column = A.getRow(y)[e].x; + + bool isFree = true; + for (uint i = 0; i < lockedCount; i++) + { + isFree &= (lockedParameters[i] != column); + } + + if (!isFree) + { + b_Alxl[y] -= x[column] * A.getRow(y)[e].v; + } + } + } + + // Remove locked columns from A. 
+ SparseMatrix Af(D, A.height()); + + for (uint y = 0; y < A.height(); y++) + { + const uint count = A.getRow(y).count(); + for (uint e = 0; e < count; e++) + { + uint column = A.getRow(y)[e].x; + uint ix = column; + + bool isFree = true; + for (uint i = 0; i < lockedCount; i++) + { + isFree &= (lockedParameters[i] != column); + if (column > lockedParameters[i]) ix--; // shift columns + } + + if (isFree) + { + Af.setCoefficient(ix, y, A.getRow(y)[e].v); + } + } + } + + // Remove elements from x + FullVector xf(D); + + for (uint i = 0, j = 0; i < A.width(); i++) + { + bool isFree = true; + for (uint l = 0; l < lockedCount; l++) + { + isFree &= (lockedParameters[l] != i); + } + + if (isFree) + { + xf[j++] = x[i]; + } + } + + // Solve reduced system. + bool result = LeastSquaresSolver(Af, b_Alxl, xf, epsilon); + + // Copy results back to x. + for (uint i = 0, j = 0; i < A.width(); i++) + { + bool isFree = true; + for (uint l = 0; l < lockedCount; l++) + { + isFree &= (lockedParameters[l] != i); + } + + if (isFree) + { + x[i] = xf[j++]; + } + } + + return result; +} + + +bool nv::SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/) +{ + nvDebugCheck(A.height() == A.width()); + nvDebugCheck(A.height() == b.dimension()); + nvDebugCheck(b.dimension() == x.dimension()); + + JacobiPreconditioner jacobi(A, true); + return ConjugateGradientSolver(jacobi, A, b, x, epsilon); + + //return ConjugateGradientSolver(A, b, x, epsilon); +} + + +/** +* Compute the solution of the sparse linear system Ab=x using the Conjugate +* Gradient method. +* +* Solving sparse linear systems: +* (1) A·x = b +* +* The conjugate gradient algorithm solves (1) only in the case that A is +* symmetric and positive definite. It is based on the idea of minimizing the +* function +* +* (2) f(x) = 1/2·x·A·x - b·x +* +* This function is minimized when its gradient +* +* (3) df = A·x - b +* +* is zero, which is equivalent to (1). 
The minimization is carried out by +* generating a succession of search directions p.k and improved minimizers x.k. +* At each stage a quantity alfa.k is found that minimizes f(x.k + alfa.k·p.k), +* and x.k+1 is set equal to the new point x.k + alfa.k·p.k. The p.k and x.k are +* built up in such a way that x.k+1 is also the minimizer of f over the whole +* vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N +* iterations you arrive at the minimizer over the entire vector space, i.e., the +* solution to (1). +* +* For a really good explanation of the method see: +* +* "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain", +* Jonhathan Richard Shewchuk. +* +**/ +/*static*/ bool ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon) +{ + nvDebugCheck( A.isSquare() ); + nvDebugCheck( A.width() == b.dimension() ); + nvDebugCheck( A.width() == x.dimension() ); + + int i = 0; + const int D = A.width(); + const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not. 
+ + FullVector r(D); // residual + FullVector p(D); // search direction + FullVector q(D); // + float delta_0; + float delta_old; + float delta_new; + float alpha; + float beta; + + // r = b - A·x; + copy(b, r); + sgemv(-1, A, x, 1, r); + + // p = r; + copy(r, p); + + delta_new = dot( r, r ); + delta_0 = delta_new; + + while (i < i_max && delta_new > epsilon*epsilon*delta_0) + { + i++; + + // q = A·p + mult(A, p, q); + + // alpha = delta_new / p·q + alpha = delta_new / dot( p, q ); + + // x = alfa·p + x + saxpy(alpha, p, x); + + if ((i & 31) == 0) // recompute r after 32 steps + { + // r = b - A·x + copy(b, r); + sgemv(-1, A, x, 1, r); + } + else + { + // r = r - alpha·q + saxpy(-alpha, q, r); + } + + delta_old = delta_new; + delta_new = dot( r, r ); + + beta = delta_new / delta_old; + + // p = beta·p + r + scal(beta, p); + saxpy(1, r, p); + } + + return delta_new <= epsilon*epsilon*delta_0; +} + + +// Conjugate gradient with preconditioner. +/*static*/ bool ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon) +{ + nvDebugCheck( A.isSquare() ); + nvDebugCheck( A.width() == b.dimension() ); + nvDebugCheck( A.width() == x.dimension() ); + + int i = 0; + const int D = A.width(); + const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not. 
+ + FullVector r(D); // residual + FullVector p(D); // search direction + FullVector q(D); // + FullVector s(D); // preconditioned + float delta_0; + float delta_old; + float delta_new; + float alpha; + float beta; + + // r = b - A·x + copy(b, r); + sgemv(-1, A, x, 1, r); + + + // p = M^-1 · r + preconditioner.apply(r, p); + //copy(r, p); + + + delta_new = dot(r, p); + delta_0 = delta_new; + + while (i < i_max && delta_new > epsilon*epsilon*delta_0) + { + i++; + + // q = A·p + mult(A, p, q); + + // alpha = delta_new / p·q + alpha = delta_new / dot(p, q); + + // x = alfa·p + x + saxpy(alpha, p, x); + + if ((i & 31) == 0) // recompute r after 32 steps + { + // r = b - A·x + copy(b, r); + sgemv(-1, A, x, 1, r); + } + else + { + // r = r - alfa·q + saxpy(-alpha, q, r); + } + + // s = M^-1 · r + preconditioner.apply(r, s); + //copy(r, s); + + delta_old = delta_new; + delta_new = dot( r, s ); + + beta = delta_new / delta_old; + + // p = s + beta·p + scal(beta, p); + saxpy(1, s, p); + } + + return delta_new <= epsilon*epsilon*delta_0; +} + + +#if 0 // Nonsymmetric solvers + +/** Bi-conjugate gradient method. */ +MATHLIB_API int BiConjugateGradientSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) { + piDebugCheck( A.IsSquare() ); + piDebugCheck( A.Width() == b.Dim() ); + piDebugCheck( A.Width() == x.Dim() ); + + int i = 0; + const int D = A.Width(); + const int i_max = 4 * D; + + float resid; + float rho_1 = 0; + float rho_2 = 0; + float alpha; + float beta; + + DenseVector r(D); + DenseVector rtilde(D); + DenseVector p(D); + DenseVector ptilde(D); + DenseVector q(D); + DenseVector qtilde(D); + DenseVector tmp(D); // temporal vector. 
+ + // r = b - A·x; + A.Product( x, tmp ); + r.Sub( b, tmp ); + + // rtilde = r + rtilde.Set( r ); + + // p = r; + p.Set( r ); + + // ptilde = rtilde + ptilde.Set( rtilde ); + + + + float normb = b.Norm(); + if( normb == 0.0 ) normb = 1; + + // test convergence + resid = r.Norm() / normb; + if( resid < epsilon ) { + // method converges? + return 0; + } + + + while( i < i_max ) { + + i++; + + rho_1 = DenseVectorDotProduct( r, rtilde ); + + if( rho_1 == 0 ) { + // method fails. + return -i; + } + + if (i == 1) { + p.Set( r ); + ptilde.Set( rtilde ); + } + else { + beta = rho_1 / rho_2; + + // p = r + beta * p; + p.Mad( r, p, beta ); + + // ptilde = ztilde + beta * ptilde; + ptilde.Mad( rtilde, ptilde, beta ); + } + + // q = A * p; + A.Product( p, q ); + + // qtilde = A^t * ptilde; + A.TransProduct( ptilde, qtilde ); + + alpha = rho_1 / DenseVectorDotProduct( ptilde, q ); + + // x += alpha * p; + x.Mad( x, p, alpha ); + + // r -= alpha * q; + r.Mad( r, q, -alpha ); + + // rtilde -= alpha * qtilde; + rtilde.Mad( rtilde, qtilde, -alpha ); + + rho_2 = rho_1; + + // test convergence + resid = r.Norm() / normb; + if( resid < epsilon ) { + // method converges + return i; + } + } + + return i; +} + + +/** Bi-conjugate gradient stabilized method. 
*/ +int BiCGSTABSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) { + piDebugCheck( A.IsSquare() ); + piDebugCheck( A.Width() == b.Dim() ); + piDebugCheck( A.Width() == x.Dim() ); + + int i = 0; + const int D = A.Width(); + const int i_max = 2 * D; + + + float resid; + float rho_1 = 0; + float rho_2 = 0; + float alpha = 0; + float beta = 0; + float omega = 0; + + DenseVector p(D); + DenseVector phat(D); + DenseVector s(D); + DenseVector shat(D); + DenseVector t(D); + DenseVector v(D); + + DenseVector r(D); + DenseVector rtilde(D); + + DenseVector tmp(D); + + // r = b - A·x; + A.Product( x, tmp ); + r.Sub( b, tmp ); + + // rtilde = r + rtilde.Set( r ); + + + float normb = b.Norm(); + if( normb == 0.0 ) normb = 1; + + // test convergence + resid = r.Norm() / normb; + if( resid < epsilon ) { + // method converges? + return 0; + } + + + while( i<i_max ) { + + i++; + + rho_1 = DenseVectorDotProduct( rtilde, r ); + if( rho_1 == 0 ) { + // method fails + return -i; + } + + + if( i == 1 ) { + p.Set( r ); + } + else { + beta = (rho_1 / rho_2) * (alpha / omega); + + // p = r + beta * (p - omega * v); + p.Mad( p, v, -omega ); + p.Mad( r, p, beta ); + } + + //phat = M.solve(p); + phat.Set( p ); + //Precond( &phat, p ); + + //v = A * phat; + A.Product( phat, v ); + + alpha = rho_1 / DenseVectorDotProduct( rtilde, v ); + + // s = r - alpha * v; + s.Mad( r, v, -alpha ); + + + resid = s.Norm() / normb; + if( resid < epsilon ) { + // x += alpha * phat; + x.Mad( x, phat, alpha ); + return i; + } + + //shat = M.solve(s); + shat.Set( s ); + //Precond( &shat, s ); + + //t = A * shat; + A.Product( shat, t ); + + omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t ); + + // x += alpha * phat + omega * shat; + x.Mad( x, shat, omega ); + x.Mad( x, phat, alpha ); + + //r = s - omega * t; + r.Mad( s, t, -omega ); + + rho_2 = rho_1; + + resid = r.Norm() / normb; + if( resid < epsilon ) { + return i; + } + + if( omega == 0 ) { + return -i; // 
??? + } + } + + return i; +} + + +/** Bi-conjugate gradient stabilized method. */ +int BiCGSTABPrecondSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, const IPreconditioner &M, float epsilon ) { + piDebugCheck( A.IsSquare() ); + piDebugCheck( A.Width() == b.Dim() ); + piDebugCheck( A.Width() == x.Dim() ); + + int i = 0; + const int D = A.Width(); + const int i_max = D; + // const int i_max = 1000; + + + float resid; + float rho_1 = 0; + float rho_2 = 0; + float alpha = 0; + float beta = 0; + float omega = 0; + + DenseVector p(D); + DenseVector phat(D); + DenseVector s(D); + DenseVector shat(D); + DenseVector t(D); + DenseVector v(D); + + DenseVector r(D); + DenseVector rtilde(D); + + DenseVector tmp(D); + + // r = b - A·x; + A.Product( x, tmp ); + r.Sub( b, tmp ); + + // rtilde = r + rtilde.Set( r ); + + + float normb = b.Norm(); + if( normb == 0.0 ) normb = 1; + + // test convergence + resid = r.Norm() / normb; + if( resid < epsilon ) { + // method converges? + return 0; + } + + + while( i<i_max ) { + + i++; + + rho_1 = DenseVectorDotProduct( rtilde, r ); + if( rho_1 == 0 ) { + // method fails + return -i; + } + + + if( i == 1 ) { + p.Set( r ); + } + else { + beta = (rho_1 / rho_2) * (alpha / omega); + + // p = r + beta * (p - omega * v); + p.Mad( p, v, -omega ); + p.Mad( r, p, beta ); + } + + //phat = M.solve(p); + //phat.Set( p ); + M.Precond( &phat, p ); + + //v = A * phat; + A.Product( phat, v ); + + alpha = rho_1 / DenseVectorDotProduct( rtilde, v ); + + // s = r - alpha * v; + s.Mad( r, v, -alpha ); + + + resid = s.Norm() / normb; + + //printf( "--- Iteration %d: residual = %f\n", i, resid ); + + if( resid < epsilon ) { + // x += alpha * phat; + x.Mad( x, phat, alpha ); + return i; + } + + //shat = M.solve(s); + //shat.Set( s ); + M.Precond( &shat, s ); + + //t = A * shat; + A.Product( shat, t ); + + omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t ); + + // x += alpha * phat + omega * shat; + x.Mad( x, shat, omega ); + 
x.Mad( x, phat, alpha ); + + //r = s - omega * t; + r.Mad( s, t, -omega ); + + rho_2 = rho_1; + + resid = r.Norm() / normb; + if( resid < epsilon ) { + return i; + } + + if( omega == 0 ) { + return -i; // ??? + } + } + + return i; +} + +#endif diff --git a/thirdparty/thekla_atlas/nvmath/Solver.h b/thirdparty/thekla_atlas/nvmath/Solver.h new file mode 100644 index 0000000000..2bbf92736a --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Solver.h @@ -0,0 +1,24 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_SOLVER_H +#define NV_MATH_SOLVER_H + +#include "nvmath.h" + +namespace nv +{ + class SparseMatrix; + class FullVector; + + + // Linear solvers. + NVMATH_API bool LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f); + NVMATH_API bool LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon = 1e-5f); + NVMATH_API bool SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f); + //NVMATH_API void NonSymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f); + +} // nv namespace + + +#endif // NV_MATH_SOLVER_H diff --git a/thirdparty/thekla_atlas/nvmath/Sparse.cpp b/thirdparty/thekla_atlas/nvmath/Sparse.cpp new file mode 100644 index 0000000000..421e7ee022 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Sparse.cpp @@ -0,0 +1,889 @@ +// This code is in the public domain -- Ignacio Castaņo <castanyo@yahoo.es> + +#include "Sparse.h" +#include "KahanSum.h" + +#include "nvcore/Array.inl" + +#define USE_KAHAN_SUM 0 + + +using namespace nv; + + +FullVector::FullVector(uint dim) +{ + m_array.resize(dim); +} + +FullVector::FullVector(const FullVector & v) : m_array(v.m_array) +{ +} + +const FullVector & FullVector::operator=(const FullVector & v) +{ + nvCheck(dimension() == v.dimension()); + + m_array = 
v.m_array; + + return *this; +} + + +void FullVector::fill(float f) +{ + const uint dim = dimension(); + for (uint i = 0; i < dim; i++) + { + m_array[i] = f; + } +} + +void FullVector::operator+= (const FullVector & v) +{ + nvDebugCheck(dimension() == v.dimension()); + + const uint dim = dimension(); + for (uint i = 0; i < dim; i++) + { + m_array[i] += v.m_array[i]; + } +} + +void FullVector::operator-= (const FullVector & v) +{ + nvDebugCheck(dimension() == v.dimension()); + + const uint dim = dimension(); + for (uint i = 0; i < dim; i++) + { + m_array[i] -= v.m_array[i]; + } +} + +void FullVector::operator*= (const FullVector & v) +{ + nvDebugCheck(dimension() == v.dimension()); + + const uint dim = dimension(); + for (uint i = 0; i < dim; i++) + { + m_array[i] *= v.m_array[i]; + } +} + +void FullVector::operator+= (float f) +{ + const uint dim = dimension(); + for (uint i = 0; i < dim; i++) + { + m_array[i] += f; + } +} + +void FullVector::operator-= (float f) +{ + const uint dim = dimension(); + for (uint i = 0; i < dim; i++) + { + m_array[i] -= f; + } +} + +void FullVector::operator*= (float f) +{ + const uint dim = dimension(); + for (uint i = 0; i < dim; i++) + { + m_array[i] *= f; + } +} + + +void nv::saxpy(float a, const FullVector & x, FullVector & y) +{ + nvDebugCheck(x.dimension() == y.dimension()); + + const uint dim = x.dimension(); + for (uint i = 0; i < dim; i++) + { + y[i] += a * x[i]; + } +} + +void nv::copy(const FullVector & x, FullVector & y) +{ + nvDebugCheck(x.dimension() == y.dimension()); + + const uint dim = x.dimension(); + for (uint i = 0; i < dim; i++) + { + y[i] = x[i]; + } +} + +void nv::scal(float a, FullVector & x) +{ + const uint dim = x.dimension(); + for (uint i = 0; i < dim; i++) + { + x[i] *= a; + } +} + +float nv::dot(const FullVector & x, const FullVector & y) +{ + nvDebugCheck(x.dimension() == y.dimension()); + + const uint dim = x.dimension(); + +#if USE_KAHAN_SUM + KahanSum kahan; + for (uint i = 0; i < dim; i++) + { + 
kahan.add(x[i] * y[i]); + } + return kahan.sum(); +#else + float sum = 0; + for (uint i = 0; i < dim; i++) + { + sum += x[i] * y[i]; + } + return sum; +#endif +} + + +FullMatrix::FullMatrix(uint d) : m_width(d), m_height(d) +{ + m_array.resize(d*d, 0.0f); +} + +FullMatrix::FullMatrix(uint w, uint h) : m_width(w), m_height(h) +{ + m_array.resize(w*h, 0.0f); +} + +FullMatrix::FullMatrix(const FullMatrix & m) : m_width(m.m_width), m_height(m.m_height) +{ + m_array = m.m_array; +} + +const FullMatrix & FullMatrix::operator=(const FullMatrix & m) +{ + nvCheck(width() == m.width()); + nvCheck(height() == m.height()); + + m_array = m.m_array; + + return *this; +} + + +float FullMatrix::getCoefficient(uint x, uint y) const +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + return m_array[y * width() + x]; +} + +void FullMatrix::setCoefficient(uint x, uint y, float f) +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + m_array[y * width() + x] = f; +} + +void FullMatrix::addCoefficient(uint x, uint y, float f) +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + m_array[y * width() + x] += f; +} + +void FullMatrix::mulCoefficient(uint x, uint y, float f) +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + m_array[y * width() + x] *= f; +} + +float FullMatrix::dotRow(uint y, const FullVector & v) const +{ + nvDebugCheck( v.dimension() == width() ); + nvDebugCheck( y < height() ); + + float sum = 0; + + const uint count = v.dimension(); + for (uint i = 0; i < count; i++) + { + sum += m_array[y * count + i] * v[i]; + } + + return sum; +} + +void FullMatrix::madRow(uint y, float alpha, FullVector & v) const +{ + nvDebugCheck( v.dimension() == width() ); + nvDebugCheck( y < height() ); + + const uint count = v.dimension(); + for (uint i = 0; i < count; i++) + { + v[i] += m_array[y * count + i]; + } +} + + +// y = M * x +void nv::mult(const FullMatrix & M, const FullVector & x, FullVector & y) +{ + 
mult(NoTransposed, M, x, y); +} + +void nv::mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y) +{ + const uint w = M.width(); + const uint h = M.height(); + + if (TM == Transposed) + { + nvDebugCheck( h == x.dimension() ); + nvDebugCheck( w == y.dimension() ); + + y.fill(0.0f); + + for (uint i = 0; i < h; i++) + { + M.madRow(i, x[i], y); + } + } + else + { + nvDebugCheck( w == x.dimension() ); + nvDebugCheck( h == y.dimension() ); + + for (uint i = 0; i < h; i++) + { + y[i] = M.dotRow(i, x); + } + } +} + +// y = alpha*A*x + beta*y +void nv::sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y) +{ + sgemv(alpha, NoTransposed, A, x, beta, y); +} + +void nv::sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y) +{ + const uint w = A.width(); + const uint h = A.height(); + + if (TA == Transposed) + { + nvDebugCheck( h == x.dimension() ); + nvDebugCheck( w == y.dimension() ); + + for (uint i = 0; i < h; i++) + { + A.madRow(i, alpha * x[i], y); + } + } + else + { + nvDebugCheck( w == x.dimension() ); + nvDebugCheck( h == y.dimension() ); + + for (uint i = 0; i < h; i++) + { + y[i] = alpha * A.dotRow(i, x) + beta * y[i]; + } + } +} + + +// Multiply a row of A by a column of B. +static float dot(uint j, Transpose TA, const FullMatrix & A, uint i, Transpose TB, const FullMatrix & B) +{ + const uint w = (TA == NoTransposed) ? A.width() : A.height(); + nvDebugCheck(w == ((TB == NoTransposed) ? B.height() : A.width())); + + float sum = 0.0f; + + for (uint k = 0; k < w; k++) + { + const float a = (TA == NoTransposed) ? A.getCoefficient(k, j) : A.getCoefficient(j, k); // @@ Move branches out of the loop? + const float b = (TB == NoTransposed) ? 
B.getCoefficient(i, k) : A.getCoefficient(k, i); + sum += a * b; + } + + return sum; +} + + +// C = A * B +void nv::mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C) +{ + mult(NoTransposed, A, NoTransposed, B, C); +} + +void nv::mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C) +{ + sgemm(1.0f, TA, A, TB, B, 0.0f, C); +} + +// C = alpha*A*B + beta*C +void nv::sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C) +{ + sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C); +} + +void nv::sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C) +{ + const uint w = C.width(); + const uint h = C.height(); + + uint aw = (TA == NoTransposed) ? A.width() : A.height(); + uint ah = (TA == NoTransposed) ? A.height() : A.width(); + uint bw = (TB == NoTransposed) ? B.width() : B.height(); + uint bh = (TB == NoTransposed) ? B.height() : B.width(); + + nvDebugCheck(aw == bh); + nvDebugCheck(bw == ah); + nvDebugCheck(w == bw); + nvDebugCheck(h == ah); + + for (uint y = 0; y < h; y++) + { + for (uint x = 0; x < w; x++) + { + float c = alpha * ::dot(x, TA, A, y, TB, B) + beta * C.getCoefficient(x, y); + C.setCoefficient(x, y, c); + } + } +} + + + + + +/// Ctor. Init the size of the sparse matrix. +SparseMatrix::SparseMatrix(uint d) : m_width(d) +{ + m_array.resize(d); +} + +/// Ctor. Init the size of the sparse matrix. 
+SparseMatrix::SparseMatrix(uint w, uint h) : m_width(w) +{ + m_array.resize(h); +} + +SparseMatrix::SparseMatrix(const SparseMatrix & m) : m_width(m.m_width) +{ + m_array = m.m_array; +} + +const SparseMatrix & SparseMatrix::operator=(const SparseMatrix & m) +{ + nvCheck(width() == m.width()); + nvCheck(height() == m.height()); + + m_array = m.m_array; + + return *this; +} + + +// x is column, y is row +float SparseMatrix::getCoefficient(uint x, uint y) const +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + const uint count = m_array[y].count(); + for (uint i = 0; i < count; i++) + { + if (m_array[y][i].x == x) return m_array[y][i].v; + } + + return 0.0f; +} + +void SparseMatrix::setCoefficient(uint x, uint y, float f) +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + const uint count = m_array[y].count(); + for (uint i = 0; i < count; i++) + { + if (m_array[y][i].x == x) + { + m_array[y][i].v = f; + return; + } + } + + if (f != 0.0f) + { + Coefficient c = { x, f }; + m_array[y].append( c ); + } +} + +void SparseMatrix::addCoefficient(uint x, uint y, float f) +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + if (f != 0.0f) + { + const uint count = m_array[y].count(); + for (uint i = 0; i < count; i++) + { + if (m_array[y][i].x == x) + { + m_array[y][i].v += f; + return; + } + } + + Coefficient c = { x, f }; + m_array[y].append( c ); + } +} + +void SparseMatrix::mulCoefficient(uint x, uint y, float f) +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + const uint count = m_array[y].count(); + for (uint i = 0; i < count; i++) + { + if (m_array[y][i].x == x) + { + m_array[y][i].v *= f; + return; + } + } + + if (f != 0.0f) + { + Coefficient c = { x, f }; + m_array[y].append( c ); + } +} + + +float SparseMatrix::sumRow(uint y) const +{ + nvDebugCheck( y < height() ); + + const uint count = m_array[y].count(); + +#if USE_KAHAN_SUM + KahanSum kahan; + for (uint i = 0; i < count; i++) + { + 
kahan.add(m_array[y][i].v); + } + return kahan.sum(); +#else + float sum = 0; + for (uint i = 0; i < count; i++) + { + sum += m_array[y][i].v; + } + return sum; +#endif +} + +float SparseMatrix::dotRow(uint y, const FullVector & v) const +{ + nvDebugCheck( y < height() ); + + const uint count = m_array[y].count(); + +#if USE_KAHAN_SUM + KahanSum kahan; + for (uint i = 0; i < count; i++) + { + kahan.add(m_array[y][i].v * v[m_array[y][i].x]); + } + return kahan.sum(); +#else + float sum = 0; + for (uint i = 0; i < count; i++) + { + sum += m_array[y][i].v * v[m_array[y][i].x]; + } + return sum; +#endif +} + +void SparseMatrix::madRow(uint y, float alpha, FullVector & v) const +{ + nvDebugCheck(y < height()); + + const uint count = m_array[y].count(); + for (uint i = 0; i < count; i++) + { + v[m_array[y][i].x] += alpha * m_array[y][i].v; + } +} + + +void SparseMatrix::clearRow(uint y) +{ + nvDebugCheck( y < height() ); + + m_array[y].clear(); +} + +void SparseMatrix::scaleRow(uint y, float f) +{ + nvDebugCheck( y < height() ); + + const uint count = m_array[y].count(); + for (uint i = 0; i < count; i++) + { + m_array[y][i].v *= f; + } +} + +void SparseMatrix::normalizeRow(uint y) +{ + nvDebugCheck( y < height() ); + + float norm = 0.0f; + + const uint count = m_array[y].count(); + for (uint i = 0; i < count; i++) + { + float f = m_array[y][i].v; + norm += f * f; + } + + scaleRow(y, 1.0f / sqrtf(norm)); +} + + +void SparseMatrix::clearColumn(uint x) +{ + nvDebugCheck(x < width()); + + for (uint y = 0; y < height(); y++) + { + const uint count = m_array[y].count(); + for (uint e = 0; e < count; e++) + { + if (m_array[y][e].x == x) + { + m_array[y][e].v = 0.0f; + break; + } + } + } +} + +void SparseMatrix::scaleColumn(uint x, float f) +{ + nvDebugCheck(x < width()); + + for (uint y = 0; y < height(); y++) + { + const uint count = m_array[y].count(); + for (uint e = 0; e < count; e++) + { + if (m_array[y][e].x == x) + { + m_array[y][e].v *= f; + break; + } + } + } +} + 
+const Array<SparseMatrix::Coefficient> & SparseMatrix::getRow(uint y) const +{ + return m_array[y]; +} + + +bool SparseMatrix::isSymmetric() const +{ + for (uint y = 0; y < height(); y++) + { + const uint count = m_array[y].count(); + for (uint e = 0; e < count; e++) + { + const uint x = m_array[y][e].x; + if (x > y) { + float v = m_array[y][e].v; + + if (!equal(getCoefficient(y, x), v)) { // @@ epsilon + return false; + } + } + } + } + + return true; +} + + +// y = M * x +void nv::mult(const SparseMatrix & M, const FullVector & x, FullVector & y) +{ + mult(NoTransposed, M, x, y); +} + +void nv::mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y) +{ + const uint w = M.width(); + const uint h = M.height(); + + if (TM == Transposed) + { + nvDebugCheck( h == x.dimension() ); + nvDebugCheck( w == y.dimension() ); + + y.fill(0.0f); + + for (uint i = 0; i < h; i++) + { + M.madRow(i, x[i], y); + } + } + else + { + nvDebugCheck( w == x.dimension() ); + nvDebugCheck( h == y.dimension() ); + + for (uint i = 0; i < h; i++) + { + y[i] = M.dotRow(i, x); + } + } +} + +// y = alpha*A*x + beta*y +void nv::sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y) +{ + sgemv(alpha, NoTransposed, A, x, beta, y); +} + +void nv::sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y) +{ + const uint w = A.width(); + const uint h = A.height(); + + if (TA == Transposed) + { + nvDebugCheck( h == x.dimension() ); + nvDebugCheck( w == y.dimension() ); + + for (uint i = 0; i < h; i++) + { + A.madRow(i, alpha * x[i], y); + } + } + else + { + nvDebugCheck( w == x.dimension() ); + nvDebugCheck( h == y.dimension() ); + + for (uint i = 0; i < h; i++) + { + y[i] = alpha * A.dotRow(i, x) + beta * y[i]; + } + } +} + + +// dot y-row of A by x-column of B +static float dotRowColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B) +{ + const Array<SparseMatrix::Coefficient> & 
row = A.getRow(y); + + const uint count = row.count(); + +#if USE_KAHAN_SUM + KahanSum kahan; + for (uint i = 0; i < count; i++) + { + const SparseMatrix::Coefficient & c = row[i]; + kahan.add(c.v * B.getCoefficient(x, c.x)); + } + return kahan.sum(); +#else + float sum = 0.0f; + for (uint i = 0; i < count; i++) + { + const SparseMatrix::Coefficient & c = row[i]; + sum += c.v * B.getCoefficient(x, c.x); + } + return sum; +#endif +} + +// dot y-row of A by x-row of B +static float dotRowRow(int y, const SparseMatrix & A, int x, const SparseMatrix & B) +{ + const Array<SparseMatrix::Coefficient> & row = A.getRow(y); + + const uint count = row.count(); + +#if USE_KAHAN_SUM + KahanSum kahan; + for (uint i = 0; i < count; i++) + { + const SparseMatrix::Coefficient & c = row[i]; + kahan.add(c.v * B.getCoefficient(c.x, x)); + } + return kahan.sum(); +#else + float sum = 0.0f; + for (uint i = 0; i < count; i++) + { + const SparseMatrix::Coefficient & c = row[i]; + sum += c.v * B.getCoefficient(c.x, x); + } + return sum; +#endif +} + +// dot y-column of A by x-column of B +static float dotColumnColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B) +{ + nvDebugCheck(A.height() == B.height()); + + const uint h = A.height(); + +#if USE_KAHAN_SUM + KahanSum kahan; + for (uint i = 0; i < h; i++) + { + kahan.add(A.getCoefficient(y, i) * B.getCoefficient(x, i)); + } + return kahan.sum(); +#else + float sum = 0.0f; + for (uint i = 0; i < h; i++) + { + sum += A.getCoefficient(y, i) * B.getCoefficient(x, i); + } + return sum; +#endif +} + + +void nv::transpose(const SparseMatrix & A, SparseMatrix & B) +{ + nvDebugCheck(A.width() == B.height()); + nvDebugCheck(B.width() == A.height()); + + const uint w = A.width(); + for (uint x = 0; x < w; x++) + { + B.clearRow(x); + } + + const uint h = A.height(); + for (uint y = 0; y < h; y++) + { + const Array<SparseMatrix::Coefficient> & row = A.getRow(y); + + const uint count = row.count(); + for (uint i = 0; i < count; i++) + { 
+ const SparseMatrix::Coefficient & c = row[i]; + nvDebugCheck(c.x < w); + + B.setCoefficient(y, c.x, c.v); + } + } +} + +// C = A * B +void nv::mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C) +{ + mult(NoTransposed, A, NoTransposed, B, C); +} + +void nv::mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C) +{ + sgemm(1.0f, TA, A, TB, B, 0.0f, C); +} + +// C = alpha*A*B + beta*C +void nv::sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C) +{ + sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C); +} + +void nv::sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C) +{ + const uint w = C.width(); + const uint h = C.height(); + + uint aw = (TA == NoTransposed) ? A.width() : A.height(); + uint ah = (TA == NoTransposed) ? A.height() : A.width(); + uint bw = (TB == NoTransposed) ? B.width() : B.height(); + uint bh = (TB == NoTransposed) ? B.height() : B.width(); + + nvDebugCheck(aw == bh); + nvDebugCheck(bw == ah); + nvDebugCheck(w == bw); + nvDebugCheck(h == ah); + + + for (uint y = 0; y < h; y++) + { + for (uint x = 0; x < w; x++) + { + float c = beta * C.getCoefficient(x, y); + + if (TA == NoTransposed && TB == NoTransposed) + { + // dot y-row of A by x-column of B. + c += alpha * dotRowColumn(y, A, x, B); + } + else if (TA == Transposed && TB == Transposed) + { + // dot y-column of A by x-row of B. + c += alpha * dotRowColumn(x, B, y, A); + } + else if (TA == Transposed && TB == NoTransposed) + { + // dot y-column of A by x-column of B. + c += alpha * dotColumnColumn(y, A, x, B); + } + else if (TA == NoTransposed && TB == Transposed) + { + // dot y-row of A by x-row of B. + c += alpha * dotRowRow(y, A, x, B); + } + + C.setCoefficient(x, y, c); + } + } +} + +// C = At * A +void nv::sqm(const SparseMatrix & A, SparseMatrix & C) +{ + // This is quite expensive... 
+ mult(Transposed, A, NoTransposed, A, C); +} diff --git a/thirdparty/thekla_atlas/nvmath/Sparse.h b/thirdparty/thekla_atlas/nvmath/Sparse.h new file mode 100644 index 0000000000..6b03ed51f3 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Sparse.h @@ -0,0 +1,204 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_SPARSE_H +#define NV_MATH_SPARSE_H + +#include "nvmath.h" +#include "nvcore/Array.h" + + +// Full and sparse vector and matrix classes. BLAS subset. + +namespace nv +{ + class FullVector; + class FullMatrix; + class SparseMatrix; + + + /// Fixed size vector class. + class FullVector + { + public: + + FullVector(uint dim); + FullVector(const FullVector & v); + + const FullVector & operator=(const FullVector & v); + + uint dimension() const { return m_array.count(); } + + const float & operator[]( uint index ) const { return m_array[index]; } + float & operator[] ( uint index ) { return m_array[index]; } + + void fill(float f); + + void operator+= (const FullVector & v); + void operator-= (const FullVector & v); + void operator*= (const FullVector & v); + + void operator+= (float f); + void operator-= (float f); + void operator*= (float f); + + + private: + + Array<float> m_array; + + }; + + // Pseudo-BLAS interface. + NVMATH_API void saxpy(float a, const FullVector & x, FullVector & y); // y = a * x + y + NVMATH_API void copy(const FullVector & x, FullVector & y); + NVMATH_API void scal(float a, FullVector & x); + NVMATH_API float dot(const FullVector & x, const FullVector & y); + + + enum Transpose + { + NoTransposed = 0, + Transposed = 1 + }; + + /// Full matrix class. 
+ class FullMatrix + { + public: + + FullMatrix(uint d); + FullMatrix(uint w, uint h); + FullMatrix(const FullMatrix & m); + + const FullMatrix & operator=(const FullMatrix & m); + + uint width() const { return m_width; } + uint height() const { return m_height; } + bool isSquare() const { return m_width == m_height; } + + float getCoefficient(uint x, uint y) const; + + void setCoefficient(uint x, uint y, float f); + void addCoefficient(uint x, uint y, float f); + void mulCoefficient(uint x, uint y, float f); + + float dotRow(uint y, const FullVector & v) const; + void madRow(uint y, float alpha, FullVector & v) const; + + protected: + + bool isValid() const { + return m_array.size() == (m_width * m_height); + } + + private: + + const uint m_width; + const uint m_height; + Array<float> m_array; + + }; + + NVMATH_API void mult(const FullMatrix & M, const FullVector & x, FullVector & y); + NVMATH_API void mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y); + + // y = alpha*A*x + beta*y + NVMATH_API void sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y); + NVMATH_API void sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y); + + NVMATH_API void mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C); + NVMATH_API void mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C); + + // C = alpha*A*B + beta*C + NVMATH_API void sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C); + NVMATH_API void sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C); + + + /** + * Sparse matrix class. The matrix is assumed to be sparse and to have + * very few non-zero elements, for this reason it's stored in indexed + * format. 
To multiply column vectors efficiently, the matrix stores + * the elements in indexed-column order, there is a list of indexed + * elements for each row of the matrix. As with the FullVector the + * dimension of the matrix is constant. + **/ + class SparseMatrix + { + friend class FullMatrix; + public: + + // An element of the sparse array. + struct Coefficient { + uint x; // column + float v; // value + }; + + + public: + + SparseMatrix(uint d); + SparseMatrix(uint w, uint h); + SparseMatrix(const SparseMatrix & m); + + const SparseMatrix & operator=(const SparseMatrix & m); + + + uint width() const { return m_width; } + uint height() const { return m_array.count(); } + bool isSquare() const { return width() == height(); } + + float getCoefficient(uint x, uint y) const; // x is column, y is row + + void setCoefficient(uint x, uint y, float f); + void addCoefficient(uint x, uint y, float f); + void mulCoefficient(uint x, uint y, float f); + + float sumRow(uint y) const; + float dotRow(uint y, const FullVector & v) const; + void madRow(uint y, float alpha, FullVector & v) const; + + void clearRow(uint y); + void scaleRow(uint y, float f); + void normalizeRow(uint y); + + void clearColumn(uint x); + void scaleColumn(uint x, float f); + + const Array<Coefficient> & getRow(uint y) const; + + bool isSymmetric() const; + + private: + + /// Number of columns. + const uint m_width; + + /// Array of matrix elements. 
+ Array< Array<Coefficient> > m_array; + + }; + + NVMATH_API void transpose(const SparseMatrix & A, SparseMatrix & B); + + NVMATH_API void mult(const SparseMatrix & M, const FullVector & x, FullVector & y); + NVMATH_API void mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y); + + // y = alpha*A*x + beta*y + NVMATH_API void sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y); + NVMATH_API void sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y); + + NVMATH_API void mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C); + NVMATH_API void mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C); + + // C = alpha*A*B + beta*C + NVMATH_API void sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C); + NVMATH_API void sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C); + + // C = At * A + NVMATH_API void sqm(const SparseMatrix & A, SparseMatrix & C); + +} // nv namespace + + +#endif // NV_MATH_SPARSE_H diff --git a/thirdparty/thekla_atlas/nvmath/Sphere.cpp b/thirdparty/thekla_atlas/nvmath/Sphere.cpp new file mode 100644 index 0000000000..e0c1ad652c --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Sphere.cpp @@ -0,0 +1,431 @@ +// This code is in the public domain -- Ignacio Castaņo <castano@gmail.com> + +#include "Sphere.h" +#include "Vector.inl" +#include "Box.inl" + +#include <float.h> // FLT_MAX + +using namespace nv; + +const float radiusEpsilon = 1e-4f; + +Sphere::Sphere(Vector3::Arg p0, Vector3::Arg p1) +{ + if (p0 == p1) *this = Sphere(p0); + else { + center = (p0 + p1) * 0.5f; + radius = length(p0 - center) + radiusEpsilon; + + float d0 = length(p0 - center); + float d1 = length(p1 - center); + nvDebugCheck(equal(d0, radius - radiusEpsilon)); + nvDebugCheck(equal(d1, 
radius - radiusEpsilon)); + } +} + +Sphere::Sphere(Vector3::Arg p0, Vector3::Arg p1, Vector3::Arg p2) +{ + if (p0 == p1 || p0 == p2) *this = Sphere(p1, p2); + else if (p1 == p2) *this = Sphere(p0, p2); + else { + Vector3 a = p1 - p0; + Vector3 b = p2 - p0; + Vector3 c = cross(a, b); + + float denominator = 2.0f * lengthSquared(c); + + if (!isZero(denominator)) { + Vector3 d = (lengthSquared(b) * cross(c, a) + lengthSquared(a) * cross(b, c)) / denominator; + + center = p0 + d; + radius = length(d) + radiusEpsilon; + + float d0 = length(p0 - center); + float d1 = length(p1 - center); + float d2 = length(p2 - center); + nvDebugCheck(equal(d0, radius - radiusEpsilon)); + nvDebugCheck(equal(d1, radius - radiusEpsilon)); + nvDebugCheck(equal(d2, radius - radiusEpsilon)); + } + else { + // @@ This is a specialization of the code below, but really, the only thing we need to do here is to find the two most distant points. + // Compute all possible spheres, invalidate those that do not contain the four points, keep the smallest. + Sphere s0(p1, p2); + float d0 = distanceSquared(s0, p0); + if (d0 > 0) s0.radius = NV_FLOAT_MAX; + + Sphere s1(p0, p2); + float d1 = distanceSquared(s1, p1); + if (d1 > 0) s1.radius = NV_FLOAT_MAX; + + Sphere s2(p0, p1); + float d2 = distanceSquared(s2, p2); + if (d2 > 0) s1.radius = NV_FLOAT_MAX; + + if (s0.radius < s1.radius && s0.radius < s2.radius) { + center = s0.center; + radius = s0.radius; + } + else if (s1.radius < s2.radius) { + center = s1.center; + radius = s1.radius; + } + else { + center = s2.center; + radius = s2.radius; + } + } + } +} + +Sphere::Sphere(Vector3::Arg p0, Vector3::Arg p1, Vector3::Arg p2, Vector3::Arg p3) +{ + if (p0 == p1 || p0 == p2 || p0 == p3) *this = Sphere(p1, p2, p3); + else if (p1 == p2 || p1 == p3) *this = Sphere(p0, p2, p3); + else if (p2 == p3) *this = Sphere(p0, p1, p2); + else { + // @@ This only works if the points are not coplanar! 
+ Vector3 a = p1 - p0; + Vector3 b = p2 - p0; + Vector3 c = p3 - p0; + + float denominator = 2.0f * dot(c, cross(a, b)); // triple product. + + if (!isZero(denominator)) { + Vector3 d = (lengthSquared(c) * cross(a, b) + lengthSquared(b) * cross(c, a) + lengthSquared(a) * cross(b, c)) / denominator; + + center = p0 + d; + radius = length(d) + radiusEpsilon; + + float d0 = length(p0 - center); + float d1 = length(p1 - center); + float d2 = length(p2 - center); + float d3 = length(p3 - center); + nvDebugCheck(equal(d0, radius - radiusEpsilon)); + nvDebugCheck(equal(d1, radius - radiusEpsilon)); + nvDebugCheck(equal(d2, radius - radiusEpsilon)); + nvDebugCheck(equal(d3, radius - radiusEpsilon)); + } + else { + // Compute all possible spheres, invalidate those that do not contain the four points, keep the smallest. + Sphere s0(p1, p2, p3); + float d0 = distanceSquared(s0, p0); + if (d0 > 0) s0.radius = NV_FLOAT_MAX; + + Sphere s1(p0, p2, p3); + float d1 = distanceSquared(s1, p1); + if (d1 > 0) s1.radius = NV_FLOAT_MAX; + + Sphere s2(p0, p1, p3); + float d2 = distanceSquared(s2, p2); + if (d2 > 0) s2.radius = NV_FLOAT_MAX; + + Sphere s3(p0, p1, p2); + float d3 = distanceSquared(s3, p3); + if (d3 > 0) s2.radius = NV_FLOAT_MAX; + + if (s0.radius < s1.radius && s0.radius < s2.radius && s0.radius < s3.radius) { + center = s0.center; + radius = s0.radius; + } + else if (s1.radius < s2.radius && s1.radius < s3.radius) { + center = s1.center; + radius = s1.radius; + } + else if (s1.radius < s3.radius) { + center = s2.center; + radius = s2.radius; + } + else { + center = s3.center; + radius = s3.radius; + } + } + } +} + + +float nv::distanceSquared(const Sphere & sphere, const Vector3 & point) +{ + return lengthSquared(sphere.center - point) - square(sphere.radius); +} + + + +// Implementation of "MiniBall" based on: +// http://www.flipcode.com/archives/Smallest_Enclosing_Spheres.shtml + +static Sphere recurseMini(const Vector3 *P[], uint p, uint b = 0) +{ + Sphere MB; + + 
switch(b) + { + case 0: + MB = Sphere(*P[0]); + break; + case 1: + MB = Sphere(*P[-1]); + break; + case 2: + MB = Sphere(*P[-1], *P[-2]); + break; + case 3: + MB = Sphere(*P[-1], *P[-2], *P[-3]); + break; + case 4: + MB = Sphere(*P[-1], *P[-2], *P[-3], *P[-4]); + return MB; + } + + for (uint i = 0; i < p; i++) + { + if (distanceSquared(MB, *P[i]) > 0) // Signed square distance to sphere + { + for (uint j = i; j > 0; j--) + { + swap(P[j], P[j-1]); + } + + MB = recurseMini(P + 1, i, b + 1); + } + } + + return MB; +} + +static bool allInside(const Sphere & sphere, const Vector3 * pointArray, const uint pointCount) { + for (uint i = 0; i < pointCount; i++) { + if (distanceSquared(sphere, pointArray[i]) >= NV_EPSILON) { + return false; + } + } + return true; +} + + +Sphere nv::miniBall(const Vector3 * pointArray, const uint pointCount) +{ + nvDebugCheck(pointArray != NULL); + nvDebugCheck(pointCount > 0); + + const Vector3 **L = new const Vector3*[pointCount]; + + for (uint i = 0; i < pointCount; i++) { + L[i] = &pointArray[i]; + } + + Sphere sphere = recurseMini(L, pointCount); + + delete [] L; + + nvDebugCheck(allInside(sphere, pointArray, pointCount)); + + return sphere; +} + + +// Approximate bounding sphere, based on "An Efficient Bounding Sphere" by Jack Ritter, from "Graphics Gems" +Sphere nv::approximateSphere_Ritter(const Vector3 * pointArray, const uint pointCount) +{ + nvDebugCheck(pointArray != NULL); + nvDebugCheck(pointCount > 0); + + Vector3 xmin, xmax, ymin, ymax, zmin, zmax; + + xmin = xmax = ymin = ymax = zmin = zmax = pointArray[0]; + + // FIRST PASS: find 6 minima/maxima points + xmin.x = ymin.y = zmin.z = FLT_MAX; + xmax.x = ymax.y = zmax.z = -FLT_MAX; + + for (uint i = 0; i < pointCount; i++) + { + const Vector3 & p = pointArray[i]; + if (p.x < xmin.x) xmin = p; + if (p.x > xmax.x) xmax = p; + if (p.y < ymin.y) ymin = p; + if (p.y > ymax.y) ymax = p; + if (p.z < zmin.z) zmin = p; + if (p.z > zmax.z) zmax = p; + } + + float xspan = 
lengthSquared(xmax - xmin); + float yspan = lengthSquared(ymax - ymin); + float zspan = lengthSquared(zmax - zmin); + + // Set points dia1 & dia2 to the maximally separated pair. + Vector3 dia1 = xmin; + Vector3 dia2 = xmax; + float maxspan = xspan; + if (yspan > maxspan) { + maxspan = yspan; + dia1 = ymin; + dia2 = ymax; + } + if (zspan > maxspan) { + dia1 = zmin; + dia2 = zmax; + } + + // |dia1-dia2| is a diameter of initial sphere + + // calc initial center + Sphere sphere; + sphere.center = (dia1 + dia2) / 2.0f; + + // calculate initial radius**2 and radius + float rad_sq = lengthSquared(dia2 - sphere.center); + sphere.radius = sqrtf(rad_sq); + + + // SECOND PASS: increment current sphere + for (uint i = 0; i < pointCount; i++) + { + const Vector3 & p = pointArray[i]; + + float old_to_p_sq = lengthSquared(p - sphere.center); + + if (old_to_p_sq > rad_sq) // do r**2 test first + { + // this point is outside of current sphere + float old_to_p = sqrtf(old_to_p_sq); + + // calc radius of new sphere + sphere.radius = (sphere.radius + old_to_p) / 2.0f; + rad_sq = sphere.radius * sphere.radius; // for next r**2 compare + + float old_to_new = old_to_p - sphere.radius; + + // calc center of new sphere + sphere.center = (sphere.radius * sphere.center + old_to_new * p) / old_to_p; + } + } + + nvDebugCheck(allInside(sphere, pointArray, pointCount)); + + return sphere; +} + + +static float computeSphereRadius(const Vector3 & center, const Vector3 * pointArray, const uint pointCount) { + + float maxRadius2 = 0; + + for (uint i = 0; i < pointCount; i++) + { + const Vector3 & p = pointArray[i]; + + float r2 = lengthSquared(center - p); + + if (r2 > maxRadius2) { + maxRadius2 = r2; + } + } + + return sqrtf(maxRadius2) + radiusEpsilon; +} + + +Sphere nv::approximateSphere_AABB(const Vector3 * pointArray, const uint pointCount) +{ + nvDebugCheck(pointArray != NULL); + nvDebugCheck(pointCount > 0); + + Box box; + box.clearBounds(); + + for (uint i = 0; i < pointCount; i++) { + 
box.addPointToBounds(pointArray[i]); + } + + Sphere sphere; + sphere.center = box.center(); + sphere.radius = computeSphereRadius(sphere.center, pointArray, pointCount); + + nvDebugCheck(allInside(sphere, pointArray, pointCount)); + + return sphere; +} + + +static void computeExtremalPoints(const Vector3 & dir, const Vector3 * pointArray, uint pointCount, Vector3 * minPoint, Vector3 * maxPoint) { + nvDebugCheck(pointCount > 0); + + uint mini = 0; + uint maxi = 0; + float minDist = FLT_MAX; + float maxDist = -FLT_MAX; + + for (uint i = 0; i < pointCount; i++) { + float d = dot(dir, pointArray[i]); + + if (d < minDist) { + minDist = d; + mini = i; + } + if (d > maxDist) { + maxDist = d; + maxi = i; + } + } + nvDebugCheck(minDist != FLT_MAX); + nvDebugCheck(maxDist != -FLT_MAX); + + *minPoint = pointArray[mini]; + *maxPoint = pointArray[maxi]; +} + +// EPOS algorithm based on: +// http://www.ep.liu.se/ecp/034/009/ecp083409.pdf +Sphere nv::approximateSphere_EPOS6(const Vector3 * pointArray, uint pointCount) +{ + nvDebugCheck(pointArray != NULL); + nvDebugCheck(pointCount > 0); + + Vector3 extremalPoints[6]; + + // Compute 6 extremal points. + computeExtremalPoints(Vector3(1, 0, 0), pointArray, pointCount, extremalPoints+0, extremalPoints+1); + computeExtremalPoints(Vector3(0, 1, 0), pointArray, pointCount, extremalPoints+2, extremalPoints+3); + computeExtremalPoints(Vector3(0, 0, 1), pointArray, pointCount, extremalPoints+4, extremalPoints+5); + + Sphere sphere = miniBall(extremalPoints, 6); + sphere.radius = computeSphereRadius(sphere.center, pointArray, pointCount); + + nvDebugCheck(allInside(sphere, pointArray, pointCount)); + + return sphere; +} + +Sphere nv::approximateSphere_EPOS14(const Vector3 * pointArray, uint pointCount) +{ + nvDebugCheck(pointArray != NULL); + nvDebugCheck(pointCount > 0); + + Vector3 extremalPoints[14]; + + // Compute 14 extremal points. 
+ computeExtremalPoints(Vector3(1, 0, 0), pointArray, pointCount, extremalPoints+0, extremalPoints+1); + computeExtremalPoints(Vector3(0, 1, 0), pointArray, pointCount, extremalPoints+2, extremalPoints+3); + computeExtremalPoints(Vector3(0, 0, 1), pointArray, pointCount, extremalPoints+4, extremalPoints+5); + + float d = sqrtf(1.0f/3.0f); + + computeExtremalPoints(Vector3(d, d, d), pointArray, pointCount, extremalPoints+6, extremalPoints+7); + computeExtremalPoints(Vector3(-d, d, d), pointArray, pointCount, extremalPoints+8, extremalPoints+9); + computeExtremalPoints(Vector3(-d, -d, d), pointArray, pointCount, extremalPoints+10, extremalPoints+11); + computeExtremalPoints(Vector3(d, -d, d), pointArray, pointCount, extremalPoints+12, extremalPoints+13); + + + Sphere sphere = miniBall(extremalPoints, 14); + sphere.radius = computeSphereRadius(sphere.center, pointArray, pointCount); + + nvDebugCheck(allInside(sphere, pointArray, pointCount)); + + return sphere; +} + + + diff --git a/thirdparty/thekla_atlas/nvmath/Sphere.h b/thirdparty/thekla_atlas/nvmath/Sphere.h new file mode 100644 index 0000000000..300731af44 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Sphere.h @@ -0,0 +1,43 @@ +// This code is in the public domain -- Ignacio Castaņo <castano@gmail.com> + +#pragma once +#ifndef NV_MATH_SPHERE_H +#define NV_MATH_SPHERE_H + +#include "Vector.h" + +namespace nv +{ + + class Sphere + { + public: + Sphere() {} + Sphere(Vector3::Arg center, float radius) : center(center), radius(radius) {} + + Sphere(Vector3::Arg center) : center(center), radius(0.0f) {} + Sphere(Vector3::Arg p0, Vector3::Arg p1); + Sphere(Vector3::Arg p0, Vector3::Arg p1, Vector3::Arg p2); + Sphere(Vector3::Arg p0, Vector3::Arg p1, Vector3::Arg p2, Vector3::Arg p3); + + Vector3 center; + float radius; + }; + + // Returns negative values if point is inside. + float distanceSquared(const Sphere & sphere, const Vector3 &point); + + + // Welz's algorithm. 
Fairly slow, recursive implementation uses large stack. + Sphere miniBall(const Vector3 * pointArray, uint pointCount); + + Sphere approximateSphere_Ritter(const Vector3 * pointArray, uint pointCount); + Sphere approximateSphere_AABB(const Vector3 * pointArray, uint pointCount); + Sphere approximateSphere_EPOS6(const Vector3 * pointArray, uint pointCount); + Sphere approximateSphere_EPOS14(const Vector3 * pointArray, uint pointCount); + + +} // nv namespace + + +#endif // NV_MATH_SPHERE_H diff --git a/thirdparty/thekla_atlas/nvmath/TypeSerialization.cpp b/thirdparty/thekla_atlas/nvmath/TypeSerialization.cpp new file mode 100644 index 0000000000..72fa678f47 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/TypeSerialization.cpp @@ -0,0 +1,54 @@ +// This code is in the public domain -- Ignacio Castaņo <castano@gmail.com> + +#include "TypeSerialization.h" + +#include "nvcore/Stream.h" + +#include "nvmath/Vector.h" +#include "nvmath/Matrix.h" +#include "nvmath/Quaternion.h" +#include "nvmath/Basis.h" +#include "nvmath/Box.h" +#include "nvmath/Plane.inl" + +using namespace nv; + +Stream & nv::operator<< (Stream & s, Vector2 & v) +{ + return s << v.x << v.y; +} + +Stream & nv::operator<< (Stream & s, Vector3 & v) +{ + return s << v.x << v.y << v.z; +} + +Stream & nv::operator<< (Stream & s, Vector4 & v) +{ + return s << v.x << v.y << v.z << v.w; +} + +Stream & nv::operator<< (Stream & s, Matrix & m) +{ + return s; +} + +Stream & nv::operator<< (Stream & s, Quaternion & q) +{ + return s << q.x << q.y << q.z << q.w; +} + +Stream & nv::operator<< (Stream & s, Basis & basis) +{ + return s << basis.tangent << basis.bitangent << basis.normal; +} + +Stream & nv::operator<< (Stream & s, Box & box) +{ + return s << box.minCorner << box.maxCorner; +} + +Stream & nv::operator<< (Stream & s, Plane & plane) +{ + return s << plane.v; +} diff --git a/thirdparty/thekla_atlas/nvmath/TypeSerialization.h b/thirdparty/thekla_atlas/nvmath/TypeSerialization.h new file mode 100644 index 
0000000000..32d6de827e --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/TypeSerialization.h @@ -0,0 +1,35 @@ +// This code is in the public domain -- Ignacio Castaño <castano@gmail.com> + +#pragma once +#ifndef NV_MATH_TYPESERIALIZATION_H +#define NV_MATH_TYPESERIALIZATION_H + +#include "nvmath.h" + +namespace nv +{ + class Stream; + + class Vector2; + class Vector3; + class Vector4; + + class Matrix; + class Quaternion; + class Basis; + class Box; + class Plane; + + NVMATH_API Stream & operator<< (Stream & s, Vector2 & obj); + NVMATH_API Stream & operator<< (Stream & s, Vector3 & obj); + NVMATH_API Stream & operator<< (Stream & s, Vector4 & obj); + + NVMATH_API Stream & operator<< (Stream & s, Matrix & obj); + NVMATH_API Stream & operator<< (Stream & s, Quaternion & obj); + NVMATH_API Stream & operator<< (Stream & s, Basis & obj); + NVMATH_API Stream & operator<< (Stream & s, Box & obj); + NVMATH_API Stream & operator<< (Stream & s, Plane & obj); + +} // nv namespace + +#endif // NV_MATH_TYPESERIALIZATION_H diff --git a/thirdparty/thekla_atlas/nvmath/Vector.cpp b/thirdparty/thekla_atlas/nvmath/Vector.cpp new file mode 100644 index 0000000000..9122a1b0e9 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Vector.cpp @@ -0,0 +1,4 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include "Vector.h" +#include "Vector.inl" diff --git a/thirdparty/thekla_atlas/nvmath/Vector.h b/thirdparty/thekla_atlas/nvmath/Vector.h new file mode 100644 index 0000000000..ad18672a8a --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Vector.h @@ -0,0 +1,149 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_VECTOR_H +#define NV_MATH_VECTOR_H + +#include "nvmath.h" + +namespace nv +{ + class NVMATH_CLASS Vector2 + { + public: + typedef Vector2 const & Arg; + + Vector2(); + explicit Vector2(float f); + Vector2(float x, float y); + Vector2(Vector2::Arg v); + + //template <typename T> explicit Vector2(const T & v) : x(v.x), y(v.y)
{} + //template <typename T> operator T() const { return T(x, y); } + + const Vector2 & operator=(Vector2::Arg v); + + const float * ptr() const; + + void set(float x, float y); + + Vector2 operator-() const; + void operator+=(Vector2::Arg v); + void operator-=(Vector2::Arg v); + void operator*=(float s); + void operator*=(Vector2::Arg v); + + friend bool operator==(Vector2::Arg a, Vector2::Arg b); + friend bool operator!=(Vector2::Arg a, Vector2::Arg b); + + union { + struct { + float x, y; + }; + float component[2]; + }; + }; + + class NVMATH_CLASS Vector3 + { + public: + typedef Vector3 const & Arg; + + Vector3(); + explicit Vector3(float x); + //explicit Vector3(int x) : x(float(x)), y(float(x)), z(float(x)) {} + Vector3(float x, float y, float z); + Vector3(Vector2::Arg v, float z); + Vector3(Vector3::Arg v); + + //template <typename T> explicit Vector3(const T & v) : x(v.x), y(v.y), z(v.z) {} + //template <typename T> operator T() const { return T(x, y, z); } + + const Vector3 & operator=(Vector3::Arg v); + + Vector2 xy() const; + + const float * ptr() const; + + void set(float x, float y, float z); + + Vector3 operator-() const; + void operator+=(Vector3::Arg v); + void operator-=(Vector3::Arg v); + void operator*=(float s); + void operator/=(float s); + void operator*=(Vector3::Arg v); + void operator/=(Vector3::Arg v); + + friend bool operator==(Vector3::Arg a, Vector3::Arg b); + friend bool operator!=(Vector3::Arg a, Vector3::Arg b); + + union { + struct { + float x, y, z; + }; + float component[3]; + }; + }; + + class NVMATH_CLASS Vector4 + { + public: + typedef Vector4 const & Arg; + + Vector4(); + explicit Vector4(float x); + Vector4(float x, float y, float z, float w); + Vector4(Vector2::Arg v, float z, float w); + Vector4(Vector2::Arg v, Vector2::Arg u); + Vector4(Vector3::Arg v, float w); + Vector4(Vector4::Arg v); + // Vector4(const Quaternion & v); + + //template <typename T> explicit Vector4(const T & v) : x(v.x), y(v.y), z(v.z), w(v.w) {} + 
//template <typename T> operator T() const { return T(x, y, z, w); } + + const Vector4 & operator=(Vector4::Arg v); + + Vector2 xy() const; + Vector2 zw() const; + Vector3 xyz() const; + + const float * ptr() const; + + void set(float x, float y, float z, float w); + + Vector4 operator-() const; + void operator+=(Vector4::Arg v); + void operator-=(Vector4::Arg v); + void operator*=(float s); + void operator/=(float s); + void operator*=(Vector4::Arg v); + void operator/=(Vector4::Arg v); + + friend bool operator==(Vector4::Arg a, Vector4::Arg b); + friend bool operator!=(Vector4::Arg a, Vector4::Arg b); + + union { + struct { + float x, y, z, w; + }; + float component[4]; + }; + }; + +} // nv namespace + +// If we had these functions, they would be ambiguous, the compiler would not know which one to pick: +//template <typename T> Vector2 to(const T & v) { return Vector2(v.x, v.y); } +//template <typename T> Vector3 to(const T & v) { return Vector3(v.x, v.y, v.z); } +//template <typename T> Vector4 to(const T & v) { return Vector4(v.x, v.y, v.z, v.w); } + +// We could use a cast operator so that we could infer the expected type, but that doesn't work the same way in all compilers and produces horrible error messages.
+ +// Instead we simply have explicit casts: +template <typename T> T to(const nv::Vector2 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector2)); return T(v.x, v.y); } +template <typename T> T to(const nv::Vector3 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector3)); return T(v.x, v.y, v.z); } +template <typename T> T to(const nv::Vector4 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector4)); return T(v.x, v.y, v.z, v.w); } + +#endif // NV_MATH_VECTOR_H diff --git a/thirdparty/thekla_atlas/nvmath/Vector.inl b/thirdparty/thekla_atlas/nvmath/Vector.inl new file mode 100644 index 0000000000..bcaec7bf2a --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/Vector.inl @@ -0,0 +1,919 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_VECTOR_INL +#define NV_MATH_VECTOR_INL + +#include "Vector.h" +#include "nvcore/Utils.h" // min, max +#include "nvcore/Hash.h" // hash + +namespace nv +{ + + // Helpers to convert vector types. Assume T has x,y members and 2 argument constructor. + //template <typename T> T to(Vector2::Arg v) { return T(v.x, v.y); } + + // Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor. + //template <typename T> T to(Vector3::Arg v) { return T(v.x, v.y, v.z); } + + // Helpers to convert vector types. Assume T has x,y,z,w members and 4 argument constructor.
+ //template <typename T> T to(Vector4::Arg v) { return T(v.x, v.y, v.z, v.w); } + + + // Vector2 + inline Vector2::Vector2() {} + inline Vector2::Vector2(float f) : x(f), y(f) {} + inline Vector2::Vector2(float x, float y) : x(x), y(y) {} + inline Vector2::Vector2(Vector2::Arg v) : x(v.x), y(v.y) {} + + inline const Vector2 & Vector2::operator=(Vector2::Arg v) + { + x = v.x; + y = v.y; + return *this; + } + + inline const float * Vector2::ptr() const + { + return &x; + } + + inline void Vector2::set(float x, float y) + { + this->x = x; + this->y = y; + } + + inline Vector2 Vector2::operator-() const + { + return Vector2(-x, -y); + } + + inline void Vector2::operator+=(Vector2::Arg v) + { + x += v.x; + y += v.y; + } + + inline void Vector2::operator-=(Vector2::Arg v) + { + x -= v.x; + y -= v.y; + } + + inline void Vector2::operator*=(float s) + { + x *= s; + y *= s; + } + + inline void Vector2::operator*=(Vector2::Arg v) + { + x *= v.x; + y *= v.y; + } + + inline bool operator==(Vector2::Arg a, Vector2::Arg b) + { + return a.x == b.x && a.y == b.y; + } + inline bool operator!=(Vector2::Arg a, Vector2::Arg b) + { + return a.x != b.x || a.y != b.y; + } + + + // Vector3 + inline Vector3::Vector3() {} + inline Vector3::Vector3(float f) : x(f), y(f), z(f) {} + inline Vector3::Vector3(float x, float y, float z) : x(x), y(y), z(z) {} + inline Vector3::Vector3(Vector2::Arg v, float z) : x(v.x), y(v.y), z(z) {} + inline Vector3::Vector3(Vector3::Arg v) : x(v.x), y(v.y), z(v.z) {} + + inline const Vector3 & Vector3::operator=(Vector3::Arg v) + { + x = v.x; + y = v.y; + z = v.z; + return *this; + } + + + inline Vector2 Vector3::xy() const + { + return Vector2(x, y); + } + + inline const float * Vector3::ptr() const + { + return &x; + } + + inline void Vector3::set(float x, float y, float z) + { + this->x = x; + this->y = y; + this->z = z; + } + + inline Vector3 Vector3::operator-() const + { + return Vector3(-x, -y, -z); + } + + inline void Vector3::operator+=(Vector3::Arg v) 
+ { + x += v.x; + y += v.y; + z += v.z; + } + + inline void Vector3::operator-=(Vector3::Arg v) + { + x -= v.x; + y -= v.y; + z -= v.z; + } + + inline void Vector3::operator*=(float s) + { + x *= s; + y *= s; + z *= s; + } + + inline void Vector3::operator/=(float s) + { + float is = 1.0f / s; + x *= is; + y *= is; + z *= is; + } + + inline void Vector3::operator*=(Vector3::Arg v) + { + x *= v.x; + y *= v.y; + z *= v.z; + } + + inline void Vector3::operator/=(Vector3::Arg v) + { + x /= v.x; + y /= v.y; + z /= v.z; + } + + inline bool operator==(Vector3::Arg a, Vector3::Arg b) + { + return a.x == b.x && a.y == b.y && a.z == b.z; + } + inline bool operator!=(Vector3::Arg a, Vector3::Arg b) + { + return a.x != b.x || a.y != b.y || a.z != b.z; + } + + + // Vector4 + inline Vector4::Vector4() {} + inline Vector4::Vector4(float f) : x(f), y(f), z(f), w(f) {} + inline Vector4::Vector4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {} + inline Vector4::Vector4(Vector2::Arg v, float z, float w) : x(v.x), y(v.y), z(z), w(w) {} + inline Vector4::Vector4(Vector2::Arg v, Vector2::Arg u) : x(v.x), y(v.y), z(u.x), w(u.y) {} + inline Vector4::Vector4(Vector3::Arg v, float w) : x(v.x), y(v.y), z(v.z), w(w) {} + inline Vector4::Vector4(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {} + + inline const Vector4 & Vector4::operator=(const Vector4 & v) + { + x = v.x; + y = v.y; + z = v.z; + w = v.w; + return *this; + } + + inline Vector2 Vector4::xy() const + { + return Vector2(x, y); + } + + inline Vector2 Vector4::zw() const + { + return Vector2(z, w); + } + + inline Vector3 Vector4::xyz() const + { + return Vector3(x, y, z); + } + + inline const float * Vector4::ptr() const + { + return &x; + } + + inline void Vector4::set(float x, float y, float z, float w) + { + this->x = x; + this->y = y; + this->z = z; + this->w = w; + } + + inline Vector4 Vector4::operator-() const + { + return Vector4(-x, -y, -z, -w); + } + + inline void Vector4::operator+=(Vector4::Arg v) + { + 
x += v.x; + y += v.y; + z += v.z; + w += v.w; + } + + inline void Vector4::operator-=(Vector4::Arg v) + { + x -= v.x; + y -= v.y; + z -= v.z; + w -= v.w; + } + + inline void Vector4::operator*=(float s) + { + x *= s; + y *= s; + z *= s; + w *= s; + } + + inline void Vector4::operator/=(float s) + { + x /= s; + y /= s; + z /= s; + w /= s; + } + + inline void Vector4::operator*=(Vector4::Arg v) + { + x *= v.x; + y *= v.y; + z *= v.z; + w *= v.w; + } + + inline void Vector4::operator/=(Vector4::Arg v) + { + x /= v.x; + y /= v.y; + z /= v.z; + w /= v.w; + } + + inline bool operator==(Vector4::Arg a, Vector4::Arg b) + { + return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; + } + inline bool operator!=(Vector4::Arg a, Vector4::Arg b) + { + return a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w; + } + + + + // Functions + + + // Vector2 + + inline Vector2 add(Vector2::Arg a, Vector2::Arg b) + { + return Vector2(a.x + b.x, a.y + b.y); + } + inline Vector2 operator+(Vector2::Arg a, Vector2::Arg b) + { + return add(a, b); + } + + inline Vector2 sub(Vector2::Arg a, Vector2::Arg b) + { + return Vector2(a.x - b.x, a.y - b.y); + } + inline Vector2 operator-(Vector2::Arg a, Vector2::Arg b) + { + return sub(a, b); + } + + inline Vector2 scale(Vector2::Arg v, float s) + { + return Vector2(v.x * s, v.y * s); + } + + inline Vector2 scale(Vector2::Arg v, Vector2::Arg s) + { + return Vector2(v.x * s.x, v.y * s.y); + } + + inline Vector2 operator*(Vector2::Arg v, float s) + { + return scale(v, s); + } + + inline Vector2 operator*(Vector2::Arg v1, Vector2::Arg v2) + { + return Vector2(v1.x*v2.x, v1.y*v2.y); + } + + inline Vector2 operator*(float s, Vector2::Arg v) + { + return scale(v, s); + } + + inline Vector2 operator/(Vector2::Arg v, float s) + { + return scale(v, 1.0f/s); + } + + inline Vector2 lerp(Vector2::Arg v1, Vector2::Arg v2, float t) + { + const float s = 1.0f - t; + return Vector2(v1.x * s + t * v2.x, v1.y * s + t * v2.y); + } + + inline float dot(Vector2::Arg 
a, Vector2::Arg b) + { + return a.x * b.x + a.y * b.y; + } + + inline float lengthSquared(Vector2::Arg v) + { + return v.x * v.x + v.y * v.y; + } + + inline float length(Vector2::Arg v) + { + return sqrtf(lengthSquared(v)); + } + + inline float distance(Vector2::Arg a, Vector2::Arg b) + { + return length(a - b); + } + + inline float inverseLength(Vector2::Arg v) + { + return 1.0f / sqrtf(lengthSquared(v)); + } + + inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON) + { + return equal(length(v), 1, epsilon); + } + + inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON) + { + float l = length(v); + nvDebugCheck(!isZero(l, epsilon)); + Vector2 n = scale(v, 1.0f / l); + nvDebugCheck(isNormalized(n)); + return n; + } + + inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON) + { + float l = length(v); + if (isZero(l, epsilon)) { + return fallback; + } + return scale(v, 1.0f / l); + } + + // Safe, branchless normalization from Andy Firth. All error checking omitted.
+ // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/ + inline Vector2 normalizeFast(Vector2::Arg v) + { + const float very_small_float = 1.0e-037f; + float l = very_small_float + length(v); + return scale(v, 1.0f / l); + } + + inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON) + { + return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon); + } + + inline Vector2 min(Vector2::Arg a, Vector2::Arg b) + { + return Vector2(min(a.x, b.x), min(a.y, b.y)); + } + + inline Vector2 max(Vector2::Arg a, Vector2::Arg b) + { + return Vector2(max(a.x, b.x), max(a.y, b.y)); + } + + inline Vector2 clamp(Vector2::Arg v, float min, float max) + { + return Vector2(clamp(v.x, min, max), clamp(v.y, min, max)); + } + + inline Vector2 saturate(Vector2::Arg v) + { + return Vector2(saturate(v.x), saturate(v.y)); + } + + inline bool isFinite(Vector2::Arg v) + { + return isFinite(v.x) && isFinite(v.y); + } + + inline Vector2 validate(Vector2::Arg v, Vector2::Arg fallback = Vector2(0.0f)) + { + if (!isFinite(v)) return fallback; + Vector2 vf = v; + nv::floatCleanup(vf.component, 2); + return vf; + } + + // Note, this is the area scaled by 2! + inline float triangleArea(Vector2::Arg v0, Vector2::Arg v1) + { + return (v0.x * v1.y - v0.y * v1.x); // * 0.5f; + } + inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c) + { + // IC: While it may be appealing to use the following expression: + //return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y); // * 0.5f; + + // That's actually a terrible idea. Small triangles far from the origin can end up producing fairly large floating point + // numbers and the results becomes very unstable and dependent on the order of the factors. + + // Instead, it's preferable to subtract the vertices first, and multiply the resulting small values together. 
The result + // in this case is always much more accurate (as long as the triangle is small) and less dependent of the location of + // the triangle. + + //return ((a.x - c.x) * (b.y - c.y) - (a.y - c.y) * (b.x - c.x)); // * 0.5f; + return triangleArea(a-c, b-c); + } + + + template <> + inline uint hash(const Vector2 & v, uint h) + { + return sdbmFloatHash(v.component, 2, h); + } + + + + // Vector3 + + inline Vector3 add(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(a.x + b.x, a.y + b.y, a.z + b.z); + } + inline Vector3 add(Vector3::Arg a, float b) + { + return Vector3(a.x + b, a.y + b, a.z + b); + } + inline Vector3 operator+(Vector3::Arg a, Vector3::Arg b) + { + return add(a, b); + } + inline Vector3 operator+(Vector3::Arg a, float b) + { + return add(a, b); + } + + inline Vector3 sub(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(a.x - b.x, a.y - b.y, a.z - b.z); + } + inline Vector3 sub(Vector3::Arg a, float b) + { + return Vector3(a.x - b, a.y - b, a.z - b); + } + inline Vector3 operator-(Vector3::Arg a, Vector3::Arg b) + { + return sub(a, b); + } + inline Vector3 operator-(Vector3::Arg a, float b) + { + return sub(a, b); + } + + inline Vector3 cross(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); + } + + inline Vector3 scale(Vector3::Arg v, float s) + { + return Vector3(v.x * s, v.y * s, v.z * s); + } + + inline Vector3 scale(Vector3::Arg v, Vector3::Arg s) + { + return Vector3(v.x * s.x, v.y * s.y, v.z * s.z); + } + + inline Vector3 operator*(Vector3::Arg v, float s) + { + return scale(v, s); + } + + inline Vector3 operator*(float s, Vector3::Arg v) + { + return scale(v, s); + } + + inline Vector3 operator*(Vector3::Arg v, Vector3::Arg s) + { + return scale(v, s); + } + + inline Vector3 operator/(Vector3::Arg v, float s) + { + return scale(v, 1.0f/s); + } + + /*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, float s) + { + return Vector3(a.x + b.x * s, a.y + b.y 
* s, a.z + b.z * s); + }*/ + + inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, float t) + { + const float s = 1.0f - t; + return Vector3(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z); + } + + inline float dot(Vector3::Arg a, Vector3::Arg b) + { + return a.x * b.x + a.y * b.y + a.z * b.z; + } + + inline float lengthSquared(Vector3::Arg v) + { + return v.x * v.x + v.y * v.y + v.z * v.z; + } + + inline float length(Vector3::Arg v) + { + return sqrtf(lengthSquared(v)); + } + + inline float distance(Vector3::Arg a, Vector3::Arg b) + { + return length(a - b); + } + + inline float distanceSquared(Vector3::Arg a, Vector3::Arg b) + { + return lengthSquared(a - b); + } + + inline float inverseLength(Vector3::Arg v) + { + return 1.0f / sqrtf(lengthSquared(v)); + } + + inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON) + { + return equal(length(v), 1, epsilon); + } + + inline Vector3 normalize(Vector3::Arg v, float epsilon = NV_EPSILON) + { + float l = length(v); + nvDebugCheck(!isZero(l, epsilon)); + Vector3 n = scale(v, 1.0f / l); + nvDebugCheck(isNormalized(n)); + return n; + } + + inline Vector3 normalizeSafe(Vector3::Arg v, Vector3::Arg fallback, float epsilon = NV_EPSILON) + { + float l = length(v); + if (isZero(l, epsilon)) { + return fallback; + } + return scale(v, 1.0f / l); + } + + // Safe, branchless normalization from Andy Firth. All error checking omitted.
+ // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/ + inline Vector3 normalizeFast(Vector3::Arg v) + { + const float very_small_float = 1.0e-037f; + float l = very_small_float + length(v); + return scale(v, 1.0f / l); + } + + inline bool equal(Vector3::Arg v1, Vector3::Arg v2, float epsilon = NV_EPSILON) + { + return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon); + } + + inline Vector3 min(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); + } + + inline Vector3 max(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); + } + + inline Vector3 clamp(Vector3::Arg v, float min, float max) + { + return Vector3(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max)); + } + + inline Vector3 saturate(Vector3::Arg v) + { + return Vector3(saturate(v.x), saturate(v.y), saturate(v.z)); + } + + inline Vector3 floor(Vector3::Arg v) + { + return Vector3(floorf(v.x), floorf(v.y), floorf(v.z)); + } + + inline Vector3 ceil(Vector3::Arg v) + { + return Vector3(ceilf(v.x), ceilf(v.y), ceilf(v.z)); + } + + inline bool isFinite(Vector3::Arg v) + { + return isFinite(v.x) && isFinite(v.y) && isFinite(v.z); + } + + inline Vector3 validate(Vector3::Arg v, Vector3::Arg fallback = Vector3(0.0f)) + { + if (!isFinite(v)) return fallback; + Vector3 vf = v; + nv::floatCleanup(vf.component, 3); + return vf; + } + + inline Vector3 reflect(Vector3::Arg v, Vector3::Arg n) + { + return v - (2 * dot(v, n)) * n; + } + + template <> + inline uint hash(const Vector3 & v, uint h) + { + return sdbmFloatHash(v.component, 3, h); + } + + + // Vector4 + + inline Vector4 add(Vector4::Arg a, Vector4::Arg b) + { + return Vector4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + } + inline Vector4 operator+(Vector4::Arg a, Vector4::Arg b) + { + return add(a, b); + } + + inline Vector4 sub(Vector4::Arg a, Vector4::Arg b) + { + return Vector4(a.x - b.x, a.y - 
b.y, a.z - b.z, a.w - b.w); + } + inline Vector4 operator-(Vector4::Arg a, Vector4::Arg b) + { + return sub(a, b); + } + + inline Vector4 scale(Vector4::Arg v, float s) + { + return Vector4(v.x * s, v.y * s, v.z * s, v.w * s); + } + + inline Vector4 scale(Vector4::Arg v, Vector4::Arg s) + { + return Vector4(v.x * s.x, v.y * s.y, v.z * s.z, v.w * s.w); + } + + inline Vector4 operator*(Vector4::Arg v, float s) + { + return scale(v, s); + } + + inline Vector4 operator*(float s, Vector4::Arg v) + { + return scale(v, s); + } + + inline Vector4 operator*(Vector4::Arg v, Vector4::Arg s) + { + return scale(v, s); + } + + inline Vector4 operator/(Vector4::Arg v, float s) + { + return scale(v, 1.0f/s); + } + + /*inline Vector4 add_scaled(Vector4::Arg a, Vector4::Arg b, float s) + { + return Vector4(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s, a.w + b.w * s); + }*/ + + inline Vector4 lerp(Vector4::Arg v1, Vector4::Arg v2, float t) + { + const float s = 1.0f - t; + return Vector4(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z, v1.w * s + t * v2.w); + } + + inline float dot(Vector4::Arg a, Vector4::Arg b) + { + return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; + } + + inline float lengthSquared(Vector4::Arg v) + { + return v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w; + } + + inline float length(Vector4::Arg v) + { + return sqrtf(lengthSquared(v)); + } + + inline float inverseLength(Vector4::Arg v) + { + return 1.0f / sqrtf(lengthSquared(v)); + } + + inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON) + { + return equal(length(v), 1, epsilon); + } + + inline Vector4 normalize(Vector4::Arg v, float epsilon = NV_EPSILON) + { + float l = length(v); + nvDebugCheck(!isZero(l, epsilon)); + Vector4 n = scale(v, 1.0f / l); + nvDebugCheck(isNormalized(n)); + return n; + } + + inline Vector4 normalizeSafe(Vector4::Arg v, Vector4::Arg fallback, float epsilon = NV_EPSILON) + { + float l = length(v); + if (isZero(l, epsilon)) { + return fallback; 
+ } + return scale(v, 1.0f / l); + } + + // Safe, branchless normalization from Andy Firth. All error checking omitted. + // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/ + inline Vector4 normalizeFast(Vector4::Arg v) + { + const float very_small_float = 1.0e-037f; + float l = very_small_float + length(v); + return scale(v, 1.0f / l); + } + + inline bool equal(Vector4::Arg v1, Vector4::Arg v2, float epsilon = NV_EPSILON) + { + return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon) && equal(v1.w, v2.w, epsilon); + } + + inline Vector4 min(Vector4::Arg a, Vector4::Arg b) + { + return Vector4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); + } + + inline Vector4 max(Vector4::Arg a, Vector4::Arg b) + { + return Vector4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); + } + + inline Vector4 clamp(Vector4::Arg v, float min, float max) + { + return Vector4(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max), clamp(v.w, min, max)); + } + + inline Vector4 saturate(Vector4::Arg v) + { + return Vector4(saturate(v.x), saturate(v.y), saturate(v.z), saturate(v.w)); + } + + inline bool isFinite(Vector4::Arg v) + { + return isFinite(v.x) && isFinite(v.y) && isFinite(v.z) && isFinite(v.w); + } + + inline Vector4 validate(Vector4::Arg v, Vector4::Arg fallback = Vector4(0.0f)) + { + if (!isFinite(v)) return fallback; + Vector4 vf = v; + nv::floatCleanup(vf.component, 4); + return vf; + } + + template <> + inline uint hash(const Vector4 & v, uint h) + { + return sdbmFloatHash(v.component, 4, h); + } + + +#if NV_OS_IOS // LLVM is not happy with implicit conversion of immediate constants to float + + //int: + + inline Vector2 scale(Vector2::Arg v, int s) + { + return Vector2(v.x * s, v.y * s); + } + + inline Vector2 operator*(Vector2::Arg v, int s) + { + return scale(v, s); + } + + inline Vector2 operator*(int s, Vector2::Arg v) + { + return scale(v, s); + } + + inline Vector2
operator/(Vector2::Arg v, int s) + { + return scale(v, 1.0f/s); + } + + inline Vector3 scale(Vector3::Arg v, int s) + { + return Vector3(v.x * s, v.y * s, v.z * s); + } + + inline Vector3 operator*(Vector3::Arg v, int s) + { + return scale(v, s); + } + + inline Vector3 operator*(int s, Vector3::Arg v) + { + return scale(v, s); + } + + inline Vector3 operator/(Vector3::Arg v, int s) + { + return scale(v, 1.0f/s); + } + + inline Vector4 scale(Vector4::Arg v, int s) + { + return Vector4(v.x * s, v.y * s, v.z * s, v.w * s); + } + + inline Vector4 operator*(Vector4::Arg v, int s) + { + return scale(v, s); + } + + inline Vector4 operator*(int s, Vector4::Arg v) + { + return scale(v, s); + } + + inline Vector4 operator/(Vector4::Arg v, int s) + { + return scale(v, 1.0f/s); + } + + //double: + + inline Vector3 operator*(Vector3::Arg v, double s) + { + return scale(v, (float)s); + } + + inline Vector3 operator*(double s, Vector3::Arg v) + { + return scale(v, (float)s); + } + + inline Vector3 operator/(Vector3::Arg v, double s) + { + return scale(v, 1.f/((float)s)); + } + +#endif //NV_OS_IOS + +} // nv namespace + +#endif // NV_MATH_VECTOR_INL diff --git a/thirdparty/thekla_atlas/nvmath/ftoi.h b/thirdparty/thekla_atlas/nvmath/ftoi.h new file mode 100644 index 0000000000..182c56d1c3 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/ftoi.h @@ -0,0 +1,261 @@ +// This code is in the public domain -- castano@gmail.com + +#pragma once +#ifndef NV_MATH_FTOI_H +#define NV_MATH_FTOI_H + +#include "nvmath/nvmath.h" + +#include <math.h> + +namespace nv +{ + // Optimized float to int conversions. 
See: + // http://cbloomrants.blogspot.com/2009/01/01-17-09-float-to-int.html + // http://www.stereopsis.com/sree/fpu2006.html + // http://assemblyrequired.crashworks.org/2009/01/12/why-you-should-never-cast-floats-to-ints/ + // http://chrishecker.com/Miscellaneous_Technical_Articles#Floating_Point + + + union DoubleAnd64 { + uint64 i; + double d; + }; + + static const double floatutil_xs_doublemagic = (6755399441055744.0); // 2^52 * 1.5 + static const double floatutil_xs_doublemagicdelta = (1.5e-8); // almost .5f = .5f + 1e^(number of exp bit) + static const double floatutil_xs_doublemagicroundeps = (0.5f - floatutil_xs_doublemagicdelta); // almost .5f = .5f - 1e^(number of exp bit) + + NV_FORCEINLINE int ftoi_round_xs(double val, double magic) { +#if 1 + DoubleAnd64 dunion; + dunion.d = val + magic; + return (int32) dunion.i; // just cast to grab the bottom bits +#else + val += magic; + return ((int*)&val)[0]; // @@ Assumes little endian. +#endif + } + + NV_FORCEINLINE int ftoi_round_xs(float val) { + return ftoi_round_xs(val, floatutil_xs_doublemagic); + } + + NV_FORCEINLINE int ftoi_floor_xs(float val) { + return ftoi_round_xs(val - floatutil_xs_doublemagicroundeps, floatutil_xs_doublemagic); + } + + NV_FORCEINLINE int ftoi_ceil_xs(float val) { + return ftoi_round_xs(val + floatutil_xs_doublemagicroundeps, floatutil_xs_doublemagic); + } + + NV_FORCEINLINE int ftoi_trunc_xs(float val) { + return (val<0) ? ftoi_ceil_xs(val) : ftoi_floor_xs(val); + } + +// -- GODOT start -- +//#if NV_CPU_X86 || NV_CPU_X86_64 +#if NV_USE_SSE +// -- GODOT end -- + + NV_FORCEINLINE int ftoi_round_sse(float f) { + return _mm_cvt_ss2si(_mm_set_ss(f)); + } + + NV_FORCEINLINE int ftoi_trunc_sse(float f) { + return _mm_cvtt_ss2si(_mm_set_ss(f)); + } + +#endif + + + +#if NV_USE_SSE + + NV_FORCEINLINE int ftoi_round(float val) { + return ftoi_round_sse(val); + } + + NV_FORCEINLINE int ftoi_trunc(float f) { + return ftoi_trunc_sse(f); + } + + // We can probably do better than this. 
See for example: + // http://dss.stephanierct.com/DevBlog/?p=8 + NV_FORCEINLINE int ftoi_floor(float val) { + return ftoi_round(floorf(val)); + } + + NV_FORCEINLINE int ftoi_ceil(float val) { + return ftoi_round(ceilf(val)); + } + +#else + + // In theory this should work with any double floating point math implementation, but it appears that MSVC produces incorrect code + // when SSE2 is targeted and fast math is enabled (/arch:SSE2 & /fp:fast). These problems go away with /fp:precise, which is the default mode. + + NV_FORCEINLINE int ftoi_round(float val) { + return ftoi_round_xs(val); + } + + NV_FORCEINLINE int ftoi_floor(float val) { + return ftoi_floor_xs(val); + } + + NV_FORCEINLINE int ftoi_ceil(float val) { + return ftoi_ceil_xs(val); + } + + NV_FORCEINLINE int ftoi_trunc(float f) { + return ftoi_trunc_xs(f); + } + +#endif + + + inline void test_ftoi() { + + // Round to nearest integer. + nvCheck(ftoi_round(0.1f) == 0); + nvCheck(ftoi_round(0.6f) == 1); + nvCheck(ftoi_round(-0.2f) == 0); + nvCheck(ftoi_round(-0.7f) == -1); + nvCheck(ftoi_round(10.1f) == 10); + nvCheck(ftoi_round(10.6f) == 11); + nvCheck(ftoi_round(-90.1f) == -90); + nvCheck(ftoi_round(-90.6f) == -91); + + nvCheck(ftoi_round(0) == 0); + nvCheck(ftoi_round(1) == 1); + nvCheck(ftoi_round(-1) == -1); + + nvCheck(ftoi_round(0.5f) == 0); // How are midpoints rounded? Bankers rounding. + nvCheck(ftoi_round(1.5f) == 2); + nvCheck(ftoi_round(2.5f) == 2); + nvCheck(ftoi_round(3.5f) == 4); + nvCheck(ftoi_round(4.5f) == 4); + nvCheck(ftoi_round(-0.5f) == 0); + nvCheck(ftoi_round(-1.5f) == -2); + + + // Truncation (round down if > 0, round up if < 0). + nvCheck(ftoi_trunc(0.1f) == 0); + nvCheck(ftoi_trunc(0.6f) == 0); + nvCheck(ftoi_trunc(-0.2f) == 0); + nvCheck(ftoi_trunc(-0.7f) == 0); // @@ When using /arch:SSE2 in Win32, msvc produce wrong code for this one. It is skipping the addition. + nvCheck(ftoi_trunc(1.99f) == 1); + nvCheck(ftoi_trunc(-1.2f) == -1); + + // Floor (round down). 
+ nvCheck(ftoi_floor(0.1f) == 0); + nvCheck(ftoi_floor(0.6f) == 0); + nvCheck(ftoi_floor(-0.2f) == -1); + nvCheck(ftoi_floor(-0.7f) == -1); + nvCheck(ftoi_floor(1.99f) == 1); + nvCheck(ftoi_floor(-1.2f) == -2); + + nvCheck(ftoi_floor(0) == 0); + nvCheck(ftoi_floor(1) == 1); + nvCheck(ftoi_floor(-1) == -1); + nvCheck(ftoi_floor(2) == 2); + nvCheck(ftoi_floor(-2) == -2); + + // Ceil (round up). + nvCheck(ftoi_ceil(0.1f) == 1); + nvCheck(ftoi_ceil(0.6f) == 1); + nvCheck(ftoi_ceil(-0.2f) == 0); + nvCheck(ftoi_ceil(-0.7f) == 0); + nvCheck(ftoi_ceil(1.99f) == 2); + nvCheck(ftoi_ceil(-1.2f) == -1); + + nvCheck(ftoi_ceil(0) == 0); + nvCheck(ftoi_ceil(1) == 1); + nvCheck(ftoi_ceil(-1) == -1); + nvCheck(ftoi_ceil(2) == 2); + nvCheck(ftoi_ceil(-2) == -2); + } + + + + + + // Safe versions using standard casts. + + inline int iround(float f) + { + return ftoi_round(f); + //return int(floorf(f + 0.5f)); + } + + inline int iround(double f) + { + return int(::floor(f + 0.5)); + } + + inline int ifloor(float f) + { + return ftoi_floor(f); + //return int(floorf(f)); + } + + inline int iceil(float f) + { + return int(ceilf(f)); + } + + + + // I'm always confused about which quantizer to use. I think we should choose a quantizer based on how the values are expanded later and this is generally using the 'exact endpoints' rule. + // Some notes from cbloom: http://cbloomrants.blogspot.com/2011/07/07-26-11-pixel-int-to-float-options.html + + // Quantize a float in the [0,1] range, using exact end points or uniform bins. 
+ inline float quantizeFloat(float x, uint bits, bool exactEndPoints = true) { + nvDebugCheck(bits <= 16); + + float range = float(1 << bits); + if (exactEndPoints) { + return floorf(x * (range-1) + 0.5f) / (range-1); + } + else { + return (floorf(x * range) + 0.5f) / range; + } + } + + + // This is the most common rounding mode: + // + // 0 1 2 3 + // |___|_______|_______|___| + // 0 1 + // + // You get that if you take the unit floating point number multiply by 'N-1' and round to nearest. That is, `i = round(f * (N-1))`. + // You reconstruct the original float dividing by 'N-1': `f = i / (N-1)` + + + // 0 1 2 3 + // |_____|_____|_____|_____| + // 0 1 + + /*enum BinningMode { + RoundMode_ExactEndPoints, + RoundMode_UniformBins, + };*/ + + template <int N> + inline uint unitFloatToFixed(float f) { + return ftoi_round(f * ((1<<N)-1)); + } + + inline uint8 unitFloatToFixed8(float f) { + return (uint8)unitFloatToFixed<8>(f); + } + + inline uint16 unitFloatToFixed16(float f) { + return (uint16)unitFloatToFixed<16>(f); + } + + +} // nv + +#endif // NV_MATH_FTOI_H diff --git a/thirdparty/thekla_atlas/nvmath/nvmath.h b/thirdparty/thekla_atlas/nvmath/nvmath.h new file mode 100644 index 0000000000..f2b69426e1 --- /dev/null +++ b/thirdparty/thekla_atlas/nvmath/nvmath.h @@ -0,0 +1,346 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_H +#define NV_MATH_H + +#include "nvcore/nvcore.h" +#include "nvcore/Debug.h" // nvDebugCheck +#include "nvcore/Utils.h" // max, clamp + +#include <math.h> + +#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO +#include <float.h> // finite, isnan +#endif + +// -- GODOT start -- +//#if NV_CPU_X86 || NV_CPU_X86_64 +// //#include <intrin.h> +// #include <xmmintrin.h> +//#endif +// -- GODOT end -- + + + +// Function linkage +#if NVMATH_SHARED +#ifdef NVMATH_EXPORTS +#define NVMATH_API DLL_EXPORT +#define NVMATH_CLASS DLL_EXPORT_CLASS +#else +#define NVMATH_API DLL_IMPORT +#define NVMATH_CLASS DLL_IMPORT 
+#endif +#else // NVMATH_SHARED +#define NVMATH_API +#define NVMATH_CLASS +#endif // NVMATH_SHARED + +// Set some reasonable defaults. +#ifndef NV_USE_ALTIVEC +# define NV_USE_ALTIVEC NV_CPU_PPC +//# define NV_USE_ALTIVEC defined(__VEC__) +#endif + +#ifndef NV_USE_SSE +# if NV_CPU_X86_64 + // x64 always supports at least SSE2 +# define NV_USE_SSE 2 +# elif NV_CC_MSVC && defined(_M_IX86_FP) + // Also on x86 with the /arch:SSE flag in MSVC. +# define NV_USE_SSE _M_IX86_FP // 1=SSE, 2=SS2 +# elif defined(__SSE__) +# define NV_USE_SSE 1 +# elif defined(__SSE2__) +# define NV_USE_SSE 2 +# else + // Otherwise we assume no SSE. +# define NV_USE_SSE 0 +# endif +#endif + + +// Internally set NV_USE_SIMD when either altivec or sse is available. +#if NV_USE_ALTIVEC && NV_USE_SSE +# error "Cannot enable both altivec and sse!" +#endif + + +// -- GODOT start -- +#if NV_USE_SSE + //#include <intrin.h> + #include <xmmintrin.h> +#endif +// -- GODOT end -- + + +#ifndef PI +#define PI float(3.1415926535897932384626433833) +#endif + +#define NV_EPSILON (0.0001f) +#define NV_NORMAL_EPSILON (0.001f) + +/* +#define SQ(r) ((r)*(r)) + +#define SIGN_BITMASK 0x80000000 + +/// Integer representation of a floating-point value. +#define IR(x) ((uint32 &)(x)) + +/// Absolute integer representation of a floating-point value +#define AIR(x) (IR(x) & 0x7fffffff) + +/// Floating-point representation of an integer value. +#define FR(x) ((float&)(x)) + +/// Integer-based comparison of a floating point value. +/// Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context. 
+#define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK) +*/ + +extern "C" inline double sqrt_assert(const double f) +{ + nvDebugCheck(f >= 0.0f); + return sqrt(f); +} + +inline float sqrtf_assert(const float f) +{ + nvDebugCheck(f >= 0.0f); + return sqrtf(f); +} + +extern "C" inline double acos_assert(const double f) +{ + nvDebugCheck(f >= -1.0f && f <= 1.0f); + return acos(f); +} + +inline float acosf_assert(const float f) +{ + nvDebugCheck(f >= -1.0f && f <= 1.0f); + return acosf(f); +} + +extern "C" inline double asin_assert(const double f) +{ + nvDebugCheck(f >= -1.0f && f <= 1.0f); + return asin(f); +} + +inline float asinf_assert(const float f) +{ + nvDebugCheck(f >= -1.0f && f <= 1.0f); + return asinf(f); +} + +// Replace default functions with asserting ones. +#if !NV_CC_MSVC || (NV_CC_MSVC && (_MSC_VER < 1700)) // IC: Apparently this was causing problems in Visual Studio 2012. See Issue 194: https://code.google.com/p/nvidia-texture-tools/issues/detail?id=194 +#define sqrt sqrt_assert +#define sqrtf sqrtf_assert +#define acos acos_assert +#define acosf acosf_assert +#define asin asin_assert +#define asinf asinf_assert +#endif + +#if NV_CC_MSVC +NV_FORCEINLINE float log2f(float x) +{ + nvCheck(x >= 0); + return logf(x) / logf(2.0f); +} +NV_FORCEINLINE float exp2f(float x) +{ + return powf(2.0f, x); +} +#endif + +namespace nv +{ + inline float toRadian(float degree) { return degree * (PI / 180.0f); } + inline float toDegree(float radian) { return radian * (180.0f / PI); } + + // Robust floating point comparisons: + // http://realtimecollisiondetection.net/blog/?p=89 + inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON) + { + //return fabs(f0-f1) <= epsilon; + return fabs(f0-f1) <= epsilon * max3(1.0f, fabsf(f0), fabsf(f1)); + } + + inline bool isZero(const float f, const float epsilon = NV_EPSILON) + { + return fabs(f) <= epsilon; + } + + inline bool isFinite(const float f) + { +#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO + 
return _finite(f) != 0; +#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS + return isfinite(f); +#elif NV_OS_LINUX + return finitef(f); +#else +# error "isFinite not supported" +#endif + //return std::isfinite (f); + //return finite (f); + } + + inline bool isNan(const float f) + { +#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO + return _isnan(f) != 0; +#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS + return isnan(f); +#elif NV_OS_LINUX + return isnanf(f); +#else +# error "isNan not supported" +#endif + } + + inline uint log2(uint32 i) + { + uint32 value = 0; + while( i >>= 1 ) value++; + return value; + } + + inline uint log2(uint64 i) + { + uint64 value = 0; + while (i >>= 1) value++; + return U32(value); + } + + inline float lerp(float f0, float f1, float t) + { + const float s = 1.0f - t; + return f0 * s + f1 * t; + } + + inline float square(float f) { return f * f; } + inline int square(int i) { return i * i; } + + inline float cube(float f) { return f * f * f; } + inline int cube(int i) { return i * i * i; } + + inline float frac(float f) + { + return f - floor(f); + } + + inline float floatRound(float f) + { + return floorf(f + 0.5f); + } + + // Eliminates negative zeros from a float array. 
+ inline void floatCleanup(float * fp, int n) + { + for (int i = 0; i < n; i++) { + //nvDebugCheck(isFinite(fp[i])); + union { float f; uint32 i; } x = { fp[i] }; + if (x.i == 0x80000000) fp[i] = 0.0f; + } + } + + inline float saturate(float f) { + return clamp(f, 0.0f, 1.0f); + } + + inline float linearstep(float edge0, float edge1, float x) { + // Scale, bias and saturate x to 0..1 range + return saturate((x - edge0) / (edge1 - edge0)); + } + + inline float smoothstep(float edge0, float edge1, float x) { + x = linearstep(edge0, edge1, x); + + // Evaluate polynomial + return x*x*(3 - 2*x); + } + + inline int sign(float a) + { + return (a > 0) - (a < 0); + //if (a > 0.0f) return 1; + //if (a < 0.0f) return -1; + //return 0; + } + + union Float754 { + unsigned int raw; + float value; + struct { + #if NV_BIG_ENDIAN + unsigned int negative:1; + unsigned int biasedexponent:8; + unsigned int mantissa:23; + #else + unsigned int mantissa:23; + unsigned int biasedexponent:8; + unsigned int negative:1; + #endif + } field; + }; + + // Return the exponent of x ~ Floor(Log2(x)) + inline int floatExponent(float x) + { + Float754 f; + f.value = x; + return (f.field.biasedexponent - 127); + } + + + // FloatRGB9E5 + union Float3SE { + uint32 v; + struct { + #if NV_BIG_ENDIAN + uint32 e : 5; + uint32 zm : 9; + uint32 ym : 9; + uint32 xm : 9; + #else + uint32 xm : 9; + uint32 ym : 9; + uint32 zm : 9; + uint32 e : 5; + #endif + }; + }; + + // FloatR11G11B10 + union Float3PK { + uint32 v; + struct { + #if NV_BIG_ENDIAN + uint32 ze : 5; + uint32 zm : 5; + uint32 ye : 5; + uint32 ym : 6; + uint32 xe : 5; + uint32 xm : 6; + #else + uint32 xm : 6; + uint32 xe : 5; + uint32 ym : 6; + uint32 ye : 5; + uint32 zm : 5; + uint32 ze : 5; + #endif + }; + }; + + +} // nv + +#endif // NV_MATH_H |