summaryrefslogtreecommitdiff
path: root/thirdparty/embree/common/math
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree/common/math')
-rw-r--r--thirdparty/embree/common/math/affinespace.h361
-rw-r--r--thirdparty/embree/common/math/bbox.h331
-rw-r--r--thirdparty/embree/common/math/col3.h47
-rw-r--r--thirdparty/embree/common/math/col4.h47
-rw-r--r--thirdparty/embree/common/math/color.h241
-rw-r--r--thirdparty/embree/common/math/constants.cpp27
-rw-r--r--thirdparty/embree/common/math/constants.h197
-rw-r--r--thirdparty/embree/common/math/interval.h161
-rw-r--r--thirdparty/embree/common/math/lbbox.h289
-rw-r--r--thirdparty/embree/common/math/linearspace2.h148
-rw-r--r--thirdparty/embree/common/math/linearspace3.h213
-rw-r--r--thirdparty/embree/common/math/math.h369
-rw-r--r--thirdparty/embree/common/math/obbox.h39
-rw-r--r--thirdparty/embree/common/math/quaternion.h254
-rw-r--r--thirdparty/embree/common/math/range.h137
-rw-r--r--thirdparty/embree/common/math/transcendental.h525
-rw-r--r--thirdparty/embree/common/math/vec2.h235
-rw-r--r--thirdparty/embree/common/math/vec2fa.h301
-rw-r--r--thirdparty/embree/common/math/vec3.h337
-rw-r--r--thirdparty/embree/common/math/vec3ba.h120
-rw-r--r--thirdparty/embree/common/math/vec3fa.h727
-rw-r--r--thirdparty/embree/common/math/vec3ia.h186
-rw-r--r--thirdparty/embree/common/math/vec4.h243
23 files changed, 5535 insertions, 0 deletions
diff --git a/thirdparty/embree/common/math/affinespace.h b/thirdparty/embree/common/math/affinespace.h
new file mode 100644
index 0000000000..9d4a0f0846
--- /dev/null
+++ b/thirdparty/embree/common/math/affinespace.h
@@ -0,0 +1,361 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "linearspace2.h"
+#include "linearspace3.h"
+#include "quaternion.h"
+#include "bbox.h"
+#include "vec4.h"
+
+namespace embree
+{
+ #define VectorT typename L::Vector
+ #define ScalarT typename L::Vector::Scalar
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Affine Space
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename L>
+ struct AffineSpaceT
+ {
+ L l; /*< linear part of affine space */
+ VectorT p; /*< affine part of affine space */
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Constructors, Assignment, Cast, Copy Operations
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline AffineSpaceT ( ) { }
+ __forceinline AffineSpaceT ( const AffineSpaceT& other ) { l = other.l; p = other.p; }
+ __forceinline AffineSpaceT ( const L & other ) { l = other ; p = VectorT(zero); }
+ __forceinline AffineSpaceT& operator=( const AffineSpaceT& other ) { l = other.l; p = other.p; return *this; }
+
+ __forceinline AffineSpaceT( const VectorT& vx, const VectorT& vy, const VectorT& vz, const VectorT& p ) : l(vx,vy,vz), p(p) {}
+ __forceinline AffineSpaceT( const L& l, const VectorT& p ) : l(l), p(p) {}
+
+ template<typename L1> __forceinline AffineSpaceT( const AffineSpaceT<L1>& s ) : l(s.l), p(s.p) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline AffineSpaceT( ZeroTy ) : l(zero), p(zero) {}
+ __forceinline AffineSpaceT( OneTy ) : l(one), p(zero) {}
+
+ /*! return matrix for scaling */
+ static __forceinline AffineSpaceT scale(const VectorT& s) { return L::scale(s); }
+
+ /*! return matrix for translation */
+ static __forceinline AffineSpaceT translate(const VectorT& p) { return AffineSpaceT(one,p); }
+
+ /*! return matrix for rotation, only in 2D */
+ static __forceinline AffineSpaceT rotate(const ScalarT& r) { return L::rotate(r); }
+
+ /*! return matrix for rotation around arbitrary point (2D) or axis (3D) */
+ static __forceinline AffineSpaceT rotate(const VectorT& u, const ScalarT& r) { return L::rotate(u,r); }
+
+ /*! return matrix for rotation around arbitrary axis and point, only in 3D */
+ static __forceinline AffineSpaceT rotate(const VectorT& p, const VectorT& u, const ScalarT& r) { return translate(+p) * rotate(u,r) * translate(-p); }
+
+ /*! return matrix for looking at given point, only in 3D */
+ static __forceinline AffineSpaceT lookat(const VectorT& eye, const VectorT& point, const VectorT& up) {
+ VectorT Z = normalize(point-eye);
+ VectorT U = normalize(cross(up,Z));
+ VectorT V = normalize(cross(Z,U));
+ return AffineSpaceT(L(U,V,Z),eye);
+ }
+
+ };
+
+ // template specialization to get correct identity matrix for type AffineSpace3fa
+ template<>
+ __forceinline AffineSpaceT<LinearSpace3ff>::AffineSpaceT( OneTy ) : l(one), p(0.f, 0.f, 0.f, 1.f) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename L> __forceinline AffineSpaceT<L> operator -( const AffineSpaceT<L>& a ) { return AffineSpaceT<L>(-a.l,-a.p); }
+ template<typename L> __forceinline AffineSpaceT<L> operator +( const AffineSpaceT<L>& a ) { return AffineSpaceT<L>(+a.l,+a.p); }
+ template<typename L> __forceinline AffineSpaceT<L> rcp( const AffineSpaceT<L>& a ) { L il = rcp(a.l); return AffineSpaceT<L>(il,-(il*a.p)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename L> __forceinline const AffineSpaceT<L> operator +( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l+b.l,a.p+b.p); }
+ template<typename L> __forceinline const AffineSpaceT<L> operator -( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l-b.l,a.p-b.p); }
+
+ template<typename L> __forceinline const AffineSpaceT<L> operator *( const ScalarT & a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a*b.l,a*b.p); }
+ template<typename L> __forceinline const AffineSpaceT<L> operator *( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l*b.l,a.l*b.p+a.p); }
+ template<typename L> __forceinline const AffineSpaceT<L> operator /( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a * rcp(b); }
+ template<typename L> __forceinline const AffineSpaceT<L> operator /( const AffineSpaceT<L>& a, const ScalarT & b ) { return a * rcp(b); }
+
+ template<typename L> __forceinline AffineSpaceT<L>& operator *=( AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a = a * b; }
+ template<typename L> __forceinline AffineSpaceT<L>& operator *=( AffineSpaceT<L>& a, const ScalarT & b ) { return a = a * b; }
+ template<typename L> __forceinline AffineSpaceT<L>& operator /=( AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a = a / b; }
+ template<typename L> __forceinline AffineSpaceT<L>& operator /=( AffineSpaceT<L>& a, const ScalarT & b ) { return a = a / b; }
+
+ template<typename L> __forceinline VectorT xfmPoint (const AffineSpaceT<L>& m, const VectorT& p) { return madd(VectorT(p.x),m.l.vx,madd(VectorT(p.y),m.l.vy,madd(VectorT(p.z),m.l.vz,m.p))); }
+ template<typename L> __forceinline VectorT xfmVector(const AffineSpaceT<L>& m, const VectorT& v) { return xfmVector(m.l,v); }
+ template<typename L> __forceinline VectorT xfmNormal(const AffineSpaceT<L>& m, const VectorT& n) { return xfmNormal(m.l,n); }
+
+ __forceinline const BBox<Vec3fa> xfmBounds(const AffineSpaceT<LinearSpace3<Vec3fa> >& m, const BBox<Vec3fa>& b)
+ {
+ BBox3fa dst = empty;
+ const Vec3fa p0(b.lower.x,b.lower.y,b.lower.z); dst.extend(xfmPoint(m,p0));
+ const Vec3fa p1(b.lower.x,b.lower.y,b.upper.z); dst.extend(xfmPoint(m,p1));
+ const Vec3fa p2(b.lower.x,b.upper.y,b.lower.z); dst.extend(xfmPoint(m,p2));
+ const Vec3fa p3(b.lower.x,b.upper.y,b.upper.z); dst.extend(xfmPoint(m,p3));
+ const Vec3fa p4(b.upper.x,b.lower.y,b.lower.z); dst.extend(xfmPoint(m,p4));
+ const Vec3fa p5(b.upper.x,b.lower.y,b.upper.z); dst.extend(xfmPoint(m,p5));
+ const Vec3fa p6(b.upper.x,b.upper.y,b.lower.z); dst.extend(xfmPoint(m,p6));
+ const Vec3fa p7(b.upper.x,b.upper.y,b.upper.z); dst.extend(xfmPoint(m,p7));
+ return dst;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename L> __forceinline bool operator ==( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a.l == b.l && a.p == b.p; }
+ template<typename L> __forceinline bool operator !=( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a.l != b.l || a.p != b.p; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename L> __forceinline AffineSpaceT<L> select ( const typename L::Vector::Scalar::Bool& s, const AffineSpaceT<L>& t, const AffineSpaceT<L>& f ) {
+ return AffineSpaceT<L>(select(s,t.l,f.l),select(s,t.p,f.p));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename L> static embree_ostream operator<<(embree_ostream cout, const AffineSpaceT<L>& m) {
+ return cout << "{ l = " << m.l << ", p = " << m.p << " }";
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Template Instantiations
+ ////////////////////////////////////////////////////////////////////////////////
+
+ typedef AffineSpaceT<LinearSpace2f> AffineSpace2f;
+ typedef AffineSpaceT<LinearSpace3f> AffineSpace3f;
+ typedef AffineSpaceT<LinearSpace3fa> AffineSpace3fa;
+ typedef AffineSpaceT<LinearSpace3fx> AffineSpace3fx;
+ typedef AffineSpaceT<LinearSpace3ff> AffineSpace3ff;
+ typedef AffineSpaceT<Quaternion3f > OrthonormalSpace3f;
+
+ template<int N> using AffineSpace3vf = AffineSpaceT<LinearSpace3<Vec3<vfloat<N>>>>;
+ typedef AffineSpaceT<LinearSpace3<Vec3<vfloat<4>>>> AffineSpace3vf4;
+ typedef AffineSpaceT<LinearSpace3<Vec3<vfloat<8>>>> AffineSpace3vf8;
+ typedef AffineSpaceT<LinearSpace3<Vec3<vfloat<16>>>> AffineSpace3vf16;
+
+ template<int N> using AffineSpace3vff = AffineSpaceT<LinearSpace3<Vec4<vfloat<N>>>>;
+ typedef AffineSpaceT<LinearSpace3<Vec4<vfloat<4>>>> AffineSpace3vfa4;
+ typedef AffineSpaceT<LinearSpace3<Vec4<vfloat<8>>>> AffineSpace3vfa8;
+ typedef AffineSpaceT<LinearSpace3<Vec4<vfloat<16>>>> AffineSpace3vfa16;
+
+ //////////////////////////////////////////////////////////////////////////////
+ /// Interpolation
+ //////////////////////////////////////////////////////////////////////////////
+ template<typename T, typename R>
+ __forceinline AffineSpaceT<T> lerp(const AffineSpaceT<T>& M0,
+ const AffineSpaceT<T>& M1,
+ const R& t)
+ {
+ return AffineSpaceT<T>(lerp(M0.l,M1.l,t),lerp(M0.p,M1.p,t));
+ }
+
+ // slerp interprets the 16 floats of the matrix M = D * R * S as components of
+ // three matrizes (D, R, S) that are interpolated individually.
+ template<typename T> __forceinline AffineSpaceT<LinearSpace3<Vec3<T>>>
+ slerp(const AffineSpaceT<LinearSpace3<Vec4<T>>>& M0,
+ const AffineSpaceT<LinearSpace3<Vec4<T>>>& M1,
+ const T& t)
+ {
+ QuaternionT<T> q0(M0.p.w, M0.l.vx.w, M0.l.vy.w, M0.l.vz.w);
+ QuaternionT<T> q1(M1.p.w, M1.l.vx.w, M1.l.vy.w, M1.l.vz.w);
+ QuaternionT<T> q = slerp(q0, q1, t);
+
+ AffineSpaceT<LinearSpace3<Vec3<T>>> S = lerp(M0, M1, t);
+ AffineSpaceT<LinearSpace3<Vec3<T>>> D(one);
+ D.p.x = S.l.vx.y;
+ D.p.y = S.l.vx.z;
+ D.p.z = S.l.vy.z;
+ S.l.vx.y = 0;
+ S.l.vx.z = 0;
+ S.l.vy.z = 0;
+
+ AffineSpaceT<LinearSpace3<Vec3<T>>> R = LinearSpace3<Vec3<T>>(q);
+ return D * R * S;
+ }
+
+ // this is a specialized version for Vec3fa because that does
+ // not play along nicely with the other templated Vec3/Vec4 types
+ __forceinline AffineSpace3fa slerp(const AffineSpace3ff& M0,
+ const AffineSpace3ff& M1,
+ const float& t)
+ {
+ Quaternion3f q0(M0.p.w, M0.l.vx.w, M0.l.vy.w, M0.l.vz.w);
+ Quaternion3f q1(M1.p.w, M1.l.vx.w, M1.l.vy.w, M1.l.vz.w);
+ Quaternion3f q = slerp(q0, q1, t);
+
+ AffineSpace3fa S = lerp(M0, M1, t);
+ AffineSpace3fa D(one);
+ D.p.x = S.l.vx.y;
+ D.p.y = S.l.vx.z;
+ D.p.z = S.l.vy.z;
+ S.l.vx.y = 0;
+ S.l.vx.z = 0;
+ S.l.vy.z = 0;
+
+ AffineSpace3fa R = LinearSpace3fa(q);
+ return D * R * S;
+ }
+
+ __forceinline AffineSpace3fa quaternionDecompositionToAffineSpace(const AffineSpace3ff& qd)
+ {
+ // compute affine transform from quaternion decomposition
+ Quaternion3f q(qd.p.w, qd.l.vx.w, qd.l.vy.w, qd.l.vz.w);
+ AffineSpace3fa M = qd;
+ AffineSpace3fa D(one);
+ D.p.x = M.l.vx.y;
+ D.p.y = M.l.vx.z;
+ D.p.z = M.l.vy.z;
+ M.l.vx.y = 0;
+ M.l.vx.z = 0;
+ M.l.vy.z = 0;
+ AffineSpace3fa R = LinearSpace3fa(q);
+ return D * R * M;
+ }
+
+ __forceinline void quaternionDecomposition(const AffineSpace3ff& qd, Vec3fa& T, Quaternion3f& q, AffineSpace3fa& S)
+ {
+ q = Quaternion3f(qd.p.w, qd.l.vx.w, qd.l.vy.w, qd.l.vz.w);
+ S = qd;
+ T.x = qd.l.vx.y;
+ T.y = qd.l.vx.z;
+ T.z = qd.l.vy.z;
+ S.l.vx.y = 0;
+ S.l.vx.z = 0;
+ S.l.vy.z = 0;
+ }
+
+ __forceinline AffineSpace3fx quaternionDecomposition(Vec3fa const& T, Quaternion3f const& q, AffineSpace3fa const& S)
+ {
+ AffineSpace3ff M = S;
+ M.l.vx.w = q.i;
+ M.l.vy.w = q.j;
+ M.l.vz.w = q.k;
+ M.p.w = q.r;
+ M.l.vx.y = T.x;
+ M.l.vx.z = T.y;
+ M.l.vy.z = T.z;
+ return M;
+ }
+
+ struct __aligned(16) QuaternionDecomposition
+ {
+ float scale_x = 1.f;
+ float scale_y = 1.f;
+ float scale_z = 1.f;
+ float skew_xy = 0.f;
+ float skew_xz = 0.f;
+ float skew_yz = 0.f;
+ float shift_x = 0.f;
+ float shift_y = 0.f;
+ float shift_z = 0.f;
+ float quaternion_r = 1.f;
+ float quaternion_i = 0.f;
+ float quaternion_j = 0.f;
+ float quaternion_k = 0.f;
+ float translation_x = 0.f;
+ float translation_y = 0.f;
+ float translation_z = 0.f;
+ };
+
+ __forceinline QuaternionDecomposition quaternionDecomposition(AffineSpace3ff const& M)
+ {
+ QuaternionDecomposition qd;
+ qd.scale_x = M.l.vx.x;
+ qd.scale_y = M.l.vy.y;
+ qd.scale_z = M.l.vz.z;
+ qd.shift_x = M.p.x;
+ qd.shift_y = M.p.y;
+ qd.shift_z = M.p.z;
+ qd.translation_x = M.l.vx.y;
+ qd.translation_y = M.l.vx.z;
+ qd.translation_z = M.l.vy.z;
+ qd.skew_xy = M.l.vy.x;
+ qd.skew_xz = M.l.vz.x;
+ qd.skew_yz = M.l.vz.y;
+ qd.quaternion_r = M.p.w;
+ qd.quaternion_i = M.l.vx.w;
+ qd.quaternion_j = M.l.vy.w;
+ qd.quaternion_k = M.l.vz.w;
+ return qd;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /*
+ * ! Template Specialization for 2D: return matrix for rotation around point
+ * (rotation around arbitrarty vector is not meaningful in 2D)
+ */
+ template<> __forceinline
+ AffineSpace2f AffineSpace2f::rotate(const Vec2f& p, const float& r) {
+ return translate(+p)*AffineSpace2f(LinearSpace2f::rotate(r))*translate(-p);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Similarity Transform
+ //
+ // checks, if M is a similarity transformation, i.e if there exists a factor D
+ // such that for all x,y: distance(Mx, My) = D * distance(x, y)
+ ////////////////////////////////////////////////////////////////////////////////
+ __forceinline bool similarityTransform(const AffineSpace3fa& M, float* D)
+ {
+ if (D) *D = 0.f;
+ if (abs(dot(M.l.vx, M.l.vy)) > 1e-5f) return false;
+ if (abs(dot(M.l.vx, M.l.vz)) > 1e-5f) return false;
+ if (abs(dot(M.l.vy, M.l.vz)) > 1e-5f) return false;
+
+ const float D_x = dot(M.l.vx, M.l.vx);
+ const float D_y = dot(M.l.vy, M.l.vy);
+ const float D_z = dot(M.l.vz, M.l.vz);
+
+ if (abs(D_x - D_y) > 1e-5f ||
+ abs(D_x - D_z) > 1e-5f ||
+ abs(D_y - D_z) > 1e-5f)
+ return false;
+
+ if (D) *D = sqrtf(D_x);
+ return true;
+ }
+
+ __forceinline void AffineSpace3fa_store_unaligned(const AffineSpace3fa &source, AffineSpace3fa* ptr)
+ {
+ Vec3fa::storeu(&ptr->l.vx, source.l.vx);
+ Vec3fa::storeu(&ptr->l.vy, source.l.vy);
+ Vec3fa::storeu(&ptr->l.vz, source.l.vz);
+ Vec3fa::storeu(&ptr->p, source.p);
+ }
+
+ __forceinline AffineSpace3fa AffineSpace3fa_load_unaligned(AffineSpace3fa* ptr)
+ {
+ AffineSpace3fa space;
+ space.l.vx = Vec3fa::loadu(&ptr->l.vx);
+ space.l.vy = Vec3fa::loadu(&ptr->l.vy);
+ space.l.vz = Vec3fa::loadu(&ptr->l.vz);
+ space.p = Vec3fa::loadu(&ptr->p);
+ return space;
+ }
+
+ #undef VectorT
+ #undef ScalarT
+}
diff --git a/thirdparty/embree/common/math/bbox.h b/thirdparty/embree/common/math/bbox.h
new file mode 100644
index 0000000000..bc43155358
--- /dev/null
+++ b/thirdparty/embree/common/math/bbox.h
@@ -0,0 +1,331 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "vec2.h"
+#include "vec3.h"
+
+namespace embree
+{
+ namespace internal {
+
+ template <typename T> __forceinline T divideByTwo(const T& v) { return v / T(2); }
+ template <> __forceinline float divideByTwo<float>(const float& v) { return v * 0.5f; }
+ template <> __forceinline double divideByTwo<double>(const double& v) { return v * 0.5; }
+
+ } // namespace internal
+ template<typename T>
+ struct BBox
+ {
+ T lower, upper;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Construction
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline BBox ( ) { }
+ template<typename T1>
+ __forceinline BBox ( const BBox<T1>& other ) : lower(other.lower), upper(other.upper) {}
+ __forceinline BBox& operator=( const BBox& other ) { lower = other.lower; upper = other.upper; return *this; }
+
+ __forceinline BBox ( const T& v ) : lower(v), upper(v) {}
+ __forceinline BBox ( const T& lower, const T& upper ) : lower(lower), upper(upper) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Extending Bounds
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const BBox& extend(const BBox& other) { lower = min(lower,other.lower); upper = max(upper,other.upper); return *this; }
+ __forceinline const BBox& extend(const T & other) { lower = min(lower,other ); upper = max(upper,other ); return *this; }
+
+ /*! tests if box is empty */
+ __forceinline bool empty() const { for (int i=0; i<T::N; i++) if (lower[i] > upper[i]) return true; return false; }
+
+ /*! computes the size of the box */
+ __forceinline T size() const { return upper - lower; }
+
+ /*! computes the center of the box */
+ __forceinline T center() const { return internal::divideByTwo<T>(lower+upper); }
+
+ /*! computes twice the center of the box */
+ __forceinline T center2() const { return lower+upper; }
+
+ /*! merges two boxes */
+ __forceinline static const BBox merge (const BBox& a, const BBox& b) {
+ return BBox(min(a.lower, b.lower), max(a.upper, b.upper));
+ }
+
+ /*! enlarge box by some scaling factor */
+ __forceinline BBox enlarge_by(const float a) const {
+ return BBox(lower - T(a)*abs(lower), upper + T(a)*abs(upper));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline BBox( EmptyTy ) : lower(pos_inf), upper(neg_inf) {}
+ __forceinline BBox( FullTy ) : lower(neg_inf), upper(pos_inf) {}
+ __forceinline BBox( FalseTy ) : lower(pos_inf), upper(neg_inf) {}
+ __forceinline BBox( TrueTy ) : lower(neg_inf), upper(pos_inf) {}
+ __forceinline BBox( NegInfTy ): lower(pos_inf), upper(neg_inf) {}
+ __forceinline BBox( PosInfTy ): lower(neg_inf), upper(pos_inf) {}
+ };
+
+ template<> __forceinline bool BBox<float>::empty() const {
+ return lower > upper;
+ }
+
+#if defined(__SSE__)
+ template<> __forceinline bool BBox<Vec3fa>::empty() const {
+ return !all(le_mask(lower,upper));
+ }
+ template<> __forceinline bool BBox<Vec3fx>::empty() const {
+ return !all(le_mask(lower,upper));
+ }
+#endif
+
+ /*! tests if box is finite */
+ __forceinline bool isvalid( const BBox<Vec3fa>& v ) {
+ return all(gt_mask(v.lower,Vec3fa_t(-FLT_LARGE)) & lt_mask(v.upper,Vec3fa_t(+FLT_LARGE)));
+ }
+
+ /*! tests if box is finite and non-empty*/
+ __forceinline bool isvalid_non_empty( const BBox<Vec3fa>& v ) {
+ return all(gt_mask(v.lower,Vec3fa_t(-FLT_LARGE)) & lt_mask(v.upper,Vec3fa_t(+FLT_LARGE)) & le_mask(v.lower,v.upper));
+ }
+
+ /*! tests if box has finite entries */
+ __forceinline bool is_finite( const BBox<Vec3fa>& b) {
+ return is_finite(b.lower) && is_finite(b.upper);
+ }
+
+ /*! test if point contained in box */
+ __forceinline bool inside ( const BBox<Vec3fa>& b, const Vec3fa& p ) { return all(ge_mask(p,b.lower) & le_mask(p,b.upper)); }
+
+ /*! computes the center of the box */
+ template<typename T> __forceinline const T center2(const BBox<T>& box) { return box.lower + box.upper; }
+ template<typename T> __forceinline const T center (const BBox<T>& box) { return internal::divideByTwo<T>(center2(box)); }
+
+ /*! computes the volume of a bounding box */
+ __forceinline float volume ( const BBox<Vec3fa>& b ) { return reduce_mul(b.size()); }
+ __forceinline float safeVolume( const BBox<Vec3fa>& b ) { if (b.empty()) return 0.0f; else return volume(b); }
+
+ /*! computes the volume of a bounding box */
+ __forceinline float volume( const BBox<Vec3f>& b ) { return reduce_mul(b.size()); }
+
+ /*! computes the surface area of a bounding box */
+ template<typename T> __forceinline const T area( const BBox<Vec2<T> >& b ) { const Vec2<T> d = b.size(); return d.x*d.y; }
+
+ template<typename T> __forceinline const T halfArea( const BBox<Vec3<T> >& b ) { return halfArea(b.size()); }
+ template<typename T> __forceinline const T area( const BBox<Vec3<T> >& b ) { return T(2)*halfArea(b); }
+
+ __forceinline float halfArea( const BBox<Vec3fa>& b ) { return halfArea(b.size()); }
+ __forceinline float area( const BBox<Vec3fa>& b ) { return 2.0f*halfArea(b); }
+
+ __forceinline float halfArea( const BBox<Vec3fx>& b ) { return halfArea(b.size()); }
+ __forceinline float area( const BBox<Vec3fx>& b ) { return 2.0f*halfArea(b); }
+
+ template<typename Vec> __forceinline float safeArea( const BBox<Vec>& b ) { if (b.empty()) return 0.0f; else return area(b); }
+
+ template<typename T> __forceinline float expectedApproxHalfArea(const BBox<T>& box) {
+ return halfArea(box);
+ }
+
+ /*! merges bounding boxes and points */
+ template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const T& b ) { return BBox<T>(min(a.lower, b ), max(a.upper, b )); }
+ template<typename T> __forceinline const BBox<T> merge( const T& a, const BBox<T>& b ) { return BBox<T>(min(a , b.lower), max(a , b.upper)); }
+ template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(min(a.lower, b.lower), max(a.upper, b.upper)); }
+
+ /*! Merges three boxes. */
+ template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c ) { return merge(a,merge(b,c)); }
+
+ /*! Merges four boxes. */
+ template<typename T> __forceinline BBox<T> merge(const BBox<T>& a, const BBox<T>& b, const BBox<T>& c, const BBox<T>& d) {
+ return merge(merge(a,b),merge(c,d));
+ }
+
+ /*! Comparison Operators */
+ template<typename T> __forceinline bool operator==( const BBox<T>& a, const BBox<T>& b ) { return a.lower == b.lower && a.upper == b.upper; }
+ template<typename T> __forceinline bool operator!=( const BBox<T>& a, const BBox<T>& b ) { return a.lower != b.lower || a.upper != b.upper; }
+
+ /*! scaling */
+ template<typename T> __forceinline BBox<T> operator *( const float& a, const BBox<T>& b ) { return BBox<T>(a*b.lower,a*b.upper); }
+ template<typename T> __forceinline BBox<T> operator *( const T& a, const BBox<T>& b ) { return BBox<T>(a*b.lower,a*b.upper); }
+
+ /*! translations */
+ template<typename T> __forceinline BBox<T> operator +( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(a.lower+b.lower,a.upper+b.upper); }
+ template<typename T> __forceinline BBox<T> operator -( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(a.lower-b.lower,a.upper-b.upper); }
+ template<typename T> __forceinline BBox<T> operator +( const BBox<T>& a, const T & b ) { return BBox<T>(a.lower+b ,a.upper+b ); }
+ template<typename T> __forceinline BBox<T> operator -( const BBox<T>& a, const T & b ) { return BBox<T>(a.lower-b ,a.upper-b ); }
+
+ /*! extension */
+ template<typename T> __forceinline BBox<T> enlarge(const BBox<T>& a, const T& b) { return BBox<T>(a.lower-b, a.upper+b); }
+
+ /*! intersect bounding boxes */
+ template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(max(a.lower, b.lower), min(a.upper, b.upper)); }
+ template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c ) { return intersect(a,intersect(b,c)); }
+ template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c, const BBox<T>& d ) { return intersect(intersect(a,b),intersect(c,d)); }
+
+ /*! subtract bounds from each other */
+ template<typename T> __forceinline void subtract(const BBox<T>& a, const BBox<T>& b, BBox<T>& c, BBox<T>& d)
+ {
+ c.lower = a.lower;
+ c.upper = min(a.upper,b.lower);
+ d.lower = max(a.lower,b.upper);
+ d.upper = a.upper;
+ }
+
+ /*! tests if bounding boxes (and points) are disjoint (empty intersection) */
+ template<typename T> __inline bool disjoint( const BBox<T>& a, const BBox<T>& b ) { return intersect(a,b).empty(); }
+ template<typename T> __inline bool disjoint( const BBox<T>& a, const T& b ) { return disjoint(a,BBox<T>(b)); }
+ template<typename T> __inline bool disjoint( const T& a, const BBox<T>& b ) { return disjoint(BBox<T>(a),b); }
+
+ /*! tests if bounding boxes (and points) are conjoint (non-empty intersection) */
+ template<typename T> __inline bool conjoint( const BBox<T>& a, const BBox<T>& b ) { return !intersect(a,b).empty(); }
+ template<typename T> __inline bool conjoint( const BBox<T>& a, const T& b ) { return conjoint(a,BBox<T>(b)); }
+ template<typename T> __inline bool conjoint( const T& a, const BBox<T>& b ) { return conjoint(BBox<T>(a),b); }
+
+ /*! subset relation */
+ template<typename T> __inline bool subset( const BBox<T>& a, const BBox<T>& b )
+ {
+ for ( size_t i = 0; i < T::N; i++ ) if ( a.lower[i] < b.lower[i] ) return false;
+ for ( size_t i = 0; i < T::N; i++ ) if ( a.upper[i] > b.upper[i] ) return false;
+ return true;
+ }
+
+ template<> __inline bool subset( const BBox<Vec3fa>& a, const BBox<Vec3fa>& b ) {
+ return all(ge_mask(a.lower,b.lower)) & all(le_mask(a.upper,b.upper));
+ }
+
+ template<> __inline bool subset( const BBox<Vec3fx>& a, const BBox<Vec3fx>& b ) {
+ return all(ge_mask(a.lower,b.lower)) & all(le_mask(a.upper,b.upper));
+ }
+
+ /*! blending */
+ template<typename T>
+ __forceinline BBox<T> lerp(const BBox<T>& b0, const BBox<T>& b1, const float t) {
+ return BBox<T>(lerp(b0.lower,b1.lower,t),lerp(b0.upper,b1.upper,t));
+ }
+
+ /*! output operator */
+ template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const BBox<T>& box) {
+ return cout << "[" << box.lower << "; " << box.upper << "]";
+ }
+
+ /*! default template instantiations */
+ typedef BBox<float> BBox1f;
+ typedef BBox<Vec2f> BBox2f;
+ typedef BBox<Vec2fa> BBox2fa;
+ typedef BBox<Vec3f> BBox3f;
+ typedef BBox<Vec3fa> BBox3fa;
+ typedef BBox<Vec3fx> BBox3fx;
+ typedef BBox<Vec3ff> BBox3ff;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// SSE / AVX / MIC specializations
+////////////////////////////////////////////////////////////////////////////////
+
+#if defined __SSE__
+#include "../simd/sse.h"
+#endif
+
+#if defined __AVX__
+#include "../simd/avx.h"
+#endif
+
+#if defined(__AVX512F__)
+#include "../simd/avx512.h"
+#endif
+
+namespace embree
+{
+ template<int N>
+ __forceinline BBox<Vec3<vfloat<N>>> transpose(const BBox3fa* bounds);
+
+ template<>
+ __forceinline BBox<Vec3<vfloat4>> transpose<4>(const BBox3fa* bounds)
+ {
+ BBox<Vec3<vfloat4>> dest;
+
+ transpose((vfloat4&)bounds[0].lower,
+ (vfloat4&)bounds[1].lower,
+ (vfloat4&)bounds[2].lower,
+ (vfloat4&)bounds[3].lower,
+ dest.lower.x,
+ dest.lower.y,
+ dest.lower.z);
+
+ transpose((vfloat4&)bounds[0].upper,
+ (vfloat4&)bounds[1].upper,
+ (vfloat4&)bounds[2].upper,
+ (vfloat4&)bounds[3].upper,
+ dest.upper.x,
+ dest.upper.y,
+ dest.upper.z);
+
+ return dest;
+ }
+
+#if defined(__AVX__)
+ template<>
+ __forceinline BBox<Vec3<vfloat8>> transpose<8>(const BBox3fa* bounds)
+ {
+ BBox<Vec3<vfloat8>> dest;
+
+ transpose((vfloat4&)bounds[0].lower,
+ (vfloat4&)bounds[1].lower,
+ (vfloat4&)bounds[2].lower,
+ (vfloat4&)bounds[3].lower,
+ (vfloat4&)bounds[4].lower,
+ (vfloat4&)bounds[5].lower,
+ (vfloat4&)bounds[6].lower,
+ (vfloat4&)bounds[7].lower,
+ dest.lower.x,
+ dest.lower.y,
+ dest.lower.z);
+
+ transpose((vfloat4&)bounds[0].upper,
+ (vfloat4&)bounds[1].upper,
+ (vfloat4&)bounds[2].upper,
+ (vfloat4&)bounds[3].upper,
+ (vfloat4&)bounds[4].upper,
+ (vfloat4&)bounds[5].upper,
+ (vfloat4&)bounds[6].upper,
+ (vfloat4&)bounds[7].upper,
+ dest.upper.x,
+ dest.upper.y,
+ dest.upper.z);
+
+ return dest;
+ }
+#endif
+
+ template<int N>
+ __forceinline BBox3fa merge(const BBox3fa* bounds);
+
+ template<>
+ __forceinline BBox3fa merge<4>(const BBox3fa* bounds)
+ {
+ const Vec3fa lower = min(min(bounds[0].lower,bounds[1].lower),
+ min(bounds[2].lower,bounds[3].lower));
+ const Vec3fa upper = max(max(bounds[0].upper,bounds[1].upper),
+ max(bounds[2].upper,bounds[3].upper));
+ return BBox3fa(lower,upper);
+ }
+
+#if defined(__AVX__)
+ template<>
+ __forceinline BBox3fa merge<8>(const BBox3fa* bounds)
+ {
+ const Vec3fa lower = min(min(min(bounds[0].lower,bounds[1].lower),min(bounds[2].lower,bounds[3].lower)),
+ min(min(bounds[4].lower,bounds[5].lower),min(bounds[6].lower,bounds[7].lower)));
+ const Vec3fa upper = max(max(max(bounds[0].upper,bounds[1].upper),max(bounds[2].upper,bounds[3].upper)),
+ max(max(bounds[4].upper,bounds[5].upper),max(bounds[6].upper,bounds[7].upper)));
+ return BBox3fa(lower,upper);
+ }
+#endif
+}
+
diff --git a/thirdparty/embree/common/math/col3.h b/thirdparty/embree/common/math/col3.h
new file mode 100644
index 0000000000..3f50c04393
--- /dev/null
+++ b/thirdparty/embree/common/math/col3.h
@@ -0,0 +1,47 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "math.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////////////////////
+ /// RGB Color Class
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> struct Col3
+ {
+ T r, g, b;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Construction
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Col3 ( ) { }
+ __forceinline Col3 ( const Col3& other ) { r = other.r; g = other.g; b = other.b; }
+ __forceinline Col3& operator=( const Col3& other ) { r = other.r; g = other.g; b = other.b; return *this; }
+
+ __forceinline explicit Col3 (const T& v) : r(v), g(v), b(v) {}
+ __forceinline Col3 (const T& r, const T& g, const T& b) : r(r), g(g), b(b) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Col3 (ZeroTy) : r(zero) , g(zero) , b(zero) {}
+ __forceinline Col3 (OneTy) : r(one) , g(one) , b(one) {}
+ __forceinline Col3 (PosInfTy) : r(pos_inf), g(pos_inf), b(pos_inf) {}
+ __forceinline Col3 (NegInfTy) : r(neg_inf), g(neg_inf), b(neg_inf) {}
+ };
+
+ /*! output operator */
+ template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Col3<T>& a) {
+ return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
+ }
+
+ /*! default template instantiations */
+ typedef Col3<unsigned char> Col3uc;
+ typedef Col3<float > Col3f;
+}
diff --git a/thirdparty/embree/common/math/col4.h b/thirdparty/embree/common/math/col4.h
new file mode 100644
index 0000000000..788508516b
--- /dev/null
+++ b/thirdparty/embree/common/math/col4.h
@@ -0,0 +1,47 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "math.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////////////////////
+ /// RGBA Color Class
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> struct Col4
+ {
+ T r, g, b, a;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Construction
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Col4 ( ) { }
+ __forceinline Col4 ( const Col4& other ) { r = other.r; g = other.g; b = other.b; a = other.a; }
+ __forceinline Col4& operator=( const Col4& other ) { r = other.r; g = other.g; b = other.b; a = other.a; return *this; }
+
+ __forceinline explicit Col4 (const T& v) : r(v), g(v), b(v), a(v) {}
+ __forceinline Col4 (const T& r, const T& g, const T& b, const T& a) : r(r), g(g), b(b), a(a) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Col4 (ZeroTy) : r(zero) , g(zero) , b(zero) , a(zero) {}
+ __forceinline Col4 (OneTy) : r(one) , g(one) , b(one) , a(one) {}
+ __forceinline Col4 (PosInfTy) : r(pos_inf), g(pos_inf), b(pos_inf), a(pos_inf) {}
+ __forceinline Col4 (NegInfTy) : r(neg_inf), g(neg_inf), b(neg_inf), a(neg_inf) {}
+ };
+
+ /*! output operator */
+ template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Col4<T>& a) {
+ return cout << "(" << a.r << ", " << a.g << ", " << a.b << ", " << a.a << ")";
+ }
+
+ /*! default template instantiations */
+ typedef Col4<unsigned char> Col4uc;
+ typedef Col4<float > Col4f;
+}
diff --git a/thirdparty/embree/common/math/color.h b/thirdparty/embree/common/math/color.h
new file mode 100644
index 0000000000..529584ea16
--- /dev/null
+++ b/thirdparty/embree/common/math/color.h
@@ -0,0 +1,241 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "constants.h"
+#include "col3.h"
+#include "col4.h"
+
+#include "../simd/sse.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////////////////////
+ /// SSE RGBA Color Class
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct Color4
+ {
+ union {
+ __m128 m128;
+ struct { float r,g,b,a; };
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Construction
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Color4 () {}
+ __forceinline Color4 ( const __m128 a ) : m128(a) {}
+
+ __forceinline explicit Color4 (const float v) : m128(_mm_set1_ps(v)) {}
+ __forceinline Color4 (const float r, const float g, const float b, const float a) : m128(_mm_set_ps(a,b,g,r)) {}
+
+ __forceinline explicit Color4 ( const Col3uc& other ) { m128 = _mm_mul_ps(_mm_set_ps(255.0f,other.b,other.g,other.r),_mm_set1_ps(one_over_255)); }
+ __forceinline explicit Color4 ( const Col3f& other ) { m128 = _mm_set_ps(1.0f,other.b,other.g,other.r); }
+ __forceinline explicit Color4 ( const Col4uc& other ) { m128 = _mm_mul_ps(_mm_set_ps(other.a,other.b,other.g,other.r),_mm_set1_ps(one_over_255)); }
+ __forceinline explicit Color4 ( const Col4f& other ) { m128 = _mm_set_ps(other.a,other.b,other.g,other.r); }
+
+ __forceinline Color4 ( const Color4& other ) : m128(other.m128) {}
+ __forceinline Color4& operator=( const Color4& other ) { m128 = other.m128; return *this; }
+
+ __forceinline operator const __m128&() const { return m128; }
+ __forceinline operator __m128&() { return m128; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Set
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
+ __forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = a; }
+ __forceinline void set(Col3uc& d) const
+ {
+ vfloat4 s = clamp(vfloat4(m128))*255.0f;
+ d.r = (unsigned char)(s[0]);
+ d.g = (unsigned char)(s[1]);
+ d.b = (unsigned char)(s[2]);
+ }
+ __forceinline void set(Col4uc& d) const
+ {
+ vfloat4 s = clamp(vfloat4(m128))*255.0f;
+ d.r = (unsigned char)(s[0]);
+ d.g = (unsigned char)(s[1]);
+ d.b = (unsigned char)(s[2]);
+ d.a = (unsigned char)(s[3]);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Color4( ZeroTy ) : m128(_mm_set1_ps(0.0f)) {}
+ __forceinline Color4( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
+ __forceinline Color4( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
+ __forceinline Color4( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// SSE RGB Color Class
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct Color
+ {
+ union {
+ __m128 m128;
+ struct { float r,g,b; };
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Construction
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Color () {}
+ __forceinline Color ( const __m128 a ) : m128(a) {}
+
+ __forceinline explicit Color (const float v) : m128(_mm_set1_ps(v)) {}
+ __forceinline Color (const float r, const float g, const float b) : m128(_mm_set_ps(0.0f,b,g,r)) {}
+
+ __forceinline Color ( const Color& other ) : m128(other.m128) {}
+ __forceinline Color& operator=( const Color& other ) { m128 = other.m128; return *this; }
+
+ __forceinline Color ( const Color4& other ) : m128(other.m128) {}
+ __forceinline Color& operator=( const Color4& other ) { m128 = other.m128; return *this; }
+
+ __forceinline operator const __m128&() const { return m128; }
+ __forceinline operator __m128&() { return m128; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Set
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
+ __forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = 1.0f; }
+ __forceinline void set(Col3uc& d) const
+ {
+ vfloat4 s = clamp(vfloat4(m128))*255.0f;
+ d.r = (unsigned char)(s[0]);
+ d.g = (unsigned char)(s[1]);
+ d.b = (unsigned char)(s[2]);
+ }
+ __forceinline void set(Col4uc& d) const
+ {
+ vfloat4 s = clamp(vfloat4(m128))*255.0f;
+ d.r = (unsigned char)(s[0]);
+ d.g = (unsigned char)(s[1]);
+ d.b = (unsigned char)(s[2]);
+ d.a = 255;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Color( ZeroTy ) : m128(_mm_set1_ps(0.0f)) {}
+ __forceinline Color( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
+ __forceinline Color( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
+ __forceinline Color( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const Color operator +( const Color& a ) { return a; }
+ __forceinline const Color operator -( const Color& a ) {
+ const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
+ return _mm_xor_ps(a.m128, mask);
+ }
+ __forceinline const Color abs ( const Color& a ) {
+ const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
+ return _mm_and_ps(a.m128, mask);
+ }
+ __forceinline const Color rcp ( const Color& a )
+ {
+#if defined(__AVX512VL__)
+ const Color r = _mm_rcp14_ps(a.m128);
+#else
+ const Color r = _mm_rcp_ps(a.m128);
+#endif
+ return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
+ }
+ __forceinline const Color rsqrt( const Color& a )
+ {
+#if defined(__AVX512VL__)
+ __m128 r = _mm_rsqrt14_ps(a.m128);
+#else
+ __m128 r = _mm_rsqrt_ps(a.m128);
+#endif
+ return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
+ }
+ __forceinline const Color sqrt ( const Color& a ) { return _mm_sqrt_ps(a.m128); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const Color operator +( const Color& a, const Color& b ) { return _mm_add_ps(a.m128, b.m128); }
+ __forceinline const Color operator -( const Color& a, const Color& b ) { return _mm_sub_ps(a.m128, b.m128); }
+ __forceinline const Color operator *( const Color& a, const Color& b ) { return _mm_mul_ps(a.m128, b.m128); }
+ __forceinline const Color operator *( const Color& a, const float b ) { return a * Color(b); }
+ __forceinline const Color operator *( const float a, const Color& b ) { return Color(a) * b; }
+ __forceinline const Color operator /( const Color& a, const Color& b ) { return a * rcp(b); }
+ __forceinline const Color operator /( const Color& a, const float b ) { return a * rcp(b); }
+
+ __forceinline const Color min( const Color& a, const Color& b ) { return _mm_min_ps(a.m128,b.m128); }
+ __forceinline const Color max( const Color& a, const Color& b ) { return _mm_max_ps(a.m128,b.m128); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const Color operator+=(Color& a, const Color& b) { return a = a + b; }
+ __forceinline const Color operator-=(Color& a, const Color& b) { return a = a - b; }
+ __forceinline const Color operator*=(Color& a, const Color& b) { return a = a * b; }
+ __forceinline const Color operator/=(Color& a, const Color& b) { return a = a / b; }
+ __forceinline const Color operator*=(Color& a, const float b ) { return a = a * b; }
+ __forceinline const Color operator/=(Color& a, const float b ) { return a = a / b; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reductions
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline float reduce_add(const Color& v) { return v.r+v.g+v.b; }
+ __forceinline float reduce_mul(const Color& v) { return v.r*v.g*v.b; }
+ __forceinline float reduce_min(const Color& v) { return min(v.r,v.g,v.b); }
+ __forceinline float reduce_max(const Color& v) { return max(v.r,v.g,v.b); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool operator ==( const Color& a, const Color& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; }
+ __forceinline bool operator !=( const Color& a, const Color& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; }
+ __forceinline bool operator < ( const Color& a, const Color& b ) {
+ if (a.r != b.r) return a.r < b.r;
+ if (a.g != b.g) return a.g < b.g;
+ if (a.b != b.b) return a.b < b.b;
+ return false;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const Color select( bool s, const Color& t, const Color& f ) {
+ __m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
+ return blendv_ps(f, t, mask);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Special Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ /*! computes luminance of a color */
+ __forceinline float luminance (const Color& a) { return madd(0.212671f,a.r,madd(0.715160f,a.g,0.072169f*a.b)); }
+
+ /*! output operator */
+ __forceinline embree_ostream operator<<(embree_ostream cout, const Color& a) {
+ return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
+ }
+}
diff --git a/thirdparty/embree/common/math/constants.cpp b/thirdparty/embree/common/math/constants.cpp
new file mode 100644
index 0000000000..03919ae20c
--- /dev/null
+++ b/thirdparty/embree/common/math/constants.cpp
@@ -0,0 +1,27 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "constants.h"
+
+namespace embree
+{
+ TrueTy True;
+ FalseTy False;
+ ZeroTy zero;
+ OneTy one;
+ NegInfTy neg_inf;
+ PosInfTy inf;
+ PosInfTy pos_inf;
+ NaNTy nan;
+ UlpTy ulp;
+ PiTy pi;
+ OneOverPiTy one_over_pi;
+ TwoPiTy two_pi;
+ OneOverTwoPiTy one_over_two_pi;
+ FourPiTy four_pi;
+ OneOverFourPiTy one_over_four_pi;
+ StepTy step;
+ ReverseStepTy reverse_step;
+ EmptyTy empty;
+ UndefinedTy undefined;
+}
diff --git a/thirdparty/embree/common/math/constants.h b/thirdparty/embree/common/math/constants.h
new file mode 100644
index 0000000000..578473a8ab
--- /dev/null
+++ b/thirdparty/embree/common/math/constants.h
@@ -0,0 +1,197 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../sys/platform.h"
+
+#include <limits>
+
+#define _USE_MATH_DEFINES
+#include <math.h> // using cmath causes issues under Windows
+#include <cfloat>
+#include <climits>
+
+namespace embree
+{
+ static MAYBE_UNUSED const float one_over_255 = 1.0f/255.0f;
+ static MAYBE_UNUSED const float min_rcp_input = 1E-18f; // for abs(x) >= min_rcp_input the newton raphson rcp calculation does not fail
+
+ /* we consider floating point numbers in that range as valid input numbers */
+ static MAYBE_UNUSED float FLT_LARGE = 1.844E18f;
+
+ struct TrueTy {
+ __forceinline operator bool( ) const { return true; }
+ };
+
+ extern MAYBE_UNUSED TrueTy True;
+
+ struct FalseTy {
+ __forceinline operator bool( ) const { return false; }
+ };
+
+ extern MAYBE_UNUSED FalseTy False;
+
+ struct ZeroTy
+ {
+ __forceinline operator double ( ) const { return 0; }
+ __forceinline operator float ( ) const { return 0; }
+ __forceinline operator long long( ) const { return 0; }
+ __forceinline operator unsigned long long( ) const { return 0; }
+ __forceinline operator long ( ) const { return 0; }
+ __forceinline operator unsigned long ( ) const { return 0; }
+ __forceinline operator int ( ) const { return 0; }
+ __forceinline operator unsigned int ( ) const { return 0; }
+ __forceinline operator short ( ) const { return 0; }
+ __forceinline operator unsigned short ( ) const { return 0; }
+ __forceinline operator char ( ) const { return 0; }
+ __forceinline operator unsigned char ( ) const { return 0; }
+ };
+
+ extern MAYBE_UNUSED ZeroTy zero;
+
+ struct OneTy
+ {
+ __forceinline operator double ( ) const { return 1; }
+ __forceinline operator float ( ) const { return 1; }
+ __forceinline operator long long( ) const { return 1; }
+ __forceinline operator unsigned long long( ) const { return 1; }
+ __forceinline operator long ( ) const { return 1; }
+ __forceinline operator unsigned long ( ) const { return 1; }
+ __forceinline operator int ( ) const { return 1; }
+ __forceinline operator unsigned int ( ) const { return 1; }
+ __forceinline operator short ( ) const { return 1; }
+ __forceinline operator unsigned short ( ) const { return 1; }
+ __forceinline operator char ( ) const { return 1; }
+ __forceinline operator unsigned char ( ) const { return 1; }
+ };
+
+ extern MAYBE_UNUSED OneTy one;
+
+ struct NegInfTy
+ {
+ __forceinline operator double ( ) const { return -std::numeric_limits<double>::infinity(); }
+ __forceinline operator float ( ) const { return -std::numeric_limits<float>::infinity(); }
+ __forceinline operator long long( ) const { return std::numeric_limits<long long>::min(); }
+ __forceinline operator unsigned long long( ) const { return std::numeric_limits<unsigned long long>::min(); }
+ __forceinline operator long ( ) const { return std::numeric_limits<long>::min(); }
+ __forceinline operator unsigned long ( ) const { return std::numeric_limits<unsigned long>::min(); }
+ __forceinline operator int ( ) const { return std::numeric_limits<int>::min(); }
+ __forceinline operator unsigned int ( ) const { return std::numeric_limits<unsigned int>::min(); }
+ __forceinline operator short ( ) const { return std::numeric_limits<short>::min(); }
+ __forceinline operator unsigned short ( ) const { return std::numeric_limits<unsigned short>::min(); }
+ __forceinline operator char ( ) const { return std::numeric_limits<char>::min(); }
+ __forceinline operator unsigned char ( ) const { return std::numeric_limits<unsigned char>::min(); }
+
+ };
+
+ extern MAYBE_UNUSED NegInfTy neg_inf;
+
+ struct PosInfTy
+ {
+ __forceinline operator double ( ) const { return std::numeric_limits<double>::infinity(); }
+ __forceinline operator float ( ) const { return std::numeric_limits<float>::infinity(); }
+ __forceinline operator long long( ) const { return std::numeric_limits<long long>::max(); }
+ __forceinline operator unsigned long long( ) const { return std::numeric_limits<unsigned long long>::max(); }
+ __forceinline operator long ( ) const { return std::numeric_limits<long>::max(); }
+ __forceinline operator unsigned long ( ) const { return std::numeric_limits<unsigned long>::max(); }
+ __forceinline operator int ( ) const { return std::numeric_limits<int>::max(); }
+ __forceinline operator unsigned int ( ) const { return std::numeric_limits<unsigned int>::max(); }
+ __forceinline operator short ( ) const { return std::numeric_limits<short>::max(); }
+ __forceinline operator unsigned short ( ) const { return std::numeric_limits<unsigned short>::max(); }
+ __forceinline operator char ( ) const { return std::numeric_limits<char>::max(); }
+ __forceinline operator unsigned char ( ) const { return std::numeric_limits<unsigned char>::max(); }
+ };
+
+ extern MAYBE_UNUSED PosInfTy inf;
+ extern MAYBE_UNUSED PosInfTy pos_inf;
+
+ struct NaNTy
+ {
+ __forceinline operator double( ) const { return std::numeric_limits<double>::quiet_NaN(); }
+ __forceinline operator float ( ) const { return std::numeric_limits<float>::quiet_NaN(); }
+ };
+
+ extern MAYBE_UNUSED NaNTy nan;
+
+ struct UlpTy
+ {
+ __forceinline operator double( ) const { return std::numeric_limits<double>::epsilon(); }
+ __forceinline operator float ( ) const { return std::numeric_limits<float>::epsilon(); }
+ };
+
+ extern MAYBE_UNUSED UlpTy ulp;
+
+ struct PiTy
+ {
+ __forceinline operator double( ) const { return double(M_PI); }
+ __forceinline operator float ( ) const { return float(M_PI); }
+ };
+
+ extern MAYBE_UNUSED PiTy pi;
+
+ struct OneOverPiTy
+ {
+ __forceinline operator double( ) const { return double(M_1_PI); }
+ __forceinline operator float ( ) const { return float(M_1_PI); }
+ };
+
+ extern MAYBE_UNUSED OneOverPiTy one_over_pi;
+
+ struct TwoPiTy
+ {
+ __forceinline operator double( ) const { return double(2.0*M_PI); }
+ __forceinline operator float ( ) const { return float(2.0*M_PI); }
+ };
+
+ extern MAYBE_UNUSED TwoPiTy two_pi;
+
+ struct OneOverTwoPiTy
+ {
+ __forceinline operator double( ) const { return double(0.5*M_1_PI); }
+ __forceinline operator float ( ) const { return float(0.5*M_1_PI); }
+ };
+
+ extern MAYBE_UNUSED OneOverTwoPiTy one_over_two_pi;
+
+ struct FourPiTy
+ {
+ __forceinline operator double( ) const { return double(4.0*M_PI); }
+ __forceinline operator float ( ) const { return float(4.0*M_PI); }
+ };
+
+ extern MAYBE_UNUSED FourPiTy four_pi;
+
+ struct OneOverFourPiTy
+ {
+ __forceinline operator double( ) const { return double(0.25*M_1_PI); }
+ __forceinline operator float ( ) const { return float(0.25*M_1_PI); }
+ };
+
+ extern MAYBE_UNUSED OneOverFourPiTy one_over_four_pi;
+
+ struct StepTy {
+ };
+
+ extern MAYBE_UNUSED StepTy step;
+
+ struct ReverseStepTy {
+ };
+
+ extern MAYBE_UNUSED ReverseStepTy reverse_step;
+
+ struct EmptyTy {
+ };
+
+ extern MAYBE_UNUSED EmptyTy empty;
+
+ struct FullTy {
+ };
+
+ extern MAYBE_UNUSED FullTy full;
+
+ struct UndefinedTy {
+ };
+
+ extern MAYBE_UNUSED UndefinedTy undefined;
+}
diff --git a/thirdparty/embree/common/math/interval.h b/thirdparty/embree/common/math/interval.h
new file mode 100644
index 0000000000..310add2129
--- /dev/null
+++ b/thirdparty/embree/common/math/interval.h
@@ -0,0 +1,161 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "vec2.h"
+#include "vec3.h"
+#include "bbox.h"
+
+namespace embree
+{
+ template<typename V>
+ struct Interval
+ {
+ V lower, upper;
+
+ __forceinline Interval() {}
+ __forceinline Interval ( const Interval& other ) { lower = other.lower; upper = other.upper; }
+ __forceinline Interval& operator=( const Interval& other ) { lower = other.lower; upper = other.upper; return *this; }
+
+ __forceinline Interval(const V& a) : lower(a), upper(a) {}
+ __forceinline Interval(const V& lower, const V& upper) : lower(lower), upper(upper) {}
+ __forceinline Interval(const BBox<V>& a) : lower(a.lower), upper(a.upper) {}
+
+ /*! tests if box is empty */
+ //__forceinline bool empty() const { return lower > upper; }
+
+ /*! computes the size of the interval */
+ __forceinline V size() const { return upper - lower; }
+
+ __forceinline V center() const { return 0.5f*(lower+upper); }
+
+ __forceinline const Interval& extend(const Interval& other) { lower = min(lower,other.lower); upper = max(upper,other.upper); return *this; }
+ __forceinline const Interval& extend(const V & other) { lower = min(lower,other ); upper = max(upper,other ); return *this; }
+
+ __forceinline friend Interval operator +( const Interval& a, const Interval& b ) {
+ return Interval(a.lower+b.lower,a.upper+b.upper);
+ }
+
+ __forceinline friend Interval operator -( const Interval& a, const Interval& b ) {
+ return Interval(a.lower-b.upper,a.upper-b.lower);
+ }
+
+ __forceinline friend Interval operator -( const Interval& a, const V& b ) {
+ return Interval(a.lower-b,a.upper-b);
+ }
+
+ __forceinline friend Interval operator *( const Interval& a, const Interval& b )
+ {
+ const V ll = a.lower*b.lower;
+ const V lu = a.lower*b.upper;
+ const V ul = a.upper*b.lower;
+ const V uu = a.upper*b.upper;
+ return Interval(min(ll,lu,ul,uu),max(ll,lu,ul,uu));
+ }
+
+ __forceinline friend Interval merge( const Interval& a, const Interval& b) {
+ return Interval(min(a.lower,b.lower),max(a.upper,b.upper));
+ }
+
+ __forceinline friend Interval merge( const Interval& a, const Interval& b, const Interval& c) {
+ return merge(merge(a,b),c);
+ }
+
+ __forceinline friend Interval merge( const Interval& a, const Interval& b, const Interval& c, const Interval& d) {
+ return merge(merge(a,b),merge(c,d));
+ }
+
+ /*! intersect bounding boxes */
+ __forceinline friend const Interval intersect( const Interval& a, const Interval& b ) { return Interval(max(a.lower, b.lower), min(a.upper, b.upper)); }
+ __forceinline friend const Interval intersect( const Interval& a, const Interval& b, const Interval& c ) { return intersect(a,intersect(b,c)); }
+ __forceinline friend const Interval intersect( const Interval& a, const Interval& b, const Interval& c, const Interval& d ) { return intersect(intersect(a,b),intersect(c,d)); }
+
+ friend embree_ostream operator<<(embree_ostream cout, const Interval& a) {
+ return cout << "[" << a.lower << ", " << a.upper << "]";
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Interval( EmptyTy ) : lower(pos_inf), upper(neg_inf) {}
+ __forceinline Interval( FullTy ) : lower(neg_inf), upper(pos_inf) {}
+ };
+
+ __forceinline bool isEmpty(const Interval<float>& v) {
+ return v.lower > v.upper;
+ }
+
+ __forceinline vboolx isEmpty(const Interval<vfloatx>& v) {
+ return v.lower > v.upper;
+ }
+
+ /*! subset relation */
+ template<typename T> __forceinline bool subset( const Interval<T>& a, const Interval<T>& b ) {
+ return (a.lower > b.lower) && (a.upper < b.upper);
+ }
+
+ template<typename T> __forceinline bool subset( const Vec2<Interval<T>>& a, const Vec2<Interval<T>>& b ) {
+ return subset(a.x,b.x) && subset(a.y,b.y);
+ }
+
+ template<typename T> __forceinline const Vec2<Interval<T>> intersect( const Vec2<Interval<T>>& a, const Vec2<Interval<T>>& b ) {
+ return Vec2<Interval<T>>(intersect(a.x,b.x),intersect(a.y,b.y));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Interval<T> select ( bool s, const Interval<T>& t, const Interval<T>& f ) {
+ return Interval<T>(select(s,t.lower,f.lower),select(s,t.upper,f.upper));
+ }
+
+ template<typename T> __forceinline Interval<T> select ( const typename T::Bool& s, const Interval<T>& t, const Interval<T>& f ) {
+ return Interval<T>(select(s,t.lower,f.lower),select(s,t.upper,f.upper));
+ }
+
+ __forceinline int numRoots(const Interval<float>& p0, const Interval<float>& p1)
+ {
+ float eps = 1E-4f;
+ bool neg0 = p0.lower < eps; bool pos0 = p0.upper > -eps;
+ bool neg1 = p1.lower < eps; bool pos1 = p1.upper > -eps;
+ return (neg0 && pos1) || (pos0 && neg1) || (neg0 && pos0) || (neg1 && pos1);
+ }
+
+ typedef Interval<float> Interval1f;
+ typedef Vec2<Interval<float>> Interval2f;
+ typedef Vec3<Interval<float>> Interval3f;
+
+inline void swap(float& a, float& b) { float tmp = a; a = b; b = tmp; }
+
+inline Interval1f shift(const Interval1f& v, float shift) { return Interval1f(v.lower + shift, v.upper + shift); }
+
+#define TWO_PI (2.0*M_PI)
+inline Interval1f sin(Interval1f interval)
+{
+ if (interval.upper-interval.lower >= M_PI) { return Interval1f(-1.0, 1.0); }
+ if (interval.upper > TWO_PI) { interval = shift(interval, -TWO_PI*floor(interval.upper/TWO_PI)); }
+ if (interval.lower < 0) { interval = shift(interval, -TWO_PI*floor(interval.lower/TWO_PI)); }
+ float sinLower = sin(interval.lower);
+ float sinUpper = sin(interval.upper);
+ if (sinLower > sinUpper) swap(sinLower, sinUpper);
+ if (interval.lower < M_PI / 2.0 && interval.upper > M_PI / 2.0) sinUpper = 1.0;
+ if (interval.lower < 3.0 * M_PI / 2.0 && interval.upper > 3.0 * M_PI / 2.0) sinLower = -1.0;
+ return Interval1f(sinLower, sinUpper);
+}
+
+inline Interval1f cos(Interval1f interval)
+{
+ if (interval.upper-interval.lower >= M_PI) { return Interval1f(-1.0, 1.0); }
+ if (interval.upper > TWO_PI) { interval = shift(interval, -TWO_PI*floor(interval.upper/TWO_PI)); }
+ if (interval.lower < 0) { interval = shift(interval, -TWO_PI*floor(interval.lower/TWO_PI)); }
+ float cosLower = cos(interval.lower);
+ float cosUpper = cos(interval.upper);
+ if (cosLower > cosUpper) swap(cosLower, cosUpper);
+ if (interval.lower < M_PI && interval.upper > M_PI) cosLower = -1.0;
+ return Interval1f(cosLower, cosUpper);
+}
+#undef TWO_PI
+}
diff --git a/thirdparty/embree/common/math/lbbox.h b/thirdparty/embree/common/math/lbbox.h
new file mode 100644
index 0000000000..2b397a05c8
--- /dev/null
+++ b/thirdparty/embree/common/math/lbbox.h
@@ -0,0 +1,289 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "bbox.h"
+#include "range.h"
+
+namespace embree
+{
+ template<typename T>
+ __forceinline std::pair<T,T> globalLinear(const std::pair<T,T>& v, const BBox1f& dt)
+ {
+ const float rcp_dt_size = float(1.0f)/dt.size();
+ const T g0 = lerp(v.first,v.second,-dt.lower*rcp_dt_size);
+ const T g1 = lerp(v.first,v.second,(1.0f-dt.lower)*rcp_dt_size);
+ return std::make_pair(g0,g1);
+ }
+
+ template<typename T>
+ struct LBBox
+ {
+ public:
+ __forceinline LBBox () {}
+
+ template<typename T1>
+ __forceinline LBBox ( const LBBox<T1>& other )
+ : bounds0(other.bounds0), bounds1(other.bounds1) {}
+
+ __forceinline LBBox& operator= ( const LBBox& other ) {
+ bounds0 = other.bounds0; bounds1 = other.bounds1; return *this;
+ }
+
+ __forceinline LBBox (EmptyTy)
+ : bounds0(EmptyTy()), bounds1(EmptyTy()) {}
+
+ __forceinline explicit LBBox ( const BBox<T>& bounds)
+ : bounds0(bounds), bounds1(bounds) { }
+
+ __forceinline LBBox ( const BBox<T>& bounds0, const BBox<T>& bounds1)
+ : bounds0(bounds0), bounds1(bounds1) { }
+
+ LBBox ( const avector<BBox<T>>& bounds )
+ {
+ assert(bounds.size());
+ BBox<T> b0 = bounds.front();
+ BBox<T> b1 = bounds.back();
+ for (size_t i=1; i<bounds.size()-1; i++) {
+ const float f = float(i)/float(bounds.size()-1);
+ const BBox<T> bt = lerp(b0,b1,f);
+ const T dlower = min(bounds[i].lower-bt.lower,T(zero));
+ const T dupper = max(bounds[i].upper-bt.upper,T(zero));
+ b0.lower += dlower; b1.lower += dlower;
+ b0.upper += dupper; b1.upper += dupper;
+ }
+ bounds0 = b0;
+ bounds1 = b1;
+ }
+
+ /*! calculates the linear bounds of a primitive for the specified time range */
+ template<typename BoundsFunc>
+ __forceinline LBBox(const BoundsFunc& bounds, const BBox1f& time_range, float numTimeSegments)
+ {
+ const float lower = time_range.lower*numTimeSegments;
+ const float upper = time_range.upper*numTimeSegments;
+ const float ilowerf = floor(lower);
+ const float iupperf = ceil(upper);
+ const int ilower = (int)ilowerf;
+ const int iupper = (int)iupperf;
+
+ const BBox<T> blower0 = bounds(ilower);
+ const BBox<T> bupper1 = bounds(iupper);
+
+ if (iupper-ilower == 1) {
+ bounds0 = lerp(blower0, bupper1, lower-ilowerf);
+ bounds1 = lerp(bupper1, blower0, iupperf-upper);
+ return;
+ }
+
+ const BBox<T> blower1 = bounds(ilower+1);
+ const BBox<T> bupper0 = bounds(iupper-1);
+ BBox<T> b0 = lerp(blower0, blower1, lower-ilowerf);
+ BBox<T> b1 = lerp(bupper1, bupper0, iupperf-upper);
+
+ for (int i = ilower+1; i < iupper; i++)
+ {
+ const float f = (float(i)/numTimeSegments - time_range.lower) / time_range.size();
+ const BBox<T> bt = lerp(b0, b1, f);
+ const BBox<T> bi = bounds(i);
+ const T dlower = min(bi.lower-bt.lower, T(zero));
+ const T dupper = max(bi.upper-bt.upper, T(zero));
+ b0.lower += dlower; b1.lower += dlower;
+ b0.upper += dupper; b1.upper += dupper;
+ }
+
+ bounds0 = b0;
+ bounds1 = b1;
+ }
+
+ /*! calculates the linear bounds of a primitive for the specified time range */
+ template<typename BoundsFunc>
+ __forceinline LBBox(const BoundsFunc& bounds, const BBox1f& time_range_in, const BBox1f& geom_time_range, float geom_time_segments)
+ {
+ /* normalize global time_range_in to local geom_time_range */
+ const BBox1f time_range((time_range_in.lower-geom_time_range.lower)/geom_time_range.size(),
+ (time_range_in.upper-geom_time_range.lower)/geom_time_range.size());
+
+ const float lower = time_range.lower*geom_time_segments;
+ const float upper = time_range.upper*geom_time_segments;
+ const float ilowerf = floor(lower);
+ const float iupperf = ceil(upper);
+ const float ilowerfc = max(0.0f,ilowerf);
+ const float iupperfc = min(iupperf,geom_time_segments);
+ const int ilowerc = (int)ilowerfc;
+ const int iupperc = (int)iupperfc;
+ assert(iupperc-ilowerc > 0);
+
+ /* this larger iteration range guarantees that we process borders of geom_time_range is (partially) inside time_range_in */
+ const int ilower_iter = max(-1,(int)ilowerf);
+ const int iupper_iter = min((int)iupperf,(int)geom_time_segments+1);
+
+ const BBox<T> blower0 = bounds(ilowerc);
+ const BBox<T> bupper1 = bounds(iupperc);
+ if (iupper_iter-ilower_iter == 1) {
+ bounds0 = lerp(blower0, bupper1, max(0.0f,lower-ilowerfc));
+ bounds1 = lerp(bupper1, blower0, max(0.0f,iupperfc-upper));
+ return;
+ }
+
+ const BBox<T> blower1 = bounds(ilowerc+1);
+ const BBox<T> bupper0 = bounds(iupperc-1);
+ BBox<T> b0 = lerp(blower0, blower1, max(0.0f,lower-ilowerfc));
+ BBox<T> b1 = lerp(bupper1, bupper0, max(0.0f,iupperfc-upper));
+
+ for (int i = ilower_iter+1; i < iupper_iter; i++)
+ {
+ const float f = (float(i)/geom_time_segments - time_range.lower) / time_range.size();
+ const BBox<T> bt = lerp(b0, b1, f);
+ const BBox<T> bi = bounds(i);
+ const T dlower = min(bi.lower-bt.lower, T(zero));
+ const T dupper = max(bi.upper-bt.upper, T(zero));
+ b0.lower += dlower; b1.lower += dlower;
+ b0.upper += dupper; b1.upper += dupper;
+ }
+
+ bounds0 = b0;
+ bounds1 = b1;
+ }
+
+ /*! calculates the linear bounds of a primitive for the specified time range */
+ template<typename BoundsFunc>
+ __forceinline LBBox(const BoundsFunc& bounds, const range<int>& time_range, int numTimeSegments)
+ {
+ const int ilower = time_range.begin();
+ const int iupper = time_range.end();
+
+ BBox<T> b0 = bounds(ilower);
+ BBox<T> b1 = bounds(iupper);
+
+ if (iupper-ilower == 1)
+ {
+ bounds0 = b0;
+ bounds1 = b1;
+ return;
+ }
+
+ for (int i = ilower+1; i<iupper; i++)
+ {
+ const float f = float(i - time_range.begin()) / float(time_range.size());
+ const BBox<T> bt = lerp(b0, b1, f);
+ const BBox<T> bi = bounds(i);
+ const T dlower = min(bi.lower-bt.lower, T(zero));
+ const T dupper = max(bi.upper-bt.upper, T(zero));
+ b0.lower += dlower; b1.lower += dlower;
+ b0.upper += dupper; b1.upper += dupper;
+ }
+
+ bounds0 = b0;
+ bounds1 = b1;
+ }
+
+ public:
+
+ __forceinline bool empty() const {
+ return bounds().empty();
+ }
+
+ __forceinline BBox<T> bounds () const {
+ return merge(bounds0,bounds1);
+ }
+
+ __forceinline BBox<T> interpolate( const float t ) const {
+ return lerp(bounds0,bounds1,t);
+ }
+
+ __forceinline LBBox<T> interpolate( const BBox1f& dt ) const {
+ return LBBox<T>(interpolate(dt.lower),interpolate(dt.upper));
+ }
+
+ __forceinline void extend( const LBBox& other ) {
+ bounds0.extend(other.bounds0);
+ bounds1.extend(other.bounds1);
+ }
+
+ __forceinline float expectedHalfArea() const;
+
+ __forceinline float expectedHalfArea(const BBox1f& dt) const {
+ return interpolate(dt).expectedHalfArea();
+ }
+
+ __forceinline float expectedApproxHalfArea() const {
+ return 0.5f*(halfArea(bounds0) + halfArea(bounds1));
+ }
+
+ /* calculates bounds for [0,1] time range from bounds in dt time range */
+ __forceinline LBBox global(const BBox1f& dt) const
+ {
+ const float rcp_dt_size = 1.0f/dt.size();
+ const BBox<T> b0 = interpolate(-dt.lower*rcp_dt_size);
+ const BBox<T> b1 = interpolate((1.0f-dt.lower)*rcp_dt_size);
+ return LBBox(b0,b1);
+ }
+
+ /*! Comparison Operators */
+ //template<typename TT> friend __forceinline bool operator==( const LBBox<TT>& a, const LBBox<TT>& b ) { return a.bounds0 == b.bounds0 && a.bounds1 == b.bounds1; }
+ //template<typename TT> friend __forceinline bool operator!=( const LBBox<TT>& a, const LBBox<TT>& b ) { return a.bounds0 != b.bounds0 || a.bounds1 != b.bounds1; }
+ friend __forceinline bool operator==( const LBBox& a, const LBBox& b ) { return a.bounds0 == b.bounds0 && a.bounds1 == b.bounds1; }
+ friend __forceinline bool operator!=( const LBBox& a, const LBBox& b ) { return a.bounds0 != b.bounds0 || a.bounds1 != b.bounds1; }
+
+ /*! output operator */
+ friend __forceinline embree_ostream operator<<(embree_ostream cout, const LBBox& box) {
+ return cout << "LBBox { " << box.bounds0 << "; " << box.bounds1 << " }";
+ }
+
+ public:
+ BBox<T> bounds0, bounds1;
+ };
+
+ /*! tests if box is finite */
+ template<typename T>
+ __forceinline bool isvalid( const LBBox<T>& v ) {
+ return isvalid(v.bounds0) && isvalid(v.bounds1);
+ }
+
+ template<typename T>
+ __forceinline bool isvalid_non_empty( const LBBox<T>& v ) {
+ return isvalid_non_empty(v.bounds0) && isvalid_non_empty(v.bounds1);
+ }
+
+ template<typename T>
+ __forceinline T expectedArea(const T& a0, const T& a1, const T& b0, const T& b1)
+ {
+ const T da = a1-a0;
+ const T db = b1-b0;
+ return a0*b0+(a0*db+da*b0)*T(0.5f) + da*db*T(1.0f/3.0f);
+ }
+
+ template<> __forceinline float LBBox<Vec3fa>::expectedHalfArea() const
+ {
+ const Vec3fa d0 = bounds0.size();
+ const Vec3fa d1 = bounds1.size();
+ return reduce_add(expectedArea(Vec3fa(d0.x,d0.y,d0.z),
+ Vec3fa(d1.x,d1.y,d1.z),
+ Vec3fa(d0.y,d0.z,d0.x),
+ Vec3fa(d1.y,d1.z,d1.x)));
+ }
+
+ template<typename T>
+ __forceinline float expectedApproxHalfArea(const LBBox<T>& box) {
+ return box.expectedApproxHalfArea();
+ }
+
+ template<typename T>
+ __forceinline LBBox<T> merge(const LBBox<T>& a, const LBBox<T>& b) {
+ return LBBox<T>(merge(a.bounds0, b.bounds0), merge(a.bounds1, b.bounds1));
+ }
+
+ /*! subset relation */
+ template<typename T> __inline bool subset( const LBBox<T>& a, const LBBox<T>& b ) {
+ return subset(a.bounds0,b.bounds0) && subset(a.bounds1,b.bounds1);
+ }
+
+ /*! default template instantiations */
+ typedef LBBox<float> LBBox1f;
+ typedef LBBox<Vec2f> LBBox2f;
+ typedef LBBox<Vec3f> LBBox3f;
+ typedef LBBox<Vec3fa> LBBox3fa;
+ typedef LBBox<Vec3fx> LBBox3fx;
+}
diff --git a/thirdparty/embree/common/math/linearspace2.h b/thirdparty/embree/common/math/linearspace2.h
new file mode 100644
index 0000000000..184ee695fb
--- /dev/null
+++ b/thirdparty/embree/common/math/linearspace2.h
@@ -0,0 +1,148 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "vec2.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////////////////////
+ /// 2D Linear Transform (2x2 Matrix)
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> struct LinearSpace2
+ {
+ typedef T Vector;
+ typedef typename T::Scalar Scalar;
+
+ /*! default matrix constructor */
+ __forceinline LinearSpace2 ( ) {}
+ __forceinline LinearSpace2 ( const LinearSpace2& other ) { vx = other.vx; vy = other.vy; }
+ __forceinline LinearSpace2& operator=( const LinearSpace2& other ) { vx = other.vx; vy = other.vy; return *this; }
+
+ template<typename L1> __forceinline LinearSpace2( const LinearSpace2<L1>& s ) : vx(s.vx), vy(s.vy) {}
+
+ /*! matrix construction from column vectors */
+ __forceinline LinearSpace2(const Vector& vx, const Vector& vy)
+ : vx(vx), vy(vy) {}
+
+ /*! matrix construction from row mayor data */
+ __forceinline LinearSpace2(const Scalar& m00, const Scalar& m01,
+ const Scalar& m10, const Scalar& m11)
+ : vx(m00,m10), vy(m01,m11) {}
+
+ /*! compute the determinant of the matrix */
+ __forceinline const Scalar det() const { return vx.x*vy.y - vx.y*vy.x; }
+
+ /*! compute adjoint matrix */
+ __forceinline const LinearSpace2 adjoint() const { return LinearSpace2(vy.y,-vy.x,-vx.y,vx.x); }
+
+ /*! compute inverse matrix */
+ __forceinline const LinearSpace2 inverse() const { return adjoint()/det(); }
+
+ /*! compute transposed matrix */
+ __forceinline const LinearSpace2 transposed() const { return LinearSpace2(vx.x,vx.y,vy.x,vy.y); }
+
+ /*! returns first row of matrix */
+ __forceinline Vector row0() const { return Vector(vx.x,vy.x); }
+
+ /*! returns second row of matrix */
+ __forceinline Vector row1() const { return Vector(vx.y,vy.y); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline LinearSpace2( ZeroTy ) : vx(zero), vy(zero) {}
+ __forceinline LinearSpace2( OneTy ) : vx(one, zero), vy(zero, one) {}
+
+ /*! return matrix for scaling */
+ static __forceinline LinearSpace2 scale(const Vector& s) {
+ return LinearSpace2(s.x, 0,
+ 0 , s.y);
+ }
+
+ /*! return matrix for rotation */
+ static __forceinline LinearSpace2 rotate(const Scalar& r) {
+ Scalar s = sin(r), c = cos(r);
+ return LinearSpace2(c, -s,
+ s, c);
+ }
+
+ /*! return closest orthogonal matrix (i.e. a general rotation including reflection) */
+ LinearSpace2 orthogonal() const
+ {
+ LinearSpace2 m = *this;
+
+ // mirrored?
+ Scalar mirror(one);
+ if (m.det() < Scalar(zero)) {
+ m.vx = -m.vx;
+ mirror = -mirror;
+ }
+
+ // rotation
+ for (int i = 0; i < 99; i++) {
+ const LinearSpace2 m_next = 0.5 * (m + m.transposed().inverse());
+ const LinearSpace2 d = m_next - m;
+ m = m_next;
+ // norm^2 of difference small enough?
+ if (max(dot(d.vx, d.vx), dot(d.vy, d.vy)) < 1e-8)
+ break;
+ }
+
+ // rotation * mirror_x
+ return LinearSpace2(mirror*m.vx, m.vy);
+ }
+
+ public:
+
+ /*! the column vectors of the matrix */
+ Vector vx,vy;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline LinearSpace2<T> operator -( const LinearSpace2<T>& a ) { return LinearSpace2<T>(-a.vx,-a.vy); }
+ template<typename T> __forceinline LinearSpace2<T> operator +( const LinearSpace2<T>& a ) { return LinearSpace2<T>(+a.vx,+a.vy); }
+ template<typename T> __forceinline LinearSpace2<T> rcp ( const LinearSpace2<T>& a ) { return a.inverse(); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline LinearSpace2<T> operator +( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return LinearSpace2<T>(a.vx+b.vx,a.vy+b.vy); }
+ template<typename T> __forceinline LinearSpace2<T> operator -( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return LinearSpace2<T>(a.vx-b.vx,a.vy-b.vy); }
+
+ template<typename T> __forceinline LinearSpace2<T> operator*(const typename T::Scalar & a, const LinearSpace2<T>& b) { return LinearSpace2<T>(a*b.vx, a*b.vy); }
+ template<typename T> __forceinline T operator*(const LinearSpace2<T>& a, const T & b) { return b.x*a.vx + b.y*a.vy; }
+ template<typename T> __forceinline LinearSpace2<T> operator*(const LinearSpace2<T>& a, const LinearSpace2<T>& b) { return LinearSpace2<T>(a*b.vx, a*b.vy); }
+
+ template<typename T> __forceinline LinearSpace2<T> operator/(const LinearSpace2<T>& a, const typename T::Scalar & b) { return LinearSpace2<T>(a.vx/b, a.vy/b); }
+ template<typename T> __forceinline LinearSpace2<T> operator/(const LinearSpace2<T>& a, const LinearSpace2<T>& b) { return a * rcp(b); }
+
+ template<typename T> __forceinline LinearSpace2<T>& operator *=( LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return a = a * b; }
+ template<typename T> __forceinline LinearSpace2<T>& operator /=( LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return a = a / b; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline bool operator ==( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return a.vx == b.vx && a.vy == b.vy; }
+ template<typename T> __forceinline bool operator !=( const LinearSpace2<T>& a, const LinearSpace2<T>& b ) { return a.vx != b.vx || a.vy != b.vy; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> static embree_ostream operator<<(embree_ostream cout, const LinearSpace2<T>& m) {
+ return cout << "{ vx = " << m.vx << ", vy = " << m.vy << "}";
+ }
+
+ /*! Shortcuts for common linear spaces. */
+ typedef LinearSpace2<Vec2f> LinearSpace2f;
+ typedef LinearSpace2<Vec2fa> LinearSpace2fa;
+}
diff --git a/thirdparty/embree/common/math/linearspace3.h b/thirdparty/embree/common/math/linearspace3.h
new file mode 100644
index 0000000000..9eaa2cc2bb
--- /dev/null
+++ b/thirdparty/embree/common/math/linearspace3.h
@@ -0,0 +1,213 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "vec3.h"
+#include "quaternion.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////////////////////
+ /// 3D Linear Transform (3x3 Matrix)
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> struct LinearSpace3
+ {
+ typedef T Vector;
+ typedef typename T::Scalar Scalar;
+
+ /*! default matrix constructor */
+ __forceinline LinearSpace3 ( ) {}
+ __forceinline LinearSpace3 ( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; }
+ __forceinline LinearSpace3& operator=( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; return *this; }
+
+ template<typename L1> __forceinline LinearSpace3( const LinearSpace3<L1>& s ) : vx(s.vx), vy(s.vy), vz(s.vz) {}
+
+ /*! matrix construction from column vectors */
+ __forceinline LinearSpace3(const Vector& vx, const Vector& vy, const Vector& vz)
+ : vx(vx), vy(vy), vz(vz) {}
+
+ /*! construction from quaternion */
+ __forceinline LinearSpace3( const QuaternionT<Scalar>& q )
+ : vx((q.r*q.r + q.i*q.i - q.j*q.j - q.k*q.k), 2.0f*(q.i*q.j + q.r*q.k), 2.0f*(q.i*q.k - q.r*q.j))
+ , vy(2.0f*(q.i*q.j - q.r*q.k), (q.r*q.r - q.i*q.i + q.j*q.j - q.k*q.k), 2.0f*(q.j*q.k + q.r*q.i))
+ , vz(2.0f*(q.i*q.k + q.r*q.j), 2.0f*(q.j*q.k - q.r*q.i), (q.r*q.r - q.i*q.i - q.j*q.j + q.k*q.k)) {}
+
+ /*! matrix construction from row mayor data */
+ __forceinline LinearSpace3(const Scalar& m00, const Scalar& m01, const Scalar& m02,
+ const Scalar& m10, const Scalar& m11, const Scalar& m12,
+ const Scalar& m20, const Scalar& m21, const Scalar& m22)
+ : vx(m00,m10,m20), vy(m01,m11,m21), vz(m02,m12,m22) {}
+
+ /*! compute the determinant of the matrix */
+ __forceinline const Scalar det() const { return dot(vx,cross(vy,vz)); }
+
+ /*! compute adjoint matrix */
+ __forceinline const LinearSpace3 adjoint() const { return LinearSpace3(cross(vy,vz),cross(vz,vx),cross(vx,vy)).transposed(); }
+
+ /*! compute inverse matrix */
+ __forceinline const LinearSpace3 inverse() const { return adjoint()/det(); }
+
+ /*! compute transposed matrix */
+ __forceinline const LinearSpace3 transposed() const { return LinearSpace3(vx.x,vx.y,vx.z,vy.x,vy.y,vy.z,vz.x,vz.y,vz.z); }
+
+ /*! returns first row of matrix */
+ __forceinline Vector row0() const { return Vector(vx.x,vy.x,vz.x); }
+
+ /*! returns second row of matrix */
+ __forceinline Vector row1() const { return Vector(vx.y,vy.y,vz.y); }
+
+ /*! returns third row of matrix */
+ __forceinline Vector row2() const { return Vector(vx.z,vy.z,vz.z); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline LinearSpace3( ZeroTy ) : vx(zero), vy(zero), vz(zero) {}
+ __forceinline LinearSpace3( OneTy ) : vx(one, zero, zero), vy(zero, one, zero), vz(zero, zero, one) {}
+
+ /*! return matrix for scaling */
+ static __forceinline LinearSpace3 scale(const Vector& s) {
+ return LinearSpace3(s.x, 0, 0,
+ 0 , s.y, 0,
+ 0 , 0, s.z);
+ }
+
+ /*! return matrix for rotation around arbitrary axis */
+ static __forceinline LinearSpace3 rotate(const Vector& _u, const Scalar& r) {
+ Vector u = normalize(_u);
+ Scalar s = sin(r), c = cos(r);
+ return LinearSpace3(u.x*u.x+(1-u.x*u.x)*c, u.x*u.y*(1-c)-u.z*s, u.x*u.z*(1-c)+u.y*s,
+ u.x*u.y*(1-c)+u.z*s, u.y*u.y+(1-u.y*u.y)*c, u.y*u.z*(1-c)-u.x*s,
+ u.x*u.z*(1-c)-u.y*s, u.y*u.z*(1-c)+u.x*s, u.z*u.z+(1-u.z*u.z)*c);
+ }
+
+ public:
+
+ /*! the column vectors of the matrix */
+ Vector vx,vy,vz;
+ };
+
+ /*! compute transposed matrix */
+ template<> __forceinline const LinearSpace3<Vec3fa> LinearSpace3<Vec3fa>::transposed() const {
+ vfloat4 rx,ry,rz; transpose((vfloat4&)vx,(vfloat4&)vy,(vfloat4&)vz,vfloat4(zero),rx,ry,rz);
+ return LinearSpace3<Vec3fa>(Vec3fa(rx),Vec3fa(ry),Vec3fa(rz));
+ }
+
+ template<typename T>
+ __forceinline const LinearSpace3<T> transposed(const LinearSpace3<T>& xfm) {
+ return xfm.transposed();
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline LinearSpace3<T> operator -( const LinearSpace3<T>& a ) { return LinearSpace3<T>(-a.vx,-a.vy,-a.vz); }
+ template<typename T> __forceinline LinearSpace3<T> operator +( const LinearSpace3<T>& a ) { return LinearSpace3<T>(+a.vx,+a.vy,+a.vz); }
+ template<typename T> __forceinline LinearSpace3<T> rcp ( const LinearSpace3<T>& a ) { return a.inverse(); }
+
+ /* constructs a coordinate frame form a normalized normal */
+ template<typename T> __forceinline LinearSpace3<T> frame(const T& N)
+ {
+ const T dx0(0,N.z,-N.y);
+ const T dx1(-N.z,0,N.x);
+ const T dx = normalize(select(dot(dx0,dx0) > dot(dx1,dx1),dx0,dx1));
+ const T dy = normalize(cross(N,dx));
+ return LinearSpace3<T>(dx,dy,N);
+ }
+
+ /* constructs a coordinate frame from a normal and approximate x-direction */
+ template<typename T> __forceinline LinearSpace3<T> frame(const T& N, const T& dxi)
+ {
+ if (abs(dot(dxi,N)) > 0.99f) return frame(N); // fallback in case N and dxi are very parallel
+ const T dx = normalize(cross(dxi,N));
+ const T dy = normalize(cross(N,dx));
+ return LinearSpace3<T>(dx,dy,N);
+ }
+
+ /* clamps linear space to range -1 to +1 */
+ template<typename T> __forceinline LinearSpace3<T> clamp(const LinearSpace3<T>& space) {
+ return LinearSpace3<T>(clamp(space.vx,T(-1.0f),T(1.0f)),
+ clamp(space.vy,T(-1.0f),T(1.0f)),
+ clamp(space.vz,T(-1.0f),T(1.0f)));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline LinearSpace3<T> operator +( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return LinearSpace3<T>(a.vx+b.vx,a.vy+b.vy,a.vz+b.vz); }
+ template<typename T> __forceinline LinearSpace3<T> operator -( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return LinearSpace3<T>(a.vx-b.vx,a.vy-b.vy,a.vz-b.vz); }
+
+ template<typename T> __forceinline LinearSpace3<T> operator*(const typename T::Scalar & a, const LinearSpace3<T>& b) { return LinearSpace3<T>(a*b.vx, a*b.vy, a*b.vz); }
+ template<typename T> __forceinline T operator*(const LinearSpace3<T>& a, const T & b) { return madd(T(b.x),a.vx,madd(T(b.y),a.vy,T(b.z)*a.vz)); }
+ template<typename T> __forceinline LinearSpace3<T> operator*(const LinearSpace3<T>& a, const LinearSpace3<T>& b) { return LinearSpace3<T>(a*b.vx, a*b.vy, a*b.vz); }
+
+ template<typename T> __forceinline LinearSpace3<T> operator/(const LinearSpace3<T>& a, const typename T::Scalar & b) { return LinearSpace3<T>(a.vx/b, a.vy/b, a.vz/b); }
+ template<typename T> __forceinline LinearSpace3<T> operator/(const LinearSpace3<T>& a, const LinearSpace3<T>& b) { return a * rcp(b); }
+
+ template<typename T> __forceinline LinearSpace3<T>& operator *=( LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return a = a * b; }
+ template<typename T> __forceinline LinearSpace3<T>& operator /=( LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return a = a / b; }
+
+ template<typename T> __forceinline T xfmPoint (const LinearSpace3<T>& s, const T & a) { return madd(T(a.x),s.vx,madd(T(a.y),s.vy,T(a.z)*s.vz)); }
+ template<typename T> __forceinline T xfmVector(const LinearSpace3<T>& s, const T & a) { return madd(T(a.x),s.vx,madd(T(a.y),s.vy,T(a.z)*s.vz)); }
+ template<typename T> __forceinline T xfmNormal(const LinearSpace3<T>& s, const T & a) { return xfmVector(s.inverse().transposed(),a); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline bool operator ==( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return a.vx == b.vx && a.vy == b.vy && a.vz == b.vz; }
+ template<typename T> __forceinline bool operator !=( const LinearSpace3<T>& a, const LinearSpace3<T>& b ) { return a.vx != b.vx || a.vy != b.vy || a.vz != b.vz; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline LinearSpace3<T> select ( const typename T::Scalar::Bool& s, const LinearSpace3<T>& t, const LinearSpace3<T>& f ) {
+ return LinearSpace3<T>(select(s,t.vx,f.vx),select(s,t.vy,f.vy),select(s,t.vz,f.vz));
+ }
+
+ /*! blending */
+ template<typename T>
+ __forceinline LinearSpace3<T> lerp(const LinearSpace3<T>& l0, const LinearSpace3<T>& l1, const float t)
+ {
+ return LinearSpace3<T>(lerp(l0.vx,l1.vx,t),
+ lerp(l0.vy,l1.vy,t),
+ lerp(l0.vz,l1.vz,t));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> static embree_ostream operator<<(embree_ostream cout, const LinearSpace3<T>& m) {
+ return cout << "{ vx = " << m.vx << ", vy = " << m.vy << ", vz = " << m.vz << "}";
+ }
+
+ /*! Shortcuts for common linear spaces. */
+ typedef LinearSpace3<Vec3f> LinearSpace3f;
+ typedef LinearSpace3<Vec3fa> LinearSpace3fa;
+ typedef LinearSpace3<Vec3fx> LinearSpace3fx;
+ typedef LinearSpace3<Vec3ff> LinearSpace3ff;
+
+ template<int N> using LinearSpace3vf = LinearSpace3<Vec3<vfloat<N>>>;
+ typedef LinearSpace3<Vec3<vfloat<4>>> LinearSpace3vf4;
+ typedef LinearSpace3<Vec3<vfloat<8>>> LinearSpace3vf8;
+ typedef LinearSpace3<Vec3<vfloat<16>>> LinearSpace3vf16;
+
+ /*! blending */
+ template<typename T, typename S>
+ __forceinline LinearSpace3<T> lerp(const LinearSpace3<T>& l0,
+ const LinearSpace3<T>& l1,
+ const S& t)
+ {
+ return LinearSpace3<T>(lerp(l0.vx,l1.vx,t),
+ lerp(l0.vy,l1.vy,t),
+ lerp(l0.vz,l1.vz,t));
+ }
+
+}
diff --git a/thirdparty/embree/common/math/math.h b/thirdparty/embree/common/math/math.h
new file mode 100644
index 0000000000..4bc54c1a6a
--- /dev/null
+++ b/thirdparty/embree/common/math/math.h
@@ -0,0 +1,369 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../sys/platform.h"
+#include "../sys/intrinsics.h"
+#include "constants.h"
+#include <cmath>
+
+#if defined(__ARM_NEON)
+#include "../simd/arm/emulation.h"
+#else
+#include <emmintrin.h>
+#include <xmmintrin.h>
+#include <immintrin.h>
+#endif
+
+#if defined(__WIN32__)
+#if defined(_MSC_VER) && (_MSC_VER <= 1700)
+namespace std
+{
+ __forceinline bool isinf ( const float x ) { return _finite(x) == 0; }
+ __forceinline bool isnan ( const float x ) { return _isnan(x) != 0; }
+ __forceinline bool isfinite (const float x) { return _finite(x) != 0; }
+}
+#endif
+#endif
+
+namespace embree
+{
+ __forceinline bool isvalid ( const float& v ) {
+ return (v > -FLT_LARGE) & (v < +FLT_LARGE);
+ }
+
+ __forceinline int cast_f2i(float f) {
+ union { float f; int i; } v; v.f = f; return v.i;
+ }
+
+ __forceinline float cast_i2f(int i) {
+ union { float f; int i; } v; v.i = i; return v.f;
+ }
+
+ __forceinline int toInt (const float& a) { return int(a); }
+ __forceinline float toFloat(const int& a) { return float(a); }
+
+#if defined(__WIN32__)
+ __forceinline bool finite ( const float x ) { return _finite(x) != 0; }
+#endif
+
+ __forceinline float sign ( const float x ) { return x<0?-1.0f:1.0f; }
+ __forceinline float sqr ( const float x ) { return x*x; }
+
+ __forceinline float rcp ( const float x )
+ {
+ const __m128 a = _mm_set_ss(x);
+
+#if defined(__AVX512VL__)
+ const __m128 r = _mm_rcp14_ss(_mm_set_ss(0.0f),a);
+#else
+ const __m128 r = _mm_rcp_ss(a);
+#endif
+
+#if defined(__AVX2__)
+ return _mm_cvtss_f32(_mm_mul_ss(r,_mm_fnmadd_ss(r, a, _mm_set_ss(2.0f))));
+#else
+ return _mm_cvtss_f32(_mm_mul_ss(r,_mm_sub_ss(_mm_set_ss(2.0f), _mm_mul_ss(r, a))));
+#endif
+ }
+
+ __forceinline float signmsk ( const float x ) {
+ return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
+ }
+ __forceinline float xorf( const float x, const float y ) {
+ return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y)));
+ }
+ __forceinline float andf( const float x, const unsigned y ) {
+ return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y))));
+ }
+ __forceinline float rsqrt( const float x )
+ {
+ const __m128 a = _mm_set_ss(x);
+#if defined(__AVX512VL__)
+ __m128 r = _mm_rsqrt14_ss(_mm_set_ss(0.0f),a);
+#else
+ __m128 r = _mm_rsqrt_ss(a);
+#endif
+ r = _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r), _mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), _mm_mul_ss(r, r)));
+#if defined(__ARM_NEON)
+ r = _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r), _mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), _mm_mul_ss(r, r)));
+#endif
+ return _mm_cvtss_f32(r);
+ }
+
+#if defined(__WIN32__) && defined(_MSC_VER) && (_MSC_VER <= 1700)
+ __forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }
+ __forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
+ __forceinline int roundf(float f) { return (int)(f + 0.5f); }
+#else
+ __forceinline float nextafter(float x, float y) { return ::nextafterf(x, y); }
+ __forceinline double nextafter(double x, double y) { return ::nextafter(x, y); }
+#endif
+
+ __forceinline float abs ( const float x ) { return ::fabsf(x); }
+ __forceinline float acos ( const float x ) { return ::acosf (x); }
+ __forceinline float asin ( const float x ) { return ::asinf (x); }
+ __forceinline float atan ( const float x ) { return ::atanf (x); }
+ __forceinline float atan2( const float y, const float x ) { return ::atan2f(y, x); }
+ __forceinline float cos ( const float x ) { return ::cosf (x); }
+ __forceinline float cosh ( const float x ) { return ::coshf (x); }
+ __forceinline float exp ( const float x ) { return ::expf (x); }
+ __forceinline float fmod ( const float x, const float y ) { return ::fmodf (x, y); }
+ __forceinline float log ( const float x ) { return ::logf (x); }
+ __forceinline float log10( const float x ) { return ::log10f(x); }
+ __forceinline float pow ( const float x, const float y ) { return ::powf (x, y); }
+ __forceinline float sin ( const float x ) { return ::sinf (x); }
+ __forceinline float sinh ( const float x ) { return ::sinhf (x); }
+ __forceinline float sqrt ( const float x ) { return ::sqrtf (x); }
+ __forceinline float tan ( const float x ) { return ::tanf (x); }
+ __forceinline float tanh ( const float x ) { return ::tanhf (x); }
+ __forceinline float floor( const float x ) { return ::floorf (x); }
+ __forceinline float ceil ( const float x ) { return ::ceilf (x); }
+ __forceinline float frac ( const float x ) { return x-floor(x); }
+
+ __forceinline double abs ( const double x ) { return ::fabs(x); }
+ __forceinline double sign ( const double x ) { return x<0?-1.0:1.0; }
+ __forceinline double acos ( const double x ) { return ::acos (x); }
+ __forceinline double asin ( const double x ) { return ::asin (x); }
+ __forceinline double atan ( const double x ) { return ::atan (x); }
+ __forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); }
+ __forceinline double cos ( const double x ) { return ::cos (x); }
+ __forceinline double cosh ( const double x ) { return ::cosh (x); }
+ __forceinline double exp ( const double x ) { return ::exp (x); }
+ __forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); }
+ __forceinline double log ( const double x ) { return ::log (x); }
+ __forceinline double log10( const double x ) { return ::log10(x); }
+ __forceinline double pow ( const double x, const double y ) { return ::pow (x, y); }
+ __forceinline double rcp ( const double x ) { return 1.0/x; }
+ __forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); }
+ __forceinline double sin ( const double x ) { return ::sin (x); }
+ __forceinline double sinh ( const double x ) { return ::sinh (x); }
+ __forceinline double sqr ( const double x ) { return x*x; }
+ __forceinline double sqrt ( const double x ) { return ::sqrt (x); }
+ __forceinline double tan ( const double x ) { return ::tan (x); }
+ __forceinline double tanh ( const double x ) { return ::tanh (x); }
+ __forceinline double floor( const double x ) { return ::floor (x); }
+ __forceinline double ceil ( const double x ) { return ::ceil (x); }
+
+#if defined(__SSE4_1__)
+ __forceinline float mini(float a, float b) {
+ const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
+ const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
+ const __m128i ci = _mm_min_epi32(ai,bi);
+ return _mm_cvtss_f32(_mm_castsi128_ps(ci));
+ }
+#endif
+
+#if defined(__SSE4_1__)
+ __forceinline float maxi(float a, float b) {
+ const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
+ const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
+ const __m128i ci = _mm_max_epi32(ai,bi);
+ return _mm_cvtss_f32(_mm_castsi128_ps(ci));
+ }
+#endif
+
+ template<typename T>
+ __forceinline T twice(const T& a) { return a+a; }
+
+ __forceinline int min(int a, int b) { return a<b ? a:b; }
+ __forceinline unsigned min(unsigned a, unsigned b) { return a<b ? a:b; }
+ __forceinline int64_t min(int64_t a, int64_t b) { return a<b ? a:b; }
+ __forceinline float min(float a, float b) { return a<b ? a:b; }
+ __forceinline double min(double a, double b) { return a<b ? a:b; }
+#if defined(__64BIT__)
+ __forceinline size_t min(size_t a, size_t b) { return a<b ? a:b; }
+#endif
+
+ template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); }
+ template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); }
+ template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); }
+
+ template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }
+ template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }
+ template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }
+
+ __forceinline int max(int a, int b) { return a<b ? b:a; }
+ __forceinline unsigned max(unsigned a, unsigned b) { return a<b ? b:a; }
+ __forceinline int64_t max(int64_t a, int64_t b) { return a<b ? b:a; }
+ __forceinline float max(float a, float b) { return a<b ? b:a; }
+ __forceinline double max(double a, double b) { return a<b ? b:a; }
+#if defined(__64BIT__)
+ __forceinline size_t max(size_t a, size_t b) { return a<b ? b:a; }
+#endif
+
+ template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); }
+ template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); }
+ template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); }
+
+ template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }
+ template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }
+ template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }
+
+#if defined(__MACOSX__)
+ __forceinline ssize_t min(ssize_t a, ssize_t b) { return a<b ? a:b; }
+ __forceinline ssize_t max(ssize_t a, ssize_t b) { return a<b ? b:a; }
+#endif
+
+#if defined(__MACOSX__) && !defined(__INTEL_COMPILER)
+ __forceinline void sincosf(float x, float *sin, float *cos) {
+ __sincosf(x,sin,cos);
+ }
+#endif
+
+#if defined(__WIN32__) || defined(__FreeBSD__)
+ __forceinline void sincosf(float x, float *s, float *c) {
+ *s = sinf(x); *c = cosf(x);
+ }
+#endif
+
+ template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); }
+ template<typename T> __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); }
+
+ template<typename T> __forceinline T deg2rad ( const T& x ) { return x * T(1.74532925199432957692e-2f); }
+ template<typename T> __forceinline T rad2deg ( const T& x ) { return x * T(5.72957795130823208768e1f); }
+ template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); }
+ template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); }
+
+#if defined(__AVX2__)
+ __forceinline float madd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
+ __forceinline float msub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
+ __forceinline float nmadd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
+ __forceinline float nmsub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
+#else
+ __forceinline float madd ( const float a, const float b, const float c) { return a*b+c; }
+ __forceinline float msub ( const float a, const float b, const float c) { return a*b-c; }
+ __forceinline float nmadd ( const float a, const float b, const float c) { return -a*b+c;}
+ __forceinline float nmsub ( const float a, const float b, const float c) { return -a*b-c; }
+#endif
+
+ /*! random functions */
+ template<typename T> T random() { return T(0); }
+#if defined(_WIN32)
+ template<> __forceinline int random() { return int(rand()) ^ (int(rand()) << 8) ^ (int(rand()) << 16); }
+ template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 8) ^ (uint32_t(rand()) << 16); }
+#else
+ template<> __forceinline int random() { return int(rand()); }
+ template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); }
+#endif
+ template<> __forceinline float random() { return rand()/float(RAND_MAX); }
+ template<> __forceinline double random() { return rand()/double(RAND_MAX); }
+
+#if _WIN32
+ __forceinline double drand48() {
+ return double(rand())/double(RAND_MAX);
+ }
+
+ __forceinline void srand48(long seed) {
+ return srand(seed);
+ }
+#endif
+
+ /*! selects */
+ __forceinline bool select(bool s, bool t , bool f) { return s ? t : f; }
+ __forceinline int select(bool s, int t, int f) { return s ? t : f; }
+ __forceinline float select(bool s, float t, float f) { return s ? t : f; }
+
+ __forceinline bool all(bool s) { return s; }
+
+ __forceinline float lerp(const float v0, const float v1, const float t) {
+ return madd(1.0f-t,v0,t*v1);
+ }
+
+ template<typename T>
+ __forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {
+ return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3)));
+ }
+
+ /*! exchange */
+ template<typename T> __forceinline void xchg ( T& a, T& b ) { const T tmp = a; a = b; b = tmp; }
+
+ /* load/store */
+ template<typename Ty> struct mem;
+
+ template<> struct mem<float> {
+ static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
+ static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
+
+ static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
+ static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
+ };
+
+ /*! bit reverse operation */
+ template<class T>
+ __forceinline T bitReverse(const T& vin)
+ {
+ T v = vin;
+ v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
+ v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
+ v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
+ v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
+ v = ( v >> 16 ) | ( v << 16);
+ return v;
+ }
+
+ /*! bit interleave operation */
+ template<class T>
+ __forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
+ {
+ T x = xin, y = yin, z = zin;
+ x = (x | (x << 16)) & 0x030000FF;
+ x = (x | (x << 8)) & 0x0300F00F;
+ x = (x | (x << 4)) & 0x030C30C3;
+ x = (x | (x << 2)) & 0x09249249;
+
+ y = (y | (y << 16)) & 0x030000FF;
+ y = (y | (y << 8)) & 0x0300F00F;
+ y = (y | (y << 4)) & 0x030C30C3;
+ y = (y | (y << 2)) & 0x09249249;
+
+ z = (z | (z << 16)) & 0x030000FF;
+ z = (z | (z << 8)) & 0x0300F00F;
+ z = (z | (z << 4)) & 0x030C30C3;
+ z = (z | (z << 2)) & 0x09249249;
+
+ return x | (y << 1) | (z << 2);
+ }
+
+#if defined(__AVX2__)
+
+ template<>
+ __forceinline unsigned int bitInterleave(const unsigned int &xi, const unsigned int& yi, const unsigned int& zi)
+ {
+ const unsigned int xx = pdep(xi,0x49249249 /* 0b01001001001001001001001001001001 */ );
+ const unsigned int yy = pdep(yi,0x92492492 /* 0b10010010010010010010010010010010 */);
+ const unsigned int zz = pdep(zi,0x24924924 /* 0b00100100100100100100100100100100 */);
+ return xx | yy | zz;
+ }
+
+#endif
+
+ /*! bit interleave operation for 64bit data types*/
+ template<class T>
+ __forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){
+ T x = xin & 0x1fffff;
+ T y = yin & 0x1fffff;
+ T z = zin & 0x1fffff;
+
+ x = (x | x << 32) & 0x1f00000000ffff;
+ x = (x | x << 16) & 0x1f0000ff0000ff;
+ x = (x | x << 8) & 0x100f00f00f00f00f;
+ x = (x | x << 4) & 0x10c30c30c30c30c3;
+ x = (x | x << 2) & 0x1249249249249249;
+
+ y = (y | y << 32) & 0x1f00000000ffff;
+ y = (y | y << 16) & 0x1f0000ff0000ff;
+ y = (y | y << 8) & 0x100f00f00f00f00f;
+ y = (y | y << 4) & 0x10c30c30c30c30c3;
+ y = (y | y << 2) & 0x1249249249249249;
+
+ z = (z | z << 32) & 0x1f00000000ffff;
+ z = (z | z << 16) & 0x1f0000ff0000ff;
+ z = (z | z << 8) & 0x100f00f00f00f00f;
+ z = (z | z << 4) & 0x10c30c30c30c30c3;
+ z = (z | z << 2) & 0x1249249249249249;
+
+ return x | (y << 1) | (z << 2);
+ }
+}
diff --git a/thirdparty/embree/common/math/obbox.h b/thirdparty/embree/common/math/obbox.h
new file mode 100644
index 0000000000..2fe8bbf071
--- /dev/null
+++ b/thirdparty/embree/common/math/obbox.h
@@ -0,0 +1,39 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "bbox.h"
+#include "linearspace3.h"
+
+namespace embree
+{
+ /*! Oriented bounding box */
+ template<typename T>
+ struct OBBox
+ {
+ public:
+
+ __forceinline OBBox () {}
+
+ __forceinline OBBox (EmptyTy)
+ : space(one), bounds(empty) {}
+
+ __forceinline OBBox (const BBox<T>& bounds)
+ : space(one), bounds(bounds) {}
+
+ __forceinline OBBox (const LinearSpace3<T>& space, const BBox<T>& bounds)
+ : space(space), bounds(bounds) {}
+
+ friend embree_ostream operator<<(embree_ostream cout, const OBBox& p) {
+ return cout << "{ space = " << p.space << ", bounds = " << p.bounds << "}";
+ }
+
+ public:
+ LinearSpace3<T> space; //!< orthonormal transformation
+ BBox<T> bounds; //!< bounds in transformed space
+ };
+
+ typedef OBBox<Vec3f> OBBox3f;
+ typedef OBBox<Vec3fa> OBBox3fa;
+}
diff --git a/thirdparty/embree/common/math/quaternion.h b/thirdparty/embree/common/math/quaternion.h
new file mode 100644
index 0000000000..080800efcd
--- /dev/null
+++ b/thirdparty/embree/common/math/quaternion.h
@@ -0,0 +1,254 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "vec3.h"
+#include "vec4.h"
+
+#include "transcendental.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////
+ // Quaternion Struct
+ ////////////////////////////////////////////////////////////////
+
+ template<typename T>
+ struct QuaternionT
+ {
+ typedef Vec3<T> Vector;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Construction
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline QuaternionT () { }
+ __forceinline QuaternionT ( const QuaternionT& other ) { r = other.r; i = other.i; j = other.j; k = other.k; }
+ __forceinline QuaternionT& operator=( const QuaternionT& other ) { r = other.r; i = other.i; j = other.j; k = other.k; return *this; }
+
+ __forceinline QuaternionT( const T& r ) : r(r), i(zero), j(zero), k(zero) {}
+ __forceinline explicit QuaternionT( const Vec3<T>& v ) : r(zero), i(v.x), j(v.y), k(v.z) {}
+ __forceinline explicit QuaternionT( const Vec4<T>& v ) : r(v.x), i(v.y), j(v.z), k(v.w) {}
+ __forceinline QuaternionT( const T& r, const T& i, const T& j, const T& k ) : r(r), i(i), j(j), k(k) {}
+ __forceinline QuaternionT( const T& r, const Vec3<T>& v ) : r(r), i(v.x), j(v.y), k(v.z) {}
+
+ __inline QuaternionT( const Vec3<T>& vx, const Vec3<T>& vy, const Vec3<T>& vz );
+ __inline QuaternionT( const T& yaw, const T& pitch, const T& roll );
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline QuaternionT( ZeroTy ) : r(zero), i(zero), j(zero), k(zero) {}
+ __forceinline QuaternionT( OneTy ) : r( one), i(zero), j(zero), k(zero) {}
+
+ /*! return quaternion for rotation around arbitrary axis */
+ static __forceinline QuaternionT rotate(const Vec3<T>& u, const T& r) {
+ return QuaternionT<T>(cos(T(0.5)*r),sin(T(0.5)*r)*normalize(u));
+ }
+
+ /*! returns the rotation axis of the quaternion as a vector */
+ __forceinline Vec3<T> v( ) const { return Vec3<T>(i, j, k); }
+
+ public:
+ T r, i, j, k;
+ };
+
+ template<typename T> __forceinline QuaternionT<T> operator *( const T & a, const QuaternionT<T>& b ) { return QuaternionT<T>(a * b.r, a * b.i, a * b.j, a * b.k); }
+ template<typename T> __forceinline QuaternionT<T> operator *( const QuaternionT<T>& a, const T & b ) { return QuaternionT<T>(a.r * b, a.i * b, a.j * b, a.k * b); }
+
+ ////////////////////////////////////////////////////////////////
+ // Unary Operators
+ ////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline QuaternionT<T> operator +( const QuaternionT<T>& a ) { return QuaternionT<T>(+a.r, +a.i, +a.j, +a.k); }
+ template<typename T> __forceinline QuaternionT<T> operator -( const QuaternionT<T>& a ) { return QuaternionT<T>(-a.r, -a.i, -a.j, -a.k); }
+ template<typename T> __forceinline QuaternionT<T> conj ( const QuaternionT<T>& a ) { return QuaternionT<T>(a.r, -a.i, -a.j, -a.k); }
+ template<typename T> __forceinline T abs ( const QuaternionT<T>& a ) { return sqrt(a.r*a.r + a.i*a.i + a.j*a.j + a.k*a.k); }
+ template<typename T> __forceinline QuaternionT<T> rcp ( const QuaternionT<T>& a ) { return conj(a)*rcp(a.r*a.r + a.i*a.i + a.j*a.j + a.k*a.k); }
+ template<typename T> __forceinline QuaternionT<T> normalize ( const QuaternionT<T>& a ) { return a*rsqrt(a.r*a.r + a.i*a.i + a.j*a.j + a.k*a.k); }
+
+ // evaluates a*q-r
+ template<typename T> __forceinline QuaternionT<T>
+ msub(const T& a, const QuaternionT<T>& q, const QuaternionT<T>& p)
+ {
+ return QuaternionT<T>(msub(a, q.r, p.r),
+ msub(a, q.i, p.i),
+ msub(a, q.j, p.j),
+ msub(a, q.k, p.k));
+ }
+ // evaluates a*q-r
+ template<typename T> __forceinline QuaternionT<T>
+ madd (const T& a, const QuaternionT<T>& q, const QuaternionT<T>& p)
+ {
+ return QuaternionT<T>(madd(a, q.r, p.r),
+ madd(a, q.i, p.i),
+ madd(a, q.j, p.j),
+ madd(a, q.k, p.k));
+ }
+
+ ////////////////////////////////////////////////////////////////
+ // Binary Operators
+ ////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline QuaternionT<T> operator +( const T & a, const QuaternionT<T>& b ) { return QuaternionT<T>(a + b.r, b.i, b.j, b.k); }
+ template<typename T> __forceinline QuaternionT<T> operator +( const QuaternionT<T>& a, const T & b ) { return QuaternionT<T>(a.r + b, a.i, a.j, a.k); }
+ template<typename T> __forceinline QuaternionT<T> operator +( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return QuaternionT<T>(a.r + b.r, a.i + b.i, a.j + b.j, a.k + b.k); }
+ template<typename T> __forceinline QuaternionT<T> operator -( const T & a, const QuaternionT<T>& b ) { return QuaternionT<T>(a - b.r, -b.i, -b.j, -b.k); }
+ template<typename T> __forceinline QuaternionT<T> operator -( const QuaternionT<T>& a, const T & b ) { return QuaternionT<T>(a.r - b, a.i, a.j, a.k); }
+ template<typename T> __forceinline QuaternionT<T> operator -( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return QuaternionT<T>(a.r - b.r, a.i - b.i, a.j - b.j, a.k - b.k); }
+
+ template<typename T> __forceinline Vec3<T> operator *( const QuaternionT<T>& a, const Vec3<T> & b ) { return (a*QuaternionT<T>(b)*conj(a)).v(); }
+ template<typename T> __forceinline QuaternionT<T> operator *( const QuaternionT<T>& a, const QuaternionT<T>& b ) {
+ return QuaternionT<T>(a.r*b.r - a.i*b.i - a.j*b.j - a.k*b.k,
+ a.r*b.i + a.i*b.r + a.j*b.k - a.k*b.j,
+ a.r*b.j - a.i*b.k + a.j*b.r + a.k*b.i,
+ a.r*b.k + a.i*b.j - a.j*b.i + a.k*b.r);
+ }
+ template<typename T> __forceinline QuaternionT<T> operator /( const T & a, const QuaternionT<T>& b ) { return a*rcp(b); }
+ template<typename T> __forceinline QuaternionT<T> operator /( const QuaternionT<T>& a, const T & b ) { return a*rcp(b); }
+ template<typename T> __forceinline QuaternionT<T> operator /( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return a*rcp(b); }
+
+ template<typename T> __forceinline QuaternionT<T>& operator +=( QuaternionT<T>& a, const T & b ) { return a = a+b; }
+ template<typename T> __forceinline QuaternionT<T>& operator +=( QuaternionT<T>& a, const QuaternionT<T>& b ) { return a = a+b; }
+ template<typename T> __forceinline QuaternionT<T>& operator -=( QuaternionT<T>& a, const T & b ) { return a = a-b; }
+ template<typename T> __forceinline QuaternionT<T>& operator -=( QuaternionT<T>& a, const QuaternionT<T>& b ) { return a = a-b; }
+ template<typename T> __forceinline QuaternionT<T>& operator *=( QuaternionT<T>& a, const T & b ) { return a = a*b; }
+ template<typename T> __forceinline QuaternionT<T>& operator *=( QuaternionT<T>& a, const QuaternionT<T>& b ) { return a = a*b; }
+ template<typename T> __forceinline QuaternionT<T>& operator /=( QuaternionT<T>& a, const T & b ) { return a = a*rcp(b); }
+ template<typename T> __forceinline QuaternionT<T>& operator /=( QuaternionT<T>& a, const QuaternionT<T>& b ) { return a = a*rcp(b); }
+
+ template<typename T, typename M> __forceinline QuaternionT<T>
+ select(const M& m, const QuaternionT<T>& q, const QuaternionT<T>& p)
+ {
+ return QuaternionT<T>(select(m, q.r, p.r),
+ select(m, q.i, p.i),
+ select(m, q.j, p.j),
+ select(m, q.k, p.k));
+ }
+
+
+ template<typename T> __forceinline Vec3<T> xfmPoint ( const QuaternionT<T>& a, const Vec3<T>& b ) { return (a*QuaternionT<T>(b)*conj(a)).v(); }
+ template<typename T> __forceinline Vec3<T> xfmVector( const QuaternionT<T>& a, const Vec3<T>& b ) { return (a*QuaternionT<T>(b)*conj(a)).v(); }
+ template<typename T> __forceinline Vec3<T> xfmNormal( const QuaternionT<T>& a, const Vec3<T>& b ) { return (a*QuaternionT<T>(b)*conj(a)).v(); }
+
+ template<typename T> __forceinline T dot(const QuaternionT<T>& a, const QuaternionT<T>& b) { return a.r*b.r + a.i*b.i + a.j*b.j + a.k*b.k; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline bool operator ==( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return a.r == b.r && a.i == b.i && a.j == b.j && a.k == b.k; }
+ template<typename T> __forceinline bool operator !=( const QuaternionT<T>& a, const QuaternionT<T>& b ) { return a.r != b.r || a.i != b.i || a.j != b.j || a.k != b.k; }
+
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Orientation Functions
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> QuaternionT<T>::QuaternionT( const Vec3<T>& vx, const Vec3<T>& vy, const Vec3<T>& vz )
+ {
+ if ( vx.x + vy.y + vz.z >= T(zero) )
+ {
+ const T t = T(one) + (vx.x + vy.y + vz.z);
+ const T s = rsqrt(t)*T(0.5f);
+ r = t*s;
+ i = (vy.z - vz.y)*s;
+ j = (vz.x - vx.z)*s;
+ k = (vx.y - vy.x)*s;
+ }
+ else if ( vx.x >= max(vy.y, vz.z) )
+ {
+ const T t = (T(one) + vx.x) - (vy.y + vz.z);
+ const T s = rsqrt(t)*T(0.5f);
+ r = (vy.z - vz.y)*s;
+ i = t*s;
+ j = (vx.y + vy.x)*s;
+ k = (vz.x + vx.z)*s;
+ }
+ else if ( vy.y >= vz.z ) // if ( vy.y >= max(vz.z, vx.x) )
+ {
+ const T t = (T(one) + vy.y) - (vz.z + vx.x);
+ const T s = rsqrt(t)*T(0.5f);
+ r = (vz.x - vx.z)*s;
+ i = (vx.y + vy.x)*s;
+ j = t*s;
+ k = (vy.z + vz.y)*s;
+ }
+ else //if ( vz.z >= max(vy.y, vx.x) )
+ {
+ const T t = (T(one) + vz.z) - (vx.x + vy.y);
+ const T s = rsqrt(t)*T(0.5f);
+ r = (vx.y - vy.x)*s;
+ i = (vz.x + vx.z)*s;
+ j = (vy.z + vz.y)*s;
+ k = t*s;
+ }
+ }
+
+ template<typename T> QuaternionT<T>::QuaternionT( const T& yaw, const T& pitch, const T& roll )
+ {
+ const T cya = cos(yaw *T(0.5f));
+ const T cpi = cos(pitch*T(0.5f));
+ const T cro = cos(roll *T(0.5f));
+ const T sya = sin(yaw *T(0.5f));
+ const T spi = sin(pitch*T(0.5f));
+ const T sro = sin(roll *T(0.5f));
+ r = cro*cya*cpi + sro*sya*spi;
+ i = cro*cya*spi + sro*sya*cpi;
+ j = cro*sya*cpi - sro*cya*spi;
+ k = sro*cya*cpi - cro*sya*spi;
+ }
+
+ //////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ //////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> static embree_ostream operator<<(embree_ostream cout, const QuaternionT<T>& q) {
+ return cout << "{ r = " << q.r << ", i = " << q.i << ", j = " << q.j << ", k = " << q.k << " }";
+ }
+
+ /*! default template instantiations */
+ typedef QuaternionT<float> Quaternion3f;
+ typedef QuaternionT<double> Quaternion3d;
+
+ template<int N> using Quaternion3vf = QuaternionT<vfloat<N>>;
+ typedef QuaternionT<vfloat<4>> Quaternion3vf4;
+ typedef QuaternionT<vfloat<8>> Quaternion3vf8;
+ typedef QuaternionT<vfloat<16>> Quaternion3vf16;
+
+ //////////////////////////////////////////////////////////////////////////////
+ /// Interpolation
+ //////////////////////////////////////////////////////////////////////////////
+ template<typename T>
+ __forceinline QuaternionT<T>lerp(const QuaternionT<T>& q0,
+ const QuaternionT<T>& q1,
+ const T& factor)
+ {
+ QuaternionT<T> q;
+ q.r = lerp(q0.r, q1.r, factor);
+ q.i = lerp(q0.i, q1.i, factor);
+ q.j = lerp(q0.j, q1.j, factor);
+ q.k = lerp(q0.k, q1.k, factor);
+ return q;
+ }
+
+ template<typename T>
+ __forceinline QuaternionT<T> slerp(const QuaternionT<T>& q0,
+ const QuaternionT<T>& q1_,
+ const T& t)
+ {
+ T cosTheta = dot(q0, q1_);
+ QuaternionT<T> q1 = select(cosTheta < 0.f, -q1_, q1_);
+ cosTheta = select(cosTheta < 0.f, -cosTheta, cosTheta);
+ if (unlikely(all(cosTheta > 0.9995f))) {
+ return normalize(lerp(q0, q1, t));
+ }
+ const T phi = t * fastapprox::acos(cosTheta);
+ T sinPhi, cosPhi;
+ fastapprox::sincos(phi, sinPhi, cosPhi);
+ QuaternionT<T> qperp = sinPhi * normalize(msub(cosTheta, q0, q1));
+ return msub(cosPhi, q0, qperp);
+ }
+}
diff --git a/thirdparty/embree/common/math/range.h b/thirdparty/embree/common/math/range.h
new file mode 100644
index 0000000000..909fadb995
--- /dev/null
+++ b/thirdparty/embree/common/math/range.h
@@ -0,0 +1,137 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../sys/platform.h"
+#include "../math/math.h"
+
+namespace embree
+{
+ template<typename Ty>
+ struct range
+ {
+ __forceinline range() {}
+
+ __forceinline range(const Ty& begin)
+ : _begin(begin), _end(begin+1) {}
+
+ __forceinline range(const Ty& begin, const Ty& end)
+ : _begin(begin), _end(end) {}
+
+ __forceinline range(const range& other)
+ : _begin(other._begin), _end(other._end) {}
+
+ template<typename T1>
+ __forceinline range(const range<T1>& other)
+ : _begin(Ty(other._begin)), _end(Ty(other._end)) {}
+
+ template<typename T1>
+ __forceinline range& operator =(const range<T1>& other) {
+ _begin = other._begin;
+ _end = other._end;
+ return *this;
+ }
+
+ __forceinline Ty begin() const {
+ return _begin;
+ }
+
+ __forceinline Ty end() const {
+ return _end;
+ }
+
+ __forceinline range intersect(const range& r) const {
+ return range (max(_begin,r._begin),min(_end,r._end));
+ }
+
+ __forceinline Ty size() const {
+ return _end - _begin;
+ }
+
+ __forceinline bool empty() const {
+ return _end <= _begin;
+ }
+
+ __forceinline Ty center() const {
+ return (_begin + _end)/2;
+ }
+
+ __forceinline std::pair<range,range> split() const
+ {
+ const Ty _center = center();
+ return std::make_pair(range(_begin,_center),range(_center,_end));
+ }
+
+ __forceinline void split(range& left_o, range& right_o) const
+ {
+ const Ty _center = center();
+ left_o = range(_begin,_center);
+ right_o = range(_center,_end);
+ }
+
+ __forceinline friend bool operator< (const range& r0, const range& r1) {
+ return r0.size() < r1.size();
+ }
+
+ friend embree_ostream operator<<(embree_ostream cout, const range& r) {
+ return cout << "range [" << r.begin() << ", " << r.end() << "]";
+ }
+
+ Ty _begin, _end;
+ };
+
+ template<typename Ty>
+ range<Ty> make_range(const Ty& begin, const Ty& end) {
+ return range<Ty>(begin,end);
+ }
+
+ template<typename Ty>
+ struct extended_range : public range<Ty>
+ {
+ __forceinline extended_range () {}
+
+ __forceinline extended_range (const Ty& begin)
+ : range<Ty>(begin), _ext_end(begin+1) {}
+
+ __forceinline extended_range (const Ty& begin, const Ty& end)
+ : range<Ty>(begin,end), _ext_end(end) {}
+
+ __forceinline extended_range (const Ty& begin, const Ty& end, const Ty& ext_end)
+ : range<Ty>(begin,end), _ext_end(ext_end) {}
+
+ __forceinline Ty ext_end() const {
+ return _ext_end;
+ }
+
+ __forceinline Ty ext_size() const {
+ return _ext_end - range<Ty>::_begin;
+ }
+
+ __forceinline Ty ext_range_size() const {
+ return _ext_end - range<Ty>::_end;
+ }
+
+ __forceinline bool has_ext_range() const {
+ assert(_ext_end >= range<Ty>::_end);
+ return (_ext_end - range<Ty>::_end) > 0;
+ }
+
+ __forceinline void set_ext_range(const size_t ext_end){
+ assert(ext_end >= range<Ty>::_end);
+ _ext_end = ext_end;
+ }
+
+ __forceinline void move_right(const size_t plus){
+ range<Ty>::_begin += plus;
+ range<Ty>::_end += plus;
+ _ext_end += plus;
+ }
+
+ friend embree_ostream operator<<(embree_ostream cout, const extended_range& r) {
+ return cout << "extended_range [" << r.begin() << ", " << r.end() << " (" << r.ext_end() << ")]";
+ }
+
+ Ty _ext_end;
+ };
+}
diff --git a/thirdparty/embree/common/math/transcendental.h b/thirdparty/embree/common/math/transcendental.h
new file mode 100644
index 0000000000..fd16c26e81
--- /dev/null
+++ b/thirdparty/embree/common/math/transcendental.h
@@ -0,0 +1,525 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+// Transcendental functions from "ispc": https://github.com/ispc/ispc/
+// Most of the transcendental implementations in ispc code come from
+// Solomon Boulos's "syrah": https://github.com/boulos/syrah/
+
+#include "../simd/simd.h"
+
+namespace embree
+{
+
+namespace fastapprox
+{
+
+template <typename T>
+__forceinline T sin(const T &v)
+{
+ static const float piOverTwoVec = 1.57079637050628662109375;
+ static const float twoOverPiVec = 0.636619746685028076171875;
+ auto scaled = v * twoOverPiVec;
+ auto kReal = floor(scaled);
+ auto k = toInt(kReal);
+
+ // Reduced range version of x
+ auto x = v - kReal * piOverTwoVec;
+ auto kMod4 = k & 3;
+ auto sinUseCos = (kMod4 == 1 | kMod4 == 3);
+ auto flipSign = (kMod4 > 1);
+
+ // These coefficients are from sollya with fpminimax(sin(x)/x, [|0, 2,
+ // 4, 6, 8, 10|], [|single...|], [0;Pi/2]);
+ static const float sinC2 = -0.16666667163372039794921875;
+ static const float sinC4 = +8.333347737789154052734375e-3;
+ static const float sinC6 = -1.9842604524455964565277099609375e-4;
+ static const float sinC8 = +2.760012648650445044040679931640625e-6;
+ static const float sinC10 = -2.50293279435709337121807038784027099609375e-8;
+
+ static const float cosC2 = -0.5;
+ static const float cosC4 = +4.166664183139801025390625e-2;
+ static const float cosC6 = -1.388833043165504932403564453125e-3;
+ static const float cosC8 = +2.47562347794882953166961669921875e-5;
+ static const float cosC10 = -2.59630184018533327616751194000244140625e-7;
+
+ auto outside = select(sinUseCos, 1., x);
+ auto c2 = select(sinUseCos, T(cosC2), T(sinC2));
+ auto c4 = select(sinUseCos, T(cosC4), T(sinC4));
+ auto c6 = select(sinUseCos, T(cosC6), T(sinC6));
+ auto c8 = select(sinUseCos, T(cosC8), T(sinC8));
+ auto c10 = select(sinUseCos, T(cosC10), T(sinC10));
+
+ auto x2 = x * x;
+ auto formula = x2 * c10 + c8;
+ formula = x2 * formula + c6;
+ formula = x2 * formula + c4;
+ formula = x2 * formula + c2;
+ formula = x2 * formula + 1.;
+ formula *= outside;
+
+ formula = select(flipSign, -formula, formula);
+ return formula;
+}
+
+template <typename T>
+__forceinline T cos(const T &v)
+{
+ static const float piOverTwoVec = 1.57079637050628662109375;
+ static const float twoOverPiVec = 0.636619746685028076171875;
+ auto scaled = v * twoOverPiVec;
+ auto kReal = floor(scaled);
+ auto k = toInt(kReal);
+
+ // Reduced range version of x
+ auto x = v - kReal * piOverTwoVec;
+
+ auto kMod4 = k & 3;
+ auto cosUseCos = (kMod4 == 0 | kMod4 == 2);
+ auto flipSign = (kMod4 == 1 | kMod4 == 2);
+
+ const float sinC2 = -0.16666667163372039794921875;
+ const float sinC4 = +8.333347737789154052734375e-3;
+ const float sinC6 = -1.9842604524455964565277099609375e-4;
+ const float sinC8 = +2.760012648650445044040679931640625e-6;
+ const float sinC10 = -2.50293279435709337121807038784027099609375e-8;
+
+ const float cosC2 = -0.5;
+ const float cosC4 = +4.166664183139801025390625e-2;
+ const float cosC6 = -1.388833043165504932403564453125e-3;
+ const float cosC8 = +2.47562347794882953166961669921875e-5;
+ const float cosC10 = -2.59630184018533327616751194000244140625e-7;
+
+ auto outside = select(cosUseCos, 1., x);
+ auto c2 = select(cosUseCos, T(cosC2), T(sinC2));
+ auto c4 = select(cosUseCos, T(cosC4), T(sinC4));
+ auto c6 = select(cosUseCos, T(cosC6), T(sinC6));
+ auto c8 = select(cosUseCos, T(cosC8), T(sinC8));
+ auto c10 = select(cosUseCos, T(cosC10), T(sinC10));
+
+ auto x2 = x * x;
+ auto formula = x2 * c10 + c8;
+ formula = x2 * formula + c6;
+ formula = x2 * formula + c4;
+ formula = x2 * formula + c2;
+ formula = x2 * formula + 1.;
+ formula *= outside;
+
+ formula = select(flipSign, -formula, formula);
+ return formula;
+}
+
+template <typename T>
+__forceinline void sincos(const T &v, T &sinResult, T &cosResult)
+{
+ const float piOverTwoVec = 1.57079637050628662109375;
+ const float twoOverPiVec = 0.636619746685028076171875;
+ auto scaled = v * twoOverPiVec;
+ auto kReal = floor(scaled);
+ auto k = toInt(kReal);
+
+ // Reduced range version of x
+ auto x = v - kReal * piOverTwoVec;
+ auto kMod4 = k & 3;
+ auto cosUseCos = ((kMod4 == 0) | (kMod4 == 2));
+ auto sinUseCos = ((kMod4 == 1) | (kMod4 == 3));
+ auto sinFlipSign = (kMod4 > 1);
+ auto cosFlipSign = ((kMod4 == 1) | (kMod4 == 2));
+
+ const float oneVec = +1.;
+ const float sinC2 = -0.16666667163372039794921875;
+ const float sinC4 = +8.333347737789154052734375e-3;
+ const float sinC6 = -1.9842604524455964565277099609375e-4;
+ const float sinC8 = +2.760012648650445044040679931640625e-6;
+ const float sinC10 = -2.50293279435709337121807038784027099609375e-8;
+
+ const float cosC2 = -0.5;
+ const float cosC4 = +4.166664183139801025390625e-2;
+ const float cosC6 = -1.388833043165504932403564453125e-3;
+ const float cosC8 = +2.47562347794882953166961669921875e-5;
+ const float cosC10 = -2.59630184018533327616751194000244140625e-7;
+
+ auto x2 = x * x;
+
+ auto sinFormula = x2 * sinC10 + sinC8;
+ auto cosFormula = x2 * cosC10 + cosC8;
+ sinFormula = x2 * sinFormula + sinC6;
+ cosFormula = x2 * cosFormula + cosC6;
+
+ sinFormula = x2 * sinFormula + sinC4;
+ cosFormula = x2 * cosFormula + cosC4;
+
+ sinFormula = x2 * sinFormula + sinC2;
+ cosFormula = x2 * cosFormula + cosC2;
+
+ sinFormula = x2 * sinFormula + oneVec;
+ cosFormula = x2 * cosFormula + oneVec;
+
+ sinFormula *= x;
+
+ sinResult = select(sinUseCos, cosFormula, sinFormula);
+ cosResult = select(cosUseCos, cosFormula, sinFormula);
+
+ sinResult = select(sinFlipSign, -sinResult, sinResult);
+ cosResult = select(cosFlipSign, -cosResult, cosResult);
+}
+
+template <typename T>
+__forceinline T tan(const T &v)
+{
+ const float piOverFourVec = 0.785398185253143310546875;
+ const float fourOverPiVec = 1.27323949337005615234375;
+
+ auto xLt0 = v < 0.;
+ auto y = select(xLt0, -v, v);
+ auto scaled = y * fourOverPiVec;
+
+ auto kReal = floor(scaled);
+ auto k = toInt(kReal);
+
+ auto x = y - kReal * piOverFourVec;
+
+ // If k & 1, x -= Pi/4
+ auto needOffset = (k & 1) != 0;
+ x = select(needOffset, x - piOverFourVec, x);
+
+ // If k & 3 == (0 or 3) let z = tan_In...(y) otherwise z = -cot_In0To...
+ auto kMod4 = k & 3;
+ auto useCotan = (kMod4 == 1) | (kMod4 == 2);
+
+ const float oneVec = 1.0;
+
+ const float tanC2 = +0.33333075046539306640625;
+ const float tanC4 = +0.13339905440807342529296875;
+ const float tanC6 = +5.3348250687122344970703125e-2;
+ const float tanC8 = +2.46033705770969390869140625e-2;
+ const float tanC10 = +2.892402000725269317626953125e-3;
+ const float tanC12 = +9.500005282461643218994140625e-3;
+
+ const float cotC2 = -0.3333333432674407958984375;
+ const float cotC4 = -2.222204394638538360595703125e-2;
+ const float cotC6 = -2.11752182804048061370849609375e-3;
+ const float cotC8 = -2.0846328698098659515380859375e-4;
+ const float cotC10 = -2.548247357481159269809722900390625e-5;
+ const float cotC12 = -3.5257363606433500535786151885986328125e-7;
+
+ auto x2 = x * x;
+ T z;
+ if (any(useCotan))
+ {
+ auto cotVal = x2 * cotC12 + cotC10;
+ cotVal = x2 * cotVal + cotC8;
+ cotVal = x2 * cotVal + cotC6;
+ cotVal = x2 * cotVal + cotC4;
+ cotVal = x2 * cotVal + cotC2;
+ cotVal = x2 * cotVal + oneVec;
+ // The equation is for x * cot(x) but we need -x * cot(x) for the tan part.
+ cotVal /= -x;
+ z = cotVal;
+ }
+ auto useTan = !useCotan;
+ if (any(useTan))
+ {
+ auto tanVal = x2 * tanC12 + tanC10;
+ tanVal = x2 * tanVal + tanC8;
+ tanVal = x2 * tanVal + tanC6;
+ tanVal = x2 * tanVal + tanC4;
+ tanVal = x2 * tanVal + tanC2;
+ tanVal = x2 * tanVal + oneVec;
+ // Equation was for tan(x)/x
+ tanVal *= x;
+ z = select(useTan, tanVal, z);
+ }
+ return select(xLt0, -z, z);
+}
+
+template <typename T>
+__forceinline T asin(const T &x0)
+{
+ auto isneg = (x0 < 0.f);
+ auto x = abs(x0);
+ auto isnan = (x > 1.f);
+
+ // sollya
+ // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|],
+ // [1e-20;.9999999999999999]);
+ // avg error: 1.1105439e-06, max error 1.3187528e-06
+ auto v = 1.57079517841339111328125f +
+ x * (-0.21450997889041900634765625f +
+ x * (8.78556668758392333984375e-2f +
+ x * (-4.489909112453460693359375e-2f +
+ x * (1.928029954433441162109375e-2f +
+ x * (-4.3095736764371395111083984375e-3f)))));
+
+ v *= -sqrt(1.f - x);
+ v = v + 1.57079637050628662109375f;
+
+ v = select(v < 0.f, T(0.f), v);
+ v = select(isneg, -v, v);
+ v = select(isnan, T(cast_i2f(0x7fc00000)), v);
+
+ return v;
+}
+
+template <typename T>
+__forceinline T acos(const T &v)
+{
+ return 1.57079637050628662109375f - asin(v);
+}
+
+template <typename T>
+__forceinline T atan(const T &v)
+{
+ const float piOverTwoVec = 1.57079637050628662109375;
+ // atan(-x) = -atan(x) (so flip from negative to positive first)
+ // If x > 1 -> atan(x) = Pi/2 - atan(1/x)
+ auto xNeg = v < 0.f;
+ auto xFlipped = select(xNeg, -v, v);
+
+ auto xGt1 = xFlipped > 1.;
+ auto x = select(xGt1, rcpSafe(xFlipped), xFlipped);
+
+ // These coefficients approximate atan(x)/x
+ const float atanC0 = +0.99999988079071044921875;
+ const float atanC2 = -0.3333191573619842529296875;
+ const float atanC4 = +0.199689209461212158203125;
+ const float atanC6 = -0.14015688002109527587890625;
+ const float atanC8 = +9.905083477497100830078125e-2;
+ const float atanC10 = -5.93664981424808502197265625e-2;
+ const float atanC12 = +2.417283318936824798583984375e-2;
+ const float atanC14 = -4.6721356920897960662841796875e-3;
+
+ auto x2 = x * x;
+ auto result = x2 * atanC14 + atanC12;
+ result = x2 * result + atanC10;
+ result = x2 * result + atanC8;
+ result = x2 * result + atanC6;
+ result = x2 * result + atanC4;
+ result = x2 * result + atanC2;
+ result = x2 * result + atanC0;
+ result *= x;
+
+ result = select(xGt1, piOverTwoVec - result, result);
+ result = select(xNeg, -result, result);
+ return result;
+}
+
+template <typename T>
+__forceinline T atan2(const T &y, const T &x)
+{
+ const float piVec = 3.1415926536;
+ // atan2(y, x) =
+ //
+ // atan2(y > 0, x = +-0) -> Pi/2
+ // atan2(y < 0, x = +-0) -> -Pi/2
+ // atan2(y = +-0, x < +0) -> +-Pi
+ // atan2(y = +-0, x >= +0) -> +-0
+ //
+ // atan2(y >= 0, x < 0) -> Pi + atan(y/x)
+ // atan2(y < 0, x < 0) -> -Pi + atan(y/x)
+ // atan2(y, x > 0) -> atan(y/x)
+ //
+ // and then a bunch of code for dealing with infinities.
+ auto yOverX = y * rcpSafe(x);
+ auto atanArg = atan(yOverX);
+ auto xLt0 = x < 0.f;
+ auto yLt0 = y < 0.f;
+ auto offset = select(xLt0,
+ select(yLt0, T(-piVec), T(piVec)), 0.f);
+ return offset + atanArg;
+}
+
+template <typename T>
+__forceinline T exp(const T &v)
+{
+ const float ln2Part1 = 0.6931457519;
+ const float ln2Part2 = 1.4286067653e-6;
+ const float oneOverLn2 = 1.44269502162933349609375;
+
+ auto scaled = v * oneOverLn2;
+ auto kReal = floor(scaled);
+ auto k = toInt(kReal);
+
+ // Reduced range version of x
+ auto x = v - kReal * ln2Part1;
+ x -= kReal * ln2Part2;
+
+ // These coefficients are for e^x in [0, ln(2)]
+ const float one = 1.;
+ const float c2 = 0.4999999105930328369140625;
+ const float c3 = 0.166668415069580078125;
+ const float c4 = 4.16539050638675689697265625e-2;
+ const float c5 = 8.378830738365650177001953125e-3;
+ const float c6 = 1.304379315115511417388916015625e-3;
+ const float c7 = 2.7555381529964506626129150390625e-4;
+
+ auto result = x * c7 + c6;
+ result = x * result + c5;
+ result = x * result + c4;
+ result = x * result + c3;
+ result = x * result + c2;
+ result = x * result + one;
+ result = x * result + one;
+
+ // Compute 2^k (should differ for float and double, but I'll avoid
+ // it for now and just do floats)
+ const int fpbias = 127;
+ auto biasedN = k + fpbias;
+ auto overflow = kReal > fpbias;
+ // Minimum exponent is -126, so if k is <= -127 (k + 127 <= 0)
+ // we've got underflow. -127 * ln(2) -> -88.02. So the most
+ // negative float input that doesn't result in zero is like -88.
+ auto underflow = kReal <= -fpbias;
+ const int infBits = 0x7f800000;
+ biasedN <<= 23;
+ // Reinterpret this thing as float
+ auto twoToTheN = asFloat(biasedN);
+ // Handle both doubles and floats (hopefully eliding the copy for float)
+ auto elemtype2n = twoToTheN;
+ result *= elemtype2n;
+ result = select(overflow, cast_i2f(infBits), result);
+ result = select(underflow, 0., result);
+ return result;
+}
+
+// Range reduction for logarithms takes log(x) -> log(2^n * y) -> n
+// * log(2) + log(y) where y is the reduced range (usually in [1/2, 1)).
+template <typename T, typename R>
+__forceinline void __rangeReduceLog(const T &input,
+ T &reduced,
+ R &exponent)
+{
+ auto intVersion = asInt(input);
+ // single precision = SEEE EEEE EMMM MMMM MMMM MMMM MMMM MMMM
+ // exponent mask = 0111 1111 1000 0000 0000 0000 0000 0000
+ // 0x7 0xF 0x8 0x0 0x0 0x0 0x0 0x0
+ // non-exponent = 1000 0000 0111 1111 1111 1111 1111 1111
+ // = 0x8 0x0 0x7 0xF 0xF 0xF 0xF 0xF
+
+ //const int exponentMask(0x7F800000)
+ static const int nonexponentMask = 0x807FFFFF;
+
+ // We want the reduced version to have an exponent of -1 which is
+ // -1 + 127 after biasing or 126
+ static const int exponentNeg1 = (126l << 23);
+ // NOTE(boulos): We don't need to mask anything out since we know
+ // the sign bit has to be 0. If it's 1, we need to return infinity/nan
+ // anyway (log(x), x = +-0 -> infinity, x < 0 -> NaN).
+ auto biasedExponent = intVersion >> 23; // This number is [0, 255] but it means [-127, 128]
+
+ auto offsetExponent = biasedExponent + 1; // Treat the number as if it were 2^{e+1} * (1.m)/2
+ exponent = offsetExponent - 127; // get the real value
+
+ // Blend the offset_exponent with the original input (do this in
+ // int for now, until I decide if float can have & and &not)
+ auto blended = (intVersion & nonexponentMask) | (exponentNeg1);
+ reduced = asFloat(blended);
+}
+
+template <typename T> struct ExponentType { };
+template <int N> struct ExponentType<vfloat_impl<N>> { typedef vint<N> Ty; };
+template <> struct ExponentType<float> { typedef int Ty; };
+
+template <typename T>
+__forceinline T log(const T &v)
+{
+ T reduced;
+ typename ExponentType<T>::Ty exponent;
+
+ const int nanBits = 0x7fc00000;
+ const int negInfBits = 0xFF800000;
+ const float nan = cast_i2f(nanBits);
+ const float negInf = cast_i2f(negInfBits);
+ auto useNan = v < 0.;
+ auto useInf = v == 0.;
+ auto exceptional = useNan | useInf;
+ const float one = 1.0;
+
+ auto patched = select(exceptional, one, v);
+ __rangeReduceLog(patched, reduced, exponent);
+
+ const float ln2 = 0.693147182464599609375;
+
+ auto x1 = one - reduced;
+ const float c1 = +0.50000095367431640625;
+ const float c2 = +0.33326041698455810546875;
+ const float c3 = +0.2519190013408660888671875;
+ const float c4 = +0.17541764676570892333984375;
+ const float c5 = +0.3424419462680816650390625;
+ const float c6 = -0.599632322788238525390625;
+ const float c7 = +1.98442304134368896484375;
+ const float c8 = -2.4899270534515380859375;
+ const float c9 = +1.7491014003753662109375;
+
+ auto result = x1 * c9 + c8;
+ result = x1 * result + c7;
+ result = x1 * result + c6;
+ result = x1 * result + c5;
+ result = x1 * result + c4;
+ result = x1 * result + c3;
+ result = x1 * result + c2;
+ result = x1 * result + c1;
+ result = x1 * result + one;
+
+ // Equation was for -(ln(red)/(1-red))
+ result *= -x1;
+ result += toFloat(exponent) * ln2;
+
+ return select(exceptional,
+ select(useNan, T(nan), T(negInf)),
+ result);
+}
+
+template <typename T>
+__forceinline T pow(const T &x, const T &y)
+{
+ auto x1 = abs(x);
+ auto z = exp(y * log(x1));
+
+ // Handle special cases
+ const float twoOver23 = 8388608.0f;
+ auto yInt = y == round(y);
+ auto yOddInt = select(yInt, asInt(abs(y) + twoOver23) << 31, 0); // set sign bit
+
+ // x == 0
+ z = select(x == 0.0f,
+ select(y < 0.0f, T(inf) | signmsk(x),
+ select(y == 0.0f, T(1.0f), asFloat(yOddInt) & x)), z);
+
+ // x < 0
+ auto xNegative = x < 0.0f;
+ if (any(xNegative))
+ {
+ auto z1 = z | asFloat(yOddInt);
+ z1 = select(yInt, z1, std::numeric_limits<float>::quiet_NaN());
+ z = select(xNegative, z1, z);
+ }
+
+ auto xFinite = isfinite(x);
+ auto yFinite = isfinite(y);
+ if (all(xFinite & yFinite))
+ return z;
+
+ // x finite and y infinite
+ z = select(andn(xFinite, yFinite),
+ select(x1 == 1.0f, 1.0f,
+ select((x1 > 1.0f) ^ (y < 0.0f), inf, T(0.0f))), z);
+
+ // x infinite
+ z = select(xFinite, z,
+ select(y == 0.0f, 1.0f,
+ select(y < 0.0f, T(0.0f), inf) | (asFloat(yOddInt) & x)));
+
+ return z;
+}
+
+template <typename T>
+__forceinline T pow(const T &x, float y)
+{
+ return pow(x, T(y));
+}
+
+} // namespace fastapprox
+
+} // namespace embree
diff --git a/thirdparty/embree/common/math/vec2.h b/thirdparty/embree/common/math/vec2.h
new file mode 100644
index 0000000000..d62aef51f3
--- /dev/null
+++ b/thirdparty/embree/common/math/vec2.h
@@ -0,0 +1,235 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "math.h"
+
+namespace embree
+{
+ struct Vec2fa;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Generic 2D vector Class
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> struct Vec2
+ {
+ enum { N = 2 };
+ union {
+ struct { T x, y; };
+#if !(defined(__WIN32__) && _MSC_VER == 1800) // workaround for older VS 2013 compiler
+ T components[N];
+#endif
+ };
+
+ typedef T Scalar;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Construction
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2( ) {}
+ __forceinline explicit Vec2( const T& a ) : x(a), y(a) {}
+ __forceinline Vec2( const T& x, const T& y ) : x(x), y(y) {}
+
+ __forceinline Vec2( const Vec2& other ) { x = other.x; y = other.y; }
+ __forceinline Vec2( const Vec2fa& other );
+
+ template<typename T1> __forceinline Vec2( const Vec2<T1>& a ) : x(T(a.x)), y(T(a.y)) {}
+ template<typename T1> __forceinline Vec2& operator =( const Vec2<T1>& other ) { x = other.x; y = other.y; return *this; }
+
+ __forceinline Vec2& operator =( const Vec2& other ) { x = other.x; y = other.y; return *this; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2( ZeroTy ) : x(zero), y(zero) {}
+ __forceinline Vec2( OneTy ) : x(one), y(one) {}
+ __forceinline Vec2( PosInfTy ) : x(pos_inf), y(pos_inf) {}
+ __forceinline Vec2( NegInfTy ) : x(neg_inf), y(neg_inf) {}
+
+#if defined(__WIN32__) && _MSC_VER == 1800 // workaround for older VS 2013 compiler
+ __forceinline const T& operator [](const size_t axis) const { assert(axis < 2); return (&x)[axis]; }
+ __forceinline T& operator [](const size_t axis) { assert(axis < 2); return (&x)[axis]; }
+#else
+ __forceinline const T& operator [](const size_t axis) const { assert(axis < 2); return components[axis]; }
+ __forceinline T& operator [](const size_t axis ) { assert(axis < 2); return components[axis]; }
+#endif
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec2<T> operator +( const Vec2<T>& a ) { return Vec2<T>(+a.x, +a.y); }
+ template<typename T> __forceinline Vec2<T> operator -( const Vec2<T>& a ) { return Vec2<T>(-a.x, -a.y); }
+ template<typename T> __forceinline Vec2<T> abs ( const Vec2<T>& a ) { return Vec2<T>(abs (a.x), abs (a.y)); }
+ template<typename T> __forceinline Vec2<T> rcp ( const Vec2<T>& a ) { return Vec2<T>(rcp (a.x), rcp (a.y)); }
+ template<typename T> __forceinline Vec2<T> rsqrt ( const Vec2<T>& a ) { return Vec2<T>(rsqrt(a.x), rsqrt(a.y)); }
+ template<typename T> __forceinline Vec2<T> sqrt ( const Vec2<T>& a ) { return Vec2<T>(sqrt (a.x), sqrt (a.y)); }
+ template<typename T> __forceinline Vec2<T> frac ( const Vec2<T>& a ) { return Vec2<T>(frac (a.x), frac (a.y)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec2<T> operator +( const Vec2<T>& a, const Vec2<T>& b ) { return Vec2<T>(a.x + b.x, a.y + b.y); }
+ template<typename T> __forceinline Vec2<T> operator +( const Vec2<T>& a, const T& b ) { return Vec2<T>(a.x + b , a.y + b ); }
+ template<typename T> __forceinline Vec2<T> operator +( const T& a, const Vec2<T>& b ) { return Vec2<T>(a + b.x, a + b.y); }
+ template<typename T> __forceinline Vec2<T> operator -( const Vec2<T>& a, const Vec2<T>& b ) { return Vec2<T>(a.x - b.x, a.y - b.y); }
+ template<typename T> __forceinline Vec2<T> operator -( const Vec2<T>& a, const T& b ) { return Vec2<T>(a.x - b , a.y - b ); }
+ template<typename T> __forceinline Vec2<T> operator -( const T& a, const Vec2<T>& b ) { return Vec2<T>(a - b.x, a - b.y); }
+ template<typename T> __forceinline Vec2<T> operator *( const Vec2<T>& a, const Vec2<T>& b ) { return Vec2<T>(a.x * b.x, a.y * b.y); }
+ template<typename T> __forceinline Vec2<T> operator *( const T& a, const Vec2<T>& b ) { return Vec2<T>(a * b.x, a * b.y); }
+ template<typename T> __forceinline Vec2<T> operator *( const Vec2<T>& a, const T& b ) { return Vec2<T>(a.x * b , a.y * b ); }
+ template<typename T> __forceinline Vec2<T> operator /( const Vec2<T>& a, const Vec2<T>& b ) { return Vec2<T>(a.x / b.x, a.y / b.y); }
+ template<typename T> __forceinline Vec2<T> operator /( const Vec2<T>& a, const T& b ) { return Vec2<T>(a.x / b , a.y / b ); }
+ template<typename T> __forceinline Vec2<T> operator /( const T& a, const Vec2<T>& b ) { return Vec2<T>(a / b.x, a / b.y); }
+
+ template<typename T> __forceinline Vec2<T> min(const Vec2<T>& a, const Vec2<T>& b) { return Vec2<T>(min(a.x, b.x), min(a.y, b.y)); }
+ template<typename T> __forceinline Vec2<T> max(const Vec2<T>& a, const Vec2<T>& b) { return Vec2<T>(max(a.x, b.x), max(a.y, b.y)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Ternary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec2<T> madd ( const Vec2<T>& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>( madd(a.x,b.x,c.x), madd(a.y,b.y,c.y) ); }
+ template<typename T> __forceinline Vec2<T> msub ( const Vec2<T>& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>( msub(a.x,b.x,c.x), msub(a.y,b.y,c.y) ); }
+ template<typename T> __forceinline Vec2<T> nmadd ( const Vec2<T>& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>(nmadd(a.x,b.x,c.x),nmadd(a.y,b.y,c.y) ); }
+ template<typename T> __forceinline Vec2<T> nmsub ( const Vec2<T>& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>(nmsub(a.x,b.x,c.x),nmsub(a.y,b.y,c.y) ); }
+
+ template<typename T> __forceinline Vec2<T> madd ( const T& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>( madd(a,b.x,c.x), madd(a,b.y,c.y) ); }
+ template<typename T> __forceinline Vec2<T> msub ( const T& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>( msub(a,b.x,c.x), msub(a,b.y,c.y) ); }
+ template<typename T> __forceinline Vec2<T> nmadd ( const T& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>(nmadd(a,b.x,c.x),nmadd(a,b.y,c.y) ); }
+ template<typename T> __forceinline Vec2<T> nmsub ( const T& a, const Vec2<T>& b, const Vec2<T>& c) { return Vec2<T>(nmsub(a,b.x,c.x),nmsub(a,b.y,c.y) ); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec2<T>& operator +=( Vec2<T>& a, const Vec2<T>& b ) { a.x += b.x; a.y += b.y; return a; }
+ template<typename T> __forceinline Vec2<T>& operator -=( Vec2<T>& a, const Vec2<T>& b ) { a.x -= b.x; a.y -= b.y; return a; }
+ template<typename T> __forceinline Vec2<T>& operator *=( Vec2<T>& a, const T& b ) { a.x *= b ; a.y *= b ; return a; }
+ template<typename T> __forceinline Vec2<T>& operator /=( Vec2<T>& a, const T& b ) { a.x /= b ; a.y /= b ; return a; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reduction Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline T reduce_add( const Vec2<T>& a ) { return a.x + a.y; }
+ template<typename T> __forceinline T reduce_mul( const Vec2<T>& a ) { return a.x * a.y; }
+ template<typename T> __forceinline T reduce_min( const Vec2<T>& a ) { return min(a.x, a.y); }
+ template<typename T> __forceinline T reduce_max( const Vec2<T>& a ) { return max(a.x, a.y); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline bool operator ==( const Vec2<T>& a, const Vec2<T>& b ) { return a.x == b.x && a.y == b.y; }
+ template<typename T> __forceinline bool operator !=( const Vec2<T>& a, const Vec2<T>& b ) { return a.x != b.x || a.y != b.y; }
+ template<typename T> __forceinline bool operator < ( const Vec2<T>& a, const Vec2<T>& b ) {
+ if (a.x != b.x) return a.x < b.x;
+ if (a.y != b.y) return a.y < b.y;
+ return false;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Shift Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec2<T> shift_right_1( const Vec2<T>& a ) {
+ return Vec2<T>(shift_right_1(a.x),shift_right_1(a.y));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Euclidian Space Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline T dot ( const Vec2<T>& a, const Vec2<T>& b ) { return madd(a.x,b.x,a.y*b.y); }
+ template<typename T> __forceinline Vec2<T> cross ( const Vec2<T>& a ) { return Vec2<T>(-a.y,a.x); }
+ template<typename T> __forceinline T length ( const Vec2<T>& a ) { return sqrt(dot(a,a)); }
+ template<typename T> __forceinline Vec2<T> normalize( const Vec2<T>& a ) { return a*rsqrt(dot(a,a)); }
+ template<typename T> __forceinline T distance ( const Vec2<T>& a, const Vec2<T>& b ) { return length(a-b); }
+ template<typename T> __forceinline T det ( const Vec2<T>& a, const Vec2<T>& b ) { return a.x*b.y - a.y*b.x; }
+
+ template<typename T> __forceinline Vec2<T> normalize_safe( const Vec2<T>& a ) {
+ const T d = dot(a,a); return select(d == T( zero ),a, a*rsqrt(d) );
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec2<T> select ( bool s, const Vec2<T>& t, const Vec2<T>& f ) {
+ return Vec2<T>(select(s,t.x,f.x),select(s,t.y,f.y));
+ }
+
+ template<typename T> __forceinline Vec2<T> select ( const Vec2<bool>& s, const Vec2<T>& t, const Vec2<T>& f ) {
+ return Vec2<T>(select(s.x,t.x,f.x),select(s.y,t.y,f.y));
+ }
+
+ template<typename T> __forceinline Vec2<T> select ( const typename T::Bool& s, const Vec2<T>& t, const Vec2<T>& f ) {
+ return Vec2<T>(select(s,t.x,f.x),select(s,t.y,f.y));
+ }
+
+ template<typename T>
+ __forceinline Vec2<T> lerp(const Vec2<T>& v0, const Vec2<T>& v1, const T& t) {
+ return madd(Vec2<T>(T(1.0f)-t),v0,t*v1);
+ }
+
+ template<typename T> __forceinline int maxDim ( const Vec2<T>& a )
+ {
+ const Vec2<T> b = abs(a);
+ if (b.x > b.y) return 0;
+ else return 1;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Vec2<T>& a) {
+ return cout << "(" << a.x << ", " << a.y << ")";
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Default template instantiations
+ ////////////////////////////////////////////////////////////////////////////////
+
+ typedef Vec2<bool > Vec2b;
+ typedef Vec2<int > Vec2i;
+ typedef Vec2<float> Vec2f;
+}
+
+#include "vec2fa.h"
+
+#if defined __SSE__
+#include "../simd/sse.h"
+#endif
+
+#if defined __AVX__
+#include "../simd/avx.h"
+#endif
+
+#if defined(__AVX512F__)
+#include "../simd/avx512.h"
+#endif
+
+namespace embree
+{
+ template<> __forceinline Vec2<float>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
+
+#if defined(__SSE__)
+ template<> __forceinline Vec2<vfloat4>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
+#endif
+
+#if defined(__AVX__)
+ template<> __forceinline Vec2<vfloat8>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
+#endif
+
+#if defined(__AVX512F__)
+ template<> __forceinline Vec2<vfloat16>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
+#endif
+}
diff --git a/thirdparty/embree/common/math/vec2fa.h b/thirdparty/embree/common/math/vec2fa.h
new file mode 100644
index 0000000000..a51fb68fd0
--- /dev/null
+++ b/thirdparty/embree/common/math/vec2fa.h
@@ -0,0 +1,301 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../sys/alloc.h"
+#include "math.h"
+#include "../simd/sse.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////////////////////
+ /// SSE Vec2fa Type
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct __aligned(16) Vec2fa
+ {
+ ALIGNED_STRUCT_(16);
+
+ typedef float Scalar;
+ enum { N = 2 };
+ union {
+ __m128 m128;
+ struct { float x,y,az,aw; };
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constructors, Assignment & Cast Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2fa( ) {}
+ __forceinline Vec2fa( const __m128 a ) : m128(a) {}
+
+ __forceinline Vec2fa ( const Vec2<float>& other ) { x = other.x; y = other.y; }
+ __forceinline Vec2fa& operator =( const Vec2<float>& other ) { x = other.x; y = other.y; return *this; }
+
+ __forceinline Vec2fa ( const Vec2fa& other ) { m128 = other.m128; }
+ __forceinline Vec2fa& operator =( const Vec2fa& other ) { m128 = other.m128; return *this; }
+
+ __forceinline explicit Vec2fa( const float a ) : m128(_mm_set1_ps(a)) {}
+ __forceinline Vec2fa( const float x, const float y) : m128(_mm_set_ps(y, y, y, x)) {}
+
+ __forceinline explicit Vec2fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
+
+ __forceinline operator const __m128&() const { return m128; }
+ __forceinline operator __m128&() { return m128; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Loads and Stores
+ ////////////////////////////////////////////////////////////////////////////////
+
+ static __forceinline Vec2fa load( const void* const a ) {
+ return Vec2fa(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1))));
+ }
+
+ static __forceinline Vec2fa loadu( const void* const a ) {
+ return Vec2fa(_mm_and_ps(_mm_loadu_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1))));
+ }
+
+ static __forceinline void storeu ( void* ptr, const Vec2fa& v ) {
+ _mm_storeu_ps((float*)ptr,v);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2fa( ZeroTy ) : m128(_mm_setzero_ps()) {}
+ __forceinline Vec2fa( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
+ __forceinline Vec2fa( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
+ __forceinline Vec2fa( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Array Access
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const float& operator []( const size_t index ) const { assert(index < 2); return (&x)[index]; }
+ __forceinline float& operator []( const size_t index ) { assert(index < 2); return (&x)[index]; }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2fa operator +( const Vec2fa& a ) { return a; }
+ __forceinline Vec2fa operator -( const Vec2fa& a ) {
+ const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
+ return _mm_xor_ps(a.m128, mask);
+ }
+ __forceinline Vec2fa abs ( const Vec2fa& a ) {
+ const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
+ return _mm_and_ps(a.m128, mask);
+ }
+ __forceinline Vec2fa sign ( const Vec2fa& a ) {
+ return blendv_ps(Vec2fa(one), -Vec2fa(one), _mm_cmplt_ps (a,Vec2fa(zero)));
+ }
+
+ __forceinline Vec2fa rcp ( const Vec2fa& a )
+ {
+#if defined(__AVX512VL__)
+ const Vec2fa r = _mm_rcp14_ps(a.m128);
+#else
+ const Vec2fa r = _mm_rcp_ps(a.m128);
+#endif
+
+#if defined(__AVX2__)
+ const Vec2fa res = _mm_mul_ps(r,_mm_fnmadd_ps(r, a, vfloat4(2.0f)));
+#else
+ const Vec2fa res = _mm_mul_ps(r,_mm_sub_ps(vfloat4(2.0f), _mm_mul_ps(r, a)));
+ //return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
+#endif
+
+ return res;
+ }
+
+ __forceinline Vec2fa sqrt ( const Vec2fa& a ) { return _mm_sqrt_ps(a.m128); }
+ __forceinline Vec2fa sqr ( const Vec2fa& a ) { return _mm_mul_ps(a,a); }
+
+ __forceinline Vec2fa rsqrt( const Vec2fa& a )
+ {
+#if defined(__AVX512VL__)
+ __m128 r = _mm_rsqrt14_ps(a.m128);
+#else
+ __m128 r = _mm_rsqrt_ps(a.m128);
+#endif
+ return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
+ }
+
+ __forceinline Vec2fa zero_fix(const Vec2fa& a) {
+ return blendv_ps(a, _mm_set1_ps(min_rcp_input), _mm_cmplt_ps (abs(a).m128, _mm_set1_ps(min_rcp_input)));
+ }
+ __forceinline Vec2fa rcp_safe(const Vec2fa& a) {
+ return rcp(zero_fix(a));
+ }
+ __forceinline Vec2fa log ( const Vec2fa& a ) {
+ return Vec2fa(logf(a.x),logf(a.y));
+ }
+
+ __forceinline Vec2fa exp ( const Vec2fa& a ) {
+ return Vec2fa(expf(a.x),expf(a.y));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b ) { return _mm_add_ps(a.m128, b.m128); }
+ __forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b ) { return _mm_sub_ps(a.m128, b.m128); }
+ __forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b ) { return _mm_mul_ps(a.m128, b.m128); }
+ __forceinline Vec2fa operator *( const Vec2fa& a, const float b ) { return a * Vec2fa(b); }
+ __forceinline Vec2fa operator *( const float a, const Vec2fa& b ) { return Vec2fa(a) * b; }
+ __forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b ) { return _mm_div_ps(a.m128,b.m128); }
+ __forceinline Vec2fa operator /( const Vec2fa& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
+ __forceinline Vec2fa operator /( const float a, const Vec2fa& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }
+
+ __forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b ) { return _mm_min_ps(a.m128,b.m128); }
+ __forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b ) { return _mm_max_ps(a.m128,b.m128); }
+
+#if defined(__SSE4_1__)
+ __forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b) {
+ const vint4 ai = _mm_castps_si128(a);
+ const vint4 bi = _mm_castps_si128(b);
+ const vint4 ci = _mm_min_epi32(ai,bi);
+ return _mm_castsi128_ps(ci);
+ }
+#endif
+
+#if defined(__SSE4_1__)
+ __forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b) {
+ const vint4 ai = _mm_castps_si128(a);
+ const vint4 bi = _mm_castps_si128(b);
+ const vint4 ci = _mm_max_epi32(ai,bi);
+ return _mm_castsi128_ps(ci);
+ }
+#endif
+
+ __forceinline Vec2fa pow ( const Vec2fa& a, const float& b ) {
+ return Vec2fa(powf(a.x,b),powf(a.y,b));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Ternary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__AVX2__)
+ __forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmadd_ps(a,b,c); }
+ __forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmsub_ps(a,b,c); }
+ __forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmadd_ps(a,b,c); }
+ __forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmsub_ps(a,b,c); }
+#else
+ __forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return a*b+c; }
+ __forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return a*b-c; }
+ __forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return -a*b+c;}
+ __forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return -a*b-c; }
+#endif
+
+ __forceinline Vec2fa madd ( const float a, const Vec2fa& b, const Vec2fa& c) { return madd(Vec2fa(a),b,c); }
+ __forceinline Vec2fa msub ( const float a, const Vec2fa& b, const Vec2fa& c) { return msub(Vec2fa(a),b,c); }
+ __forceinline Vec2fa nmadd ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmadd(Vec2fa(a),b,c); }
+ __forceinline Vec2fa nmsub ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmsub(Vec2fa(a),b,c); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2fa& operator +=( Vec2fa& a, const Vec2fa& b ) { return a = a + b; }
+ __forceinline Vec2fa& operator -=( Vec2fa& a, const Vec2fa& b ) { return a = a - b; }
+ __forceinline Vec2fa& operator *=( Vec2fa& a, const Vec2fa& b ) { return a = a * b; }
+ __forceinline Vec2fa& operator *=( Vec2fa& a, const float b ) { return a = a * b; }
+ __forceinline Vec2fa& operator /=( Vec2fa& a, const Vec2fa& b ) { return a = a / b; }
+ __forceinline Vec2fa& operator /=( Vec2fa& a, const float b ) { return a = a / b; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reductions
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline float reduce_add(const Vec2fa& v) { return v.x+v.y; }
+ __forceinline float reduce_mul(const Vec2fa& v) { return v.x*v.y; }
+ __forceinline float reduce_min(const Vec2fa& v) { return min(v.x,v.y); }
+ __forceinline float reduce_max(const Vec2fa& v) { return max(v.x,v.y); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 3) == 3; }
+ __forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 3) != 0; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Euclidian Space Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__SSE4_1__)
+ __forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
+ return _mm_cvtss_f32(_mm_dp_ps(a,b,0x3F));
+ }
+#else
+ __forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
+ return reduce_add(a*b);
+ }
+#endif
+
+ __forceinline Vec2fa cross ( const Vec2fa& a ) {
+ return Vec2fa(-a.y,a.x);
+ }
+
+ __forceinline float sqr_length ( const Vec2fa& a ) { return dot(a,a); }
+ __forceinline float rcp_length ( const Vec2fa& a ) { return rsqrt(dot(a,a)); }
+ __forceinline float rcp_length2( const Vec2fa& a ) { return rcp(dot(a,a)); }
+ __forceinline float length ( const Vec2fa& a ) { return sqrt(dot(a,a)); }
+ __forceinline Vec2fa normalize( const Vec2fa& a ) { return a*rsqrt(dot(a,a)); }
+ __forceinline float distance ( const Vec2fa& a, const Vec2fa& b ) { return length(a-b); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec2fa select( bool s, const Vec2fa& t, const Vec2fa& f ) {
+ __m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
+ return blendv_ps(f, t, mask);
+ }
+
+ __forceinline Vec2fa lerp(const Vec2fa& v0, const Vec2fa& v1, const float t) {
+ return madd(1.0f-t,v0,t*v1);
+ }
+
+ __forceinline int maxDim ( const Vec2fa& a )
+ {
+ const Vec2fa b = abs(a);
+ if (b.x > b.y) return 0;
+ else return 1;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Rounding Functions
+ ////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__aarch64__)
+ //__forceinline Vec2fa trunc(const Vec2fa& a) { return vrndq_f32(a); }
+ __forceinline Vec2fa floor(const Vec2fa& a) { return vrndmq_f32(a); }
+ __forceinline Vec2fa ceil (const Vec2fa& a) { return vrndpq_f32(a); }
+#elif defined (__SSE4_1__)
+ //__forceinline Vec2fa trunc( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); }
+ __forceinline Vec2fa floor( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF ); }
+ __forceinline Vec2fa ceil ( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_POS_INF ); }
+#else
+ //__forceinline Vec2fa trunc( const Vec2fa& a ) { return Vec2fa(truncf(a.x),truncf(a.y),truncf(a.z)); }
+ __forceinline Vec2fa floor( const Vec2fa& a ) { return Vec2fa(floorf(a.x),floorf(a.y)); }
+ __forceinline Vec2fa ceil ( const Vec2fa& a ) { return Vec2fa(ceilf (a.x),ceilf (a.y)); }
+#endif
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline embree_ostream operator<<(embree_ostream cout, const Vec2fa& a) {
+ return cout << "(" << a.x << ", " << a.y << ")";
+ }
+
+ typedef Vec2fa Vec2fa_t;
+}
diff --git a/thirdparty/embree/common/math/vec3.h b/thirdparty/embree/common/math/vec3.h
new file mode 100644
index 0000000000..ce94eff327
--- /dev/null
+++ b/thirdparty/embree/common/math/vec3.h
@@ -0,0 +1,337 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "math.h"
+
+namespace embree
+{
+ struct Vec3fa;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Generic 3D vector Class
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> struct Vec3
+ {
+ enum { N = 3 };
+
+ union {
+ struct {
+ T x, y, z;
+ };
+#if !(defined(__WIN32__) && _MSC_VER == 1800) // workaround for older VS 2013 compiler
+ T components[N];
+#endif
+ };
+
+ typedef T Scalar;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Construction
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3( ) {}
+ __forceinline explicit Vec3( const T& a ) : x(a), y(a), z(a) {}
+ __forceinline Vec3( const T& x, const T& y, const T& z ) : x(x), y(y), z(z) {}
+
+ __forceinline Vec3( const Vec3& other ) { x = other.x; y = other.y; z = other.z; }
+ __forceinline Vec3( const Vec3fa& other );
+
+ template<typename T1> __forceinline Vec3( const Vec3<T1>& a ) : x(T(a.x)), y(T(a.y)), z(T(a.z)) {}
+ template<typename T1> __forceinline Vec3& operator =(const Vec3<T1>& other) { x = other.x; y = other.y; z = other.z; return *this; }
+
+ __forceinline Vec3& operator =(const Vec3& other) { x = other.x; y = other.y; z = other.z; return *this; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3( ZeroTy ) : x(zero), y(zero), z(zero) {}
+ __forceinline Vec3( OneTy ) : x(one), y(one), z(one) {}
+ __forceinline Vec3( PosInfTy ) : x(pos_inf), y(pos_inf), z(pos_inf) {}
+ __forceinline Vec3( NegInfTy ) : x(neg_inf), y(neg_inf), z(neg_inf) {}
+
+#if defined(__WIN32__) && (_MSC_VER == 1800) // workaround for older VS 2013 compiler
+ __forceinline const T& operator []( const size_t axis ) const { assert(axis < 3); return (&x)[axis]; }
+ __forceinline T& operator []( const size_t axis ) { assert(axis < 3); return (&x)[axis]; }
+#else
+ __forceinline const T& operator [](const size_t axis) const { assert(axis < 3); return components[axis]; }
+ __forceinline T& operator [](const size_t axis) { assert(axis < 3); return components[axis]; }
+#endif
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec3<T> operator +( const Vec3<T>& a ) { return Vec3<T>(+a.x, +a.y, +a.z); }
+ template<typename T> __forceinline Vec3<T> operator -( const Vec3<T>& a ) { return Vec3<T>(-a.x, -a.y, -a.z); }
+ template<typename T> __forceinline Vec3<T> abs ( const Vec3<T>& a ) { return Vec3<T>(abs (a.x), abs (a.y), abs (a.z)); }
+ template<typename T> __forceinline Vec3<T> rcp ( const Vec3<T>& a ) { return Vec3<T>(rcp (a.x), rcp (a.y), rcp (a.z)); }
+ template<typename T> __forceinline Vec3<T> rsqrt ( const Vec3<T>& a ) { return Vec3<T>(rsqrt(a.x), rsqrt(a.y), rsqrt(a.z)); }
+ template<typename T> __forceinline Vec3<T> sqrt ( const Vec3<T>& a ) { return Vec3<T>(sqrt (a.x), sqrt (a.y), sqrt (a.z)); }
+
+ template<typename T> __forceinline Vec3<T> zero_fix( const Vec3<T>& a )
+ {
+ return Vec3<T>(select(abs(a.x)<min_rcp_input,T(min_rcp_input),a.x),
+ select(abs(a.y)<min_rcp_input,T(min_rcp_input),a.y),
+ select(abs(a.z)<min_rcp_input,T(min_rcp_input),a.z));
+ }
+ template<typename T> __forceinline Vec3<T> rcp_safe(const Vec3<T>& a) { return rcp(zero_fix(a)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec3<T> operator +( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(a.x + b.x, a.y + b.y, a.z + b.z); }
+ template<typename T> __forceinline Vec3<T> operator -( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(a.x - b.x, a.y - b.y, a.z - b.z); }
+ template<typename T> __forceinline Vec3<T> operator *( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(a.x * b.x, a.y * b.y, a.z * b.z); }
+ template<typename T> __forceinline Vec3<T> operator *( const T& a, const Vec3<T>& b ) { return Vec3<T>(a * b.x, a * b.y, a * b.z); }
+ template<typename T> __forceinline Vec3<T> operator *( const Vec3<T>& a, const T& b ) { return Vec3<T>(a.x * b , a.y * b , a.z * b ); }
+ template<typename T> __forceinline Vec3<T> operator /( const Vec3<T>& a, const T& b ) { return Vec3<T>(a.x / b , a.y / b , a.z / b ); }
+ template<typename T> __forceinline Vec3<T> operator /( const T& a, const Vec3<T>& b ) { return Vec3<T>(a / b.x, a / b.y, a / b.z); }
+ template<typename T> __forceinline Vec3<T> operator /( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(a.x / b.x, a.y / b.y, a.z / b.z); }
+
+ template<typename T> __forceinline Vec3<T> min(const Vec3<T>& a, const Vec3<T>& b) { return Vec3<T>(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); }
+ template<typename T> __forceinline Vec3<T> max(const Vec3<T>& a, const Vec3<T>& b) { return Vec3<T>(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); }
+
+ template<typename T> __forceinline Vec3<T> operator >>( const Vec3<T>& a, const int b ) { return Vec3<T>(a.x >> b, a.y >> b, a.z >> b); }
+ template<typename T> __forceinline Vec3<T> operator <<( const Vec3<T>& a, const int b ) { return Vec3<T>(a.x << b, a.y << b, a.z << b); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Ternary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec3<T> madd ( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>( madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z)); }
+ template<typename T> __forceinline Vec3<T> msub ( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>( msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z)); }
+ template<typename T> __forceinline Vec3<T> nmadd ( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>(nmadd(a.x,b.x,c.x),nmadd(a.y,b.y,c.y),nmadd(a.z,b.z,c.z));}
+ template<typename T> __forceinline Vec3<T> nmsub ( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>(nmsub(a.x,b.x,c.x),nmsub(a.y,b.y,c.y),nmsub(a.z,b.z,c.z)); }
+
+ template<typename T> __forceinline Vec3<T> madd ( const T& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>( madd(a,b.x,c.x), madd(a,b.y,c.y), madd(a,b.z,c.z)); }
+ template<typename T> __forceinline Vec3<T> msub ( const T& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>( msub(a,b.x,c.x), msub(a,b.y,c.y), msub(a,b.z,c.z)); }
+ template<typename T> __forceinline Vec3<T> nmadd ( const T& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>(nmadd(a,b.x,c.x),nmadd(a,b.y,c.y),nmadd(a,b.z,c.z));}
+ template<typename T> __forceinline Vec3<T> nmsub ( const T& a, const Vec3<T>& b, const Vec3<T>& c) { return Vec3<T>(nmsub(a,b.x,c.x),nmsub(a,b.y,c.y),nmsub(a,b.z,c.z)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec3<T>& operator +=( Vec3<T>& a, const T b ) { a.x += b; a.y += b; a.z += b; return a; }
+ template<typename T> __forceinline Vec3<T>& operator +=( Vec3<T>& a, const Vec3<T>& b ) { a.x += b.x; a.y += b.y; a.z += b.z; return a; }
+ template<typename T> __forceinline Vec3<T>& operator -=( Vec3<T>& a, const Vec3<T>& b ) { a.x -= b.x; a.y -= b.y; a.z -= b.z; return a; }
+ template<typename T> __forceinline Vec3<T>& operator *=( Vec3<T>& a, const T& b ) { a.x *= b ; a.y *= b ; a.z *= b ; return a; }
+ template<typename T> __forceinline Vec3<T>& operator /=( Vec3<T>& a, const T& b ) { a.x /= b ; a.y /= b ; a.z /= b ; return a; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reduction Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline T reduce_add( const Vec3<T>& a ) { return a.x + a.y + a.z; }
+ template<typename T> __forceinline T reduce_mul( const Vec3<T>& a ) { return a.x * a.y * a.z; }
+ template<typename T> __forceinline T reduce_min( const Vec3<T>& a ) { return min(a.x, a.y, a.z); }
+ template<typename T> __forceinline T reduce_max( const Vec3<T>& a ) { return max(a.x, a.y, a.z); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline bool operator ==( const Vec3<T>& a, const Vec3<T>& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }
+ template<typename T> __forceinline bool operator !=( const Vec3<T>& a, const Vec3<T>& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }
+ template<typename T> __forceinline bool operator < ( const Vec3<T>& a, const Vec3<T>& b ) {
+ if (a.x != b.x) return a.x < b.x;
+ if (a.y != b.y) return a.y < b.y;
+ if (a.z != b.z) return a.z < b.z;
+ return false;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Shift Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec3<T> shift_right_1( const Vec3<T>& a ) {
+ return Vec3<T>(shift_right_1(a.x),shift_right_1(a.y),shift_right_1(a.z));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec3<T> select ( bool s, const Vec3<T>& t, const Vec3<T>& f ) {
+ return Vec3<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z));
+ }
+
+ template<typename T> __forceinline Vec3<T> select ( const Vec3<bool>& s, const Vec3<T>& t, const Vec3<T>& f ) {
+ return Vec3<T>(select(s.x,t.x,f.x),select(s.y,t.y,f.y),select(s.z,t.z,f.z));
+ }
+
+ template<typename T> __forceinline Vec3<T> select ( const typename T::Bool& s, const Vec3<T>& t, const Vec3<T>& f ) {
+ return Vec3<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z));
+ }
+
+ template<typename T>
+ __forceinline Vec3<T> lerp(const Vec3<T>& v0, const Vec3<T>& v1, const T& t) {
+ return madd(Vec3<T>(T(1.0f)-t),v0,t*v1);
+ }
+
+ template<typename T> __forceinline int maxDim ( const Vec3<T>& a )
+ {
+ const Vec3<T> b = abs(a);
+ if (b.x > b.y) {
+ if (b.x > b.z) return 0; else return 2;
+ } else {
+ if (b.y > b.z) return 1; else return 2;
+ }
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec3<bool> eq_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x==b.x,a.y==b.y,a.z==b.z); }
+ template<typename T> __forceinline Vec3<bool> neq_mask(const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x!=b.x,a.y!=b.y,a.z!=b.z); }
+ template<typename T> __forceinline Vec3<bool> lt_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x< b.x,a.y< b.y,a.z< b.z); }
+ template<typename T> __forceinline Vec3<bool> le_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x<=b.x,a.y<=b.y,a.z<=b.z); }
+ template<typename T> __forceinline Vec3<bool> gt_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x> b.x,a.y> b.y,a.z> b.z); }
+ template<typename T> __forceinline Vec3<bool> ge_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x>=b.x,a.y>=b.y,a.z>=b.z); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Euclidian Space Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline T sqr ( const Vec3<T>& a ) { return dot(a,a); }
+ template<typename T> __forceinline T dot ( const Vec3<T>& a, const Vec3<T>& b ) { return madd(a.x,b.x,madd(a.y,b.y,a.z*b.z)); }
+ template<typename T> __forceinline T length ( const Vec3<T>& a ) { return sqrt(sqr(a)); }
+ template<typename T> __forceinline T rcp_length( const Vec3<T>& a ) { return rsqrt(sqr(a)); }
+ template<typename T> __forceinline Vec3<T> normalize( const Vec3<T>& a ) { return a*rsqrt(sqr(a)); }
+ template<typename T> __forceinline T distance ( const Vec3<T>& a, const Vec3<T>& b ) { return length(a-b); }
+ template<typename T> __forceinline Vec3<T> cross ( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x)); }
+
+ template<typename T> __forceinline Vec3<T> stable_triangle_normal( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c )
+ {
+ const T ab_x = a.z*b.y, ab_y = a.x*b.z, ab_z = a.y*b.x;
+ const T bc_x = b.z*c.y, bc_y = b.x*c.z, bc_z = b.y*c.x;
+ const Vec3<T> cross_ab(msub(a.y,b.z,ab_x), msub(a.z,b.x,ab_y), msub(a.x,b.y,ab_z));
+ const Vec3<T> cross_bc(msub(b.y,c.z,bc_x), msub(b.z,c.x,bc_y), msub(b.x,c.y,bc_z));
+ const auto sx = abs(ab_x) < abs(bc_x);
+ const auto sy = abs(ab_y) < abs(bc_y);
+ const auto sz = abs(ab_z) < abs(bc_z);
+ return Vec3<T>(select(sx,cross_ab.x,cross_bc.x),
+ select(sy,cross_ab.y,cross_bc.y),
+ select(sz,cross_ab.z,cross_bc.z));
+ }
+
+ template<typename T> __forceinline T sum ( const Vec3<T>& a ) { return a.x+a.y+a.z; }
+
+ template<typename T> __forceinline T halfArea ( const Vec3<T>& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
+ template<typename T> __forceinline T area ( const Vec3<T>& d ) { return 2.0f*halfArea(d); }
+
+ template<typename T> __forceinline Vec3<T> normalize_safe( const Vec3<T>& a ) {
+ const T d = dot(a,a); return select(d == T( zero ), a , a*rsqrt(d) );
+ }
+
+ template<typename T> __forceinline T sqr_point_to_line_distance(const Vec3<T>& P, const Vec3<T>& Q0, const Vec3<T>& Q1)
+ {
+ const Vec3<T> N = cross(P-Q0,Q1-Q0);
+ const Vec3<T> D = Q1-Q0;
+ return dot(N,N)*rcp(dot(D,D));
+ }
+
+ template<typename T> __forceinline T sqr_point_to_line_distance(const Vec3<T>& PmQ0, const Vec3<T>& Q1mQ0)
+ {
+ const Vec3<T> N = cross(PmQ0,Q1mQ0);
+ const Vec3<T> D = Q1mQ0;
+ return dot(N,N)*rcp(dot(D,D));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3<T>& a) {
+ return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
+ }
+
+ typedef Vec3<bool > Vec3b;
+ typedef Vec3<int > Vec3i;
+ typedef Vec3<float> Vec3f;
+}
+
+#include "vec3ba.h"
+#include "vec3ia.h"
+#include "vec3fa.h"
+
+////////////////////////////////////////////////////////////////////////////////
+/// SSE / AVX / MIC specializations
+////////////////////////////////////////////////////////////////////////////////
+
+#if defined __SSE__
+#include "../simd/sse.h"
+#endif
+
+#if defined __AVX__
+#include "../simd/avx.h"
+#endif
+
+#if defined(__AVX512F__)
+#include "../simd/avx512.h"
+#endif
+
+namespace embree
+{
+ template<typename Out, typename In>
+ __forceinline Vec3<Out> broadcast(const Vec3<In>& a, const size_t k) {
+ return Vec3<Out>(Out(a.x[k]), Out(a.y[k]), Out(a.z[k]));
+ }
+
+ template<> __forceinline Vec3<float>::Vec3(const Vec3fa& a) { x = a.x; y = a.y; z = a.z; }
+
+#if defined(__AVX__)
+ template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
+ x = a.x; y = a.y; z = a.z;
+ }
+#elif defined(__SSE__)
+ template<>
+ __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
+ const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v);
+ }
+#endif
+
+#if defined(__SSE__)
+ template<>
+ __forceinline Vec3<vfloat4> broadcast<vfloat4,vfloat4>(const Vec3<vfloat4>& a, const size_t k) {
+ return Vec3<vfloat4>(vfloat4::broadcast(&a.x[k]), vfloat4::broadcast(&a.y[k]), vfloat4::broadcast(&a.z[k]));
+ }
+
+ template<int i0, int i1, int i2, int i3>
+ __forceinline Vec3<vfloat4> shuffle(const Vec3<vfloat4>& b) {
+ return Vec3<vfloat4>(shuffle<i0,i1,i2,i3>(b.x), shuffle<i0,i1,i2,i3>(b.y), shuffle<i0,i1,i2,i3>(b.z));
+ }
+#endif
+
+#if defined(__AVX__)
+ template<>
+ __forceinline Vec3<vfloat8>::Vec3(const Vec3fa& a) {
+ x = a.x; y = a.y; z = a.z;
+ }
+
+ template<>
+ __forceinline Vec3<vfloat8> broadcast<vfloat8,vfloat4>(const Vec3<vfloat4>& a, const size_t k) {
+ return Vec3<vfloat8>(vfloat8::broadcast(&a.x[k]), vfloat8::broadcast(&a.y[k]), vfloat8::broadcast(&a.z[k]));
+ }
+ template<>
+ __forceinline Vec3<vfloat8> broadcast<vfloat8,vfloat8>(const Vec3<vfloat8>& a, const size_t k) {
+ return Vec3<vfloat8>(vfloat8::broadcast(&a.x[k]), vfloat8::broadcast(&a.y[k]), vfloat8::broadcast(&a.z[k]));
+ }
+
+ template<int i0, int i1, int i2, int i3>
+ __forceinline Vec3<vfloat8> shuffle(const Vec3<vfloat8>& b) {
+ return Vec3<vfloat8>(shuffle<i0,i1,i2,i3>(b.x), shuffle<i0,i1,i2,i3>(b.y), shuffle<i0,i1,i2,i3>(b.z));
+ }
+#endif
+
+#if defined(__AVX512F__)
+ template<> __forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a) : x(a.x), y(a.y), z(a.z) {}
+#endif
+}
diff --git a/thirdparty/embree/common/math/vec3ba.h b/thirdparty/embree/common/math/vec3ba.h
new file mode 100644
index 0000000000..a021b522dc
--- /dev/null
+++ b/thirdparty/embree/common/math/vec3ba.h
@@ -0,0 +1,120 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../sys/alloc.h"
+#include "math.h"
+#include "../simd/sse.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////////////////////
+ /// SSE Vec3ba Type
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct __aligned(16) Vec3ba
+ {
+ ALIGNED_STRUCT_(16);
+
+ union {
+ __m128 m128;
+ struct { int x,y,z; };
+ };
+
+ typedef int Scalar;
+ enum { N = 3 };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constructors, Assignment & Cast Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ba( ) {}
+ __forceinline Vec3ba( const __m128 input ) : m128(input) {}
+ __forceinline Vec3ba( const Vec3ba& other ) : m128(other.m128) {}
+ __forceinline Vec3ba& operator =(const Vec3ba& other) { m128 = other.m128; return *this; }
+
+ __forceinline explicit Vec3ba( bool a )
+ : m128(mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {}
+ __forceinline Vec3ba( bool a, bool b, bool c)
+ : m128(mm_lookupmask_ps[(size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {}
+
+ __forceinline operator const __m128&() const { return m128; }
+ __forceinline operator __m128&() { return m128; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ba( FalseTy ) : m128(_mm_setzero_ps()) {}
+ __forceinline Vec3ba( TrueTy ) : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Array Access
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
+ __forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
+ };
+
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ba operator !( const Vec3ba& a ) { return _mm_xor_ps(a.m128, Vec3ba(embree::True)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ba operator &( const Vec3ba& a, const Vec3ba& b ) { return _mm_and_ps(a.m128, b.m128); }
+ __forceinline Vec3ba operator |( const Vec3ba& a, const Vec3ba& b ) { return _mm_or_ps (a.m128, b.m128); }
+ __forceinline Vec3ba operator ^( const Vec3ba& a, const Vec3ba& b ) { return _mm_xor_ps(a.m128, b.m128); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ba& operator &=( Vec3ba& a, const Vec3ba& b ) { return a = a & b; }
+ __forceinline Vec3ba& operator |=( Vec3ba& a, const Vec3ba& b ) { return a = a | b; }
+ __forceinline Vec3ba& operator ^=( Vec3ba& a, const Vec3ba& b ) { return a = a ^ b; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators + Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool operator ==( const Vec3ba& a, const Vec3ba& b ) {
+ return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(a.m128), _mm_castps_si128(b.m128)))) & 7) == 7;
+ }
+ __forceinline bool operator !=( const Vec3ba& a, const Vec3ba& b ) {
+ return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(a.m128), _mm_castps_si128(b.m128)))) & 7) != 7;
+ }
+ __forceinline bool operator < ( const Vec3ba& a, const Vec3ba& b ) {
+ if (a.x != b.x) return a.x < b.x;
+ if (a.y != b.y) return a.y < b.y;
+ if (a.z != b.z) return a.z < b.z;
+ return false;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reduction Operations
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool reduce_and( const Vec3ba& a ) { return (_mm_movemask_ps(a) & 0x7) == 0x7; }
+ __forceinline bool reduce_or ( const Vec3ba& a ) { return (_mm_movemask_ps(a) & 0x7) != 0x0; }
+
+ __forceinline bool all ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) == 0x7; }
+ __forceinline bool any ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) != 0x0; }
+ __forceinline bool none ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) == 0x0; }
+
+ __forceinline size_t movemask(const Vec3ba& a) { return _mm_movemask_ps(a) & 0x7; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3ba& a) {
+ return cout << "(" << (a.x ? "1" : "0") << ", " << (a.y ? "1" : "0") << ", " << (a.z ? "1" : "0") << ")";
+ }
+}
diff --git a/thirdparty/embree/common/math/vec3fa.h b/thirdparty/embree/common/math/vec3fa.h
new file mode 100644
index 0000000000..586039741d
--- /dev/null
+++ b/thirdparty/embree/common/math/vec3fa.h
@@ -0,0 +1,727 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../sys/alloc.h"
+#include "math.h"
+#include "../simd/sse.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////////////////////
+ /// SSE Vec3fa Type
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct __aligned(16) Vec3fa
+ {
+ ALIGNED_STRUCT_(16);
+
+ typedef float Scalar;
+ enum { N = 3 };
+ union {
+ __m128 m128;
+ struct { float x,y,z; };
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constructors, Assignment & Cast Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fa( ) {}
+ __forceinline Vec3fa( const __m128 a ) : m128(a) {}
+
+ __forceinline Vec3fa ( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); }
+ //__forceinline Vec3fa& operator =( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; }
+
+ __forceinline Vec3fa ( const Vec3fa& other ) { m128 = other.m128; }
+ __forceinline Vec3fa& operator =( const Vec3fa& other ) { m128 = other.m128; return *this; }
+
+ __forceinline explicit Vec3fa( const float a ) : m128(_mm_set1_ps(a)) {}
+ __forceinline Vec3fa( const float x, const float y, const float z) : m128(_mm_set_ps(0, z, y, x)) {}
+
+ __forceinline explicit Vec3fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
+
+ __forceinline explicit operator const vfloat4() const { return vfloat4(m128); }
+ __forceinline explicit operator const vint4() const { return vint4(_mm_cvtps_epi32(m128)); }
+ __forceinline explicit operator const Vec2fa() const { return Vec2fa(m128); }
+ __forceinline explicit operator const Vec3ia() const { return Vec3ia(_mm_cvtps_epi32(m128)); }
+
+ //__forceinline operator const __m128&() const { return m128; }
+ //__forceinline operator __m128&() { return m128; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Loads and Stores
+ ////////////////////////////////////////////////////////////////////////////////
+
+ static __forceinline Vec3fa load( const void* const a ) {
+ return Vec3fa(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1))));
+ }
+
+ static __forceinline Vec3fa loadu( const void* const a ) {
+ return Vec3fa(_mm_loadu_ps((float*)a));
+ }
+
+ static __forceinline void storeu ( void* ptr, const Vec3fa& v ) {
+ _mm_storeu_ps((float*)ptr,v.m128);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fa( ZeroTy ) : m128(_mm_setzero_ps()) {}
+ __forceinline Vec3fa( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
+ __forceinline Vec3fa( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
+ __forceinline Vec3fa( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Array Access
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
+ __forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fa operator +( const Vec3fa& a ) { return a; }
+ __forceinline Vec3fa operator -( const Vec3fa& a ) {
+ const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
+ return _mm_xor_ps(a.m128, mask);
+ }
+ __forceinline Vec3fa abs ( const Vec3fa& a ) {
+ const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
+ return _mm_and_ps(a.m128, mask);
+ }
+ __forceinline Vec3fa sign ( const Vec3fa& a ) {
+ return blendv_ps(Vec3fa(one).m128, (-Vec3fa(one)).m128, _mm_cmplt_ps (a.m128,Vec3fa(zero).m128));
+ }
+
+ __forceinline Vec3fa rcp ( const Vec3fa& a )
+ {
+#if defined(__AVX512VL__)
+ const Vec3fa r = _mm_rcp14_ps(a.m128);
+#else
+ const Vec3fa r = _mm_rcp_ps(a.m128);
+#endif
+
+#if defined(__AVX2__)
+ const Vec3fa res = _mm_mul_ps(r.m128,_mm_fnmadd_ps(r.m128, a.m128, vfloat4(2.0f)));
+#else
+ const Vec3fa res = _mm_mul_ps(r.m128,_mm_sub_ps(vfloat4(2.0f), _mm_mul_ps(r.m128, a.m128)));
+ //return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
+#endif
+
+ return res;
+ }
+
+ __forceinline Vec3fa sqrt ( const Vec3fa& a ) { return _mm_sqrt_ps(a.m128); }
+ __forceinline Vec3fa sqr ( const Vec3fa& a ) { return _mm_mul_ps(a.m128,a.m128); }
+
+ __forceinline Vec3fa rsqrt( const Vec3fa& a )
+ {
+#if defined(__AVX512VL__)
+ __m128 r = _mm_rsqrt14_ps(a.m128);
+#else
+ __m128 r = _mm_rsqrt_ps(a.m128);
+#endif
+ return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a.m128, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
+ }
+
+ __forceinline Vec3fa zero_fix(const Vec3fa& a) {
+ return blendv_ps(a.m128, _mm_set1_ps(min_rcp_input), _mm_cmplt_ps (abs(a).m128, _mm_set1_ps(min_rcp_input)));
+ }
+ __forceinline Vec3fa rcp_safe(const Vec3fa& a) {
+ return rcp(zero_fix(a));
+ }
+ __forceinline Vec3fa log ( const Vec3fa& a ) {
+ return Vec3fa(logf(a.x),logf(a.y),logf(a.z));
+ }
+
+ __forceinline Vec3fa exp ( const Vec3fa& a ) {
+ return Vec3fa(expf(a.x),expf(a.y),expf(a.z));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b ) { return _mm_add_ps(a.m128, b.m128); }
+ __forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b ) { return _mm_sub_ps(a.m128, b.m128); }
+ __forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b ) { return _mm_mul_ps(a.m128, b.m128); }
+ __forceinline Vec3fa operator *( const Vec3fa& a, const float b ) { return a * Vec3fa(b); }
+ __forceinline Vec3fa operator *( const float a, const Vec3fa& b ) { return Vec3fa(a) * b; }
+ __forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b ) { return _mm_div_ps(a.m128,b.m128); }
+ __forceinline Vec3fa operator /( const Vec3fa& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
+ __forceinline Vec3fa operator /( const float a, const Vec3fa& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }
+
+ __forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) { return _mm_min_ps(a.m128,b.m128); }
+ __forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) { return _mm_max_ps(a.m128,b.m128); }
+
+#if defined(__SSE4_1__)
+ __forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) {
+ const vint4 ai = _mm_castps_si128(a.m128);
+ const vint4 bi = _mm_castps_si128(b.m128);
+ const vint4 ci = _mm_min_epi32(ai,bi);
+ return _mm_castsi128_ps(ci);
+ }
+#endif
+
+#if defined(__SSE4_1__)
+ __forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) {
+ const vint4 ai = _mm_castps_si128(a.m128);
+ const vint4 bi = _mm_castps_si128(b.m128);
+ const vint4 ci = _mm_max_epi32(ai,bi);
+ return _mm_castsi128_ps(ci);
+ }
+#endif
+
+ __forceinline Vec3fa pow ( const Vec3fa& a, const float& b ) {
+ return Vec3fa(powf(a.x,b),powf(a.y,b),powf(a.z,b));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Ternary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__AVX2__)
+ __forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fmadd_ps(a.m128,b.m128,c.m128); }
+ __forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fmsub_ps(a.m128,b.m128,c.m128); }
+ __forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fnmadd_ps(a.m128,b.m128,c.m128); }
+ __forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fnmsub_ps(a.m128,b.m128,c.m128); }
+#else
+ __forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b+c; }
+ __forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b-c; }
+ __forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b+c;}
+ __forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b-c; }
+#endif
+
+ __forceinline Vec3fa madd ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); }
+ __forceinline Vec3fa msub ( const float a, const Vec3fa& b, const Vec3fa& c) { return msub(Vec3fa(a),b,c); }
+ __forceinline Vec3fa nmadd ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmadd(Vec3fa(a),b,c); }
+ __forceinline Vec3fa nmsub ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmsub(Vec3fa(a),b,c); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b ) { return a = a + b; }
+ __forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b ) { return a = a - b; }
+ __forceinline Vec3fa& operator *=( Vec3fa& a, const Vec3fa& b ) { return a = a * b; }
+ __forceinline Vec3fa& operator *=( Vec3fa& a, const float b ) { return a = a * b; }
+ __forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b ) { return a = a / b; }
+ __forceinline Vec3fa& operator /=( Vec3fa& a, const float b ) { return a = a / b; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reductions
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline float reduce_add(const Vec3fa& v) {
+ const vfloat4 a(v.m128);
+ const vfloat4 b = shuffle<1>(a);
+ const vfloat4 c = shuffle<2>(a);
+ return _mm_cvtss_f32(a+b+c);
+ }
+
+ __forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
+ __forceinline float reduce_min(const Vec3fa& v) { return min(v.x,v.y,v.z); }
+ __forceinline float reduce_max(const Vec3fa& v) { return max(v.x,v.y,v.z); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; }
+ __forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; }
+
+ __forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpeq_ps (a.m128, b.m128); }
+ __forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpneq_ps(a.m128, b.m128); }
+ __forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmplt_ps (a.m128, b.m128); }
+ __forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmple_ps (a.m128, b.m128); }
+ __forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpnle_ps(a.m128, b.m128); }
+ __forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpnlt_ps(a.m128, b.m128); }
+
+ __forceinline bool isvalid ( const Vec3fa& v ) {
+ return all(gt_mask(v,Vec3fa(-FLT_LARGE)) & lt_mask(v,Vec3fa(+FLT_LARGE)));
+ }
+
+ __forceinline bool is_finite ( const Vec3fa& a ) {
+ return all(ge_mask(a,Vec3fa(-FLT_MAX)) & le_mask(a,Vec3fa(+FLT_MAX)));
+ }
+
+ __forceinline bool isvalid4 ( const Vec3fa& v ) {
+ return all((vfloat4(v.m128) > vfloat4(-FLT_LARGE)) & (vfloat4(v.m128) < vfloat4(+FLT_LARGE)));
+ }
+
+ __forceinline bool is_finite4 ( const Vec3fa& a ) {
+ return all((vfloat4(a.m128) >= vfloat4(-FLT_MAX)) & (vfloat4(a.m128) <= vfloat4(+FLT_MAX)));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Euclidian Space Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__SSE4_1__)
+ __forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
+ return _mm_cvtss_f32(_mm_dp_ps(a.m128,b.m128,0x7F));
+ }
+#else
+ __forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
+ return reduce_add(a*b);
+ }
+#endif
+
+ __forceinline Vec3fa cross ( const Vec3fa& a, const Vec3fa& b )
+ {
+ vfloat4 a0 = vfloat4(a.m128);
+ vfloat4 b0 = shuffle<1,2,0,3>(vfloat4(b.m128));
+ vfloat4 a1 = shuffle<1,2,0,3>(vfloat4(a.m128));
+ vfloat4 b1 = vfloat4(b.m128);
+ return Vec3fa(shuffle<1,2,0,3>(msub(a0,b0,a1*b1)));
+ }
+
+ __forceinline float sqr_length ( const Vec3fa& a ) { return dot(a,a); }
+ __forceinline float rcp_length ( const Vec3fa& a ) { return rsqrt(dot(a,a)); }
+ __forceinline float rcp_length2( const Vec3fa& a ) { return rcp(dot(a,a)); }
+ __forceinline float length ( const Vec3fa& a ) { return sqrt(dot(a,a)); }
+ __forceinline Vec3fa normalize( const Vec3fa& a ) { return a*rsqrt(dot(a,a)); }
+ __forceinline float distance ( const Vec3fa& a, const Vec3fa& b ) { return length(a-b); }
+ __forceinline float halfArea ( const Vec3fa& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
+ __forceinline float area ( const Vec3fa& d ) { return 2.0f*halfArea(d); }
+
+ __forceinline Vec3fa normalize_safe( const Vec3fa& a ) {
+ const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
+ }
+
+ /*! differentiated normalization */
+ __forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp)
+ {
+ const float pp = dot(p,p);
+ const float pdp = dot(p,dp);
+ return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f ) {
+ __m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
+ return blendv_ps(f.m128, t.m128, mask);
+ }
+
+ __forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f ) {
+ return blendv_ps(f.m128, t.m128, s);
+ }
+
+ __forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t) {
+ return madd(1.0f-t,v0,t*v1);
+ }
+
+ __forceinline int maxDim ( const Vec3fa& a )
+ {
+ const Vec3fa b = abs(a);
+ if (b.x > b.y) {
+ if (b.x > b.z) return 0; else return 2;
+ } else {
+ if (b.y > b.z) return 1; else return 2;
+ }
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Rounding Functions
+ ////////////////////////////////////////////////////////////////////////////////
+
+#if defined (__SSE4_1__)
+ __forceinline Vec3fa trunc( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEAREST_INT); }
+ __forceinline Vec3fa floor( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF ); }
+ __forceinline Vec3fa ceil ( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF ); }
+#else
+ __forceinline Vec3fa trunc( const Vec3fa& a ) { return Vec3fa(truncf(a.x),truncf(a.y),truncf(a.z)); }
+ __forceinline Vec3fa floor( const Vec3fa& a ) { return Vec3fa(floorf(a.x),floorf(a.y),floorf(a.z)); }
+ __forceinline Vec3fa ceil ( const Vec3fa& a ) { return Vec3fa(ceilf (a.x),ceilf (a.y),ceilf (a.z)); }
+#endif
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a) {
+ return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
+ }
+
+ typedef Vec3fa Vec3fa_t;
+
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// SSE Vec3fx Type
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct __aligned(16) Vec3fx
+ {
+ ALIGNED_STRUCT_(16);
+
+ typedef float Scalar;
+ enum { N = 3 };
+ union {
+ __m128 m128;
+ struct { float x,y,z; union { int a; unsigned u; float w; }; };
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constructors, Assignment & Cast Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fx( ) {}
+ __forceinline Vec3fx( const __m128 a ) : m128(a) {}
+
+ __forceinline explicit Vec3fx(const Vec3fa& v) : m128(v.m128) {}
+ __forceinline operator Vec3fa () const { return Vec3fa(m128); }
+
+ __forceinline explicit Vec3fx ( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); }
+ //__forceinline Vec3fx& operator =( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; }
+
+ __forceinline Vec3fx ( const Vec3fx& other ) { m128 = other.m128; }
+
+ __forceinline Vec3fx& operator =( const Vec3fx& other ) { m128 = other.m128; return *this; }
+
+ __forceinline explicit Vec3fx( const float a ) : m128(_mm_set1_ps(a)) {}
+ __forceinline Vec3fx( const float x, const float y, const float z) : m128(_mm_set_ps(0, z, y, x)) {}
+
+ __forceinline Vec3fx( const Vec3fa& other, const int a1) { m128 = other.m128; a = a1; }
+ __forceinline Vec3fx( const Vec3fa& other, const unsigned a1) { m128 = other.m128; u = a1; }
+ __forceinline Vec3fx( const Vec3fa& other, const float w1) {
+#if defined (__SSE4_1__)
+ m128 = _mm_insert_ps(other.m128, _mm_set_ss(w1),3 << 4);
+#else
+ const vint4 mask(-1,-1,-1,0);
+ m128 = select(vboolf4(_mm_castsi128_ps(mask)),vfloat4(other.m128),vfloat4(w1));
+#endif
+ }
+ //__forceinline Vec3fx( const float x, const float y, const float z, const int a) : x(x), y(y), z(z), a(a) {} // not working properly!
+ //__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly!
+ __forceinline Vec3fx( const float x, const float y, const float z, const float w) : m128(_mm_set_ps(w, z, y, x)) {}
+
+ //__forceinline explicit Vec3fx( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
+
+ __forceinline explicit operator const vfloat4() const { return vfloat4(m128); }
+ __forceinline explicit operator const vint4() const { return vint4(_mm_cvtps_epi32(m128)); }
+ __forceinline explicit operator const Vec2fa() const { return Vec2fa(m128); }
+ __forceinline explicit operator const Vec3ia() const { return Vec3ia(_mm_cvtps_epi32(m128)); }
+
+ //__forceinline operator const __m128&() const { return m128; }
+ //__forceinline operator __m128&() { return m128; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Loads and Stores
+ ////////////////////////////////////////////////////////////////////////////////
+
+ static __forceinline Vec3fx load( const void* const a ) {
+ return Vec3fx(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1))));
+ }
+
+ static __forceinline Vec3fx loadu( const void* const a ) {
+ return Vec3fx(_mm_loadu_ps((float*)a));
+ }
+
+ static __forceinline void storeu ( void* ptr, const Vec3fx& v ) {
+ _mm_storeu_ps((float*)ptr,v.m128);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fx( ZeroTy ) : m128(_mm_setzero_ps()) {}
+ __forceinline Vec3fx( OneTy ) : m128(_mm_set1_ps(1.0f)) {}
+ __forceinline Vec3fx( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
+ __forceinline Vec3fx( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Array Access
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
+ __forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fx operator +( const Vec3fx& a ) { return a; }
+ __forceinline Vec3fx operator -( const Vec3fx& a ) {
+ const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
+ return _mm_xor_ps(a.m128, mask);
+ }
+ __forceinline Vec3fx abs ( const Vec3fx& a ) {
+ const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
+ return _mm_and_ps(a.m128, mask);
+ }
+ __forceinline Vec3fx sign ( const Vec3fx& a ) {
+ return blendv_ps(Vec3fx(one).m128, (-Vec3fx(one)).m128, _mm_cmplt_ps (a.m128,Vec3fx(zero).m128));
+ }
+
+ __forceinline Vec3fx rcp ( const Vec3fx& a )
+ {
+#if defined(__AVX512VL__)
+ const Vec3fx r = _mm_rcp14_ps(a.m128);
+#else
+ const Vec3fx r = _mm_rcp_ps(a.m128);
+#endif
+
+#if defined(__AVX2__)
+ const Vec3fx res = _mm_mul_ps(r.m128,_mm_fnmadd_ps(r.m128, a.m128, vfloat4(2.0f)));
+#else
+ const Vec3fx res = _mm_mul_ps(r.m128,_mm_sub_ps(vfloat4(2.0f), _mm_mul_ps(r.m128, a.m128)));
+ //return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
+#endif
+
+ return res;
+ }
+
+ __forceinline Vec3fx sqrt ( const Vec3fx& a ) { return _mm_sqrt_ps(a.m128); }
+ __forceinline Vec3fx sqr ( const Vec3fx& a ) { return _mm_mul_ps(a.m128,a.m128); }
+
+ __forceinline Vec3fx rsqrt( const Vec3fx& a )
+ {
+#if defined(__AVX512VL__)
+ __m128 r = _mm_rsqrt14_ps(a.m128);
+#else
+ __m128 r = _mm_rsqrt_ps(a.m128);
+#endif
+ return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a.m128, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
+ }
+
+ __forceinline Vec3fx zero_fix(const Vec3fx& a) {
+ return blendv_ps(a.m128, _mm_set1_ps(min_rcp_input), _mm_cmplt_ps (abs(a).m128, _mm_set1_ps(min_rcp_input)));
+ }
+ __forceinline Vec3fx rcp_safe(const Vec3fx& a) {
+ return rcp(zero_fix(a));
+ }
+ __forceinline Vec3fx log ( const Vec3fx& a ) {
+ return Vec3fx(logf(a.x),logf(a.y),logf(a.z));
+ }
+
+ __forceinline Vec3fx exp ( const Vec3fx& a ) {
+ return Vec3fx(expf(a.x),expf(a.y),expf(a.z));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b ) { return _mm_add_ps(a.m128, b.m128); }
+ __forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b ) { return _mm_sub_ps(a.m128, b.m128); }
+ __forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b ) { return _mm_mul_ps(a.m128, b.m128); }
+ __forceinline Vec3fx operator *( const Vec3fx& a, const float b ) { return a * Vec3fx(b); }
+ __forceinline Vec3fx operator *( const float a, const Vec3fx& b ) { return Vec3fx(a) * b; }
+ __forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b ) { return _mm_div_ps(a.m128,b.m128); }
+ __forceinline Vec3fx operator /( const Vec3fx& a, const float b ) { return _mm_div_ps(a.m128,_mm_set1_ps(b)); }
+ __forceinline Vec3fx operator /( const float a, const Vec3fx& b ) { return _mm_div_ps(_mm_set1_ps(a),b.m128); }
+
+ __forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) { return _mm_min_ps(a.m128,b.m128); }
+ __forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) { return _mm_max_ps(a.m128,b.m128); }
+
+#if defined(__SSE4_1__)
+ __forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) {
+ const vint4 ai = _mm_castps_si128(a.m128);
+ const vint4 bi = _mm_castps_si128(b.m128);
+ const vint4 ci = _mm_min_epi32(ai,bi);
+ return _mm_castsi128_ps(ci);
+ }
+#endif
+
+#if defined(__SSE4_1__)
+ __forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) {
+ const vint4 ai = _mm_castps_si128(a.m128);
+ const vint4 bi = _mm_castps_si128(b.m128);
+ const vint4 ci = _mm_max_epi32(ai,bi);
+ return _mm_castsi128_ps(ci);
+ }
+#endif
+
+ __forceinline Vec3fx pow ( const Vec3fx& a, const float& b ) {
+ return Vec3fx(powf(a.x,b),powf(a.y,b),powf(a.z,b));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Ternary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__AVX2__)
+ __forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fmadd_ps(a.m128,b.m128,c.m128); }
+ __forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fmsub_ps(a.m128,b.m128,c.m128); }
+ __forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fnmadd_ps(a.m128,b.m128,c.m128); }
+ __forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return _mm_fnmsub_ps(a.m128,b.m128,c.m128); }
+#else
+ __forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return a*b+c; }
+ __forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return a*b-c; }
+ __forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return -a*b+c;}
+ __forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return -a*b-c; }
+#endif
+
+ __forceinline Vec3fx madd ( const float a, const Vec3fx& b, const Vec3fx& c) { return madd(Vec3fx(a),b,c); }
+ __forceinline Vec3fx msub ( const float a, const Vec3fx& b, const Vec3fx& c) { return msub(Vec3fx(a),b,c); }
+ __forceinline Vec3fx nmadd ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmadd(Vec3fx(a),b,c); }
+ __forceinline Vec3fx nmsub ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmsub(Vec3fx(a),b,c); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b ) { return a = a + b; }
+ __forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b ) { return a = a - b; }
+ __forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b ) { return a = a * b; }
+ __forceinline Vec3fx& operator *=( Vec3fx& a, const float b ) { return a = a * b; }
+ __forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b ) { return a = a / b; }
+ __forceinline Vec3fx& operator /=( Vec3fx& a, const float b ) { return a = a / b; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reductions
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline float reduce_add(const Vec3fx& v) {
+ const vfloat4 a(v.m128);
+ const vfloat4 b = shuffle<1>(a);
+ const vfloat4 c = shuffle<2>(a);
+ return _mm_cvtss_f32(a+b+c);
+ }
+
+ __forceinline float reduce_mul(const Vec3fx& v) { return v.x*v.y*v.z; }
+ __forceinline float reduce_min(const Vec3fx& v) { return min(v.x,v.y,v.z); }
+ __forceinline float reduce_max(const Vec3fx& v) { return max(v.x,v.y,v.z); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b ) { return (_mm_movemask_ps(_mm_cmpeq_ps (a.m128, b.m128)) & 7) == 7; }
+ __forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 7) != 0; }
+
+ __forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpeq_ps (a.m128, b.m128); }
+ __forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpneq_ps(a.m128, b.m128); }
+ __forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmplt_ps (a.m128, b.m128); }
+ __forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmple_ps (a.m128, b.m128); }
+ __forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpnle_ps(a.m128, b.m128); }
+ __forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b ) { return _mm_cmpnlt_ps(a.m128, b.m128); }
+
+ __forceinline bool isvalid ( const Vec3fx& v ) {
+ return all(gt_mask(v,Vec3fx(-FLT_LARGE)) & lt_mask(v,Vec3fx(+FLT_LARGE)));
+ }
+
+ __forceinline bool is_finite ( const Vec3fx& a ) {
+ return all(ge_mask(a,Vec3fx(-FLT_MAX)) & le_mask(a,Vec3fx(+FLT_MAX)));
+ }
+
+ __forceinline bool isvalid4 ( const Vec3fx& v ) {
+ return all((vfloat4(v.m128) > vfloat4(-FLT_LARGE)) & (vfloat4(v.m128) < vfloat4(+FLT_LARGE)));
+ }
+
+ __forceinline bool is_finite4 ( const Vec3fx& a ) {
+ return all((vfloat4(a.m128) >= vfloat4(-FLT_MAX)) & (vfloat4(a.m128) <= vfloat4(+FLT_MAX)));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Euclidian Space Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__SSE4_1__)
+ __forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
+ return _mm_cvtss_f32(_mm_dp_ps(a.m128,b.m128,0x7F));
+ }
+#else
+ __forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
+ return reduce_add(a*b);
+ }
+#endif
+
+ __forceinline Vec3fx cross ( const Vec3fx& a, const Vec3fx& b )
+ {
+ vfloat4 a0 = vfloat4(a.m128);
+ vfloat4 b0 = shuffle<1,2,0,3>(vfloat4(b.m128));
+ vfloat4 a1 = shuffle<1,2,0,3>(vfloat4(a.m128));
+ vfloat4 b1 = vfloat4(b.m128);
+ return Vec3fx(shuffle<1,2,0,3>(msub(a0,b0,a1*b1)));
+ }
+
+ __forceinline float sqr_length ( const Vec3fx& a ) { return dot(a,a); }
+ __forceinline float rcp_length ( const Vec3fx& a ) { return rsqrt(dot(a,a)); }
+ __forceinline float rcp_length2( const Vec3fx& a ) { return rcp(dot(a,a)); }
+ __forceinline float length ( const Vec3fx& a ) { return sqrt(dot(a,a)); }
+ __forceinline Vec3fx normalize( const Vec3fx& a ) { return a*rsqrt(dot(a,a)); }
+ __forceinline float distance ( const Vec3fx& a, const Vec3fx& b ) { return length(a-b); }
+ __forceinline float halfArea ( const Vec3fx& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
+ __forceinline float area ( const Vec3fx& d ) { return 2.0f*halfArea(d); }
+
+ __forceinline Vec3fx normalize_safe( const Vec3fx& a ) {
+ const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
+ }
+
+ /*! differentiated normalization */
+ __forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp)
+ {
+ const float pp = dot(p,p);
+ const float pdp = dot(p,dp);
+ return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f ) {
+ __m128 mask = s ? _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())) : _mm_setzero_ps();
+ return blendv_ps(f.m128, t.m128, mask);
+ }
+
+ __forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f ) {
+ return blendv_ps(f.m128, t.m128, s);
+ }
+
+ __forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t) {
+ return madd(1.0f-t,v0,t*v1);
+ }
+
+ __forceinline int maxDim ( const Vec3fx& a )
+ {
+ const Vec3fx b = abs(a);
+ if (b.x > b.y) {
+ if (b.x > b.z) return 0; else return 2;
+ } else {
+ if (b.y > b.z) return 1; else return 2;
+ }
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Rounding Functions
+ ////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__aarch64__)
+ __forceinline Vec3fx trunc(const Vec3fx& a) { return vrndq_f32(a.m128); }
+ __forceinline Vec3fx floor(const Vec3fx& a) { return vrndmq_f32(a.m128); }
+ __forceinline Vec3fx ceil (const Vec3fx& a) { return vrndpq_f32(a.m128); }
+#elif defined (__SSE4_1__)
+ __forceinline Vec3fx trunc( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEAREST_INT); }
+ __forceinline Vec3fx floor( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF ); }
+ __forceinline Vec3fx ceil ( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF ); }
+#else
+ __forceinline Vec3fx trunc( const Vec3fx& a ) { return Vec3fx(truncf(a.x),truncf(a.y),truncf(a.z)); }
+ __forceinline Vec3fx floor( const Vec3fx& a ) { return Vec3fx(floorf(a.x),floorf(a.y),floorf(a.z)); }
+ __forceinline Vec3fx ceil ( const Vec3fx& a ) { return Vec3fx(ceilf (a.x),ceilf (a.y),ceilf (a.z)); }
+#endif
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a) {
+ return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
+ }
+
+
+ typedef Vec3fx Vec3ff;
+}
diff --git a/thirdparty/embree/common/math/vec3ia.h b/thirdparty/embree/common/math/vec3ia.h
new file mode 100644
index 0000000000..694804c40d
--- /dev/null
+++ b/thirdparty/embree/common/math/vec3ia.h
@@ -0,0 +1,186 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../sys/alloc.h"
+#include "math.h"
+#include "../simd/sse.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////////////////////
+ /// SSE Vec3ia Type
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct __aligned(16) Vec3ia
+ {
+ ALIGNED_STRUCT_(16);
+
+ union {
+ __m128i m128;
+ struct { int x,y,z; };
+ };
+
+ typedef int Scalar;
+ enum { N = 3 };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constructors, Assignment & Cast Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ia( ) {}
+ __forceinline Vec3ia( const __m128i a ) : m128(a) {}
+ __forceinline Vec3ia( const Vec3ia& other ) : m128(other.m128) {}
+ __forceinline Vec3ia& operator =(const Vec3ia& other) { m128 = other.m128; return *this; }
+
+ __forceinline explicit Vec3ia( const int a ) : m128(_mm_set1_epi32(a)) {}
+ __forceinline Vec3ia( const int x, const int y, const int z) : m128(_mm_set_epi32(z, z, y, x)) {}
+ __forceinline explicit Vec3ia( const __m128 a ) : m128(_mm_cvtps_epi32(a)) {}
+
+ __forceinline operator const __m128i&() const { return m128; }
+ __forceinline operator __m128i&() { return m128; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ia( ZeroTy ) : m128(_mm_setzero_si128()) {}
+ __forceinline Vec3ia( OneTy ) : m128(_mm_set1_epi32(1)) {}
+ __forceinline Vec3ia( PosInfTy ) : m128(_mm_set1_epi32(pos_inf)) {}
+ __forceinline Vec3ia( NegInfTy ) : m128(_mm_set1_epi32(neg_inf)) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Array Access
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
+ __forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
+ };
+
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ia operator +( const Vec3ia& a ) { return a; }
+ __forceinline Vec3ia operator -( const Vec3ia& a ) { return _mm_sub_epi32(_mm_setzero_si128(), a.m128); }
+#if defined(__SSSE3__)
+ __forceinline Vec3ia abs ( const Vec3ia& a ) { return _mm_abs_epi32(a.m128); }
+#endif
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ia operator +( const Vec3ia& a, const Vec3ia& b ) { return _mm_add_epi32(a.m128, b.m128); }
+ __forceinline Vec3ia operator +( const Vec3ia& a, const int b ) { return a+Vec3ia(b); }
+ __forceinline Vec3ia operator +( const int a, const Vec3ia& b ) { return Vec3ia(a)+b; }
+
+ __forceinline Vec3ia operator -( const Vec3ia& a, const Vec3ia& b ) { return _mm_sub_epi32(a.m128, b.m128); }
+ __forceinline Vec3ia operator -( const Vec3ia& a, const int b ) { return a-Vec3ia(b); }
+ __forceinline Vec3ia operator -( const int a, const Vec3ia& b ) { return Vec3ia(a)-b; }
+
+#if defined(__SSE4_1__)
+ __forceinline Vec3ia operator *( const Vec3ia& a, const Vec3ia& b ) { return _mm_mullo_epi32(a.m128, b.m128); }
+ __forceinline Vec3ia operator *( const Vec3ia& a, const int b ) { return a * Vec3ia(b); }
+ __forceinline Vec3ia operator *( const int a, const Vec3ia& b ) { return Vec3ia(a) * b; }
+#endif
+
+ __forceinline Vec3ia operator &( const Vec3ia& a, const Vec3ia& b ) { return _mm_and_si128(a.m128, b.m128); }
+ __forceinline Vec3ia operator &( const Vec3ia& a, const int b ) { return a & Vec3ia(b); }
+ __forceinline Vec3ia operator &( const int a, const Vec3ia& b ) { return Vec3ia(a) & b; }
+
+ __forceinline Vec3ia operator |( const Vec3ia& a, const Vec3ia& b ) { return _mm_or_si128(a.m128, b.m128); }
+ __forceinline Vec3ia operator |( const Vec3ia& a, const int b ) { return a | Vec3ia(b); }
+ __forceinline Vec3ia operator |( const int a, const Vec3ia& b ) { return Vec3ia(a) | b; }
+
+ __forceinline Vec3ia operator ^( const Vec3ia& a, const Vec3ia& b ) { return _mm_xor_si128(a.m128, b.m128); }
+ __forceinline Vec3ia operator ^( const Vec3ia& a, const int b ) { return a ^ Vec3ia(b); }
+ __forceinline Vec3ia operator ^( const int a, const Vec3ia& b ) { return Vec3ia(a) ^ b; }
+
+ __forceinline Vec3ia operator <<( const Vec3ia& a, const int n ) { return _mm_slli_epi32(a.m128, n); }
+ __forceinline Vec3ia operator >>( const Vec3ia& a, const int n ) { return _mm_srai_epi32(a.m128, n); }
+
+ __forceinline Vec3ia sll ( const Vec3ia& a, const int b ) { return _mm_slli_epi32(a.m128, b); }
+ __forceinline Vec3ia sra ( const Vec3ia& a, const int b ) { return _mm_srai_epi32(a.m128, b); }
+ __forceinline Vec3ia srl ( const Vec3ia& a, const int b ) { return _mm_srli_epi32(a.m128, b); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ia& operator +=( Vec3ia& a, const Vec3ia& b ) { return a = a + b; }
+ __forceinline Vec3ia& operator +=( Vec3ia& a, const int& b ) { return a = a + b; }
+
+ __forceinline Vec3ia& operator -=( Vec3ia& a, const Vec3ia& b ) { return a = a - b; }
+ __forceinline Vec3ia& operator -=( Vec3ia& a, const int& b ) { return a = a - b; }
+
+#if defined(__SSE4_1__)
+ __forceinline Vec3ia& operator *=( Vec3ia& a, const Vec3ia& b ) { return a = a * b; }
+ __forceinline Vec3ia& operator *=( Vec3ia& a, const int& b ) { return a = a * b; }
+#endif
+
+ __forceinline Vec3ia& operator &=( Vec3ia& a, const Vec3ia& b ) { return a = a & b; }
+ __forceinline Vec3ia& operator &=( Vec3ia& a, const int& b ) { return a = a & b; }
+
+ __forceinline Vec3ia& operator |=( Vec3ia& a, const Vec3ia& b ) { return a = a | b; }
+ __forceinline Vec3ia& operator |=( Vec3ia& a, const int& b ) { return a = a | b; }
+
+ __forceinline Vec3ia& operator <<=( Vec3ia& a, const int& b ) { return a = a << b; }
+ __forceinline Vec3ia& operator >>=( Vec3ia& a, const int& b ) { return a = a >> b; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reductions
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline int reduce_add(const Vec3ia& v) { return v.x+v.y+v.z; }
+ __forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; }
+ __forceinline int reduce_min(const Vec3ia& v) { return min(v.x,v.y,v.z); }
+ __forceinline int reduce_max(const Vec3ia& v) { return max(v.x,v.y,v.z); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool operator ==( const Vec3ia& a, const Vec3ia& b ) { return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128))) & 7) == 7; }
+ __forceinline bool operator !=( const Vec3ia& a, const Vec3ia& b ) { return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128))) & 7) != 7; }
+ __forceinline bool operator < ( const Vec3ia& a, const Vec3ia& b ) {
+ if (a.x != b.x) return a.x < b.x;
+ if (a.y != b.y) return a.y < b.y;
+ if (a.z != b.z) return a.z < b.z;
+ return false;
+ }
+
+ __forceinline Vec3ba eq_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmpeq_epi32 (a.m128, b.m128)); }
+ __forceinline Vec3ba lt_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmplt_epi32 (a.m128, b.m128)); }
+ __forceinline Vec3ba gt_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmpgt_epi32 (a.m128, b.m128)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f ) {
+#if defined(__SSE4_1__)
+ return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m));
+#else
+ return _mm_or_si128(_mm_and_si128(_mm_castps_si128(m), t), _mm_andnot_si128(_mm_castps_si128(m), f));
+#endif
+ }
+
+#if defined(__SSE4_1__)
+ __forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return _mm_min_epi32(a.m128,b.m128); }
+ __forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return _mm_max_epi32(a.m128,b.m128); }
+#else
+ __forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return select(lt_mask(a,b),a,b); }
+ __forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return select(gt_mask(a,b),a,b); }
+#endif
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3ia& a) {
+ return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
+ }
+}
diff --git a/thirdparty/embree/common/math/vec4.h b/thirdparty/embree/common/math/vec4.h
new file mode 100644
index 0000000000..0ed107928a
--- /dev/null
+++ b/thirdparty/embree/common/math/vec4.h
@@ -0,0 +1,243 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "math.h"
+#include "vec3.h"
+
+namespace embree
+{
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Generic 4D vector Class
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> struct Vec4
+ {
+ enum { N = 4 };
+ union {
+ struct { T x, y, z, w; };
+#if !(defined(__WIN32__) && _MSC_VER == 1800) // workaround for older VS 2013 compiler
+ T components[N];
+#endif
+ };
+
+ typedef T Scalar;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Construction
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec4( ) {}
+ __forceinline explicit Vec4( const T& a ) : x(a), y(a), z(a), w(a) {}
+ __forceinline Vec4( const T& x, const T& y, const T& z, const T& w ) : x(x), y(y), z(z), w(w) {}
+ __forceinline Vec4( const Vec3<T>& xyz, const T& w ) : x(xyz.x), y(xyz.y), z(xyz.z), w(w) {}
+
+ __forceinline Vec4( const Vec4& other ) { x = other.x; y = other.y; z = other.z; w = other.w; }
+ __forceinline Vec4( const Vec3fx& other );
+
+ template<typename T1> __forceinline Vec4( const Vec4<T1>& a ) : x(T(a.x)), y(T(a.y)), z(T(a.z)), w(T(a.w)) {}
+ template<typename T1> __forceinline Vec4& operator =(const Vec4<T1>& other) { x = other.x; y = other.y; z = other.z; w = other.w; return *this; }
+
+ __forceinline Vec4& operator =(const Vec4& other) { x = other.x; y = other.y; z = other.z; w = other.w; return *this; }
+
+ __forceinline operator Vec3<T> () const { return Vec3<T>(x,y,z); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec4( ZeroTy ) : x(zero), y(zero), z(zero), w(zero) {}
+ __forceinline Vec4( OneTy ) : x(one), y(one), z(one), w(one) {}
+ __forceinline Vec4( PosInfTy ) : x(pos_inf), y(pos_inf), z(pos_inf), w(pos_inf) {}
+ __forceinline Vec4( NegInfTy ) : x(neg_inf), y(neg_inf), z(neg_inf), w(neg_inf) {}
+
+#if defined(__WIN32__) && (_MSC_VER == 1800) // workaround for older VS 2013 compiler
+ __forceinline const T& operator [](const size_t axis) const { assert(axis < 4); return (&x)[axis]; }
+ __forceinline T& operator [](const size_t axis) { assert(axis < 4); return (&x)[axis]; }
+#else
+ __forceinline const T& operator [](const size_t axis ) const { assert(axis < 4); return components[axis]; }
+ __forceinline T& operator [](const size_t axis) { assert(axis < 4); return components[axis]; }
+#endif
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Swizzles
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline Vec3<T> xyz() const { return Vec3<T>(x, y, z); }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Unary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec4<T> operator +( const Vec4<T>& a ) { return Vec4<T>(+a.x, +a.y, +a.z, +a.w); }
+ template<typename T> __forceinline Vec4<T> operator -( const Vec4<T>& a ) { return Vec4<T>(-a.x, -a.y, -a.z, -a.w); }
+ template<typename T> __forceinline Vec4<T> abs ( const Vec4<T>& a ) { return Vec4<T>(abs (a.x), abs (a.y), abs (a.z), abs (a.w)); }
+ template<typename T> __forceinline Vec4<T> rcp ( const Vec4<T>& a ) { return Vec4<T>(rcp (a.x), rcp (a.y), rcp (a.z), rcp (a.w)); }
+ template<typename T> __forceinline Vec4<T> rsqrt ( const Vec4<T>& a ) { return Vec4<T>(rsqrt(a.x), rsqrt(a.y), rsqrt(a.z), rsqrt(a.w)); }
+ template<typename T> __forceinline Vec4<T> sqrt ( const Vec4<T>& a ) { return Vec4<T>(sqrt (a.x), sqrt (a.y), sqrt (a.z), sqrt (a.w)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Binary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec4<T> operator +( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); }
+ template<typename T> __forceinline Vec4<T> operator -( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
+ template<typename T> __forceinline Vec4<T> operator *( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
+ template<typename T> __forceinline Vec4<T> operator *( const T& a, const Vec4<T>& b ) { return Vec4<T>(a * b.x, a * b.y, a * b.z, a * b.w); }
+ template<typename T> __forceinline Vec4<T> operator *( const Vec4<T>& a, const T& b ) { return Vec4<T>(a.x * b , a.y * b , a.z * b , a.w * b ); }
+ template<typename T> __forceinline Vec4<T> operator /( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); }
+ template<typename T> __forceinline Vec4<T> operator /( const Vec4<T>& a, const T& b ) { return Vec4<T>(a.x / b , a.y / b , a.z / b , a.w / b ); }
+ template<typename T> __forceinline Vec4<T> operator /( const T& a, const Vec4<T>& b ) { return Vec4<T>(a / b.x, a / b.y, a / b.z, a / b.w); }
+
+ template<typename T> __forceinline Vec4<T> min(const Vec4<T>& a, const Vec4<T>& b) { return Vec4<T>(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); }
+ template<typename T> __forceinline Vec4<T> max(const Vec4<T>& a, const Vec4<T>& b) { return Vec4<T>(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Ternary Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec4<T> madd ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z), madd(a.w,b.w,c.w)); }
+ template<typename T> __forceinline Vec4<T> msub ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z), msub(a.w,b.w,c.w)); }
+ template<typename T> __forceinline Vec4<T> nmadd ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmadd(a.x,b.x,c.x),nmadd(a.y,b.y,c.y),nmadd(a.z,b.z,c.z),nmadd(a.w,b.w,c.w)); }
+ template<typename T> __forceinline Vec4<T> nmsub ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmsub(a.x,b.x,c.x),nmsub(a.y,b.y,c.y),nmsub(a.z,b.z,c.z),nmsub(a.w,b.w,c.w)); }
+
+ template<typename T> __forceinline Vec4<T> madd ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( madd(a,b.x,c.x), madd(a,b.y,c.y), madd(a,b.z,c.z), madd(a,b.w,c.w)); }
+ template<typename T> __forceinline Vec4<T> msub ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( msub(a,b.x,c.x), msub(a,b.y,c.y), msub(a,b.z,c.z), msub(a,b.w,c.w)); }
+ template<typename T> __forceinline Vec4<T> nmadd ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmadd(a,b.x,c.x),nmadd(a,b.y,c.y),nmadd(a,b.z,c.z),nmadd(a,b.w,c.w)); }
+ template<typename T> __forceinline Vec4<T> nmsub ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmsub(a,b.x,c.x),nmsub(a,b.y,c.y),nmsub(a,b.z,c.z),nmsub(a,b.w,c.w)); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Assignment Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec4<T>& operator +=( Vec4<T>& a, const Vec4<T>& b ) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; return a; }
+ template<typename T> __forceinline Vec4<T>& operator -=( Vec4<T>& a, const Vec4<T>& b ) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; return a; }
+ template<typename T> __forceinline Vec4<T>& operator *=( Vec4<T>& a, const T& b ) { a.x *= b ; a.y *= b ; a.z *= b ; a.w *= b ; return a; }
+ template<typename T> __forceinline Vec4<T>& operator /=( Vec4<T>& a, const T& b ) { a.x /= b ; a.y /= b ; a.z /= b ; a.w /= b ; return a; }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Reduction Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline T reduce_add( const Vec4<T>& a ) { return a.x + a.y + a.z + a.w; }
+ template<typename T> __forceinline T reduce_mul( const Vec4<T>& a ) { return a.x * a.y * a.z * a.w; }
+ template<typename T> __forceinline T reduce_min( const Vec4<T>& a ) { return min(a.x, a.y, a.z, a.w); }
+ template<typename T> __forceinline T reduce_max( const Vec4<T>& a ) { return max(a.x, a.y, a.z, a.w); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Comparison Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline bool operator ==( const Vec4<T>& a, const Vec4<T>& b ) { return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; }
+ template<typename T> __forceinline bool operator !=( const Vec4<T>& a, const Vec4<T>& b ) { return a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w; }
+ template<typename T> __forceinline bool operator < ( const Vec4<T>& a, const Vec4<T>& b ) {
+ if (a.x != b.x) return a.x < b.x;
+ if (a.y != b.y) return a.y < b.y;
+ if (a.z != b.z) return a.z < b.z;
+ if (a.w != b.w) return a.w < b.w;
+ return false;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Shift Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec4<T> shift_right_1( const Vec4<T>& a ) {
+ return Vec4<T>(shift_right_1(a.x),shift_right_1(a.y),shift_right_1(a.z),shift_right_1(a.w));
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Euclidian Space Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline T dot ( const Vec4<T>& a, const Vec4<T>& b ) { return madd(a.x,b.x,madd(a.y,b.y,madd(a.z,b.z,a.w*b.w))); }
+
+ template<typename T> __forceinline T length ( const Vec4<T>& a ) { return sqrt(dot(a,a)); }
+ template<typename T> __forceinline Vec4<T> normalize( const Vec4<T>& a ) { return a*rsqrt(dot(a,a)); }
+ template<typename T> __forceinline T distance ( const Vec4<T>& a, const Vec4<T>& b ) { return length(a-b); }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Select
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline Vec4<T> select ( bool s, const Vec4<T>& t, const Vec4<T>& f ) {
+ return Vec4<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z),select(s,t.w,f.w));
+ }
+
+ template<typename T> __forceinline Vec4<T> select ( const Vec4<bool>& s, const Vec4<T>& t, const Vec4<T>& f ) {
+ return Vec4<T>(select(s.x,t.x,f.x),select(s.y,t.y,f.y),select(s.z,t.z,f.z),select(s.w,t.w,f.w));
+ }
+
+ template<typename T> __forceinline Vec4<T> select ( const typename T::Bool& s, const Vec4<T>& t, const Vec4<T>& f ) {
+ return Vec4<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z),select(s,t.w,f.w));
+ }
+
+ template<typename T>
+ __forceinline Vec4<T> lerp(const Vec4<T>& v0, const Vec4<T>& v1, const T& t) {
+ return madd(Vec4<T>(T(1.0f)-t),v0,t*v1);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Output Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Vec4<T>& a) {
+ return cout << "(" << a.x << ", " << a.y << ", " << a.z << ", " << a.w << ")";
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Default template instantiations
+ ////////////////////////////////////////////////////////////////////////////////
+
+ typedef Vec4<bool > Vec4b;
+ typedef Vec4<unsigned char> Vec4uc;
+ typedef Vec4<int > Vec4i;
+ typedef Vec4<float > Vec4f;
+}
+
+#include "vec3ba.h"
+#include "vec3ia.h"
+#include "vec3fa.h"
+
+////////////////////////////////////////////////////////////////////////////////
+/// SSE / AVX / MIC specializations
+////////////////////////////////////////////////////////////////////////////////
+
+#if defined __SSE__
+#include "../simd/sse.h"
+#endif
+
+#if defined __AVX__
+#include "../simd/avx.h"
+#endif
+
+#if defined __AVX512F__
+#include "../simd/avx512.h"
+#endif
+
+namespace embree
+{
+ template<> __forceinline Vec4<float>::Vec4( const Vec3fx& a ) { x = a.x; y = a.y; z = a.z; w = a.w; }
+
+#if defined(__AVX__)
+ template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
+ x = a.x; y = a.y; z = a.z; w = a.w;
+ }
+#elif defined(__SSE__)
+ template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
+ const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v); w = shuffle<3,3,3,3>(v);
+ }
+#endif
+
+#if defined(__AVX__)
+ template<> __forceinline Vec4<vfloat8>::Vec4( const Vec3fx& a ) {
+ x = a.x; y = a.y; z = a.z; w = a.w;
+ }
+#endif
+
+#if defined(__AVX512F__)
+ template<> __forceinline Vec4<vfloat16>::Vec4( const Vec3fx& a ) : x(a.x), y(a.y), z(a.z), w(a.w) {}
+#endif
+}