From 34b3e8f9e2ae076990ecf3b2827eff759ba2abf9 Mon Sep 17 00:00:00 2001 From: jfons Date: Tue, 20 Apr 2021 18:38:09 +0200 Subject: Add Embree-aarch64 thirdparty library --- .../geometry/triangle_intersector_moeller.h | 403 +++++++++++++++++++++ 1 file changed, 403 insertions(+) create mode 100644 thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h (limited to 'thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h') diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h new file mode 100644 index 0000000000..b5a8519236 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h @@ -0,0 +1,403 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "triangle.h" +#include "intersector_epilog.h" + +/*! This intersector implements a modified version of the Moeller + * Trumbore intersector from the paper "Fast, Minimum Storage + * Ray-Triangle Intersection". In contrast to the paper we + * precalculate some factors and factor the calculations differently + * to allow precalculating the cross product e1 x e2. The resulting + * algorithm is similar to the fastest one of the paper "Optimizing + * Ray-Triangle Intersection via Automated Search". */ + +namespace embree +{ + namespace isa + { + template + struct MoellerTrumboreHitM + { + __forceinline MoellerTrumboreHitM() {} + + __forceinline MoellerTrumboreHitM(const vbool& valid, const vfloat& U, const vfloat& V, const vfloat& T, const vfloat& absDen, const Vec3vf& Ng) + : U(U), V(V), T(T), absDen(absDen), valid(valid), vNg(Ng) {} + + __forceinline void finalize() + { + const vfloat rcpAbsDen = rcp(absDen); + vt = T * rcpAbsDen; + vu = U * rcpAbsDen; + vv = V * rcpAbsDen; + } + + __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } + __forceinline float t (const size_t i) const { return vt[i]; } + __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } + + public: + vfloat U; + vfloat V; + vfloat T; + vfloat absDen; + + public: + vbool valid; + vfloat vu; + vfloat vv; + vfloat vt; + Vec3vf vNg; + }; + + template + struct MoellerTrumboreIntersector1 + { + __forceinline MoellerTrumboreIntersector1() {} + + __forceinline MoellerTrumboreIntersector1(const Ray& ray, const void* ptr) {} + + __forceinline bool intersect(const vbool& valid0, + Ray& ray, + const Vec3vf& tri_v0, + const Vec3vf& tri_e1, + const Vec3vf& tri_e2, + const Vec3vf& tri_Ng, + MoellerTrumboreHitM& hit) const + { + /* calculate denominator */ + vbool valid = valid0; + const Vec3vf O = Vec3vf((Vec3fa)ray.org); + const Vec3vf D = Vec3vf((Vec3fa)ray.dir); + const Vec3vf C = Vec3vf(tri_v0) - O; + const Vec3vf R = cross(C,D); + const vfloat den = dot(Vec3vf(tri_Ng),D); + + const vfloat absDen = abs(den); + const vfloat sgnDen = signmsk(den); + + /* perform edge tests */ + const vfloat U = dot(R,Vec3vf(tri_e2)) ^ sgnDen; + const vfloat V = dot(R,Vec3vf(tri_e1)) ^ sgnDen; + + /* perform backface culling */ +#if defined(EMBREE_BACKFACE_CULLING) + valid &= (den < vfloat(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); +#else + valid &= (den != vfloat(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); +#endif + if (likely(none(valid))) return false; + + /* perform depth test */ + const vfloat T = dot(Vec3vf(tri_Ng),C) ^ sgnDen; + valid &= (absDen*vfloat(ray.tnear()) < T) & (T <= absDen*vfloat(ray.tfar)); + if (likely(none(valid))) return false; + + + /* update hit information */ + new (&hit) MoellerTrumboreHitM(valid,U,V,T,absDen,tri_Ng); + + return true; + } + + __forceinline bool intersectEdge(Ray& ray, + const Vec3vf& tri_v0, + const Vec3vf& tri_e1, + const Vec3vf& tri_e2, + MoellerTrumboreHitM& hit) const + { + vbool valid = true; + const Vec3> tri_Ng = cross(tri_e2,tri_e1); + return intersect(valid,ray,tri_v0,tri_e1,tri_e2,tri_Ng,hit); + } + + __forceinline bool intersect(Ray& ray, + const Vec3vf& v0, + const Vec3vf& v1, + const Vec3vf& v2, + MoellerTrumboreHitM& hit) const + { + const Vec3vf e1 = v0-v1; + const Vec3vf e2 = v2-v0; + return intersectEdge(ray,v0,e1,e2,hit); + } + + __forceinline bool intersect(const vbool& valid, + Ray& ray, + const Vec3vf& v0, + const Vec3vf& v1, + const Vec3vf& v2, + MoellerTrumboreHitM& hit) const + { + const Vec3vf e1 = v0-v1; + const Vec3vf e2 = v2-v0; + return intersectEdge(valid,ray,v0,e1,e2,hit); + } + + template + __forceinline bool intersectEdge(Ray& ray, + const Vec3vf& v0, + const Vec3vf& e1, + const Vec3vf& e2, + const Epilog& epilog) const + { + MoellerTrumboreHitM hit; + if (likely(intersectEdge(ray,v0,e1,e2,hit))) return epilog(hit.valid,hit); + return false; + } + + template + __forceinline bool intersect(Ray& ray, + const Vec3vf& v0, + const Vec3vf& v1, + const Vec3vf& v2, + const Epilog& epilog) const + { + MoellerTrumboreHitM hit; + if (likely(intersect(ray,v0,v1,v2,hit))) return epilog(hit.valid,hit); + return false; + } + + template + __forceinline bool intersect(const vbool& valid, + Ray& ray, + const Vec3vf& v0, + const Vec3vf& v1, + const Vec3vf& v2, + const Epilog& epilog) const + { + MoellerTrumboreHitM hit; + if (likely(intersect(valid,ray,v0,v1,v2,hit))) return epilog(hit.valid,hit); + return false; + } + }; + + template + struct MoellerTrumboreHitK + { + __forceinline MoellerTrumboreHitK(const vfloat& U, const vfloat& V, const vfloat& T, const vfloat& absDen, const Vec3vf& Ng) + : U(U), V(V), T(T), absDen(absDen), Ng(Ng) {} + + __forceinline std::tuple,vfloat,vfloat,Vec3vf> operator() () const + { + const vfloat rcpAbsDen = rcp(absDen); + const vfloat t = T * rcpAbsDen; + const vfloat u = U * rcpAbsDen; + const vfloat v = V * rcpAbsDen; + return std::make_tuple(u,v,t,Ng); + } + + private: + const vfloat U; + const vfloat V; + const vfloat T; + const vfloat absDen; + const Vec3vf Ng; + }; + + template + struct MoellerTrumboreIntersectorK + { + __forceinline MoellerTrumboreIntersectorK(const vbool& valid, const RayK& ray) {} + + /*! Intersects K rays with one of M triangles. */ + template + __forceinline vbool intersectK(const vbool& valid0, + //RayK& ray, + const Vec3vf& ray_org, + const Vec3vf& ray_dir, + const vfloat& ray_tnear, + const vfloat& ray_tfar, + const Vec3vf& tri_v0, + const Vec3vf& tri_e1, + const Vec3vf& tri_e2, + const Vec3vf& tri_Ng, + const Epilog& epilog) const + { + /* calculate denominator */ + vbool valid = valid0; + const Vec3vf C = tri_v0 - ray_org; + const Vec3vf R = cross(C,ray_dir); + const vfloat den = dot(tri_Ng,ray_dir); + const vfloat absDen = abs(den); + const vfloat sgnDen = signmsk(den); + + /* test against edge p2 p0 */ + const vfloat U = dot(tri_e2,R) ^ sgnDen; + valid &= U >= 0.0f; + if (likely(none(valid))) return false; + + /* test against edge p0 p1 */ + const vfloat V = dot(tri_e1,R) ^ sgnDen; + valid &= V >= 0.0f; + if (likely(none(valid))) return false; + + /* test against edge p1 p2 */ + const vfloat W = absDen-U-V; + valid &= W >= 0.0f; + if (likely(none(valid))) return false; + + /* perform depth test */ + const vfloat T = dot(tri_Ng,C) ^ sgnDen; + valid &= (absDen*ray_tnear < T) & (T <= absDen*ray_tfar); + if (unlikely(none(valid))) return false; + + /* perform backface culling */ +#if defined(EMBREE_BACKFACE_CULLING) + valid &= den < vfloat(zero); + if (unlikely(none(valid))) return false; +#else + valid &= den != vfloat(zero); + if (unlikely(none(valid))) return false; +#endif + + /* calculate hit information */ + MoellerTrumboreHitK hit(U,V,T,absDen,tri_Ng); + return epilog(valid,hit); + } + + /*! Intersects K rays with one of M triangles. */ + template + __forceinline vbool intersectK(const vbool& valid0, + RayK& ray, + const Vec3vf& tri_v0, + const Vec3vf& tri_v1, + const Vec3vf& tri_v2, + const Epilog& epilog) const + { + const Vec3vf e1 = tri_v0-tri_v1; + const Vec3vf e2 = tri_v2-tri_v0; + const Vec3vf Ng = cross(e2,e1); + return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,epilog); + } + + /*! Intersects K rays with one of M triangles. */ + template + __forceinline vbool intersectEdgeK(const vbool& valid0, + RayK& ray, + const Vec3vf& tri_v0, + const Vec3vf& tri_e1, + const Vec3vf& tri_e2, + const Epilog& epilog) const + { + const Vec3vf tri_Ng = cross(tri_e2,tri_e1); + return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,tri_e1,tri_e2,tri_Ng,epilog); + } + + /*! Intersect k'th ray from ray packet of size K with M triangles. */ + __forceinline bool intersectEdge(RayK& ray, + size_t k, + const Vec3vf& tri_v0, + const Vec3vf& tri_e1, + const Vec3vf& tri_e2, + MoellerTrumboreHitM& hit) const + { + /* calculate denominator */ + typedef Vec3vf Vec3vfM; + const Vec3vf tri_Ng = cross(tri_e2,tri_e1); + + const Vec3vfM O = broadcast>(ray.org,k); + const Vec3vfM D = broadcast>(ray.dir,k); + const Vec3vfM C = Vec3vfM(tri_v0) - O; + const Vec3vfM R = cross(C,D); + const vfloat den = dot(Vec3vfM(tri_Ng),D); + const vfloat absDen = abs(den); + const vfloat sgnDen = signmsk(den); + + /* perform edge tests */ + const vfloat U = dot(Vec3vf(tri_e2),R) ^ sgnDen; + const vfloat V = dot(Vec3vf(tri_e1),R) ^ sgnDen; + + /* perform backface culling */ +#if defined(EMBREE_BACKFACE_CULLING) + vbool valid = (den < vfloat(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); +#else + vbool valid = (den != vfloat(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); +#endif + if (likely(none(valid))) return false; + + /* perform depth test */ + const vfloat T = dot(Vec3vf(tri_Ng),C) ^ sgnDen; + valid &= (absDen*vfloat(ray.tnear()[k]) < T) & (T <= absDen*vfloat(ray.tfar[k])); + if (likely(none(valid))) return false; + + /* calculate hit information */ + new (&hit) MoellerTrumboreHitM(valid,U,V,T,absDen,tri_Ng); + return true; + } + + __forceinline bool intersectEdge(RayK& ray, + size_t k, + const BBox>& time_range, + const Vec3vf& tri_v0, + const Vec3vf& tri_e1, + const Vec3vf& tri_e2, + MoellerTrumboreHitM& hit) const + { + if (likely(intersect(ray,k,tri_v0,tri_e1,tri_e2,hit))) + { + hit.valid &= time_range.lower <= vfloat(ray.time[k]); + hit.valid &= vfloat(ray.time[k]) < time_range.upper; + return any(hit.valid); + } + return false; + } + + template + __forceinline bool intersectEdge(RayK& ray, + size_t k, + const Vec3vf& tri_v0, + const Vec3vf& tri_e1, + const Vec3vf& tri_e2, + const Epilog& epilog) const + { + MoellerTrumboreHitM hit; + if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit); + return false; + } + + template + __forceinline bool intersectEdge(RayK& ray, + size_t k, + const BBox>& time_range, + const Vec3vf& tri_v0, + const Vec3vf& tri_e1, + const Vec3vf& tri_e2, + const Epilog& epilog) const + { + MoellerTrumboreHitM hit; + if (likely(intersectEdge(ray,k,time_range,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit); + return false; + } + + template + __forceinline bool intersect(RayK& ray, + size_t k, + const Vec3vf& v0, + const Vec3vf& v1, + const Vec3vf& v2, + const Epilog& epilog) const + { + const Vec3vf e1 = v0-v1; + const Vec3vf e2 = v2-v0; + return intersectEdge(ray,k,v0,e1,e2,epilog); + } + + template + __forceinline bool intersect(RayK& ray, + size_t k, + const BBox>& time_range, + const Vec3vf& v0, + const Vec3vf& v1, + const Vec3vf& v2, + const Epilog& epilog) const + { + const Vec3vf e1 = v0-v1; + const Vec3vf e2 = v2-v0; + return intersectEdge(ray,k,time_range,v0,e1,e2,epilog); + } + }; + } +} -- cgit v1.2.3