From 34b3e8f9e2ae076990ecf3b2827eff759ba2abf9 Mon Sep 17 00:00:00 2001 From: jfons Date: Tue, 20 Apr 2021 18:38:09 +0200 Subject: Add Embree-aarch64 thirdparty library --- .../kernels/geometry/curve_intersector_ribbon.h | 214 +++++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h (limited to 'thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h') diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h new file mode 100644 index 0000000000..a99cf99d56 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h @@ -0,0 +1,214 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" +#include "quad_intersector.h" +#include "curve_intersector_precalculations.h" + +#define Bezier1Intersector1 RibbonCurve1Intersector1 +#define Bezier1IntersectorK RibbonCurve1IntersectorK + +namespace embree +{ + namespace isa + { + template + struct RibbonHit + { + __forceinline RibbonHit() {} + + __forceinline RibbonHit(const vbool& valid, const vfloat& U, const vfloat& V, const vfloat& T, const int i, const int N, + const NativeCurve3ff& curve3D) + : U(U), V(V), T(T), i(i), N(N), curve3D(curve3D), valid(valid) {} + + __forceinline void finalize() + { + vu = (vfloat(step)+U+vfloat(float(i)))*(1.0f/float(N)); + vv = V; + vt = T; + } + + __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } + __forceinline float t (const size_t i) const { return vt[i]; } + __forceinline Vec3fa Ng(const size_t i) const { + return curve3D.eval_du(vu[i]); + } + + public: + vfloat U; + vfloat V; + vfloat T; + int i, N; + NativeCurve3ff curve3D; + + public: + vbool valid; + vfloat vu; + vfloat vv; + vfloat vt; + }; + + /* calculate squared distance of point p0 to line p1->p2 */ + __forceinline std::pair sqr_point_line_distance(const Vec2vfx& p0, const Vec2vfx& p1, const Vec2vfx& p2) + { + const vfloatx num = det(p2-p1,p1-p0); + const vfloatx den2 = dot(p2-p1,p2-p1); + return std::make_pair(num*num,den2); + } + + /* performs culling against a cylinder */ + __forceinline vboolx cylinder_culling_test(const Vec2vfx& p0, const Vec2vfx& p1, const Vec2vfx& p2, const vfloatx& r) + { + const std::pair d = sqr_point_line_distance(p0,p1,p2); + return d.first <= r*r*d.second; + } + + template + __forceinline bool intersect_ribbon(const Vec3fa& ray_org, const Vec3fa& ray_dir, const float ray_tnear, const float& ray_tfar, + const LinearSpace3fa& ray_space, const float& depth_scale, + const NativeCurve3ff& curve3D, const int N, + const Epilog& epilog) + { + /* transform control points into ray space */ + const NativeCurve3ff curve2D = curve3D.xfm_pr(ray_space,ray_org); + float eps = 4.0f*float(ulp)*reduce_max(max(abs(curve2D.v0),abs(curve2D.v1),abs(curve2D.v2),abs(curve2D.v3))); + + /* evaluate the bezier curve */ + bool ishit = false; + vboolx valid = vfloatx(step) < vfloatx(float(N)); + const Vec4vfx p0 = curve2D.template eval0(0,N); + const Vec4vfx p1 = curve2D.template eval1(0,N); + valid &= cylinder_culling_test(zero,Vec2vfx(p0.x,p0.y),Vec2vfx(p1.x,p1.y),max(p0.w,p1.w)); + + if (any(valid)) + { + Vec3vfx dp0dt = curve2D.template derivative0(0,N); + Vec3vfx dp1dt = curve2D.template derivative1(0,N); + dp0dt = select(reduce_max(abs(dp0dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp0dt); + dp1dt = select(reduce_max(abs(dp1dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp1dt); + const Vec3vfx n0(dp0dt.y,-dp0dt.x,0.0f); + const Vec3vfx n1(dp1dt.y,-dp1dt.x,0.0f); + const Vec3vfx nn0 = normalize(n0); + const Vec3vfx nn1 = normalize(n1); + const Vec3vfx lp0 = madd(p0.w,nn0,Vec3vfx(p0)); + const Vec3vfx lp1 = madd(p1.w,nn1,Vec3vfx(p1)); + const Vec3vfx up0 = nmadd(p0.w,nn0,Vec3vfx(p0)); + const Vec3vfx up1 = nmadd(p1.w,nn1,Vec3vfx(p1)); + + vfloatx vu,vv,vt; + vboolx valid0 = intersect_quad_backface_culling(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt); + + if (any(valid0)) + { + /* ignore self intersections */ + if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) { + vfloatx r = lerp(p0.w, p1.w, vu); + valid0 &= vt > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; + } + + if (any(valid0)) + { + vv = madd(2.0f,vv,vfloatx(-1.0f)); + RibbonHit bhit(valid0,vu,vv,vt,0,N,curve3D); + ishit |= epilog(bhit.valid,bhit); + } + } + } + + if (unlikely(VSIZEX < N)) + { + /* process SIMD-size many segments per iteration */ + for (int i=VSIZEX; i(i,N); + const Vec4vfx p1 = curve2D.template eval1(i,N); + valid &= cylinder_culling_test(zero,Vec2vfx(p0.x,p0.y),Vec2vfx(p1.x,p1.y),max(p0.w,p1.w)); + if (none(valid)) continue; + + Vec3vfx dp0dt = curve2D.template derivative0(i,N); + Vec3vfx dp1dt = curve2D.template derivative1(i,N); + dp0dt = select(reduce_max(abs(dp0dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp0dt); + dp1dt = select(reduce_max(abs(dp1dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp1dt); + const Vec3vfx n0(dp0dt.y,-dp0dt.x,0.0f); + const Vec3vfx n1(dp1dt.y,-dp1dt.x,0.0f); + const Vec3vfx nn0 = normalize(n0); + const Vec3vfx nn1 = normalize(n1); + const Vec3vfx lp0 = madd(p0.w,nn0,Vec3vfx(p0)); + const Vec3vfx lp1 = madd(p1.w,nn1,Vec3vfx(p1)); + const Vec3vfx up0 = nmadd(p0.w,nn0,Vec3vfx(p0)); + const Vec3vfx up1 = nmadd(p1.w,nn1,Vec3vfx(p1)); + + vfloatx vu,vv,vt; + vboolx valid0 = intersect_quad_backface_culling(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt); + + if (any(valid0)) + { + /* ignore self intersections */ + if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) { + vfloatx r = lerp(p0.w, p1.w, vu); + valid0 &= vt > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; + } + + if (any(valid0)) + { + vv = madd(2.0f,vv,vfloatx(-1.0f)); + RibbonHit bhit(valid0,vu,vv,vt,i,N,curve3D); + ishit |= epilog(bhit.valid,bhit); + } + } + } + } + return ishit; + } + + template class NativeCurve> + struct RibbonCurve1Intersector1 + { + typedef NativeCurve NativeCurve3ff; + + template + __forceinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, + IntersectContext* context, + const CurveGeometry* geom, const unsigned int primID, + const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, + const Epilog& epilog) + { + const int N = geom->tessellationRate; + NativeCurve3ff curve(v0,v1,v2,v3); + curve = enlargeRadiusToMinWidth(context,geom,ray.org,curve); + return intersect_ribbon(ray.org,ray.dir,ray.tnear(),ray.tfar, + pre.ray_space,pre.depth_scale, + curve,N, + epilog); + } + }; + + template class NativeCurve, int K> + struct RibbonCurve1IntersectorK + { + typedef NativeCurve NativeCurve3ff; + + template + __forceinline bool intersect(const CurvePrecalculationsK& pre, RayK& ray, size_t k, + IntersectContext* context, + const CurveGeometry* geom, const unsigned int primID, + const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, + const Epilog& epilog) + { + const int N = geom->tessellationRate; + const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]); + const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]); + NativeCurve3ff curve(v0,v1,v2,v3); + curve = enlargeRadiusToMinWidth(context,geom,ray_org,curve); + return intersect_ribbon(ray_org,ray_dir,ray.tnear()[k],ray.tfar[k], + pre.ray_space[k],pre.depth_scale[k], + curve,N, + epilog); + } + }; + } +} -- cgit v1.2.3