diff options
Diffstat (limited to 'thirdparty/embree-aarch64/kernels/geometry')
73 files changed, 19139 insertions, 0 deletions
diff --git a/thirdparty/embree-aarch64/kernels/geometry/cone.h b/thirdparty/embree-aarch64/kernels/geometry/cone.h new file mode 100644 index 0000000000..961ef86160 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/cone.h @@ -0,0 +1,321 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" + +namespace embree +{ + namespace isa + { + struct Cone + { + const Vec3fa p0; //!< start position of cone + const Vec3fa p1; //!< end position of cone + const float r0; //!< start radius of cone + const float r1; //!< end radius of cone + + __forceinline Cone(const Vec3fa& p0, const float r0, const Vec3fa& p1, const float r1) + : p0(p0), p1(p1), r0(r0), r1(r1) {} + + __forceinline bool intersect(const Vec3fa& org, const Vec3fa& dir, + BBox1f& t_o, + float& u0_o, Vec3fa& Ng0_o, + float& u1_o, Vec3fa& Ng1_o) const + { + /* calculate quadratic equation to solve */ + const Vec3fa v0 = p0-org; + const Vec3fa v1 = p1-org; + + const float rl = rcp_length(v1-v0); + const Vec3fa P0 = v0, dP = (v1-v0)*rl; + const float dr = (r1-r0)*rl; + const Vec3fa O = -P0, dO = dir; + + const float dOdO = dot(dO,dO); + const float OdO = dot(dO,O); + const float OO = dot(O,O); + const float dOz = dot(dP,dO); + const float Oz = dot(dP,O); + + const float R = r0 + Oz*dr; + const float A = dOdO - sqr(dOz) * (1.0f+sqr(dr)); + const float B = 2.0f * (OdO - dOz*(Oz + R*dr)); + const float C = OO - (sqr(Oz) + sqr(R)); + + /* we miss the cone if determinant is smaller than zero */ + const float D = B*B - 4.0f*A*C; + if (D < 0.0f) return false; + + /* special case for rays that are "parallel" to the cone */ + const float eps = float(1<<8)*float(ulp)*max(abs(dOdO),abs(sqr(dOz))); + if (unlikely(abs(A) < eps)) + { + /* cylinder case */ + if (abs(dr) < 16.0f*float(ulp)) { + if (C <= 0.0f) { t_o = BBox1f(neg_inf,pos_inf); return true; } + else { t_o = BBox1f(pos_inf,neg_inf); return false; } + } + + /* cone case */ + else + { 
+ /* if we hit the negative cone there cannot be a hit */ + const float t = -C/B; + const float z0 = Oz+t*dOz; + const float z0r = r0+z0*dr; + if (z0r < 0.0f) return false; + + /* test if we start inside or outside the cone */ + if (dOz*dr > 0.0f) t_o = BBox1f(t,pos_inf); + else t_o = BBox1f(neg_inf,t); + } + } + + /* standard case for "non-parallel" rays */ + else + { + const float Q = sqrt(D); + const float rcp_2A = rcp(2.0f*A); + t_o.lower = (-B-Q)*rcp_2A; + t_o.upper = (-B+Q)*rcp_2A; + + /* standard case where both hits are on same cone */ + if (likely(A > 0.0f)) { + const float z0 = Oz+t_o.lower*dOz; + const float z0r = r0+z0*dr; + if (z0r < 0.0f) return false; + } + + /* special case where the hits are on the positive and negative cone */ + else + { + /* depending on the ray direction and the open direction + * of the cone we have a hit from inside or outside the + * cone */ + if (dOz*dr > 0) t_o.upper = pos_inf; + else t_o.lower = neg_inf; + } + } + + /* calculates u and Ng for near hit */ + { + u0_o = (Oz+t_o.lower*dOz)*rl; + const Vec3fa Pr = t_o.lower*dir; + const Vec3fa Pl = v0 + u0_o*(v1-v0); + const Vec3fa R = normalize(Pr-Pl); + const Vec3fa U = (p1-p0)+(r1-r0)*R; + const Vec3fa V = cross(p1-p0,R); + Ng0_o = cross(V,U); + } + + /* calculates u and Ng for far hit */ + { + u1_o = (Oz+t_o.upper*dOz)*rl; + const Vec3fa Pr = t_o.upper*dir; + const Vec3fa Pl = v0 + u1_o*(v1-v0); + const Vec3fa R = normalize(Pr-Pl); + const Vec3fa U = (p1-p0)+(r1-r0)*R; + const Vec3fa V = cross(p1-p0,R); + Ng1_o = cross(V,U); + } + return true; + } + + __forceinline bool intersect(const Vec3fa& org, const Vec3fa& dir, BBox1f& t_o) const + { + float u0_o; Vec3fa Ng0_o; float u1_o; Vec3fa Ng1_o; + return intersect(org,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o); + } + + static bool verify(const size_t id, const Cone& cone, const Ray& ray, bool shouldhit, const float t0, const float t1) + { + float eps = 0.001f; + BBox1f t; bool hit; + hit = cone.intersect(ray.org,ray.dir,t); + + bool 
failed = hit != shouldhit; + if (shouldhit) failed |= std::isinf(t0) ? t0 != t.lower : (t0 == -1E6) ? t.lower > -1E6f : abs(t0-t.lower) > eps; + if (shouldhit) failed |= std::isinf(t1) ? t1 != t.upper : (t1 == +1E6) ? t.upper < +1E6f : abs(t1-t.upper) > eps; + if (!failed) return true; + embree_cout << "Cone test " << id << " failed: cone = " << cone << ", ray = " << ray << ", hit = " << hit << ", t = " << t << embree_endl; + return false; + } + + /* verify cone class */ + static bool verify() + { + bool passed = true; + const Cone cone0(Vec3fa(0.0f,0.0f,0.0f),0.0f,Vec3fa(1.0f,0.0f,0.0f),1.0f); + passed &= verify(0,cone0,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,3.0f,pos_inf); + passed &= verify(1,cone0,Ray(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa(-1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,1.0f); + passed &= verify(2,cone0,Ray(Vec3fa(-1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),false,0.0f,0.0f); + passed &= verify(3,cone0,Ray(Vec3fa(+1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,1.0f,3.0f); + passed &= verify(4,cone0,Ray(Vec3fa(-1.0f,0.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,1.0f,pos_inf); + passed &= verify(5,cone0,Ray(Vec3fa(+1.0f,0.0f,0.0f),Vec3fa(-1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,1.0f); + passed &= verify(6,cone0,Ray(Vec3fa(+0.0f,0.0f,1.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,1.0f,1.0f); + passed &= verify(7,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(-1.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f); + passed &= verify(8,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(+1.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.5f,+1E6); + passed &= verify(9,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(-1.0f,+1.0f,+0.0f),0.0f,float(inf)),true,-1E6,-0.5f); + const Cone cone1(Vec3fa(0.0f,0.0f,0.0f),1.0f,Vec3fa(1.0f,0.0f,0.0f),0.0f); + passed &= verify(10,cone1,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,2.0f); + passed &= 
verify(11,cone1,Ray(Vec3fa(-1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,0.0f,4.0f); + const Cone cylinder(Vec3fa(0.0f,0.0f,0.0f),1.0f,Vec3fa(1.0f,0.0f,0.0f),1.0f); + passed &= verify(12,cylinder,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f); + passed &= verify(13,cylinder,Ray(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f); + passed &= verify(14,cylinder,Ray(Vec3fa(+2.0f,1.0f,2.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f); + passed &= verify(15,cylinder,Ray(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf); + passed &= verify(16,cylinder,Ray(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf); + passed &= verify(17,cylinder,Ray(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf); + passed &= verify(18,cylinder,Ray(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf); + return passed; + } + + /*! 
output operator */ + friend __forceinline embree_ostream operator<<(embree_ostream cout, const Cone& c) { + return cout << "Cone { p0 = " << c.p0 << ", r0 = " << c.r0 << ", p1 = " << c.p1 << ", r1 = " << c.r1 << "}"; + } + }; + + template<int N> + struct ConeN + { + typedef Vec3<vfloat<N>> Vec3vfN; + + const Vec3vfN p0; //!< start position of cone + const Vec3vfN p1; //!< end position of cone + const vfloat<N> r0; //!< start radius of cone + const vfloat<N> r1; //!< end radius of cone + + __forceinline ConeN(const Vec3vfN& p0, const vfloat<N>& r0, const Vec3vfN& p1, const vfloat<N>& r1) + : p0(p0), p1(p1), r0(r0), r1(r1) {} + + __forceinline Cone operator[] (const size_t i) const + { + assert(i<N); + return Cone(Vec3fa(p0.x[i],p0.y[i],p0.z[i]),r0[i],Vec3fa(p1.x[i],p1.y[i],p1.z[i]),r1[i]); + } + + __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir, + BBox<vfloat<N>>& t_o, + vfloat<N>& u0_o, Vec3vfN& Ng0_o, + vfloat<N>& u1_o, Vec3vfN& Ng1_o) const + { + /* calculate quadratic equation to solve */ + const Vec3vfN v0 = p0-Vec3vfN(org); + const Vec3vfN v1 = p1-Vec3vfN(org); + + const vfloat<N> rl = rcp_length(v1-v0); + const Vec3vfN P0 = v0, dP = (v1-v0)*rl; + const vfloat<N> dr = (r1-r0)*rl; + const Vec3vfN O = -P0, dO = dir; + + const vfloat<N> dOdO = dot(dO,dO); + const vfloat<N> OdO = dot(dO,O); + const vfloat<N> OO = dot(O,O); + const vfloat<N> dOz = dot(dP,dO); + const vfloat<N> Oz = dot(dP,O); + + const vfloat<N> R = r0 + Oz*dr; + const vfloat<N> A = dOdO - sqr(dOz) * (vfloat<N>(1.0f)+sqr(dr)); + const vfloat<N> B = 2.0f * (OdO - dOz*(Oz + R*dr)); + const vfloat<N> C = OO - (sqr(Oz) + sqr(R)); + + /* we miss the cone if determinant is smaller than zero */ + const vfloat<N> D = B*B - 4.0f*A*C; + vbool<N> valid = D >= 0.0f; + if (none(valid)) return valid; + + /* special case for rays that are "parallel" to the cone */ + const vfloat<N> eps = float(1<<8)*float(ulp)*max(abs(dOdO),abs(sqr(dOz))); + const vbool<N> validt = valid & (abs(A) < eps); + 
const vbool<N> validf = valid & !(abs(A) < eps); + if (unlikely(any(validt))) + { + const vbool<N> validtt = validt & (abs(dr) < 16.0f*float(ulp)); /* N-wide mask: parallel lanes with (nearly) constant radius */ + const vbool<N> validtf = validt & (abs(dr) >= 16.0f*float(ulp)); /* N-wide mask: parallel lanes with varying radius */ + + /* cylinder case */ + if (unlikely(any(validtt))) + { + t_o.lower = select(validtt, select(C <= 0.0f, vfloat<N>(neg_inf), vfloat<N>(pos_inf)), t_o.lower); + t_o.upper = select(validtt, select(C <= 0.0f, vfloat<N>(pos_inf), vfloat<N>(neg_inf)), t_o.upper); + valid &= !validtt | C <= 0.0f; + } + + /* cone case */ + if (any(validtf)) + { + /* if we hit the negative cone there cannot be a hit */ + const vfloat<N> t = -C/B; + const vfloat<N> z0 = Oz+t*dOz; + const vfloat<N> z0r = r0+z0*dr; + valid &= !validtf | z0r >= 0.0f; + + /* test if we start inside or outside the cone */ + t_o.lower = select(validtf, select(dOz*dr > 0.0f, t, vfloat<N>(neg_inf)), t_o.lower); + t_o.upper = select(validtf, select(dOz*dr > 0.0f, vfloat<N>(pos_inf), t), t_o.upper); + } + } + + /* standard case for "non-parallel" rays */ + if (likely(any(validf))) + { + const vfloat<N> Q = sqrt(D); + const vfloat<N> rcp_2A = 0.5f*rcp(A); + t_o.lower = select(validf, (-B-Q)*rcp_2A, t_o.lower); + t_o.upper = select(validf, (-B+Q)*rcp_2A, t_o.upper); + + /* standard case where both hits are on same cone */ + const vbool<N> validft = validf & A>0.0f; + const vbool<N> validff = validf & !(A>0.0f); + if (any(validft)) { + const vfloat<N> z0 = Oz+t_o.lower*dOz; + const vfloat<N> z0r = r0+z0*dr; + valid &= !validft | z0r >= 0.0f; + } + + /* special case where the hits are on the positive and negative cone */ + if (any(validff)) { + /* depending on the ray direction and the open direction + * of the cone we have a hit from inside or outside the + * cone */ + t_o.lower = select(validff, select(dOz*dr > 0.0f, t_o.lower, float(neg_inf)), t_o.lower); + t_o.upper = select(validff, select(dOz*dr > 0.0f, float(pos_inf), t_o.upper), t_o.upper); + } + } + + /* calculates u and Ng for near hit */ + { + u0_o = 
(Oz+t_o.lower*dOz)*rl; + const Vec3vfN Pr = t_o.lower*Vec3vfN(dir); + const Vec3vfN Pl = v0 + u0_o*(v1-v0); + const Vec3vfN R = normalize(Pr-Pl); + const Vec3vfN U = (p1-p0)+(r1-r0)*R; + const Vec3vfN V = cross(p1-p0,R); + Ng0_o = cross(V,U); + } + + /* calculates u and Ng for far hit (ray point taken at t_o.upper, as in the scalar Cone::intersect) */ + { + u1_o = (Oz+t_o.upper*dOz)*rl; + const Vec3vfN Pr = t_o.upper*Vec3vfN(dir); + const Vec3vfN Pl = v0 + u1_o*(v1-v0); + const Vec3vfN R = normalize(Pr-Pl); + const Vec3vfN U = (p1-p0)+(r1-r0)*R; + const Vec3vfN V = cross(p1-p0,R); + Ng1_o = cross(V,U); + } + return valid; + } + + __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir, BBox<vfloat<N>>& t_o) const + { + vfloat<N> u0_o; Vec3vfN Ng0_o; vfloat<N> u1_o; Vec3vfN Ng1_o; + return intersect(org,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o); + } + }; + } +} + diff --git a/thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h new file mode 100644 index 0000000000..0902baff7d --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h @@ -0,0 +1,209 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" +#include "curve_intersector_precalculations.h" + +namespace embree +{ + namespace isa + { + namespace __coneline_internal + { + template<int M, typename Epilog, typename ray_tfar_func> + static __forceinline bool intersectCone(const vbool<M>& valid_i, + const Vec3vf<M>& ray_org_in, const Vec3vf<M>& ray_dir, + const vfloat<M>& ray_tnear, const ray_tfar_func& ray_tfar, + const Vec4vf<M>& v0, const Vec4vf<M>& v1, + const vbool<M>& cL, const vbool<M>& cR, + const Epilog& epilog) + { + vbool<M> valid = valid_i; + + /* move ray origin closer to make calculations numerically stable */ + const vfloat<M> dOdO = sqr(ray_dir); + const vfloat<M> rcp_dOdO = rcp(dOdO); + const Vec3vf<M> center = vfloat<M>(0.5f)*(v0.xyz()+v1.xyz()); + const vfloat<M> dt = 
dot(center-ray_org_in,ray_dir)*rcp_dOdO; + const Vec3vf<M> ray_org = ray_org_in + dt*ray_dir; + + const Vec3vf<M> dP = v1.xyz() - v0.xyz(); + const Vec3vf<M> p0 = ray_org - v0.xyz(); + const Vec3vf<M> p1 = ray_org - v1.xyz(); + + const vfloat<M> dPdP = sqr(dP); + const vfloat<M> dP0 = dot(p0,dP); + const vfloat<M> dP1 = dot(p1,dP); + const vfloat<M> dOdP = dot(ray_dir,dP); + + // intersect cone body + const vfloat<M> dr = v0.w - v1.w; + const vfloat<M> hy = dPdP + sqr(dr); + const vfloat<M> dO0 = dot(ray_dir,p0); + const vfloat<M> OO = sqr(p0); + const vfloat<M> dPdP2 = sqr(dPdP); + const vfloat<M> dPdPr0 = dPdP*v0.w; + + const vfloat<M> A = dPdP2 - sqr(dOdP)*hy; + const vfloat<M> B = dPdP2*dO0 - dP0*dOdP*hy + dPdPr0*(dr*dOdP); + const vfloat<M> C = dPdP2*OO - sqr(dP0)*hy + dPdPr0*(2.0f*dr*dP0 - dPdPr0); + + const vfloat<M> D = B*B - A*C; + valid &= D >= 0.0f; + if (unlikely(none(valid))) { + return false; + } + + /* standard case for "non-parallel" rays */ + const vfloat<M> Q = sqrt(D); + const vfloat<M> rcp_A = rcp(A); + /* special case for rays that are "parallel" to the cone - assume miss */ + const vbool<M> isParallel = abs(A) <= min_rcp_input; + + vfloat<M> t_cone_lower = select (isParallel, neg_inf, (-B-Q)*rcp_A); + vfloat<M> t_cone_upper = select (isParallel, pos_inf, (-B+Q)*rcp_A); + const vfloat<M> y_lower = dP0 + t_cone_lower*dOdP; + const vfloat<M> y_upper = dP0 + t_cone_upper*dOdP; + t_cone_lower = select(valid & y_lower > 0.0f & y_lower < dPdP, t_cone_lower, pos_inf); + t_cone_upper = select(valid & y_upper > 0.0f & y_upper < dPdP, t_cone_upper, neg_inf); + + const vbool<M> hitDisk0 = valid & cL; + const vbool<M> hitDisk1 = valid & cR; + const vfloat<M> rcp_dOdP = rcp(dOdP); + const vfloat<M> t_disk0 = select (hitDisk0, select (sqr(p0*dOdP-ray_dir*dP0)<(sqr(v0.w)*sqr(dOdP)), -dP0*rcp_dOdP, pos_inf), pos_inf); + const vfloat<M> t_disk1 = select (hitDisk1, select (sqr(p1*dOdP-ray_dir*dP1)<(sqr(v1.w)*sqr(dOdP)), -dP1*rcp_dOdP, pos_inf), pos_inf); + const 
vfloat<M> t_disk_lower = min(t_disk0, t_disk1); + const vfloat<M> t_disk_upper = max(t_disk0, t_disk1); + + const vfloat<M> t_lower = min(t_cone_lower, t_disk_lower); + const vfloat<M> t_upper = max(t_cone_upper, select(t_lower==t_disk_lower, + select(t_disk_upper==vfloat<M>(pos_inf),neg_inf,t_disk_upper), + select(t_disk_lower==vfloat<M>(pos_inf),neg_inf,t_disk_lower))); + + const vbool<M> valid_lower = valid & ray_tnear <= dt+t_lower & dt+t_lower <= ray_tfar() & t_lower != vfloat<M>(pos_inf); + const vbool<M> valid_upper = valid & ray_tnear <= dt+t_upper & dt+t_upper <= ray_tfar() & t_upper != vfloat<M>(neg_inf); + + const vbool<M> valid_first = valid_lower | valid_upper; + if (unlikely(none(valid_first))) + return false; + + const vfloat<M> t_first = select(valid_lower, t_lower, t_upper); + const vfloat<M> y_first = select(valid_lower, y_lower, y_upper); + + const vfloat<M> rcp_dPdP = rcp(dPdP); + const Vec3vf<M> dP2drr0dP = dPdP*dr*v0.w*dP; + const Vec3vf<M> dPhy = dP*hy; + const vbool<M> cone_hit_first = valid & (t_first == t_cone_lower | t_first == t_cone_upper); + const vbool<M> disk0_hit_first = valid & (t_first == t_disk0); + const Vec3vf<M> Ng_first = select(cone_hit_first, dPdP2*(p0+t_first*ray_dir)+dP2drr0dP-dPhy*y_first, select(disk0_hit_first, -dP, dP)); + const vfloat<M> u_first = select(cone_hit_first, y_first*rcp_dPdP, select(disk0_hit_first, vfloat<M>(zero), vfloat<M>(one))); + + /* invoke intersection filter for first hit */ + RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_first,Ng_first); + const bool is_hit_first = epilog(valid_first, hit); + + /* check for possible second hits before potentially accepted hit */ + const vfloat<M> t_second = t_upper; + const vfloat<M> y_second = y_upper; + const vbool<M> valid_second = valid_lower & valid_upper & (dt+t_upper <= ray_tfar()); + if (unlikely(none(valid_second))) + return is_hit_first; + + /* invoke intersection filter for second hit */ + const vbool<M> cone_hit_second = t_second == t_cone_lower 
| t_second == t_cone_upper; + const vbool<M> disk0_hit_second = t_second == t_disk0; + const Vec3vf<M> Ng_second = select(cone_hit_second, dPdP2*(p0+t_second*ray_dir)+dP2drr0dP-dPhy*y_second, select(disk0_hit_second, -dP, dP)); + const vfloat<M> u_second = select(cone_hit_second, y_second*rcp_dPdP, select(disk0_hit_second, vfloat<M>(zero), vfloat<M>(one))); /* end caps: u=0 on the v0 disk, u=1 on the v1 disk, decided by the second hit itself */ + + hit = RoundLineIntersectorHitM<M>(u_second,zero,dt+t_second,Ng_second); + const bool is_hit_second = epilog(valid_second, hit); + + return is_hit_first | is_hit_second; + } + } + + template<int M> + struct ConeLineIntersectorHitM + { + __forceinline ConeLineIntersectorHitM() {} + + __forceinline ConeLineIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng) + : vu(u), vv(v), vt(t), vNg(Ng) {} + + __forceinline void finalize() {} + + __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } + __forceinline float t (const size_t i) const { return vt[i]; } + __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } + + public: + vfloat<M> vu; + vfloat<M> vv; + vfloat<M> vt; + Vec3vf<M> vNg; + }; + + template<int M> + struct ConeCurveIntersector1 + { + typedef CurvePrecalculations1 Precalculations; + + struct ray_tfar { + Ray& ray; + __forceinline ray_tfar(Ray& ray) : ray(ray) {} + __forceinline vfloat<M> operator() () const { return ray.tfar; }; + }; + + template<typename Epilog> + static __forceinline bool intersect(const vbool<M>& valid_i, + Ray& ray, + IntersectContext* context, + const LineSegments* geom, + const Precalculations& pre, + const Vec4vf<M>& v0i, const Vec4vf<M>& v1i, + const vbool<M>& cL, const vbool<M>& cR, + const Epilog& epilog) + { + const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); + const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z); + const vfloat<M> ray_tnear(ray.tnear()); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec4vf<M> v1 = 
enlargeRadiusToMinWidth(context,geom,ray_org,v1i); + return __coneline_internal::intersectCone(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray),v0,v1,cL,cR,epilog); + } + }; + + template<int M, int K> + struct ConeCurveIntersectorK + { + typedef CurvePrecalculationsK<K> Precalculations; + + struct ray_tfar { + RayK<K>& ray; + size_t k; + __forceinline ray_tfar(RayK<K>& ray, size_t k) : ray(ray), k(k) {} + __forceinline vfloat<M> operator() () const { return ray.tfar[k]; }; + }; + + template<typename Epilog> + static __forceinline bool intersect(const vbool<M>& valid_i, + RayK<K>& ray, size_t k, + IntersectContext* context, + const LineSegments* geom, + const Precalculations& pre, + const Vec4vf<M>& v0i, const Vec4vf<M>& v1i, + const vbool<M>& cL, const vbool<M>& cR, + const Epilog& epilog) + { + const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); + const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); + const vfloat<M> ray_tnear = ray.tnear()[k]; + const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); + return __coneline_internal::intersectCone(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray,k),v0,v1,cL,cR,epilog); + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h new file mode 100644 index 0000000000..d47218eb8b --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h @@ -0,0 +1,141 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "coneline_intersector.h" +#include "intersector_epilog.h" + +namespace embree +{ + namespace isa + { + template<int M, int Mx, bool filter> + struct ConeCurveMiIntersector1 + { + typedef LineMi<M> Primitive; + typedef CurvePrecalculations1 Precalculations; + + static __forceinline void intersect(const 
Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line) + { + STAT3(normal.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; + vbool<M> cL,cR; + line.gather(v0,v1,cL,cR,geom); + const vbool<Mx> valid = line.template valid<Mx>(); + ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) + { + STAT3(shadow.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; + vbool<M> cL,cR; + line.gather(v0,v1,cL,cR,geom); + const vbool<Mx> valid = line.template valid<Mx>(); + return ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + return false; + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line); + } + }; + + template<int M, int Mx, bool filter> + struct ConeCurveMiMBIntersector1 + { + typedef LineMi<M> Primitive; + typedef CurvePrecalculations1 Precalculations; + + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line) + { + STAT3(normal.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; + vbool<M> cL,cR; + line.gather(v0,v1,cL,cR,geom,ray.time()); + const vbool<Mx> valid = line.template valid<Mx>(); + ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, Ray& 
ray, IntersectContext* context, const Primitive& line) + { + STAT3(shadow.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; + vbool<M> cL,cR; + line.gather(v0,v1,cL,cR,geom,ray.time()); + const vbool<Mx> valid = line.template valid<Mx>(); + return ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + return false; + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line); + } + }; + + template<int M, int Mx, int K, bool filter> + struct ConeCurveMiIntersectorK + { + typedef LineMi<M> Primitive; + typedef CurvePrecalculationsK<K> Precalculations; + + static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) + { + STAT3(normal.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; + vbool<M> cL,cR; + line.gather(v0,v1,cL,cR,geom); + const vbool<Mx> valid = line.template valid<Mx>(); + ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) + { + STAT3(shadow.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; + vbool<M> cL,cR; + line.gather(v0,v1,cL,cR,geom); + const vbool<Mx> valid = line.template valid<Mx>(); + return ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + } + }; + + template<int M, int Mx, int K, bool 
filter> + struct ConeCurveMiMBIntersectorK + { + typedef LineMi<M> Primitive; + typedef CurvePrecalculationsK<K> Precalculations; + + static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) + { + STAT3(normal.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; + vbool<M> cL,cR; + line.gather(v0,v1,cL,cR,geom,ray.time()[k]); + const vbool<Mx> valid = line.template valid<Mx>(); + ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) + { + STAT3(shadow.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; + vbool<M> cL,cR; + line.gather(v0,v1,cL,cR,geom,ray.time()[k]); + const vbool<Mx> valid = line.template valid<Mx>(); + return ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi.h new file mode 100644 index 0000000000..51384f1959 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curveNi.h @@ -0,0 +1,222 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "primitive.h" +#include "curve_intersector_precalculations.h" + +namespace embree +{ + template<int M> + struct CurveNi + { + struct Type : public PrimitiveType { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + static Type type; + + 
public: + + /* Returns maximum number of stored primitives */ + static __forceinline size_t max_size() { return M; } + + /* Returns required number of primitive blocks for N primitives */ + static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; } + + static __forceinline size_t bytes(size_t N) + { + const size_t f = N/M, r = N%M; + static_assert(sizeof(CurveNi) == 22+25*M, "internal data layout issue"); + return f*sizeof(CurveNi) + (r!=0)*(22 + 25*r); + } + + public: + + /*! Default constructor. */ + __forceinline CurveNi () {} + + /*! fill curve from curve list */ + __forceinline void fill(const PrimRef* prims, size_t& begin, size_t _end, Scene* scene) + { + size_t end = min(begin+M,_end); + N = (uint8_t)(end-begin); + const unsigned int geomID0 = prims[begin].geomID(); + this->geomID(N) = geomID0; + ty = (uint8_t) scene->get(geomID0)->getType(); + + /* encode all primitives */ + BBox3fa bounds = empty; + for (size_t i=0; i<N; i++) + { + const PrimRef& prim = prims[begin+i]; + const unsigned int geomID = prim.geomID(); assert(geomID == geomID0); + const unsigned int primID = prim.primID(); + bounds.extend(scene->get(geomID)->vbounds(primID)); + } + + /* calculate offset and scale */ + Vec3fa loffset = bounds.lower; + float lscale = reduce_min(256.0f/(bounds.size()*sqrt(3.0f))); + if (bounds.size() == Vec3fa(zero)) lscale = 0.0f; + *this->offset(N) = loffset; + *this->scale(N) = lscale; + + /* encode all primitives */ + for (size_t i=0; i<M && begin<end; i++, begin++) + { + const PrimRef& prim = prims[begin]; + const unsigned int geomID = prim.geomID(); + const unsigned int primID = prim.primID(); + const LinearSpace3fa space2 = scene->get(geomID)->computeAlignedSpace(primID); + + const LinearSpace3fa space3(trunc(126.0f*space2.vx),trunc(126.0f*space2.vy),trunc(126.0f*space2.vz)); + const BBox3fa bounds = scene->get(geomID)->vbounds(loffset,lscale,max(length(space3.vx),length(space3.vy),length(space3.vz)),space3.transposed(),primID); + + bounds_vx_x(N)[i] 
= (int8_t) space3.vx.x; + bounds_vx_y(N)[i] = (int8_t) space3.vx.y; + bounds_vx_z(N)[i] = (int8_t) space3.vx.z; + bounds_vx_lower(N)[i] = (short) clamp(floor(bounds.lower.x),-32767.0f,32767.0f); + bounds_vx_upper(N)[i] = (short) clamp(ceil (bounds.upper.x),-32767.0f,32767.0f); + assert(-32767.0f <= floor(bounds.lower.x) && floor(bounds.lower.x) <= 32767.0f); + assert(-32767.0f <= ceil (bounds.upper.x) && ceil (bounds.upper.x) <= 32767.0f); + + bounds_vy_x(N)[i] = (int8_t) space3.vy.x; + bounds_vy_y(N)[i] = (int8_t) space3.vy.y; + bounds_vy_z(N)[i] = (int8_t) space3.vy.z; + bounds_vy_lower(N)[i] = (short) clamp(floor(bounds.lower.y),-32767.0f,32767.0f); + bounds_vy_upper(N)[i] = (short) clamp(ceil (bounds.upper.y),-32767.0f,32767.0f); + assert(-32767.0f <= floor(bounds.lower.y) && floor(bounds.lower.y) <= 32767.0f); + assert(-32767.0f <= ceil (bounds.upper.y) && ceil (bounds.upper.y) <= 32767.0f); + + bounds_vz_x(N)[i] = (int8_t) space3.vz.x; + bounds_vz_y(N)[i] = (int8_t) space3.vz.y; + bounds_vz_z(N)[i] = (int8_t) space3.vz.z; + bounds_vz_lower(N)[i] = (short) clamp(floor(bounds.lower.z),-32767.0f,32767.0f); + bounds_vz_upper(N)[i] = (short) clamp(ceil (bounds.upper.z),-32767.0f,32767.0f); + assert(-32767.0f <= floor(bounds.lower.z) && floor(bounds.lower.z) <= 32767.0f); + assert(-32767.0f <= ceil (bounds.upper.z) && ceil (bounds.upper.z) <= 32767.0f); + + this->primID(N)[i] = primID; + } + } + + template<typename BVH, typename Allocator> + __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) + { + size_t start = set.begin(); + size_t items = CurveNi::blocks(set.size()); + size_t numbytes = CurveNi::bytes(set.size()); + CurveNi* accel = (CurveNi*) alloc.malloc1(numbytes,BVH::byteAlignment); + for (size_t i=0; i<items; i++) { + accel[i].fill(prims,start,set.end(),bvh->scene); + } + return bvh->encodeLeaf((int8_t*)accel,items); + }; + + public: + + // 27.6 - 46 bytes per primitive + 
uint8_t ty; + uint8_t N; + uint8_t data[4+25*M+16]; + + /* + struct Layout + { + unsigned int geomID; + unsigned int primID[N]; + + int8_t bounds_vx_x[N]; + int8_t bounds_vx_y[N]; + int8_t bounds_vx_z[N]; + short bounds_vx_lower[N]; + short bounds_vx_upper[N]; + + int8_t bounds_vy_x[N]; + int8_t bounds_vy_y[N]; + int8_t bounds_vy_z[N]; + short bounds_vy_lower[N]; + short bounds_vy_upper[N]; + + int8_t bounds_vz_x[N]; + int8_t bounds_vz_y[N]; + int8_t bounds_vz_z[N]; + short bounds_vz_lower[N]; + short bounds_vz_upper[N]; + + Vec3f offset; + float scale; + }; + */ + + __forceinline unsigned int& geomID(size_t N) { return *(unsigned int*)((int8_t*)this+2); } + __forceinline const unsigned int& geomID(size_t N) const { return *(unsigned int*)((int8_t*)this+2); } + + __forceinline unsigned int* primID(size_t N) { return (unsigned int*)((int8_t*)this+6); } + __forceinline const unsigned int* primID(size_t N) const { return (unsigned int*)((int8_t*)this+6); } + + __forceinline int8_t* bounds_vx_x(size_t N) { return (int8_t*)((int8_t*)this+6+4*N); } + __forceinline const int8_t* bounds_vx_x(size_t N) const { return (int8_t*)((int8_t*)this+6+4*N); } + + __forceinline int8_t* bounds_vx_y(size_t N) { return (int8_t*)((int8_t*)this+6+5*N); } + __forceinline const int8_t* bounds_vx_y(size_t N) const { return (int8_t*)((int8_t*)this+6+5*N); } + + __forceinline int8_t* bounds_vx_z(size_t N) { return (int8_t*)((int8_t*)this+6+6*N); } + __forceinline const int8_t* bounds_vx_z(size_t N) const { return (int8_t*)((int8_t*)this+6+6*N); } + + __forceinline short* bounds_vx_lower(size_t N) { return (short*)((int8_t*)this+6+7*N); } + __forceinline const short* bounds_vx_lower(size_t N) const { return (short*)((int8_t*)this+6+7*N); } + + __forceinline short* bounds_vx_upper(size_t N) { return (short*)((int8_t*)this+6+9*N); } + __forceinline const short* bounds_vx_upper(size_t N) const { return (short*)((int8_t*)this+6+9*N); } + + __forceinline int8_t* bounds_vy_x(size_t N) { return 
(int8_t*)((int8_t*)this+6+11*N); } + __forceinline const int8_t* bounds_vy_x(size_t N) const { return (int8_t*)((int8_t*)this+6+11*N); } + + __forceinline int8_t* bounds_vy_y(size_t N) { return (int8_t*)((int8_t*)this+6+12*N); } + __forceinline const int8_t* bounds_vy_y(size_t N) const { return (int8_t*)((int8_t*)this+6+12*N); } + + __forceinline int8_t* bounds_vy_z(size_t N) { return (int8_t*)((int8_t*)this+6+13*N); } + __forceinline const int8_t* bounds_vy_z(size_t N) const { return (int8_t*)((int8_t*)this+6+13*N); } + + __forceinline short* bounds_vy_lower(size_t N) { return (short*)((int8_t*)this+6+14*N); } + __forceinline const short* bounds_vy_lower(size_t N) const { return (short*)((int8_t*)this+6+14*N); } + + __forceinline short* bounds_vy_upper(size_t N) { return (short*)((int8_t*)this+6+16*N); } + __forceinline const short* bounds_vy_upper(size_t N) const { return (short*)((int8_t*)this+6+16*N); } + + __forceinline int8_t* bounds_vz_x(size_t N) { return (int8_t*)((int8_t*)this+6+18*N); } + __forceinline const int8_t* bounds_vz_x(size_t N) const { return (int8_t*)((int8_t*)this+6+18*N); } + + __forceinline int8_t* bounds_vz_y(size_t N) { return (int8_t*)((int8_t*)this+6+19*N); } + __forceinline const int8_t* bounds_vz_y(size_t N) const { return (int8_t*)((int8_t*)this+6+19*N); } + + __forceinline int8_t* bounds_vz_z(size_t N) { return (int8_t*)((int8_t*)this+6+20*N); } + __forceinline const int8_t* bounds_vz_z(size_t N) const { return (int8_t*)((int8_t*)this+6+20*N); } + + __forceinline short* bounds_vz_lower(size_t N) { return (short*)((int8_t*)this+6+21*N); } + __forceinline const short* bounds_vz_lower(size_t N) const { return (short*)((int8_t*)this+6+21*N); } + + __forceinline short* bounds_vz_upper(size_t N) { return (short*)((int8_t*)this+6+23*N); } + __forceinline const short* bounds_vz_upper(size_t N) const { return (short*)((int8_t*)this+6+23*N); } + + __forceinline Vec3f* offset(size_t N) { return (Vec3f*)((int8_t*)this+6+25*N); } + __forceinline 
const Vec3f* offset(size_t N) const { return (Vec3f*)((int8_t*)this+6+25*N); } + + __forceinline float* scale(size_t N) { return (float*)((int8_t*)this+6+25*N+12); } + __forceinline const float* scale(size_t N) const { return (float*)((int8_t*)this+6+25*N+12); } + + __forceinline int8_t* end(size_t N) { return (int8_t*)this+6+25*N+16; } + __forceinline const int8_t* end(size_t N) const { return (int8_t*)this+6+25*N+16; } + }; + + template<int M> + typename CurveNi<M>::Type CurveNi<M>::type; + + typedef CurveNi<4> Curve4i; + typedef CurveNi<8> Curve8i; +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h new file mode 100644 index 0000000000..0f9038c9fc --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h @@ -0,0 +1,569 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "curveNi.h" + +namespace embree +{ + namespace isa + { + /* Single-ray intersector for CurveNi<M> primitive blocks: intersect() runs a per-curve bounds test, and the intersect_X and occluded_X methods visit the surviving curves and hand the gathered per-curve data to the Intersector template argument. */ template<int M> + struct CurveNiIntersector1 + { + typedef CurveNi<M> Primitive; + typedef Vec3vf<M> Vec3vfM; + typedef LinearSpace3<Vec3vfM>LinearSpace3vfM; + typedef CurvePrecalculations1 Precalculations; + + /* Bounds test of one ray against all curves stored in prim. The ray is first mapped by the block offset and scale, then by the per-curve linear space loaded from bounds_vx, bounds_vy and bounds_vz, and clipped per axis against the stored lower and upper bounds. The interval is clamped to ray.tnear() and ray.tfar and scaled by (1-3ulp) and (1+3ulp). Writes the entry distances to tNear_o and returns the lanes with index below prim.N whose interval is non-empty. */ static __forceinline vbool<M> intersect(Ray& ray, const Primitive& prim, vfloat<M>& tNear_o) + { + const size_t N = prim.N; + const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N)); + const Vec3fa offset = Vec3fa(offset_scale); + const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale)); + const Vec3fa org1 = (ray.org-offset)*scale; + const Vec3fa dir1 = ray.dir*scale; + + const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)), + vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)), + vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N))); + + 
const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1)); + const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1)); + const Vec3vfM rcp_dir2 = rcp_safe(dir2); + + const vfloat<M> t_lower_x = (vfloat<M>::load(prim.bounds_vx_lower(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); + const vfloat<M> t_upper_x = (vfloat<M>::load(prim.bounds_vx_upper(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); + const vfloat<M> t_lower_y = (vfloat<M>::load(prim.bounds_vy_lower(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); + const vfloat<M> t_upper_y = (vfloat<M>::load(prim.bounds_vy_upper(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); + const vfloat<M> t_lower_z = (vfloat<M>::load(prim.bounds_vz_lower(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); + const vfloat<M> t_upper_z = (vfloat<M>::load(prim.bounds_vz_upper(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); + + const vfloat<M> round_up (1.0f+3.0f*float(ulp)); + const vfloat<M> round_down(1.0f-3.0f*float(ulp)); + const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear())); + const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar)); + tNear_o = tNear; + return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar); + } + + /* Visits every curve that passed the bounds test, taking one lane at a time from the mask via bscf: gathers a0..a3 for it, calls prefetchL1_vertices for the next candidate and prefetchL2_vertices for the one after, and runs Intersector with an Epilog built for (geomID, primID). After each curve, lanes whose tNear exceeds ray.tfar (re-read after the test) are removed from the mask. NOTE(review): i1 is computed with bscf(mask1) before the if (mask) check, so bscf is also called on an empty mask for the last candidate - confirm bscf tolerates 0. */ template<typename Intersector, typename Epilog> + static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID)); + + size_t mask1 = mask; + const size_t i1 = 
bscf(mask1); + if (mask) { + const unsigned int primID1 = prim.primID(N)[i1]; + geom->prefetchL1_vertices(geom->curve(primID1)); + if (mask1) { + const size_t i2 = bsf(mask1); + const unsigned int primID2 = prim.primID(N)[i2]; + geom->prefetchL2_vertices(geom->curve(primID2)); + } + } + + Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + } + + /* Occlusion test over the curves that passed the bounds test, gathering a0..a3 per curve and prefetching the next two candidates: returns true as soon as Intersector().intersect returns true for a curve, otherwise false once the mask is empty. */ template<typename Intersector, typename Epilog> + static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID)); + + size_t mask1 = mask; + const size_t i1 = bscf(mask1); + if (mask) { + const unsigned int primID1 = prim.primID(N)[i1]; + geom->prefetchL1_vertices(geom->curve(primID1)); + if (mask1) { + const size_t i2 = bsf(mask1); + const unsigned int primID2 = prim.primID(N)[i2]; + geom->prefetchL2_vertices(geom->curve(primID2)); + } + } + + if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + return false; + } + + /* Visits every curve that passed the bounds test and gathers n0..n3 in addition to a0..a3 (both from the vertexID returned by geom->curve), prefetches the next two candidates, and passes both sets to Intersector. Lanes with tNear above ray.tfar are dropped after each curve. */ template<typename Intersector, typename Epilog> + static __forceinline void intersect_n(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + 
STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + + unsigned int vertexID = geom->curve(primID); + Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID); + + size_t mask1 = mask; + const size_t i1 = bscf(mask1); + if (mask) { + const unsigned int primID1 = prim.primID(N)[i1]; + geom->prefetchL1_vertices(geom->curve(primID1)); + if (mask1) { + const size_t i2 = bsf(mask1); + const unsigned int primID2 = prim.primID(N)[i2]; + geom->prefetchL2_vertices(geom->curve(primID2)); + } + } + + Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + } + + /* Occlusion test that gathers n0..n3 in addition to a0..a3 for each candidate curve: returns true as soon as Intersector().intersect returns true, otherwise false once the mask is empty. */ template<typename Intersector, typename Epilog> + static __forceinline bool occluded_n(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + + unsigned int vertexID = geom->curve(primID); + Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID); + + size_t mask1 = mask; + const size_t i1 = bscf(mask1); + if (mask) { + const unsigned int primID1 = prim.primID(N)[i1]; + geom->prefetchL1_vertices(geom->curve(primID1)); + if (mask1) { + const size_t i2 = bsf(mask1); + const unsigned int primID2 = prim.primID(N)[i2]; + geom->prefetchL2_vertices(geom->curve(primID2)); + } + } + + if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,context,geomID,primID))) + 
return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + return false; + } + + /* Hermite variant: for every curve that passed the bounds test, gathers p0,t0,p1,t1 through gather_hermite and passes them to Intersector; no vertex prefetches are issued. Lanes with tNear above ray.tfar are dropped after each curve. */ template<typename Intersector, typename Epilog> + static __forceinline void intersect_h(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID)); + Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + } + + /* Hermite occlusion test: gathers p0,t0,p1,t1 through gather_hermite for each candidate and returns true on the first curve for which Intersector().intersect returns true, otherwise false. */ template<typename Intersector, typename Epilog> + static __forceinline bool occluded_h(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID)); + if (Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + return false; + } + + /* Hermite variant with the extra n0,dn0,n1,dn1 inputs: gathers them together with p0,t0,p1,t1 through gather_hermite and passes all eight to Intersector. Lanes with tNear above ray.tfar are dropped after each curve. */ template<typename Intersector, typename Epilog> + static __forceinline void intersect_hn(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t 
N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID)); + Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + } + + /* Occlusion test with the p0,t0,p1,t1 and n0,dn0,n1,dn1 inputs gathered through gather_hermite: returns true on the first curve for which Intersector().intersect returns true, otherwise false. */ template<typename Intersector, typename Epilog> + static __forceinline bool occluded_hn(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID)); + if (Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + return false; + } + }; + + /* Intersector for one ray (index k) of a K-wide ray packet against CurveNi<M> primitive blocks: ray origin, direction, tnear() and tfar are all read at index k. */ template<int M, int K> + struct CurveNiIntersectorK + { + typedef CurveNi<M> Primitive; + typedef Vec3vf<M> Vec3vfM; + typedef LinearSpace3<Vec3vfM>LinearSpace3vfM; + typedef CurvePrecalculationsK<K> Precalculations; + + /* Bounds test of ray k of the packet against all curves stored in prim; writes the entry distances to tNear_o and returns the lanes with index below prim.N whose clipped interval is non-empty. */ static __forceinline vbool<M> intersect(RayK<K>& ray, const size_t k, const Primitive& prim, vfloat<M>& tNear_o) + { + const size_t N = prim.N; + const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N)); + const Vec3fa offset = Vec3fa(offset_scale); + const Vec3fa scale = 
Vec3fa(shuffle<3,3,3,3>(offset_scale)); + + const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]); + const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]); + const Vec3fa org1 = (ray_org-offset)*scale; + const Vec3fa dir1 = ray_dir*scale; + + const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)), + vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)), + vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N))); + + const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1)); + const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1)); + const Vec3vfM rcp_dir2 = rcp_safe(dir2); + + const vfloat<M> t_lower_x = (vfloat<M>::load(prim.bounds_vx_lower(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); + const vfloat<M> t_upper_x = (vfloat<M>::load(prim.bounds_vx_upper(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); + const vfloat<M> t_lower_y = (vfloat<M>::load(prim.bounds_vy_lower(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); + const vfloat<M> t_upper_y = (vfloat<M>::load(prim.bounds_vy_upper(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); + const vfloat<M> t_lower_z = (vfloat<M>::load(prim.bounds_vz_lower(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); + const vfloat<M> t_upper_z = (vfloat<M>::load(prim.bounds_vz_upper(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); + + const vfloat<M> round_up (1.0f+3.0f*float(ulp)); + const vfloat<M> round_down(1.0f-3.0f*float(ulp)); + const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()[k])); + const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar[k])); + tNear_o = tNear; + return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar); + } + + /* Visits every curve that passed the bounds test for ray k: gathers a0..a3, calls prefetchL1_vertices for the next candidate and prefetchL2_vertices for the one after, and runs Intersector with an Epilog built for (ray, k, geomID, primID). After each curve, lanes with tNear above ray.tfar[k] are removed from the mask. */ template<typename Intersector, typename 
Epilog> + static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID)); + + size_t mask1 = mask; + const size_t i1 = bscf(mask1); + if (mask) { + const unsigned int primID1 = prim.primID(N)[i1]; + geom->prefetchL1_vertices(geom->curve(primID1)); + if (mask1) { + const size_t i2 = bsf(mask1); + const unsigned int primID2 = prim.primID(N)[i2]; + geom->prefetchL2_vertices(geom->curve(primID2)); + } + } + + Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + } + + /* Occlusion test for ray k over the curves that passed the bounds test, gathering a0..a3 per curve and prefetching the next two candidates: returns true as soon as Intersector().intersect returns true for a curve, otherwise false once the mask is empty. */ template<typename Intersector, typename Epilog> + static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID)); + + size_t mask1 = mask; + const size_t i1 = bscf(mask1); + if (mask) { + const unsigned int primID1 = prim.primID(N)[i1]; + geom->prefetchL1_vertices(geom->curve(primID1)); + if (mask1) { + const size_t i2 = bsf(mask1); + const unsigned int 
primID2 = prim.primID(N)[i2]; + geom->prefetchL2_vertices(geom->curve(primID2)); + } + } + + if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + return false; + } + + /* Visits every curve that passed the bounds test for ray k and gathers n0..n3 in addition to a0..a3 (both from the vertexID returned by geom->curve), prefetches the next two candidates, and passes both sets to Intersector. Lanes with tNear above ray.tfar[k] are dropped after each curve. */ template<typename Intersector, typename Epilog> + static __forceinline void intersect_n(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + + unsigned int vertexID = geom->curve(primID); + Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID); + + size_t mask1 = mask; + const size_t i1 = bscf(mask1); + if (mask) { + const unsigned int primID1 = prim.primID(N)[i1]; + geom->prefetchL1_vertices(geom->curve(primID1)); + if (mask1) { + const size_t i2 = bsf(mask1); + const unsigned int primID2 = prim.primID(N)[i2]; + geom->prefetchL2_vertices(geom->curve(primID2)); + } + } + + Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,k,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + } + + /* Occlusion test for ray k that gathers n0..n3 in addition to a0..a3 for each candidate curve: returns true as soon as Intersector().intersect returns true, otherwise false once the mask is empty. */ template<typename Intersector, typename Epilog> + static __forceinline bool occluded_n(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const 
unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + + unsigned int vertexID = geom->curve(primID); + Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID); + + size_t mask1 = mask; + const size_t i1 = bscf(mask1); + if (mask) { + const unsigned int primID1 = prim.primID(N)[i1]; + geom->prefetchL1_vertices(geom->curve(primID1)); + if (mask1) { + const size_t i2 = bsf(mask1); + const unsigned int primID2 = prim.primID(N)[i2]; + geom->prefetchL2_vertices(geom->curve(primID2)); + } + } + + if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,k,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + return false; + } + + /* Hermite variant for ray k: for every curve that passed the bounds test, gathers p0,t0,p1,t1 through gather_hermite and passes them to Intersector; no vertex prefetches are issued. Lanes with tNear above ray.tfar[k] are dropped after each curve. */ template<typename Intersector, typename Epilog> + static __forceinline void intersect_h(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID)); + Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + } + + /* Hermite occlusion test for ray k: gathers p0,t0,p1,t1 through gather_hermite for each candidate and returns true on the first curve for which Intersector().intersect returns true, otherwise false. */ template<typename Intersector, typename Epilog> + static __forceinline bool occluded_h(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = 
bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID)); + if (Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + return false; + } + + /* Hermite variant for ray k with the extra n0,dn0,n1,dn1 inputs: gathers them together with p0,t0,p1,t1 through gather_hermite and passes all eight to Intersector. Lanes with tNear above ray.tfar[k] are dropped after each curve. */ template<typename Intersector, typename Epilog> + static __forceinline void intersect_hn(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID)); + Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,k,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + } + + /* Occlusion test for ray k with the p0,t0,p1,t1 and n0,dn0,n1,dn1 inputs gathered through gather_hermite: returns true on the first curve for which Intersector().intersect returns true, otherwise false. */ template<typename Intersector, typename Epilog> + static __forceinline bool occluded_hn(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; 
geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID)); + if (Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,k,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + return false; + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h new file mode 100644 index 0000000000..0cd8f833fd --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h @@ -0,0 +1,278 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "primitive.h" +#include "curve_intersector_precalculations.h" + +namespace embree +{ + /* Block of up to M curves whose per-curve bounds are stored as shorts for both bounds0 and bounds1 of a linear bounding box (lower0, upper0, lower1, upper1), together with time_offset and time_scale, packed into a byte array that is addressed through the accessor methods. */ template<int M> + struct CurveNiMB + { + struct Type : public PrimitiveType { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + static Type type; + + public: + + /* Returns maximum number of stored primitives */ + static __forceinline size_t max_size() { return M; } + + /* Returns required number of primitive blocks for N primitives */ + static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; } + + /* Bytes needed for N primitives: N/M full blocks of sizeof(CurveNiMB) plus, if N is not a multiple of M, one partial block sized for the remaining N%M curves. */ static __forceinline size_t bytes(size_t N) + { + const size_t f = N/M, r = N%M; + static_assert(sizeof(CurveNiMB) == 6+37*M+24, "internal data layout issue"); + return f*sizeof(CurveNiMB) + (r!=0)*(6+37*r+24); + } + + public: + + /*! Default constructor. */ + __forceinline CurveNiMB () {} + + /*! 
fill curve from curve list */ + /* Encodes up to M primitives from prims starting at begin, advancing begin past the ones consumed. Stores the geomID of the first primitive and its geometry type, the offset and scale derived from the merged bounds, time_offset and time_scale derived from time_range, and per curve the truncated 126-scaled space vectors plus the floor or ceil rounded, clamped bounds0 and bounds1. Returns the merged linear bounds of the encoded primitives. */ __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t _end, Scene* scene, const BBox1f time_range) + { + size_t end = min(begin+M,_end); + N = (uint8_t)(end-begin); + const unsigned int geomID0 = prims[begin].geomID(); + this->geomID(N) = geomID0; + ty = (uint8_t) scene->get(geomID0)->getType(); + + /* encode all primitives */ + LBBox3fa lbounds = empty; + for (size_t i=0; i<N; i++) + { + const PrimRefMB& prim = prims[begin+i]; + const unsigned int geomID = prim.geomID(); assert(geomID == geomID0); + const unsigned int primID = prim.primID(); + lbounds.extend(scene->get(geomID)->vlinearBounds(primID,time_range)); + } + BBox3fa bounds = lbounds.bounds(); + + /* calculate offset and scale */ + Vec3fa loffset = bounds.lower; + float lscale = reduce_min(256.0f/(bounds.size()*sqrt(3.0f))); + if (bounds.size() == Vec3fa(zero)) lscale = 0.0f; + *this->offset(N) = loffset; + *this->scale(N) = lscale; + this->time_offset(N) = time_range.lower; + this->time_scale(N) = 1.0f/time_range.size(); + + /* encode all primitives */ + for (size_t i=0; i<M && begin<end; i++, begin++) + { + const PrimRefMB& prim = prims[begin]; + const unsigned int geomID = prim.geomID(); + const unsigned int primID = prim.primID(); + const LinearSpace3fa space2 = scene->get(geomID)->computeAlignedSpaceMB(primID,time_range); + + const LinearSpace3fa space3(trunc(126.0f*space2.vx),trunc(126.0f*space2.vy),trunc(126.0f*space2.vz)); + const LBBox3fa bounds = scene->get(geomID)->vlinearBounds(loffset,lscale,max(length(space3.vx),length(space3.vy),length(space3.vz)),space3.transposed(),primID,time_range); + + // NOTE: this weird (int8_t) (short) cast works around VS2015 Win32 compiler bug + bounds_vx_x(N)[i] = (int8_t) (short) space3.vx.x; + bounds_vx_y(N)[i] = (int8_t) (short) space3.vx.y; + bounds_vx_z(N)[i] = (int8_t) (short) space3.vx.z; + bounds_vx_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.x),-32767.0f,32767.0f); + bounds_vx_upper0(N)[i] 
= (short) clamp(ceil (bounds.bounds0.upper.x),-32767.0f,32767.0f); + bounds_vx_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.x),-32767.0f,32767.0f); + bounds_vx_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.x),-32767.0f,32767.0f); + assert(-32767.0f <= floor(bounds.bounds0.lower.x) && floor(bounds.bounds0.lower.x) <= 32767.0f); + assert(-32767.0f <= ceil (bounds.bounds0.upper.x) && ceil (bounds.bounds0.upper.x) <= 32767.0f); + assert(-32767.0f <= floor(bounds.bounds1.lower.x) && floor(bounds.bounds1.lower.x) <= 32767.0f); + assert(-32767.0f <= ceil (bounds.bounds1.upper.x) && ceil (bounds.bounds1.upper.x) <= 32767.0f); + + bounds_vy_x(N)[i] = (int8_t) (short) space3.vy.x; + bounds_vy_y(N)[i] = (int8_t) (short) space3.vy.y; + bounds_vy_z(N)[i] = (int8_t) (short) space3.vy.z; + bounds_vy_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.y),-32767.0f,32767.0f); + bounds_vy_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.y),-32767.0f,32767.0f); + bounds_vy_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.y),-32767.0f,32767.0f); + bounds_vy_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.y),-32767.0f,32767.0f); + assert(-32767.0f <= floor(bounds.bounds0.lower.y) && floor(bounds.bounds0.lower.y) <= 32767.0f); + assert(-32767.0f <= ceil (bounds.bounds0.upper.y) && ceil (bounds.bounds0.upper.y) <= 32767.0f); + assert(-32767.0f <= floor(bounds.bounds1.lower.y) && floor(bounds.bounds1.lower.y) <= 32767.0f); + assert(-32767.0f <= ceil (bounds.bounds1.upper.y) && ceil (bounds.bounds1.upper.y) <= 32767.0f); + + bounds_vz_x(N)[i] = (int8_t) (short) space3.vz.x; + bounds_vz_y(N)[i] = (int8_t) (short) space3.vz.y; + bounds_vz_z(N)[i] = (int8_t) (short) space3.vz.z; + bounds_vz_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.z),-32767.0f,32767.0f); + bounds_vz_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.z),-32767.0f,32767.0f); + bounds_vz_lower1(N)[i] = (short) 
clamp(floor(bounds.bounds1.lower.z),-32767.0f,32767.0f); + bounds_vz_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.z),-32767.0f,32767.0f); + assert(-32767.0f <= floor(bounds.bounds0.lower.z) && floor(bounds.bounds0.lower.z) <= 32767.0f); + assert(-32767.0f <= ceil (bounds.bounds0.upper.z) && ceil (bounds.bounds0.upper.z) <= 32767.0f); + assert(-32767.0f <= floor(bounds.bounds1.lower.z) && floor(bounds.bounds1.lower.z) <= 32767.0f); + assert(-32767.0f <= ceil (bounds.bounds1.upper.z) && ceil (bounds.bounds1.upper.z) <= 32767.0f); + + this->primID(N)[i] = primID; + } + + return lbounds; + } + + /* Allocates CurveNiMB::bytes(prims.size()) bytes, encodes them as a leaf of CurveNiMB::blocks(prims.size()) items, fills each block through fillMB and returns a NodeRecordMB4D carrying the node reference, the merged bounds and prims.time_range. */ template<typename BVH, typename SetMB, typename Allocator> + __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc) + { + size_t start = prims.begin(); + size_t end = prims.end(); + size_t items = CurveNiMB::blocks(prims.size()); + size_t numbytes = CurveNiMB::bytes(prims.size()); + CurveNiMB* accel = (CurveNiMB*) alloc.malloc1(numbytes,BVH::byteAlignment); + const typename BVH::NodeRef node = bvh->encodeLeaf((int8_t*)accel,items); + + LBBox3fa bounds = empty; + for (size_t i=0; i<items; i++) + bounds.extend(accel[i].fillMB(prims.prims->data(),start,end,bvh->scene,prims.time_range)); + + return typename BVH::NodeRecordMB4D(node,bounds,prims.time_range); + }; + + + public: + + // 27.6 - 46 bytes per primitive + /* ty: geometry type written by fillMB from getType(); N: number of primitives stored in this block; data: remaining bytes of the packed layout sketched below. */ uint8_t ty; + uint8_t N; + uint8_t data[4+37*M+24]; + + /* + struct Layout + { + unsigned int geomID; + unsigned int primID[N]; + + int8_t bounds_vx_x[N]; + int8_t bounds_vx_y[N]; + int8_t bounds_vx_z[N]; + short bounds_vx_lower0[N]; + short bounds_vx_upper0[N]; + short bounds_vx_lower1[N]; + short bounds_vx_upper1[N]; + + int8_t bounds_vy_x[N]; + int8_t bounds_vy_y[N]; + int8_t bounds_vy_z[N]; + short bounds_vy_lower0[N]; + short bounds_vy_upper0[N]; + short bounds_vy_lower1[N]; + short bounds_vy_upper1[N]; + + int8_t bounds_vz_x[N]; + int8_t bounds_vz_y[N]; + int8_t bounds_vz_z[N]; + short 
bounds_vz_lower0[N]; + short bounds_vz_upper0[N]; + short bounds_vz_lower1[N]; + short bounds_vz_upper1[N]; + + Vec3f offset; + float scale; + + float time_offset; + float time_scale; + }; + */ + + /* Accessors into the packed layout. Offsets are relative to this: geomID starts at byte 2 and primID at byte 6, and every later array offset depends on the primitive count N passed in. */ __forceinline unsigned int& geomID(size_t N) { return *(unsigned int*)((int8_t*)this+2); } + __forceinline const unsigned int& geomID(size_t N) const { return *(unsigned int*)((int8_t*)this+2); } + + __forceinline unsigned int* primID(size_t N) { return (unsigned int*)((int8_t*)this+6); } + __forceinline const unsigned int* primID(size_t N) const { return (unsigned int*)((int8_t*)this+6); } + + __forceinline int8_t* bounds_vx_x(size_t N) { return (int8_t*)((int8_t*)this+6+4*N); } + __forceinline const int8_t* bounds_vx_x(size_t N) const { return (int8_t*)((int8_t*)this+6+4*N); } + + __forceinline int8_t* bounds_vx_y(size_t N) { return (int8_t*)((int8_t*)this+6+5*N); } + __forceinline const int8_t* bounds_vx_y(size_t N) const { return (int8_t*)((int8_t*)this+6+5*N); } + + __forceinline int8_t* bounds_vx_z(size_t N) { return (int8_t*)((int8_t*)this+6+6*N); } + __forceinline const int8_t* bounds_vx_z(size_t N) const { return (int8_t*)((int8_t*)this+6+6*N); } + + __forceinline short* bounds_vx_lower0(size_t N) { return (short*)((int8_t*)this+6+7*N); } + __forceinline const short* bounds_vx_lower0(size_t N) const { return (short*)((int8_t*)this+6+7*N); } + + __forceinline short* bounds_vx_upper0(size_t N) { return (short*)((int8_t*)this+6+9*N); } + __forceinline const short* bounds_vx_upper0(size_t N) const { return (short*)((int8_t*)this+6+9*N); } + + __forceinline short* bounds_vx_lower1(size_t N) { return (short*)((int8_t*)this+6+11*N); } + __forceinline const short* bounds_vx_lower1(size_t N) const { return (short*)((int8_t*)this+6+11*N); } + + __forceinline short* bounds_vx_upper1(size_t N) { return (short*)((int8_t*)this+6+13*N); } + __forceinline const short* bounds_vx_upper1(size_t N) const { return (short*)((int8_t*)this+6+13*N); } + + __forceinline int8_t* 
bounds_vy_x(size_t N) { return (int8_t*)((int8_t*)this+6+15*N); } + __forceinline const int8_t* bounds_vy_x(size_t N) const { return (int8_t*)((int8_t*)this+6+15*N); } + + __forceinline int8_t* bounds_vy_y(size_t N) { return (int8_t*)((int8_t*)this+6+16*N); } + __forceinline const int8_t* bounds_vy_y(size_t N) const { return (int8_t*)((int8_t*)this+6+16*N); } + + __forceinline int8_t* bounds_vy_z(size_t N) { return (int8_t*)((int8_t*)this+6+17*N); } + __forceinline const int8_t* bounds_vy_z(size_t N) const { return (int8_t*)((int8_t*)this+6+17*N); } + + __forceinline short* bounds_vy_lower0(size_t N) { return (short*)((int8_t*)this+6+18*N); } + __forceinline const short* bounds_vy_lower0(size_t N) const { return (short*)((int8_t*)this+6+18*N); } + + __forceinline short* bounds_vy_upper0(size_t N) { return (short*)((int8_t*)this+6+20*N); } + __forceinline const short* bounds_vy_upper0(size_t N) const { return (short*)((int8_t*)this+6+20*N); } + + __forceinline short* bounds_vy_lower1(size_t N) { return (short*)((int8_t*)this+6+22*N); } + __forceinline const short* bounds_vy_lower1(size_t N) const { return (short*)((int8_t*)this+6+22*N); } + + __forceinline short* bounds_vy_upper1(size_t N) { return (short*)((int8_t*)this+6+24*N); } + __forceinline const short* bounds_vy_upper1(size_t N) const { return (short*)((int8_t*)this+6+24*N); } + + __forceinline int8_t* bounds_vz_x(size_t N) { return (int8_t*)((int8_t*)this+6+26*N); } + __forceinline const int8_t* bounds_vz_x(size_t N) const { return (int8_t*)((int8_t*)this+6+26*N); } + + __forceinline int8_t* bounds_vz_y(size_t N) { return (int8_t*)((int8_t*)this+6+27*N); } + __forceinline const int8_t* bounds_vz_y(size_t N) const { return (int8_t*)((int8_t*)this+6+27*N); } + + __forceinline int8_t* bounds_vz_z(size_t N) { return (int8_t*)((int8_t*)this+6+28*N); } + __forceinline const int8_t* bounds_vz_z(size_t N) const { return (int8_t*)((int8_t*)this+6+28*N); } + + __forceinline short* bounds_vz_lower0(size_t N) { return 
(short*)((int8_t*)this+6+29*N); } + __forceinline const short* bounds_vz_lower0(size_t N) const { return (short*)((int8_t*)this+6+29*N); } + + __forceinline short* bounds_vz_upper0(size_t N) { return (short*)((int8_t*)this+6+31*N); } + __forceinline const short* bounds_vz_upper0(size_t N) const { return (short*)((int8_t*)this+6+31*N); } + + __forceinline short* bounds_vz_lower1(size_t N) { return (short*)((int8_t*)this+6+33*N); } + __forceinline const short* bounds_vz_lower1(size_t N) const { return (short*)((int8_t*)this+6+33*N); } + + __forceinline short* bounds_vz_upper1(size_t N) { return (short*)((int8_t*)this+6+35*N); } + __forceinline const short* bounds_vz_upper1(size_t N) const { return (short*)((int8_t*)this+6+35*N); } + + __forceinline Vec3f* offset(size_t N) { return (Vec3f*)((int8_t*)this+6+37*N); } + __forceinline const Vec3f* offset(size_t N) const { return (Vec3f*)((int8_t*)this+6+37*N); } + + __forceinline float* scale(size_t N) { return (float*)((int8_t*)this+6+37*N+12); } + __forceinline const float* scale(size_t N) const { return (float*)((int8_t*)this+6+37*N+12); } + + __forceinline float& time_offset(size_t N) { return *(float*)((int8_t*)this+6+37*N+16); } + __forceinline const float& time_offset(size_t N) const { return *(float*)((int8_t*)this+6+37*N+16); } + + __forceinline float& time_scale(size_t N) { return *(float*)((int8_t*)this+6+37*N+20); } + __forceinline const float& time_scale(size_t N) const { return *(float*)((int8_t*)this+6+37*N+20); } + + /* Address of the first byte after the packed layout for N primitives (6+37*N+24 bytes from this). */ __forceinline int8_t* end(size_t N) { return (int8_t*)this+6+37*N+24; } + __forceinline const int8_t* end(size_t N) const { return (int8_t*)this+6+37*N+24; } + }; + + template<int M> + typename CurveNiMB<M>::Type CurveNiMB<M>::type; + + typedef CurveNiMB<4> Curve4iMB; + typedef CurveNiMB<8> Curve8iMB; +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h new file mode 100644 index 
0000000000..0cbc764668 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h @@ -0,0 +1,516 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "curveNi_mb.h" +#include "../subdiv/linear_bezier_patch.h" + +namespace embree +{ + namespace isa + { + /* Single-ray intersector for CurveNiMB<M> blocks: a shared bounds pre-test (intersect) plus one closest-hit and one occlusion driver per curve representation, each handing the surviving curves to the supplied Intersector. */template<int M> + struct CurveNiMBIntersector1 + { + typedef CurveNiMB<M> Primitive; + typedef Vec3vf<M> Vec3vfM; + typedef LinearSpace3<Vec3vfM>LinearSpace3vfM; + typedef CurvePrecalculations1 Precalculations; + + /* Bounds pre-test. Moves the ray into the per-lane linear space loaded from the block, blends the two stored sets of lower and upper bounds at the ray time and slab-tests them. Writes the entry distances to tNear_o and returns the lanes with index < prim.N and tNear <= tFar. */static __forceinline vbool<M> intersect(Ray& ray, const Primitive& prim, vfloat<M>& tNear_o) + { + const size_t N = prim.N; + /* one unaligned 4-float load: the value is used as the offset vector and its lane 3 is broadcast as the scale */const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N)); + const Vec3fa offset = Vec3fa(offset_scale); + const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale)); + const Vec3fa org1 = (ray.org-offset)*scale; + const Vec3fa dir1 = ray.dir*scale; + + const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)), + vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)), + vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N))); + + const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1)); + const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1)); + const Vec3vfM rcp_dir2 = rcp_safe(dir2); + + /* block-local time: (ray time - time_offset) * time_scale. It is the blend factor between the bounds suffixed 0 and 1 below (NOTE(review): madd(a,b,c) is presumably a*b+c - confirm). */const vfloat<M> ltime = (ray.time()-prim.time_offset(N))*prim.time_scale(N); + const vfloat<M> vx_lower0 = vfloat<M>::load(prim.bounds_vx_lower0(N)); + const vfloat<M> vx_lower1 = vfloat<M>::load(prim.bounds_vx_lower1(N)); + const vfloat<M> vx_lower = madd(ltime,vx_lower1-vx_lower0,vx_lower0); + const vfloat<M> vx_upper0 = vfloat<M>::load(prim.bounds_vx_upper0(N)); + const vfloat<M> vx_upper1 = vfloat<M>::load(prim.bounds_vx_upper1(N)); + const vfloat<M> vx_upper = madd(ltime,vx_upper1-vx_upper0,vx_upper0); + + const 
vfloat<M> vy_lower0 = vfloat<M>::load(prim.bounds_vy_lower0(N)); + const vfloat<M> vy_lower1 = vfloat<M>::load(prim.bounds_vy_lower1(N)); + const vfloat<M> vy_lower = madd(ltime,vy_lower1-vy_lower0,vy_lower0); + const vfloat<M> vy_upper0 = vfloat<M>::load(prim.bounds_vy_upper0(N)); + const vfloat<M> vy_upper1 = vfloat<M>::load(prim.bounds_vy_upper1(N)); + const vfloat<M> vy_upper = madd(ltime,vy_upper1-vy_upper0,vy_upper0); + + const vfloat<M> vz_lower0 = vfloat<M>::load(prim.bounds_vz_lower0(N)); + const vfloat<M> vz_lower1 = vfloat<M>::load(prim.bounds_vz_lower1(N)); + const vfloat<M> vz_lower = madd(ltime,vz_lower1-vz_lower0,vz_lower0); + const vfloat<M> vz_upper0 = vfloat<M>::load(prim.bounds_vz_upper0(N)); + const vfloat<M> vz_upper1 = vfloat<M>::load(prim.bounds_vz_upper1(N)); + const vfloat<M> vz_upper = madd(ltime,vz_upper1-vz_upper0,vz_upper0); + + /* per-axis slab distances in the transformed space */const vfloat<M> t_lower_x = (vx_lower-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); + const vfloat<M> t_upper_x = (vx_upper-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); + const vfloat<M> t_lower_y = (vy_lower-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); + const vfloat<M> t_upper_y = (vy_upper-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); + const vfloat<M> t_lower_z = (vz_lower-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); + const vfloat<M> t_upper_z = (vz_upper-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); + + /* tNear is scaled by (1-3ulp) and tFar by (1+3ulp) before they are compared */const vfloat<M> round_up (1.0f+3.0f*float(ulp)); + const vfloat<M> round_down(1.0f-3.0f*float(ulp)); + const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear())); + const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar)); + tNear_o = tNear; + /* lanes at or beyond prim.N are masked out */return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar); + } + + /* Closest-hit driver for curves given by four control points: gathers a0..a3 at ray.time() and calls Intersector. After every curve the remaining candidates are re-filtered with tNear <= ray.tfar. */template<typename Intersector, typename Epilog> + static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, IntersectContext* context, const 
Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + /* NOTE(review): bscf presumably returns the lowest set lane and clears it in mask - confirm */const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time()); + + Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + } + + /* Occlusion driver for four-control-point curves: returns true for the first curve on which Intersector reports a hit, false if none does. */template<typename Intersector, typename Epilog> + static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time()); + + if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + return false; + } + + /* Closest-hit driver for normal-oriented curves: obtains the surface from getNormalOrientedCurve at ray.time() and passes it to Intersector. */template<typename Intersector, typename Epilog> + static __forceinline void intersect_n(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const 
CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time()); + Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + } + + /* Occlusion counterpart of intersect_n: true on the first reported hit. */template<typename Intersector, typename Epilog> + static __forceinline bool occluded_n(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time()); + + if (Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + return false; + } + + /* Closest-hit driver for Hermite curves: gathers p0,t0,p1,t1 with gather_hermite at ray.time() and calls Intersector. */template<typename Intersector, typename Epilog> + static __forceinline void intersect_h(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); 
+ Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time()); + Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + } + + /* Occlusion counterpart of intersect_h: true on the first reported hit. */template<typename Intersector, typename Epilog> + static __forceinline bool occluded_h(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time()); + if (Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + return false; + } + + /* Closest-hit driver for normal-oriented Hermite curves: obtains the surface from getNormalOrientedHermiteCurve at ray.time(). */template<typename Intersector, typename Epilog> + static __forceinline void intersect_hn(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time()); + Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID)); + 
mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + } + + /* Occlusion driver for normal-oriented Hermite curves (single ray): true on the first hit reported by Intersector. */template<typename Intersector, typename Epilog> + static __forceinline bool occluded_hn(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time()); + if (Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + return false; + } + }; + + /* Ray-packet intersector for CurveNiMB<M> blocks. Every entry point processes one ray of the RayK<K> packet, selected by the index k. */template<int M, int K> + struct CurveNiMBIntersectorK + { + typedef CurveNiMB<M> Primitive; + typedef Vec3vf<M> Vec3vfM; + typedef LinearSpace3<Vec3vfM>LinearSpace3vfM; + typedef CurvePrecalculationsK<K> Precalculations; + + /* Bounds pre-test for ray k: origin, direction, time, tnear and tfar are read from lane k of the packet; the stored bounds are blended at that time and slab-tested. Writes the entry distances to tNear_o and returns the lanes with index < prim.N and tNear <= tFar. */static __forceinline vbool<M> intersect(RayK<K>& ray, const size_t k, const Primitive& prim, vfloat<M>& tNear_o) + { + const size_t N = prim.N; + const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N)); + const Vec3fa offset = Vec3fa(offset_scale); + const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale)); + + /* pull ray k out of the per-component arrays of the packet */const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]); + const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]); + const Vec3fa org1 = (ray_org-offset)*scale; + const Vec3fa dir1 = ray_dir*scale; + + const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)), + vfloat<M>::load(prim.bounds_vy_x(N)), 
vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)), + vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N))); + + const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1)); + const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1)); + const Vec3vfM rcp_dir2 = rcp_safe(dir2); + + /* block-local time of ray k, used as blend factor between the bounds suffixed 0 and 1 */const vfloat<M> ltime = (ray.time()[k]-prim.time_offset(N))*prim.time_scale(N); + const vfloat<M> vx_lower0 = vfloat<M>::load(prim.bounds_vx_lower0(N)); + const vfloat<M> vx_lower1 = vfloat<M>::load(prim.bounds_vx_lower1(N)); + const vfloat<M> vx_lower = madd(ltime,vx_lower1-vx_lower0,vx_lower0); + const vfloat<M> vx_upper0 = vfloat<M>::load(prim.bounds_vx_upper0(N)); + const vfloat<M> vx_upper1 = vfloat<M>::load(prim.bounds_vx_upper1(N)); + const vfloat<M> vx_upper = madd(ltime,vx_upper1-vx_upper0,vx_upper0); + + const vfloat<M> vy_lower0 = vfloat<M>::load(prim.bounds_vy_lower0(N)); + const vfloat<M> vy_lower1 = vfloat<M>::load(prim.bounds_vy_lower1(N)); + const vfloat<M> vy_lower = madd(ltime,vy_lower1-vy_lower0,vy_lower0); + const vfloat<M> vy_upper0 = vfloat<M>::load(prim.bounds_vy_upper0(N)); + const vfloat<M> vy_upper1 = vfloat<M>::load(prim.bounds_vy_upper1(N)); + const vfloat<M> vy_upper = madd(ltime,vy_upper1-vy_upper0,vy_upper0); + + const vfloat<M> vz_lower0 = vfloat<M>::load(prim.bounds_vz_lower0(N)); + const vfloat<M> vz_lower1 = vfloat<M>::load(prim.bounds_vz_lower1(N)); + const vfloat<M> vz_lower = madd(ltime,vz_lower1-vz_lower0,vz_lower0); + const vfloat<M> vz_upper0 = vfloat<M>::load(prim.bounds_vz_upper0(N)); + const vfloat<M> vz_upper1 = vfloat<M>::load(prim.bounds_vz_upper1(N)); + const vfloat<M> vz_upper = madd(ltime,vz_upper1-vz_upper0,vz_upper0); + + const vfloat<M> t_lower_x = (vx_lower-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); + const vfloat<M> t_upper_x = (vx_upper-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); + const vfloat<M> t_lower_y = (vy_lower-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); + const 
vfloat<M> t_upper_y = (vy_upper-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); + const vfloat<M> t_lower_z = (vz_lower-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); + const vfloat<M> t_upper_z = (vz_upper-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); + + /* tNear is scaled by (1-3ulp) and tFar by (1+3ulp) before they are compared */const vfloat<M> round_up (1.0f+3.0f*float(ulp)); + const vfloat<M> round_down(1.0f-3.0f*float(ulp)); + const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()[k])); + const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar[k])); + tNear_o = tNear; + return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar); + } + + /* Closest-hit driver for four-control-point curves, ray k: gathers a0..a3 at the time of ray k; after every curve the remaining candidates are re-filtered with tNear <= ray.tfar[k]. */template<typename Intersector, typename Epilog> + static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time()[k]); + + Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + } + + /* Occlusion driver for four-control-point curves, ray k: true on the first hit reported by Intersector. */template<typename Intersector, typename Epilog> + static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = 
prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time()[k]); + + if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + return false; + } + + /* Closest-hit driver for normal-oriented curves, ray k: the surface comes from getNormalOrientedCurve, called with the origin and time of ray k. */template<typename Intersector, typename Epilog> + static __forceinline void intersect_n(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); + const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]); + Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + } + + /* Occlusion counterpart of intersect_n for ray k: true on the first reported hit. */template<typename Intersector, typename Epilog> + static __forceinline bool occluded_n(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* 
geom = context->scene->get<CurveGeometry>(geomID); + const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); + const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]); + + if (Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + return false; + } + + /* Closest-hit driver for Hermite curves, ray k: gathers p0,t0,p1,t1 with gather_hermite at the time of ray k. */template<typename Intersector, typename Epilog> + static __forceinline void intersect_h(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time()[k]); + Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + } + + /* Occlusion counterpart of intersect_h for ray k: true on the first reported hit. */template<typename Intersector, typename Epilog> + static __forceinline bool occluded_h(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + Vec3ff p0,t0,p1,t1; 
geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time()[k]); + if (Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + return false; + } + + /* Closest-hit driver for normal-oriented Hermite curves, ray k: the surface comes from getNormalOrientedHermiteCurve, called with the origin and time of ray k. */template<typename Intersector, typename Epilog> + static __forceinline void intersect_hn(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); + const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]); + Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + } + + /* Occlusion counterpart of intersect_hn for ray k: true on the first reported hit. */template<typename Intersector, typename Epilog> + static __forceinline bool occluded_hn(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); + const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); + const 
TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]); + if (Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + return false; + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNv.h b/thirdparty/embree-aarch64/kernels/geometry/curveNv.h new file mode 100644 index 0000000000..6eb5e30b39 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curveNv.h @@ -0,0 +1,101 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "curveNi.h" + +namespace embree +{ + /* CurveNi<M> block extended with a copy of the four control points of every stored curve. The copies are addressed through vertices(), which starts at CurveNi<M>::end(N). */template<int M> + struct CurveNv : public CurveNi<M> + { + using CurveNi<M>::N; + + struct Type : public PrimitiveType { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + static Type type; + + public: + + /* Returns maximum number of stored primitives */ + static __forceinline size_t max_size() { return M; } + + /* Returns required number of primitive blocks for N primitives */ + static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; } + + /* Returns the bytes needed for N curves: each full block takes sizeof(CurveNv), a trailing partial block holding r curves takes 22 + 25*r + 4*16*r bytes. */static __forceinline size_t bytes(size_t N) + { + const size_t f = N/M, r = N%M; + static_assert(sizeof(CurveNv) == 22+25*M+4*16*M, "internal data layout issue"); + return f*sizeof(CurveNv) + (r!=0)*(22 + 25*r + 4*16*r); + } + + public: + + /*! Default constructor. */ + __forceinline CurveNv () {} + + /*! 
fill curve from curve list */ + __forceinline void fill(const PrimRef* prims, size_t& begin, size_t _end, Scene* scene) + { + /* takes at most M primitives starting at begin; begin is only read here, never advanced */size_t end = min(begin+M,_end); + size_t N = end-begin; + + /* encode all primitives */ + for (size_t i=0; i<N; i++) + { + const PrimRef& prim = prims[begin+i]; + const unsigned int geomID = prim.geomID(); + const unsigned int primID = prim.primID(); + CurveGeometry* mesh = (CurveGeometry*) scene->get(geomID); + /* copy the four consecutive vertices starting at the first vertex index of the curve */const unsigned vtxID = mesh->curve(primID); + Vec3fa::storeu(&this->vertices(i,N)[0],mesh->vertex(vtxID+0)); + Vec3fa::storeu(&this->vertices(i,N)[1],mesh->vertex(vtxID+1)); + Vec3fa::storeu(&this->vertices(i,N)[2],mesh->vertex(vtxID+2)); + Vec3fa::storeu(&this->vertices(i,N)[3],mesh->vertex(vtxID+3)); + } + } + + /* Builds the leaf for the primitive range set. An empty range yields BVH::emptyNode. Oriented curves and Hermite-basis curves (decided from the first primitive of the range) are delegated to CurveNi<M>::createLeaf; otherwise CurveNv blocks are allocated and filled. */template<typename BVH, typename Allocator> + __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) + { + if (set.size() == 0) + return BVH::emptyNode; + + /* fall back to CurveNi for oriented curves */ + unsigned int geomID = prims[set.begin()].geomID(); + if (bvh->scene->get(geomID)->getCurveType() == Geometry::GTY_SUBTYPE_ORIENTED_CURVE) { + return CurveNi<M>::createLeaf(bvh,prims,set,alloc); + } + if (bvh->scene->get(geomID)->getCurveBasis() == Geometry::GTY_BASIS_HERMITE) { + return CurveNi<M>::createLeaf(bvh,prims,set,alloc); + } + + size_t start = set.begin(); + size_t items = CurveNv::blocks(set.size()); + size_t numbytes = CurveNv::bytes(set.size()); + CurveNv* accel = (CurveNv*) alloc.malloc1(numbytes,BVH::byteAlignment); + for (size_t i=0; i<items; i++) { + /* NOTE(review): the vertex copy runs first, presumably because CurveNi<M>::fill is the call that advances start - confirm against curveNi.h */accel[i].CurveNv<M>::fill(prims,start,set.end(),bvh->scene); + accel[i].CurveNi<M>::fill(prims,start,set.end(),bvh->scene); + } + return bvh->encodeLeaf((char*)accel,items); + }; + + public: + /* reserves 4*16 bytes per curve for the control point copies */unsigned char data[4*16*M]; + /* returns the first of the four control points of curve i in a block holding N curves */__forceinline Vec3fa* vertices(size_t i, size_t N) { return (Vec3fa*)CurveNi<M>::end(N)+4*i; } + __forceinline const Vec3fa* vertices(size_t i, size_t N) const 
{ return (Vec3fa*)CurveNi<M>::end(N)+4*i; } + }; + + template<int M> + typename CurveNv<M>::Type CurveNv<M>::type; + + typedef CurveNv<4> Curve4v; + typedef CurveNv<8> Curve8v; +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h new file mode 100644 index 0000000000..e20da2882e --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h @@ -0,0 +1,181 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "curveNv.h" +#include "curveNi_intersector.h" + +namespace embree +{ + namespace isa + { + /* Single-ray intersector for CurveNv<M> blocks: reuses the bounds test of CurveNiIntersector1<M> and reads the control points from the copies stored in the block (prim.vertices) instead of gathering them from the geometry. */template<int M> + struct CurveNvIntersector1 : public CurveNiIntersector1<M> + { + typedef CurveNv<M> Primitive; + typedef CurvePrecalculations1 Precalculations; + + /* Closest-hit driver: for every candidate curve loads a0..a3 from prim.vertices(i,N), prefetches the vertices of the following candidates, calls Intersector, then re-filters the remaining candidates with tNear <= ray.tfar. */template<typename Intersector, typename Epilog> + static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = CurveNiIntersector1<M>::intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID); + const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]); + const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]); + const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]); + const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]); + + /* look-ahead on a copy of mask: i1 is the next candidate (only used when mask is non-zero), i2 the one after it. Element [4] is one past the four control points of the addressed curve. */size_t mask1 = mask; + const size_t i1 = bscf(mask1); + if (mask) { + prefetchL1(&prim.vertices(i1,N)[0]); + prefetchL1(&prim.vertices(i1,N)[4]); + if (mask1) { + const size_t i2 = bsf(mask1); + prefetchL2(&prim.vertices(i2,N)[0]); + prefetchL2(&prim.vertices(i2,N)[4]); + } + } + + 
Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + } + + /* Occlusion driver (single ray) over the control points stored in the block: returns true for the first curve on which Intersector reports a hit, false if none does. */template<typename Intersector, typename Epilog> + static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = CurveNiIntersector1<M>::intersect(ray,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID); + const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]); + const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]); + const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]); + const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]); + + size_t mask1 = mask; + const size_t i1 = bscf(mask1); + if (mask) { + prefetchL1(&prim.vertices(i1,N)[0]); + prefetchL1(&prim.vertices(i1,N)[4]); + if (mask1) { + const size_t i2 = bsf(mask1); + prefetchL2(&prim.vertices(i2,N)[0]); + prefetchL2(&prim.vertices(i2,N)[4]); + } + } + + if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar)); + } + return false; + } + }; + + /* Ray-packet intersector for CurveNv<M> blocks: processes ray k of a RayK<K> packet, using the bounds test of CurveNiIntersectorK<M,K> and the control points stored in the block. */template<int M, int K> + struct CurveNvIntersectorK : public CurveNiIntersectorK<M,K> + { + typedef CurveNv<M> Primitive; + typedef CurvePrecalculationsK<K> Precalculations; + + /* Closest-hit driver for ray k: loads a0..a3 from prim.vertices(i,N), prefetches the vertices of the following candidates, calls Intersector, then re-filters the remaining candidates with tNear <= ray.tfar[k]. */template<typename Intersector, typename Epilog> + static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = CurveNiIntersectorK<M,K>::intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = 
movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(normal.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID); + const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]); + const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]); + const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]); + const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]); + + /* look-ahead on a copy of mask: i1 is the next candidate (only used when mask is non-zero), i2 the one after it */size_t mask1 = mask; + const size_t i1 = bscf(mask1); + if (mask) { + prefetchL1(&prim.vertices(i1,N)[0]); + prefetchL1(&prim.vertices(i1,N)[4]); + if (mask1) { + const size_t i2 = bsf(mask1); + prefetchL2(&prim.vertices(i2,N)[0]); + prefetchL2(&prim.vertices(i2,N)[4]); + } + } + + Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID)); + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + } + + /* Occlusion driver for ray k over the control points stored in the block: true on the first hit reported by Intersector, false if none. */template<typename Intersector, typename Epilog> + static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) + { + vfloat<M> tNear; + vbool<M> valid = CurveNiIntersectorK<M,K>::intersect(ray,k,prim,tNear); + + const size_t N = prim.N; + size_t mask = movemask(valid); + while (mask) + { + const size_t i = bscf(mask); + STAT3(shadow.trav_prims,1,1,1); + const unsigned int geomID = prim.geomID(N); + const unsigned int primID = prim.primID(N)[i]; + const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID); + const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]); + const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]); + const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]); + const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]); + + size_t mask1 = mask; + const size_t i1 = bscf(mask1); + if (mask) { + prefetchL1(&prim.vertices(i1,N)[0]); + prefetchL1(&prim.vertices(i1,N)[4]); + if (mask1) { + const size_t i2 = bsf(mask1); 
+ prefetchL2(&prim.vertices(i2,N)[0]); + prefetchL2(&prim.vertices(i2,N)[4]); + } + } + + if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID))) + return true; + + mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); + } + return false; + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h new file mode 100644 index 0000000000..204958f7cc --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h @@ -0,0 +1,98 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "primitive.h" +#include "../subdiv/bezier_curve.h" +#include "../common/primref.h" +#include "bezier_hair_intersector.h" +#include "bezier_ribbon_intersector.h" +#include "bezier_curve_intersector.h" +#include "oriented_curve_intersector.h" +#include "../bvh/node_intersector1.h" + +// FIXME: this file seems replicate of curve_intersector_virtual.h + +namespace embree +{ + namespace isa + { + struct VirtualCurveIntersector1 + { + typedef unsigned char Primitive; + typedef CurvePrecalculations1 Precalculations; + + template<int N, int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + assert(num == 1); + RTCGeometryType ty = (RTCGeometryType)(*prim); + assert(This->leafIntersector); + VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty]; + leafIntersector.intersect<1>(&pre,&ray,context,prim); + } + + template<int N, int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> 
&tray, size_t& lazy_node) + { + assert(num == 1); + RTCGeometryType ty = (RTCGeometryType)(*prim); + assert(This->leafIntersector); + VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty]; + return leafIntersector.occluded<1>(&pre,&ray,context,prim); + } + }; + + template<int K> + struct VirtualCurveIntersectorK + { + typedef unsigned char Primitive; + typedef CurvePrecalculationsK<K> Precalculations; + + static __forceinline void intersect(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) + { + assert(num == 1); + RTCGeometryType ty = (RTCGeometryType)(*prim); + assert(This->leafIntersector); + VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty]; + size_t mask = movemask(valid_i); + while (mask) leafIntersector.intersect<K>(&pre,&ray,bscf(mask),context,prim); + } + + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) + { + assert(num == 1); + RTCGeometryType ty = (RTCGeometryType)(*prim); + assert(This->leafIntersector); + VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty]; + vbool<K> valid_o = false; + size_t mask = movemask(valid_i); + while (mask) { + size_t k = bscf(mask); + if (leafIntersector.occluded<K>(&pre,&ray,k,context,prim)) + set(valid_o, k); + } + return valid_o; + } + + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) + { + assert(num == 1); + RTCGeometryType ty = (RTCGeometryType)(*prim); + assert(This->leafIntersector); + 
VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty]; + leafIntersector.intersect<K>(&pre,&ray,k,context,prim); + } + + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) + { + assert(num == 1); + RTCGeometryType ty = (RTCGeometryType)(*prim); + assert(This->leafIntersector); + VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty]; + return leafIntersector.occluded<K>(&pre,&ray,k,context,prim); + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h new file mode 100644 index 0000000000..343cc8ff28 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h @@ -0,0 +1,129 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" +#include "curve_intersector_precalculations.h" + +namespace embree +{ + namespace isa + { + template<typename NativeCurve3fa, int M> + struct DistanceCurveHit + { + __forceinline DistanceCurveHit() {} + + __forceinline DistanceCurveHit(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const int i, const int N, + const NativeCurve3fa& curve3D) + : U(U), V(V), T(T), i(i), N(N), curve3D(curve3D), valid(valid) {} + + __forceinline void finalize() + { + vu = (vfloat<M>(step)+U+vfloat<M>(float(i)))*(1.0f/float(N)); + vv = V; + vt = T; + } + + __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } + __forceinline float t (const size_t i) const { return vt[i]; } + __forceinline Vec3fa Ng(const size_t i) const { + return curve3D.eval_du(vu[i]); + } + + public: + vfloat<M> U; + vfloat<M> V; + vfloat<M> T; + int i, 
N; + NativeCurve3fa curve3D; + + public: + vbool<M> valid; + vfloat<M> vu; + vfloat<M> vv; + vfloat<M> vt; + }; + + template<typename NativeCurve3fa> + struct DistanceCurve1Intersector1 + { + template<typename Epilog> + __forceinline bool intersect(const CurvePrecalculations1& pre,Ray& ray, + IntersectContext* context, + const CurveGeometry* geom, const unsigned int primID, + const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2, const Vec3fa& v3, + const Epilog& epilog) + { + const int N = geom->tessellationRate; + + /* transform control points into ray space */ + const NativeCurve3fa curve3Di(v0,v1,v2,v3); + const NativeCurve3fa curve3D = enlargeRadiusToMinWidth(context,geom,ray.org,curve3Di); + const NativeCurve3fa curve2D = curve3D.xfm_pr(pre.ray_space,ray.org); + + /* evaluate the bezier curve */ + vboolx valid = vfloatx(step) < vfloatx(float(N)); + const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(0,N); + const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(0,N); + + /* approximative intersection with cone */ + const Vec4vfx v = p1-p0; + const Vec4vfx w = -p0; + const vfloatx d0 = madd(w.x,v.x,w.y*v.y); + const vfloatx d1 = madd(v.x,v.x,v.y*v.y); + const vfloatx u = clamp(d0*rcp(d1),vfloatx(zero),vfloatx(one)); + const Vec4vfx p = madd(u,v,p0); + const vfloatx t = p.z*pre.depth_scale; + const vfloatx d2 = madd(p.x,p.x,p.y*p.y); + const vfloatx r = p.w; + const vfloatx r2 = r*r; + valid &= (d2 <= r2) & (vfloatx(ray.tnear()) <= t) & (t <= vfloatx(ray.tfar)); + if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) + valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*pre.depth_scale; // ignore self intersections + + /* update hit information */ + bool ishit = false; + if (unlikely(any(valid))) { + DistanceCurveHit<NativeCurve3fa,VSIZEX> hit(valid,u,0.0f,t,0,N,curve3D); + ishit = ishit | epilog(valid,hit); + } + + if (unlikely(VSIZEX < N)) + { + /* process SIMD-size many segments per iteration */ + for (int i=VSIZEX; i<N; i+=VSIZEX) + { + /* 
evaluate the bezier curve */ + vboolx valid = vintx(i)+vintx(step) < vintx(N); + const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(i,N); + const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(i,N); + + /* approximative intersection with cone */ + const Vec4vfx v = p1-p0; + const Vec4vfx w = -p0; + const vfloatx d0 = madd(w.x,v.x,w.y*v.y); + const vfloatx d1 = madd(v.x,v.x,v.y*v.y); + const vfloatx u = clamp(d0*rcp(d1),vfloatx(zero),vfloatx(one)); + const Vec4vfx p = madd(u,v,p0); + const vfloatx t = p.z*pre.depth_scale; + const vfloatx d2 = madd(p.x,p.x,p.y*p.y); + const vfloatx r = p.w; + const vfloatx r2 = r*r; + valid &= (d2 <= r2) & (vfloatx(ray.tnear()) <= t) & (t <= vfloatx(ray.tfar)); + if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) + valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*pre.depth_scale; // ignore self intersections + + /* update hit information */ + if (unlikely(any(valid))) { + DistanceCurveHit<NativeCurve3fa,VSIZEX> hit(valid,u,0.0f,t,i,N,curve3D); + ishit = ishit | epilog(valid,hit); + } + } + } + return ishit; + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h new file mode 100644 index 0000000000..47531027fc --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h @@ -0,0 +1,417 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" +#include "curve_intersector_precalculations.h" +#include "curve_intersector_sweep.h" +#include "../subdiv/linear_bezier_patch.h" + +#define DBG(x) + +namespace embree +{ + namespace isa + { + template<typename Ray, typename Epilog> + struct TensorLinearCubicBezierSurfaceIntersector + { + const LinearSpace3fa& ray_space; + Ray& ray; + TensorLinearCubicBezierSurface3fa curve3d; + TensorLinearCubicBezierSurface2fa curve2d; + float eps; + const Epilog& 
epilog; + bool isHit; + + __forceinline TensorLinearCubicBezierSurfaceIntersector (const LinearSpace3fa& ray_space, Ray& ray, const TensorLinearCubicBezierSurface3fa& curve3d, const Epilog& epilog) + : ray_space(ray_space), ray(ray), curve3d(curve3d), epilog(epilog), isHit(false) + { + const TensorLinearCubicBezierSurface3fa curve3dray = curve3d.xfm(ray_space,ray.org); + curve2d = TensorLinearCubicBezierSurface2fa(CubicBezierCurve2fa(curve3dray.L),CubicBezierCurve2fa(curve3dray.R)); + const BBox2fa b2 = curve2d.bounds(); + eps = 8.0f*float(ulp)*reduce_max(max(abs(b2.lower),abs(b2.upper))); + } + + __forceinline Interval1f solve_linear(const float u0, const float u1, const float& p0, const float& p1) + { + if (p1 == p0) { + if (p0 == 0.0f) return Interval1f(u0,u1); + else return Interval1f(empty); + } + const float t = -p0/(p1-p0); + const float tt = lerp(u0,u1,t); + return Interval1f(tt); + } + + __forceinline void solve_linear(const float u0, const float u1, const Interval1f& p0, const Interval1f& p1, Interval1f& u) + { + if (sign(p0.lower) != sign(p0.upper)) u.extend(u0); + if (sign(p0.lower) != sign(p1.lower)) u.extend(solve_linear(u0,u1,p0.lower,p1.lower)); + if (sign(p0.upper) != sign(p1.upper)) u.extend(solve_linear(u0,u1,p0.upper,p1.upper)); + if (sign(p1.lower) != sign(p1.upper)) u.extend(u1); + } + + __forceinline Interval1f bezier_clipping(const CubicBezierCurve<Interval1f>& curve) + { + Interval1f u = empty; + solve_linear(0.0f/3.0f,1.0f/3.0f,curve.v0,curve.v1,u); + solve_linear(0.0f/3.0f,2.0f/3.0f,curve.v0,curve.v2,u); + solve_linear(0.0f/3.0f,3.0f/3.0f,curve.v0,curve.v3,u); + solve_linear(1.0f/3.0f,2.0f/3.0f,curve.v1,curve.v2,u); + solve_linear(1.0f/3.0f,3.0f/3.0f,curve.v1,curve.v3,u); + solve_linear(2.0f/3.0f,3.0f/3.0f,curve.v2,curve.v3,u); + return intersect(u,Interval1f(0.0f,1.0f)); + } + + __forceinline Interval1f bezier_clipping(const LinearBezierCurve<Interval1f>& curve) + { + Interval1f v = empty; + solve_linear(0.0f,1.0f,curve.v0,curve.v1,v); + 
return intersect(v,Interval1f(0.0f,1.0f)); + } + + __forceinline void solve_bezier_clipping(BBox1f cu, BBox1f cv, const TensorLinearCubicBezierSurface2fa& curve2) + { + BBox2fa bounds = curve2.bounds(); + if (bounds.upper.x < 0.0f) return; + if (bounds.upper.y < 0.0f) return; + if (bounds.lower.x > 0.0f) return; + if (bounds.lower.y > 0.0f) return; + + if (max(cu.size(),cv.size()) < 1E-4f) + { + const float u = cu.center(); + const float v = cv.center(); + TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org); + const float t = curve_z.eval(u,v); + if (ray.tnear() <= t && t <= ray.tfar) { + const Vec3fa Ng = cross(curve3d.eval_du(u,v),curve3d.eval_dv(u,v)); + BezierCurveHit hit(t,u,v,Ng); + isHit |= epilog(hit); + } + return; + } + + const Vec2fa dv = curve2.axis_v(); + const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dv); + LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u(); + if (!curve0v.hasRoot()) return; + + const Interval1f v = bezier_clipping(curve0v); + if (isEmpty(v)) return; + TensorLinearCubicBezierSurface2fa curve2a = curve2.clip_v(v); + cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper)); + + const Vec2fa du = curve2.axis_u(); + const TensorLinearCubicBezierSurface1f curve1u = curve2a.xfm(du); + CubicBezierCurve<Interval1f> curve0u = curve1u.reduce_v(); + int roots = curve0u.maxRoots(); + if (roots == 0) return; + + if (roots == 1) + { + const Interval1f u = bezier_clipping(curve0u); + if (isEmpty(u)) return; + TensorLinearCubicBezierSurface2fa curve2b = curve2a.clip_u(u); + cu = BBox1f(lerp(cu.lower,cu.upper,u.lower),lerp(cu.lower,cu.upper,u.upper)); + solve_bezier_clipping(cu,cv,curve2b); + return; + } + + TensorLinearCubicBezierSurface2fa curve2l, curve2r; + curve2a.split_u(curve2l,curve2r); + solve_bezier_clipping(BBox1f(cu.lower,cu.center()),cv,curve2l); + solve_bezier_clipping(BBox1f(cu.center(),cu.upper),cv,curve2r); + } + + __forceinline bool solve_bezier_clipping() + { + 
solve_bezier_clipping(BBox1f(0.0f,1.0f),BBox1f(0.0f,1.0f),curve2d); + return isHit; + } + + __forceinline void solve_newton_raphson(BBox1f cu, BBox1f cv) + { + Vec2fa uv(cu.center(),cv.center()); + const Vec2fa dfdu = curve2d.eval_du(uv.x,uv.y); + const Vec2fa dfdv = curve2d.eval_dv(uv.x,uv.y); + const LinearSpace2fa rcp_J = rcp(LinearSpace2fa(dfdu,dfdv)); + solve_newton_raphson_loop(cu,cv,uv,dfdu,dfdv,rcp_J); + } + + __forceinline void solve_newton_raphson_loop(BBox1f cu, BBox1f cv, const Vec2fa& uv_in, const Vec2fa& dfdu, const Vec2fa& dfdv, const LinearSpace2fa& rcp_J) + { + Vec2fa uv = uv_in; + + for (size_t i=0; i<200; i++) + { + const Vec2fa f = curve2d.eval(uv.x,uv.y); + const Vec2fa duv = rcp_J*f; + uv -= duv; + + if (max(abs(f.x),abs(f.y)) < eps) + { + const float u = uv.x; + const float v = uv.y; + if (!(u >= 0.0f && u <= 1.0f)) return; // rejects NaNs + if (!(v >= 0.0f && v <= 1.0f)) return; // rejects NaNs + const TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org); + const float t = curve_z.eval(u,v); + if (!(ray.tnear() <= t && t <= ray.tfar)) return; // rejects NaNs + const Vec3fa Ng = cross(curve3d.eval_du(u,v),curve3d.eval_dv(u,v)); + BezierCurveHit hit(t,u,v,Ng); + isHit |= epilog(hit); + return; + } + } + } + + __forceinline bool clip_v(BBox1f& cu, BBox1f& cv) + { + const Vec2fa dv = curve2d.eval_dv(cu.lower,cv.lower); + const TensorLinearCubicBezierSurface1f curve1v = curve2d.xfm(dv).clip(cu,cv); + LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u(); + if (!curve0v.hasRoot()) return false; + Interval1f v = bezier_clipping(curve0v); + if (isEmpty(v)) return false; + v = intersect(v + Interval1f(-0.1f,+0.1f),Interval1f(0.0f,1.0f)); + cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper)); + return true; + } + + __forceinline bool solve_krawczyk(bool very_small, BBox1f& cu, BBox1f& cv) + { + /* perform bezier clipping in v-direction to get tight v-bounds */ + 
TensorLinearCubicBezierSurface2fa curve2 = curve2d.clip(cu,cv); + const Vec2fa dv = curve2.axis_v(); + const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dv); + LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u(); + if (unlikely(!curve0v.hasRoot())) return true; + Interval1f v = bezier_clipping(curve0v); + if (unlikely(isEmpty(v))) return true; + v = intersect(v + Interval1f(-0.1f,+0.1f),Interval1f(0.0f,1.0f)); + curve2 = curve2.clip_v(v); + cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper)); + + /* perform one newton raphson iteration */ + Vec2fa c(cu.center(),cv.center()); + Vec2fa f,dfdu,dfdv; curve2d.eval(c.x,c.y,f,dfdu,dfdv); + const LinearSpace2fa rcp_J = rcp(LinearSpace2fa(dfdu,dfdv)); + const Vec2fa c1 = c - rcp_J*f; + + /* calculate bounds of derivatives */ + const BBox2fa bounds_du = (1.0f/cu.size())*curve2.derivative_u().bounds(); + const BBox2fa bounds_dv = (1.0f/cv.size())*curve2.derivative_v().bounds(); + + /* calculate krawczyk test */ + LinearSpace2<Vec2<Interval1f>> I(Interval1f(1.0f), Interval1f(0.0f), + Interval1f(0.0f), Interval1f(1.0f)); + + LinearSpace2<Vec2<Interval1f>> G(Interval1f(bounds_du.lower.x,bounds_du.upper.x), Interval1f(bounds_dv.lower.x,bounds_dv.upper.x), + Interval1f(bounds_du.lower.y,bounds_du.upper.y), Interval1f(bounds_dv.lower.y,bounds_dv.upper.y)); + + const LinearSpace2<Vec2f> rcp_J2(rcp_J); + const LinearSpace2<Vec2<Interval1f>> rcp_Ji(rcp_J2); + + const Vec2<Interval1f> x(cu,cv); + const Vec2<Interval1f> K = Vec2<Interval1f>(Vec2f(c1)) + (I - rcp_Ji*G)*(x-Vec2<Interval1f>(Vec2f(c))); + + /* test if there is no solution */ + const Vec2<Interval1f> KK = intersect(K,x); + if (unlikely(isEmpty(KK.x) || isEmpty(KK.y))) return true; + + /* exit if convergence cannot get proven, but terminate if we are very small */ + if (unlikely(!subset(K,x) && !very_small)) return false; + + /* solve using newton raphson iteration if convergence is guaranteed */ + 
solve_newton_raphson_loop(cu,cv,c1,dfdu,dfdv,rcp_J); + return true; + } + + __forceinline void solve_newton_raphson_no_recursion(BBox1f cu, BBox1f cv) + { + if (!clip_v(cu,cv)) return; + return solve_newton_raphson(cu,cv); + } + + __forceinline void solve_newton_raphson_recursion(BBox1f cu, BBox1f cv) + { + unsigned int sptr = 0; + const unsigned int stack_size = 4; + unsigned int mask_stack[stack_size]; + BBox1f cu_stack[stack_size]; + BBox1f cv_stack[stack_size]; + goto entry; + + /* terminate if stack is empty */ + while (sptr) + { + /* pop from stack */ + { + sptr--; + size_t mask = mask_stack[sptr]; + cu = cu_stack[sptr]; + cv = cv_stack[sptr]; + const size_t i = bscf(mask); + mask_stack[sptr] = mask; + if (mask) sptr++; // there are still items on the stack + + /* process next element recurse into each hit curve segment */ + const float u0 = float(i+0)*(1.0f/(VSIZEX-1)); + const float u1 = float(i+1)*(1.0f/(VSIZEX-1)); + const BBox1f cui(lerp(cu.lower,cu.upper,u0),lerp(cu.lower,cu.upper,u1)); + cu = cui; + } + +#if 0 + solve_newton_raphson_no_recursion(cu,cv); + continue; + +#else + /* we assume convergence for small u ranges and verify using krawczyk */ + if (cu.size() < 1.0f/6.0f) { + const bool very_small = cu.size() < 0.001f || sptr >= stack_size; + if (solve_krawczyk(very_small,cu,cv)) { + continue; + } + } +#endif + + entry: + + /* split the curve into VSIZEX-1 segments in u-direction */ + vboolx valid = true; + TensorLinearCubicBezierSurface<Vec2vfx> subcurves = curve2d.clip_v(cv).vsplit_u(valid,cu); + + /* slabs test in u-direction */ + Vec2vfx ndv = cross(subcurves.axis_v()); + BBox<vfloatx> boundsv = subcurves.vxfm(ndv).bounds(); + valid &= boundsv.lower <= eps; + valid &= boundsv.upper >= -eps; + if (none(valid)) continue; + + /* slabs test in v-direction */ + Vec2vfx ndu = cross(subcurves.axis_u()); + BBox<vfloatx> boundsu = subcurves.vxfm(ndu).bounds(); + valid &= boundsu.lower <= eps; + valid &= boundsu.upper >= -eps; + if (none(valid)) 
continue; + + /* push valid segments to stack */ + assert(sptr < stack_size); + mask_stack [sptr] = movemask(valid); + cu_stack [sptr] = cu; + cv_stack [sptr] = cv; + sptr++; + } + } + + __forceinline bool solve_newton_raphson_main() + { + BBox1f vu(0.0f,1.0f); + BBox1f vv(0.0f,1.0f); + solve_newton_raphson_recursion(vu,vv); + return isHit; + } + }; + + + template<template<typename Ty> class SourceCurve> + struct OrientedCurve1Intersector1 + { + //template<typename Ty> using Curve = SourceCurve<Ty>; + typedef SourceCurve<Vec3ff> SourceCurve3ff; + typedef SourceCurve<Vec3fa> SourceCurve3fa; + + __forceinline OrientedCurve1Intersector1() {} + + __forceinline OrientedCurve1Intersector1(const Ray& ray, const void* ptr) {} + + template<typename Epilog> + __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, + IntersectContext* context, + const CurveGeometry* geom, const unsigned int primID, + const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i, + const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i, + const Epilog& epilog) const + { + STAT3(normal.trav_prims,1,1,1); + + SourceCurve3ff ccurve(v0i,v1i,v2i,v3i); + SourceCurve3fa ncurve(n0i,n1i,n2i,n3i); + ccurve = enlargeRadiusToMinWidth(context,geom,ray.org,ccurve); + TensorLinearCubicBezierSurface3fa curve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve); + //return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_bezier_clipping(); + return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_newton_raphson_main(); + } + + template<typename Epilog> + __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, + IntersectContext* context, + const CurveGeometry* geom, const unsigned int primID, + const TensorLinearCubicBezierSurface3fa& curve, const Epilog& epilog) const + { + STAT3(normal.trav_prims,1,1,1); + //return 
TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_bezier_clipping(); + return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_newton_raphson_main(); + } + }; + + template<template<typename Ty> class SourceCurve, int K> + struct OrientedCurve1IntersectorK + { + //template<typename Ty> using Curve = SourceCurve<Ty>; + typedef SourceCurve<Vec3ff> SourceCurve3ff; + typedef SourceCurve<Vec3fa> SourceCurve3fa; + + struct Ray1 + { + __forceinline Ray1(RayK<K>& ray, size_t k) + : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {} + + Vec3fa org; + Vec3fa dir; + float _tnear; + float& tfar; + + __forceinline float& tnear() { return _tnear; } + //__forceinline float& tfar() { return _tfar; } + __forceinline const float& tnear() const { return _tnear; } + //__forceinline const float& tfar() const { return _tfar; } + }; + + template<typename Epilog> + __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k, + IntersectContext* context, + const CurveGeometry* geom, const unsigned int primID, + const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i, + const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i, + const Epilog& epilog) + { + STAT3(normal.trav_prims,1,1,1); + Ray1 ray(vray,k); + SourceCurve3ff ccurve(v0i,v1i,v2i,v3i); + SourceCurve3fa ncurve(n0i,n1i,n2i,n3i); + ccurve = enlargeRadiusToMinWidth(context,geom,ray.org,ccurve); + TensorLinearCubicBezierSurface3fa curve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve); + //return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_bezier_clipping(); + return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_newton_raphson_main(); + } + + template<typename Epilog> + 
__forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k, + IntersectContext* context, + const CurveGeometry* geom, const unsigned int primID, + const TensorLinearCubicBezierSurface3fa& curve, + const Epilog& epilog) + { + STAT3(normal.trav_prims,1,1,1); + Ray1 ray(vray,k); + //return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_bezier_clipping(); + return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_newton_raphson_main(); + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h new file mode 100644 index 0000000000..6e9fc91925 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h @@ -0,0 +1,49 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" +#include "../common/geometry.h" + +namespace embree +{ + namespace isa + { + struct CurvePrecalculations1 + { + float depth_scale; + LinearSpace3fa ray_space; + + __forceinline CurvePrecalculations1() {} + + __forceinline CurvePrecalculations1(const Ray& ray, const void* ptr) + { + depth_scale = rsqrt(dot(ray.dir,ray.dir)); + LinearSpace3fa space = frame(depth_scale*ray.dir); + space.vz *= depth_scale; + ray_space = space.transposed(); + } + }; + + template<int K> + struct CurvePrecalculationsK + { + vfloat<K> depth_scale; + LinearSpace3fa ray_space[K]; + + __forceinline CurvePrecalculationsK(const vbool<K>& valid, const RayK<K>& ray) + { + size_t mask = movemask(valid); + depth_scale = rsqrt(dot(ray.dir,ray.dir)); + while (mask) { + size_t k = bscf(mask); + Vec3fa ray_dir_k = Vec3fa(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]); + LinearSpace3fa ray_space_k = frame(depth_scale[k]*ray_dir_k); + ray_space_k.vz *= depth_scale[k]; + ray_space[k] = 
ray_space_k.transposed(); + } + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h new file mode 100644 index 0000000000..a99cf99d56 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h @@ -0,0 +1,214 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" +#include "quad_intersector.h" +#include "curve_intersector_precalculations.h" + +#define Bezier1Intersector1 RibbonCurve1Intersector1 +#define Bezier1IntersectorK RibbonCurve1IntersectorK + +namespace embree +{ + namespace isa + { + template<typename NativeCurve3ff, int M> + struct RibbonHit + { + __forceinline RibbonHit() {} + + __forceinline RibbonHit(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const int i, const int N, + const NativeCurve3ff& curve3D) + : U(U), V(V), T(T), i(i), N(N), curve3D(curve3D), valid(valid) {} + + __forceinline void finalize() + { + vu = (vfloat<M>(step)+U+vfloat<M>(float(i)))*(1.0f/float(N)); + vv = V; + vt = T; + } + + __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } + __forceinline float t (const size_t i) const { return vt[i]; } + __forceinline Vec3fa Ng(const size_t i) const { + return curve3D.eval_du(vu[i]); + } + + public: + vfloat<M> U; + vfloat<M> V; + vfloat<M> T; + int i, N; + NativeCurve3ff curve3D; + + public: + vbool<M> valid; + vfloat<M> vu; + vfloat<M> vv; + vfloat<M> vt; + }; + + /* calculate squared distance of point p0 to line p1->p2 */ + __forceinline std::pair<vfloatx,vfloatx> sqr_point_line_distance(const Vec2vfx& p0, const Vec2vfx& p1, const Vec2vfx& p2) + { + const vfloatx num = det(p2-p1,p1-p0); + const vfloatx den2 = dot(p2-p1,p2-p1); + return std::make_pair(num*num,den2); + } + + /* performs culling against a cylinder */ + __forceinline vboolx 
cylinder_culling_test(const Vec2vfx& p0, const Vec2vfx& p1, const Vec2vfx& p2, const vfloatx& r) + { + const std::pair<vfloatx,vfloatx> d = sqr_point_line_distance(p0,p1,p2); + return d.first <= r*r*d.second; + } + + template<typename NativeCurve3ff, typename Epilog> + __forceinline bool intersect_ribbon(const Vec3fa& ray_org, const Vec3fa& ray_dir, const float ray_tnear, const float& ray_tfar, + const LinearSpace3fa& ray_space, const float& depth_scale, + const NativeCurve3ff& curve3D, const int N, + const Epilog& epilog) + { + /* transform control points into ray space */ + const NativeCurve3ff curve2D = curve3D.xfm_pr(ray_space,ray_org); + float eps = 4.0f*float(ulp)*reduce_max(max(abs(curve2D.v0),abs(curve2D.v1),abs(curve2D.v2),abs(curve2D.v3))); + + /* evaluate the bezier curve */ + bool ishit = false; + vboolx valid = vfloatx(step) < vfloatx(float(N)); + const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(0,N); + const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(0,N); + valid &= cylinder_culling_test(zero,Vec2vfx(p0.x,p0.y),Vec2vfx(p1.x,p1.y),max(p0.w,p1.w)); + + if (any(valid)) + { + Vec3vfx dp0dt = curve2D.template derivative0<VSIZEX>(0,N); + Vec3vfx dp1dt = curve2D.template derivative1<VSIZEX>(0,N); + dp0dt = select(reduce_max(abs(dp0dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp0dt); + dp1dt = select(reduce_max(abs(dp1dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp1dt); + const Vec3vfx n0(dp0dt.y,-dp0dt.x,0.0f); + const Vec3vfx n1(dp1dt.y,-dp1dt.x,0.0f); + const Vec3vfx nn0 = normalize(n0); + const Vec3vfx nn1 = normalize(n1); + const Vec3vfx lp0 = madd(p0.w,nn0,Vec3vfx(p0)); + const Vec3vfx lp1 = madd(p1.w,nn1,Vec3vfx(p1)); + const Vec3vfx up0 = nmadd(p0.w,nn0,Vec3vfx(p0)); + const Vec3vfx up1 = nmadd(p1.w,nn1,Vec3vfx(p1)); + + vfloatx vu,vv,vt; + vboolx valid0 = intersect_quad_backface_culling(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt); + + if (any(valid0)) + { + /* ignore self intersections */ + if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR 
!= 0.0f) { + vfloatx r = lerp(p0.w, p1.w, vu); + valid0 &= vt > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; + } + + if (any(valid0)) + { + vv = madd(2.0f,vv,vfloatx(-1.0f)); + RibbonHit<NativeCurve3ff,VSIZEX> bhit(valid0,vu,vv,vt,0,N,curve3D); + ishit |= epilog(bhit.valid,bhit); + } + } + } + + if (unlikely(VSIZEX < N)) + { + /* process SIMD-size many segments per iteration */ + for (int i=VSIZEX; i<N; i+=VSIZEX) + { + /* evaluate the bezier curve */ + vboolx valid = vintx(i)+vintx(step) < vintx(N); + const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(i,N); + const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(i,N); + valid &= cylinder_culling_test(zero,Vec2vfx(p0.x,p0.y),Vec2vfx(p1.x,p1.y),max(p0.w,p1.w)); + if (none(valid)) continue; + + Vec3vfx dp0dt = curve2D.template derivative0<VSIZEX>(i,N); + Vec3vfx dp1dt = curve2D.template derivative1<VSIZEX>(i,N); + dp0dt = select(reduce_max(abs(dp0dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp0dt); + dp1dt = select(reduce_max(abs(dp1dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp1dt); + const Vec3vfx n0(dp0dt.y,-dp0dt.x,0.0f); + const Vec3vfx n1(dp1dt.y,-dp1dt.x,0.0f); + const Vec3vfx nn0 = normalize(n0); + const Vec3vfx nn1 = normalize(n1); + const Vec3vfx lp0 = madd(p0.w,nn0,Vec3vfx(p0)); + const Vec3vfx lp1 = madd(p1.w,nn1,Vec3vfx(p1)); + const Vec3vfx up0 = nmadd(p0.w,nn0,Vec3vfx(p0)); + const Vec3vfx up1 = nmadd(p1.w,nn1,Vec3vfx(p1)); + + vfloatx vu,vv,vt; + vboolx valid0 = intersect_quad_backface_culling(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt); + + if (any(valid0)) + { + /* ignore self intersections */ + if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) { + vfloatx r = lerp(p0.w, p1.w, vu); + valid0 &= vt > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; + } + + if (any(valid0)) + { + vv = madd(2.0f,vv,vfloatx(-1.0f)); + RibbonHit<NativeCurve3ff,VSIZEX> bhit(valid0,vu,vv,vt,i,N,curve3D); + ishit |= epilog(bhit.valid,bhit); + } + } + } + } + return 
ishit; + } + + template<template<typename Ty> class NativeCurve> + struct RibbonCurve1Intersector1 + { + typedef NativeCurve<Vec3ff> NativeCurve3ff; + + template<typename Epilog> + __forceinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, + IntersectContext* context, + const CurveGeometry* geom, const unsigned int primID, + const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, + const Epilog& epilog) + { + const int N = geom->tessellationRate; + NativeCurve3ff curve(v0,v1,v2,v3); + curve = enlargeRadiusToMinWidth(context,geom,ray.org,curve); + return intersect_ribbon<NativeCurve3ff>(ray.org,ray.dir,ray.tnear(),ray.tfar, + pre.ray_space,pre.depth_scale, + curve,N, + epilog); + } + }; + + template<template<typename Ty> class NativeCurve, int K> + struct RibbonCurve1IntersectorK + { + typedef NativeCurve<Vec3ff> NativeCurve3ff; + + template<typename Epilog> + __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& ray, size_t k, + IntersectContext* context, + const CurveGeometry* geom, const unsigned int primID, + const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, + const Epilog& epilog) + { + const int N = geom->tessellationRate; + const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]); + const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]); + NativeCurve3ff curve(v0,v1,v2,v3); + curve = enlargeRadiusToMinWidth(context,geom,ray_org,curve); + return intersect_ribbon<NativeCurve3ff>(ray_org,ray_dir,ray.tnear()[k],ray.tfar[k], + pre.ray_space[k],pre.depth_scale[k], + curve,N, + epilog); + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h new file mode 100644 index 0000000000..883cedc3d2 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h @@ -0,0 +1,362 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: 
Apache-2.0 + +#pragma once + +#include "../common/ray.h" +#include "cylinder.h" +#include "plane.h" +#include "line_intersector.h" +#include "curve_intersector_precalculations.h" + +namespace embree +{ + namespace isa + { + static const size_t numJacobianIterations = 5; +#if defined(__AVX__) + static const size_t numBezierSubdivisions = 2; +#else + static const size_t numBezierSubdivisions = 3; +#endif + + struct BezierCurveHit + { + __forceinline BezierCurveHit() {} + + __forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng) + : t(t), u(u), v(0.0f), Ng(Ng) {} + + __forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng) + : t(t), u(u), v(v), Ng(Ng) {} + + __forceinline void finalize() {} + + public: + float t; + float u; + float v; + Vec3fa Ng; + }; + + template<typename NativeCurve3ff, typename Ray, typename Epilog> + __forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i, + const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1, + const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du, + const Epilog& epilog) + { + if (tp.lower[i]+dt > ray.tfar) return false; + Vec3fa Ng_o = Vec3fa(Ng.x[i],Ng.y[i],Ng.z[i]); + if (h0.lower[i] == tp.lower[i]) Ng_o = -Vec3fa(dP0du.x[i],dP0du.y[i],dP0du.z[i]); + if (h1.lower[i] == tp.lower[i]) Ng_o = +Vec3fa(dP3du.x[i],dP3du.y[i],dP3du.z[i]); + BezierCurveHit hit(tp.lower[i]+dt,u[i],Ng_o); + return epilog(hit); + } + + template<typename NativeCurve3ff, typename Ray, typename Epilog> + __forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog) + { + const Vec3fa org = zero; + const Vec3fa dir = ray.dir; + const float length_ray_dir = length(dir); + + /* error of curve evaluations is propertional to largest coordinate */ + const BBox3ff box = curve.bounds(); + const float 
P_err = 16.0f*float(ulp)*reduce_max(max(abs(box.lower),abs(box.upper))); + + for (size_t i=0; i<numJacobianIterations; i++) + { + const Vec3fa Q = madd(Vec3fa(t),dir,org); + //const Vec3fa dQdu = zero; + const Vec3fa dQdt = dir; + const float Q_err = 16.0f*float(ulp)*length_ray_dir*t; // works as org=zero here + + Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu); + //const Vec3fa dPdt = zero; + + const Vec3fa R = Q-P; + const float len_R = length(R); //reduce_max(abs(R)); + const float R_err = max(Q_err,P_err); + const Vec3fa dRdu = /*dQdu*/-dPdu; + const Vec3fa dRdt = dQdt;//-dPdt; + + const Vec3fa T = normalize(dPdu); + const Vec3fa dTdu = dnormalize(dPdu,ddPdu); + //const Vec3fa dTdt = zero; + const float cos_err = P_err/length(dPdu); + + /* Error estimate for dot(R,T): + + dot(R,T) = cos(R,T) |R| |T| + = (cos(R,T) +- cos_error) * (|R| +- |R|_err) * (|T| +- |T|_err) + = cos(R,T)*|R|*|T| + +- cos(R,T)*(|R|*|T|_err + |T|*|R|_err) + +- cos_error*(|R| + |T|) + +- lower order terms + with cos(R,T) being in [0,1] and |T| = 1 we get: + dot(R,T)_err = |R|*|T|_err + |R|_err = cos_error*(|R|+1) + */ + + const float f = dot(R,T); + const float f_err = len_R*P_err + R_err + cos_err*(1.0f+len_R); + const float dfdu = dot(dRdu,T) + dot(R,dTdu); + const float dfdt = dot(dRdt,T);// + dot(R,dTdt); + + const float K = dot(R,R)-sqr(f); + const float dKdu = /*2.0f*/(dot(R,dRdu)-f*dfdu); + const float dKdt = /*2.0f*/(dot(R,dRdt)-f*dfdt); + const float rsqrt_K = rsqrt(K); + + const float g = sqrt(K)-P.w; + const float g_err = R_err + f_err + 16.0f*float(ulp)*box.upper.w; + const float dgdu = /*0.5f*/dKdu*rsqrt_K-dPdu.w; + const float dgdt = /*0.5f*/dKdt*rsqrt_K;//-dPdt.w; + + const LinearSpace2f J = LinearSpace2f(dfdu,dfdt,dgdu,dgdt); + const Vec2f dut = rcp(J)*Vec2f(f,g); + const Vec2f ut = Vec2f(u,t) - dut; + u = ut.x; t = ut.y; + + if (abs(f) < f_err && abs(g) < g_err) + { + t+=dt; + if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs + if (!(u >= 0.0f && u <= 
1.0f)) return false; // rejects NaNs + const Vec3fa R = normalize(Q-P); + const Vec3fa U = madd(Vec3fa(dPdu.w),R,dPdu); + const Vec3fa V = cross(dPdu,R); + BezierCurveHit hit(t,u,cross(V,U)); + return epilog(hit); + } + } + return false; + } + + template<typename NativeCurve3ff, typename Ray, typename Epilog> + bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, + float u0, float u1, unsigned int depth, const Epilog& epilog) + { +#if defined(__AVX__) + typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues + typedef vint8 vintx; + typedef vfloat8 vfloatx; +#else + typedef vbool4 vboolx; + typedef vint4 vintx; + typedef vfloat4 vfloatx; +#endif + typedef Vec3<vfloatx> Vec3vfx; + typedef Vec4<vfloatx> Vec4vfx; + + unsigned int maxDepth = numBezierSubdivisions; + bool found = false; + const Vec3fa org = zero; + const Vec3fa dir = ray.dir; + + unsigned int sptr = 0; + const unsigned int stack_size = numBezierSubdivisions+1; // +1 because of unstable workaround below + struct StackEntry { + vboolx valid; + vfloatx tlower; + float u0; + float u1; + unsigned int depth; + }; + StackEntry stack[stack_size]; + goto entry; + + /* terminate if stack is empty */ + while (sptr) + { + /* pop from stack */ + { + sptr--; + vboolx valid = stack[sptr].valid; + const vfloatx tlower = stack[sptr].tlower; + valid &= tlower+dt <= ray.tfar; + if (none(valid)) continue; + u0 = stack[sptr].u0; + u1 = stack[sptr].u1; + depth = stack[sptr].depth; + const size_t i = select_min(valid,tlower); clear(valid,i); + stack[sptr].valid = valid; + if (any(valid)) sptr++; // there are still items on the stack + + /* process next segment */ + const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1))); + u0 = vu0[i+0]; + u1 = vu0[i+1]; + } + entry: + + /* subdivide curve */ + const float dscale = (u1-u0)*(1.0f/(3.0f*(vfloatx::size-1))); + const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1))); + Vec4vfx P0, dP0du; 
curve.veval(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale); + const Vec4vfx P3 = shift_right_1(P0); + const Vec4vfx dP3du = shift_right_1(dP0du); + const Vec4vfx P1 = P0 + dP0du; + const Vec4vfx P2 = P3 - dP3du; + + /* calculate bounding cylinders */ + const vfloatx rr1 = sqr_point_to_line_distance(Vec3vfx(dP0du),Vec3vfx(P3-P0)); + const vfloatx rr2 = sqr_point_to_line_distance(Vec3vfx(dP3du),Vec3vfx(P3-P0)); + const vfloatx maxr12 = sqrt(max(rr1,rr2)); + const vfloatx one_plus_ulp = 1.0f+2.0f*float(ulp); + const vfloatx one_minus_ulp = 1.0f-2.0f*float(ulp); + vfloatx r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12; + vfloatx r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12; + r_outer = one_plus_ulp*r_outer; + r_inner = max(0.0f,one_minus_ulp*r_inner); + const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer); + const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner); + vboolx valid = true; clear(valid,vfloatx::size-1); + + /* intersect with outer cylinder */ + BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1; + valid &= cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1); + if (none(valid)) continue; + + /* intersect with cap-planes */ + BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt); + tp = embree::intersect(tp,tc_outer); + BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(org,dir); + tp = embree::intersect(tp,h0); + BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(org,dir); + tp = embree::intersect(tp,h1); + valid &= tp.lower <= tp.upper; + if (none(valid)) continue; + + /* clamp and correct u parameter */ + u_outer0 = clamp(u_outer0,vfloatx(0.0f),vfloatx(1.0f)); + u_outer1 = clamp(u_outer1,vfloatx(0.0f),vfloatx(1.0f)); + u_outer0 = lerp(u0,u1,(vfloatx(step)+u_outer0)*(1.0f/float(vfloatx::size))); + u_outer1 = lerp(u0,u1,(vfloatx(step)+u_outer1)*(1.0f/float(vfloatx::size))); + + /* 
intersect with inner cylinder */ + BBox<vfloatx> tc_inner; + vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero; + const vboolx valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1); + + /* at the unstable area we subdivide deeper */ + const vboolx unstable0 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner0)) < 0.3f); + const vboolx unstable1 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner1)) < 0.3f); + + /* subtract the inner interval from the current hit interval */ + BBox<vfloatx> tp0, tp1; + subtract(tp,tc_inner,tp0,tp1); + vboolx valid0 = valid & (tp0.lower <= tp0.upper); + vboolx valid1 = valid & (tp1.lower <= tp1.upper); + if (none(valid0 | valid1)) continue; + + /* iterate over all first hits front to back */ + const vintx termDepth0 = select(unstable0,vintx(maxDepth+1),vintx(maxDepth)); + vboolx recursion_valid0 = valid0 & (depth < termDepth0); + valid0 &= depth >= termDepth0; + + while (any(valid0)) + { + const size_t i = select_min(valid0,tp0.lower); clear(valid0,i); + found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0[i],tp0.lower[i],epilog); + //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog); + valid0 &= tp0.lower+dt <= ray.tfar; + } + valid1 &= tp1.lower+dt <= ray.tfar; + + /* iterate over all second hits front to back */ + const vintx termDepth1 = select(unstable1,vintx(maxDepth+1),vintx(maxDepth)); + vboolx recursion_valid1 = valid1 & (depth < termDepth1); + valid1 &= depth >= termDepth1; + while (any(valid1)) + { + const size_t i = select_min(valid1,tp1.lower); clear(valid1,i); + found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1[i],tp1.upper[i],epilog); + //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog); + valid1 &= tp1.lower+dt <= ray.tfar; + } + + /* 
push valid segments to stack */ + recursion_valid0 &= tp0.lower+dt <= ray.tfar; + recursion_valid1 &= tp1.lower+dt <= ray.tfar; + const vboolx recursion_valid = recursion_valid0 | recursion_valid1; + if (any(recursion_valid)) + { + assert(sptr < stack_size); + stack[sptr].valid = recursion_valid; + stack[sptr].tlower = select(recursion_valid0,tp0.lower,tp1.lower); + stack[sptr].u0 = u0; + stack[sptr].u1 = u1; + stack[sptr].depth = depth+1; + sptr++; + } + } + return found; + } + + template<template<typename Ty> class NativeCurve> + struct SweepCurve1Intersector1 + { + typedef NativeCurve<Vec3ff> NativeCurve3ff; + + template<typename Epilog> + __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, + IntersectContext* context, + const CurveGeometry* geom, const unsigned int primID, + const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, + const Epilog& epilog) + { + STAT3(normal.trav_prims,1,1,1); + + /* move ray closer to make intersection stable */ + NativeCurve3ff curve0(v0,v1,v2,v3); + curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0); + const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir)); + const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f); + const NativeCurve3ff curve1 = curve0-ref; + return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog); + } + }; + + template<template<typename Ty> class NativeCurve, int K> + struct SweepCurve1IntersectorK + { + typedef NativeCurve<Vec3ff> NativeCurve3ff; + + struct Ray1 + { + __forceinline Ray1(RayK<K>& ray, size_t k) + : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {} + + Vec3fa org; + Vec3fa dir; + float _tnear; + float& tfar; + + __forceinline float& tnear() { return _tnear; } + //__forceinline float& tfar() { return _tfar; } + __forceinline const float& tnear() const { return _tnear; } + //__forceinline const float& tfar() const { return 
_tfar; } + + }; + + template<typename Epilog> + __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k, + IntersectContext* context, + const CurveGeometry* geom, const unsigned int primID, + const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, + const Epilog& epilog) + { + STAT3(normal.trav_prims,1,1,1); + Ray1 ray(vray,k); + + /* move ray closer to make intersection stable */ + NativeCurve3ff curve0(v0,v1,v2,v3); + curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0); + const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir)); + const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f); + const NativeCurve3ff curve1 = curve0-ref; + return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog); + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h new file mode 100644 index 0000000000..e1f4238130 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h @@ -0,0 +1,671 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "primitive.h" +#include "../subdiv/bezier_curve.h" +#include "../common/primref.h" +#include "curve_intersector_precalculations.h" +#include "../bvh/node_intersector1.h" +#include "../bvh/node_intersector_packet.h" + +#include "intersector_epilog.h" + +#include "../subdiv/bezier_curve.h" +#include "../subdiv/bspline_curve.h" +#include "../subdiv/hermite_curve.h" +#include "../subdiv/catmullrom_curve.h" + +#include "spherei_intersector.h" +#include "disci_intersector.h" + +#include "linei_intersector.h" +#include "roundlinei_intersector.h" +#include "conelinei_intersector.h" + +#include "curveNi_intersector.h" +#include "curveNv_intersector.h" +#include "curveNi_mb_intersector.h" + +#include "curve_intersector_distance.h" +#include 
"curve_intersector_ribbon.h" +#include "curve_intersector_oriented.h" +#include "curve_intersector_sweep.h" + +namespace embree +{ + struct VirtualCurveIntersector + { + typedef void (*Intersect1Ty)(void* pre, void* ray, IntersectContext* context, const void* primitive); + typedef bool (*Occluded1Ty )(void* pre, void* ray, IntersectContext* context, const void* primitive); + + typedef void (*Intersect4Ty)(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); + typedef bool (*Occluded4Ty) (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); + + typedef void (*Intersect8Ty)(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); + typedef bool (*Occluded8Ty) (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); + + typedef void (*Intersect16Ty)(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); + typedef bool (*Occluded16Ty) (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); + + public: + struct Intersectors + { + Intersectors() {} // WARNING: Do not zero initialize this, as we otherwise get problems with thread unsafe local static variable initialization (e.g. on VS2013) in curve_intersector_virtual.cpp. 
+ + template<int K> void intersect(void* pre, void* ray, IntersectContext* context, const void* primitive); + template<int K> bool occluded (void* pre, void* ray, IntersectContext* context, const void* primitive); + + template<int K> void intersect(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); + template<int K> bool occluded (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); + + public: + Intersect1Ty intersect1; + Occluded1Ty occluded1; + Intersect4Ty intersect4; + Occluded4Ty occluded4; + Intersect8Ty intersect8; + Occluded8Ty occluded8; + Intersect16Ty intersect16; + Occluded16Ty occluded16; + }; + + Intersectors vtbl[Geometry::GTY_END]; + }; + + template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<1> (void* pre, void* ray, IntersectContext* context, const void* primitive) { assert(intersect1); intersect1(pre,ray,context,primitive); } + template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<1> (void* pre, void* ray, IntersectContext* context, const void* primitive) { assert(occluded1); return occluded1(pre,ray,context,primitive); } + + template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<4>(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(intersect4); intersect4(pre,ray,k,context,primitive); } + template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<4> (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(occluded4); return occluded4(pre,ray,k,context,primitive); } + +#if defined(__AVX__) + template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<8>(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(intersect8); intersect8(pre,ray,k,context,primitive); } + template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<8> (void* pre, void* ray, 
size_t k, IntersectContext* context, const void* primitive) { assert(occluded8); return occluded8(pre,ray,k,context,primitive); } +#endif + +#if defined(__AVX512F__) + template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<16>(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(intersect16); intersect16(pre,ray,k,context,primitive); } + template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<16> (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(occluded16); return occluded16(pre,ray,k,context,primitive); } +#endif + + namespace isa + { + struct VirtualCurveIntersector1 + { + typedef unsigned char Primitive; + typedef CurvePrecalculations1 Precalculations; + + template<int N, int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + assert(num == 1); + RTCGeometryType ty = (RTCGeometryType)(*prim); + assert(This->leafIntersector); + VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty]; + leafIntersector.intersect<1>(&pre,&ray,context,prim); + } + + template<int N, int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + assert(num == 1); + RTCGeometryType ty = (RTCGeometryType)(*prim); + assert(This->leafIntersector); + VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty]; + return leafIntersector.occluded<1>(&pre,&ray,context,prim); + } + }; + + template<int K> + struct VirtualCurveIntersectorK + { + typedef unsigned char Primitive; + typedef 
CurvePrecalculationsK<K> Precalculations; + + template<bool robust> + static __forceinline void intersect(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) + { + assert(num == 1); + RTCGeometryType ty = (RTCGeometryType)(*prim); + assert(This->leafIntersector); + VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty]; + size_t mask = movemask(valid_i); + while (mask) leafIntersector.intersect<K>(&pre,&ray,bscf(mask),context,prim); + } + + template<bool robust> + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) + { + assert(num == 1); + RTCGeometryType ty = (RTCGeometryType)(*prim); + assert(This->leafIntersector); + VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty]; + vbool<K> valid_o = false; + size_t mask = movemask(valid_i); + while (mask) { + size_t k = bscf(mask); + if (leafIntersector.occluded<K>(&pre,&ray,k,context,prim)) + set(valid_o, k); + } + return valid_o; + } + + template<int N, int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + assert(num == 1); + RTCGeometryType ty = (RTCGeometryType)(*prim); + assert(This->leafIntersector); + VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty]; + leafIntersector.intersect<K>(&pre,&ray,k,context,prim); + } + + template<int N, int Nx, bool robust> + static 
__forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + assert(num == 1); + RTCGeometryType ty = (RTCGeometryType)(*prim); + assert(This->leafIntersector); + VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty]; + return leafIntersector.occluded<K>(&pre,&ray,k,context,prim); + } + }; + + template<int N> + static VirtualCurveIntersector::Intersectors LinearRoundConeNiIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &RoundLinearCurveMiIntersector1<N,N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &RoundLinearCurveMiIntersector1<N,N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &RoundLinearCurveMiIntersectorK<N,N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &RoundLinearCurveMiIntersectorK<N,N,4,true>::occluded; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&RoundLinearCurveMiIntersectorK<N,N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &RoundLinearCurveMiIntersectorK<N,N,8,true>::occluded; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&RoundLinearCurveMiIntersectorK<N,N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &RoundLinearCurveMiIntersectorK<N,N,16,true>::occluded; +#endif + return intersectors; + } + + template<int N> + static VirtualCurveIntersector::Intersectors LinearConeNiIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) 
&ConeCurveMiIntersector1<N,N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &ConeCurveMiIntersector1<N,N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &ConeCurveMiIntersectorK<N,N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &ConeCurveMiIntersectorK<N,N,4,true>::occluded; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&ConeCurveMiIntersectorK<N,N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &ConeCurveMiIntersectorK<N,N,8,true>::occluded; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&ConeCurveMiIntersectorK<N,N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &ConeCurveMiIntersectorK<N,N,16,true>::occluded; +#endif + return intersectors; + } + + template<int N> + static VirtualCurveIntersector::Intersectors LinearRoundConeNiMBIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &RoundLinearCurveMiMBIntersector1<N,N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &RoundLinearCurveMiMBIntersector1<N,N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &RoundLinearCurveMiMBIntersectorK<N,N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &RoundLinearCurveMiMBIntersectorK<N,N,4,true>::occluded; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&RoundLinearCurveMiMBIntersectorK<N,N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &RoundLinearCurveMiMBIntersectorK<N,N,8,true>::occluded; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = 
(VirtualCurveIntersector::Intersect16Ty)&RoundLinearCurveMiMBIntersectorK<N,N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &RoundLinearCurveMiMBIntersectorK<N,N,16,true>::occluded; +#endif + return intersectors; + } + + template<int N> + static VirtualCurveIntersector::Intersectors LinearConeNiMBIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &ConeCurveMiMBIntersector1<N,N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &ConeCurveMiMBIntersector1<N,N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &ConeCurveMiMBIntersectorK<N,N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &ConeCurveMiMBIntersectorK<N,N,4,true>::occluded; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&ConeCurveMiMBIntersectorK<N,N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &ConeCurveMiMBIntersectorK<N,N,8,true>::occluded; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&ConeCurveMiMBIntersectorK<N,N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &ConeCurveMiMBIntersectorK<N,N,16,true>::occluded; +#endif + return intersectors; + } + + + template<int N> + static VirtualCurveIntersector::Intersectors LinearRibbonNiIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &FlatLinearCurveMiIntersector1<N,N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &FlatLinearCurveMiIntersector1<N,N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &FlatLinearCurveMiIntersectorK<N,N,4,true>::intersect; + intersectors.occluded4 = 
(VirtualCurveIntersector::Occluded4Ty) &FlatLinearCurveMiIntersectorK<N,N,4,true>::occluded; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&FlatLinearCurveMiIntersectorK<N,N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &FlatLinearCurveMiIntersectorK<N,N,8,true>::occluded; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&FlatLinearCurveMiIntersectorK<N,N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &FlatLinearCurveMiIntersectorK<N,N,16,true>::occluded; +#endif + return intersectors; + } + + template<int N> + static VirtualCurveIntersector::Intersectors LinearRibbonNiMBIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &FlatLinearCurveMiMBIntersector1<N,N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &FlatLinearCurveMiMBIntersector1<N,N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &FlatLinearCurveMiMBIntersectorK<N,N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &FlatLinearCurveMiMBIntersectorK<N,N,4,true>::occluded; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&FlatLinearCurveMiMBIntersectorK<N,N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &FlatLinearCurveMiMBIntersectorK<N,N,8,true>::occluded; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&FlatLinearCurveMiMBIntersectorK<N,N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &FlatLinearCurveMiMBIntersectorK<N,N,16,true>::occluded; +#endif + return intersectors; + } + + template<int N> + static VirtualCurveIntersector::Intersectors SphereNiIntersectors() 
+ { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &SphereMiIntersector1<N,N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &SphereMiIntersector1<N,N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &SphereMiIntersectorK<N,N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &SphereMiIntersectorK<N,N,4,true>::occluded; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&SphereMiIntersectorK<N,N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &SphereMiIntersectorK<N,N,8,true>::occluded; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&SphereMiIntersectorK<N,N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &SphereMiIntersectorK<N,N,16,true>::occluded; +#endif + return intersectors; + } + + /* Returns an Intersectors table: the intersect1/occluded1 slots point at SphereMiMBIntersector1<N,N,true>, the 4/8/16 slots at SphereMiMBIntersectorK<N,N,K,true>. The 8 and 16 slots are assigned here only when __AVX__ / __AVX512F__ is defined; each address is C-cast to the slot's Intersect*Ty / Occluded*Ty type. */ template<int N> + static VirtualCurveIntersector::Intersectors SphereNiMBIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &SphereMiMBIntersector1<N,N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &SphereMiMBIntersector1<N,N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &SphereMiMBIntersectorK<N,N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &SphereMiMBIntersectorK<N,N,4,true>::occluded; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&SphereMiMBIntersectorK<N,N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &SphereMiMBIntersectorK<N,N,8,true>::occluded; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 =
(VirtualCurveIntersector::Intersect16Ty)&SphereMiMBIntersectorK<N,N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &SphereMiMBIntersectorK<N,N,16,true>::occluded; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to DiscMiIntersector1<N,N,true> (slot 1) and DiscMiIntersectorK<N,N,K,true> (slots 4, 8, 16); the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<int N> + static VirtualCurveIntersector::Intersectors DiscNiIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &DiscMiIntersector1<N,N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &DiscMiIntersector1<N,N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &DiscMiIntersectorK<N,N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &DiscMiIntersectorK<N,N,4,true>::occluded; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&DiscMiIntersectorK<N,N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &DiscMiIntersectorK<N,N,8,true>::occluded; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&DiscMiIntersectorK<N,N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &DiscMiIntersectorK<N,N,16,true>::occluded; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to DiscMiMBIntersector1<N,N,true> and DiscMiMBIntersectorK<N,N,K,true>; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<int N> + static VirtualCurveIntersector::Intersectors DiscNiMBIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &DiscMiMBIntersector1<N,N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &DiscMiMBIntersector1<N,N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &DiscMiMBIntersectorK<N,N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &DiscMiMBIntersectorK<N,N,4,true>::occluded; +#if defined(__AVX__) +
intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&DiscMiMBIntersectorK<N,N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &DiscMiMBIntersectorK<N,N,8,true>::occluded; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&DiscMiMBIntersectorK<N,N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &DiscMiMBIntersectorK<N,N,16,true>::occluded; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to OrientedDiscMiIntersector1<N,N,true> and OrientedDiscMiIntersectorK<N,N,K,true>; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<int N> + static VirtualCurveIntersector::Intersectors OrientedDiscNiIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &OrientedDiscMiIntersector1<N,N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &OrientedDiscMiIntersector1<N,N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &OrientedDiscMiIntersectorK<N,N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &OrientedDiscMiIntersectorK<N,N,4,true>::occluded; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&OrientedDiscMiIntersectorK<N,N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &OrientedDiscMiIntersectorK<N,N,8,true>::occluded; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&OrientedDiscMiIntersectorK<N,N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &OrientedDiscMiIntersectorK<N,N,16,true>::occluded; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to OrientedDiscMiMBIntersector1<N,N,true> and OrientedDiscMiMBIntersectorK<N,N,K,true>; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<int N> + static VirtualCurveIntersector::Intersectors OrientedDiscNiMBIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &OrientedDiscMiMBIntersector1<N,N,true>::intersect; +
intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &OrientedDiscMiMBIntersector1<N,N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &OrientedDiscMiMBIntersectorK<N,N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &OrientedDiscMiMBIntersectorK<N,N,4,true>::occluded; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&OrientedDiscMiMBIntersectorK<N,N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &OrientedDiscMiMBIntersectorK<N,N,8,true>::occluded; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&OrientedDiscMiMBIntersectorK<N,N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &OrientedDiscMiMBIntersectorK<N,N,16,true>::occluded; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to the intersect_t / occluded_t members of CurveNiIntersector1<N> and CurveNiIntersectorK<N,K>, instantiated with RibbonCurve1Intersector1<Curve> / RibbonCurve1IntersectorK<Curve,K> and the *EpilogMU<VSIZEX,...> epilog types; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<template<typename Ty> class Curve, int N> + static VirtualCurveIntersector::Intersectors RibbonNiIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &CurveNiIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template
intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_t <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to intersect_t / occluded_t of CurveNvIntersector1<N> and CurveNvIntersectorK<N,K>, instantiated with the RibbonCurve1Intersector types and the *EpilogMU<VSIZEX,...> epilogs; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<template<typename Ty> class Curve, int N> + static VirtualCurveIntersector::Intersectors RibbonNvIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNvIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNvIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &CurveNvIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNvIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNvIntersectorK<N,8>::template intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNvIntersectorK<N,8>::template occluded_t
<RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNvIntersectorK<N,16>::template intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNvIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to intersect_t / occluded_t of CurveNiMBIntersector1<N> and CurveNiMBIntersectorK<N,K>, instantiated with the RibbonCurve1Intersector types and the *EpilogMU<VSIZEX,...> epilogs; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<template<typename Ty> class Curve, int N> + static VirtualCurveIntersector::Intersectors RibbonNiMBIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &CurveNiMBIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_t <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template
intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to intersect_t / occluded_t of CurveNiIntersector1<N> and CurveNiIntersectorK<N,K>, instantiated with SweepCurve1Intersector1<Curve> / SweepCurve1IntersectorK<Curve,K> and the *Epilog1<...,true> epilogs; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<template<typename Ty> class Curve, int N> + static VirtualCurveIntersector::Intersectors CurveNiIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; +#endif + return
intersectors; + } + + /* Returns an Intersectors table wired to intersect_t / occluded_t of CurveNvIntersector1<N> and CurveNvIntersectorK<N,K>, instantiated with the SweepCurve1Intersector types and the *Epilog1<...,true> epilogs; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<template<typename Ty> class Curve, int N> + static VirtualCurveIntersector::Intersectors CurveNvIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNvIntersector1<N>::template intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNvIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNvIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNvIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNvIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNvIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNvIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNvIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to intersect_t / occluded_t of CurveNiMBIntersector1<N> and CurveNiMBIntersectorK<N,K>, instantiated with the SweepCurve1Intersector types and the *Epilog1<...,true> epilogs; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<template<typename Ty> class Curve, int N> + static VirtualCurveIntersector::Intersectors CurveNiMBIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template
intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to the intersect_n / occluded_n members of CurveNiIntersector1<N> and CurveNiIntersectorK<N,K>, instantiated with OrientedCurve1Intersector1<Curve> / OrientedCurve1IntersectorK<Curve,K> and the *Epilog1<...,true> epilogs; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<template<typename Ty> class Curve, int N> + static VirtualCurveIntersector::Intersectors OrientedCurveNiIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_n<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_n <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; + intersectors.intersect4 =
(VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_n<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_n <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_n<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_n <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_n<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_n <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to intersect_n / occluded_n of CurveNiMBIntersector1<N> and CurveNiMBIntersectorK<N,K>, instantiated with the OrientedCurve1Intersector types and the *Epilog1<...,true> epilogs; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<template<typename Ty> class Curve, int N> + static VirtualCurveIntersector::Intersectors OrientedCurveNiMBIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_n<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_n <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_n<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template
occluded_n <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_n<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_n <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_n<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_n <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to the intersect_h / occluded_h members of CurveNiIntersector1<N> and CurveNiIntersectorK<N,K>, instantiated with the RibbonCurve1Intersector types and the *EpilogMU<VSIZEX,...> epilogs; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<template<typename Ty> class Curve, int N> + static VirtualCurveIntersector::Intersectors HermiteRibbonNiIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_h<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_h <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_h<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_h <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template
intersect_h<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_h <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_h<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_h <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to intersect_h / occluded_h of CurveNiMBIntersector1<N> and CurveNiMBIntersectorK<N,K>, instantiated with the RibbonCurve1Intersector types and the *EpilogMU<VSIZEX,...> epilogs; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<template<typename Ty> class Curve, int N> + static VirtualCurveIntersector::Intersectors HermiteRibbonNiMBIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_h<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_h <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_h<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_h <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_h<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_h
<RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_h<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_h <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to intersect_h / occluded_h of CurveNiIntersector1<N> and CurveNiIntersectorK<N,K>, instantiated with the SweepCurve1Intersector types and the *Epilog1<...,true> epilogs; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<template<typename Ty> class Curve, int N> + static VirtualCurveIntersector::Intersectors HermiteCurveNiIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_h<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_h <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_h<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_h <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_h<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_h <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_h<SweepCurve1IntersectorK<Curve,16>,
Intersect1KEpilog1<16,true> >; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_h <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to intersect_h / occluded_h of CurveNiMBIntersector1<N> and CurveNiMBIntersectorK<N,K>, instantiated with the SweepCurve1Intersector types and the *Epilog1<...,true> epilogs; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<template<typename Ty> class Curve, int N> + static VirtualCurveIntersector::Intersectors HermiteCurveNiMBIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_h<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_h <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_h<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_h <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_h<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_h <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_h<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_h <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to the intersect_hn / occluded_hn members of CurveNiIntersector1<N> and CurveNiIntersectorK<N,K>, instantiated with the OrientedCurve1Intersector types and the *Epilog1<...,true> epilogs; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<template<typename
Ty> class Curve, int N> + static VirtualCurveIntersector::Intersectors HermiteOrientedCurveNiIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_hn<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_hn <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_hn<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_hn <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_hn<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_hn <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_hn<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_hn <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; +#endif + return intersectors; + } + + /* Returns an Intersectors table wired to intersect_hn / occluded_hn of CurveNiMBIntersector1<N> and CurveNiMBIntersectorK<N,K>, instantiated with the OrientedCurve1Intersector types and the *Epilog1<...,true> epilogs; the 8 and 16 slots are assigned only under __AVX__ / __AVX512F__. */ template<template<typename Ty> class Curve, int N> + static VirtualCurveIntersector::Intersectors HermiteOrientedCurveNiMBIntersectors() + { + VirtualCurveIntersector::Intersectors intersectors; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty)
&CurveNiMBIntersector1<N>::template intersect_hn<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_hn <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_hn<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_hn <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; +#if defined(__AVX__) + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_hn<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_hn <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; +#endif +#if defined(__AVX512F__) + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_hn<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_hn <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; +#endif + return intersectors; + } + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h new file mode 100644 index 0000000000..69cf612275 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h @@ -0,0 +1,21 @@ +// Copyright 2020 Light Transport Entertainment Inc.
+// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "curve_intersector_virtual.h" + +namespace embree +{ + namespace isa + { /* Declarations only: the bodies are not in this header. Each function receives the VirtualCurveIntersector by non-const reference. NOTE(review): the names suggest these install the Bezier curve intersectors for the 4i / 4v / 4iMB variants into prim - confirm against the definitions. "Interector" (sic) is the spelling declared here; keep it in sync with the definitions. */ + void AddVirtualCurveBezierCurveInterector4i(VirtualCurveIntersector &prim); + void AddVirtualCurveBezierCurveInterector4v(VirtualCurveIntersector &prim); + void AddVirtualCurveBezierCurveInterector4iMB(VirtualCurveIntersector &prim); +#if defined(__AVX__) /* the 8i / 8v / 8iMB declarations exist only when __AVX__ is defined */ + void AddVirtualCurveBezierCurveInterector8i(VirtualCurveIntersector &prim); + void AddVirtualCurveBezierCurveInterector8v(VirtualCurveIntersector &prim); + void AddVirtualCurveBezierCurveInterector8iMB(VirtualCurveIntersector &prim); +#endif + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h new file mode 100644 index 0000000000..d37e41098e --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h @@ -0,0 +1,21 @@ +// Copyright 2020 Light Transport Entertainment Inc.
+// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "curve_intersector_virtual.h" + +namespace embree +{ + namespace isa + { /* Declarations only: the bodies are not in this header. Each function receives the VirtualCurveIntersector by non-const reference. NOTE(review): the names suggest these install the B-spline curve intersectors for the 4i / 4v / 4iMB variants into prim - confirm against the definitions. "Interector" (sic) is the spelling declared here; keep it in sync with the definitions. */ + void AddVirtualCurveBSplineCurveInterector4i(VirtualCurveIntersector &prim); + void AddVirtualCurveBSplineCurveInterector4v(VirtualCurveIntersector &prim); + void AddVirtualCurveBSplineCurveInterector4iMB(VirtualCurveIntersector &prim); +#if defined(__AVX__) /* the 8i / 8v / 8iMB declarations exist only when __AVX__ is defined */ + void AddVirtualCurveBSplineCurveInterector8i(VirtualCurveIntersector &prim); + void AddVirtualCurveBSplineCurveInterector8v(VirtualCurveIntersector &prim); + void AddVirtualCurveBSplineCurveInterector8iMB(VirtualCurveIntersector &prim); +#endif + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h new file mode 100644 index 0000000000..a133a11d63 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h @@ -0,0 +1,21 @@ +// Copyright 2020 Light Transport Entertainment Inc.
+// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "curve_intersector_virtual.h" + +namespace embree +{ + namespace isa + { /* Declarations only: the bodies are not in this header. Each function receives the VirtualCurveIntersector by non-const reference. NOTE(review): the names suggest these install the Catmull-Rom curve intersectors for the 4i / 4v / 4iMB variants into prim - confirm against the definitions. "Interector" (sic) is the spelling declared here; keep it in sync with the definitions. */ + void AddVirtualCurveCatmullRomCurveInterector4i(VirtualCurveIntersector &prim); + void AddVirtualCurveCatmullRomCurveInterector4v(VirtualCurveIntersector &prim); + void AddVirtualCurveCatmullRomCurveInterector4iMB(VirtualCurveIntersector &prim); +#if defined(__AVX__) /* the 8i / 8v / 8iMB declarations exist only when __AVX__ is defined */ + void AddVirtualCurveCatmullRomCurveInterector8i(VirtualCurveIntersector &prim); + void AddVirtualCurveCatmullRomCurveInterector8v(VirtualCurveIntersector &prim); + void AddVirtualCurveCatmullRomCurveInterector8iMB(VirtualCurveIntersector &prim); +#endif + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h new file mode 100644 index 0000000000..9aec35da45 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h @@ -0,0 +1,21 @@ +// Copyright 2020 Light Transport Entertainment Inc.
+// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "curve_intersector_virtual.h" + +namespace embree +{ + namespace isa + { /* Declarations only: the bodies are not in this header. Each function receives the VirtualCurveIntersector by non-const reference. NOTE(review): the names suggest these install the Hermite curve intersectors for the 4i / 4v / 4iMB variants into prim - confirm against the definitions. "Interector" (sic) is the spelling declared here; keep it in sync with the definitions. */ + void AddVirtualCurveHermiteCurveInterector4i(VirtualCurveIntersector &prim); + void AddVirtualCurveHermiteCurveInterector4v(VirtualCurveIntersector &prim); + void AddVirtualCurveHermiteCurveInterector4iMB(VirtualCurveIntersector &prim); +#if defined(__AVX__) /* the 8i / 8v / 8iMB declarations exist only when __AVX__ is defined */ + void AddVirtualCurveHermiteCurveInterector8i(VirtualCurveIntersector &prim); + void AddVirtualCurveHermiteCurveInterector8v(VirtualCurveIntersector &prim); + void AddVirtualCurveHermiteCurveInterector8iMB(VirtualCurveIntersector &prim); +#endif + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h new file mode 100644 index 0000000000..dd37d194f5 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h @@ -0,0 +1,21 @@ +// Copyright 2020 Light Transport Entertainment Inc.
+// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "curve_intersector_virtual.h" + +namespace embree +{ + namespace isa + { + void AddVirtualCurveLinearCurveInterector4i(VirtualCurveIntersector &prim); + void AddVirtualCurveLinearCurveInterector4v(VirtualCurveIntersector &prim); + void AddVirtualCurveLinearCurveInterector4iMB(VirtualCurveIntersector &prim); +#if defined(__AVX__) + void AddVirtualCurveLinearCurveInterector8i(VirtualCurveIntersector &prim); + void AddVirtualCurveLinearCurveInterector8v(VirtualCurveIntersector &prim); + void AddVirtualCurveLinearCurveInterector8iMB(VirtualCurveIntersector &prim); +#endif + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h new file mode 100644 index 0000000000..fe5ceed840 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h @@ -0,0 +1,22 @@ +// Copyright 2020 Light Transport Entertainment Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "curve_intersector_virtual.h" + +namespace embree +{ + namespace isa + { + void AddVirtualCurvePointInterector4i(VirtualCurveIntersector &prim); + void AddVirtualCurvePointInterector4v(VirtualCurveIntersector &prim); + void AddVirtualCurvePointInterector4iMB(VirtualCurveIntersector &prim); + +#if defined (__AVX__) + void AddVirtualCurvePointInterector8i(VirtualCurveIntersector &prim); + void AddVirtualCurvePointInterector8v(VirtualCurveIntersector &prim); + void AddVirtualCurvePointInterector8iMB(VirtualCurveIntersector &prim); +#endif + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/cylinder.h b/thirdparty/embree-aarch64/kernels/geometry/cylinder.h new file mode 100644 index 0000000000..39a582864c --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/cylinder.h @@ -0,0 +1,223 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" + +namespace embree +{ + namespace isa + { + struct Cylinder + { + const Vec3fa p0; //!< start location + const Vec3fa p1; //!< end position + const float rr; //!< squared radius of cylinder + + __forceinline Cylinder(const Vec3fa& p0, const Vec3fa& p1, const float r) + : p0(p0), p1(p1), rr(sqr(r)) {} + + __forceinline Cylinder(const Vec3fa& p0, const Vec3fa& p1, const float rr, bool) + : p0(p0), p1(p1), rr(rr) {} + + __forceinline bool intersect(const Vec3fa& org, + const Vec3fa& dir, + BBox1f& t_o, + float& u0_o, Vec3fa& Ng0_o, + float& u1_o, Vec3fa& Ng1_o) const + { + /* calculate quadratic equation to solve */ + const float rl = rcp_length(p1-p0); + const Vec3fa P0 = p0, dP = (p1-p0)*rl; + const Vec3fa O = org-P0, dO = dir; + + const float dOdO = dot(dO,dO); + const float OdO = dot(dO,O); + const float OO = dot(O,O); + const float dOz = dot(dP,dO); + const float Oz = dot(dP,O); + + const float A = dOdO - sqr(dOz); + const float B = 2.0f * (OdO - dOz*Oz); + const float C 
= OO - sqr(Oz) - rr; + + /* we miss the cylinder if determinant is smaller than zero */ + const float D = B*B - 4.0f*A*C; + if (D < 0.0f) { + t_o = BBox1f(pos_inf,neg_inf); + return false; + } + + /* special case for rays that are parallel to the cylinder */ + const float eps = 16.0f*float(ulp)*max(abs(dOdO),abs(sqr(dOz))); + if (abs(A) < eps) + { + if (C <= 0.0f) { + t_o = BBox1f(neg_inf,pos_inf); + return true; + } else { + t_o = BBox1f(pos_inf,neg_inf); + return false; + } + } + + /* standard case for rays that are not parallel to the cylinder */ + const float Q = sqrt(D); + const float rcp_2A = rcp(2.0f*A); + const float t0 = (-B-Q)*rcp_2A; + const float t1 = (-B+Q)*rcp_2A; + + /* calculates u and Ng for near hit */ + { + u0_o = madd(t0,dOz,Oz)*rl; + const Vec3fa Pr = t0*dir; + const Vec3fa Pl = madd(u0_o,p1-p0,p0); + Ng0_o = Pr-Pl; + } + + /* calculates u and Ng for far hit */ + { + u1_o = madd(t1,dOz,Oz)*rl; + const Vec3fa Pr = t1*dir; + const Vec3fa Pl = madd(u1_o,p1-p0,p0); + Ng1_o = Pr-Pl; + } + + t_o.lower = t0; + t_o.upper = t1; + return true; + } + + __forceinline bool intersect(const Vec3fa& org_i, const Vec3fa& dir, BBox1f& t_o) const + { + float u0_o; Vec3fa Ng0_o; + float u1_o; Vec3fa Ng1_o; + return intersect(org_i,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o); + } + + static bool verify(const size_t id, const Cylinder& cylinder, const RayHit& ray, bool shouldhit, const float t0, const float t1) + { + float eps = 0.001f; + BBox1f t; bool hit; + hit = cylinder.intersect(ray.org,ray.dir,t); + + bool failed = hit != shouldhit; + if (shouldhit) failed |= std::isinf(t0) ? t0 != t.lower : abs(t0-t.lower) > eps; + if (shouldhit) failed |= std::isinf(t1) ? 
t1 != t.upper : abs(t1-t.upper) > eps; + if (!failed) return true; + embree_cout << "Cylinder test " << id << " failed: cylinder = " << cylinder << ", ray = " << ray << ", hit = " << hit << ", t = " << t << embree_endl; + return false; + } + + /* verify cylinder class */ + static bool verify() + { + bool passed = true; + const Cylinder cylinder(Vec3fa(0.0f,0.0f,0.0f),Vec3fa(1.0f,0.0f,0.0f),1.0f); + passed &= verify(0,cylinder,RayHit(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f); + passed &= verify(1,cylinder,RayHit(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f); + passed &= verify(2,cylinder,RayHit(Vec3fa(+2.0f,1.0f,2.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f); + passed &= verify(3,cylinder,RayHit(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf); + passed &= verify(4,cylinder,RayHit(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf); + passed &= verify(5,cylinder,RayHit(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf); + passed &= verify(6,cylinder,RayHit(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf); + return passed; + } + + /*! 
output operator */ + friend __forceinline embree_ostream operator<<(embree_ostream cout, const Cylinder& c) { + return cout << "Cylinder { p0 = " << c.p0 << ", p1 = " << c.p1 << ", r = " << sqrtf(c.rr) << "}"; + } + }; + + template<int N> + struct CylinderN + { + const Vec3vf<N> p0; //!< start location + const Vec3vf<N> p1; //!< end position + const vfloat<N> rr; //!< squared radius of cylinder + + __forceinline CylinderN(const Vec3vf<N>& p0, const Vec3vf<N>& p1, const vfloat<N>& r) + : p0(p0), p1(p1), rr(sqr(r)) {} + + __forceinline CylinderN(const Vec3vf<N>& p0, const Vec3vf<N>& p1, const vfloat<N>& rr, bool) + : p0(p0), p1(p1), rr(rr) {} + + + __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir, + BBox<vfloat<N>>& t_o, + vfloat<N>& u0_o, Vec3vf<N>& Ng0_o, + vfloat<N>& u1_o, Vec3vf<N>& Ng1_o) const + { + /* calculate quadratic equation to solve */ + const vfloat<N> rl = rcp_length(p1-p0); + const Vec3vf<N> P0 = p0, dP = (p1-p0)*rl; + const Vec3vf<N> O = Vec3vf<N>(org)-P0, dO = dir; + + const vfloat<N> dOdO = dot(dO,dO); + const vfloat<N> OdO = dot(dO,O); + const vfloat<N> OO = dot(O,O); + const vfloat<N> dOz = dot(dP,dO); + const vfloat<N> Oz = dot(dP,O); + + const vfloat<N> A = dOdO - sqr(dOz); + const vfloat<N> B = 2.0f * (OdO - dOz*Oz); + const vfloat<N> C = OO - sqr(Oz) - rr; + + /* we miss the cylinder if determinant is smaller than zero */ + const vfloat<N> D = B*B - 4.0f*A*C; + vbool<N> valid = D >= 0.0f; + if (none(valid)) { + t_o = BBox<vfloat<N>>(empty); + return valid; + } + + /* standard case for rays that are not parallel to the cylinder */ + const vfloat<N> Q = sqrt(D); + const vfloat<N> rcp_2A = rcp(2.0f*A); + const vfloat<N> t0 = (-B-Q)*rcp_2A; + const vfloat<N> t1 = (-B+Q)*rcp_2A; + + /* calculates u and Ng for near hit */ + { + u0_o = madd(t0,dOz,Oz)*rl; + const Vec3vf<N> Pr = t0*Vec3vf<N>(dir); + const Vec3vf<N> Pl = madd(u0_o,p1-p0,p0); + Ng0_o = Pr-Pl; + } + + /* calculates u and Ng for far hit */ + { + u1_o = 
madd(t1,dOz,Oz)*rl; + const Vec3vf<N> Pr = t1*Vec3vf<N>(dir); + const Vec3vf<N> Pl = madd(u1_o,p1-p0,p0); + Ng1_o = Pr-Pl; + } + + t_o.lower = select(valid, t0, vfloat<N>(pos_inf)); + t_o.upper = select(valid, t1, vfloat<N>(neg_inf)); + + /* special case for rays that are parallel to the cylinder */ + const vfloat<N> eps = 16.0f*float(ulp)*max(abs(dOdO),abs(sqr(dOz))); + vbool<N> validt = valid & (abs(A) < eps); + if (unlikely(any(validt))) + { + vbool<N> inside = C <= 0.0f; + t_o.lower = select(validt,select(inside,vfloat<N>(neg_inf),vfloat<N>(pos_inf)),t_o.lower); + t_o.upper = select(validt,select(inside,vfloat<N>(pos_inf),vfloat<N>(neg_inf)),t_o.upper); + valid &= !validt | inside; + } + return valid; + } + + __forceinline vbool<N> intersect(const Vec3fa& org_i, const Vec3fa& dir, BBox<vfloat<N>>& t_o) const + { + vfloat<N> u0_o; Vec3vf<N> Ng0_o; + vfloat<N> u1_o; Vec3vf<N> Ng1_o; + return intersect(org_i,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o); + } + }; + } +} + diff --git a/thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h new file mode 100644 index 0000000000..e8305780e5 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h @@ -0,0 +1,216 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" +#include "../common/scene_points.h" +#include "curve_intersector_precalculations.h" + +namespace embree +{ + namespace isa + { + template<int M> + struct DiscIntersectorHitM + { + __forceinline DiscIntersectorHitM() {} + + __forceinline DiscIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng) + : vu(u), vv(v), vt(t), vNg(Ng) + { + } + + __forceinline void finalize() {} + + __forceinline Vec2f uv(const size_t i) const + { + return Vec2f(vu[i], vv[i]); + } + __forceinline float t(const size_t i) const + { + return vt[i]; + } + __forceinline Vec3fa Ng(const 
size_t i) const + { + return Vec3fa(vNg.x[i], vNg.y[i], vNg.z[i]); + } + + public: + vfloat<M> vu; + vfloat<M> vv; + vfloat<M> vt; + Vec3vf<M> vNg; + }; + + template<int M> + struct DiscIntersector1 + { + typedef CurvePrecalculations1 Precalculations; + + template<typename Epilog> + static __forceinline bool intersect( + const vbool<M>& valid_i, + Ray& ray, + IntersectContext* context, + const Points* geom, + const Precalculations& pre, + const Vec4vf<M>& v0i, + const Epilog& epilog) + { + vbool<M> valid = valid_i; + + const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); + const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z); + const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir)); + + const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec3vf<M> center = v0.xyz(); + const vfloat<M> radius = v0.w; + + const Vec3vf<M> c0 = center - ray_org; + const vfloat<M> projC0 = dot(c0, ray_dir) * rd2; + + valid &= (vfloat<M>(ray.tnear()) <= projC0) & (projC0 <= vfloat<M>(ray.tfar)); + if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) + valid &= projC0 > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR) * radius * pre.depth_scale; // ignore self intersections + if (unlikely(none(valid))) + return false; + + const Vec3vf<M> perp = c0 - projC0 * ray_dir; + const vfloat<M> l2 = dot(perp, perp); + const vfloat<M> r2 = radius * radius; + valid &= (l2 <= r2); + if (unlikely(none(valid))) + return false; + + DiscIntersectorHitM<M> hit(zero, zero, projC0, -ray_dir); + return epilog(valid, hit); + } + + template<typename Epilog> + static __forceinline bool intersect(const vbool<M>& valid_i, + Ray& ray, + IntersectContext* context, + const Points* geom, + const Precalculations& pre, + const Vec4vf<M>& v0i, + const Vec3vf<M>& normal, + const Epilog& epilog) + { + vbool<M> valid = valid_i; + const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); + + const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec3vf<M> 
center = v0.xyz(); + const vfloat<M> radius = v0.w; + + vfloat<M> divisor = dot(Vec3vf<M>((Vec3fa)ray.dir), normal); + const vbool<M> parallel = divisor == vfloat<M>(0.f); + valid &= !parallel; + divisor = select(parallel, 1.f, divisor); // prevent divide by zero + + vfloat<M> t = dot(center - Vec3vf<M>((Vec3fa)ray.org), Vec3vf<M>(normal)) / divisor; + + valid &= (vfloat<M>(ray.tnear()) <= t) & (t <= vfloat<M>(ray.tfar)); + if (unlikely(none(valid))) + return false; + + Vec3vf<M> intersection = Vec3vf<M>((Vec3fa)ray.org) + Vec3vf<M>((Vec3fa)ray.dir) * t; + vfloat<M> dist2 = dot(intersection - center, intersection - center); + valid &= dist2 < radius * radius; + if (unlikely(none(valid))) + return false; + + DiscIntersectorHitM<M> hit(zero, zero, t, normal); + return epilog(valid, hit); + } + }; + + template<int M, int K> + struct DiscIntersectorK + { + typedef CurvePrecalculationsK<K> Precalculations; + + template<typename Epilog> + static __forceinline bool intersect(const vbool<M>& valid_i, + RayK<K>& ray, + size_t k, + IntersectContext* context, + const Points* geom, + const Precalculations& pre, + const Vec4vf<M>& v0i, + const Epilog& epilog) + { + vbool<M> valid = valid_i; + + const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); + const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); + const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir)); + + const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec3vf<M> center = v0.xyz(); + const vfloat<M> radius = v0.w; + + const Vec3vf<M> c0 = center - ray_org; + const vfloat<M> projC0 = dot(c0, ray_dir) * rd2; + + valid &= (vfloat<M>(ray.tnear()[k]) <= projC0) & (projC0 <= vfloat<M>(ray.tfar[k])); + if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) + valid &= projC0 > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR) * radius * pre.depth_scale[k]; // ignore self intersections + if (unlikely(none(valid))) + return false; + + const Vec3vf<M> perp = c0 - projC0 
* ray_dir; + const vfloat<M> l2 = dot(perp, perp); + const vfloat<M> r2 = radius * radius; + valid &= (l2 <= r2); + if (unlikely(none(valid))) + return false; + + DiscIntersectorHitM<M> hit(zero, zero, projC0, -ray_dir); + return epilog(valid, hit); + } + + template<typename Epilog> + static __forceinline bool intersect(const vbool<M>& valid_i, + RayK<K>& ray, + size_t k, + IntersectContext* context, + const Points* geom, + const Precalculations& pre, + const Vec4vf<M>& v0i, + const Vec3vf<M>& normal, + const Epilog& epilog) + { + vbool<M> valid = valid_i; + const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); + const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); + + const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec3vf<M> center = v0.xyz(); + const vfloat<M> radius = v0.w; + + vfloat<M> divisor = dot(Vec3vf<M>(ray_dir), normal); + const vbool<M> parallel = divisor == vfloat<M>(0.f); + valid &= !parallel; + divisor = select(parallel, 1.f, divisor); // prevent divide by zero + + vfloat<M> t = dot(center - Vec3vf<M>(ray_org), Vec3vf<M>(normal)) / divisor; + + valid &= (vfloat<M>(ray.tnear()[k]) <= t) & (t <= vfloat<M>(ray.tfar[k])); + if (unlikely(none(valid))) + return false; + + Vec3vf<M> intersection = Vec3vf<M>(ray_org) + Vec3vf<M>(ray_dir) * t; + vfloat<M> dist2 = dot(intersection - center, intersection - center); + valid &= dist2 < radius * radius; + if (unlikely(none(valid))) + return false; + + DiscIntersectorHitM<M> hit(zero, zero, t, normal); + return epilog(valid, hit); + } + }; + } // namespace isa +} // namespace embree diff --git a/thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h new file mode 100644 index 0000000000..e1dc3aa98e --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h @@ -0,0 +1,277 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + 
+#pragma once + +#include "disc_intersector.h" +#include "intersector_epilog.h" +#include "pointi.h" + +namespace embree +{ + namespace isa + { + template<int M, int Mx, bool filter> + struct DiscMiIntersector1 + { + typedef PointMi<M> Primitive; + typedef CurvePrecalculations1 Precalculations; + + static __forceinline void intersect(const Precalculations& pre, + RayHit& ray, + IntersectContext* context, + const Primitive& Disc) + { + STAT3(normal.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Disc.gather(v0, geom); + const vbool<Mx> valid = Disc.template valid<Mx>(); + DiscIntersector1<Mx>::intersect( + valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, + Ray& ray, + IntersectContext* context, + const Primitive& Disc) + { + STAT3(shadow.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Disc.gather(v0, geom); + const vbool<Mx> valid = Disc.template valid<Mx>(); + return DiscIntersector1<Mx>::intersect( + valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + } + }; + + template<int M, int Mx, bool filter> + struct DiscMiMBIntersector1 + { + typedef PointMi<M> Primitive; + typedef CurvePrecalculations1 Precalculations; + + static __forceinline void intersect(const Precalculations& pre, + RayHit& ray, + IntersectContext* context, + const Primitive& Disc) + { + STAT3(normal.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()); + const vbool<Mx> valid = Disc.template valid<Mx>(); + DiscIntersector1<Mx>::intersect( + valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + } + + static __forceinline bool occluded(const 
Precalculations& pre, + Ray& ray, + IntersectContext* context, + const Primitive& Disc) + { + STAT3(shadow.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()); + const vbool<Mx> valid = Disc.template valid<Mx>(); + return DiscIntersector1<Mx>::intersect( + valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + } + }; + + template<int M, int Mx, int K, bool filter> + struct DiscMiIntersectorK + { + typedef PointMi<M> Primitive; + typedef CurvePrecalculationsK<K> Precalculations; + + static __forceinline void intersect( + const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) + { + STAT3(normal.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Disc.gather(v0, geom); + const vbool<Mx> valid = Disc.template valid<Mx>(); + DiscIntersectorK<Mx, K>::intersect( + valid, ray, k, context, geom, pre, v0, + Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + } + + static __forceinline bool occluded( + const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) + { + STAT3(shadow.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Disc.gather(v0, geom); + const vbool<Mx> valid = Disc.template valid<Mx>(); + return DiscIntersectorK<Mx, K>::intersect( + valid, ray, k, context, geom, pre, v0, + Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + } + }; + + template<int M, int Mx, int K, bool filter> + struct DiscMiMBIntersectorK + { + typedef PointMi<M> Primitive; + typedef CurvePrecalculationsK<K> Precalculations; + + static __forceinline void intersect( + const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) + { + 
STAT3(normal.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()[k]); + const vbool<Mx> valid = Disc.template valid<Mx>(); + DiscIntersectorK<Mx, K>::intersect( + valid, ray, k, context, geom, pre, v0, + Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + } + + static __forceinline bool occluded( + const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) + { + STAT3(shadow.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()[k]); + const vbool<Mx> valid = Disc.template valid<Mx>(); + return DiscIntersectorK<Mx, K>::intersect( + valid, ray, k, context, geom, pre, v0, Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + } + }; + + template<int M, int Mx, bool filter> + struct OrientedDiscMiIntersector1 + { + typedef PointMi<M> Primitive; + typedef CurvePrecalculations1 Precalculations; + + static __forceinline void intersect(const Precalculations& pre, + RayHit& ray, + IntersectContext* context, + const Primitive& Disc) + { + STAT3(normal.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Vec3vf<M> n0; + Disc.gather(v0, n0, geom); + const vbool<Mx> valid = Disc.template valid<Mx>(); + DiscIntersector1<Mx>::intersect( + valid, ray, context, geom, pre, v0, n0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, + Ray& ray, + IntersectContext* context, + const Primitive& Disc) + { + STAT3(shadow.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Vec3vf<M> n0; + Disc.gather(v0, n0, geom); + const vbool<Mx> valid = Disc.template valid<Mx>(); + return DiscIntersector1<Mx>::intersect( + 
valid, ray, context, geom, pre, v0, n0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + } + }; + + template<int M, int Mx, bool filter> + struct OrientedDiscMiMBIntersector1 + { + typedef PointMi<M> Primitive; + typedef CurvePrecalculations1 Precalculations; + + static __forceinline void intersect(const Precalculations& pre, + RayHit& ray, + IntersectContext* context, + const Primitive& Disc) + { + STAT3(normal.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Vec3vf<M> n0; + Disc.gather(v0, n0, geom, ray.time()); + const vbool<Mx> valid = Disc.template valid<Mx>(); + DiscIntersector1<Mx>::intersect( + valid, ray, context, geom, pre, v0, n0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, + Ray& ray, + IntersectContext* context, + const Primitive& Disc) + { + STAT3(shadow.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Vec3vf<M> n0; + Disc.gather(v0, n0, geom, ray.time()); + const vbool<Mx> valid = Disc.template valid<Mx>(); + return DiscIntersector1<Mx>::intersect( + valid, ray, context, geom, pre, v0, n0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + } + }; + + template<int M, int Mx, int K, bool filter> + struct OrientedDiscMiIntersectorK + { + typedef PointMi<M> Primitive; + typedef CurvePrecalculationsK<K> Precalculations; + + static __forceinline void intersect( + const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) + { + STAT3(normal.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Vec3vf<M> n0; + Disc.gather(v0, n0, geom); + const vbool<Mx> valid = Disc.template valid<Mx>(); + DiscIntersectorK<Mx, K>::intersect( + valid, ray, k, context, geom, pre, v0, n0, + 
Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + } + + static __forceinline bool occluded( + const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) + { + STAT3(shadow.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Vec3vf<M> n0; + Disc.gather(v0, n0, geom); + const vbool<Mx> valid = Disc.template valid<Mx>(); + return DiscIntersectorK<Mx, K>::intersect( + valid, ray, k, context, geom, pre, v0, n0, + Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + } + }; + + template<int M, int Mx, int K, bool filter> + struct OrientedDiscMiMBIntersectorK + { + typedef PointMi<M> Primitive; + typedef CurvePrecalculationsK<K> Precalculations; + + static __forceinline void intersect( + const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) + { + STAT3(normal.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Vec3vf<M> n0; + Disc.gather(v0, n0, geom, ray.time()[k]); + const vbool<Mx> valid = Disc.template valid<Mx>(); + DiscIntersectorK<Mx, K>::intersect( + valid, ray, k, context, geom, pre, v0, n0, + Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + } + + static __forceinline bool occluded( + const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) + { + STAT3(shadow.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(Disc.geomID()); + Vec4vf<M> v0; Vec3vf<M> n0; + Disc.gather(v0, n0, geom, ray.time()[k]); + const vbool<Mx> valid = Disc.template valid<Mx>(); + return DiscIntersectorK<Mx, K>::intersect( + valid, ray, k, context, geom, pre, v0, n0, + Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + } + }; + } // namespace isa +} // namespace embree diff --git 
a/thirdparty/embree-aarch64/kernels/geometry/filter.h b/thirdparty/embree-aarch64/kernels/geometry/filter.h new file mode 100644 index 0000000000..4cdf7a395a --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/filter.h @@ -0,0 +1,204 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/geometry.h" +#include "../common/ray.h" +#include "../common/hit.h" +#include "../common/context.h" + +namespace embree +{ + namespace isa + { + __forceinline bool runIntersectionFilter1Helper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context) + { + if (geometry->intersectionFilterN) + { + assert(context->scene->hasGeometryFilterFunction()); + geometry->intersectionFilterN(args); + + if (args->valid[0] == 0) + return false; + } + + if (context->user->filter) { + assert(context->scene->hasContextFilterFunction()); + context->user->filter(args); + + if (args->valid[0] == 0) + return false; + } + + copyHitToRay(*(RayHit*)args->ray,*(Hit*)args->hit); + return true; + } + + __forceinline bool runIntersectionFilter1(const Geometry* const geometry, RayHit& ray, IntersectContext* context, Hit& hit) + { + RTCFilterFunctionNArguments args; + int mask = -1; + args.valid = &mask; + args.geometryUserPtr = geometry->userPtr; + args.context = context->user; + args.ray = (RTCRayN*)&ray; + args.hit = (RTCHitN*)&hit; + args.N = 1; + return runIntersectionFilter1Helper(&args,geometry,context); + } + + __forceinline void reportIntersection1(IntersectFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args) + { +#if defined(EMBREE_FILTER_FUNCTION) + IntersectContext* MAYBE_UNUSED context = args->internal_context; + const Geometry* const geometry = args->geometry; + if (geometry->intersectionFilterN) { + assert(context->scene->hasGeometryFilterFunction()); + geometry->intersectionFilterN(filter_args); + } + + //if (args->valid[0] == 0) + // return; + + if 
(context->user->filter) { + assert(context->scene->hasContextFilterFunction()); + context->user->filter(filter_args); + } +#endif + } + + __forceinline bool runOcclusionFilter1Helper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context) + { + if (geometry->occlusionFilterN) + { + assert(context->scene->hasGeometryFilterFunction()); + geometry->occlusionFilterN(args); + + if (args->valid[0] == 0) + return false; + } + + if (context->user->filter) { + assert(context->scene->hasContextFilterFunction()); + context->user->filter(args); + + if (args->valid[0] == 0) + return false; + } + return true; + } + + __forceinline bool runOcclusionFilter1(const Geometry* const geometry, Ray& ray, IntersectContext* context, Hit& hit) + { + RTCFilterFunctionNArguments args; + int mask = -1; + args.valid = &mask; + args.geometryUserPtr = geometry->userPtr; + args.context = context->user; + args.ray = (RTCRayN*)&ray; + args.hit = (RTCHitN*)&hit; + args.N = 1; + return runOcclusionFilter1Helper(&args,geometry,context); + } + + __forceinline void reportOcclusion1(OccludedFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args) + { +#if defined(EMBREE_FILTER_FUNCTION) + IntersectContext* MAYBE_UNUSED context = args->internal_context; + const Geometry* const geometry = args->geometry; + if (geometry->occlusionFilterN) { + assert(context->scene->hasGeometryFilterFunction()); + geometry->occlusionFilterN(filter_args); + } + + //if (args->valid[0] == 0) + // return false; + + if (context->user->filter) { + assert(context->scene->hasContextFilterFunction()); + context->user->filter(filter_args); + } +#endif + } + + template<int K> + __forceinline vbool<K> runIntersectionFilterHelper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context) + { + vint<K>* mask = (vint<K>*) args->valid; + if (geometry->intersectionFilterN) + { + assert(context->scene->hasGeometryFilterFunction()); + 
geometry->intersectionFilterN(args); + } + + vbool<K> valid_o = *mask != vint<K>(zero); + if (none(valid_o)) return valid_o; + + if (context->user->filter) { + assert(context->scene->hasContextFilterFunction()); + context->user->filter(args); + } + + valid_o = *mask != vint<K>(zero); + if (none(valid_o)) return valid_o; + + copyHitToRay(valid_o,*(RayHitK<K>*)args->ray,*(HitK<K>*)args->hit); + return valid_o; + } + + template<int K> + __forceinline vbool<K> runIntersectionFilter(const vbool<K>& valid, const Geometry* const geometry, RayHitK<K>& ray, IntersectContext* context, HitK<K>& hit) + { + RTCFilterFunctionNArguments args; + vint<K> mask = valid.mask32(); + args.valid = (int*)&mask; + args.geometryUserPtr = geometry->userPtr; + args.context = context->user; + args.ray = (RTCRayN*)&ray; + args.hit = (RTCHitN*)&hit; + args.N = K; + return runIntersectionFilterHelper<K>(&args,geometry,context); + } + + template<int K> + __forceinline vbool<K> runOcclusionFilterHelper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context) + { + vint<K>* mask = (vint<K>*) args->valid; + if (geometry->occlusionFilterN) + { + assert(context->scene->hasGeometryFilterFunction()); + geometry->occlusionFilterN(args); + } + + vbool<K> valid_o = *mask != vint<K>(zero); + + if (none(valid_o)) return valid_o; + + if (context->user->filter) { + assert(context->scene->hasContextFilterFunction()); + context->user->filter(args); + } + + valid_o = *mask != vint<K>(zero); + + RayK<K>* ray = (RayK<K>*) args->ray; + ray->tfar = select(valid_o, vfloat<K>(neg_inf), ray->tfar); + return valid_o; + } + + template<int K> + __forceinline vbool<K> runOcclusionFilter(const vbool<K>& valid, const Geometry* const geometry, RayK<K>& ray, IntersectContext* context, HitK<K>& hit) + { + RTCFilterFunctionNArguments args; + vint<K> mask = valid.mask32(); + args.valid = (int*)&mask; + args.geometryUserPtr = geometry->userPtr; + args.context = context->user; + args.ray = 
(RTCRayN*)&ray; + args.hit = (RTCHitN*)&hit; + args.N = K; + return runOcclusionFilterHelper<K>(&args,geometry,context); + } + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h new file mode 100644 index 0000000000..46a0af0827 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h @@ -0,0 +1,99 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "grid_soa.h" +#include "grid_soa_intersector1.h" +#include "grid_soa_intersector_packet.h" +#include "../common/ray.h" + +namespace embree +{ + namespace isa + { + template<typename T> + class SubdivPatch1Precalculations : public T + { + public: + __forceinline SubdivPatch1Precalculations (const Ray& ray, const void* ptr) + : T(ray,ptr) {} + }; + + template<int K, typename T> + class SubdivPatch1PrecalculationsK : public T + { + public: + __forceinline SubdivPatch1PrecalculationsK (const vbool<K>& valid, RayK<K>& ray) + : T(valid,ray) {} + }; + + class Grid1Intersector1 + { + public: + typedef GridSOA Primitive; + typedef Grid1Precalculations<GridSOAIntersector1::Precalculations> Precalculations; + + /*! Intersect a ray with the primitive. */ + static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) + { + GridSOAIntersector1::intersect(pre,ray,context,prim,lazy_node); + } + static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) { + intersect(pre,ray,context,prim,ty,lazy_node); + } + + /*! 
Test if the ray is occluded by the primitive */ + static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) + { + GridSOAIntersector1::occluded(pre,ray,context,prim,lazy_node); + } + static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) { + return occluded(pre,ray,context,prim,ty,lazy_node); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) { + assert(false && "not implemented"); + return false; + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) { + assert(false && "not implemented"); + return false; + } + }; + + template <int K> + struct GridIntersectorK + { + typedef GridSOA Primitive; + typedef SubdivPatch1PrecalculationsK<K,typename GridSOAIntersectorK<K>::Precalculations> Precalculations; + + + static __forceinline void intersect(const vbool<K>& valid, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) + { + GridSOAIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node); + } + + static __forceinline vbool<K> occluded(const vbool<K>& valid, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) + { + GridSOAIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node); + } + + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) + { + GridSOAIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node); + } + + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const 
Primitive* prim, size_t ty, size_t& lazy_node) + { + GridSOAIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node); + } + }; + + typedef Grid1IntersectorK<4> SubdivPatch1Intersector4; + typedef Grid1IntersectorK<8> SubdivPatch1Intersector8; + typedef Grid1IntersectorK<16> SubdivPatch1Intersector16; + + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h b/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h new file mode 100644 index 0000000000..d3b275586c --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h @@ -0,0 +1,275 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" +#include "../common/scene_subdiv_mesh.h" +#include "../bvh/bvh.h" +#include "../subdiv/tessellation.h" +#include "../subdiv/tessellation_cache.h" +#include "subdivpatch1.h" + +namespace embree +{ + namespace isa + { + class GridSOA + { + public: + + /*! GridSOA constructor */ + GridSOA(const SubdivPatch1Base* patches, const unsigned time_steps, + const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight, + const SubdivMesh* const geom, const size_t totalBvhBytes, const size_t gridBytes, BBox3fa* bounds_o = nullptr); + + /*! 
Subgrid creation */ + template<typename Allocator> + static GridSOA* create(const SubdivPatch1Base* patches, const unsigned time_steps, + unsigned x0, unsigned x1, unsigned y0, unsigned y1, + const Scene* scene, Allocator& alloc, BBox3fa* bounds_o = nullptr) + { + const unsigned width = x1-x0+1; + const unsigned height = y1-y0+1; + const GridRange range(0,width-1,0,height-1); + size_t bvhBytes = 0; + if (time_steps == 1) + bvhBytes = getBVHBytes(range,sizeof(BVH4::AABBNode),0); + else { + bvhBytes = (time_steps-1)*getBVHBytes(range,sizeof(BVH4::AABBNodeMB),0); + bvhBytes += getTemporalBVHBytes(make_range(0,int(time_steps-1)),sizeof(BVH4::AABBNodeMB4D)); + } + const size_t gridBytes = 4*size_t(width)*size_t(height)*sizeof(float); + size_t rootBytes = time_steps*sizeof(BVH4::NodeRef); +#if !defined(__X86_64__) && !defined(__aarch64__) + rootBytes += 4; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding. +#endif + void* data = alloc(offsetof(GridSOA,data)+bvhBytes+time_steps*gridBytes+rootBytes); + assert(data); + return new (data) GridSOA(patches,time_steps,x0,x1,y0,y1,patches->grid_u_res,patches->grid_v_res,scene->get<SubdivMesh>(patches->geomID()),bvhBytes,gridBytes,bounds_o); + } + + /*! Grid creation */ + template<typename Allocator> + static GridSOA* create(const SubdivPatch1Base* const patches, const unsigned time_steps, + const Scene* scene, const Allocator& alloc, BBox3fa* bounds_o = nullptr) + { + return create(patches,time_steps,0,patches->grid_u_res-1,0,patches->grid_v_res-1,scene,alloc,bounds_o); + } + + /*! returns reference to root */ + __forceinline BVH4::NodeRef& root(size_t t = 0) { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; } + __forceinline const BVH4::NodeRef& root(size_t t = 0) const { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; } + + /*! 
returns pointer to BVH array */ + __forceinline int8_t* bvhData() { return &data[0]; } + __forceinline const int8_t* bvhData() const { return &data[0]; } + + /*! returns pointer to Grid array */ + __forceinline float* gridData(size_t t = 0) { return (float*) &data[gridOffset + t*gridBytes]; } + __forceinline const float* gridData(size_t t = 0) const { return (float*) &data[gridOffset + t*gridBytes]; } + + __forceinline void* encodeLeaf(size_t u, size_t v) { + return (void*) (16*(v * width + u + 1)); // +1 to not create empty leaf + } + __forceinline float* decodeLeaf(size_t t, const void* ptr) { + return gridData(t) + (((size_t) (ptr) >> 4) - 1); + } + + /*! returns the size of the BVH over the grid in bytes */ + static size_t getBVHBytes(const GridRange& range, const size_t nodeBytes, const size_t leafBytes); + + /*! returns the size of the temporal BVH over the time range BVHs */ + static size_t getTemporalBVHBytes(const range<int> time_range, const size_t nodeBytes); + + /*! calculates bounding box of grid range */ + __forceinline BBox3fa calculateBounds(size_t time, const GridRange& range) const + { + const float* const grid_array = gridData(time); + const float* const grid_x_array = grid_array + 0 * dim_offset; + const float* const grid_y_array = grid_array + 1 * dim_offset; + const float* const grid_z_array = grid_array + 2 * dim_offset; + + /* compute the bounds just for the range! */ + BBox3fa bounds( empty ); + for (unsigned v = range.v_start; v<=range.v_end; v++) + { + for (unsigned u = range.u_start; u<=range.u_end; u++) + { + const float x = grid_x_array[ v * width + u]; + const float y = grid_y_array[ v * width + u]; + const float z = grid_z_array[ v * width + u]; + bounds.extend( Vec3fa(x,y,z) ); + } + } + assert(is_finite(bounds)); + return bounds; + } + + /*! Evaluates grid over patch and builds BVH4 tree over the grid. */ + std::pair<BVH4::NodeRef,BBox3fa> buildBVH(BBox3fa* bounds_o); + + /*! Create BVH4 tree over grid. 
*/ + std::pair<BVH4::NodeRef,BBox3fa> buildBVH(const GridRange& range, size_t& allocator); + + /*! Evaluates grid over patch and builds MSMBlur BVH4 tree over the grid. */ + std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, BBox3fa* bounds_o); + + /*! Create MBlur BVH4 tree over grid. */ + std::pair<BVH4::NodeRef,LBBox3fa> buildMBlurBVH(size_t time, const GridRange& range, size_t& allocator); + + /*! Create MSMBlur BVH4 tree over grid. */ + std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, size_t& allocator, BBox3fa* bounds_o); + + template<typename Loader> + struct MapUV + { + typedef typename Loader::vfloat vfloat; + const float* const grid_uv; + size_t line_offset; + size_t lines; + + __forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines) + : grid_uv(grid_uv), line_offset(line_offset), lines(lines) {} + + __forceinline void operator() (vfloat& u, vfloat& v) const { + const Vec3<vfloat> tri_v012_uv = Loader::gather(grid_uv,line_offset,lines); + const Vec2<vfloat> uv0 = GridSOA::decodeUV(tri_v012_uv[0]); + const Vec2<vfloat> uv1 = GridSOA::decodeUV(tri_v012_uv[1]); + const Vec2<vfloat> uv2 = GridSOA::decodeUV(tri_v012_uv[2]); + const Vec2<vfloat> uv = u * uv1 + v * uv2 + (1.0f-u-v) * uv0; + u = uv[0];v = uv[1]; + } + }; + + struct Gather2x3 + { + enum { M = 4 }; + typedef vbool4 vbool; + typedef vint4 vint; + typedef vfloat4 vfloat; + + static __forceinline const Vec3vf4 gather(const float* const grid, const size_t line_offset, const size_t lines) + { + vfloat4 r0 = vfloat4::loadu(grid + 0*line_offset); + vfloat4 r1 = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid + if (unlikely(line_offset == 2)) + { + r0 = shuffle<0,1,1,1>(r0); + r1 = shuffle<0,1,1,1>(r1); + } + return Vec3vf4(unpacklo(r0,r1), // r00, r10, r01, r11 + shuffle<1,1,2,2>(r0), // r01, r01, r02, r02 + 
shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12 + } + + static __forceinline void gather(const float* const grid_x, + const float* const grid_y, + const float* const grid_z, + const size_t line_offset, + const size_t lines, + Vec3vf4& v0_o, + Vec3vf4& v1_o, + Vec3vf4& v2_o) + { + const Vec3vf4 tri_v012_x = gather(grid_x,line_offset,lines); + const Vec3vf4 tri_v012_y = gather(grid_y,line_offset,lines); + const Vec3vf4 tri_v012_z = gather(grid_z,line_offset,lines); + v0_o = Vec3vf4(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]); + v1_o = Vec3vf4(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]); + v2_o = Vec3vf4(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]); + } + }; + +#if defined (__AVX__) + struct Gather3x3 + { + enum { M = 8 }; + typedef vbool8 vbool; + typedef vint8 vint; + typedef vfloat8 vfloat; + + static __forceinline const Vec3vf8 gather(const float* const grid, const size_t line_offset, const size_t lines) + { + vfloat4 ra = vfloat4::loadu(grid + 0*line_offset); + vfloat4 rb = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid + vfloat4 rc; + if (likely(lines > 2)) + rc = vfloat4::loadu(grid + 2*line_offset); + else + rc = rb; + + if (unlikely(line_offset == 2)) + { + ra = shuffle<0,1,1,1>(ra); + rb = shuffle<0,1,1,1>(rb); + rc = shuffle<0,1,1,1>(rc); + } + + const vfloat8 r0 = vfloat8(ra,rb); + const vfloat8 r1 = vfloat8(rb,rc); + return Vec3vf8(unpacklo(r0,r1), // r00, r10, r01, r11, r10, r20, r11, r21 + shuffle<1,1,2,2>(r0), // r01, r01, r02, r02, r11, r11, r12, r12 + shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12, r20, r21, r21, r22 + } + + static __forceinline void gather(const float* const grid_x, + const float* const grid_y, + const float* const grid_z, + const size_t line_offset, + const size_t lines, + Vec3vf8& v0_o, + Vec3vf8& v1_o, + Vec3vf8& v2_o) + { + const Vec3vf8 tri_v012_x = gather(grid_x,line_offset,lines); + const Vec3vf8 tri_v012_y = 
gather(grid_y,line_offset,lines); + const Vec3vf8 tri_v012_z = gather(grid_z,line_offset,lines); + v0_o = Vec3vf8(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]); + v1_o = Vec3vf8(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]); + v2_o = Vec3vf8(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]); + } + }; +#endif + + template<typename vfloat> + static __forceinline Vec2<vfloat> decodeUV(const vfloat& uv) + { + typedef typename vfloat::Int vint; + const vint iu = asInt(uv) & 0xffff; + const vint iv = srl(asInt(uv),16); + const vfloat u = (vfloat)iu * vfloat(8.0f/0x10000); + const vfloat v = (vfloat)iv * vfloat(8.0f/0x10000); + return Vec2<vfloat>(u,v); + } + + __forceinline unsigned int geomID() const { + return _geomID; + } + + __forceinline unsigned int primID() const { + return _primID; + } + + public: + BVH4::NodeRef troot; +#if !defined(__X86_64__) && !defined(__aarch64__) + unsigned align1; +#endif + unsigned time_steps; + unsigned width; + + unsigned height; + unsigned dim_offset; + unsigned _geomID; + unsigned _primID; + + unsigned align2; + unsigned gridOffset; + unsigned gridBytes; + unsigned rootOffset; + + int8_t data[1]; //!< after the struct we first store the BVH, then the grid, and finally the roots + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h b/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h new file mode 100644 index 0000000000..2ed922a5ae --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h @@ -0,0 +1,207 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "grid_soa.h" +#include "../common/ray.h" +#include "triangle_intersector_pluecker.h" + +namespace embree +{ + namespace isa + { + class GridSOAIntersector1 + { + public: + typedef void Primitive; + + class Precalculations + { + public: + __forceinline Precalculations (const Ray& ray, const void* ptr) + : grid(nullptr) {} + + public: + GridSOA* grid; + int 
itime; + float ftime; + }; + + template<typename Loader> + static __forceinline void intersect(RayHit& ray, + IntersectContext* context, + const float* const grid_x, + const size_t line_offset, + const size_t lines, + Precalculations& pre) + { + typedef typename Loader::vfloat vfloat; + const size_t dim_offset = pre.grid->dim_offset; + const float* const grid_y = grid_x + 1 * dim_offset; + const float* const grid_z = grid_x + 2 * dim_offset; + const float* const grid_uv = grid_x + 3 * dim_offset; + Vec3<vfloat> v0, v1, v2; + Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2); + GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines); + PlueckerIntersector1<Loader::M> intersector(ray,nullptr); + intersector.intersect(ray,v0,v1,v2,mapUV,Intersect1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); + }; + + template<typename Loader> + static __forceinline bool occluded(Ray& ray, + IntersectContext* context, + const float* const grid_x, + const size_t line_offset, + const size_t lines, + Precalculations& pre) + { + typedef typename Loader::vfloat vfloat; + const size_t dim_offset = pre.grid->dim_offset; + const float* const grid_y = grid_x + 1 * dim_offset; + const float* const grid_z = grid_x + 2 * dim_offset; + const float* const grid_uv = grid_x + 3 * dim_offset; + + Vec3<vfloat> v0, v1, v2; + Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2); + + GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines); + PlueckerIntersector1<Loader::M> intersector(ray,nullptr); + return intersector.intersect(ray,v0,v1,v2,mapUV,Occluded1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); + } + + /*! Intersect a ray with the primitive. 
*/ + static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + const size_t line_offset = pre.grid->width; + const size_t lines = pre.grid->height; + const float* const grid_x = pre.grid->decodeLeaf(0,prim); + +#if defined(__AVX__) + intersect<GridSOA::Gather3x3>( ray, context, grid_x, line_offset, lines, pre); +#else + intersect<GridSOA::Gather2x3>(ray, context, grid_x , line_offset, lines, pre); + if (likely(lines > 2)) + intersect<GridSOA::Gather2x3>(ray, context, grid_x+line_offset, line_offset, lines, pre); +#endif + } + + /*! Test if the ray is occluded by the primitive */ + static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + const size_t line_offset = pre.grid->width; + const size_t lines = pre.grid->height; + const float* const grid_x = pre.grid->decodeLeaf(0,prim); + +#if defined(__AVX__) + return occluded<GridSOA::Gather3x3>( ray, context, grid_x, line_offset, lines, pre); +#else + if (occluded<GridSOA::Gather2x3>(ray, context, grid_x , line_offset, lines, pre)) return true; + if (likely(lines > 2)) + if (occluded<GridSOA::Gather2x3>(ray, context, grid_x+line_offset, line_offset, lines, pre)) return true; +#endif + return false; + } + }; + + class GridSOAMBIntersector1 + { + public: + typedef void Primitive; + typedef GridSOAIntersector1::Precalculations Precalculations; + + template<typename Loader> + static __forceinline void intersect(RayHit& ray, const float ftime, + IntersectContext* context, + const float* const grid_x, + const size_t line_offset, + const size_t lines, + Precalculations& pre) + { + typedef typename Loader::vfloat vfloat; + const size_t dim_offset = pre.grid->dim_offset; + const size_t grid_offset = pre.grid->gridBytes >> 2; + const float* const grid_y = grid_x + 1 * dim_offset; + const float* const grid_z = grid_x + 2 * dim_offset; + const float* const 
grid_uv = grid_x + 3 * dim_offset; + + Vec3<vfloat> a0, a1, a2; + Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2); + + Vec3<vfloat> b0, b1, b2; + Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2); + + Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime)); + Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime)); + Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime)); + + GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines); + PlueckerIntersector1<Loader::M> intersector(ray,nullptr); + intersector.intersect(ray,v0,v1,v2,mapUV,Intersect1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); + }; + + template<typename Loader> + static __forceinline bool occluded(Ray& ray, const float ftime, + IntersectContext* context, + const float* const grid_x, + const size_t line_offset, + const size_t lines, + Precalculations& pre) + { + typedef typename Loader::vfloat vfloat; + const size_t dim_offset = pre.grid->dim_offset; + const size_t grid_offset = pre.grid->gridBytes >> 2; + const float* const grid_y = grid_x + 1 * dim_offset; + const float* const grid_z = grid_x + 2 * dim_offset; + const float* const grid_uv = grid_x + 3 * dim_offset; + + Vec3<vfloat> a0, a1, a2; + Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2); + + Vec3<vfloat> b0, b1, b2; + Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2); + + Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime)); + Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime)); + Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime)); + + GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines); + PlueckerIntersector1<Loader::M> intersector(ray,nullptr); + return intersector.intersect(ray,v0,v1,v2,mapUV,Occluded1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); + } + + /*! Intersect a ray with the primitive. 
*/ + static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + const size_t line_offset = pre.grid->width; + const size_t lines = pre.grid->height; + const float* const grid_x = pre.grid->decodeLeaf(pre.itime,prim); + +#if defined(__AVX__) + intersect<GridSOA::Gather3x3>( ray, pre.ftime, context, grid_x, line_offset, lines, pre); +#else + intersect<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x, line_offset, lines, pre); + if (likely(lines > 2)) + intersect<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x+line_offset, line_offset, lines, pre); +#endif + } + + /*! Test if the ray is occluded by the primitive */ + static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + const size_t line_offset = pre.grid->width; + const size_t lines = pre.grid->height; + const float* const grid_x = pre.grid->decodeLeaf(pre.itime,prim); + +#if defined(__AVX__) + return occluded<GridSOA::Gather3x3>( ray, pre.ftime, context, grid_x, line_offset, lines, pre); +#else + if (occluded<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x , line_offset, lines, pre)) return true; + if (likely(lines > 2)) + if (occluded<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x+line_offset, line_offset, lines, pre)) return true; +#endif + return false; + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h b/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h new file mode 100644 index 0000000000..41d66e1e28 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h @@ -0,0 +1,445 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "grid_soa.h" +#include "../common/ray.h" +#include "triangle_intersector_pluecker.h" + +namespace embree +{ + namespace isa + { + 
template<int K> + struct MapUV0 /* rewrites barycentric (u,v) into the uv interpolated from the values decoded at ofs00, ofs01 and ofs10 of grid_uv */ + { + const float* const grid_uv; + size_t ofs00, ofs01, ofs10, ofs11; + + __forceinline MapUV0(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11) + : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {} + + __forceinline void operator() (vfloat<K>& u, vfloat<K>& v) const { + const vfloat<K> uv00(grid_uv[ofs00]); + const vfloat<K> uv01(grid_uv[ofs01]); + const vfloat<K> uv10(grid_uv[ofs10]); + /* the entry at ofs11 is not used by this functor, so it is not loaded */ + const Vec2vf<K> uv0 = GridSOA::decodeUV(uv00); + const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01); + const Vec2vf<K> uv2 = GridSOA::decodeUV(uv10); + const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0)); /* u*uv1 + v*uv2 + (1-u-v)*uv0 */ + u = uv[0]; v = uv[1]; + } + }; + + template<int K> + struct MapUV1 /* same as MapUV0, but interpolates the values decoded at ofs10, ofs01 and ofs11 of grid_uv */ + { + const float* const grid_uv; + size_t ofs00, ofs01, ofs10, ofs11; + + __forceinline MapUV1(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11) + : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {} + + __forceinline void operator() (vfloat<K>& u, vfloat<K>& v) const { + /* the entry at ofs00 is not used by this functor, so it is not loaded */ + const vfloat<K> uv01(grid_uv[ofs01]); + const vfloat<K> uv10(grid_uv[ofs10]); + const vfloat<K> uv11(grid_uv[ofs11]); + const Vec2vf<K> uv0 = GridSOA::decodeUV(uv10); + const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01); + const Vec2vf<K> uv2 = GridSOA::decodeUV(uv11); + const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0)); /* u*uv1 + v*uv2 + (1-u-v)*uv0 */ + u = uv[0]; v = uv[1]; + } + }; + + template<int K> + class GridSOAIntersectorK + { + public: + typedef void Primitive; + + class Precalculations + { +#if defined(__AVX__) + static const int M = 8; +#else + static const int M = 4; +#endif + + public: + __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) + : grid(nullptr), intersector(valid,ray) {} /* grid starts as nullptr; it is not assigned inside this class */ + + public: + GridSOA* grid; + PlueckerIntersectorK<M,K> intersector; // FIXME: use quad intersector + }; + + /*! 
Intersect a ray with the primitive. */ + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + const size_t dim_offset = pre.grid->dim_offset; + const size_t line_offset = pre.grid->width; + const float* const grid_x = pre.grid->decodeLeaf(0,prim); + const float* const grid_y = grid_x + 1 * dim_offset; + const float* const grid_z = grid_x + 2 * dim_offset; + const float* const grid_uv = grid_x + 3 * dim_offset; + + const size_t max_x = pre.grid->width == 2 ? 1 : 2; + const size_t max_y = pre.grid->height == 2 ? 1 : 2; + for (size_t y=0; y<max_y; y++) + { + for (size_t x=0; x<max_x; x++) + { + const size_t ofs00 = (y+0)*line_offset+(x+0); + const size_t ofs01 = (y+0)*line_offset+(x+1); + const size_t ofs10 = (y+1)*line_offset+(x+0); + const size_t ofs11 = (y+1)*line_offset+(x+1); + const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); + const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); + const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); + const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); + + pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); + pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); + } + } + } + + /*! 
Test if the ray is occluded by the primitive */ + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + const size_t dim_offset = pre.grid->dim_offset; + const size_t line_offset = pre.grid->width; + const float* const grid_x = pre.grid->decodeLeaf(0,prim); + const float* const grid_y = grid_x + 1 * dim_offset; + const float* const grid_z = grid_x + 2 * dim_offset; + const float* const grid_uv = grid_x + 3 * dim_offset; + + vbool<K> valid = valid_i; + const size_t max_x = pre.grid->width == 2 ? 1 : 2; + const size_t max_y = pre.grid->height == 2 ? 1 : 2; + for (size_t y=0; y<max_y; y++) + { + for (size_t x=0; x<max_x; x++) + { + const size_t ofs00 = (y+0)*line_offset+(x+0); + const size_t ofs01 = (y+0)*line_offset+(x+1); + const size_t ofs10 = (y+1)*line_offset+(x+0); + const size_t ofs11 = (y+1)*line_offset+(x+1); + const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); + const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); + const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); + const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); + + pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); + if (none(valid)) break; + pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); + if (none(valid)) break; + } + } + return !valid; + } + + template<typename Loader> + static __forceinline void intersect(RayHitK<K>& ray, size_t k, + IntersectContext* context, + const float* const grid_x, + const size_t line_offset, + const size_t lines, + Precalculations& pre) + { + typedef typename Loader::vfloat vfloat; + const size_t dim_offset = pre.grid->dim_offset; + const float* 
const grid_y = grid_x + 1 * dim_offset; + const float* const grid_z = grid_x + 2 * dim_offset; + const float* const grid_uv = grid_x + 3 * dim_offset; + Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2); + pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID())); + }; + + template<typename Loader> + static __forceinline bool occluded(RayK<K>& ray, size_t k, + IntersectContext* context, + const float* const grid_x, + const size_t line_offset, + const size_t lines, + Precalculations& pre) + { + typedef typename Loader::vfloat vfloat; + const size_t dim_offset = pre.grid->dim_offset; + const float* const grid_y = grid_x + 1 * dim_offset; + const float* const grid_z = grid_x + 2 * dim_offset; + const float* const grid_uv = grid_x + 3 * dim_offset; + Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2); + return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID())); + } + + /*! Intersect a ray with the primitive. */ + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + const size_t line_offset = pre.grid->width; + const size_t lines = pre.grid->height; + const float* const grid_x = pre.grid->decodeLeaf(0,prim); +#if defined(__AVX__) + intersect<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre); +#else + intersect<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre); + if (likely(lines > 2)) + intersect<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre); +#endif + } + + /*! 
Test if the ray is occluded by the primitive */ + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + const size_t line_offset = pre.grid->width; + const size_t lines = pre.grid->height; + const float* const grid_x = pre.grid->decodeLeaf(0,prim); + +#if defined(__AVX__) + return occluded<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre); +#else + if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre)) return true; + if (likely(lines > 2)) + if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre)) return true; +#endif + return false; + } + }; + + template<int K> + class GridSOAMBIntersectorK + { + public: + typedef void Primitive; + typedef typename GridSOAIntersectorK<K>::Precalculations Precalculations; + + /*! Intersect a ray with the primitive. */ + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + vfloat<K> vftime; + vint<K> vitime = getTimeSegment(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime); + + vbool<K> valid1 = valid_i; + while (any(valid1)) { + const size_t j = bsf(movemask(valid1)); + const int itime = vitime[j]; + const vbool<K> valid2 = valid1 & (itime == vitime); + valid1 = valid1 & !valid2; + intersect(valid2,pre,ray,vftime,itime,context,prim,lazy_node); + } + } + + /*! Intersect a ray with the primitive. 
*/ + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, const vfloat<K>& ftime, int itime, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + const size_t grid_offset = pre.grid->gridBytes >> 2; + const size_t dim_offset = pre.grid->dim_offset; + const size_t line_offset = pre.grid->width; + const float* const grid_x = pre.grid->decodeLeaf(itime,prim); + const float* const grid_y = grid_x + 1 * dim_offset; + const float* const grid_z = grid_x + 2 * dim_offset; + const float* const grid_uv = grid_x + 3 * dim_offset; + + const size_t max_x = pre.grid->width == 2 ? 1 : 2; + const size_t max_y = pre.grid->height == 2 ? 1 : 2; + for (size_t y=0; y<max_y; y++) + { + for (size_t x=0; x<max_x; x++) + { + size_t ofs00 = (y+0)*line_offset+(x+0); + size_t ofs01 = (y+0)*line_offset+(x+1); + size_t ofs10 = (y+1)*line_offset+(x+0); + size_t ofs11 = (y+1)*line_offset+(x+1); + const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); + const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); + const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); + const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); + ofs00 += grid_offset; + ofs01 += grid_offset; + ofs10 += grid_offset; + ofs11 += grid_offset; + const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); + const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); + const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); + const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); + const Vec3vf<K> p00 = lerp(a00,b00,ftime); + const Vec3vf<K> p01 = lerp(a01,b01,ftime); + const Vec3vf<K> p10 = lerp(a10,b10,ftime); + const Vec3vf<K> p11 = lerp(a11,b11,ftime); + + pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); + 
pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); + } + } + } + + /*! Test if the ray is occluded by the primitive */ + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + vfloat<K> vftime; + vint<K> vitime = getTimeSegment(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime); + + vbool<K> valid_o = valid_i; + vbool<K> valid1 = valid_i; + while (any(valid1)) { + const int j = int(bsf(movemask(valid1))); + const int itime = vitime[j]; + const vbool<K> valid2 = valid1 & (itime == vitime); + valid1 = valid1 & !valid2; + valid_o &= !valid2 | occluded(valid2,pre,ray,vftime,itime,context,prim,lazy_node); + } + return !valid_o; + } + + /*! Test if the ray is occluded by the primitive */ + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, const vfloat<K>& ftime, int itime, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + const size_t grid_offset = pre.grid->gridBytes >> 2; + const size_t dim_offset = pre.grid->dim_offset; + const size_t line_offset = pre.grid->width; + const float* const grid_x = pre.grid->decodeLeaf(itime,prim); + const float* const grid_y = grid_x + 1 * dim_offset; + const float* const grid_z = grid_x + 2 * dim_offset; + const float* const grid_uv = grid_x + 3 * dim_offset; + + vbool<K> valid = valid_i; + const size_t max_x = pre.grid->width == 2 ? 1 : 2; + const size_t max_y = pre.grid->height == 2 ? 
1 : 2; + for (size_t y=0; y<max_y; y++) + { + for (size_t x=0; x<max_x; x++) + { + size_t ofs00 = (y+0)*line_offset+(x+0); + size_t ofs01 = (y+0)*line_offset+(x+1); + size_t ofs10 = (y+1)*line_offset+(x+0); + size_t ofs11 = (y+1)*line_offset+(x+1); + const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); + const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); + const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); + const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); + ofs00 += grid_offset; + ofs01 += grid_offset; + ofs10 += grid_offset; + ofs11 += grid_offset; + const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); + const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); + const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); + const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); + const Vec3vf<K> p00 = lerp(a00,b00,ftime); + const Vec3vf<K> p01 = lerp(a01,b01,ftime); + const Vec3vf<K> p10 = lerp(a10,b10,ftime); + const Vec3vf<K> p11 = lerp(a11,b11,ftime); + + pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); + if (none(valid)) break; + pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); + if (none(valid)) break; + } + } + return valid; + } + + template<typename Loader> + static __forceinline void intersect(RayHitK<K>& ray, size_t k, + const float ftime, + IntersectContext* context, + const float* const grid_x, + const size_t line_offset, + const size_t lines, + Precalculations& pre) + { + typedef typename Loader::vfloat vfloat; + const size_t grid_offset = pre.grid->gridBytes >> 2; + const size_t dim_offset = pre.grid->dim_offset; + const float* const grid_y = grid_x + 1 * dim_offset; + const float* const grid_z = grid_x + 2 * 
dim_offset; + const float* const grid_uv = grid_x + 3 * dim_offset; + + Vec3<vfloat> a0, a1, a2; + Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2); + + Vec3<vfloat> b0, b1, b2; + Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2); + + Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime)); + Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime)); + Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime)); + + pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID())); + }; + + template<typename Loader> + static __forceinline bool occluded(RayK<K>& ray, size_t k, + const float ftime, + IntersectContext* context, + const float* const grid_x, + const size_t line_offset, + const size_t lines, + Precalculations& pre) + { + typedef typename Loader::vfloat vfloat; + const size_t grid_offset = pre.grid->gridBytes >> 2; + const size_t dim_offset = pre.grid->dim_offset; + const float* const grid_y = grid_x + 1 * dim_offset; + const float* const grid_z = grid_x + 2 * dim_offset; + const float* const grid_uv = grid_x + 3 * dim_offset; + + Vec3<vfloat> a0, a1, a2; + Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2); + + Vec3<vfloat> b0, b1, b2; + Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2); + + Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime)); + Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime)); + Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime)); + + return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID())); + } + + /*! Intersect a ray with the primitive. 
*/ + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + float ftime; + int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime); + + const size_t line_offset = pre.grid->width; + const size_t lines = pre.grid->height; + const float* const grid_x = pre.grid->decodeLeaf(itime,prim); + +#if defined(__AVX__) + intersect<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre); +#else + intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre); + if (likely(lines > 2)) + intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre); +#endif + } + + /*! Test if the ray is occluded by the primitive */ + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + float ftime; + int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime); + + const size_t line_offset = pre.grid->width; + const size_t lines = pre.grid->height; + const float* const grid_x = pre.grid->decodeLeaf(itime,prim); + +#if defined(__AVX__) + return occluded<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre); +#else + if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre)) return true; + if (likely(lines > 2)) + if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre)) return true; +#endif + return false; + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/instance.h b/thirdparty/embree-aarch64/kernels/geometry/instance.h new file mode 100644 index 0000000000..66893d581f --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/instance.h @@ -0,0 +1,78 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + 
+#pragma once + +#include "primitive.h" +#include "../common/scene_instance.h" + +namespace embree +{ + struct InstancePrimitive + { + struct Type : public PrimitiveType + { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + static Type type; + + public: + + /* primitive supports multiple time segments */ + static const bool singleTimeSegment = false; + + /* Returns maximum number of stored primitives */ + static __forceinline size_t max_size() { return 1; } + + /* Returns required number of primitive blocks for N primitives */ + static __forceinline size_t blocks(size_t N) { return N; } + + public: + + InstancePrimitive (const Instance* instance, unsigned int instID) + : instance(instance) + , instID_(instID) + {} + + __forceinline void fill(const PrimRef* prims, size_t& i, size_t end, Scene* scene) + { + assert(end-i == 1); + const PrimRef& prim = prims[i]; i++; + const unsigned int geomID = prim.geomID(); + const Instance* instance = scene->get<Instance>(geomID); + new (this) InstancePrimitive(instance, geomID); + } + + __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& i, size_t end, Scene* scene, size_t itime) + { + assert(end-i == 1); + const PrimRef& prim = prims[i]; i++; + const unsigned int geomID = prim.geomID(); + const Instance* instance = scene->get<Instance>(geomID); + new (this) InstancePrimitive(instance,geomID); + return instance->linearBounds(0,itime); + } + + __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& i, size_t end, Scene* scene, const BBox1f time_range) + { + assert(end-i == 1); + const PrimRefMB& prim = prims[i]; i++; + const unsigned int geomID = prim.geomID(); + const Instance* instance = scene->get<Instance>(geomID); + new (this) InstancePrimitive(instance,geomID); + return instance->linearBounds(0,time_range); + } + + /* Updates the primitive */ + __forceinline BBox3fa update(Instance* instance) { + 
return instance->bounds(0); + } + + public: + const Instance* instance; + const unsigned int instID_ = std::numeric_limits<unsigned int>::max (); + }; +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h new file mode 100644 index 0000000000..91731a39c5 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h @@ -0,0 +1,84 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "instance.h" +#include "../common/ray.h" +#include "../common/point_query.h" + +namespace embree +{ + namespace isa + { + struct InstanceIntersector1 + { + typedef InstancePrimitive Primitive; + + struct Precalculations { + __forceinline Precalculations (const Ray& ray, const void *ptr) {} + }; + + static void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim); + static bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim); + static bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim); + }; + + struct InstanceIntersector1MB + { + typedef InstancePrimitive Primitive; + + struct Precalculations { + __forceinline Precalculations (const Ray& ray, const void *ptr) {} + }; + + static void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim); + static bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim); + static bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim); + }; + + template<int K> + struct InstanceIntersectorK + { + typedef InstancePrimitive Primitive; + + struct Precalculations { + __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {} + }; + + static void intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& 
ray, IntersectContext* context, const Primitive& prim); + static vbool<K> occluded(const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& prim); + + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) { + intersect(vbool<K>(1<<int(k)),pre,ray,context,prim); + } + + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) { + occluded(vbool<K>(1<<int(k)),pre,ray,context,prim); + return ray.tfar[k] < 0.0f; + } + }; + + template<int K> + struct InstanceIntersectorKMB + { + typedef InstancePrimitive Primitive; + + struct Precalculations { + __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {} + }; + + static void intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& prim); + static vbool<K> occluded(const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& prim); + + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) { + intersect(vbool<K>(1<<int(k)),pre,ray,context,prim); + } + + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) { + occluded(vbool<K>(1<<int(k)),pre,ray,context,prim); + return ray.tfar[k] < 0.0f; + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h b/thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h new file mode 100644 index 0000000000..0df49dd6e9 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h @@ -0,0 +1,1074 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" +#include 
"../common/context.h" +#include "filter.h" + +namespace embree +{ + namespace isa + { + template<int M> + struct UVIdentity { + __forceinline void operator() (vfloat<M>& u, vfloat<M>& v) const {} + }; + + + template<bool filter> + struct Intersect1Epilog1 + { + RayHit& ray; + IntersectContext* context; + const unsigned int geomID; + const unsigned int primID; + + __forceinline Intersect1Epilog1(RayHit& ray, + IntersectContext* context, + const unsigned int geomID, + const unsigned int primID) + : ray(ray), context(context), geomID(geomID), primID(primID) {} + + template<typename Hit> + __forceinline bool operator() (Hit& hit) const + { + /* ray mask test */ + Scene* scene MAYBE_UNUSED = context->scene; + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); +#if defined(EMBREE_RAY_MASK) + if ((geometry->mask & ray.mask) == 0) return false; +#endif + hit.finalize(); + + /* intersection filter test */ +#if defined(EMBREE_FILTER_FUNCTION) + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { + HitK<1> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng); + const float old_t = ray.tfar; + ray.tfar = hit.t; + bool found = runIntersectionFilter1(geometry,ray,context,h); + if (!found) ray.tfar = old_t; + return found; + } + } +#endif + + /* update hit information */ + ray.tfar = hit.t; + ray.Ng = hit.Ng; + ray.u = hit.u; + ray.v = hit.v; + ray.primID = primID; + ray.geomID = geomID; + instance_id_stack::copy(context->user->instID, ray.instID); + return true; + } + }; + + template<bool filter> + struct Occluded1Epilog1 + { + Ray& ray; + IntersectContext* context; + const unsigned int geomID; + const unsigned int primID; + + __forceinline Occluded1Epilog1(Ray& ray, + IntersectContext* context, + const unsigned int geomID, + const unsigned int primID) + : ray(ray), context(context), geomID(geomID), primID(primID) {} + + template<typename Hit> + __forceinline bool operator() (Hit& hit) const + { + /* ray mask test */ + Scene* scene 
MAYBE_UNUSED = context->scene; + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); + + +#if defined(EMBREE_RAY_MASK) + if ((geometry->mask & ray.mask) == 0) return false; +#endif + hit.finalize(); + + /* occlusion filter test: ray.tfar holds the candidate hit distance while the filter runs and is restored if the filter rejects the hit */ +#if defined(EMBREE_FILTER_FUNCTION) + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) { + HitK<1> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng); + const float old_t = ray.tfar; + ray.tfar = hit.t; + const bool found = runOcclusionFilter1(geometry,ray,context,h); + if (!found) ray.tfar = old_t; + return found; + } + } +#endif + return true; + } + }; + + /*! Epilog for a single hit against lane k of a K-wide ray packet: applies the ray mask (when compiled in) and the intersection filter, then writes the accepted hit into lane k of the ray. Returns false if the mask or the filter rejects the hit. */ template<int K, bool filter> + struct Intersect1KEpilog1 + { + RayHitK<K>& ray; + size_t k; + IntersectContext* context; + const unsigned int geomID; + const unsigned int primID; + + __forceinline Intersect1KEpilog1(RayHitK<K>& ray, size_t k, + IntersectContext* context, + const unsigned int geomID, + const unsigned int primID) + : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {} + + template<typename Hit> + __forceinline bool operator() (Hit& hit) const + { + /* ray mask test */ + Scene* scene MAYBE_UNUSED = context->scene; + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); +#if defined(EMBREE_RAY_MASK) + if ((geometry->mask & ray.mask[k]) == 0) + return false; +#endif + hit.finalize(); + + /* intersection filter test: ray.tfar[k] holds the candidate hit distance while the filter runs and is restored if the filter rejects the hit */ +#if defined(EMBREE_FILTER_FUNCTION) + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { + HitK<K> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng); + const float old_t = ray.tfar[k]; + ray.tfar[k] = hit.t; + const bool found = any(runIntersectionFilter(vbool<K>(1<<k),geometry,ray,context,h)); + if (!found) ray.tfar[k] = old_t; + return found; + } + } +#endif + + /* update hit information */ + ray.tfar[k] = hit.t; + ray.Ng.x[k] = hit.Ng.x; + ray.Ng.y[k] = hit.Ng.y; + ray.Ng.z[k] = hit.Ng.z; + ray.u[k] = hit.u; + ray.v[k] = hit.v; + ray.primID[k] =
primID; + ray.geomID[k] = geomID; + instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k); + return true; + } + }; + + /*! Occlusion epilog for a single hit against lane k of a K-wide ray packet: returns true when the hit passes the ray mask (when compiled in) and, if one is active, the occlusion filter. ray.tfar[k] is only left modified when the filter accepts the hit. */ template<int K, bool filter> + struct Occluded1KEpilog1 + { + RayK<K>& ray; + size_t k; + IntersectContext* context; + const unsigned int geomID; + const unsigned int primID; + + __forceinline Occluded1KEpilog1(RayK<K>& ray, size_t k, + IntersectContext* context, + const unsigned int geomID, + const unsigned int primID) + : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {} + + template<typename Hit> + __forceinline bool operator() (Hit& hit) const + { + /* ray mask test */ + Scene* scene MAYBE_UNUSED = context->scene; + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); +#if defined(EMBREE_RAY_MASK) + if ((geometry->mask & ray.mask[k]) == 0) + return false; +#endif + + /* occlusion filter test (the hit is only finalized when a filter actually has to see it) */ +#if defined(EMBREE_FILTER_FUNCTION) + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) { + hit.finalize(); + HitK<K> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng); + const float old_t = ray.tfar[k]; + ray.tfar[k] = hit.t; + const bool found = any(runOcclusionFilter(vbool<K>(1<<k),geometry,ray,context,h)); + if (!found) ray.tfar[k] = old_t; + return found; + } + } +#endif + return true; + } + }; + + /*! Epilog for a single ray against a block of candidate hits with per-lane geometry and primitive IDs (geomIDs/primIDs). */ template<int M, int Mx, bool filter> + struct Intersect1EpilogM + { + RayHit& ray; + IntersectContext* context; + const vuint<M>& geomIDs; + const vuint<M>& primIDs; + + __forceinline Intersect1EpilogM(RayHit& ray, + IntersectContext* context, + const vuint<M>& geomIDs, + const vuint<M>& primIDs) + : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {} + + template<typename Hit> + __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const + { + Scene* scene MAYBE_UNUSED = context->scene; + vbool<Mx> valid = valid_i; + /* when the mask is wider than the primitive block, keep only the low M lanes */ if (Mx > M) valid &= (1<<M)-1; + hit.finalize(); + size_t i = select_min(valid,hit.vt); +
unsigned int geomID = geomIDs[i]; + + /* intersection filter test */ +#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK) + bool foundhit = false; + goto entry; + while (true) + { + if (unlikely(none(valid))) return foundhit; + i = select_min(valid,hit.vt); + + geomID = geomIDs[i]; + entry: + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); + +#if defined(EMBREE_RAY_MASK) + /* goto next hit if mask test fails */ + if ((geometry->mask & ray.mask) == 0) { + clear(valid,i); + continue; + } +#endif + +#if defined(EMBREE_FILTER_FUNCTION) + /* call intersection filter function */ + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { + const Vec2f uv = hit.uv(i); + HitK<1> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i)); + const float old_t = ray.tfar; + ray.tfar = hit.t(i); + const bool found = runIntersectionFilter1(geometry,ray,context,h); + if (!found) ray.tfar = old_t; + foundhit |= found; + clear(valid,i); + valid &= hit.vt <= ray.tfar; // intersection filters may modify tfar value + continue; + } + } +#endif + break; + } +#endif + + /* update hit information */ + const Vec2f uv = hit.uv(i); + ray.tfar = hit.vt[i]; + ray.Ng.x = hit.vNg.x[i]; + ray.Ng.y = hit.vNg.y[i]; + ray.Ng.z = hit.vNg.z[i]; + ray.u = uv.x; + ray.v = uv.y; + ray.primID = primIDs[i]; + ray.geomID = geomID; + instance_id_stack::copy(context->user->instID, ray.instID); + return true; + + } + }; + +#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4 + template<int M, bool filter> + struct Intersect1EpilogM<M,16,filter> + { + static const size_t Mx = 16; + RayHit& ray; + IntersectContext* context; + const vuint<M>& geomIDs; + const vuint<M>& primIDs; + + __forceinline Intersect1EpilogM(RayHit& ray, + IntersectContext* context, + const vuint<M>& geomIDs, + const vuint<M>& primIDs) + : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {} + + template<typename Hit> + __forceinline bool 
operator() (const vbool<Mx>& valid_i, Hit& hit) const + { + Scene* MAYBE_UNUSED scene = context->scene; + vbool<Mx> valid = valid_i; + if (Mx > M) valid &= (1<<M)-1; + hit.finalize(); + size_t i = select_min(valid,hit.vt); + unsigned int geomID = geomIDs[i]; + + /* intersection filter test */ +#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK) + bool foundhit = false; + goto entry; + while (true) + { + if (unlikely(none(valid))) return foundhit; + i = select_min(valid,hit.vt); + + geomID = geomIDs[i]; + entry: + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); + +#if defined(EMBREE_RAY_MASK) + /* goto next hit if mask test fails */ + if ((geometry->mask & ray.mask) == 0) { + clear(valid,i); + continue; + } +#endif + +#if defined(EMBREE_FILTER_FUNCTION) + /* call intersection filter function */ + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { + const Vec2f uv = hit.uv(i); + HitK<1> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i)); + const float old_t = ray.tfar; + ray.tfar = hit.t(i); + const bool found = runIntersectionFilter1(geometry,ray,context,h); + if (!found) ray.tfar = old_t; + foundhit |= found; + clear(valid,i); + valid &= hit.vt <= ray.tfar; // intersection filters may modify tfar value + continue; + } + } +#endif + break; + } +#endif + + vbool<Mx> finalMask(((unsigned int)1 << i)); + ray.update(finalMask,hit.vt,hit.vu,hit.vv,hit.vNg.x,hit.vNg.y,hit.vNg.z,geomID,primIDs); + instance_id_stack::foreach([&](unsigned level) + { + ray.instID[level] = context->user->instID[level]; + return (context->user->instID[level] != RTC_INVALID_GEOMETRY_ID); + }); + return true; + + } + }; +#endif + + template<int M, int Mx, bool filter> + struct Occluded1EpilogM + { + Ray& ray; + IntersectContext* context; + const vuint<M>& geomIDs; + const vuint<M>& primIDs; + + __forceinline Occluded1EpilogM(Ray& ray, + IntersectContext* context, + const vuint<M>& geomIDs, + const vuint<M>& primIDs) + : 
ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {} + + template<typename Hit> + __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const + { + Scene* scene MAYBE_UNUSED = context->scene; + /* ray mask and occlusion filter test: walk the valid lanes one by one and return true on the first lane that survives both */ +#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK) + if (unlikely(filter)) + hit.finalize(); /* called only once */ + + vbool<Mx> valid = valid_i; + /* when the mask is wider than the primitive block, keep only the low M lanes */ if (Mx > M) valid &= (1<<M)-1; + size_t m=movemask(valid); + goto entry; /* the first candidate enters the loop body directly, bypassing the m == 0 exit check */ + while (true) + { + if (unlikely(m == 0)) return false; + entry: + size_t i=bsf(m); + + const unsigned int geomID = geomIDs[i]; + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); + +#if defined(EMBREE_RAY_MASK) + /* goto next hit if mask test fails */ + if ((geometry->mask & ray.mask) == 0) { + m=btc(m,i); + continue; + } +#endif + +#if defined(EMBREE_FILTER_FUNCTION) + /* if we have no filter then the test passed */ + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) + { + const Vec2f uv = hit.uv(i); + HitK<1> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i)); + const float old_t = ray.tfar; + ray.tfar = hit.t(i); + if (runOcclusionFilter1(geometry,ray,context,h)) return true; + ray.tfar = old_t; + m=btc(m,i); + continue; + } + } +#endif + break; + } +#endif + + return true; + } + }; + + /*! Epilog for a single ray against M candidate hits that all share one geomID/primID. */ template<int M, bool filter> + struct Intersect1EpilogMU + { + RayHit& ray; + IntersectContext* context; + const unsigned int geomID; + const unsigned int primID; + + __forceinline Intersect1EpilogMU(RayHit& ray, + IntersectContext* context, + const unsigned int geomID, + const unsigned int primID) + : ray(ray), context(context), geomID(geomID), primID(primID) {} + + template<typename Hit> + __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const + { + /* ray mask test */ + Scene* scene MAYBE_UNUSED = context->scene; + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); +#if defined(EMBREE_RAY_MASK) + if ((geometry->mask & ray.mask)
== 0) return false; +#endif + + vbool<M> valid = valid_i; + hit.finalize(); + + size_t i = select_min(valid,hit.vt); + + /* intersection filter test; NOTE(review): this path does not consult the `filter` template flag before running the filter - confirm that is intended */ +#if defined(EMBREE_FILTER_FUNCTION) + if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) + { + bool foundhit = false; + while (true) + { + /* call intersection filter function */ + Vec2f uv = hit.uv(i); + const float old_t = ray.tfar; + ray.tfar = hit.t(i); + HitK<1> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i)); + const bool found = runIntersectionFilter1(geometry,ray,context,h); + if (!found) ray.tfar = old_t; + foundhit |= found; + clear(valid,i); + valid &= hit.vt <= ray.tfar; // intersection filters may modify tfar value + if (unlikely(none(valid))) break; + i = select_min(valid,hit.vt); + } + return foundhit; + } +#endif + + /* update hit information */ + const Vec2f uv = hit.uv(i); + const Vec3fa Ng = hit.Ng(i); + ray.tfar = hit.t(i); + ray.Ng.x = Ng.x; + ray.Ng.y = Ng.y; + ray.Ng.z = Ng.z; + ray.u = uv.x; + ray.v = uv.y; + ray.primID = primID; + ray.geomID = geomID; + instance_id_stack::copy(context->user->instID, ray.instID); + return true; + } + }; + + template<int M, bool filter> + struct Occluded1EpilogMU + { + Ray& ray; + IntersectContext* context; + const unsigned int geomID; + const unsigned int primID; + + __forceinline Occluded1EpilogMU(Ray& ray, + IntersectContext* context, + const unsigned int geomID, + const unsigned int primID) + : ray(ray), context(context), geomID(geomID), primID(primID) {} + + template<typename Hit> + __forceinline bool operator() (const vbool<M>& valid, Hit& hit) const + { + /* ray mask test */ + Scene* scene MAYBE_UNUSED = context->scene; + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); +#if defined(EMBREE_RAY_MASK) + if ((geometry->mask & ray.mask) == 0) return false; +#endif + + /* occlusion filter test; NOTE(review): the `filter` template flag is not consulted here either - confirm that is intended */ +#if defined(EMBREE_FILTER_FUNCTION) + if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) + {
hit.finalize(); + /* try every valid lane in turn; the first one accepted by the occlusion filter makes the ray occluded */ for (size_t m=movemask(valid), i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) + { + const Vec2f uv = hit.uv(i); + const float old_t = ray.tfar; + ray.tfar = hit.t(i); + HitK<1> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i)); + if (runOcclusionFilter1(geometry,ray,context,h)) return true; + ray.tfar = old_t; + } + return false; + } +#endif + return true; + } + }; + + /*! Epilog for a K-wide ray packet against slot i of an M-wide primitive block (geomIDs[i]/primIDs[i] identify the primitive). Returns the mask of rays whose hit was committed, or accepted by the intersection filter when one is active. */ template<int M, int K, bool filter> + struct IntersectKEpilogM + { + RayHitK<K>& ray; + IntersectContext* context; + const vuint<M>& geomIDs; + const vuint<M>& primIDs; + const size_t i; + + __forceinline IntersectKEpilogM(RayHitK<K>& ray, + IntersectContext* context, + const vuint<M>& geomIDs, + const vuint<M>& primIDs, + size_t i) + : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs), i(i) {} + + template<typename Hit> + __forceinline vbool<K> operator() (const vbool<K>& valid_i, const Hit& hit) const + { + Scene* scene MAYBE_UNUSED = context->scene; + + vfloat<K> u, v, t; + Vec3vf<K> Ng; + vbool<K> valid = valid_i; + + std::tie(u,v,t,Ng) = hit(); + + const unsigned int geomID = geomIDs[i]; + const unsigned int primID = primIDs[i]; + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); + + /* ray masking test */ +#if defined(EMBREE_RAY_MASK) + valid &= (geometry->mask & ray.mask) != 0; + if (unlikely(none(valid))) return false; +#endif + + /* intersection filter test: valid lanes get the candidate t in ray.tfar for the callback; lanes the filter does not accept get their previous tfar back */ +#if defined(EMBREE_FILTER_FUNCTION) + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { + HitK<K> h(context->user,geomID,primID,u,v,Ng); + const vfloat<K> old_t = ray.tfar; + ray.tfar = select(valid,t,ray.tfar); + const vbool<K> m_accept = runIntersectionFilter(valid,geometry,ray,context,h); + ray.tfar = select(m_accept,ray.tfar,old_t); + return m_accept; + } + } +#endif + + /* update hit information */ + vfloat<K>::store(valid,&ray.tfar,t); + vfloat<K>::store(valid,&ray.Ng.x,Ng.x); + vfloat<K>::store(valid,&ray.Ng.y,Ng.y); +
vfloat<K>::store(valid,&ray.Ng.z,Ng.z); + vfloat<K>::store(valid,&ray.u,u); + vfloat<K>::store(valid,&ray.v,v); + vuint<K>::store(valid,&ray.primID,primID); + vuint<K>::store(valid,&ray.geomID,geomID); + instance_id_stack::copy<const unsigned*, vuint<K>*, const vbool<K>&>(context->user->instID, ray.instID, valid); + return valid; + } + }; + + /*! Occlusion epilog for a K-wide ray packet against slot i of an M-wide primitive block. Removes the rays found occluded from the caller-owned mask valid0 and returns the mask of those rays. */ template<int M, int K, bool filter> + struct OccludedKEpilogM + { + vbool<K>& valid0; + RayK<K>& ray; + IntersectContext* context; + const vuint<M>& geomIDs; + const vuint<M>& primIDs; + const size_t i; + + __forceinline OccludedKEpilogM(vbool<K>& valid0, + RayK<K>& ray, + IntersectContext* context, + const vuint<M>& geomIDs, + const vuint<M>& primIDs, + size_t i) + : valid0(valid0), ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs), i(i) {} + + template<typename Hit> + __forceinline vbool<K> operator() (const vbool<K>& valid_i, const Hit& hit) const + { + vbool<K> valid = valid_i; + + /* ray masking test */ + Scene* scene MAYBE_UNUSED = context->scene; + const unsigned int geomID = geomIDs[i]; + const unsigned int primID = primIDs[i]; + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); +#if defined(EMBREE_RAY_MASK) + valid &= (geometry->mask & ray.mask) != 0; + if (unlikely(none(valid))) return valid; +#endif + + /* occlusion filter test: the hit data is only evaluated when a filter has to see it; lanes the filter does not accept get their previous tfar back */ +#if defined(EMBREE_FILTER_FUNCTION) + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) + { + vfloat<K> u, v, t; + Vec3vf<K> Ng; + std::tie(u,v,t,Ng) = hit(); + HitK<K> h(context->user,geomID,primID,u,v,Ng); + const vfloat<K> old_t = ray.tfar; + ray.tfar = select(valid,t,ray.tfar); + valid = runOcclusionFilter(valid,geometry,ray,context,h); + ray.tfar = select(valid,ray.tfar,old_t); + } + } +#endif + + /* update occlusion: drop the lanes that were just found occluded from the caller's mask */ + valid0 = valid0 & !valid; + return valid; + } + }; + + template<int M, int K, bool filter> + struct IntersectKEpilogMU + { + RayHitK<K>& ray; + IntersectContext* context; + const unsigned int geomID; +
const unsigned int primID; + + __forceinline IntersectKEpilogMU(RayHitK<K>& ray, + IntersectContext* context, + const unsigned int geomID, + const unsigned int primID) + : ray(ray), context(context), geomID(geomID), primID(primID) {} + + template<typename Hit> + __forceinline vbool<K> operator() (const vbool<K>& valid_org, const Hit& hit) const + { + vbool<K> valid = valid_org; + vfloat<K> u, v, t; + Vec3vf<K> Ng; + std::tie(u,v,t,Ng) = hit(); + + Scene* scene MAYBE_UNUSED = context->scene; + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); + + /* ray masking test */ +#if defined(EMBREE_RAY_MASK) + valid &= (geometry->mask & ray.mask) != 0; + if (unlikely(none(valid))) return false; +#endif + + /* intersection filter test */ +#if defined(EMBREE_FILTER_FUNCTION) + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { + HitK<K> h(context->user,geomID,primID,u,v,Ng); + const vfloat<K> old_t = ray.tfar; + ray.tfar = select(valid,t,ray.tfar); + const vbool<K> m_accept = runIntersectionFilter(valid,geometry,ray,context,h); + ray.tfar = select(m_accept,ray.tfar,old_t); + return m_accept; + } + } +#endif + + /* update hit information */ + vfloat<K>::store(valid,&ray.tfar,t); + vfloat<K>::store(valid,&ray.Ng.x,Ng.x); + vfloat<K>::store(valid,&ray.Ng.y,Ng.y); + vfloat<K>::store(valid,&ray.Ng.z,Ng.z); + vfloat<K>::store(valid,&ray.u,u); + vfloat<K>::store(valid,&ray.v,v); + vuint<K>::store(valid,&ray.primID,primID); + vuint<K>::store(valid,&ray.geomID,geomID); + instance_id_stack::copy<const unsigned*, vuint<K>*, const vbool<K>&>(context->user->instID, ray.instID, valid); + + return valid; + } + }; + + template<int M, int K, bool filter> + struct OccludedKEpilogMU + { + vbool<K>& valid0; + RayK<K>& ray; + IntersectContext* context; + const unsigned int geomID; + const unsigned int primID; + + __forceinline OccludedKEpilogMU(vbool<K>& valid0, + RayK<K>& ray, + IntersectContext* context, + const unsigned int geomID, + const 
unsigned int primID) + : valid0(valid0), ray(ray), context(context), geomID(geomID), primID(primID) {} + + template<typename Hit> + __forceinline vbool<K> operator() (const vbool<K>& valid_i, const Hit& hit) const + { + vbool<K> valid = valid_i; + Scene* scene MAYBE_UNUSED = context->scene; + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); + +#if defined(EMBREE_RAY_MASK) + valid &= (geometry->mask & ray.mask) != 0; + if (unlikely(none(valid))) return false; +#endif + + /* occlusion filter test */ +#if defined(EMBREE_FILTER_FUNCTION) + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) + { + vfloat<K> u, v, t; + Vec3vf<K> Ng; + std::tie(u,v,t,Ng) = hit(); + HitK<K> h(context->user,geomID,primID,u,v,Ng); + const vfloat<K> old_t = ray.tfar; + ray.tfar = select(valid,t,ray.tfar); + valid = runOcclusionFilter(valid,geometry,ray,context,h); + ray.tfar = select(valid,ray.tfar,old_t); + } + } +#endif + + /* update occlusion */ + valid0 = valid0 & !valid; + return valid; + } + }; + + template<int M, int Mx, int K, bool filter> + struct Intersect1KEpilogM + { + RayHitK<K>& ray; + size_t k; + IntersectContext* context; + const vuint<M>& geomIDs; + const vuint<M>& primIDs; + + __forceinline Intersect1KEpilogM(RayHitK<K>& ray, size_t k, + IntersectContext* context, + const vuint<M>& geomIDs, + const vuint<M>& primIDs) + : ray(ray), k(k), context(context), geomIDs(geomIDs), primIDs(primIDs) {} + + template<typename Hit> + __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const + { + Scene* scene MAYBE_UNUSED = context->scene; + vbool<Mx> valid = valid_i; + hit.finalize(); + if (Mx > M) valid &= (1<<M)-1; + size_t i = select_min(valid,hit.vt); + assert(i<M); + unsigned int geomID = geomIDs[i]; + + /* intersection filter test */ +#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK) + bool foundhit = false; + goto entry; + while (true) + { + if (unlikely(none(valid))) return foundhit; + i = 
select_min(valid,hit.vt); + assert(i<M); + geomID = geomIDs[i]; + entry: + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); + +#if defined(EMBREE_RAY_MASK) + /* goto next hit if mask test fails */ + if ((geometry->mask & ray.mask[k]) == 0) { + clear(valid,i); + continue; + } +#endif + +#if defined(EMBREE_FILTER_FUNCTION) + /* call intersection filter function */ + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { + assert(i<M); + const Vec2f uv = hit.uv(i); + HitK<K> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i)); + const float old_t = ray.tfar[k]; + ray.tfar[k] = hit.t(i); + const bool found = any(runIntersectionFilter(vbool<K>(1<<k),geometry,ray,context,h)); + if (!found) ray.tfar[k] = old_t; + foundhit = foundhit | found; + clear(valid,i); + valid &= hit.vt <= ray.tfar[k]; // intersection filters may modify tfar value + continue; + } + } +#endif + break; + } +#endif + assert(i<M); + /* update hit information */ +#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4 + ray.updateK(i,k,hit.vt,hit.vu,hit.vv,vfloat<Mx>(hit.vNg.x),vfloat<Mx>(hit.vNg.y),vfloat<Mx>(hit.vNg.z),geomID,vuint<Mx>(primIDs)); +#else + const Vec2f uv = hit.uv(i); + ray.tfar[k] = hit.t(i); + ray.Ng.x[k] = hit.vNg.x[i]; + ray.Ng.y[k] = hit.vNg.y[i]; + ray.Ng.z[k] = hit.vNg.z[i]; + ray.u[k] = uv.x; + ray.v[k] = uv.y; + ray.primID[k] = primIDs[i]; + ray.geomID[k] = geomID; + instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k); +#endif + return true; + } + }; + + template<int M, int Mx, int K, bool filter> + struct Occluded1KEpilogM + { + RayK<K>& ray; + size_t k; + IntersectContext* context; + const vuint<M>& geomIDs; + const vuint<M>& primIDs; + + __forceinline Occluded1KEpilogM(RayK<K>& ray, size_t k, + IntersectContext* context, + const vuint<M>& geomIDs, + const vuint<M>& primIDs) + : ray(ray), k(k), context(context), geomIDs(geomIDs), primIDs(primIDs) {} 
+ + template<typename Hit> + __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const + { + Scene* scene MAYBE_UNUSED = context->scene; + + /* intersection filter test */ +#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK) + if (unlikely(filter)) + hit.finalize(); /* called only once */ + + vbool<Mx> valid = valid_i; + if (Mx > M) valid &= (1<<M)-1; + size_t m=movemask(valid); + goto entry; + while (true) + { + if (unlikely(m == 0)) return false; + entry: + size_t i=bsf(m); + + const unsigned int geomID = geomIDs[i]; + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); + +#if defined(EMBREE_RAY_MASK) + /* goto next hit if mask test fails */ + if ((geometry->mask & ray.mask[k]) == 0) { + m=btc(m,i); + continue; + } +#endif + +#if defined(EMBREE_FILTER_FUNCTION) + /* execute occlusion filer */ + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) + { + const Vec2f uv = hit.uv(i); + const float old_t = ray.tfar[k]; + ray.tfar[k] = hit.t(i); + HitK<K> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i)); + if (any(runOcclusionFilter(vbool<K>(1<<k),geometry,ray,context,h))) return true; + ray.tfar[k] = old_t; + m=btc(m,i); + continue; + } + } +#endif + break; + } +#endif + return true; + } + }; + + template<int M, int K, bool filter> + struct Intersect1KEpilogMU + { + RayHitK<K>& ray; + size_t k; + IntersectContext* context; + const unsigned int geomID; + const unsigned int primID; + + __forceinline Intersect1KEpilogMU(RayHitK<K>& ray, size_t k, + IntersectContext* context, + const unsigned int geomID, + const unsigned int primID) + : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {} + + template<typename Hit> + __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const + { + Scene* scene MAYBE_UNUSED = context->scene; + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); +#if defined(EMBREE_RAY_MASK) + /* ray mask test */ + if ((geometry->mask & ray.mask[k]) == 
0) + return false; +#endif + + /* finalize hit calculation */ + vbool<M> valid = valid_i; + hit.finalize(); + size_t i = select_min(valid,hit.vt); + + /* intersection filter test */ +#if defined(EMBREE_FILTER_FUNCTION) + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) + { + bool foundhit = false; + while (true) + { + const Vec2f uv = hit.uv(i); + const float old_t = ray.tfar[k]; + ray.tfar[k] = hit.t(i); + HitK<K> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i)); + const bool found = any(runIntersectionFilter(vbool<K>(1<<k),geometry,ray,context,h)); + if (!found) ray.tfar[k] = old_t; + foundhit = foundhit | found; + clear(valid,i); + valid &= hit.vt <= ray.tfar[k]; // intersection filters may modify tfar value + if (unlikely(none(valid))) break; + i = select_min(valid,hit.vt); + } + return foundhit; + } + } +#endif + + /* update hit information */ +#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4 + const Vec3fa Ng = hit.Ng(i); + ray.updateK(i,k,hit.vt,hit.vu,hit.vv,vfloat<M>(Ng.x),vfloat<M>(Ng.y),vfloat<M>(Ng.z),geomID,vuint<M>(primID)); +#else + const Vec2f uv = hit.uv(i); + const Vec3fa Ng = hit.Ng(i); + ray.tfar[k] = hit.t(i); + ray.Ng.x[k] = Ng.x; + ray.Ng.y[k] = Ng.y; + ray.Ng.z[k] = Ng.z; + ray.u[k] = uv.x; + ray.v[k] = uv.y; + ray.primID[k] = primID; + ray.geomID[k] = geomID; + instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k); +#endif + return true; + } + }; + + template<int M, int K, bool filter> + struct Occluded1KEpilogMU + { + RayK<K>& ray; + size_t k; + IntersectContext* context; + const unsigned int geomID; + const unsigned int primID; + + __forceinline Occluded1KEpilogMU(RayK<K>& ray, size_t k, + IntersectContext* context, + const unsigned int geomID, + const unsigned int primID) + : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {} + + template<typename Hit> + __forceinline bool operator() (const 
vbool<M>& valid_i, Hit& hit) const + { + Scene* scene MAYBE_UNUSED = context->scene; + Geometry* geometry MAYBE_UNUSED = scene->get(geomID); +#if defined(EMBREE_RAY_MASK) + /* ray mask test */ + if ((geometry->mask & ray.mask[k]) == 0) + return false; +#endif + + /* intersection filter test */ +#if defined(EMBREE_FILTER_FUNCTION) + if (filter) { + if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) + { + hit.finalize(); + for (size_t m=movemask(valid_i), i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) + { + const Vec2f uv = hit.uv(i); + const float old_t = ray.tfar[k]; + ray.tfar[k] = hit.t(i); + HitK<K> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i)); + if (any(runOcclusionFilter(vbool<K>(1<<k),geometry,ray,context,h))) return true; + ray.tfar[k] = old_t; + } + return false; + } + } +#endif + return true; + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h b/thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h new file mode 100644 index 0000000000..5c1ba5cb61 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h @@ -0,0 +1,172 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/scene.h" +#include "../common/ray.h" +#include "../common/point_query.h" +#include "../bvh/node_intersector1.h" +#include "../bvh/node_intersector_packet.h" + +namespace embree +{ + namespace isa + { + template<typename Intersector> + struct ArrayIntersector1 + { + typedef typename Intersector::Primitive Primitive; + typedef typename Intersector::Precalculations Precalculations; + + template<int N, int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + for (size_t i=0; i<num; i++) + 
Intersector::intersect(pre,ray,context,prim[i]); + } + + template<int N, int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + for (size_t i=0; i<num; i++) { + if (Intersector::occluded(pre,ray,context,prim[i])) + return true; + } + return false; + } + + template<int N> + static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node) + { + bool changed = false; + for (size_t i=0; i<num; i++) + changed |= Intersector::pointQuery(query, context, prim[i]); + return changed; + } + + template<int K> + static __forceinline void intersectK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) + { + } + + template<int K> + static __forceinline vbool<K> occludedK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) + { + return valid; + } + }; + + template<int K, typename Intersector> + struct ArrayIntersectorK_1 + { + typedef typename Intersector::Primitive Primitive; + typedef typename Intersector::Precalculations Precalculations; + + template<bool robust> + static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) + { + for (size_t i=0; i<num; i++) { + Intersector::intersect(valid,pre,ray,context,prim[i]); + } + } + + template<bool robust> + static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, 
IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) + { + vbool<K> valid0 = valid; + for (size_t i=0; i<num; i++) { + valid0 &= !Intersector::occluded(valid0,pre,ray,context,prim[i]); + if (none(valid0)) break; + } + return !valid0; + } + + template<int N, int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + for (size_t i=0; i<num; i++) { + Intersector::intersect(pre,ray,k,context,prim[i]); + } + } + + template<int N, int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + for (size_t i=0; i<num; i++) { + if (Intersector::occluded(pre,ray,k,context,prim[i])) + return true; + } + return false; + } + }; + + // ============================================================================================= + + template<int K, typename IntersectorK> + struct ArrayIntersectorKStream + { + typedef typename IntersectorK::Primitive PrimitiveK; + typedef typename IntersectorK::Precalculations PrecalculationsK; + + static __forceinline void intersectK(const vbool<K>& valid, const Accel::Intersectors* This, /* PrecalculationsK& pre, */ RayHitK<K>& ray, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node) + { + PrecalculationsK pre(valid,ray); // FIXME: might cause trouble + + for (size_t i=0; i<num; i++) { + IntersectorK::intersect(valid,pre,ray,context,prim[i]); + } + } + + static __forceinline vbool<K> occludedK(const vbool<K>& valid, const Accel::Intersectors* This, /* PrecalculationsK& pre, */ RayK<K>& ray, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& 
lazy_node) + { + PrecalculationsK pre(valid,ray); // FIXME: might cause trouble + vbool<K> valid0 = valid; + for (size_t i=0; i<num; i++) { + valid0 &= !IntersectorK::occluded(valid0,pre,ray,context,prim[i]); + if (none(valid0)) break; + } + return !valid0; + } + + static __forceinline void intersect(const Accel::Intersectors* This, RayHitK<K>& ray, size_t k, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node) + { + PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble + for (size_t i=0; i<num; i++) { + IntersectorK::intersect(pre,ray,k,context,prim[i]); + } + } + + static __forceinline bool occluded(const Accel::Intersectors* This, RayK<K>& ray, size_t k, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node) + { + PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble + for (size_t i=0; i<num; i++) { + if (IntersectorK::occluded(pre,ray,k,context,prim[i])) + return true; + } + return false; + } + + static __forceinline size_t occluded(const Accel::Intersectors* This, size_t cur_mask, RayK<K>** __restrict__ inputPackets, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node) + { + size_t m_occluded = 0; + for (size_t i=0; i<num; i++) { + size_t bits = cur_mask & (~m_occluded); + for (; bits!=0; ) + { + const size_t rayID = bscf(bits); + RayHitK<K> &ray = *inputPackets[rayID / K]; + const size_t k = rayID % K; + PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble + if (IntersectorK::occluded(pre,ray,k,context,prim[i])) + { + m_occluded |= (size_t)1 << rayID; + ray.tfar[k] = neg_inf; + } + } + } + return m_occluded; + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/line_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/line_intersector.h new file mode 100644 index 0000000000..eef5b0b1fd --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/line_intersector.h @@ 
-0,0 +1,141 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" +#include "curve_intersector_precalculations.h" + +namespace embree +{ + namespace isa + { + template<int M> + struct LineIntersectorHitM + { + __forceinline LineIntersectorHitM() {} + + __forceinline LineIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng) + : vu(u), vv(v), vt(t), vNg(Ng) {} + + __forceinline void finalize() {} + + __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } + __forceinline float t (const size_t i) const { return vt[i]; } + __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } + + public: + vfloat<M> vu; + vfloat<M> vv; + vfloat<M> vt; + Vec3vf<M> vNg; + }; + + template<int M> + struct FlatLinearCurveIntersector1 + { + typedef CurvePrecalculations1 Precalculations; + + template<typename Epilog> + static __forceinline bool intersect(const vbool<M>& valid_i, + Ray& ray, + IntersectContext* context, + const LineSegments* geom, + const Precalculations& pre, + const Vec4vf<M>& v0i, const Vec4vf<M>& v1i, + const Epilog& epilog) + { + /* transform end points into ray space */ + vbool<M> valid = valid_i; + vfloat<M> depth_scale = pre.depth_scale; + LinearSpace3<Vec3vf<M>> ray_space = pre.ray_space; + + const Vec3vf<M> ray_org ((Vec3fa)ray.org); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); + + Vec4vf<M> p0(xfmVector(ray_space,v0.xyz()-ray_org), v0.w); + Vec4vf<M> p1(xfmVector(ray_space,v1.xyz()-ray_org), v1.w); + + /* approximative intersection with cone */ + const Vec4vf<M> v = p1-p0; + const Vec4vf<M> w = -p0; + const vfloat<M> d0 = madd(w.x,v.x,w.y*v.y); + const vfloat<M> d1 = madd(v.x,v.x,v.y*v.y); + const vfloat<M> u = clamp(d0*rcp(d1),vfloat<M>(zero),vfloat<M>(one)); + const Vec4vf<M> p = 
madd(u,v,p0); + const vfloat<M> t = p.z; + const vfloat<M> d2 = madd(p.x,p.x,p.y*p.y); + const vfloat<M> r = p.w; + const vfloat<M> r2 = r*r; + valid &= (d2 <= r2) & (vfloat<M>(ray.tnear()) <= t) & (t <= vfloat<M>(ray.tfar)); + if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) + valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; // ignore self intersections + if (unlikely(none(valid))) return false; + + /* ignore denormalized segments */ + const Vec3vf<M> T = v1.xyz()-v0.xyz(); + valid &= (T.x != vfloat<M>(zero)) | (T.y != vfloat<M>(zero)) | (T.z != vfloat<M>(zero)); + if (unlikely(none(valid))) return false; + + /* update hit information */ + LineIntersectorHitM<M> hit(u,zero,t,T); + return epilog(valid,hit); + } + }; + + template<int M, int K> + struct FlatLinearCurveIntersectorK + { + typedef CurvePrecalculationsK<K> Precalculations; + + template<typename Epilog> + static __forceinline bool intersect(const vbool<M>& valid_i, + RayK<K>& ray, size_t k, + IntersectContext* context, + const LineSegments* geom, + const Precalculations& pre, + const Vec4vf<M>& v0i, const Vec4vf<M>& v1i, + const Epilog& epilog) + { + /* transform end points into ray space */ + vbool<M> valid = valid_i; + vfloat<M> depth_scale = pre.depth_scale[k]; + LinearSpace3<Vec3vf<M>> ray_space = pre.ray_space[k]; + const Vec3vf<M> ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]); + const Vec3vf<M> ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]); + + const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); + + Vec4vf<M> p0(xfmVector(ray_space,v0.xyz()-ray_org), v0.w); + Vec4vf<M> p1(xfmVector(ray_space,v1.xyz()-ray_org), v1.w); + + /* approximative intersection with cone */ + const Vec4vf<M> v = p1-p0; + const Vec4vf<M> w = -p0; + const vfloat<M> d0 = madd(w.x,v.x,w.y*v.y); + const vfloat<M> d1 = madd(v.x,v.x,v.y*v.y); + const vfloat<M> u = 
clamp(d0*rcp(d1),vfloat<M>(zero),vfloat<M>(one)); + const Vec4vf<M> p = madd(u,v,p0); + const vfloat<M> t = p.z; + const vfloat<M> d2 = madd(p.x,p.x,p.y*p.y); + const vfloat<M> r = p.w; + const vfloat<M> r2 = r*r; + valid &= (d2 <= r2) & (vfloat<M>(ray.tnear()[k]) <= t) & (t <= vfloat<M>(ray.tfar[k])); + if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) + valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; // ignore self intersections + if (unlikely(none(valid))) return false; + + /* ignore denormalized segments */ + const Vec3vf<M> T = v1.xyz()-v0.xyz(); + valid &= (T.x != vfloat<M>(zero)) | (T.y != vfloat<M>(zero)) | (T.z != vfloat<M>(zero)); + if (unlikely(none(valid))) return false; + + /* update hit information */ + LineIntersectorHitM<M> hit(u,zero,t,T); + return epilog(valid,hit); + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/linei.h b/thirdparty/embree-aarch64/kernels/geometry/linei.h new file mode 100644 index 0000000000..a72029ca53 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/linei.h @@ -0,0 +1,709 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "primitive.h" + +namespace embree +{ + template<int M> + struct LineMi + { + /* Virtual interface to query information about the line segment type */ + struct Type : public PrimitiveType + { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + static Type type; + + public: + + /* primitive supports multiple time segments */ + static const bool singleTimeSegment = false; + + /* Returns maximum number of stored line segments */ + static __forceinline size_t max_size() { return M; } + + /* Returns required number of primitive blocks for N line segments */ + static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } + + /* Returns 
required number of bytes for N line segments */ + static __forceinline size_t bytes(size_t N) { return blocks(N)*sizeof(LineMi); } + + public: + + /* Default constructor */ + __forceinline LineMi() { } + + /* Construction from vertices and IDs */ + __forceinline LineMi(const vuint<M>& v0, unsigned short leftExists, unsigned short rightExists, const vuint<M>& geomIDs, const vuint<M>& primIDs, Geometry::GType gtype) + : gtype((unsigned char)gtype), m((unsigned char)popcnt(vuint<M>(primIDs) != vuint<M>(-1))), sharedGeomID(geomIDs[0]), leftExists (leftExists), rightExists(rightExists), v0(v0), primIDs(primIDs) + { + assert(all(vuint<M>(geomID()) == geomIDs)); + } + + /* Returns a mask that tells which line segments are valid */ + __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); } + + /* Returns a mask that tells which line segments are valid */ + template<int Mx> + __forceinline vbool<Mx> valid() const { return vuint<Mx>(primIDs) != vuint<Mx>(-1); } + + /* Returns if the specified line segment is valid */ + __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; } + + /* Returns the number of stored line segments */ + __forceinline size_t size() const { return bsf(~movemask(valid())); } + + /* Returns the geometry IDs */ + //template<class T> + //static __forceinline T unmask(T &index) { return index & 0x3fffffff; } + + __forceinline unsigned int geomID(unsigned int i = 0) const { return sharedGeomID; } + //__forceinline vuint<M> geomID() { return unmask(geomIDs); } + //__forceinline const vuint<M> geomID() const { return unmask(geomIDs); } + //__forceinline unsigned int geomID(const size_t i) const { assert(i<M); return unmask(geomIDs[i]); } + + /* Returns the primitive IDs */ + __forceinline vuint<M>& primID() { return primIDs; } + __forceinline const vuint<M>& primID() const { return primIDs; } + __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } + + /* gather the line 
segments */ + __forceinline void gather(Vec4vf<M>& p0, + Vec4vf<M>& p1, + const LineSegments* geom) const; + + __forceinline void gatheri(Vec4vf<M>& p0, + Vec4vf<M>& p1, + const LineSegments* geom, + const int itime) const; + + __forceinline void gather(Vec4vf<M>& p0, + Vec4vf<M>& p1, + const LineSegments* geom, + float time) const; + + /* gather the line segments with lateral info */ + __forceinline void gather(Vec4vf<M>& p0, + Vec4vf<M>& p1, + Vec4vf<M>& pL, + Vec4vf<M>& pR, + const LineSegments* geom) const; + + __forceinline void gatheri(Vec4vf<M>& p0, + Vec4vf<M>& p1, + Vec4vf<M>& pL, + Vec4vf<M>& pR, + const LineSegments* geom, + const int itime) const; + + __forceinline void gather(Vec4vf<M>& p0, + Vec4vf<M>& p1, + Vec4vf<M>& pL, + Vec4vf<M>& pR, + const LineSegments* geom, + float time) const; + + __forceinline void gather(Vec4vf<M>& p0, + Vec4vf<M>& p1, + vbool<M>& cL, + vbool<M>& cR, + const LineSegments* geom) const; + + __forceinline void gatheri(Vec4vf<M>& p0, + Vec4vf<M>& p1, + vbool<M>& cL, + vbool<M>& cR, + const LineSegments* geom, + const int itime) const; + + __forceinline void gather(Vec4vf<M>& p0, + Vec4vf<M>& p1, + vbool<M>& cL, + vbool<M>& cR, + const LineSegments* geom, + float time) const; + + /* Calculate the bounds of the line segments */ + __forceinline const BBox3fa bounds(const Scene* scene, size_t itime = 0) const + { + BBox3fa bounds = empty; + for (size_t i=0; i<M && valid(i); i++) + { + const LineSegments* geom = scene->get<LineSegments>(geomID(i)); + const Vec3ff& p0 = geom->vertex(v0[i]+0,itime); + const Vec3ff& p1 = geom->vertex(v0[i]+1,itime); + BBox3fa b = merge(BBox3fa(p0),BBox3fa(p1)); + b = enlarge(b,Vec3fa(max(p0.w,p1.w))); + bounds.extend(b); + } + return bounds; + } + + /* Calculate the linear bounds of the primitive */ + __forceinline LBBox3fa linearBounds(const Scene* scene, size_t itime) { + return LBBox3fa(bounds(scene,itime+0), bounds(scene,itime+1)); + } + + __forceinline LBBox3fa linearBounds(const Scene *const 
scene, size_t itime, size_t numTimeSteps) { + LBBox3fa allBounds = empty; + for (size_t i=0; i<M && valid(i); i++) + { + const LineSegments* geom = scene->get<LineSegments>(geomID(i)); + allBounds.extend(geom->linearBounds(primID(i), itime, numTimeSteps)); + } + return allBounds; + } + + __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range) + { + LBBox3fa allBounds = empty; + for (size_t i=0; i<M && valid(i); i++) + { + const LineSegments* geom = scene->get<LineSegments>(geomID((unsigned int)i)); + allBounds.extend(geom->linearBounds(primID(i), time_range)); + } + return allBounds; + } + + /* Fill line segment from line segment list */ + template<typename PrimRefT> + __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene) + { + Geometry::GType gty = scene->get(prims[begin].geomID())->getType(); + vuint<M> geomID, primID; + vuint<M> v0; + unsigned short leftExists = 0; + unsigned short rightExists = 0; + const PrimRefT* prim = &prims[begin]; + + for (size_t i=0; i<M; i++) + { + const LineSegments* geom = scene->get<LineSegments>(prim->geomID()); + if (begin<end) { + geomID[i] = prim->geomID(); + primID[i] = prim->primID(); + v0[i] = geom->segment(prim->primID()); + leftExists |= geom->segmentLeftExists(primID[i]) << i; + rightExists |= geom->segmentRightExists(primID[i]) << i; + begin++; + } else { + assert(i); + if (i>0) { + geomID[i] = geomID[i-1]; + primID[i] = -1; + v0[i] = v0[i-1]; + } + } + if (begin<end) prim = &prims[begin]; // FIXME: remove this line + } + new (this) LineMi(v0,leftExists,rightExists,geomID,primID,gty); // FIXME: use non temporal store + } + + template<typename BVH, typename Allocator> + __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) + { + size_t start = set.begin(); + size_t items = LineMi::blocks(set.size()); + size_t numbytes = LineMi::bytes(set.size()); + LineMi* accel = (LineMi*) 
alloc.malloc1(numbytes,M*sizeof(float)); + for (size_t i=0; i<items; i++) { + accel[i].fill(prims,start,set.end(),bvh->scene); + } + return bvh->encodeLeaf((char*)accel,items); + }; + + __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime) + { + fill(prims,begin,end,scene); + return linearBounds(scene,itime); + } + + __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range) + { + fill(prims,begin,end,scene); + return linearBounds(scene,time_range); + } + + template<typename BVH, typename SetMB, typename Allocator> + __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc) + { + size_t start = prims.begin(); + size_t end = prims.end(); + size_t items = LineMi::blocks(prims.size()); + size_t numbytes = LineMi::bytes(prims.size()); + LineMi* accel = (LineMi*) alloc.malloc1(numbytes,M*sizeof(float)); + const typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,items); + + LBBox3fa bounds = empty; + for (size_t i=0; i<items; i++) + bounds.extend(accel[i].fillMB(prims.prims->data(),start,end,bvh->scene,prims.time_range)); + + return typename BVH::NodeRecordMB4D(node,bounds,prims.time_range); + }; + + /* Updates the primitive */ + __forceinline BBox3fa update(LineSegments* geom) + { + BBox3fa bounds = empty; + for (size_t i=0; i<M && valid(i); i++) + { + const Vec3ff& p0 = geom->vertex(v0[i]+0); + const Vec3ff& p1 = geom->vertex(v0[i]+1); + BBox3fa b = merge(BBox3fa(p0),BBox3fa(p1)); + b = enlarge(b,Vec3fa(max(p0.w,p1.w))); + bounds.extend(b); + } + return bounds; + } + + /*! 
output operator */ + friend __forceinline embree_ostream operator<<(embree_ostream cout, const LineMi& line) { + return cout << "Line" << M << "i {" << line.v0 << ", " << line.geomID() << ", " << line.primID() << "}"; + } + + public: + unsigned char gtype; + unsigned char m; + unsigned int sharedGeomID; + unsigned short leftExists, rightExists; + vuint<M> v0; // index of start vertex + private: + vuint<M> primIDs; // primitive ID + }; + + template<> + __forceinline void LineMi<4>::gather(Vec4vf4& p0, + Vec4vf4& p1, + const LineSegments* geom) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0])); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1])); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2])); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3])); + transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w); + + const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1)); + const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1)); + const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1)); + const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1)); + transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w); + } + + template<> + __forceinline void LineMi<4>::gatheri(Vec4vf4& p0, + Vec4vf4& p1, + const LineSegments* geom, + const int itime) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime)); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime)); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime)); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime)); + transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w); + + const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime)); + const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime)); + const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime)); + const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime)); + transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w); + } + + template<> + __forceinline void LineMi<4>::gather(Vec4vf4& 
p0,
+                                       Vec4vf4& p1,
+                                       const LineSegments* geom,
+                                       float time) const
+  {
+    float ftime;
+    const int itime = geom->timeSegment(time, ftime);
+
+    Vec4vf4 a0,a1;
+    gatheri(a0,a1,geom,itime);
+    Vec4vf4 b0,b1;
+    gatheri(b0,b1,geom,itime+1);
+    p0 = lerp(a0,b0,vfloat4(ftime));
+    p1 = lerp(a1,b1,vfloat4(ftime));
+  }
+  /* Gathers the start/end vertices of the four segments via gather(p0,p1,geom)
+   * and additionally returns cL/cR, the negation of the masks built from
+   * leftExists/rightExists.
+   * NOTE(review): cL/cR presumably flag segments that have no left/right
+   * neighbour (end caps) - confirm against the callers. */
+  template<>
+  __forceinline void LineMi<4>::gather(Vec4vf4& p0,
+                                       Vec4vf4& p1,
+                                       vbool4& cL,
+                                       vbool4& cR,
+                                       const LineSegments* geom) const
+  {
+    gather(p0,p1,geom);
+    cL = !vbool4(leftExists);
+    cR = !vbool4(rightExists);
+  }
+  /* Same as the overload above, but reads the vertices of time step itime
+   * through gatheri(p0,p1,geom,itime). */
+  template<>
+  __forceinline void LineMi<4>::gatheri(Vec4vf4& p0,
+                                        Vec4vf4& p1,
+                                        vbool4& cL,
+                                        vbool4& cR,
+                                        const LineSegments* geom,
+                                        const int itime) const
+  {
+    gatheri(p0,p1,geom,itime);
+    cL = !vbool4(leftExists);
+    cR = !vbool4(rightExists);
+  }
+  /* Motion-blur variant: geom->timeSegment(time, ftime) yields the time step
+   * index itime and writes the interpolation weight ftime; the vertices of
+   * steps itime and itime+1 are gathered and linearly interpolated by ftime.
+   * cL/cR are computed exactly as in the static overloads. */
+  template<>
+  __forceinline void LineMi<4>::gather(Vec4vf4& p0,
+                                       Vec4vf4& p1,
+                                       vbool4& cL,
+                                       vbool4& cR,
+                                       const LineSegments* geom,
+                                       float time) const
+  {
+    float ftime;
+    const int itime = geom->timeSegment(time, ftime);
+
+    Vec4vf4 a0,a1;
+    gatheri(a0,a1,geom,itime);       // vertices at the lower time step
+    Vec4vf4 b0,b1;
+    gatheri(b0,b1,geom,itime+1);     // vertices at the upper time step
+    p0 = lerp(a0,b0,vfloat4(ftime));
+    p1 = lerp(a1,b1,vfloat4(ftime));
+    cL = !vbool4(leftExists);
+    cR = !vbool4(rightExists);
+  }
+  /* Gathers start/end vertices (v0[i] and v0[i]+1) of the four segments into
+   * p0/p1 and the neighbouring vertices (v0[i]-1 and v0[i]+2) into pL/pR.
+   * Lanes whose leftExists/rightExists bit is clear receive vfloat4(inf). */
+  template<>
+  __forceinline void LineMi<4>::gather(Vec4vf4& p0,
+                                       Vec4vf4& p1,
+                                       Vec4vf4& pL,
+                                       Vec4vf4& pR,
+                                       const LineSegments* geom) const
+  {
+    const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
+    const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
+    const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
+    const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
+    transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w);   // one loaded vertex per segment -> x/y/z/w vectors
+
+    const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
+    const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
+    const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
+    const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
+    transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w);
+ +
const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1)) : vfloat4(inf); + const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1)) : vfloat4(inf); + const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1)) : vfloat4(inf); + const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1)) : vfloat4(inf); + transpose(l0,l1,l2,l3,pL.x,pL.y,pL.z,pL.w); + + const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2)) : vfloat4(inf); + const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2)) : vfloat4(inf); + const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2)) : vfloat4(inf); + const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2)) : vfloat4(inf); + transpose(r0,r1,r2,r3,pR.x,pR.y,pR.z,pR.w); + } + + template<> + __forceinline void LineMi<4>::gatheri(Vec4vf4& p0, + Vec4vf4& p1, + Vec4vf4& pL, + Vec4vf4& pR, + const LineSegments* geom, + const int itime) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime)); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime)); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime)); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime)); + transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w); + + const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime)); + const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime)); + const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime)); + const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime)); + transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w); + + const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1,itime)) : vfloat4(inf); + const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1,itime)) : vfloat4(inf); + const vfloat4 l2 = (leftExists & (1<<2)) ? 
vfloat4::loadu(geom->vertexPtr(v0[2]-1,itime)) : vfloat4(inf); + const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1,itime)) : vfloat4(inf); + transpose(l0,l1,l2,l3,pL.x,pL.y,pL.z,pL.w); + + const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2,itime)) : vfloat4(inf); + const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2,itime)) : vfloat4(inf); + const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2,itime)) : vfloat4(inf); + const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2,itime)) : vfloat4(inf); + transpose(r0,r1,r2,r3,pR.x,pR.y,pR.z,pR.w); + } + + template<> + __forceinline void LineMi<4>::gather(Vec4vf4& p0, + Vec4vf4& p1, + Vec4vf4& pL, + Vec4vf4& pR, + const LineSegments* geom, + float time) const + { + float ftime; + const int itime = geom->timeSegment(time, ftime); + + Vec4vf4 a0,a1,aL,aR; + gatheri(a0,a1,aL,aR,geom,itime); + Vec4vf4 b0,b1,bL,bR; + gatheri(b0,b1,bL,bR,geom,itime+1); + p0 = lerp(a0,b0,vfloat4(ftime)); + p1 = lerp(a1,b1,vfloat4(ftime)); + pL = lerp(aL,bL,vfloat4(ftime)); + pR = lerp(aR,bR,vfloat4(ftime)); + } + +#if defined(__AVX__) + + template<> + __forceinline void LineMi<8>::gather(Vec4vf8& p0, + Vec4vf8& p1, + const LineSegments* geom) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0])); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1])); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2])); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3])); + const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4])); + const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5])); + const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6])); + const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7])); + transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w); + + const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1)); + const vfloat4 b1 = 
vfloat4::loadu(geom->vertexPtr(v0[1]+1)); + const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1)); + const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1)); + const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1)); + const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1)); + const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1)); + const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1)); + transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w); + } + + template<> + __forceinline void LineMi<8>::gatheri(Vec4vf8& p0, + Vec4vf8& p1, + const LineSegments* geom, + const int itime) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime)); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime)); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime)); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime)); + const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4],itime)); + const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5],itime)); + const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6],itime)); + const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7],itime)); + transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w); + + const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime)); + const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime)); + const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime)); + const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime)); + const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1,itime)); + const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1,itime)); + const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1,itime)); + const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1,itime)); + transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w); + } + + template<> + __forceinline void LineMi<8>::gather(Vec4vf8& p0, + Vec4vf8& p1, + const LineSegments* geom, + float time) const + { + float ftime; + const int itime 
= geom->timeSegment(time, ftime); + + Vec4vf8 a0,a1; + gatheri(a0,a1,geom,itime); + Vec4vf8 b0,b1; + gatheri(b0,b1,geom,itime+1); + p0 = lerp(a0,b0,vfloat8(ftime)); + p1 = lerp(a1,b1,vfloat8(ftime)); + } + + template<> + __forceinline void LineMi<8>::gather(Vec4vf8& p0, + Vec4vf8& p1, + Vec4vf8& pL, + Vec4vf8& pR, + const LineSegments* geom) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0])); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1])); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2])); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3])); + const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4])); + const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5])); + const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6])); + const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7])); + transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w); + + const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1)); + const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1)); + const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1)); + const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1)); + const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1)); + const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1)); + const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1)); + const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1)); + transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w); + + const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1)) : vfloat4(inf); + const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1)) : vfloat4(inf); + const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1)) : vfloat4(inf); + const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1)) : vfloat4(inf); + const vfloat4 l4 = (leftExists & (1<<4)) ? 
vfloat4::loadu(geom->vertexPtr(v0[4]-1)) : vfloat4(inf); + const vfloat4 l5 = (leftExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]-1)) : vfloat4(inf); + const vfloat4 l6 = (leftExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]-1)) : vfloat4(inf); + const vfloat4 l7 = (leftExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]-1)) : vfloat4(inf); + transpose(l0,l1,l2,l3,l4,l5,l6,l7,pL.x,pL.y,pL.z,pL.w); + + const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2)) : vfloat4(inf); + const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2)) : vfloat4(inf); + const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2)) : vfloat4(inf); + const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2)) : vfloat4(inf); + const vfloat4 r4 = (rightExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]+2)) : vfloat4(inf); + const vfloat4 r5 = (rightExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]+2)) : vfloat4(inf); + const vfloat4 r6 = (rightExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]+2)) : vfloat4(inf); + const vfloat4 r7 = (rightExists & (1<<7)) ? 
vfloat4::loadu(geom->vertexPtr(v0[7]+2)) : vfloat4(inf); + transpose(r0,r1,r2,r3,r4,r5,r6,r7,pR.x,pR.y,pR.z,pR.w); + } + + template<> + __forceinline void LineMi<8>::gatheri(Vec4vf8& p0, + Vec4vf8& p1, + Vec4vf8& pL, + Vec4vf8& pR, + const LineSegments* geom, + const int itime) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime)); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime)); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime)); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime)); + const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4],itime)); + const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5],itime)); + const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6],itime)); + const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7],itime)); + transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w); + + const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime)); + const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime)); + const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime)); + const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime)); + const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1,itime)); + const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1,itime)); + const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1,itime)); + const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1,itime)); + transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w); + + const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1,itime)) : vfloat4(inf); + const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1,itime)) : vfloat4(inf); + const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1,itime)) : vfloat4(inf); + const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1,itime)) : vfloat4(inf); + const vfloat4 l4 = (leftExists & (1<<4)) ? 
vfloat4::loadu(geom->vertexPtr(v0[4]-1,itime)) : vfloat4(inf); + const vfloat4 l5 = (leftExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]-1,itime)) : vfloat4(inf); + const vfloat4 l6 = (leftExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]-1,itime)) : vfloat4(inf); + const vfloat4 l7 = (leftExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]-1,itime)) : vfloat4(inf); + transpose(l0,l1,l2,l3,l4,l5,l6,l7,pL.x,pL.y,pL.z,pL.w); + + const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2,itime)) : vfloat4(inf); + const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2,itime)) : vfloat4(inf); + const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2,itime)) : vfloat4(inf); + const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2,itime)) : vfloat4(inf); + const vfloat4 r4 = (rightExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]+2,itime)) : vfloat4(inf); + const vfloat4 r5 = (rightExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]+2,itime)) : vfloat4(inf); + const vfloat4 r6 = (rightExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]+2,itime)) : vfloat4(inf); + const vfloat4 r7 = (rightExists & (1<<7)) ? 
vfloat4::loadu(geom->vertexPtr(v0[7]+2,itime)) : vfloat4(inf); + transpose(r0,r1,r2,r3,r4,r5,r6,r7,pR.x,pR.y,pR.z,pR.w); + } + + template<> + __forceinline void LineMi<8>::gather(Vec4vf8& p0, + Vec4vf8& p1, + Vec4vf8& pL, + Vec4vf8& pR, + const LineSegments* geom, + float time) const + { + float ftime; + const int itime = geom->timeSegment(time, ftime); + + Vec4vf8 a0,a1,aL,aR; + gatheri(a0,a1,aL,aR,geom,itime); + Vec4vf8 b0,b1,bL,bR; + gatheri(b0,b1,bL,bR,geom,itime+1); + p0 = lerp(a0,b0,vfloat8(ftime)); + p1 = lerp(a1,b1,vfloat8(ftime)); + pL = lerp(aL,bL,vfloat8(ftime)); + pR = lerp(aR,bR,vfloat8(ftime)); + } + + template<> + __forceinline void LineMi<8>::gather(Vec4vf8& p0, + Vec4vf8& p1, + vbool8& cL, + vbool8& cR, + const LineSegments* geom) const + { + gather(p0,p1,geom); + cL = !vbool8(leftExists); + cR = !vbool8(rightExists); + } + + template<> + __forceinline void LineMi<8>::gatheri(Vec4vf8& p0, + Vec4vf8& p1, + vbool8& cL, + vbool8& cR, + const LineSegments* geom, + const int itime) const + { + gatheri(p0,p1,geom,itime); + cL = !vbool8(leftExists); + cR = !vbool8(rightExists); + } + + template<> + __forceinline void LineMi<8>::gather(Vec4vf8& p0, + Vec4vf8& p1, + vbool8& cL, + vbool8& cR, + const LineSegments* geom, + float time) const + { + float ftime; + const int itime = geom->timeSegment(time, ftime); + + Vec4vf8 a0,a1; + gatheri(a0,a1,geom,itime); + Vec4vf8 b0,b1; + gatheri(b0,b1,geom,itime+1); + p0 = lerp(a0,b0,vfloat8(ftime)); + p1 = lerp(a1,b1,vfloat8(ftime)); + cL = !vbool8(leftExists); + cR = !vbool8(rightExists); + } + +#endif + + template<int M> + typename LineMi<M>::Type LineMi<M>::type; + + typedef LineMi<4> Line4i; + typedef LineMi<8> Line8i; +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h new file mode 100644 index 0000000000..a431796a88 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h @@ -0,0 +1,124 @@ +// 
Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "linei.h" +#include "line_intersector.h" +#include "intersector_epilog.h" + +namespace embree +{ + namespace isa + { + template<int M, int Mx, bool filter> + struct FlatLinearCurveMiIntersector1 + { + typedef LineMi<M> Primitive; + typedef CurvePrecalculations1 Precalculations; + + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line) + { + STAT3(normal.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; line.gather(v0,v1,geom); + const vbool<Mx> valid = line.template valid<Mx>(); + FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) + { + STAT3(shadow.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; line.gather(v0,v1,geom); + const vbool<Mx> valid = line.template valid<Mx>(); + return FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line); + } + }; + + template<int M, int Mx, bool filter> + struct FlatLinearCurveMiMBIntersector1 + { + typedef LineMi<M> Primitive; + typedef CurvePrecalculations1 Precalculations; + + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line) + { + STAT3(normal.trav_prims,1,1,1); + const LineSegments* geom = 
context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()); + const vbool<Mx> valid = line.template valid<Mx>(); + FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) + { + STAT3(shadow.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()); + const vbool<Mx> valid = line.template valid<Mx>(); + return FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line); + } + }; + + template<int M, int Mx, int K, bool filter> + struct FlatLinearCurveMiIntersectorK + { + typedef LineMi<M> Primitive; + typedef CurvePrecalculationsK<K> Precalculations; + + static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) + { + STAT3(normal.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; line.gather(v0,v1,geom); + const vbool<Mx> valid = line.template valid<Mx>(); + FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) + { + STAT3(shadow.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> 
v0,v1; line.gather(v0,v1,geom); + const vbool<Mx> valid = line.template valid<Mx>(); + return FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + } + }; + + template<int M, int Mx, int K, bool filter> + struct FlatLinearCurveMiMBIntersectorK + { + typedef LineMi<M> Primitive; + typedef CurvePrecalculationsK<K> Precalculations; + + static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) + { + STAT3(normal.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()[k]); + const vbool<Mx> valid = line.template valid<Mx>(); + FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) + { + STAT3(shadow.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()[k]); + const vbool<Mx> valid = line.template valid<Mx>(); + return FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/object.h b/thirdparty/embree-aarch64/kernels/geometry/object.h new file mode 100644 index 0000000000..f26391de52 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/object.h @@ -0,0 +1,84 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "primitive.h" + +namespace embree +{ + struct Object + { + struct Type : public PrimitiveType + { + 
const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + static Type type; + + public: + + /* primitive supports multiple time segments */ + static const bool singleTimeSegment = false; + + /* Returns maximum number of stored primitives */ + static __forceinline size_t max_size() { return 1; } + + /* Returns required number of primitive blocks for N primitives */ + static __forceinline size_t blocks(size_t N) { return N; } + + public: + + /*! constructs an object reference from a geometry ID and a primitive ID */ + Object (unsigned geomID, unsigned primID) + : _geomID(geomID), _primID(primID) {} + + __forceinline unsigned geomID() const { + return _geomID; + } + + __forceinline unsigned primID() const { + return _primID; + } + + /*! initializes this object from prims[i] and advances i (end and scene are not used here) */ + __forceinline void fill(const PrimRef* prims, size_t& i, size_t end, Scene* scene) + { + const PrimRef& prim = prims[i]; i++; + new (this) Object(prim.geomID(), prim.primID()); + } + + /*! initializes this object from prims[i], advances i, and returns the linear bounds the geometry reports for that primitive at time step itime */ + __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& i, size_t end, Scene* scene, size_t itime) + { + const PrimRef& prim = prims[i]; i++; + const unsigned geomID = prim.geomID(); + const unsigned primID = prim.primID(); + new (this) Object(geomID, primID); + AccelSet* accel = (AccelSet*) scene->get(geomID); + return accel->linearBounds(primID,itime); + } + + /*! 
initializes this object from prims[i], advances i, and returns the linear bounds the geometry reports for that primitive over time_range */ + __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& i, size_t end, Scene* scene, const BBox1f time_range) + { + const PrimRefMB& prim = prims[i]; i++; + const unsigned geomID = prim.geomID(); + const unsigned primID = prim.primID(); + new (this) Object(geomID, primID); + AccelSet* accel = (AccelSet*) scene->get(geomID); + return accel->linearBounds(primID,time_range); + } + + /* Returns the bounds that mesh reports for the referenced primitive; the stored IDs are not modified */ + __forceinline BBox3fa update(AccelSet* mesh) { + return mesh->bounds(primID()); + } + + private: + unsigned int _geomID; //!< geometry ID + unsigned int _primID; //!< primitive ID + }; +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/object_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/object_intersector.h new file mode 100644 index 0000000000..97882e0e59 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/object_intersector.h @@ -0,0 +1,127 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "object.h" +#include "../common/ray.h" + +namespace embree +{ + namespace isa + { + template<bool mblur> + struct ObjectIntersector1 + { + typedef Object Primitive; + + static const bool validIntersectorK = false; + + struct Precalculations { + __forceinline Precalculations() {} + __forceinline Precalculations (const Ray& ray, const void *ptr) {} + }; + + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) + { + AccelSet* accel = (AccelSet*) context->scene->get(prim.geomID()); + + /* perform ray mask test */ +#if defined(EMBREE_RAY_MASK) + if ((ray.mask & accel->mask) == 0) + return; +#endif + + accel->intersect(ray,prim.geomID(),prim.primID(),context,reportIntersection1); + } + + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) + { + AccelSet* accel = (AccelSet*) 
context->scene->get(prim.geomID()); + /* perform ray mask test */ +#if defined(EMBREE_RAY_MASK) + if ((ray.mask & accel->mask) == 0) + return false; +#endif + + accel->occluded(ray,prim.geomID(),prim.primID(),context,&reportOcclusion1); + return ray.tfar < 0.0f; + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim) + { + AccelSet* accel = (AccelSet*)context->scene->get(prim.geomID()); + context->geomID = prim.geomID(); + context->primID = prim.primID(); + return accel->pointQuery(query, context); + } + + template<int K> + static __forceinline void intersectK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) + { + assert(false); + } + + template<int K> + static __forceinline vbool<K> occludedK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) + { + assert(false); + return valid; + } + }; + + template<int K, bool mblur> + struct ObjectIntersectorK + { + typedef Object Primitive; + + struct Precalculations { + __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {} + }; + + static __forceinline void intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& prim) + { + vbool<K> valid = valid_i; + AccelSet* accel = (AccelSet*) context->scene->get(prim.geomID()); + + /* perform ray mask test */ +#if defined(EMBREE_RAY_MASK) + valid &= (ray.mask & accel->mask) != 0; + if (none(valid)) return; +#endif + accel->intersect(valid,ray,prim.geomID(),prim.primID(),context,&reportIntersection1); + } + + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& prim) + { + vbool<K> valid = valid_i; + AccelSet* accel = (AccelSet*) 
context->scene->get(prim.geomID()); + + /* perform ray mask test */ +#if defined(EMBREE_RAY_MASK) + valid &= (ray.mask & accel->mask) != 0; + if (none(valid)) return false; +#endif + accel->occluded(valid,ray,prim.geomID(),prim.primID(),context,&reportOcclusion1); + return ray.tfar < 0.0f; + } + + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) { + intersect(vbool<K>(1<<int(k)),pre,ray,context,prim); + } + + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) { + occluded(vbool<K>(1<<int(k)),pre,ray,context,prim); + return ray.tfar[k] < 0.0f; + } + }; + + typedef ObjectIntersectorK<4,false> ObjectIntersector4; + typedef ObjectIntersectorK<8,false> ObjectIntersector8; + typedef ObjectIntersectorK<16,false> ObjectIntersector16; + + typedef ObjectIntersectorK<4,true> ObjectIntersector4MB; + typedef ObjectIntersectorK<8,true> ObjectIntersector8MB; + typedef ObjectIntersectorK<16,true> ObjectIntersector16MB; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/plane.h b/thirdparty/embree-aarch64/kernels/geometry/plane.h new file mode 100644 index 0000000000..ebe45db558 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/plane.h @@ -0,0 +1,57 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" + +namespace embree +{ + namespace isa + { + struct HalfPlane + { + const Vec3fa P; //!< plane origin + const Vec3fa N; //!< plane normal + + __forceinline HalfPlane(const Vec3fa& P, const Vec3fa& N) + : P(P), N(N) {} + + __forceinline BBox1f intersect(const Vec3fa& ray_org, const Vec3fa& ray_dir) const + { + Vec3fa O = Vec3fa(ray_org) - P; + Vec3fa D = Vec3fa(ray_dir); + float ON = dot(O,N); + float DN = dot(D,N); + bool eps = abs(DN) < min_rcp_input; + float t = -ON*rcp(DN); + float lower = select(eps || DN < 0.0f, 
float(neg_inf), t); + float upper = select(eps || DN > 0.0f, float(pos_inf), t); + return BBox1f(lower,upper); + } + }; + + template<int M> + struct HalfPlaneN + { + const Vec3vf<M> P; //!< plane origin + const Vec3vf<M> N; //!< plane normal + + __forceinline HalfPlaneN(const Vec3vf<M>& P, const Vec3vf<M>& N) + : P(P), N(N) {} + + __forceinline BBox<vfloat<M>> intersect(const Vec3fa& ray_org, const Vec3fa& ray_dir) const + { + Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray_org) - P; + Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray_dir); + vfloat<M> ON = dot(O,N); + vfloat<M> DN = dot(D,N); + vbool<M> eps = abs(DN) < min_rcp_input; + vfloat<M> t = -ON*rcp(DN); + vfloat<M> lower = select(eps | DN < 0.0f, vfloat<M>(neg_inf), t); + vfloat<M> upper = select(eps | DN > 0.0f, vfloat<M>(pos_inf), t); + return BBox<vfloat<M>>(lower,upper); + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/pointi.h b/thirdparty/embree-aarch64/kernels/geometry/pointi.h new file mode 100644 index 0000000000..4ba298e86b --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/pointi.h @@ -0,0 +1,417 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "primitive.h" + +namespace embree +{ + template<int M> + struct PointMi + { + /* Virtual interface to query information about the point primitive type */ + struct Type : public PrimitiveType + { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + static Type type; + + public: + /* primitive supports multiple time segments */ + static const bool singleTimeSegment = false; + + /* Returns maximum number of stored points (one per lane, M in total) */ + static __forceinline size_t max_size() + { + return M; + } + + /* Returns required number of primitive blocks for N points (N divided by max_size(), rounded up) */ + static __forceinline size_t blocks(size_t N) + { + return (N + max_size() - 1) / max_size(); + } + + /* Returns 
required number of bytes for N points */ + static __forceinline size_t bytes(size_t N) + { + return blocks(N) * sizeof(PointMi); + } + + public: + /* Default constructor */ + __forceinline PointMi() {} + + /* Construction from geometry IDs, primitive IDs, geometry type and number of stored points; only geomIDs[0] is kept, all entries are asserted to be equal */ + __forceinline PointMi(const vuint<M>& geomIDs, const vuint<M>& primIDs, Geometry::GType gtype, uint32_t numPrimitives) + : gtype((unsigned char)gtype), + numPrimitives(numPrimitives), + sharedGeomID(geomIDs[0]), + primIDs(primIDs) + { + assert(all(vuint<M>(geomID()) == geomIDs)); + } + + /* Returns a mask that tells which points are valid (lane index < numPrimitives) */ + __forceinline vbool<M> valid() const { + return vint<M>(step) < vint<M>(numPrimitives); + } + + /* Returns a mask of width Mx that tells which points are valid (lane index < numPrimitives) */ + template<int Mx> __forceinline vbool<Mx> valid() const { + return vint<Mx>(step) < vint<Mx>(numPrimitives); + } + + /* Returns if the specified point is valid */ + __forceinline bool valid(const size_t i) const + { + assert(i < M); + return i < numPrimitives; + } + + /* Returns the number of stored points */ + __forceinline size_t size() const { + return numPrimitives; + } + + __forceinline unsigned int geomID(unsigned int i = 0) const { + return sharedGeomID; /* i is ignored: one geometry ID is stored for the whole block */ + } + + __forceinline vuint<M>& primID() { + return primIDs; + } + __forceinline const vuint<M>& primID() const { + return primIDs; + } + __forceinline unsigned int primID(const size_t i) const { + assert(i < M); + return primIDs[i]; + } + + /* gather the points (p0) and optionally their normals (n0); variants take no time, an integer time step itime, or a float time */ + __forceinline void gather(Vec4vf<M>& p0, const Points* geom) const; + __forceinline void gather(Vec4vf<M>& p0, Vec3vf<M>& n0, const Points* geom) const; + + __forceinline void gatheri(Vec4vf<M>& p0, const Points* geom, const int itime) const; + __forceinline void gatheri(Vec4vf<M>& p0, Vec3vf<M>& n0, const Points* geom, const int itime) const; + + __forceinline void gather(Vec4vf<M>& p0, const Points* geom, float time) const; + __forceinline void gather(Vec4vf<M>& p0, 
Vec3vf<M>& n0, const Points* geom, float time) const; + + /* Calculate the merged bounds of the valid points at time step itime */ + __forceinline const BBox3fa bounds(const Scene* scene, size_t itime = 0) const + { + BBox3fa bounds = empty; + for (size_t i = 0; i < M && valid(i); i++) { + const Points* geom = scene->get<Points>(geomID(i)); + bounds.extend(geom->bounds(primID(i),itime)); + } + return bounds; + } + + /* Calculate the linear bounds of the primitive from its bounds at time steps itime and itime+1 */ + __forceinline LBBox3fa linearBounds(const Scene* scene, size_t itime) { + return LBBox3fa(bounds(scene, itime + 0), bounds(scene, itime + 1)); + } + + __forceinline LBBox3fa linearBounds(const Scene* const scene, size_t itime, size_t numTimeSteps) + { + LBBox3fa allBounds = empty; + for (size_t i = 0; i < M && valid(i); i++) { + const Points* geom = scene->get<Points>(geomID(i)); + allBounds.extend(geom->linearBounds(primID(i), itime, numTimeSteps)); + } + return allBounds; + } + + __forceinline LBBox3fa linearBounds(const Scene* const scene, const BBox1f time_range) + { + LBBox3fa allBounds = empty; + for (size_t i = 0; i < M && valid(i); i++) { + const Points* geom = scene->get<Points>(geomID((unsigned int)i)); + allBounds.extend(geom->linearBounds(primID(i), time_range)); + } + return allBounds; + } + + /* Fill point block from primitive reference list: consumes up to M entries starting at prims[begin] (advancing begin); lanes beyond the last consumed entry copy the IDs of the previous lane */ + template<typename PrimRefT> + __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene) + { + Geometry::GType gty = scene->get(prims[begin].geomID())->getType(); + vuint<M> geomID, primID; + vuint<M> v0; + const PrimRefT* prim = &prims[begin]; + + int numPrimitives = 0; + for (size_t i = 0; i < M; i++) { + if (begin < end) { + geomID[i] = prim->geomID(); + primID[i] = prim->primID(); + begin++; + numPrimitives++; + } else { + assert(i); + if (i > 0) { + geomID[i] = geomID[i - 1]; + primID[i] = primID[i - 1]; + } + } + if (begin < end) + prim = &prims[begin]; // FIXME: remove this line + } + new (this) PointMi(geomID, primID, gty, numPrimitives); 
// FIXME: use non temporal store + } + + template<typename BVH, typename Allocator> + __forceinline static typename BVH::NodeRef createLeaf(BVH* bvh, + const PrimRef* prims, + const range<size_t>& set, + const Allocator& alloc) + { + size_t start = set.begin(); + size_t items = PointMi::blocks(set.size()); + size_t numbytes = PointMi::bytes(set.size()); + PointMi* accel = (PointMi*)alloc.malloc1(numbytes, M * sizeof(float)); + for (size_t i = 0; i < items; i++) { + accel[i].fill(prims, start, set.end(), bvh->scene); + } + return bvh->encodeLeaf((char*)accel, items); + }; + + __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime) + { + fill(prims, begin, end, scene); + return linearBounds(scene, itime); + } + + __forceinline LBBox3fa fillMB( + const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range) + { + fill(prims, begin, end, scene); + return linearBounds(scene, time_range); + } + + template<typename BVH, typename SetMB, typename Allocator> + __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc) + { + size_t start = prims.object_range.begin(); + size_t end = prims.object_range.end(); + size_t items = PointMi::blocks(prims.object_range.size()); + size_t numbytes = PointMi::bytes(prims.object_range.size()); + PointMi* accel = (PointMi*)alloc.malloc1(numbytes, M * sizeof(float)); + const typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel, items); + + LBBox3fa bounds = empty; + for (size_t i = 0; i < items; i++) + bounds.extend(accel[i].fillMB(prims.prims->data(), start, end, bvh->scene, prims.time_range)); + + return typename BVH::NodeRecordMB4D(node, bounds, prims.time_range); + }; + + /*! 
output operator */ + friend __forceinline embree_ostream operator<<(embree_ostream cout, const PointMi& line) + { + return cout << "Line" << M << "i {" << line.v0 << ", " << line.geomID() << ", " << line.primID() << "}"; + } + + public: + unsigned char gtype; + unsigned char numPrimitives; + unsigned int sharedGeomID; + + private: + vuint<M> primIDs; // primitive ID + }; + + template<> + __forceinline void PointMi<4>::gather(Vec4vf4& p0, const Points* geom) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0))); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1))); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2))); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3))); + transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w); + } + + template<> + __forceinline void PointMi<4>::gather(Vec4vf4& p0, Vec3vf4& n0, const Points* geom) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0))); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1))); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2))); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3))); + transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w); + const vfloat4 b0 = vfloat4(geom->normal(primID(0))); + const vfloat4 b1 = vfloat4(geom->normal(primID(1))); + const vfloat4 b2 = vfloat4(geom->normal(primID(2))); + const vfloat4 b3 = vfloat4(geom->normal(primID(3))); + transpose(b0, b1, b2, b3, n0.x, n0.y, n0.z); + } + + template<> + __forceinline void PointMi<4>::gatheri(Vec4vf4& p0, const Points* geom, const int itime) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime)); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime)); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime)); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime)); + transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w); + } + + template<> + __forceinline void 
PointMi<4>::gatheri(Vec4vf4& p0, Vec3vf4& n0, const Points* geom, const int itime) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime)); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime)); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime)); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime)); + transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w); + const vfloat4 b0 = vfloat4(geom->normal(primID(0), itime)); + const vfloat4 b1 = vfloat4(geom->normal(primID(1), itime)); + const vfloat4 b2 = vfloat4(geom->normal(primID(2), itime)); + const vfloat4 b3 = vfloat4(geom->normal(primID(3), itime)); + transpose(b0, b1, b2, b3, n0.x, n0.y, n0.z); + } + + template<> + __forceinline void PointMi<4>::gather(Vec4vf4& p0, const Points* geom, float time) const + { + float ftime; + const int itime = geom->timeSegment(time, ftime); + + Vec4vf4 a0; gatheri(a0, geom, itime); + Vec4vf4 b0; gatheri(b0, geom, itime + 1); + p0 = lerp(a0, b0, vfloat4(ftime)); + } + + template<> + __forceinline void PointMi<4>::gather(Vec4vf4& p0, Vec3vf4& n0, const Points* geom, float time) const + { + float ftime; + const int itime = geom->timeSegment(time, ftime); + + Vec4vf4 a0, b0; + Vec3vf4 norm0, norm1; + gatheri(a0, norm0, geom, itime); + gatheri(b0, norm1, geom, itime + 1); + p0 = lerp(a0, b0, vfloat4(ftime)); + n0 = lerp(norm0, norm1, vfloat4(ftime)); + } + +#if defined(__AVX__) + + template<> + __forceinline void PointMi<8>::gather(Vec4vf8& p0, const Points* geom) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0))); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1))); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2))); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3))); + const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4))); + const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5))); + const vfloat4 a6 = 
vfloat4::loadu(geom->vertexPtr(primID(6))); + const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7))); + transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w); + } + + template<> + __forceinline void PointMi<8>::gather(Vec4vf8& p0, Vec3vf8& n0, const Points* geom) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0))); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1))); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2))); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3))); + const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4))); + const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5))); + const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6))); + const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7))); + transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w); + const vfloat4 b0 = vfloat4(geom->normal(primID(0))); + const vfloat4 b1 = vfloat4(geom->normal(primID(1))); + const vfloat4 b2 = vfloat4(geom->normal(primID(2))); + const vfloat4 b3 = vfloat4(geom->normal(primID(3))); + const vfloat4 b4 = vfloat4(geom->normal(primID(4))); + const vfloat4 b5 = vfloat4(geom->normal(primID(5))); + const vfloat4 b6 = vfloat4(geom->normal(primID(6))); + const vfloat4 b7 = vfloat4(geom->normal(primID(7))); + transpose(b0, b1, b2, b3, b4, b5, b6, b7, n0.x, n0.y, n0.z); + } + + template<> + __forceinline void PointMi<8>::gatheri(Vec4vf8& p0, const Points* geom, const int itime) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime)); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime)); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime)); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime)); + const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4), itime)); + const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5), itime)); + const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6), 
itime)); + const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7), itime)); + transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w); + } + + template<> + __forceinline void PointMi<8>::gatheri(Vec4vf8& p0, Vec3vf8& n0, const Points* geom, const int itime) const + { + const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime)); + const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime)); + const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime)); + const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime)); + const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4), itime)); + const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5), itime)); + const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6), itime)); + const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7), itime)); + transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w); + const vfloat4 b0 = vfloat4(geom->normal(primID(0), itime)); + const vfloat4 b1 = vfloat4(geom->normal(primID(1), itime)); + const vfloat4 b2 = vfloat4(geom->normal(primID(2), itime)); + const vfloat4 b3 = vfloat4(geom->normal(primID(3), itime)); + const vfloat4 b4 = vfloat4(geom->normal(primID(4), itime)); + const vfloat4 b5 = vfloat4(geom->normal(primID(5), itime)); + const vfloat4 b6 = vfloat4(geom->normal(primID(6), itime)); + const vfloat4 b7 = vfloat4(geom->normal(primID(7), itime)); + transpose(b0, b1, b2, b3, b4, b5, b6, b7, n0.x, n0.y, n0.z); + } + + template<> + __forceinline void PointMi<8>::gather(Vec4vf8& p0, const Points* geom, float time) const + { + float ftime; + const int itime = geom->timeSegment(time, ftime); + + Vec4vf8 a0; + gatheri(a0, geom, itime); + Vec4vf8 b0; + gatheri(b0, geom, itime + 1); + p0 = lerp(a0, b0, vfloat8(ftime)); + } + + template<> + __forceinline void PointMi<8>::gather(Vec4vf8& p0, Vec3vf8& n0, const Points* geom, float time) const + { + float ftime; + const int itime = geom->timeSegment(time, ftime); + + 
Vec4vf8 a0, b0; + Vec3vf8 norm0, norm1; + gatheri(a0, norm0, geom, itime); + gatheri(b0, norm1, geom, itime + 1); + p0 = lerp(a0, b0, vfloat8(ftime)); + n0 = lerp(norm0, norm1, vfloat8(ftime)); + } +#endif + + template<int M> + typename PointMi<M>::Type PointMi<M>::type; + + typedef PointMi<4> Point4i; + typedef PointMi<8> Point8i; + +} // namespace embree diff --git a/thirdparty/embree-aarch64/kernels/geometry/primitive.h b/thirdparty/embree-aarch64/kernels/geometry/primitive.h new file mode 100644 index 0000000000..41e5b2b304 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/primitive.h @@ -0,0 +1,49 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/default.h" +#include "../common/scene.h" +#include "../../common/simd/simd.h" +#include "../common/primref.h" +#include "../common/primref_mb.h" + +namespace embree +{ + struct PrimitiveType + { + /*! returns name of this primitive type */ + virtual const char* name() const = 0; + + /*! Returns the number of stored active primitives in a block. */ + virtual size_t sizeActive(const char* This) const = 0; + + /*! Returns the number of stored active and inactive primitives in a block. */ + virtual size_t sizeTotal(const char* This) const = 0; + + /*! Returns the number of bytes of block. 
*/ + virtual size_t getBytes(const char* This) const = 0; + }; + + template<typename Primitive> + struct PrimitivePointQuery1 + { + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim) + { + bool changed = false; + for (size_t i = 0; i < Primitive::max_size(); i++) + { + if (!prim.valid(i)) break; + STAT3(point_query.trav_prims,1,1,1); + AccelSet* accel = (AccelSet*)context->scene->get(prim.geomID(i)); + context->geomID = prim.geomID(i); + context->primID = prim.primID(i); + changed |= accel->pointQuery(query, context); + } + return changed; + } + + static __forceinline void pointQueryNoop(PointQuery* query, PointQueryContext* context, const Primitive& prim) { } + }; +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp b/thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp new file mode 100644 index 0000000000..f93574c9c8 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp @@ -0,0 +1,379 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "primitive.h" +#include "curveNv.h" +#include "curveNi.h" +#include "curveNi_mb.h" +#include "linei.h" +#include "triangle.h" +#include "trianglev.h" +#include "trianglev_mb.h" +#include "trianglei.h" +#include "quadv.h" +#include "quadi.h" +#include "subdivpatch1.h" +#include "object.h" +#include "instance.h" +#include "subgrid.h" + +namespace embree +{ + /********************** Curve4v **************************/ + + template<> + const char* Curve4v::Type::name () const { + return "curve4v"; + } + + template<> + size_t Curve4v::Type::sizeActive(const char* This) const + { + if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) + return ((Line4i*)This)->size(); + else + return ((Curve4v*)This)->N; + } + + template<> + size_t Curve4v::Type::sizeTotal(const char* This) const + { + if ((*This & Geometry::GType::GTY_BASIS_MASK) == 
Geometry::GType::GTY_BASIS_LINEAR) + return 4; + else + return ((Curve4v*)This)->N; + } + + template<> + size_t Curve4v::Type::getBytes(const char* This) const + { + if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) + return Line4i::bytes(sizeActive(This)); + else + return Curve4v::bytes(sizeActive(This)); + } + + /********************** Curve4i **************************/ + + template<> + const char* Curve4i::Type::name () const { + return "curve4i"; + } + + template<> + size_t Curve4i::Type::sizeActive(const char* This) const + { + if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) + return ((Line4i*)This)->size(); + else + return ((Curve4i*)This)->N; + } + + template<> + size_t Curve4i::Type::sizeTotal(const char* This) const + { + if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) + return 4; + else + return ((Curve4i*)This)->N; + } + + template<> + size_t Curve4i::Type::getBytes(const char* This) const + { + if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) + return Line4i::bytes(sizeActive(This)); + else + return Curve4i::bytes(sizeActive(This)); + } + + /********************** Curve4iMB **************************/ + + template<> + const char* Curve4iMB::Type::name () const { + return "curve4imb"; + } + + template<> + size_t Curve4iMB::Type::sizeActive(const char* This) const + { + if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) + return ((Line4i*)This)->size(); + else + return ((Curve4iMB*)This)->N; + } + + template<> + size_t Curve4iMB::Type::sizeTotal(const char* This) const + { + if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) + return 4; + else + return ((Curve4iMB*)This)->N; + } + + template<> + size_t Curve4iMB::Type::getBytes(const char* This) const + { + if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) + return 
Line4i::bytes(sizeActive(This)); + else + return Curve4iMB::bytes(sizeActive(This)); + } + + /********************** Line4i **************************/ + + template<> + const char* Line4i::Type::name () const { + return "line4i"; + } + + template<> + size_t Line4i::Type::sizeActive(const char* This) const { + return ((Line4i*)This)->size(); + } + + template<> + size_t Line4i::Type::sizeTotal(const char* This) const { + return 4; + } + + template<> + size_t Line4i::Type::getBytes(const char* This) const { + return sizeof(Line4i); + } + + /********************** Triangle4 **************************/ + + template<> + const char* Triangle4::Type::name () const { + return "triangle4"; + } + + template<> + size_t Triangle4::Type::sizeActive(const char* This) const { + return ((Triangle4*)This)->size(); + } + + template<> + size_t Triangle4::Type::sizeTotal(const char* This) const { + return 4; + } + + template<> + size_t Triangle4::Type::getBytes(const char* This) const { + return sizeof(Triangle4); + } + + /********************** Triangle4v **************************/ + + template<> + const char* Triangle4v::Type::name () const { + return "triangle4v"; + } + + template<> + size_t Triangle4v::Type::sizeActive(const char* This) const { + return ((Triangle4v*)This)->size(); + } + + template<> + size_t Triangle4v::Type::sizeTotal(const char* This) const { + return 4; + } + + template<> + size_t Triangle4v::Type::getBytes(const char* This) const { + return sizeof(Triangle4v); + } + + /********************** Triangle4i **************************/ + + template<> + const char* Triangle4i::Type::name () const { + return "triangle4i"; + } + + template<> + size_t Triangle4i::Type::sizeActive(const char* This) const { + return ((Triangle4i*)This)->size(); + } + + template<> + size_t Triangle4i::Type::sizeTotal(const char* This) const { + return 4; + } + + template<> + size_t Triangle4i::Type::getBytes(const char* This) const { + return sizeof(Triangle4i); + } + + 
/********************** Triangle4vMB **************************/ /* Type descriptor methods for Triangle4vMB blocks; This points at the raw block memory */ + + template<> + const char* Triangle4vMB::Type::name () const { + return "triangle4vmb"; + } + + template<> + size_t Triangle4vMB::Type::sizeActive(const char* This) const { + /* reinterpret the raw block pointer and report the block's own size() */ return ((Triangle4vMB*)This)->size(); + } + + template<> + size_t Triangle4vMB::Type::sizeTotal(const char* This) const { + /* constant 4; This is not inspected */ return 4; + } + + template<> + size_t Triangle4vMB::Type::getBytes(const char* This) const { + /* size of the block type; This is not inspected */ return sizeof(Triangle4vMB); + } + + /********************** Quad4v **************************/ /* Type descriptor methods for Quad4v blocks */ + + template<> + const char* Quad4v::Type::name () const { + return "quad4v"; + } + + template<> + size_t Quad4v::Type::sizeActive(const char* This) const { + /* reinterpret the raw block pointer and report the block's own size() */ return ((Quad4v*)This)->size(); + } + + template<> + size_t Quad4v::Type::sizeTotal(const char* This) const { + /* constant 4; This is not inspected */ return 4; + } + + template<> + size_t Quad4v::Type::getBytes(const char* This) const { + /* size of the block type; This is not inspected */ return sizeof(Quad4v); + } + + /********************** Quad4i **************************/ /* Type descriptor methods for Quad4i blocks */ + + template<> + const char* Quad4i::Type::name () const { + return "quad4i"; + } + + template<> + size_t Quad4i::Type::sizeActive(const char* This) const { + /* reinterpret the raw block pointer and report the block's own size() */ return ((Quad4i*)This)->size(); + } + + template<> + size_t Quad4i::Type::sizeTotal(const char* This) const { + /* constant 4; This is not inspected */ return 4; + } + + template<> + size_t Quad4i::Type::getBytes(const char* This) const { + /* size of the block type; This is not inspected */ return sizeof(Quad4i); + } + + /********************** SubdivPatch1 **************************/ /* Non-template descriptor: both counts are the constant 1 */ + + const char* SubdivPatch1::Type::name () const { + return "subdivpatch1"; + } + + size_t SubdivPatch1::Type::sizeActive(const char* This) const { + return 1; + } + + size_t SubdivPatch1::Type::sizeTotal(const char* This) const { + return 1; + } + + size_t SubdivPatch1::Type::getBytes(const char* This) const { + return sizeof(SubdivPatch1); + } + + /* out-of-class definition of the SubdivPatch1::type descriptor object */ SubdivPatch1::Type SubdivPatch1::type; + + /********************** Virtual Object **************************/ /* Descriptor for Object primitives; the remaining methods follow this one */ + + const char* Object::Type::name () const { + return "object"; + } + + 
/* Object descriptor: both counts are the constant 1 and This is not inspected */ size_t Object::Type::sizeActive(const char* This) const { + return 1; + } + + size_t Object::Type::sizeTotal(const char* This) const { + return 1; + } + + size_t Object::Type::getBytes(const char* This) const { + /* size of the primitive type itself */ return sizeof(Object); + } + + /* out-of-class definition of the Object::type descriptor object */ Object::Type Object::type; + + /********************** Instance **************************/ /* Descriptor for InstancePrimitive: both counts are the constant 1 */ + + const char* InstancePrimitive::Type::name () const { + return "instance"; + } + + size_t InstancePrimitive::Type::sizeActive(const char* This) const { + return 1; + } + + size_t InstancePrimitive::Type::sizeTotal(const char* This) const { + return 1; + } + + size_t InstancePrimitive::Type::getBytes(const char* This) const { + /* size of the primitive type itself */ return sizeof(InstancePrimitive); + } + + /* out-of-class definition of the InstancePrimitive::type descriptor object */ InstancePrimitive::Type InstancePrimitive::type; + + /********************** SubGrid **************************/ /* Descriptor for SubGrid: both counts are the constant 1 */ + + const char* SubGrid::Type::name () const { + return "subgrid"; + } + + size_t SubGrid::Type::sizeActive(const char* This) const { + return 1; + } + + size_t SubGrid::Type::sizeTotal(const char* This) const { + return 1; + } + + size_t SubGrid::Type::getBytes(const char* This) const { + /* size of the primitive type itself */ return sizeof(SubGrid); + } + + /* out-of-class definition of the SubGrid::type descriptor object */ SubGrid::Type SubGrid::type; + + /********************** SubGridQBVH4 **************************/ /* Explicit specializations of the descriptor methods for SubGridQBVH4; note the name string keeps its mixed case */ + + template<> + const char* SubGridQBVH4::Type::name () const { + return "SubGridQBVH4"; + } + + template<> + size_t SubGridQBVH4::Type::sizeActive(const char* This) const { + return 1; + } + + template<> + size_t SubGridQBVH4::Type::sizeTotal(const char* This) const { + return 1; + } + + template<> + size_t SubGridQBVH4::Type::getBytes(const char* This) const { + /* size of the primitive type itself */ return sizeof(SubGridQBVH4); + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h new file mode 100644 index 0000000000..57ff4e60e5 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h @@ -0,0 +1,76 @@ +// Copyright 2009-2020 Intel Corporation +// 
SPDX-License-Identifier: Apache-2.0 + +#pragma once + +namespace embree +{ + namespace isa + { + /*! Intersects a ray with a quad with backface culling + * enabled. The quad v0,v1,v2,v3 is split into two triangles + * v0,v1,v3 and v2,v3,v1. The edge v1,v2 decides which of the two + * triangles gets intersected. */ + template<int N> + __forceinline vbool<N> intersect_quad_backface_culling(const vbool<N>& valid0, + const Vec3fa& ray_org, + const Vec3fa& ray_dir, + const float ray_tnear, + const float ray_tfar, + const Vec3vf<N>& quad_v0, + const Vec3vf<N>& quad_v1, + const Vec3vf<N>& quad_v2, + const Vec3vf<N>& quad_v3, + vfloat<N>& u_o, + vfloat<N>& v_o, + vfloat<N>& t_o) + { + /* calculate vertices relative to ray origin */ + vbool<N> valid = valid0; + const Vec3vf<N> O = Vec3vf<N>(ray_org); + const Vec3vf<N> D = Vec3vf<N>(ray_dir); + const Vec3vf<N> va = quad_v0-O; + const Vec3vf<N> vb = quad_v1-O; + const Vec3vf<N> vc = quad_v2-O; + const Vec3vf<N> vd = quad_v3-O; + + const Vec3vf<N> edb = vb-vd; + const vfloat<N> WW = dot(cross(vd,edb),D); + const Vec3vf<N> v0 = select(WW <= 0.0f,va,vc); + const Vec3vf<N> v1 = select(WW <= 0.0f,vb,vd); + const Vec3vf<N> v2 = select(WW <= 0.0f,vd,vb); + + /* calculate edges */ + const Vec3vf<N> e0 = v2-v0; + const Vec3vf<N> e1 = v0-v1; + + /* perform edge tests */ + const vfloat<N> U = dot(cross(v0,e0),D); + const vfloat<N> V = dot(cross(v1,e1),D); + valid &= max(U,V) <= 0.0f; + if (unlikely(none(valid))) return false; + + /* calculate geometry normal and denominator */ + const Vec3vf<N> Ng = cross(e1,e0); + const vfloat<N> den = dot(Ng,D); + const vfloat<N> rcpDen = rcp(den); + + /* perform depth test */ + const vfloat<N> t = rcpDen*dot(v0,Ng); + valid &= vfloat<N>(ray_tnear) <= t & t <= vfloat<N>(ray_tfar); + if (unlikely(none(valid))) return false; + + /* avoid division by 0 */ + valid &= den != vfloat<N>(zero); + if (unlikely(none(valid))) return false; + + /* update hit information */ + t_o = t; + u_o = U * rcpDen; + v_o = V 
* rcpDen; + u_o = select(WW <= 0.0f,u_o,1.0f-u_o); + v_o = select(WW <= 0.0f,v_o,1.0f-v_o); + return valid; + } + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h new file mode 100644 index 0000000000..74e8c7720c --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h @@ -0,0 +1,566 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "quadv.h" +#include "triangle_intersector_moeller.h" + +namespace embree +{ + namespace isa + { + template<int M> + struct QuadHitM + { + __forceinline QuadHitM() {} + + __forceinline QuadHitM(const vbool<M>& valid, + const vfloat<M>& U, + const vfloat<M>& V, + const vfloat<M>& T, + const vfloat<M>& absDen, + const Vec3vf<M>& Ng, + const vbool<M>& flags) + : U(U), V(V), T(T), absDen(absDen), tri_Ng(Ng), valid(valid), flags(flags) {} + + __forceinline void finalize() + { + const vfloat<M> rcpAbsDen = rcp(absDen); + vt = T * rcpAbsDen; + const vfloat<M> u = min(U * rcpAbsDen,1.0f); + const vfloat<M> v = min(V * rcpAbsDen,1.0f); + const vfloat<M> u1 = vfloat<M>(1.0f) - u; + const vfloat<M> v1 = vfloat<M>(1.0f) - v; +#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING) + vu = select(flags,u1,u); + vv = select(flags,v1,v); + vNg = Vec3vf<M>(tri_Ng.x,tri_Ng.y,tri_Ng.z); +#else + const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f)); + vv = select(flags,u1,v); + vu = select(flags,v1,u); + vNg = Vec3vf<M>(flip*tri_Ng.x,flip*tri_Ng.y,flip*tri_Ng.z); +#endif + } + + __forceinline Vec2f uv(const size_t i) + { + const float u = vu[i]; + const float v = vv[i]; + return Vec2f(u,v); + } + + __forceinline float t(const size_t i) { return vt[i]; } + __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } + + private: + vfloat<M> U; + vfloat<M> V; + vfloat<M> T; + vfloat<M> absDen; + Vec3vf<M> 
tri_Ng; + + public: + vbool<M> valid; + vfloat<M> vu; + vfloat<M> vv; + vfloat<M> vt; + Vec3vf<M> vNg; + + public: + const vbool<M> flags; + }; + + template<int K> + struct QuadHitK + { + __forceinline QuadHitK(const vfloat<K>& U, + const vfloat<K>& V, + const vfloat<K>& T, + const vfloat<K>& absDen, + const Vec3vf<K>& Ng, + const vbool<K>& flags) + : U(U), V(V), T(T), absDen(absDen), flags(flags), tri_Ng(Ng) {} + + __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const + { + const vfloat<K> rcpAbsDen = rcp(absDen); + const vfloat<K> t = T * rcpAbsDen; + const vfloat<K> u0 = min(U * rcpAbsDen,1.0f); + const vfloat<K> v0 = min(V * rcpAbsDen,1.0f); + const vfloat<K> u1 = vfloat<K>(1.0f) - u0; + const vfloat<K> v1 = vfloat<K>(1.0f) - v0; + const vfloat<K> u = select(flags,u1,u0); + const vfloat<K> v = select(flags,v1,v0); + const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z); + return std::make_tuple(u,v,t,Ng); + } + + private: + const vfloat<K> U; + const vfloat<K> V; + const vfloat<K> T; + const vfloat<K> absDen; + const vbool<K> flags; + const Vec3vf<K> tri_Ng; + }; + + /* ----------------------------- */ + /* -- single ray intersectors -- */ + /* ----------------------------- */ + + + template<int M, bool filter> + struct QuadMIntersector1MoellerTrumbore; + + /*! 
Intersects M quads with 1 ray */ + template<int M, bool filter> + struct QuadMIntersector1MoellerTrumbore + { + __forceinline QuadMIntersector1MoellerTrumbore() {} + + __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} + + __forceinline void intersect(RayHit& ray, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const vuint<M>& geomID, const vuint<M>& primID) const + { + MoellerTrumboreHitM<M> hit; + MoellerTrumboreIntersector1<M> intersector(ray,nullptr); + Intersect1EpilogM<M,M,filter> epilog(ray,context,geomID,primID); + + /* intersect first triangle */ + if (intersector.intersect(ray,v0,v1,v3,hit)) + epilog(hit.valid,hit); + + /* intersect second triangle */ + if (intersector.intersect(ray,v2,v3,v1,hit)) + { + hit.U = hit.absDen - hit.U; + hit.V = hit.absDen - hit.V; + epilog(hit.valid,hit); + } + } + + __forceinline bool occluded(Ray& ray, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const vuint<M>& geomID, const vuint<M>& primID) const + { + MoellerTrumboreHitM<M> hit; + MoellerTrumboreIntersector1<M> intersector(ray,nullptr); + Occluded1EpilogM<M,M,filter> epilog(ray,context,geomID,primID); + + /* intersect first triangle */ + if (intersector.intersect(ray,v0,v1,v3,hit)) + { + if (epilog(hit.valid,hit)) + return true; + } + + /* intersect second triangle */ + if (intersector.intersect(ray,v2,v3,v1,hit)) + { + hit.U = hit.absDen - hit.U; + hit.V = hit.absDen - hit.V; + if (epilog(hit.valid,hit)) + return true; + } + return false; + } + }; + +#if defined(__AVX512ER__) // KNL + + /*! 
Intersects 4 quads with 1 ray using AVX512 */ + template<bool filter> + struct QuadMIntersector1MoellerTrumbore<4,filter> + { + __forceinline QuadMIntersector1MoellerTrumbore() {} + + __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} + + template<typename Epilog> + __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const + { + const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)), + select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)), + select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z))); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z)); + const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z)); +#else + const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)), + select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)), + select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z))); + const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)), + select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)), + select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z))); +#endif + const vbool16 flags(0xf0f0); + + MoellerTrumboreHitM<16> hit; + MoellerTrumboreIntersector1<16> intersector(ray,nullptr); + if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit))) + { + vfloat16 U = hit.U, V = hit.V, absDen = hit.absDen; +#if !defined(EMBREE_BACKFACE_CULLING) + hit.U = select(flags,absDen-V,U); + hit.V = select(flags,absDen-U,V); + hit.vNg *= select(flags,vfloat16(-1.0f),vfloat16(1.0f)); // FIXME: use XOR +#else + hit.U = select(flags,absDen-U,U); + hit.V = select(flags,absDen-V,V); +#endif + if (likely(epilog(hit.valid,hit))) + return true; + } + return false; + } + + __forceinline bool intersect(RayHit& ray, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return 
intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID))); + } + + __forceinline bool occluded(Ray& ray, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID))); + } + }; + +#elif defined(__AVX__) + + /*! Intersects 4 quads with 1 ray using AVX */ + template<bool filter> + struct QuadMIntersector1MoellerTrumbore<4,filter> + { + __forceinline QuadMIntersector1MoellerTrumbore() {} + + __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} + + template<typename Epilog> + __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const + { + const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); + const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); +#else + const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); + const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); +#endif + MoellerTrumboreHitM<8> hit; + MoellerTrumboreIntersector1<8> intersector(ray,nullptr); + const vbool8 flags(0,0,0,0,1,1,1,1); + if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit))) + { + vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen; + +#if !defined(EMBREE_BACKFACE_CULLING) + hit.U = select(flags,absDen-V,U); + hit.V = select(flags,absDen-U,V); + hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); // FIXME: use XOR +#else + hit.U = select(flags,absDen-U,U); + hit.V = select(flags,absDen-V,V); +#endif + if (unlikely(epilog(hit.valid,hit))) + return true; + } + return false; + } + + __forceinline bool intersect(RayHit& ray, IntersectContext* 
context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID))); + } + + __forceinline bool occluded(Ray& ray, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID))); + } + }; + +#endif + + /* ----------------------------- */ + /* -- ray packet intersectors -- */ + /* ----------------------------- */ + + + struct MoellerTrumboreIntersector1KTriangleM + { + /*! Intersect k'th ray from ray packet of size K with M triangles. */ + template<int M, int K, typename Epilog> + static __forceinline bool intersect(RayK<K>& ray, + size_t k, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_e1, + const Vec3vf<M>& tri_e2, + const Vec3vf<M>& tri_Ng, + const vbool<M>& flags, + const Epilog& epilog) + { + /* calculate denominator */ + const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k); + const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k); + const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O; + const Vec3vf<M> R = cross(C,D); + const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D); + const vfloat<M> absDen = abs(den); + const vfloat<M> sgnDen = signmsk(den); + + /* perform edge tests */ + const vfloat<M> U = dot(R,Vec3vf<M>(tri_e2)) ^ sgnDen; + const vfloat<M> V = dot(R,Vec3vf<M>(tri_e1)) ^ sgnDen; + + /* perform backface culling */ +#if defined(EMBREE_BACKFACE_CULLING) + vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); +#else + vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); +#endif + if (likely(none(valid))) return false; + + /* perform depth test */ + const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen; + valid &= 
(absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k])); + if (likely(none(valid))) return false; + + /* calculate hit information */ + QuadHitM<M> hit(valid,U,V,T,absDen,tri_Ng,flags); + return epilog(valid,hit); + } + + template<int M, int K, typename Epilog> + static __forceinline bool intersect1(RayK<K>& ray, + size_t k, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + const vbool<M>& flags, + const Epilog& epilog) + { + const Vec3vf<M> e1 = v0-v1; + const Vec3vf<M> e2 = v2-v0; + const Vec3vf<M> Ng = cross(e2,e1); + return intersect(ray,k,v0,e1,e2,Ng,flags,epilog); + } + }; + + template<int M, int K, bool filter> + struct QuadMIntersectorKMoellerTrumboreBase + { + __forceinline QuadMIntersectorKMoellerTrumboreBase(const vbool<K>& valid, const RayK<K>& ray) {} + + /*! Intersects K rays with one of M triangles. */ + template<typename Epilog> + __forceinline vbool<K> intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& tri_v0, + const Vec3vf<K>& tri_e1, + const Vec3vf<K>& tri_e2, + const Vec3vf<K>& tri_Ng, + const vbool<K>& flags, + const Epilog& epilog) const + { + /* calculate denominator */ + vbool<K> valid = valid0; + const Vec3vf<K> C = tri_v0 - ray.org; + const Vec3vf<K> R = cross(C,ray.dir); + const vfloat<K> den = dot(tri_Ng,ray.dir); + const vfloat<K> absDen = abs(den); + const vfloat<K> sgnDen = signmsk(den); + + /* test against edge p2 p0 */ + const vfloat<K> U = dot(R,tri_e2) ^ sgnDen; + valid &= U >= 0.0f; + if (likely(none(valid))) return false; + + /* test against edge p0 p1 */ + const vfloat<K> V = dot(R,tri_e1) ^ sgnDen; + valid &= V >= 0.0f; + if (likely(none(valid))) return false; + + /* test against edge p1 p2 */ + const vfloat<K> W = absDen-U-V; + valid &= W >= 0.0f; + if (likely(none(valid))) return false; + + /* perform depth test */ + const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen; + valid &= (absDen*ray.tnear() < T) & (T <= absDen*ray.tfar); + if (unlikely(none(valid))) return false; 
+ + /* perform backface culling */ +#if defined(EMBREE_BACKFACE_CULLING) + valid &= den < vfloat<K>(zero); + if (unlikely(none(valid))) return false; +#else + valid &= den != vfloat<K>(zero); + if (unlikely(none(valid))) return false; +#endif + + /* calculate hit information */ + QuadHitK<K> hit(U,V,T,absDen,tri_Ng,flags); + return epilog(valid,hit); + } + + /*! Intersects K rays with one of M quads. */ + template<typename Epilog> + __forceinline vbool<K> intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& tri_v0, + const Vec3vf<K>& tri_v1, + const Vec3vf<K>& tri_v2, + const vbool<K>& flags, + const Epilog& epilog) const + { + const Vec3vf<K> e1 = tri_v0-tri_v1; + const Vec3vf<K> e2 = tri_v2-tri_v0; + const Vec3vf<K> Ng = cross(e2,e1); + return intersectK(valid0,ray,tri_v0,e1,e2,Ng,flags,epilog); + } + + /*! Intersects K rays with one of M quads. */ + template<typename Epilog> + __forceinline bool intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& v0, + const Vec3vf<K>& v1, + const Vec3vf<K>& v2, + const Vec3vf<K>& v3, + const Epilog& epilog) const + { + intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),epilog); + if (none(valid0)) return true; + intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),epilog); + return none(valid0); + } + }; + + template<int M, int K, bool filter> + struct QuadMIntersectorKMoellerTrumbore : public QuadMIntersectorKMoellerTrumboreBase<M,K,filter> + { + __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) + : QuadMIntersectorKMoellerTrumboreBase<M,K,filter>(valid,ray) {} + + __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const vuint<M>& geomID, const vuint<M>& primID) const + { + Intersect1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID); + MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog); + 
MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog); + } + + __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const vuint<M>& geomID, const vuint<M>& primID) const + { + Occluded1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID); + if (MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true; + if (MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true; + return false; + } + }; + + +#if defined(__AVX512ER__) // KNL + + /*! Intersects 4 quads with 1 ray using AVX512 */ + template<int K, bool filter> + struct QuadMIntersectorKMoellerTrumbore<4,K,filter> : public QuadMIntersectorKMoellerTrumboreBase<4,K,filter> + { + __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) + : QuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {} + + template<typename Epilog> + __forceinline bool intersect1(RayK<K>& ray, size_t k, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const + { + const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)), + select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)), + select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z))); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z)); + const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z)); +#else + const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)), + select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)), + select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z))); + const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)), + select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)), + select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z))); +#endif + const vbool16 flags(0xf0f0); + return 
MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog); + } + + __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); + } + + __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); + } + }; + +#elif defined(__AVX__) + + /*! Intersects 4 quads with 1 ray using AVX */ + template<int K, bool filter> + struct QuadMIntersectorKMoellerTrumbore<4,K,filter> : public QuadMIntersectorKMoellerTrumboreBase<4,K,filter> + { + __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) + : QuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {} + + template<typename Epilog> + __forceinline bool intersect1(RayK<K>& ray, size_t k, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const + { + const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); + const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); +#else + const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); + const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); +#endif + const vbool8 flags(0,0,0,0,1,1,1,1); + return MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog); + } + + __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* 
context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); + } + + __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); + } + }; + +#endif + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h new file mode 100644 index 0000000000..7ca3aed0a0 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h @@ -0,0 +1,529 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "quad_intersector_moeller.h" + +/*! Modified Pluecker ray/triangle intersector. The test first shifts + * the ray origin into the origin of the coordinate system and then + * uses Pluecker coordinates for the intersection. Due to the shift, + * the Pluecker coordinate calculation simplifies and the tests get + * numerically stable. The edge equations are watertight along the + * edge for neighboring triangles. 
*/ + +namespace embree +{ + namespace isa + { + template<int M> + struct QuadHitPlueckerM + { + __forceinline QuadHitPlueckerM() {} + + __forceinline QuadHitPlueckerM(const vbool<M>& valid, + const vfloat<M>& U, + const vfloat<M>& V, + const vfloat<M>& UVW, + const vfloat<M>& t, + const Vec3vf<M>& Ng, + const vbool<M>& flags) + : U(U), V(V), UVW(UVW), tri_Ng(Ng), valid(valid), vt(t), flags(flags) {} + + __forceinline void finalize() + { + const vbool<M> invalid = abs(UVW) < min_rcp_input; + const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW)); + const vfloat<M> u = min(U * rcpUVW,1.0f); + const vfloat<M> v = min(V * rcpUVW,1.0f); + const vfloat<M> u1 = vfloat<M>(1.0f) - u; + const vfloat<M> v1 = vfloat<M>(1.0f) - v; +#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING) + vu = select(flags,u1,u); + vv = select(flags,v1,v); + vNg = Vec3vf<M>(tri_Ng.x,tri_Ng.y,tri_Ng.z); +#else + const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f)); + vv = select(flags,u1,v); + vu = select(flags,v1,u); + vNg = Vec3vf<M>(flip*tri_Ng.x,flip*tri_Ng.y,flip*tri_Ng.z); +#endif + } + + __forceinline Vec2f uv(const size_t i) + { + const float u = vu[i]; + const float v = vv[i]; + return Vec2f(u,v); + } + + __forceinline float t(const size_t i) { return vt[i]; } + __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } + + private: + vfloat<M> U; + vfloat<M> V; + vfloat<M> UVW; + Vec3vf<M> tri_Ng; + + public: + vbool<M> valid; + vfloat<M> vu; + vfloat<M> vv; + vfloat<M> vt; + Vec3vf<M> vNg; + + public: + const vbool<M> flags; + }; + + template<int K> + struct QuadHitPlueckerK + { + __forceinline QuadHitPlueckerK(const vfloat<K>& U, + const vfloat<K>& V, + const vfloat<K>& UVW, + const vfloat<K>& t, + const Vec3vf<K>& Ng, + const vbool<K>& flags) + : U(U), V(V), UVW(UVW), t(t), flags(flags), tri_Ng(Ng) {} + + __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const + { + const vbool<K> invalid = 
abs(UVW) < min_rcp_input; + const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW)); + const vfloat<K> u0 = min(U * rcpUVW,1.0f); + const vfloat<K> v0 = min(V * rcpUVW,1.0f); + const vfloat<K> u1 = vfloat<K>(1.0f) - u0; + const vfloat<K> v1 = vfloat<K>(1.0f) - v0; + const vfloat<K> u = select(flags,u1,u0); + const vfloat<K> v = select(flags,v1,v0); + const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z); + return std::make_tuple(u,v,t,Ng); + } + + private: + const vfloat<K> U; + const vfloat<K> V; + const vfloat<K> UVW; + const vfloat<K> t; + const vbool<K> flags; + const Vec3vf<K> tri_Ng; + }; + + struct PlueckerIntersectorTriangle1 + { + template<int M, typename Epilog> + static __forceinline bool intersect(Ray& ray, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_v1, + const Vec3vf<M>& tri_v2, + const vbool<M>& flags, + const Epilog& epilog) + { + /* calculate vertices relative to ray origin */ + const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org); + const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir); + const Vec3vf<M> v0 = tri_v0-O; + const Vec3vf<M> v1 = tri_v1-O; + const Vec3vf<M> v2 = tri_v2-O; + + /* calculate triangle edges */ + const Vec3vf<M> e0 = v2-v0; + const Vec3vf<M> e1 = v0-v1; + const Vec3vf<M> e2 = v1-v2; + + /* perform edge tests */ + const vfloat<M> U = dot(cross(e0,v2+v0),D); + const vfloat<M> V = dot(cross(e1,v0+v1),D); + const vfloat<M> W = dot(cross(e2,v1+v2),D); + const vfloat<M> UVW = U+V+W; + const vfloat<M> eps = float(ulp)*abs(UVW); +#if defined(EMBREE_BACKFACE_CULLING) + vbool<M> valid = max(U,V,W) <= eps; +#else + vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); +#endif + if (unlikely(none(valid))) return false; + + /* calculate geometry normal and denominator */ + const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); + const vfloat<M> den = twice(dot(Ng,D)); + + /* perform depth test */ + const vfloat<M> T = twice(dot(v0,Ng)); + const vfloat<M> t = rcp(den)*T; + valid &= vfloat<M>(ray.tnear()) <= t & t <= 
vfloat<M>(ray.tfar); + valid &= den != vfloat<M>(zero); + if (unlikely(none(valid))) return false; + + /* update hit information */ + QuadHitPlueckerM<M> hit(valid,U,V,UVW,t,Ng,flags); + return epilog(valid,hit); + } + }; + + /*! Intersects M quads with 1 ray */ + template<int M, bool filter> + struct QuadMIntersector1Pluecker + { + __forceinline QuadMIntersector1Pluecker() {} + + __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} + + __forceinline void intersect(RayHit& ray, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const vuint<M>& geomID, const vuint<M>& primID) const + { + Intersect1EpilogM<M,M,filter> epilog(ray,context,geomID,primID); + PlueckerIntersectorTriangle1::intersect(ray,v0,v1,v3,vbool<M>(false),epilog); + PlueckerIntersectorTriangle1::intersect(ray,v2,v3,v1,vbool<M>(true),epilog); + } + + __forceinline bool occluded(Ray& ray, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const vuint<M>& geomID, const vuint<M>& primID) const + { + Occluded1EpilogM<M,M,filter> epilog(ray,context,geomID,primID); + if (PlueckerIntersectorTriangle1::intersect(ray,v0,v1,v3,vbool<M>(false),epilog)) return true; + if (PlueckerIntersectorTriangle1::intersect(ray,v2,v3,v1,vbool<M>(true ),epilog)) return true; + return false; + } + }; + +#if defined(__AVX512ER__) // KNL + + /*! 
Intersects 4 quads with 1 ray using AVX512 */ + template<bool filter> + struct QuadMIntersector1Pluecker<4,filter> + { + __forceinline QuadMIntersector1Pluecker() {} + + __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} + + template<typename Epilog> + __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const + { + const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)), + select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)), + select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z))); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z)); + const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z)); +#else + const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)), + select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)), + select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z))); + const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)), + select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)), + select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z))); +#endif + const vbool16 flags(0xf0f0); + return PlueckerIntersectorTriangle1::intersect(ray,vtx0,vtx1,vtx2,flags,epilog); + } + + __forceinline bool intersect(RayHit& ray, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID))); + } + + __forceinline bool occluded(Ray& ray, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID))); + } + }; + +#elif defined(__AVX__) + + /*! 
Intersects 4 quads with 1 ray using AVX */ + template<bool filter> + struct QuadMIntersector1Pluecker<4,filter> + { + __forceinline QuadMIntersector1Pluecker() {} + + __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} + + template<typename Epilog> + __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const + { + const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); + const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); +#else + const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); + const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); +#endif + const vbool8 flags(0,0,0,0,1,1,1,1); + return PlueckerIntersectorTriangle1::intersect(ray,vtx0,vtx1,vtx2,flags,epilog); + } + + __forceinline bool intersect(RayHit& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID))); + } + + __forceinline bool occluded(Ray& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID))); + } + }; + +#endif + + + /* ----------------------------- */ + /* -- ray packet intersectors -- */ + /* ----------------------------- */ + + struct PlueckerIntersector1KTriangleM + { + /*! Intersect k'th ray from ray packet of size K with M triangles. 
*/ + template<int M, int K, typename Epilog> + static __forceinline bool intersect1(RayK<K>& ray, + size_t k, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_v1, + const Vec3vf<M>& tri_v2, + const vbool<M>& flags, + const Epilog& epilog) + { + /* calculate vertices relative to ray origin */ + const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k); + const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k); + const Vec3vf<M> v0 = tri_v0-O; + const Vec3vf<M> v1 = tri_v1-O; + const Vec3vf<M> v2 = tri_v2-O; + + /* calculate triangle edges */ + const Vec3vf<M> e0 = v2-v0; + const Vec3vf<M> e1 = v0-v1; + const Vec3vf<M> e2 = v1-v2; + + /* perform edge tests */ + const vfloat<M> U = dot(cross(e0,v2+v0),D); + const vfloat<M> V = dot(cross(e1,v0+v1),D); + const vfloat<M> W = dot(cross(e2,v1+v2),D); + const vfloat<M> UVW = U+V+W; + const vfloat<M> eps = float(ulp)*abs(UVW); +#if defined(EMBREE_BACKFACE_CULLING) + vbool<M> valid = max(U,V,W) <= eps; +#else + vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); +#endif + if (unlikely(none(valid))) return false; + + /* calculate geometry normal and denominator */ + const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); + const vfloat<M> den = twice(dot(Ng,D)); + + /* perform depth test */ + const vfloat<M> T = twice(dot(v0,Ng)); + const vfloat<M> t = rcp(den)*T; + valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]); + if (unlikely(none(valid))) return false; + + /* avoid division by 0 */ + valid &= den != vfloat<M>(zero); + if (unlikely(none(valid))) return false; + + /* update hit information */ + QuadHitPlueckerM<M> hit(valid,U,V,UVW,t,Ng,flags); + return epilog(valid,hit); + } + }; + + template<int M, int K, bool filter> + struct QuadMIntersectorKPlueckerBase + { + __forceinline QuadMIntersectorKPlueckerBase(const vbool<K>& valid, const RayK<K>& ray) {} + + /*! Intersects K rays with one of M triangles. 
*/ + template<typename Epilog> + __forceinline vbool<K> intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& tri_v0, + const Vec3vf<K>& tri_v1, + const Vec3vf<K>& tri_v2, + const vbool<K>& flags, + const Epilog& epilog) const + { + /* calculate vertices relative to ray origin */ + vbool<K> valid = valid0; + const Vec3vf<K> O = ray.org; + const Vec3vf<K> D = ray.dir; + const Vec3vf<K> v0 = tri_v0-O; + const Vec3vf<K> v1 = tri_v1-O; + const Vec3vf<K> v2 = tri_v2-O; + + /* calculate triangle edges */ + const Vec3vf<K> e0 = v2-v0; + const Vec3vf<K> e1 = v0-v1; + const Vec3vf<K> e2 = v1-v2; + + /* perform edge tests */ + const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D); + const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D); + const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D); + const vfloat<K> UVW = U+V+W; + const vfloat<K> eps = float(ulp)*abs(UVW); +#if defined(EMBREE_BACKFACE_CULLING) + valid &= max(U,V,W) <= eps; +#else + valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); +#endif + if (unlikely(none(valid))) return false; + + /* calculate geometry normal and denominator */ + const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2); + const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D)); + + /* perform depth test */ + const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng))); + const vfloat<K> t = rcp(den)*T; + valid &= ray.tnear() <= t & t <= ray.tfar; + valid &= den != vfloat<K>(zero); + if (unlikely(none(valid))) return false; + + /* calculate hit information */ + QuadHitPlueckerK<K> hit(U,V,UVW,t,Ng,flags); + return epilog(valid,hit); + } + + /*! Intersects K rays with one of M quads. 
*/ + template<typename Epilog> + __forceinline bool intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& v0, + const Vec3vf<K>& v1, + const Vec3vf<K>& v2, + const Vec3vf<K>& v3, + const Epilog& epilog) const + { + intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),epilog); + if (none(valid0)) return true; + intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),epilog); + return none(valid0); + } + }; + + template<int M, int K, bool filter> + struct QuadMIntersectorKPluecker : public QuadMIntersectorKPlueckerBase<M,K,filter> + { + __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray) + : QuadMIntersectorKPlueckerBase<M,K,filter>(valid,ray) {} + + __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const vuint<M>& geomID, const vuint<M>& primID) const + { + Intersect1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID); + PlueckerIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog); + PlueckerIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog); + } + + __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const vuint<M>& geomID, const vuint<M>& primID) const + { + Occluded1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID); + if (PlueckerIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true; + if (PlueckerIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true; + return false; + } + }; + +#if defined(__AVX512ER__) // KNL + + /*! 
Intersects 4 quads with 1 ray using AVX512 */ + template<int K, bool filter> + struct QuadMIntersectorKPluecker<4,K,filter> : public QuadMIntersectorKPlueckerBase<4,K,filter> + { + __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray) + : QuadMIntersectorKPlueckerBase<4,K,filter>(valid,ray) {} + + template<typename Epilog> + __forceinline bool intersect1(RayK<K>& ray, size_t k, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const + { + const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)), + select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)), + select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z))); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z)); + const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z)); +#else + const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)), + select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)), + select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z))); + const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)), + select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)), + select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z))); +#endif + + const vbool16 flags(0xf0f0); + return PlueckerIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog); + } + + __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); + } + + __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return 
intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); + } + }; + +#elif defined(__AVX__) + + /*! Intersects 4 quads with 1 ray using AVX */ + template<int K, bool filter> + struct QuadMIntersectorKPluecker<4,K,filter> : public QuadMIntersectorKPlueckerBase<4,K,filter> + { + __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray) + : QuadMIntersectorKPlueckerBase<4,K,filter>(valid,ray) {} + + template<typename Epilog> + __forceinline bool intersect1(RayK<K>& ray, size_t k, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const + { + const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); + const vbool8 flags(0,0,0,0,1,1,1,1); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); + const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); +#else + const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); + const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); +#endif + return PlueckerIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog); + } + + __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); + } + + __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const vuint4& geomID, const vuint4& primID) const + { + return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); + } + }; + +#endif + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadi.h 
b/thirdparty/embree-aarch64/kernels/geometry/quadi.h new file mode 100644 index 0000000000..741ec519ab --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/quadi.h @@ -0,0 +1,483 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "primitive.h" +#include "../common/scene.h" + +namespace embree +{ + /* Stores M quads from an indexed face set */ + template <int M> + struct QuadMi + { + /* Virtual interface to query information about the quad type */ + struct Type : public PrimitiveType + { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + static Type type; + + public: + + /* primitive supports multiple time segments */ + static const bool singleTimeSegment = false; + + /* Returns maximum number of stored quads */ + static __forceinline size_t max_size() { return M; } + + /* Returns required number of primitive blocks for N primitives */ + static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } + + public: + + /* Default constructor */ + __forceinline QuadMi() { } + + /* Construction from vertices and IDs */ + __forceinline QuadMi(const vuint<M>& v0, + const vuint<M>& v1, + const vuint<M>& v2, + const vuint<M>& v3, + const vuint<M>& geomIDs, + const vuint<M>& primIDs) +#if defined(EMBREE_COMPACT_POLYS) + : geomIDs(geomIDs), primIDs(primIDs) {} +#else + : v0_(v0),v1_(v1), v2_(v2), v3_(v3), geomIDs(geomIDs), primIDs(primIDs) {} +#endif + + /* Returns a mask that tells which quads are valid */ + __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); } + + /* Returns if the specified quad is valid */ + __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; } + + /* Returns the number of stored quads */ + __forceinline size_t size() const { return bsf(~movemask(valid())); } + + /* Returns the geometry IDs */ + 
__forceinline vuint<M>& geomID() { return geomIDs; } + __forceinline const vuint<M>& geomID() const { return geomIDs; } + __forceinline unsigned int geomID(const size_t i) const { assert(i<M); assert(geomIDs[i] != -1); return geomIDs[i]; } + + /* Returns the primitive IDs */ + __forceinline vuint<M>& primID() { return primIDs; } + __forceinline const vuint<M>& primID() const { return primIDs; } + __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } + + /* Calculate the bounds of the quads */ + __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const + { + BBox3fa bounds = empty; + for (size_t i=0; i<M && valid(i); i++) { + const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i)); + bounds.extend(mesh->bounds(primID(i),itime)); + } + return bounds; + } + + /* Calculate the linear bounds of the primitive */ + __forceinline LBBox3fa linearBounds(const Scene* const scene, const size_t itime) { + return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1)); + } + + __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps) + { + LBBox3fa allBounds = empty; + for (size_t i=0; i<M && valid(i); i++) + { + const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i)); + allBounds.extend(mesh->linearBounds(primID(i), itime, numTimeSteps)); + } + return allBounds; + } + + __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range) + { + LBBox3fa allBounds = empty; + for (size_t i=0; i<M && valid(i); i++) + { + const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i)); + allBounds.extend(mesh->linearBounds(primID(i), time_range)); + } + return allBounds; + } + + /* Fill quad from quad list */ + template<typename PrimRefT> + __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene) + { + vuint<M> geomID = -1, primID = -1; + const PrimRefT* prim = &prims[begin]; + vuint<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero; + + for 
(size_t i=0; i<M; i++) + { + if (begin<end) { + geomID[i] = prim->geomID(); + primID[i] = prim->primID(); +#if !defined(EMBREE_COMPACT_POLYS) + const QuadMesh* mesh = scene->get<QuadMesh>(prim->geomID()); + const QuadMesh::Quad& q = mesh->quad(prim->primID()); + unsigned int_stride = mesh->vertices0.getStride()/4; + v0[i] = q.v[0] * int_stride; + v1[i] = q.v[1] * int_stride; + v2[i] = q.v[2] * int_stride; + v3[i] = q.v[3] * int_stride; +#endif + begin++; + } else { + assert(i); + if (likely(i > 0)) { + geomID[i] = geomID[0]; // always valid geomIDs + primID[i] = -1; // indicates invalid data + v0[i] = v0[0]; + v1[i] = v0[0]; + v2[i] = v0[0]; + v3[i] = v0[0]; + } + } + if (begin<end) prim = &prims[begin]; + } + new (this) QuadMi(v0,v1,v2,v3,geomID,primID); // FIXME: use non temporal store + } + + __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime) + { + fill(prims, begin, end, scene); + return linearBounds(scene, itime); + } + + __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range) + { + fill(prims, begin, end, scene); + return linearBounds(scene, time_range); + } + + friend embree_ostream operator<<(embree_ostream cout, const QuadMi& quad) { + return cout << "QuadMi<" << M << ">( " +#if !defined(EMBREE_COMPACT_POLYS) + << "v0 = " << quad.v0_ << ", v1 = " << quad.v1_ << ", v2 = " << quad.v2_ << ", v3 = " << quad.v3_ << ", " +#endif + << "geomID = " << quad.geomIDs << ", primID = " << quad.primIDs << " )"; + } + + protected: +#if !defined(EMBREE_COMPACT_POLYS) + vuint<M> v0_; // 4 byte offset of 1st vertex + vuint<M> v1_; // 4 byte offset of 2nd vertex + vuint<M> v2_; // 4 byte offset of 3rd vertex + vuint<M> v3_; // 4 byte offset of 4th vertex +#endif + vuint<M> geomIDs; // geometry ID of mesh + vuint<M> primIDs; // primitive ID of primitive inside mesh + }; + + namespace isa + { + + template<int M> + struct QuadMi : public embree::QuadMi<M> + { 
+#if !defined(EMBREE_COMPACT_POLYS) + using embree::QuadMi<M>::v0_; + using embree::QuadMi<M>::v1_; + using embree::QuadMi<M>::v2_; + using embree::QuadMi<M>::v3_; +#endif + using embree::QuadMi<M>::geomIDs; + using embree::QuadMi<M>::primIDs; + using embree::QuadMi<M>::geomID; + using embree::QuadMi<M>::primID; + using embree::QuadMi<M>::valid; + + template<int vid> + __forceinline Vec3f getVertex(const size_t index, const Scene *const scene) const + { +#if defined(EMBREE_COMPACT_POLYS) + const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); + const QuadMesh::Quad& quad = mesh->quad(primID(index)); + return (Vec3f) mesh->vertices[0][quad.v[vid]]; +#else + const vuint<M>& v = getVertexOffset<vid>(); + const float* vertices = scene->vertices[geomID(index)]; + return (Vec3f&) vertices[v[index]]; +#endif + } + + template<int vid, typename T> + __forceinline Vec3<T> getVertex(const size_t index, const Scene *const scene, const size_t itime, const T& ftime) const + { +#if defined(EMBREE_COMPACT_POLYS) + const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); + const QuadMesh::Quad& quad = mesh->quad(primID(index)); + const Vec3fa v0 = mesh->vertices[itime+0][quad.v[vid]]; + const Vec3fa v1 = mesh->vertices[itime+1][quad.v[vid]]; +#else + const vuint<M>& v = getVertexOffset<vid>(); + const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); + const float* vertices0 = (const float*) mesh->vertexPtr(0,itime+0); + const float* vertices1 = (const float*) mesh->vertexPtr(0,itime+1); + const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]); + const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]); +#endif + const Vec3<T> p0(v0.x,v0.y,v0.z); + const Vec3<T> p1(v1.x,v1.y,v1.z); + return lerp(p0,p1,ftime); + } + + template<int vid, int K, typename T> + __forceinline Vec3<T> getVertex(const vbool<K>& valid, const size_t index, const Scene *const scene, const vint<K>& itime, const T& ftime) const + { + Vec3<T> p0, p1; + const QuadMesh* mesh = 
scene->get<QuadMesh>(geomID(index)); + + for (size_t mask=movemask(valid), i=bsf(mask); mask; mask=btc(mask,i), i=bsf(mask)) + { +#if defined(EMBREE_COMPACT_POLYS) + const QuadMesh::Quad& quad = mesh->quad(primID(index)); + const Vec3fa v0 = mesh->vertices[itime[i]+0][quad.v[vid]]; + const Vec3fa v1 = mesh->vertices[itime[i]+1][quad.v[vid]]; +#else + const vuint<M>& v = getVertexOffset<vid>(); + const float* vertices0 = (const float*) mesh->vertexPtr(0,itime[i]+0); + const float* vertices1 = (const float*) mesh->vertexPtr(0,itime[i]+1); + const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]); + const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]); +#endif + p0.x[i] = v0.x; p0.y[i] = v0.y; p0.z[i] = v0.z; + p1.x[i] = v1.x; p1.y[i] = v1.y; p1.z[i] = v1.z; + } + return (T(one)-ftime)*p0 + ftime*p1; + } + + struct Quad { + vfloat4 v0,v1,v2,v3; + }; + +#if defined(EMBREE_COMPACT_POLYS) + + __forceinline Quad loadQuad(const int i, const Scene* const scene) const + { + const unsigned int geomID = geomIDs[i]; + const unsigned int primID = primIDs[i]; + if (unlikely(primID == -1)) return { zero, zero, zero, zero }; + const QuadMesh* mesh = scene->get<QuadMesh>(geomID); + const QuadMesh::Quad& quad = mesh->quad(primID); + const vfloat4 v0 = (vfloat4) mesh->vertices0[quad.v[0]]; + const vfloat4 v1 = (vfloat4) mesh->vertices0[quad.v[1]]; + const vfloat4 v2 = (vfloat4) mesh->vertices0[quad.v[2]]; + const vfloat4 v3 = (vfloat4) mesh->vertices0[quad.v[3]]; + return { v0, v1, v2, v3 }; + } + + __forceinline Quad loadQuad(const int i, const int itime, const Scene* const scene) const + { + const unsigned int geomID = geomIDs[i]; + const unsigned int primID = primIDs[i]; + if (unlikely(primID == -1)) return { zero, zero, zero, zero }; + const QuadMesh* mesh = scene->get<QuadMesh>(geomID); + const QuadMesh::Quad& quad = mesh->quad(primID); + const vfloat4 v0 = (vfloat4) mesh->vertices[itime][quad.v[0]]; + const vfloat4 v1 = (vfloat4) mesh->vertices[itime][quad.v[1]]; + const vfloat4 v2 = 
(vfloat4) mesh->vertices[itime][quad.v[2]]; + const vfloat4 v3 = (vfloat4) mesh->vertices[itime][quad.v[3]]; + return { v0, v1, v2, v3 }; + } + +#else + + __forceinline Quad loadQuad(const int i, const Scene* const scene) const + { + const float* vertices = scene->vertices[geomID(i)]; + const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]); + const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]); + const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]); + const vfloat4 v3 = vfloat4::loadu(vertices + v3_[i]); + return { v0, v1, v2, v3 }; + } + + __forceinline Quad loadQuad(const int i, const int itime, const Scene* const scene) const + { + const unsigned int geomID = geomIDs[i]; + const QuadMesh* mesh = scene->get<QuadMesh>(geomID); + const float* vertices = (const float*) mesh->vertexPtr(0,itime); + const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]); + const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]); + const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]); + const vfloat4 v3 = vfloat4::loadu(vertices + v3_[i]); + return { v0, v1, v2, v3 }; + } + +#endif + + /* Gather the quads */ + __forceinline void gather(Vec3vf<M>& p0, + Vec3vf<M>& p1, + Vec3vf<M>& p2, + Vec3vf<M>& p3, + const Scene *const scene) const; + +#if defined(__AVX512F__) + __forceinline void gather(Vec3vf16& p0, + Vec3vf16& p1, + Vec3vf16& p2, + Vec3vf16& p3, + const Scene *const scene) const; +#endif + + template<int K> +#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 2000) // workaround for compiler bug in ICC 2019 + __noinline +#else + __forceinline +#endif + void gather(const vbool<K>& valid, + Vec3vf<K>& p0, + Vec3vf<K>& p1, + Vec3vf<K>& p2, + Vec3vf<K>& p3, + const size_t index, + const Scene* const scene, + const vfloat<K>& time) const + { + const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); + + vfloat<K> ftime; + const vint<K> itime = mesh->timeSegment(time, ftime); + + const size_t first = bsf(movemask(valid)); + if (likely(all(valid,itime[first] == itime))) + { + p0 = 
getVertex<0>(index, scene, itime[first], ftime); + p1 = getVertex<1>(index, scene, itime[first], ftime); + p2 = getVertex<2>(index, scene, itime[first], ftime); + p3 = getVertex<3>(index, scene, itime[first], ftime); + } + else + { + p0 = getVertex<0>(valid, index, scene, itime, ftime); + p1 = getVertex<1>(valid, index, scene, itime, ftime); + p2 = getVertex<2>(valid, index, scene, itime, ftime); + p3 = getVertex<3>(valid, index, scene, itime, ftime); + } + } + + __forceinline void gather(Vec3vf<M>& p0, + Vec3vf<M>& p1, + Vec3vf<M>& p2, + Vec3vf<M>& p3, + const QuadMesh* mesh, + const Scene *const scene, + const int itime) const; + + __forceinline void gather(Vec3vf<M>& p0, + Vec3vf<M>& p1, + Vec3vf<M>& p2, + Vec3vf<M>& p3, + const Scene *const scene, + const float time) const; + + /* Updates the primitive */ + __forceinline BBox3fa update(QuadMesh* mesh) + { + BBox3fa bounds = empty; + for (size_t i=0; i<M; i++) + { + if (!valid(i)) break; + const unsigned primId = primID(i); + const QuadMesh::Quad& q = mesh->quad(primId); + const Vec3fa p0 = mesh->vertex(q.v[0]); + const Vec3fa p1 = mesh->vertex(q.v[1]); + const Vec3fa p2 = mesh->vertex(q.v[2]); + const Vec3fa p3 = mesh->vertex(q.v[3]); + bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2),BBox3fa(p3))); + } + return bounds; + } + + private: +#if !defined(EMBREE_COMPACT_POLYS) + template<int N> const vuint<M>& getVertexOffset() const; +#endif + }; + +#if !defined(EMBREE_COMPACT_POLYS) + template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<0>() const { return v0_; } + template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<1>() const { return v1_; } + template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<2>() const { return v2_; } + template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<3>() const { return v3_; } +#endif + + template<> + __forceinline void QuadMi<4>::gather(Vec3vf4& p0, + Vec3vf4& p1, + Vec3vf4& 
p2, + Vec3vf4& p3, + const Scene *const scene) const + { + prefetchL1(((char*)this)+0*64); + prefetchL1(((char*)this)+1*64); + const Quad tri0 = loadQuad(0,scene); + const Quad tri1 = loadQuad(1,scene); + const Quad tri2 = loadQuad(2,scene); + const Quad tri3 = loadQuad(3,scene); + transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z); + transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z); + transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z); + transpose(tri0.v3,tri1.v3,tri2.v3,tri3.v3,p3.x,p3.y,p3.z); + } + + template<> + __forceinline void QuadMi<4>::gather(Vec3vf4& p0, + Vec3vf4& p1, + Vec3vf4& p2, + Vec3vf4& p3, + const QuadMesh* mesh, + const Scene *const scene, + const int itime) const + { + // FIXME: for trianglei there all geometries are identical, is this the case here too? + + const Quad tri0 = loadQuad(0,itime,scene); + const Quad tri1 = loadQuad(1,itime,scene); + const Quad tri2 = loadQuad(2,itime,scene); + const Quad tri3 = loadQuad(3,itime,scene); + transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z); + transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z); + transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z); + transpose(tri0.v3,tri1.v3,tri2.v3,tri3.v3,p3.x,p3.y,p3.z); + } + + template<> + __forceinline void QuadMi<4>::gather(Vec3vf4& p0, + Vec3vf4& p1, + Vec3vf4& p2, + Vec3vf4& p3, + const Scene *const scene, + const float time) const + { + const QuadMesh* mesh = scene->get<QuadMesh>(geomID(0)); // in mblur mode all geometries are identical + + float ftime; + const int itime = mesh->timeSegment(time, ftime); + + Vec3vf4 a0,a1,a2,a3; gather(a0,a1,a2,a3,mesh,scene,itime); + Vec3vf4 b0,b1,b2,b3; gather(b0,b1,b2,b3,mesh,scene,itime+1); + p0 = lerp(a0,b0,vfloat4(ftime)); + p1 = lerp(a1,b1,vfloat4(ftime)); + p2 = lerp(a2,b2,vfloat4(ftime)); + p3 = lerp(a3,b3,vfloat4(ftime)); + } + } + + template<int M> + typename QuadMi<M>::Type QuadMi<M>::type; + + typedef QuadMi<4> Quad4i; +} diff --git 
a/thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h new file mode 100644 index 0000000000..96cf7f1ca2 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h @@ -0,0 +1,350 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "quadi.h" +#include "quad_intersector_moeller.h" +#include "quad_intersector_pluecker.h" + +namespace embree +{ + namespace isa + { + /*! Intersects M quads with 1 ray */ + template<int M, bool filter> + struct QuadMiIntersector1Moeller + { + typedef QuadMi<M> Primitive; + typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations; + + /*! Intersect a ray with the M quads and updates the hit. */ + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); + pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + + /*! Test if the ray is occluded by one of M quads. */ + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad) + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); + return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); + } + }; + + /*! Intersects M triangles with K rays. */ + template<int M, int K, bool filter> + struct QuadMiIntersectorKMoeller + { + typedef QuadMi<M> Primitive; + typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations; + + /*! Intersects K rays with M triangles. 
*/ + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) + { + Scene* scene = context->scene; + for (size_t i=0; i<QuadMi<M>::max_size(); i++) + { + if (!quad.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene); + const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene); + const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene); + const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene); + pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i)); + } + } + + /*! Test for K rays if they are occluded by any of the M triangles. */ + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) + { + Scene* scene = context->scene; + vbool<K> valid0 = valid_i; + for (size_t i=0; i<QuadMi<M>::max_size(); i++) + { + if (!quad.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid0),K); + const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene); + const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene); + const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene); + const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene); + if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i))) + break; + } + return !valid0; + } + + /*! Intersect a ray with M triangles and updates the hit. */ + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf4 v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); + pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + + /*! Test if the ray is occluded by one of the M triangles. 
*/ + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf4 v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); + return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + }; + + /*! Intersects M quads with 1 ray */ + template<int M, bool filter> + struct QuadMiIntersector1Pluecker + { + typedef QuadMi<M> Primitive; + typedef QuadMIntersector1Pluecker<M,filter> Precalculations; + + /*! Intersect a ray with the M quads and updates the hit. */ + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); + pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + + /*! Test if the ray is occluded by one of M quads. */ + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad) + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); + return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); + } + }; + + /*! Intersects M triangles with K rays. */ + template<int M, int K, bool filter> + struct QuadMiIntersectorKPluecker + { + typedef QuadMi<M> Primitive; + typedef QuadMIntersectorKPluecker<M,K,filter> Precalculations; + + /*! Intersects K rays with M triangles. 
*/ + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) + { + Scene* scene = context->scene; + for (size_t i=0; i<QuadMi<M>::max_size(); i++) + { + if (!quad.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene); + const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene); + const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene); + const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene); + pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i)); + } + } + + /*! Test for K rays if they are occluded by any of the M triangles. */ + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) + { + Scene* scene = context->scene; + vbool<K> valid0 = valid_i; + for (size_t i=0; i<QuadMi<M>::max_size(); i++) + { + if (!quad.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid0),K); + const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene); + const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene); + const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene); + const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene); + if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i))) + break; + } + return !valid0; + } + + /*! Intersect a ray with M triangles and updates the hit. */ + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf4 v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); + pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + + /*! Test if the ray is occluded by one of the M triangles. 
*/ + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf4 v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); + return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + }; + + /*! Intersects M motion blur quads with 1 ray */ + template<int M, bool filter> + struct QuadMiMBIntersector1Moeller + { + typedef QuadMi<M> Primitive; + typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations; + + /*! Intersect a ray with the M quads and updates the hit. */ + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()); + pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + + /*! Test if the ray is occluded by one of M quads. */ + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad) + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()); + return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); + } + }; + + /*! Intersects M motion blur quads with K rays. */ + template<int M, int K, bool filter> + struct QuadMiMBIntersectorKMoeller + { + typedef QuadMi<M> Primitive; + typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations; + + /*! Intersects K rays with M quads. 
*/ + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) + { + for (size_t i=0; i<QuadMi<M>::max_size(); i++) + { + if (!quad.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); + pre.intersectK(valid_i,ray,v0,v1,v2,v3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i)); + } + } + + /*! Test for K rays if they are occluded by any of the M quads. */ + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) + { + vbool<K> valid0 = valid_i; + for (size_t i=0; i<QuadMi<M>::max_size(); i++) + { + if (!quad.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid0),K); + Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); + if (pre.intersectK(valid0,ray,v0,v1,v2,v3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i))) + break; + } + return !valid0; + } + + /*! Intersect a ray with M quads and updates the hit. */ + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()[k]); + pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + + /*! Test if the ray is occluded by one of the M quads. */ + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()[k]); + return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + }; + + /*! 
Intersects M motion blur quads with 1 ray */ + template<int M, bool filter> + struct QuadMiMBIntersector1Pluecker + { + typedef QuadMi<M> Primitive; + typedef QuadMIntersector1Pluecker<M,filter> Precalculations; + + /*! Intersect a ray with the M quads and updates the hit. */ + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()); + pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + + /*! Test if the ray is occluded by one of M quads. */ + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad) + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()); + return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); + } + }; + + /*! Intersects M motion blur quads with K rays. */ + template<int M, int K, bool filter> + struct QuadMiMBIntersectorKPluecker + { + typedef QuadMi<M> Primitive; + typedef QuadMIntersectorKPluecker<M,K,filter> Precalculations; + + /*! Intersects K rays with M quads. */ + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) + { + for (size_t i=0; i<QuadMi<M>::max_size(); i++) + { + if (!quad.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); + pre.intersectK(valid_i,ray,v0,v1,v2,v3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i)); + } + } + + /*! 
Test for K rays if they are occluded by any of the M quads. */ + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) + { + vbool<K> valid0 = valid_i; + for (size_t i=0; i<QuadMi<M>::max_size(); i++) + { + if (!quad.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid0),K); + Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); + if (pre.intersectK(valid0,ray,v0,v1,v2,v3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i))) + break; + } + return !valid0; + } + + /*! Intersect a ray with M quads and updates the hit. */ + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()[k]); + pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + + /*! Test if the ray is occluded by one of the M quads. 
*/ + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()[k]); + return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadv.h b/thirdparty/embree-aarch64/kernels/geometry/quadv.h new file mode 100644 index 0000000000..0a1fe4d128 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/quadv.h @@ -0,0 +1,165 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "primitive.h" + +namespace embree +{ + /* Stores the vertices of M quads in struct of array layout */ + template <int M> + struct QuadMv + { + public: + struct Type : public PrimitiveType + { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + static Type type; + + public: + + /* Returns maximum number of stored quads */ + static __forceinline size_t max_size() { return M; } + + /* Returns required number of primitive blocks for N primitives */ + static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } + + public: + + /* Default constructor */ + __forceinline QuadMv() {} + + /* Construction from vertices and IDs */ + __forceinline QuadMv(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const vuint<M>& geomIDs, const vuint<M>& primIDs) + : v0(v0), v1(v1), v2(v2), v3(v3), geomIDs(geomIDs), primIDs(primIDs) {} + + /* Returns a mask that tells which quads are valid */ + __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); } + + /* Returns true if the specified quad is valid */ + __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; } + + /* 
Returns the number of stored quads */ + __forceinline size_t size() const { return bsf(~movemask(valid())); } + + /* Returns the geometry IDs */ + __forceinline vuint<M>& geomID() { return geomIDs; } + __forceinline const vuint<M>& geomID() const { return geomIDs; } + __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; } + + /* Returns the primitive IDs */ + __forceinline vuint<M> primID() { return primIDs; } + __forceinline const vuint<M> primID() const { return primIDs; } + __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } + + /* Calculate the bounds of the quads */ + __forceinline BBox3fa bounds() const + { + Vec3vf<M> lower = min(v0,v1,v2,v3); + Vec3vf<M> upper = max(v0,v1,v2,v3); + vbool<M> mask = valid(); + lower.x = select(mask,lower.x,vfloat<M>(pos_inf)); + lower.y = select(mask,lower.y,vfloat<M>(pos_inf)); + lower.z = select(mask,lower.z,vfloat<M>(pos_inf)); + upper.x = select(mask,upper.x,vfloat<M>(neg_inf)); + upper.y = select(mask,upper.y,vfloat<M>(neg_inf)); + upper.z = select(mask,upper.z,vfloat<M>(neg_inf)); + return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)), + Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z))); + } + + /* Non temporal store */ + __forceinline static void store_nt(QuadMv* dst, const QuadMv& src) + { + vfloat<M>::store_nt(&dst->v0.x,src.v0.x); + vfloat<M>::store_nt(&dst->v0.y,src.v0.y); + vfloat<M>::store_nt(&dst->v0.z,src.v0.z); + vfloat<M>::store_nt(&dst->v1.x,src.v1.x); + vfloat<M>::store_nt(&dst->v1.y,src.v1.y); + vfloat<M>::store_nt(&dst->v1.z,src.v1.z); + vfloat<M>::store_nt(&dst->v2.x,src.v2.x); + vfloat<M>::store_nt(&dst->v2.y,src.v2.y); + vfloat<M>::store_nt(&dst->v2.z,src.v2.z); + vfloat<M>::store_nt(&dst->v3.x,src.v3.x); + vfloat<M>::store_nt(&dst->v3.y,src.v3.y); + vfloat<M>::store_nt(&dst->v3.z,src.v3.z); + vuint<M>::store_nt(&dst->geomIDs,src.geomIDs); + 
vuint<M>::store_nt(&dst->primIDs,src.primIDs); + } + + /* Fill quad from quad list */ + __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene) + { + vuint<M> vgeomID = -1, vprimID = -1; + Vec3vf<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero; + + for (size_t i=0; i<M && begin<end; i++, begin++) + { + const PrimRef& prim = prims[begin]; + const unsigned geomID = prim.geomID(); + const unsigned primID = prim.primID(); + const QuadMesh* __restrict__ const mesh = scene->get<QuadMesh>(geomID); + const QuadMesh::Quad& quad = mesh->quad(primID); + const Vec3fa& p0 = mesh->vertex(quad.v[0]); + const Vec3fa& p1 = mesh->vertex(quad.v[1]); + const Vec3fa& p2 = mesh->vertex(quad.v[2]); + const Vec3fa& p3 = mesh->vertex(quad.v[3]); + vgeomID [i] = geomID; + vprimID [i] = primID; + v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; + v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; + v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; + v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z; + } + QuadMv::store_nt(this,QuadMv(v0,v1,v2,v3,vgeomID,vprimID)); + } + + /* Updates the primitive */ + __forceinline BBox3fa update(QuadMesh* mesh) + { + BBox3fa bounds = empty; + vuint<M> vgeomID = -1, vprimID = -1; + Vec3vf<M> v0 = zero, v1 = zero, v2 = zero; + + for (size_t i=0; i<M; i++) + { + if (primID(i) == -1) break; + const unsigned geomId = geomID(i); + const unsigned primId = primID(i); + const QuadMesh::Quad& quad = mesh->quad(primId); + const Vec3fa p0 = mesh->vertex(quad.v[0]); + const Vec3fa p1 = mesh->vertex(quad.v[1]); + const Vec3fa p2 = mesh->vertex(quad.v[2]); + const Vec3fa p3 = mesh->vertex(quad.v[3]); + bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2),BBox3fa(p3))); + vgeomID [i] = geomId; + vprimID [i] = primId; + v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; + v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; + v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; + v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z; + } + new (this) 
QuadMv(v0,v1,v2,v3,vgeomID,vprimID); + return bounds; + } + + public: + Vec3vf<M> v0; // 1st vertex of the quads + Vec3vf<M> v1; // 2nd vertex of the quads + Vec3vf<M> v2; // 3rd vertex of the quads + Vec3vf<M> v3; // 4rd vertex of the quads + private: + vuint<M> geomIDs; // geometry ID + vuint<M> primIDs; // primitive ID + }; + + template<int M> + typename QuadMv<M>::Type QuadMv<M>::type; + + typedef QuadMv<4> Quad4v; +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h new file mode 100644 index 0000000000..30a24b291a --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h @@ -0,0 +1,181 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "quadv.h" +#include "quad_intersector_moeller.h" +#include "quad_intersector_pluecker.h" + +namespace embree +{ + namespace isa + { + /*! Intersects M quads with 1 ray */ + template<int M, bool filter> + struct QuadMvIntersector1Moeller + { + typedef QuadMv<M> Primitive; + typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations; + + /*! Intersect a ray with the M quads and updates the hit. */ + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad) + { + STAT3(normal.trav_prims,1,1,1); + pre.intersect(ray,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); + } + + /*! Test if the ray is occluded by one of M quads. 
*/ + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad) + { + STAT3(shadow.trav_prims,1,1,1); + return pre.occluded(ray,context, quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); + } + }; + + /*! Intersects M triangles with K rays. */ + template<int M, int K, bool filter> + struct QuadMvIntersectorKMoeller + { + typedef QuadMv<M> Primitive; + typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations; + + /*! Intersects K rays with M triangles. */ + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMv<M>& quad) + { + for (size_t i=0; i<QuadMv<M>::max_size(); i++) + { + if (!quad.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + const Vec3vf<K> p0 = broadcast<vfloat<K>>(quad.v0,i); + const Vec3vf<K> p1 = broadcast<vfloat<K>>(quad.v1,i); + const Vec3vf<K> p2 = broadcast<vfloat<K>>(quad.v2,i); + const Vec3vf<K> p3 = broadcast<vfloat<K>>(quad.v3,i); + pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i)); + } + } + + /*! Test for K rays if they are occluded by any of the M triangles. 
*/ + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMv<M>& quad) + { + vbool<K> valid0 = valid_i; + + for (size_t i=0; i<QuadMv<M>::max_size(); i++) + { + if (!quad.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid0),K); + const Vec3vf<K> p0 = broadcast<vfloat<K>>(quad.v0,i); + const Vec3vf<K> p1 = broadcast<vfloat<K>>(quad.v1,i); + const Vec3vf<K> p2 = broadcast<vfloat<K>>(quad.v2,i); + const Vec3vf<K> p3 = broadcast<vfloat<K>>(quad.v3,i); + if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i))) + break; + } + return !valid0; + } + + /*! Intersect a ray with M triangles and updates the hit. */ + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad) + { + STAT3(normal.trav_prims,1,1,1); + pre.intersect1(ray,k,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); + } + + /*! Test if the ray is occluded by one of the M triangles. */ + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad) + { + STAT3(shadow.trav_prims,1,1,1); + return pre.occluded1(ray,k,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); + } + }; + + /*! Intersects M quads with 1 ray */ + template<int M, bool filter> + struct QuadMvIntersector1Pluecker + { + typedef QuadMv<M> Primitive; + typedef QuadMIntersector1Pluecker<M,filter> Precalculations; + + /*! Intersect a ray with the M quads and updates the hit. */ + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad) + { + STAT3(normal.trav_prims,1,1,1); + pre.intersect(ray,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); + } + + /*! Test if the ray is occluded by one of M quads. 
*/ + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad) + { + STAT3(shadow.trav_prims,1,1,1); + return pre.occluded(ray,context, quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); + } + }; + + /*! Intersects M triangles with K rays. */ + template<int M, int K, bool filter> + struct QuadMvIntersectorKPluecker + { + typedef QuadMv<M> Primitive; + typedef QuadMIntersectorKPluecker<M,K,filter> Precalculations; + + /*! Intersects K rays with M triangles. */ + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMv<M>& quad) + { + for (size_t i=0; i<QuadMv<M>::max_size(); i++) + { + if (!quad.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + const Vec3vf<K> p0 = broadcast<vfloat<K>>(quad.v0,i); + const Vec3vf<K> p1 = broadcast<vfloat<K>>(quad.v1,i); + const Vec3vf<K> p2 = broadcast<vfloat<K>>(quad.v2,i); + const Vec3vf<K> p3 = broadcast<vfloat<K>>(quad.v3,i); + pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i)); + } + } + + /*! Test for K rays if they are occluded by any of the M triangles. 
*/ + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMv<M>& quad) + { + vbool<K> valid0 = valid_i; + + for (size_t i=0; i<QuadMv<M>::max_size(); i++) + { + if (!quad.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid0),K); + const Vec3vf<K> p0 = broadcast<vfloat<K>>(quad.v0,i); + const Vec3vf<K> p1 = broadcast<vfloat<K>>(quad.v1,i); + const Vec3vf<K> p2 = broadcast<vfloat<K>>(quad.v2,i); + const Vec3vf<K> p3 = broadcast<vfloat<K>>(quad.v3,i); + if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i))) + break; + } + return !valid0; + } + + /*! Intersect a ray with M triangles and updates the hit. */ + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad) + { + STAT3(normal.trav_prims,1,1,1); + pre.intersect1(ray,k,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); + } + + /*! Test if the ray is occluded by one of the M triangles. */ + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad) + { + STAT3(shadow.trav_prims,1,1,1); + return pre.occluded1(ray,k,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); + } + }; + } +} + diff --git a/thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h new file mode 100644 index 0000000000..cdf68f486b --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h @@ -0,0 +1,710 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" +#include "curve_intersector_precalculations.h" + + +/* + + This file implements the intersection of a ray with a round linear + curve segment. 
We define the geometry of such a round linear curve + segment from point p0 with radius r0 to point p1 with radius r1 + using the cone that touches spheres p0/r0 and p1/r1 tangentially + plus the sphere p1/r1. We denote the tangentially touching cone from + p0/r0 to p1/r1 with cone(p0,r0,p1,r1) and the cone plus the ending + sphere with cone_sphere(p0,r0,p1,r1). + + For multiple connected round linear curve segments this construction + yields a proper shape when viewed from the outside. Using the + following CSG we can also handle the interior in most common cases: + + round_linear_curve(pl,rl,p0,r0,p1,r1,pr,rr) = + cone_sphere(p0,r0,p1,r1) - cone(pl,rl,p0,r0) - cone(p1,r1,pr,rr) + + Thus by subtracting the neighboring cone geometries, we cut away + parts of the center cone_sphere surface which lie inside the + combined curve. This approach works as long as the geometry of the + current cone_sphere penetrates into direct neighbor segments only, + and not into segments further away. + + To construct a cone that touches two spheres at p0 and p1 with r0 + and r1, one has to increase the cone radius at r0 and r1 to obtain + larger radii w0 and w1, such that the infinite cone properly touches + the spheres. From the paper "Ray Tracing Generalized Tube + Primitives: Method and Applications" + (https://www.researchgate.net/publication/334378683_Ray_Tracing_Generalized_Tube_Primitives_Method_and_Applications) + one can derive the following equations for these increased + radii: + + sr = 1.0f / sqrt(1-sqr(dr)/sqr(p1-p0)) + w0 = sr*r0 + w1 = sr*r1 + + Further, we want the cone to start where it touches the sphere at p0 + and to end where it touches the sphere at p1. Therefore, we need to + construct clipping locations y0 and y1 for the start and end of the + cone. 
These start and end clipping location of the cone can get + calculated as: + + Y0 = - r0 * (r1-r0) / length(p1-p0) + Y1 = length(p1-p0) - r1 * (r1-r0) / length(p1-p0) + + Where the cone starts a distance Y0 and ends a distance Y1 away of + point p0 along the cone center. The distance between Y1-Y0 can get + calculated as: + + dY = length(p1-p0) - (r1-r0)^2 / length(p1-p0) + + In the code below, Y will always be scaled by length(p1-p0) to + obtain y and you will find the terms r0*(r1-r0) and + (p1-p0)^2-(r1-r0)^2. + + */ + +namespace embree +{ + namespace isa + { + template<int M> + struct RoundLineIntersectorHitM + { + __forceinline RoundLineIntersectorHitM() {} + + __forceinline RoundLineIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng) + : vu(u), vv(v), vt(t), vNg(Ng) {} + + __forceinline void finalize() {} + + __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } + __forceinline float t (const size_t i) const { return vt[i]; } + __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } + + public: + vfloat<M> vu; + vfloat<M> vv; + vfloat<M> vt; + Vec3vf<M> vNg; + }; + + namespace __roundline_internal + { + template<int M> + struct ConeGeometry + { + ConeGeometry (const Vec4vf<M>& a, const Vec4vf<M>& b) + : p0(a.xyz()), p1(b.xyz()), dP(p1-p0), dPdP(dot(dP,dP)), r0(a.w), sqr_r0(sqr(r0)), r1(b.w), dr(r1-r0), drdr(dr*dr), r0dr (r0*dr), g(dPdP - drdr) {} + + /* + + This function tests if a point is accepted by first cone + clipping plane. + + First, we need to project the point onto the line p0->p1: + + Y = (p-p0)*(p1-p0)/length(p1-p0) + + This value y is the distance to the projection point from + p0. 
The clip distances are calculated as: + + Y0 = - r0 * (r1-r0) / length(p1-p0) + Y1 = length(p1-p0) - r1 * (r1-r0) / length(p1-p0) + + Thus to test if the point p is accepted by the first + clipping plane we need to test Y > Y0 and to test if it + is accepted by the second clipping plane we need to test + Y < Y1. + + By multiplying the calculations with length(p1-p0) these + calculation can get simplied to: + + y = (p-p0)*(p1-p0) + y0 = - r0 * (r1-r0) + y1 = (p1-p0)^2 - r1 * (r1-r0) + + and the test y > y0 and y < y1. + + */ + + /* Per-lane result: valid_i & (p0.x != inf) & (p1.x != inf) & (y > -r0*dr) + * with y = dot(p-p0,dP). Only the start clipping plane (y0) is tested + * here; the end plane (y1) is not evaluated by this function. + * NOTE(review): an x coordinate of inf presumably marks a missing + * neighbor vertex - confirm against the caller. */ + __forceinline vbool<M> isClippedByPlane (const vbool<M>& valid_i, const Vec3vf<M>& p) const + { + const Vec3vf<M> p0p = p - p0; + const vfloat<M> y = dot(p0p,dP); + const vfloat<M> cap0 = -r0dr; + const vbool<M> inside_cone = y > cap0; + return valid_i & (p0.x != vfloat<M>(inf)) & (p1.x != vfloat<M>(inf)) & inside_cone; + } + + /* + + This function tests whether a point lies inside the capped cone + tangential to its ending spheres. + + Therefore one has to check if the point is inside the + region defined by the cone clipping planes, which is + performed similarly to the previous function. + + To perform the inside cone test we need to project the + point onto the line p0->p1: + + dP = p1-p0 + Y = (p-p0)*dP/length(dP) + + This value Y is the distance to the projection point from + p0. 
To obtain a parameter value u going from 0 to 1 along + the line p0->p1 we calculate: + + U = Y/length(dP) + + The radii to use at points p0 and p1 are: + + w0 = sr * r0 + w1 = sr * r1 + dw = w1-w0 + + Using these radii and u one can directly test if the point + lies inside the cone using the formula dP*dP < wy*wy with: + + wy = w0 + u*dw + py = p0 + u*dP - p + + By multiplying the calculations with length(p1-p0) and + inserting the definition of w can obtain simpler equations: + + y = (p-p0)*dP + ry = r0 + y/dP^2 * dr + wy = sr*ry + py = p0 + y/dP^2*dP - p + y0 = - r0 * dr + y1 = dP^2 - r1 * dr + + Thus for the in-cone test we get: + + py^2 < wy^2 + <=> py^2 < sr^2 * ry^2 + <=> py^2 * ( dP^2 - dr^2 ) < dP^2 * ry^2 + + This can further get simplified to: + + (p0-p)^2 * (dP^2 - dr^2) - y^2 < dP^2 * r0^2 + 2.0f*r0*dr*y; + + */ + + __forceinline vbool<M> isInsideCappedCone (const vbool<M>& valid_i, const Vec3vf<M>& p) const + { + const Vec3vf<M> p0p = p - p0; + const vfloat<M> y = dot(p0p,dP); + const vfloat<M> cap0 = -r0dr+vfloat<M>(ulp); + const vfloat<M> cap1 = -r1*dr + dPdP; + + vbool<M> inside_cone = valid_i & (p0.x != vfloat<M>(inf)) & (p1.x != vfloat<M>(inf)); + inside_cone &= y > cap0; // start clipping plane + inside_cone &= y < cap1; // end clipping plane + inside_cone &= sqr(p0p)*g - sqr(y) < dPdP * sqr_r0 + 2.0f*r0dr*y; // in cone test + return inside_cone; + } + + protected: + Vec3vf<M> p0; + Vec3vf<M> p1; + Vec3vf<M> dP; + vfloat<M> dPdP; + vfloat<M> r0; + vfloat<M> sqr_r0; + vfloat<M> r1; + vfloat<M> dr; + vfloat<M> drdr; + vfloat<M> r0dr; + vfloat<M> g; + }; + + template<int M> + struct ConeGeometryIntersector : public ConeGeometry<M> + { + using ConeGeometry<M>::p0; + using ConeGeometry<M>::p1; + using ConeGeometry<M>::dP; + using ConeGeometry<M>::dPdP; + using ConeGeometry<M>::r0; + using ConeGeometry<M>::sqr_r0; + using ConeGeometry<M>::r1; + using ConeGeometry<M>::dr; + using ConeGeometry<M>::r0dr; + using ConeGeometry<M>::g; + + 
/* Builds the ConeGeometry base from a and b and caches the ray terms + * used by the intersection routines: org = ray_org, O = ray_org-p0, + * dO = ray_dir, OdP = dot(dP,O), dOdP = dot(dP,dO), yp = OdP + r0*dr. + * dOdO and rcp_dOdO are stored as passed by the caller; they are + * not recomputed here. */ + ConeGeometryIntersector (const Vec3vf<M>& ray_org, const Vec3vf<M>& ray_dir, const vfloat<M>& dOdO, const vfloat<M>& rcp_dOdO, const Vec4vf<M>& a, const Vec4vf<M>& b) + : ConeGeometry<M>(a,b), org(ray_org), O(ray_org-p0), dO(ray_dir), dOdO(dOdO), rcp_dOdO(rcp_dOdO), OdP(dot(dP,O)), dOdP(dot(dP,dO)), yp(OdP + r0dr) {} + + /* + + This function intersects a ray with a cone that touches a + start sphere p0/r0 and end sphere p1/r1. + + To find these ray/cone intersections one could just + calculate radii w0 and w1 as described above and use a + standard ray/cone intersection routine with these + radii. However, it turns out that calculations can get + simplified when deriving a specialized ray/cone + intersection for this special case. We perform + calculations relative to the cone origin p0 and define: + + O = ray_org - p0 + dO = ray_dir + dP = p1-p0 + dr = r1-r0 + dw = w1-w0 + + For some t we can compute the potential hit point h = O + t*dO and + project it onto the cone vector dP to obtain u = (h*dP)/(dP*dP). In + case of an intersection, the squared distance from the hit point + projected onto the cone center line to the hit point should be equal + to the squared cone radius at u: + + (u*dP - h)^2 = (w0 + u*dw)^2 + + Inserting the definition of h, u, w0, and dw into this formula, then + factoring out all terms, and sorting by t^2, t^1, and t^0 terms + yields a quadratic equation to solve. 
+ + Inserting u: + ( (h*dP)*dP/dP^2 - h )^2 = ( w0 + (h*dP)*dw/dP^2 )^2 + + Multiplying by dP^4: + ( (h*dP)*dP - h*dP^2 )^2 = ( w0*dP^2 + (h*dP)*dw )^2 + + Inserting w0 and dw: + ( (h*dP)*dP - h*dP^2 )^2 = ( r0*dP^2 + (h*dP)*dr )^2 / (1-dr^2/dP^2) + ( (h*dP)*dP - h*dP^2 )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + (h*dP)*dr )^2 + + Now one can insert the definition of h, factor out, and presort by t: + ( ((O + t*dO)*dP)*dP - (O + t*dO)*dP^2 )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + ((O + t*dO)*dP)*dr )^2 + ( (O*dP)*dP-O*dP^2 + t*( (dO*dP)*dP - dO*dP^2 ) )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + (O*dP)*dr + t*(dO*dP)*dr )^2 + + Factoring out further and sorting by t^2, t^1 and t^0 yields: + + 0 = t^2 * [ ((dO*dP)*dP - dO*dP^2)^2 * (dP^2 - dr^2) - dP^2*(dO*dP)^2*dr^2 ] + + 2*t^1 * [ ((O*dP)*dP - O*dP^2) * ((dO*dP)*dP - dO*dP^2) * (dP^2 - dr^2) - dP^2*(r0*dP^2 + (O*dP)*dr)*(dO*dP)*dr ] + + t^0 * [ ( (O*dP)*dP - O*dP^2)^2 * (dP^2-dr^2) - dP^2*(r0*dP^2 + (O*dP)*dr)^2 ] + + This can be simplified to: + + 0 = t^2 * [ (dP^2 - dr^2)*dO^2 - (dO*dP)^2 ] + + 2*t^1 * [ (dP^2 - dr^2)*(O*dO) - (dO*dP)*(O*dP + r0*dr) ] + + t^0 * [ (dP^2 - dr^2)*O^2 - (O*dP)^2 - r0^2*dP^2 - 2.0f*r0*dr*(O*dP) ] + + Solving this quadratic equation yields the values for t at which the + ray intersects the cone. 
+ + */ + + /* Solves A*t^2 + B*t + C = 0 for the ray/cone hits. + * valid: in/out lane mask; a lane stays set only if D >= 0, g > 0 + * and abs(A) > min_rcp_input. + * lower: out, root (-B-Q)/(2A) if its y lies in (-ulp, g], else pos_inf. + * upper: out, root (-B+Q)/(2A) under the same y test, else neg_inf; + * stays neg_inf when EMBREE_BACKFACE_CULLING_CURVES is defined. + * Returns false if no lane is left in valid, true otherwise. + * Also stores t_cone_front/y_cone_front (and the _back pair) in the + * members read later by Ng_cone() and u_cone(). */ + __forceinline bool intersectCone(vbool<M>& valid, vfloat<M>& lower, vfloat<M>& upper) + { + /* return no hit by default */ + lower = pos_inf; + upper = neg_inf; + + /* compute quadratic equation A*t^2 + B*t + C = 0 */ + const vfloat<M> OO = dot(O,O); + const vfloat<M> OdO = dot(dO,O); + const vfloat<M> A = g * dOdO - sqr(dOdP); + const vfloat<M> B = 2.0f * (g*OdO - dOdP*yp); + const vfloat<M> C = g*OO - sqr(OdP) - sqr_r0*dPdP - 2.0f*r0dr*OdP; + + /* we miss the cone if discriminant is smaller than zero */ + const vfloat<M> D = B*B - 4.0f*A*C; + valid &= (D >= 0.0f & g > 0.0f); // if g <= 0 then the cone is inside a sphere end + + /* When rays are parallel to the cone surface, then the + * ray may be inside or outside the cone. We just assume a + * miss in that case, which is fine as rays inside the + * cone would anyway hit the ending spheres in that + * case. */ + valid &= abs(A) > min_rcp_input; + if (unlikely(none(valid))) { + return false; + } + + /* compute distance to front and back hit */ + const vfloat<M> Q = sqrt(D); + const vfloat<M> rcp_2A = rcp(2.0f*A); + t_cone_front = (-B-Q)*rcp_2A; + y_cone_front = yp + t_cone_front*dOdP; + lower = select( (y_cone_front > -(float)ulp) & (y_cone_front <= g) & (g > 0.0f), t_cone_front, vfloat<M>(pos_inf)); +#if !defined (EMBREE_BACKFACE_CULLING_CURVES) + t_cone_back = (-B+Q)*rcp_2A; + y_cone_back = yp + t_cone_back *dOdP; + upper = select( (y_cone_back > -(float)ulp) & (y_cone_back <= g) & (g > 0.0f), t_cone_back , vfloat<M>(neg_inf)); +#endif + return true; + } + + /* + This function intersects the ray with the end sphere at + p1. We already clip away hits that are inside the + neighboring cone segment. 
+ + */ + + __forceinline void intersectEndSphere(vbool<M>& valid, + const ConeGeometry<M>& coneR, + vfloat<M>& lower, vfloat<M>& upper) + { + /* calculate front and back hit with end sphere */ + const Vec3vf<M> O1 = org - p1; + const vfloat<M> O1dO = dot(O1,dO); + const vfloat<M> h2 = sqr(O1dO) - dOdO*(sqr(O1) - sqr(r1)); + const vfloat<M> rhs1 = select( h2 >= 0.0f, sqrt(h2), vfloat<M>(neg_inf) ); + + /* clip away front hit if it is inside next cone segment */ + t_sph1_front = (-O1dO - rhs1)*rcp_dOdO; + const Vec3vf<M> hit_front = org + t_sph1_front*dO; + vbool<M> valid_sph1_front = h2 >= 0.0f & yp + t_sph1_front*dOdP > g & !coneR.isClippedByPlane (valid, hit_front); + lower = select(valid_sph1_front, t_sph1_front, vfloat<M>(pos_inf)); + +#if !defined(EMBREE_BACKFACE_CULLING_CURVES) + /* clip away back hit if it is inside next cone segment */ + t_sph1_back = (-O1dO + rhs1)*rcp_dOdO; + const Vec3vf<M> hit_back = org + t_sph1_back*dO; + vbool<M> valid_sph1_back = h2 >= 0.0f & yp + t_sph1_back*dOdP > g & !coneR.isClippedByPlane (valid, hit_back); + upper = select(valid_sph1_back, t_sph1_back, vfloat<M>(neg_inf)); +#else + upper = vfloat<M>(neg_inf); +#endif + } + + __forceinline void intersectBeginSphere(const vbool<M>& valid, + vfloat<M>& lower, vfloat<M>& upper) + { + /* calculate front and back hit with end sphere */ + const Vec3vf<M> O1 = org - p0; + const vfloat<M> O1dO = dot(O1,dO); + const vfloat<M> h2 = sqr(O1dO) - dOdO*(sqr(O1) - sqr(r0)); + const vfloat<M> rhs1 = select( h2 >= 0.0f, sqrt(h2), vfloat<M>(neg_inf) ); + + /* clip away front hit if it is inside next cone segment */ + t_sph0_front = (-O1dO - rhs1)*rcp_dOdO; + vbool<M> valid_sph1_front = valid & h2 >= 0.0f & yp + t_sph0_front*dOdP < 0; + lower = select(valid_sph1_front, t_sph0_front, vfloat<M>(pos_inf)); + +#if !defined(EMBREE_BACKFACE_CULLING_CURVES) + /* clip away back hit if it is inside next cone segment */ + t_sph0_back = (-O1dO + rhs1)*rcp_dOdO; + vbool<M> valid_sph1_back = valid & h2 >= 0.0f 
& yp + t_sph0_back*dOdP < 0; + upper = select(valid_sph1_back, t_sph0_back, vfloat<M>(neg_inf)); +#else + upper = vfloat<M>(neg_inf); +#endif + } + + /* + + This function calculates the geometry normal of some cone hit. + + For a given hit point h (relative to p0) with a cone + starting at p0 with radius w0 and ending at p1 with + radius w1 one normally calculates the geometry normal by + first calculating the parmetric u hit location along the + cone: + + u = dot(h,dP)/dP^2 + + Using this value one can now directly calculate the + geometry normal by bending the connection vector (h-u*dP) + from hit to projected hit with some cone dependent value + dw/sqrt(dP^2) * normalize(dP): + + Ng = normalize(h-u*dP) - dw/length(dP) * normalize(dP) + + The length of the vector (h-u*dP) can also get calculated + by interpolating the radii as w0+u*dw which yields: + + Ng = (h-u*dP)/(w0+u*dw) - dw/dP^2 * dP + + Multiplying with (w0+u*dw) yield a scaled Ng': + + Ng' = (h-u*dP) - (w0+u*dw)*dw/dP^2*dP + + Inserting the definition of w0 and dw and refactoring + yield a furhter scaled Ng'': + + Ng'' = (dP^2 - dr^2) (h-q) - (r0+u*dr)*dr*dP + + Now inserting the definition of u gives and multiplying + with the denominator yields: + + Ng''' = (dP^2-dr^2)*(dP^2*h-dot(h,dP)*dP) - (dP^2*r0+dot(h,dP)*dr)*dr*dP + + Factoring out, cancelling terms, dividing by dP^2, and + factoring again yields finally: + + Ng'''' = (dP^2-dr^2)*h - dP*(dot(h,dP) + r0*dr) + + */ + + __forceinline Vec3vf<M> Ng_cone(const vbool<M>& front_hit) const + { +#if !defined(EMBREE_BACKFACE_CULLING_CURVES) + const vfloat<M> y = select(front_hit, y_cone_front, y_cone_back); + const vfloat<M> t = select(front_hit, t_cone_front, t_cone_back); + const Vec3vf<M> h = O + t*dO; + return g*h-dP*y; +#else + const Vec3vf<M> h = O + t_cone_front*dO; + return g*h-dP*y_cone_front; +#endif + } + + /* compute geometry normal of sphere hit as the difference + * vector from hit point to sphere center */ + + __forceinline Vec3vf<M> 
Ng_sphere1(const vbool<M>& front_hit) const + { +#if !defined(EMBREE_BACKFACE_CULLING_CURVES) + const vfloat<M> t_sph1 = select(front_hit, t_sph1_front, t_sph1_back); + return org+t_sph1*dO-p1; +#else + return org+t_sph1_front*dO-p1; +#endif + } + + __forceinline Vec3vf<M> Ng_sphere0(const vbool<M>& front_hit) const + { +#if !defined(EMBREE_BACKFACE_CULLING_CURVES) + const vfloat<M> t_sph0 = select(front_hit, t_sph0_front, t_sph0_back); + return org+t_sph0*dO-p0; +#else + return org+t_sph0_front*dO-p0; +#endif + } + + /* + This function calculates the u coordinate of a + hit. Therefore we use the hit distance y (which is zero + at the first cone clipping plane) and divide by distance + g between the clipping planes. + + */ + + __forceinline vfloat<M> u_cone(const vbool<M>& front_hit) const + { +#if !defined(EMBREE_BACKFACE_CULLING_CURVES) + const vfloat<M> y = select(front_hit, y_cone_front, y_cone_back); + return clamp(y*rcp(g)); +#else + return clamp(y_cone_front*rcp(g)); +#endif + } + + private: + Vec3vf<M> org; + Vec3vf<M> O; + Vec3vf<M> dO; + vfloat<M> dOdO; + vfloat<M> rcp_dOdO; + vfloat<M> OdP; + vfloat<M> dOdP; + + /* for ray/cone intersection */ + private: + vfloat<M> yp; + vfloat<M> y_cone_front; + vfloat<M> t_cone_front; +#if !defined (EMBREE_BACKFACE_CULLING_CURVES) + vfloat<M> y_cone_back; + vfloat<M> t_cone_back; +#endif + + /* for ray/sphere intersection */ + private: + vfloat<M> t_sph1_front; + vfloat<M> t_sph0_front; +#if !defined (EMBREE_BACKFACE_CULLING_CURVES) + vfloat<M> t_sph1_back; + vfloat<M> t_sph0_back; +#endif + }; + + + template<int M, typename Epilog, typename ray_tfar_func> + static __forceinline bool intersectConeSphere(const vbool<M>& valid_i, + const Vec3vf<M>& ray_org_in, const Vec3vf<M>& ray_dir, + const vfloat<M>& ray_tnear, const ray_tfar_func& ray_tfar, + const Vec4vf<M>& v0, const Vec4vf<M>& v1, + const Vec4vf<M>& vL, const Vec4vf<M>& vR, + const Epilog& epilog) + { + vbool<M> valid = valid_i; + + /* move ray origin closer to 
make calculations numerically stable */ + const vfloat<M> dOdO = sqr(ray_dir); + const vfloat<M> rcp_dOdO = rcp(dOdO); + const Vec3vf<M> center = vfloat<M>(0.5f)*(v0.xyz()+v1.xyz()); + const vfloat<M> dt = dot(center-ray_org_in,ray_dir)*rcp_dOdO; + const Vec3vf<M> ray_org = ray_org_in + dt*ray_dir; + + /* intersect with cone from v0 to v1 */ + vfloat<M> t_cone_lower, t_cone_upper; + ConeGeometryIntersector<M> cone (ray_org, ray_dir, dOdO, rcp_dOdO, v0, v1); + vbool<M> validCone = valid; + cone.intersectCone(validCone, t_cone_lower, t_cone_upper); + + valid &= (validCone | (cone.g <= 0.0f)); // if cone is entirely in sphere end - check sphere + if (unlikely(none(valid))) + return false; + + /* cone hits inside the neighboring capped cones are inside the geometry and thus ignored */ + const ConeGeometry<M> coneL (v0, vL); + const ConeGeometry<M> coneR (v1, vR); +#if !defined(EMBREE_BACKFACE_CULLING_CURVES) + const Vec3vf<M> hit_lower = ray_org + t_cone_lower*ray_dir; + const Vec3vf<M> hit_upper = ray_org + t_cone_upper*ray_dir; + t_cone_lower = select (!coneL.isInsideCappedCone (validCone, hit_lower) & !coneR.isInsideCappedCone (validCone, hit_lower), t_cone_lower, vfloat<M>(pos_inf)); + t_cone_upper = select (!coneL.isInsideCappedCone (validCone, hit_upper) & !coneR.isInsideCappedCone (validCone, hit_upper), t_cone_upper, vfloat<M>(neg_inf)); +#endif + + /* intersect ending sphere */ + vfloat<M> t_sph1_lower, t_sph1_upper; + vfloat<M> t_sph0_lower = vfloat<M>(pos_inf); + vfloat<M> t_sph0_upper = vfloat<M>(neg_inf); + cone.intersectEndSphere(valid, coneR, t_sph1_lower, t_sph1_upper); + + const vbool<M> isBeginPoint = valid & (vL[0] == vfloat<M>(pos_inf)); + if (unlikely(any(isBeginPoint))) { + cone.intersectBeginSphere (isBeginPoint, t_sph0_lower, t_sph0_upper); + } + + /* CSG union of cone and end sphere */ + vfloat<M> t_sph_lower = min(t_sph0_lower, t_sph1_lower); + vfloat<M> t_cone_sphere_lower = min(t_cone_lower, t_sph_lower); +#if !defined 
(EMBREE_BACKFACE_CULLING_CURVES) + vfloat<M> t_sph_upper = max(t_sph0_upper, t_sph1_upper); + vfloat<M> t_cone_sphere_upper = max(t_cone_upper, t_sph_upper); + + /* filter out hits that are not in tnear/tfar range */ + const vbool<M> valid_lower = valid & ray_tnear <= dt+t_cone_sphere_lower & dt+t_cone_sphere_lower <= ray_tfar() & t_cone_sphere_lower != vfloat<M>(pos_inf); + const vbool<M> valid_upper = valid & ray_tnear <= dt+t_cone_sphere_upper & dt+t_cone_sphere_upper <= ray_tfar() & t_cone_sphere_upper != vfloat<M>(neg_inf); + + /* check if there is a first hit */ + const vbool<M> valid_first = valid_lower | valid_upper; + if (unlikely(none(valid_first))) + return false; + + /* construct first hit */ + const vfloat<M> t_first = select(valid_lower, t_cone_sphere_lower, t_cone_sphere_upper); + const vbool<M> cone_hit_first = t_first == t_cone_lower | t_first == t_cone_upper; + const vbool<M> sph0_hit_first = t_first == t_sph0_lower | t_first == t_sph0_upper; + const Vec3vf<M> Ng_first = select(cone_hit_first, cone.Ng_cone(valid_lower), select (sph0_hit_first, cone.Ng_sphere0(valid_lower), cone.Ng_sphere1(valid_lower))); + const vfloat<M> u_first = select(cone_hit_first, cone.u_cone(valid_lower), select (sph0_hit_first, vfloat<M>(zero), vfloat<M>(one))); + + /* invoke intersection filter for first hit */ + RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_first,Ng_first); + const bool is_hit_first = epilog(valid_first, hit); + + /* check for possible second hits before potentially accepted hit */ + const vfloat<M> t_second = t_cone_sphere_upper; + const vbool<M> valid_second = valid_lower & valid_upper & (dt+t_cone_sphere_upper <= ray_tfar()); + if (unlikely(none(valid_second))) + return is_hit_first; + + /* invoke intersection filter for second hit */ + const vbool<M> cone_hit_second = t_second == t_cone_lower | t_second == t_cone_upper; + const vbool<M> sph0_hit_second = t_second == t_sph0_lower | t_second == t_sph0_upper; + const Vec3vf<M> Ng_second = 
select(cone_hit_second, cone.Ng_cone(false), select (sph0_hit_second, cone.Ng_sphere0(false), cone.Ng_sphere1(false))); + const vfloat<M> u_second = select(cone_hit_second, cone.u_cone(false), select (sph0_hit_second, vfloat<M>(zero), vfloat<M>(one))); + + hit = RoundLineIntersectorHitM<M>(u_second,zero,dt+t_second,Ng_second); + const bool is_hit_second = epilog(valid_second, hit); + + return is_hit_first | is_hit_second; +#else + /* filter out hits that are not in tnear/tfar range */ + const vbool<M> valid_lower = valid & ray_tnear <= dt+t_cone_sphere_lower & dt+t_cone_sphere_lower <= ray_tfar() & t_cone_sphere_lower != vfloat<M>(pos_inf); + + /* check if there is a valid hit */ + if (unlikely(none(valid_lower))) + return false; + + /* construct first hit */ + const vbool<M> cone_hit_first = t_cone_sphere_lower == t_cone_lower | t_cone_sphere_lower == t_cone_upper; + const vbool<M> sph0_hit_first = t_cone_sphere_lower == t_sph0_lower | t_cone_sphere_lower == t_sph0_upper; + const Vec3vf<M> Ng_first = select(cone_hit_first, cone.Ng_cone(valid_lower), select (sph0_hit_first, cone.Ng_sphere0(valid_lower), cone.Ng_sphere1(valid_lower))); + const vfloat<M> u_first = select(cone_hit_first, cone.u_cone(valid_lower), select (sph0_hit_first, vfloat<M>(zero), vfloat<M>(one))); + + /* invoke intersection filter for first hit */ + RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_cone_sphere_lower,Ng_first); + const bool is_hit_first = epilog(valid_lower, hit); + + return is_hit_first; +#endif + } + + } // end namespace __roundline_internal + + template<int M> + struct RoundLinearCurveIntersector1 + { + typedef CurvePrecalculations1 Precalculations; + + struct ray_tfar { + Ray& ray; + __forceinline ray_tfar(Ray& ray) : ray(ray) {} + __forceinline vfloat<M> operator() () const { return ray.tfar; }; + }; + + template<typename Epilog> + static __forceinline bool intersect(const vbool<M>& valid_i, + Ray& ray, + IntersectContext* context, + const LineSegments* geom, + const 
Precalculations& pre, + const Vec4vf<M>& v0i, const Vec4vf<M>& v1i, + const Vec4vf<M>& vLi, const Vec4vf<M>& vRi, + const Epilog& epilog) + { + const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); + const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z); + const vfloat<M> ray_tnear(ray.tnear()); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); + const Vec4vf<M> vL = enlargeRadiusToMinWidth(context,geom,ray_org,vLi); + const Vec4vf<M> vR = enlargeRadiusToMinWidth(context,geom,ray_org,vRi); + return __roundline_internal::intersectConeSphere(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray),v0,v1,vL,vR,epilog); + } + }; + + template<int M, int K> + struct RoundLinearCurveIntersectorK + { + typedef CurvePrecalculationsK<K> Precalculations; + + struct ray_tfar { + RayK<K>& ray; + size_t k; + __forceinline ray_tfar(RayK<K>& ray, size_t k) : ray(ray), k(k) {} + __forceinline vfloat<M> operator() () const { return ray.tfar[k]; }; + }; + + template<typename Epilog> + static __forceinline bool intersect(const vbool<M>& valid_i, + RayK<K>& ray, size_t k, + IntersectContext* context, + const LineSegments* geom, + const Precalculations& pre, + const Vec4vf<M>& v0i, const Vec4vf<M>& v1i, + const Vec4vf<M>& vLi, const Vec4vf<M>& vRi, + const Epilog& epilog) + { + const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); + const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); + const vfloat<M> ray_tnear = ray.tnear()[k]; + const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); + const Vec4vf<M> vL = enlargeRadiusToMinWidth(context,geom,ray_org,vLi); + const Vec4vf<M> vR = enlargeRadiusToMinWidth(context,geom,ray_org,vRi); + return __roundline_internal::intersectConeSphere(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray,k),v0,v1,vL,vR,epilog); + } + }; + } +} diff --git 
a/thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h new file mode 100644 index 0000000000..079817335e --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h @@ -0,0 +1,136 @@ +// ======================================================================== // +// Copyright 2009-2020 Intel Corporation // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. 
// +// ======================================================================== // + +#pragma once + +#include "roundline_intersector.h" +#include "intersector_epilog.h" + +namespace embree +{ + namespace isa + { + /*! Single-ray front end for LineMi<M> primitives: gathers the four control points of each segment and forwards them to RoundLinearCurveIntersector1<Mx> with an intersect or occluded epilog. */ template<int M, int Mx, bool filter> + struct RoundLinearCurveMiIntersector1 + { + typedef LineMi<M> Primitive; + typedef CurvePrecalculations1 Precalculations; + + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line) + { + STAT3(normal.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom); + const vbool<Mx> valid = line.template valid<Mx>(); + RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) + { + STAT3(shadow.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom); + const vbool<Mx> valid = line.template valid<Mx>(); + return RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line); + } + }; + + /*! Same front end as RoundLinearCurveMiIntersector1, except that the control points are gathered with ray.time() as an extra argument. */ template<int M, int Mx, bool filter> + struct RoundLinearCurveMiMBIntersector1 + { + typedef LineMi<M> Primitive; + typedef CurvePrecalculations1 Precalculations; + + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line) + { + STAT3(normal.trav_prims,1,1,1); + const LineSegments* geom = 
context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()); + const vbool<Mx> valid = line.template valid<Mx>(); + RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) + { + STAT3(shadow.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()); + const vbool<Mx> valid = line.template valid<Mx>(); + return RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line); + } + }; + + /*! Front end for ray k of a RayK<K> packet: gathers the control points of a LineMi<M> primitive and forwards them to RoundLinearCurveIntersectorK<Mx,K>. */ template<int M, int Mx, int K, bool filter> + struct RoundLinearCurveMiIntersectorK + { + typedef LineMi<M> Primitive; + typedef CurvePrecalculationsK<K> Precalculations; + + static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) + { + STAT3(normal.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom); + const vbool<Mx> valid = line.template valid<Mx>(); + RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) + { + STAT3(shadow.trav_prims,1,1,1); + const LineSegments* geom = 
context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom); + const vbool<Mx> valid = line.template valid<Mx>(); + return RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + } + }; + + /*! Same front end as RoundLinearCurveMiIntersectorK, except that the control points are gathered with ray.time()[k] as an extra argument. */ template<int M, int Mx, int K, bool filter> + struct RoundLinearCurveMiMBIntersectorK + { + typedef LineMi<M> Primitive; + typedef CurvePrecalculationsK<K> Precalculations; + + static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) + { + STAT3(normal.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()[k]); + const vbool<Mx> valid = line.template valid<Mx>(); + RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) + { + STAT3(shadow.trav_prims,1,1,1); + const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); + Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()[k]); + const vbool<Mx> valid = line.template valid<Mx>(); + return RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h new file mode 100644 index 0000000000..3ab90c29ef --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h @@ -0,0 +1,183 @@ +// Copyright 2009-2020 Intel Corporation +// 
SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" +#include "../common/scene_points.h" +#include "curve_intersector_precalculations.h" + +namespace embree +{ + namespace isa + { + /*! Hit record handed to the epilog by the sphere intersectors: per-lane hit distance vt and geometry normal vNg; uv() always returns (0,0). */ template<int M> + struct SphereIntersectorHitM + { + __forceinline SphereIntersectorHitM() {} + + __forceinline SphereIntersectorHitM(const vfloat<M>& t, const Vec3vf<M>& Ng) + : vt(t), vNg(Ng) {} + + __forceinline void finalize() {} + + __forceinline Vec2f uv(const size_t i) const { + return Vec2f(0.0f, 0.0f); + } + __forceinline float t(const size_t i) const { + return vt[i]; + } + __forceinline Vec3fa Ng(const size_t i) const { + return Vec3fa(vNg.x[i], vNg.y[i], vNg.z[i]); + } + + public: + vfloat<M> vt; + Vec3vf<M> vNg; + }; + + /*! Intersects one ray with M spheres (center v0.xyz(), radius v0.w). The epilog is invoked for the first hit inside [tnear,tfar] and, where both the front and the back hit are valid, a second time for the back hit. Returns true if either epilog call reported a hit. */ template<int M> + struct SphereIntersector1 + { + typedef CurvePrecalculations1 Precalculations; + + template<typename Epilog> + static __forceinline bool intersect( + const vbool<M>& valid_i, Ray& ray, + const Precalculations& pre, const Vec4vf<M>& v0, const Epilog& epilog) + { + vbool<M> valid = valid_i; + + const vfloat<M> rd2 = rcp(dot(ray.dir, ray.dir)); + const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); + const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z); + const Vec3vf<M> center = v0.xyz(); + const vfloat<M> radius = v0.w; + + /* projC0 is the ray parameter of the point on the ray closest to the center; perp is the offset from that point to the center, so l2 is the squared ray-to-center distance */ const Vec3vf<M> c0 = center - ray_org; + const vfloat<M> projC0 = dot(c0, ray_dir) * rd2; + const Vec3vf<M> perp = c0 - projC0 * ray_dir; + const vfloat<M> l2 = dot(perp, perp); + const vfloat<M> r2 = radius * radius; + valid &= (l2 <= r2); + if (unlikely(none(valid))) + return false; + + /* td is the distance, in ray-parameter units, from the closest point to either intersection */ const vfloat<M> td = sqrt((r2 - l2) * rd2); + const vfloat<M> t_front = projC0 - td; + const vfloat<M> t_back = projC0 + td; + + const vbool<M> valid_front = valid & (ray.tnear() <= t_front) & (t_front <= ray.tfar); + const vbool<M> valid_back = valid & (ray.tnear() <= t_back ) & (t_back <= ray.tfar); + + /* check if there is a first hit */ + const vbool<M> valid_first = valid_front | valid_back; + 
if (unlikely(none(valid_first))) + return false; + + /* construct first hit */ + const vfloat<M> td_front = -td; + const vfloat<M> td_back = +td; + const vfloat<M> t_first = select(valid_front, t_front, t_back); + const Vec3vf<M> Ng_first = select(valid_front, td_front, td_back) * ray_dir - perp; + SphereIntersectorHitM<M> hit(t_first, Ng_first); + + /* invoke intersection filter for first hit */ + const bool is_hit_first = epilog(valid_first, hit); + + /* check for possible second hits before potentially accepted hit */ + /* ray.tfar is read again here, after the first epilog call */ const vfloat<M> t_second = t_back; + const vbool<M> valid_second = valid_front & valid_back & (t_second <= ray.tfar); + if (unlikely(none(valid_second))) + return is_hit_first; + + /* invoke intersection filter for second hit */ + const Vec3vf<M> Ng_second = td_back * ray_dir - perp; + hit = SphereIntersectorHitM<M> (t_second, Ng_second); + const bool is_hit_second = epilog(valid_second, hit); + + return is_hit_first | is_hit_second; + } + + /* overload that first passes the spheres through enlargeRadiusToMinWidth and then calls the intersect() above */ template<typename Epilog> + static __forceinline bool intersect( + const vbool<M>& valid_i, Ray& ray, IntersectContext* context, const Points* geom, + const Precalculations& pre, const Vec4vf<M>& v0i, const Epilog& epilog) + { + const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + return intersect(valid_i,ray,pre,v0,epilog); + } + }; + + /*! Packet counterpart of SphereIntersector1: runs the same sequence of tests for ray k of a RayK<K> packet, reading org, dir, tnear and tfar at index k. */ template<int M, int K> + struct SphereIntersectorK + { + typedef CurvePrecalculationsK<K> Precalculations; + + template<typename Epilog> + static __forceinline bool intersect(const vbool<M>& valid_i, + RayK<K>& ray, size_t k, + IntersectContext* context, + const Points* geom, + const Precalculations& pre, + const Vec4vf<M>& v0i, + const Epilog& epilog) + { + vbool<M> valid = valid_i; + + const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); + const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); + const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir)); + + const Vec4vf<M> v0 = 
enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec3vf<M> center = v0.xyz(); + const vfloat<M> radius = v0.w; + + const Vec3vf<M> c0 = center - ray_org; + const vfloat<M> projC0 = dot(c0, ray_dir) * rd2; + const Vec3vf<M> perp = c0 - projC0 * ray_dir; + const vfloat<M> l2 = dot(perp, perp); + const vfloat<M> r2 = radius * radius; + valid &= (l2 <= r2); + if (unlikely(none(valid))) + return false; + + const vfloat<M> td = sqrt((r2 - l2) * rd2); + const vfloat<M> t_front = projC0 - td; + const vfloat<M> t_back = projC0 + td; + + const vbool<M> valid_front = valid & (ray.tnear()[k] <= t_front) & (t_front <= ray.tfar[k]); + const vbool<M> valid_back = valid & (ray.tnear()[k] <= t_back ) & (t_back <= ray.tfar[k]); + + /* check if there is a first hit */ + const vbool<M> valid_first = valid_front | valid_back; + if (unlikely(none(valid_first))) + return false; + + /* construct first hit */ + const vfloat<M> td_front = -td; + const vfloat<M> td_back = +td; + const vfloat<M> t_first = select(valid_front, t_front, t_back); + const Vec3vf<M> Ng_first = select(valid_front, td_front, td_back) * ray_dir - perp; + SphereIntersectorHitM<M> hit(t_first, Ng_first); + + /* invoke intersection filter for first hit */ + const bool is_hit_first = epilog(valid_first, hit); + + /* check for possible second hits before potentially accepted hit */ + const vfloat<M> t_second = t_back; + const vbool<M> valid_second = valid_front & valid_back & (t_second <= ray.tfar[k]); + if (unlikely(none(valid_second))) + return is_hit_first; + + /* invoke intersection filter for second hit */ + const Vec3vf<M> Ng_second = td_back * ray_dir - perp; + hit = SphereIntersectorHitM<M> (t_second, Ng_second); + const bool is_hit_second = epilog(valid_second, hit); + + return is_hit_first | is_hit_second; + } + }; + } // namespace isa +} // namespace embree diff --git a/thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h 
new file mode 100644 index 0000000000..1146847602 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h @@ -0,0 +1,156 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "intersector_epilog.h" +#include "pointi.h" +#include "sphere_intersector.h" + +namespace embree +{ + namespace isa + { + /*! Single-ray front end for PointMi<M> primitives: gathers the sphere data and forwards it to SphereIntersector1<Mx> with an intersect or occluded epilog. */ template<int M, int Mx, bool filter> + struct SphereMiIntersector1 + { + typedef PointMi<M> Primitive; + typedef CurvePrecalculations1 Precalculations; + + static __forceinline void intersect(const Precalculations& pre, + RayHit& ray, + IntersectContext* context, + const Primitive& sphere) + { + STAT3(normal.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(sphere.geomID()); + Vec4vf<M> v0; sphere.gather(v0, geom); + const vbool<Mx> valid = sphere.template valid<Mx>(); + SphereIntersector1<Mx>::intersect( + valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, + Ray& ray, + IntersectContext* context, + const Primitive& sphere) + { + STAT3(shadow.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(sphere.geomID()); + Vec4vf<M> v0; sphere.gather(v0, geom); + const vbool<Mx> valid = sphere.template valid<Mx>(); + return SphereIntersector1<Mx>::intersect( + valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID())); + } + + static __forceinline bool pointQuery(PointQuery* query, + PointQueryContext* context, + const Primitive& sphere) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, sphere); + } + }; + + /*! Same front end as SphereMiIntersector1, except that the sphere data is gathered with ray.time() as an extra argument. */ template<int M, int Mx, bool filter> + struct SphereMiMBIntersector1 + { + typedef PointMi<M> Primitive; + typedef CurvePrecalculations1 Precalculations; + + static __forceinline void intersect(const Precalculations& pre, + RayHit& 
ray, + IntersectContext* context, + const Primitive& sphere) + { + STAT3(normal.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(sphere.geomID()); + Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()); + const vbool<Mx> valid = sphere.template valid<Mx>(); + SphereIntersector1<Mx>::intersect( + valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID())); + } + + static __forceinline bool occluded(const Precalculations& pre, + Ray& ray, + IntersectContext* context, + const Primitive& sphere) + { + STAT3(shadow.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(sphere.geomID()); + Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()); + const vbool<Mx> valid = sphere.template valid<Mx>(); + return SphereIntersector1<Mx>::intersect( + valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID())); + } + + static __forceinline bool pointQuery(PointQuery* query, + PointQueryContext* context, + const Primitive& sphere) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, sphere); + } + }; + + /*! Front end for ray k of a RayK<K> packet: gathers the sphere data of a PointMi<M> primitive and forwards it to SphereIntersectorK<Mx,K>. */ template<int M, int Mx, int K, bool filter> + struct SphereMiIntersectorK + { + typedef PointMi<M> Primitive; + typedef CurvePrecalculationsK<K> Precalculations; + + static __forceinline void intersect( + const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere) + { + STAT3(normal.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(sphere.geomID()); + Vec4vf<M> v0; sphere.gather(v0, geom); + const vbool<Mx> valid = sphere.template valid<Mx>(); + SphereIntersectorK<Mx, K>::intersect( + valid, ray, k, context, geom, pre, v0, + Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); + } + + static __forceinline bool occluded( + const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const 
Primitive& sphere) + { + STAT3(shadow.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(sphere.geomID()); + Vec4vf<M> v0; sphere.gather(v0, geom); + const vbool<Mx> valid = sphere.template valid<Mx>(); + return SphereIntersectorK<Mx, K>::intersect( + valid, ray, k, context, geom, pre, v0, + Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); + } + }; + + /*! Same front end as SphereMiIntersectorK, except that the sphere data is gathered with ray.time()[k] as an extra argument. */ template<int M, int Mx, int K, bool filter> + struct SphereMiMBIntersectorK + { + typedef PointMi<M> Primitive; + typedef CurvePrecalculationsK<K> Precalculations; + + static __forceinline void intersect( + const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere) + { + STAT3(normal.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(sphere.geomID()); + Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()[k]); + const vbool<Mx> valid = sphere.template valid<Mx>(); + SphereIntersectorK<Mx, K>::intersect( + valid, ray, k, context, geom, pre, v0, + Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); + } + + static __forceinline bool occluded( + const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere) + { + STAT3(shadow.trav_prims, 1, 1, 1); + const Points* geom = context->scene->get<Points>(sphere.geomID()); + Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()[k]); + const vbool<Mx> valid = sphere.template valid<Mx>(); + return SphereIntersectorK<Mx, K>::intersect( + valid, ray, k, context, geom, pre, v0, + Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); + } + }; + } // namespace isa +} // namespace embree diff --git a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h b/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h new file mode 100644 index 0000000000..94ad46ad87 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h @@ -0,0 
+1,38 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../geometry/primitive.h" +#include "../subdiv/subdivpatch1base.h" + +namespace embree +{ + + /*! SubdivPatch1Base plus a static PrimitiveType descriptor; the constructor forwards all of its arguments to the base class. */ struct __aligned(64) SubdivPatch1 : public SubdivPatch1Base + { + struct Type : public PrimitiveType + { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + + static Type type; + + public: + + /*! constructor for cached subdiv patch */ + SubdivPatch1 (const unsigned int gID, + const unsigned int pID, + const unsigned int subPatch, + const SubdivMesh *const mesh, + const size_t time, + const Vec2f uv[4], + const float edge_level[4], + const int subdiv[4], + const int simd_width) + : SubdivPatch1Base(gID,pID,subPatch,mesh,time,uv,edge_level,subdiv,simd_width) {} + }; +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h new file mode 100644 index 0000000000..74ec1de258 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h @@ -0,0 +1,237 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "subdivpatch1.h" +#include "grid_soa.h" +#include "grid_soa_intersector1.h" +#include "grid_soa_intersector_packet.h" +#include "../common/ray.h" + +namespace embree +{ + namespace isa + { + /*! Precalculations adapter that forwards (ray,ptr) to the constructor of T. */ template<typename T> + class SubdivPatch1Precalculations : public T + { + public: + __forceinline SubdivPatch1Precalculations (const Ray& ray, const void* ptr) + : T(ray,ptr) {} + }; + + /*! Packet precalculations adapter that forwards (valid,ray) to the constructor of T. */ template<int K, typename T> + class SubdivPatch1PrecalculationsK : public T + { + public: + __forceinline SubdivPatch1PrecalculationsK (const vbool<K>& valid, RayK<K>& ray) + : T(valid,ray) {} + }; + + /*! Single-ray intersector over GridSOA primitives: ty == 0 forwards to GridSOAIntersector1, any other ty goes through processLazyNode. */ class SubdivPatch1Intersector1 + { + public: + typedef GridSOA Primitive; + typedef 
SubdivPatch1Precalculations<GridSOAIntersector1::Precalculations> Precalculations; + + /*! Stores the grid in pre.grid, sets lazy_node to prim->root(0) and always returns false. */ static __forceinline bool processLazyNode(Precalculations& pre, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + lazy_node = prim->root(0); + pre.grid = (Primitive*)prim; + return false; + } + + /*! Intersect a ray with the primitive. */ + template<int N, int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + if (likely(ty == 0)) GridSOAIntersector1::intersect(pre,ray,context,prim,lazy_node); + else processLazyNode(pre,context,prim,lazy_node); + } + + template<int N, int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) { + intersect(This,pre,ray,context,prim,ty,tray,lazy_node); + } + + /*! 
Test if the ray is occluded by the primitive */ + template<int N, int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + if (likely(ty == 0)) return GridSOAIntersector1::occluded(pre,ray,context,prim,lazy_node); + else return processLazyNode(pre,context,prim,lazy_node); + } + + template<int N, int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) { + return occluded(This,pre,ray,context,prim,ty,tray,lazy_node); + } + + template<int N> + static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node) + { + // TODO: PointQuery implement + assert(false && "not implemented"); + return false; + } + + template<int N> + static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node) { + return pointQuery(This,query,context,prim,ty,tquery,lazy_node); + } + }; + + /*! Single-ray motion-blur intersector over SubdivPatch1 primitives: ty == 0 forwards to GridSOAMBIntersector1, any other ty goes through processLazyNode. */ class SubdivPatch1MBIntersector1 + { + public: + typedef SubdivPatch1 Primitive; + typedef GridSOAMBIntersector1::Precalculations Precalculations; + + /*! Fetches the GridSOA behind prim->root_ref, selects the time segment for ray.time() (stored in pre.itime / pre.ftime), sets lazy_node to the root of that segment and always returns false. */ static __forceinline bool processLazyNode(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim_i, size_t& lazy_node) + { + Primitive* prim = (Primitive*) prim_i; + GridSOA* grid = (GridSOA*) prim->root_ref.get(); + pre.itime = getTimeSegment(ray.time(), float(grid->time_steps-1), pre.ftime); + lazy_node = grid->root(pre.itime); + pre.grid = grid; + return 
false; + } + + /*! Intersect a ray with the primitive. */ + template<int N, int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + if (likely(ty == 0)) GridSOAMBIntersector1::intersect(pre,ray,context,prim,lazy_node); + else processLazyNode(pre,ray,context,prim,lazy_node); + } + + template<int N, int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) { + intersect(This,pre,ray,context,prim,ty,tray,lazy_node); + } + + /*! Test if the ray is occluded by the primitive */ + template<int N, int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + if (likely(ty == 0)) return GridSOAMBIntersector1::occluded(pre,ray,context,prim,lazy_node); + else return processLazyNode(pre,ray,context,prim,lazy_node); + } + + template<int N, int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) { + return occluded(This,pre,ray,context,prim,ty,tray,lazy_node); + } + + template<int N> + static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node) + { + // TODO: PointQuery implement + assert(false && "not implemented"); + return false; + } + + /* Nx and robust appear in no function parameter and therefore cannot be deduced; the defaults make this overload callable with deduced N while still accepting explicit <N,Nx,robust> arguments */ template<int N, int Nx = N, bool robust = false> + static 
__forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node) { + return pointQuery(This,query,context,prim,ty,tquery,lazy_node); + } + }; + + /*! Packet intersector over GridSOA primitives, offering both whole-packet (valid mask) and single-ray-of-packet (index k) entry points; ty == 0 forwards to GridSOAIntersectorK<K>. */ template <int K> + struct SubdivPatch1IntersectorK + { + typedef GridSOA Primitive; + typedef SubdivPatch1PrecalculationsK<K,typename GridSOAIntersectorK<K>::Precalculations> Precalculations; + + static __forceinline bool processLazyNode(Precalculations& pre, IntersectContext* context, const Primitive* prim, size_t& lazy_node) + { + lazy_node = prim->root(0); + pre.grid = (Primitive*)prim; + return false; + } + + template<bool robust> + static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node) + { + if (likely(ty == 0)) GridSOAIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node); + else processLazyNode(pre,context,prim,lazy_node); + } + + template<bool robust> + static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node) + { + if (likely(ty == 0)) return GridSOAIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node); + else return processLazyNode(pre,context,prim,lazy_node); + } + + template<int N, int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + if (likely(ty == 0)) GridSOAIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node); + else processLazyNode(pre,context,prim,lazy_node); + } + 
+ template<int N, int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + if (likely(ty == 0)) return GridSOAIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node); + else return processLazyNode(pre,context,prim,lazy_node); + } + }; + + typedef SubdivPatch1IntersectorK<4> SubdivPatch1Intersector4; + typedef SubdivPatch1IntersectorK<8> SubdivPatch1Intersector8; + typedef SubdivPatch1IntersectorK<16> SubdivPatch1Intersector16; + + /*! Packet motion-blur intersector over SubdivPatch1 primitives; ty == 0 forwards to GridSOAMBIntersectorK<K>, any other ty resolves the grid behind prim->root_ref and sets lazy_node to grid->troot. */ template <int K> + struct SubdivPatch1MBIntersectorK + { + typedef SubdivPatch1 Primitive; + //typedef GridSOAMBIntersectorK<K>::Precalculations Precalculations; + typedef SubdivPatch1PrecalculationsK<K,typename GridSOAMBIntersectorK<K>::Precalculations> Precalculations; + + static __forceinline bool processLazyNode(Precalculations& pre, IntersectContext* context, const Primitive* prim_i, size_t& lazy_node) + { + Primitive* prim = (Primitive*) prim_i; + GridSOA* grid = (GridSOA*) prim->root_ref.get(); + lazy_node = grid->troot; + pre.grid = grid; + return false; + } + + template<bool robust> + static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node) + { + if (likely(ty == 0)) GridSOAMBIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node); + else processLazyNode(pre,context,prim,lazy_node); + } + + template<bool robust> + static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node) + { + if (likely(ty == 0)) return 
GridSOAMBIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node); + else return processLazyNode(pre,context,prim,lazy_node); + } + + template<int N, int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + if (likely(ty == 0)) GridSOAMBIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node); + else processLazyNode(pre,context,prim,lazy_node); + } + + template<int N, int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + if (likely(ty == 0)) return GridSOAMBIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node); + else return processLazyNode(pre,context,prim,lazy_node); + } + }; + + typedef SubdivPatch1MBIntersectorK<4> SubdivPatch1MBIntersector4; + typedef SubdivPatch1MBIntersectorK<8> SubdivPatch1MBIntersector8; + typedef SubdivPatch1MBIntersectorK<16> SubdivPatch1MBIntersector16; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid.h new file mode 100644 index 0000000000..39fa6fb0f0 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/subgrid.h @@ -0,0 +1,517 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/ray.h" +#include "../common/scene_grid_mesh.h" +#include "../bvh/bvh.h" + +namespace embree +{ + /* Stores M quads from an indexed face set */ + struct SubGrid + { + /* Virtual interface to query information about the quad type */ + struct Type : public PrimitiveType + { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + 
size_t getBytes(const char* This) const; + }; + static Type type; + + public: + + /* primitive supports multiple time segments */ + static const bool singleTimeSegment = false; + + /* Returns maximum number of stored quads */ + static __forceinline size_t max_size() { return 1; } + + /* Returns required number of primitive blocks for N primitives */ + static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } + + public: + + /* Default constructor */ + __forceinline SubGrid() { } + + /* Construction from vertices and IDs */ + __forceinline SubGrid(const unsigned int x, + const unsigned int y, + const unsigned int geomID, + const unsigned int primID) + : _x(x), _y(y), _geomID(geomID), _primID(primID) + { + } + + __forceinline bool invalid3x3X() const { return (unsigned int)_x & (1<<15); } + __forceinline bool invalid3x3Y() const { return (unsigned int)_y & (1<<15); } + + /* Gather the quads */ + __forceinline void gather(Vec3vf4& p0, + Vec3vf4& p1, + Vec3vf4& p2, + Vec3vf4& p3, + const GridMesh* const mesh, + const GridMesh::Grid &g) const + { + /* first quad always valid */ + const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset; + const size_t vtxID01 = vtxID00 + 1; + const vfloat4 vtx00 = vfloat4::loadu(mesh->vertexPtr(vtxID00)); + const vfloat4 vtx01 = vfloat4::loadu(mesh->vertexPtr(vtxID01)); + const size_t vtxID10 = vtxID00 + g.lineVtxOffset; + const size_t vtxID11 = vtxID01 + g.lineVtxOffset; + const vfloat4 vtx10 = vfloat4::loadu(mesh->vertexPtr(vtxID10)); + const vfloat4 vtx11 = vfloat4::loadu(mesh->vertexPtr(vtxID11)); + + /* deltaX => vtx02, vtx12 */ + const size_t deltaX = invalid3x3X() ? 0 : 1; + const size_t vtxID02 = vtxID01 + deltaX; + const vfloat4 vtx02 = vfloat4::loadu(mesh->vertexPtr(vtxID02)); + const size_t vtxID12 = vtxID11 + deltaX; + const vfloat4 vtx12 = vfloat4::loadu(mesh->vertexPtr(vtxID12)); + + /* deltaY => vtx20, vtx21 */ + const size_t deltaY = invalid3x3Y() ? 
0 : g.lineVtxOffset; + const size_t vtxID20 = vtxID10 + deltaY; + const size_t vtxID21 = vtxID11 + deltaY; + const vfloat4 vtx20 = vfloat4::loadu(mesh->vertexPtr(vtxID20)); + const vfloat4 vtx21 = vfloat4::loadu(mesh->vertexPtr(vtxID21)); + + /* deltaX/deltaY => vtx22 */ + const size_t vtxID22 = vtxID11 + deltaX + deltaY; + const vfloat4 vtx22 = vfloat4::loadu(mesh->vertexPtr(vtxID22)); + + transpose(vtx00,vtx01,vtx11,vtx10,p0.x,p0.y,p0.z); + transpose(vtx01,vtx02,vtx12,vtx11,p1.x,p1.y,p1.z); + transpose(vtx11,vtx12,vtx22,vtx21,p2.x,p2.y,p2.z); + transpose(vtx10,vtx11,vtx21,vtx20,p3.x,p3.y,p3.z); + } + + template<typename T> + __forceinline vfloat4 getVertexMB(const GridMesh* const mesh, const size_t offset, const size_t itime, const float ftime) const + { + const T v0 = T::loadu(mesh->vertexPtr(offset,itime+0)); + const T v1 = T::loadu(mesh->vertexPtr(offset,itime+1)); + return lerp(v0,v1,ftime); + } + + /* Gather the quads */ + __forceinline void gatherMB(Vec3vf4& p0, + Vec3vf4& p1, + Vec3vf4& p2, + Vec3vf4& p3, + const GridMesh* const mesh, + const GridMesh::Grid &g, + const size_t itime, + const float ftime) const + { + /* first quad always valid */ + const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset; + const size_t vtxID01 = vtxID00 + 1; + const vfloat4 vtx00 = getVertexMB<vfloat4>(mesh,vtxID00,itime,ftime); + const vfloat4 vtx01 = getVertexMB<vfloat4>(mesh,vtxID01,itime,ftime); + const size_t vtxID10 = vtxID00 + g.lineVtxOffset; + const size_t vtxID11 = vtxID01 + g.lineVtxOffset; + const vfloat4 vtx10 = getVertexMB<vfloat4>(mesh,vtxID10,itime,ftime); + const vfloat4 vtx11 = getVertexMB<vfloat4>(mesh,vtxID11,itime,ftime); + + /* deltaX => vtx02, vtx12 */ + const size_t deltaX = invalid3x3X() ? 
0 : 1; + const size_t vtxID02 = vtxID01 + deltaX; + const vfloat4 vtx02 = getVertexMB<vfloat4>(mesh,vtxID02,itime,ftime); + const size_t vtxID12 = vtxID11 + deltaX; + const vfloat4 vtx12 = getVertexMB<vfloat4>(mesh,vtxID12,itime,ftime); + + /* deltaY => vtx20, vtx21 */ + const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset; + const size_t vtxID20 = vtxID10 + deltaY; + const size_t vtxID21 = vtxID11 + deltaY; + const vfloat4 vtx20 = getVertexMB<vfloat4>(mesh,vtxID20,itime,ftime); + const vfloat4 vtx21 = getVertexMB<vfloat4>(mesh,vtxID21,itime,ftime); + + /* deltaX/deltaY => vtx22 */ + const size_t vtxID22 = vtxID11 + deltaX + deltaY; + const vfloat4 vtx22 = getVertexMB<vfloat4>(mesh,vtxID22,itime,ftime); + + transpose(vtx00,vtx01,vtx11,vtx10,p0.x,p0.y,p0.z); + transpose(vtx01,vtx02,vtx12,vtx11,p1.x,p1.y,p1.z); + transpose(vtx11,vtx12,vtx22,vtx21,p2.x,p2.y,p2.z); + transpose(vtx10,vtx11,vtx21,vtx20,p3.x,p3.y,p3.z); + } + + + + /* Gather the quads */ + __forceinline void gather(Vec3vf4& p0, + Vec3vf4& p1, + Vec3vf4& p2, + Vec3vf4& p3, + const Scene *const scene) const + { + const GridMesh* const mesh = scene->get<GridMesh>(geomID()); + const GridMesh::Grid &g = mesh->grid(primID()); + gather(p0,p1,p2,p3,mesh,g); + } + + /* Gather the quads in the motion blur case */ + __forceinline void gatherMB(Vec3vf4& p0, + Vec3vf4& p1, + Vec3vf4& p2, + Vec3vf4& p3, + const Scene *const scene, + const size_t itime, + const float ftime) const + { + const GridMesh* const mesh = scene->get<GridMesh>(geomID()); + const GridMesh::Grid &g = mesh->grid(primID()); + gatherMB(p0,p1,p2,p3,mesh,g,itime,ftime); + } + + /* Gather the quads */ + __forceinline void gather(Vec3fa vtx[16], const Scene *const scene) const + { + const GridMesh* mesh = scene->get<GridMesh>(geomID()); + const GridMesh::Grid &g = mesh->grid(primID()); + + /* first quad always valid */ + const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset; + const size_t vtxID01 = vtxID00 + 1; + const Vec3fa vtx00 = 
Vec3fa::loadu(mesh->vertexPtr(vtxID00)); + const Vec3fa vtx01 = Vec3fa::loadu(mesh->vertexPtr(vtxID01)); + const size_t vtxID10 = vtxID00 + g.lineVtxOffset; + const size_t vtxID11 = vtxID01 + g.lineVtxOffset; + const Vec3fa vtx10 = Vec3fa::loadu(mesh->vertexPtr(vtxID10)); + const Vec3fa vtx11 = Vec3fa::loadu(mesh->vertexPtr(vtxID11)); + + /* deltaX => vtx02, vtx12 */ + const size_t deltaX = invalid3x3X() ? 0 : 1; + const size_t vtxID02 = vtxID01 + deltaX; + const Vec3fa vtx02 = Vec3fa::loadu(mesh->vertexPtr(vtxID02)); + const size_t vtxID12 = vtxID11 + deltaX; + const Vec3fa vtx12 = Vec3fa::loadu(mesh->vertexPtr(vtxID12)); + + /* deltaY => vtx20, vtx21 */ + const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset; + const size_t vtxID20 = vtxID10 + deltaY; + const size_t vtxID21 = vtxID11 + deltaY; + const Vec3fa vtx20 = Vec3fa::loadu(mesh->vertexPtr(vtxID20)); + const Vec3fa vtx21 = Vec3fa::loadu(mesh->vertexPtr(vtxID21)); + + /* deltaX/deltaY => vtx22 */ + const size_t vtxID22 = vtxID11 + deltaX + deltaY; + const Vec3fa vtx22 = Vec3fa::loadu(mesh->vertexPtr(vtxID22)); + + vtx[ 0] = vtx00; vtx[ 1] = vtx01; vtx[ 2] = vtx11; vtx[ 3] = vtx10; + vtx[ 4] = vtx01; vtx[ 5] = vtx02; vtx[ 6] = vtx12; vtx[ 7] = vtx11; + vtx[ 8] = vtx10; vtx[ 9] = vtx11; vtx[10] = vtx21; vtx[11] = vtx20; + vtx[12] = vtx11; vtx[13] = vtx12; vtx[14] = vtx22; vtx[15] = vtx21; + } + + /* Gather the quads */ + __forceinline void gatherMB(vfloat4 vtx[16], const Scene *const scene, const size_t itime, const float ftime) const + { + const GridMesh* mesh = scene->get<GridMesh>(geomID()); + const GridMesh::Grid &g = mesh->grid(primID()); + + /* first quad always valid */ + const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset; + const size_t vtxID01 = vtxID00 + 1; + const vfloat4 vtx00 = getVertexMB<vfloat4>(mesh,vtxID00,itime,ftime); + const vfloat4 vtx01 = getVertexMB<vfloat4>(mesh,vtxID01,itime,ftime); + const size_t vtxID10 = vtxID00 + g.lineVtxOffset; + const size_t vtxID11 = vtxID01 
+ g.lineVtxOffset; + const vfloat4 vtx10 = getVertexMB<vfloat4>(mesh,vtxID10,itime,ftime); + const vfloat4 vtx11 = getVertexMB<vfloat4>(mesh,vtxID11,itime,ftime); + + /* deltaX => vtx02, vtx12 */ + const size_t deltaX = invalid3x3X() ? 0 : 1; + const size_t vtxID02 = vtxID01 + deltaX; + const vfloat4 vtx02 = getVertexMB<vfloat4>(mesh,vtxID02,itime,ftime); + const size_t vtxID12 = vtxID11 + deltaX; + const vfloat4 vtx12 = getVertexMB<vfloat4>(mesh,vtxID12,itime,ftime); + + /* deltaY => vtx20, vtx21 */ + const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset; + const size_t vtxID20 = vtxID10 + deltaY; + const size_t vtxID21 = vtxID11 + deltaY; + const vfloat4 vtx20 = getVertexMB<vfloat4>(mesh,vtxID20,itime,ftime); + const vfloat4 vtx21 = getVertexMB<vfloat4>(mesh,vtxID21,itime,ftime); + + /* deltaX/deltaY => vtx22 */ + const size_t vtxID22 = vtxID11 + deltaX + deltaY; + const vfloat4 vtx22 = getVertexMB<vfloat4>(mesh,vtxID22,itime,ftime); + + vtx[ 0] = vtx00; vtx[ 1] = vtx01; vtx[ 2] = vtx11; vtx[ 3] = vtx10; + vtx[ 4] = vtx01; vtx[ 5] = vtx02; vtx[ 6] = vtx12; vtx[ 7] = vtx11; + vtx[ 8] = vtx10; vtx[ 9] = vtx11; vtx[10] = vtx21; vtx[11] = vtx20; + vtx[12] = vtx11; vtx[13] = vtx12; vtx[14] = vtx22; vtx[15] = vtx21; + } + + + /* Calculate the bounds of the subgrid */ + __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const + { + BBox3fa bounds = empty; + FATAL("not implemented yet"); + return bounds; + } + + /* Calculate the linear bounds of the primitive */ + __forceinline LBBox3fa linearBounds(const Scene* const scene, const size_t itime) + { + return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1)); + } + + __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps) + { + LBBox3fa allBounds = empty; + FATAL("not implemented yet"); + return allBounds; + } + + __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range) + { + LBBox3fa allBounds = empty; + 
FATAL("not implemented yet"); + return allBounds; + } + + + friend embree_ostream operator<<(embree_ostream cout, const SubGrid& sg) { /* debug print of x, y, geomID and primID, each as "name = value" */ + return cout << "SubGrid " << " ( x = " << sg.x() << ", y = " << sg.y() << ", geomID = " << sg.geomID() << ", primID = " << sg.primID() << " )"; + } + + __forceinline unsigned int geomID() const { return _geomID; } + __forceinline unsigned int primID() const { return _primID; } + __forceinline unsigned int x() const { return (unsigned int)_x & 0x7fff; } /* low 15 bits only; bit 15 is the flag tested by invalid3x3X() */ + __forceinline unsigned int y() const { return (unsigned int)_y & 0x7fff; } /* low 15 bits only; bit 15 is the flag tested by invalid3x3Y() */ + + private: + unsigned short _x; /* x coordinate in bits 0-14, flag in bit 15 */ + unsigned short _y; /* y coordinate in bits 0-14, flag in bit 15 */ + unsigned int _geomID; // geometry ID of mesh + unsigned int _primID; // primitive ID of primitive inside mesh + }; + + struct SubGridID { /* x, y and primID of one subgrid slot; the geomID is stored once in the enclosing leaf */ + unsigned short x; + unsigned short y; + unsigned int primID; + + __forceinline SubGridID() {} + __forceinline SubGridID(const unsigned int x, const unsigned int y, const unsigned int primID) : + x(x), y(y), primID(primID) {} + }; + + /* QuantizedBaseNode as large subgrid leaf */ + template<int N> + struct SubGridQBVHN + { + /* Virtual interface to query information about the quad type */ + struct Type : public PrimitiveType + { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + static Type type; + + public: + + __forceinline size_t size() const /* index of the first slot whose primID is (unsigned int)-1, the value clear() stores; N if there is none */ + { + for (size_t i=0;i<N;i++) + if (primID(i) == -1) return i; + return N; + } + + __forceinline void clear() { /* marks every slot unused (primID = (unsigned int)-1) and clears the quantized node */ + for (size_t i=0;i<N;i++) + subgridIDs[i] = SubGridID(0,0,(unsigned int)-1); + qnode.clear(); + } + + /* Default constructor */ + __forceinline SubGridQBVHN() { } + + /* Construction from vertices and IDs */ + __forceinline SubGridQBVHN(const unsigned int x[N], + const unsigned int y[N], + const unsigned int primID[N], + const BBox3fa * const subGridBounds, + const unsigned int geomID, + const unsigned int items) + { + clear(); + _geomID = geomID; +
+ __aligned(64) typename BVHN<N>::AABBNode node; + node.clear(); + for (size_t i=0;i<items;i++) + { + subgridIDs[i] = SubGridID(x[i],y[i],primID[i]); + node.setBounds(i,subGridBounds[i]); + } + qnode.init_dim(node); + } + + __forceinline unsigned int geomID() const { return _geomID; } + __forceinline unsigned int primID(const size_t i) const { assert(i < N); return subgridIDs[i].primID; } + __forceinline unsigned int x(const size_t i) const { assert(i < N); return subgridIDs[i].x; } + __forceinline unsigned int y(const size_t i) const { assert(i < N); return subgridIDs[i].y; } + + __forceinline SubGrid subgrid(const size_t i) const { + assert(i < N); + assert(primID(i) != -1); + return SubGrid(x(i),y(i),geomID(),primID(i)); + } + + public: + SubGridID subgridIDs[N]; + + typename BVHN<N>::QuantizedBaseNode qnode; + + unsigned int _geomID; // geometry ID of mesh + + + friend embree_ostream operator<<(embree_ostream cout, const SubGridQBVHN& sg) { + cout << "SubGridQBVHN " << embree_endl; + for (size_t i=0;i<N;i++) + cout << i << " ( x = " << sg.subgridIDs[i].x << ", y = " << sg.subgridIDs[i].y << ", primID = " << sg.subgridIDs[i].primID << " )" << embree_endl; + cout << "geomID " << sg._geomID << embree_endl; + cout << "lowerX " << sg.qnode.dequantizeLowerX() << embree_endl; + cout << "upperX " << sg.qnode.dequantizeUpperX() << embree_endl; + cout << "lowerY " << sg.qnode.dequantizeLowerY() << embree_endl; + cout << "upperY " << sg.qnode.dequantizeUpperY() << embree_endl; + cout << "lowerZ " << sg.qnode.dequantizeLowerZ() << embree_endl; + cout << "upperZ " << sg.qnode.dequantizeUpperZ() << embree_endl; + return cout; + } + + }; + + template<int N> + typename SubGridQBVHN<N>::Type SubGridQBVHN<N>::type; + + typedef SubGridQBVHN<4> SubGridQBVH4; + typedef SubGridQBVHN<8> SubGridQBVH8; + + + /* QuantizedBaseNode as large subgrid leaf */ + template<int N> + struct SubGridMBQBVHN + { + /* Virtual interface to query information about the quad type */ + struct Type : 
public PrimitiveType + { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + static Type type; + + public: + + __forceinline size_t size() const + { + for (size_t i=0;i<N;i++) + if (primID(i) == -1) return i; + return N; + } + + __forceinline void clear() { + for (size_t i=0;i<N;i++) + subgridIDs[i] = SubGridID(0,0,(unsigned int)-1); + qnode.clear(); + } + + /* Default constructor */ + __forceinline SubGridMBQBVHN() { } + + /* Construction from vertices and IDs */ + __forceinline SubGridMBQBVHN(const unsigned int x[N], + const unsigned int y[N], + const unsigned int primID[N], + const BBox3fa * const subGridBounds0, + const BBox3fa * const subGridBounds1, + const unsigned int geomID, + const float toffset, + const float tscale, + const unsigned int items) + { + clear(); + _geomID = geomID; + time_offset = toffset; + time_scale = tscale; + + __aligned(64) typename BVHN<N>::AABBNode node0,node1; + node0.clear(); + node1.clear(); + for (size_t i=0;i<items;i++) + { + subgridIDs[i] = SubGridID(x[i],y[i],primID[i]); + node0.setBounds(i,subGridBounds0[i]); + node1.setBounds(i,subGridBounds1[i]); + } + qnode.node0.init_dim(node0); + qnode.node1.init_dim(node1); + } + + __forceinline unsigned int geomID() const { return _geomID; } + __forceinline unsigned int primID(const size_t i) const { assert(i < N); return subgridIDs[i].primID; } + __forceinline unsigned int x(const size_t i) const { assert(i < N); return subgridIDs[i].x; } + __forceinline unsigned int y(const size_t i) const { assert(i < N); return subgridIDs[i].y; } + + __forceinline SubGrid subgrid(const size_t i) const { + assert(i < N); + assert(primID(i) != -1); + return SubGrid(x(i),y(i),geomID(),primID(i)); + } + + __forceinline float adjustTime(const float t) const { return time_scale * (t-time_offset); } + + template<int K> + __forceinline vfloat<K> adjustTime(const vfloat<K> &t) const { return 
time_scale * (t-time_offset); } + + public: + SubGridID subgridIDs[N]; + + typename BVHN<N>::QuantizedBaseNodeMB qnode; + + float time_offset; + float time_scale; + unsigned int _geomID; // geometry ID of mesh + + + friend embree_ostream operator<<(embree_ostream cout, const SubGridMBQBVHN& sg) { + cout << "SubGridMBQBVHN " << embree_endl; + for (size_t i=0;i<N;i++) + cout << i << " ( x = " << sg.subgridIDs[i].x << ", y = " << sg.subgridIDs[i].y << ", primID = " << sg.subgridIDs[i].primID << " )" << embree_endl; + cout << "geomID " << sg._geomID << embree_endl; + cout << "time_offset " << sg.time_offset << embree_endl; + cout << "time_scale " << sg.time_scale << embree_endl; + cout << "lowerX " << sg.qnode.node0.dequantizeLowerX() << embree_endl; + cout << "upperX " << sg.qnode.node0.dequantizeUpperX() << embree_endl; + cout << "lowerY " << sg.qnode.node0.dequantizeLowerY() << embree_endl; + cout << "upperY " << sg.qnode.node0.dequantizeUpperY() << embree_endl; + cout << "lowerZ " << sg.qnode.node0.dequantizeLowerZ() << embree_endl; + cout << "upperZ " << sg.qnode.node0.dequantizeUpperZ() << embree_endl; + cout << "lowerX " << sg.qnode.node1.dequantizeLowerX() << embree_endl; + cout << "upperX " << sg.qnode.node1.dequantizeUpperX() << embree_endl; + cout << "lowerY " << sg.qnode.node1.dequantizeLowerY() << embree_endl; + cout << "upperY " << sg.qnode.node1.dequantizeUpperY() << embree_endl; + cout << "lowerZ " << sg.qnode.node1.dequantizeLowerZ() << embree_endl; + cout << "upperZ " << sg.qnode.node1.dequantizeUpperZ() << embree_endl; + return cout; + } + + }; + +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h new file mode 100644 index 0000000000..045eee4329 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h @@ -0,0 +1,518 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include 
"subgrid.h" +#include "subgrid_intersector_moeller.h" +#include "subgrid_intersector_pluecker.h" + +namespace embree +{ + namespace isa + { + + // ======================================================================================= + // =================================== SubGridIntersectors =============================== + // ======================================================================================= + + + template<int N, bool filter> + struct SubGridIntersector1Moeller + { + typedef SubGridQBVHN<N> Primitive; + typedef SubGridQuadMIntersector1MoellerTrumbore<4,filter> Precalculations; + + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const SubGrid& subgrid) + { + STAT3(normal.trav_prims,1,1,1); + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + + Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); + pre.intersect(ray,context,v0,v1,v2,v3,g,subgrid); + } + + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const SubGrid& subgrid) + { + STAT3(shadow.trav_prims,1,1,1); + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + + Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); + return pre.occluded(ray,context,v0,v1,v2,v3,g,subgrid); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid) + { + STAT3(point_query.trav_prims,1,1,1); + AccelSet* accel = (AccelSet*)context->scene->get(subgrid.geomID()); + assert(accel); + context->geomID = subgrid.geomID(); + context->primID = subgrid.primID(); + return accel->pointQuery(query, context); + } + + template<int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, 
const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + + for (size_t i=0;i<num;i++) + { + vfloat<Nx> dist; + size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); +#if defined(__AVX__) + STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1); +#endif + while(mask != 0) + { + const size_t ID = bscf(mask); + assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); + + if (unlikely(dist[ID] > ray.tfar)) continue; + intersect(pre,ray,context,prim[i].subgrid(ID)); + } + } + } + template<int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + + { + BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + + for (size_t i=0;i<num;i++) + { + vfloat<Nx> dist; + size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); + while(mask != 0) + { + const size_t ID = bscf(mask); + assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); + + if (occluded(pre,ray,context,prim[i].subgrid(ID))) + return true; + } + } + return false; + } + + static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node) + { + bool changed = false; + for (size_t i=0;i<num;i++) + { + vfloat<N> dist; + size_t mask; + if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) { + mask = BVHNQuantizedBaseNodePointQuerySphere1<N>::pointQuery(&prim[i].qnode,tquery,dist); + } else { + mask = BVHNQuantizedBaseNodePointQueryAABB1<N>::pointQuery(&prim[i].qnode,tquery,dist); + } + while(mask != 0) + { + const size_t ID = bscf(mask); + assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); + changed |= pointQuery(query, context, prim[i].subgrid(ID)); + } + } + return changed; + } + 
}; + + template<int N, bool filter> + struct SubGridIntersector1Pluecker + { + typedef SubGridQBVHN<N> Primitive; + typedef SubGridQuadMIntersector1Pluecker<4,filter> Precalculations; + + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const SubGrid& subgrid) /* gathers the subgrid's four quads and intersects them with a single ray */ + { + STAT3(normal.trav_prims,1,1,1); + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + + Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); + pre.intersect(ray,context,v0,v1,v2,v3,g,subgrid); + } + + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const SubGrid& subgrid) /* same gather as intersect(); returns the result of pre.occluded */ + { + STAT3(shadow.trav_prims,1,1,1); + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + + Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); + return pre.occluded(ray,context,v0,v1,v2,v3,g,subgrid); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid) /* stores the subgrid's geomID/primID in the context and forwards the query to the geometry looked up by geomID */ + { + STAT3(point_query.trav_prims,1,1,1); + AccelSet* accel = (AccelSet*)context->scene->get(subgrid.geomID()); assert(accel); /* same null check as SubGridIntersector1Moeller::pointQuery */ + context->geomID = subgrid.geomID(); + context->primID = subgrid.primID(); + return accel->pointQuery(query, context); + } + + template<int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) /* for each of the num leaves: test the quantized child boxes, then intersect each subgrid whose bit is set in the returned mask */ + { + BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + + for (size_t i=0;i<num;i++) + { + vfloat<Nx> dist; + size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); +#if defined(__AVX__) + STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1); +#endif + while(mask != 0) + { + const size_t ID = bscf(mask); + assert(((size_t)1 << ID) &
movemask(prim[i].qnode.validMask())); + + if (unlikely(dist[ID] > ray.tfar)) continue; + intersect(pre,ray,context,prim[i].subgrid(ID)); + } + } + } + + template<int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + + for (size_t i=0;i<num;i++) + { + vfloat<Nx> dist; + size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); + while(mask != 0) + { + const size_t ID = bscf(mask); + assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); + + if (occluded(pre,ray,context,prim[i].subgrid(ID))) + return true; + } + } + return false; + } + + static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node) + { + bool changed = false; + for (size_t i=0;i<num;i++) + { + vfloat<N> dist; + size_t mask; + if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) { + mask = BVHNQuantizedBaseNodePointQuerySphere1<N>::pointQuery(&prim[i].qnode,tquery,dist); + } else { + mask = BVHNQuantizedBaseNodePointQueryAABB1<N>::pointQuery(&prim[i].qnode,tquery,dist); + } +#if defined(__AVX__) + STAT3(point_query.trav_hit_boxes[popcnt(mask)],1,1,1); +#endif + while(mask != 0) + { + const size_t ID = bscf(mask); + assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); + changed |= pointQuery(query, context, prim[i].subgrid(ID)); + } + } + return changed; + } + }; + + template<int N, int K, bool filter> + struct SubGridIntersectorKMoeller + { + typedef SubGridQBVHN<N> Primitive; + typedef SubGridQuadMIntersectorKMoellerTrumbore<4,K,filter> Precalculations; + + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const 
SubGrid& subgrid) + { + Vec3fa vtx[16]; + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + + subgrid.gather(vtx,context->scene); + for (unsigned int i=0; i<4; i++) + { + const Vec3vf<K> p0 = vtx[i*4+0]; + const Vec3vf<K> p1 = vtx[i*4+1]; + const Vec3vf<K> p2 = vtx[i*4+2]; + const Vec3vf<K> p3 = vtx[i*4+3]; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + pre.intersectK(valid_i,ray,p0,p1,p2,p3,g,subgrid,i,IntersectKEpilogM<4,K,filter>(ray,context,subgrid.geomID(),subgrid.primID(),i)); + } + } + + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const SubGrid& subgrid) + { + vbool<K> valid0 = valid_i; + Vec3fa vtx[16]; + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + + subgrid.gather(vtx,context->scene); + for (unsigned int i=0; i<4; i++) + { + const Vec3vf<K> p0 = vtx[i*4+0]; + const Vec3vf<K> p1 = vtx[i*4+1]; + const Vec3vf<K> p2 = vtx[i*4+2]; + const Vec3vf<K> p3 = vtx[i*4+3]; + STAT3(shadow.trav_prims,1,popcnt(valid0),K); + if (pre.intersectK(valid0,ray,p0,p1,p2,p3,g,subgrid,i,OccludedKEpilogM<4,K,filter>(valid0,ray,context,subgrid.geomID(),subgrid.primID(),i))) + break; + } + return !valid0; + } + + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid) + { + STAT3(normal.trav_prims,1,1,1); + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + + Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); + pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid); + } + + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid) + { + STAT3(shadow.trav_prims,1,1,1); + const GridMesh* mesh = 
context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); + return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid); + } + + template<bool robust> + static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK; + for (size_t j=0;j<num;j++) + { + size_t m_valid = movemask(prim[j].qnode.validMask()); + vfloat<K> dist; + while(m_valid) + { + const size_t i = bscf(m_valid); + if (none(valid & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue; + intersect(valid,pre,ray,context,prim[j].subgrid(i)); + } + } + } + + template<bool robust> + static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK; + vbool<K> valid0 = valid; + for (size_t j=0;j<num;j++) + { + size_t m_valid = movemask(prim[j].qnode.validMask()); + vfloat<K> dist; + while(m_valid) + { + const size_t i = bscf(m_valid); + if (none(valid0 & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue; + valid0 &= !occluded(valid0,pre,ray,context,prim[j].subgrid(i)); + if (none(valid0)) break; + } + } + return !valid0; + } + + template<int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + + for (size_t i=0;i<num;i++) + { + vfloat<Nx> dist; + size_t mask = 
isec1.intersect(&prim[i].qnode,tray,dist); + while(mask != 0) + { + const size_t ID = bscf(mask); + assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); + + if (unlikely(dist[ID] > ray.tfar[k])) continue; + intersect(pre,ray,k,context,prim[i].subgrid(ID)); + } + } + } + + template<int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + + for (size_t i=0;i<num;i++) + { + vfloat<Nx> dist; + size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); + while(mask != 0) + { + const size_t ID = bscf(mask); + assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); + + if (occluded(pre,ray,k,context,prim[i].subgrid(ID))) + return true; + } + } + return false; + } + }; + + + template<int N, int K, bool filter> + struct SubGridIntersectorKPluecker + { + typedef SubGridQBVHN<N> Primitive; + typedef SubGridQuadMIntersectorKPluecker<4,K,filter> Precalculations; + + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const SubGrid& subgrid) + { + Vec3fa vtx[16]; + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + + subgrid.gather(vtx,context->scene); + for (unsigned int i=0; i<4; i++) + { + const Vec3vf<K> p0 = vtx[i*4+0]; + const Vec3vf<K> p1 = vtx[i*4+1]; + const Vec3vf<K> p2 = vtx[i*4+2]; + const Vec3vf<K> p3 = vtx[i*4+3]; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + pre.intersectK(valid_i,ray,p0,p1,p2,p3,g,subgrid,i,IntersectKEpilogM<4,K,filter>(ray,context,subgrid.geomID(),subgrid.primID(),i)); + } + } + + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const 
SubGrid& subgrid) + { + vbool<K> valid0 = valid_i; + Vec3fa vtx[16]; + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + + subgrid.gather(vtx,context->scene); + for (unsigned int i=0; i<4; i++) + { + const Vec3vf<K> p0 = vtx[i*4+0]; + const Vec3vf<K> p1 = vtx[i*4+1]; + const Vec3vf<K> p2 = vtx[i*4+2]; + const Vec3vf<K> p3 = vtx[i*4+3]; + STAT3(shadow.trav_prims,1,popcnt(valid0),K); + if (pre.intersectK(valid0,ray,p0,p1,p2,p3,g,subgrid,i,OccludedKEpilogM<4,K,filter>(valid0,ray,context,subgrid.geomID(),subgrid.primID(),i))) + break; + } + return !valid0; + } + + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid) + { + STAT3(normal.trav_prims,1,1,1); + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + + Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); + pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid); + } + + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid) + { + STAT3(shadow.trav_prims,1,1,1); + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); + return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid); + } + + template<bool robust> + static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK; + for (size_t j=0;j<num;j++) + { + size_t m_valid = movemask(prim[j].qnode.validMask()); + vfloat<K> dist; + while(m_valid) + { + const size_t i = 
bscf(m_valid); + if (none(valid & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue; + intersect(valid,pre,ray,context,prim[j].subgrid(i)); + } + } + } + + template<bool robust> + static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK; + vbool<K> valid0 = valid; + for (size_t j=0;j<num;j++) + { + size_t m_valid = movemask(prim[j].qnode.validMask()); + vfloat<K> dist; + while(m_valid) + { + const size_t i = bscf(m_valid); + if (none(valid0 & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue; + valid0 &= !occluded(valid0,pre,ray,context,prim[j].subgrid(i)); + if (none(valid0)) break; + } + } + return !valid0; + } + + template<int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + + for (size_t i=0;i<num;i++) + { + vfloat<Nx> dist; + size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); + while(mask != 0) + { + const size_t ID = bscf(mask); + assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); + + if (unlikely(dist[ID] > ray.tfar[k])) continue; + intersect(pre,ray,k,context,prim[i].subgrid(ID)); + } + } + } + + template<int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + + for (size_t i=0;i<num;i++) + { + vfloat<Nx> dist; + size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); + 
while(mask != 0) + { + const size_t ID = bscf(mask); + assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); + + if (occluded(pre,ray,k,context,prim[i].subgrid(ID))) + return true; + } + } + return false; + } + }; + + + + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h new file mode 100644 index 0000000000..f65b4abf61 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h @@ -0,0 +1,493 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "subgrid.h" +#include "quad_intersector_moeller.h" + +namespace embree +{ + namespace isa + { + + /* ----------------------------- */ + /* -- single ray intersectors -- */ + /* ----------------------------- */ + + template<int M> + __forceinline void interpolateUV(MoellerTrumboreHitM<M> &hit,const GridMesh::Grid &g, const SubGrid& subgrid) + { + /* correct U,V interpolation across the entire grid */ + const vint<M> sx((int)subgrid.x()); + const vint<M> sy((int)subgrid.y()); + const vint<M> sxM(sx + vint<M>(0,1,1,0)); + const vint<M> syM(sy + vint<M>(0,0,1,1)); + const float inv_resX = rcp((float)((int)g.resX-1)); + const float inv_resY = rcp((float)((int)g.resY-1)); + hit.U = (hit.U + (vfloat<M>)sxM * hit.absDen) * inv_resX; + hit.V = (hit.V + (vfloat<M>)syM * hit.absDen) * inv_resY; + } + + template<int M, bool filter> + struct SubGridQuadMIntersector1MoellerTrumbore; + + template<int M, bool filter> + struct SubGridQuadMIntersector1MoellerTrumbore + { + __forceinline SubGridQuadMIntersector1MoellerTrumbore() {} + + __forceinline SubGridQuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} + + __forceinline void intersect(RayHit& ray, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const GridMesh::Grid &g, const SubGrid& 
subgrid) const + { + MoellerTrumboreHitM<M> hit; + MoellerTrumboreIntersector1<M> intersector(ray,nullptr); + Intersect1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); + + /* intersect first triangle */ + if (intersector.intersect(ray,v0,v1,v3,hit)) + { + interpolateUV<M>(hit,g,subgrid); + epilog(hit.valid,hit); + } + + /* intersect second triangle */ + if (intersector.intersect(ray,v2,v3,v1,hit)) + { + hit.U = hit.absDen - hit.U; + hit.V = hit.absDen - hit.V; + interpolateUV<M>(hit,g,subgrid); + epilog(hit.valid,hit); + } + } + + __forceinline bool occluded(Ray& ray, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + MoellerTrumboreHitM<M> hit; + MoellerTrumboreIntersector1<M> intersector(ray,nullptr); + Occluded1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); + + /* intersect first triangle */ + if (intersector.intersect(ray,v0,v1,v3,hit)) + { + interpolateUV<M>(hit,g,subgrid); + if (epilog(hit.valid,hit)) + return true; + } + + /* intersect second triangle */ + if (intersector.intersect(ray,v2,v3,v1,hit)) + { + hit.U = hit.absDen - hit.U; + hit.V = hit.absDen - hit.V; + interpolateUV<M>(hit,g,subgrid); + if (epilog(hit.valid,hit)) + return true; + } + return false; + } + }; + +#if defined (__AVX__) + + /*! 
Intersects 4 quads with 1 ray using AVX */ + template<bool filter> + struct SubGridQuadMIntersector1MoellerTrumbore<4,filter> + { + __forceinline SubGridQuadMIntersector1MoellerTrumbore() {} + + __forceinline SubGridQuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} + + template<typename Epilog> + __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid& subgrid, const Epilog& epilog) const + { + const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); + const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); +#else + const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); + const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); +#endif + MoellerTrumboreHitM<8> hit; + MoellerTrumboreIntersector1<8> intersector(ray,nullptr); + const vbool8 flags(0,0,0,0,1,1,1,1); + if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit))) + { + vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen; + +#if !defined(EMBREE_BACKFACE_CULLING) + hit.U = select(flags,absDen-V,U); + hit.V = select(flags,absDen-U,V); + hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); +#else + hit.U = select(flags,absDen-U,U); + hit.V = select(flags,absDen-V,V); +#endif + /* correct U,V interpolation across the entire grid */ + const vint8 sx((int)subgrid.x()); + const vint8 sy((int)subgrid.y()); + const vint8 sx8(sx + vint8(0,1,1,0,0,1,1,0)); + const vint8 sy8(sy + vint8(0,0,1,1,0,0,1,1)); + const float inv_resX = rcp((float)((int)g.resX-1)); + const float inv_resY = rcp((float)((int)g.resY-1)); + hit.U = (hit.U + (vfloat8)sx8 * absDen) * inv_resX; + hit.V = (hit.V + (vfloat8)sy8 * absDen) * inv_resY; + + if (unlikely(epilog(hit.valid,hit))) + return true; + } + return false; + } + + __forceinline bool 
intersect(RayHit& ray, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + return intersect(ray,v0,v1,v2,v3,g,subgrid,Intersect1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); + } + + __forceinline bool occluded(Ray& ray, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + return intersect(ray,v0,v1,v2,v3,g,subgrid,Occluded1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); + } + }; + +#endif + + // ============================================================================================================================ + // ============================================================================================================================ + // ============================================================================================================================ + + + /* ----------------------------- */ + /* -- ray packet intersectors -- */ + /* ----------------------------- */ + + template<int K> + struct SubGridQuadHitK + { + __forceinline SubGridQuadHitK(const vfloat<K>& U, + const vfloat<K>& V, + const vfloat<K>& T, + const vfloat<K>& absDen, + const Vec3vf<K>& Ng, + const vbool<K>& flags, + const GridMesh::Grid &g, + const SubGrid& subgrid, + const unsigned int i) + : U(U), V(V), T(T), absDen(absDen), flags(flags), tri_Ng(Ng), g(g), subgrid(subgrid), i(i) {} + + __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const + { + const vfloat<K> rcpAbsDen = rcp(absDen); + const vfloat<K> t = T * rcpAbsDen; + const vfloat<K> u0 = min(U * rcpAbsDen,1.0f); + const vfloat<K> v0 = min(V * rcpAbsDen,1.0f); + const vfloat<K> u1 = vfloat<K>(1.0f) - u0; + const vfloat<K> v1 = vfloat<K>(1.0f) - v0; + const vfloat<K> uu = select(flags,u1,u0); + const vfloat<K> vv = 
select(flags,v1,v0); + const unsigned int sx = subgrid.x() + (unsigned int)(i % 2); + const unsigned int sy = subgrid.y() + (unsigned int)(i >>1); + const float inv_resX = rcp((float)(int)(g.resX-1)); + const float inv_resY = rcp((float)(int)(g.resY-1)); + const vfloat<K> u = (uu + (float)(int)sx) * inv_resX; + const vfloat<K> v = (vv + (float)(int)sy) * inv_resY; + const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z); + return std::make_tuple(u,v,t,Ng); + } + + private: + const vfloat<K> U; + const vfloat<K> V; + const vfloat<K> T; + const vfloat<K> absDen; + const vbool<K> flags; + const Vec3vf<K> tri_Ng; + + const GridMesh::Grid &g; + const SubGrid& subgrid; + const size_t i; + }; + + template<int M, int K, bool filter> + struct SubGridQuadMIntersectorKMoellerTrumboreBase + { + __forceinline SubGridQuadMIntersectorKMoellerTrumboreBase(const vbool<K>& valid, const RayK<K>& ray) {} + + template<typename Epilog> + __forceinline vbool<K> intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& tri_v0, + const Vec3vf<K>& tri_e1, + const Vec3vf<K>& tri_e2, + const Vec3vf<K>& tri_Ng, + const vbool<K>& flags, + const GridMesh::Grid &g, + const SubGrid &subgrid, + const unsigned int i, + const Epilog& epilog) const + { + /* calculate denominator */ + vbool<K> valid = valid0; + const Vec3vf<K> C = tri_v0 - ray.org; + const Vec3vf<K> R = cross(C,ray.dir); + const vfloat<K> den = dot(tri_Ng,ray.dir); + const vfloat<K> absDen = abs(den); + const vfloat<K> sgnDen = signmsk(den); + + /* test against edge p2 p0 */ + const vfloat<K> U = dot(R,tri_e2) ^ sgnDen; + valid &= U >= 0.0f; + if (likely(none(valid))) return false; + + /* test against edge p0 p1 */ + const vfloat<K> V = dot(R,tri_e1) ^ sgnDen; + valid &= V >= 0.0f; + if (likely(none(valid))) return false; + + /* test against edge p1 p2 */ + const vfloat<K> W = absDen-U-V; + valid &= W >= 0.0f; + if (likely(none(valid))) return false; + + /* perform depth test */ + const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen; + valid 
&= (absDen*ray.tnear() < T) & (T <= absDen*ray.tfar); + if (unlikely(none(valid))) return false; + + /* perform backface culling */ +#if defined(EMBREE_BACKFACE_CULLING) + valid &= den < vfloat<K>(zero); + if (unlikely(none(valid))) return false; +#else + valid &= den != vfloat<K>(zero); + if (unlikely(none(valid))) return false; +#endif + + /* calculate hit information */ + SubGridQuadHitK<K> hit(U,V,T,absDen,tri_Ng,flags,g,subgrid,i); + return epilog(valid,hit); + } + + template<typename Epilog> + __forceinline vbool<K> intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& tri_v0, + const Vec3vf<K>& tri_v1, + const Vec3vf<K>& tri_v2, + const vbool<K>& flags, + const GridMesh::Grid &g, + const SubGrid &subgrid, + const unsigned int i, + const Epilog& epilog) const + { + const Vec3vf<K> e1 = tri_v0-tri_v1; + const Vec3vf<K> e2 = tri_v2-tri_v0; + const Vec3vf<K> Ng = cross(e2,e1); + return intersectK(valid0,ray,tri_v0,e1,e2,Ng,flags,g,subgrid,i,epilog); + } + + template<typename Epilog> + __forceinline bool intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& v0, + const Vec3vf<K>& v1, + const Vec3vf<K>& v2, + const Vec3vf<K>& v3, + const GridMesh::Grid &g, + const SubGrid &subgrid, + const unsigned int i, + const Epilog& epilog) const + { + intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),g,subgrid,i,epilog); + if (none(valid0)) return true; + intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),g,subgrid,i,epilog); + return none(valid0); + } + + static __forceinline bool intersect1(RayK<K>& ray, + size_t k, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_e1, + const Vec3vf<M>& tri_e2, + const Vec3vf<M>& tri_Ng, + MoellerTrumboreHitM<M> &hit) + { + /* calculate denominator */ + const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k); + const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k); + const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O; + const Vec3vf<M> R = cross(C,D); + const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D); + const vfloat<M> absDen = 
abs(den); + const vfloat<M> sgnDen = signmsk(den); + + /* perform edge tests */ + const vfloat<M> U = dot(R,Vec3vf<M>(tri_e2)) ^ sgnDen; + const vfloat<M> V = dot(R,Vec3vf<M>(tri_e1)) ^ sgnDen; + + /* perform backface culling */ +#if defined(EMBREE_BACKFACE_CULLING) + vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); +#else + vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); +#endif + if (likely(none(valid))) return false; + + /* perform depth test */ + const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen; + valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k])); + if (likely(none(valid))) return false; + + /* calculate hit information */ + new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng); + return true; + } + + static __forceinline bool intersect1(RayK<K>& ray, + size_t k, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + MoellerTrumboreHitM<M> &hit) + { + const Vec3vf<M> e1 = v0-v1; + const Vec3vf<M> e2 = v2-v0; + const Vec3vf<M> Ng = cross(e2,e1); + return intersect1(ray,k,v0,e1,e2,Ng,hit); + } + + }; + + template<int M, int K, bool filter> + struct SubGridQuadMIntersectorKMoellerTrumbore : public SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter> + { + __forceinline SubGridQuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) + : SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter>(valid,ray) {} + + __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const + { + Intersect1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); + + MoellerTrumboreHitM<4> hit; + if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,hit)) + { + interpolateUV<M>(hit,g,subgrid); + 
epilog(hit.valid,hit); + } + + if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,hit)) + { + hit.U = hit.absDen - hit.U; + hit.V = hit.absDen - hit.V; + interpolateUV<M>(hit,g,subgrid); + epilog(hit.valid,hit); + } + + } + + __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const + { + Occluded1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); + + MoellerTrumboreHitM<4> hit; + if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,hit)) + { + interpolateUV<M>(hit,g,subgrid); + if (epilog(hit.valid,hit)) return true; + } + + if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,hit)) + { + hit.U = hit.absDen - hit.U; + hit.V = hit.absDen - hit.V; + interpolateUV<M>(hit,g,subgrid); + if (epilog(hit.valid,hit)) return true; + } + return false; + } + }; + + +#if defined (__AVX__) + + /*! 
Intersects 4 quads with 1 ray using AVX */ + template<int K, bool filter> + struct SubGridQuadMIntersectorKMoellerTrumbore<4,K,filter> : public SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter> + { + __forceinline SubGridQuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) + : SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {} + + template<typename Epilog> + __forceinline bool intersect1(RayK<K>& ray, size_t k,const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const GridMesh::Grid &g, const SubGrid &subgrid, const Epilog& epilog) const + { + const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); + const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); +#else + const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); + const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); +#endif + const vbool8 flags(0,0,0,0,1,1,1,1); + + MoellerTrumboreHitM<8> hit; + if (SubGridQuadMIntersectorKMoellerTrumboreBase<8,K,filter>::intersect1(ray,k,vtx0,vtx1,vtx2,hit)) + { + vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen; +#if !defined(EMBREE_BACKFACE_CULLING) + hit.U = select(flags,absDen-V,U); + hit.V = select(flags,absDen-U,V); + hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); +#else + hit.U = select(flags,absDen-U,U); + hit.V = select(flags,absDen-V,V); +#endif + + /* correct U,V interpolation across the entire grid */ + const vint8 sx((int)subgrid.x()); + const vint8 sy((int)subgrid.y()); + const vint8 sx8(sx + vint8(0,1,1,0,0,1,1,0)); + const vint8 sy8(sy + vint8(0,0,1,1,0,0,1,1)); + const float inv_resX = rcp((float)((int)g.resX-1)); + const float inv_resY = rcp((float)((int)g.resY-1)); + hit.U = (hit.U + (vfloat8)sx8 * absDen) * inv_resX; + hit.V = (hit.V + (vfloat8)sy8 * absDen) * inv_resY; + if 
(unlikely(epilog(hit.valid,hit))) + return true; + + } + return false; + } + + __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const + { + return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Intersect1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID())); + } + + __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const + { + return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Occluded1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID())); + } + }; + +#endif + + + + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h new file mode 100644 index 0000000000..1cd88aa799 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h @@ -0,0 +1,508 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "subgrid.h" +#include "quad_intersector_moeller.h" +#include "quad_intersector_pluecker.h" + +namespace embree +{ + namespace isa + { + + template<int M> + struct SubGridQuadHitPlueckerM + { + __forceinline SubGridQuadHitPlueckerM() {} + + __forceinline SubGridQuadHitPlueckerM(const vbool<M>& valid, + const vfloat<M>& U, + const vfloat<M>& V, + const vfloat<M>& UVW, + const vfloat<M>& t, + const Vec3vf<M>& Ng, + const vbool<M>& flags) : valid(valid), vt(t) + { + const vbool<M> invalid = abs(UVW) < min_rcp_input; + const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW)); + const vfloat<M> u = min(U * rcpUVW,1.0f); + const vfloat<M> v = min(V * rcpUVW,1.0f); + const vfloat<M> u1 = vfloat<M>(1.0f) - u; + const vfloat<M> v1 = 
vfloat<M>(1.0f) - v; +#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING) + vu = select(flags,u1,u); + vv = select(flags,v1,v); + vNg = Vec3vf<M>(Ng.x,Ng.y,Ng.z); +#else + const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f)); + vv = select(flags,u1,v); + vu = select(flags,v1,u); + vNg = Vec3vf<M>(flip*Ng.x,flip*Ng.y,flip*Ng.z); +#endif + } + + __forceinline void finalize() + { + } + + __forceinline Vec2f uv(const size_t i) + { + const float u = vu[i]; + const float v = vv[i]; + return Vec2f(u,v); + } + + __forceinline float t(const size_t i) { return vt[i]; } + __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } + + public: + vbool<M> valid; + vfloat<M> vu; + vfloat<M> vv; + vfloat<M> vt; + Vec3vf<M> vNg; + }; + + template<int M> + __forceinline void interpolateUV(SubGridQuadHitPlueckerM<M> &hit,const GridMesh::Grid &g, const SubGrid& subgrid, const vint<M> &stepX, const vint<M> &stepY) + { + /* correct U,V interpolation across the entire grid */ + const vint<M> sx((int)subgrid.x()); + const vint<M> sy((int)subgrid.y()); + const vint<M> sxM(sx + stepX); + const vint<M> syM(sy + stepY); + const float inv_resX = rcp((float)((int)g.resX-1)); + const float inv_resY = rcp((float)((int)g.resY-1)); + hit.vu = (hit.vu + vfloat<M>(sxM)) * inv_resX; + hit.vv = (hit.vv + vfloat<M>(syM)) * inv_resY; + } + + template<int M> + __forceinline static bool intersectPluecker(Ray& ray, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_v1, + const Vec3vf<M>& tri_v2, + const vbool<M>& flags, + SubGridQuadHitPlueckerM<M> &hit) + { + /* calculate vertices relative to ray origin */ + const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org); + const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir); + const Vec3vf<M> v0 = tri_v0-O; + const Vec3vf<M> v1 = tri_v1-O; + const Vec3vf<M> v2 = tri_v2-O; + + /* calculate triangle edges */ + const Vec3vf<M> e0 = v2-v0; + const Vec3vf<M> e1 = v0-v1; + const Vec3vf<M> e2 = v1-v2; + + /* perform edge tests */ + 
const vfloat<M> U = dot(cross(e0,v2+v0),D); + const vfloat<M> V = dot(cross(e1,v0+v1),D); + const vfloat<M> W = dot(cross(e2,v1+v2),D); + const vfloat<M> UVW = U+V+W; + const vfloat<M> eps = float(ulp)*abs(UVW); +#if defined(EMBREE_BACKFACE_CULLING) + vbool<M> valid = max(U,V,W) <= eps; +#else + vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); +#endif + if (unlikely(none(valid))) return false; + + /* calculate geometry normal and denominator */ + const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); + const vfloat<M> den = twice(dot(Ng,D)); + + /* perform depth test */ + const vfloat<M> T = twice(dot(v0,Ng)); + const vfloat<M> t = rcp(den)*T; + valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar); + valid &= den != vfloat<M>(zero); + if (unlikely(none(valid))) return false; + + /* update hit information */ + new (&hit) SubGridQuadHitPlueckerM<M>(valid,U,V,UVW,t,Ng,flags); + return true; + } + + template<int M, bool filter> + struct SubGridQuadMIntersector1Pluecker; + + template<int M, bool filter> + struct SubGridQuadMIntersector1Pluecker + { + __forceinline SubGridQuadMIntersector1Pluecker() {} + + __forceinline SubGridQuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} + + __forceinline void intersect(RayHit& ray, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + SubGridQuadHitPlueckerM<M> hit; + Intersect1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); + + /* intersect first triangle */ + if (intersectPluecker(ray,v0,v1,v3,vbool<M>(false),hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + epilog(hit.valid,hit); + } + + /* intersect second triangle */ + if (intersectPluecker(ray,v2,v3,v1,vbool<M>(true),hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + epilog(hit.valid,hit); + } + } + + __forceinline bool occluded(Ray& 
ray, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + SubGridQuadHitPlueckerM<M> hit; + Occluded1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); + + /* intersect first triangle */ + if (intersectPluecker(ray,v0,v1,v3,vbool<M>(false),hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + if (epilog(hit.valid,hit)) + return true; + } + + /* intersect second triangle */ + if (intersectPluecker(ray,v2,v3,v1,vbool<M>(true),hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + if (epilog(hit.valid,hit)) + return true; + } + + return false; + } + }; + +#if defined (__AVX__) + + /*! Intersects 4 quads with 1 ray using AVX */ + template<bool filter> + struct SubGridQuadMIntersector1Pluecker<4,filter> + { + __forceinline SubGridQuadMIntersector1Pluecker() {} + + __forceinline SubGridQuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} + + template<typename Epilog> + __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid& subgrid, const Epilog& epilog) const + { + const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); + const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); +#else + const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); + const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); +#endif + SubGridQuadHitPlueckerM<8> hit; + const vbool8 flags(0,0,0,0,1,1,1,1); + if (unlikely(intersectPluecker(ray,vtx0,vtx1,vtx2,flags,hit))) + { + /* correct U,V interpolation across the entire grid */ + interpolateUV<8>(hit,g,subgrid,vint<8>(0,1,1,0,0,1,1,0),vint<8>(0,0,1,1,0,0,1,1)); + if 
(unlikely(epilog(hit.valid,hit))) + return true; + } + return false; + } + + __forceinline bool intersect(RayHit& ray, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + return intersect(ray,v0,v1,v2,v3,g,subgrid,Intersect1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); + } + + __forceinline bool occluded(Ray& ray, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + return intersect(ray,v0,v1,v2,v3,g,subgrid,Occluded1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); + } + }; + +#endif + + + /* ----------------------------- */ + /* -- ray packet intersectors -- */ + /* ----------------------------- */ + + template<int K> + struct SubGridQuadHitPlueckerK + { + __forceinline SubGridQuadHitPlueckerK(const vfloat<K>& U, + const vfloat<K>& V, + const vfloat<K>& UVW, + const vfloat<K>& t, + const Vec3vf<K>& Ng, + const vbool<K>& flags, + const GridMesh::Grid &g, + const SubGrid& subgrid, + const unsigned int i) + : U(U), V(V), UVW(UVW), t(t), flags(flags), tri_Ng(Ng), g(g), subgrid(subgrid), i(i) {} + + __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const + { + const vbool<K> invalid = abs(UVW) < min_rcp_input; + const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW)); + const vfloat<K> u0 = min(U * rcpUVW,1.0f); + const vfloat<K> v0 = min(V * rcpUVW,1.0f); + const vfloat<K> u1 = vfloat<K>(1.0f) - u0; + const vfloat<K> v1 = vfloat<K>(1.0f) - v0; + const vfloat<K> uu = select(flags,u1,u0); + const vfloat<K> vv = select(flags,v1,v0); + const unsigned int sx = subgrid.x() + (unsigned int)(i % 2); + const unsigned int sy = subgrid.y() + (unsigned int)(i >>1); + const float inv_resX = rcp((float)(int)(g.resX-1)); + const float inv_resY = rcp((float)(int)(g.resY-1)); + 
const vfloat<K> u = (uu + (float)(int)sx) * inv_resX; + const vfloat<K> v = (vv + (float)(int)sy) * inv_resY; + const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z); + return std::make_tuple(u,v,t,Ng); + } + + private: + const vfloat<K> U; + const vfloat<K> V; + const vfloat<K> UVW; + const vfloat<K> t; + const vfloat<K> absDen; + const vbool<K> flags; + const Vec3vf<K> tri_Ng; + + const GridMesh::Grid &g; + const SubGrid& subgrid; + const size_t i; + }; + + + template<int M, int K, bool filter> + struct SubGridQuadMIntersectorKPlueckerBase + { + __forceinline SubGridQuadMIntersectorKPlueckerBase(const vbool<K>& valid, const RayK<K>& ray) {} + + template<typename Epilog> + __forceinline vbool<K> intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& tri_v0, + const Vec3vf<K>& tri_v1, + const Vec3vf<K>& tri_v2, + const Vec3vf<K>& tri_Ng, + const vbool<K>& flags, + const GridMesh::Grid &g, + const SubGrid &subgrid, + const unsigned int i, + const Epilog& epilog) const + { + /* calculate denominator */ + /* calculate vertices relative to ray origin */ + vbool<K> valid = valid0; + const Vec3vf<K> O = ray.org; + const Vec3vf<K> D = ray.dir; + const Vec3vf<K> v0 = tri_v0-O; + const Vec3vf<K> v1 = tri_v1-O; + const Vec3vf<K> v2 = tri_v2-O; + + /* calculate triangle edges */ + const Vec3vf<K> e0 = v2-v0; + const Vec3vf<K> e1 = v0-v1; + const Vec3vf<K> e2 = v1-v2; + + /* perform edge tests */ + const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D); + const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D); + const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D); + const vfloat<K> UVW = U+V+W; + const vfloat<K> eps = float(ulp)*abs(UVW); +#if defined(EMBREE_BACKFACE_CULLING) + valid &= max(U,V,W) <= eps; +#else + valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); +#endif + if (unlikely(none(valid))) return false; + + /* calculate geometry normal and denominator */ + const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2); + const vfloat<K> den = 
twice(dot(Vec3vf<K>(Ng),D)); + + /* perform depth test */ + const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng))); + const vfloat<K> t = rcp(den)*T; + valid &= ray.tnear() <= t & t <= ray.tfar; + valid &= den != vfloat<K>(zero); + if (unlikely(none(valid))) return false; + + /* calculate hit information */ + SubGridQuadHitPlueckerK<K> hit(U,V,UVW,t,tri_Ng,flags,g,subgrid,i); + return epilog(valid,hit); + } + + template<typename Epilog> + __forceinline vbool<K> intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& v0, + const Vec3vf<K>& v1, + const Vec3vf<K>& v2, + const vbool<K>& flags, + const GridMesh::Grid &g, + const SubGrid &subgrid, + const unsigned int i, + const Epilog& epilog) const + { + const Vec3vf<K> e1 = v0-v1; + const Vec3vf<K> e2 = v2-v0; + const Vec3vf<K> Ng = cross(e2,e1); + return intersectK(valid0,ray,v0,v1,v2,Ng,flags,g,subgrid,i,epilog); + } + + template<typename Epilog> + __forceinline bool intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& v0, + const Vec3vf<K>& v1, + const Vec3vf<K>& v2, + const Vec3vf<K>& v3, + const GridMesh::Grid &g, + const SubGrid &subgrid, + const unsigned int i, + const Epilog& epilog) const + { + intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),g,subgrid,i,epilog); + if (none(valid0)) return true; + intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),g,subgrid,i,epilog); + return none(valid0); + } + + static __forceinline bool intersect1(RayK<K>& ray, + size_t k, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_v1, + const Vec3vf<M>& tri_v2, + const Vec3vf<M>& tri_Ng, + const vbool<M>& flags, + SubGridQuadHitPlueckerM<M> &hit) + { + /* calculate vertices relative to ray origin */ + const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k); + const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k); + const Vec3vf<M> v0 = tri_v0-O; + const Vec3vf<M> v1 = tri_v1-O; + const Vec3vf<M> v2 = tri_v2-O; + + /* calculate triangle edges */ + const Vec3vf<M> e0 = v2-v0; + const Vec3vf<M> e1 = v0-v1; + const 
Vec3vf<M> e2 = v1-v2; + + /* perform edge tests */ + const vfloat<M> U = dot(cross(e0,v2+v0),D); + const vfloat<M> V = dot(cross(e1,v0+v1),D); + const vfloat<M> W = dot(cross(e2,v1+v2),D); + const vfloat<M> UVW = U+V+W; + const vfloat<M> eps = float(ulp)*abs(UVW); +#if defined(EMBREE_BACKFACE_CULLING) + vbool<M> valid = max(U,V,W) <= eps ; +#else + vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); +#endif + if (unlikely(none(valid))) return false; + + /* calculate geometry normal and denominator */ + const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); + const vfloat<M> den = twice(dot(Ng,D)); + + /* perform depth test */ + const vfloat<M> T = twice(dot(v0,Ng)); + const vfloat<M> t = rcp(den)*T; + valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]); + if (unlikely(none(valid))) return false; + + /* avoid division by 0 */ + valid &= den != vfloat<M>(zero); + if (unlikely(none(valid))) return false; + + /* update hit information */ + new (&hit) SubGridQuadHitPlueckerM<M>(valid,U,V,UVW,t,tri_Ng,flags); + return true; + } + + static __forceinline bool intersect1(RayK<K>& ray, + size_t k, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + const vbool<M>& flags, + SubGridQuadHitPlueckerM<M> &hit) + { + const Vec3vf<M> e1 = v0-v1; + const Vec3vf<M> e2 = v2-v0; + const Vec3vf<M> Ng = cross(e2,e1); // FIXME: optimize!!! 
+ return intersect1(ray,k,v0,v1,v2,Ng,flags,hit); + } + + }; + + template<int M, int K, bool filter> + struct SubGridQuadMIntersectorKPluecker : public SubGridQuadMIntersectorKPlueckerBase<M,K,filter> + { + __forceinline SubGridQuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray) + : SubGridQuadMIntersectorKPlueckerBase<M,K,filter>(valid,ray) {} + + __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const + { + Intersect1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); + + SubGridQuadHitPlueckerM<4> hit; + if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,vboolf4(false),hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + epilog(hit.valid,hit); + } + + if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,vboolf4(true),hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + epilog(hit.valid,hit); + } + + } + + __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const + { + Occluded1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); + + SubGridQuadHitPlueckerM<4> hit; + if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,vboolf4(false),hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + if (epilog(hit.valid,hit)) return true; + } + + if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,vboolf4(true),hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + if (epilog(hit.valid,hit)) return true; + } + return false; + } + }; + + } +} diff --git 
a/thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h new file mode 100644 index 0000000000..400a88b985 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h @@ -0,0 +1,236 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "subgrid_intersector.h" + +namespace embree +{ + namespace isa + { + template<int N, bool filter> + struct SubGridMBIntersector1Pluecker + { + typedef SubGridMBQBVHN<N> Primitive; + typedef SubGridQuadMIntersector1Pluecker<4,filter> Precalculations; + + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const SubGrid& subgrid) + { + STAT3(normal.trav_prims,1,1,1); + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + + float ftime; + const int itime = mesh->timeSegment(ray.time(), ftime); + Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime,ftime); + pre.intersect(ray,context,v0,v1,v2,v3,g,subgrid); + } + + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const SubGrid& subgrid) + { + STAT3(shadow.trav_prims,1,1,1); + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + + float ftime; + const int itime = mesh->timeSegment(ray.time(), ftime); + + Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime,ftime); + return pre.occluded(ray,context,v0,v1,v2,v3,g,subgrid); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, subgrid); + } + + template<int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, 
Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + for (size_t i=0;i<num;i++) + { + vfloat<Nx> dist; + const float time = prim[i].adjustTime(ray.time()); + + assert(time <= 1.0f); + size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist); +#if defined(__AVX__) + STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1); +#endif + while(mask != 0) + { + const size_t ID = bscf(mask); + if (unlikely(dist[ID] > ray.tfar)) continue; + intersect(pre,ray,context,prim[i].subgrid(ID)); + } + } + } + + template<int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + for (size_t i=0;i<num;i++) + { + const float time = prim[i].adjustTime(ray.time()); + assert(time <= 1.0f); + vfloat<Nx> dist; + size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist); + while(mask != 0) + { + const size_t ID = bscf(mask); + if (occluded(pre,ray,context,prim[i].subgrid(ID))) + return true; + } + } + return false; + } + + static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node) + { + assert(false && "not implemented"); + return false; + } + }; + + + template<int N, int K, bool filter> + struct SubGridMBIntersectorKPluecker + { + typedef SubGridMBQBVHN<N> Primitive; + typedef SubGridQuadMIntersectorKPluecker<4,K,filter> Precalculations; + + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const SubGrid& subgrid) + { + size_t m_valid = movemask(valid_i); + while(m_valid) + { + 
size_t ID = bscf(m_valid); + intersect(pre,ray,ID,context,subgrid); + } + } + + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const SubGrid& subgrid) + { + vbool<K> valid0 = valid_i; + size_t m_valid = movemask(valid_i); + while(m_valid) + { + size_t ID = bscf(m_valid); + if (occluded(pre,ray,ID,context,subgrid)) + clear(valid0,ID); + } + return !valid0; + } + + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid) + { + STAT3(normal.trav_prims,1,1,1); + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + + vfloat<K> ftime; + const vint<K> itime = mesh->timeSegment(ray.time(), ftime); + Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime[k],ftime[k]); + pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid); + } + + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid) + { + STAT3(shadow.trav_prims,1,1,1); + const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); + const GridMesh::Grid &g = mesh->grid(subgrid.primID()); + + vfloat<K> ftime; + const vint<K> itime = mesh->timeSegment(ray.time(), ftime); + Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime[k],ftime[k]); + return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid); + } + + template<bool robust> + static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK; + for (size_t j=0;j<num;j++) + { + size_t m_valid = movemask(prim[j].qnode.validMask()); + const vfloat<K> time = prim[j].adjustTime(ray.time()); 
+ + vfloat<K> dist; + while(m_valid) + { + const size_t i = bscf(m_valid); + if (none(valid & isecK.intersectK(&prim[j].qnode,i,tray,time,dist))) continue; + intersect(valid,pre,ray,context,prim[j].subgrid(i)); + } + } + } + + template<bool robust> + static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK; + + vbool<K> valid0 = valid; + for (size_t j=0;j<num;j++) + { + size_t m_valid = movemask(prim[j].qnode.validMask()); + const vfloat<K> time = prim[j].adjustTime(ray.time()); + vfloat<K> dist; + while(m_valid) + { + const size_t i = bscf(m_valid); + if (none(valid0 & isecK.intersectK(&prim[j].qnode,i,tray,time,dist))) continue; + valid0 &= !occluded(valid0,pre,ray,context,prim[j].subgrid(i)); + if (none(valid0)) break; + } + } + return !valid0; + } + + template<int Nx, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + for (size_t i=0;i<num;i++) + { + vfloat<N> dist; + const float time = prim[i].adjustTime(ray.time()[k]); + assert(time <= 1.0f); + + size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist); + while(mask != 0) + { + const size_t ID = bscf(mask); + if (unlikely(dist[ID] > ray.tfar[k])) continue; + intersect(pre,ray,k,context,prim[i].subgrid(ID)); + } + } + } + + template<int Nx, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + { + 
BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + + for (size_t i=0;i<num;i++) + { + vfloat<N> dist; + const float time = prim[i].adjustTime(ray.time()[k]); + assert(time <= 1.0f); + + size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist); + while(mask != 0) + { + const size_t ID = bscf(mask); + if (occluded(pre,ray,k,context,prim[i].subgrid(ID))) + return true; + } + } + return false; + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle.h b/thirdparty/embree-aarch64/kernels/geometry/triangle.h new file mode 100644 index 0000000000..0dedf6dc4c --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/triangle.h @@ -0,0 +1,162 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "primitive.h" + +namespace embree +{ + /* Precalculated representation for M triangles. Stores for each + triangle a base vertex, two edges, and the geometry normal to + speed up intersection calculations */ + template<int M> + struct TriangleM + { + public: + struct Type : public PrimitiveType + { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + static Type type; + + public: + + /* Returns maximum number of stored triangles */ + static __forceinline size_t max_size() { return M; } + + /* Returns required number of primitive blocks for N primitives */ + static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } + + public: + + /* Default constructor */ + __forceinline TriangleM() {} + + /* Construction from vertices and IDs */ + __forceinline TriangleM(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const vuint<M>& geomIDs, const vuint<M>& primIDs) + : v0(v0), e1(v0-v1), e2(v2-v0), geomIDs(geomIDs), primIDs(primIDs) {} + + /* Returns a mask that tells which triangles are valid */ + __forceinline vbool<M> valid() const { return 
geomIDs != vuint<M>(-1); } + + /* Returns true if the specified triangle is valid */ + __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; } + + /* Returns the number of stored triangles */ + __forceinline size_t size() const { return bsf(~movemask(valid())); } + + /* Returns the geometry IDs */ + __forceinline vuint<M>& geomID() { return geomIDs; } + __forceinline const vuint<M>& geomID() const { return geomIDs; } + __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; } + + /* Returns the primitive IDs */ + __forceinline vuint<M>& primID() { return primIDs; } + __forceinline const vuint<M>& primID() const { return primIDs; } + __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } + + /* Calculate the bounds of the triangle */ + __forceinline BBox3fa bounds() const + { + Vec3vf<M> p0 = v0; + Vec3vf<M> p1 = v0-e1; + Vec3vf<M> p2 = v0+e2; + Vec3vf<M> lower = min(p0,p1,p2); + Vec3vf<M> upper = max(p0,p1,p2); + vbool<M> mask = valid(); + lower.x = select(mask,lower.x,vfloat<M>(pos_inf)); + lower.y = select(mask,lower.y,vfloat<M>(pos_inf)); + lower.z = select(mask,lower.z,vfloat<M>(pos_inf)); + upper.x = select(mask,upper.x,vfloat<M>(neg_inf)); + upper.y = select(mask,upper.y,vfloat<M>(neg_inf)); + upper.z = select(mask,upper.z,vfloat<M>(neg_inf)); + return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)), + Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z))); + } + + /* Non temporal store */ + __forceinline static void store_nt(TriangleM* dst, const TriangleM& src) + { + vfloat<M>::store_nt(&dst->v0.x,src.v0.x); + vfloat<M>::store_nt(&dst->v0.y,src.v0.y); + vfloat<M>::store_nt(&dst->v0.z,src.v0.z); + vfloat<M>::store_nt(&dst->e1.x,src.e1.x); + vfloat<M>::store_nt(&dst->e1.y,src.e1.y); + vfloat<M>::store_nt(&dst->e1.z,src.e1.z); + vfloat<M>::store_nt(&dst->e2.x,src.e2.x); + vfloat<M>::store_nt(&dst->e2.y,src.e2.y); + 
vfloat<M>::store_nt(&dst->e2.z,src.e2.z); + vuint<M>::store_nt(&dst->geomIDs,src.geomIDs); + vuint<M>::store_nt(&dst->primIDs,src.primIDs); + } + + /* Fill triangle from triangle list */ + __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene) + { + vuint<M> vgeomID = -1, vprimID = -1; + Vec3vf<M> v0 = zero, v1 = zero, v2 = zero; + + for (size_t i=0; i<M && begin<end; i++, begin++) + { + const PrimRef& prim = prims[begin]; + const unsigned geomID = prim.geomID(); + const unsigned primID = prim.primID(); + const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(geomID); + const TriangleMesh::Triangle& tri = mesh->triangle(primID); + const Vec3fa& p0 = mesh->vertex(tri.v[0]); + const Vec3fa& p1 = mesh->vertex(tri.v[1]); + const Vec3fa& p2 = mesh->vertex(tri.v[2]); + vgeomID [i] = geomID; + vprimID [i] = primID; + v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; + v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; + v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; + } + TriangleM::store_nt(this,TriangleM(v0,v1,v2,vgeomID,vprimID)); + } + + /* Updates the primitive */ + __forceinline BBox3fa update(TriangleMesh* mesh) + { + BBox3fa bounds = empty; + vuint<M> vgeomID = -1, vprimID = -1; + Vec3vf<M> v0 = zero, v1 = zero, v2 = zero; + + for (size_t i=0; i<M; i++) + { + if (unlikely(geomID(i) == -1)) break; + const unsigned geomId = geomID(i); + const unsigned primId = primID(i); + const TriangleMesh::Triangle& tri = mesh->triangle(primId); + const Vec3fa p0 = mesh->vertex(tri.v[0]); + const Vec3fa p1 = mesh->vertex(tri.v[1]); + const Vec3fa p2 = mesh->vertex(tri.v[2]); + bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2))); + vgeomID [i] = geomId; + vprimID [i] = primId; + v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; + v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; + v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; + } + TriangleM::store_nt(this,TriangleM(v0,v1,v2,vgeomID,vprimID)); + return bounds; + } + + public: + 
Vec3vf<M> v0; // base vertex of the triangles + Vec3vf<M> e1; // 1st edge of the triangles (v0-v1) + Vec3vf<M> e2; // 2nd edge of the triangles (v2-v0) + private: + vuint<M> geomIDs; // geometry IDs + vuint<M> primIDs; // primitive IDs + }; + + template<int M> + typename TriangleM<M>::Type TriangleM<M>::type; + + typedef TriangleM<4> Triangle4; +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h new file mode 100644 index 0000000000..125a42c5fe --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h @@ -0,0 +1,96 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "triangle.h" +#include "triangle_intersector_moeller.h" + +namespace embree +{ + namespace isa + { + /*! Intersects M triangles with 1 ray */ + template<int M, int Mx, bool filter> + struct TriangleMIntersector1Moeller + { + typedef TriangleM<M> Primitive; + typedef MoellerTrumboreIntersector1<Mx> Precalculations; + + /*! Intersect a ray with the M triangles and updates the hit. */ + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleM<M>& tri) + { + STAT3(normal.trav_prims,1,1,1); + pre.intersectEdge(ray,tri.v0,tri.e1,tri.e2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /*! Test if the ray is occluded by one of M triangles. */ + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleM<M>& tri) + { + STAT3(shadow.trav_prims,1,1,1); + return pre.intersectEdge(ray,tri.v0,tri.e1,tri.e2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); + } + + }; + + /*! 
Intersects M triangles with K rays. */ + template<int M, int Mx, int K, bool filter> + struct TriangleMIntersectorKMoeller + { + typedef TriangleM<M> Primitive; + typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; + + /*! Intersects K rays with M triangles. */ + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleM<M>& tri) + { + STAT_USER(0,TriangleM<M>::max_size()); + for (size_t i=0; i<TriangleM<M>::max_size(); i++) + { + if (!tri.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + const Vec3vf<K> p0 = broadcast<vfloat<K>>(tri.v0,i); + const Vec3vf<K> e1 = broadcast<vfloat<K>>(tri.e1,i); + const Vec3vf<K> e2 = broadcast<vfloat<K>>(tri.e2,i); + pre.intersectEdgeK(valid_i,ray,p0,e1,e2,IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); + } + } + + /*! Test for K rays if they are occluded by any of the M triangles. */ + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleM<M>& tri) + { + vbool<K> valid0 = valid_i; + + for (size_t i=0; i<TriangleM<M>::max_size(); i++) + { + if (!tri.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid0),K); + const Vec3vf<K> p0 = broadcast<vfloat<K>>(tri.v0,i); + const Vec3vf<K> e1 = broadcast<vfloat<K>>(tri.e1,i); + const Vec3vf<K> e2 = broadcast<vfloat<K>>(tri.e2,i); + pre.intersectEdgeK(valid0,ray,p0,e1,e2,OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); + if (none(valid0)) break; + } + return !valid0; + } + + /*! Intersect a ray with M triangles and updates the hit. */ + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleM<M>& tri) + { + STAT3(normal.trav_prims,1,1,1); + pre.intersectEdge(ray,k,tri.v0,tri.e1,tri.e2,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + } + + /*! 
Test if the ray is occluded by one of the M triangles. */ + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleM<M>& tri) + { + STAT3(shadow.trav_prims,1,1,1); + return pre.intersectEdge(ray,k,tri.v0,tri.e1,tri.e2,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h new file mode 100644 index 0000000000..b5a8519236 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h @@ -0,0 +1,403 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "triangle.h" +#include "intersector_epilog.h" + +/*! This intersector implements a modified version of the Moeller + * Trumbore intersector from the paper "Fast, Minimum Storage + * Ray-Triangle Intersection". In contrast to the paper we + * precalculate some factors and factor the calculations differently + * to allow precalculating the cross product e1 x e2. The resulting + * algorithm is similar to the fastest one of the paper "Optimizing + * Ray-Triangle Intersection via Automated Search". 
*/ + +namespace embree +{ + namespace isa + { + template<int M> + struct MoellerTrumboreHitM + { + __forceinline MoellerTrumboreHitM() {} + + __forceinline MoellerTrumboreHitM(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const vfloat<M>& absDen, const Vec3vf<M>& Ng) + : U(U), V(V), T(T), absDen(absDen), valid(valid), vNg(Ng) {} + + __forceinline void finalize() + { + const vfloat<M> rcpAbsDen = rcp(absDen); + vt = T * rcpAbsDen; + vu = U * rcpAbsDen; + vv = V * rcpAbsDen; + } + + __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } + __forceinline float t (const size_t i) const { return vt[i]; } + __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } + + public: + vfloat<M> U; + vfloat<M> V; + vfloat<M> T; + vfloat<M> absDen; + + public: + vbool<M> valid; + vfloat<M> vu; + vfloat<M> vv; + vfloat<M> vt; + Vec3vf<M> vNg; + }; + + template<int M> + struct MoellerTrumboreIntersector1 + { + __forceinline MoellerTrumboreIntersector1() {} + + __forceinline MoellerTrumboreIntersector1(const Ray& ray, const void* ptr) {} + + __forceinline bool intersect(const vbool<M>& valid0, + Ray& ray, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_e1, + const Vec3vf<M>& tri_e2, + const Vec3vf<M>& tri_Ng, + MoellerTrumboreHitM<M>& hit) const + { + /* calculate denominator */ + vbool<M> valid = valid0; + const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org); + const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir); + const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O; + const Vec3vf<M> R = cross(C,D); + const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D); + + const vfloat<M> absDen = abs(den); + const vfloat<M> sgnDen = signmsk(den); + + /* perform edge tests */ + const vfloat<M> U = dot(R,Vec3vf<M>(tri_e2)) ^ sgnDen; + const vfloat<M> V = dot(R,Vec3vf<M>(tri_e1)) ^ sgnDen; + + /* perform backface culling */ +#if defined(EMBREE_BACKFACE_CULLING) + valid &= (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & 
(U+V<=absDen); +#else + valid &= (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); +#endif + if (likely(none(valid))) return false; + + /* perform depth test */ + const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen; + valid &= (absDen*vfloat<M>(ray.tnear()) < T) & (T <= absDen*vfloat<M>(ray.tfar)); + if (likely(none(valid))) return false; + + + /* update hit information */ + new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng); + + return true; + } + + __forceinline bool intersectEdge(Ray& ray, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_e1, + const Vec3vf<M>& tri_e2, + MoellerTrumboreHitM<M>& hit) const + { + vbool<M> valid = true; + const Vec3<vfloat<M>> tri_Ng = cross(tri_e2,tri_e1); + return intersect(valid,ray,tri_v0,tri_e1,tri_e2,tri_Ng,hit); + } + + __forceinline bool intersect(Ray& ray, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + MoellerTrumboreHitM<M>& hit) const + { + const Vec3vf<M> e1 = v0-v1; + const Vec3vf<M> e2 = v2-v0; + return intersectEdge(ray,v0,e1,e2,hit); + } + + __forceinline bool intersect(const vbool<M>& valid, + Ray& ray, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + MoellerTrumboreHitM<M>& hit) const + { + const Vec3vf<M> e1 = v0-v1; + const Vec3vf<M> e2 = v2-v0; + return intersectEdge(valid,ray,v0,e1,e2,hit); + } + + template<typename Epilog> + __forceinline bool intersectEdge(Ray& ray, + const Vec3vf<M>& v0, + const Vec3vf<M>& e1, + const Vec3vf<M>& e2, + const Epilog& epilog) const + { + MoellerTrumboreHitM<M> hit; + if (likely(intersectEdge(ray,v0,e1,e2,hit))) return epilog(hit.valid,hit); + return false; + } + + template<typename Epilog> + __forceinline bool intersect(Ray& ray, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + const Epilog& epilog) const + { + MoellerTrumboreHitM<M> hit; + if (likely(intersect(ray,v0,v1,v2,hit))) return epilog(hit.valid,hit); + return false; + } + + template<typename Epilog> + __forceinline 
bool intersect(const vbool<M>& valid, + Ray& ray, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + const Epilog& epilog) const + { + MoellerTrumboreHitM<M> hit; + if (likely(intersect(valid,ray,v0,v1,v2,hit))) return epilog(hit.valid,hit); + return false; + } + }; + + template<int K> + struct MoellerTrumboreHitK + { + __forceinline MoellerTrumboreHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& T, const vfloat<K>& absDen, const Vec3vf<K>& Ng) + : U(U), V(V), T(T), absDen(absDen), Ng(Ng) {} + + __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const + { + const vfloat<K> rcpAbsDen = rcp(absDen); + const vfloat<K> t = T * rcpAbsDen; + const vfloat<K> u = U * rcpAbsDen; + const vfloat<K> v = V * rcpAbsDen; + return std::make_tuple(u,v,t,Ng); + } + + private: + const vfloat<K> U; + const vfloat<K> V; + const vfloat<K> T; + const vfloat<K> absDen; + const Vec3vf<K> Ng; + }; + + template<int M, int K> + struct MoellerTrumboreIntersectorK + { + __forceinline MoellerTrumboreIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {} + + /*! Intersects K rays with one of M triangles. 
*/ + template<typename Epilog> + __forceinline vbool<K> intersectK(const vbool<K>& valid0, + //RayK<K>& ray, + const Vec3vf<K>& ray_org, + const Vec3vf<K>& ray_dir, + const vfloat<K>& ray_tnear, + const vfloat<K>& ray_tfar, + const Vec3vf<K>& tri_v0, + const Vec3vf<K>& tri_e1, + const Vec3vf<K>& tri_e2, + const Vec3vf<K>& tri_Ng, + const Epilog& epilog) const + { + /* calculate denominator */ + vbool<K> valid = valid0; + const Vec3vf<K> C = tri_v0 - ray_org; + const Vec3vf<K> R = cross(C,ray_dir); + const vfloat<K> den = dot(tri_Ng,ray_dir); + const vfloat<K> absDen = abs(den); + const vfloat<K> sgnDen = signmsk(den); + + /* test against edge p2 p0 */ + const vfloat<K> U = dot(tri_e2,R) ^ sgnDen; + valid &= U >= 0.0f; + if (likely(none(valid))) return false; + + /* test against edge p0 p1 */ + const vfloat<K> V = dot(tri_e1,R) ^ sgnDen; + valid &= V >= 0.0f; + if (likely(none(valid))) return false; + + /* test against edge p1 p2 */ + const vfloat<K> W = absDen-U-V; + valid &= W >= 0.0f; + if (likely(none(valid))) return false; + + /* perform depth test */ + const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen; + valid &= (absDen*ray_tnear < T) & (T <= absDen*ray_tfar); + if (unlikely(none(valid))) return false; + + /* perform backface culling */ +#if defined(EMBREE_BACKFACE_CULLING) + valid &= den < vfloat<K>(zero); + if (unlikely(none(valid))) return false; +#else + valid &= den != vfloat<K>(zero); + if (unlikely(none(valid))) return false; +#endif + + /* calculate hit information */ + MoellerTrumboreHitK<K> hit(U,V,T,absDen,tri_Ng); + return epilog(valid,hit); + } + + /*! Intersects K rays with one of M triangles. 
*/ + template<typename Epilog> + __forceinline vbool<K> intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& tri_v0, + const Vec3vf<K>& tri_v1, + const Vec3vf<K>& tri_v2, + const Epilog& epilog) const + { + const Vec3vf<K> e1 = tri_v0-tri_v1; + const Vec3vf<K> e2 = tri_v2-tri_v0; + const Vec3vf<K> Ng = cross(e2,e1); + return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,epilog); + } + + /*! Intersects K rays with one of M triangles. */ + template<typename Epilog> + __forceinline vbool<K> intersectEdgeK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& tri_v0, + const Vec3vf<K>& tri_e1, + const Vec3vf<K>& tri_e2, + const Epilog& epilog) const + { + const Vec3vf<K> tri_Ng = cross(tri_e2,tri_e1); + return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,tri_e1,tri_e2,tri_Ng,epilog); + } + + /*! Intersect k'th ray from ray packet of size K with M triangles. */ + __forceinline bool intersectEdge(RayK<K>& ray, + size_t k, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_e1, + const Vec3vf<M>& tri_e2, + MoellerTrumboreHitM<M>& hit) const + { + /* calculate denominator */ + typedef Vec3vf<M> Vec3vfM; + const Vec3vf<M> tri_Ng = cross(tri_e2,tri_e1); + + const Vec3vfM O = broadcast<vfloat<M>>(ray.org,k); + const Vec3vfM D = broadcast<vfloat<M>>(ray.dir,k); + const Vec3vfM C = Vec3vfM(tri_v0) - O; + const Vec3vfM R = cross(C,D); + const vfloat<M> den = dot(Vec3vfM(tri_Ng),D); + const vfloat<M> absDen = abs(den); + const vfloat<M> sgnDen = signmsk(den); + + /* perform edge tests */ + const vfloat<M> U = dot(Vec3vf<M>(tri_e2),R) ^ sgnDen; + const vfloat<M> V = dot(Vec3vf<M>(tri_e1),R) ^ sgnDen; + + /* perform backface culling */ +#if defined(EMBREE_BACKFACE_CULLING) + vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); +#else + vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); +#endif + if (likely(none(valid))) return false; + + /* perform 
depth test */ + const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen; + valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k])); + if (likely(none(valid))) return false; + + /* calculate hit information */ + new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng); + return true; + } + + __forceinline bool intersectEdge(RayK<K>& ray, + size_t k, + const BBox<vfloat<M>>& time_range, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_e1, + const Vec3vf<M>& tri_e2, + MoellerTrumboreHitM<M>& hit) const + { + if (likely(intersect(ray,k,tri_v0,tri_e1,tri_e2,hit))) + { + hit.valid &= time_range.lower <= vfloat<M>(ray.time[k]); + hit.valid &= vfloat<M>(ray.time[k]) < time_range.upper; + return any(hit.valid); + } + return false; + } + + template<typename Epilog> + __forceinline bool intersectEdge(RayK<K>& ray, + size_t k, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_e1, + const Vec3vf<M>& tri_e2, + const Epilog& epilog) const + { + MoellerTrumboreHitM<M> hit; + if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit); + return false; + } + + template<typename Epilog> + __forceinline bool intersectEdge(RayK<K>& ray, + size_t k, + const BBox<vfloat<M>>& time_range, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_e1, + const Vec3vf<M>& tri_e2, + const Epilog& epilog) const + { + MoellerTrumboreHitM<M> hit; + if (likely(intersectEdge(ray,k,time_range,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit); + return false; + } + + template<typename Epilog> + __forceinline bool intersect(RayK<K>& ray, + size_t k, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + const Epilog& epilog) const + { + const Vec3vf<M> e1 = v0-v1; + const Vec3vf<M> e2 = v2-v0; + return intersectEdge(ray,k,v0,e1,e2,epilog); + } + + template<typename Epilog> + __forceinline bool intersect(RayK<K>& ray, + size_t k, + const BBox<vfloat<M>>& time_range, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const 
Vec3vf<M>& v2, + const Epilog& epilog) const + { + const Vec3vf<M> e1 = v0-v1; + const Vec3vf<M> e2 = v2-v0; + return intersectEdge(ray,k,time_range,v0,e1,e2,epilog); + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h new file mode 100644 index 0000000000..f1de99d208 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h @@ -0,0 +1,247 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "triangle.h" +#include "trianglev.h" +#include "trianglev_mb.h" +#include "intersector_epilog.h" + +/*! Modified Pluecker ray/triangle intersector. The test first shifts + * the ray origin into the origin of the coordinate system and then + * uses Pluecker coordinates for the intersection. Due to the shift, + * the Pluecker coordinate calculation simplifies and the tests get + * numerically stable. The edge equations are watertight along the + * edge for neighboring triangles. 
*/ + +namespace embree +{ + namespace isa + { + template<int M, typename UVMapper> + struct PlueckerHitM + { + __forceinline PlueckerHitM(const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& UVW, const vfloat<M>& t, const Vec3vf<M>& Ng, const UVMapper& mapUV) + : U(U), V(V), UVW(UVW), mapUV(mapUV), vt(t), vNg(Ng) {} + + __forceinline void finalize() + { + const vbool<M> invalid = abs(UVW) < min_rcp_input; + const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW)); + vu = U * rcpUVW; + vv = V * rcpUVW; + mapUV(vu,vv); + } + + __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } + __forceinline float t (const size_t i) const { return vt[i]; } + __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } + + private: + const vfloat<M> U; + const vfloat<M> V; + const vfloat<M> UVW; + const UVMapper& mapUV; + + public: + vfloat<M> vu; + vfloat<M> vv; + vfloat<M> vt; + Vec3vf<M> vNg; + }; + + template<int M> + struct PlueckerIntersector1 + { + __forceinline PlueckerIntersector1() {} + + __forceinline PlueckerIntersector1(const Ray& ray, const void* ptr) {} + + template<typename UVMapper, typename Epilog> + __forceinline bool intersect(Ray& ray, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_v1, + const Vec3vf<M>& tri_v2, + const UVMapper& mapUV, + const Epilog& epilog) const + { + /* calculate vertices relative to ray origin */ + const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org); + const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir); + const Vec3vf<M> v0 = tri_v0-O; + const Vec3vf<M> v1 = tri_v1-O; + const Vec3vf<M> v2 = tri_v2-O; + + /* calculate triangle edges */ + const Vec3vf<M> e0 = v2-v0; + const Vec3vf<M> e1 = v0-v1; + const Vec3vf<M> e2 = v1-v2; + + /* perform edge tests */ + const vfloat<M> U = dot(cross(e0,v2+v0),D); + const vfloat<M> V = dot(cross(e1,v0+v1),D); + const vfloat<M> W = dot(cross(e2,v1+v2),D); + const vfloat<M> UVW = U+V+W; + const vfloat<M> eps = float(ulp)*abs(UVW); +#if 
defined(EMBREE_BACKFACE_CULLING)
+        vbool<M> valid = max(U,V,W) <= eps; // culling build: only the "all three values <= eps" orientation is accepted
+#else
+        vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); // all three values share a sign, within the eps tolerance
+#endif
+        if (unlikely(none(valid))) return false;
+
+        /* calculate geometry normal and denominator */
+        const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
+        const vfloat<M> den = twice(dot(Ng,D));
+
+        /* perform depth test */
+        const vfloat<M> T = twice(dot(v0,Ng));
+        const vfloat<M> t = rcp(den)*T;
+        valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar); // parses as (tnear <= t) & (t <= tfar): <= binds tighter than &
+        valid &= den != vfloat<M>(zero); // lanes with a zero denominator are masked out after rcp(den) was evaluated
+        if (unlikely(none(valid))) return false;
+
+        /* update hit information */
+        PlueckerHitM<M,UVMapper> hit(U,V,UVW,t,Ng,mapUV);
+        return epilog(valid,hit);
+      }
+    };
+
+    template<int K, typename UVMapper>
+    struct PlueckerHitK // hit record for K rays; operator() returns the tuple (u,v,t,Ng)
+    {
+      __forceinline PlueckerHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& UVW, const vfloat<K>& t, const Vec3vf<K>& Ng, const UVMapper& mapUV)
+        : U(U), V(V), UVW(UVW), t(t), Ng(Ng), mapUV(mapUV) {}
+
+      __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
+      {
+        const vbool<K> invalid = abs(UVW) < min_rcp_input; // lanes whose |UVW| is below min_rcp_input
+        const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW)); // presumably select() yields 0 for those lanes instead of rcp(UVW)
+        vfloat<K> u = U * rcpUVW;
+        vfloat<K> v = V * rcpUVW;
+        mapUV(u,v);
+        return std::make_tuple(u,v,t,Ng);
+      }
+
+    private:
+      const vfloat<K> U;
+      const vfloat<K> V;
+      const vfloat<K> UVW;
+      const vfloat<K> t;
+      const Vec3vf<K> Ng;
+      const UVMapper& mapUV; // held by reference: the mapper given to the constructor must outlive this object
+    };
+
+    template<int M, int K>
+    struct PlueckerIntersectorK
+    {
+      __forceinline PlueckerIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {} // both arguments are unused
+
+      /*! Intersects K rays with one of M triangles. 
*/
+      template<typename UVMapper, typename Epilog>
+      __forceinline vbool<K> intersectK(const vbool<K>& valid0, // lanes of the ray packet to test
+                                        RayK<K>& ray,
+                                        const Vec3vf<K>& tri_v0,
+                                        const Vec3vf<K>& tri_v1,
+                                        const Vec3vf<K>& tri_v2,
+                                        const UVMapper& mapUV,
+                                        const Epilog& epilog) const
+      {
+        /* calculate vertices relative to ray origin */
+        vbool<K> valid = valid0;
+        const Vec3vf<K> O = ray.org;
+        const Vec3vf<K> D = ray.dir;
+        const Vec3vf<K> v0 = tri_v0-O;
+        const Vec3vf<K> v1 = tri_v1-O;
+        const Vec3vf<K> v2 = tri_v2-O;
+
+        /* calculate triangle edges */
+        const Vec3vf<K> e0 = v2-v0;
+        const Vec3vf<K> e1 = v0-v1;
+        const Vec3vf<K> e2 = v1-v2;
+
+        /* perform edge tests: one value per edge, built from the edge, the sum of its two end points and the ray direction */
+        const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D);
+        const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D);
+        const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D);
+        const vfloat<K> UVW = U+V+W;
+        const vfloat<K> eps = float(ulp)*abs(UVW); // tolerance proportional to |UVW|
+#if defined(EMBREE_BACKFACE_CULLING)
+        valid &= max(U,V,W) <= eps; // culling build: only the "all three values <= eps" orientation is accepted
+#else
+        valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); // all three values share a sign, within the eps tolerance
+#endif
+        if (unlikely(none(valid))) return false; // the bool converts to the vbool<K> return type
+
+        /* calculate geometry normal and denominator */
+        const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2);
+        const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D));
+
+        /* perform depth test */
+        const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng)));
+        const vfloat<K> t = rcp(den)*T;
+        valid &= ray.tnear() <= t & t <= ray.tfar; // parses as (tnear <= t) & (t <= tfar): <= binds tighter than &
+        valid &= den != vfloat<K>(zero); // lanes with a zero denominator are masked out after rcp(den) was evaluated
+        if (unlikely(none(valid))) return false;
+
+        /* calculate hit information */
+        PlueckerHitK<K,UVMapper> hit(U,V,UVW,t,Ng,mapUV);
+        return epilog(valid,hit);
+      }
+
+      /*! Intersect k'th ray from ray packet of size K with M triangles. 
*/
+      template<typename UVMapper, typename Epilog>
+      __forceinline bool intersect(RayK<K>& ray, size_t k, // k selects which ray of the packet is broadcast to all M lanes
+                                   const Vec3vf<M>& tri_v0,
+                                   const Vec3vf<M>& tri_v1,
+                                   const Vec3vf<M>& tri_v2,
+                                   const UVMapper& mapUV,
+                                   const Epilog& epilog) const
+      {
+        /* calculate vertices relative to ray origin */
+        const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
+        const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
+        const Vec3vf<M> v0 = tri_v0-O;
+        const Vec3vf<M> v1 = tri_v1-O;
+        const Vec3vf<M> v2 = tri_v2-O;
+
+        /* calculate triangle edges */
+        const Vec3vf<M> e0 = v2-v0;
+        const Vec3vf<M> e1 = v0-v1;
+        const Vec3vf<M> e2 = v1-v2;
+
+        /* perform edge tests: one value per edge, built from the edge, the sum of its two end points and the ray direction */
+        const vfloat<M> U = dot(cross(e0,v2+v0),D);
+        const vfloat<M> V = dot(cross(e1,v0+v1),D);
+        const vfloat<M> W = dot(cross(e2,v1+v2),D);
+        const vfloat<M> UVW = U+V+W;
+        const vfloat<M> eps = float(ulp)*abs(UVW); // tolerance proportional to |UVW|
+#if defined(EMBREE_BACKFACE_CULLING)
+        vbool<M> valid = max(U,V,W) <= eps; // culling build: only the "all three values <= eps" orientation is accepted
+#else
+        vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); // all three values share a sign, within the eps tolerance
+#endif
+        if (unlikely(none(valid))) return false;
+
+        /* calculate geometry normal and denominator */
+        const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
+        const vfloat<M> den = twice(dot(Ng,D));
+
+        /* perform depth test */
+        const vfloat<M> T = twice(dot(v0,Ng));
+        const vfloat<M> t = rcp(den)*T;
+        valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]); // parses as (tnear <= t) & (t <= tfar)
+        if (unlikely(none(valid))) return false;
+
+        /* reject lanes with a zero denominator (rcp(den) was already evaluated above, so this only masks the result) */
+        valid &= den != vfloat<M>(zero);
+        if (unlikely(none(valid))) return false;
+
+        /* update hit information */
+        PlueckerHitM<M,UVMapper> hit(U,V,UVW,t,Ng,mapUV);
+        return epilog(valid,hit);
+      }
+    };
+  }
+}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h
new file mode 100644
index 0000000000..63e649d8fb
--- /dev/null
+++ b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h
@@ -0,0 +1,418 @@
+// 
Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "triangle.h" +#include "intersector_epilog.h" + +/*! This intersector implements a modified version of the Woop's ray-triangle intersection test */ + +namespace embree +{ + namespace isa + { + template<int M> + struct WoopHitM + { + __forceinline WoopHitM() {} + + __forceinline WoopHitM(const vbool<M>& valid, + const vfloat<M>& U, + const vfloat<M>& V, + const vfloat<M>& T, + const vfloat<M>& inv_det, + const Vec3vf<M>& Ng) + : U(U), V(V), T(T), inv_det(inv_det), valid(valid), vNg(Ng) {} + + __forceinline void finalize() + { + vt = T; + vu = U*inv_det; + vv = V*inv_det; + } + + __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } + __forceinline float t (const size_t i) const { return vt[i]; } + __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } + + private: + const vfloat<M> U; + const vfloat<M> V; + const vfloat<M> T; + const vfloat<M> inv_det; + + public: + const vbool<M> valid; + vfloat<M> vu; + vfloat<M> vv; + vfloat<M> vt; + Vec3vf<M> vNg; + }; + + template<int M> + struct WoopPrecalculations1 + { + unsigned int kx,ky,kz; + Vec3vf<M> org; + Vec3fa S; + __forceinline WoopPrecalculations1() {} + + __forceinline WoopPrecalculations1(const Ray& ray, const void* ptr) + { + kz = maxDim(abs(ray.dir)); + kx = (kz+1) % 3; + ky = (kx+1) % 3; + const float inv_dir_kz = rcp(ray.dir[kz]); + if (ray.dir[kz]) std::swap(kx,ky); + S.x = ray.dir[kx] * inv_dir_kz; + S.y = ray.dir[ky] * inv_dir_kz; + S.z = inv_dir_kz; + org = Vec3vf<M>(ray.org[kx],ray.org[ky],ray.org[kz]); + } + }; + + + template<int M> + struct WoopIntersector1 + { + + typedef WoopPrecalculations1<M> Precalculations; + + __forceinline WoopIntersector1() {} + + __forceinline WoopIntersector1(const Ray& ray, const void* ptr) {} + + static __forceinline bool intersect(const vbool<M>& valid0, + Ray& ray, + const Precalculations& pre, + const 
Vec3vf<M>& tri_v0,
+                                          const Vec3vf<M>& tri_v1,
+                                          const Vec3vf<M>& tri_v2,
+                                          WoopHitM<M>& hit) // written (placement new) only when true is returned
+      {
+        vbool<M> valid = valid0;
+
+        /* vertices relative to ray origin, with coordinates permuted into (kx,ky,kz) order */
+        const Vec3vf<M> org = Vec3vf<M>(pre.org.x,pre.org.y,pre.org.z);
+        const Vec3vf<M> A = Vec3vf<M>(tri_v0[pre.kx],tri_v0[pre.ky],tri_v0[pre.kz]) - org;
+        const Vec3vf<M> B = Vec3vf<M>(tri_v1[pre.kx],tri_v1[pre.ky],tri_v1[pre.kz]) - org;
+        const Vec3vf<M> C = Vec3vf<M>(tri_v2[pre.kx],tri_v2[pre.ky],tri_v2[pre.kz]) - org;
+
+        /* shear and scale vertices (presumably nmadd(a,b,c) = c - a*b, i.e. Ax = A.x - A.z*S.x -- TODO confirm) */
+        const vfloat<M> Ax = nmadd(A.z,pre.S.x,A.x);
+        const vfloat<M> Ay = nmadd(A.z,pre.S.y,A.y);
+        const vfloat<M> Bx = nmadd(B.z,pre.S.x,B.x);
+        const vfloat<M> By = nmadd(B.z,pre.S.y,B.y);
+        const vfloat<M> Cx = nmadd(C.z,pre.S.x,C.x);
+        const vfloat<M> Cy = nmadd(C.z,pre.S.y,C.y);
+
+        /* scaled barycentric: the two products of each 2D edge function are kept separate so they can be compared before subtracting */
+        const vfloat<M> U0 = Cx*By;
+        const vfloat<M> U1 = Cy*Bx;
+        const vfloat<M> V0 = Ax*Cy;
+        const vfloat<M> V1 = Ay*Cx;
+        const vfloat<M> W0 = Bx*Ay;
+        const vfloat<M> W1 = By*Ax;
+#if !defined(__AVX512F__)
+        valid &= (U0 >= U1) & (V0 >= V1) & (W0 >= W1) |
+                 (U0 <= U1) & (V0 <= V1) & (W0 <= W1); // & binds tighter than |: accept when all three are >= or all three are <=
+#else
+        valid &= ge(ge(U0 >= U1,V0,V1),W0,W1) | le(le(U0 <= U1,V0,V1),W0,W1);
+#endif
+
+        if (likely(none(valid))) return false;
+        const vfloat<M> U = U0-U1;
+        const vfloat<M> V = V0-V1;
+        const vfloat<M> W = W0-W1;
+
+        const vfloat<M> det = U+V+W;
+
+        valid &= det != 0.0f; // lanes with det == 0 are masked out; rcp(det) is still evaluated for them
+        const vfloat<M> inv_det = rcp(det);
+
+        const vfloat<M> Az = pre.S.z * A.z;
+        const vfloat<M> Bz = pre.S.z * B.z;
+        const vfloat<M> Cz = pre.S.z * C.z;
+        const vfloat<M> T = madd(U,Az,madd(V,Bz,W*Cz));
+        const vfloat<M> t = T * inv_det;
+        /* perform depth test (strictly greater than tnear, at most tfar) */
+        valid &= (vfloat<M>(ray.tnear()) < t) & (t <= vfloat<M>(ray.tfar));
+        if (likely(none(valid))) return false;
+
+        const Vec3vf<M> tri_Ng = cross(tri_v2-tri_v0,tri_v0-tri_v1); // normal from the original, unpermuted vertices
+
+        /* update hit information (the already divided t is passed as WoopHitM's T argument) */
+        new (&hit) WoopHitM<M>(valid,U,V,t,inv_det,tri_Ng);
+        return true;
+      }
+
+      static __forceinline bool intersect(Ray& 
ray,
+                                          const Precalculations& pre,
+                                          const Vec3vf<M>& v0,
+                                          const Vec3vf<M>& v1,
+                                          const Vec3vf<M>& v2,
+                                          WoopHitM<M>& hit)
+      {
+        vbool<M> valid = true; // no caller-supplied mask: start with every lane enabled
+        return intersect(valid,ray,pre,v0,v1,v2,hit);
+      }
+
+
+      template<typename Epilog>
+      static __forceinline bool intersect(Ray& ray, // epilog variant: runs the hit-producing overload and forwards hit.valid and hit to epilog
+                                          const Precalculations& pre,
+                                          const Vec3vf<M>& v0,
+                                          const Vec3vf<M>& v1,
+                                          const Vec3vf<M>& v2,
+                                          const Epilog& epilog)
+      {
+        WoopHitM<M> hit;
+        if (likely(intersect(ray,pre,v0,v1,v2,hit))) return epilog(hit.valid,hit);
+        return false;
+      }
+
+      template<typename Epilog>
+      static __forceinline bool intersect(const vbool<M>& valid, // same as above, restricted to the lanes set in valid
+                                          Ray& ray,
+                                          const Precalculations& pre,
+                                          const Vec3vf<M>& v0,
+                                          const Vec3vf<M>& v1,
+                                          const Vec3vf<M>& v2,
+                                          const Epilog& epilog)
+      {
+        WoopHitM<M> hit;
+        if (likely(intersect(valid,ray,pre,v0,v1,v2,hit))) return epilog(hit.valid,hit);
+        return false;
+      }
+    };
+
+#if 0
+    template<int K>
+    struct WoopHitK // compiled out by the #if 0 above; operator() returns the tuple (u,v,t,Ng) after dividing by absDen
+    {
+      __forceinline WoopHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& T, const vfloat<K>& absDen, const Vec3vf<K>& Ng)
+        : U(U), V(V), T(T), absDen(absDen), Ng(Ng) {}
+
+      __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
+      {
+        const vfloat<K> rcpAbsDen = rcp(absDen);
+        const vfloat<K> t = T * rcpAbsDen;
+        const vfloat<K> u = U * rcpAbsDen;
+        const vfloat<K> v = V * rcpAbsDen;
+        return std::make_tuple(u,v,t,Ng);
+      }
+
+    private:
+      const vfloat<K> U;
+      const vfloat<K> V;
+      const vfloat<K> T;
+      const vfloat<K> absDen;
+      const Vec3vf<K> Ng;
+    };
+
+    template<int M, int K>
+    struct WoopIntersectorK
+    {
+      __forceinline WoopIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {} // both arguments are unused
+
+      /*! Intersects K rays with one of M triangles. 
*/
+      template<typename Epilog>
+      __forceinline vbool<K> intersectK(const vbool<K>& valid0, // lanes of the ray packet to test
+                                        //RayK<K>& ray,
+                                        const Vec3vf<K>& ray_org,
+                                        const Vec3vf<K>& ray_dir,
+                                        const vfloat<K>& ray_tnear,
+                                        const vfloat<K>& ray_tfar,
+                                        const Vec3vf<K>& tri_v0,
+                                        const Vec3vf<K>& tri_e1,
+                                        const Vec3vf<K>& tri_e2,
+                                        const Vec3vf<K>& tri_Ng,
+                                        const Epilog& epilog) const
+      {
+        /* calculate denominator */
+        vbool<K> valid = valid0;
+        const Vec3vf<K> C = tri_v0 - ray_org;
+        const Vec3vf<K> R = cross(C,ray_dir);
+        const vfloat<K> den = dot(tri_Ng,ray_dir);
+        const vfloat<K> absDen = abs(den);
+        const vfloat<K> sgnDen = signmsk(den); // presumably the sign bit of den; XOR-ing it into a value below applies den's sign -- TODO confirm
+
+        /* test against edge p2 p0 */
+        const vfloat<K> U = dot(tri_e2,R) ^ sgnDen;
+        valid &= U >= 0.0f;
+        if (likely(none(valid))) return false;
+
+        /* test against edge p0 p1 */
+        const vfloat<K> V = dot(tri_e1,R) ^ sgnDen;
+        valid &= V >= 0.0f;
+        if (likely(none(valid))) return false;
+
+        /* test against edge p1 p2 */
+        const vfloat<K> W = absDen-U-V;
+        valid &= W >= 0.0f;
+        if (likely(none(valid))) return false;
+
+        /* perform depth test (tnear and tfar are scaled by absDen so that no division is needed) */
+        const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen;
+        valid &= (absDen*ray_tnear < T) & (T <= absDen*ray_tfar);
+        if (unlikely(none(valid))) return false;
+
+        /* perform backface culling */
+#if defined(EMBREE_BACKFACE_CULLING)
+        valid &= den < vfloat<K>(zero);
+        if (unlikely(none(valid))) return false;
+#else
+        valid &= den != vfloat<K>(zero);
+        if (unlikely(none(valid))) return false;
+#endif
+
+        /* calculate hit information (U, V, T are still scaled by absDen; WoopHitK divides when invoked) */
+        WoopHitK<K> hit(U,V,T,absDen,tri_Ng);
+        return epilog(valid,hit);
+      }
+
+      /*! Intersects K rays with one of M triangles. 
*/
+      template<typename Epilog>
+      __forceinline vbool<K> intersectK(const vbool<K>& valid0, // vertex form: derives e1, e2 and Ng, then calls the edge/normal overload
+                                        RayK<K>& ray,
+                                        const Vec3vf<K>& tri_v0,
+                                        const Vec3vf<K>& tri_v1,
+                                        const Vec3vf<K>& tri_v2,
+                                        const Epilog& epilog) const
+      {
+        const Vec3vf<K> e1 = tri_v0-tri_v1;
+        const Vec3vf<K> e2 = tri_v2-tri_v0;
+        const Vec3vf<K> Ng = cross(e2,e1);
+        return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,epilog);
+      }
+
+      /*! Intersects K rays with one of M triangles. */
+      template<typename Epilog>
+      __forceinline vbool<K> intersectEdgeK(const vbool<K>& valid0, // edge form: the caller supplies e1 = v0-v1 and e2 = v2-v0
+                                            RayK<K>& ray,
+                                            const Vec3vf<K>& tri_v0,
+                                            const Vec3vf<K>& tri_e1,
+                                            const Vec3vf<K>& tri_e2,
+                                            const Epilog& epilog) const
+      {
+        const Vec3vf<K> tri_Ng = cross(tri_e2,tri_e1);
+        return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,tri_e1,tri_e2,tri_Ng,epilog);
+      }
+
+      /*! Intersect k'th ray from ray packet of size K with M triangles. */
+      __forceinline bool intersectEdge(RayK<K>& ray,
+                                       size_t k,
+                                       const Vec3vf<M>& tri_v0,
+                                       const Vec3vf<M>& tri_e1,
+                                       const Vec3vf<M>& tri_e2,
+                                       WoopHitM<M>& hit) const
+      {
+        /* calculate denominator */
+        typedef Vec3vf<M> Vec3vfM;
+        const Vec3vf<M> tri_Ng = cross(tri_e2,tri_e1);
+
+        const Vec3vfM O = broadcast<vfloat<M>>(ray.org,k);
+        const Vec3vfM D = broadcast<vfloat<M>>(ray.dir,k);
+        const Vec3vfM C = Vec3vfM(tri_v0) - O;
+        const Vec3vfM R = cross(C,D);
+        const vfloat<M> den = dot(Vec3vfM(tri_Ng),D);
+        const vfloat<M> absDen = abs(den);
+        const vfloat<M> sgnDen = signmsk(den); // presumably the sign bit of den; XOR-ing it into a value below applies den's sign -- TODO confirm
+
+        /* perform edge tests */
+        const vfloat<M> U = dot(Vec3vf<M>(tri_e2),R) ^ sgnDen;
+        const vfloat<M> V = dot(Vec3vf<M>(tri_e1),R) ^ sgnDen;
+
+        /* perform backface culling */
+#if defined(EMBREE_BACKFACE_CULLING)
+        vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
+#else
+        vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
+#endif
+        if (likely(none(valid))) return false;
+
+        /* perform depth test */
+        
const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen; + valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k])); + if (likely(none(valid))) return false; + + /* calculate hit information */ + new (&hit) WoopHitM<M>(valid,U,V,T,absDen,tri_Ng); + return true; + } + + __forceinline bool intersectEdge(RayK<K>& ray, + size_t k, + const BBox<vfloat<M>>& time_range, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_e1, + const Vec3vf<M>& tri_e2, + WoopHitM<M>& hit) const + { + if (likely(intersect(ray,k,tri_v0,tri_e1,tri_e2,hit))) + { + hit.valid &= time_range.lower <= vfloat<M>(ray.time[k]); + hit.valid &= vfloat<M>(ray.time[k]) < time_range.upper; + return any(hit.valid); + } + return false; + } + + template<typename Epilog> + __forceinline bool intersectEdge(RayK<K>& ray, + size_t k, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_e1, + const Vec3vf<M>& tri_e2, + const Epilog& epilog) const + { + WoopHitM<M> hit; + if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit); + return false; + } + + template<typename Epilog> + __forceinline bool intersectEdge(RayK<K>& ray, + size_t k, + const BBox<vfloat<M>>& time_range, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_e1, + const Vec3vf<M>& tri_e2, + const Epilog& epilog) const + { + WoopHitM<M> hit; + if (likely(intersectEdge(ray,k,time_range,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit); + return false; + } + + template<typename Epilog> + __forceinline bool intersect(RayK<K>& ray, + size_t k, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + const Epilog& epilog) const + { + const Vec3vf<M> e1 = v0-v1; + const Vec3vf<M> e2 = v2-v0; + return intersectEdge(ray,k,v0,e1,e2,epilog); + } + + template<typename Epilog> + __forceinline bool intersect(RayK<K>& ray, + size_t k, + const BBox<vfloat<M>>& time_range, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + const Epilog& epilog) const + { + const Vec3vf<M> 
e1 = v0-v1;
+        const Vec3vf<M> e2 = v2-v0;
+        return intersectEdge(ray,k,time_range,v0,e1,e2,epilog);
+      }
+    };
+#endif
+  }
+}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h
new file mode 100644
index 0000000000..91b35c36f3
--- /dev/null
+++ b/thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h
@@ -0,0 +1,132 @@
+// Copyright 2009-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "primitive.h" // NOTE(review): unlike the neighbouring headers in this patch, this file has no #pragma once -- confirm it is only ever included once per translation unit
+
+namespace embree
+{
+  namespace isa
+  {
+    struct TriangleTriangleIntersector // static helpers for triangle/triangle overlap tests in 2D and 3D
+    {
+      __forceinline static float T(float pa0, float pa1, float da0, float da1) { // interpolates pa between pa0 and pa1 at the point where d, going linearly from da0 to da1, is zero; callers must ensure da0 != da1
+        return pa0 + (pa1-pa0)*da0/(da0-da1);
+      }
+
+      __forceinline static bool point_line_side(const Vec2f& p, const Vec2f& a0, const Vec2f& a1) { // which side of the line a0-a1 the point p lies on; det() == 0 counts as the "true" side
+        return det(p-a0,a0-a1) >= 0.0f;
+      }
+
+      __forceinline static bool point_inside_triangle(const Vec2f& p, const Vec2f& a, const Vec2f& b, const Vec2f& c) // p is inside when it is on the same side of all three edges
+      {
+        const bool pab = point_line_side(p,a,b);
+        const bool pbc = point_line_side(p,b,c);
+        const bool pca = point_line_side(p,c,a);
+        return pab == pbc && pab == pca;
+      }
+
+      __forceinline static bool intersect_line_line(const Vec2f& a0, const Vec2f& a1, const Vec2f& b0, const Vec2f& b1) // segments cross when each one's end points lie on different sides of the other
+      {
+        const bool different_sides0 = point_line_side(b0,a0,a1) != point_line_side(b1,a0,a1);
+        const bool different_sides1 = point_line_side(a0,b0,b1) != point_line_side(a1,b0,b1);
+        return different_sides0 && different_sides1;
+      }
+
+      __forceinline static bool intersect_triangle_triangle (const Vec2f& a0, const Vec2f& a1, const Vec2f& a2, // 2D overlap test: first checks all 3x3 edge pairs, returning at the first crossing
+                                                             const Vec2f& b0, const Vec2f& b1, const Vec2f& b2)
+      {
+        const bool a01_b01 = intersect_line_line(a0,a1,b0,b1);
+        if (a01_b01) return true;
+        const bool a01_b12 = intersect_line_line(a0,a1,b1,b2);
+        if (a01_b12) return true;
+        const bool a01_b20 = intersect_line_line(a0,a1,b2,b0);
+        if (a01_b20) return true;
+        const bool a12_b01 = 
intersect_line_line(a1,a2,b0,b1);
+        if (a12_b01) return true;
+        const bool a12_b12 = intersect_line_line(a1,a2,b1,b2);
+        if (a12_b12) return true;
+        const bool a12_b20 = intersect_line_line(a1,a2,b2,b0);
+        if (a12_b20) return true;
+        const bool a20_b01 = intersect_line_line(a2,a0,b0,b1);
+        if (a20_b01) return true;
+        const bool a20_b12 = intersect_line_line(a2,a0,b1,b2);
+        if (a20_b12) return true;
+        const bool a20_b20 = intersect_line_line(a2,a0,b2,b0);
+        if (a20_b20) return true;
+
+        bool a_in_b = point_inside_triangle(a0,b0,b1,b2) && point_inside_triangle(a1,b0,b1,b2) && point_inside_triangle(a2,b0,b1,b2); // no edges cross: A may still lie completely inside B
+        if (a_in_b) return true;
+
+        bool b_in_a = point_inside_triangle(b0,a0,a1,a2) && point_inside_triangle(b1,a0,a1,a2) && point_inside_triangle(b2,a0,a1,a2); // ... or B completely inside A
+        if (b_in_a) return true;
+
+        return false;
+      }
+
+      static bool intersect_triangle_triangle (const Vec3fa& a0, const Vec3fa& a1, const Vec3fa& a2, // 3D overlap test of triangle A (a0,a1,a2) against triangle B (b0,b1,b2)
+                                               const Vec3fa& b0, const Vec3fa& b1, const Vec3fa& b2)
+      {
+        const float eps = 1E-5f; // compared against plane offsets computed with the unnormalized normals Na/Nb, so its effect scales with their length
+
+        /* calculate triangle planes */
+        const Vec3fa Na = cross(a1-a0,a2-a0);
+        const float Ca = dot(Na,a0);
+        const Vec3fa Nb = cross(b1-b0,b2-b0);
+        const float Cb = dot(Nb,b0);
+
+        /* project triangle A onto plane B; no overlap if all of A is strictly on one side */
+        const float da0 = dot(Nb,a0)-Cb;
+        const float da1 = dot(Nb,a1)-Cb;
+        const float da2 = dot(Nb,a2)-Cb;
+        if (max(da0,da1,da2) < -eps) return false;
+        if (min(da0,da1,da2) > +eps) return false;
+        //CSTAT(bvh_collide_prim_intersections4++);
+
+        /* project triangle B onto plane A; no overlap if all of B is strictly on one side */
+        const float db0 = dot(Na,b0)-Ca;
+        const float db1 = dot(Na,b1)-Ca;
+        const float db2 = dot(Na,b2)-Ca;
+        if (max(db0,db1,db2) < -eps) return false;
+        if (min(db0,db1,db2) > +eps) return false;
+        //CSTAT(bvh_collide_prim_intersections5++);
+
+        if (unlikely((std::fabs(da0) < eps && std::fabs(da1) < eps && std::fabs(da2) < eps) ||
+                     (std::fabs(db0) < eps && std::fabs(db1) < eps && std::fabs(db2) < eps))) // (near-)coplanar case: fall back to the 2D test
+        {
+          const size_t dz = maxDim(Na); // NOTE(review): the dropped axis should be Na's largest-magnitude component; confirm maxDim() takes |.| itself (elsewhere in this patch it is called as maxDim(abs(...)))
+          const size_t dx = (dz+1)%3;
+          const size_t 
dy = (dx+1)%3;
+          const Vec2f A0(a0[dx],a0[dy]); // 2D coordinates: axes dx and dy are kept, dz is dropped
+          const Vec2f A1(a1[dx],a1[dy]);
+          const Vec2f A2(a2[dx],a2[dy]);
+          const Vec2f B0(b0[dx],b0[dy]);
+          const Vec2f B1(b1[dx],b1[dy]);
+          const Vec2f B2(b2[dx],b2[dy]);
+          return intersect_triangle_triangle(A0,A1,A2,B0,B1,B2);
+        }
+
+        const Vec3fa D = cross(Na,Nb); // direction of the line in which the two planes meet
+        const float pa0 = dot(D,a0); // positions of all six vertices along D
+        const float pa1 = dot(D,a1);
+        const float pa2 = dot(D,a2);
+        const float pb0 = dot(D,b0);
+        const float pb1 = dot(D,b1);
+        const float pb2 = dot(D,b2);
+
+        BBox1f ba = empty; // interval along D covered by the edges of A that reach or cross plane B
+        if (min(da0,da1) <= 0.0f && max(da0,da1) >= 0.0f && abs(da0-da1) > 0.0f) ba.extend(T(pa0,pa1,da0,da1)); // the abs(...) > 0 guard keeps T() from dividing by zero
+        if (min(da1,da2) <= 0.0f && max(da1,da2) >= 0.0f && abs(da1-da2) > 0.0f) ba.extend(T(pa1,pa2,da1,da2));
+        if (min(da2,da0) <= 0.0f && max(da2,da0) >= 0.0f && abs(da2-da0) > 0.0f) ba.extend(T(pa2,pa0,da2,da0));
+
+        BBox1f bb = empty; // same for the edges of B against plane A
+        if (min(db0,db1) <= 0.0f && max(db0,db1) >= 0.0f && abs(db0-db1) > 0.0f) bb.extend(T(pb0,pb1,db0,db1));
+        if (min(db1,db2) <= 0.0f && max(db1,db2) >= 0.0f && abs(db1-db2) > 0.0f) bb.extend(T(pb1,pb2,db1,db2));
+        if (min(db2,db0) <= 0.0f && max(db2,db0) >= 0.0f && abs(db2-db0) > 0.0f) bb.extend(T(pb2,pb0,db2,db0));
+
+        return conjoint(ba,bb); // presumably true when the two intervals overlap -- TODO confirm conjoint() semantics
+      }
+    };
+  }
+}
+
+
diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglei.h b/thirdparty/embree-aarch64/kernels/geometry/trianglei.h
new file mode 100644
index 0000000000..4f3118cc0c
--- /dev/null
+++ b/thirdparty/embree-aarch64/kernels/geometry/trianglei.h
@@ -0,0 +1,442 @@
+// Copyright 2009-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "primitive.h"
+#include "../common/scene.h"
+
+namespace embree
+{
+  /* Stores M triangles from an indexed face set (per lane: a geometry ID, a primitive ID and, unless EMBREE_COMPACT_POLYS is defined, three vertex offsets) */
+  template <int M>
+  struct TriangleMi
+  {
+    /* Virtual interface to query information about the triangle type */
+    struct Type : public PrimitiveType
+    {
+      const char* name() const;
+      size_t sizeActive(const char* This) const;
+      size_t sizeTotal(const char* This) const;
+      
size_t getBytes(const char* This) const;
+    };
+    static Type type;
+
+  public:
+
+    /* primitive supports multiple time segments */
+    static const bool singleTimeSegment = false;
+
+    /* Returns maximum number of stored triangles */
+    static __forceinline size_t max_size() { return M; }
+
+    /* Returns required number of primitive blocks for N primitives (N divided by max_size(), rounded up) */
+    static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
+
+  public:
+
+    /* Default constructor */
+    __forceinline TriangleMi() { }
+
+    /* Construction from vertices and IDs (with EMBREE_COMPACT_POLYS the vertex offsets v0,v1,v2 are ignored) */
+    __forceinline TriangleMi(const vuint<M>& v0,
+                             const vuint<M>& v1,
+                             const vuint<M>& v2,
+                             const vuint<M>& geomIDs,
+                             const vuint<M>& primIDs)
+#if defined(EMBREE_COMPACT_POLYS)
+      : geomIDs(geomIDs), primIDs(primIDs) {}
+#else
+      : v0_(v0), v1_(v1), v2_(v2), geomIDs(geomIDs), primIDs(primIDs) {}
+#endif
+
+    /* Returns a mask that tells which triangles are valid (a primitive ID of -1 marks an unused lane) */
+    __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); }
+
+    /* Returns if the specified triangle is valid */
+    __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; }
+
+    /* Returns the number of stored triangles (bsf presumably returns the lowest set bit, i.e. the first lane that is not valid -- TODO confirm) */
+    __forceinline size_t size() const { return bsf(~movemask(valid())); }
+
+    /* Returns the geometry IDs */
+    __forceinline vuint<M> geomID() const { return geomIDs; }
+    __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; }
+
+    /* Returns the primitive IDs */
+    __forceinline vuint<M> primID() const { return primIDs; }
+    __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
+
+    /* Calculate the bounds of the triangles at time step itime; stops at the first invalid lane */
+    __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const
+    {
+      BBox3fa bounds = empty;
+      for (size_t i=0; i<M && valid(i); i++) {
+        const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i));
+        bounds.extend(mesh->bounds(primID(i),itime));
+      }
+      return bounds;
+    
}
+
+    /* Calculate the linear bounds of the primitive from the bounds at time steps itime and itime+1 */
+    __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime) {
+      return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1));
+    }
+
+    __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps) // merges mesh->linearBounds(primID, itime, numTimeSteps) over all valid lanes
+    {
+      LBBox3fa allBounds = empty;
+      for (size_t i=0; i<M && valid(i); i++)
+      {
+        const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i));
+        allBounds.extend(mesh->linearBounds(primID(i), itime, numTimeSteps));
+      }
+      return allBounds;
+    }
+
+    __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range) // merges mesh->linearBounds(primID, time_range) over all valid lanes
+    {
+      LBBox3fa allBounds = empty;
+      for (size_t i=0; i<M && valid(i); i++)
+      {
+        const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i));
+        allBounds.extend(mesh->linearBounds(primID(i), time_range));
+      }
+      return allBounds;
+    }
+
+    /* Non-temporal store of every member of src into dst */
+    __forceinline static void store_nt(TriangleMi* dst, const TriangleMi& src)
+    {
+#if !defined(EMBREE_COMPACT_POLYS)
+      vuint<M>::store_nt(&dst->v0_,src.v0_);
+      vuint<M>::store_nt(&dst->v1_,src.v1_);
+      vuint<M>::store_nt(&dst->v2_,src.v2_);
+#endif
+      vuint<M>::store_nt(&dst->geomIDs,src.geomIDs);
+      vuint<M>::store_nt(&dst->primIDs,src.primIDs);
+    }
+
+    /* Fill triangle from triangle list: consumes up to M entries of prims starting at begin and advances begin past them */
+    template<typename PrimRefT>
+    __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
+    {
+      vuint<M> v0 = zero, v1 = zero, v2 = zero;
+      vuint<M> geomID = -1, primID = -1;
+      const PrimRefT* prim = &prims[begin];
+
+      for (size_t i=0; i<M; i++)
+      {
+        if (begin<end) {
+          geomID[i] = prim->geomID();
+          primID[i] = prim->primID();
+#if !defined(EMBREE_COMPACT_POLYS)
+          const TriangleMesh* mesh = scene->get<TriangleMesh>(prim->geomID());
+          const TriangleMesh::Triangle& tri = mesh->triangle(prim->primID());
+          unsigned int int_stride = mesh->vertices0.getStride()/4; // getStride()/4: the offsets below are stored in 4-byte units
+          v0[i] = tri.v[0] * int_stride;
+          v1[i] = tri.v[1] * int_stride;
+          v2[i] = tri.v[2] * int_stride;
+#endif
+          begin++;
+        } else {
+          assert(i); // at least one primitive must have been available
+          if (likely(i > 0)) {
+            geomID[i] = geomID[0]; // padding lane: geometry ID of lane 0, primitive ID -1 (invalid)
+            primID[i] = -1;
+            v0[i] = v0[0]; // all three offsets point at lane 0's first vertex
+            v1[i] = v0[0];
+            v2[i] = v0[0];
+          }
+        }
+        if (begin<end) prim = &prims[begin]; // only advance prim while entries remain
+      }
+      new (this) TriangleMi(v0,v1,v2,geomID,primID); // FIXME: use non temporal store
+    }
+
+    __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime) // fill(), then linear bounds for time step itime
+    {
+      fill(prims, begin, end, scene);
+      return linearBounds(scene, itime);
+    }
+
+    __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range) // fill(), then linear bounds over time_range
+    {
+      fill(prims, begin, end, scene);
+      return linearBounds(scene, time_range);
+    }
+
+    /* Updates the primitive: returns the bounds of the stored triangles recomputed from the vertices of mesh */
+    __forceinline BBox3fa update(TriangleMesh* mesh)
+    {
+      BBox3fa bounds = empty;
+      for (size_t i=0; i<M; i++)
+      {
+        if (primID(i) == -1) break; // first invalid lane ends the list
+        const unsigned int primId = primID(i);
+        const TriangleMesh::Triangle& tri = mesh->triangle(primId);
+        const Vec3fa p0 = mesh->vertex(tri.v[0]);
+        const Vec3fa p1 = mesh->vertex(tri.v[1]);
+        const Vec3fa p2 = mesh->vertex(tri.v[2]);
+        bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2)));
+      }
+      return bounds;
+    }
+
+  protected:
+#if !defined(EMBREE_COMPACT_POLYS)
+    vuint<M> v0_;         // 4 byte offset of 1st vertex
+    vuint<M> v1_;         // 4 byte offset of 2nd vertex
+    vuint<M> v2_;         // 4 byte offset of 3rd vertex
+#endif
+    vuint<M> geomIDs;    // geometry ID of mesh
+    vuint<M> primIDs;    // primitive ID of primitive inside mesh
+  };
+
+  namespace isa
+  {
+
+    template<int M>
+    struct TriangleMi : public embree::TriangleMi<M> // adds vertex loading and gathering on top of embree::TriangleMi<M>
+    {
+#if !defined(EMBREE_COMPACT_POLYS)
+      using embree::TriangleMi<M>::v0_;
+      using embree::TriangleMi<M>::v1_;
+      using embree::TriangleMi<M>::v2_;
+#endif
+      using embree::TriangleMi<M>::geomIDs;
+      using embree::TriangleMi<M>::primIDs;
+      using embree::TriangleMi<M>::geomID;
+      using embree::TriangleMi<M>::primID;
+      using embree::TriangleMi<M>::valid;
+
+      /* loads a single vertex (vid = 0, 1 or 2 selects the corner) of the triangle in lane index */
+      template<int vid>
+      
__forceinline Vec3f getVertex(const size_t index, const Scene *const scene) const
+      {
+#if defined(EMBREE_COMPACT_POLYS)
+        const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
+        const TriangleMesh::Triangle& tri = mesh->triangle(primID(index));
+        return (Vec3f) mesh->vertices[0][tri.v[vid]];
+#else
+        const vuint<M>& v = getVertexOffset<vid>();
+        const float* vertices = scene->vertices[geomID(index)];
+        return (Vec3f&) vertices[v[index]]; // reinterprets the floats starting at the stored offset as a Vec3f
+#endif
+      }
+
+      template<int vid, typename T>
+      __forceinline Vec3<T> getVertex(const size_t index, const Scene *const scene, const size_t itime, const T& ftime) const // loads the vertex at time steps itime and itime+1 and returns lerp(p0,p1,ftime)
+      {
+#if defined(EMBREE_COMPACT_POLYS)
+        const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
+        const TriangleMesh::Triangle& tri = mesh->triangle(primID(index));
+        const Vec3fa v0 = mesh->vertices[itime+0][tri.v[vid]];
+        const Vec3fa v1 = mesh->vertices[itime+1][tri.v[vid]];
+#else
+        const vuint<M>& v = getVertexOffset<vid>();
+        const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
+        const float* vertices0 = (const float*) mesh->vertexPtr(0,itime+0);
+        const float* vertices1 = (const float*) mesh->vertexPtr(0,itime+1);
+        const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]);
+        const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]);
+#endif
+        const Vec3<T> p0(v0.x,v0.y,v0.z);
+        const Vec3<T> p1(v1.x,v1.y,v1.z);
+        return lerp(p0,p1,ftime);
+      }
+
+      template<int vid, int K, typename T>
+      __forceinline Vec3<T> getVertex(const vbool<K>& valid, const size_t index, const Scene *const scene, const vint<K>& itime, const T& ftime) const // packet version: each active lane i uses its own time step itime[i]
+      {
+        Vec3<T> p0, p1;
+        const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
+
+        for (size_t mask=movemask(valid), i=bsf(mask); mask; mask=btc(mask,i), i=bsf(mask)) // visits the set bits of valid (btc presumably clears bit i -- TODO confirm)
+        {
+#if defined(EMBREE_COMPACT_POLYS)
+          const TriangleMesh::Triangle& tri = mesh->triangle(primID(index));
+          const Vec3fa v0 = mesh->vertices[itime[i]+0][tri.v[vid]];
+          const Vec3fa v1 = mesh->vertices[itime[i]+1][tri.v[vid]];
+#else
+          const vuint<M>& v = getVertexOffset<vid>();
+          const float* vertices0 = (const float*) mesh->vertexPtr(0,itime[i]+0);
+          const float* vertices1 = (const float*) mesh->vertexPtr(0,itime[i]+1);
+          const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]);
+          const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]);
+#endif
+          p0.x[i] = v0.x; p0.y[i] = v0.y; p0.z[i] = v0.z;
+          p1.x[i] = v1.x; p1.y[i] = v1.y; p1.z[i] = v1.z;
+        }
+        return (T(one)-ftime)*p0 + ftime*p1; // per-lane linear interpolation; lanes not set in valid were never written in p0/p1
+      }
+
+      struct Triangle { // the three corners of one triangle, one vfloat4 each
+        vfloat4 v0,v1,v2;
+      };
+
+#if defined(EMBREE_COMPACT_POLYS)
+
+      __forceinline Triangle loadTriangle(const int i, const Scene* const scene) const // loads lane i through the mesh's triangle index list; an invalid lane yields three zero vectors
+      {
+        const unsigned int geomID = geomIDs[i];
+        const unsigned int primID = primIDs[i];
+        if (unlikely(primID == -1)) return { zero, zero, zero };
+        const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID);
+        const TriangleMesh::Triangle& tri = mesh->triangle(primID);
+        const vfloat4 v0 = (vfloat4) mesh->vertices0[tri.v[0]];
+        const vfloat4 v1 = (vfloat4) mesh->vertices0[tri.v[1]];
+        const vfloat4 v2 = (vfloat4) mesh->vertices0[tri.v[2]];
+        return { v0, v1, v2 };
+      }
+
+      __forceinline Triangle loadTriangle(const int i, const int itime, const TriangleMesh* const mesh) const // same, reading the vertices of time step itime
+      {
+        const unsigned int primID = primIDs[i];
+        if (unlikely(primID == -1)) return { zero, zero, zero };
+        const TriangleMesh::Triangle& tri = mesh->triangle(primID);
+        const vfloat4 v0 = (vfloat4) mesh->vertices[itime][tri.v[0]];
+        const vfloat4 v1 = (vfloat4) mesh->vertices[itime][tri.v[1]];
+        const vfloat4 v2 = (vfloat4) mesh->vertices[itime][tri.v[2]];
+        return { v0, v1, v2 };
+      }
+
+#else
+
+      __forceinline Triangle loadTriangle(const int i, const Scene* const scene) const // loads lane i through the stored float offsets v0_/v1_/v2_; no validity check in this variant
+      {
+        const float* vertices = scene->vertices[geomID(i)];
+        const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]);
+        const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]);
+        const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]);
+        return { v0, v1, v2 };
+      }
+
+      __forceinline Triangle loadTriangle(const 
int i, const int itime, const TriangleMesh* const mesh) const /* Default build, explicit time step: loads the three vertices of slot i from mesh->vertexPtr(0,itime) at the stored offsets v0_[i], v1_[i], v2_[i]. */ + { + const float* vertices = (const float*) mesh->vertexPtr(0,itime); + const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]); + const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]); + const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]); + return { v0, v1, v2 }; + } + +#endif + + /* Gather the triangles */ + __forceinline void gather(Vec3vf<M>& p0, Vec3vf<M>& p1, Vec3vf<M>& p2, const Scene* const scene) const; + + /* Motion-blur gather of the single triangle `index` for a packet of K rays. The per-ray time segment and fraction come from mesh->timeSegment(time, ftime). When every valid ray has the same segment as the first valid ray, the getVertex overload taking one scalar segment is used; otherwise the overload taking the valid mask and the per-ray segments is used. */ template<int K> +#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 2000) // workaround for compiler bug in ICC 2019 + __noinline +#else + __forceinline +#endif + void gather(const vbool<K>& valid, + Vec3vf<K>& p0, + Vec3vf<K>& p1, + Vec3vf<K>& p2, + const size_t index, + const Scene* const scene, + const vfloat<K>& time) const + { + const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index)); + + vfloat<K> ftime; + const vint<K> itime = mesh->timeSegment(time, ftime); + + /* first = lowest set bit of the valid mask (presumably bsf is a bit-scan-forward; the result is used as a lane index) */ const size_t first = bsf(movemask(valid)); + if (likely(all(valid,itime[first] == itime))) + { + p0 = getVertex<0>(index, scene, itime[first], ftime); + p1 = getVertex<1>(index, scene, itime[first], ftime); + p2 = getVertex<2>(index, scene, itime[first], ftime); + } else { + p0 = getVertex<0>(valid, index, scene, itime, ftime); + p1 = getVertex<1>(valid, index, scene, itime, ftime); + p2 = getVertex<2>(valid, index, scene, itime, ftime); + } + } + + /* Gather of the M triangles at an integer time step; declaration only. */ __forceinline void gather(Vec3vf<M>& p0, + Vec3vf<M>& p1, + Vec3vf<M>& p2, + const TriangleMesh* mesh, + const Scene *const scene, + const int itime) const; + + /* Gather of the M triangles at a float time; declaration only. */ __forceinline void gather(Vec3vf<M>& p0, + Vec3vf<M>& p1, + Vec3vf<M>& p2, + const Scene *const scene, + const float time) const; + + +#if !defined(EMBREE_COMPACT_POLYS) + template<int N> const vuint<M>& getVertexOffset() const; +#endif + }; + +#if !defined(EMBREE_COMPACT_POLYS) + /* M=4 specialization: index 0 returns the stored offset vector v0_. */ template<> template<> __forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<0>() const { return v0_; } + template<> template<>
__forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<1>() const { return v1_; } + template<> template<> __forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<2>() const { return v2_; } +#endif + + /* M=4 gather: loads triangles 0..3 with loadTriangle(i,scene) and transposes them so that p0, p1 and p2 each receive the x, y and z components of one vertex across the four triangles. */ template<> + __forceinline void TriangleMi<4>::gather(Vec3vf4& p0, + Vec3vf4& p1, + Vec3vf4& p2, + const Scene* const scene) const + { + const Triangle tri0 = loadTriangle(0,scene); + const Triangle tri1 = loadTriangle(1,scene); + const Triangle tri2 = loadTriangle(2,scene); + const Triangle tri3 = loadTriangle(3,scene); + transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z); + transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z); + transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z); + } + + /* M=4 gather at time step itime: identical to the overload above except that the triangles are loaded with loadTriangle(i,itime,mesh). The scene parameter is not referenced in this body. */ template<> + __forceinline void TriangleMi<4>::gather(Vec3vf4& p0, + Vec3vf4& p1, + Vec3vf4& p2, + const TriangleMesh* mesh, + const Scene *const scene, + const int itime) const + { + const Triangle tri0 = loadTriangle(0,itime,mesh); + const Triangle tri1 = loadTriangle(1,itime,mesh); + const Triangle tri2 = loadTriangle(2,itime,mesh); + const Triangle tri3 = loadTriangle(3,itime,mesh); + transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z); + transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z); + transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z); + } + + /* M=4 gather at a float time: uses the mesh of slot 0, obtains the segment index and fraction from mesh->timeSegment, gathers the vertices at itime and itime+1 and linearly interpolates them with ftime. */ template<> + __forceinline void TriangleMi<4>::gather(Vec3vf4& p0, + Vec3vf4& p1, + Vec3vf4& p2, + const Scene *const scene, + const float time) const + { + const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(0)); // in mblur mode all geometries are identical + + float ftime; + const int itime = mesh->timeSegment(time, ftime); + + Vec3vf4 a0,a1,a2; gather(a0,a1,a2,mesh,scene,itime); + Vec3vf4 b0,b1,b2; gather(b0,b1,b2,mesh,scene,itime+1); + p0 = lerp(a0,b0,vfloat4(ftime)); + p1 = lerp(a1,b1,vfloat4(ftime)); + p2 = lerp(a2,b2,vfloat4(ftime)); + } + } + + template<int M> + typename TriangleMi<M>::Type TriangleMi<M>::type; + + typedef TriangleMi<4> Triangle4i; +} diff --git
a/thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h new file mode 100644 index 0000000000..e2f106a62c --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h @@ -0,0 +1,336 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "trianglei.h" +#include "triangle_intersector_moeller.h" +#include "triangle_intersector_pluecker.h" + +namespace embree +{ + namespace isa + { + /*! Intersects M triangles with 1 ray */ + template<int M, int Mx, bool filter> + struct TriangleMiIntersector1Moeller + { + typedef TriangleMi<M> Primitive; + typedef MoellerTrumboreIntersector1<Mx> Precalculations; + + /* Gathers the vertices of all M triangles from context->scene and passes them to pre.intersect together with an Intersect1EpilogM built from the ray, the context and the block's geomID()/primID(). */ static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); + pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /* Same gather, but with an Occluded1EpilogM; returns the result of pre.intersect. */ static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); + return pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /* Forwards unchanged to PrimitivePointQuery1<Primitive>::pointQuery. */ static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); + } + }; + + /*!
Intersects M triangles with K rays */ + template<int M, int Mx, int K, bool filter> + struct TriangleMiIntersectorKMoeller + { + typedef TriangleMi<M> Primitive; + typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; + + /* Packet intersect: walks the triangle slots in order, stopping at the first invalid one; for each slot fetches its three vertices as Vec3vf<K> via tri.getVertex<0..2>(i,scene) and calls pre.intersectK under the mask valid_i with an IntersectKEpilogM for slot i. */ static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri) + { + const Scene* scene = context->scene; + for (size_t i=0; i<Primitive::max_size(); i++) + { + if (!tri.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),RayHitK<K>::size()); + const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene); + const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene); + const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene); + pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); + } + } + + /* Packet occlusion test: same loop, run on a working copy valid0 of the input mask that is also handed to the OccludedKEpilogM; the loop ends early once none(valid0) holds and the function returns !valid0. NOTE(review): the statistics call counts popcnt(valid_i) rather than the working mask valid0 - confirm that is intended. */ static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri) + { + vbool<K> valid0 = valid_i; + const Scene* scene = context->scene; + + for (size_t i=0; i<Primitive::max_size(); i++) + { + if (!tri.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid_i),RayHitK<K>::size()); + const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene); + const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene); + const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene); + pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); + if (none(valid0)) break; + } + return !valid0; + } + + /* Ray k of the packet against all M triangles: gathers the vertices from context->scene and calls pre.intersect(ray,k,...) with an Intersect1KEpilogM. */ static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); + pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + } + + 
static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) /* Occlusion test of ray k of the packet against all M triangles: gathers the vertices from context->scene and returns pre.intersect(ray,k,...) run with an Occluded1KEpilogM. */ + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); + return pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + } + }; + + /*! Intersects M triangles with 1 ray */ + template<int M, int Mx, bool filter> + struct TriangleMiIntersector1Pluecker + { + typedef TriangleMi<M> Primitive; + typedef PlueckerIntersector1<Mx> Precalculations; + + /* Gathers the vertices of all M triangles from context->scene and calls pre.intersect; unlike the Moeller variant in this file, a UVIdentity<Mx>() argument is passed ahead of the Intersect1EpilogM. */ static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); + pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /* Same gather with an Occluded1EpilogM; returns the result of pre.intersect. */ static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); + return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /* Forwards unchanged to PrimitivePointQuery1<Primitive>::pointQuery. */ static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); + } + }; + + /*!
Intersects M triangles with K rays */ + template<int M, int Mx, int K, bool filter> + struct TriangleMiIntersectorKPluecker + { + typedef TriangleMi<M> Primitive; + typedef PlueckerIntersectorK<Mx,K> Precalculations; + + /* Packet intersect: walks the triangle slots in order, stopping at the first invalid one; for each slot fetches its vertices as Vec3vf<K> via tri.getVertex<0..2>(i,scene) and calls pre.intersectK under valid_i with UVIdentity<K>() and an IntersectKEpilogM for slot i. */ static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri) + { + const Scene* scene = context->scene; + for (size_t i=0; i<Primitive::max_size(); i++) + { + if (!tri.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),RayHitK<K>::size()); + const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene); + const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene); + const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene); + pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); + } + } + + /* Packet occlusion test: same loop on a working copy valid0 of the input mask, which is also handed to the OccludedKEpilogM; ends early once none(valid0) holds and returns !valid0. */ static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri) + { + vbool<K> valid0 = valid_i; + const Scene* scene = context->scene; + + for (size_t i=0; i<Primitive::max_size(); i++) + { + if (!tri.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid_i),RayHitK<K>::size()); + const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene); + const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene); + const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene); + pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); + if (none(valid0)) break; + } + return !valid0; + } + + /* Ray k of the packet against all M triangles: gathers the vertices from context->scene and calls pre.intersect(ray,k,...) with UVIdentity<Mx>() and an Intersect1KEpilogM. */ static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); + pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + } + + static __forceinline
bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) /* Occlusion test of ray k of the packet against all M triangles: gathers the vertices from context->scene and returns pre.intersect(ray,k,...) run with UVIdentity<Mx>() and an Occluded1KEpilogM. */ + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); + return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + } + }; + + /*! Intersects M motion blur triangles with 1 ray */ + template<int M, int Mx, bool filter> + struct TriangleMiMBIntersector1Moeller + { + typedef TriangleMi<M> Primitive; + typedef MoellerTrumboreIntersector1<Mx> Precalculations; + + /*! Intersect a ray with the M triangles and updates the hit. */ + /* The vertices come from the gather overload that additionally takes ray.time(); everything else matches the non motion blur call. */ static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()); + pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /*! Test if the ray is occluded by one of M triangles. */ + /* Same time-dependent gather, run with an Occluded1EpilogM; returns the result of pre.intersect. */ static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()); + return pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /* Forwards unchanged to PrimitivePointQuery1<Primitive>::pointQuery. */ static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); + } + }; + + /*! Intersects M motion blur triangles with K rays. */ + template<int M, int Mx, int K, bool filter> + struct TriangleMiMBIntersectorKMoeller + { + typedef TriangleMi<M> Primitive; + typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; + + /*! Intersects K rays with M triangles.
*/ + /* Walks the triangle slots in order, stopping at the first invalid one; per slot the K-wide vertices come from tri.gather(valid_i,...,i,scene,ray.time()) and are passed to pre.intersectK under valid_i. */ static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri) + { + for (size_t i=0; i<TriangleMi<M>::max_size(); i++) + { + if (!tri.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time()); + pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); + } + } + + /*! Test for K rays if they are occluded by any of the M triangles. */ + /* Runs on a working copy valid0 of the input mask; the gather call still receives the original valid_i while pre.intersectK and the epilog receive valid0. Ends early once none(valid0) holds and returns !valid0. */ static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri) + { + vbool<K> valid0 = valid_i; + for (size_t i=0; i<TriangleMi<M>::max_size(); i++) + { + if (!tri.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid0),K); + Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time()); + pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); + if (none(valid0)) break; + } + return !valid0; + } + + /*! Intersect a ray with M triangles and updates the hit. */ + /* Ray k only: the M-wide gather is given the scalar time ray.time()[k]. */ static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]); + pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + } + + /*! Test if the ray is occluded by one of the M triangles.
*/ + /* Ray k only: the M-wide gather is given the scalar time ray.time()[k]; returns the result of pre.intersect run with an Occluded1KEpilogM. */ static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri) + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]); + return pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + } + }; + + /*! Intersects M motion blur triangles with 1 ray */ + template<int M, int Mx, bool filter> + struct TriangleMiMBIntersector1Pluecker + { + typedef TriangleMi<M> Primitive; + typedef PlueckerIntersector1<Mx> Precalculations; + + /*! Intersect a ray with the M triangles and updates the hit. */ + /* The vertices come from the gather overload that additionally takes ray.time(); pre.intersect receives UVIdentity<Mx>() ahead of the epilog. */ static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()); + pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /*! Test if the ray is occluded by one of M triangles. */ + /* Same time-dependent gather, run with an Occluded1EpilogM; returns the result of pre.intersect. */ static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()); + return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /* Forwards unchanged to PrimitivePointQuery1<Primitive>::pointQuery. */ static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); + } + }; + + /*! Intersects M motion blur triangles with K rays. */ + template<int M, int Mx, int K, bool filter> + struct TriangleMiMBIntersectorKPluecker + { + typedef TriangleMi<M> Primitive; + typedef PlueckerIntersectorK<Mx,K> Precalculations; + + /*! Intersects K rays with M triangles.
*/ + /* Walks the triangle slots in order, stopping at the first invalid one; per slot the K-wide vertices come from tri.gather(valid_i,...,i,scene,ray.time()) and are passed to pre.intersectK under valid_i together with UVIdentity<K>(). */ static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri) + { + for (size_t i=0; i<TriangleMi<M>::max_size(); i++) + { + if (!tri.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time()); + pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); + } + } + + /*! Test for K rays if they are occluded by any of the M triangles. */ + /* Runs on a working copy valid0 of the input mask; the gather call still receives the original valid_i while pre.intersectK and the epilog receive valid0. Ends early once none(valid0) holds and returns !valid0. */ static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri) + { + vbool<K> valid0 = valid_i; + for (size_t i=0; i<TriangleMi<M>::max_size(); i++) + { + if (!tri.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid0),K); + Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time()); + pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); + if (none(valid0)) break; + } + return !valid0; + } + + /*! Intersect a ray with M triangles and updates the hit. */ + /* Ray k only: the M-wide gather is given the scalar time ray.time()[k]. */ static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri) + { + STAT3(normal.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]); + pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + } + + /*! Test if the ray is occluded by one of the M triangles.
*/ + /* Ray k only: the M-wide gather is given the scalar time ray.time()[k]; returns the result of pre.intersect run with UVIdentity<Mx>() and an Occluded1KEpilogM. */ static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri) + { + STAT3(shadow.trav_prims,1,1,1); + Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]); + return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev.h new file mode 100644 index 0000000000..19af389e73 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/trianglev.h @@ -0,0 +1,157 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "primitive.h" + +namespace embree +{ + /* Stores the vertices of M triangles in struct of array layout */ + template <int M> + struct TriangleMv + { + public: + /* Primitive type descriptor; the four member functions are only declared in this header. */ struct Type : public PrimitiveType + { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + static Type type; + + public: + + /* Returns maximum number of stored triangles */ + static __forceinline size_t max_size() { return M; } + + /* Returns required number of primitive blocks for N primitives */ + /* i.e. N divided by max_size(), rounded up */ static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } + + public: + + /* Default constructor */ + /* Empty body: no member is given an initializer here. */ __forceinline TriangleMv() {} + + /* Construction from vertices and IDs */ + __forceinline TriangleMv(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const vuint<M>& geomIDs, const vuint<M>& primIDs) + : v0(v0), v1(v1), v2(v2), geomIDs(geomIDs), primIDs(primIDs) {} + + /* Returns a mask that tells which triangles are valid */ + /* A slot counts as valid when its geometry ID differs from -1. */ __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); } + + /* Returns true if the specified triangle is valid */ + __forceinline bool valid(const size_t
i) const { assert(i<M); return geomIDs[i] != -1; } + + /* Returns the number of stored triangles */ + /* Computed as bsf of the inverted valid-lane mask; presumably bsf is a bit-scan-forward, so this yields the index of the first invalid slot - TODO confirm. */ __forceinline size_t size() const { return bsf(~movemask(valid())); } + + /* Returns the geometry IDs */ + __forceinline vuint<M>& geomID() { return geomIDs; } + __forceinline const vuint<M>& geomID() const { return geomIDs; } + __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; } + + /* Returns the primitive IDs */ + __forceinline vuint<M>& primID() { return primIDs; } + __forceinline const vuint<M>& primID() const { return primIDs; } + __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } + + /* Calculate the bounds of the triangles */ + __forceinline BBox3fa bounds() const + { + Vec3vf<M> lower = min(v0,v1,v2); + Vec3vf<M> upper = max(v0,v1,v2); + /* lanes of invalid slots are replaced by pos_inf (lower) and neg_inf (upper) before the horizontal reductions below */ vbool<M> mask = valid(); + lower.x = select(mask,lower.x,vfloat<M>(pos_inf)); + lower.y = select(mask,lower.y,vfloat<M>(pos_inf)); + lower.z = select(mask,lower.z,vfloat<M>(pos_inf)); + upper.x = select(mask,upper.x,vfloat<M>(neg_inf)); + upper.y = select(mask,upper.y,vfloat<M>(neg_inf)); + upper.z = select(mask,upper.z,vfloat<M>(neg_inf)); + return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)), + Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z))); + } + + /* Non temporal store */ + /* Copies src to dst one component at a time: nine vfloat<M>::store_nt calls for the vertex components and two vuint<M>::store_nt calls for the ID vectors. */ __forceinline static void store_nt(TriangleMv* dst, const TriangleMv& src) + { + vfloat<M>::store_nt(&dst->v0.x,src.v0.x); + vfloat<M>::store_nt(&dst->v0.y,src.v0.y); + vfloat<M>::store_nt(&dst->v0.z,src.v0.z); + vfloat<M>::store_nt(&dst->v1.x,src.v1.x); + vfloat<M>::store_nt(&dst->v1.y,src.v1.y); + vfloat<M>::store_nt(&dst->v1.z,src.v1.z); + vfloat<M>::store_nt(&dst->v2.x,src.v2.x); + vfloat<M>::store_nt(&dst->v2.y,src.v2.y); + vfloat<M>::store_nt(&dst->v2.z,src.v2.z); + vuint<M>::store_nt(&dst->geomIDs,src.geomIDs); + vuint<M>::store_nt(&dst->primIDs,src.primIDs); + } + + /* Fill triangle from triangle list */ +
__forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene) /* Consumes up to M entries of prims starting at begin (begin is advanced by the number consumed, never past end). For each entry the mesh is looked up in the scene and the triangle's three vertices are copied into lane i. Lanes that are not filled keep geomID and primID of -1 and zero vertices. The finished block is written to *this with store_nt. */ + { + vuint<M> vgeomID = -1, vprimID = -1; + Vec3vf<M> v0 = zero, v1 = zero, v2 = zero; + + for (size_t i=0; i<M && begin<end; i++, begin++) + { + const PrimRef& prim = prims[begin]; + const unsigned geomID = prim.geomID(); + const unsigned primID = prim.primID(); + const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(geomID); + const TriangleMesh::Triangle& tri = mesh->triangle(primID); + const Vec3fa& p0 = mesh->vertex(tri.v[0]); + const Vec3fa& p1 = mesh->vertex(tri.v[1]); + const Vec3fa& p2 = mesh->vertex(tri.v[2]); + vgeomID [i] = geomID; + vprimID [i] = primID; + v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; + v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; + v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; + } + TriangleMv::store_nt(this,TriangleMv(v0,v1,v2,vgeomID,vprimID)); + } + + /* Updates the primitive */ + /* Re-reads the vertices of the stored triangles from mesh, keeping the stored geometry and primitive IDs, rebuilds this block in place with placement new and returns the merged bounds of the vertices that were read. The loop stops at the first slot whose primID is -1. NOTE(review): validity elsewhere in this class is tested on geomIDs, here on primID - confirm both are always -1 together. */ __forceinline BBox3fa update(TriangleMesh* mesh) + { + BBox3fa bounds = empty; + vuint<M> vgeomID = -1, vprimID = -1; + Vec3vf<M> v0 = zero, v1 = zero, v2 = zero; + + for (size_t i=0; i<M; i++) + { + if (primID(i) == -1) break; + const unsigned geomId = geomID(i); + const unsigned primId = primID(i); + const TriangleMesh::Triangle& tri = mesh->triangle(primId); + const Vec3fa p0 = mesh->vertex(tri.v[0]); + const Vec3fa p1 = mesh->vertex(tri.v[1]); + const Vec3fa p2 = mesh->vertex(tri.v[2]); + bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2))); + vgeomID [i] = geomId; + vprimID [i] = primId; + v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; + v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; + v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; + } + new (this) TriangleMv(v0,v1,v2,vgeomID,vprimID); + return bounds; + } + + public: + Vec3vf<M> v0; // 1st vertex of the triangles + Vec3vf<M> v1; // 2nd vertex of the triangles + Vec3vf<M> v2; // 3rd vertex of the triangles + private: + vuint<M> geomIDs; // geometry ID + vuint<M> primIDs; //
primitive ID + }; + + template<int M> + typename TriangleMv<M>::Type TriangleMv<M>::type; + + typedef TriangleMv<4> Triangle4v; +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h new file mode 100644 index 0000000000..6af0d5a11c --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h @@ -0,0 +1,206 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +/* NOTE(review): the intersectors below use TriangleMv, which the preceding diff defines in trianglev.h, yet this header includes triangle.h - presumably trianglev.h is pulled in elsewhere; confirm. */ #include "triangle.h" +#include "triangle_intersector_pluecker.h" +#include "triangle_intersector_moeller.h" +#include "triangle_intersector_woop.h" + +namespace embree +{ + namespace isa + { + /*! Intersects M triangles with 1 ray */ + template<int M, int Mx, bool filter> + struct TriangleMvIntersector1Moeller + { + typedef TriangleMv<M> Primitive; + typedef MoellerTrumboreIntersector1<Mx> Precalculations; + + /*! Intersect a ray with M triangles and updates the hit. */ + /* Passes the stored vertices tri.v0, tri.v1, tri.v2 straight to pre.intersect; no gather step is needed for this primitive. NOTE(review): pre is taken by non-const reference here but by const reference in the occluded function that follows - confirm whether that is intentional. */ static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) + { + STAT3(normal.trav_prims,1,1,1); + pre.intersect(ray,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /*! Test if the ray is occluded by one of the M triangles.
*/ + /* Passes the stored vertices straight to pre.intersect with an Occluded1EpilogM and returns its result. */ static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) + { + STAT3(shadow.trav_prims,1,1,1); + return pre.intersect(ray,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /* Forwards unchanged to PrimitivePointQuery1<Primitive>::pointQuery. */ static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); + } + }; + + + /* Single-ray intersector for TriangleMv built on WoopIntersector1. Unlike the Moeller and Pluecker structs in this file, the intersection routine is a static function of the typedef `intersec` that receives the precalculations object as an argument, and Precalculations is WoopPrecalculations1<M> (M, not Mx). */ template<int M, int Mx, bool filter> + struct TriangleMvIntersector1Woop + { + typedef TriangleMv<M> Primitive; + typedef WoopIntersector1<Mx> intersec; + typedef WoopPrecalculations1<M> Precalculations; + + /*! Intersect a ray with M triangles and updates the hit. */ + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) + { + STAT3(normal.trav_prims,1,1,1); + intersec::intersect(ray,pre,tri.v0,tri.v1,tri.v2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /*! Test if the ray is occluded by one of the M triangles. */ + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) + { + STAT3(shadow.trav_prims,1,1,1); + return intersec::intersect(ray,pre,tri.v0,tri.v1,tri.v2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /* Forwards unchanged to PrimitivePointQuery1<Primitive>::pointQuery. */ static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); + } + }; + + + /*! Intersects M triangles with K rays */ + template<int M, int Mx, int K, bool filter> + struct TriangleMvIntersectorKMoeller + { + typedef TriangleMv<M> Primitive; + typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; + + /*! Intersects K rays with M triangles.
*/ + /* Walks the triangle slots in order, stopping at the first invalid one; lane i of each stored vertex is expanded to a Vec3vf<K> with broadcast and passed to pre.intersectK under valid_i. */ static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri) + { + for (size_t i=0; i<M; i++) + { + if (!tri.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i); + const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i); + const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i); + pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); + } + } + + /*! Test for K rays if they are occluded by any of the M triangles. */ + /* Same loop on a working copy valid0 of the input mask, which is also handed to the OccludedKEpilogM; ends early once none(valid0) holds and returns !valid0. */ static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri) + { + vbool<K> valid0 = valid_i; + + for (size_t i=0; i<M; i++) + { + if (!tri.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid_i),K); + const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i); + const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i); + const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i); + pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); + if (none(valid0)) break; + } + return !valid0; + } + + /*! Intersect a ray with M triangles and updates the hit. */ + /* Ray k only: the stored M-wide vertices are passed directly to pre.intersect(ray,k,...). */ static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) + { + STAT3(normal.trav_prims,1,1,1); + pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx + } + + /*! Test if the ray is occluded by one of the M triangles.
*/ + /* Ray k only: the stored M-wide vertices are passed directly to pre.intersect(ray,k,...) with an Occluded1KEpilogM; its result is returned. */ static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) + { + STAT3(shadow.trav_prims,1,1,1); + return pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx + } + }; + + /*! Intersects M triangles with 1 ray */ + template<int M, int Mx, bool filter> + struct TriangleMvIntersector1Pluecker + { + typedef TriangleMv<M> Primitive; + typedef PlueckerIntersector1<Mx> Precalculations; + + /*! Intersect a ray with M triangles and updates the hit. */ + /* Passes the stored vertices to pre.intersect with UVIdentity<Mx>() ahead of the epilog. NOTE(review): pre is non-const here but const in occluded below - confirm whether intentional. */ static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) + { + STAT3(normal.trav_prims,1,1,1); + pre.intersect(ray,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /*! Test if the ray is occluded by one of the M triangles. */ + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) + { + STAT3(shadow.trav_prims,1,1,1); + return pre.intersect(ray,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /* Forwards unchanged to PrimitivePointQuery1<Primitive>::pointQuery. */ static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) + { + return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); + } + }; + + /*! Intersects M triangles with K rays */ + template<int M, int Mx, int K, bool filter> + struct TriangleMvIntersectorKPluecker + { + typedef TriangleMv<M> Primitive; + typedef PlueckerIntersectorK<Mx,K> Precalculations; + + /*! Intersects K rays with M triangles.
*/ + /* Walks the triangle slots in order, stopping at the first invalid one; lane i of each stored vertex is expanded to a Vec3vf<K> with broadcast and passed to pre.intersectK under valid_i together with UVIdentity<K>(). */ static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri) + { + for (size_t i=0; i<M; i++) + { + if (!tri.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i); + const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i); + const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i); + pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); + } + } + + /*! Test for K rays if they are occluded by any of the M triangles. */ + /* Same loop on a working copy valid0 of the input mask, which is also handed to the OccludedKEpilogM; ends early once none(valid0) holds and returns !valid0. */ static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri) + { + vbool<K> valid0 = valid_i; + + for (size_t i=0; i<M; i++) + { + if (!tri.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid_i),K); + const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i); + const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i); + const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i); + pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); + if (none(valid0)) break; + } + return !valid0; + } + + /*! Intersect a ray with M triangles and updates the hit. */ + /* Ray k only: the stored M-wide vertices are passed directly to pre.intersect(ray,k,...) with UVIdentity<Mx>(). */ static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) + { + STAT3(normal.trav_prims,1,1,1); + pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx + } + + /*! Test if the ray is occluded by one of the M triangles.
*/ + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) + { + STAT3(shadow.trav_prims,1,1,1); + return pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx + } + }; + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h new file mode 100644 index 0000000000..63137aee16 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h @@ -0,0 +1,201 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "primitive.h" + +namespace embree +{ + /* Stores the vertices of M motion blur triangles in struct of array layout: the vertices v0,v1,v2 at the first time step plus the per-vertex difference vectors dv0,dv1,dv2 to the second time step, together with geometry and primitive IDs */ + template<int M> + struct TriangleMvMB + { + public: + struct Type : public PrimitiveType + { + const char* name() const; + size_t sizeActive(const char* This) const; + size_t sizeTotal(const char* This) const; + size_t getBytes(const char* This) const; + }; + + static Type type; + + public: + + /* primitive supports single time segments */ + static const bool singleTimeSegment = true; + + /* Returns maximum number of stored triangles (the template parameter M) */ + static __forceinline size_t max_size() { return M; } + + /* Returns required number of primitive blocks for N primitives, i.e. N divided by max_size() and rounded up */ + static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } + + public: + + /* Default constructor */ + __forceinline TriangleMvMB() {} + + /* Construction from vertices and IDs: a0,b0,c0 are stored as the vertices, while a1,b1,c1 are stored only as differences (a1-a0, b1-b0, c1-c0) */ + __forceinline TriangleMvMB(const Vec3vf<M>& a0, const Vec3vf<M>& a1, + const Vec3vf<M>& b0, const Vec3vf<M>& b1, + const Vec3vf<M>& c0, const Vec3vf<M>& c1, + const vuint<M>& geomIDs, const vuint<M>& primIDs) + : v0(a0), v1(b0), v2(c0), dv0(a1-a0), dv1(b1-b0), dv2(c1-c0), geomIDs(geomIDs), primIDs(primIDs) {} + + /* Returns a mask that tells which triangles are valid */ + 
__forceinline vbool<M> valid() const { /* a slot counts as valid when its geometry ID differs from vuint<M>(-1) */ return geomIDs != vuint<M>(-1); } + + /* Returns if the specified triangle is valid, i.e. its geometry ID differs from -1; i must be smaller than M (asserted) */ + __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; } + + /* Returns the number of stored triangles, computed as bsf of the inverted valid() mask (NOTE(review): looks like this relies on the valid triangles occupying the leading slots - confirm) */ + __forceinline size_t size() const { return bsf(~movemask(valid())); } + + /* Returns the geometry IDs: the whole vector by mutable or const reference, or the ID of slot i (i must be smaller than M) */ + __forceinline vuint<M>& geomID() { return geomIDs; } + __forceinline const vuint<M>& geomID() const { return geomIDs; } + __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; } + + /* Returns the primitive IDs: the whole vector by mutable or const reference, or the ID of slot i (i must be smaller than M) */ + __forceinline vuint<M>& primID() { return primIDs; } + __forceinline const vuint<M>& primID() const { return primIDs; } + __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } + + /* Calculate the bounds of the triangles at t0: per-slot min/max over v0,v1,v2, then a reduction over all slots. The select calls put pos_inf/neg_inf into the lower/upper values wherever the valid() mask is not set, before reduce_min/reduce_max run. */ + __forceinline BBox3fa bounds0() const + { + Vec3vf<M> lower = min(v0,v1,v2); + Vec3vf<M> upper = max(v0,v1,v2); + const vbool<M> mask = valid(); + lower.x = select(mask,lower.x,vfloat<M>(pos_inf)); + lower.y = select(mask,lower.y,vfloat<M>(pos_inf)); + lower.z = select(mask,lower.z,vfloat<M>(pos_inf)); + upper.x = select(mask,upper.x,vfloat<M>(neg_inf)); + upper.y = select(mask,upper.y,vfloat<M>(neg_inf)); + upper.z = select(mask,upper.z,vfloat<M>(neg_inf)); + return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)), + Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z))); + } + + /* Calculate the bounds of the triangles at t1: same procedure as for t0, applied to the positions v0+dv0, v1+dv1, v2+dv2 */ + __forceinline BBox3fa bounds1() const + { + const Vec3vf<M> p0 = v0+dv0; + const Vec3vf<M> p1 = v1+dv1; + const Vec3vf<M> p2 = v2+dv2; + Vec3vf<M> lower = min(p0,p1,p2); + Vec3vf<M> upper = max(p0,p1,p2); + const vbool<M> mask = valid(); + lower.x = select(mask,lower.x,vfloat<M>(pos_inf)); + lower.y = select(mask,lower.y,vfloat<M>(pos_inf)); + lower.z = select(mask,lower.z,vfloat<M>(pos_inf)); + upper.x = 
select(mask,upper.x,vfloat<M>(neg_inf)); + upper.y = select(mask,upper.y,vfloat<M>(neg_inf)); + upper.z = select(mask,upper.z,vfloat<M>(neg_inf)); + return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)), + Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z))); + } + + /* Calculate the linear bounds of the primitive as the pair (bounds0(), bounds1()) */ + __forceinline LBBox3fa linearBounds() const { + return LBBox3fa(bounds0(),bounds1()); + } + + /* Fill triangle from triangle list: consumes up to M entries of prims starting at index begin (begin is advanced past the consumed entries, never beyond end), reads every vertex at time steps itime and itime+1, and returns the bounds of the gathered vertices at both steps. Slots that are not filled keep geomID/primID -1 and zero vertices. */ + __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime) + { + vuint<M> vgeomID = -1, vprimID = -1; + Vec3vf<M> va0 = zero, vb0 = zero, vc0 = zero; + Vec3vf<M> va1 = zero, vb1 = zero, vc1 = zero; + + BBox3fa bounds0 = empty; + BBox3fa bounds1 = empty; + + for (size_t i=0; i<M && begin<end; i++, begin++) + { + const PrimRef& prim = prims[begin]; + const unsigned geomID = prim.geomID(); + const unsigned primID = prim.primID(); + const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(geomID); + const TriangleMesh::Triangle& tri = mesh->triangle(primID); + /* suffix 0 = vertex at time step itime, suffix 1 = vertex at time step itime+1; each one extends the matching bounding box */ const Vec3fa& a0 = mesh->vertex(tri.v[0],itime+0); bounds0.extend(a0); + const Vec3fa& a1 = mesh->vertex(tri.v[0],itime+1); bounds1.extend(a1); + const Vec3fa& b0 = mesh->vertex(tri.v[1],itime+0); bounds0.extend(b0); + const Vec3fa& b1 = mesh->vertex(tri.v[1],itime+1); bounds1.extend(b1); + const Vec3fa& c0 = mesh->vertex(tri.v[2],itime+0); bounds0.extend(c0); + const Vec3fa& c1 = mesh->vertex(tri.v[2],itime+1); bounds1.extend(c1); + /* scatter IDs and vertex components into slot i of the struct-of-array temporaries */ vgeomID [i] = geomID; + vprimID [i] = primID; + va0.x[i] = a0.x; va0.y[i] = a0.y; va0.z[i] = a0.z; + va1.x[i] = a1.x; va1.y[i] = a1.y; va1.z[i] = a1.z; + vb0.x[i] = b0.x; vb0.y[i] = b0.y; vb0.z[i] = b0.z; + vb1.x[i] = b1.x; vb1.y[i] = b1.y; vb1.z[i] = b1.z; + vc0.x[i] = c0.x; vc0.y[i] = c0.y; vc0.z[i] = c0.z; + vc1.x[i] = c1.x; vc1.y[i] = c1.y; vc1.z[i] = c1.z; + } + /* this object is re-constructed in place from the temporaries */ new (this) TriangleMvMB(va0,va1,vb0,vb1,vc0,vc1,vgeomID,vprimID); 
+ return LBBox3fa(bounds0,bounds1); + } + + /* Fill triangle from triangle list: variant for PrimRefMB input and a time range. Consumes up to M entries of prims starting at index begin (begin is advanced, never beyond end) and returns the accumulated mesh->linearBounds(primID, time_range) of the consumed primitives. */ + __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range) + { + vuint<M> vgeomID = -1, vprimID = -1; + Vec3vf<M> va0 = zero, vb0 = zero, vc0 = zero; + Vec3vf<M> va1 = zero, vb1 = zero, vc1 = zero; + + LBBox3fa allBounds = empty; + for (size_t i=0; i<M && begin<end; i++, begin++) + { + const PrimRefMB& prim = prims[begin]; + const unsigned geomID = prim.geomID(); + const unsigned primID = prim.primID(); + const TriangleMesh* const mesh = scene->get<TriangleMesh>(geomID); + const range<int> itime_range = mesh->timeSegmentRange(time_range); + /* exactly one time segment of the mesh is expected for time_range */ assert(itime_range.size() == 1); + const int ilower = itime_range.begin(); + const TriangleMesh::Triangle& tri = mesh->triangle(primID); + allBounds.extend(mesh->linearBounds(primID, time_range)); + /* vertices at the two time steps ilower and ilower+1 that delimit the segment */ const Vec3fa& a0 = mesh->vertex(tri.v[0],ilower+0); + const Vec3fa& a1 = mesh->vertex(tri.v[0],ilower+1); + const Vec3fa& b0 = mesh->vertex(tri.v[1],ilower+0); + const Vec3fa& b1 = mesh->vertex(tri.v[1],ilower+1); + const Vec3fa& c0 = mesh->vertex(tri.v[2],ilower+0); + const Vec3fa& c1 = mesh->vertex(tri.v[2],ilower+1); + const BBox1f time_range_v(mesh->timeStep(ilower+0),mesh->timeStep(ilower+1)); + /* NOTE(review): globalLinear is not visible here; presumably it re-expresses each vertex pair, given over time_range_v, as a pair over the global time range - confirm */ auto a01 = globalLinear(std::make_pair(a0,a1),time_range_v); + auto b01 = globalLinear(std::make_pair(b0,b1),time_range_v); + auto c01 = globalLinear(std::make_pair(c0,c1),time_range_v); + /* scatter IDs and the converted vertex pairs into slot i of the struct-of-array temporaries */ vgeomID [i] = geomID; + vprimID [i] = primID; + va0.x[i] = a01.first .x; va0.y[i] = a01.first .y; va0.z[i] = a01.first .z; + va1.x[i] = a01.second.x; va1.y[i] = a01.second.y; va1.z[i] = a01.second.z; + vb0.x[i] = b01.first .x; vb0.y[i] = b01.first .y; vb0.z[i] = b01.first .z; + vb1.x[i] = b01.second.x; vb1.y[i] = b01.second.y; vb1.z[i] = b01.second.z; + vc0.x[i] = c01.first .x; vc0.y[i] = c01.first .y; vc0.z[i] = c01.first .z; + vc1.x[i] = c01.second.x; vc1.y[i] = c01.second.y; vc1.z[i] = c01.second.z; + } + 
new (this) TriangleMvMB(va0,va1,vb0,vb1,vc0,vc1,vgeomID,vprimID); /* in-place re-construction of this object from the temporaries */ + return allBounds; + } + + public: + Vec3vf<M> v0; // 1st vertex of the triangles (position at time step t0) + Vec3vf<M> v1; // 2nd vertex of the triangles (position at time step t0) + Vec3vf<M> v2; // 3rd vertex of the triangles (position at time step t0) + Vec3vf<M> dv0; // difference vector between time steps t0 and t1 for first vertex + Vec3vf<M> dv1; // difference vector between time steps t0 and t1 for second vertex + Vec3vf<M> dv2; // difference vector between time steps t0 and t1 for third vertex + private: + vuint<M> geomIDs; // geometry ID (-1 marks a slot that holds no triangle) + vuint<M> primIDs; // primitive ID + }; + + template<int M> + typename TriangleMvMB<M>::Type TriangleMvMB<M>::type; + + typedef TriangleMvMB<4> Triangle4vMB; +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h new file mode 100644 index 0000000000..35a260d826 --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h @@ -0,0 +1,211 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "triangle.h" +#include "intersector_epilog.h" + +namespace embree +{ + namespace isa + { + /*! Intersects M motion blur triangles with 1 ray, using MoellerTrumboreIntersector1<Mx> as the precalculation type */ + template<int M, int Mx, bool filter> + struct TriangleMvMBIntersector1Moeller + { + typedef TriangleMvMB<M> Primitive; + typedef MoellerTrumboreIntersector1<Mx> Precalculations; + + /*! Intersect a ray with the M triangles and updates the hit. 
*/ + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleMvMB<M>& tri) + { + STAT3(normal.trav_prims,1,1,1); + const Vec3vf<Mx> time(ray.time()); + /* vertices at the ray time, built from the stored vertex and its difference vector (presumably madd(a,b,c) computes a*b+c, giving v+time*dv - TODO confirm) */ const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); + const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); + const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); + pre.intersect(ray,v0,v1,v2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /*! Test if the ray is occluded by one of M triangles. Builds the time-dependent vertices the same way as intersect and returns the result of pre.intersect run with the Occluded1EpilogM epilog. */ + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleMvMB<M>& tri) + { + STAT3(shadow.trav_prims,1,1,1); + const Vec3vf<Mx> time(ray.time()); + const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); + const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); + const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); + return pre.intersect(ray,v0,v1,v2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) + { + /* point queries are not handled here: the call is forwarded unchanged to PrimitivePointQuery1<Primitive> */ return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); + } + }; + + /*! Intersects M motion blur triangles with K rays, using MoellerTrumboreIntersectorK<Mx,K> as the precalculation type. */ + template<int M, int Mx, int K, bool filter> + struct TriangleMvMBIntersectorKMoeller + { + typedef TriangleMvMB<M> Primitive; + typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; + + /*! Intersects K rays with M triangles. 
*/ + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri) + { + for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++) + { + /* the loop ends at the first slot that tri.valid(i) reports as invalid */ if (!tri.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + const Vec3vf<K> time(ray.time()); + /* triangle i is broadcast to K lanes and combined with each ray's own time (presumably madd(a,b,c) computes a*b+c, giving v+time*dv - TODO confirm) */ const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i)); + const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i)); + const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i)); + pre.intersectK(valid_i,ray,v0,v1,v2,IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); + } + } + + /*! Test for K rays if they are occluded by any of the M triangles. Returns the inverse of the working mask valid0 that remains after the loop. */ + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri) + { + /* working copy of the input mask; it is handed to OccludedKEpilogM (presumably cleared there for occluded lanes - TODO confirm) */ vbool<K> valid0 = valid_i; + + for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++) + { + if (!tri.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid0),K); + const Vec3vf<K> time(ray.time()); + const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i)); + const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i)); + const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i)); + pre.intersectK(valid0,ray,v0,v1,v2,OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); + /* early exit once no lane is left in the working mask */ if (none(valid0)) break; + } + return !valid0; + } + + /*! Intersect a ray with M triangles and updates the hit. 
*/ + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri) + { + STAT3(normal.trav_prims,1,1,1); + /* single-ray path: only the time of ray k of the packet is used */ const Vec3vf<Mx> time(ray.time()[k]); + /* vertices at that time, built from the stored vertex and its difference vector (presumably madd(a,b,c) computes a*b+c, giving v+time*dv - TODO confirm) */ const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); + const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); + const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); + pre.intersect(ray,k,v0,v1,v2,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + } + + /*! Test if the ray is occluded by one of the M triangles. Single-ray path for ray k of the packet; returns the result of pre.intersect run with the Occluded1KEpilogM epilog. */ + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri) + { + STAT3(shadow.trav_prims,1,1,1); + const Vec3vf<Mx> time(ray.time()[k]); + const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); + const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); + const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); + return pre.intersect(ray,k,v0,v1,v2,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + } + }; + + /*! Intersects M motion blur triangles with 1 ray, using PlueckerIntersector1<Mx> as the precalculation type */ + template<int M, int Mx, bool filter> + struct TriangleMvMBIntersector1Pluecker + { + typedef TriangleMvMB<M> Primitive; + typedef PlueckerIntersector1<Mx> Precalculations; + + /*! Intersect a ray with the M triangles and updates the hit. 
*/ + static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleMvMB<M>& tri) + { + STAT3(normal.trav_prims,1,1,1); + const Vec3vf<Mx> time(ray.time()); + /* vertices at the ray time, built from the stored vertex and its difference vector (presumably madd(a,b,c) computes a*b+c, giving v+time*dv - TODO confirm) */ const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); + const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); + const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); + pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + /*! Test if the ray is occluded by one of M triangles. Builds the time-dependent vertices the same way as intersect and returns the result of pre.intersect run with the Occluded1EpilogM epilog. */ + static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleMvMB<M>& tri) + { + STAT3(shadow.trav_prims,1,1,1); + const Vec3vf<Mx> time(ray.time()); + const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); + const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); + const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); + return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + } + + static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) + { + /* point queries are not handled here: the call is forwarded unchanged to PrimitivePointQuery1<Primitive> */ return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); + } + }; + + /*! Intersects M motion blur triangles with K rays, using PlueckerIntersectorK<Mx,K> as the precalculation type. */ + template<int M, int Mx, int K, bool filter> + struct TriangleMvMBIntersectorKPluecker + { + typedef TriangleMvMB<M> Primitive; + typedef PlueckerIntersectorK<Mx,K> Precalculations; + + /*! Intersects K rays with M triangles. 
*/ + static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri) + { + for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++) + { + /* the loop ends at the first slot that tri.valid(i) reports as invalid */ if (!tri.valid(i)) break; + STAT3(normal.trav_prims,1,popcnt(valid_i),K); + const Vec3vf<K> time(ray.time()); + /* triangle i is broadcast to K lanes and combined with each ray's own time (presumably madd(a,b,c) computes a*b+c, giving v+time*dv - TODO confirm) */ const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i)); + const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i)); + const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i)); + pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); + } + } + + /*! Test for K rays if they are occluded by any of the M triangles. Returns the inverse of the working mask valid0 that remains after the loop. */ + static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri) + { + /* working copy of the input mask; it is handed to OccludedKEpilogM (presumably cleared there for occluded lanes - TODO confirm) */ vbool<K> valid0 = valid_i; + + for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++) + { + if (!tri.valid(i)) break; + STAT3(shadow.trav_prims,1,popcnt(valid0),K); + const Vec3vf<K> time(ray.time()); + const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i)); + const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i)); + const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i)); + pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); + /* early exit once no lane is left in the working mask */ if (none(valid0)) break; + } + return !valid0; + } + + /*! Intersect a ray with M triangles and updates the hit. 
*/ + static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri) + { + STAT3(normal.trav_prims,1,1,1); + /* single-ray path: only the time of ray k of the packet is used */ const Vec3vf<Mx> time(ray.time()[k]); + /* vertices at that time, built from the stored vertex and its difference vector (presumably madd(a,b,c) computes a*b+c, giving v+time*dv - TODO confirm) */ const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); + const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); + const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); + pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + } + + /*! Test if the ray is occluded by one of the M triangles. Single-ray path for ray k of the packet; returns the result of pre.intersect run with the Occluded1KEpilogM epilog. */ + static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri) + { + STAT3(shadow.trav_prims,1,1,1); + const Vec3vf<Mx> time(ray.time()[k]); + const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); + const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); + const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); + return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + } + }; + } +} |