diff options
Diffstat (limited to 'thirdparty/embree-aarch64/kernels/geometry')
73 files changed, 0 insertions, 19139 deletions
diff --git a/thirdparty/embree-aarch64/kernels/geometry/cone.h b/thirdparty/embree-aarch64/kernels/geometry/cone.h deleted file mode 100644 index 961ef86160..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/cone.h +++ /dev/null @@ -1,321 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" - -namespace embree -{ - namespace isa - { - struct Cone - { - const Vec3fa p0; //!< start position of cone - const Vec3fa p1; //!< end position of cone - const float r0; //!< start radius of cone - const float r1; //!< end radius of cone - - __forceinline Cone(const Vec3fa& p0, const float r0, const Vec3fa& p1, const float r1) - : p0(p0), p1(p1), r0(r0), r1(r1) {} - - __forceinline bool intersect(const Vec3fa& org, const Vec3fa& dir, - BBox1f& t_o, - float& u0_o, Vec3fa& Ng0_o, - float& u1_o, Vec3fa& Ng1_o) const - { - /* calculate quadratic equation to solve */ - const Vec3fa v0 = p0-org; - const Vec3fa v1 = p1-org; - - const float rl = rcp_length(v1-v0); - const Vec3fa P0 = v0, dP = (v1-v0)*rl; - const float dr = (r1-r0)*rl; - const Vec3fa O = -P0, dO = dir; - - const float dOdO = dot(dO,dO); - const float OdO = dot(dO,O); - const float OO = dot(O,O); - const float dOz = dot(dP,dO); - const float Oz = dot(dP,O); - - const float R = r0 + Oz*dr; - const float A = dOdO - sqr(dOz) * (1.0f+sqr(dr)); - const float B = 2.0f * (OdO - dOz*(Oz + R*dr)); - const float C = OO - (sqr(Oz) + sqr(R)); - - /* we miss the cone if determinant is smaller than zero */ - const float D = B*B - 4.0f*A*C; - if (D < 0.0f) return false; - - /* special case for rays that are "parallel" to the cone */ - const float eps = float(1<<8)*float(ulp)*max(abs(dOdO),abs(sqr(dOz))); - if (unlikely(abs(A) < eps)) - { - /* cylinder case */ - if (abs(dr) < 16.0f*float(ulp)) { - if (C <= 0.0f) { t_o = BBox1f(neg_inf,pos_inf); return true; } - else { t_o = BBox1f(pos_inf,neg_inf); return false; } - } - - /* cone case */ - else - { - /* if we hit the negative cone there cannot be a hit */ - const float t = -C/B; - const float z0 = Oz+t*dOz; - const float z0r = r0+z0*dr; - if (z0r < 0.0f) return false; - - /* test if we start inside or outside the cone */ - if (dOz*dr > 0.0f) t_o = BBox1f(t,pos_inf); - else t_o = BBox1f(neg_inf,t); - } - } - - /* standard case for "non-parallel" rays */ - else - { - const float Q = sqrt(D); - const float rcp_2A = rcp(2.0f*A); - t_o.lower = (-B-Q)*rcp_2A; - t_o.upper = (-B+Q)*rcp_2A; - - /* standard case where both hits are on same cone */ - if (likely(A > 0.0f)) { - const float z0 = Oz+t_o.lower*dOz; - const float z0r = r0+z0*dr; - if (z0r < 0.0f) return false; - } - - /* special case where the hits are on the positive and negative cone */ - else - { - /* depending on the ray direction and the open direction - * of the cone we have a hit from inside or outside the - * cone */ - if (dOz*dr > 0) t_o.upper = pos_inf; - else t_o.lower = neg_inf; - } - } - - /* calculates u and Ng for near hit */ - { - u0_o = (Oz+t_o.lower*dOz)*rl; - const Vec3fa Pr = t_o.lower*dir; - const Vec3fa Pl = v0 + u0_o*(v1-v0); - const Vec3fa R = normalize(Pr-Pl); - const Vec3fa U = (p1-p0)+(r1-r0)*R; - const Vec3fa V = cross(p1-p0,R); - Ng0_o = cross(V,U); - } - - /* calculates u and Ng for far hit */ - { - u1_o = (Oz+t_o.upper*dOz)*rl; - const Vec3fa Pr = t_o.upper*dir; - const Vec3fa Pl = v0 + u1_o*(v1-v0); - const Vec3fa R = normalize(Pr-Pl); - const Vec3fa U = (p1-p0)+(r1-r0)*R; - const Vec3fa V = cross(p1-p0,R); - Ng1_o = cross(V,U); - } - return true; - } - - __forceinline bool intersect(const Vec3fa& org, const Vec3fa& dir, BBox1f& t_o) const - { - float u0_o; Vec3fa Ng0_o; float u1_o; Vec3fa Ng1_o; - return intersect(org,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o); - } - - static bool verify(const size_t id, const Cone& cone, const Ray& ray, bool shouldhit, const float t0, const float t1) - { - float eps = 0.001f; - BBox1f t; bool hit; - hit = cone.intersect(ray.org,ray.dir,t); - - bool failed = hit != shouldhit; - if (shouldhit) failed |= std::isinf(t0) ? t0 != t.lower : (t0 == -1E6) ? t.lower > -1E6f : abs(t0-t.lower) > eps; - if (shouldhit) failed |= std::isinf(t1) ? t1 != t.upper : (t1 == +1E6) ? t.upper < +1E6f : abs(t1-t.upper) > eps; - if (!failed) return true; - embree_cout << "Cone test " << id << " failed: cone = " << cone << ", ray = " << ray << ", hit = " << hit << ", t = " << t << embree_endl; - return false; - } - - /* verify cone class */ - static bool verify() - { - bool passed = true; - const Cone cone0(Vec3fa(0.0f,0.0f,0.0f),0.0f,Vec3fa(1.0f,0.0f,0.0f),1.0f); - passed &= verify(0,cone0,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,3.0f,pos_inf); - passed &= verify(1,cone0,Ray(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa(-1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,1.0f); - passed &= verify(2,cone0,Ray(Vec3fa(-1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),false,0.0f,0.0f); - passed &= verify(3,cone0,Ray(Vec3fa(+1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,1.0f,3.0f); - passed &= verify(4,cone0,Ray(Vec3fa(-1.0f,0.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,1.0f,pos_inf); - passed &= verify(5,cone0,Ray(Vec3fa(+1.0f,0.0f,0.0f),Vec3fa(-1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,1.0f); - passed &= verify(6,cone0,Ray(Vec3fa(+0.0f,0.0f,1.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,1.0f,1.0f); - passed &= verify(7,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(-1.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f); - passed &= verify(8,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(+1.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.5f,+1E6); - passed &= verify(9,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(-1.0f,+1.0f,+0.0f),0.0f,float(inf)),true,-1E6,-0.5f); - const Cone cone1(Vec3fa(0.0f,0.0f,0.0f),1.0f,Vec3fa(1.0f,0.0f,0.0f),0.0f); - passed &= verify(10,cone1,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,2.0f); - passed &= verify(11,cone1,Ray(Vec3fa(-1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,0.0f,4.0f); - const Cone cylinder(Vec3fa(0.0f,0.0f,0.0f),1.0f,Vec3fa(1.0f,0.0f,0.0f),1.0f); - passed &= verify(12,cylinder,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f); - passed &= verify(13,cylinder,Ray(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f); - passed &= verify(14,cylinder,Ray(Vec3fa(+2.0f,1.0f,2.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f); - passed &= verify(15,cylinder,Ray(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf); - passed &= verify(16,cylinder,Ray(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf); - passed &= verify(17,cylinder,Ray(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf); - passed &= verify(18,cylinder,Ray(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf); - return passed; - } - - /*! output operator */ - friend __forceinline embree_ostream operator<<(embree_ostream cout, const Cone& c) { - return cout << "Cone { p0 = " << c.p0 << ", r0 = " << c.r0 << ", p1 = " << c.p1 << ", r1 = " << c.r1 << "}"; - } - }; - - template<int N> - struct ConeN - { - typedef Vec3<vfloat<N>> Vec3vfN; - - const Vec3vfN p0; //!< start position of cone - const Vec3vfN p1; //!< end position of cone - const vfloat<N> r0; //!< start radius of cone - const vfloat<N> r1; //!< end radius of cone - - __forceinline ConeN(const Vec3vfN& p0, const vfloat<N>& r0, const Vec3vfN& p1, const vfloat<N>& r1) - : p0(p0), p1(p1), r0(r0), r1(r1) {} - - __forceinline Cone operator[] (const size_t i) const - { - assert(i<N); - return Cone(Vec3fa(p0.x[i],p0.y[i],p0.z[i]),r0[i],Vec3fa(p1.x[i],p1.y[i],p1.z[i]),r1[i]); - } - - __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir, - BBox<vfloat<N>>& t_o, - vfloat<N>& u0_o, Vec3vfN& Ng0_o, - vfloat<N>& u1_o, Vec3vfN& Ng1_o) const - { - /* calculate quadratic equation to solve */ - const Vec3vfN v0 = p0-Vec3vfN(org); - const Vec3vfN v1 = p1-Vec3vfN(org); - - const vfloat<N> rl = rcp_length(v1-v0); - const Vec3vfN P0 = v0, dP = (v1-v0)*rl; - const vfloat<N> dr = (r1-r0)*rl; - const Vec3vfN O = -P0, dO = dir; - - const vfloat<N> dOdO = dot(dO,dO); - const vfloat<N> OdO = dot(dO,O); - const vfloat<N> OO = dot(O,O); - const vfloat<N> dOz = dot(dP,dO); - const vfloat<N> Oz = dot(dP,O); - - const vfloat<N> R = r0 + Oz*dr; - const vfloat<N> A = dOdO - sqr(dOz) * (vfloat<N>(1.0f)+sqr(dr)); - const vfloat<N> B = 2.0f * (OdO - dOz*(Oz + R*dr)); - const vfloat<N> C = OO - (sqr(Oz) + sqr(R)); - - /* we miss the cone if determinant is smaller than zero */ - const vfloat<N> D = B*B - 4.0f*A*C; - vbool<N> valid = D >= 0.0f; - if (none(valid)) return valid; - - /* special case for rays that are "parallel" to the cone */ - const vfloat<N> eps = float(1<<8)*float(ulp)*max(abs(dOdO),abs(sqr(dOz))); - const vbool<N> validt = valid & (abs(A) < eps); - const vbool<N> validf = valid & !(abs(A) < eps); - if (unlikely(any(validt))) - { - const vboolx validtt = validt & (abs(dr) < 16.0f*float(ulp)); - const vboolx validtf = validt & (abs(dr) >= 16.0f*float(ulp)); - - /* cylinder case */ - if (unlikely(any(validtt))) - { - t_o.lower = select(validtt, select(C <= 0.0f, vfloat<N>(neg_inf), vfloat<N>(pos_inf)), t_o.lower); - t_o.upper = select(validtt, select(C <= 0.0f, vfloat<N>(pos_inf), vfloat<N>(neg_inf)), t_o.upper); - valid &= !validtt | C <= 0.0f; - } - - /* cone case */ - if (any(validtf)) - { - /* if we hit the negative cone there cannot be a hit */ - const vfloat<N> t = -C/B; - const vfloat<N> z0 = Oz+t*dOz; - const vfloat<N> z0r = r0+z0*dr; - valid &= !validtf | z0r >= 0.0f; - - /* test if we start inside or outside the cone */ - t_o.lower = select(validtf, select(dOz*dr > 0.0f, t, vfloat<N>(neg_inf)), t_o.lower); - t_o.upper = select(validtf, select(dOz*dr > 0.0f, vfloat<N>(pos_inf), t), t_o.upper); - } - } - - /* standard case for "non-parallel" rays */ - if (likely(any(validf))) - { - const vfloat<N> Q = sqrt(D); - const vfloat<N> rcp_2A = 0.5f*rcp(A); - t_o.lower = select(validf, (-B-Q)*rcp_2A, t_o.lower); - t_o.upper = select(validf, (-B+Q)*rcp_2A, t_o.upper); - - /* standard case where both hits are on same cone */ - const vbool<N> validft = validf & A>0.0f; - const vbool<N> validff = validf & !(A>0.0f); - if (any(validft)) { - const vfloat<N> z0 = Oz+t_o.lower*dOz; - const vfloat<N> z0r = r0+z0*dr; - valid &= !validft | z0r >= 0.0f; - } - - /* special case where the hits are on the positive and negative cone */ - if (any(validff)) { - /* depending on the ray direction and the open direction - * of the cone we have a hit from inside or outside the - * cone */ - t_o.lower = select(validff, select(dOz*dr > 0.0f, t_o.lower, float(neg_inf)), t_o.lower); - t_o.upper = select(validff, select(dOz*dr > 0.0f, float(pos_inf), t_o.upper), t_o.upper); - } - } - - /* calculates u and Ng for near hit */ - { - u0_o = (Oz+t_o.lower*dOz)*rl; - const Vec3vfN Pr = t_o.lower*Vec3vfN(dir); - const Vec3vfN Pl = v0 + u0_o*(v1-v0); - const Vec3vfN R = normalize(Pr-Pl); - const Vec3vfN U = (p1-p0)+(r1-r0)*R; - const Vec3vfN V = cross(p1-p0,R); - Ng0_o = cross(V,U); - } - - /* calculates u and Ng for far hit */ - { - u1_o = (Oz+t_o.upper*dOz)*rl; - const Vec3vfN Pr = t_o.lower*Vec3vfN(dir); - const Vec3vfN Pl = v0 + u1_o*(v1-v0); - const Vec3vfN R = normalize(Pr-Pl); - const Vec3vfN U = (p1-p0)+(r1-r0)*R; - const Vec3vfN V = cross(p1-p0,R); - Ng1_o = cross(V,U); - } - return valid; - } - - __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir, BBox<vfloat<N>>& t_o) const - { - vfloat<N> u0_o; Vec3vfN Ng0_o; vfloat<N> u1_o; Vec3vfN Ng1_o; - return intersect(org,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o); - } - }; - } -} - diff --git a/thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h deleted file mode 100644 index 0902baff7d..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h +++ /dev/null @@ -1,209 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" -#include "curve_intersector_precalculations.h" - -namespace embree -{ - namespace isa - { - namespace __coneline_internal - { - template<int M, typename Epilog, typename ray_tfar_func> - static __forceinline bool intersectCone(const vbool<M>& valid_i, - const Vec3vf<M>& ray_org_in, const Vec3vf<M>& ray_dir, - const vfloat<M>& ray_tnear, const ray_tfar_func& ray_tfar, - const Vec4vf<M>& v0, const Vec4vf<M>& v1, - const vbool<M>& cL, const vbool<M>& cR, - const Epilog& epilog) - { - vbool<M> valid = valid_i; - - /* move ray origin closer to make calculations numerically stable */ - const vfloat<M> dOdO = sqr(ray_dir); - const vfloat<M> rcp_dOdO = rcp(dOdO); - const Vec3vf<M> center = vfloat<M>(0.5f)*(v0.xyz()+v1.xyz()); - const vfloat<M> dt = dot(center-ray_org_in,ray_dir)*rcp_dOdO; - const Vec3vf<M> ray_org = ray_org_in + dt*ray_dir; - - const Vec3vf<M> dP = v1.xyz() - v0.xyz(); - const Vec3vf<M> p0 = ray_org - v0.xyz(); - const Vec3vf<M> p1 = ray_org - v1.xyz(); - - const vfloat<M> dPdP = sqr(dP); - const vfloat<M> dP0 = dot(p0,dP); - const vfloat<M> dP1 = dot(p1,dP); - const vfloat<M> dOdP = dot(ray_dir,dP); - - // intersect cone body - const vfloat<M> dr = v0.w - v1.w; - const vfloat<M> hy = dPdP + sqr(dr); - const vfloat<M> dO0 = dot(ray_dir,p0); - const vfloat<M> OO = sqr(p0); - const vfloat<M> dPdP2 = sqr(dPdP); - const vfloat<M> dPdPr0 = dPdP*v0.w; - - const vfloat<M> A = dPdP2 - sqr(dOdP)*hy; - const vfloat<M> B = dPdP2*dO0 - dP0*dOdP*hy + dPdPr0*(dr*dOdP); - const vfloat<M> C = dPdP2*OO - sqr(dP0)*hy + dPdPr0*(2.0f*dr*dP0 - dPdPr0); - - const vfloat<M> D = B*B - A*C; - valid &= D >= 0.0f; - if (unlikely(none(valid))) { - return false; - } - - /* standard case for "non-parallel" rays */ - const vfloat<M> Q = sqrt(D); - const vfloat<M> rcp_A = rcp(A); - /* special case for rays that are "parallel" to the cone - assume miss */ - const vbool<M> isParallel = abs(A) <= min_rcp_input; - - vfloat<M> t_cone_lower = select (isParallel, neg_inf, (-B-Q)*rcp_A); - vfloat<M> t_cone_upper = select (isParallel, pos_inf, (-B+Q)*rcp_A); - const vfloat<M> y_lower = dP0 + t_cone_lower*dOdP; - const vfloat<M> y_upper = dP0 + t_cone_upper*dOdP; - t_cone_lower = select(valid & y_lower > 0.0f & y_lower < dPdP, t_cone_lower, pos_inf); - t_cone_upper = select(valid & y_upper > 0.0f & y_upper < dPdP, t_cone_upper, neg_inf); - - const vbool<M> hitDisk0 = valid & cL; - const vbool<M> hitDisk1 = valid & cR; - const vfloat<M> rcp_dOdP = rcp(dOdP); - const vfloat<M> t_disk0 = select (hitDisk0, select (sqr(p0*dOdP-ray_dir*dP0)<(sqr(v0.w)*sqr(dOdP)), -dP0*rcp_dOdP, pos_inf), pos_inf); - const vfloat<M> t_disk1 = select (hitDisk1, select (sqr(p1*dOdP-ray_dir*dP1)<(sqr(v1.w)*sqr(dOdP)), -dP1*rcp_dOdP, pos_inf), pos_inf); - const vfloat<M> t_disk_lower = min(t_disk0, t_disk1); - const vfloat<M> t_disk_upper = max(t_disk0, t_disk1); - - const vfloat<M> t_lower = min(t_cone_lower, t_disk_lower); - const vfloat<M> t_upper = max(t_cone_upper, select(t_lower==t_disk_lower, - select(t_disk_upper==vfloat<M>(pos_inf),neg_inf,t_disk_upper), - select(t_disk_lower==vfloat<M>(pos_inf),neg_inf,t_disk_lower))); - - const vbool<M> valid_lower = valid & ray_tnear <= dt+t_lower & dt+t_lower <= ray_tfar() & t_lower != vfloat<M>(pos_inf); - const vbool<M> valid_upper = valid & ray_tnear <= dt+t_upper & dt+t_upper <= ray_tfar() & t_upper != vfloat<M>(neg_inf); - - const vbool<M> valid_first = valid_lower | valid_upper; - if (unlikely(none(valid_first))) - return false; - - const vfloat<M> t_first = select(valid_lower, t_lower, t_upper); - const vfloat<M> y_first = select(valid_lower, y_lower, y_upper); - - const vfloat<M> rcp_dPdP = rcp(dPdP); - const Vec3vf<M> dP2drr0dP = dPdP*dr*v0.w*dP; - const Vec3vf<M> dPhy = dP*hy; - const vbool<M> cone_hit_first = valid & (t_first == t_cone_lower | t_first == t_cone_upper); - const vbool<M> disk0_hit_first = valid & (t_first == t_disk0); - const Vec3vf<M> Ng_first = select(cone_hit_first, dPdP2*(p0+t_first*ray_dir)+dP2drr0dP-dPhy*y_first, select(disk0_hit_first, -dP, dP)); - const vfloat<M> u_first = select(cone_hit_first, y_first*rcp_dPdP, select(disk0_hit_first, vfloat<M>(zero), vfloat<M>(one))); - - /* invoke intersection filter for first hit */ - RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_first,Ng_first); - const bool is_hit_first = epilog(valid_first, hit); - - /* check for possible second hits before potentially accepted hit */ - const vfloat<M> t_second = t_upper; - const vfloat<M> y_second = y_upper; - const vbool<M> valid_second = valid_lower & valid_upper & (dt+t_upper <= ray_tfar()); - if (unlikely(none(valid_second))) - return is_hit_first; - - /* invoke intersection filter for second hit */ - const vbool<M> cone_hit_second = t_second == t_cone_lower | t_second == t_cone_upper; - const vbool<M> disk0_hit_second = t_second == t_disk0; - const Vec3vf<M> Ng_second = select(cone_hit_second, dPdP2*(p0+t_second*ray_dir)+dP2drr0dP-dPhy*y_second, select(disk0_hit_second, -dP, dP)); - const vfloat<M> u_second = select(cone_hit_second, y_second*rcp_dPdP, select(disk0_hit_first, vfloat<M>(zero), vfloat<M>(one))); - - hit = RoundLineIntersectorHitM<M>(u_second,zero,dt+t_second,Ng_second); - const bool is_hit_second = epilog(valid_second, hit); - - return is_hit_first | is_hit_second; - } - } - - template<int M> - struct ConeLineIntersectorHitM - { - __forceinline ConeLineIntersectorHitM() {} - - __forceinline ConeLineIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng) - : vu(u), vv(v), vt(t), vNg(Ng) {} - - __forceinline void finalize() {} - - __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } - __forceinline float t (const size_t i) const { return vt[i]; } - __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } - - public: - vfloat<M> vu; - vfloat<M> vv; - vfloat<M> vt; - Vec3vf<M> vNg; - }; - - template<int M> - struct ConeCurveIntersector1 - { - typedef CurvePrecalculations1 Precalculations; - - struct ray_tfar { - Ray& ray; - __forceinline ray_tfar(Ray& ray) : ray(ray) {} - __forceinline vfloat<M> operator() () const { return ray.tfar; }; - }; - - template<typename Epilog> - static __forceinline bool intersect(const vbool<M>& valid_i, - Ray& ray, - IntersectContext* context, - const LineSegments* geom, - const Precalculations& pre, - const Vec4vf<M>& v0i, const Vec4vf<M>& v1i, - const vbool<M>& cL, const vbool<M>& cR, - const Epilog& epilog) - { - const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); - const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z); - const vfloat<M> ray_tnear(ray.tnear()); - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); - return __coneline_internal::intersectCone(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray),v0,v1,cL,cR,epilog); - } - }; - - template<int M, int K> - struct ConeCurveIntersectorK - { - typedef CurvePrecalculationsK<K> Precalculations; - - struct ray_tfar { - RayK<K>& ray; - size_t k; - __forceinline ray_tfar(RayK<K>& ray, size_t k) : ray(ray), k(k) {} - __forceinline vfloat<M> operator() () const { return ray.tfar[k]; }; - }; - - template<typename Epilog> - static __forceinline bool intersect(const vbool<M>& valid_i, - RayK<K>& ray, size_t k, - IntersectContext* context, - const LineSegments* geom, - const Precalculations& pre, - const Vec4vf<M>& v0i, const Vec4vf<M>& v1i, - const vbool<M>& cL, const vbool<M>& cR, - const Epilog& epilog) - { - const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); - const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); - const vfloat<M> ray_tnear = ray.tnear()[k]; - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); - return __coneline_internal::intersectCone(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray,k),v0,v1,cL,cR,epilog); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h deleted file mode 100644 index d47218eb8b..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "coneline_intersector.h" -#include "intersector_epilog.h" - -namespace embree -{ - namespace isa - { - template<int M, int Mx, bool filter> - struct ConeCurveMiIntersector1 - { - typedef LineMi<M> Primitive; - typedef CurvePrecalculations1 Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line) - { - STAT3(normal.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; - vbool<M> cL,cR; - line.gather(v0,v1,cL,cR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) - { - STAT3(shadow.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; - vbool<M> cL,cR; - line.gather(v0,v1,cL,cR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - return ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); - return false; - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line); - } - }; - - template<int M, int Mx, bool filter> - struct ConeCurveMiMBIntersector1 - { - typedef LineMi<M> Primitive; - typedef CurvePrecalculations1 Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line) - { - STAT3(normal.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; - vbool<M> cL,cR; - line.gather(v0,v1,cL,cR,geom,ray.time()); - const vbool<Mx> valid = line.template valid<Mx>(); - ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) - { - STAT3(shadow.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; - vbool<M> cL,cR; - line.gather(v0,v1,cL,cR,geom,ray.time()); - const vbool<Mx> valid = line.template valid<Mx>(); - return ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); - return false; - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line); - } - }; - - template<int M, int Mx, int K, bool filter> - struct ConeCurveMiIntersectorK - { - typedef LineMi<M> Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) - { - STAT3(normal.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; - vbool<M> cL,cR; - line.gather(v0,v1,cL,cR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) - { - STAT3(shadow.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; - vbool<M> cL,cR; - line.gather(v0,v1,cL,cR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - return ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); - } - }; - - template<int M, int Mx, int K, bool filter> - struct ConeCurveMiMBIntersectorK - { - typedef LineMi<M> Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) - { - STAT3(normal.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; - vbool<M> cL,cR; - line.gather(v0,v1,cL,cR,geom,ray.time()[k]); - const vbool<Mx> valid = line.template valid<Mx>(); - ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) - { - STAT3(shadow.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; - vbool<M> cL,cR; - line.gather(v0,v1,cL,cR,geom,ray.time()[k]); - const vbool<Mx> valid = line.template valid<Mx>(); - return ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi.h deleted file mode 100644 index 51384f1959..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curveNi.h +++ /dev/null @@ -1,222 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "primitive.h" -#include "curve_intersector_precalculations.h" - -namespace embree -{ - template<int M> - struct CurveNi - { - struct Type : public PrimitiveType { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - - /* Returns maximum number of stored primitives */ - static __forceinline size_t max_size() { return M; } - - /* Returns required number of primitive blocks for N primitives */ - static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; } - - static __forceinline size_t bytes(size_t N) - { - const size_t f = N/M, r = N%M; - static_assert(sizeof(CurveNi) == 22+25*M, "internal data layout issue"); - return f*sizeof(CurveNi) + (r!=0)*(22 + 25*r); - } - - public: - - /*! Default constructor. */ - __forceinline CurveNi () {} - - /*! fill curve from curve list */ - __forceinline void fill(const PrimRef* prims, size_t& begin, size_t _end, Scene* scene) - { - size_t end = min(begin+M,_end); - N = (uint8_t)(end-begin); - const unsigned int geomID0 = prims[begin].geomID(); - this->geomID(N) = geomID0; - ty = (uint8_t) scene->get(geomID0)->getType(); - - /* encode all primitives */ - BBox3fa bounds = empty; - for (size_t i=0; i<N; i++) - { - const PrimRef& prim = prims[begin+i]; - const unsigned int geomID = prim.geomID(); assert(geomID == geomID0); - const unsigned int primID = prim.primID(); - bounds.extend(scene->get(geomID)->vbounds(primID)); - } - - /* calculate offset and scale */ - Vec3fa loffset = bounds.lower; - float lscale = reduce_min(256.0f/(bounds.size()*sqrt(3.0f))); - if (bounds.size() == Vec3fa(zero)) lscale = 0.0f; - *this->offset(N) = loffset; - *this->scale(N) = lscale; - - /* encode all primitives */ - for (size_t i=0; i<M && begin<end; i++, begin++) - { - const PrimRef& prim = prims[begin]; - const unsigned int geomID = prim.geomID(); - const unsigned int primID = prim.primID(); - const LinearSpace3fa space2 = scene->get(geomID)->computeAlignedSpace(primID); - - const LinearSpace3fa space3(trunc(126.0f*space2.vx),trunc(126.0f*space2.vy),trunc(126.0f*space2.vz)); - const BBox3fa bounds = scene->get(geomID)->vbounds(loffset,lscale,max(length(space3.vx),length(space3.vy),length(space3.vz)),space3.transposed(),primID); - - bounds_vx_x(N)[i] = (int8_t) space3.vx.x; - bounds_vx_y(N)[i] = (int8_t) space3.vx.y; - bounds_vx_z(N)[i] = (int8_t) space3.vx.z; - bounds_vx_lower(N)[i] = (short) clamp(floor(bounds.lower.x),-32767.0f,32767.0f); - bounds_vx_upper(N)[i] = (short) clamp(ceil (bounds.upper.x),-32767.0f,32767.0f); - assert(-32767.0f <= floor(bounds.lower.x) && floor(bounds.lower.x) <= 32767.0f); - assert(-32767.0f <= ceil (bounds.upper.x) && ceil (bounds.upper.x) <= 32767.0f); - - bounds_vy_x(N)[i] = (int8_t) space3.vy.x; - bounds_vy_y(N)[i] = (int8_t) space3.vy.y; - bounds_vy_z(N)[i] = (int8_t) space3.vy.z; - bounds_vy_lower(N)[i] = (short) clamp(floor(bounds.lower.y),-32767.0f,32767.0f); - bounds_vy_upper(N)[i] = (short) clamp(ceil (bounds.upper.y),-32767.0f,32767.0f); - assert(-32767.0f <= floor(bounds.lower.y) && floor(bounds.lower.y) <= 32767.0f); - assert(-32767.0f <= ceil (bounds.upper.y) && ceil (bounds.upper.y) <= 32767.0f); - - bounds_vz_x(N)[i] = (int8_t) space3.vz.x; - bounds_vz_y(N)[i] = (int8_t) space3.vz.y; - bounds_vz_z(N)[i] = (int8_t) space3.vz.z; - bounds_vz_lower(N)[i] = (short) clamp(floor(bounds.lower.z),-32767.0f,32767.0f); - bounds_vz_upper(N)[i] = (short) clamp(ceil (bounds.upper.z),-32767.0f,32767.0f); - assert(-32767.0f <= floor(bounds.lower.z) && floor(bounds.lower.z) <= 32767.0f); - assert(-32767.0f <= ceil (bounds.upper.z) && ceil (bounds.upper.z) <= 32767.0f); - - this->primID(N)[i] = primID; - } - } - - template<typename BVH, typename Allocator> - __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) - { - size_t start = set.begin(); - size_t items = CurveNi::blocks(set.size()); - size_t numbytes = CurveNi::bytes(set.size()); - CurveNi* accel = (CurveNi*) alloc.malloc1(numbytes,BVH::byteAlignment); - for (size_t i=0; i<items; i++) { - accel[i].fill(prims,start,set.end(),bvh->scene); - } - return bvh->encodeLeaf((int8_t*)accel,items); - }; - - public: - - // 27.6 - 46 bytes per primitive - uint8_t ty; - uint8_t N; - uint8_t data[4+25*M+16]; - - /* - struct Layout - { - unsigned int geomID; - unsigned int primID[N]; - - int8_t bounds_vx_x[N]; - int8_t bounds_vx_y[N]; - int8_t bounds_vx_z[N]; - short bounds_vx_lower[N]; - short bounds_vx_upper[N]; - - int8_t bounds_vy_x[N]; - int8_t bounds_vy_y[N]; - int8_t bounds_vy_z[N]; - short bounds_vy_lower[N]; - short bounds_vy_upper[N]; - - int8_t bounds_vz_x[N]; - int8_t bounds_vz_y[N]; - int8_t bounds_vz_z[N]; - short bounds_vz_lower[N]; - short bounds_vz_upper[N]; - - Vec3f offset; - float scale; - }; - */ - - __forceinline unsigned int& geomID(size_t N) { return *(unsigned int*)((int8_t*)this+2); } - __forceinline const unsigned int& geomID(size_t N) const { return *(unsigned int*)((int8_t*)this+2); } - - __forceinline unsigned int* primID(size_t N) { return (unsigned int*)((int8_t*)this+6); } - __forceinline const unsigned int* primID(size_t N) const { return (unsigned int*)((int8_t*)this+6); } - - __forceinline int8_t* bounds_vx_x(size_t N) { return (int8_t*)((int8_t*)this+6+4*N); } - __forceinline const int8_t* bounds_vx_x(size_t N) const { return (int8_t*)((int8_t*)this+6+4*N); } - - __forceinline int8_t* bounds_vx_y(size_t N) { return (int8_t*)((int8_t*)this+6+5*N); } - __forceinline const int8_t* bounds_vx_y(size_t N) const { return (int8_t*)((int8_t*)this+6+5*N); } - - __forceinline int8_t* bounds_vx_z(size_t N) { return (int8_t*)((int8_t*)this+6+6*N); } - __forceinline const int8_t* bounds_vx_z(size_t N) const { return (int8_t*)((int8_t*)this+6+6*N); } - - __forceinline short* bounds_vx_lower(size_t N) { return (short*)((int8_t*)this+6+7*N); } - __forceinline const short* bounds_vx_lower(size_t N) const { return (short*)((int8_t*)this+6+7*N); } - - __forceinline short* bounds_vx_upper(size_t N) { return (short*)((int8_t*)this+6+9*N); } - __forceinline const short* bounds_vx_upper(size_t N) const { return (short*)((int8_t*)this+6+9*N); } - - __forceinline int8_t* bounds_vy_x(size_t N) { return (int8_t*)((int8_t*)this+6+11*N); } - __forceinline const int8_t* bounds_vy_x(size_t N) const { return (int8_t*)((int8_t*)this+6+11*N); } - - __forceinline int8_t* bounds_vy_y(size_t N) { return (int8_t*)((int8_t*)this+6+12*N); } - __forceinline const int8_t* bounds_vy_y(size_t N) const { return (int8_t*)((int8_t*)this+6+12*N); } - - __forceinline int8_t* bounds_vy_z(size_t N) { return (int8_t*)((int8_t*)this+6+13*N); } - __forceinline const int8_t* bounds_vy_z(size_t N) const { return (int8_t*)((int8_t*)this+6+13*N); } - - __forceinline short* bounds_vy_lower(size_t N) { return (short*)((int8_t*)this+6+14*N); } - __forceinline const short* bounds_vy_lower(size_t N) const { return (short*)((int8_t*)this+6+14*N); } - - __forceinline short* bounds_vy_upper(size_t N) { return (short*)((int8_t*)this+6+16*N); } - __forceinline const short* bounds_vy_upper(size_t N) const { return (short*)((int8_t*)this+6+16*N); } - - __forceinline int8_t* bounds_vz_x(size_t N) { return (int8_t*)((int8_t*)this+6+18*N); } - __forceinline const int8_t* bounds_vz_x(size_t N) const { return (int8_t*)((int8_t*)this+6+18*N); } - - __forceinline int8_t* bounds_vz_y(size_t N) { return (int8_t*)((int8_t*)this+6+19*N); } - __forceinline const int8_t* bounds_vz_y(size_t N) const { return (int8_t*)((int8_t*)this+6+19*N); } - - __forceinline int8_t* bounds_vz_z(size_t N) { return (int8_t*)((int8_t*)this+6+20*N); } - __forceinline const int8_t* bounds_vz_z(size_t N) const { return (int8_t*)((int8_t*)this+6+20*N); } - - __forceinline short* bounds_vz_lower(size_t N) { return (short*)((int8_t*)this+6+21*N); } - __forceinline const short* bounds_vz_lower(size_t N) const { return (short*)((int8_t*)this+6+21*N); } - - __forceinline short* bounds_vz_upper(size_t N) { return (short*)((int8_t*)this+6+23*N); } - __forceinline const short* bounds_vz_upper(size_t N) const { return (short*)((int8_t*)this+6+23*N); } - - __forceinline Vec3f* offset(size_t N) { return (Vec3f*)((int8_t*)this+6+25*N); } - __forceinline const Vec3f* offset(size_t N) const { return (Vec3f*)((int8_t*)this+6+25*N); } - - __forceinline float* scale(size_t N) { return (float*)((int8_t*)this+6+25*N+12); } - __forceinline const float* scale(size_t N) const { return (float*)((int8_t*)this+6+25*N+12); } - - __forceinline int8_t* end(size_t N) { return (int8_t*)this+6+25*N+16; } - __forceinline const int8_t* end(size_t N) const { return (int8_t*)this+6+25*N+16; } - }; - - template<int M> - typename CurveNi<M>::Type CurveNi<M>::type; - - typedef CurveNi<4> Curve4i; - typedef CurveNi<8> Curve8i; -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h deleted file mode 100644 index 0f9038c9fc..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h +++ /dev/null @@ -1,569 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curveNi.h" - -namespace embree -{ - namespace isa - { - template<int M> - struct CurveNiIntersector1 - { - typedef CurveNi<M> Primitive; - typedef Vec3vf<M> Vec3vfM; - typedef LinearSpace3<Vec3vfM>LinearSpace3vfM; - typedef CurvePrecalculations1 Precalculations; - - static __forceinline vbool<M> intersect(Ray& ray, const Primitive& prim, vfloat<M>& tNear_o) - { - const size_t N = prim.N; - const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N)); - const Vec3fa offset = Vec3fa(offset_scale); - const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale)); - const Vec3fa org1 = (ray.org-offset)*scale; - const Vec3fa dir1 = ray.dir*scale; - - const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)), - vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)), - vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N))); - - const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1)); - const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1)); - const Vec3vfM rcp_dir2 = rcp_safe(dir2); - - const vfloat<M> t_lower_x = (vfloat<M>::load(prim.bounds_vx_lower(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); - const vfloat<M> t_upper_x = (vfloat<M>::load(prim.bounds_vx_upper(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); - const vfloat<M> t_lower_y = (vfloat<M>::load(prim.bounds_vy_lower(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); - const vfloat<M> t_upper_y = (vfloat<M>::load(prim.bounds_vy_upper(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); - const vfloat<M> t_lower_z = (vfloat<M>::load(prim.bounds_vz_lower(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); - const vfloat<M> t_upper_z = (vfloat<M>::load(prim.bounds_vz_upper(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); - - const vfloat<M> round_up (1.0f+3.0f*float(ulp)); - const vfloat<M> round_down(1.0f-3.0f*float(ulp)); - const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear())); - const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar)); - tNear_o = tNear; - return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar); - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID)); - - size_t mask1 = mask; - const size_t i1 = bscf(mask1); - if (mask) { - const unsigned int primID1 = prim.primID(N)[i1]; - geom->prefetchL1_vertices(geom->curve(primID1)); - if (mask1) { - const size_t i2 = bsf(mask1); - const unsigned int primID2 = prim.primID(N)[i2]; - geom->prefetchL2_vertices(geom->curve(primID2)); - } - } - - Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID)); - - size_t mask1 = mask; - const size_t i1 = bscf(mask1); - if (mask) { - const unsigned int primID1 = prim.primID(N)[i1]; - geom->prefetchL1_vertices(geom->curve(primID1)); - if (mask1) { - const size_t i2 = bsf(mask1); - const unsigned int primID2 = prim.primID(N)[i2]; - geom->prefetchL2_vertices(geom->curve(primID2)); - } - } - - if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - return false; - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_n(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - - unsigned int vertexID = geom->curve(primID); - Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID); - - size_t mask1 = mask; - const size_t i1 = bscf(mask1); - if (mask) { - const unsigned int primID1 = prim.primID(N)[i1]; - geom->prefetchL1_vertices(geom->curve(primID1)); - if (mask1) { - const size_t i2 = bsf(mask1); - const unsigned int primID2 = prim.primID(N)[i2]; - geom->prefetchL2_vertices(geom->curve(primID2)); - } - } - - Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_n(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - - unsigned int vertexID = geom->curve(primID); - Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID); - - size_t mask1 = mask; - const size_t i1 = bscf(mask1); - if (mask) { - const unsigned int primID1 = prim.primID(N)[i1]; - geom->prefetchL1_vertices(geom->curve(primID1)); - if (mask1) { - const size_t i2 = bsf(mask1); - const unsigned int primID2 = prim.primID(N)[i2]; - geom->prefetchL2_vertices(geom->curve(primID2)); - } - } - - if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - return false; - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_h(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID)); - Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_h(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID)); - if (Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - return false; - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_hn(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID)); - Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_hn(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID)); - if (Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - return false; - } - }; - - template<int M, int K> - struct CurveNiIntersectorK - { - typedef CurveNi<M> Primitive; - typedef Vec3vf<M> Vec3vfM; - typedef LinearSpace3<Vec3vfM>LinearSpace3vfM; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline vbool<M> intersect(RayK<K>& ray, const size_t k, const Primitive& prim, vfloat<M>& tNear_o) - { - const size_t N = prim.N; - const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N)); - const Vec3fa offset = Vec3fa(offset_scale); - const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale)); - - const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]); - const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]); - const Vec3fa org1 = (ray_org-offset)*scale; - const Vec3fa dir1 = ray_dir*scale; - - const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)), - vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)), - vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N))); - - const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1)); - const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1)); - const Vec3vfM rcp_dir2 = rcp_safe(dir2); - - const vfloat<M> t_lower_x = (vfloat<M>::load(prim.bounds_vx_lower(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); - const vfloat<M> t_upper_x = (vfloat<M>::load(prim.bounds_vx_upper(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); - const vfloat<M> t_lower_y = (vfloat<M>::load(prim.bounds_vy_lower(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); - const vfloat<M> t_upper_y = (vfloat<M>::load(prim.bounds_vy_upper(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); - const vfloat<M> t_lower_z = (vfloat<M>::load(prim.bounds_vz_lower(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); - const vfloat<M> t_upper_z = (vfloat<M>::load(prim.bounds_vz_upper(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); - - const vfloat<M> round_up (1.0f+3.0f*float(ulp)); - const vfloat<M> round_down(1.0f-3.0f*float(ulp)); - const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()[k])); - const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar[k])); - tNear_o = tNear; - return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar); - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID)); - - size_t mask1 = mask; - const size_t i1 = bscf(mask1); - if (mask) { - const unsigned int primID1 = prim.primID(N)[i1]; - geom->prefetchL1_vertices(geom->curve(primID1)); - if (mask1) { - const size_t i2 = bsf(mask1); - const unsigned int primID2 = prim.primID(N)[i2]; - geom->prefetchL2_vertices(geom->curve(primID2)); - } - } - - Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID)); - - size_t mask1 = mask; - const size_t i1 = bscf(mask1); - if (mask) { - const unsigned int primID1 = prim.primID(N)[i1]; - geom->prefetchL1_vertices(geom->curve(primID1)); - if (mask1) { - const size_t i2 = bsf(mask1); - const unsigned int primID2 = prim.primID(N)[i2]; - geom->prefetchL2_vertices(geom->curve(primID2)); - } - } - - if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - return false; - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_n(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - - unsigned int vertexID = geom->curve(primID); - Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID); - - size_t mask1 = mask; - const size_t i1 = bscf(mask1); - if (mask) { - const unsigned int primID1 = prim.primID(N)[i1]; - geom->prefetchL1_vertices(geom->curve(primID1)); - if (mask1) { - const size_t i2 = bsf(mask1); - const unsigned int primID2 = prim.primID(N)[i2]; - geom->prefetchL2_vertices(geom->curve(primID2)); - } - } - - Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,k,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_n(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - - unsigned int vertexID = geom->curve(primID); - Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID); - - size_t mask1 = mask; - const size_t i1 = bscf(mask1); - if (mask) { - const unsigned int primID1 = prim.primID(N)[i1]; - geom->prefetchL1_vertices(geom->curve(primID1)); - if (mask1) { - const size_t i2 = bsf(mask1); - const unsigned int primID2 = prim.primID(N)[i2]; - geom->prefetchL2_vertices(geom->curve(primID2)); - } - } - - if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,k,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - return false; - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_h(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID)); - Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_h(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID)); - if (Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - return false; - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_hn(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID)); - Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,k,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_hn(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID)); - if (Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,k,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - return false; - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h deleted file mode 100644 index 0cd8f833fd..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h +++ /dev/null @@ -1,278 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "primitive.h" -#include "curve_intersector_precalculations.h" - -namespace embree -{ - template<int M> - struct CurveNiMB - { - struct Type : public PrimitiveType { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - - /* Returns maximum number of stored primitives */ - static __forceinline size_t max_size() { return M; } - - /* Returns required number of primitive blocks for N primitives */ - static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; } - - static __forceinline size_t bytes(size_t N) - { - const size_t f = N/M, r = N%M; - static_assert(sizeof(CurveNiMB) == 6+37*M+24, "internal data layout issue"); - return f*sizeof(CurveNiMB) + (r!=0)*(6+37*r+24); - } - - public: - - /*! Default constructor. */ - __forceinline CurveNiMB () {} - - /*! fill curve from curve list */ - __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t _end, Scene* scene, const BBox1f time_range) - { - size_t end = min(begin+M,_end); - N = (uint8_t)(end-begin); - const unsigned int geomID0 = prims[begin].geomID(); - this->geomID(N) = geomID0; - ty = (uint8_t) scene->get(geomID0)->getType(); - - /* encode all primitives */ - LBBox3fa lbounds = empty; - for (size_t i=0; i<N; i++) - { - const PrimRefMB& prim = prims[begin+i]; - const unsigned int geomID = prim.geomID(); assert(geomID == geomID0); - const unsigned int primID = prim.primID(); - lbounds.extend(scene->get(geomID)->vlinearBounds(primID,time_range)); - } - BBox3fa bounds = lbounds.bounds(); - - /* calculate offset and scale */ - Vec3fa loffset = bounds.lower; - float lscale = reduce_min(256.0f/(bounds.size()*sqrt(3.0f))); - if (bounds.size() == Vec3fa(zero)) lscale = 0.0f; - *this->offset(N) = loffset; - *this->scale(N) = lscale; - this->time_offset(N) = time_range.lower; - this->time_scale(N) = 1.0f/time_range.size(); - - /* encode all primitives */ - for (size_t i=0; i<M && begin<end; i++, begin++) - { - const PrimRefMB& prim = prims[begin]; - const unsigned int geomID = prim.geomID(); - const unsigned int primID = prim.primID(); - const LinearSpace3fa space2 = scene->get(geomID)->computeAlignedSpaceMB(primID,time_range); - - const LinearSpace3fa space3(trunc(126.0f*space2.vx),trunc(126.0f*space2.vy),trunc(126.0f*space2.vz)); - const LBBox3fa bounds = scene->get(geomID)->vlinearBounds(loffset,lscale,max(length(space3.vx),length(space3.vy),length(space3.vz)),space3.transposed(),primID,time_range); - - // NOTE: this weird (int8_t) (short) cast works around VS2015 Win32 compiler bug - bounds_vx_x(N)[i] = (int8_t) (short) space3.vx.x; - bounds_vx_y(N)[i] = (int8_t) (short) space3.vx.y; - bounds_vx_z(N)[i] = (int8_t) (short) space3.vx.z; - bounds_vx_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.x),-32767.0f,32767.0f); - bounds_vx_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.x),-32767.0f,32767.0f); - bounds_vx_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.x),-32767.0f,32767.0f); - bounds_vx_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.x),-32767.0f,32767.0f); - assert(-32767.0f <= floor(bounds.bounds0.lower.x) && floor(bounds.bounds0.lower.x) <= 32767.0f); - assert(-32767.0f <= ceil (bounds.bounds0.upper.x) && ceil (bounds.bounds0.upper.x) <= 32767.0f); - assert(-32767.0f <= floor(bounds.bounds1.lower.x) && floor(bounds.bounds1.lower.x) <= 32767.0f); - assert(-32767.0f <= ceil (bounds.bounds1.upper.x) && ceil (bounds.bounds1.upper.x) <= 32767.0f); - - bounds_vy_x(N)[i] = (int8_t) (short) space3.vy.x; - bounds_vy_y(N)[i] = (int8_t) (short) space3.vy.y; - bounds_vy_z(N)[i] = (int8_t) (short) space3.vy.z; - bounds_vy_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.y),-32767.0f,32767.0f); - bounds_vy_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.y),-32767.0f,32767.0f); - bounds_vy_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.y),-32767.0f,32767.0f); - bounds_vy_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.y),-32767.0f,32767.0f); - assert(-32767.0f <= floor(bounds.bounds0.lower.y) && floor(bounds.bounds0.lower.y) <= 32767.0f); - assert(-32767.0f <= ceil (bounds.bounds0.upper.y) && ceil (bounds.bounds0.upper.y) <= 32767.0f); - assert(-32767.0f <= floor(bounds.bounds1.lower.y) && floor(bounds.bounds1.lower.y) <= 32767.0f); - assert(-32767.0f <= ceil (bounds.bounds1.upper.y) && ceil (bounds.bounds1.upper.y) <= 32767.0f); - - bounds_vz_x(N)[i] = (int8_t) (short) space3.vz.x; - bounds_vz_y(N)[i] = (int8_t) (short) space3.vz.y; - bounds_vz_z(N)[i] = (int8_t) (short) space3.vz.z; - bounds_vz_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.z),-32767.0f,32767.0f); - bounds_vz_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.z),-32767.0f,32767.0f); - bounds_vz_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.z),-32767.0f,32767.0f); - bounds_vz_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.z),-32767.0f,32767.0f); - assert(-32767.0f <= floor(bounds.bounds0.lower.z) && floor(bounds.bounds0.lower.z) <= 32767.0f); - assert(-32767.0f <= ceil (bounds.bounds0.upper.z) && ceil (bounds.bounds0.upper.z) <= 32767.0f); - assert(-32767.0f <= floor(bounds.bounds1.lower.z) && floor(bounds.bounds1.lower.z) <= 32767.0f); - assert(-32767.0f <= ceil (bounds.bounds1.upper.z) && ceil (bounds.bounds1.upper.z) <= 32767.0f); - - this->primID(N)[i] = primID; - } - - return lbounds; - } - - template<typename BVH, typename SetMB, typename Allocator> - __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc) - { - size_t start = prims.begin(); - size_t end = prims.end(); - size_t items = CurveNiMB::blocks(prims.size()); - size_t numbytes = CurveNiMB::bytes(prims.size()); - CurveNiMB* accel = (CurveNiMB*) alloc.malloc1(numbytes,BVH::byteAlignment); - const typename BVH::NodeRef node = bvh->encodeLeaf((int8_t*)accel,items); - - LBBox3fa bounds = empty; - for (size_t i=0; i<items; i++) - bounds.extend(accel[i].fillMB(prims.prims->data(),start,end,bvh->scene,prims.time_range)); - - return typename BVH::NodeRecordMB4D(node,bounds,prims.time_range); - }; - - - public: - - // 27.6 - 46 bytes per primitive - uint8_t ty; - uint8_t N; - uint8_t data[4+37*M+24]; - - /* - struct Layout - { - unsigned int geomID; - unsigned int primID[N]; - - int8_t bounds_vx_x[N]; - int8_t bounds_vx_y[N]; - int8_t bounds_vx_z[N]; - short bounds_vx_lower0[N]; - short bounds_vx_upper0[N]; - short bounds_vx_lower1[N]; - short bounds_vx_upper1[N]; - - int8_t bounds_vy_x[N]; - int8_t bounds_vy_y[N]; - int8_t bounds_vy_z[N]; - short bounds_vy_lower0[N]; - short bounds_vy_upper0[N]; - short bounds_vy_lower1[N]; - short bounds_vy_upper1[N]; - - int8_t bounds_vz_x[N]; - int8_t bounds_vz_y[N]; - int8_t bounds_vz_z[N]; - short bounds_vz_lower0[N]; - short bounds_vz_upper0[N]; - short bounds_vz_lower1[N]; - short bounds_vz_upper1[N]; - - Vec3f offset; - float scale; - - float time_offset; - float time_scale; - }; - */ - - __forceinline unsigned int& geomID(size_t N) { return *(unsigned int*)((int8_t*)this+2); } - __forceinline const unsigned int& geomID(size_t N) const { return *(unsigned int*)((int8_t*)this+2); } - - __forceinline unsigned int* primID(size_t N) { return (unsigned int*)((int8_t*)this+6); } - __forceinline const unsigned int* primID(size_t N) const { return (unsigned int*)((int8_t*)this+6); } - - __forceinline int8_t* bounds_vx_x(size_t N) { return (int8_t*)((int8_t*)this+6+4*N); } - __forceinline const int8_t* bounds_vx_x(size_t N) const { return (int8_t*)((int8_t*)this+6+4*N); } - - __forceinline int8_t* bounds_vx_y(size_t N) { return (int8_t*)((int8_t*)this+6+5*N); } - __forceinline const int8_t* bounds_vx_y(size_t N) const { return (int8_t*)((int8_t*)this+6+5*N); } - - __forceinline int8_t* bounds_vx_z(size_t N) { return (int8_t*)((int8_t*)this+6+6*N); } - __forceinline const int8_t* bounds_vx_z(size_t N) const { return (int8_t*)((int8_t*)this+6+6*N); } - - __forceinline short* bounds_vx_lower0(size_t N) { return (short*)((int8_t*)this+6+7*N); } - __forceinline const short* bounds_vx_lower0(size_t N) const { return (short*)((int8_t*)this+6+7*N); } - - __forceinline short* bounds_vx_upper0(size_t N) { return (short*)((int8_t*)this+6+9*N); } - __forceinline const short* bounds_vx_upper0(size_t N) const { return (short*)((int8_t*)this+6+9*N); } - - __forceinline short* bounds_vx_lower1(size_t N) { return (short*)((int8_t*)this+6+11*N); } - __forceinline const short* bounds_vx_lower1(size_t N) const { return (short*)((int8_t*)this+6+11*N); } - - __forceinline short* bounds_vx_upper1(size_t N) { return (short*)((int8_t*)this+6+13*N); } - __forceinline const short* bounds_vx_upper1(size_t N) const { return (short*)((int8_t*)this+6+13*N); } - - __forceinline int8_t* bounds_vy_x(size_t N) { return (int8_t*)((int8_t*)this+6+15*N); } - __forceinline const int8_t* bounds_vy_x(size_t N) const { return (int8_t*)((int8_t*)this+6+15*N); } - - __forceinline int8_t* bounds_vy_y(size_t N) { return (int8_t*)((int8_t*)this+6+16*N); } - __forceinline const int8_t* bounds_vy_y(size_t N) const { return (int8_t*)((int8_t*)this+6+16*N); } - - __forceinline int8_t* bounds_vy_z(size_t N) { return (int8_t*)((int8_t*)this+6+17*N); } - __forceinline const int8_t* bounds_vy_z(size_t N) const { return (int8_t*)((int8_t*)this+6+17*N); } - - __forceinline short* bounds_vy_lower0(size_t N) { return (short*)((int8_t*)this+6+18*N); } - __forceinline const short* bounds_vy_lower0(size_t N) const { return (short*)((int8_t*)this+6+18*N); } - - __forceinline short* bounds_vy_upper0(size_t N) { return (short*)((int8_t*)this+6+20*N); } - __forceinline const short* bounds_vy_upper0(size_t N) const { return (short*)((int8_t*)this+6+20*N); } - - __forceinline short* bounds_vy_lower1(size_t N) { return (short*)((int8_t*)this+6+22*N); } - __forceinline const short* bounds_vy_lower1(size_t N) const { return (short*)((int8_t*)this+6+22*N); } - - __forceinline short* bounds_vy_upper1(size_t N) { return (short*)((int8_t*)this+6+24*N); } - __forceinline const short* bounds_vy_upper1(size_t N) const { return (short*)((int8_t*)this+6+24*N); } - - __forceinline int8_t* bounds_vz_x(size_t N) { return (int8_t*)((int8_t*)this+6+26*N); } - __forceinline const int8_t* bounds_vz_x(size_t N) const { return (int8_t*)((int8_t*)this+6+26*N); } - - __forceinline int8_t* bounds_vz_y(size_t N) { return (int8_t*)((int8_t*)this+6+27*N); } - __forceinline const int8_t* bounds_vz_y(size_t N) const { return (int8_t*)((int8_t*)this+6+27*N); } - - __forceinline int8_t* bounds_vz_z(size_t N) { return (int8_t*)((int8_t*)this+6+28*N); } - __forceinline const int8_t* bounds_vz_z(size_t N) const { return (int8_t*)((int8_t*)this+6+28*N); } - - __forceinline short* bounds_vz_lower0(size_t N) { return (short*)((int8_t*)this+6+29*N); } - __forceinline const short* bounds_vz_lower0(size_t N) const { return (short*)((int8_t*)this+6+29*N); } - - __forceinline short* bounds_vz_upper0(size_t N) { return (short*)((int8_t*)this+6+31*N); } - __forceinline const short* bounds_vz_upper0(size_t N) const { return (short*)((int8_t*)this+6+31*N); } - - __forceinline short* bounds_vz_lower1(size_t N) { return (short*)((int8_t*)this+6+33*N); } - __forceinline const short* bounds_vz_lower1(size_t N) const { return (short*)((int8_t*)this+6+33*N); } - - __forceinline short* bounds_vz_upper1(size_t N) { return (short*)((int8_t*)this+6+35*N); } - __forceinline const short* bounds_vz_upper1(size_t N) const { return (short*)((int8_t*)this+6+35*N); } - - __forceinline Vec3f* offset(size_t N) { return (Vec3f*)((int8_t*)this+6+37*N); } - __forceinline const Vec3f* offset(size_t N) const { return (Vec3f*)((int8_t*)this+6+37*N); } - - __forceinline float* scale(size_t N) { return (float*)((int8_t*)this+6+37*N+12); } - __forceinline const float* scale(size_t N) const { return (float*)((int8_t*)this+6+37*N+12); } - - __forceinline float& time_offset(size_t N) { return *(float*)((int8_t*)this+6+37*N+16); } - __forceinline const float& time_offset(size_t N) const { return *(float*)((int8_t*)this+6+37*N+16); } - - __forceinline float& time_scale(size_t N) { return *(float*)((int8_t*)this+6+37*N+20); } - __forceinline const float& time_scale(size_t N) const { return *(float*)((int8_t*)this+6+37*N+20); } - - __forceinline int8_t* end(size_t N) { return (int8_t*)this+6+37*N+24; } - __forceinline const int8_t* end(size_t N) const { return (int8_t*)this+6+37*N+24; } - }; - - template<int M> - typename CurveNiMB<M>::Type CurveNiMB<M>::type; - - typedef CurveNiMB<4> Curve4iMB; - typedef CurveNiMB<8> Curve8iMB; -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h deleted file mode 100644 index 0cbc764668..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h +++ /dev/null @@ -1,516 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curveNi_mb.h" -#include "../subdiv/linear_bezier_patch.h" - -namespace embree -{ - namespace isa - { - template<int M> - struct CurveNiMBIntersector1 - { - typedef CurveNiMB<M> Primitive; - typedef Vec3vf<M> Vec3vfM; - typedef LinearSpace3<Vec3vfM>LinearSpace3vfM; - typedef CurvePrecalculations1 Precalculations; - - static __forceinline vbool<M> intersect(Ray& ray, const Primitive& prim, vfloat<M>& tNear_o) - { - const size_t N = prim.N; - const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N)); - const Vec3fa offset = Vec3fa(offset_scale); - const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale)); - const Vec3fa org1 = (ray.org-offset)*scale; - const Vec3fa dir1 = ray.dir*scale; - - const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)), - vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)), - vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N))); - - const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1)); - const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1)); - const Vec3vfM rcp_dir2 = rcp_safe(dir2); - - const vfloat<M> ltime = (ray.time()-prim.time_offset(N))*prim.time_scale(N); - const vfloat<M> vx_lower0 = vfloat<M>::load(prim.bounds_vx_lower0(N)); - const vfloat<M> vx_lower1 = vfloat<M>::load(prim.bounds_vx_lower1(N)); - const vfloat<M> vx_lower = madd(ltime,vx_lower1-vx_lower0,vx_lower0); - const vfloat<M> vx_upper0 = vfloat<M>::load(prim.bounds_vx_upper0(N)); - const vfloat<M> vx_upper1 = vfloat<M>::load(prim.bounds_vx_upper1(N)); - const vfloat<M> vx_upper = madd(ltime,vx_upper1-vx_upper0,vx_upper0); - - const vfloat<M> vy_lower0 = vfloat<M>::load(prim.bounds_vy_lower0(N)); - const vfloat<M> vy_lower1 = vfloat<M>::load(prim.bounds_vy_lower1(N)); - const vfloat<M> vy_lower = madd(ltime,vy_lower1-vy_lower0,vy_lower0); - const vfloat<M> vy_upper0 = vfloat<M>::load(prim.bounds_vy_upper0(N)); - const vfloat<M> vy_upper1 = vfloat<M>::load(prim.bounds_vy_upper1(N)); - const vfloat<M> vy_upper = madd(ltime,vy_upper1-vy_upper0,vy_upper0); - - const vfloat<M> vz_lower0 = vfloat<M>::load(prim.bounds_vz_lower0(N)); - const vfloat<M> vz_lower1 = vfloat<M>::load(prim.bounds_vz_lower1(N)); - const vfloat<M> vz_lower = madd(ltime,vz_lower1-vz_lower0,vz_lower0); - const vfloat<M> vz_upper0 = vfloat<M>::load(prim.bounds_vz_upper0(N)); - const vfloat<M> vz_upper1 = vfloat<M>::load(prim.bounds_vz_upper1(N)); - const vfloat<M> vz_upper = madd(ltime,vz_upper1-vz_upper0,vz_upper0); - - const vfloat<M> t_lower_x = (vx_lower-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); - const vfloat<M> t_upper_x = (vx_upper-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); - const vfloat<M> t_lower_y = (vy_lower-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); - const vfloat<M> t_upper_y = (vy_upper-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); - const vfloat<M> t_lower_z = (vz_lower-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); - const vfloat<M> t_upper_z = (vz_upper-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); - - const vfloat<M> round_up (1.0f+3.0f*float(ulp)); - const vfloat<M> round_down(1.0f-3.0f*float(ulp)); - const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear())); - const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar)); - tNear_o = tNear; - return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar); - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time()); - - Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time()); - - if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - return false; - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_n(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time()); - Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_n(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time()); - - if (Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - return false; - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_h(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time()); - Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_h(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time()); - if (Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - return false; - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_hn(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time()); - Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_hn(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time()); - if (Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - return false; - } - }; - - template<int M, int K> - struct CurveNiMBIntersectorK - { - typedef CurveNiMB<M> Primitive; - typedef Vec3vf<M> Vec3vfM; - typedef LinearSpace3<Vec3vfM>LinearSpace3vfM; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline vbool<M> intersect(RayK<K>& ray, const size_t k, const Primitive& prim, vfloat<M>& tNear_o) - { - const size_t N = prim.N; - const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N)); - const Vec3fa offset = Vec3fa(offset_scale); - const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale)); - - const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]); - const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]); - const Vec3fa org1 = (ray_org-offset)*scale; - const Vec3fa dir1 = ray_dir*scale; - - const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)), - vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)), - vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N))); - - const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1)); - const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1)); - const Vec3vfM rcp_dir2 = rcp_safe(dir2); - - const vfloat<M> ltime = (ray.time()[k]-prim.time_offset(N))*prim.time_scale(N); - const vfloat<M> vx_lower0 = vfloat<M>::load(prim.bounds_vx_lower0(N)); - const vfloat<M> vx_lower1 = vfloat<M>::load(prim.bounds_vx_lower1(N)); - const vfloat<M> vx_lower = madd(ltime,vx_lower1-vx_lower0,vx_lower0); - const vfloat<M> vx_upper0 = vfloat<M>::load(prim.bounds_vx_upper0(N)); - const vfloat<M> vx_upper1 = vfloat<M>::load(prim.bounds_vx_upper1(N)); - const vfloat<M> vx_upper = madd(ltime,vx_upper1-vx_upper0,vx_upper0); - - const vfloat<M> vy_lower0 = vfloat<M>::load(prim.bounds_vy_lower0(N)); - const vfloat<M> vy_lower1 = vfloat<M>::load(prim.bounds_vy_lower1(N)); - const vfloat<M> vy_lower = madd(ltime,vy_lower1-vy_lower0,vy_lower0); - const vfloat<M> vy_upper0 = vfloat<M>::load(prim.bounds_vy_upper0(N)); - const vfloat<M> vy_upper1 = vfloat<M>::load(prim.bounds_vy_upper1(N)); - const vfloat<M> vy_upper = madd(ltime,vy_upper1-vy_upper0,vy_upper0); - - const vfloat<M> vz_lower0 = vfloat<M>::load(prim.bounds_vz_lower0(N)); - const vfloat<M> vz_lower1 = vfloat<M>::load(prim.bounds_vz_lower1(N)); - const vfloat<M> vz_lower = madd(ltime,vz_lower1-vz_lower0,vz_lower0); - const vfloat<M> vz_upper0 = vfloat<M>::load(prim.bounds_vz_upper0(N)); - const vfloat<M> vz_upper1 = vfloat<M>::load(prim.bounds_vz_upper1(N)); - const vfloat<M> vz_upper = madd(ltime,vz_upper1-vz_upper0,vz_upper0); - - const vfloat<M> t_lower_x = (vx_lower-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); - const vfloat<M> t_upper_x = (vx_upper-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x); - const vfloat<M> t_lower_y = (vy_lower-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); - const vfloat<M> t_upper_y = (vy_upper-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y); - const vfloat<M> t_lower_z = (vz_lower-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); - const vfloat<M> t_upper_z = (vz_upper-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z); - - const vfloat<M> round_up (1.0f+3.0f*float(ulp)); - const vfloat<M> round_down(1.0f-3.0f*float(ulp)); - const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()[k])); - const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar[k])); - tNear_o = tNear; - return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar); - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time()[k]); - - Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time()[k]); - - if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - return false; - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_n(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); - const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]); - Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_n(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); - const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]); - - if (Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - return false; - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_h(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time()[k]); - Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_h(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time()[k]); - if (Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - return false; - } - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_hn(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); - const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]); - Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_hn(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID); - const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); - const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]); - if (Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - return false; - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNv.h b/thirdparty/embree-aarch64/kernels/geometry/curveNv.h deleted file mode 100644 index 6eb5e30b39..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curveNv.h +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curveNi.h" - -namespace embree -{ - template<int M> - struct CurveNv : public CurveNi<M> - { - using CurveNi<M>::N; - - struct Type : public PrimitiveType { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - - /* Returns maximum number of stored primitives */ - static __forceinline size_t max_size() { return M; } - - /* Returns required number of primitive blocks for N primitives */ - static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; } - - static __forceinline size_t bytes(size_t N) - { - const size_t f = N/M, r = N%M; - static_assert(sizeof(CurveNv) == 22+25*M+4*16*M, "internal data layout issue"); - return f*sizeof(CurveNv) + (r!=0)*(22 + 25*r + 4*16*r); - } - - public: - - /*! Default constructor. */ - __forceinline CurveNv () {} - - /*! fill curve from curve list */ - __forceinline void fill(const PrimRef* prims, size_t& begin, size_t _end, Scene* scene) - { - size_t end = min(begin+M,_end); - size_t N = end-begin; - - /* encode all primitives */ - for (size_t i=0; i<N; i++) - { - const PrimRef& prim = prims[begin+i]; - const unsigned int geomID = prim.geomID(); - const unsigned int primID = prim.primID(); - CurveGeometry* mesh = (CurveGeometry*) scene->get(geomID); - const unsigned vtxID = mesh->curve(primID); - Vec3fa::storeu(&this->vertices(i,N)[0],mesh->vertex(vtxID+0)); - Vec3fa::storeu(&this->vertices(i,N)[1],mesh->vertex(vtxID+1)); - Vec3fa::storeu(&this->vertices(i,N)[2],mesh->vertex(vtxID+2)); - Vec3fa::storeu(&this->vertices(i,N)[3],mesh->vertex(vtxID+3)); - } - } - - template<typename BVH, typename Allocator> - __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) - { - if (set.size() == 0) - return BVH::emptyNode; - - /* fall back to CurveNi for oriented curves */ - unsigned int geomID = prims[set.begin()].geomID(); - if (bvh->scene->get(geomID)->getCurveType() == Geometry::GTY_SUBTYPE_ORIENTED_CURVE) { - return CurveNi<M>::createLeaf(bvh,prims,set,alloc); - } - if (bvh->scene->get(geomID)->getCurveBasis() == Geometry::GTY_BASIS_HERMITE) { - return CurveNi<M>::createLeaf(bvh,prims,set,alloc); - } - - size_t start = set.begin(); - size_t items = CurveNv::blocks(set.size()); - size_t numbytes = CurveNv::bytes(set.size()); - CurveNv* accel = (CurveNv*) alloc.malloc1(numbytes,BVH::byteAlignment); - for (size_t i=0; i<items; i++) { - accel[i].CurveNv<M>::fill(prims,start,set.end(),bvh->scene); - accel[i].CurveNi<M>::fill(prims,start,set.end(),bvh->scene); - } - return bvh->encodeLeaf((char*)accel,items); - }; - - public: - unsigned char data[4*16*M]; - __forceinline Vec3fa* vertices(size_t i, size_t N) { return (Vec3fa*)CurveNi<M>::end(N)+4*i; } - __forceinline const Vec3fa* vertices(size_t i, size_t N) const { return (Vec3fa*)CurveNi<M>::end(N)+4*i; } - }; - - template<int M> - typename CurveNv<M>::Type CurveNv<M>::type; - - typedef CurveNv<4> Curve4v; - typedef CurveNv<8> Curve8v; -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h deleted file mode 100644 index e20da2882e..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curveNv.h" -#include "curveNi_intersector.h" - -namespace embree -{ - namespace isa - { - template<int M> - struct CurveNvIntersector1 : public CurveNiIntersector1<M> - { - typedef CurveNv<M> Primitive; - typedef CurvePrecalculations1 Precalculations; - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = CurveNiIntersector1<M>::intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID); - const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]); - const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]); - const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]); - const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]); - - size_t mask1 = mask; - const size_t i1 = bscf(mask1); - if (mask) { - prefetchL1(&prim.vertices(i1,N)[0]); - prefetchL1(&prim.vertices(i1,N)[4]); - if (mask1) { - const size_t i2 = bsf(mask1); - prefetchL2(&prim.vertices(i2,N)[0]); - prefetchL2(&prim.vertices(i2,N)[4]); - } - } - - Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = CurveNiIntersector1<M>::intersect(ray,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID); - const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]); - const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]); - const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]); - const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]); - - size_t mask1 = mask; - const size_t i1 = bscf(mask1); - if (mask) { - prefetchL1(&prim.vertices(i1,N)[0]); - prefetchL1(&prim.vertices(i1,N)[4]); - if (mask1) { - const size_t i2 = bsf(mask1); - prefetchL2(&prim.vertices(i2,N)[0]); - prefetchL2(&prim.vertices(i2,N)[4]); - } - } - - if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar)); - } - return false; - } - }; - - template<int M, int K> - struct CurveNvIntersectorK : public CurveNiIntersectorK<M,K> - { - typedef CurveNv<M> Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - template<typename Intersector, typename Epilog> - static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = CurveNiIntersectorK<M,K>::intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(normal.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID); - const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]); - const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]); - const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]); - const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]); - - size_t mask1 = mask; - const size_t i1 = bscf(mask1); - if (mask) { - prefetchL1(&prim.vertices(i1,N)[0]); - prefetchL1(&prim.vertices(i1,N)[4]); - if (mask1) { - const size_t i2 = bsf(mask1); - prefetchL2(&prim.vertices(i2,N)[0]); - prefetchL2(&prim.vertices(i2,N)[4]); - } - } - - Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID)); - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - } - - template<typename Intersector, typename Epilog> - static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim) - { - vfloat<M> tNear; - vbool<M> valid = CurveNiIntersectorK<M,K>::intersect(ray,k,prim,tNear); - - const size_t N = prim.N; - size_t mask = movemask(valid); - while (mask) - { - const size_t i = bscf(mask); - STAT3(shadow.trav_prims,1,1,1); - const unsigned int geomID = prim.geomID(N); - const unsigned int primID = prim.primID(N)[i]; - const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID); - const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]); - const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]); - const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]); - const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]); - - size_t mask1 = mask; - const size_t i1 = bscf(mask1); - if (mask) { - prefetchL1(&prim.vertices(i1,N)[0]); - prefetchL1(&prim.vertices(i1,N)[4]); - if (mask1) { - const size_t i2 = bsf(mask1); - prefetchL2(&prim.vertices(i2,N)[0]); - prefetchL2(&prim.vertices(i2,N)[4]); - } - } - - if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID))) - return true; - - mask &= movemask(tNear <= vfloat<M>(ray.tfar[k])); - } - return false; - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h deleted file mode 100644 index 204958f7cc..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "primitive.h" -#include "../subdiv/bezier_curve.h" -#include "../common/primref.h" -#include "bezier_hair_intersector.h" -#include "bezier_ribbon_intersector.h" -#include "bezier_curve_intersector.h" -#include "oriented_curve_intersector.h" -#include "../bvh/node_intersector1.h" - -// FIXME: this file seems replicate of curve_intersector_virtual.h - -namespace embree -{ - namespace isa - { - struct VirtualCurveIntersector1 - { - typedef unsigned char Primitive; - typedef CurvePrecalculations1 Precalculations; - - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - assert(num == 1); - RTCGeometryType ty = (RTCGeometryType)(*prim); - assert(This->leafIntersector); - VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty]; - leafIntersector.intersect<1>(&pre,&ray,context,prim); - } - - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - assert(num == 1); - RTCGeometryType ty = (RTCGeometryType)(*prim); - assert(This->leafIntersector); - VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty]; - return leafIntersector.occluded<1>(&pre,&ray,context,prim); - } - }; - - template<int K> - struct VirtualCurveIntersectorK - { - typedef unsigned char Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline void intersect(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) - { - assert(num == 1); - RTCGeometryType ty = (RTCGeometryType)(*prim); - assert(This->leafIntersector); - VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty]; - size_t mask = movemask(valid_i); - while (mask) leafIntersector.intersect<K>(&pre,&ray,bscf(mask),context,prim); - } - - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) - { - assert(num == 1); - RTCGeometryType ty = (RTCGeometryType)(*prim); - assert(This->leafIntersector); - VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty]; - vbool<K> valid_o = false; - size_t mask = movemask(valid_i); - while (mask) { - size_t k = bscf(mask); - if (leafIntersector.occluded<K>(&pre,&ray,k,context,prim)) - set(valid_o, k); - } - return valid_o; - } - - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) - { - assert(num == 1); - RTCGeometryType ty = (RTCGeometryType)(*prim); - assert(This->leafIntersector); - VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty]; - leafIntersector.intersect<K>(&pre,&ray,k,context,prim); - } - - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) - { - assert(num == 1); - RTCGeometryType ty = (RTCGeometryType)(*prim); - assert(This->leafIntersector); - VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty]; - return leafIntersector.occluded<K>(&pre,&ray,k,context,prim); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h deleted file mode 100644 index 343cc8ff28..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" -#include "curve_intersector_precalculations.h" - -namespace embree -{ - namespace isa - { - template<typename NativeCurve3fa, int M> - struct DistanceCurveHit - { - __forceinline DistanceCurveHit() {} - - __forceinline DistanceCurveHit(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const int i, const int N, - const NativeCurve3fa& curve3D) - : U(U), V(V), T(T), i(i), N(N), curve3D(curve3D), valid(valid) {} - - __forceinline void finalize() - { - vu = (vfloat<M>(step)+U+vfloat<M>(float(i)))*(1.0f/float(N)); - vv = V; - vt = T; - } - - __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } - __forceinline float t (const size_t i) const { return vt[i]; } - __forceinline Vec3fa Ng(const size_t i) const { - return curve3D.eval_du(vu[i]); - } - - public: - vfloat<M> U; - vfloat<M> V; - vfloat<M> T; - int i, N; - NativeCurve3fa curve3D; - - public: - vbool<M> valid; - vfloat<M> vu; - vfloat<M> vv; - vfloat<M> vt; - }; - - template<typename NativeCurve3fa> - struct DistanceCurve1Intersector1 - { - template<typename Epilog> - __forceinline bool intersect(const CurvePrecalculations1& pre,Ray& ray, - IntersectContext* context, - const CurveGeometry* geom, const unsigned int primID, - const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2, const Vec3fa& v3, - const Epilog& epilog) - { - const int N = geom->tessellationRate; - - /* transform control points into ray space */ - const NativeCurve3fa curve3Di(v0,v1,v2,v3); - const NativeCurve3fa curve3D = enlargeRadiusToMinWidth(context,geom,ray.org,curve3Di); - const NativeCurve3fa curve2D = curve3D.xfm_pr(pre.ray_space,ray.org); - - /* evaluate the bezier curve */ - vboolx valid = vfloatx(step) < vfloatx(float(N)); - const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(0,N); - const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(0,N); - - /* approximative intersection with cone */ - const Vec4vfx v = p1-p0; - const Vec4vfx w = -p0; - const vfloatx d0 = madd(w.x,v.x,w.y*v.y); - const vfloatx d1 = madd(v.x,v.x,v.y*v.y); - const vfloatx u = clamp(d0*rcp(d1),vfloatx(zero),vfloatx(one)); - const Vec4vfx p = madd(u,v,p0); - const vfloatx t = p.z*pre.depth_scale; - const vfloatx d2 = madd(p.x,p.x,p.y*p.y); - const vfloatx r = p.w; - const vfloatx r2 = r*r; - valid &= (d2 <= r2) & (vfloatx(ray.tnear()) <= t) & (t <= vfloatx(ray.tfar)); - if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) - valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*pre.depth_scale; // ignore self intersections - - /* update hit information */ - bool ishit = false; - if (unlikely(any(valid))) { - DistanceCurveHit<NativeCurve3fa,VSIZEX> hit(valid,u,0.0f,t,0,N,curve3D); - ishit = ishit | epilog(valid,hit); - } - - if (unlikely(VSIZEX < N)) - { - /* process SIMD-size many segments per iteration */ - for (int i=VSIZEX; i<N; i+=VSIZEX) - { - /* evaluate the bezier curve */ - vboolx valid = vintx(i)+vintx(step) < vintx(N); - const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(i,N); - const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(i,N); - - /* approximative intersection with cone */ - const Vec4vfx v = p1-p0; - const Vec4vfx w = -p0; - const vfloatx d0 = madd(w.x,v.x,w.y*v.y); - const vfloatx d1 = madd(v.x,v.x,v.y*v.y); - const vfloatx u = clamp(d0*rcp(d1),vfloatx(zero),vfloatx(one)); - const Vec4vfx p = madd(u,v,p0); - const vfloatx t = p.z*pre.depth_scale; - const vfloatx d2 = madd(p.x,p.x,p.y*p.y); - const vfloatx r = p.w; - const vfloatx r2 = r*r; - valid &= (d2 <= r2) & (vfloatx(ray.tnear()) <= t) & (t <= vfloatx(ray.tfar)); - if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) - valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*pre.depth_scale; // ignore self intersections - - /* update hit information */ - if (unlikely(any(valid))) { - DistanceCurveHit<NativeCurve3fa,VSIZEX> hit(valid,u,0.0f,t,i,N,curve3D); - ishit = ishit | epilog(valid,hit); - } - } - } - return ishit; - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h deleted file mode 100644 index 47531027fc..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h +++ /dev/null @@ -1,417 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" -#include "curve_intersector_precalculations.h" -#include "curve_intersector_sweep.h" -#include "../subdiv/linear_bezier_patch.h" - -#define DBG(x) - -namespace embree -{ - namespace isa - { - template<typename Ray, typename Epilog> - struct TensorLinearCubicBezierSurfaceIntersector - { - const LinearSpace3fa& ray_space; - Ray& ray; - TensorLinearCubicBezierSurface3fa curve3d; - TensorLinearCubicBezierSurface2fa curve2d; - float eps; - const Epilog& epilog; - bool isHit; - - __forceinline TensorLinearCubicBezierSurfaceIntersector (const LinearSpace3fa& ray_space, Ray& ray, const TensorLinearCubicBezierSurface3fa& curve3d, const Epilog& epilog) - : ray_space(ray_space), ray(ray), curve3d(curve3d), epilog(epilog), isHit(false) - { - const TensorLinearCubicBezierSurface3fa curve3dray = curve3d.xfm(ray_space,ray.org); - curve2d = TensorLinearCubicBezierSurface2fa(CubicBezierCurve2fa(curve3dray.L),CubicBezierCurve2fa(curve3dray.R)); - const BBox2fa b2 = curve2d.bounds(); - eps = 8.0f*float(ulp)*reduce_max(max(abs(b2.lower),abs(b2.upper))); - } - - __forceinline Interval1f solve_linear(const float u0, const float u1, const float& p0, const float& p1) - { - if (p1 == p0) { - if (p0 == 0.0f) return Interval1f(u0,u1); - else return Interval1f(empty); - } - const float t = -p0/(p1-p0); - const float tt = lerp(u0,u1,t); - return Interval1f(tt); - } - - __forceinline void solve_linear(const float u0, const float u1, const Interval1f& p0, const Interval1f& p1, Interval1f& u) - { - if (sign(p0.lower) != sign(p0.upper)) u.extend(u0); - if (sign(p0.lower) != sign(p1.lower)) u.extend(solve_linear(u0,u1,p0.lower,p1.lower)); - if (sign(p0.upper) != sign(p1.upper)) u.extend(solve_linear(u0,u1,p0.upper,p1.upper)); - if (sign(p1.lower) != sign(p1.upper)) u.extend(u1); - } - - __forceinline Interval1f bezier_clipping(const CubicBezierCurve<Interval1f>& curve) - { - Interval1f u = empty; - solve_linear(0.0f/3.0f,1.0f/3.0f,curve.v0,curve.v1,u); - solve_linear(0.0f/3.0f,2.0f/3.0f,curve.v0,curve.v2,u); - solve_linear(0.0f/3.0f,3.0f/3.0f,curve.v0,curve.v3,u); - solve_linear(1.0f/3.0f,2.0f/3.0f,curve.v1,curve.v2,u); - solve_linear(1.0f/3.0f,3.0f/3.0f,curve.v1,curve.v3,u); - solve_linear(2.0f/3.0f,3.0f/3.0f,curve.v2,curve.v3,u); - return intersect(u,Interval1f(0.0f,1.0f)); - } - - __forceinline Interval1f bezier_clipping(const LinearBezierCurve<Interval1f>& curve) - { - Interval1f v = empty; - solve_linear(0.0f,1.0f,curve.v0,curve.v1,v); - return intersect(v,Interval1f(0.0f,1.0f)); - } - - __forceinline void solve_bezier_clipping(BBox1f cu, BBox1f cv, const TensorLinearCubicBezierSurface2fa& curve2) - { - BBox2fa bounds = curve2.bounds(); - if (bounds.upper.x < 0.0f) return; - if (bounds.upper.y < 0.0f) return; - if (bounds.lower.x > 0.0f) return; - if (bounds.lower.y > 0.0f) return; - - if (max(cu.size(),cv.size()) < 1E-4f) - { - const float u = cu.center(); - const float v = cv.center(); - TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org); - const float t = curve_z.eval(u,v); - if (ray.tnear() <= t && t <= ray.tfar) { - const Vec3fa Ng = cross(curve3d.eval_du(u,v),curve3d.eval_dv(u,v)); - BezierCurveHit hit(t,u,v,Ng); - isHit |= epilog(hit); - } - return; - } - - const Vec2fa dv = curve2.axis_v(); - const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dv); - LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u(); - if (!curve0v.hasRoot()) return; - - const Interval1f v = bezier_clipping(curve0v); - if (isEmpty(v)) return; - TensorLinearCubicBezierSurface2fa curve2a = curve2.clip_v(v); - cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper)); - - const Vec2fa du = curve2.axis_u(); - const TensorLinearCubicBezierSurface1f curve1u = curve2a.xfm(du); - CubicBezierCurve<Interval1f> curve0u = curve1u.reduce_v(); - int roots = curve0u.maxRoots(); - if (roots == 0) return; - - if (roots == 1) - { - const Interval1f u = bezier_clipping(curve0u); - if (isEmpty(u)) return; - TensorLinearCubicBezierSurface2fa curve2b = curve2a.clip_u(u); - cu = BBox1f(lerp(cu.lower,cu.upper,u.lower),lerp(cu.lower,cu.upper,u.upper)); - solve_bezier_clipping(cu,cv,curve2b); - return; - } - - TensorLinearCubicBezierSurface2fa curve2l, curve2r; - curve2a.split_u(curve2l,curve2r); - solve_bezier_clipping(BBox1f(cu.lower,cu.center()),cv,curve2l); - solve_bezier_clipping(BBox1f(cu.center(),cu.upper),cv,curve2r); - } - - __forceinline bool solve_bezier_clipping() - { - solve_bezier_clipping(BBox1f(0.0f,1.0f),BBox1f(0.0f,1.0f),curve2d); - return isHit; - } - - __forceinline void solve_newton_raphson(BBox1f cu, BBox1f cv) - { - Vec2fa uv(cu.center(),cv.center()); - const Vec2fa dfdu = curve2d.eval_du(uv.x,uv.y); - const Vec2fa dfdv = curve2d.eval_dv(uv.x,uv.y); - const LinearSpace2fa rcp_J = rcp(LinearSpace2fa(dfdu,dfdv)); - solve_newton_raphson_loop(cu,cv,uv,dfdu,dfdv,rcp_J); - } - - __forceinline void solve_newton_raphson_loop(BBox1f cu, BBox1f cv, const Vec2fa& uv_in, const Vec2fa& dfdu, const Vec2fa& dfdv, const LinearSpace2fa& rcp_J) - { - Vec2fa uv = uv_in; - - for (size_t i=0; i<200; i++) - { - const Vec2fa f = curve2d.eval(uv.x,uv.y); - const Vec2fa duv = rcp_J*f; - uv -= duv; - - if (max(abs(f.x),abs(f.y)) < eps) - { - const float u = uv.x; - const float v = uv.y; - if (!(u >= 0.0f && u <= 1.0f)) return; // rejects NaNs - if (!(v >= 0.0f && v <= 1.0f)) return; // rejects NaNs - const TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org); - const float t = curve_z.eval(u,v); - if (!(ray.tnear() <= t && t <= ray.tfar)) return; // rejects NaNs - const Vec3fa Ng = cross(curve3d.eval_du(u,v),curve3d.eval_dv(u,v)); - BezierCurveHit hit(t,u,v,Ng); - isHit |= epilog(hit); - return; - } - } - } - - __forceinline bool clip_v(BBox1f& cu, BBox1f& cv) - { - const Vec2fa dv = curve2d.eval_dv(cu.lower,cv.lower); - const TensorLinearCubicBezierSurface1f curve1v = curve2d.xfm(dv).clip(cu,cv); - LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u(); - if (!curve0v.hasRoot()) return false; - Interval1f v = bezier_clipping(curve0v); - if (isEmpty(v)) return false; - v = intersect(v + Interval1f(-0.1f,+0.1f),Interval1f(0.0f,1.0f)); - cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper)); - return true; - } - - __forceinline bool solve_krawczyk(bool very_small, BBox1f& cu, BBox1f& cv) - { - /* perform bezier clipping in v-direction to get tight v-bounds */ - TensorLinearCubicBezierSurface2fa curve2 = curve2d.clip(cu,cv); - const Vec2fa dv = curve2.axis_v(); - const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dv); - LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u(); - if (unlikely(!curve0v.hasRoot())) return true; - Interval1f v = bezier_clipping(curve0v); - if (unlikely(isEmpty(v))) return true; - v = intersect(v + Interval1f(-0.1f,+0.1f),Interval1f(0.0f,1.0f)); - curve2 = curve2.clip_v(v); - cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper)); - - /* perform one newton raphson iteration */ - Vec2fa c(cu.center(),cv.center()); - Vec2fa f,dfdu,dfdv; curve2d.eval(c.x,c.y,f,dfdu,dfdv); - const LinearSpace2fa rcp_J = rcp(LinearSpace2fa(dfdu,dfdv)); - const Vec2fa c1 = c - rcp_J*f; - - /* calculate bounds of derivatives */ - const BBox2fa bounds_du = (1.0f/cu.size())*curve2.derivative_u().bounds(); - const BBox2fa bounds_dv = (1.0f/cv.size())*curve2.derivative_v().bounds(); - - /* calculate krawczyk test */ - LinearSpace2<Vec2<Interval1f>> I(Interval1f(1.0f), Interval1f(0.0f), - Interval1f(0.0f), Interval1f(1.0f)); - - LinearSpace2<Vec2<Interval1f>> G(Interval1f(bounds_du.lower.x,bounds_du.upper.x), Interval1f(bounds_dv.lower.x,bounds_dv.upper.x), - Interval1f(bounds_du.lower.y,bounds_du.upper.y), Interval1f(bounds_dv.lower.y,bounds_dv.upper.y)); - - const LinearSpace2<Vec2f> rcp_J2(rcp_J); - const LinearSpace2<Vec2<Interval1f>> rcp_Ji(rcp_J2); - - const Vec2<Interval1f> x(cu,cv); - const Vec2<Interval1f> K = Vec2<Interval1f>(Vec2f(c1)) + (I - rcp_Ji*G)*(x-Vec2<Interval1f>(Vec2f(c))); - - /* test if there is no solution */ - const Vec2<Interval1f> KK = intersect(K,x); - if (unlikely(isEmpty(KK.x) || isEmpty(KK.y))) return true; - - /* exit if convergence cannot get proven, but terminate if we are very small */ - if (unlikely(!subset(K,x) && !very_small)) return false; - - /* solve using newton raphson iteration of convergence is guarenteed */ - solve_newton_raphson_loop(cu,cv,c1,dfdu,dfdv,rcp_J); - return true; - } - - __forceinline void solve_newton_raphson_no_recursion(BBox1f cu, BBox1f cv) - { - if (!clip_v(cu,cv)) return; - return solve_newton_raphson(cu,cv); - } - - __forceinline void solve_newton_raphson_recursion(BBox1f cu, BBox1f cv) - { - unsigned int sptr = 0; - const unsigned int stack_size = 4; - unsigned int mask_stack[stack_size]; - BBox1f cu_stack[stack_size]; - BBox1f cv_stack[stack_size]; - goto entry; - - /* terminate if stack is empty */ - while (sptr) - { - /* pop from stack */ - { - sptr--; - size_t mask = mask_stack[sptr]; - cu = cu_stack[sptr]; - cv = cv_stack[sptr]; - const size_t i = bscf(mask); - mask_stack[sptr] = mask; - if (mask) sptr++; // there are still items on the stack - - /* process next element recurse into each hit curve segment */ - const float u0 = float(i+0)*(1.0f/(VSIZEX-1)); - const float u1 = float(i+1)*(1.0f/(VSIZEX-1)); - const BBox1f cui(lerp(cu.lower,cu.upper,u0),lerp(cu.lower,cu.upper,u1)); - cu = cui; - } - -#if 0 - solve_newton_raphson_no_recursion(cu,cv); - continue; - -#else - /* we assume convergence for small u ranges and verify using krawczyk */ - if (cu.size() < 1.0f/6.0f) { - const bool very_small = cu.size() < 0.001f || sptr >= stack_size; - if (solve_krawczyk(very_small,cu,cv)) { - continue; - } - } -#endif - - entry: - - /* split the curve into VSIZEX-1 segments in u-direction */ - vboolx valid = true; - TensorLinearCubicBezierSurface<Vec2vfx> subcurves = curve2d.clip_v(cv).vsplit_u(valid,cu); - - /* slabs test in u-direction */ - Vec2vfx ndv = cross(subcurves.axis_v()); - BBox<vfloatx> boundsv = subcurves.vxfm(ndv).bounds(); - valid &= boundsv.lower <= eps; - valid &= boundsv.upper >= -eps; - if (none(valid)) continue; - - /* slabs test in v-direction */ - Vec2vfx ndu = cross(subcurves.axis_u()); - BBox<vfloatx> boundsu = subcurves.vxfm(ndu).bounds(); - valid &= boundsu.lower <= eps; - valid &= boundsu.upper >= -eps; - if (none(valid)) continue; - - /* push valid segments to stack */ - assert(sptr < stack_size); - mask_stack [sptr] = movemask(valid); - cu_stack [sptr] = cu; - cv_stack [sptr] = cv; - sptr++; - } - } - - __forceinline bool solve_newton_raphson_main() - { - BBox1f vu(0.0f,1.0f); - BBox1f vv(0.0f,1.0f); - solve_newton_raphson_recursion(vu,vv); - return isHit; - } - }; - - - template<template<typename Ty> class SourceCurve> - struct OrientedCurve1Intersector1 - { - //template<typename Ty> using Curve = SourceCurve<Ty>; - typedef SourceCurve<Vec3ff> SourceCurve3ff; - typedef SourceCurve<Vec3fa> SourceCurve3fa; - - __forceinline OrientedCurve1Intersector1() {} - - __forceinline OrientedCurve1Intersector1(const Ray& ray, const void* ptr) {} - - template<typename Epilog> - __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, - IntersectContext* context, - const CurveGeometry* geom, const unsigned int primID, - const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i, - const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i, - const Epilog& epilog) const - { - STAT3(normal.trav_prims,1,1,1); - - SourceCurve3ff ccurve(v0i,v1i,v2i,v3i); - SourceCurve3fa ncurve(n0i,n1i,n2i,n3i); - ccurve = enlargeRadiusToMinWidth(context,geom,ray.org,ccurve); - TensorLinearCubicBezierSurface3fa curve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve); - //return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_bezier_clipping(); - return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_newton_raphson_main(); - } - - template<typename Epilog> - __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, - IntersectContext* context, - const CurveGeometry* geom, const unsigned int primID, - const TensorLinearCubicBezierSurface3fa& curve, const Epilog& epilog) const - { - STAT3(normal.trav_prims,1,1,1); - //return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_bezier_clipping(); - return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_newton_raphson_main(); - } - }; - - template<template<typename Ty> class SourceCurve, int K> - struct OrientedCurve1IntersectorK - { - //template<typename Ty> using Curve = SourceCurve<Ty>; - typedef SourceCurve<Vec3ff> SourceCurve3ff; - typedef SourceCurve<Vec3fa> SourceCurve3fa; - - struct Ray1 - { - __forceinline Ray1(RayK<K>& ray, size_t k) - : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {} - - Vec3fa org; - Vec3fa dir; - float _tnear; - float& tfar; - - __forceinline float& tnear() { return _tnear; } - //__forceinline float& tfar() { return _tfar; } - __forceinline const float& tnear() const { return _tnear; } - //__forceinline const float& tfar() const { return _tfar; } - }; - - template<typename Epilog> - __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k, - IntersectContext* context, - const CurveGeometry* geom, const unsigned int primID, - const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i, - const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i, - const Epilog& epilog) - { - STAT3(normal.trav_prims,1,1,1); - Ray1 ray(vray,k); - SourceCurve3ff ccurve(v0i,v1i,v2i,v3i); - SourceCurve3fa ncurve(n0i,n1i,n2i,n3i); - ccurve = enlargeRadiusToMinWidth(context,geom,ray.org,ccurve); - TensorLinearCubicBezierSurface3fa curve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve); - //return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_bezier_clipping(); - return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_newton_raphson_main(); - } - - template<typename Epilog> - __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k, - IntersectContext* context, - const CurveGeometry* geom, const unsigned int primID, - const TensorLinearCubicBezierSurface3fa& curve, - const Epilog& epilog) - { - STAT3(normal.trav_prims,1,1,1); - Ray1 ray(vray,k); - //return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_bezier_clipping(); - return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_newton_raphson_main(); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h deleted file mode 100644 index 6e9fc91925..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" -#include "../common/geometry.h" - -namespace embree -{ - namespace isa - { - struct CurvePrecalculations1 - { - float depth_scale; - LinearSpace3fa ray_space; - - __forceinline CurvePrecalculations1() {} - - __forceinline CurvePrecalculations1(const Ray& ray, const void* ptr) - { - depth_scale = rsqrt(dot(ray.dir,ray.dir)); - LinearSpace3fa space = frame(depth_scale*ray.dir); - space.vz *= depth_scale; - ray_space = space.transposed(); - } - }; - - template<int K> - struct CurvePrecalculationsK - { - vfloat<K> depth_scale; - LinearSpace3fa ray_space[K]; - - __forceinline CurvePrecalculationsK(const vbool<K>& valid, const RayK<K>& ray) - { - size_t mask = movemask(valid); - depth_scale = rsqrt(dot(ray.dir,ray.dir)); - while (mask) { - size_t k = bscf(mask); - Vec3fa ray_dir_k = Vec3fa(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]); - LinearSpace3fa ray_space_k = frame(depth_scale[k]*ray_dir_k); - ray_space_k.vz *= depth_scale[k]; - ray_space[k] = ray_space_k.transposed(); - } - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h deleted file mode 100644 index a99cf99d56..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h +++ /dev/null @@ -1,214 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" -#include "quad_intersector.h" -#include "curve_intersector_precalculations.h" - -#define Bezier1Intersector1 RibbonCurve1Intersector1 -#define Bezier1IntersectorK RibbonCurve1IntersectorK - -namespace embree -{ - namespace isa - { - template<typename NativeCurve3ff, int M> - struct RibbonHit - { - __forceinline RibbonHit() {} - - __forceinline RibbonHit(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const int i, const int N, - const NativeCurve3ff& curve3D) - : U(U), V(V), T(T), i(i), N(N), curve3D(curve3D), valid(valid) {} - - __forceinline void finalize() - { - vu = (vfloat<M>(step)+U+vfloat<M>(float(i)))*(1.0f/float(N)); - vv = V; - vt = T; - } - - __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } - __forceinline float t (const size_t i) const { return vt[i]; } - __forceinline Vec3fa Ng(const size_t i) const { - return curve3D.eval_du(vu[i]); - } - - public: - vfloat<M> U; - vfloat<M> V; - vfloat<M> T; - int i, N; - NativeCurve3ff curve3D; - - public: - vbool<M> valid; - vfloat<M> vu; - vfloat<M> vv; - vfloat<M> vt; - }; - - /* calculate squared distance of point p0 to line p1->p2 */ - __forceinline std::pair<vfloatx,vfloatx> sqr_point_line_distance(const Vec2vfx& p0, const Vec2vfx& p1, const Vec2vfx& p2) - { - const vfloatx num = det(p2-p1,p1-p0); - const vfloatx den2 = dot(p2-p1,p2-p1); - return std::make_pair(num*num,den2); - } - - /* performs culling against a cylinder */ - __forceinline vboolx cylinder_culling_test(const Vec2vfx& p0, const Vec2vfx& p1, const Vec2vfx& p2, const vfloatx& r) - { - const std::pair<vfloatx,vfloatx> d = sqr_point_line_distance(p0,p1,p2); - return d.first <= r*r*d.second; - } - - template<typename NativeCurve3ff, typename Epilog> - __forceinline bool intersect_ribbon(const Vec3fa& ray_org, const Vec3fa& ray_dir, const float ray_tnear, const float& ray_tfar, - const LinearSpace3fa& ray_space, const float& depth_scale, - const NativeCurve3ff& curve3D, const int N, - const Epilog& epilog) - { - /* transform control points into ray space */ - const NativeCurve3ff curve2D = curve3D.xfm_pr(ray_space,ray_org); - float eps = 4.0f*float(ulp)*reduce_max(max(abs(curve2D.v0),abs(curve2D.v1),abs(curve2D.v2),abs(curve2D.v3))); - - /* evaluate the bezier curve */ - bool ishit = false; - vboolx valid = vfloatx(step) < vfloatx(float(N)); - const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(0,N); - const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(0,N); - valid &= cylinder_culling_test(zero,Vec2vfx(p0.x,p0.y),Vec2vfx(p1.x,p1.y),max(p0.w,p1.w)); - - if (any(valid)) - { - Vec3vfx dp0dt = curve2D.template derivative0<VSIZEX>(0,N); - Vec3vfx dp1dt = curve2D.template derivative1<VSIZEX>(0,N); - dp0dt = select(reduce_max(abs(dp0dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp0dt); - dp1dt = select(reduce_max(abs(dp1dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp1dt); - const Vec3vfx n0(dp0dt.y,-dp0dt.x,0.0f); - const Vec3vfx n1(dp1dt.y,-dp1dt.x,0.0f); - const Vec3vfx nn0 = normalize(n0); - const Vec3vfx nn1 = normalize(n1); - const Vec3vfx lp0 = madd(p0.w,nn0,Vec3vfx(p0)); - const Vec3vfx lp1 = madd(p1.w,nn1,Vec3vfx(p1)); - const Vec3vfx up0 = nmadd(p0.w,nn0,Vec3vfx(p0)); - const Vec3vfx up1 = nmadd(p1.w,nn1,Vec3vfx(p1)); - - vfloatx vu,vv,vt; - vboolx valid0 = intersect_quad_backface_culling(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt); - - if (any(valid0)) - { - /* ignore self intersections */ - if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) { - vfloatx r = lerp(p0.w, p1.w, vu); - valid0 &= vt > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; - } - - if (any(valid0)) - { - vv = madd(2.0f,vv,vfloatx(-1.0f)); - RibbonHit<NativeCurve3ff,VSIZEX> bhit(valid0,vu,vv,vt,0,N,curve3D); - ishit |= epilog(bhit.valid,bhit); - } - } - } - - if (unlikely(VSIZEX < N)) - { - /* process SIMD-size many segments per iteration */ - for (int i=VSIZEX; i<N; i+=VSIZEX) - { - /* evaluate the bezier curve */ - vboolx valid = vintx(i)+vintx(step) < vintx(N); - const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(i,N); - const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(i,N); - valid &= cylinder_culling_test(zero,Vec2vfx(p0.x,p0.y),Vec2vfx(p1.x,p1.y),max(p0.w,p1.w)); - if (none(valid)) continue; - - Vec3vfx dp0dt = curve2D.template derivative0<VSIZEX>(i,N); - Vec3vfx dp1dt = curve2D.template derivative1<VSIZEX>(i,N); - dp0dt = select(reduce_max(abs(dp0dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp0dt); - dp1dt = select(reduce_max(abs(dp1dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp1dt); - const Vec3vfx n0(dp0dt.y,-dp0dt.x,0.0f); - const Vec3vfx n1(dp1dt.y,-dp1dt.x,0.0f); - const Vec3vfx nn0 = normalize(n0); - const Vec3vfx nn1 = normalize(n1); - const Vec3vfx lp0 = madd(p0.w,nn0,Vec3vfx(p0)); - const Vec3vfx lp1 = madd(p1.w,nn1,Vec3vfx(p1)); - const Vec3vfx up0 = nmadd(p0.w,nn0,Vec3vfx(p0)); - const Vec3vfx up1 = nmadd(p1.w,nn1,Vec3vfx(p1)); - - vfloatx vu,vv,vt; - vboolx valid0 = intersect_quad_backface_culling(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt); - - if (any(valid0)) - { - /* ignore self intersections */ - if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) { - vfloatx r = lerp(p0.w, p1.w, vu); - valid0 &= vt > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; - } - - if (any(valid0)) - { - vv = madd(2.0f,vv,vfloatx(-1.0f)); - RibbonHit<NativeCurve3ff,VSIZEX> bhit(valid0,vu,vv,vt,i,N,curve3D); - ishit |= epilog(bhit.valid,bhit); - } - } - } - } - return ishit; - } - - template<template<typename Ty> class NativeCurve> - struct RibbonCurve1Intersector1 - { - typedef NativeCurve<Vec3ff> NativeCurve3ff; - - template<typename Epilog> - __forceinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, - IntersectContext* context, - const CurveGeometry* geom, const unsigned int primID, - const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, - const Epilog& epilog) - { - const int N = geom->tessellationRate; - NativeCurve3ff curve(v0,v1,v2,v3); - curve = enlargeRadiusToMinWidth(context,geom,ray.org,curve); - return intersect_ribbon<NativeCurve3ff>(ray.org,ray.dir,ray.tnear(),ray.tfar, - pre.ray_space,pre.depth_scale, - curve,N, - epilog); - } - }; - - template<template<typename Ty> class NativeCurve, int K> - struct RibbonCurve1IntersectorK - { - typedef NativeCurve<Vec3ff> NativeCurve3ff; - - template<typename Epilog> - __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& ray, size_t k, - IntersectContext* context, - const CurveGeometry* geom, const unsigned int primID, - const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, - const Epilog& epilog) - { - const int N = geom->tessellationRate; - const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]); - const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]); - NativeCurve3ff curve(v0,v1,v2,v3); - curve = enlargeRadiusToMinWidth(context,geom,ray_org,curve); - return intersect_ribbon<NativeCurve3ff>(ray_org,ray_dir,ray.tnear()[k],ray.tfar[k], - pre.ray_space[k],pre.depth_scale[k], - curve,N, - epilog); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h deleted file mode 100644 index 883cedc3d2..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h +++ /dev/null @@ -1,362 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" -#include "cylinder.h" -#include "plane.h" -#include "line_intersector.h" -#include "curve_intersector_precalculations.h" - -namespace embree -{ - namespace isa - { - static const size_t numJacobianIterations = 5; -#if defined(__AVX__) - static const size_t numBezierSubdivisions = 2; -#else - static const size_t numBezierSubdivisions = 3; -#endif - - struct BezierCurveHit - { - __forceinline BezierCurveHit() {} - - __forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng) - : t(t), u(u), v(0.0f), Ng(Ng) {} - - __forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng) - : t(t), u(u), v(v), Ng(Ng) {} - - __forceinline void finalize() {} - - public: - float t; - float u; - float v; - Vec3fa Ng; - }; - - template<typename NativeCurve3ff, typename Ray, typename Epilog> - __forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i, - const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1, - const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du, - const Epilog& epilog) - { - if (tp.lower[i]+dt > ray.tfar) return false; - Vec3fa Ng_o = Vec3fa(Ng.x[i],Ng.y[i],Ng.z[i]); - if (h0.lower[i] == tp.lower[i]) Ng_o = -Vec3fa(dP0du.x[i],dP0du.y[i],dP0du.z[i]); - if (h1.lower[i] == tp.lower[i]) Ng_o = +Vec3fa(dP3du.x[i],dP3du.y[i],dP3du.z[i]); - BezierCurveHit hit(tp.lower[i]+dt,u[i],Ng_o); - return epilog(hit); - } - - template<typename NativeCurve3ff, typename Ray, typename Epilog> - __forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog) - { - const Vec3fa org = zero; - const Vec3fa dir = ray.dir; - const float length_ray_dir = length(dir); - - /* error of curve evaluations is propertional to largest coordinate */ - const BBox3ff box = curve.bounds(); - const float P_err = 16.0f*float(ulp)*reduce_max(max(abs(box.lower),abs(box.upper))); - - for (size_t i=0; i<numJacobianIterations; i++) - { - const Vec3fa Q = madd(Vec3fa(t),dir,org); - //const Vec3fa dQdu = zero; - const Vec3fa dQdt = dir; - const float Q_err = 16.0f*float(ulp)*length_ray_dir*t; // works as org=zero here - - Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu); - //const Vec3fa dPdt = zero; - - const Vec3fa R = Q-P; - const float len_R = length(R); //reduce_max(abs(R)); - const float R_err = max(Q_err,P_err); - const Vec3fa dRdu = /*dQdu*/-dPdu; - const Vec3fa dRdt = dQdt;//-dPdt; - - const Vec3fa T = normalize(dPdu); - const Vec3fa dTdu = dnormalize(dPdu,ddPdu); - //const Vec3fa dTdt = zero; - const float cos_err = P_err/length(dPdu); - - /* Error estimate for dot(R,T): - - dot(R,T) = cos(R,T) |R| |T| - = (cos(R,T) +- cos_error) * (|R| +- |R|_err) * (|T| +- |T|_err) - = cos(R,T)*|R|*|T| - +- cos(R,T)*(|R|*|T|_err + |T|*|R|_err) - +- cos_error*(|R| + |T|) - +- lower order terms - with cos(R,T) being in [0,1] and |T| = 1 we get: - dot(R,T)_err = |R|*|T|_err + |R|_err = cos_error*(|R|+1) - */ - - const float f = dot(R,T); - const float f_err = len_R*P_err + R_err + cos_err*(1.0f+len_R); - const float dfdu = dot(dRdu,T) + dot(R,dTdu); - const float dfdt = dot(dRdt,T);// + dot(R,dTdt); - - const float K = dot(R,R)-sqr(f); - const float dKdu = /*2.0f*/(dot(R,dRdu)-f*dfdu); - const float dKdt = /*2.0f*/(dot(R,dRdt)-f*dfdt); - const float rsqrt_K = rsqrt(K); - - const float g = sqrt(K)-P.w; - const float g_err = R_err + f_err + 16.0f*float(ulp)*box.upper.w; - const float dgdu = /*0.5f*/dKdu*rsqrt_K-dPdu.w; - const float dgdt = /*0.5f*/dKdt*rsqrt_K;//-dPdt.w; - - const LinearSpace2f J = LinearSpace2f(dfdu,dfdt,dgdu,dgdt); - const Vec2f dut = rcp(J)*Vec2f(f,g); - const Vec2f ut = Vec2f(u,t) - dut; - u = ut.x; t = ut.y; - - if (abs(f) < f_err && abs(g) < g_err) - { - t+=dt; - if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs - if (!(u >= 0.0f && u <= 1.0f)) return false; // rejects NaNs - const Vec3fa R = normalize(Q-P); - const Vec3fa U = madd(Vec3fa(dPdu.w),R,dPdu); - const Vec3fa V = cross(dPdu,R); - BezierCurveHit hit(t,u,cross(V,U)); - return epilog(hit); - } - } - return false; - } - - template<typename NativeCurve3ff, typename Ray, typename Epilog> - bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, - float u0, float u1, unsigned int depth, const Epilog& epilog) - { -#if defined(__AVX__) - typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues - typedef vint8 vintx; - typedef vfloat8 vfloatx; -#else - typedef vbool4 vboolx; - typedef vint4 vintx; - typedef vfloat4 vfloatx; -#endif - typedef Vec3<vfloatx> Vec3vfx; - typedef Vec4<vfloatx> Vec4vfx; - - unsigned int maxDepth = numBezierSubdivisions; - bool found = false; - const Vec3fa org = zero; - const Vec3fa dir = ray.dir; - - unsigned int sptr = 0; - const unsigned int stack_size = numBezierSubdivisions+1; // +1 because of unstable workaround below - struct StackEntry { - vboolx valid; - vfloatx tlower; - float u0; - float u1; - unsigned int depth; - }; - StackEntry stack[stack_size]; - goto entry; - - /* terminate if stack is empty */ - while (sptr) - { - /* pop from stack */ - { - sptr--; - vboolx valid = stack[sptr].valid; - const vfloatx tlower = stack[sptr].tlower; - valid &= tlower+dt <= ray.tfar; - if (none(valid)) continue; - u0 = stack[sptr].u0; - u1 = stack[sptr].u1; - depth = stack[sptr].depth; - const size_t i = select_min(valid,tlower); clear(valid,i); - stack[sptr].valid = valid; - if (any(valid)) sptr++; // there are still items on the stack - - /* process next segment */ - const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1))); - u0 = vu0[i+0]; - u1 = vu0[i+1]; - } - entry: - - /* subdivide curve */ - const float dscale = (u1-u0)*(1.0f/(3.0f*(vfloatx::size-1))); - const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1))); - Vec4vfx P0, dP0du; curve.veval(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale); - const Vec4vfx P3 = shift_right_1(P0); - const Vec4vfx dP3du = shift_right_1(dP0du); - const Vec4vfx P1 = P0 + dP0du; - const Vec4vfx P2 = P3 - dP3du; - - /* calculate bounding cylinders */ - const vfloatx rr1 = sqr_point_to_line_distance(Vec3vfx(dP0du),Vec3vfx(P3-P0)); - const vfloatx rr2 = sqr_point_to_line_distance(Vec3vfx(dP3du),Vec3vfx(P3-P0)); - const vfloatx maxr12 = sqrt(max(rr1,rr2)); - const vfloatx one_plus_ulp = 1.0f+2.0f*float(ulp); - const vfloatx one_minus_ulp = 1.0f-2.0f*float(ulp); - vfloatx r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12; - vfloatx r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12; - r_outer = one_plus_ulp*r_outer; - r_inner = max(0.0f,one_minus_ulp*r_inner); - const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer); - const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner); - vboolx valid = true; clear(valid,vfloatx::size-1); - - /* intersect with outer cylinder */ - BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1; - valid &= cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1); - if (none(valid)) continue; - - /* intersect with cap-planes */ - BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt); - tp = embree::intersect(tp,tc_outer); - BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(org,dir); - tp = embree::intersect(tp,h0); - BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(org,dir); - tp = embree::intersect(tp,h1); - valid &= tp.lower <= tp.upper; - if (none(valid)) continue; - - /* clamp and correct u parameter */ - u_outer0 = clamp(u_outer0,vfloatx(0.0f),vfloatx(1.0f)); - u_outer1 = clamp(u_outer1,vfloatx(0.0f),vfloatx(1.0f)); - u_outer0 = lerp(u0,u1,(vfloatx(step)+u_outer0)*(1.0f/float(vfloatx::size))); - u_outer1 = lerp(u0,u1,(vfloatx(step)+u_outer1)*(1.0f/float(vfloatx::size))); - - /* intersect with inner cylinder */ - BBox<vfloatx> tc_inner; - vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero; - const vboolx valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1); - - /* at the unstable area we subdivide deeper */ - const vboolx unstable0 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner0)) < 0.3f); - const vboolx unstable1 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner1)) < 0.3f); - - /* subtract the inner interval from the current hit interval */ - BBox<vfloatx> tp0, tp1; - subtract(tp,tc_inner,tp0,tp1); - vboolx valid0 = valid & (tp0.lower <= tp0.upper); - vboolx valid1 = valid & (tp1.lower <= tp1.upper); - if (none(valid0 | valid1)) continue; - - /* iterate over all first hits front to back */ - const vintx termDepth0 = select(unstable0,vintx(maxDepth+1),vintx(maxDepth)); - vboolx recursion_valid0 = valid0 & (depth < termDepth0); - valid0 &= depth >= termDepth0; - - while (any(valid0)) - { - const size_t i = select_min(valid0,tp0.lower); clear(valid0,i); - found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0[i],tp0.lower[i],epilog); - //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog); - valid0 &= tp0.lower+dt <= ray.tfar; - } - valid1 &= tp1.lower+dt <= ray.tfar; - - /* iterate over all second hits front to back */ - const vintx termDepth1 = select(unstable1,vintx(maxDepth+1),vintx(maxDepth)); - vboolx recursion_valid1 = valid1 & (depth < termDepth1); - valid1 &= depth >= termDepth1; - while (any(valid1)) - { - const size_t i = select_min(valid1,tp1.lower); clear(valid1,i); - found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1[i],tp1.upper[i],epilog); - //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog); - valid1 &= tp1.lower+dt <= ray.tfar; - } - - /* push valid segments to stack */ - recursion_valid0 &= tp0.lower+dt <= ray.tfar; - recursion_valid1 &= tp1.lower+dt <= ray.tfar; - const vboolx recursion_valid = recursion_valid0 | recursion_valid1; - if (any(recursion_valid)) - { - assert(sptr < stack_size); - stack[sptr].valid = recursion_valid; - stack[sptr].tlower = select(recursion_valid0,tp0.lower,tp1.lower); - stack[sptr].u0 = u0; - stack[sptr].u1 = u1; - stack[sptr].depth = depth+1; - sptr++; - } - } - return found; - } - - template<template<typename Ty> class NativeCurve> - struct SweepCurve1Intersector1 - { - typedef NativeCurve<Vec3ff> NativeCurve3ff; - - template<typename Epilog> - __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, - IntersectContext* context, - const CurveGeometry* geom, const unsigned int primID, - const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, - const Epilog& epilog) - { - STAT3(normal.trav_prims,1,1,1); - - /* move ray closer to make intersection stable */ - NativeCurve3ff curve0(v0,v1,v2,v3); - curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0); - const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir)); - const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f); - const NativeCurve3ff curve1 = curve0-ref; - return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog); - } - }; - - template<template<typename Ty> class NativeCurve, int K> - struct SweepCurve1IntersectorK - { - typedef NativeCurve<Vec3ff> NativeCurve3ff; - - struct Ray1 - { - __forceinline Ray1(RayK<K>& ray, size_t k) - : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {} - - Vec3fa org; - Vec3fa dir; - float _tnear; - float& tfar; - - __forceinline float& tnear() { return _tnear; } - //__forceinline float& tfar() { return _tfar; } - __forceinline const float& tnear() const { return _tnear; } - //__forceinline const float& tfar() const { return _tfar; } - - }; - - template<typename Epilog> - __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k, - IntersectContext* context, - const CurveGeometry* geom, const unsigned int primID, - const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, - const Epilog& epilog) - { - STAT3(normal.trav_prims,1,1,1); - Ray1 ray(vray,k); - - /* move ray closer to make intersection stable */ - NativeCurve3ff curve0(v0,v1,v2,v3); - curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0); - const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir)); - const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f); - const NativeCurve3ff curve1 = curve0-ref; - return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h deleted file mode 100644 index e1f4238130..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h +++ /dev/null @@ -1,671 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "primitive.h" -#include "../subdiv/bezier_curve.h" -#include "../common/primref.h" -#include "curve_intersector_precalculations.h" -#include "../bvh/node_intersector1.h" -#include "../bvh/node_intersector_packet.h" - -#include "intersector_epilog.h" - -#include "../subdiv/bezier_curve.h" -#include "../subdiv/bspline_curve.h" -#include "../subdiv/hermite_curve.h" -#include "../subdiv/catmullrom_curve.h" - -#include "spherei_intersector.h" -#include "disci_intersector.h" - -#include "linei_intersector.h" -#include "roundlinei_intersector.h" -#include "conelinei_intersector.h" - -#include "curveNi_intersector.h" -#include "curveNv_intersector.h" -#include "curveNi_mb_intersector.h" - -#include "curve_intersector_distance.h" -#include "curve_intersector_ribbon.h" -#include "curve_intersector_oriented.h" -#include "curve_intersector_sweep.h" - -namespace embree -{ - struct VirtualCurveIntersector - { - typedef void (*Intersect1Ty)(void* pre, void* ray, IntersectContext* context, const void* primitive); - typedef bool (*Occluded1Ty )(void* pre, void* ray, IntersectContext* context, const void* primitive); - - typedef void (*Intersect4Ty)(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); - typedef bool (*Occluded4Ty) (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); - - typedef void (*Intersect8Ty)(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); - typedef bool (*Occluded8Ty) (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); - - typedef void (*Intersect16Ty)(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); - typedef bool (*Occluded16Ty) (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); - - public: - struct Intersectors - { - Intersectors() {} // WARNING: Do not zero initialize this, as we otherwise get problems with thread unsafe local static variable initialization (e.g. on VS2013) in curve_intersector_virtual.cpp. - - template<int K> void intersect(void* pre, void* ray, IntersectContext* context, const void* primitive); - template<int K> bool occluded (void* pre, void* ray, IntersectContext* context, const void* primitive); - - template<int K> void intersect(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); - template<int K> bool occluded (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); - - public: - Intersect1Ty intersect1; - Occluded1Ty occluded1; - Intersect4Ty intersect4; - Occluded4Ty occluded4; - Intersect8Ty intersect8; - Occluded8Ty occluded8; - Intersect16Ty intersect16; - Occluded16Ty occluded16; - }; - - Intersectors vtbl[Geometry::GTY_END]; - }; - - template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<1> (void* pre, void* ray, IntersectContext* context, const void* primitive) { assert(intersect1); intersect1(pre,ray,context,primitive); } - template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<1> (void* pre, void* ray, IntersectContext* context, const void* primitive) { assert(occluded1); return occluded1(pre,ray,context,primitive); } - - template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<4>(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(intersect4); intersect4(pre,ray,k,context,primitive); } - template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<4> (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(occluded4); return occluded4(pre,ray,k,context,primitive); } - -#if defined(__AVX__) - template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<8>(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(intersect8); intersect8(pre,ray,k,context,primitive); } - template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<8> (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(occluded8); return occluded8(pre,ray,k,context,primitive); } -#endif - -#if defined(__AVX512F__) - template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<16>(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(intersect16); intersect16(pre,ray,k,context,primitive); } - template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<16> (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(occluded16); return occluded16(pre,ray,k,context,primitive); } -#endif - - namespace isa - { - struct VirtualCurveIntersector1 - { - typedef unsigned char Primitive; - typedef CurvePrecalculations1 Precalculations; - - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - assert(num == 1); - RTCGeometryType ty = (RTCGeometryType)(*prim); - assert(This->leafIntersector); - VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty]; - leafIntersector.intersect<1>(&pre,&ray,context,prim); - } - - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - assert(num == 1); - RTCGeometryType ty = (RTCGeometryType)(*prim); - assert(This->leafIntersector); - VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty]; - return leafIntersector.occluded<1>(&pre,&ray,context,prim); - } - }; - - template<int K> - struct VirtualCurveIntersectorK - { - typedef unsigned char Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - template<bool robust> - static __forceinline void intersect(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) - { - assert(num == 1); - RTCGeometryType ty = (RTCGeometryType)(*prim); - assert(This->leafIntersector); - VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty]; - size_t mask = movemask(valid_i); - while (mask) leafIntersector.intersect<K>(&pre,&ray,bscf(mask),context,prim); - } - - template<bool robust> - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) - { - assert(num == 1); - RTCGeometryType ty = (RTCGeometryType)(*prim); - assert(This->leafIntersector); - VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty]; - vbool<K> valid_o = false; - size_t mask = movemask(valid_i); - while (mask) { - size_t k = bscf(mask); - if (leafIntersector.occluded<K>(&pre,&ray,k,context,prim)) - set(valid_o, k); - } - return valid_o; - } - - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - assert(num == 1); - RTCGeometryType ty = (RTCGeometryType)(*prim); - assert(This->leafIntersector); - VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty]; - leafIntersector.intersect<K>(&pre,&ray,k,context,prim); - } - - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - assert(num == 1); - RTCGeometryType ty = (RTCGeometryType)(*prim); - assert(This->leafIntersector); - VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty]; - return leafIntersector.occluded<K>(&pre,&ray,k,context,prim); - } - }; - - template<int N> - static VirtualCurveIntersector::Intersectors LinearRoundConeNiIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &RoundLinearCurveMiIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &RoundLinearCurveMiIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &RoundLinearCurveMiIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &RoundLinearCurveMiIntersectorK<N,N,4,true>::occluded; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&RoundLinearCurveMiIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &RoundLinearCurveMiIntersectorK<N,N,8,true>::occluded; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&RoundLinearCurveMiIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &RoundLinearCurveMiIntersectorK<N,N,16,true>::occluded; -#endif - return intersectors; - } - - template<int N> - static VirtualCurveIntersector::Intersectors LinearConeNiIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &ConeCurveMiIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &ConeCurveMiIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &ConeCurveMiIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &ConeCurveMiIntersectorK<N,N,4,true>::occluded; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&ConeCurveMiIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &ConeCurveMiIntersectorK<N,N,8,true>::occluded; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&ConeCurveMiIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &ConeCurveMiIntersectorK<N,N,16,true>::occluded; -#endif - return intersectors; - } - - template<int N> - static VirtualCurveIntersector::Intersectors LinearRoundConeNiMBIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &RoundLinearCurveMiMBIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &RoundLinearCurveMiMBIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &RoundLinearCurveMiMBIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &RoundLinearCurveMiMBIntersectorK<N,N,4,true>::occluded; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&RoundLinearCurveMiMBIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &RoundLinearCurveMiMBIntersectorK<N,N,8,true>::occluded; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&RoundLinearCurveMiMBIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &RoundLinearCurveMiMBIntersectorK<N,N,16,true>::occluded; -#endif - return intersectors; - } - - template<int N> - static VirtualCurveIntersector::Intersectors LinearConeNiMBIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &ConeCurveMiMBIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &ConeCurveMiMBIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &ConeCurveMiMBIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &ConeCurveMiMBIntersectorK<N,N,4,true>::occluded; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&ConeCurveMiMBIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &ConeCurveMiMBIntersectorK<N,N,8,true>::occluded; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&ConeCurveMiMBIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &ConeCurveMiMBIntersectorK<N,N,16,true>::occluded; -#endif - return intersectors; - } - - - template<int N> - static VirtualCurveIntersector::Intersectors LinearRibbonNiIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &FlatLinearCurveMiIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &FlatLinearCurveMiIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &FlatLinearCurveMiIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &FlatLinearCurveMiIntersectorK<N,N,4,true>::occluded; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&FlatLinearCurveMiIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &FlatLinearCurveMiIntersectorK<N,N,8,true>::occluded; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&FlatLinearCurveMiIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &FlatLinearCurveMiIntersectorK<N,N,16,true>::occluded; -#endif - return intersectors; - } - - template<int N> - static VirtualCurveIntersector::Intersectors LinearRibbonNiMBIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &FlatLinearCurveMiMBIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &FlatLinearCurveMiMBIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &FlatLinearCurveMiMBIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &FlatLinearCurveMiMBIntersectorK<N,N,4,true>::occluded; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&FlatLinearCurveMiMBIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &FlatLinearCurveMiMBIntersectorK<N,N,8,true>::occluded; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&FlatLinearCurveMiMBIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &FlatLinearCurveMiMBIntersectorK<N,N,16,true>::occluded; -#endif - return intersectors; - } - - template<int N> - static VirtualCurveIntersector::Intersectors SphereNiIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &SphereMiIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &SphereMiIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &SphereMiIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &SphereMiIntersectorK<N,N,4,true>::occluded; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&SphereMiIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &SphereMiIntersectorK<N,N,8,true>::occluded; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&SphereMiIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &SphereMiIntersectorK<N,N,16,true>::occluded; -#endif - return intersectors; - } - - template<int N> - static VirtualCurveIntersector::Intersectors SphereNiMBIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &SphereMiMBIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &SphereMiMBIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &SphereMiMBIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &SphereMiMBIntersectorK<N,N,4,true>::occluded; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&SphereMiMBIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &SphereMiMBIntersectorK<N,N,8,true>::occluded; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&SphereMiMBIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &SphereMiMBIntersectorK<N,N,16,true>::occluded; -#endif - return intersectors; - } - - template<int N> - static VirtualCurveIntersector::Intersectors DiscNiIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &DiscMiIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &DiscMiIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &DiscMiIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &DiscMiIntersectorK<N,N,4,true>::occluded; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&DiscMiIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &DiscMiIntersectorK<N,N,8,true>::occluded; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&DiscMiIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &DiscMiIntersectorK<N,N,16,true>::occluded; -#endif - return intersectors; - } - - template<int N> - static VirtualCurveIntersector::Intersectors DiscNiMBIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &DiscMiMBIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &DiscMiMBIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &DiscMiMBIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &DiscMiMBIntersectorK<N,N,4,true>::occluded; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&DiscMiMBIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &DiscMiMBIntersectorK<N,N,8,true>::occluded; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&DiscMiMBIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &DiscMiMBIntersectorK<N,N,16,true>::occluded; -#endif - return intersectors; - } - - template<int N> - static VirtualCurveIntersector::Intersectors OrientedDiscNiIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &OrientedDiscMiIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &OrientedDiscMiIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &OrientedDiscMiIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &OrientedDiscMiIntersectorK<N,N,4,true>::occluded; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&OrientedDiscMiIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &OrientedDiscMiIntersectorK<N,N,8,true>::occluded; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&OrientedDiscMiIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &OrientedDiscMiIntersectorK<N,N,16,true>::occluded; -#endif - return intersectors; - } - - template<int N> - static VirtualCurveIntersector::Intersectors OrientedDiscNiMBIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &OrientedDiscMiMBIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &OrientedDiscMiMBIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &OrientedDiscMiMBIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &OrientedDiscMiMBIntersectorK<N,N,4,true>::occluded; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&OrientedDiscMiMBIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &OrientedDiscMiMBIntersectorK<N,N,8,true>::occluded; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&OrientedDiscMiMBIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &OrientedDiscMiMBIntersectorK<N,N,16,true>::occluded; -#endif - return intersectors; - } - - template<template<typename Ty> class Curve, int N> - static VirtualCurveIntersector::Intersectors RibbonNiIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &CurveNiIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_t <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >; -#endif - return intersectors; - } - - template<template<typename Ty> class Curve, int N> - static VirtualCurveIntersector::Intersectors RibbonNvIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNvIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNvIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &CurveNvIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNvIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNvIntersectorK<N,8>::template intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNvIntersectorK<N,8>::template occluded_t <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNvIntersectorK<N,16>::template intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNvIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >; -#endif - return intersectors; - } - - template<template<typename Ty> class Curve, int N> - static VirtualCurveIntersector::Intersectors RibbonNiMBIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &CurveNiMBIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_t <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >; -#endif - return intersectors; - } - - template<template<typename Ty> class Curve, int N> - static VirtualCurveIntersector::Intersectors CurveNiIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; -#endif - return intersectors; - } - - template<template<typename Ty> class Curve, int N> - static VirtualCurveIntersector::Intersectors CurveNvIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNvIntersector1<N>::template intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNvIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNvIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNvIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNvIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNvIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNvIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNvIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; -#endif - return intersectors; - } - - template<template<typename Ty> class Curve, int N> - static VirtualCurveIntersector::Intersectors CurveNiMBIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; -#endif - return intersectors; - } - - template<template<typename Ty> class Curve, int N> - static VirtualCurveIntersector::Intersectors OrientedCurveNiIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_n<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_n <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_n<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_n <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_n<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_n <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_n<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_n <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; -#endif - return intersectors; - } - - template<template<typename Ty> class Curve, int N> - static VirtualCurveIntersector::Intersectors OrientedCurveNiMBIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_n<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_n <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_n<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_n <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_n<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_n <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_n<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_n <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; -#endif - return intersectors; - } - - template<template<typename Ty> class Curve, int N> - static VirtualCurveIntersector::Intersectors HermiteRibbonNiIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_h<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_h <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_h<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_h <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_h<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_h <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_h<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_h <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >; -#endif - return intersectors; - } - - template<template<typename Ty> class Curve, int N> - static VirtualCurveIntersector::Intersectors HermiteRibbonNiMBIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_h<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_h <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_h<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_h <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_h<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_h <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_h<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_h <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >; -#endif - return intersectors; - } - - template<template<typename Ty> class Curve, int N> - static VirtualCurveIntersector::Intersectors HermiteCurveNiIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_h<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_h <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_h<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_h <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_h<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_h <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_h<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_h <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; -#endif - return intersectors; - } - - template<template<typename Ty> class Curve, int N> - static VirtualCurveIntersector::Intersectors HermiteCurveNiMBIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_h<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_h <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_h<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_h <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_h<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_h <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_h<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_h <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; -#endif - return intersectors; - } - - template<template<typename Ty> class Curve, int N> - static VirtualCurveIntersector::Intersectors HermiteOrientedCurveNiIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_hn<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_hn <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_hn<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_hn <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_hn<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_hn <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_hn<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_hn <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; -#endif - return intersectors; - } - - template<template<typename Ty> class Curve, int N> - static VirtualCurveIntersector::Intersectors HermiteOrientedCurveNiMBIntersectors() - { - VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_hn<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_hn <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_hn<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_hn <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >; -#if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_hn<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_hn <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >; -#endif -#if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_hn<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_hn <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >; -#endif - return intersectors; - } - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h deleted file mode 100644 index 69cf612275..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2020 Light Transport Entertainment Inc. -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curve_intersector_virtual.h" - -namespace embree -{ - namespace isa - { - void AddVirtualCurveBezierCurveInterector4i(VirtualCurveIntersector &prim); - void AddVirtualCurveBezierCurveInterector4v(VirtualCurveIntersector &prim); - void AddVirtualCurveBezierCurveInterector4iMB(VirtualCurveIntersector &prim); -#if defined(__AVX__) - void AddVirtualCurveBezierCurveInterector8i(VirtualCurveIntersector &prim); - void AddVirtualCurveBezierCurveInterector8v(VirtualCurveIntersector &prim); - void AddVirtualCurveBezierCurveInterector8iMB(VirtualCurveIntersector &prim); -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h deleted file mode 100644 index d37e41098e..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2020 Light Transport Entertainment Inc. -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curve_intersector_virtual.h" - -namespace embree -{ - namespace isa - { - void AddVirtualCurveBSplineCurveInterector4i(VirtualCurveIntersector &prim); - void AddVirtualCurveBSplineCurveInterector4v(VirtualCurveIntersector &prim); - void AddVirtualCurveBSplineCurveInterector4iMB(VirtualCurveIntersector &prim); -#if defined(__AVX__) - void AddVirtualCurveBSplineCurveInterector8i(VirtualCurveIntersector &prim); - void AddVirtualCurveBSplineCurveInterector8v(VirtualCurveIntersector &prim); - void AddVirtualCurveBSplineCurveInterector8iMB(VirtualCurveIntersector &prim); -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h deleted file mode 100644 index a133a11d63..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2020 Light Transport Entertainment Inc. -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curve_intersector_virtual.h" - -namespace embree -{ - namespace isa - { - void AddVirtualCurveCatmullRomCurveInterector4i(VirtualCurveIntersector &prim); - void AddVirtualCurveCatmullRomCurveInterector4v(VirtualCurveIntersector &prim); - void AddVirtualCurveCatmullRomCurveInterector4iMB(VirtualCurveIntersector &prim); -#if defined(__AVX__) - void AddVirtualCurveCatmullRomCurveInterector8i(VirtualCurveIntersector &prim); - void AddVirtualCurveCatmullRomCurveInterector8v(VirtualCurveIntersector &prim); - void AddVirtualCurveCatmullRomCurveInterector8iMB(VirtualCurveIntersector &prim); -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h deleted file mode 100644 index 9aec35da45..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2020 Light Transport Entertainment Inc. -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curve_intersector_virtual.h" - -namespace embree -{ - namespace isa - { - void AddVirtualCurveHermiteCurveInterector4i(VirtualCurveIntersector &prim); - void AddVirtualCurveHermiteCurveInterector4v(VirtualCurveIntersector &prim); - void AddVirtualCurveHermiteCurveInterector4iMB(VirtualCurveIntersector &prim); -#if defined(__AVX__) - void AddVirtualCurveHermiteCurveInterector8i(VirtualCurveIntersector &prim); - void AddVirtualCurveHermiteCurveInterector8v(VirtualCurveIntersector &prim); - void AddVirtualCurveHermiteCurveInterector8iMB(VirtualCurveIntersector &prim); -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h deleted file mode 100644 index dd37d194f5..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2020 Light Transport Entertainment Inc. -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curve_intersector_virtual.h" - -namespace embree -{ - namespace isa - { - void AddVirtualCurveLinearCurveInterector4i(VirtualCurveIntersector &prim); - void AddVirtualCurveLinearCurveInterector4v(VirtualCurveIntersector &prim); - void AddVirtualCurveLinearCurveInterector4iMB(VirtualCurveIntersector &prim); -#if defined(__AVX__) - void AddVirtualCurveLinearCurveInterector8i(VirtualCurveIntersector &prim); - void AddVirtualCurveLinearCurveInterector8v(VirtualCurveIntersector &prim); - void AddVirtualCurveLinearCurveInterector8iMB(VirtualCurveIntersector &prim); -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h deleted file mode 100644 index fe5ceed840..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2020 Light Transport Entertainment Inc. -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curve_intersector_virtual.h" - -namespace embree -{ - namespace isa - { - void AddVirtualCurvePointInterector4i(VirtualCurveIntersector &prim); - void AddVirtualCurvePointInterector4v(VirtualCurveIntersector &prim); - void AddVirtualCurvePointInterector4iMB(VirtualCurveIntersector &prim); - -#if defined (__AVX__) - void AddVirtualCurvePointInterector8i(VirtualCurveIntersector &prim); - void AddVirtualCurvePointInterector8v(VirtualCurveIntersector &prim); - void AddVirtualCurvePointInterector8iMB(VirtualCurveIntersector &prim); -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/cylinder.h b/thirdparty/embree-aarch64/kernels/geometry/cylinder.h deleted file mode 100644 index 39a582864c..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/cylinder.h +++ /dev/null @@ -1,223 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" - -namespace embree -{ - namespace isa - { - struct Cylinder - { - const Vec3fa p0; //!< start location - const Vec3fa p1; //!< end position - const float rr; //!< squared radius of cylinder - - __forceinline Cylinder(const Vec3fa& p0, const Vec3fa& p1, const float r) - : p0(p0), p1(p1), rr(sqr(r)) {} - - __forceinline Cylinder(const Vec3fa& p0, const Vec3fa& p1, const float rr, bool) - : p0(p0), p1(p1), rr(rr) {} - - __forceinline bool intersect(const Vec3fa& org, - const Vec3fa& dir, - BBox1f& t_o, - float& u0_o, Vec3fa& Ng0_o, - float& u1_o, Vec3fa& Ng1_o) const - { - /* calculate quadratic equation to solve */ - const float rl = rcp_length(p1-p0); - const Vec3fa P0 = p0, dP = (p1-p0)*rl; - const Vec3fa O = org-P0, dO = dir; - - const float dOdO = dot(dO,dO); - const float OdO = dot(dO,O); - const float OO = dot(O,O); - const float dOz = dot(dP,dO); - const float Oz = dot(dP,O); - - const float A = dOdO - sqr(dOz); - const float B = 2.0f * (OdO - dOz*Oz); - const float C = OO - sqr(Oz) - rr; - - /* we miss the cylinder if determinant is smaller than zero */ - const float D = B*B - 4.0f*A*C; - if (D < 0.0f) { - t_o = BBox1f(pos_inf,neg_inf); - return false; - } - - /* special case for rays that are parallel to the cylinder */ - const float eps = 16.0f*float(ulp)*max(abs(dOdO),abs(sqr(dOz))); - if (abs(A) < eps) - { - if (C <= 0.0f) { - t_o = BBox1f(neg_inf,pos_inf); - return true; - } else { - t_o = BBox1f(pos_inf,neg_inf); - return false; - } - } - - /* standard case for rays that are not parallel to the cylinder */ - const float Q = sqrt(D); - const float rcp_2A = rcp(2.0f*A); - const float t0 = (-B-Q)*rcp_2A; - const float t1 = (-B+Q)*rcp_2A; - - /* calculates u and Ng for near hit */ - { - u0_o = madd(t0,dOz,Oz)*rl; - const Vec3fa Pr = t0*dir; - const Vec3fa Pl = madd(u0_o,p1-p0,p0); - Ng0_o = Pr-Pl; - } - - /* calculates u and Ng for far hit */ - { - u1_o = madd(t1,dOz,Oz)*rl; - const Vec3fa Pr = t1*dir; - const Vec3fa Pl = madd(u1_o,p1-p0,p0); - Ng1_o = Pr-Pl; - } - - t_o.lower = t0; - t_o.upper = t1; - return true; - } - - __forceinline bool intersect(const Vec3fa& org_i, const Vec3fa& dir, BBox1f& t_o) const - { - float u0_o; Vec3fa Ng0_o; - float u1_o; Vec3fa Ng1_o; - return intersect(org_i,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o); - } - - static bool verify(const size_t id, const Cylinder& cylinder, const RayHit& ray, bool shouldhit, const float t0, const float t1) - { - float eps = 0.001f; - BBox1f t; bool hit; - hit = cylinder.intersect(ray.org,ray.dir,t); - - bool failed = hit != shouldhit; - if (shouldhit) failed |= std::isinf(t0) ? t0 != t.lower : abs(t0-t.lower) > eps; - if (shouldhit) failed |= std::isinf(t1) ? t1 != t.upper : abs(t1-t.upper) > eps; - if (!failed) return true; - embree_cout << "Cylinder test " << id << " failed: cylinder = " << cylinder << ", ray = " << ray << ", hit = " << hit << ", t = " << t << embree_endl; - return false; - } - - /* verify cylinder class */ - static bool verify() - { - bool passed = true; - const Cylinder cylinder(Vec3fa(0.0f,0.0f,0.0f),Vec3fa(1.0f,0.0f,0.0f),1.0f); - passed &= verify(0,cylinder,RayHit(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f); - passed &= verify(1,cylinder,RayHit(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f); - passed &= verify(2,cylinder,RayHit(Vec3fa(+2.0f,1.0f,2.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f); - passed &= verify(3,cylinder,RayHit(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf); - passed &= verify(4,cylinder,RayHit(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf); - passed &= verify(5,cylinder,RayHit(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf); - passed &= verify(6,cylinder,RayHit(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf); - return passed; - } - - /*! output operator */ - friend __forceinline embree_ostream operator<<(embree_ostream cout, const Cylinder& c) { - return cout << "Cylinder { p0 = " << c.p0 << ", p1 = " << c.p1 << ", r = " << sqrtf(c.rr) << "}"; - } - }; - - template<int N> - struct CylinderN - { - const Vec3vf<N> p0; //!< start location - const Vec3vf<N> p1; //!< end position - const vfloat<N> rr; //!< squared radius of cylinder - - __forceinline CylinderN(const Vec3vf<N>& p0, const Vec3vf<N>& p1, const vfloat<N>& r) - : p0(p0), p1(p1), rr(sqr(r)) {} - - __forceinline CylinderN(const Vec3vf<N>& p0, const Vec3vf<N>& p1, const vfloat<N>& rr, bool) - : p0(p0), p1(p1), rr(rr) {} - - - __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir, - BBox<vfloat<N>>& t_o, - vfloat<N>& u0_o, Vec3vf<N>& Ng0_o, - vfloat<N>& u1_o, Vec3vf<N>& Ng1_o) const - { - /* calculate quadratic equation to solve */ - const vfloat<N> rl = rcp_length(p1-p0); - const Vec3vf<N> P0 = p0, dP = (p1-p0)*rl; - const Vec3vf<N> O = Vec3vf<N>(org)-P0, dO = dir; - - const vfloat<N> dOdO = dot(dO,dO); - const vfloat<N> OdO = dot(dO,O); - const vfloat<N> OO = dot(O,O); - const vfloat<N> dOz = dot(dP,dO); - const vfloat<N> Oz = dot(dP,O); - - const vfloat<N> A = dOdO - sqr(dOz); - const vfloat<N> B = 2.0f * (OdO - dOz*Oz); - const vfloat<N> C = OO - sqr(Oz) - rr; - - /* we miss the cylinder if determinant is smaller than zero */ - const vfloat<N> D = B*B - 4.0f*A*C; - vbool<N> valid = D >= 0.0f; - if (none(valid)) { - t_o = BBox<vfloat<N>>(empty); - return valid; - } - - /* standard case for rays that are not parallel to the cylinder */ - const vfloat<N> Q = sqrt(D); - const vfloat<N> rcp_2A = rcp(2.0f*A); - const vfloat<N> t0 = (-B-Q)*rcp_2A; - const vfloat<N> t1 = (-B+Q)*rcp_2A; - - /* calculates u and Ng for near hit */ - { - u0_o = madd(t0,dOz,Oz)*rl; - const Vec3vf<N> Pr = t0*Vec3vf<N>(dir); - const Vec3vf<N> Pl = madd(u0_o,p1-p0,p0); - Ng0_o = Pr-Pl; - } - - /* calculates u and Ng for far hit */ - { - u1_o = madd(t1,dOz,Oz)*rl; - const Vec3vf<N> Pr = t1*Vec3vf<N>(dir); - const Vec3vf<N> Pl = madd(u1_o,p1-p0,p0); - Ng1_o = Pr-Pl; - } - - t_o.lower = select(valid, t0, vfloat<N>(pos_inf)); - t_o.upper = select(valid, t1, vfloat<N>(neg_inf)); - - /* special case for rays that are parallel to the cylinder */ - const vfloat<N> eps = 16.0f*float(ulp)*max(abs(dOdO),abs(sqr(dOz))); - vbool<N> validt = valid & (abs(A) < eps); - if (unlikely(any(validt))) - { - vbool<N> inside = C <= 0.0f; - t_o.lower = select(validt,select(inside,vfloat<N>(neg_inf),vfloat<N>(pos_inf)),t_o.lower); - t_o.upper = select(validt,select(inside,vfloat<N>(pos_inf),vfloat<N>(neg_inf)),t_o.upper); - valid &= !validt | inside; - } - return valid; - } - - __forceinline vbool<N> intersect(const Vec3fa& org_i, const Vec3fa& dir, BBox<vfloat<N>>& t_o) const - { - vfloat<N> u0_o; Vec3vf<N> Ng0_o; - vfloat<N> u1_o; Vec3vf<N> Ng1_o; - return intersect(org_i,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o); - } - }; - } -} - diff --git a/thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h deleted file mode 100644 index e8305780e5..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" -#include "../common/scene_points.h" -#include "curve_intersector_precalculations.h" - -namespace embree -{ - namespace isa - { - template<int M> - struct DiscIntersectorHitM - { - __forceinline DiscIntersectorHitM() {} - - __forceinline DiscIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng) - : vu(u), vv(v), vt(t), vNg(Ng) - { - } - - __forceinline void finalize() {} - - __forceinline Vec2f uv(const size_t i) const - { - return Vec2f(vu[i], vv[i]); - } - __forceinline float t(const size_t i) const - { - return vt[i]; - } - __forceinline Vec3fa Ng(const size_t i) const - { - return Vec3fa(vNg.x[i], vNg.y[i], vNg.z[i]); - } - - public: - vfloat<M> vu; - vfloat<M> vv; - vfloat<M> vt; - Vec3vf<M> vNg; - }; - - template<int M> - struct DiscIntersector1 - { - typedef CurvePrecalculations1 Precalculations; - - template<typename Epilog> - static __forceinline bool intersect( - const vbool<M>& valid_i, - Ray& ray, - IntersectContext* context, - const Points* geom, - const Precalculations& pre, - const Vec4vf<M>& v0i, - const Epilog& epilog) - { - vbool<M> valid = valid_i; - - const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); - const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z); - const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir)); - - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec3vf<M> center = v0.xyz(); - const vfloat<M> radius = v0.w; - - const Vec3vf<M> c0 = center - ray_org; - const vfloat<M> projC0 = dot(c0, ray_dir) * rd2; - - valid &= (vfloat<M>(ray.tnear()) <= projC0) & (projC0 <= vfloat<M>(ray.tfar)); - if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) - valid &= projC0 > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR) * radius * pre.depth_scale; // ignore self intersections - if (unlikely(none(valid))) - return false; - - const Vec3vf<M> perp = c0 - projC0 * ray_dir; - const vfloat<M> l2 = dot(perp, perp); - const vfloat<M> r2 = radius * radius; - valid &= (l2 <= r2); - if (unlikely(none(valid))) - return false; - - DiscIntersectorHitM<M> hit(zero, zero, projC0, -ray_dir); - return epilog(valid, hit); - } - - template<typename Epilog> - static __forceinline bool intersect(const vbool<M>& valid_i, - Ray& ray, - IntersectContext* context, - const Points* geom, - const Precalculations& pre, - const Vec4vf<M>& v0i, - const Vec3vf<M>& normal, - const Epilog& epilog) - { - vbool<M> valid = valid_i; - const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); - - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec3vf<M> center = v0.xyz(); - const vfloat<M> radius = v0.w; - - vfloat<M> divisor = dot(Vec3vf<M>((Vec3fa)ray.dir), normal); - const vbool<M> parallel = divisor == vfloat<M>(0.f); - valid &= !parallel; - divisor = select(parallel, 1.f, divisor); // prevent divide by zero - - vfloat<M> t = dot(center - Vec3vf<M>((Vec3fa)ray.org), Vec3vf<M>(normal)) / divisor; - - valid &= (vfloat<M>(ray.tnear()) <= t) & (t <= vfloat<M>(ray.tfar)); - if (unlikely(none(valid))) - return false; - - Vec3vf<M> intersection = Vec3vf<M>((Vec3fa)ray.org) + Vec3vf<M>((Vec3fa)ray.dir) * t; - vfloat<M> dist2 = dot(intersection - center, intersection - center); - valid &= dist2 < radius * radius; - if (unlikely(none(valid))) - return false; - - DiscIntersectorHitM<M> hit(zero, zero, t, normal); - return epilog(valid, hit); - } - }; - - template<int M, int K> - struct DiscIntersectorK - { - typedef CurvePrecalculationsK<K> Precalculations; - - template<typename Epilog> - static __forceinline bool intersect(const vbool<M>& valid_i, - RayK<K>& ray, - size_t k, - IntersectContext* context, - const Points* geom, - const Precalculations& pre, - const Vec4vf<M>& v0i, - const Epilog& epilog) - { - vbool<M> valid = valid_i; - - const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); - const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); - const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir)); - - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec3vf<M> center = v0.xyz(); - const vfloat<M> radius = v0.w; - - const Vec3vf<M> c0 = center - ray_org; - const vfloat<M> projC0 = dot(c0, ray_dir) * rd2; - - valid &= (vfloat<M>(ray.tnear()[k]) <= projC0) & (projC0 <= vfloat<M>(ray.tfar[k])); - if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) - valid &= projC0 > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR) * radius * pre.depth_scale[k]; // ignore self intersections - if (unlikely(none(valid))) - return false; - - const Vec3vf<M> perp = c0 - projC0 * ray_dir; - const vfloat<M> l2 = dot(perp, perp); - const vfloat<M> r2 = radius * radius; - valid &= (l2 <= r2); - if (unlikely(none(valid))) - return false; - - DiscIntersectorHitM<M> hit(zero, zero, projC0, -ray_dir); - return epilog(valid, hit); - } - - template<typename Epilog> - static __forceinline bool intersect(const vbool<M>& valid_i, - RayK<K>& ray, - size_t k, - IntersectContext* context, - const Points* geom, - const Precalculations& pre, - const Vec4vf<M>& v0i, - const Vec3vf<M>& normal, - const Epilog& epilog) - { - vbool<M> valid = valid_i; - const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); - const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); - - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec3vf<M> center = v0.xyz(); - const vfloat<M> radius = v0.w; - - vfloat<M> divisor = dot(Vec3vf<M>(ray_dir), normal); - const vbool<M> parallel = divisor == vfloat<M>(0.f); - valid &= !parallel; - divisor = select(parallel, 1.f, divisor); // prevent divide by zero - - vfloat<M> t = dot(center - Vec3vf<M>(ray_org), Vec3vf<M>(normal)) / divisor; - - valid &= (vfloat<M>(ray.tnear()[k]) <= t) & (t <= vfloat<M>(ray.tfar[k])); - if (unlikely(none(valid))) - return false; - - Vec3vf<M> intersection = Vec3vf<M>(ray_org) + Vec3vf<M>(ray_dir) * t; - vfloat<M> dist2 = dot(intersection - center, intersection - center); - valid &= dist2 < radius * radius; - if (unlikely(none(valid))) - return false; - - DiscIntersectorHitM<M> hit(zero, zero, t, normal); - return epilog(valid, hit); - } - }; - } // namespace isa -} // namespace embree diff --git a/thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h deleted file mode 100644 index e1dc3aa98e..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "disc_intersector.h" -#include "intersector_epilog.h" -#include "pointi.h" - -namespace embree -{ - namespace isa - { - template<int M, int Mx, bool filter> - struct DiscMiIntersector1 - { - typedef PointMi<M> Primitive; - typedef CurvePrecalculations1 Precalculations; - - static __forceinline void intersect(const Precalculations& pre, - RayHit& ray, - IntersectContext* context, - const Primitive& Disc) - { - STAT3(normal.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Disc.gather(v0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, - Ray& ray, - IntersectContext* context, - const Primitive& Disc) - { - STAT3(shadow.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Disc.gather(v0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); - } - }; - - template<int M, int Mx, bool filter> - struct DiscMiMBIntersector1 - { - typedef PointMi<M> Primitive; - typedef CurvePrecalculations1 Precalculations; - - static __forceinline void intersect(const Precalculations& pre, - RayHit& ray, - IntersectContext* context, - const Primitive& Disc) - { - STAT3(normal.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, - Ray& ray, - IntersectContext* context, - const Primitive& Disc) - { - STAT3(shadow.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); - } - }; - - template<int M, int Mx, int K, bool filter> - struct DiscMiIntersectorK - { - typedef PointMi<M> Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline void intersect( - const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) - { - STAT3(normal.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Disc.gather(v0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersectorK<Mx, K>::intersect( - valid, ray, k, context, geom, pre, v0, - Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); - } - - static __forceinline bool occluded( - const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) - { - STAT3(shadow.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Disc.gather(v0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersectorK<Mx, K>::intersect( - valid, ray, k, context, geom, pre, v0, - Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); - } - }; - - template<int M, int Mx, int K, bool filter> - struct DiscMiMBIntersectorK - { - typedef PointMi<M> Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline void intersect( - const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) - { - STAT3(normal.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()[k]); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersectorK<Mx, K>::intersect( - valid, ray, k, context, geom, pre, v0, - Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); - } - - static __forceinline bool occluded( - const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) - { - STAT3(shadow.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()[k]); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersectorK<Mx, K>::intersect( - valid, ray, k, context, geom, pre, v0, Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); - } - }; - - template<int M, int Mx, bool filter> - struct OrientedDiscMiIntersector1 - { - typedef PointMi<M> Primitive; - typedef CurvePrecalculations1 Precalculations; - - static __forceinline void intersect(const Precalculations& pre, - RayHit& ray, - IntersectContext* context, - const Primitive& Disc) - { - STAT3(normal.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Vec3vf<M> n0; - Disc.gather(v0, n0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, n0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, - Ray& ray, - IntersectContext* context, - const Primitive& Disc) - { - STAT3(shadow.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Vec3vf<M> n0; - Disc.gather(v0, n0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, n0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); - } - }; - - template<int M, int Mx, bool filter> - struct OrientedDiscMiMBIntersector1 - { - typedef PointMi<M> Primitive; - typedef CurvePrecalculations1 Precalculations; - - static __forceinline void intersect(const Precalculations& pre, - RayHit& ray, - IntersectContext* context, - const Primitive& Disc) - { - STAT3(normal.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Vec3vf<M> n0; - Disc.gather(v0, n0, geom, ray.time()); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, n0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, - Ray& ray, - IntersectContext* context, - const Primitive& Disc) - { - STAT3(shadow.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Vec3vf<M> n0; - Disc.gather(v0, n0, geom, ray.time()); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, n0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); - } - }; - - template<int M, int Mx, int K, bool filter> - struct OrientedDiscMiIntersectorK - { - typedef PointMi<M> Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline void intersect( - const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) - { - STAT3(normal.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Vec3vf<M> n0; - Disc.gather(v0, n0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersectorK<Mx, K>::intersect( - valid, ray, k, context, geom, pre, v0, n0, - Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); - } - - static __forceinline bool occluded( - const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) - { - STAT3(shadow.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Vec3vf<M> n0; - Disc.gather(v0, n0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersectorK<Mx, K>::intersect( - valid, ray, k, context, geom, pre, v0, n0, - Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); - } - }; - - template<int M, int Mx, int K, bool filter> - struct OrientedDiscMiMBIntersectorK - { - typedef PointMi<M> Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline void intersect( - const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) - { - STAT3(normal.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Vec3vf<M> n0; - Disc.gather(v0, n0, geom, ray.time()[k]); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersectorK<Mx, K>::intersect( - valid, ray, k, context, geom, pre, v0, n0, - Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); - } - - static __forceinline bool occluded( - const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc) - { - STAT3(shadow.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(Disc.geomID()); - Vec4vf<M> v0; Vec3vf<M> n0; - Disc.gather(v0, n0, geom, ray.time()[k]); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersectorK<Mx, K>::intersect( - valid, ray, k, context, geom, pre, v0, n0, - Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); - } - }; - } // namespace isa -} // namespace embree diff --git a/thirdparty/embree-aarch64/kernels/geometry/filter.h b/thirdparty/embree-aarch64/kernels/geometry/filter.h deleted file mode 100644 index 4cdf7a395a..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/filter.h +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/geometry.h" -#include "../common/ray.h" -#include "../common/hit.h" -#include "../common/context.h" - -namespace embree -{ - namespace isa - { - __forceinline bool runIntersectionFilter1Helper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context) - { - if (geometry->intersectionFilterN) - { - assert(context->scene->hasGeometryFilterFunction()); - geometry->intersectionFilterN(args); - - if (args->valid[0] == 0) - return false; - } - - if (context->user->filter) { - assert(context->scene->hasContextFilterFunction()); - context->user->filter(args); - - if (args->valid[0] == 0) - return false; - } - - copyHitToRay(*(RayHit*)args->ray,*(Hit*)args->hit); - return true; - } - - __forceinline bool runIntersectionFilter1(const Geometry* const geometry, RayHit& ray, IntersectContext* context, Hit& hit) - { - RTCFilterFunctionNArguments args; - int mask = -1; - args.valid = &mask; - args.geometryUserPtr = geometry->userPtr; - args.context = context->user; - args.ray = (RTCRayN*)&ray; - args.hit = (RTCHitN*)&hit; - args.N = 1; - return runIntersectionFilter1Helper(&args,geometry,context); - } - - __forceinline void reportIntersection1(IntersectFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args) - { -#if defined(EMBREE_FILTER_FUNCTION) - IntersectContext* MAYBE_UNUSED context = args->internal_context; - const Geometry* const geometry = args->geometry; - if (geometry->intersectionFilterN) { - assert(context->scene->hasGeometryFilterFunction()); - geometry->intersectionFilterN(filter_args); - } - - //if (args->valid[0] == 0) - // return; - - if (context->user->filter) { - assert(context->scene->hasContextFilterFunction()); - context->user->filter(filter_args); - } -#endif - } - - __forceinline bool runOcclusionFilter1Helper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context) - { - if (geometry->occlusionFilterN) - { - assert(context->scene->hasGeometryFilterFunction()); - geometry->occlusionFilterN(args); - - if (args->valid[0] == 0) - return false; - } - - if (context->user->filter) { - assert(context->scene->hasContextFilterFunction()); - context->user->filter(args); - - if (args->valid[0] == 0) - return false; - } - return true; - } - - __forceinline bool runOcclusionFilter1(const Geometry* const geometry, Ray& ray, IntersectContext* context, Hit& hit) - { - RTCFilterFunctionNArguments args; - int mask = -1; - args.valid = &mask; - args.geometryUserPtr = geometry->userPtr; - args.context = context->user; - args.ray = (RTCRayN*)&ray; - args.hit = (RTCHitN*)&hit; - args.N = 1; - return runOcclusionFilter1Helper(&args,geometry,context); - } - - __forceinline void reportOcclusion1(OccludedFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args) - { -#if defined(EMBREE_FILTER_FUNCTION) - IntersectContext* MAYBE_UNUSED context = args->internal_context; - const Geometry* const geometry = args->geometry; - if (geometry->occlusionFilterN) { - assert(context->scene->hasGeometryFilterFunction()); - geometry->occlusionFilterN(filter_args); - } - - //if (args->valid[0] == 0) - // return false; - - if (context->user->filter) { - assert(context->scene->hasContextFilterFunction()); - context->user->filter(filter_args); - } -#endif - } - - template<int K> - __forceinline vbool<K> runIntersectionFilterHelper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context) - { - vint<K>* mask = (vint<K>*) args->valid; - if (geometry->intersectionFilterN) - { - assert(context->scene->hasGeometryFilterFunction()); - geometry->intersectionFilterN(args); - } - - vbool<K> valid_o = *mask != vint<K>(zero); - if (none(valid_o)) return valid_o; - - if (context->user->filter) { - assert(context->scene->hasContextFilterFunction()); - context->user->filter(args); - } - - valid_o = *mask != vint<K>(zero); - if (none(valid_o)) return valid_o; - - copyHitToRay(valid_o,*(RayHitK<K>*)args->ray,*(HitK<K>*)args->hit); - return valid_o; - } - - template<int K> - __forceinline vbool<K> runIntersectionFilter(const vbool<K>& valid, const Geometry* const geometry, RayHitK<K>& ray, IntersectContext* context, HitK<K>& hit) - { - RTCFilterFunctionNArguments args; - vint<K> mask = valid.mask32(); - args.valid = (int*)&mask; - args.geometryUserPtr = geometry->userPtr; - args.context = context->user; - args.ray = (RTCRayN*)&ray; - args.hit = (RTCHitN*)&hit; - args.N = K; - return runIntersectionFilterHelper<K>(&args,geometry,context); - } - - template<int K> - __forceinline vbool<K> runOcclusionFilterHelper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context) - { - vint<K>* mask = (vint<K>*) args->valid; - if (geometry->occlusionFilterN) - { - assert(context->scene->hasGeometryFilterFunction()); - geometry->occlusionFilterN(args); - } - - vbool<K> valid_o = *mask != vint<K>(zero); - - if (none(valid_o)) return valid_o; - - if (context->user->filter) { - assert(context->scene->hasContextFilterFunction()); - context->user->filter(args); - } - - valid_o = *mask != vint<K>(zero); - - RayK<K>* ray = (RayK<K>*) args->ray; - ray->tfar = select(valid_o, vfloat<K>(neg_inf), ray->tfar); - return valid_o; - } - - template<int K> - __forceinline vbool<K> runOcclusionFilter(const vbool<K>& valid, const Geometry* const geometry, RayK<K>& ray, IntersectContext* context, HitK<K>& hit) - { - RTCFilterFunctionNArguments args; - vint<K> mask = valid.mask32(); - args.valid = (int*)&mask; - args.geometryUserPtr = geometry->userPtr; - args.context = context->user; - args.ray = (RTCRayN*)&ray; - args.hit = (RTCHitN*)&hit; - args.N = K; - return runOcclusionFilterHelper<K>(&args,geometry,context); - } - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h deleted file mode 100644 index 46a0af0827..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "grid_soa.h" -#include "grid_soa_intersector1.h" -#include "grid_soa_intersector_packet.h" -#include "../common/ray.h" - -namespace embree -{ - namespace isa - { - template<typename T> - class SubdivPatch1Precalculations : public T - { - public: - __forceinline SubdivPatch1Precalculations (const Ray& ray, const void* ptr) - : T(ray,ptr) {} - }; - - template<int K, typename T> - class SubdivPatch1PrecalculationsK : public T - { - public: - __forceinline SubdivPatch1PrecalculationsK (const vbool<K>& valid, RayK<K>& ray) - : T(valid,ray) {} - }; - - class Grid1Intersector1 - { - public: - typedef GridSOA Primitive; - typedef Grid1Precalculations<GridSOAIntersector1::Precalculations> Precalculations; - - /*! Intersect a ray with the primitive. */ - static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) - { - GridSOAIntersector1::intersect(pre,ray,context,prim,lazy_node); - } - static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) { - intersect(pre,ray,context,prim,ty,lazy_node); - } - - /*! Test if the ray is occluded by the primitive */ - static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) - { - GridSOAIntersector1::occluded(pre,ray,context,prim,lazy_node); - } - static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) { - return occluded(pre,ray,context,prim,ty,lazy_node); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) { - assert(false && "not implemented"); - return false; - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) { - assert(false && "not implemented"); - return false; - } - }; - - template <int K> - struct GridIntersectorK - { - typedef GridSOA Primitive; - typedef SubdivPatch1PrecalculationsK<K,typename GridSOAIntersectorK<K>::Precalculations> Precalculations; - - - static __forceinline void intersect(const vbool<K>& valid, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) - { - GridSOAIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node); - } - - static __forceinline vbool<K> occluded(const vbool<K>& valid, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) - { - GridSOAIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node); - } - - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) - { - GridSOAIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node); - } - - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) - { - GridSOAIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node); - } - }; - - typedef Grid1IntersectorK<4> SubdivPatch1Intersector4; - typedef Grid1IntersectorK<8> SubdivPatch1Intersector8; - typedef Grid1IntersectorK<16> SubdivPatch1Intersector16; - - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h b/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h deleted file mode 100644 index d3b275586c..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h +++ /dev/null @@ -1,275 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" -#include "../common/scene_subdiv_mesh.h" -#include "../bvh/bvh.h" -#include "../subdiv/tessellation.h" -#include "../subdiv/tessellation_cache.h" -#include "subdivpatch1.h" - -namespace embree -{ - namespace isa - { - class GridSOA - { - public: - - /*! GridSOA constructor */ - GridSOA(const SubdivPatch1Base* patches, const unsigned time_steps, - const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight, - const SubdivMesh* const geom, const size_t totalBvhBytes, const size_t gridBytes, BBox3fa* bounds_o = nullptr); - - /*! Subgrid creation */ - template<typename Allocator> - static GridSOA* create(const SubdivPatch1Base* patches, const unsigned time_steps, - unsigned x0, unsigned x1, unsigned y0, unsigned y1, - const Scene* scene, Allocator& alloc, BBox3fa* bounds_o = nullptr) - { - const unsigned width = x1-x0+1; - const unsigned height = y1-y0+1; - const GridRange range(0,width-1,0,height-1); - size_t bvhBytes = 0; - if (time_steps == 1) - bvhBytes = getBVHBytes(range,sizeof(BVH4::AABBNode),0); - else { - bvhBytes = (time_steps-1)*getBVHBytes(range,sizeof(BVH4::AABBNodeMB),0); - bvhBytes += getTemporalBVHBytes(make_range(0,int(time_steps-1)),sizeof(BVH4::AABBNodeMB4D)); - } - const size_t gridBytes = 4*size_t(width)*size_t(height)*sizeof(float); - size_t rootBytes = time_steps*sizeof(BVH4::NodeRef); -#if !defined(__X86_64__) && !defined(__aarch64__) - rootBytes += 4; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding. -#endif - void* data = alloc(offsetof(GridSOA,data)+bvhBytes+time_steps*gridBytes+rootBytes); - assert(data); - return new (data) GridSOA(patches,time_steps,x0,x1,y0,y1,patches->grid_u_res,patches->grid_v_res,scene->get<SubdivMesh>(patches->geomID()),bvhBytes,gridBytes,bounds_o); - } - - /*! Grid creation */ - template<typename Allocator> - static GridSOA* create(const SubdivPatch1Base* const patches, const unsigned time_steps, - const Scene* scene, const Allocator& alloc, BBox3fa* bounds_o = nullptr) - { - return create(patches,time_steps,0,patches->grid_u_res-1,0,patches->grid_v_res-1,scene,alloc,bounds_o); - } - - /*! returns reference to root */ - __forceinline BVH4::NodeRef& root(size_t t = 0) { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; } - __forceinline const BVH4::NodeRef& root(size_t t = 0) const { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; } - - /*! returns pointer to BVH array */ - __forceinline int8_t* bvhData() { return &data[0]; } - __forceinline const int8_t* bvhData() const { return &data[0]; } - - /*! returns pointer to Grid array */ - __forceinline float* gridData(size_t t = 0) { return (float*) &data[gridOffset + t*gridBytes]; } - __forceinline const float* gridData(size_t t = 0) const { return (float*) &data[gridOffset + t*gridBytes]; } - - __forceinline void* encodeLeaf(size_t u, size_t v) { - return (void*) (16*(v * width + u + 1)); // +1 to not create empty leaf - } - __forceinline float* decodeLeaf(size_t t, const void* ptr) { - return gridData(t) + (((size_t) (ptr) >> 4) - 1); - } - - /*! returns the size of the BVH over the grid in bytes */ - static size_t getBVHBytes(const GridRange& range, const size_t nodeBytes, const size_t leafBytes); - - /*! returns the size of the temporal BVH over the time range BVHs */ - static size_t getTemporalBVHBytes(const range<int> time_range, const size_t nodeBytes); - - /*! calculates bounding box of grid range */ - __forceinline BBox3fa calculateBounds(size_t time, const GridRange& range) const - { - const float* const grid_array = gridData(time); - const float* const grid_x_array = grid_array + 0 * dim_offset; - const float* const grid_y_array = grid_array + 1 * dim_offset; - const float* const grid_z_array = grid_array + 2 * dim_offset; - - /* compute the bounds just for the range! */ - BBox3fa bounds( empty ); - for (unsigned v = range.v_start; v<=range.v_end; v++) - { - for (unsigned u = range.u_start; u<=range.u_end; u++) - { - const float x = grid_x_array[ v * width + u]; - const float y = grid_y_array[ v * width + u]; - const float z = grid_z_array[ v * width + u]; - bounds.extend( Vec3fa(x,y,z) ); - } - } - assert(is_finite(bounds)); - return bounds; - } - - /*! Evaluates grid over patch and builds BVH4 tree over the grid. */ - std::pair<BVH4::NodeRef,BBox3fa> buildBVH(BBox3fa* bounds_o); - - /*! Create BVH4 tree over grid. */ - std::pair<BVH4::NodeRef,BBox3fa> buildBVH(const GridRange& range, size_t& allocator); - - /*! Evaluates grid over patch and builds MSMBlur BVH4 tree over the grid. */ - std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, BBox3fa* bounds_o); - - /*! Create MBlur BVH4 tree over grid. */ - std::pair<BVH4::NodeRef,LBBox3fa> buildMBlurBVH(size_t time, const GridRange& range, size_t& allocator); - - /*! Create MSMBlur BVH4 tree over grid. */ - std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, size_t& allocator, BBox3fa* bounds_o); - - template<typename Loader> - struct MapUV - { - typedef typename Loader::vfloat vfloat; - const float* const grid_uv; - size_t line_offset; - size_t lines; - - __forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines) - : grid_uv(grid_uv), line_offset(line_offset), lines(lines) {} - - __forceinline void operator() (vfloat& u, vfloat& v) const { - const Vec3<vfloat> tri_v012_uv = Loader::gather(grid_uv,line_offset,lines); - const Vec2<vfloat> uv0 = GridSOA::decodeUV(tri_v012_uv[0]); - const Vec2<vfloat> uv1 = GridSOA::decodeUV(tri_v012_uv[1]); - const Vec2<vfloat> uv2 = GridSOA::decodeUV(tri_v012_uv[2]); - const Vec2<vfloat> uv = u * uv1 + v * uv2 + (1.0f-u-v) * uv0; - u = uv[0];v = uv[1]; - } - }; - - struct Gather2x3 - { - enum { M = 4 }; - typedef vbool4 vbool; - typedef vint4 vint; - typedef vfloat4 vfloat; - - static __forceinline const Vec3vf4 gather(const float* const grid, const size_t line_offset, const size_t lines) - { - vfloat4 r0 = vfloat4::loadu(grid + 0*line_offset); - vfloat4 r1 = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid - if (unlikely(line_offset == 2)) - { - r0 = shuffle<0,1,1,1>(r0); - r1 = shuffle<0,1,1,1>(r1); - } - return Vec3vf4(unpacklo(r0,r1), // r00, r10, r01, r11 - shuffle<1,1,2,2>(r0), // r01, r01, r02, r02 - shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12 - } - - static __forceinline void gather(const float* const grid_x, - const float* const grid_y, - const float* const grid_z, - const size_t line_offset, - const size_t lines, - Vec3vf4& v0_o, - Vec3vf4& v1_o, - Vec3vf4& v2_o) - { - const Vec3vf4 tri_v012_x = gather(grid_x,line_offset,lines); - const Vec3vf4 tri_v012_y = gather(grid_y,line_offset,lines); - const Vec3vf4 tri_v012_z = gather(grid_z,line_offset,lines); - v0_o = Vec3vf4(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]); - v1_o = Vec3vf4(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]); - v2_o = Vec3vf4(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]); - } - }; - -#if defined (__AVX__) - struct Gather3x3 - { - enum { M = 8 }; - typedef vbool8 vbool; - typedef vint8 vint; - typedef vfloat8 vfloat; - - static __forceinline const Vec3vf8 gather(const float* const grid, const size_t line_offset, const size_t lines) - { - vfloat4 ra = vfloat4::loadu(grid + 0*line_offset); - vfloat4 rb = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid - vfloat4 rc; - if (likely(lines > 2)) - rc = vfloat4::loadu(grid + 2*line_offset); - else - rc = rb; - - if (unlikely(line_offset == 2)) - { - ra = shuffle<0,1,1,1>(ra); - rb = shuffle<0,1,1,1>(rb); - rc = shuffle<0,1,1,1>(rc); - } - - const vfloat8 r0 = vfloat8(ra,rb); - const vfloat8 r1 = vfloat8(rb,rc); - return Vec3vf8(unpacklo(r0,r1), // r00, r10, r01, r11, r10, r20, r11, r21 - shuffle<1,1,2,2>(r0), // r01, r01, r02, r02, r11, r11, r12, r12 - shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12, r20, r21, r21, r22 - } - - static __forceinline void gather(const float* const grid_x, - const float* const grid_y, - const float* const grid_z, - const size_t line_offset, - const size_t lines, - Vec3vf8& v0_o, - Vec3vf8& v1_o, - Vec3vf8& v2_o) - { - const Vec3vf8 tri_v012_x = gather(grid_x,line_offset,lines); - const Vec3vf8 tri_v012_y = gather(grid_y,line_offset,lines); - const Vec3vf8 tri_v012_z = gather(grid_z,line_offset,lines); - v0_o = Vec3vf8(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]); - v1_o = Vec3vf8(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]); - v2_o = Vec3vf8(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]); - } - }; -#endif - - template<typename vfloat> - static __forceinline Vec2<vfloat> decodeUV(const vfloat& uv) - { - typedef typename vfloat::Int vint; - const vint iu = asInt(uv) & 0xffff; - const vint iv = srl(asInt(uv),16); - const vfloat u = (vfloat)iu * vfloat(8.0f/0x10000); - const vfloat v = (vfloat)iv * vfloat(8.0f/0x10000); - return Vec2<vfloat>(u,v); - } - - __forceinline unsigned int geomID() const { - return _geomID; - } - - __forceinline unsigned int primID() const { - return _primID; - } - - public: - BVH4::NodeRef troot; -#if !defined(__X86_64__) && !defined(__aarch64__) - unsigned align1; -#endif - unsigned time_steps; - unsigned width; - - unsigned height; - unsigned dim_offset; - unsigned _geomID; - unsigned _primID; - - unsigned align2; - unsigned gridOffset; - unsigned gridBytes; - unsigned rootOffset; - - int8_t data[1]; //!< after the struct we first store the BVH, then the grid, and finally the roots - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h b/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h deleted file mode 100644 index 2ed922a5ae..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h +++ /dev/null @@ -1,207 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "grid_soa.h" -#include "../common/ray.h" -#include "triangle_intersector_pluecker.h" - -namespace embree -{ - namespace isa - { - class GridSOAIntersector1 - { - public: - typedef void Primitive; - - class Precalculations - { - public: - __forceinline Precalculations (const Ray& ray, const void* ptr) - : grid(nullptr) {} - - public: - GridSOA* grid; - int itime; - float ftime; - }; - - template<typename Loader> - static __forceinline void intersect(RayHit& ray, - IntersectContext* context, - const float* const grid_x, - const size_t line_offset, - const size_t lines, - Precalculations& pre) - { - typedef typename Loader::vfloat vfloat; - const size_t dim_offset = pre.grid->dim_offset; - const float* const grid_y = grid_x + 1 * dim_offset; - const float* const grid_z = grid_x + 2 * dim_offset; - const float* const grid_uv = grid_x + 3 * dim_offset; - Vec3<vfloat> v0, v1, v2; - Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2); - GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines); - PlueckerIntersector1<Loader::M> intersector(ray,nullptr); - intersector.intersect(ray,v0,v1,v2,mapUV,Intersect1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); - }; - - template<typename Loader> - static __forceinline bool occluded(Ray& ray, - IntersectContext* context, - const float* const grid_x, - const size_t line_offset, - const size_t lines, - Precalculations& pre) - { - typedef typename Loader::vfloat vfloat; - const size_t dim_offset = pre.grid->dim_offset; - const float* const grid_y = grid_x + 1 * dim_offset; - const float* const grid_z = grid_x + 2 * dim_offset; - const float* const grid_uv = grid_x + 3 * dim_offset; - - Vec3<vfloat> v0, v1, v2; - Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2); - - GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines); - PlueckerIntersector1<Loader::M> intersector(ray,nullptr); - return intersector.intersect(ray,v0,v1,v2,mapUV,Occluded1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); - } - - /*! Intersect a ray with the primitive. */ - static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - const size_t line_offset = pre.grid->width; - const size_t lines = pre.grid->height; - const float* const grid_x = pre.grid->decodeLeaf(0,prim); - -#if defined(__AVX__) - intersect<GridSOA::Gather3x3>( ray, context, grid_x, line_offset, lines, pre); -#else - intersect<GridSOA::Gather2x3>(ray, context, grid_x , line_offset, lines, pre); - if (likely(lines > 2)) - intersect<GridSOA::Gather2x3>(ray, context, grid_x+line_offset, line_offset, lines, pre); -#endif - } - - /*! Test if the ray is occluded by the primitive */ - static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - const size_t line_offset = pre.grid->width; - const size_t lines = pre.grid->height; - const float* const grid_x = pre.grid->decodeLeaf(0,prim); - -#if defined(__AVX__) - return occluded<GridSOA::Gather3x3>( ray, context, grid_x, line_offset, lines, pre); -#else - if (occluded<GridSOA::Gather2x3>(ray, context, grid_x , line_offset, lines, pre)) return true; - if (likely(lines > 2)) - if (occluded<GridSOA::Gather2x3>(ray, context, grid_x+line_offset, line_offset, lines, pre)) return true; -#endif - return false; - } - }; - - class GridSOAMBIntersector1 - { - public: - typedef void Primitive; - typedef GridSOAIntersector1::Precalculations Precalculations; - - template<typename Loader> - static __forceinline void intersect(RayHit& ray, const float ftime, - IntersectContext* context, - const float* const grid_x, - const size_t line_offset, - const size_t lines, - Precalculations& pre) - { - typedef typename Loader::vfloat vfloat; - const size_t dim_offset = pre.grid->dim_offset; - const size_t grid_offset = pre.grid->gridBytes >> 2; - const float* const grid_y = grid_x + 1 * dim_offset; - const float* const grid_z = grid_x + 2 * dim_offset; - const float* const grid_uv = grid_x + 3 * dim_offset; - - Vec3<vfloat> a0, a1, a2; - Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2); - - Vec3<vfloat> b0, b1, b2; - Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2); - - Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime)); - Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime)); - Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime)); - - GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines); - PlueckerIntersector1<Loader::M> intersector(ray,nullptr); - intersector.intersect(ray,v0,v1,v2,mapUV,Intersect1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); - }; - - template<typename Loader> - static __forceinline bool occluded(Ray& ray, const float ftime, - IntersectContext* context, - const float* const grid_x, - const size_t line_offset, - const size_t lines, - Precalculations& pre) - { - typedef typename Loader::vfloat vfloat; - const size_t dim_offset = pre.grid->dim_offset; - const size_t grid_offset = pre.grid->gridBytes >> 2; - const float* const grid_y = grid_x + 1 * dim_offset; - const float* const grid_z = grid_x + 2 * dim_offset; - const float* const grid_uv = grid_x + 3 * dim_offset; - - Vec3<vfloat> a0, a1, a2; - Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2); - - Vec3<vfloat> b0, b1, b2; - Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2); - - Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime)); - Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime)); - Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime)); - - GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines); - PlueckerIntersector1<Loader::M> intersector(ray,nullptr); - return intersector.intersect(ray,v0,v1,v2,mapUV,Occluded1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); - } - - /*! Intersect a ray with the primitive. */ - static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - const size_t line_offset = pre.grid->width; - const size_t lines = pre.grid->height; - const float* const grid_x = pre.grid->decodeLeaf(pre.itime,prim); - -#if defined(__AVX__) - intersect<GridSOA::Gather3x3>( ray, pre.ftime, context, grid_x, line_offset, lines, pre); -#else - intersect<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x, line_offset, lines, pre); - if (likely(lines > 2)) - intersect<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x+line_offset, line_offset, lines, pre); -#endif - } - - /*! Test if the ray is occluded by the primitive */ - static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - const size_t line_offset = pre.grid->width; - const size_t lines = pre.grid->height; - const float* const grid_x = pre.grid->decodeLeaf(pre.itime,prim); - -#if defined(__AVX__) - return occluded<GridSOA::Gather3x3>( ray, pre.ftime, context, grid_x, line_offset, lines, pre); -#else - if (occluded<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x , line_offset, lines, pre)) return true; - if (likely(lines > 2)) - if (occluded<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x+line_offset, line_offset, lines, pre)) return true; -#endif - return false; - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h b/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h deleted file mode 100644 index 41d66e1e28..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h +++ /dev/null @@ -1,445 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "grid_soa.h" -#include "../common/ray.h" -#include "triangle_intersector_pluecker.h" - -namespace embree -{ - namespace isa - { - template<int K> - struct MapUV0 - { - const float* const grid_uv; - size_t ofs00, ofs01, ofs10, ofs11; - - __forceinline MapUV0(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11) - : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {} - - __forceinline void operator() (vfloat<K>& u, vfloat<K>& v) const { - const vfloat<K> uv00(grid_uv[ofs00]); - const vfloat<K> uv01(grid_uv[ofs01]); - const vfloat<K> uv10(grid_uv[ofs10]); - const vfloat<K> uv11(grid_uv[ofs11]); - const Vec2vf<K> uv0 = GridSOA::decodeUV(uv00); - const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01); - const Vec2vf<K> uv2 = GridSOA::decodeUV(uv10); - const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0)); - u = uv[0]; v = uv[1]; - } - }; - - template<int K> - struct MapUV1 - { - const float* const grid_uv; - size_t ofs00, ofs01, ofs10, ofs11; - - __forceinline MapUV1(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11) - : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {} - - __forceinline void operator() (vfloat<K>& u, vfloat<K>& v) const { - const vfloat<K> uv00(grid_uv[ofs00]); - const vfloat<K> uv01(grid_uv[ofs01]); - const vfloat<K> uv10(grid_uv[ofs10]); - const vfloat<K> uv11(grid_uv[ofs11]); - const Vec2vf<K> uv0 = GridSOA::decodeUV(uv10); - const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01); - const Vec2vf<K> uv2 = GridSOA::decodeUV(uv11); - const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0)); - u = uv[0]; v = uv[1]; - } - }; - - template<int K> - class GridSOAIntersectorK - { - public: - typedef void Primitive; - - class Precalculations - { -#if defined(__AVX__) - static const int M = 8; -#else - static const int M = 4; -#endif - - public: - __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) - : grid(nullptr), intersector(valid,ray) {} - - public: - GridSOA* grid; - PlueckerIntersectorK<M,K> intersector; // FIXME: use quad intersector - }; - - /*! Intersect a ray with the primitive. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - const size_t dim_offset = pre.grid->dim_offset; - const size_t line_offset = pre.grid->width; - const float* const grid_x = pre.grid->decodeLeaf(0,prim); - const float* const grid_y = grid_x + 1 * dim_offset; - const float* const grid_z = grid_x + 2 * dim_offset; - const float* const grid_uv = grid_x + 3 * dim_offset; - - const size_t max_x = pre.grid->width == 2 ? 1 : 2; - const size_t max_y = pre.grid->height == 2 ? 1 : 2; - for (size_t y=0; y<max_y; y++) - { - for (size_t x=0; x<max_x; x++) - { - const size_t ofs00 = (y+0)*line_offset+(x+0); - const size_t ofs01 = (y+0)*line_offset+(x+1); - const size_t ofs10 = (y+1)*line_offset+(x+0); - const size_t ofs11 = (y+1)*line_offset+(x+1); - const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); - const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); - const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); - const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); - - pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); - pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); - } - } - } - - /*! Test if the ray is occluded by the primitive */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - const size_t dim_offset = pre.grid->dim_offset; - const size_t line_offset = pre.grid->width; - const float* const grid_x = pre.grid->decodeLeaf(0,prim); - const float* const grid_y = grid_x + 1 * dim_offset; - const float* const grid_z = grid_x + 2 * dim_offset; - const float* const grid_uv = grid_x + 3 * dim_offset; - - vbool<K> valid = valid_i; - const size_t max_x = pre.grid->width == 2 ? 1 : 2; - const size_t max_y = pre.grid->height == 2 ? 1 : 2; - for (size_t y=0; y<max_y; y++) - { - for (size_t x=0; x<max_x; x++) - { - const size_t ofs00 = (y+0)*line_offset+(x+0); - const size_t ofs01 = (y+0)*line_offset+(x+1); - const size_t ofs10 = (y+1)*line_offset+(x+0); - const size_t ofs11 = (y+1)*line_offset+(x+1); - const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); - const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); - const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); - const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); - - pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); - if (none(valid)) break; - pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); - if (none(valid)) break; - } - } - return !valid; - } - - template<typename Loader> - static __forceinline void intersect(RayHitK<K>& ray, size_t k, - IntersectContext* context, - const float* const grid_x, - const size_t line_offset, - const size_t lines, - Precalculations& pre) - { - typedef typename Loader::vfloat vfloat; - const size_t dim_offset = pre.grid->dim_offset; - const float* const grid_y = grid_x + 1 * dim_offset; - const float* const grid_z = grid_x + 2 * dim_offset; - const float* const grid_uv = grid_x + 3 * dim_offset; - Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2); - pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID())); - }; - - template<typename Loader> - static __forceinline bool occluded(RayK<K>& ray, size_t k, - IntersectContext* context, - const float* const grid_x, - const size_t line_offset, - const size_t lines, - Precalculations& pre) - { - typedef typename Loader::vfloat vfloat; - const size_t dim_offset = pre.grid->dim_offset; - const float* const grid_y = grid_x + 1 * dim_offset; - const float* const grid_z = grid_x + 2 * dim_offset; - const float* const grid_uv = grid_x + 3 * dim_offset; - Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2); - return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID())); - } - - /*! Intersect a ray with the primitive. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - const size_t line_offset = pre.grid->width; - const size_t lines = pre.grid->height; - const float* const grid_x = pre.grid->decodeLeaf(0,prim); -#if defined(__AVX__) - intersect<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre); -#else - intersect<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre); - if (likely(lines > 2)) - intersect<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre); -#endif - } - - /*! Test if the ray is occluded by the primitive */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - const size_t line_offset = pre.grid->width; - const size_t lines = pre.grid->height; - const float* const grid_x = pre.grid->decodeLeaf(0,prim); - -#if defined(__AVX__) - return occluded<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre); -#else - if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre)) return true; - if (likely(lines > 2)) - if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre)) return true; -#endif - return false; - } - }; - - template<int K> - class GridSOAMBIntersectorK - { - public: - typedef void Primitive; - typedef typename GridSOAIntersectorK<K>::Precalculations Precalculations; - - /*! Intersect a ray with the primitive. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - vfloat<K> vftime; - vint<K> vitime = getTimeSegment(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime); - - vbool<K> valid1 = valid_i; - while (any(valid1)) { - const size_t j = bsf(movemask(valid1)); - const int itime = vitime[j]; - const vbool<K> valid2 = valid1 & (itime == vitime); - valid1 = valid1 & !valid2; - intersect(valid2,pre,ray,vftime,itime,context,prim,lazy_node); - } - } - - /*! Intersect a ray with the primitive. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, const vfloat<K>& ftime, int itime, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - const size_t grid_offset = pre.grid->gridBytes >> 2; - const size_t dim_offset = pre.grid->dim_offset; - const size_t line_offset = pre.grid->width; - const float* const grid_x = pre.grid->decodeLeaf(itime,prim); - const float* const grid_y = grid_x + 1 * dim_offset; - const float* const grid_z = grid_x + 2 * dim_offset; - const float* const grid_uv = grid_x + 3 * dim_offset; - - const size_t max_x = pre.grid->width == 2 ? 1 : 2; - const size_t max_y = pre.grid->height == 2 ? 1 : 2; - for (size_t y=0; y<max_y; y++) - { - for (size_t x=0; x<max_x; x++) - { - size_t ofs00 = (y+0)*line_offset+(x+0); - size_t ofs01 = (y+0)*line_offset+(x+1); - size_t ofs10 = (y+1)*line_offset+(x+0); - size_t ofs11 = (y+1)*line_offset+(x+1); - const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); - const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); - const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); - const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); - ofs00 += grid_offset; - ofs01 += grid_offset; - ofs10 += grid_offset; - ofs11 += grid_offset; - const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); - const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); - const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); - const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); - const Vec3vf<K> p00 = lerp(a00,b00,ftime); - const Vec3vf<K> p01 = lerp(a01,b01,ftime); - const Vec3vf<K> p10 = lerp(a10,b10,ftime); - const Vec3vf<K> p11 = lerp(a11,b11,ftime); - - pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); - pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); - } - } - } - - /*! Test if the ray is occluded by the primitive */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - vfloat<K> vftime; - vint<K> vitime = getTimeSegment(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime); - - vbool<K> valid_o = valid_i; - vbool<K> valid1 = valid_i; - while (any(valid1)) { - const int j = int(bsf(movemask(valid1))); - const int itime = vitime[j]; - const vbool<K> valid2 = valid1 & (itime == vitime); - valid1 = valid1 & !valid2; - valid_o &= !valid2 | occluded(valid2,pre,ray,vftime,itime,context,prim,lazy_node); - } - return !valid_o; - } - - /*! Test if the ray is occluded by the primitive */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, const vfloat<K>& ftime, int itime, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - const size_t grid_offset = pre.grid->gridBytes >> 2; - const size_t dim_offset = pre.grid->dim_offset; - const size_t line_offset = pre.grid->width; - const float* const grid_x = pre.grid->decodeLeaf(itime,prim); - const float* const grid_y = grid_x + 1 * dim_offset; - const float* const grid_z = grid_x + 2 * dim_offset; - const float* const grid_uv = grid_x + 3 * dim_offset; - - vbool<K> valid = valid_i; - const size_t max_x = pre.grid->width == 2 ? 1 : 2; - const size_t max_y = pre.grid->height == 2 ? 1 : 2; - for (size_t y=0; y<max_y; y++) - { - for (size_t x=0; x<max_x; x++) - { - size_t ofs00 = (y+0)*line_offset+(x+0); - size_t ofs01 = (y+0)*line_offset+(x+1); - size_t ofs10 = (y+1)*line_offset+(x+0); - size_t ofs11 = (y+1)*line_offset+(x+1); - const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); - const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); - const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); - const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); - ofs00 += grid_offset; - ofs01 += grid_offset; - ofs10 += grid_offset; - ofs11 += grid_offset; - const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); - const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); - const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); - const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); - const Vec3vf<K> p00 = lerp(a00,b00,ftime); - const Vec3vf<K> p01 = lerp(a01,b01,ftime); - const Vec3vf<K> p10 = lerp(a10,b10,ftime); - const Vec3vf<K> p11 = lerp(a11,b11,ftime); - - pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); - if (none(valid)) break; - pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); - if (none(valid)) break; - } - } - return valid; - } - - template<typename Loader> - static __forceinline void intersect(RayHitK<K>& ray, size_t k, - const float ftime, - IntersectContext* context, - const float* const grid_x, - const size_t line_offset, - const size_t lines, - Precalculations& pre) - { - typedef typename Loader::vfloat vfloat; - const size_t grid_offset = pre.grid->gridBytes >> 2; - const size_t dim_offset = pre.grid->dim_offset; - const float* const grid_y = grid_x + 1 * dim_offset; - const float* const grid_z = grid_x + 2 * dim_offset; - const float* const grid_uv = grid_x + 3 * dim_offset; - - Vec3<vfloat> a0, a1, a2; - Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2); - - Vec3<vfloat> b0, b1, b2; - Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2); - - Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime)); - Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime)); - Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime)); - - pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID())); - }; - - template<typename Loader> - static __forceinline bool occluded(RayK<K>& ray, size_t k, - const float ftime, - IntersectContext* context, - const float* const grid_x, - const size_t line_offset, - const size_t lines, - Precalculations& pre) - { - typedef typename Loader::vfloat vfloat; - const size_t grid_offset = pre.grid->gridBytes >> 2; - const size_t dim_offset = pre.grid->dim_offset; - const float* const grid_y = grid_x + 1 * dim_offset; - const float* const grid_z = grid_x + 2 * dim_offset; - const float* const grid_uv = grid_x + 3 * dim_offset; - - Vec3<vfloat> a0, a1, a2; - Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2); - - Vec3<vfloat> b0, b1, b2; - Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2); - - Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime)); - Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime)); - Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime)); - - return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID())); - } - - /*! Intersect a ray with the primitive. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - float ftime; - int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime); - - const size_t line_offset = pre.grid->width; - const size_t lines = pre.grid->height; - const float* const grid_x = pre.grid->decodeLeaf(itime,prim); - -#if defined(__AVX__) - intersect<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre); -#else - intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre); - if (likely(lines > 2)) - intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre); -#endif - } - - /*! Test if the ray is occluded by the primitive */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - float ftime; - int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime); - - const size_t line_offset = pre.grid->width; - const size_t lines = pre.grid->height; - const float* const grid_x = pre.grid->decodeLeaf(itime,prim); - -#if defined(__AVX__) - return occluded<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre); -#else - if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre)) return true; - if (likely(lines > 2)) - if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre)) return true; -#endif - return false; - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/instance.h b/thirdparty/embree-aarch64/kernels/geometry/instance.h deleted file mode 100644 index 66893d581f..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/instance.h +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "primitive.h" -#include "../common/scene_instance.h" - -namespace embree -{ - struct InstancePrimitive - { - struct Type : public PrimitiveType - { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - - /* primitive supports multiple time segments */ - static const bool singleTimeSegment = false; - - /* Returns maximum number of stored primitives */ - static __forceinline size_t max_size() { return 1; } - - /* Returns required number of primitive blocks for N primitives */ - static __forceinline size_t blocks(size_t N) { return N; } - - public: - - InstancePrimitive (const Instance* instance, unsigned int instID) - : instance(instance) - , instID_(instID) - {} - - __forceinline void fill(const PrimRef* prims, size_t& i, size_t end, Scene* scene) - { - assert(end-i == 1); - const PrimRef& prim = prims[i]; i++; - const unsigned int geomID = prim.geomID(); - const Instance* instance = scene->get<Instance>(geomID); - new (this) InstancePrimitive(instance, geomID); - } - - __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& i, size_t end, Scene* scene, size_t itime) - { - assert(end-i == 1); - const PrimRef& prim = prims[i]; i++; - const unsigned int geomID = prim.geomID(); - const Instance* instance = scene->get<Instance>(geomID); - new (this) InstancePrimitive(instance,geomID); - return instance->linearBounds(0,itime); - } - - __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& i, size_t end, Scene* scene, const BBox1f time_range) - { - assert(end-i == 1); - const PrimRefMB& prim = prims[i]; i++; - const unsigned int geomID = prim.geomID(); - const Instance* instance = scene->get<Instance>(geomID); - new (this) InstancePrimitive(instance,geomID); - return instance->linearBounds(0,time_range); - } - - /* Updates the primitive */ - __forceinline BBox3fa update(Instance* instance) { - return instance->bounds(0); - } - - public: - const Instance* instance; - const unsigned int instID_ = std::numeric_limits<unsigned int>::max (); - }; -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h deleted file mode 100644 index 91731a39c5..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "instance.h" -#include "../common/ray.h" -#include "../common/point_query.h" - -namespace embree -{ - namespace isa - { - struct InstanceIntersector1 - { - typedef InstancePrimitive Primitive; - - struct Precalculations { - __forceinline Precalculations (const Ray& ray, const void *ptr) {} - }; - - static void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim); - static bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim); - static bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim); - }; - - struct InstanceIntersector1MB - { - typedef InstancePrimitive Primitive; - - struct Precalculations { - __forceinline Precalculations (const Ray& ray, const void *ptr) {} - }; - - static void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim); - static bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim); - static bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim); - }; - - template<int K> - struct InstanceIntersectorK - { - typedef InstancePrimitive Primitive; - - struct Precalculations { - __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {} - }; - - static void intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& prim); - static vbool<K> occluded(const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& prim); - - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) { - intersect(vbool<K>(1<<int(k)),pre,ray,context,prim); - } - - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) { - occluded(vbool<K>(1<<int(k)),pre,ray,context,prim); - return ray.tfar[k] < 0.0f; - } - }; - - template<int K> - struct InstanceIntersectorKMB - { - typedef InstancePrimitive Primitive; - - struct Precalculations { - __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {} - }; - - static void intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& prim); - static vbool<K> occluded(const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& prim); - - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) { - intersect(vbool<K>(1<<int(k)),pre,ray,context,prim); - } - - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) { - occluded(vbool<K>(1<<int(k)),pre,ray,context,prim); - return ray.tfar[k] < 0.0f; - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h b/thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h deleted file mode 100644 index 0df49dd6e9..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h +++ /dev/null @@ -1,1074 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" -#include "../common/context.h" -#include "filter.h" - -namespace embree -{ - namespace isa - { - template<int M> - struct UVIdentity { - __forceinline void operator() (vfloat<M>& u, vfloat<M>& v) const {} - }; - - - template<bool filter> - struct Intersect1Epilog1 - { - RayHit& ray; - IntersectContext* context; - const unsigned int geomID; - const unsigned int primID; - - __forceinline Intersect1Epilog1(RayHit& ray, - IntersectContext* context, - const unsigned int geomID, - const unsigned int primID) - : ray(ray), context(context), geomID(geomID), primID(primID) {} - - template<typename Hit> - __forceinline bool operator() (Hit& hit) const - { - /* ray mask test */ - Scene* scene MAYBE_UNUSED = context->scene; - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); -#if defined(EMBREE_RAY_MASK) - if ((geometry->mask & ray.mask) == 0) return false; -#endif - hit.finalize(); - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { - HitK<1> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng); - const float old_t = ray.tfar; - ray.tfar = hit.t; - bool found = runIntersectionFilter1(geometry,ray,context,h); - if (!found) ray.tfar = old_t; - return found; - } - } -#endif - - /* update hit information */ - ray.tfar = hit.t; - ray.Ng = hit.Ng; - ray.u = hit.u; - ray.v = hit.v; - ray.primID = primID; - ray.geomID = geomID; - instance_id_stack::copy(context->user->instID, ray.instID); - return true; - } - }; - - template<bool filter> - struct Occluded1Epilog1 - { - Ray& ray; - IntersectContext* context; - const unsigned int geomID; - const unsigned int primID; - - __forceinline Occluded1Epilog1(Ray& ray, - IntersectContext* context, - const unsigned int geomID, - const unsigned int primID) - : ray(ray), context(context), geomID(geomID), primID(primID) {} - - template<typename Hit> - __forceinline bool operator() (Hit& hit) const - { - /* ray mask test */ - Scene* scene MAYBE_UNUSED = context->scene; - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); - - -#if defined(EMBREE_RAY_MASK) - if ((geometry->mask & ray.mask) == 0) return false; -#endif - hit.finalize(); - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) { - HitK<1> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng); - const float old_t = ray.tfar; - ray.tfar = hit.t; - const bool found = runOcclusionFilter1(geometry,ray,context,h); - if (!found) ray.tfar = old_t; - return found; - } - } -#endif - return true; - } - }; - - template<int K, bool filter> - struct Intersect1KEpilog1 - { - RayHitK<K>& ray; - size_t k; - IntersectContext* context; - const unsigned int geomID; - const unsigned int primID; - - __forceinline Intersect1KEpilog1(RayHitK<K>& ray, size_t k, - IntersectContext* context, - const unsigned int geomID, - const unsigned int primID) - : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {} - - template<typename Hit> - __forceinline bool operator() (Hit& hit) const - { - /* ray mask test */ - Scene* scene MAYBE_UNUSED = context->scene; - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); -#if defined(EMBREE_RAY_MASK) - if ((geometry->mask & ray.mask[k]) == 0) - return false; -#endif - hit.finalize(); - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { - HitK<K> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng); - const float old_t = ray.tfar[k]; - ray.tfar[k] = hit.t; - const bool found = any(runIntersectionFilter(vbool<K>(1<<k),geometry,ray,context,h)); - if (!found) ray.tfar[k] = old_t; - return found; - } - } -#endif - - /* update hit information */ - ray.tfar[k] = hit.t; - ray.Ng.x[k] = hit.Ng.x; - ray.Ng.y[k] = hit.Ng.y; - ray.Ng.z[k] = hit.Ng.z; - ray.u[k] = hit.u; - ray.v[k] = hit.v; - ray.primID[k] = primID; - ray.geomID[k] = geomID; - instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k); - return true; - } - }; - - template<int K, bool filter> - struct Occluded1KEpilog1 - { - RayK<K>& ray; - size_t k; - IntersectContext* context; - const unsigned int geomID; - const unsigned int primID; - - __forceinline Occluded1KEpilog1(RayK<K>& ray, size_t k, - IntersectContext* context, - const unsigned int geomID, - const unsigned int primID) - : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {} - - template<typename Hit> - __forceinline bool operator() (Hit& hit) const - { - /* ray mask test */ - Scene* scene MAYBE_UNUSED = context->scene; - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); -#if defined(EMBREE_RAY_MASK) - if ((geometry->mask & ray.mask[k]) == 0) - return false; -#endif - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) { - hit.finalize(); - HitK<K> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng); - const float old_t = ray.tfar[k]; - ray.tfar[k] = hit.t; - const bool found = any(runOcclusionFilter(vbool<K>(1<<k),geometry,ray,context,h)); - if (!found) ray.tfar[k] = old_t; - return found; - } - } -#endif - return true; - } - }; - - template<int M, int Mx, bool filter> - struct Intersect1EpilogM - { - RayHit& ray; - IntersectContext* context; - const vuint<M>& geomIDs; - const vuint<M>& primIDs; - - __forceinline Intersect1EpilogM(RayHit& ray, - IntersectContext* context, - const vuint<M>& geomIDs, - const vuint<M>& primIDs) - : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {} - - template<typename Hit> - __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const - { - Scene* scene MAYBE_UNUSED = context->scene; - vbool<Mx> valid = valid_i; - if (Mx > M) valid &= (1<<M)-1; - hit.finalize(); - size_t i = select_min(valid,hit.vt); - unsigned int geomID = geomIDs[i]; - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK) - bool foundhit = false; - goto entry; - while (true) - { - if (unlikely(none(valid))) return foundhit; - i = select_min(valid,hit.vt); - - geomID = geomIDs[i]; - entry: - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); - -#if defined(EMBREE_RAY_MASK) - /* goto next hit if mask test fails */ - if ((geometry->mask & ray.mask) == 0) { - clear(valid,i); - continue; - } -#endif - -#if defined(EMBREE_FILTER_FUNCTION) - /* call intersection filter function */ - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { - const Vec2f uv = hit.uv(i); - HitK<1> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i)); - const float old_t = ray.tfar; - ray.tfar = hit.t(i); - const bool found = runIntersectionFilter1(geometry,ray,context,h); - if (!found) ray.tfar = old_t; - foundhit |= found; - clear(valid,i); - valid &= hit.vt <= ray.tfar; // intersection filters may modify tfar value - continue; - } - } -#endif - break; - } -#endif - - /* update hit information */ - const Vec2f uv = hit.uv(i); - ray.tfar = hit.vt[i]; - ray.Ng.x = hit.vNg.x[i]; - ray.Ng.y = hit.vNg.y[i]; - ray.Ng.z = hit.vNg.z[i]; - ray.u = uv.x; - ray.v = uv.y; - ray.primID = primIDs[i]; - ray.geomID = geomID; - instance_id_stack::copy(context->user->instID, ray.instID); - return true; - - } - }; - -#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4 - template<int M, bool filter> - struct Intersect1EpilogM<M,16,filter> - { - static const size_t Mx = 16; - RayHit& ray; - IntersectContext* context; - const vuint<M>& geomIDs; - const vuint<M>& primIDs; - - __forceinline Intersect1EpilogM(RayHit& ray, - IntersectContext* context, - const vuint<M>& geomIDs, - const vuint<M>& primIDs) - : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {} - - template<typename Hit> - __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const - { - Scene* MAYBE_UNUSED scene = context->scene; - vbool<Mx> valid = valid_i; - if (Mx > M) valid &= (1<<M)-1; - hit.finalize(); - size_t i = select_min(valid,hit.vt); - unsigned int geomID = geomIDs[i]; - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK) - bool foundhit = false; - goto entry; - while (true) - { - if (unlikely(none(valid))) return foundhit; - i = select_min(valid,hit.vt); - - geomID = geomIDs[i]; - entry: - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); - -#if defined(EMBREE_RAY_MASK) - /* goto next hit if mask test fails */ - if ((geometry->mask & ray.mask) == 0) { - clear(valid,i); - continue; - } -#endif - -#if defined(EMBREE_FILTER_FUNCTION) - /* call intersection filter function */ - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { - const Vec2f uv = hit.uv(i); - HitK<1> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i)); - const float old_t = ray.tfar; - ray.tfar = hit.t(i); - const bool found = runIntersectionFilter1(geometry,ray,context,h); - if (!found) ray.tfar = old_t; - foundhit |= found; - clear(valid,i); - valid &= hit.vt <= ray.tfar; // intersection filters may modify tfar value - continue; - } - } -#endif - break; - } -#endif - - vbool<Mx> finalMask(((unsigned int)1 << i)); - ray.update(finalMask,hit.vt,hit.vu,hit.vv,hit.vNg.x,hit.vNg.y,hit.vNg.z,geomID,primIDs); - instance_id_stack::foreach([&](unsigned level) - { - ray.instID[level] = context->user->instID[level]; - return (context->user->instID[level] != RTC_INVALID_GEOMETRY_ID); - }); - return true; - - } - }; -#endif - - template<int M, int Mx, bool filter> - struct Occluded1EpilogM - { - Ray& ray; - IntersectContext* context; - const vuint<M>& geomIDs; - const vuint<M>& primIDs; - - __forceinline Occluded1EpilogM(Ray& ray, - IntersectContext* context, - const vuint<M>& geomIDs, - const vuint<M>& primIDs) - : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {} - - template<typename Hit> - __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const - { - Scene* scene MAYBE_UNUSED = context->scene; - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK) - if (unlikely(filter)) - hit.finalize(); /* called only once */ - - vbool<Mx> valid = valid_i; - if (Mx > M) valid &= (1<<M)-1; - size_t m=movemask(valid); - goto entry; - while (true) - { - if (unlikely(m == 0)) return false; - entry: - size_t i=bsf(m); - - const unsigned int geomID = geomIDs[i]; - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); - -#if defined(EMBREE_RAY_MASK) - /* goto next hit if mask test fails */ - if ((geometry->mask & ray.mask) == 0) { - m=btc(m,i); - continue; - } -#endif - -#if defined(EMBREE_FILTER_FUNCTION) - /* if we have no filter then the test passed */ - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) - { - const Vec2f uv = hit.uv(i); - HitK<1> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i)); - const float old_t = ray.tfar; - ray.tfar = hit.t(i); - if (runOcclusionFilter1(geometry,ray,context,h)) return true; - ray.tfar = old_t; - m=btc(m,i); - continue; - } - } -#endif - break; - } -#endif - - return true; - } - }; - - template<int M, bool filter> - struct Intersect1EpilogMU - { - RayHit& ray; - IntersectContext* context; - const unsigned int geomID; - const unsigned int primID; - - __forceinline Intersect1EpilogMU(RayHit& ray, - IntersectContext* context, - const unsigned int geomID, - const unsigned int primID) - : ray(ray), context(context), geomID(geomID), primID(primID) {} - - template<typename Hit> - __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const - { - /* ray mask test */ - Scene* scene MAYBE_UNUSED = context->scene; - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); -#if defined(EMBREE_RAY_MASK) - if ((geometry->mask & ray.mask) == 0) return false; -#endif - - vbool<M> valid = valid_i; - hit.finalize(); - - size_t i = select_min(valid,hit.vt); - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) - if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) - { - bool foundhit = false; - while (true) - { - /* call intersection filter function */ - Vec2f uv = hit.uv(i); - const float old_t = ray.tfar; - ray.tfar = hit.t(i); - HitK<1> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i)); - const bool found = runIntersectionFilter1(geometry,ray,context,h); - if (!found) ray.tfar = old_t; - foundhit |= found; - clear(valid,i); - valid &= hit.vt <= ray.tfar; // intersection filters may modify tfar value - if (unlikely(none(valid))) break; - i = select_min(valid,hit.vt); - } - return foundhit; - } -#endif - - /* update hit information */ - const Vec2f uv = hit.uv(i); - const Vec3fa Ng = hit.Ng(i); - ray.tfar = hit.t(i); - ray.Ng.x = Ng.x; - ray.Ng.y = Ng.y; - ray.Ng.z = Ng.z; - ray.u = uv.x; - ray.v = uv.y; - ray.primID = primID; - ray.geomID = geomID; - instance_id_stack::copy(context->user->instID, ray.instID); - return true; - } - }; - - template<int M, bool filter> - struct Occluded1EpilogMU - { - Ray& ray; - IntersectContext* context; - const unsigned int geomID; - const unsigned int primID; - - __forceinline Occluded1EpilogMU(Ray& ray, - IntersectContext* context, - const unsigned int geomID, - const unsigned int primID) - : ray(ray), context(context), geomID(geomID), primID(primID) {} - - template<typename Hit> - __forceinline bool operator() (const vbool<M>& valid, Hit& hit) const - { - /* ray mask test */ - Scene* scene MAYBE_UNUSED = context->scene; - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); -#if defined(EMBREE_RAY_MASK) - if ((geometry->mask & ray.mask) == 0) return false; -#endif - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) - if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) - { - hit.finalize(); - for (size_t m=movemask(valid), i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) - { - const Vec2f uv = hit.uv(i); - const float old_t = ray.tfar; - ray.tfar = hit.t(i); - HitK<1> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i)); - if (runOcclusionFilter1(geometry,ray,context,h)) return true; - ray.tfar = old_t; - } - return false; - } -#endif - return true; - } - }; - - template<int M, int K, bool filter> - struct IntersectKEpilogM - { - RayHitK<K>& ray; - IntersectContext* context; - const vuint<M>& geomIDs; - const vuint<M>& primIDs; - const size_t i; - - __forceinline IntersectKEpilogM(RayHitK<K>& ray, - IntersectContext* context, - const vuint<M>& geomIDs, - const vuint<M>& primIDs, - size_t i) - : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs), i(i) {} - - template<typename Hit> - __forceinline vbool<K> operator() (const vbool<K>& valid_i, const Hit& hit) const - { - Scene* scene MAYBE_UNUSED = context->scene; - - vfloat<K> u, v, t; - Vec3vf<K> Ng; - vbool<K> valid = valid_i; - - std::tie(u,v,t,Ng) = hit(); - - const unsigned int geomID = geomIDs[i]; - const unsigned int primID = primIDs[i]; - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); - - /* ray masking test */ -#if defined(EMBREE_RAY_MASK) - valid &= (geometry->mask & ray.mask) != 0; - if (unlikely(none(valid))) return false; -#endif - - /* occlusion filter test */ -#if defined(EMBREE_FILTER_FUNCTION) - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { - HitK<K> h(context->user,geomID,primID,u,v,Ng); - const vfloat<K> old_t = ray.tfar; - ray.tfar = select(valid,t,ray.tfar); - const vbool<K> m_accept = runIntersectionFilter(valid,geometry,ray,context,h); - ray.tfar = select(m_accept,ray.tfar,old_t); - return m_accept; - } - } -#endif - - /* update hit information */ - vfloat<K>::store(valid,&ray.tfar,t); - vfloat<K>::store(valid,&ray.Ng.x,Ng.x); - vfloat<K>::store(valid,&ray.Ng.y,Ng.y); - vfloat<K>::store(valid,&ray.Ng.z,Ng.z); - vfloat<K>::store(valid,&ray.u,u); - vfloat<K>::store(valid,&ray.v,v); - vuint<K>::store(valid,&ray.primID,primID); - vuint<K>::store(valid,&ray.geomID,geomID); - instance_id_stack::copy<const unsigned*, vuint<K>*, const vbool<K>&>(context->user->instID, ray.instID, valid); - return valid; - } - }; - - template<int M, int K, bool filter> - struct OccludedKEpilogM - { - vbool<K>& valid0; - RayK<K>& ray; - IntersectContext* context; - const vuint<M>& geomIDs; - const vuint<M>& primIDs; - const size_t i; - - __forceinline OccludedKEpilogM(vbool<K>& valid0, - RayK<K>& ray, - IntersectContext* context, - const vuint<M>& geomIDs, - const vuint<M>& primIDs, - size_t i) - : valid0(valid0), ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs), i(i) {} - - template<typename Hit> - __forceinline vbool<K> operator() (const vbool<K>& valid_i, const Hit& hit) const - { - vbool<K> valid = valid_i; - - /* ray masking test */ - Scene* scene MAYBE_UNUSED = context->scene; - const unsigned int geomID = geomIDs[i]; - const unsigned int primID = primIDs[i]; - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); -#if defined(EMBREE_RAY_MASK) - valid &= (geometry->mask & ray.mask) != 0; - if (unlikely(none(valid))) return valid; -#endif - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) - { - vfloat<K> u, v, t; - Vec3vf<K> Ng; - std::tie(u,v,t,Ng) = hit(); - HitK<K> h(context->user,geomID,primID,u,v,Ng); - const vfloat<K> old_t = ray.tfar; - ray.tfar = select(valid,t,ray.tfar); - valid = runOcclusionFilter(valid,geometry,ray,context,h); - ray.tfar = select(valid,ray.tfar,old_t); - } - } -#endif - - /* update occlusion */ - valid0 = valid0 & !valid; - return valid; - } - }; - - template<int M, int K, bool filter> - struct IntersectKEpilogMU - { - RayHitK<K>& ray; - IntersectContext* context; - const unsigned int geomID; - const unsigned int primID; - - __forceinline IntersectKEpilogMU(RayHitK<K>& ray, - IntersectContext* context, - const unsigned int geomID, - const unsigned int primID) - : ray(ray), context(context), geomID(geomID), primID(primID) {} - - template<typename Hit> - __forceinline vbool<K> operator() (const vbool<K>& valid_org, const Hit& hit) const - { - vbool<K> valid = valid_org; - vfloat<K> u, v, t; - Vec3vf<K> Ng; - std::tie(u,v,t,Ng) = hit(); - - Scene* scene MAYBE_UNUSED = context->scene; - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); - - /* ray masking test */ -#if defined(EMBREE_RAY_MASK) - valid &= (geometry->mask & ray.mask) != 0; - if (unlikely(none(valid))) return false; -#endif - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { - HitK<K> h(context->user,geomID,primID,u,v,Ng); - const vfloat<K> old_t = ray.tfar; - ray.tfar = select(valid,t,ray.tfar); - const vbool<K> m_accept = runIntersectionFilter(valid,geometry,ray,context,h); - ray.tfar = select(m_accept,ray.tfar,old_t); - return m_accept; - } - } -#endif - - /* update hit information */ - vfloat<K>::store(valid,&ray.tfar,t); - vfloat<K>::store(valid,&ray.Ng.x,Ng.x); - vfloat<K>::store(valid,&ray.Ng.y,Ng.y); - vfloat<K>::store(valid,&ray.Ng.z,Ng.z); - vfloat<K>::store(valid,&ray.u,u); - vfloat<K>::store(valid,&ray.v,v); - vuint<K>::store(valid,&ray.primID,primID); - vuint<K>::store(valid,&ray.geomID,geomID); - instance_id_stack::copy<const unsigned*, vuint<K>*, const vbool<K>&>(context->user->instID, ray.instID, valid); - - return valid; - } - }; - - template<int M, int K, bool filter> - struct OccludedKEpilogMU - { - vbool<K>& valid0; - RayK<K>& ray; - IntersectContext* context; - const unsigned int geomID; - const unsigned int primID; - - __forceinline OccludedKEpilogMU(vbool<K>& valid0, - RayK<K>& ray, - IntersectContext* context, - const unsigned int geomID, - const unsigned int primID) - : valid0(valid0), ray(ray), context(context), geomID(geomID), primID(primID) {} - - template<typename Hit> - __forceinline vbool<K> operator() (const vbool<K>& valid_i, const Hit& hit) const - { - vbool<K> valid = valid_i; - Scene* scene MAYBE_UNUSED = context->scene; - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); - -#if defined(EMBREE_RAY_MASK) - valid &= (geometry->mask & ray.mask) != 0; - if (unlikely(none(valid))) return false; -#endif - - /* occlusion filter test */ -#if defined(EMBREE_FILTER_FUNCTION) - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) - { - vfloat<K> u, v, t; - Vec3vf<K> Ng; - std::tie(u,v,t,Ng) = hit(); - HitK<K> h(context->user,geomID,primID,u,v,Ng); - const vfloat<K> old_t = ray.tfar; - ray.tfar = select(valid,t,ray.tfar); - valid = runOcclusionFilter(valid,geometry,ray,context,h); - ray.tfar = select(valid,ray.tfar,old_t); - } - } -#endif - - /* update occlusion */ - valid0 = valid0 & !valid; - return valid; - } - }; - - template<int M, int Mx, int K, bool filter> - struct Intersect1KEpilogM - { - RayHitK<K>& ray; - size_t k; - IntersectContext* context; - const vuint<M>& geomIDs; - const vuint<M>& primIDs; - - __forceinline Intersect1KEpilogM(RayHitK<K>& ray, size_t k, - IntersectContext* context, - const vuint<M>& geomIDs, - const vuint<M>& primIDs) - : ray(ray), k(k), context(context), geomIDs(geomIDs), primIDs(primIDs) {} - - template<typename Hit> - __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const - { - Scene* scene MAYBE_UNUSED = context->scene; - vbool<Mx> valid = valid_i; - hit.finalize(); - if (Mx > M) valid &= (1<<M)-1; - size_t i = select_min(valid,hit.vt); - assert(i<M); - unsigned int geomID = geomIDs[i]; - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK) - bool foundhit = false; - goto entry; - while (true) - { - if (unlikely(none(valid))) return foundhit; - i = select_min(valid,hit.vt); - assert(i<M); - geomID = geomIDs[i]; - entry: - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); - -#if defined(EMBREE_RAY_MASK) - /* goto next hit if mask test fails */ - if ((geometry->mask & ray.mask[k]) == 0) { - clear(valid,i); - continue; - } -#endif - -#if defined(EMBREE_FILTER_FUNCTION) - /* call intersection filter function */ - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { - assert(i<M); - const Vec2f uv = hit.uv(i); - HitK<K> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i)); - const float old_t = ray.tfar[k]; - ray.tfar[k] = hit.t(i); - const bool found = any(runIntersectionFilter(vbool<K>(1<<k),geometry,ray,context,h)); - if (!found) ray.tfar[k] = old_t; - foundhit = foundhit | found; - clear(valid,i); - valid &= hit.vt <= ray.tfar[k]; // intersection filters may modify tfar value - continue; - } - } -#endif - break; - } -#endif - assert(i<M); - /* update hit information */ -#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4 - ray.updateK(i,k,hit.vt,hit.vu,hit.vv,vfloat<Mx>(hit.vNg.x),vfloat<Mx>(hit.vNg.y),vfloat<Mx>(hit.vNg.z),geomID,vuint<Mx>(primIDs)); -#else - const Vec2f uv = hit.uv(i); - ray.tfar[k] = hit.t(i); - ray.Ng.x[k] = hit.vNg.x[i]; - ray.Ng.y[k] = hit.vNg.y[i]; - ray.Ng.z[k] = hit.vNg.z[i]; - ray.u[k] = uv.x; - ray.v[k] = uv.y; - ray.primID[k] = primIDs[i]; - ray.geomID[k] = geomID; - instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k); -#endif - return true; - } - }; - - template<int M, int Mx, int K, bool filter> - struct Occluded1KEpilogM - { - RayK<K>& ray; - size_t k; - IntersectContext* context; - const vuint<M>& geomIDs; - const vuint<M>& primIDs; - - __forceinline Occluded1KEpilogM(RayK<K>& ray, size_t k, - IntersectContext* context, - const vuint<M>& geomIDs, - const vuint<M>& primIDs) - : ray(ray), k(k), context(context), geomIDs(geomIDs), primIDs(primIDs) {} - - template<typename Hit> - __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const - { - Scene* scene MAYBE_UNUSED = context->scene; - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK) - if (unlikely(filter)) - hit.finalize(); /* called only once */ - - vbool<Mx> valid = valid_i; - if (Mx > M) valid &= (1<<M)-1; - size_t m=movemask(valid); - goto entry; - while (true) - { - if (unlikely(m == 0)) return false; - entry: - size_t i=bsf(m); - - const unsigned int geomID = geomIDs[i]; - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); - -#if defined(EMBREE_RAY_MASK) - /* goto next hit if mask test fails */ - if ((geometry->mask & ray.mask[k]) == 0) { - m=btc(m,i); - continue; - } -#endif - -#if defined(EMBREE_FILTER_FUNCTION) - /* execute occlusion filer */ - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) - { - const Vec2f uv = hit.uv(i); - const float old_t = ray.tfar[k]; - ray.tfar[k] = hit.t(i); - HitK<K> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i)); - if (any(runOcclusionFilter(vbool<K>(1<<k),geometry,ray,context,h))) return true; - ray.tfar[k] = old_t; - m=btc(m,i); - continue; - } - } -#endif - break; - } -#endif - return true; - } - }; - - template<int M, int K, bool filter> - struct Intersect1KEpilogMU - { - RayHitK<K>& ray; - size_t k; - IntersectContext* context; - const unsigned int geomID; - const unsigned int primID; - - __forceinline Intersect1KEpilogMU(RayHitK<K>& ray, size_t k, - IntersectContext* context, - const unsigned int geomID, - const unsigned int primID) - : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {} - - template<typename Hit> - __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const - { - Scene* scene MAYBE_UNUSED = context->scene; - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); -#if defined(EMBREE_RAY_MASK) - /* ray mask test */ - if ((geometry->mask & ray.mask[k]) == 0) - return false; -#endif - - /* finalize hit calculation */ - vbool<M> valid = valid_i; - hit.finalize(); - size_t i = select_min(valid,hit.vt); - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) - { - bool foundhit = false; - while (true) - { - const Vec2f uv = hit.uv(i); - const float old_t = ray.tfar[k]; - ray.tfar[k] = hit.t(i); - HitK<K> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i)); - const bool found = any(runIntersectionFilter(vbool<K>(1<<k),geometry,ray,context,h)); - if (!found) ray.tfar[k] = old_t; - foundhit = foundhit | found; - clear(valid,i); - valid &= hit.vt <= ray.tfar[k]; // intersection filters may modify tfar value - if (unlikely(none(valid))) break; - i = select_min(valid,hit.vt); - } - return foundhit; - } - } -#endif - - /* update hit information */ -#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4 - const Vec3fa Ng = hit.Ng(i); - ray.updateK(i,k,hit.vt,hit.vu,hit.vv,vfloat<M>(Ng.x),vfloat<M>(Ng.y),vfloat<M>(Ng.z),geomID,vuint<M>(primID)); -#else - const Vec2f uv = hit.uv(i); - const Vec3fa Ng = hit.Ng(i); - ray.tfar[k] = hit.t(i); - ray.Ng.x[k] = Ng.x; - ray.Ng.y[k] = Ng.y; - ray.Ng.z[k] = Ng.z; - ray.u[k] = uv.x; - ray.v[k] = uv.y; - ray.primID[k] = primID; - ray.geomID[k] = geomID; - instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k); -#endif - return true; - } - }; - - template<int M, int K, bool filter> - struct Occluded1KEpilogMU - { - RayK<K>& ray; - size_t k; - IntersectContext* context; - const unsigned int geomID; - const unsigned int primID; - - __forceinline Occluded1KEpilogMU(RayK<K>& ray, size_t k, - IntersectContext* context, - const unsigned int geomID, - const unsigned int primID) - : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {} - - template<typename Hit> - __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const - { - Scene* scene MAYBE_UNUSED = context->scene; - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); -#if defined(EMBREE_RAY_MASK) - /* ray mask test */ - if ((geometry->mask & ray.mask[k]) == 0) - return false; -#endif - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) - { - hit.finalize(); - for (size_t m=movemask(valid_i), i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) - { - const Vec2f uv = hit.uv(i); - const float old_t = ray.tfar[k]; - ray.tfar[k] = hit.t(i); - HitK<K> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i)); - if (any(runOcclusionFilter(vbool<K>(1<<k),geometry,ray,context,h))) return true; - ray.tfar[k] = old_t; - } - return false; - } - } -#endif - return true; - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h b/thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h deleted file mode 100644 index 5c1ba5cb61..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/scene.h" -#include "../common/ray.h" -#include "../common/point_query.h" -#include "../bvh/node_intersector1.h" -#include "../bvh/node_intersector_packet.h" - -namespace embree -{ - namespace isa - { - template<typename Intersector> - struct ArrayIntersector1 - { - typedef typename Intersector::Primitive Primitive; - typedef typename Intersector::Precalculations Precalculations; - - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - for (size_t i=0; i<num; i++) - Intersector::intersect(pre,ray,context,prim[i]); - } - - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - for (size_t i=0; i<num; i++) { - if (Intersector::occluded(pre,ray,context,prim[i])) - return true; - } - return false; - } - - template<int N> - static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node) - { - bool changed = false; - for (size_t i=0; i<num; i++) - changed |= Intersector::pointQuery(query, context, prim[i]); - return changed; - } - - template<int K> - static __forceinline void intersectK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) - { - } - - template<int K> - static __forceinline vbool<K> occludedK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) - { - return valid; - } - }; - - template<int K, typename Intersector> - struct ArrayIntersectorK_1 - { - typedef typename Intersector::Primitive Primitive; - typedef typename Intersector::Precalculations Precalculations; - - template<bool robust> - static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) - { - for (size_t i=0; i<num; i++) { - Intersector::intersect(valid,pre,ray,context,prim[i]); - } - } - - template<bool robust> - static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) - { - vbool<K> valid0 = valid; - for (size_t i=0; i<num; i++) { - valid0 &= !Intersector::occluded(valid0,pre,ray,context,prim[i]); - if (none(valid0)) break; - } - return !valid0; - } - - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - for (size_t i=0; i<num; i++) { - Intersector::intersect(pre,ray,k,context,prim[i]); - } - } - - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - for (size_t i=0; i<num; i++) { - if (Intersector::occluded(pre,ray,k,context,prim[i])) - return true; - } - return false; - } - }; - - // ============================================================================================= - - template<int K, typename IntersectorK> - struct ArrayIntersectorKStream - { - typedef typename IntersectorK::Primitive PrimitiveK; - typedef typename IntersectorK::Precalculations PrecalculationsK; - - static __forceinline void intersectK(const vbool<K>& valid, const Accel::Intersectors* This, /* PrecalculationsK& pre, */ RayHitK<K>& ray, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node) - { - PrecalculationsK pre(valid,ray); // FIXME: might cause trouble - - for (size_t i=0; i<num; i++) { - IntersectorK::intersect(valid,pre,ray,context,prim[i]); - } - } - - static __forceinline vbool<K> occludedK(const vbool<K>& valid, const Accel::Intersectors* This, /* PrecalculationsK& pre, */ RayK<K>& ray, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node) - { - PrecalculationsK pre(valid,ray); // FIXME: might cause trouble - vbool<K> valid0 = valid; - for (size_t i=0; i<num; i++) { - valid0 &= !IntersectorK::occluded(valid0,pre,ray,context,prim[i]); - if (none(valid0)) break; - } - return !valid0; - } - - static __forceinline void intersect(const Accel::Intersectors* This, RayHitK<K>& ray, size_t k, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node) - { - PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble - for (size_t i=0; i<num; i++) { - IntersectorK::intersect(pre,ray,k,context,prim[i]); - } - } - - static __forceinline bool occluded(const Accel::Intersectors* This, RayK<K>& ray, size_t k, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node) - { - PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble - for (size_t i=0; i<num; i++) { - if (IntersectorK::occluded(pre,ray,k,context,prim[i])) - return true; - } - return false; - } - - static __forceinline size_t occluded(const Accel::Intersectors* This, size_t cur_mask, RayK<K>** __restrict__ inputPackets, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node) - { - size_t m_occluded = 0; - for (size_t i=0; i<num; i++) { - size_t bits = cur_mask & (~m_occluded); - for (; bits!=0; ) - { - const size_t rayID = bscf(bits); - RayHitK<K> &ray = *inputPackets[rayID / K]; - const size_t k = rayID % K; - PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble - if (IntersectorK::occluded(pre,ray,k,context,prim[i])) - { - m_occluded |= (size_t)1 << rayID; - ray.tfar[k] = neg_inf; - } - } - } - return m_occluded; - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/line_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/line_intersector.h deleted file mode 100644 index eef5b0b1fd..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/line_intersector.h +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" -#include "curve_intersector_precalculations.h" - -namespace embree -{ - namespace isa - { - template<int M> - struct LineIntersectorHitM - { - __forceinline LineIntersectorHitM() {} - - __forceinline LineIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng) - : vu(u), vv(v), vt(t), vNg(Ng) {} - - __forceinline void finalize() {} - - __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } - __forceinline float t (const size_t i) const { return vt[i]; } - __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } - - public: - vfloat<M> vu; - vfloat<M> vv; - vfloat<M> vt; - Vec3vf<M> vNg; - }; - - template<int M> - struct FlatLinearCurveIntersector1 - { - typedef CurvePrecalculations1 Precalculations; - - template<typename Epilog> - static __forceinline bool intersect(const vbool<M>& valid_i, - Ray& ray, - IntersectContext* context, - const LineSegments* geom, - const Precalculations& pre, - const Vec4vf<M>& v0i, const Vec4vf<M>& v1i, - const Epilog& epilog) - { - /* transform end points into ray space */ - vbool<M> valid = valid_i; - vfloat<M> depth_scale = pre.depth_scale; - LinearSpace3<Vec3vf<M>> ray_space = pre.ray_space; - - const Vec3vf<M> ray_org ((Vec3fa)ray.org); - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); - - Vec4vf<M> p0(xfmVector(ray_space,v0.xyz()-ray_org), v0.w); - Vec4vf<M> p1(xfmVector(ray_space,v1.xyz()-ray_org), v1.w); - - /* approximative intersection with cone */ - const Vec4vf<M> v = p1-p0; - const Vec4vf<M> w = -p0; - const vfloat<M> d0 = madd(w.x,v.x,w.y*v.y); - const vfloat<M> d1 = madd(v.x,v.x,v.y*v.y); - const vfloat<M> u = clamp(d0*rcp(d1),vfloat<M>(zero),vfloat<M>(one)); - const Vec4vf<M> p = madd(u,v,p0); - const vfloat<M> t = p.z; - const vfloat<M> d2 = madd(p.x,p.x,p.y*p.y); - const vfloat<M> r = p.w; - const vfloat<M> r2 = r*r; - valid &= (d2 <= r2) & (vfloat<M>(ray.tnear()) <= t) & (t <= vfloat<M>(ray.tfar)); - if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) - valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; // ignore self intersections - if (unlikely(none(valid))) return false; - - /* ignore denormalized segments */ - const Vec3vf<M> T = v1.xyz()-v0.xyz(); - valid &= (T.x != vfloat<M>(zero)) | (T.y != vfloat<M>(zero)) | (T.z != vfloat<M>(zero)); - if (unlikely(none(valid))) return false; - - /* update hit information */ - LineIntersectorHitM<M> hit(u,zero,t,T); - return epilog(valid,hit); - } - }; - - template<int M, int K> - struct FlatLinearCurveIntersectorK - { - typedef CurvePrecalculationsK<K> Precalculations; - - template<typename Epilog> - static __forceinline bool intersect(const vbool<M>& valid_i, - RayK<K>& ray, size_t k, - IntersectContext* context, - const LineSegments* geom, - const Precalculations& pre, - const Vec4vf<M>& v0i, const Vec4vf<M>& v1i, - const Epilog& epilog) - { - /* transform end points into ray space */ - vbool<M> valid = valid_i; - vfloat<M> depth_scale = pre.depth_scale[k]; - LinearSpace3<Vec3vf<M>> ray_space = pre.ray_space[k]; - const Vec3vf<M> ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]); - const Vec3vf<M> ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]); - - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); - - Vec4vf<M> p0(xfmVector(ray_space,v0.xyz()-ray_org), v0.w); - Vec4vf<M> p1(xfmVector(ray_space,v1.xyz()-ray_org), v1.w); - - /* approximative intersection with cone */ - const Vec4vf<M> v = p1-p0; - const Vec4vf<M> w = -p0; - const vfloat<M> d0 = madd(w.x,v.x,w.y*v.y); - const vfloat<M> d1 = madd(v.x,v.x,v.y*v.y); - const vfloat<M> u = clamp(d0*rcp(d1),vfloat<M>(zero),vfloat<M>(one)); - const Vec4vf<M> p = madd(u,v,p0); - const vfloat<M> t = p.z; - const vfloat<M> d2 = madd(p.x,p.x,p.y*p.y); - const vfloat<M> r = p.w; - const vfloat<M> r2 = r*r; - valid &= (d2 <= r2) & (vfloat<M>(ray.tnear()[k]) <= t) & (t <= vfloat<M>(ray.tfar[k])); - if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) - valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; // ignore self intersections - if (unlikely(none(valid))) return false; - - /* ignore denormalized segments */ - const Vec3vf<M> T = v1.xyz()-v0.xyz(); - valid &= (T.x != vfloat<M>(zero)) | (T.y != vfloat<M>(zero)) | (T.z != vfloat<M>(zero)); - if (unlikely(none(valid))) return false; - - /* update hit information */ - LineIntersectorHitM<M> hit(u,zero,t,T); - return epilog(valid,hit); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/linei.h b/thirdparty/embree-aarch64/kernels/geometry/linei.h deleted file mode 100644 index a72029ca53..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/linei.h +++ /dev/null @@ -1,709 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "primitive.h" - -namespace embree -{ - template<int M> - struct LineMi - { - /* Virtual interface to query information about the line segment type */ - struct Type : public PrimitiveType - { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - - /* primitive supports multiple time segments */ - static const bool singleTimeSegment = false; - - /* Returns maximum number of stored line segments */ - static __forceinline size_t max_size() { return M; } - - /* Returns required number of primitive blocks for N line segments */ - static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } - - /* Returns required number of bytes for N line segments */ - static __forceinline size_t bytes(size_t N) { return blocks(N)*sizeof(LineMi); } - - public: - - /* Default constructor */ - __forceinline LineMi() { } - - /* Construction from vertices and IDs */ - __forceinline LineMi(const vuint<M>& v0, unsigned short leftExists, unsigned short rightExists, const vuint<M>& geomIDs, const vuint<M>& primIDs, Geometry::GType gtype) - : gtype((unsigned char)gtype), m((unsigned char)popcnt(vuint<M>(primIDs) != vuint<M>(-1))), sharedGeomID(geomIDs[0]), leftExists (leftExists), rightExists(rightExists), v0(v0), primIDs(primIDs) - { - assert(all(vuint<M>(geomID()) == geomIDs)); - } - - /* Returns a mask that tells which line segments are valid */ - __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); } - - /* Returns a mask that tells which line segments are valid */ - template<int Mx> - __forceinline vbool<Mx> valid() const { return vuint<Mx>(primIDs) != vuint<Mx>(-1); } - - /* Returns if the specified line segment is valid */ - __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; } - - /* Returns the number of stored line segments */ - __forceinline size_t size() const { return bsf(~movemask(valid())); } - - /* Returns the geometry IDs */ - //template<class T> - //static __forceinline T unmask(T &index) { return index & 0x3fffffff; } - - __forceinline unsigned int geomID(unsigned int i = 0) const { return sharedGeomID; } - //__forceinline vuint<M> geomID() { return unmask(geomIDs); } - //__forceinline const vuint<M> geomID() const { return unmask(geomIDs); } - //__forceinline unsigned int geomID(const size_t i) const { assert(i<M); return unmask(geomIDs[i]); } - - /* Returns the primitive IDs */ - __forceinline vuint<M>& primID() { return primIDs; } - __forceinline const vuint<M>& primID() const { return primIDs; } - __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } - - /* gather the line segments */ - __forceinline void gather(Vec4vf<M>& p0, - Vec4vf<M>& p1, - const LineSegments* geom) const; - - __forceinline void gatheri(Vec4vf<M>& p0, - Vec4vf<M>& p1, - const LineSegments* geom, - const int itime) const; - - __forceinline void gather(Vec4vf<M>& p0, - Vec4vf<M>& p1, - const LineSegments* geom, - float time) const; - - /* gather the line segments with lateral info */ - __forceinline void gather(Vec4vf<M>& p0, - Vec4vf<M>& p1, - Vec4vf<M>& pL, - Vec4vf<M>& pR, - const LineSegments* geom) const; - - __forceinline void gatheri(Vec4vf<M>& p0, - Vec4vf<M>& p1, - Vec4vf<M>& pL, - Vec4vf<M>& pR, - const LineSegments* geom, - const int itime) const; - - __forceinline void gather(Vec4vf<M>& p0, - Vec4vf<M>& p1, - Vec4vf<M>& pL, - Vec4vf<M>& pR, - const LineSegments* geom, - float time) const; - - __forceinline void gather(Vec4vf<M>& p0, - Vec4vf<M>& p1, - vbool<M>& cL, - vbool<M>& cR, - const LineSegments* geom) const; - - __forceinline void gatheri(Vec4vf<M>& p0, - Vec4vf<M>& p1, - vbool<M>& cL, - vbool<M>& cR, - const LineSegments* geom, - const int itime) const; - - __forceinline void gather(Vec4vf<M>& p0, - Vec4vf<M>& p1, - vbool<M>& cL, - vbool<M>& cR, - const LineSegments* geom, - float time) const; - - /* Calculate the bounds of the line segments */ - __forceinline const BBox3fa bounds(const Scene* scene, size_t itime = 0) const - { - BBox3fa bounds = empty; - for (size_t i=0; i<M && valid(i); i++) - { - const LineSegments* geom = scene->get<LineSegments>(geomID(i)); - const Vec3ff& p0 = geom->vertex(v0[i]+0,itime); - const Vec3ff& p1 = geom->vertex(v0[i]+1,itime); - BBox3fa b = merge(BBox3fa(p0),BBox3fa(p1)); - b = enlarge(b,Vec3fa(max(p0.w,p1.w))); - bounds.extend(b); - } - return bounds; - } - - /* Calculate the linear bounds of the primitive */ - __forceinline LBBox3fa linearBounds(const Scene* scene, size_t itime) { - return LBBox3fa(bounds(scene,itime+0), bounds(scene,itime+1)); - } - - __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps) { - LBBox3fa allBounds = empty; - for (size_t i=0; i<M && valid(i); i++) - { - const LineSegments* geom = scene->get<LineSegments>(geomID(i)); - allBounds.extend(geom->linearBounds(primID(i), itime, numTimeSteps)); - } - return allBounds; - } - - __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range) - { - LBBox3fa allBounds = empty; - for (size_t i=0; i<M && valid(i); i++) - { - const LineSegments* geom = scene->get<LineSegments>(geomID((unsigned int)i)); - allBounds.extend(geom->linearBounds(primID(i), time_range)); - } - return allBounds; - } - - /* Fill line segment from line segment list */ - template<typename PrimRefT> - __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene) - { - Geometry::GType gty = scene->get(prims[begin].geomID())->getType(); - vuint<M> geomID, primID; - vuint<M> v0; - unsigned short leftExists = 0; - unsigned short rightExists = 0; - const PrimRefT* prim = &prims[begin]; - - for (size_t i=0; i<M; i++) - { - const LineSegments* geom = scene->get<LineSegments>(prim->geomID()); - if (begin<end) { - geomID[i] = prim->geomID(); - primID[i] = prim->primID(); - v0[i] = geom->segment(prim->primID()); - leftExists |= geom->segmentLeftExists(primID[i]) << i; - rightExists |= geom->segmentRightExists(primID[i]) << i; - begin++; - } else { - assert(i); - if (i>0) { - geomID[i] = geomID[i-1]; - primID[i] = -1; - v0[i] = v0[i-1]; - } - } - if (begin<end) prim = &prims[begin]; // FIXME: remove this line - } - new (this) LineMi(v0,leftExists,rightExists,geomID,primID,gty); // FIXME: use non temporal store - } - - template<typename BVH, typename Allocator> - __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) - { - size_t start = set.begin(); - size_t items = LineMi::blocks(set.size()); - size_t numbytes = LineMi::bytes(set.size()); - LineMi* accel = (LineMi*) alloc.malloc1(numbytes,M*sizeof(float)); - for (size_t i=0; i<items; i++) { - accel[i].fill(prims,start,set.end(),bvh->scene); - } - return bvh->encodeLeaf((char*)accel,items); - }; - - __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime) - { - fill(prims,begin,end,scene); - return linearBounds(scene,itime); - } - - __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range) - { - fill(prims,begin,end,scene); - return linearBounds(scene,time_range); - } - - template<typename BVH, typename SetMB, typename Allocator> - __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc) - { - size_t start = prims.begin(); - size_t end = prims.end(); - size_t items = LineMi::blocks(prims.size()); - size_t numbytes = LineMi::bytes(prims.size()); - LineMi* accel = (LineMi*) alloc.malloc1(numbytes,M*sizeof(float)); - const typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,items); - - LBBox3fa bounds = empty; - for (size_t i=0; i<items; i++) - bounds.extend(accel[i].fillMB(prims.prims->data(),start,end,bvh->scene,prims.time_range)); - - return typename BVH::NodeRecordMB4D(node,bounds,prims.time_range); - }; - - /* Updates the primitive */ - __forceinline BBox3fa update(LineSegments* geom) - { - BBox3fa bounds = empty; - for (size_t i=0; i<M && valid(i); i++) - { - const Vec3ff& p0 = geom->vertex(v0[i]+0); - const Vec3ff& p1 = geom->vertex(v0[i]+1); - BBox3fa b = merge(BBox3fa(p0),BBox3fa(p1)); - b = enlarge(b,Vec3fa(max(p0.w,p1.w))); - bounds.extend(b); - } - return bounds; - } - - /*! output operator */ - friend __forceinline embree_ostream operator<<(embree_ostream cout, const LineMi& line) { - return cout << "Line" << M << "i {" << line.v0 << ", " << line.geomID() << ", " << line.primID() << "}"; - } - - public: - unsigned char gtype; - unsigned char m; - unsigned int sharedGeomID; - unsigned short leftExists, rightExists; - vuint<M> v0; // index of start vertex - private: - vuint<M> primIDs; // primitive ID - }; - - template<> - __forceinline void LineMi<4>::gather(Vec4vf4& p0, - Vec4vf4& p1, - const LineSegments* geom) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0])); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1])); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2])); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3])); - transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w); - - const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1)); - const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1)); - const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1)); - const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1)); - transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w); - } - - template<> - __forceinline void LineMi<4>::gatheri(Vec4vf4& p0, - Vec4vf4& p1, - const LineSegments* geom, - const int itime) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime)); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime)); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime)); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime)); - transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w); - - const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime)); - const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime)); - const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime)); - const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime)); - transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w); - } - - template<> - __forceinline void LineMi<4>::gather(Vec4vf4& p0, - Vec4vf4& p1, - const LineSegments* geom, - float time) const - { - float ftime; - const int itime = geom->timeSegment(time, ftime); - - Vec4vf4 a0,a1; - gatheri(a0,a1,geom,itime); - Vec4vf4 b0,b1; - gatheri(b0,b1,geom,itime+1); - p0 = lerp(a0,b0,vfloat4(ftime)); - p1 = lerp(a1,b1,vfloat4(ftime)); - } - - template<> - __forceinline void LineMi<4>::gather(Vec4vf4& p0, - Vec4vf4& p1, - vbool4& cL, - vbool4& cR, - const LineSegments* geom) const - { - gather(p0,p1,geom); - cL = !vbool4(leftExists); - cR = !vbool4(rightExists); - } - - template<> - __forceinline void LineMi<4>::gatheri(Vec4vf4& p0, - Vec4vf4& p1, - vbool4& cL, - vbool4& cR, - const LineSegments* geom, - const int itime) const - { - gatheri(p0,p1,geom,itime); - cL = !vbool4(leftExists); - cR = !vbool4(rightExists); - } - - template<> - __forceinline void LineMi<4>::gather(Vec4vf4& p0, - Vec4vf4& p1, - vbool4& cL, - vbool4& cR, - const LineSegments* geom, - float time) const - { - float ftime; - const int itime = geom->timeSegment(time, ftime); - - Vec4vf4 a0,a1; - gatheri(a0,a1,geom,itime); - Vec4vf4 b0,b1; - gatheri(b0,b1,geom,itime+1); - p0 = lerp(a0,b0,vfloat4(ftime)); - p1 = lerp(a1,b1,vfloat4(ftime)); - cL = !vbool4(leftExists); - cR = !vbool4(rightExists); - } - - template<> - __forceinline void LineMi<4>::gather(Vec4vf4& p0, - Vec4vf4& p1, - Vec4vf4& pL, - Vec4vf4& pR, - const LineSegments* geom) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0])); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1])); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2])); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3])); - transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w); - - const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1)); - const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1)); - const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1)); - const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1)); - transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w); - - const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1)) : vfloat4(inf); - const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1)) : vfloat4(inf); - const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1)) : vfloat4(inf); - const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1)) : vfloat4(inf); - transpose(l0,l1,l2,l3,pL.x,pL.y,pL.z,pL.w); - - const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2)) : vfloat4(inf); - const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2)) : vfloat4(inf); - const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2)) : vfloat4(inf); - const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2)) : vfloat4(inf); - transpose(r0,r1,r2,r3,pR.x,pR.y,pR.z,pR.w); - } - - template<> - __forceinline void LineMi<4>::gatheri(Vec4vf4& p0, - Vec4vf4& p1, - Vec4vf4& pL, - Vec4vf4& pR, - const LineSegments* geom, - const int itime) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime)); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime)); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime)); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime)); - transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w); - - const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime)); - const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime)); - const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime)); - const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime)); - transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w); - - const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1,itime)) : vfloat4(inf); - const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1,itime)) : vfloat4(inf); - const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1,itime)) : vfloat4(inf); - const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1,itime)) : vfloat4(inf); - transpose(l0,l1,l2,l3,pL.x,pL.y,pL.z,pL.w); - - const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2,itime)) : vfloat4(inf); - const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2,itime)) : vfloat4(inf); - const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2,itime)) : vfloat4(inf); - const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2,itime)) : vfloat4(inf); - transpose(r0,r1,r2,r3,pR.x,pR.y,pR.z,pR.w); - } - - template<> - __forceinline void LineMi<4>::gather(Vec4vf4& p0, - Vec4vf4& p1, - Vec4vf4& pL, - Vec4vf4& pR, - const LineSegments* geom, - float time) const - { - float ftime; - const int itime = geom->timeSegment(time, ftime); - - Vec4vf4 a0,a1,aL,aR; - gatheri(a0,a1,aL,aR,geom,itime); - Vec4vf4 b0,b1,bL,bR; - gatheri(b0,b1,bL,bR,geom,itime+1); - p0 = lerp(a0,b0,vfloat4(ftime)); - p1 = lerp(a1,b1,vfloat4(ftime)); - pL = lerp(aL,bL,vfloat4(ftime)); - pR = lerp(aR,bR,vfloat4(ftime)); - } - -#if defined(__AVX__) - - template<> - __forceinline void LineMi<8>::gather(Vec4vf8& p0, - Vec4vf8& p1, - const LineSegments* geom) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0])); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1])); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2])); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3])); - const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4])); - const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5])); - const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6])); - const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7])); - transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w); - - const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1)); - const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1)); - const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1)); - const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1)); - const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1)); - const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1)); - const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1)); - const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1)); - transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w); - } - - template<> - __forceinline void LineMi<8>::gatheri(Vec4vf8& p0, - Vec4vf8& p1, - const LineSegments* geom, - const int itime) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime)); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime)); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime)); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime)); - const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4],itime)); - const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5],itime)); - const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6],itime)); - const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7],itime)); - transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w); - - const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime)); - const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime)); - const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime)); - const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime)); - const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1,itime)); - const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1,itime)); - const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1,itime)); - const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1,itime)); - transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w); - } - - template<> - __forceinline void LineMi<8>::gather(Vec4vf8& p0, - Vec4vf8& p1, - const LineSegments* geom, - float time) const - { - float ftime; - const int itime = geom->timeSegment(time, ftime); - - Vec4vf8 a0,a1; - gatheri(a0,a1,geom,itime); - Vec4vf8 b0,b1; - gatheri(b0,b1,geom,itime+1); - p0 = lerp(a0,b0,vfloat8(ftime)); - p1 = lerp(a1,b1,vfloat8(ftime)); - } - - template<> - __forceinline void LineMi<8>::gather(Vec4vf8& p0, - Vec4vf8& p1, - Vec4vf8& pL, - Vec4vf8& pR, - const LineSegments* geom) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0])); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1])); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2])); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3])); - const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4])); - const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5])); - const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6])); - const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7])); - transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w); - - const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1)); - const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1)); - const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1)); - const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1)); - const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1)); - const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1)); - const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1)); - const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1)); - transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w); - - const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1)) : vfloat4(inf); - const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1)) : vfloat4(inf); - const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1)) : vfloat4(inf); - const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1)) : vfloat4(inf); - const vfloat4 l4 = (leftExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]-1)) : vfloat4(inf); - const vfloat4 l5 = (leftExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]-1)) : vfloat4(inf); - const vfloat4 l6 = (leftExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]-1)) : vfloat4(inf); - const vfloat4 l7 = (leftExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]-1)) : vfloat4(inf); - transpose(l0,l1,l2,l3,l4,l5,l6,l7,pL.x,pL.y,pL.z,pL.w); - - const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2)) : vfloat4(inf); - const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2)) : vfloat4(inf); - const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2)) : vfloat4(inf); - const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2)) : vfloat4(inf); - const vfloat4 r4 = (rightExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]+2)) : vfloat4(inf); - const vfloat4 r5 = (rightExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]+2)) : vfloat4(inf); - const vfloat4 r6 = (rightExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]+2)) : vfloat4(inf); - const vfloat4 r7 = (rightExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]+2)) : vfloat4(inf); - transpose(r0,r1,r2,r3,r4,r5,r6,r7,pR.x,pR.y,pR.z,pR.w); - } - - template<> - __forceinline void LineMi<8>::gatheri(Vec4vf8& p0, - Vec4vf8& p1, - Vec4vf8& pL, - Vec4vf8& pR, - const LineSegments* geom, - const int itime) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime)); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime)); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime)); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime)); - const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4],itime)); - const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5],itime)); - const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6],itime)); - const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7],itime)); - transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w); - - const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime)); - const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime)); - const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime)); - const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime)); - const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1,itime)); - const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1,itime)); - const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1,itime)); - const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1,itime)); - transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w); - - const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1,itime)) : vfloat4(inf); - const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1,itime)) : vfloat4(inf); - const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1,itime)) : vfloat4(inf); - const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1,itime)) : vfloat4(inf); - const vfloat4 l4 = (leftExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]-1,itime)) : vfloat4(inf); - const vfloat4 l5 = (leftExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]-1,itime)) : vfloat4(inf); - const vfloat4 l6 = (leftExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]-1,itime)) : vfloat4(inf); - const vfloat4 l7 = (leftExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]-1,itime)) : vfloat4(inf); - transpose(l0,l1,l2,l3,l4,l5,l6,l7,pL.x,pL.y,pL.z,pL.w); - - const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2,itime)) : vfloat4(inf); - const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2,itime)) : vfloat4(inf); - const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2,itime)) : vfloat4(inf); - const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2,itime)) : vfloat4(inf); - const vfloat4 r4 = (rightExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]+2,itime)) : vfloat4(inf); - const vfloat4 r5 = (rightExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]+2,itime)) : vfloat4(inf); - const vfloat4 r6 = (rightExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]+2,itime)) : vfloat4(inf); - const vfloat4 r7 = (rightExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]+2,itime)) : vfloat4(inf); - transpose(r0,r1,r2,r3,r4,r5,r6,r7,pR.x,pR.y,pR.z,pR.w); - } - - template<> - __forceinline void LineMi<8>::gather(Vec4vf8& p0, - Vec4vf8& p1, - Vec4vf8& pL, - Vec4vf8& pR, - const LineSegments* geom, - float time) const - { - float ftime; - const int itime = geom->timeSegment(time, ftime); - - Vec4vf8 a0,a1,aL,aR; - gatheri(a0,a1,aL,aR,geom,itime); - Vec4vf8 b0,b1,bL,bR; - gatheri(b0,b1,bL,bR,geom,itime+1); - p0 = lerp(a0,b0,vfloat8(ftime)); - p1 = lerp(a1,b1,vfloat8(ftime)); - pL = lerp(aL,bL,vfloat8(ftime)); - pR = lerp(aR,bR,vfloat8(ftime)); - } - - template<> - __forceinline void LineMi<8>::gather(Vec4vf8& p0, - Vec4vf8& p1, - vbool8& cL, - vbool8& cR, - const LineSegments* geom) const - { - gather(p0,p1,geom); - cL = !vbool8(leftExists); - cR = !vbool8(rightExists); - } - - template<> - __forceinline void LineMi<8>::gatheri(Vec4vf8& p0, - Vec4vf8& p1, - vbool8& cL, - vbool8& cR, - const LineSegments* geom, - const int itime) const - { - gatheri(p0,p1,geom,itime); - cL = !vbool8(leftExists); - cR = !vbool8(rightExists); - } - - template<> - __forceinline void LineMi<8>::gather(Vec4vf8& p0, - Vec4vf8& p1, - vbool8& cL, - vbool8& cR, - const LineSegments* geom, - float time) const - { - float ftime; - const int itime = geom->timeSegment(time, ftime); - - Vec4vf8 a0,a1; - gatheri(a0,a1,geom,itime); - Vec4vf8 b0,b1; - gatheri(b0,b1,geom,itime+1); - p0 = lerp(a0,b0,vfloat8(ftime)); - p1 = lerp(a1,b1,vfloat8(ftime)); - cL = !vbool8(leftExists); - cR = !vbool8(rightExists); - } - -#endif - - template<int M> - typename LineMi<M>::Type LineMi<M>::type; - - typedef LineMi<4> Line4i; - typedef LineMi<8> Line8i; -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h deleted file mode 100644 index a431796a88..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "linei.h" -#include "line_intersector.h" -#include "intersector_epilog.h" - -namespace embree -{ - namespace isa - { - template<int M, int Mx, bool filter> - struct FlatLinearCurveMiIntersector1 - { - typedef LineMi<M> Primitive; - typedef CurvePrecalculations1 Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line) - { - STAT3(normal.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; line.gather(v0,v1,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) - { - STAT3(shadow.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; line.gather(v0,v1,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - return FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line); - } - }; - - template<int M, int Mx, bool filter> - struct FlatLinearCurveMiMBIntersector1 - { - typedef LineMi<M> Primitive; - typedef CurvePrecalculations1 Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line) - { - STAT3(normal.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()); - const vbool<Mx> valid = line.template valid<Mx>(); - FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) - { - STAT3(shadow.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()); - const vbool<Mx> valid = line.template valid<Mx>(); - return FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line); - } - }; - - template<int M, int Mx, int K, bool filter> - struct FlatLinearCurveMiIntersectorK - { - typedef LineMi<M> Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) - { - STAT3(normal.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; line.gather(v0,v1,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) - { - STAT3(shadow.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; line.gather(v0,v1,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - return FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); - } - }; - - template<int M, int Mx, int K, bool filter> - struct FlatLinearCurveMiMBIntersectorK - { - typedef LineMi<M> Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) - { - STAT3(normal.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()[k]); - const vbool<Mx> valid = line.template valid<Mx>(); - FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) - { - STAT3(shadow.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()[k]); - const vbool<Mx> valid = line.template valid<Mx>(); - return FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/object.h b/thirdparty/embree-aarch64/kernels/geometry/object.h deleted file mode 100644 index f26391de52..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/object.h +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "primitive.h" - -namespace embree -{ - struct Object - { - struct Type : public PrimitiveType - { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - - /* primitive supports multiple time segments */ - static const bool singleTimeSegment = false; - - /* Returns maximum number of stored primitives */ - static __forceinline size_t max_size() { return 1; } - - /* Returns required number of primitive blocks for N primitives */ - static __forceinline size_t blocks(size_t N) { return N; } - - public: - - /*! constructs a virtual object */ - Object (unsigned geomID, unsigned primID) - : _geomID(geomID), _primID(primID) {} - - __forceinline unsigned geomID() const { - return _geomID; - } - - __forceinline unsigned primID() const { - return _primID; - } - - /*! fill triangle from triangle list */ - __forceinline void fill(const PrimRef* prims, size_t& i, size_t end, Scene* scene) - { - const PrimRef& prim = prims[i]; i++; - new (this) Object(prim.geomID(), prim.primID()); - } - - /*! fill triangle from triangle list */ - __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& i, size_t end, Scene* scene, size_t itime) - { - const PrimRef& prim = prims[i]; i++; - const unsigned geomID = prim.geomID(); - const unsigned primID = prim.primID(); - new (this) Object(geomID, primID); - AccelSet* accel = (AccelSet*) scene->get(geomID); - return accel->linearBounds(primID,itime); - } - - /*! fill triangle from triangle list */ - __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& i, size_t end, Scene* scene, const BBox1f time_range) - { - const PrimRefMB& prim = prims[i]; i++; - const unsigned geomID = prim.geomID(); - const unsigned primID = prim.primID(); - new (this) Object(geomID, primID); - AccelSet* accel = (AccelSet*) scene->get(geomID); - return accel->linearBounds(primID,time_range); - } - - /* Updates the primitive */ - __forceinline BBox3fa update(AccelSet* mesh) { - return mesh->bounds(primID()); - } - - private: - unsigned int _geomID; //!< geometry ID - unsigned int _primID; //!< primitive ID - }; -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/object_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/object_intersector.h deleted file mode 100644 index 97882e0e59..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/object_intersector.h +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "object.h" -#include "../common/ray.h" - -namespace embree -{ - namespace isa - { - template<bool mblur> - struct ObjectIntersector1 - { - typedef Object Primitive; - - static const bool validIntersectorK = false; - - struct Precalculations { - __forceinline Precalculations() {} - __forceinline Precalculations (const Ray& ray, const void *ptr) {} - }; - - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim) - { - AccelSet* accel = (AccelSet*) context->scene->get(prim.geomID()); - - /* perform ray mask test */ -#if defined(EMBREE_RAY_MASK) - if ((ray.mask & accel->mask) == 0) - return; -#endif - - accel->intersect(ray,prim.geomID(),prim.primID(),context,reportIntersection1); - } - - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim) - { - AccelSet* accel = (AccelSet*) context->scene->get(prim.geomID()); - /* perform ray mask test */ -#if defined(EMBREE_RAY_MASK) - if ((ray.mask & accel->mask) == 0) - return false; -#endif - - accel->occluded(ray,prim.geomID(),prim.primID(),context,&reportOcclusion1); - return ray.tfar < 0.0f; - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim) - { - AccelSet* accel = (AccelSet*)context->scene->get(prim.geomID()); - context->geomID = prim.geomID(); - context->primID = prim.primID(); - return accel->pointQuery(query, context); - } - - template<int K> - static __forceinline void intersectK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) - { - assert(false); - } - - template<int K> - static __forceinline vbool<K> occludedK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node) - { - assert(false); - return valid; - } - }; - - template<int K, bool mblur> - struct ObjectIntersectorK - { - typedef Object Primitive; - - struct Precalculations { - __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {} - }; - - static __forceinline void intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& prim) - { - vbool<K> valid = valid_i; - AccelSet* accel = (AccelSet*) context->scene->get(prim.geomID()); - - /* perform ray mask test */ -#if defined(EMBREE_RAY_MASK) - valid &= (ray.mask & accel->mask) != 0; - if (none(valid)) return; -#endif - accel->intersect(valid,ray,prim.geomID(),prim.primID(),context,&reportIntersection1); - } - - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& prim) - { - vbool<K> valid = valid_i; - AccelSet* accel = (AccelSet*) context->scene->get(prim.geomID()); - - /* perform ray mask test */ -#if defined(EMBREE_RAY_MASK) - valid &= (ray.mask & accel->mask) != 0; - if (none(valid)) return false; -#endif - accel->occluded(valid,ray,prim.geomID(),prim.primID(),context,&reportOcclusion1); - return ray.tfar < 0.0f; - } - - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) { - intersect(vbool<K>(1<<int(k)),pre,ray,context,prim); - } - - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) { - occluded(vbool<K>(1<<int(k)),pre,ray,context,prim); - return ray.tfar[k] < 0.0f; - } - }; - - typedef ObjectIntersectorK<4,false> ObjectIntersector4; - typedef ObjectIntersectorK<8,false> ObjectIntersector8; - typedef ObjectIntersectorK<16,false> ObjectIntersector16; - - typedef ObjectIntersectorK<4,true> ObjectIntersector4MB; - typedef ObjectIntersectorK<8,true> ObjectIntersector8MB; - typedef ObjectIntersectorK<16,true> ObjectIntersector16MB; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/plane.h b/thirdparty/embree-aarch64/kernels/geometry/plane.h deleted file mode 100644 index ebe45db558..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/plane.h +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" - -namespace embree -{ - namespace isa - { - struct HalfPlane - { - const Vec3fa P; //!< plane origin - const Vec3fa N; //!< plane normal - - __forceinline HalfPlane(const Vec3fa& P, const Vec3fa& N) - : P(P), N(N) {} - - __forceinline BBox1f intersect(const Vec3fa& ray_org, const Vec3fa& ray_dir) const - { - Vec3fa O = Vec3fa(ray_org) - P; - Vec3fa D = Vec3fa(ray_dir); - float ON = dot(O,N); - float DN = dot(D,N); - bool eps = abs(DN) < min_rcp_input; - float t = -ON*rcp(DN); - float lower = select(eps || DN < 0.0f, float(neg_inf), t); - float upper = select(eps || DN > 0.0f, float(pos_inf), t); - return BBox1f(lower,upper); - } - }; - - template<int M> - struct HalfPlaneN - { - const Vec3vf<M> P; //!< plane origin - const Vec3vf<M> N; //!< plane normal - - __forceinline HalfPlaneN(const Vec3vf<M>& P, const Vec3vf<M>& N) - : P(P), N(N) {} - - __forceinline BBox<vfloat<M>> intersect(const Vec3fa& ray_org, const Vec3fa& ray_dir) const - { - Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray_org) - P; - Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray_dir); - vfloat<M> ON = dot(O,N); - vfloat<M> DN = dot(D,N); - vbool<M> eps = abs(DN) < min_rcp_input; - vfloat<M> t = -ON*rcp(DN); - vfloat<M> lower = select(eps | DN < 0.0f, vfloat<M>(neg_inf), t); - vfloat<M> upper = select(eps | DN > 0.0f, vfloat<M>(pos_inf), t); - return BBox<vfloat<M>>(lower,upper); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/pointi.h b/thirdparty/embree-aarch64/kernels/geometry/pointi.h deleted file mode 100644 index 4ba298e86b..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/pointi.h +++ /dev/null @@ -1,417 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "primitive.h" - -namespace embree -{ - template<int M> - struct PointMi - { - /* Virtual interface to query information about the line segment type */ - struct Type : public PrimitiveType - { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - /* primitive supports multiple time segments */ - static const bool singleTimeSegment = false; - - /* Returns maximum number of stored line segments */ - static __forceinline size_t max_size() - { - return M; - } - - /* Returns required number of primitive blocks for N line segments */ - static __forceinline size_t blocks(size_t N) - { - return (N + max_size() - 1) / max_size(); - } - - /* Returns required number of bytes for N line segments */ - static __forceinline size_t bytes(size_t N) - { - return blocks(N) * sizeof(PointMi); - } - - public: - /* Default constructor */ - __forceinline PointMi() {} - - /* Construction from vertices and IDs */ - __forceinline PointMi(const vuint<M>& geomIDs, const vuint<M>& primIDs, Geometry::GType gtype, uint32_t numPrimitives) - : gtype((unsigned char)gtype), - numPrimitives(numPrimitives), - sharedGeomID(geomIDs[0]), - primIDs(primIDs) - { - assert(all(vuint<M>(geomID()) == geomIDs)); - } - - /* Returns a mask that tells which line segments are valid */ - __forceinline vbool<M> valid() const { - return vint<M>(step) < vint<M>(numPrimitives); - } - - /* Returns a mask that tells which line segments are valid */ - template<int Mx> __forceinline vbool<Mx> valid() const { - return vint<Mx>(step) < vint<Mx>(numPrimitives); - } - - /* Returns if the specified line segment is valid */ - __forceinline bool valid(const size_t i) const - { - assert(i < M); - return i < numPrimitives; - } - - /* Returns the number of stored line segments */ - __forceinline size_t size() const { - return numPrimitives; - } - - __forceinline unsigned int geomID(unsigned int i = 0) const { - return sharedGeomID; - } - - __forceinline vuint<M>& primID() { - return primIDs; - } - __forceinline const vuint<M>& primID() const { - return primIDs; - } - __forceinline unsigned int primID(const size_t i) const { - assert(i < M); - return primIDs[i]; - } - - /* gather the line segments */ - __forceinline void gather(Vec4vf<M>& p0, const Points* geom) const; - __forceinline void gather(Vec4vf<M>& p0, Vec3vf<M>& n0, const Points* geom) const; - - __forceinline void gatheri(Vec4vf<M>& p0, const Points* geom, const int itime) const; - __forceinline void gatheri(Vec4vf<M>& p0, Vec3vf<M>& n0, const Points* geom, const int itime) const; - - __forceinline void gather(Vec4vf<M>& p0, const Points* geom, float time) const; - __forceinline void gather(Vec4vf<M>& p0, Vec3vf<M>& n0, const Points* geom, float time) const; - - /* Calculate the bounds of the line segments */ - __forceinline const BBox3fa bounds(const Scene* scene, size_t itime = 0) const - { - BBox3fa bounds = empty; - for (size_t i = 0; i < M && valid(i); i++) { - const Points* geom = scene->get<Points>(geomID(i)); - bounds.extend(geom->bounds(primID(i),itime)); - } - return bounds; - } - - /* Calculate the linear bounds of the primitive */ - __forceinline LBBox3fa linearBounds(const Scene* scene, size_t itime) { - return LBBox3fa(bounds(scene, itime + 0), bounds(scene, itime + 1)); - } - - __forceinline LBBox3fa linearBounds(const Scene* const scene, size_t itime, size_t numTimeSteps) - { - LBBox3fa allBounds = empty; - for (size_t i = 0; i < M && valid(i); i++) { - const Points* geom = scene->get<Points>(geomID(i)); - allBounds.extend(geom->linearBounds(primID(i), itime, numTimeSteps)); - } - return allBounds; - } - - __forceinline LBBox3fa linearBounds(const Scene* const scene, const BBox1f time_range) - { - LBBox3fa allBounds = empty; - for (size_t i = 0; i < M && valid(i); i++) { - const Points* geom = scene->get<Points>(geomID((unsigned int)i)); - allBounds.extend(geom->linearBounds(primID(i), time_range)); - } - return allBounds; - } - - /* Fill line segment from line segment list */ - template<typename PrimRefT> - __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene) - { - Geometry::GType gty = scene->get(prims[begin].geomID())->getType(); - vuint<M> geomID, primID; - vuint<M> v0; - const PrimRefT* prim = &prims[begin]; - - int numPrimitives = 0; - for (size_t i = 0; i < M; i++) { - if (begin < end) { - geomID[i] = prim->geomID(); - primID[i] = prim->primID(); - begin++; - numPrimitives++; - } else { - assert(i); - if (i > 0) { - geomID[i] = geomID[i - 1]; - primID[i] = primID[i - 1]; - } - } - if (begin < end) - prim = &prims[begin]; // FIXME: remove this line - } - new (this) PointMi(geomID, primID, gty, numPrimitives); // FIXME: use non temporal store - } - - template<typename BVH, typename Allocator> - __forceinline static typename BVH::NodeRef createLeaf(BVH* bvh, - const PrimRef* prims, - const range<size_t>& set, - const Allocator& alloc) - { - size_t start = set.begin(); - size_t items = PointMi::blocks(set.size()); - size_t numbytes = PointMi::bytes(set.size()); - PointMi* accel = (PointMi*)alloc.malloc1(numbytes, M * sizeof(float)); - for (size_t i = 0; i < items; i++) { - accel[i].fill(prims, start, set.end(), bvh->scene); - } - return bvh->encodeLeaf((char*)accel, items); - }; - - __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime) - { - fill(prims, begin, end, scene); - return linearBounds(scene, itime); - } - - __forceinline LBBox3fa fillMB( - const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range) - { - fill(prims, begin, end, scene); - return linearBounds(scene, time_range); - } - - template<typename BVH, typename SetMB, typename Allocator> - __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc) - { - size_t start = prims.object_range.begin(); - size_t end = prims.object_range.end(); - size_t items = PointMi::blocks(prims.object_range.size()); - size_t numbytes = PointMi::bytes(prims.object_range.size()); - PointMi* accel = (PointMi*)alloc.malloc1(numbytes, M * sizeof(float)); - const typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel, items); - - LBBox3fa bounds = empty; - for (size_t i = 0; i < items; i++) - bounds.extend(accel[i].fillMB(prims.prims->data(), start, end, bvh->scene, prims.time_range)); - - return typename BVH::NodeRecordMB4D(node, bounds, prims.time_range); - }; - - /*! output operator */ - friend __forceinline embree_ostream operator<<(embree_ostream cout, const PointMi& line) - { - return cout << "Line" << M << "i {" << line.v0 << ", " << line.geomID() << ", " << line.primID() << "}"; - } - - public: - unsigned char gtype; - unsigned char numPrimitives; - unsigned int sharedGeomID; - - private: - vuint<M> primIDs; // primitive ID - }; - - template<> - __forceinline void PointMi<4>::gather(Vec4vf4& p0, const Points* geom) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0))); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1))); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2))); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3))); - transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w); - } - - template<> - __forceinline void PointMi<4>::gather(Vec4vf4& p0, Vec3vf4& n0, const Points* geom) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0))); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1))); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2))); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3))); - transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w); - const vfloat4 b0 = vfloat4(geom->normal(primID(0))); - const vfloat4 b1 = vfloat4(geom->normal(primID(1))); - const vfloat4 b2 = vfloat4(geom->normal(primID(2))); - const vfloat4 b3 = vfloat4(geom->normal(primID(3))); - transpose(b0, b1, b2, b3, n0.x, n0.y, n0.z); - } - - template<> - __forceinline void PointMi<4>::gatheri(Vec4vf4& p0, const Points* geom, const int itime) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime)); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime)); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime)); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime)); - transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w); - } - - template<> - __forceinline void PointMi<4>::gatheri(Vec4vf4& p0, Vec3vf4& n0, const Points* geom, const int itime) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime)); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime)); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime)); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime)); - transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w); - const vfloat4 b0 = vfloat4(geom->normal(primID(0), itime)); - const vfloat4 b1 = vfloat4(geom->normal(primID(1), itime)); - const vfloat4 b2 = vfloat4(geom->normal(primID(2), itime)); - const vfloat4 b3 = vfloat4(geom->normal(primID(3), itime)); - transpose(b0, b1, b2, b3, n0.x, n0.y, n0.z); - } - - template<> - __forceinline void PointMi<4>::gather(Vec4vf4& p0, const Points* geom, float time) const - { - float ftime; - const int itime = geom->timeSegment(time, ftime); - - Vec4vf4 a0; gatheri(a0, geom, itime); - Vec4vf4 b0; gatheri(b0, geom, itime + 1); - p0 = lerp(a0, b0, vfloat4(ftime)); - } - - template<> - __forceinline void PointMi<4>::gather(Vec4vf4& p0, Vec3vf4& n0, const Points* geom, float time) const - { - float ftime; - const int itime = geom->timeSegment(time, ftime); - - Vec4vf4 a0, b0; - Vec3vf4 norm0, norm1; - gatheri(a0, norm0, geom, itime); - gatheri(b0, norm1, geom, itime + 1); - p0 = lerp(a0, b0, vfloat4(ftime)); - n0 = lerp(norm0, norm1, vfloat4(ftime)); - } - -#if defined(__AVX__) - - template<> - __forceinline void PointMi<8>::gather(Vec4vf8& p0, const Points* geom) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0))); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1))); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2))); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3))); - const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4))); - const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5))); - const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6))); - const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7))); - transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w); - } - - template<> - __forceinline void PointMi<8>::gather(Vec4vf8& p0, Vec3vf8& n0, const Points* geom) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0))); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1))); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2))); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3))); - const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4))); - const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5))); - const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6))); - const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7))); - transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w); - const vfloat4 b0 = vfloat4(geom->normal(primID(0))); - const vfloat4 b1 = vfloat4(geom->normal(primID(1))); - const vfloat4 b2 = vfloat4(geom->normal(primID(2))); - const vfloat4 b3 = vfloat4(geom->normal(primID(3))); - const vfloat4 b4 = vfloat4(geom->normal(primID(4))); - const vfloat4 b5 = vfloat4(geom->normal(primID(5))); - const vfloat4 b6 = vfloat4(geom->normal(primID(6))); - const vfloat4 b7 = vfloat4(geom->normal(primID(7))); - transpose(b0, b1, b2, b3, b4, b5, b6, b7, n0.x, n0.y, n0.z); - } - - template<> - __forceinline void PointMi<8>::gatheri(Vec4vf8& p0, const Points* geom, const int itime) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime)); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime)); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime)); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime)); - const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4), itime)); - const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5), itime)); - const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6), itime)); - const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7), itime)); - transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w); - } - - template<> - __forceinline void PointMi<8>::gatheri(Vec4vf8& p0, Vec3vf8& n0, const Points* geom, const int itime) const - { - const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime)); - const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime)); - const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime)); - const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime)); - const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4), itime)); - const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5), itime)); - const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6), itime)); - const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7), itime)); - transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w); - const vfloat4 b0 = vfloat4(geom->normal(primID(0), itime)); - const vfloat4 b1 = vfloat4(geom->normal(primID(1), itime)); - const vfloat4 b2 = vfloat4(geom->normal(primID(2), itime)); - const vfloat4 b3 = vfloat4(geom->normal(primID(3), itime)); - const vfloat4 b4 = vfloat4(geom->normal(primID(4), itime)); - const vfloat4 b5 = vfloat4(geom->normal(primID(5), itime)); - const vfloat4 b6 = vfloat4(geom->normal(primID(6), itime)); - const vfloat4 b7 = vfloat4(geom->normal(primID(7), itime)); - transpose(b0, b1, b2, b3, b4, b5, b6, b7, n0.x, n0.y, n0.z); - } - - template<> - __forceinline void PointMi<8>::gather(Vec4vf8& p0, const Points* geom, float time) const - { - float ftime; - const int itime = geom->timeSegment(time, ftime); - - Vec4vf8 a0; - gatheri(a0, geom, itime); - Vec4vf8 b0; - gatheri(b0, geom, itime + 1); - p0 = lerp(a0, b0, vfloat8(ftime)); - } - - template<> - __forceinline void PointMi<8>::gather(Vec4vf8& p0, Vec3vf8& n0, const Points* geom, float time) const - { - float ftime; - const int itime = geom->timeSegment(time, ftime); - - Vec4vf8 a0, b0; - Vec3vf8 norm0, norm1; - gatheri(a0, norm0, geom, itime); - gatheri(b0, norm1, geom, itime + 1); - p0 = lerp(a0, b0, vfloat8(ftime)); - n0 = lerp(norm0, norm1, vfloat8(ftime)); - } -#endif - - template<int M> - typename PointMi<M>::Type PointMi<M>::type; - - typedef PointMi<4> Point4i; - typedef PointMi<8> Point8i; - -} // namespace embree diff --git a/thirdparty/embree-aarch64/kernels/geometry/primitive.h b/thirdparty/embree-aarch64/kernels/geometry/primitive.h deleted file mode 100644 index 41e5b2b304..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/primitive.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/default.h" -#include "../common/scene.h" -#include "../../common/simd/simd.h" -#include "../common/primref.h" -#include "../common/primref_mb.h" - -namespace embree -{ - struct PrimitiveType - { - /*! returns name of this primitive type */ - virtual const char* name() const = 0; - - /*! Returns the number of stored active primitives in a block. */ - virtual size_t sizeActive(const char* This) const = 0; - - /*! Returns the number of stored active and inactive primitives in a block. */ - virtual size_t sizeTotal(const char* This) const = 0; - - /*! Returns the number of bytes of block. */ - virtual size_t getBytes(const char* This) const = 0; - }; - - template<typename Primitive> - struct PrimitivePointQuery1 - { - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim) - { - bool changed = false; - for (size_t i = 0; i < Primitive::max_size(); i++) - { - if (!prim.valid(i)) break; - STAT3(point_query.trav_prims,1,1,1); - AccelSet* accel = (AccelSet*)context->scene->get(prim.geomID(i)); - context->geomID = prim.geomID(i); - context->primID = prim.primID(i); - changed |= accel->pointQuery(query, context); - } - return changed; - } - - static __forceinline void pointQueryNoop(PointQuery* query, PointQueryContext* context, const Primitive& prim) { } - }; -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp b/thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp deleted file mode 100644 index f93574c9c8..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp +++ /dev/null @@ -1,379 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "primitive.h" -#include "curveNv.h" -#include "curveNi.h" -#include "curveNi_mb.h" -#include "linei.h" -#include "triangle.h" -#include "trianglev.h" -#include "trianglev_mb.h" -#include "trianglei.h" -#include "quadv.h" -#include "quadi.h" -#include "subdivpatch1.h" -#include "object.h" -#include "instance.h" -#include "subgrid.h" - -namespace embree -{ - /********************** Curve4v **************************/ - - template<> - const char* Curve4v::Type::name () const { - return "curve4v"; - } - - template<> - size_t Curve4v::Type::sizeActive(const char* This) const - { - if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) - return ((Line4i*)This)->size(); - else - return ((Curve4v*)This)->N; - } - - template<> - size_t Curve4v::Type::sizeTotal(const char* This) const - { - if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) - return 4; - else - return ((Curve4v*)This)->N; - } - - template<> - size_t Curve4v::Type::getBytes(const char* This) const - { - if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) - return Line4i::bytes(sizeActive(This)); - else - return Curve4v::bytes(sizeActive(This)); - } - - /********************** Curve4i **************************/ - - template<> - const char* Curve4i::Type::name () const { - return "curve4i"; - } - - template<> - size_t Curve4i::Type::sizeActive(const char* This) const - { - if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) - return ((Line4i*)This)->size(); - else - return ((Curve4i*)This)->N; - } - - template<> - size_t Curve4i::Type::sizeTotal(const char* This) const - { - if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) - return 4; - else - return ((Curve4i*)This)->N; - } - - template<> - size_t Curve4i::Type::getBytes(const char* This) const - { - if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) - return Line4i::bytes(sizeActive(This)); - else - return Curve4i::bytes(sizeActive(This)); - } - - /********************** Curve4iMB **************************/ - - template<> - const char* Curve4iMB::Type::name () const { - return "curve4imb"; - } - - template<> - size_t Curve4iMB::Type::sizeActive(const char* This) const - { - if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) - return ((Line4i*)This)->size(); - else - return ((Curve4iMB*)This)->N; - } - - template<> - size_t Curve4iMB::Type::sizeTotal(const char* This) const - { - if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) - return 4; - else - return ((Curve4iMB*)This)->N; - } - - template<> - size_t Curve4iMB::Type::getBytes(const char* This) const - { - if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR) - return Line4i::bytes(sizeActive(This)); - else - return Curve4iMB::bytes(sizeActive(This)); - } - - /********************** Line4i **************************/ - - template<> - const char* Line4i::Type::name () const { - return "line4i"; - } - - template<> - size_t Line4i::Type::sizeActive(const char* This) const { - return ((Line4i*)This)->size(); - } - - template<> - size_t Line4i::Type::sizeTotal(const char* This) const { - return 4; - } - - template<> - size_t Line4i::Type::getBytes(const char* This) const { - return sizeof(Line4i); - } - - /********************** Triangle4 **************************/ - - template<> - const char* Triangle4::Type::name () const { - return "triangle4"; - } - - template<> - size_t Triangle4::Type::sizeActive(const char* This) const { - return ((Triangle4*)This)->size(); - } - - template<> - size_t Triangle4::Type::sizeTotal(const char* This) const { - return 4; - } - - template<> - size_t Triangle4::Type::getBytes(const char* This) const { - return sizeof(Triangle4); - } - - /********************** Triangle4v **************************/ - - template<> - const char* Triangle4v::Type::name () const { - return "triangle4v"; - } - - template<> - size_t Triangle4v::Type::sizeActive(const char* This) const { - return ((Triangle4v*)This)->size(); - } - - template<> - size_t Triangle4v::Type::sizeTotal(const char* This) const { - return 4; - } - - template<> - size_t Triangle4v::Type::getBytes(const char* This) const { - return sizeof(Triangle4v); - } - - /********************** Triangle4i **************************/ - - template<> - const char* Triangle4i::Type::name () const { - return "triangle4i"; - } - - template<> - size_t Triangle4i::Type::sizeActive(const char* This) const { - return ((Triangle4i*)This)->size(); - } - - template<> - size_t Triangle4i::Type::sizeTotal(const char* This) const { - return 4; - } - - template<> - size_t Triangle4i::Type::getBytes(const char* This) const { - return sizeof(Triangle4i); - } - - /********************** Triangle4vMB **************************/ - - template<> - const char* Triangle4vMB::Type::name () const { - return "triangle4vmb"; - } - - template<> - size_t Triangle4vMB::Type::sizeActive(const char* This) const { - return ((Triangle4vMB*)This)->size(); - } - - template<> - size_t Triangle4vMB::Type::sizeTotal(const char* This) const { - return 4; - } - - template<> - size_t Triangle4vMB::Type::getBytes(const char* This) const { - return sizeof(Triangle4vMB); - } - - /********************** Quad4v **************************/ - - template<> - const char* Quad4v::Type::name () const { - return "quad4v"; - } - - template<> - size_t Quad4v::Type::sizeActive(const char* This) const { - return ((Quad4v*)This)->size(); - } - - template<> - size_t Quad4v::Type::sizeTotal(const char* This) const { - return 4; - } - - template<> - size_t Quad4v::Type::getBytes(const char* This) const { - return sizeof(Quad4v); - } - - /********************** Quad4i **************************/ - - template<> - const char* Quad4i::Type::name () const { - return "quad4i"; - } - - template<> - size_t Quad4i::Type::sizeActive(const char* This) const { - return ((Quad4i*)This)->size(); - } - - template<> - size_t Quad4i::Type::sizeTotal(const char* This) const { - return 4; - } - - template<> - size_t Quad4i::Type::getBytes(const char* This) const { - return sizeof(Quad4i); - } - - /********************** SubdivPatch1 **************************/ - - const char* SubdivPatch1::Type::name () const { - return "subdivpatch1"; - } - - size_t SubdivPatch1::Type::sizeActive(const char* This) const { - return 1; - } - - size_t SubdivPatch1::Type::sizeTotal(const char* This) const { - return 1; - } - - size_t SubdivPatch1::Type::getBytes(const char* This) const { - return sizeof(SubdivPatch1); - } - - SubdivPatch1::Type SubdivPatch1::type; - - /********************** Virtual Object **************************/ - - const char* Object::Type::name () const { - return "object"; - } - - size_t Object::Type::sizeActive(const char* This) const { - return 1; - } - - size_t Object::Type::sizeTotal(const char* This) const { - return 1; - } - - size_t Object::Type::getBytes(const char* This) const { - return sizeof(Object); - } - - Object::Type Object::type; - - /********************** Instance **************************/ - - const char* InstancePrimitive::Type::name () const { - return "instance"; - } - - size_t InstancePrimitive::Type::sizeActive(const char* This) const { - return 1; - } - - size_t InstancePrimitive::Type::sizeTotal(const char* This) const { - return 1; - } - - size_t InstancePrimitive::Type::getBytes(const char* This) const { - return sizeof(InstancePrimitive); - } - - InstancePrimitive::Type InstancePrimitive::type; - - /********************** SubGrid **************************/ - - const char* SubGrid::Type::name () const { - return "subgrid"; - } - - size_t SubGrid::Type::sizeActive(const char* This) const { - return 1; - } - - size_t SubGrid::Type::sizeTotal(const char* This) const { - return 1; - } - - size_t SubGrid::Type::getBytes(const char* This) const { - return sizeof(SubGrid); - } - - SubGrid::Type SubGrid::type; - - /********************** SubGridQBVH4 **************************/ - - template<> - const char* SubGridQBVH4::Type::name () const { - return "SubGridQBVH4"; - } - - template<> - size_t SubGridQBVH4::Type::sizeActive(const char* This) const { - return 1; - } - - template<> - size_t SubGridQBVH4::Type::sizeTotal(const char* This) const { - return 1; - } - - template<> - size_t SubGridQBVH4::Type::getBytes(const char* This) const { - return sizeof(SubGridQBVH4); - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h deleted file mode 100644 index 57ff4e60e5..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -namespace embree -{ - namespace isa - { - /*! Intersects a ray with a quad with backface culling - * enabled. The quad v0,v1,v2,v3 is split into two triangles - * v0,v1,v3 and v2,v3,v1. The edge v1,v2 decides which of the two - * triangles gets intersected. */ - template<int N> - __forceinline vbool<N> intersect_quad_backface_culling(const vbool<N>& valid0, - const Vec3fa& ray_org, - const Vec3fa& ray_dir, - const float ray_tnear, - const float ray_tfar, - const Vec3vf<N>& quad_v0, - const Vec3vf<N>& quad_v1, - const Vec3vf<N>& quad_v2, - const Vec3vf<N>& quad_v3, - vfloat<N>& u_o, - vfloat<N>& v_o, - vfloat<N>& t_o) - { - /* calculate vertices relative to ray origin */ - vbool<N> valid = valid0; - const Vec3vf<N> O = Vec3vf<N>(ray_org); - const Vec3vf<N> D = Vec3vf<N>(ray_dir); - const Vec3vf<N> va = quad_v0-O; - const Vec3vf<N> vb = quad_v1-O; - const Vec3vf<N> vc = quad_v2-O; - const Vec3vf<N> vd = quad_v3-O; - - const Vec3vf<N> edb = vb-vd; - const vfloat<N> WW = dot(cross(vd,edb),D); - const Vec3vf<N> v0 = select(WW <= 0.0f,va,vc); - const Vec3vf<N> v1 = select(WW <= 0.0f,vb,vd); - const Vec3vf<N> v2 = select(WW <= 0.0f,vd,vb); - - /* calculate edges */ - const Vec3vf<N> e0 = v2-v0; - const Vec3vf<N> e1 = v0-v1; - - /* perform edge tests */ - const vfloat<N> U = dot(cross(v0,e0),D); - const vfloat<N> V = dot(cross(v1,e1),D); - valid &= max(U,V) <= 0.0f; - if (unlikely(none(valid))) return false; - - /* calculate geometry normal and denominator */ - const Vec3vf<N> Ng = cross(e1,e0); - const vfloat<N> den = dot(Ng,D); - const vfloat<N> rcpDen = rcp(den); - - /* perform depth test */ - const vfloat<N> t = rcpDen*dot(v0,Ng); - valid &= vfloat<N>(ray_tnear) <= t & t <= vfloat<N>(ray_tfar); - if (unlikely(none(valid))) return false; - - /* avoid division by 0 */ - valid &= den != vfloat<N>(zero); - if (unlikely(none(valid))) return false; - - /* update hit information */ - t_o = t; - u_o = U * rcpDen; - v_o = V * rcpDen; - u_o = select(WW <= 0.0f,u_o,1.0f-u_o); - v_o = select(WW <= 0.0f,v_o,1.0f-v_o); - return valid; - } - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h deleted file mode 100644 index 74e8c7720c..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h +++ /dev/null @@ -1,566 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "quadv.h" -#include "triangle_intersector_moeller.h" - -namespace embree -{ - namespace isa - { - template<int M> - struct QuadHitM - { - __forceinline QuadHitM() {} - - __forceinline QuadHitM(const vbool<M>& valid, - const vfloat<M>& U, - const vfloat<M>& V, - const vfloat<M>& T, - const vfloat<M>& absDen, - const Vec3vf<M>& Ng, - const vbool<M>& flags) - : U(U), V(V), T(T), absDen(absDen), tri_Ng(Ng), valid(valid), flags(flags) {} - - __forceinline void finalize() - { - const vfloat<M> rcpAbsDen = rcp(absDen); - vt = T * rcpAbsDen; - const vfloat<M> u = min(U * rcpAbsDen,1.0f); - const vfloat<M> v = min(V * rcpAbsDen,1.0f); - const vfloat<M> u1 = vfloat<M>(1.0f) - u; - const vfloat<M> v1 = vfloat<M>(1.0f) - v; -#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING) - vu = select(flags,u1,u); - vv = select(flags,v1,v); - vNg = Vec3vf<M>(tri_Ng.x,tri_Ng.y,tri_Ng.z); -#else - const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f)); - vv = select(flags,u1,v); - vu = select(flags,v1,u); - vNg = Vec3vf<M>(flip*tri_Ng.x,flip*tri_Ng.y,flip*tri_Ng.z); -#endif - } - - __forceinline Vec2f uv(const size_t i) - { - const float u = vu[i]; - const float v = vv[i]; - return Vec2f(u,v); - } - - __forceinline float t(const size_t i) { return vt[i]; } - __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } - - private: - vfloat<M> U; - vfloat<M> V; - vfloat<M> T; - vfloat<M> absDen; - Vec3vf<M> tri_Ng; - - public: - vbool<M> valid; - vfloat<M> vu; - vfloat<M> vv; - vfloat<M> vt; - Vec3vf<M> vNg; - - public: - const vbool<M> flags; - }; - - template<int K> - struct QuadHitK - { - __forceinline QuadHitK(const vfloat<K>& U, - const vfloat<K>& V, - const vfloat<K>& T, - const vfloat<K>& absDen, - const Vec3vf<K>& Ng, - const vbool<K>& flags) - : U(U), V(V), T(T), absDen(absDen), flags(flags), tri_Ng(Ng) {} - - __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const - { - const vfloat<K> rcpAbsDen = rcp(absDen); - const vfloat<K> t = T * rcpAbsDen; - const vfloat<K> u0 = min(U * rcpAbsDen,1.0f); - const vfloat<K> v0 = min(V * rcpAbsDen,1.0f); - const vfloat<K> u1 = vfloat<K>(1.0f) - u0; - const vfloat<K> v1 = vfloat<K>(1.0f) - v0; - const vfloat<K> u = select(flags,u1,u0); - const vfloat<K> v = select(flags,v1,v0); - const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z); - return std::make_tuple(u,v,t,Ng); - } - - private: - const vfloat<K> U; - const vfloat<K> V; - const vfloat<K> T; - const vfloat<K> absDen; - const vbool<K> flags; - const Vec3vf<K> tri_Ng; - }; - - /* ----------------------------- */ - /* -- single ray intersectors -- */ - /* ----------------------------- */ - - - template<int M, bool filter> - struct QuadMIntersector1MoellerTrumbore; - - /*! Intersects M quads with 1 ray */ - template<int M, bool filter> - struct QuadMIntersector1MoellerTrumbore - { - __forceinline QuadMIntersector1MoellerTrumbore() {} - - __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} - - __forceinline void intersect(RayHit& ray, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const vuint<M>& geomID, const vuint<M>& primID) const - { - MoellerTrumboreHitM<M> hit; - MoellerTrumboreIntersector1<M> intersector(ray,nullptr); - Intersect1EpilogM<M,M,filter> epilog(ray,context,geomID,primID); - - /* intersect first triangle */ - if (intersector.intersect(ray,v0,v1,v3,hit)) - epilog(hit.valid,hit); - - /* intersect second triangle */ - if (intersector.intersect(ray,v2,v3,v1,hit)) - { - hit.U = hit.absDen - hit.U; - hit.V = hit.absDen - hit.V; - epilog(hit.valid,hit); - } - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const vuint<M>& geomID, const vuint<M>& primID) const - { - MoellerTrumboreHitM<M> hit; - MoellerTrumboreIntersector1<M> intersector(ray,nullptr); - Occluded1EpilogM<M,M,filter> epilog(ray,context,geomID,primID); - - /* intersect first triangle */ - if (intersector.intersect(ray,v0,v1,v3,hit)) - { - if (epilog(hit.valid,hit)) - return true; - } - - /* intersect second triangle */ - if (intersector.intersect(ray,v2,v3,v1,hit)) - { - hit.U = hit.absDen - hit.U; - hit.V = hit.absDen - hit.V; - if (epilog(hit.valid,hit)) - return true; - } - return false; - } - }; - -#if defined(__AVX512ER__) // KNL - - /*! Intersects 4 quads with 1 ray using AVX512 */ - template<bool filter> - struct QuadMIntersector1MoellerTrumbore<4,filter> - { - __forceinline QuadMIntersector1MoellerTrumbore() {} - - __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} - - template<typename Epilog> - __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const - { - const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)), - select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)), - select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z))); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z)); - const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z)); -#else - const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)), - select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)), - select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z))); - const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)), - select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)), - select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z))); -#endif - const vbool16 flags(0xf0f0); - - MoellerTrumboreHitM<16> hit; - MoellerTrumboreIntersector1<16> intersector(ray,nullptr); - if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit))) - { - vfloat16 U = hit.U, V = hit.V, absDen = hit.absDen; -#if !defined(EMBREE_BACKFACE_CULLING) - hit.U = select(flags,absDen-V,U); - hit.V = select(flags,absDen-U,V); - hit.vNg *= select(flags,vfloat16(-1.0f),vfloat16(1.0f)); // FIXME: use XOR -#else - hit.U = select(flags,absDen-U,U); - hit.V = select(flags,absDen-V,V); -#endif - if (likely(epilog(hit.valid,hit))) - return true; - } - return false; - } - - __forceinline bool intersect(RayHit& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID))); - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID))); - } - }; - -#elif defined(__AVX__) - - /*! Intersects 4 quads with 1 ray using AVX */ - template<bool filter> - struct QuadMIntersector1MoellerTrumbore<4,filter> - { - __forceinline QuadMIntersector1MoellerTrumbore() {} - - __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} - - template<typename Epilog> - __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const - { - const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); - const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); -#else - const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); - const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); -#endif - MoellerTrumboreHitM<8> hit; - MoellerTrumboreIntersector1<8> intersector(ray,nullptr); - const vbool8 flags(0,0,0,0,1,1,1,1); - if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit))) - { - vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen; - -#if !defined(EMBREE_BACKFACE_CULLING) - hit.U = select(flags,absDen-V,U); - hit.V = select(flags,absDen-U,V); - hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); // FIXME: use XOR -#else - hit.U = select(flags,absDen-U,U); - hit.V = select(flags,absDen-V,V); -#endif - if (unlikely(epilog(hit.valid,hit))) - return true; - } - return false; - } - - __forceinline bool intersect(RayHit& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID))); - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID))); - } - }; - -#endif - - /* ----------------------------- */ - /* -- ray packet intersectors -- */ - /* ----------------------------- */ - - - struct MoellerTrumboreIntersector1KTriangleM - { - /*! Intersect k'th ray from ray packet of size K with M triangles. */ - template<int M, int K, typename Epilog> - static __forceinline bool intersect(RayK<K>& ray, - size_t k, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, - const Vec3vf<M>& tri_Ng, - const vbool<M>& flags, - const Epilog& epilog) - { - /* calculate denominator */ - const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k); - const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k); - const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O; - const Vec3vf<M> R = cross(C,D); - const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D); - const vfloat<M> absDen = abs(den); - const vfloat<M> sgnDen = signmsk(den); - - /* perform edge tests */ - const vfloat<M> U = dot(R,Vec3vf<M>(tri_e2)) ^ sgnDen; - const vfloat<M> V = dot(R,Vec3vf<M>(tri_e1)) ^ sgnDen; - - /* perform backface culling */ -#if defined(EMBREE_BACKFACE_CULLING) - vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); -#else - vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); -#endif - if (likely(none(valid))) return false; - - /* perform depth test */ - const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen; - valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k])); - if (likely(none(valid))) return false; - - /* calculate hit information */ - QuadHitM<M> hit(valid,U,V,T,absDen,tri_Ng,flags); - return epilog(valid,hit); - } - - template<int M, int K, typename Epilog> - static __forceinline bool intersect1(RayK<K>& ray, - size_t k, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - const vbool<M>& flags, - const Epilog& epilog) - { - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v2-v0; - const Vec3vf<M> Ng = cross(e2,e1); - return intersect(ray,k,v0,e1,e2,Ng,flags,epilog); - } - }; - - template<int M, int K, bool filter> - struct QuadMIntersectorKMoellerTrumboreBase - { - __forceinline QuadMIntersectorKMoellerTrumboreBase(const vbool<K>& valid, const RayK<K>& ray) {} - - /*! Intersects K rays with one of M triangles. */ - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_e1, - const Vec3vf<K>& tri_e2, - const Vec3vf<K>& tri_Ng, - const vbool<K>& flags, - const Epilog& epilog) const - { - /* calculate denominator */ - vbool<K> valid = valid0; - const Vec3vf<K> C = tri_v0 - ray.org; - const Vec3vf<K> R = cross(C,ray.dir); - const vfloat<K> den = dot(tri_Ng,ray.dir); - const vfloat<K> absDen = abs(den); - const vfloat<K> sgnDen = signmsk(den); - - /* test against edge p2 p0 */ - const vfloat<K> U = dot(R,tri_e2) ^ sgnDen; - valid &= U >= 0.0f; - if (likely(none(valid))) return false; - - /* test against edge p0 p1 */ - const vfloat<K> V = dot(R,tri_e1) ^ sgnDen; - valid &= V >= 0.0f; - if (likely(none(valid))) return false; - - /* test against edge p1 p2 */ - const vfloat<K> W = absDen-U-V; - valid &= W >= 0.0f; - if (likely(none(valid))) return false; - - /* perform depth test */ - const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen; - valid &= (absDen*ray.tnear() < T) & (T <= absDen*ray.tfar); - if (unlikely(none(valid))) return false; - - /* perform backface culling */ -#if defined(EMBREE_BACKFACE_CULLING) - valid &= den < vfloat<K>(zero); - if (unlikely(none(valid))) return false; -#else - valid &= den != vfloat<K>(zero); - if (unlikely(none(valid))) return false; -#endif - - /* calculate hit information */ - QuadHitK<K> hit(U,V,T,absDen,tri_Ng,flags); - return epilog(valid,hit); - } - - /*! Intersects K rays with one of M quads. */ - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_v1, - const Vec3vf<K>& tri_v2, - const vbool<K>& flags, - const Epilog& epilog) const - { - const Vec3vf<K> e1 = tri_v0-tri_v1; - const Vec3vf<K> e2 = tri_v2-tri_v0; - const Vec3vf<K> Ng = cross(e2,e1); - return intersectK(valid0,ray,tri_v0,e1,e2,Ng,flags,epilog); - } - - /*! Intersects K rays with one of M quads. */ - template<typename Epilog> - __forceinline bool intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& v0, - const Vec3vf<K>& v1, - const Vec3vf<K>& v2, - const Vec3vf<K>& v3, - const Epilog& epilog) const - { - intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),epilog); - if (none(valid0)) return true; - intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),epilog); - return none(valid0); - } - }; - - template<int M, int K, bool filter> - struct QuadMIntersectorKMoellerTrumbore : public QuadMIntersectorKMoellerTrumboreBase<M,K,filter> - { - __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) - : QuadMIntersectorKMoellerTrumboreBase<M,K,filter>(valid,ray) {} - - __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const vuint<M>& geomID, const vuint<M>& primID) const - { - Intersect1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID); - MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog); - MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog); - } - - __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const vuint<M>& geomID, const vuint<M>& primID) const - { - Occluded1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID); - if (MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true; - if (MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true; - return false; - } - }; - - -#if defined(__AVX512ER__) // KNL - - /*! Intersects 4 quads with 1 ray using AVX512 */ - template<int K, bool filter> - struct QuadMIntersectorKMoellerTrumbore<4,K,filter> : public QuadMIntersectorKMoellerTrumboreBase<4,K,filter> - { - __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) - : QuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {} - - template<typename Epilog> - __forceinline bool intersect1(RayK<K>& ray, size_t k, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const - { - const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)), - select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)), - select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z))); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z)); - const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z)); -#else - const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)), - select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)), - select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z))); - const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)), - select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)), - select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z))); -#endif - const vbool16 flags(0xf0f0); - return MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog); - } - - __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); - } - - __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); - } - }; - -#elif defined(__AVX__) - - /*! Intersects 4 quads with 1 ray using AVX */ - template<int K, bool filter> - struct QuadMIntersectorKMoellerTrumbore<4,K,filter> : public QuadMIntersectorKMoellerTrumboreBase<4,K,filter> - { - __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) - : QuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {} - - template<typename Epilog> - __forceinline bool intersect1(RayK<K>& ray, size_t k, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const - { - const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); - const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); -#else - const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); - const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); -#endif - const vbool8 flags(0,0,0,0,1,1,1,1); - return MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog); - } - - __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); - } - - __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); - } - }; - -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h deleted file mode 100644 index 7ca3aed0a0..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h +++ /dev/null @@ -1,529 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "quad_intersector_moeller.h" - -/*! Modified Pluecker ray/triangle intersector. The test first shifts - * the ray origin into the origin of the coordinate system and then - * uses Pluecker coordinates for the intersection. Due to the shift, - * the Pluecker coordinate calculation simplifies and the tests get - * numerically stable. The edge equations are watertight along the - * edge for neighboring triangles. */ - -namespace embree -{ - namespace isa - { - template<int M> - struct QuadHitPlueckerM - { - __forceinline QuadHitPlueckerM() {} - - __forceinline QuadHitPlueckerM(const vbool<M>& valid, - const vfloat<M>& U, - const vfloat<M>& V, - const vfloat<M>& UVW, - const vfloat<M>& t, - const Vec3vf<M>& Ng, - const vbool<M>& flags) - : U(U), V(V), UVW(UVW), tri_Ng(Ng), valid(valid), vt(t), flags(flags) {} - - __forceinline void finalize() - { - const vbool<M> invalid = abs(UVW) < min_rcp_input; - const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW)); - const vfloat<M> u = min(U * rcpUVW,1.0f); - const vfloat<M> v = min(V * rcpUVW,1.0f); - const vfloat<M> u1 = vfloat<M>(1.0f) - u; - const vfloat<M> v1 = vfloat<M>(1.0f) - v; -#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING) - vu = select(flags,u1,u); - vv = select(flags,v1,v); - vNg = Vec3vf<M>(tri_Ng.x,tri_Ng.y,tri_Ng.z); -#else - const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f)); - vv = select(flags,u1,v); - vu = select(flags,v1,u); - vNg = Vec3vf<M>(flip*tri_Ng.x,flip*tri_Ng.y,flip*tri_Ng.z); -#endif - } - - __forceinline Vec2f uv(const size_t i) - { - const float u = vu[i]; - const float v = vv[i]; - return Vec2f(u,v); - } - - __forceinline float t(const size_t i) { return vt[i]; } - __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } - - private: - vfloat<M> U; - vfloat<M> V; - vfloat<M> UVW; - Vec3vf<M> tri_Ng; - - public: - vbool<M> valid; - vfloat<M> vu; - vfloat<M> vv; - vfloat<M> vt; - Vec3vf<M> vNg; - - public: - const vbool<M> flags; - }; - - template<int K> - struct QuadHitPlueckerK - { - __forceinline QuadHitPlueckerK(const vfloat<K>& U, - const vfloat<K>& V, - const vfloat<K>& UVW, - const vfloat<K>& t, - const Vec3vf<K>& Ng, - const vbool<K>& flags) - : U(U), V(V), UVW(UVW), t(t), flags(flags), tri_Ng(Ng) {} - - __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const - { - const vbool<K> invalid = abs(UVW) < min_rcp_input; - const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW)); - const vfloat<K> u0 = min(U * rcpUVW,1.0f); - const vfloat<K> v0 = min(V * rcpUVW,1.0f); - const vfloat<K> u1 = vfloat<K>(1.0f) - u0; - const vfloat<K> v1 = vfloat<K>(1.0f) - v0; - const vfloat<K> u = select(flags,u1,u0); - const vfloat<K> v = select(flags,v1,v0); - const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z); - return std::make_tuple(u,v,t,Ng); - } - - private: - const vfloat<K> U; - const vfloat<K> V; - const vfloat<K> UVW; - const vfloat<K> t; - const vbool<K> flags; - const Vec3vf<K> tri_Ng; - }; - - struct PlueckerIntersectorTriangle1 - { - template<int M, typename Epilog> - static __forceinline bool intersect(Ray& ray, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_v1, - const Vec3vf<M>& tri_v2, - const vbool<M>& flags, - const Epilog& epilog) - { - /* calculate vertices relative to ray origin */ - const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org); - const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir); - const Vec3vf<M> v0 = tri_v0-O; - const Vec3vf<M> v1 = tri_v1-O; - const Vec3vf<M> v2 = tri_v2-O; - - /* calculate triangle edges */ - const Vec3vf<M> e0 = v2-v0; - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v1-v2; - - /* perform edge tests */ - const vfloat<M> U = dot(cross(e0,v2+v0),D); - const vfloat<M> V = dot(cross(e1,v0+v1),D); - const vfloat<M> W = dot(cross(e2,v1+v2),D); - const vfloat<M> UVW = U+V+W; - const vfloat<M> eps = float(ulp)*abs(UVW); -#if defined(EMBREE_BACKFACE_CULLING) - vbool<M> valid = max(U,V,W) <= eps; -#else - vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); -#endif - if (unlikely(none(valid))) return false; - - /* calculate geometry normal and denominator */ - const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); - const vfloat<M> den = twice(dot(Ng,D)); - - /* perform depth test */ - const vfloat<M> T = twice(dot(v0,Ng)); - const vfloat<M> t = rcp(den)*T; - valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar); - valid &= den != vfloat<M>(zero); - if (unlikely(none(valid))) return false; - - /* update hit information */ - QuadHitPlueckerM<M> hit(valid,U,V,UVW,t,Ng,flags); - return epilog(valid,hit); - } - }; - - /*! Intersects M quads with 1 ray */ - template<int M, bool filter> - struct QuadMIntersector1Pluecker - { - __forceinline QuadMIntersector1Pluecker() {} - - __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} - - __forceinline void intersect(RayHit& ray, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const vuint<M>& geomID, const vuint<M>& primID) const - { - Intersect1EpilogM<M,M,filter> epilog(ray,context,geomID,primID); - PlueckerIntersectorTriangle1::intersect(ray,v0,v1,v3,vbool<M>(false),epilog); - PlueckerIntersectorTriangle1::intersect(ray,v2,v3,v1,vbool<M>(true),epilog); - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const vuint<M>& geomID, const vuint<M>& primID) const - { - Occluded1EpilogM<M,M,filter> epilog(ray,context,geomID,primID); - if (PlueckerIntersectorTriangle1::intersect(ray,v0,v1,v3,vbool<M>(false),epilog)) return true; - if (PlueckerIntersectorTriangle1::intersect(ray,v2,v3,v1,vbool<M>(true ),epilog)) return true; - return false; - } - }; - -#if defined(__AVX512ER__) // KNL - - /*! Intersects 4 quads with 1 ray using AVX512 */ - template<bool filter> - struct QuadMIntersector1Pluecker<4,filter> - { - __forceinline QuadMIntersector1Pluecker() {} - - __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} - - template<typename Epilog> - __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const - { - const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)), - select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)), - select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z))); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z)); - const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z)); -#else - const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)), - select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)), - select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z))); - const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)), - select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)), - select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z))); -#endif - const vbool16 flags(0xf0f0); - return PlueckerIntersectorTriangle1::intersect(ray,vtx0,vtx1,vtx2,flags,epilog); - } - - __forceinline bool intersect(RayHit& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID))); - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID))); - } - }; - -#elif defined(__AVX__) - - /*! Intersects 4 quads with 1 ray using AVX */ - template<bool filter> - struct QuadMIntersector1Pluecker<4,filter> - { - __forceinline QuadMIntersector1Pluecker() {} - - __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} - - template<typename Epilog> - __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const - { - const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); - const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); -#else - const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); - const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); -#endif - const vbool8 flags(0,0,0,0,1,1,1,1); - return PlueckerIntersectorTriangle1::intersect(ray,vtx0,vtx1,vtx2,flags,epilog); - } - - __forceinline bool intersect(RayHit& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID))); - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID))); - } - }; - -#endif - - - /* ----------------------------- */ - /* -- ray packet intersectors -- */ - /* ----------------------------- */ - - struct PlueckerIntersector1KTriangleM - { - /*! Intersect k'th ray from ray packet of size K with M triangles. */ - template<int M, int K, typename Epilog> - static __forceinline bool intersect1(RayK<K>& ray, - size_t k, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_v1, - const Vec3vf<M>& tri_v2, - const vbool<M>& flags, - const Epilog& epilog) - { - /* calculate vertices relative to ray origin */ - const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k); - const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k); - const Vec3vf<M> v0 = tri_v0-O; - const Vec3vf<M> v1 = tri_v1-O; - const Vec3vf<M> v2 = tri_v2-O; - - /* calculate triangle edges */ - const Vec3vf<M> e0 = v2-v0; - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v1-v2; - - /* perform edge tests */ - const vfloat<M> U = dot(cross(e0,v2+v0),D); - const vfloat<M> V = dot(cross(e1,v0+v1),D); - const vfloat<M> W = dot(cross(e2,v1+v2),D); - const vfloat<M> UVW = U+V+W; - const vfloat<M> eps = float(ulp)*abs(UVW); -#if defined(EMBREE_BACKFACE_CULLING) - vbool<M> valid = max(U,V,W) <= eps; -#else - vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); -#endif - if (unlikely(none(valid))) return false; - - /* calculate geometry normal and denominator */ - const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); - const vfloat<M> den = twice(dot(Ng,D)); - - /* perform depth test */ - const vfloat<M> T = twice(dot(v0,Ng)); - const vfloat<M> t = rcp(den)*T; - valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]); - if (unlikely(none(valid))) return false; - - /* avoid division by 0 */ - valid &= den != vfloat<M>(zero); - if (unlikely(none(valid))) return false; - - /* update hit information */ - QuadHitPlueckerM<M> hit(valid,U,V,UVW,t,Ng,flags); - return epilog(valid,hit); - } - }; - - template<int M, int K, bool filter> - struct QuadMIntersectorKPlueckerBase - { - __forceinline QuadMIntersectorKPlueckerBase(const vbool<K>& valid, const RayK<K>& ray) {} - - /*! Intersects K rays with one of M triangles. */ - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_v1, - const Vec3vf<K>& tri_v2, - const vbool<K>& flags, - const Epilog& epilog) const - { - /* calculate vertices relative to ray origin */ - vbool<K> valid = valid0; - const Vec3vf<K> O = ray.org; - const Vec3vf<K> D = ray.dir; - const Vec3vf<K> v0 = tri_v0-O; - const Vec3vf<K> v1 = tri_v1-O; - const Vec3vf<K> v2 = tri_v2-O; - - /* calculate triangle edges */ - const Vec3vf<K> e0 = v2-v0; - const Vec3vf<K> e1 = v0-v1; - const Vec3vf<K> e2 = v1-v2; - - /* perform edge tests */ - const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D); - const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D); - const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D); - const vfloat<K> UVW = U+V+W; - const vfloat<K> eps = float(ulp)*abs(UVW); -#if defined(EMBREE_BACKFACE_CULLING) - valid &= max(U,V,W) <= eps; -#else - valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); -#endif - if (unlikely(none(valid))) return false; - - /* calculate geometry normal and denominator */ - const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2); - const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D)); - - /* perform depth test */ - const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng))); - const vfloat<K> t = rcp(den)*T; - valid &= ray.tnear() <= t & t <= ray.tfar; - valid &= den != vfloat<K>(zero); - if (unlikely(none(valid))) return false; - - /* calculate hit information */ - QuadHitPlueckerK<K> hit(U,V,UVW,t,Ng,flags); - return epilog(valid,hit); - } - - /*! Intersects K rays with one of M quads. */ - template<typename Epilog> - __forceinline bool intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& v0, - const Vec3vf<K>& v1, - const Vec3vf<K>& v2, - const Vec3vf<K>& v3, - const Epilog& epilog) const - { - intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),epilog); - if (none(valid0)) return true; - intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),epilog); - return none(valid0); - } - }; - - template<int M, int K, bool filter> - struct QuadMIntersectorKPluecker : public QuadMIntersectorKPlueckerBase<M,K,filter> - { - __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray) - : QuadMIntersectorKPlueckerBase<M,K,filter>(valid,ray) {} - - __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const vuint<M>& geomID, const vuint<M>& primID) const - { - Intersect1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID); - PlueckerIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog); - PlueckerIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog); - } - - __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const vuint<M>& geomID, const vuint<M>& primID) const - { - Occluded1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID); - if (PlueckerIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true; - if (PlueckerIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true; - return false; - } - }; - -#if defined(__AVX512ER__) // KNL - - /*! Intersects 4 quads with 1 ray using AVX512 */ - template<int K, bool filter> - struct QuadMIntersectorKPluecker<4,K,filter> : public QuadMIntersectorKPlueckerBase<4,K,filter> - { - __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray) - : QuadMIntersectorKPlueckerBase<4,K,filter>(valid,ray) {} - - template<typename Epilog> - __forceinline bool intersect1(RayK<K>& ray, size_t k, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const - { - const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)), - select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)), - select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z))); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z)); - const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z)); -#else - const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)), - select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)), - select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z))); - const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)), - select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)), - select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z))); -#endif - - const vbool16 flags(0xf0f0); - return PlueckerIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog); - } - - __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); - } - - __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); - } - }; - -#elif defined(__AVX__) - - /*! Intersects 4 quads with 1 ray using AVX */ - template<int K, bool filter> - struct QuadMIntersectorKPluecker<4,K,filter> : public QuadMIntersectorKPlueckerBase<4,K,filter> - { - __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray) - : QuadMIntersectorKPlueckerBase<4,K,filter>(valid,ray) {} - - template<typename Epilog> - __forceinline bool intersect1(RayK<K>& ray, size_t k, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const - { - const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); - const vbool8 flags(0,0,0,0,1,1,1,1); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); - const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); -#else - const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); - const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); -#endif - return PlueckerIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog); - } - - __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); - } - - __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); - } - }; - -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadi.h b/thirdparty/embree-aarch64/kernels/geometry/quadi.h deleted file mode 100644 index 741ec519ab..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/quadi.h +++ /dev/null @@ -1,483 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "primitive.h" -#include "../common/scene.h" - -namespace embree -{ - /* Stores M quads from an indexed face set */ - template <int M> - struct QuadMi - { - /* Virtual interface to query information about the quad type */ - struct Type : public PrimitiveType - { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - - /* primitive supports multiple time segments */ - static const bool singleTimeSegment = false; - - /* Returns maximum number of stored quads */ - static __forceinline size_t max_size() { return M; } - - /* Returns required number of primitive blocks for N primitives */ - static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } - - public: - - /* Default constructor */ - __forceinline QuadMi() { } - - /* Construction from vertices and IDs */ - __forceinline QuadMi(const vuint<M>& v0, - const vuint<M>& v1, - const vuint<M>& v2, - const vuint<M>& v3, - const vuint<M>& geomIDs, - const vuint<M>& primIDs) -#if defined(EMBREE_COMPACT_POLYS) - : geomIDs(geomIDs), primIDs(primIDs) {} -#else - : v0_(v0),v1_(v1), v2_(v2), v3_(v3), geomIDs(geomIDs), primIDs(primIDs) {} -#endif - - /* Returns a mask that tells which quads are valid */ - __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); } - - /* Returns if the specified quad is valid */ - __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; } - - /* Returns the number of stored quads */ - __forceinline size_t size() const { return bsf(~movemask(valid())); } - - /* Returns the geometry IDs */ - __forceinline vuint<M>& geomID() { return geomIDs; } - __forceinline const vuint<M>& geomID() const { return geomIDs; } - __forceinline unsigned int geomID(const size_t i) const { assert(i<M); assert(geomIDs[i] != -1); return geomIDs[i]; } - - /* Returns the primitive IDs */ - __forceinline vuint<M>& primID() { return primIDs; } - __forceinline const vuint<M>& primID() const { return primIDs; } - __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } - - /* Calculate the bounds of the quads */ - __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const - { - BBox3fa bounds = empty; - for (size_t i=0; i<M && valid(i); i++) { - const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i)); - bounds.extend(mesh->bounds(primID(i),itime)); - } - return bounds; - } - - /* Calculate the linear bounds of the primitive */ - __forceinline LBBox3fa linearBounds(const Scene* const scene, const size_t itime) { - return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1)); - } - - __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps) - { - LBBox3fa allBounds = empty; - for (size_t i=0; i<M && valid(i); i++) - { - const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i)); - allBounds.extend(mesh->linearBounds(primID(i), itime, numTimeSteps)); - } - return allBounds; - } - - __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range) - { - LBBox3fa allBounds = empty; - for (size_t i=0; i<M && valid(i); i++) - { - const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i)); - allBounds.extend(mesh->linearBounds(primID(i), time_range)); - } - return allBounds; - } - - /* Fill quad from quad list */ - template<typename PrimRefT> - __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene) - { - vuint<M> geomID = -1, primID = -1; - const PrimRefT* prim = &prims[begin]; - vuint<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero; - - for (size_t i=0; i<M; i++) - { - if (begin<end) { - geomID[i] = prim->geomID(); - primID[i] = prim->primID(); -#if !defined(EMBREE_COMPACT_POLYS) - const QuadMesh* mesh = scene->get<QuadMesh>(prim->geomID()); - const QuadMesh::Quad& q = mesh->quad(prim->primID()); - unsigned int_stride = mesh->vertices0.getStride()/4; - v0[i] = q.v[0] * int_stride; - v1[i] = q.v[1] * int_stride; - v2[i] = q.v[2] * int_stride; - v3[i] = q.v[3] * int_stride; -#endif - begin++; - } else { - assert(i); - if (likely(i > 0)) { - geomID[i] = geomID[0]; // always valid geomIDs - primID[i] = -1; // indicates invalid data - v0[i] = v0[0]; - v1[i] = v0[0]; - v2[i] = v0[0]; - v3[i] = v0[0]; - } - } - if (begin<end) prim = &prims[begin]; - } - new (this) QuadMi(v0,v1,v2,v3,geomID,primID); // FIXME: use non temporal store - } - - __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime) - { - fill(prims, begin, end, scene); - return linearBounds(scene, itime); - } - - __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range) - { - fill(prims, begin, end, scene); - return linearBounds(scene, time_range); - } - - friend embree_ostream operator<<(embree_ostream cout, const QuadMi& quad) { - return cout << "QuadMi<" << M << ">( " -#if !defined(EMBREE_COMPACT_POLYS) - << "v0 = " << quad.v0_ << ", v1 = " << quad.v1_ << ", v2 = " << quad.v2_ << ", v3 = " << quad.v3_ << ", " -#endif - << "geomID = " << quad.geomIDs << ", primID = " << quad.primIDs << " )"; - } - - protected: -#if !defined(EMBREE_COMPACT_POLYS) - vuint<M> v0_; // 4 byte offset of 1st vertex - vuint<M> v1_; // 4 byte offset of 2nd vertex - vuint<M> v2_; // 4 byte offset of 3rd vertex - vuint<M> v3_; // 4 byte offset of 4th vertex -#endif - vuint<M> geomIDs; // geometry ID of mesh - vuint<M> primIDs; // primitive ID of primitive inside mesh - }; - - namespace isa - { - - template<int M> - struct QuadMi : public embree::QuadMi<M> - { -#if !defined(EMBREE_COMPACT_POLYS) - using embree::QuadMi<M>::v0_; - using embree::QuadMi<M>::v1_; - using embree::QuadMi<M>::v2_; - using embree::QuadMi<M>::v3_; -#endif - using embree::QuadMi<M>::geomIDs; - using embree::QuadMi<M>::primIDs; - using embree::QuadMi<M>::geomID; - using embree::QuadMi<M>::primID; - using embree::QuadMi<M>::valid; - - template<int vid> - __forceinline Vec3f getVertex(const size_t index, const Scene *const scene) const - { -#if defined(EMBREE_COMPACT_POLYS) - const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); - const QuadMesh::Quad& quad = mesh->quad(primID(index)); - return (Vec3f) mesh->vertices[0][quad.v[vid]]; -#else - const vuint<M>& v = getVertexOffset<vid>(); - const float* vertices = scene->vertices[geomID(index)]; - return (Vec3f&) vertices[v[index]]; -#endif - } - - template<int vid, typename T> - __forceinline Vec3<T> getVertex(const size_t index, const Scene *const scene, const size_t itime, const T& ftime) const - { -#if defined(EMBREE_COMPACT_POLYS) - const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); - const QuadMesh::Quad& quad = mesh->quad(primID(index)); - const Vec3fa v0 = mesh->vertices[itime+0][quad.v[vid]]; - const Vec3fa v1 = mesh->vertices[itime+1][quad.v[vid]]; -#else - const vuint<M>& v = getVertexOffset<vid>(); - const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); - const float* vertices0 = (const float*) mesh->vertexPtr(0,itime+0); - const float* vertices1 = (const float*) mesh->vertexPtr(0,itime+1); - const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]); - const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]); -#endif - const Vec3<T> p0(v0.x,v0.y,v0.z); - const Vec3<T> p1(v1.x,v1.y,v1.z); - return lerp(p0,p1,ftime); - } - - template<int vid, int K, typename T> - __forceinline Vec3<T> getVertex(const vbool<K>& valid, const size_t index, const Scene *const scene, const vint<K>& itime, const T& ftime) const - { - Vec3<T> p0, p1; - const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); - - for (size_t mask=movemask(valid), i=bsf(mask); mask; mask=btc(mask,i), i=bsf(mask)) - { -#if defined(EMBREE_COMPACT_POLYS) - const QuadMesh::Quad& quad = mesh->quad(primID(index)); - const Vec3fa v0 = mesh->vertices[itime[i]+0][quad.v[vid]]; - const Vec3fa v1 = mesh->vertices[itime[i]+1][quad.v[vid]]; -#else - const vuint<M>& v = getVertexOffset<vid>(); - const float* vertices0 = (const float*) mesh->vertexPtr(0,itime[i]+0); - const float* vertices1 = (const float*) mesh->vertexPtr(0,itime[i]+1); - const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]); - const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]); -#endif - p0.x[i] = v0.x; p0.y[i] = v0.y; p0.z[i] = v0.z; - p1.x[i] = v1.x; p1.y[i] = v1.y; p1.z[i] = v1.z; - } - return (T(one)-ftime)*p0 + ftime*p1; - } - - struct Quad { - vfloat4 v0,v1,v2,v3; - }; - -#if defined(EMBREE_COMPACT_POLYS) - - __forceinline Quad loadQuad(const int i, const Scene* const scene) const - { - const unsigned int geomID = geomIDs[i]; - const unsigned int primID = primIDs[i]; - if (unlikely(primID == -1)) return { zero, zero, zero, zero }; - const QuadMesh* mesh = scene->get<QuadMesh>(geomID); - const QuadMesh::Quad& quad = mesh->quad(primID); - const vfloat4 v0 = (vfloat4) mesh->vertices0[quad.v[0]]; - const vfloat4 v1 = (vfloat4) mesh->vertices0[quad.v[1]]; - const vfloat4 v2 = (vfloat4) mesh->vertices0[quad.v[2]]; - const vfloat4 v3 = (vfloat4) mesh->vertices0[quad.v[3]]; - return { v0, v1, v2, v3 }; - } - - __forceinline Quad loadQuad(const int i, const int itime, const Scene* const scene) const - { - const unsigned int geomID = geomIDs[i]; - const unsigned int primID = primIDs[i]; - if (unlikely(primID == -1)) return { zero, zero, zero, zero }; - const QuadMesh* mesh = scene->get<QuadMesh>(geomID); - const QuadMesh::Quad& quad = mesh->quad(primID); - const vfloat4 v0 = (vfloat4) mesh->vertices[itime][quad.v[0]]; - const vfloat4 v1 = (vfloat4) mesh->vertices[itime][quad.v[1]]; - const vfloat4 v2 = (vfloat4) mesh->vertices[itime][quad.v[2]]; - const vfloat4 v3 = (vfloat4) mesh->vertices[itime][quad.v[3]]; - return { v0, v1, v2, v3 }; - } - -#else - - __forceinline Quad loadQuad(const int i, const Scene* const scene) const - { - const float* vertices = scene->vertices[geomID(i)]; - const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]); - const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]); - const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]); - const vfloat4 v3 = vfloat4::loadu(vertices + v3_[i]); - return { v0, v1, v2, v3 }; - } - - __forceinline Quad loadQuad(const int i, const int itime, const Scene* const scene) const - { - const unsigned int geomID = geomIDs[i]; - const QuadMesh* mesh = scene->get<QuadMesh>(geomID); - const float* vertices = (const float*) mesh->vertexPtr(0,itime); - const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]); - const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]); - const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]); - const vfloat4 v3 = vfloat4::loadu(vertices + v3_[i]); - return { v0, v1, v2, v3 }; - } - -#endif - - /* Gather the quads */ - __forceinline void gather(Vec3vf<M>& p0, - Vec3vf<M>& p1, - Vec3vf<M>& p2, - Vec3vf<M>& p3, - const Scene *const scene) const; - -#if defined(__AVX512F__) - __forceinline void gather(Vec3vf16& p0, - Vec3vf16& p1, - Vec3vf16& p2, - Vec3vf16& p3, - const Scene *const scene) const; -#endif - - template<int K> -#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 2000) // workaround for compiler bug in ICC 2019 - __noinline -#else - __forceinline -#endif - void gather(const vbool<K>& valid, - Vec3vf<K>& p0, - Vec3vf<K>& p1, - Vec3vf<K>& p2, - Vec3vf<K>& p3, - const size_t index, - const Scene* const scene, - const vfloat<K>& time) const - { - const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); - - vfloat<K> ftime; - const vint<K> itime = mesh->timeSegment(time, ftime); - - const size_t first = bsf(movemask(valid)); - if (likely(all(valid,itime[first] == itime))) - { - p0 = getVertex<0>(index, scene, itime[first], ftime); - p1 = getVertex<1>(index, scene, itime[first], ftime); - p2 = getVertex<2>(index, scene, itime[first], ftime); - p3 = getVertex<3>(index, scene, itime[first], ftime); - } - else - { - p0 = getVertex<0>(valid, index, scene, itime, ftime); - p1 = getVertex<1>(valid, index, scene, itime, ftime); - p2 = getVertex<2>(valid, index, scene, itime, ftime); - p3 = getVertex<3>(valid, index, scene, itime, ftime); - } - } - - __forceinline void gather(Vec3vf<M>& p0, - Vec3vf<M>& p1, - Vec3vf<M>& p2, - Vec3vf<M>& p3, - const QuadMesh* mesh, - const Scene *const scene, - const int itime) const; - - __forceinline void gather(Vec3vf<M>& p0, - Vec3vf<M>& p1, - Vec3vf<M>& p2, - Vec3vf<M>& p3, - const Scene *const scene, - const float time) const; - - /* Updates the primitive */ - __forceinline BBox3fa update(QuadMesh* mesh) - { - BBox3fa bounds = empty; - for (size_t i=0; i<M; i++) - { - if (!valid(i)) break; - const unsigned primId = primID(i); - const QuadMesh::Quad& q = mesh->quad(primId); - const Vec3fa p0 = mesh->vertex(q.v[0]); - const Vec3fa p1 = mesh->vertex(q.v[1]); - const Vec3fa p2 = mesh->vertex(q.v[2]); - const Vec3fa p3 = mesh->vertex(q.v[3]); - bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2),BBox3fa(p3))); - } - return bounds; - } - - private: -#if !defined(EMBREE_COMPACT_POLYS) - template<int N> const vuint<M>& getVertexOffset() const; -#endif - }; - -#if !defined(EMBREE_COMPACT_POLYS) - template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<0>() const { return v0_; } - template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<1>() const { return v1_; } - template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<2>() const { return v2_; } - template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<3>() const { return v3_; } -#endif - - template<> - __forceinline void QuadMi<4>::gather(Vec3vf4& p0, - Vec3vf4& p1, - Vec3vf4& p2, - Vec3vf4& p3, - const Scene *const scene) const - { - prefetchL1(((char*)this)+0*64); - prefetchL1(((char*)this)+1*64); - const Quad tri0 = loadQuad(0,scene); - const Quad tri1 = loadQuad(1,scene); - const Quad tri2 = loadQuad(2,scene); - const Quad tri3 = loadQuad(3,scene); - transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z); - transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z); - transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z); - transpose(tri0.v3,tri1.v3,tri2.v3,tri3.v3,p3.x,p3.y,p3.z); - } - - template<> - __forceinline void QuadMi<4>::gather(Vec3vf4& p0, - Vec3vf4& p1, - Vec3vf4& p2, - Vec3vf4& p3, - const QuadMesh* mesh, - const Scene *const scene, - const int itime) const - { - // FIXME: for trianglei there all geometries are identical, is this the case here too? - - const Quad tri0 = loadQuad(0,itime,scene); - const Quad tri1 = loadQuad(1,itime,scene); - const Quad tri2 = loadQuad(2,itime,scene); - const Quad tri3 = loadQuad(3,itime,scene); - transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z); - transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z); - transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z); - transpose(tri0.v3,tri1.v3,tri2.v3,tri3.v3,p3.x,p3.y,p3.z); - } - - template<> - __forceinline void QuadMi<4>::gather(Vec3vf4& p0, - Vec3vf4& p1, - Vec3vf4& p2, - Vec3vf4& p3, - const Scene *const scene, - const float time) const - { - const QuadMesh* mesh = scene->get<QuadMesh>(geomID(0)); // in mblur mode all geometries are identical - - float ftime; - const int itime = mesh->timeSegment(time, ftime); - - Vec3vf4 a0,a1,a2,a3; gather(a0,a1,a2,a3,mesh,scene,itime); - Vec3vf4 b0,b1,b2,b3; gather(b0,b1,b2,b3,mesh,scene,itime+1); - p0 = lerp(a0,b0,vfloat4(ftime)); - p1 = lerp(a1,b1,vfloat4(ftime)); - p2 = lerp(a2,b2,vfloat4(ftime)); - p3 = lerp(a3,b3,vfloat4(ftime)); - } - } - - template<int M> - typename QuadMi<M>::Type QuadMi<M>::type; - - typedef QuadMi<4> Quad4i; -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h deleted file mode 100644 index 96cf7f1ca2..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h +++ /dev/null @@ -1,350 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "quadi.h" -#include "quad_intersector_moeller.h" -#include "quad_intersector_pluecker.h" - -namespace embree -{ - namespace isa - { - /*! Intersects M quads with 1 ray */ - template<int M, bool filter> - struct QuadMiIntersector1Moeller - { - typedef QuadMi<M> Primitive; - typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations; - - /*! Intersect a ray with the M quads and updates the hit. */ - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); - pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - - /*! Test if the ray is occluded by one of M quads. */ - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); - return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); - } - }; - - /*! Intersects M triangles with K rays. */ - template<int M, int K, bool filter> - struct QuadMiIntersectorKMoeller - { - typedef QuadMi<M> Primitive; - typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations; - - /*! Intersects K rays with M triangles. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) - { - Scene* scene = context->scene; - for (size_t i=0; i<QuadMi<M>::max_size(); i++) - { - if (!quad.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene); - const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene); - const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene); - const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene); - pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i)); - } - } - - /*! Test for K rays if they are occluded by any of the M triangles. */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) - { - Scene* scene = context->scene; - vbool<K> valid0 = valid_i; - for (size_t i=0; i<QuadMi<M>::max_size(); i++) - { - if (!quad.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid0),K); - const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene); - const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene); - const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene); - const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene); - if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i))) - break; - } - return !valid0; - } - - /*! Intersect a ray with M triangles and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf4 v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); - pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - - /*! Test if the ray is occluded by one of the M triangles. */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf4 v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); - return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - }; - - /*! Intersects M quads with 1 ray */ - template<int M, bool filter> - struct QuadMiIntersector1Pluecker - { - typedef QuadMi<M> Primitive; - typedef QuadMIntersector1Pluecker<M,filter> Precalculations; - - /*! Intersect a ray with the M quads and updates the hit. */ - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); - pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - - /*! Test if the ray is occluded by one of M quads. */ - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); - return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); - } - }; - - /*! Intersects M triangles with K rays. */ - template<int M, int K, bool filter> - struct QuadMiIntersectorKPluecker - { - typedef QuadMi<M> Primitive; - typedef QuadMIntersectorKPluecker<M,K,filter> Precalculations; - - /*! Intersects K rays with M triangles. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) - { - Scene* scene = context->scene; - for (size_t i=0; i<QuadMi<M>::max_size(); i++) - { - if (!quad.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene); - const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene); - const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene); - const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene); - pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i)); - } - } - - /*! Test for K rays if they are occluded by any of the M triangles. */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) - { - Scene* scene = context->scene; - vbool<K> valid0 = valid_i; - for (size_t i=0; i<QuadMi<M>::max_size(); i++) - { - if (!quad.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid0),K); - const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene); - const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene); - const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene); - const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene); - if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i))) - break; - } - return !valid0; - } - - /*! Intersect a ray with M triangles and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf4 v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); - pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - - /*! Test if the ray is occluded by one of the M triangles. */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf4 v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); - return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - }; - - /*! Intersects M motion blur quads with 1 ray */ - template<int M, bool filter> - struct QuadMiMBIntersector1Moeller - { - typedef QuadMi<M> Primitive; - typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations; - - /*! Intersect a ray with the M quads and updates the hit. */ - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()); - pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - - /*! Test if the ray is occluded by one of M quads. */ - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()); - return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); - } - }; - - /*! Intersects M motion blur quads with K rays. */ - template<int M, int K, bool filter> - struct QuadMiMBIntersectorKMoeller - { - typedef QuadMi<M> Primitive; - typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations; - - /*! Intersects K rays with M quads. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) - { - for (size_t i=0; i<QuadMi<M>::max_size(); i++) - { - if (!quad.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); - pre.intersectK(valid_i,ray,v0,v1,v2,v3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i)); - } - } - - /*! Test for K rays if they are occluded by any of the M quads. */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) - { - vbool<K> valid0 = valid_i; - for (size_t i=0; i<QuadMi<M>::max_size(); i++) - { - if (!quad.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid0),K); - Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); - if (pre.intersectK(valid0,ray,v0,v1,v2,v3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i))) - break; - } - return !valid0; - } - - /*! Intersect a ray with M quads and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()[k]); - pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - - /*! Test if the ray is occluded by one of the M quads. */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()[k]); - return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - }; - - /*! Intersects M motion blur quads with 1 ray */ - template<int M, bool filter> - struct QuadMiMBIntersector1Pluecker - { - typedef QuadMi<M> Primitive; - typedef QuadMIntersector1Pluecker<M,filter> Precalculations; - - /*! Intersect a ray with the M quads and updates the hit. */ - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()); - pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - - /*! Test if the ray is occluded by one of M quads. */ - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()); - return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); - } - }; - - /*! Intersects M motion blur quads with K rays. */ - template<int M, int K, bool filter> - struct QuadMiMBIntersectorKPluecker - { - typedef QuadMi<M> Primitive; - typedef QuadMIntersectorKPluecker<M,K,filter> Precalculations; - - /*! Intersects K rays with M quads. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) - { - for (size_t i=0; i<QuadMi<M>::max_size(); i++) - { - if (!quad.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); - pre.intersectK(valid_i,ray,v0,v1,v2,v3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i)); - } - } - - /*! Test for K rays if they are occluded by any of the M quads. */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad) - { - vbool<K> valid0 = valid_i; - for (size_t i=0; i<QuadMi<M>::max_size(); i++) - { - if (!quad.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid0),K); - Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); - if (pre.intersectK(valid0,ray,v0,v1,v2,v3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i))) - break; - } - return !valid0; - } - - /*! Intersect a ray with M quads and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()[k]); - pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - - /*! Test if the ray is occluded by one of the M quads. */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()[k]); - return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID()); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadv.h b/thirdparty/embree-aarch64/kernels/geometry/quadv.h deleted file mode 100644 index 0a1fe4d128..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/quadv.h +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "primitive.h" - -namespace embree -{ - /* Stores the vertices of M quads in struct of array layout */ - template <int M> - struct QuadMv - { - public: - struct Type : public PrimitiveType - { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - - /* Returns maximum number of stored quads */ - static __forceinline size_t max_size() { return M; } - - /* Returns required number of primitive blocks for N primitives */ - static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } - - public: - - /* Default constructor */ - __forceinline QuadMv() {} - - /* Construction from vertices and IDs */ - __forceinline QuadMv(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const vuint<M>& geomIDs, const vuint<M>& primIDs) - : v0(v0), v1(v1), v2(v2), v3(v3), geomIDs(geomIDs), primIDs(primIDs) {} - - /* Returns a mask that tells which quads are valid */ - __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); } - - /* Returns true if the specified quad is valid */ - __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; } - - /* Returns the number of stored quads */ - __forceinline size_t size() const { return bsf(~movemask(valid())); } - - /* Returns the geometry IDs */ - __forceinline vuint<M>& geomID() { return geomIDs; } - __forceinline const vuint<M>& geomID() const { return geomIDs; } - __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; } - - /* Returns the primitive IDs */ - __forceinline vuint<M> primID() { return primIDs; } - __forceinline const vuint<M> primID() const { return primIDs; } - __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } - - /* Calculate the bounds of the quads */ - __forceinline BBox3fa bounds() const - { - Vec3vf<M> lower = min(v0,v1,v2,v3); - Vec3vf<M> upper = max(v0,v1,v2,v3); - vbool<M> mask = valid(); - lower.x = select(mask,lower.x,vfloat<M>(pos_inf)); - lower.y = select(mask,lower.y,vfloat<M>(pos_inf)); - lower.z = select(mask,lower.z,vfloat<M>(pos_inf)); - upper.x = select(mask,upper.x,vfloat<M>(neg_inf)); - upper.y = select(mask,upper.y,vfloat<M>(neg_inf)); - upper.z = select(mask,upper.z,vfloat<M>(neg_inf)); - return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)), - Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z))); - } - - /* Non temporal store */ - __forceinline static void store_nt(QuadMv* dst, const QuadMv& src) - { - vfloat<M>::store_nt(&dst->v0.x,src.v0.x); - vfloat<M>::store_nt(&dst->v0.y,src.v0.y); - vfloat<M>::store_nt(&dst->v0.z,src.v0.z); - vfloat<M>::store_nt(&dst->v1.x,src.v1.x); - vfloat<M>::store_nt(&dst->v1.y,src.v1.y); - vfloat<M>::store_nt(&dst->v1.z,src.v1.z); - vfloat<M>::store_nt(&dst->v2.x,src.v2.x); - vfloat<M>::store_nt(&dst->v2.y,src.v2.y); - vfloat<M>::store_nt(&dst->v2.z,src.v2.z); - vfloat<M>::store_nt(&dst->v3.x,src.v3.x); - vfloat<M>::store_nt(&dst->v3.y,src.v3.y); - vfloat<M>::store_nt(&dst->v3.z,src.v3.z); - vuint<M>::store_nt(&dst->geomIDs,src.geomIDs); - vuint<M>::store_nt(&dst->primIDs,src.primIDs); - } - - /* Fill quad from quad list */ - __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene) - { - vuint<M> vgeomID = -1, vprimID = -1; - Vec3vf<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero; - - for (size_t i=0; i<M && begin<end; i++, begin++) - { - const PrimRef& prim = prims[begin]; - const unsigned geomID = prim.geomID(); - const unsigned primID = prim.primID(); - const QuadMesh* __restrict__ const mesh = scene->get<QuadMesh>(geomID); - const QuadMesh::Quad& quad = mesh->quad(primID); - const Vec3fa& p0 = mesh->vertex(quad.v[0]); - const Vec3fa& p1 = mesh->vertex(quad.v[1]); - const Vec3fa& p2 = mesh->vertex(quad.v[2]); - const Vec3fa& p3 = mesh->vertex(quad.v[3]); - vgeomID [i] = geomID; - vprimID [i] = primID; - v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; - v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; - v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; - v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z; - } - QuadMv::store_nt(this,QuadMv(v0,v1,v2,v3,vgeomID,vprimID)); - } - - /* Updates the primitive */ - __forceinline BBox3fa update(QuadMesh* mesh) - { - BBox3fa bounds = empty; - vuint<M> vgeomID = -1, vprimID = -1; - Vec3vf<M> v0 = zero, v1 = zero, v2 = zero; - - for (size_t i=0; i<M; i++) - { - if (primID(i) == -1) break; - const unsigned geomId = geomID(i); - const unsigned primId = primID(i); - const QuadMesh::Quad& quad = mesh->quad(primId); - const Vec3fa p0 = mesh->vertex(quad.v[0]); - const Vec3fa p1 = mesh->vertex(quad.v[1]); - const Vec3fa p2 = mesh->vertex(quad.v[2]); - const Vec3fa p3 = mesh->vertex(quad.v[3]); - bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2),BBox3fa(p3))); - vgeomID [i] = geomId; - vprimID [i] = primId; - v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; - v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; - v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; - v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z; - } - new (this) QuadMv(v0,v1,v2,v3,vgeomID,vprimID); - return bounds; - } - - public: - Vec3vf<M> v0; // 1st vertex of the quads - Vec3vf<M> v1; // 2nd vertex of the quads - Vec3vf<M> v2; // 3rd vertex of the quads - Vec3vf<M> v3; // 4rd vertex of the quads - private: - vuint<M> geomIDs; // geometry ID - vuint<M> primIDs; // primitive ID - }; - - template<int M> - typename QuadMv<M>::Type QuadMv<M>::type; - - typedef QuadMv<4> Quad4v; -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h deleted file mode 100644 index 30a24b291a..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "quadv.h" -#include "quad_intersector_moeller.h" -#include "quad_intersector_pluecker.h" - -namespace embree -{ - namespace isa - { - /*! Intersects M quads with 1 ray */ - template<int M, bool filter> - struct QuadMvIntersector1Moeller - { - typedef QuadMv<M> Primitive; - typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations; - - /*! Intersect a ray with the M quads and updates the hit. */ - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad) - { - STAT3(normal.trav_prims,1,1,1); - pre.intersect(ray,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); - } - - /*! Test if the ray is occluded by one of M quads. */ - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad) - { - STAT3(shadow.trav_prims,1,1,1); - return pre.occluded(ray,context, quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); - } - }; - - /*! Intersects M triangles with K rays. */ - template<int M, int K, bool filter> - struct QuadMvIntersectorKMoeller - { - typedef QuadMv<M> Primitive; - typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations; - - /*! Intersects K rays with M triangles. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMv<M>& quad) - { - for (size_t i=0; i<QuadMv<M>::max_size(); i++) - { - if (!quad.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - const Vec3vf<K> p0 = broadcast<vfloat<K>>(quad.v0,i); - const Vec3vf<K> p1 = broadcast<vfloat<K>>(quad.v1,i); - const Vec3vf<K> p2 = broadcast<vfloat<K>>(quad.v2,i); - const Vec3vf<K> p3 = broadcast<vfloat<K>>(quad.v3,i); - pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i)); - } - } - - /*! Test for K rays if they are occluded by any of the M triangles. */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMv<M>& quad) - { - vbool<K> valid0 = valid_i; - - for (size_t i=0; i<QuadMv<M>::max_size(); i++) - { - if (!quad.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid0),K); - const Vec3vf<K> p0 = broadcast<vfloat<K>>(quad.v0,i); - const Vec3vf<K> p1 = broadcast<vfloat<K>>(quad.v1,i); - const Vec3vf<K> p2 = broadcast<vfloat<K>>(quad.v2,i); - const Vec3vf<K> p3 = broadcast<vfloat<K>>(quad.v3,i); - if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i))) - break; - } - return !valid0; - } - - /*! Intersect a ray with M triangles and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad) - { - STAT3(normal.trav_prims,1,1,1); - pre.intersect1(ray,k,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); - } - - /*! Test if the ray is occluded by one of the M triangles. */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad) - { - STAT3(shadow.trav_prims,1,1,1); - return pre.occluded1(ray,k,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); - } - }; - - /*! Intersects M quads with 1 ray */ - template<int M, bool filter> - struct QuadMvIntersector1Pluecker - { - typedef QuadMv<M> Primitive; - typedef QuadMIntersector1Pluecker<M,filter> Precalculations; - - /*! Intersect a ray with the M quads and updates the hit. */ - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad) - { - STAT3(normal.trav_prims,1,1,1); - pre.intersect(ray,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); - } - - /*! Test if the ray is occluded by one of M quads. */ - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad) - { - STAT3(shadow.trav_prims,1,1,1); - return pre.occluded(ray,context, quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); - } - }; - - /*! Intersects M triangles with K rays. */ - template<int M, int K, bool filter> - struct QuadMvIntersectorKPluecker - { - typedef QuadMv<M> Primitive; - typedef QuadMIntersectorKPluecker<M,K,filter> Precalculations; - - /*! Intersects K rays with M triangles. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMv<M>& quad) - { - for (size_t i=0; i<QuadMv<M>::max_size(); i++) - { - if (!quad.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - const Vec3vf<K> p0 = broadcast<vfloat<K>>(quad.v0,i); - const Vec3vf<K> p1 = broadcast<vfloat<K>>(quad.v1,i); - const Vec3vf<K> p2 = broadcast<vfloat<K>>(quad.v2,i); - const Vec3vf<K> p3 = broadcast<vfloat<K>>(quad.v3,i); - pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i)); - } - } - - /*! Test for K rays if they are occluded by any of the M triangles. */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMv<M>& quad) - { - vbool<K> valid0 = valid_i; - - for (size_t i=0; i<QuadMv<M>::max_size(); i++) - { - if (!quad.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid0),K); - const Vec3vf<K> p0 = broadcast<vfloat<K>>(quad.v0,i); - const Vec3vf<K> p1 = broadcast<vfloat<K>>(quad.v1,i); - const Vec3vf<K> p2 = broadcast<vfloat<K>>(quad.v2,i); - const Vec3vf<K> p3 = broadcast<vfloat<K>>(quad.v3,i); - if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i))) - break; - } - return !valid0; - } - - /*! Intersect a ray with M triangles and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad) - { - STAT3(normal.trav_prims,1,1,1); - pre.intersect1(ray,k,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); - } - - /*! Test if the ray is occluded by one of the M triangles. */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad) - { - STAT3(shadow.trav_prims,1,1,1); - return pre.occluded1(ray,k,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID()); - } - }; - } -} - diff --git a/thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h deleted file mode 100644 index cdf68f486b..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h +++ /dev/null @@ -1,710 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" -#include "curve_intersector_precalculations.h" - - -/* - - This file implements the intersection of a ray with a round linear - curve segment. We define the geometry of such a round linear curve - segment from point p0 with radius r0 to point p1 with radius r1 - using the cone that touches spheres p0/r0 and p1/r1 tangentially - plus the sphere p1/r1. We denote the tangentially touching cone from - p0/r0 to p1/r1 with cone(p0,r0,p1,r1) and the cone plus the ending - sphere with cone_sphere(p0,r0,p1,r1). - - For multiple connected round linear curve segments this construction - yield a proper shape when viewed from the outside. Using the - following CSG we can also handle the interiour in most common cases: - - round_linear_curve(pl,rl,p0,r0,p1,r1,pr,rr) = - cone_sphere(p0,r0,p1,r1) - cone(pl,rl,p0,r0) - cone(p1,r1,pr,rr) - - Thus by subtracting the neighboring cone geometries, we cut away - parts of the center cone_sphere surface which lie inside the - combined curve. This approach works as long as geometry of the - current cone_sphere penetrates into direct neighbor segments only, - and not into segments further away. - - To construct a cone that touches two spheres at p0 and p1 with r0 - and r1, one has to increase the cone radius at r0 and r1 to obtain - larger radii w0 and w1, such that the infinite cone properly touches - the spheres. From the paper "Ray Tracing Generalized Tube - Primitives: Method and Applications" - (https://www.researchgate.net/publication/334378683_Ray_Tracing_Generalized_Tube_Primitives_Method_and_Applications) - one can derive the following equations for these increased - radii: - - sr = 1.0f / sqrt(1-sqr(dr)/sqr(p1-p0)) - w0 = sr*r0 - w1 = sr*r1 - - Further, we want the cone to start where it touches the sphere at p0 - and to end where it touches sphere at p1. Therefore, we need to - construct clipping locations y0 and y1 for the start and end of the - cone. These start and end clipping location of the cone can get - calculated as: - - Y0 = - r0 * (r1-r0) / length(p1-p0) - Y1 = length(p1-p0) - r1 * (r1-r0) / length(p1-p0) - - Where the cone starts a distance Y0 and ends a distance Y1 away of - point p0 along the cone center. The distance between Y1-Y0 can get - calculated as: - - dY = length(p1-p0) - (r1-r0)^2 / length(p1-p0) - - In the code below, Y will always be scaled by length(p1-p0) to - obtain y and you will find the terms r0*(r1-r0) and - (p1-p0)^2-(r1-r0)^2. - - */ - -namespace embree -{ - namespace isa - { - template<int M> - struct RoundLineIntersectorHitM - { - __forceinline RoundLineIntersectorHitM() {} - - __forceinline RoundLineIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng) - : vu(u), vv(v), vt(t), vNg(Ng) {} - - __forceinline void finalize() {} - - __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } - __forceinline float t (const size_t i) const { return vt[i]; } - __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } - - public: - vfloat<M> vu; - vfloat<M> vv; - vfloat<M> vt; - Vec3vf<M> vNg; - }; - - namespace __roundline_internal - { - template<int M> - struct ConeGeometry - { - ConeGeometry (const Vec4vf<M>& a, const Vec4vf<M>& b) - : p0(a.xyz()), p1(b.xyz()), dP(p1-p0), dPdP(dot(dP,dP)), r0(a.w), sqr_r0(sqr(r0)), r1(b.w), dr(r1-r0), drdr(dr*dr), r0dr (r0*dr), g(dPdP - drdr) {} - - /* - - This function tests if a point is accepted by first cone - clipping plane. - - First, we need to project the point onto the line p0->p1: - - Y = (p-p0)*(p1-p0)/length(p1-p0) - - This value y is the distance to the projection point from - p0. The clip distances are calculated as: - - Y0 = - r0 * (r1-r0) / length(p1-p0) - Y1 = length(p1-p0) - r1 * (r1-r0) / length(p1-p0) - - Thus to test if the point p is accepted by the first - clipping plane we need to test Y > Y0 and to test if it - is accepted by the second clipping plane we need to test - Y < Y1. - - By multiplying the calculations with length(p1-p0) these - calculation can get simplied to: - - y = (p-p0)*(p1-p0) - y0 = - r0 * (r1-r0) - y1 = (p1-p0)^2 - r1 * (r1-r0) - - and the test y > y0 and y < y1. - - */ - - __forceinline vbool<M> isClippedByPlane (const vbool<M>& valid_i, const Vec3vf<M>& p) const - { - const Vec3vf<M> p0p = p - p0; - const vfloat<M> y = dot(p0p,dP); - const vfloat<M> cap0 = -r0dr; - const vbool<M> inside_cone = y > cap0; - return valid_i & (p0.x != vfloat<M>(inf)) & (p1.x != vfloat<M>(inf)) & inside_cone; - } - - /* - - This function tests whether a point lies inside the capped cone - tangential to its ending spheres. - - Therefore one has to check if the point is inside the - region defined by the cone clipping planes, which is - performed similar as in the previous function. - - To perform the inside cone test we need to project the - point onto the line p0->p1: - - dP = p1-p0 - Y = (p-p0)*dP/length(dP) - - This value Y is the distance to the projection point from - p0. To obtain a parameter value u going from 0 to 1 along - the line p0->p1 we calculate: - - U = Y/length(dP) - - The radii to use at points p0 and p1 are: - - w0 = sr * r0 - w1 = sr * r1 - dw = w1-w0 - - Using these radii and u one can directly test if the point - lies inside the cone using the formula dP*dP < wy*wy with: - - wy = w0 + u*dw - py = p0 + u*dP - p - - By multiplying the calculations with length(p1-p0) and - inserting the definition of w can obtain simpler equations: - - y = (p-p0)*dP - ry = r0 + y/dP^2 * dr - wy = sr*ry - py = p0 + y/dP^2*dP - p - y0 = - r0 * dr - y1 = dP^2 - r1 * dr - - Thus for the in-cone test we get: - - py^2 < wy^2 - <=> py^2 < sr^2 * ry^2 - <=> py^2 * ( dP^2 - dr^2 ) < dP^2 * ry^2 - - This can further get simplified to: - - (p0-p)^2 * (dP^2 - dr^2) - y^2 < dP^2 * r0^2 + 2.0f*r0*dr*y; - - */ - - __forceinline vbool<M> isInsideCappedCone (const vbool<M>& valid_i, const Vec3vf<M>& p) const - { - const Vec3vf<M> p0p = p - p0; - const vfloat<M> y = dot(p0p,dP); - const vfloat<M> cap0 = -r0dr+vfloat<M>(ulp); - const vfloat<M> cap1 = -r1*dr + dPdP; - - vbool<M> inside_cone = valid_i & (p0.x != vfloat<M>(inf)) & (p1.x != vfloat<M>(inf)); - inside_cone &= y > cap0; // start clipping plane - inside_cone &= y < cap1; // end clipping plane - inside_cone &= sqr(p0p)*g - sqr(y) < dPdP * sqr_r0 + 2.0f*r0dr*y; // in cone test - return inside_cone; - } - - protected: - Vec3vf<M> p0; - Vec3vf<M> p1; - Vec3vf<M> dP; - vfloat<M> dPdP; - vfloat<M> r0; - vfloat<M> sqr_r0; - vfloat<M> r1; - vfloat<M> dr; - vfloat<M> drdr; - vfloat<M> r0dr; - vfloat<M> g; - }; - - template<int M> - struct ConeGeometryIntersector : public ConeGeometry<M> - { - using ConeGeometry<M>::p0; - using ConeGeometry<M>::p1; - using ConeGeometry<M>::dP; - using ConeGeometry<M>::dPdP; - using ConeGeometry<M>::r0; - using ConeGeometry<M>::sqr_r0; - using ConeGeometry<M>::r1; - using ConeGeometry<M>::dr; - using ConeGeometry<M>::r0dr; - using ConeGeometry<M>::g; - - ConeGeometryIntersector (const Vec3vf<M>& ray_org, const Vec3vf<M>& ray_dir, const vfloat<M>& dOdO, const vfloat<M>& rcp_dOdO, const Vec4vf<M>& a, const Vec4vf<M>& b) - : ConeGeometry<M>(a,b), org(ray_org), O(ray_org-p0), dO(ray_dir), dOdO(dOdO), rcp_dOdO(rcp_dOdO), OdP(dot(dP,O)), dOdP(dot(dP,dO)), yp(OdP + r0dr) {} - - /* - - This function intersects a ray with a cone that touches a - start sphere p0/r0 and end sphere p1/r1. - - To find this ray/cone intersections one could just - calculate radii w0 and w1 as described above and use a - standard ray/cone intersection routine with these - radii. However, it turns out that calculations can get - simplified when deriving a specialized ray/cone - intersection for this special case. We perform - calculations relative to the cone origin p0 and define: - - O = ray_org - p0 - dO = ray_dir - dP = p1-p0 - dr = r1-r0 - dw = w1-w0 - - For some t we can compute the potential hit point h = O + t*dO and - project it onto the cone vector dP to obtain u = (h*dP)/(dP*dP). In - case of an intersection, the squared distance from the hit point - projected onto the cone center line to the hit point should be equal - to the squared cone radius at u: - - (u*dP - h)^2 = (w0 + u*dw)^2 - - Inserting the definition of h, u, w0, and dw into this formula, then - factoring out all terms, and sorting by t^2, t^1, and t^0 terms - yields a quadratic equation to solve. - - Inserting u: - ( (h*dP)*dP/dP^2 - h )^2 = ( w0 + (h*dP)*dw/dP^2 )^2 - - Multiplying by dP^4: - ( (h*dP)*dP - h*dP^2 )^2 = ( w0*dP^2 + (h*dP)*dw )^2 - - Inserting w0 and dw: - ( (h*dP)*dP - h*dP^2 )^2 = ( r0*dP^2 + (h*dP)*dr )^2 / (1-dr^2/dP^2) - ( (h*dP)*dP - h*dP^2 )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + (h*dP)*dr )^2 - - Now one can insert the definition of h, factor out, and presort by t: - ( ((O + t*dO)*dP)*dP - (O + t*dO)*dP^2 )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + ((O + t*dO)*dP)*dr )^2 - ( (O*dP)*dP-O*dP^2 + t*( (dO*dP)*dP - dO*dP^2 ) )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + (O*dP)*dr + t*(dO*dP)*dr )^2 - - Factoring out further and sorting by t^2, t^1 and t^0 yields: - - 0 = t^2 * [ ((dO*dP)*dP - dO-dP^2)^2 * (dP^2 - dr^2) - dP^2*(dO*dP)^2*dr^2 ] - + 2*t^1 * [ ((O*dP)*dP - O*dP^2) * ((dO*dP)*dP - dO*dP^2) * (dP^2 - dr^2) - dP^2*(r0*dP^2 + (O*dP)*dr)*(dO*dP)*dr ] - + t^0 * [ ( (O*dP)*dP - O*dP^2)^2 * (dP^2-dr^2) - dP^2*(r0*dP^2 + (O*dP)*dr)^2 ] - - This can be simplified to: - - 0 = t^2 * [ (dP^2 - dr^2)*dO^2 - (dO*dP)^2 ] - + 2*t^1 * [ (dP^2 - dr^2)*(O*dO) - (dO*dP)*(O*dP + r0*dr) ] - + t^0 * [ (dP^2 - dr^2)*O^2 - (O*dP)^2 - r0^2*dP^2 - 2.0f*r0*dr*(O*dP) ] - - Solving this quadratic equation yields the values for t at which the - ray intersects the cone. - - */ - - __forceinline bool intersectCone(vbool<M>& valid, vfloat<M>& lower, vfloat<M>& upper) - { - /* return no hit by default */ - lower = pos_inf; - upper = neg_inf; - - /* compute quadratic equation A*t^2 + B*t + C = 0 */ - const vfloat<M> OO = dot(O,O); - const vfloat<M> OdO = dot(dO,O); - const vfloat<M> A = g * dOdO - sqr(dOdP); - const vfloat<M> B = 2.0f * (g*OdO - dOdP*yp); - const vfloat<M> C = g*OO - sqr(OdP) - sqr_r0*dPdP - 2.0f*r0dr*OdP; - - /* we miss the cone if determinant is smaller than zero */ - const vfloat<M> D = B*B - 4.0f*A*C; - valid &= (D >= 0.0f & g > 0.0f); // if g <= 0 then the cone is inside a sphere end - - /* When rays are parallel to the cone surface, then the - * ray may be inside or outside the cone. We just assume a - * miss in that case, which is fine as rays inside the - * cone would anyway hit the ending spheres in that - * case. */ - valid &= abs(A) > min_rcp_input; - if (unlikely(none(valid))) { - return false; - } - - /* compute distance to front and back hit */ - const vfloat<M> Q = sqrt(D); - const vfloat<M> rcp_2A = rcp(2.0f*A); - t_cone_front = (-B-Q)*rcp_2A; - y_cone_front = yp + t_cone_front*dOdP; - lower = select( (y_cone_front > -(float)ulp) & (y_cone_front <= g) & (g > 0.0f), t_cone_front, vfloat<M>(pos_inf)); -#if !defined (EMBREE_BACKFACE_CULLING_CURVES) - t_cone_back = (-B+Q)*rcp_2A; - y_cone_back = yp + t_cone_back *dOdP; - upper = select( (y_cone_back > -(float)ulp) & (y_cone_back <= g) & (g > 0.0f), t_cone_back , vfloat<M>(neg_inf)); -#endif - return true; - } - - /* - This function intersects the ray with the end sphere at - p1. We already clip away hits that are inside the - neighboring cone segment. - - */ - - __forceinline void intersectEndSphere(vbool<M>& valid, - const ConeGeometry<M>& coneR, - vfloat<M>& lower, vfloat<M>& upper) - { - /* calculate front and back hit with end sphere */ - const Vec3vf<M> O1 = org - p1; - const vfloat<M> O1dO = dot(O1,dO); - const vfloat<M> h2 = sqr(O1dO) - dOdO*(sqr(O1) - sqr(r1)); - const vfloat<M> rhs1 = select( h2 >= 0.0f, sqrt(h2), vfloat<M>(neg_inf) ); - - /* clip away front hit if it is inside next cone segment */ - t_sph1_front = (-O1dO - rhs1)*rcp_dOdO; - const Vec3vf<M> hit_front = org + t_sph1_front*dO; - vbool<M> valid_sph1_front = h2 >= 0.0f & yp + t_sph1_front*dOdP > g & !coneR.isClippedByPlane (valid, hit_front); - lower = select(valid_sph1_front, t_sph1_front, vfloat<M>(pos_inf)); - -#if !defined(EMBREE_BACKFACE_CULLING_CURVES) - /* clip away back hit if it is inside next cone segment */ - t_sph1_back = (-O1dO + rhs1)*rcp_dOdO; - const Vec3vf<M> hit_back = org + t_sph1_back*dO; - vbool<M> valid_sph1_back = h2 >= 0.0f & yp + t_sph1_back*dOdP > g & !coneR.isClippedByPlane (valid, hit_back); - upper = select(valid_sph1_back, t_sph1_back, vfloat<M>(neg_inf)); -#else - upper = vfloat<M>(neg_inf); -#endif - } - - __forceinline void intersectBeginSphere(const vbool<M>& valid, - vfloat<M>& lower, vfloat<M>& upper) - { - /* calculate front and back hit with end sphere */ - const Vec3vf<M> O1 = org - p0; - const vfloat<M> O1dO = dot(O1,dO); - const vfloat<M> h2 = sqr(O1dO) - dOdO*(sqr(O1) - sqr(r0)); - const vfloat<M> rhs1 = select( h2 >= 0.0f, sqrt(h2), vfloat<M>(neg_inf) ); - - /* clip away front hit if it is inside next cone segment */ - t_sph0_front = (-O1dO - rhs1)*rcp_dOdO; - vbool<M> valid_sph1_front = valid & h2 >= 0.0f & yp + t_sph0_front*dOdP < 0; - lower = select(valid_sph1_front, t_sph0_front, vfloat<M>(pos_inf)); - -#if !defined(EMBREE_BACKFACE_CULLING_CURVES) - /* clip away back hit if it is inside next cone segment */ - t_sph0_back = (-O1dO + rhs1)*rcp_dOdO; - vbool<M> valid_sph1_back = valid & h2 >= 0.0f & yp + t_sph0_back*dOdP < 0; - upper = select(valid_sph1_back, t_sph0_back, vfloat<M>(neg_inf)); -#else - upper = vfloat<M>(neg_inf); -#endif - } - - /* - - This function calculates the geometry normal of some cone hit. - - For a given hit point h (relative to p0) with a cone - starting at p0 with radius w0 and ending at p1 with - radius w1 one normally calculates the geometry normal by - first calculating the parmetric u hit location along the - cone: - - u = dot(h,dP)/dP^2 - - Using this value one can now directly calculate the - geometry normal by bending the connection vector (h-u*dP) - from hit to projected hit with some cone dependent value - dw/sqrt(dP^2) * normalize(dP): - - Ng = normalize(h-u*dP) - dw/length(dP) * normalize(dP) - - The length of the vector (h-u*dP) can also get calculated - by interpolating the radii as w0+u*dw which yields: - - Ng = (h-u*dP)/(w0+u*dw) - dw/dP^2 * dP - - Multiplying with (w0+u*dw) yield a scaled Ng': - - Ng' = (h-u*dP) - (w0+u*dw)*dw/dP^2*dP - - Inserting the definition of w0 and dw and refactoring - yield a furhter scaled Ng'': - - Ng'' = (dP^2 - dr^2) (h-q) - (r0+u*dr)*dr*dP - - Now inserting the definition of u gives and multiplying - with the denominator yields: - - Ng''' = (dP^2-dr^2)*(dP^2*h-dot(h,dP)*dP) - (dP^2*r0+dot(h,dP)*dr)*dr*dP - - Factoring out, cancelling terms, dividing by dP^2, and - factoring again yields finally: - - Ng'''' = (dP^2-dr^2)*h - dP*(dot(h,dP) + r0*dr) - - */ - - __forceinline Vec3vf<M> Ng_cone(const vbool<M>& front_hit) const - { -#if !defined(EMBREE_BACKFACE_CULLING_CURVES) - const vfloat<M> y = select(front_hit, y_cone_front, y_cone_back); - const vfloat<M> t = select(front_hit, t_cone_front, t_cone_back); - const Vec3vf<M> h = O + t*dO; - return g*h-dP*y; -#else - const Vec3vf<M> h = O + t_cone_front*dO; - return g*h-dP*y_cone_front; -#endif - } - - /* compute geometry normal of sphere hit as the difference - * vector from hit point to sphere center */ - - __forceinline Vec3vf<M> Ng_sphere1(const vbool<M>& front_hit) const - { -#if !defined(EMBREE_BACKFACE_CULLING_CURVES) - const vfloat<M> t_sph1 = select(front_hit, t_sph1_front, t_sph1_back); - return org+t_sph1*dO-p1; -#else - return org+t_sph1_front*dO-p1; -#endif - } - - __forceinline Vec3vf<M> Ng_sphere0(const vbool<M>& front_hit) const - { -#if !defined(EMBREE_BACKFACE_CULLING_CURVES) - const vfloat<M> t_sph0 = select(front_hit, t_sph0_front, t_sph0_back); - return org+t_sph0*dO-p0; -#else - return org+t_sph0_front*dO-p0; -#endif - } - - /* - This function calculates the u coordinate of a - hit. Therefore we use the hit distance y (which is zero - at the first cone clipping plane) and divide by distance - g between the clipping planes. - - */ - - __forceinline vfloat<M> u_cone(const vbool<M>& front_hit) const - { -#if !defined(EMBREE_BACKFACE_CULLING_CURVES) - const vfloat<M> y = select(front_hit, y_cone_front, y_cone_back); - return clamp(y*rcp(g)); -#else - return clamp(y_cone_front*rcp(g)); -#endif - } - - private: - Vec3vf<M> org; - Vec3vf<M> O; - Vec3vf<M> dO; - vfloat<M> dOdO; - vfloat<M> rcp_dOdO; - vfloat<M> OdP; - vfloat<M> dOdP; - - /* for ray/cone intersection */ - private: - vfloat<M> yp; - vfloat<M> y_cone_front; - vfloat<M> t_cone_front; -#if !defined (EMBREE_BACKFACE_CULLING_CURVES) - vfloat<M> y_cone_back; - vfloat<M> t_cone_back; -#endif - - /* for ray/sphere intersection */ - private: - vfloat<M> t_sph1_front; - vfloat<M> t_sph0_front; -#if !defined (EMBREE_BACKFACE_CULLING_CURVES) - vfloat<M> t_sph1_back; - vfloat<M> t_sph0_back; -#endif - }; - - - template<int M, typename Epilog, typename ray_tfar_func> - static __forceinline bool intersectConeSphere(const vbool<M>& valid_i, - const Vec3vf<M>& ray_org_in, const Vec3vf<M>& ray_dir, - const vfloat<M>& ray_tnear, const ray_tfar_func& ray_tfar, - const Vec4vf<M>& v0, const Vec4vf<M>& v1, - const Vec4vf<M>& vL, const Vec4vf<M>& vR, - const Epilog& epilog) - { - vbool<M> valid = valid_i; - - /* move ray origin closer to make calculations numerically stable */ - const vfloat<M> dOdO = sqr(ray_dir); - const vfloat<M> rcp_dOdO = rcp(dOdO); - const Vec3vf<M> center = vfloat<M>(0.5f)*(v0.xyz()+v1.xyz()); - const vfloat<M> dt = dot(center-ray_org_in,ray_dir)*rcp_dOdO; - const Vec3vf<M> ray_org = ray_org_in + dt*ray_dir; - - /* intersect with cone from v0 to v1 */ - vfloat<M> t_cone_lower, t_cone_upper; - ConeGeometryIntersector<M> cone (ray_org, ray_dir, dOdO, rcp_dOdO, v0, v1); - vbool<M> validCone = valid; - cone.intersectCone(validCone, t_cone_lower, t_cone_upper); - - valid &= (validCone | (cone.g <= 0.0f)); // if cone is entirely in sphere end - check sphere - if (unlikely(none(valid))) - return false; - - /* cone hits inside the neighboring capped cones are inside the geometry and thus ignored */ - const ConeGeometry<M> coneL (v0, vL); - const ConeGeometry<M> coneR (v1, vR); -#if !defined(EMBREE_BACKFACE_CULLING_CURVES) - const Vec3vf<M> hit_lower = ray_org + t_cone_lower*ray_dir; - const Vec3vf<M> hit_upper = ray_org + t_cone_upper*ray_dir; - t_cone_lower = select (!coneL.isInsideCappedCone (validCone, hit_lower) & !coneR.isInsideCappedCone (validCone, hit_lower), t_cone_lower, vfloat<M>(pos_inf)); - t_cone_upper = select (!coneL.isInsideCappedCone (validCone, hit_upper) & !coneR.isInsideCappedCone (validCone, hit_upper), t_cone_upper, vfloat<M>(neg_inf)); -#endif - - /* intersect ending sphere */ - vfloat<M> t_sph1_lower, t_sph1_upper; - vfloat<M> t_sph0_lower = vfloat<M>(pos_inf); - vfloat<M> t_sph0_upper = vfloat<M>(neg_inf); - cone.intersectEndSphere(valid, coneR, t_sph1_lower, t_sph1_upper); - - const vbool<M> isBeginPoint = valid & (vL[0] == vfloat<M>(pos_inf)); - if (unlikely(any(isBeginPoint))) { - cone.intersectBeginSphere (isBeginPoint, t_sph0_lower, t_sph0_upper); - } - - /* CSG union of cone and end sphere */ - vfloat<M> t_sph_lower = min(t_sph0_lower, t_sph1_lower); - vfloat<M> t_cone_sphere_lower = min(t_cone_lower, t_sph_lower); -#if !defined (EMBREE_BACKFACE_CULLING_CURVES) - vfloat<M> t_sph_upper = max(t_sph0_upper, t_sph1_upper); - vfloat<M> t_cone_sphere_upper = max(t_cone_upper, t_sph_upper); - - /* filter out hits that are not in tnear/tfar range */ - const vbool<M> valid_lower = valid & ray_tnear <= dt+t_cone_sphere_lower & dt+t_cone_sphere_lower <= ray_tfar() & t_cone_sphere_lower != vfloat<M>(pos_inf); - const vbool<M> valid_upper = valid & ray_tnear <= dt+t_cone_sphere_upper & dt+t_cone_sphere_upper <= ray_tfar() & t_cone_sphere_upper != vfloat<M>(neg_inf); - - /* check if there is a first hit */ - const vbool<M> valid_first = valid_lower | valid_upper; - if (unlikely(none(valid_first))) - return false; - - /* construct first hit */ - const vfloat<M> t_first = select(valid_lower, t_cone_sphere_lower, t_cone_sphere_upper); - const vbool<M> cone_hit_first = t_first == t_cone_lower | t_first == t_cone_upper; - const vbool<M> sph0_hit_first = t_first == t_sph0_lower | t_first == t_sph0_upper; - const Vec3vf<M> Ng_first = select(cone_hit_first, cone.Ng_cone(valid_lower), select (sph0_hit_first, cone.Ng_sphere0(valid_lower), cone.Ng_sphere1(valid_lower))); - const vfloat<M> u_first = select(cone_hit_first, cone.u_cone(valid_lower), select (sph0_hit_first, vfloat<M>(zero), vfloat<M>(one))); - - /* invoke intersection filter for first hit */ - RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_first,Ng_first); - const bool is_hit_first = epilog(valid_first, hit); - - /* check for possible second hits before potentially accepted hit */ - const vfloat<M> t_second = t_cone_sphere_upper; - const vbool<M> valid_second = valid_lower & valid_upper & (dt+t_cone_sphere_upper <= ray_tfar()); - if (unlikely(none(valid_second))) - return is_hit_first; - - /* invoke intersection filter for second hit */ - const vbool<M> cone_hit_second = t_second == t_cone_lower | t_second == t_cone_upper; - const vbool<M> sph0_hit_second = t_second == t_sph0_lower | t_second == t_sph0_upper; - const Vec3vf<M> Ng_second = select(cone_hit_second, cone.Ng_cone(false), select (sph0_hit_second, cone.Ng_sphere0(false), cone.Ng_sphere1(false))); - const vfloat<M> u_second = select(cone_hit_second, cone.u_cone(false), select (sph0_hit_second, vfloat<M>(zero), vfloat<M>(one))); - - hit = RoundLineIntersectorHitM<M>(u_second,zero,dt+t_second,Ng_second); - const bool is_hit_second = epilog(valid_second, hit); - - return is_hit_first | is_hit_second; -#else - /* filter out hits that are not in tnear/tfar range */ - const vbool<M> valid_lower = valid & ray_tnear <= dt+t_cone_sphere_lower & dt+t_cone_sphere_lower <= ray_tfar() & t_cone_sphere_lower != vfloat<M>(pos_inf); - - /* check if there is a valid hit */ - if (unlikely(none(valid_lower))) - return false; - - /* construct first hit */ - const vbool<M> cone_hit_first = t_cone_sphere_lower == t_cone_lower | t_cone_sphere_lower == t_cone_upper; - const vbool<M> sph0_hit_first = t_cone_sphere_lower == t_sph0_lower | t_cone_sphere_lower == t_sph0_upper; - const Vec3vf<M> Ng_first = select(cone_hit_first, cone.Ng_cone(valid_lower), select (sph0_hit_first, cone.Ng_sphere0(valid_lower), cone.Ng_sphere1(valid_lower))); - const vfloat<M> u_first = select(cone_hit_first, cone.u_cone(valid_lower), select (sph0_hit_first, vfloat<M>(zero), vfloat<M>(one))); - - /* invoke intersection filter for first hit */ - RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_cone_sphere_lower,Ng_first); - const bool is_hit_first = epilog(valid_lower, hit); - - return is_hit_first; -#endif - } - - } // end namespace __roundline_internal - - template<int M> - struct RoundLinearCurveIntersector1 - { - typedef CurvePrecalculations1 Precalculations; - - struct ray_tfar { - Ray& ray; - __forceinline ray_tfar(Ray& ray) : ray(ray) {} - __forceinline vfloat<M> operator() () const { return ray.tfar; }; - }; - - template<typename Epilog> - static __forceinline bool intersect(const vbool<M>& valid_i, - Ray& ray, - IntersectContext* context, - const LineSegments* geom, - const Precalculations& pre, - const Vec4vf<M>& v0i, const Vec4vf<M>& v1i, - const Vec4vf<M>& vLi, const Vec4vf<M>& vRi, - const Epilog& epilog) - { - const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); - const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z); - const vfloat<M> ray_tnear(ray.tnear()); - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); - const Vec4vf<M> vL = enlargeRadiusToMinWidth(context,geom,ray_org,vLi); - const Vec4vf<M> vR = enlargeRadiusToMinWidth(context,geom,ray_org,vRi); - return __roundline_internal::intersectConeSphere(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray),v0,v1,vL,vR,epilog); - } - }; - - template<int M, int K> - struct RoundLinearCurveIntersectorK - { - typedef CurvePrecalculationsK<K> Precalculations; - - struct ray_tfar { - RayK<K>& ray; - size_t k; - __forceinline ray_tfar(RayK<K>& ray, size_t k) : ray(ray), k(k) {} - __forceinline vfloat<M> operator() () const { return ray.tfar[k]; }; - }; - - template<typename Epilog> - static __forceinline bool intersect(const vbool<M>& valid_i, - RayK<K>& ray, size_t k, - IntersectContext* context, - const LineSegments* geom, - const Precalculations& pre, - const Vec4vf<M>& v0i, const Vec4vf<M>& v1i, - const Vec4vf<M>& vLi, const Vec4vf<M>& vRi, - const Epilog& epilog) - { - const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); - const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); - const vfloat<M> ray_tnear = ray.tnear()[k]; - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); - const Vec4vf<M> vL = enlargeRadiusToMinWidth(context,geom,ray_org,vLi); - const Vec4vf<M> vR = enlargeRadiusToMinWidth(context,geom,ray_org,vRi); - return __roundline_internal::intersectConeSphere(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray,k),v0,v1,vL,vR,epilog); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h deleted file mode 100644 index 079817335e..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h +++ /dev/null @@ -1,136 +0,0 @@ -// ======================================================================== // -// Copyright 2009-2020 Intel Corporation // -// // -// Licensed under the Apache License, Version 2.0 (the "License"); // -// you may not use this file except in compliance with the License. // -// You may obtain a copy of the License at // -// // -// http://www.apache.org/licenses/LICENSE-2.0 // -// // -// Unless required by applicable law or agreed to in writing, software // -// distributed under the License is distributed on an "AS IS" BASIS, // -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // -// See the License for the specific language governing permissions and // -// limitations under the License. // -// ======================================================================== // - -#pragma once - -#include "roundline_intersector.h" -#include "intersector_epilog.h" - -namespace embree -{ - namespace isa - { - template<int M, int Mx, bool filter> - struct RoundLinearCurveMiIntersector1 - { - typedef LineMi<M> Primitive; - typedef CurvePrecalculations1 Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line) - { - STAT3(normal.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) - { - STAT3(shadow.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - return RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line); - } - }; - - template<int M, int Mx, bool filter> - struct RoundLinearCurveMiMBIntersector1 - { - typedef LineMi<M> Primitive; - typedef CurvePrecalculations1 Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line) - { - STAT3(normal.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()); - const vbool<Mx> valid = line.template valid<Mx>(); - RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) - { - STAT3(shadow.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()); - const vbool<Mx> valid = line.template valid<Mx>(); - return RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line); - } - }; - - template<int M, int Mx, int K, bool filter> - struct RoundLinearCurveMiIntersectorK - { - typedef LineMi<M> Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) - { - STAT3(normal.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) - { - STAT3(shadow.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - return RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); - } - }; - - template<int M, int Mx, int K, bool filter> - struct RoundLinearCurveMiMBIntersectorK - { - typedef LineMi<M> Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) - { - STAT3(normal.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()[k]); - const vbool<Mx> valid = line.template valid<Mx>(); - RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) - { - STAT3(shadow.trav_prims,1,1,1); - const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); - Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()[k]); - const vbool<Mx> valid = line.template valid<Mx>(); - return RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h deleted file mode 100644 index 3ab90c29ef..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" -#include "../common/scene_points.h" -#include "curve_intersector_precalculations.h" - -namespace embree -{ - namespace isa - { - template<int M> - struct SphereIntersectorHitM - { - __forceinline SphereIntersectorHitM() {} - - __forceinline SphereIntersectorHitM(const vfloat<M>& t, const Vec3vf<M>& Ng) - : vt(t), vNg(Ng) {} - - __forceinline void finalize() {} - - __forceinline Vec2f uv(const size_t i) const { - return Vec2f(0.0f, 0.0f); - } - __forceinline float t(const size_t i) const { - return vt[i]; - } - __forceinline Vec3fa Ng(const size_t i) const { - return Vec3fa(vNg.x[i], vNg.y[i], vNg.z[i]); - } - - public: - vfloat<M> vt; - Vec3vf<M> vNg; - }; - - template<int M> - struct SphereIntersector1 - { - typedef CurvePrecalculations1 Precalculations; - - template<typename Epilog> - static __forceinline bool intersect( - const vbool<M>& valid_i, Ray& ray, - const Precalculations& pre, const Vec4vf<M>& v0, const Epilog& epilog) - { - vbool<M> valid = valid_i; - - const vfloat<M> rd2 = rcp(dot(ray.dir, ray.dir)); - const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); - const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z); - const Vec3vf<M> center = v0.xyz(); - const vfloat<M> radius = v0.w; - - const Vec3vf<M> c0 = center - ray_org; - const vfloat<M> projC0 = dot(c0, ray_dir) * rd2; - const Vec3vf<M> perp = c0 - projC0 * ray_dir; - const vfloat<M> l2 = dot(perp, perp); - const vfloat<M> r2 = radius * radius; - valid &= (l2 <= r2); - if (unlikely(none(valid))) - return false; - - const vfloat<M> td = sqrt((r2 - l2) * rd2); - const vfloat<M> t_front = projC0 - td; - const vfloat<M> t_back = projC0 + td; - - const vbool<M> valid_front = valid & (ray.tnear() <= t_front) & (t_front <= ray.tfar); - const vbool<M> valid_back = valid & (ray.tnear() <= t_back ) & (t_back <= ray.tfar); - - /* check if there is a first hit */ - const vbool<M> valid_first = valid_front | valid_back; - if (unlikely(none(valid_first))) - return false; - - /* construct first hit */ - const vfloat<M> td_front = -td; - const vfloat<M> td_back = +td; - const vfloat<M> t_first = select(valid_front, t_front, t_back); - const Vec3vf<M> Ng_first = select(valid_front, td_front, td_back) * ray_dir - perp; - SphereIntersectorHitM<M> hit(t_first, Ng_first); - - /* invoke intersection filter for first hit */ - const bool is_hit_first = epilog(valid_first, hit); - - /* check for possible second hits before potentially accepted hit */ - const vfloat<M> t_second = t_back; - const vbool<M> valid_second = valid_front & valid_back & (t_second <= ray.tfar); - if (unlikely(none(valid_second))) - return is_hit_first; - - /* invoke intersection filter for second hit */ - const Vec3vf<M> Ng_second = td_back * ray_dir - perp; - hit = SphereIntersectorHitM<M> (t_second, Ng_second); - const bool is_hit_second = epilog(valid_second, hit); - - return is_hit_first | is_hit_second; - } - - template<typename Epilog> - static __forceinline bool intersect( - const vbool<M>& valid_i, Ray& ray, IntersectContext* context, const Points* geom, - const Precalculations& pre, const Vec4vf<M>& v0i, const Epilog& epilog) - { - const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - return intersect(valid_i,ray,pre,v0,epilog); - } - }; - - template<int M, int K> - struct SphereIntersectorK - { - typedef CurvePrecalculationsK<K> Precalculations; - - template<typename Epilog> - static __forceinline bool intersect(const vbool<M>& valid_i, - RayK<K>& ray, size_t k, - IntersectContext* context, - const Points* geom, - const Precalculations& pre, - const Vec4vf<M>& v0i, - const Epilog& epilog) - { - vbool<M> valid = valid_i; - - const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); - const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); - const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir)); - - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec3vf<M> center = v0.xyz(); - const vfloat<M> radius = v0.w; - - const Vec3vf<M> c0 = center - ray_org; - const vfloat<M> projC0 = dot(c0, ray_dir) * rd2; - const Vec3vf<M> perp = c0 - projC0 * ray_dir; - const vfloat<M> l2 = dot(perp, perp); - const vfloat<M> r2 = radius * radius; - valid &= (l2 <= r2); - if (unlikely(none(valid))) - return false; - - const vfloat<M> td = sqrt((r2 - l2) * rd2); - const vfloat<M> t_front = projC0 - td; - const vfloat<M> t_back = projC0 + td; - - const vbool<M> valid_front = valid & (ray.tnear()[k] <= t_front) & (t_front <= ray.tfar[k]); - const vbool<M> valid_back = valid & (ray.tnear()[k] <= t_back ) & (t_back <= ray.tfar[k]); - - /* check if there is a first hit */ - const vbool<M> valid_first = valid_front | valid_back; - if (unlikely(none(valid_first))) - return false; - - /* construct first hit */ - const vfloat<M> td_front = -td; - const vfloat<M> td_back = +td; - const vfloat<M> t_first = select(valid_front, t_front, t_back); - const Vec3vf<M> Ng_first = select(valid_front, td_front, td_back) * ray_dir - perp; - SphereIntersectorHitM<M> hit(t_first, Ng_first); - - /* invoke intersection filter for first hit */ - const bool is_hit_first = epilog(valid_first, hit); - - /* check for possible second hits before potentially accepted hit */ - const vfloat<M> t_second = t_back; - const vbool<M> valid_second = valid_front & valid_back & (t_second <= ray.tfar[k]); - if (unlikely(none(valid_second))) - return is_hit_first; - - /* invoke intersection filter for second hit */ - const Vec3vf<M> Ng_second = td_back * ray_dir - perp; - hit = SphereIntersectorHitM<M> (t_second, Ng_second); - const bool is_hit_second = epilog(valid_second, hit); - - return is_hit_first | is_hit_second; - } - }; - } // namespace isa -} // namespace embree diff --git a/thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h deleted file mode 100644 index 1146847602..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "intersector_epilog.h" -#include "pointi.h" -#include "sphere_intersector.h" - -namespace embree -{ - namespace isa - { - template<int M, int Mx, bool filter> - struct SphereMiIntersector1 - { - typedef PointMi<M> Primitive; - typedef CurvePrecalculations1 Precalculations; - - static __forceinline void intersect(const Precalculations& pre, - RayHit& ray, - IntersectContext* context, - const Primitive& sphere) - { - STAT3(normal.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(sphere.geomID()); - Vec4vf<M> v0; sphere.gather(v0, geom); - const vbool<Mx> valid = sphere.template valid<Mx>(); - SphereIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, - Ray& ray, - IntersectContext* context, - const Primitive& sphere) - { - STAT3(shadow.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(sphere.geomID()); - Vec4vf<M> v0; sphere.gather(v0, geom); - const vbool<Mx> valid = sphere.template valid<Mx>(); - return SphereIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, - PointQueryContext* context, - const Primitive& sphere) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, sphere); - } - }; - - template<int M, int Mx, bool filter> - struct SphereMiMBIntersector1 - { - typedef PointMi<M> Primitive; - typedef CurvePrecalculations1 Precalculations; - - static __forceinline void intersect(const Precalculations& pre, - RayHit& ray, - IntersectContext* context, - const Primitive& sphere) - { - STAT3(normal.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(sphere.geomID()); - Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()); - const vbool<Mx> valid = sphere.template valid<Mx>(); - SphereIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, - Ray& ray, - IntersectContext* context, - const Primitive& sphere) - { - STAT3(shadow.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(sphere.geomID()); - Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()); - const vbool<Mx> valid = sphere.template valid<Mx>(); - return SphereIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, - PointQueryContext* context, - const Primitive& sphere) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, sphere); - } - }; - - template<int M, int Mx, int K, bool filter> - struct SphereMiIntersectorK - { - typedef PointMi<M> Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline void intersect( - const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere) - { - STAT3(normal.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(sphere.geomID()); - Vec4vf<M> v0; sphere.gather(v0, geom); - const vbool<Mx> valid = sphere.template valid<Mx>(); - SphereIntersectorK<Mx, K>::intersect( - valid, ray, k, context, geom, pre, v0, - Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); - } - - static __forceinline bool occluded( - const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere) - { - STAT3(shadow.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(sphere.geomID()); - Vec4vf<M> v0; sphere.gather(v0, geom); - const vbool<Mx> valid = sphere.template valid<Mx>(); - return SphereIntersectorK<Mx, K>::intersect( - valid, ray, k, context, geom, pre, v0, - Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); - } - }; - - template<int M, int Mx, int K, bool filter> - struct SphereMiMBIntersectorK - { - typedef PointMi<M> Primitive; - typedef CurvePrecalculationsK<K> Precalculations; - - static __forceinline void intersect( - const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere) - { - STAT3(normal.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(sphere.geomID()); - Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()[k]); - const vbool<Mx> valid = sphere.template valid<Mx>(); - SphereIntersectorK<Mx, K>::intersect( - valid, ray, k, context, geom, pre, v0, - Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); - } - - static __forceinline bool occluded( - const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere) - { - STAT3(shadow.trav_prims, 1, 1, 1); - const Points* geom = context->scene->get<Points>(sphere.geomID()); - Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()[k]); - const vbool<Mx> valid = sphere.template valid<Mx>(); - return SphereIntersectorK<Mx, K>::intersect( - valid, ray, k, context, geom, pre, v0, - Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); - } - }; - } // namespace isa -} // namespace embree diff --git a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h b/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h deleted file mode 100644 index 94ad46ad87..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../geometry/primitive.h" -#include "../subdiv/subdivpatch1base.h" - -namespace embree -{ - - struct __aligned(64) SubdivPatch1 : public SubdivPatch1Base - { - struct Type : public PrimitiveType - { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - - static Type type; - - public: - - /*! constructor for cached subdiv patch */ - SubdivPatch1 (const unsigned int gID, - const unsigned int pID, - const unsigned int subPatch, - const SubdivMesh *const mesh, - const size_t time, - const Vec2f uv[4], - const float edge_level[4], - const int subdiv[4], - const int simd_width) - : SubdivPatch1Base(gID,pID,subPatch,mesh,time,uv,edge_level,subdiv,simd_width) {} - }; -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h deleted file mode 100644 index 74ec1de258..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "subdivpatch1.h" -#include "grid_soa.h" -#include "grid_soa_intersector1.h" -#include "grid_soa_intersector_packet.h" -#include "../common/ray.h" - -namespace embree -{ - namespace isa - { - template<typename T> - class SubdivPatch1Precalculations : public T - { - public: - __forceinline SubdivPatch1Precalculations (const Ray& ray, const void* ptr) - : T(ray,ptr) {} - }; - - template<int K, typename T> - class SubdivPatch1PrecalculationsK : public T - { - public: - __forceinline SubdivPatch1PrecalculationsK (const vbool<K>& valid, RayK<K>& ray) - : T(valid,ray) {} - }; - - class SubdivPatch1Intersector1 - { - public: - typedef GridSOA Primitive; - typedef SubdivPatch1Precalculations<GridSOAIntersector1::Precalculations> Precalculations; - - static __forceinline bool processLazyNode(Precalculations& pre, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - lazy_node = prim->root(0); - pre.grid = (Primitive*)prim; - return false; - } - - /*! Intersect a ray with the primitive. */ - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - if (likely(ty == 0)) GridSOAIntersector1::intersect(pre,ray,context,prim,lazy_node); - else processLazyNode(pre,context,prim,lazy_node); - } - - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) { - intersect(This,pre,ray,context,prim,ty,tray,lazy_node); - } - - /*! Test if the ray is occluded by the primitive */ - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - if (likely(ty == 0)) return GridSOAIntersector1::occluded(pre,ray,context,prim,lazy_node); - else return processLazyNode(pre,context,prim,lazy_node); - } - - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) { - return occluded(This,pre,ray,context,prim,ty,tray,lazy_node); - } - - template<int N> - static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node) - { - // TODO: PointQuery implement - assert(false && "not implemented"); - return false; - } - - template<int N> - static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node) { - return pointQuery(This,query,context,prim,ty,tquery,lazy_node); - } - }; - - class SubdivPatch1MBIntersector1 - { - public: - typedef SubdivPatch1 Primitive; - typedef GridSOAMBIntersector1::Precalculations Precalculations; - - static __forceinline bool processLazyNode(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim_i, size_t& lazy_node) - { - Primitive* prim = (Primitive*) prim_i; - GridSOA* grid = nullptr; - grid = (GridSOA*) prim->root_ref.get(); - pre.itime = getTimeSegment(ray.time(), float(grid->time_steps-1), pre.ftime); - lazy_node = grid->root(pre.itime); - pre.grid = grid; - return false; - } - - /*! Intersect a ray with the primitive. */ - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - if (likely(ty == 0)) GridSOAMBIntersector1::intersect(pre,ray,context,prim,lazy_node); - else processLazyNode(pre,ray,context,prim,lazy_node); - } - - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) { - intersect(This,pre,ray,context,prim,ty,tray,lazy_node); - } - - /*! Test if the ray is occluded by the primitive */ - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - if (likely(ty == 0)) return GridSOAMBIntersector1::occluded(pre,ray,context,prim,lazy_node); - else return processLazyNode(pre,ray,context,prim,lazy_node); - } - - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) { - return occluded(This,pre,ray,context,prim,ty,tray,lazy_node); - } - - template<int N> - static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node) - { - // TODO: PointQuery implement - assert(false && "not implemented"); - return false; - } - - template<int N, int Nx, bool robust> - static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node) { - return pointQuery(This,query,context,prim,ty,tquery,lazy_node); - } - }; - - template <int K> - struct SubdivPatch1IntersectorK - { - typedef GridSOA Primitive; - typedef SubdivPatch1PrecalculationsK<K,typename GridSOAIntersectorK<K>::Precalculations> Precalculations; - - static __forceinline bool processLazyNode(Precalculations& pre, IntersectContext* context, const Primitive* prim, size_t& lazy_node) - { - lazy_node = prim->root(0); - pre.grid = (Primitive*)prim; - return false; - } - - template<bool robust> - static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node) - { - if (likely(ty == 0)) GridSOAIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node); - else processLazyNode(pre,context,prim,lazy_node); - } - - template<bool robust> - static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node) - { - if (likely(ty == 0)) return GridSOAIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node); - else return processLazyNode(pre,context,prim,lazy_node); - } - - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - if (likely(ty == 0)) GridSOAIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node); - else processLazyNode(pre,context,prim,lazy_node); - } - - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - if (likely(ty == 0)) return GridSOAIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node); - else return processLazyNode(pre,context,prim,lazy_node); - } - }; - - typedef SubdivPatch1IntersectorK<4> SubdivPatch1Intersector4; - typedef SubdivPatch1IntersectorK<8> SubdivPatch1Intersector8; - typedef SubdivPatch1IntersectorK<16> SubdivPatch1Intersector16; - - template <int K> - struct SubdivPatch1MBIntersectorK - { - typedef SubdivPatch1 Primitive; - //typedef GridSOAMBIntersectorK<K>::Precalculations Precalculations; - typedef SubdivPatch1PrecalculationsK<K,typename GridSOAMBIntersectorK<K>::Precalculations> Precalculations; - - static __forceinline bool processLazyNode(Precalculations& pre, IntersectContext* context, const Primitive* prim_i, size_t& lazy_node) - { - Primitive* prim = (Primitive*) prim_i; - GridSOA* grid = (GridSOA*) prim->root_ref.get(); - lazy_node = grid->troot; - pre.grid = grid; - return false; - } - - template<bool robust> - static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node) - { - if (likely(ty == 0)) GridSOAMBIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node); - else processLazyNode(pre,context,prim,lazy_node); - } - - template<bool robust> - static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node) - { - if (likely(ty == 0)) return GridSOAMBIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node); - else return processLazyNode(pre,context,prim,lazy_node); - } - - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - if (likely(ty == 0)) GridSOAMBIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node); - else processLazyNode(pre,context,prim,lazy_node); - } - - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - if (likely(ty == 0)) return GridSOAMBIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node); - else return processLazyNode(pre,context,prim,lazy_node); - } - }; - - typedef SubdivPatch1MBIntersectorK<4> SubdivPatch1MBIntersector4; - typedef SubdivPatch1MBIntersectorK<8> SubdivPatch1MBIntersector8; - typedef SubdivPatch1MBIntersectorK<16> SubdivPatch1MBIntersector16; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid.h deleted file mode 100644 index 39fa6fb0f0..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/subgrid.h +++ /dev/null @@ -1,517 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/ray.h" -#include "../common/scene_grid_mesh.h" -#include "../bvh/bvh.h" - -namespace embree -{ - /* Stores M quads from an indexed face set */ - struct SubGrid - { - /* Virtual interface to query information about the quad type */ - struct Type : public PrimitiveType - { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - - /* primitive supports multiple time segments */ - static const bool singleTimeSegment = false; - - /* Returns maximum number of stored quads */ - static __forceinline size_t max_size() { return 1; } - - /* Returns required number of primitive blocks for N primitives */ - static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } - - public: - - /* Default constructor */ - __forceinline SubGrid() { } - - /* Construction from vertices and IDs */ - __forceinline SubGrid(const unsigned int x, - const unsigned int y, - const unsigned int geomID, - const unsigned int primID) - : _x(x), _y(y), _geomID(geomID), _primID(primID) - { - } - - __forceinline bool invalid3x3X() const { return (unsigned int)_x & (1<<15); } - __forceinline bool invalid3x3Y() const { return (unsigned int)_y & (1<<15); } - - /* Gather the quads */ - __forceinline void gather(Vec3vf4& p0, - Vec3vf4& p1, - Vec3vf4& p2, - Vec3vf4& p3, - const GridMesh* const mesh, - const GridMesh::Grid &g) const - { - /* first quad always valid */ - const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset; - const size_t vtxID01 = vtxID00 + 1; - const vfloat4 vtx00 = vfloat4::loadu(mesh->vertexPtr(vtxID00)); - const vfloat4 vtx01 = vfloat4::loadu(mesh->vertexPtr(vtxID01)); - const size_t vtxID10 = vtxID00 + g.lineVtxOffset; - const size_t vtxID11 = vtxID01 + g.lineVtxOffset; - const vfloat4 vtx10 = vfloat4::loadu(mesh->vertexPtr(vtxID10)); - const vfloat4 vtx11 = vfloat4::loadu(mesh->vertexPtr(vtxID11)); - - /* deltaX => vtx02, vtx12 */ - const size_t deltaX = invalid3x3X() ? 0 : 1; - const size_t vtxID02 = vtxID01 + deltaX; - const vfloat4 vtx02 = vfloat4::loadu(mesh->vertexPtr(vtxID02)); - const size_t vtxID12 = vtxID11 + deltaX; - const vfloat4 vtx12 = vfloat4::loadu(mesh->vertexPtr(vtxID12)); - - /* deltaY => vtx20, vtx21 */ - const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset; - const size_t vtxID20 = vtxID10 + deltaY; - const size_t vtxID21 = vtxID11 + deltaY; - const vfloat4 vtx20 = vfloat4::loadu(mesh->vertexPtr(vtxID20)); - const vfloat4 vtx21 = vfloat4::loadu(mesh->vertexPtr(vtxID21)); - - /* deltaX/deltaY => vtx22 */ - const size_t vtxID22 = vtxID11 + deltaX + deltaY; - const vfloat4 vtx22 = vfloat4::loadu(mesh->vertexPtr(vtxID22)); - - transpose(vtx00,vtx01,vtx11,vtx10,p0.x,p0.y,p0.z); - transpose(vtx01,vtx02,vtx12,vtx11,p1.x,p1.y,p1.z); - transpose(vtx11,vtx12,vtx22,vtx21,p2.x,p2.y,p2.z); - transpose(vtx10,vtx11,vtx21,vtx20,p3.x,p3.y,p3.z); - } - - template<typename T> - __forceinline vfloat4 getVertexMB(const GridMesh* const mesh, const size_t offset, const size_t itime, const float ftime) const - { - const T v0 = T::loadu(mesh->vertexPtr(offset,itime+0)); - const T v1 = T::loadu(mesh->vertexPtr(offset,itime+1)); - return lerp(v0,v1,ftime); - } - - /* Gather the quads */ - __forceinline void gatherMB(Vec3vf4& p0, - Vec3vf4& p1, - Vec3vf4& p2, - Vec3vf4& p3, - const GridMesh* const mesh, - const GridMesh::Grid &g, - const size_t itime, - const float ftime) const - { - /* first quad always valid */ - const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset; - const size_t vtxID01 = vtxID00 + 1; - const vfloat4 vtx00 = getVertexMB<vfloat4>(mesh,vtxID00,itime,ftime); - const vfloat4 vtx01 = getVertexMB<vfloat4>(mesh,vtxID01,itime,ftime); - const size_t vtxID10 = vtxID00 + g.lineVtxOffset; - const size_t vtxID11 = vtxID01 + g.lineVtxOffset; - const vfloat4 vtx10 = getVertexMB<vfloat4>(mesh,vtxID10,itime,ftime); - const vfloat4 vtx11 = getVertexMB<vfloat4>(mesh,vtxID11,itime,ftime); - - /* deltaX => vtx02, vtx12 */ - const size_t deltaX = invalid3x3X() ? 0 : 1; - const size_t vtxID02 = vtxID01 + deltaX; - const vfloat4 vtx02 = getVertexMB<vfloat4>(mesh,vtxID02,itime,ftime); - const size_t vtxID12 = vtxID11 + deltaX; - const vfloat4 vtx12 = getVertexMB<vfloat4>(mesh,vtxID12,itime,ftime); - - /* deltaY => vtx20, vtx21 */ - const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset; - const size_t vtxID20 = vtxID10 + deltaY; - const size_t vtxID21 = vtxID11 + deltaY; - const vfloat4 vtx20 = getVertexMB<vfloat4>(mesh,vtxID20,itime,ftime); - const vfloat4 vtx21 = getVertexMB<vfloat4>(mesh,vtxID21,itime,ftime); - - /* deltaX/deltaY => vtx22 */ - const size_t vtxID22 = vtxID11 + deltaX + deltaY; - const vfloat4 vtx22 = getVertexMB<vfloat4>(mesh,vtxID22,itime,ftime); - - transpose(vtx00,vtx01,vtx11,vtx10,p0.x,p0.y,p0.z); - transpose(vtx01,vtx02,vtx12,vtx11,p1.x,p1.y,p1.z); - transpose(vtx11,vtx12,vtx22,vtx21,p2.x,p2.y,p2.z); - transpose(vtx10,vtx11,vtx21,vtx20,p3.x,p3.y,p3.z); - } - - - - /* Gather the quads */ - __forceinline void gather(Vec3vf4& p0, - Vec3vf4& p1, - Vec3vf4& p2, - Vec3vf4& p3, - const Scene *const scene) const - { - const GridMesh* const mesh = scene->get<GridMesh>(geomID()); - const GridMesh::Grid &g = mesh->grid(primID()); - gather(p0,p1,p2,p3,mesh,g); - } - - /* Gather the quads in the motion blur case */ - __forceinline void gatherMB(Vec3vf4& p0, - Vec3vf4& p1, - Vec3vf4& p2, - Vec3vf4& p3, - const Scene *const scene, - const size_t itime, - const float ftime) const - { - const GridMesh* const mesh = scene->get<GridMesh>(geomID()); - const GridMesh::Grid &g = mesh->grid(primID()); - gatherMB(p0,p1,p2,p3,mesh,g,itime,ftime); - } - - /* Gather the quads */ - __forceinline void gather(Vec3fa vtx[16], const Scene *const scene) const - { - const GridMesh* mesh = scene->get<GridMesh>(geomID()); - const GridMesh::Grid &g = mesh->grid(primID()); - - /* first quad always valid */ - const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset; - const size_t vtxID01 = vtxID00 + 1; - const Vec3fa vtx00 = Vec3fa::loadu(mesh->vertexPtr(vtxID00)); - const Vec3fa vtx01 = Vec3fa::loadu(mesh->vertexPtr(vtxID01)); - const size_t vtxID10 = vtxID00 + g.lineVtxOffset; - const size_t vtxID11 = vtxID01 + g.lineVtxOffset; - const Vec3fa vtx10 = Vec3fa::loadu(mesh->vertexPtr(vtxID10)); - const Vec3fa vtx11 = Vec3fa::loadu(mesh->vertexPtr(vtxID11)); - - /* deltaX => vtx02, vtx12 */ - const size_t deltaX = invalid3x3X() ? 0 : 1; - const size_t vtxID02 = vtxID01 + deltaX; - const Vec3fa vtx02 = Vec3fa::loadu(mesh->vertexPtr(vtxID02)); - const size_t vtxID12 = vtxID11 + deltaX; - const Vec3fa vtx12 = Vec3fa::loadu(mesh->vertexPtr(vtxID12)); - - /* deltaY => vtx20, vtx21 */ - const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset; - const size_t vtxID20 = vtxID10 + deltaY; - const size_t vtxID21 = vtxID11 + deltaY; - const Vec3fa vtx20 = Vec3fa::loadu(mesh->vertexPtr(vtxID20)); - const Vec3fa vtx21 = Vec3fa::loadu(mesh->vertexPtr(vtxID21)); - - /* deltaX/deltaY => vtx22 */ - const size_t vtxID22 = vtxID11 + deltaX + deltaY; - const Vec3fa vtx22 = Vec3fa::loadu(mesh->vertexPtr(vtxID22)); - - vtx[ 0] = vtx00; vtx[ 1] = vtx01; vtx[ 2] = vtx11; vtx[ 3] = vtx10; - vtx[ 4] = vtx01; vtx[ 5] = vtx02; vtx[ 6] = vtx12; vtx[ 7] = vtx11; - vtx[ 8] = vtx10; vtx[ 9] = vtx11; vtx[10] = vtx21; vtx[11] = vtx20; - vtx[12] = vtx11; vtx[13] = vtx12; vtx[14] = vtx22; vtx[15] = vtx21; - } - - /* Gather the quads */ - __forceinline void gatherMB(vfloat4 vtx[16], const Scene *const scene, const size_t itime, const float ftime) const - { - const GridMesh* mesh = scene->get<GridMesh>(geomID()); - const GridMesh::Grid &g = mesh->grid(primID()); - - /* first quad always valid */ - const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset; - const size_t vtxID01 = vtxID00 + 1; - const vfloat4 vtx00 = getVertexMB<vfloat4>(mesh,vtxID00,itime,ftime); - const vfloat4 vtx01 = getVertexMB<vfloat4>(mesh,vtxID01,itime,ftime); - const size_t vtxID10 = vtxID00 + g.lineVtxOffset; - const size_t vtxID11 = vtxID01 + g.lineVtxOffset; - const vfloat4 vtx10 = getVertexMB<vfloat4>(mesh,vtxID10,itime,ftime); - const vfloat4 vtx11 = getVertexMB<vfloat4>(mesh,vtxID11,itime,ftime); - - /* deltaX => vtx02, vtx12 */ - const size_t deltaX = invalid3x3X() ? 0 : 1; - const size_t vtxID02 = vtxID01 + deltaX; - const vfloat4 vtx02 = getVertexMB<vfloat4>(mesh,vtxID02,itime,ftime); - const size_t vtxID12 = vtxID11 + deltaX; - const vfloat4 vtx12 = getVertexMB<vfloat4>(mesh,vtxID12,itime,ftime); - - /* deltaY => vtx20, vtx21 */ - const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset; - const size_t vtxID20 = vtxID10 + deltaY; - const size_t vtxID21 = vtxID11 + deltaY; - const vfloat4 vtx20 = getVertexMB<vfloat4>(mesh,vtxID20,itime,ftime); - const vfloat4 vtx21 = getVertexMB<vfloat4>(mesh,vtxID21,itime,ftime); - - /* deltaX/deltaY => vtx22 */ - const size_t vtxID22 = vtxID11 + deltaX + deltaY; - const vfloat4 vtx22 = getVertexMB<vfloat4>(mesh,vtxID22,itime,ftime); - - vtx[ 0] = vtx00; vtx[ 1] = vtx01; vtx[ 2] = vtx11; vtx[ 3] = vtx10; - vtx[ 4] = vtx01; vtx[ 5] = vtx02; vtx[ 6] = vtx12; vtx[ 7] = vtx11; - vtx[ 8] = vtx10; vtx[ 9] = vtx11; vtx[10] = vtx21; vtx[11] = vtx20; - vtx[12] = vtx11; vtx[13] = vtx12; vtx[14] = vtx22; vtx[15] = vtx21; - } - - - /* Calculate the bounds of the subgrid */ - __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const - { - BBox3fa bounds = empty; - FATAL("not implemented yet"); - return bounds; - } - - /* Calculate the linear bounds of the primitive */ - __forceinline LBBox3fa linearBounds(const Scene* const scene, const size_t itime) - { - return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1)); - } - - __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps) - { - LBBox3fa allBounds = empty; - FATAL("not implemented yet"); - return allBounds; - } - - __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range) - { - LBBox3fa allBounds = empty; - FATAL("not implemented yet"); - return allBounds; - } - - - friend embree_ostream operator<<(embree_ostream cout, const SubGrid& sg) { - return cout << "SubGrid " << " ( x " << sg.x() << ", y = " << sg.y() << ", geomID = " << sg.geomID() << ", primID = " << sg.primID() << " )"; - } - - __forceinline unsigned int geomID() const { return _geomID; } - __forceinline unsigned int primID() const { return _primID; } - __forceinline unsigned int x() const { return (unsigned int)_x & 0x7fff; } - __forceinline unsigned int y() const { return (unsigned int)_y & 0x7fff; } - - private: - unsigned short _x; - unsigned short _y; - unsigned int _geomID; // geometry ID of mesh - unsigned int _primID; // primitive ID of primitive inside mesh - }; - - struct SubGridID { - unsigned short x; - unsigned short y; - unsigned int primID; - - __forceinline SubGridID() {} - __forceinline SubGridID(const unsigned int x, const unsigned int y, const unsigned int primID) : - x(x), y(y), primID(primID) {} - }; - - /* QuantizedBaseNode as large subgrid leaf */ - template<int N> - struct SubGridQBVHN - { - /* Virtual interface to query information about the quad type */ - struct Type : public PrimitiveType - { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - - __forceinline size_t size() const - { - for (size_t i=0;i<N;i++) - if (primID(i) == -1) return i; - return N; - } - - __forceinline void clear() { - for (size_t i=0;i<N;i++) - subgridIDs[i] = SubGridID(0,0,(unsigned int)-1); - qnode.clear(); - } - - /* Default constructor */ - __forceinline SubGridQBVHN() { } - - /* Construction from vertices and IDs */ - __forceinline SubGridQBVHN(const unsigned int x[N], - const unsigned int y[N], - const unsigned int primID[N], - const BBox3fa * const subGridBounds, - const unsigned int geomID, - const unsigned int items) - { - clear(); - _geomID = geomID; - - __aligned(64) typename BVHN<N>::AABBNode node; - node.clear(); - for (size_t i=0;i<items;i++) - { - subgridIDs[i] = SubGridID(x[i],y[i],primID[i]); - node.setBounds(i,subGridBounds[i]); - } - qnode.init_dim(node); - } - - __forceinline unsigned int geomID() const { return _geomID; } - __forceinline unsigned int primID(const size_t i) const { assert(i < N); return subgridIDs[i].primID; } - __forceinline unsigned int x(const size_t i) const { assert(i < N); return subgridIDs[i].x; } - __forceinline unsigned int y(const size_t i) const { assert(i < N); return subgridIDs[i].y; } - - __forceinline SubGrid subgrid(const size_t i) const { - assert(i < N); - assert(primID(i) != -1); - return SubGrid(x(i),y(i),geomID(),primID(i)); - } - - public: - SubGridID subgridIDs[N]; - - typename BVHN<N>::QuantizedBaseNode qnode; - - unsigned int _geomID; // geometry ID of mesh - - - friend embree_ostream operator<<(embree_ostream cout, const SubGridQBVHN& sg) { - cout << "SubGridQBVHN " << embree_endl; - for (size_t i=0;i<N;i++) - cout << i << " ( x = " << sg.subgridIDs[i].x << ", y = " << sg.subgridIDs[i].y << ", primID = " << sg.subgridIDs[i].primID << " )" << embree_endl; - cout << "geomID " << sg._geomID << embree_endl; - cout << "lowerX " << sg.qnode.dequantizeLowerX() << embree_endl; - cout << "upperX " << sg.qnode.dequantizeUpperX() << embree_endl; - cout << "lowerY " << sg.qnode.dequantizeLowerY() << embree_endl; - cout << "upperY " << sg.qnode.dequantizeUpperY() << embree_endl; - cout << "lowerZ " << sg.qnode.dequantizeLowerZ() << embree_endl; - cout << "upperZ " << sg.qnode.dequantizeUpperZ() << embree_endl; - return cout; - } - - }; - - template<int N> - typename SubGridQBVHN<N>::Type SubGridQBVHN<N>::type; - - typedef SubGridQBVHN<4> SubGridQBVH4; - typedef SubGridQBVHN<8> SubGridQBVH8; - - - /* QuantizedBaseNode as large subgrid leaf */ - template<int N> - struct SubGridMBQBVHN - { - /* Virtual interface to query information about the quad type */ - struct Type : public PrimitiveType - { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - - __forceinline size_t size() const - { - for (size_t i=0;i<N;i++) - if (primID(i) == -1) return i; - return N; - } - - __forceinline void clear() { - for (size_t i=0;i<N;i++) - subgridIDs[i] = SubGridID(0,0,(unsigned int)-1); - qnode.clear(); - } - - /* Default constructor */ - __forceinline SubGridMBQBVHN() { } - - /* Construction from vertices and IDs */ - __forceinline SubGridMBQBVHN(const unsigned int x[N], - const unsigned int y[N], - const unsigned int primID[N], - const BBox3fa * const subGridBounds0, - const BBox3fa * const subGridBounds1, - const unsigned int geomID, - const float toffset, - const float tscale, - const unsigned int items) - { - clear(); - _geomID = geomID; - time_offset = toffset; - time_scale = tscale; - - __aligned(64) typename BVHN<N>::AABBNode node0,node1; - node0.clear(); - node1.clear(); - for (size_t i=0;i<items;i++) - { - subgridIDs[i] = SubGridID(x[i],y[i],primID[i]); - node0.setBounds(i,subGridBounds0[i]); - node1.setBounds(i,subGridBounds1[i]); - } - qnode.node0.init_dim(node0); - qnode.node1.init_dim(node1); - } - - __forceinline unsigned int geomID() const { return _geomID; } - __forceinline unsigned int primID(const size_t i) const { assert(i < N); return subgridIDs[i].primID; } - __forceinline unsigned int x(const size_t i) const { assert(i < N); return subgridIDs[i].x; } - __forceinline unsigned int y(const size_t i) const { assert(i < N); return subgridIDs[i].y; } - - __forceinline SubGrid subgrid(const size_t i) const { - assert(i < N); - assert(primID(i) != -1); - return SubGrid(x(i),y(i),geomID(),primID(i)); - } - - __forceinline float adjustTime(const float t) const { return time_scale * (t-time_offset); } - - template<int K> - __forceinline vfloat<K> adjustTime(const vfloat<K> &t) const { return time_scale * (t-time_offset); } - - public: - SubGridID subgridIDs[N]; - - typename BVHN<N>::QuantizedBaseNodeMB qnode; - - float time_offset; - float time_scale; - unsigned int _geomID; // geometry ID of mesh - - - friend embree_ostream operator<<(embree_ostream cout, const SubGridMBQBVHN& sg) { - cout << "SubGridMBQBVHN " << embree_endl; - for (size_t i=0;i<N;i++) - cout << i << " ( x = " << sg.subgridIDs[i].x << ", y = " << sg.subgridIDs[i].y << ", primID = " << sg.subgridIDs[i].primID << " )" << embree_endl; - cout << "geomID " << sg._geomID << embree_endl; - cout << "time_offset " << sg.time_offset << embree_endl; - cout << "time_scale " << sg.time_scale << embree_endl; - cout << "lowerX " << sg.qnode.node0.dequantizeLowerX() << embree_endl; - cout << "upperX " << sg.qnode.node0.dequantizeUpperX() << embree_endl; - cout << "lowerY " << sg.qnode.node0.dequantizeLowerY() << embree_endl; - cout << "upperY " << sg.qnode.node0.dequantizeUpperY() << embree_endl; - cout << "lowerZ " << sg.qnode.node0.dequantizeLowerZ() << embree_endl; - cout << "upperZ " << sg.qnode.node0.dequantizeUpperZ() << embree_endl; - cout << "lowerX " << sg.qnode.node1.dequantizeLowerX() << embree_endl; - cout << "upperX " << sg.qnode.node1.dequantizeUpperX() << embree_endl; - cout << "lowerY " << sg.qnode.node1.dequantizeLowerY() << embree_endl; - cout << "upperY " << sg.qnode.node1.dequantizeUpperY() << embree_endl; - cout << "lowerZ " << sg.qnode.node1.dequantizeLowerZ() << embree_endl; - cout << "upperZ " << sg.qnode.node1.dequantizeUpperZ() << embree_endl; - return cout; - } - - }; - -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h deleted file mode 100644 index 045eee4329..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h +++ /dev/null @@ -1,518 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "subgrid.h" -#include "subgrid_intersector_moeller.h" -#include "subgrid_intersector_pluecker.h" - -namespace embree -{ - namespace isa - { - - // ======================================================================================= - // =================================== SubGridIntersectors =============================== - // ======================================================================================= - - - template<int N, bool filter> - struct SubGridIntersector1Moeller - { - typedef SubGridQBVHN<N> Primitive; - typedef SubGridQuadMIntersector1MoellerTrumbore<4,filter> Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const SubGrid& subgrid) - { - STAT3(normal.trav_prims,1,1,1); - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - - Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); - pre.intersect(ray,context,v0,v1,v2,v3,g,subgrid); - } - - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const SubGrid& subgrid) - { - STAT3(shadow.trav_prims,1,1,1); - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - - Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); - return pre.occluded(ray,context,v0,v1,v2,v3,g,subgrid); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid) - { - STAT3(point_query.trav_prims,1,1,1); - AccelSet* accel = (AccelSet*)context->scene->get(subgrid.geomID()); - assert(accel); - context->geomID = subgrid.geomID(); - context->primID = subgrid.primID(); - return accel->pointQuery(query, context); - } - - template<int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; - - for (size_t i=0;i<num;i++) - { - vfloat<Nx> dist; - size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); -#if defined(__AVX__) - STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1); -#endif - while(mask != 0) - { - const size_t ID = bscf(mask); - assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); - - if (unlikely(dist[ID] > ray.tfar)) continue; - intersect(pre,ray,context,prim[i].subgrid(ID)); - } - } - } - template<int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - - { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; - - for (size_t i=0;i<num;i++) - { - vfloat<Nx> dist; - size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); - while(mask != 0) - { - const size_t ID = bscf(mask); - assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); - - if (occluded(pre,ray,context,prim[i].subgrid(ID))) - return true; - } - } - return false; - } - - static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node) - { - bool changed = false; - for (size_t i=0;i<num;i++) - { - vfloat<N> dist; - size_t mask; - if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) { - mask = BVHNQuantizedBaseNodePointQuerySphere1<N>::pointQuery(&prim[i].qnode,tquery,dist); - } else { - mask = BVHNQuantizedBaseNodePointQueryAABB1<N>::pointQuery(&prim[i].qnode,tquery,dist); - } - while(mask != 0) - { - const size_t ID = bscf(mask); - assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); - changed |= pointQuery(query, context, prim[i].subgrid(ID)); - } - } - return changed; - } - }; - - template<int N, bool filter> - struct SubGridIntersector1Pluecker - { - typedef SubGridQBVHN<N> Primitive; - typedef SubGridQuadMIntersector1Pluecker<4,filter> Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const SubGrid& subgrid) - { - STAT3(normal.trav_prims,1,1,1); - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - - Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); - pre.intersect(ray,context,v0,v1,v2,v3,g,subgrid); - } - - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const SubGrid& subgrid) - { - STAT3(shadow.trav_prims,1,1,1); - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - - Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); - return pre.occluded(ray,context,v0,v1,v2,v3,g,subgrid); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid) - { - STAT3(point_query.trav_prims,1,1,1); - AccelSet* accel = (AccelSet*)context->scene->get(subgrid.geomID()); - context->geomID = subgrid.geomID(); - context->primID = subgrid.primID(); - return accel->pointQuery(query, context); - } - - template<int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; - - for (size_t i=0;i<num;i++) - { - vfloat<Nx> dist; - size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); -#if defined(__AVX__) - STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1); -#endif - while(mask != 0) - { - const size_t ID = bscf(mask); - assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); - - if (unlikely(dist[ID] > ray.tfar)) continue; - intersect(pre,ray,context,prim[i].subgrid(ID)); - } - } - } - - template<int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; - - for (size_t i=0;i<num;i++) - { - vfloat<Nx> dist; - size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); - while(mask != 0) - { - const size_t ID = bscf(mask); - assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); - - if (occluded(pre,ray,context,prim[i].subgrid(ID))) - return true; - } - } - return false; - } - - static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node) - { - bool changed = false; - for (size_t i=0;i<num;i++) - { - vfloat<N> dist; - size_t mask; - if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) { - mask = BVHNQuantizedBaseNodePointQuerySphere1<N>::pointQuery(&prim[i].qnode,tquery,dist); - } else { - mask = BVHNQuantizedBaseNodePointQueryAABB1<N>::pointQuery(&prim[i].qnode,tquery,dist); - } -#if defined(__AVX__) - STAT3(point_query.trav_hit_boxes[popcnt(mask)],1,1,1); -#endif - while(mask != 0) - { - const size_t ID = bscf(mask); - assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); - changed |= pointQuery(query, context, prim[i].subgrid(ID)); - } - } - return changed; - } - }; - - template<int N, int K, bool filter> - struct SubGridIntersectorKMoeller - { - typedef SubGridQBVHN<N> Primitive; - typedef SubGridQuadMIntersectorKMoellerTrumbore<4,K,filter> Precalculations; - - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const SubGrid& subgrid) - { - Vec3fa vtx[16]; - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - - subgrid.gather(vtx,context->scene); - for (unsigned int i=0; i<4; i++) - { - const Vec3vf<K> p0 = vtx[i*4+0]; - const Vec3vf<K> p1 = vtx[i*4+1]; - const Vec3vf<K> p2 = vtx[i*4+2]; - const Vec3vf<K> p3 = vtx[i*4+3]; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - pre.intersectK(valid_i,ray,p0,p1,p2,p3,g,subgrid,i,IntersectKEpilogM<4,K,filter>(ray,context,subgrid.geomID(),subgrid.primID(),i)); - } - } - - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const SubGrid& subgrid) - { - vbool<K> valid0 = valid_i; - Vec3fa vtx[16]; - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - - subgrid.gather(vtx,context->scene); - for (unsigned int i=0; i<4; i++) - { - const Vec3vf<K> p0 = vtx[i*4+0]; - const Vec3vf<K> p1 = vtx[i*4+1]; - const Vec3vf<K> p2 = vtx[i*4+2]; - const Vec3vf<K> p3 = vtx[i*4+3]; - STAT3(shadow.trav_prims,1,popcnt(valid0),K); - if (pre.intersectK(valid0,ray,p0,p1,p2,p3,g,subgrid,i,OccludedKEpilogM<4,K,filter>(valid0,ray,context,subgrid.geomID(),subgrid.primID(),i))) - break; - } - return !valid0; - } - - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid) - { - STAT3(normal.trav_prims,1,1,1); - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - - Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); - pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid); - } - - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid) - { - STAT3(shadow.trav_prims,1,1,1); - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); - return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid); - } - - template<bool robust> - static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK; - for (size_t j=0;j<num;j++) - { - size_t m_valid = movemask(prim[j].qnode.validMask()); - vfloat<K> dist; - while(m_valid) - { - const size_t i = bscf(m_valid); - if (none(valid & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue; - intersect(valid,pre,ray,context,prim[j].subgrid(i)); - } - } - } - - template<bool robust> - static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK; - vbool<K> valid0 = valid; - for (size_t j=0;j<num;j++) - { - size_t m_valid = movemask(prim[j].qnode.validMask()); - vfloat<K> dist; - while(m_valid) - { - const size_t i = bscf(m_valid); - if (none(valid0 & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue; - valid0 &= !occluded(valid0,pre,ray,context,prim[j].subgrid(i)); - if (none(valid0)) break; - } - } - return !valid0; - } - - template<int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; - - for (size_t i=0;i<num;i++) - { - vfloat<Nx> dist; - size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); - while(mask != 0) - { - const size_t ID = bscf(mask); - assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); - - if (unlikely(dist[ID] > ray.tfar[k])) continue; - intersect(pre,ray,k,context,prim[i].subgrid(ID)); - } - } - } - - template<int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; - - for (size_t i=0;i<num;i++) - { - vfloat<Nx> dist; - size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); - while(mask != 0) - { - const size_t ID = bscf(mask); - assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); - - if (occluded(pre,ray,k,context,prim[i].subgrid(ID))) - return true; - } - } - return false; - } - }; - - - template<int N, int K, bool filter> - struct SubGridIntersectorKPluecker - { - typedef SubGridQBVHN<N> Primitive; - typedef SubGridQuadMIntersectorKPluecker<4,K,filter> Precalculations; - - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const SubGrid& subgrid) - { - Vec3fa vtx[16]; - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - - subgrid.gather(vtx,context->scene); - for (unsigned int i=0; i<4; i++) - { - const Vec3vf<K> p0 = vtx[i*4+0]; - const Vec3vf<K> p1 = vtx[i*4+1]; - const Vec3vf<K> p2 = vtx[i*4+2]; - const Vec3vf<K> p3 = vtx[i*4+3]; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - pre.intersectK(valid_i,ray,p0,p1,p2,p3,g,subgrid,i,IntersectKEpilogM<4,K,filter>(ray,context,subgrid.geomID(),subgrid.primID(),i)); - } - } - - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const SubGrid& subgrid) - { - vbool<K> valid0 = valid_i; - Vec3fa vtx[16]; - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - - subgrid.gather(vtx,context->scene); - for (unsigned int i=0; i<4; i++) - { - const Vec3vf<K> p0 = vtx[i*4+0]; - const Vec3vf<K> p1 = vtx[i*4+1]; - const Vec3vf<K> p2 = vtx[i*4+2]; - const Vec3vf<K> p3 = vtx[i*4+3]; - STAT3(shadow.trav_prims,1,popcnt(valid0),K); - if (pre.intersectK(valid0,ray,p0,p1,p2,p3,g,subgrid,i,OccludedKEpilogM<4,K,filter>(valid0,ray,context,subgrid.geomID(),subgrid.primID(),i))) - break; - } - return !valid0; - } - - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid) - { - STAT3(normal.trav_prims,1,1,1); - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - - Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); - pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid); - } - - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid) - { - STAT3(shadow.trav_prims,1,1,1); - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene); - return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid); - } - - template<bool robust> - static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK; - for (size_t j=0;j<num;j++) - { - size_t m_valid = movemask(prim[j].qnode.validMask()); - vfloat<K> dist; - while(m_valid) - { - const size_t i = bscf(m_valid); - if (none(valid & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue; - intersect(valid,pre,ray,context,prim[j].subgrid(i)); - } - } - } - - template<bool robust> - static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK; - vbool<K> valid0 = valid; - for (size_t j=0;j<num;j++) - { - size_t m_valid = movemask(prim[j].qnode.validMask()); - vfloat<K> dist; - while(m_valid) - { - const size_t i = bscf(m_valid); - if (none(valid0 & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue; - valid0 &= !occluded(valid0,pre,ray,context,prim[j].subgrid(i)); - if (none(valid0)) break; - } - } - return !valid0; - } - - template<int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; - - for (size_t i=0;i<num;i++) - { - vfloat<Nx> dist; - size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); - while(mask != 0) - { - const size_t ID = bscf(mask); - assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); - - if (unlikely(dist[ID] > ray.tfar[k])) continue; - intersect(pre,ray,k,context,prim[i].subgrid(ID)); - } - } - } - - template<int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; - - for (size_t i=0;i<num;i++) - { - vfloat<Nx> dist; - size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); - while(mask != 0) - { - const size_t ID = bscf(mask); - assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask())); - - if (occluded(pre,ray,k,context,prim[i].subgrid(ID))) - return true; - } - } - return false; - } - }; - - - - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h deleted file mode 100644 index f65b4abf61..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h +++ /dev/null @@ -1,493 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "subgrid.h" -#include "quad_intersector_moeller.h" - -namespace embree -{ - namespace isa - { - - /* ----------------------------- */ - /* -- single ray intersectors -- */ - /* ----------------------------- */ - - template<int M> - __forceinline void interpolateUV(MoellerTrumboreHitM<M> &hit,const GridMesh::Grid &g, const SubGrid& subgrid) - { - /* correct U,V interpolation across the entire grid */ - const vint<M> sx((int)subgrid.x()); - const vint<M> sy((int)subgrid.y()); - const vint<M> sxM(sx + vint<M>(0,1,1,0)); - const vint<M> syM(sy + vint<M>(0,0,1,1)); - const float inv_resX = rcp((float)((int)g.resX-1)); - const float inv_resY = rcp((float)((int)g.resY-1)); - hit.U = (hit.U + (vfloat<M>)sxM * hit.absDen) * inv_resX; - hit.V = (hit.V + (vfloat<M>)syM * hit.absDen) * inv_resY; - } - - template<int M, bool filter> - struct SubGridQuadMIntersector1MoellerTrumbore; - - template<int M, bool filter> - struct SubGridQuadMIntersector1MoellerTrumbore - { - __forceinline SubGridQuadMIntersector1MoellerTrumbore() {} - - __forceinline SubGridQuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} - - __forceinline void intersect(RayHit& ray, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - MoellerTrumboreHitM<M> hit; - MoellerTrumboreIntersector1<M> intersector(ray,nullptr); - Intersect1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); - - /* intersect first triangle */ - if (intersector.intersect(ray,v0,v1,v3,hit)) - { - interpolateUV<M>(hit,g,subgrid); - epilog(hit.valid,hit); - } - - /* intersect second triangle */ - if (intersector.intersect(ray,v2,v3,v1,hit)) - { - hit.U = hit.absDen - hit.U; - hit.V = hit.absDen - hit.V; - interpolateUV<M>(hit,g,subgrid); - epilog(hit.valid,hit); - } - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - MoellerTrumboreHitM<M> hit; - MoellerTrumboreIntersector1<M> intersector(ray,nullptr); - Occluded1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); - - /* intersect first triangle */ - if (intersector.intersect(ray,v0,v1,v3,hit)) - { - interpolateUV<M>(hit,g,subgrid); - if (epilog(hit.valid,hit)) - return true; - } - - /* intersect second triangle */ - if (intersector.intersect(ray,v2,v3,v1,hit)) - { - hit.U = hit.absDen - hit.U; - hit.V = hit.absDen - hit.V; - interpolateUV<M>(hit,g,subgrid); - if (epilog(hit.valid,hit)) - return true; - } - return false; - } - }; - -#if defined (__AVX__) - - /*! Intersects 4 quads with 1 ray using AVX */ - template<bool filter> - struct SubGridQuadMIntersector1MoellerTrumbore<4,filter> - { - __forceinline SubGridQuadMIntersector1MoellerTrumbore() {} - - __forceinline SubGridQuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} - - template<typename Epilog> - __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid& subgrid, const Epilog& epilog) const - { - const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); - const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); -#else - const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); - const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); -#endif - MoellerTrumboreHitM<8> hit; - MoellerTrumboreIntersector1<8> intersector(ray,nullptr); - const vbool8 flags(0,0,0,0,1,1,1,1); - if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit))) - { - vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen; - -#if !defined(EMBREE_BACKFACE_CULLING) - hit.U = select(flags,absDen-V,U); - hit.V = select(flags,absDen-U,V); - hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); -#else - hit.U = select(flags,absDen-U,U); - hit.V = select(flags,absDen-V,V); -#endif - /* correct U,V interpolation across the entire grid */ - const vint8 sx((int)subgrid.x()); - const vint8 sy((int)subgrid.y()); - const vint8 sx8(sx + vint8(0,1,1,0,0,1,1,0)); - const vint8 sy8(sy + vint8(0,0,1,1,0,0,1,1)); - const float inv_resX = rcp((float)((int)g.resX-1)); - const float inv_resY = rcp((float)((int)g.resY-1)); - hit.U = (hit.U + (vfloat8)sx8 * absDen) * inv_resX; - hit.V = (hit.V + (vfloat8)sy8 * absDen) * inv_resY; - - if (unlikely(epilog(hit.valid,hit))) - return true; - } - return false; - } - - __forceinline bool intersect(RayHit& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - return intersect(ray,v0,v1,v2,v3,g,subgrid,Intersect1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - return intersect(ray,v0,v1,v2,v3,g,subgrid,Occluded1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); - } - }; - -#endif - - // ============================================================================================================================ - // ============================================================================================================================ - // ============================================================================================================================ - - - /* ----------------------------- */ - /* -- ray packet intersectors -- */ - /* ----------------------------- */ - - template<int K> - struct SubGridQuadHitK - { - __forceinline SubGridQuadHitK(const vfloat<K>& U, - const vfloat<K>& V, - const vfloat<K>& T, - const vfloat<K>& absDen, - const Vec3vf<K>& Ng, - const vbool<K>& flags, - const GridMesh::Grid &g, - const SubGrid& subgrid, - const unsigned int i) - : U(U), V(V), T(T), absDen(absDen), flags(flags), tri_Ng(Ng), g(g), subgrid(subgrid), i(i) {} - - __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const - { - const vfloat<K> rcpAbsDen = rcp(absDen); - const vfloat<K> t = T * rcpAbsDen; - const vfloat<K> u0 = min(U * rcpAbsDen,1.0f); - const vfloat<K> v0 = min(V * rcpAbsDen,1.0f); - const vfloat<K> u1 = vfloat<K>(1.0f) - u0; - const vfloat<K> v1 = vfloat<K>(1.0f) - v0; - const vfloat<K> uu = select(flags,u1,u0); - const vfloat<K> vv = select(flags,v1,v0); - const unsigned int sx = subgrid.x() + (unsigned int)(i % 2); - const unsigned int sy = subgrid.y() + (unsigned int)(i >>1); - const float inv_resX = rcp((float)(int)(g.resX-1)); - const float inv_resY = rcp((float)(int)(g.resY-1)); - const vfloat<K> u = (uu + (float)(int)sx) * inv_resX; - const vfloat<K> v = (vv + (float)(int)sy) * inv_resY; - const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z); - return std::make_tuple(u,v,t,Ng); - } - - private: - const vfloat<K> U; - const vfloat<K> V; - const vfloat<K> T; - const vfloat<K> absDen; - const vbool<K> flags; - const Vec3vf<K> tri_Ng; - - const GridMesh::Grid &g; - const SubGrid& subgrid; - const size_t i; - }; - - template<int M, int K, bool filter> - struct SubGridQuadMIntersectorKMoellerTrumboreBase - { - __forceinline SubGridQuadMIntersectorKMoellerTrumboreBase(const vbool<K>& valid, const RayK<K>& ray) {} - - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_e1, - const Vec3vf<K>& tri_e2, - const Vec3vf<K>& tri_Ng, - const vbool<K>& flags, - const GridMesh::Grid &g, - const SubGrid &subgrid, - const unsigned int i, - const Epilog& epilog) const - { - /* calculate denominator */ - vbool<K> valid = valid0; - const Vec3vf<K> C = tri_v0 - ray.org; - const Vec3vf<K> R = cross(C,ray.dir); - const vfloat<K> den = dot(tri_Ng,ray.dir); - const vfloat<K> absDen = abs(den); - const vfloat<K> sgnDen = signmsk(den); - - /* test against edge p2 p0 */ - const vfloat<K> U = dot(R,tri_e2) ^ sgnDen; - valid &= U >= 0.0f; - if (likely(none(valid))) return false; - - /* test against edge p0 p1 */ - const vfloat<K> V = dot(R,tri_e1) ^ sgnDen; - valid &= V >= 0.0f; - if (likely(none(valid))) return false; - - /* test against edge p1 p2 */ - const vfloat<K> W = absDen-U-V; - valid &= W >= 0.0f; - if (likely(none(valid))) return false; - - /* perform depth test */ - const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen; - valid &= (absDen*ray.tnear() < T) & (T <= absDen*ray.tfar); - if (unlikely(none(valid))) return false; - - /* perform backface culling */ -#if defined(EMBREE_BACKFACE_CULLING) - valid &= den < vfloat<K>(zero); - if (unlikely(none(valid))) return false; -#else - valid &= den != vfloat<K>(zero); - if (unlikely(none(valid))) return false; -#endif - - /* calculate hit information */ - SubGridQuadHitK<K> hit(U,V,T,absDen,tri_Ng,flags,g,subgrid,i); - return epilog(valid,hit); - } - - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_v1, - const Vec3vf<K>& tri_v2, - const vbool<K>& flags, - const GridMesh::Grid &g, - const SubGrid &subgrid, - const unsigned int i, - const Epilog& epilog) const - { - const Vec3vf<K> e1 = tri_v0-tri_v1; - const Vec3vf<K> e2 = tri_v2-tri_v0; - const Vec3vf<K> Ng = cross(e2,e1); - return intersectK(valid0,ray,tri_v0,e1,e2,Ng,flags,g,subgrid,i,epilog); - } - - template<typename Epilog> - __forceinline bool intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& v0, - const Vec3vf<K>& v1, - const Vec3vf<K>& v2, - const Vec3vf<K>& v3, - const GridMesh::Grid &g, - const SubGrid &subgrid, - const unsigned int i, - const Epilog& epilog) const - { - intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),g,subgrid,i,epilog); - if (none(valid0)) return true; - intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),g,subgrid,i,epilog); - return none(valid0); - } - - static __forceinline bool intersect1(RayK<K>& ray, - size_t k, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, - const Vec3vf<M>& tri_Ng, - MoellerTrumboreHitM<M> &hit) - { - /* calculate denominator */ - const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k); - const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k); - const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O; - const Vec3vf<M> R = cross(C,D); - const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D); - const vfloat<M> absDen = abs(den); - const vfloat<M> sgnDen = signmsk(den); - - /* perform edge tests */ - const vfloat<M> U = dot(R,Vec3vf<M>(tri_e2)) ^ sgnDen; - const vfloat<M> V = dot(R,Vec3vf<M>(tri_e1)) ^ sgnDen; - - /* perform backface culling */ -#if defined(EMBREE_BACKFACE_CULLING) - vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); -#else - vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); -#endif - if (likely(none(valid))) return false; - - /* perform depth test */ - const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen; - valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k])); - if (likely(none(valid))) return false; - - /* calculate hit information */ - new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng); - return true; - } - - static __forceinline bool intersect1(RayK<K>& ray, - size_t k, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - MoellerTrumboreHitM<M> &hit) - { - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v2-v0; - const Vec3vf<M> Ng = cross(e2,e1); - return intersect1(ray,k,v0,e1,e2,Ng,hit); - } - - }; - - template<int M, int K, bool filter> - struct SubGridQuadMIntersectorKMoellerTrumbore : public SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter> - { - __forceinline SubGridQuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) - : SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter>(valid,ray) {} - - __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const - { - Intersect1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); - - MoellerTrumboreHitM<4> hit; - if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,hit)) - { - interpolateUV<M>(hit,g,subgrid); - epilog(hit.valid,hit); - } - - if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,hit)) - { - hit.U = hit.absDen - hit.U; - hit.V = hit.absDen - hit.V; - interpolateUV<M>(hit,g,subgrid); - epilog(hit.valid,hit); - } - - } - - __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const - { - Occluded1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); - - MoellerTrumboreHitM<4> hit; - if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,hit)) - { - interpolateUV<M>(hit,g,subgrid); - if (epilog(hit.valid,hit)) return true; - } - - if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,hit)) - { - hit.U = hit.absDen - hit.U; - hit.V = hit.absDen - hit.V; - interpolateUV<M>(hit,g,subgrid); - if (epilog(hit.valid,hit)) return true; - } - return false; - } - }; - - -#if defined (__AVX__) - - /*! Intersects 4 quads with 1 ray using AVX */ - template<int K, bool filter> - struct SubGridQuadMIntersectorKMoellerTrumbore<4,K,filter> : public SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter> - { - __forceinline SubGridQuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) - : SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {} - - template<typename Epilog> - __forceinline bool intersect1(RayK<K>& ray, size_t k,const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const GridMesh::Grid &g, const SubGrid &subgrid, const Epilog& epilog) const - { - const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); - const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); -#else - const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); - const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); -#endif - const vbool8 flags(0,0,0,0,1,1,1,1); - - MoellerTrumboreHitM<8> hit; - if (SubGridQuadMIntersectorKMoellerTrumboreBase<8,K,filter>::intersect1(ray,k,vtx0,vtx1,vtx2,hit)) - { - vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen; -#if !defined(EMBREE_BACKFACE_CULLING) - hit.U = select(flags,absDen-V,U); - hit.V = select(flags,absDen-U,V); - hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); -#else - hit.U = select(flags,absDen-U,U); - hit.V = select(flags,absDen-V,V); -#endif - - /* correct U,V interpolation across the entire grid */ - const vint8 sx((int)subgrid.x()); - const vint8 sy((int)subgrid.y()); - const vint8 sx8(sx + vint8(0,1,1,0,0,1,1,0)); - const vint8 sy8(sy + vint8(0,0,1,1,0,0,1,1)); - const float inv_resX = rcp((float)((int)g.resX-1)); - const float inv_resY = rcp((float)((int)g.resY-1)); - hit.U = (hit.U + (vfloat8)sx8 * absDen) * inv_resX; - hit.V = (hit.V + (vfloat8)sy8 * absDen) * inv_resY; - if (unlikely(epilog(hit.valid,hit))) - return true; - - } - return false; - } - - __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const - { - return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Intersect1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID())); - } - - __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const - { - return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Occluded1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID())); - } - }; - -#endif - - - - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h deleted file mode 100644 index 1cd88aa799..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h +++ /dev/null @@ -1,508 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "subgrid.h" -#include "quad_intersector_moeller.h" -#include "quad_intersector_pluecker.h" - -namespace embree -{ - namespace isa - { - - template<int M> - struct SubGridQuadHitPlueckerM - { - __forceinline SubGridQuadHitPlueckerM() {} - - __forceinline SubGridQuadHitPlueckerM(const vbool<M>& valid, - const vfloat<M>& U, - const vfloat<M>& V, - const vfloat<M>& UVW, - const vfloat<M>& t, - const Vec3vf<M>& Ng, - const vbool<M>& flags) : valid(valid), vt(t) - { - const vbool<M> invalid = abs(UVW) < min_rcp_input; - const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW)); - const vfloat<M> u = min(U * rcpUVW,1.0f); - const vfloat<M> v = min(V * rcpUVW,1.0f); - const vfloat<M> u1 = vfloat<M>(1.0f) - u; - const vfloat<M> v1 = vfloat<M>(1.0f) - v; -#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING) - vu = select(flags,u1,u); - vv = select(flags,v1,v); - vNg = Vec3vf<M>(Ng.x,Ng.y,Ng.z); -#else - const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f)); - vv = select(flags,u1,v); - vu = select(flags,v1,u); - vNg = Vec3vf<M>(flip*Ng.x,flip*Ng.y,flip*Ng.z); -#endif - } - - __forceinline void finalize() - { - } - - __forceinline Vec2f uv(const size_t i) - { - const float u = vu[i]; - const float v = vv[i]; - return Vec2f(u,v); - } - - __forceinline float t(const size_t i) { return vt[i]; } - __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } - - public: - vbool<M> valid; - vfloat<M> vu; - vfloat<M> vv; - vfloat<M> vt; - Vec3vf<M> vNg; - }; - - template<int M> - __forceinline void interpolateUV(SubGridQuadHitPlueckerM<M> &hit,const GridMesh::Grid &g, const SubGrid& subgrid, const vint<M> &stepX, const vint<M> &stepY) - { - /* correct U,V interpolation across the entire grid */ - const vint<M> sx((int)subgrid.x()); - const vint<M> sy((int)subgrid.y()); - const vint<M> sxM(sx + stepX); - const vint<M> syM(sy + stepY); - const float inv_resX = rcp((float)((int)g.resX-1)); - const float inv_resY = rcp((float)((int)g.resY-1)); - hit.vu = (hit.vu + vfloat<M>(sxM)) * inv_resX; - hit.vv = (hit.vv + vfloat<M>(syM)) * inv_resY; - } - - template<int M> - __forceinline static bool intersectPluecker(Ray& ray, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_v1, - const Vec3vf<M>& tri_v2, - const vbool<M>& flags, - SubGridQuadHitPlueckerM<M> &hit) - { - /* calculate vertices relative to ray origin */ - const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org); - const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir); - const Vec3vf<M> v0 = tri_v0-O; - const Vec3vf<M> v1 = tri_v1-O; - const Vec3vf<M> v2 = tri_v2-O; - - /* calculate triangle edges */ - const Vec3vf<M> e0 = v2-v0; - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v1-v2; - - /* perform edge tests */ - const vfloat<M> U = dot(cross(e0,v2+v0),D); - const vfloat<M> V = dot(cross(e1,v0+v1),D); - const vfloat<M> W = dot(cross(e2,v1+v2),D); - const vfloat<M> UVW = U+V+W; - const vfloat<M> eps = float(ulp)*abs(UVW); -#if defined(EMBREE_BACKFACE_CULLING) - vbool<M> valid = max(U,V,W) <= eps; -#else - vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); -#endif - if (unlikely(none(valid))) return false; - - /* calculate geometry normal and denominator */ - const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); - const vfloat<M> den = twice(dot(Ng,D)); - - /* perform depth test */ - const vfloat<M> T = twice(dot(v0,Ng)); - const vfloat<M> t = rcp(den)*T; - valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar); - valid &= den != vfloat<M>(zero); - if (unlikely(none(valid))) return false; - - /* update hit information */ - new (&hit) SubGridQuadHitPlueckerM<M>(valid,U,V,UVW,t,Ng,flags); - return true; - } - - template<int M, bool filter> - struct SubGridQuadMIntersector1Pluecker; - - template<int M, bool filter> - struct SubGridQuadMIntersector1Pluecker - { - __forceinline SubGridQuadMIntersector1Pluecker() {} - - __forceinline SubGridQuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} - - __forceinline void intersect(RayHit& ray, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - SubGridQuadHitPlueckerM<M> hit; - Intersect1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); - - /* intersect first triangle */ - if (intersectPluecker(ray,v0,v1,v3,vbool<M>(false),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - epilog(hit.valid,hit); - } - - /* intersect second triangle */ - if (intersectPluecker(ray,v2,v3,v1,vbool<M>(true),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - epilog(hit.valid,hit); - } - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - SubGridQuadHitPlueckerM<M> hit; - Occluded1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); - - /* intersect first triangle */ - if (intersectPluecker(ray,v0,v1,v3,vbool<M>(false),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - if (epilog(hit.valid,hit)) - return true; - } - - /* intersect second triangle */ - if (intersectPluecker(ray,v2,v3,v1,vbool<M>(true),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - if (epilog(hit.valid,hit)) - return true; - } - - return false; - } - }; - -#if defined (__AVX__) - - /*! Intersects 4 quads with 1 ray using AVX */ - template<bool filter> - struct SubGridQuadMIntersector1Pluecker<4,filter> - { - __forceinline SubGridQuadMIntersector1Pluecker() {} - - __forceinline SubGridQuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} - - template<typename Epilog> - __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid& subgrid, const Epilog& epilog) const - { - const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); - const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); -#else - const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); - const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); -#endif - SubGridQuadHitPlueckerM<8> hit; - const vbool8 flags(0,0,0,0,1,1,1,1); - if (unlikely(intersectPluecker(ray,vtx0,vtx1,vtx2,flags,hit))) - { - /* correct U,V interpolation across the entire grid */ - interpolateUV<8>(hit,g,subgrid,vint<8>(0,1,1,0,0,1,1,0),vint<8>(0,0,1,1,0,0,1,1)); - if (unlikely(epilog(hit.valid,hit))) - return true; - } - return false; - } - - __forceinline bool intersect(RayHit& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - return intersect(ray,v0,v1,v2,v3,g,subgrid,Intersect1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - return intersect(ray,v0,v1,v2,v3,g,subgrid,Occluded1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); - } - }; - -#endif - - - /* ----------------------------- */ - /* -- ray packet intersectors -- */ - /* ----------------------------- */ - - template<int K> - struct SubGridQuadHitPlueckerK - { - __forceinline SubGridQuadHitPlueckerK(const vfloat<K>& U, - const vfloat<K>& V, - const vfloat<K>& UVW, - const vfloat<K>& t, - const Vec3vf<K>& Ng, - const vbool<K>& flags, - const GridMesh::Grid &g, - const SubGrid& subgrid, - const unsigned int i) - : U(U), V(V), UVW(UVW), t(t), flags(flags), tri_Ng(Ng), g(g), subgrid(subgrid), i(i) {} - - __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const - { - const vbool<K> invalid = abs(UVW) < min_rcp_input; - const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW)); - const vfloat<K> u0 = min(U * rcpUVW,1.0f); - const vfloat<K> v0 = min(V * rcpUVW,1.0f); - const vfloat<K> u1 = vfloat<K>(1.0f) - u0; - const vfloat<K> v1 = vfloat<K>(1.0f) - v0; - const vfloat<K> uu = select(flags,u1,u0); - const vfloat<K> vv = select(flags,v1,v0); - const unsigned int sx = subgrid.x() + (unsigned int)(i % 2); - const unsigned int sy = subgrid.y() + (unsigned int)(i >>1); - const float inv_resX = rcp((float)(int)(g.resX-1)); - const float inv_resY = rcp((float)(int)(g.resY-1)); - const vfloat<K> u = (uu + (float)(int)sx) * inv_resX; - const vfloat<K> v = (vv + (float)(int)sy) * inv_resY; - const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z); - return std::make_tuple(u,v,t,Ng); - } - - private: - const vfloat<K> U; - const vfloat<K> V; - const vfloat<K> UVW; - const vfloat<K> t; - const vfloat<K> absDen; - const vbool<K> flags; - const Vec3vf<K> tri_Ng; - - const GridMesh::Grid &g; - const SubGrid& subgrid; - const size_t i; - }; - - - template<int M, int K, bool filter> - struct SubGridQuadMIntersectorKPlueckerBase - { - __forceinline SubGridQuadMIntersectorKPlueckerBase(const vbool<K>& valid, const RayK<K>& ray) {} - - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_v1, - const Vec3vf<K>& tri_v2, - const Vec3vf<K>& tri_Ng, - const vbool<K>& flags, - const GridMesh::Grid &g, - const SubGrid &subgrid, - const unsigned int i, - const Epilog& epilog) const - { - /* calculate denominator */ - /* calculate vertices relative to ray origin */ - vbool<K> valid = valid0; - const Vec3vf<K> O = ray.org; - const Vec3vf<K> D = ray.dir; - const Vec3vf<K> v0 = tri_v0-O; - const Vec3vf<K> v1 = tri_v1-O; - const Vec3vf<K> v2 = tri_v2-O; - - /* calculate triangle edges */ - const Vec3vf<K> e0 = v2-v0; - const Vec3vf<K> e1 = v0-v1; - const Vec3vf<K> e2 = v1-v2; - - /* perform edge tests */ - const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D); - const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D); - const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D); - const vfloat<K> UVW = U+V+W; - const vfloat<K> eps = float(ulp)*abs(UVW); -#if defined(EMBREE_BACKFACE_CULLING) - valid &= max(U,V,W) <= eps; -#else - valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); -#endif - if (unlikely(none(valid))) return false; - - /* calculate geometry normal and denominator */ - const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2); - const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D)); - - /* perform depth test */ - const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng))); - const vfloat<K> t = rcp(den)*T; - valid &= ray.tnear() <= t & t <= ray.tfar; - valid &= den != vfloat<K>(zero); - if (unlikely(none(valid))) return false; - - /* calculate hit information */ - SubGridQuadHitPlueckerK<K> hit(U,V,UVW,t,tri_Ng,flags,g,subgrid,i); - return epilog(valid,hit); - } - - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& v0, - const Vec3vf<K>& v1, - const Vec3vf<K>& v2, - const vbool<K>& flags, - const GridMesh::Grid &g, - const SubGrid &subgrid, - const unsigned int i, - const Epilog& epilog) const - { - const Vec3vf<K> e1 = v0-v1; - const Vec3vf<K> e2 = v2-v0; - const Vec3vf<K> Ng = cross(e2,e1); - return intersectK(valid0,ray,v0,v1,v2,Ng,flags,g,subgrid,i,epilog); - } - - template<typename Epilog> - __forceinline bool intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& v0, - const Vec3vf<K>& v1, - const Vec3vf<K>& v2, - const Vec3vf<K>& v3, - const GridMesh::Grid &g, - const SubGrid &subgrid, - const unsigned int i, - const Epilog& epilog) const - { - intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),g,subgrid,i,epilog); - if (none(valid0)) return true; - intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),g,subgrid,i,epilog); - return none(valid0); - } - - static __forceinline bool intersect1(RayK<K>& ray, - size_t k, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_v1, - const Vec3vf<M>& tri_v2, - const Vec3vf<M>& tri_Ng, - const vbool<M>& flags, - SubGridQuadHitPlueckerM<M> &hit) - { - /* calculate vertices relative to ray origin */ - const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k); - const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k); - const Vec3vf<M> v0 = tri_v0-O; - const Vec3vf<M> v1 = tri_v1-O; - const Vec3vf<M> v2 = tri_v2-O; - - /* calculate triangle edges */ - const Vec3vf<M> e0 = v2-v0; - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v1-v2; - - /* perform edge tests */ - const vfloat<M> U = dot(cross(e0,v2+v0),D); - const vfloat<M> V = dot(cross(e1,v0+v1),D); - const vfloat<M> W = dot(cross(e2,v1+v2),D); - const vfloat<M> UVW = U+V+W; - const vfloat<M> eps = float(ulp)*abs(UVW); -#if defined(EMBREE_BACKFACE_CULLING) - vbool<M> valid = max(U,V,W) <= eps ; -#else - vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); -#endif - if (unlikely(none(valid))) return false; - - /* calculate geometry normal and denominator */ - const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); - const vfloat<M> den = twice(dot(Ng,D)); - - /* perform depth test */ - const vfloat<M> T = twice(dot(v0,Ng)); - const vfloat<M> t = rcp(den)*T; - valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]); - if (unlikely(none(valid))) return false; - - /* avoid division by 0 */ - valid &= den != vfloat<M>(zero); - if (unlikely(none(valid))) return false; - - /* update hit information */ - new (&hit) SubGridQuadHitPlueckerM<M>(valid,U,V,UVW,t,tri_Ng,flags); - return true; - } - - static __forceinline bool intersect1(RayK<K>& ray, - size_t k, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - const vbool<M>& flags, - SubGridQuadHitPlueckerM<M> &hit) - { - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v2-v0; - const Vec3vf<M> Ng = cross(e2,e1); // FIXME: optimize!!! - return intersect1(ray,k,v0,v1,v2,Ng,flags,hit); - } - - }; - - template<int M, int K, bool filter> - struct SubGridQuadMIntersectorKPluecker : public SubGridQuadMIntersectorKPlueckerBase<M,K,filter> - { - __forceinline SubGridQuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray) - : SubGridQuadMIntersectorKPlueckerBase<M,K,filter>(valid,ray) {} - - __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const - { - Intersect1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); - - SubGridQuadHitPlueckerM<4> hit; - if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,vboolf4(false),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - epilog(hit.valid,hit); - } - - if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,vboolf4(true),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - epilog(hit.valid,hit); - } - - } - - __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const - { - Occluded1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); - - SubGridQuadHitPlueckerM<4> hit; - if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,vboolf4(false),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - if (epilog(hit.valid,hit)) return true; - } - - if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,vboolf4(true),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - if (epilog(hit.valid,hit)) return true; - } - return false; - } - }; - - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h deleted file mode 100644 index 400a88b985..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "subgrid_intersector.h" - -namespace embree -{ - namespace isa - { - template<int N, bool filter> - struct SubGridMBIntersector1Pluecker - { - typedef SubGridMBQBVHN<N> Primitive; - typedef SubGridQuadMIntersector1Pluecker<4,filter> Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const SubGrid& subgrid) - { - STAT3(normal.trav_prims,1,1,1); - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - - float ftime; - const int itime = mesh->timeSegment(ray.time(), ftime); - Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime,ftime); - pre.intersect(ray,context,v0,v1,v2,v3,g,subgrid); - } - - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const SubGrid& subgrid) - { - STAT3(shadow.trav_prims,1,1,1); - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - - float ftime; - const int itime = mesh->timeSegment(ray.time(), ftime); - - Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime,ftime); - return pre.occluded(ray,context,v0,v1,v2,v3,g,subgrid); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, subgrid); - } - - template<int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; - for (size_t i=0;i<num;i++) - { - vfloat<Nx> dist; - const float time = prim[i].adjustTime(ray.time()); - - assert(time <= 1.0f); - size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist); -#if defined(__AVX__) - STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1); -#endif - while(mask != 0) - { - const size_t ID = bscf(mask); - if (unlikely(dist[ID] > ray.tfar)) continue; - intersect(pre,ray,context,prim[i].subgrid(ID)); - } - } - } - - template<int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; - for (size_t i=0;i<num;i++) - { - const float time = prim[i].adjustTime(ray.time()); - assert(time <= 1.0f); - vfloat<Nx> dist; - size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist); - while(mask != 0) - { - const size_t ID = bscf(mask); - if (occluded(pre,ray,context,prim[i].subgrid(ID))) - return true; - } - } - return false; - } - - static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node) - { - assert(false && "not implemented"); - return false; - } - }; - - - template<int N, int K, bool filter> - struct SubGridMBIntersectorKPluecker - { - typedef SubGridMBQBVHN<N> Primitive; - typedef SubGridQuadMIntersectorKPluecker<4,K,filter> Precalculations; - - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const SubGrid& subgrid) - { - size_t m_valid = movemask(valid_i); - while(m_valid) - { - size_t ID = bscf(m_valid); - intersect(pre,ray,ID,context,subgrid); - } - } - - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const SubGrid& subgrid) - { - vbool<K> valid0 = valid_i; - size_t m_valid = movemask(valid_i); - while(m_valid) - { - size_t ID = bscf(m_valid); - if (occluded(pre,ray,ID,context,subgrid)) - clear(valid0,ID); - } - return !valid0; - } - - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid) - { - STAT3(normal.trav_prims,1,1,1); - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - - vfloat<K> ftime; - const vint<K> itime = mesh->timeSegment(ray.time(), ftime); - Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime[k],ftime[k]); - pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid); - } - - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid) - { - STAT3(shadow.trav_prims,1,1,1); - const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID()); - const GridMesh::Grid &g = mesh->grid(subgrid.primID()); - - vfloat<K> ftime; - const vint<K> itime = mesh->timeSegment(ray.time(), ftime); - Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime[k],ftime[k]); - return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid); - } - - template<bool robust> - static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK; - for (size_t j=0;j<num;j++) - { - size_t m_valid = movemask(prim[j].qnode.validMask()); - const vfloat<K> time = prim[j].adjustTime(ray.time()); - - vfloat<K> dist; - while(m_valid) - { - const size_t i = bscf(m_valid); - if (none(valid & isecK.intersectK(&prim[j].qnode,i,tray,time,dist))) continue; - intersect(valid,pre,ray,context,prim[j].subgrid(i)); - } - } - } - - template<bool robust> - static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK; - - vbool<K> valid0 = valid; - for (size_t j=0;j<num;j++) - { - size_t m_valid = movemask(prim[j].qnode.validMask()); - const vfloat<K> time = prim[j].adjustTime(ray.time()); - vfloat<K> dist; - while(m_valid) - { - const size_t i = bscf(m_valid); - if (none(valid0 & isecK.intersectK(&prim[j].qnode,i,tray,time,dist))) continue; - valid0 &= !occluded(valid0,pre,ray,context,prim[j].subgrid(i)); - if (none(valid0)) break; - } - } - return !valid0; - } - - template<int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; - for (size_t i=0;i<num;i++) - { - vfloat<N> dist; - const float time = prim[i].adjustTime(ray.time()[k]); - assert(time <= 1.0f); - - size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist); - while(mask != 0) - { - const size_t ID = bscf(mask); - if (unlikely(dist[ID] > ray.tfar[k])) continue; - intersect(pre,ray,k,context,prim[i].subgrid(ID)); - } - } - } - - template<int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) - { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; - - for (size_t i=0;i<num;i++) - { - vfloat<N> dist; - const float time = prim[i].adjustTime(ray.time()[k]); - assert(time <= 1.0f); - - size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist); - while(mask != 0) - { - const size_t ID = bscf(mask); - if (occluded(pre,ray,k,context,prim[i].subgrid(ID))) - return true; - } - } - return false; - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle.h b/thirdparty/embree-aarch64/kernels/geometry/triangle.h deleted file mode 100644 index 0dedf6dc4c..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/triangle.h +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "primitive.h" - -namespace embree -{ - /* Precalculated representation for M triangles. Stores for each - triangle a base vertex, two edges, and the geometry normal to - speed up intersection calculations */ - template<int M> - struct TriangleM - { - public: - struct Type : public PrimitiveType - { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - - /* Returns maximum number of stored triangles */ - static __forceinline size_t max_size() { return M; } - - /* Returns required number of primitive blocks for N primitives */ - static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } - - public: - - /* Default constructor */ - __forceinline TriangleM() {} - - /* Construction from vertices and IDs */ - __forceinline TriangleM(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const vuint<M>& geomIDs, const vuint<M>& primIDs) - : v0(v0), e1(v0-v1), e2(v2-v0), geomIDs(geomIDs), primIDs(primIDs) {} - - /* Returns a mask that tells which triangles are valid */ - __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); } - - /* Returns true if the specified triangle is valid */ - __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; } - - /* Returns the number of stored triangles */ - __forceinline size_t size() const { return bsf(~movemask(valid())); } - - /* Returns the geometry IDs */ - __forceinline vuint<M>& geomID() { return geomIDs; } - __forceinline const vuint<M>& geomID() const { return geomIDs; } - __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; } - - /* Returns the primitive IDs */ - __forceinline vuint<M>& primID() { return primIDs; } - __forceinline const vuint<M>& primID() const { return primIDs; } - __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } - - /* Calculate the bounds of the triangle */ - __forceinline BBox3fa bounds() const - { - Vec3vf<M> p0 = v0; - Vec3vf<M> p1 = v0-e1; - Vec3vf<M> p2 = v0+e2; - Vec3vf<M> lower = min(p0,p1,p2); - Vec3vf<M> upper = max(p0,p1,p2); - vbool<M> mask = valid(); - lower.x = select(mask,lower.x,vfloat<M>(pos_inf)); - lower.y = select(mask,lower.y,vfloat<M>(pos_inf)); - lower.z = select(mask,lower.z,vfloat<M>(pos_inf)); - upper.x = select(mask,upper.x,vfloat<M>(neg_inf)); - upper.y = select(mask,upper.y,vfloat<M>(neg_inf)); - upper.z = select(mask,upper.z,vfloat<M>(neg_inf)); - return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)), - Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z))); - } - - /* Non temporal store */ - __forceinline static void store_nt(TriangleM* dst, const TriangleM& src) - { - vfloat<M>::store_nt(&dst->v0.x,src.v0.x); - vfloat<M>::store_nt(&dst->v0.y,src.v0.y); - vfloat<M>::store_nt(&dst->v0.z,src.v0.z); - vfloat<M>::store_nt(&dst->e1.x,src.e1.x); - vfloat<M>::store_nt(&dst->e1.y,src.e1.y); - vfloat<M>::store_nt(&dst->e1.z,src.e1.z); - vfloat<M>::store_nt(&dst->e2.x,src.e2.x); - vfloat<M>::store_nt(&dst->e2.y,src.e2.y); - vfloat<M>::store_nt(&dst->e2.z,src.e2.z); - vuint<M>::store_nt(&dst->geomIDs,src.geomIDs); - vuint<M>::store_nt(&dst->primIDs,src.primIDs); - } - - /* Fill triangle from triangle list */ - __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene) - { - vuint<M> vgeomID = -1, vprimID = -1; - Vec3vf<M> v0 = zero, v1 = zero, v2 = zero; - - for (size_t i=0; i<M && begin<end; i++, begin++) - { - const PrimRef& prim = prims[begin]; - const unsigned geomID = prim.geomID(); - const unsigned primID = prim.primID(); - const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(geomID); - const TriangleMesh::Triangle& tri = mesh->triangle(primID); - const Vec3fa& p0 = mesh->vertex(tri.v[0]); - const Vec3fa& p1 = mesh->vertex(tri.v[1]); - const Vec3fa& p2 = mesh->vertex(tri.v[2]); - vgeomID [i] = geomID; - vprimID [i] = primID; - v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; - v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; - v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; - } - TriangleM::store_nt(this,TriangleM(v0,v1,v2,vgeomID,vprimID)); - } - - /* Updates the primitive */ - __forceinline BBox3fa update(TriangleMesh* mesh) - { - BBox3fa bounds = empty; - vuint<M> vgeomID = -1, vprimID = -1; - Vec3vf<M> v0 = zero, v1 = zero, v2 = zero; - - for (size_t i=0; i<M; i++) - { - if (unlikely(geomID(i) == -1)) break; - const unsigned geomId = geomID(i); - const unsigned primId = primID(i); - const TriangleMesh::Triangle& tri = mesh->triangle(primId); - const Vec3fa p0 = mesh->vertex(tri.v[0]); - const Vec3fa p1 = mesh->vertex(tri.v[1]); - const Vec3fa p2 = mesh->vertex(tri.v[2]); - bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2))); - vgeomID [i] = geomId; - vprimID [i] = primId; - v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; - v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; - v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; - } - TriangleM::store_nt(this,TriangleM(v0,v1,v2,vgeomID,vprimID)); - return bounds; - } - - public: - Vec3vf<M> v0; // base vertex of the triangles - Vec3vf<M> e1; // 1st edge of the triangles (v0-v1) - Vec3vf<M> e2; // 2nd edge of the triangles (v2-v0) - private: - vuint<M> geomIDs; // geometry IDs - vuint<M> primIDs; // primitive IDs - }; - - template<int M> - typename TriangleM<M>::Type TriangleM<M>::type; - - typedef TriangleM<4> Triangle4; -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h deleted file mode 100644 index 125a42c5fe..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "triangle.h" -#include "triangle_intersector_moeller.h" - -namespace embree -{ - namespace isa - { - /*! Intersects M triangles with 1 ray */ - template<int M, int Mx, bool filter> - struct TriangleMIntersector1Moeller - { - typedef TriangleM<M> Primitive; - typedef MoellerTrumboreIntersector1<Mx> Precalculations; - - /*! Intersect a ray with the M triangles and updates the hit. */ - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleM<M>& tri) - { - STAT3(normal.trav_prims,1,1,1); - pre.intersectEdge(ray,tri.v0,tri.e1,tri.e2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - /*! Test if the ray is occluded by one of M triangles. */ - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleM<M>& tri) - { - STAT3(shadow.trav_prims,1,1,1); - return pre.intersectEdge(ray,tri.v0,tri.e1,tri.e2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); - } - - }; - - /*! Intersects M triangles with K rays. */ - template<int M, int Mx, int K, bool filter> - struct TriangleMIntersectorKMoeller - { - typedef TriangleM<M> Primitive; - typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; - - /*! Intersects K rays with M triangles. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleM<M>& tri) - { - STAT_USER(0,TriangleM<M>::max_size()); - for (size_t i=0; i<TriangleM<M>::max_size(); i++) - { - if (!tri.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - const Vec3vf<K> p0 = broadcast<vfloat<K>>(tri.v0,i); - const Vec3vf<K> e1 = broadcast<vfloat<K>>(tri.e1,i); - const Vec3vf<K> e2 = broadcast<vfloat<K>>(tri.e2,i); - pre.intersectEdgeK(valid_i,ray,p0,e1,e2,IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); - } - } - - /*! Test for K rays if they are occluded by any of the M triangles. */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleM<M>& tri) - { - vbool<K> valid0 = valid_i; - - for (size_t i=0; i<TriangleM<M>::max_size(); i++) - { - if (!tri.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid0),K); - const Vec3vf<K> p0 = broadcast<vfloat<K>>(tri.v0,i); - const Vec3vf<K> e1 = broadcast<vfloat<K>>(tri.e1,i); - const Vec3vf<K> e2 = broadcast<vfloat<K>>(tri.e2,i); - pre.intersectEdgeK(valid0,ray,p0,e1,e2,OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); - if (none(valid0)) break; - } - return !valid0; - } - - /*! Intersect a ray with M triangles and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleM<M>& tri) - { - STAT3(normal.trav_prims,1,1,1); - pre.intersectEdge(ray,k,tri.v0,tri.e1,tri.e2,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); - } - - /*! Test if the ray is occluded by one of the M triangles. */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleM<M>& tri) - { - STAT3(shadow.trav_prims,1,1,1); - return pre.intersectEdge(ray,k,tri.v0,tri.e1,tri.e2,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h deleted file mode 100644 index b5a8519236..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h +++ /dev/null @@ -1,403 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "triangle.h" -#include "intersector_epilog.h" - -/*! This intersector implements a modified version of the Moeller - * Trumbore intersector from the paper "Fast, Minimum Storage - * Ray-Triangle Intersection". In contrast to the paper we - * precalculate some factors and factor the calculations differently - * to allow precalculating the cross product e1 x e2. The resulting - * algorithm is similar to the fastest one of the paper "Optimizing - * Ray-Triangle Intersection via Automated Search". */ - -namespace embree -{ - namespace isa - { - template<int M> - struct MoellerTrumboreHitM - { - __forceinline MoellerTrumboreHitM() {} - - __forceinline MoellerTrumboreHitM(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const vfloat<M>& absDen, const Vec3vf<M>& Ng) - : U(U), V(V), T(T), absDen(absDen), valid(valid), vNg(Ng) {} - - __forceinline void finalize() - { - const vfloat<M> rcpAbsDen = rcp(absDen); - vt = T * rcpAbsDen; - vu = U * rcpAbsDen; - vv = V * rcpAbsDen; - } - - __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } - __forceinline float t (const size_t i) const { return vt[i]; } - __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } - - public: - vfloat<M> U; - vfloat<M> V; - vfloat<M> T; - vfloat<M> absDen; - - public: - vbool<M> valid; - vfloat<M> vu; - vfloat<M> vv; - vfloat<M> vt; - Vec3vf<M> vNg; - }; - - template<int M> - struct MoellerTrumboreIntersector1 - { - __forceinline MoellerTrumboreIntersector1() {} - - __forceinline MoellerTrumboreIntersector1(const Ray& ray, const void* ptr) {} - - __forceinline bool intersect(const vbool<M>& valid0, - Ray& ray, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, - const Vec3vf<M>& tri_Ng, - MoellerTrumboreHitM<M>& hit) const - { - /* calculate denominator */ - vbool<M> valid = valid0; - const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org); - const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir); - const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O; - const Vec3vf<M> R = cross(C,D); - const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D); - - const vfloat<M> absDen = abs(den); - const vfloat<M> sgnDen = signmsk(den); - - /* perform edge tests */ - const vfloat<M> U = dot(R,Vec3vf<M>(tri_e2)) ^ sgnDen; - const vfloat<M> V = dot(R,Vec3vf<M>(tri_e1)) ^ sgnDen; - - /* perform backface culling */ -#if defined(EMBREE_BACKFACE_CULLING) - valid &= (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); -#else - valid &= (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); -#endif - if (likely(none(valid))) return false; - - /* perform depth test */ - const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen; - valid &= (absDen*vfloat<M>(ray.tnear()) < T) & (T <= absDen*vfloat<M>(ray.tfar)); - if (likely(none(valid))) return false; - - - /* update hit information */ - new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng); - - return true; - } - - __forceinline bool intersectEdge(Ray& ray, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, - MoellerTrumboreHitM<M>& hit) const - { - vbool<M> valid = true; - const Vec3<vfloat<M>> tri_Ng = cross(tri_e2,tri_e1); - return intersect(valid,ray,tri_v0,tri_e1,tri_e2,tri_Ng,hit); - } - - __forceinline bool intersect(Ray& ray, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - MoellerTrumboreHitM<M>& hit) const - { - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v2-v0; - return intersectEdge(ray,v0,e1,e2,hit); - } - - __forceinline bool intersect(const vbool<M>& valid, - Ray& ray, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - MoellerTrumboreHitM<M>& hit) const - { - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v2-v0; - return intersectEdge(valid,ray,v0,e1,e2,hit); - } - - template<typename Epilog> - __forceinline bool intersectEdge(Ray& ray, - const Vec3vf<M>& v0, - const Vec3vf<M>& e1, - const Vec3vf<M>& e2, - const Epilog& epilog) const - { - MoellerTrumboreHitM<M> hit; - if (likely(intersectEdge(ray,v0,e1,e2,hit))) return epilog(hit.valid,hit); - return false; - } - - template<typename Epilog> - __forceinline bool intersect(Ray& ray, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - const Epilog& epilog) const - { - MoellerTrumboreHitM<M> hit; - if (likely(intersect(ray,v0,v1,v2,hit))) return epilog(hit.valid,hit); - return false; - } - - template<typename Epilog> - __forceinline bool intersect(const vbool<M>& valid, - Ray& ray, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - const Epilog& epilog) const - { - MoellerTrumboreHitM<M> hit; - if (likely(intersect(valid,ray,v0,v1,v2,hit))) return epilog(hit.valid,hit); - return false; - } - }; - - template<int K> - struct MoellerTrumboreHitK - { - __forceinline MoellerTrumboreHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& T, const vfloat<K>& absDen, const Vec3vf<K>& Ng) - : U(U), V(V), T(T), absDen(absDen), Ng(Ng) {} - - __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const - { - const vfloat<K> rcpAbsDen = rcp(absDen); - const vfloat<K> t = T * rcpAbsDen; - const vfloat<K> u = U * rcpAbsDen; - const vfloat<K> v = V * rcpAbsDen; - return std::make_tuple(u,v,t,Ng); - } - - private: - const vfloat<K> U; - const vfloat<K> V; - const vfloat<K> T; - const vfloat<K> absDen; - const Vec3vf<K> Ng; - }; - - template<int M, int K> - struct MoellerTrumboreIntersectorK - { - __forceinline MoellerTrumboreIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {} - - /*! Intersects K rays with one of M triangles. */ - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - //RayK<K>& ray, - const Vec3vf<K>& ray_org, - const Vec3vf<K>& ray_dir, - const vfloat<K>& ray_tnear, - const vfloat<K>& ray_tfar, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_e1, - const Vec3vf<K>& tri_e2, - const Vec3vf<K>& tri_Ng, - const Epilog& epilog) const - { - /* calculate denominator */ - vbool<K> valid = valid0; - const Vec3vf<K> C = tri_v0 - ray_org; - const Vec3vf<K> R = cross(C,ray_dir); - const vfloat<K> den = dot(tri_Ng,ray_dir); - const vfloat<K> absDen = abs(den); - const vfloat<K> sgnDen = signmsk(den); - - /* test against edge p2 p0 */ - const vfloat<K> U = dot(tri_e2,R) ^ sgnDen; - valid &= U >= 0.0f; - if (likely(none(valid))) return false; - - /* test against edge p0 p1 */ - const vfloat<K> V = dot(tri_e1,R) ^ sgnDen; - valid &= V >= 0.0f; - if (likely(none(valid))) return false; - - /* test against edge p1 p2 */ - const vfloat<K> W = absDen-U-V; - valid &= W >= 0.0f; - if (likely(none(valid))) return false; - - /* perform depth test */ - const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen; - valid &= (absDen*ray_tnear < T) & (T <= absDen*ray_tfar); - if (unlikely(none(valid))) return false; - - /* perform backface culling */ -#if defined(EMBREE_BACKFACE_CULLING) - valid &= den < vfloat<K>(zero); - if (unlikely(none(valid))) return false; -#else - valid &= den != vfloat<K>(zero); - if (unlikely(none(valid))) return false; -#endif - - /* calculate hit information */ - MoellerTrumboreHitK<K> hit(U,V,T,absDen,tri_Ng); - return epilog(valid,hit); - } - - /*! Intersects K rays with one of M triangles. */ - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_v1, - const Vec3vf<K>& tri_v2, - const Epilog& epilog) const - { - const Vec3vf<K> e1 = tri_v0-tri_v1; - const Vec3vf<K> e2 = tri_v2-tri_v0; - const Vec3vf<K> Ng = cross(e2,e1); - return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,epilog); - } - - /*! Intersects K rays with one of M triangles. */ - template<typename Epilog> - __forceinline vbool<K> intersectEdgeK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_e1, - const Vec3vf<K>& tri_e2, - const Epilog& epilog) const - { - const Vec3vf<K> tri_Ng = cross(tri_e2,tri_e1); - return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,tri_e1,tri_e2,tri_Ng,epilog); - } - - /*! Intersect k'th ray from ray packet of size K with M triangles. */ - __forceinline bool intersectEdge(RayK<K>& ray, - size_t k, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, - MoellerTrumboreHitM<M>& hit) const - { - /* calculate denominator */ - typedef Vec3vf<M> Vec3vfM; - const Vec3vf<M> tri_Ng = cross(tri_e2,tri_e1); - - const Vec3vfM O = broadcast<vfloat<M>>(ray.org,k); - const Vec3vfM D = broadcast<vfloat<M>>(ray.dir,k); - const Vec3vfM C = Vec3vfM(tri_v0) - O; - const Vec3vfM R = cross(C,D); - const vfloat<M> den = dot(Vec3vfM(tri_Ng),D); - const vfloat<M> absDen = abs(den); - const vfloat<M> sgnDen = signmsk(den); - - /* perform edge tests */ - const vfloat<M> U = dot(Vec3vf<M>(tri_e2),R) ^ sgnDen; - const vfloat<M> V = dot(Vec3vf<M>(tri_e1),R) ^ sgnDen; - - /* perform backface culling */ -#if defined(EMBREE_BACKFACE_CULLING) - vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); -#else - vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); -#endif - if (likely(none(valid))) return false; - - /* perform depth test */ - const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen; - valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k])); - if (likely(none(valid))) return false; - - /* calculate hit information */ - new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng); - return true; - } - - __forceinline bool intersectEdge(RayK<K>& ray, - size_t k, - const BBox<vfloat<M>>& time_range, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, - MoellerTrumboreHitM<M>& hit) const - { - if (likely(intersect(ray,k,tri_v0,tri_e1,tri_e2,hit))) - { - hit.valid &= time_range.lower <= vfloat<M>(ray.time[k]); - hit.valid &= vfloat<M>(ray.time[k]) < time_range.upper; - return any(hit.valid); - } - return false; - } - - template<typename Epilog> - __forceinline bool intersectEdge(RayK<K>& ray, - size_t k, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, - const Epilog& epilog) const - { - MoellerTrumboreHitM<M> hit; - if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit); - return false; - } - - template<typename Epilog> - __forceinline bool intersectEdge(RayK<K>& ray, - size_t k, - const BBox<vfloat<M>>& time_range, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, - const Epilog& epilog) const - { - MoellerTrumboreHitM<M> hit; - if (likely(intersectEdge(ray,k,time_range,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit); - return false; - } - - template<typename Epilog> - __forceinline bool intersect(RayK<K>& ray, - size_t k, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - const Epilog& epilog) const - { - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v2-v0; - return intersectEdge(ray,k,v0,e1,e2,epilog); - } - - template<typename Epilog> - __forceinline bool intersect(RayK<K>& ray, - size_t k, - const BBox<vfloat<M>>& time_range, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - const Epilog& epilog) const - { - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v2-v0; - return intersectEdge(ray,k,time_range,v0,e1,e2,epilog); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h deleted file mode 100644 index f1de99d208..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "triangle.h" -#include "trianglev.h" -#include "trianglev_mb.h" -#include "intersector_epilog.h" - -/*! Modified Pluecker ray/triangle intersector. The test first shifts - * the ray origin into the origin of the coordinate system and then - * uses Pluecker coordinates for the intersection. Due to the shift, - * the Pluecker coordinate calculation simplifies and the tests get - * numerically stable. The edge equations are watertight along the - * edge for neighboring triangles. */ - -namespace embree -{ - namespace isa - { - template<int M, typename UVMapper> - struct PlueckerHitM - { - __forceinline PlueckerHitM(const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& UVW, const vfloat<M>& t, const Vec3vf<M>& Ng, const UVMapper& mapUV) - : U(U), V(V), UVW(UVW), mapUV(mapUV), vt(t), vNg(Ng) {} - - __forceinline void finalize() - { - const vbool<M> invalid = abs(UVW) < min_rcp_input; - const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW)); - vu = U * rcpUVW; - vv = V * rcpUVW; - mapUV(vu,vv); - } - - __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } - __forceinline float t (const size_t i) const { return vt[i]; } - __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } - - private: - const vfloat<M> U; - const vfloat<M> V; - const vfloat<M> UVW; - const UVMapper& mapUV; - - public: - vfloat<M> vu; - vfloat<M> vv; - vfloat<M> vt; - Vec3vf<M> vNg; - }; - - template<int M> - struct PlueckerIntersector1 - { - __forceinline PlueckerIntersector1() {} - - __forceinline PlueckerIntersector1(const Ray& ray, const void* ptr) {} - - template<typename UVMapper, typename Epilog> - __forceinline bool intersect(Ray& ray, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_v1, - const Vec3vf<M>& tri_v2, - const UVMapper& mapUV, - const Epilog& epilog) const - { - /* calculate vertices relative to ray origin */ - const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org); - const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir); - const Vec3vf<M> v0 = tri_v0-O; - const Vec3vf<M> v1 = tri_v1-O; - const Vec3vf<M> v2 = tri_v2-O; - - /* calculate triangle edges */ - const Vec3vf<M> e0 = v2-v0; - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v1-v2; - - /* perform edge tests */ - const vfloat<M> U = dot(cross(e0,v2+v0),D); - const vfloat<M> V = dot(cross(e1,v0+v1),D); - const vfloat<M> W = dot(cross(e2,v1+v2),D); - const vfloat<M> UVW = U+V+W; - const vfloat<M> eps = float(ulp)*abs(UVW); -#if defined(EMBREE_BACKFACE_CULLING) - vbool<M> valid = max(U,V,W) <= eps; -#else - vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); -#endif - if (unlikely(none(valid))) return false; - - /* calculate geometry normal and denominator */ - const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); - const vfloat<M> den = twice(dot(Ng,D)); - - /* perform depth test */ - const vfloat<M> T = twice(dot(v0,Ng)); - const vfloat<M> t = rcp(den)*T; - valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar); - valid &= den != vfloat<M>(zero); - if (unlikely(none(valid))) return false; - - /* update hit information */ - PlueckerHitM<M,UVMapper> hit(U,V,UVW,t,Ng,mapUV); - return epilog(valid,hit); - } - }; - - template<int K, typename UVMapper> - struct PlueckerHitK - { - __forceinline PlueckerHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& UVW, const vfloat<K>& t, const Vec3vf<K>& Ng, const UVMapper& mapUV) - : U(U), V(V), UVW(UVW), t(t), Ng(Ng), mapUV(mapUV) {} - - __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const - { - const vbool<K> invalid = abs(UVW) < min_rcp_input; - const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW)); - vfloat<K> u = U * rcpUVW; - vfloat<K> v = V * rcpUVW; - mapUV(u,v); - return std::make_tuple(u,v,t,Ng); - } - - private: - const vfloat<K> U; - const vfloat<K> V; - const vfloat<K> UVW; - const vfloat<K> t; - const Vec3vf<K> Ng; - const UVMapper& mapUV; - }; - - template<int M, int K> - struct PlueckerIntersectorK - { - __forceinline PlueckerIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {} - - /*! Intersects K rays with one of M triangles. */ - template<typename UVMapper, typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_v1, - const Vec3vf<K>& tri_v2, - const UVMapper& mapUV, - const Epilog& epilog) const - { - /* calculate vertices relative to ray origin */ - vbool<K> valid = valid0; - const Vec3vf<K> O = ray.org; - const Vec3vf<K> D = ray.dir; - const Vec3vf<K> v0 = tri_v0-O; - const Vec3vf<K> v1 = tri_v1-O; - const Vec3vf<K> v2 = tri_v2-O; - - /* calculate triangle edges */ - const Vec3vf<K> e0 = v2-v0; - const Vec3vf<K> e1 = v0-v1; - const Vec3vf<K> e2 = v1-v2; - - /* perform edge tests */ - const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D); - const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D); - const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D); - const vfloat<K> UVW = U+V+W; - const vfloat<K> eps = float(ulp)*abs(UVW); -#if defined(EMBREE_BACKFACE_CULLING) - valid &= max(U,V,W) <= eps; -#else - valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); -#endif - if (unlikely(none(valid))) return false; - - /* calculate geometry normal and denominator */ - const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2); - const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D)); - - /* perform depth test */ - const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng))); - const vfloat<K> t = rcp(den)*T; - valid &= ray.tnear() <= t & t <= ray.tfar; - valid &= den != vfloat<K>(zero); - if (unlikely(none(valid))) return false; - - /* calculate hit information */ - PlueckerHitK<K,UVMapper> hit(U,V,UVW,t,Ng,mapUV); - return epilog(valid,hit); - } - - /*! Intersect k'th ray from ray packet of size K with M triangles. */ - template<typename UVMapper, typename Epilog> - __forceinline bool intersect(RayK<K>& ray, size_t k, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_v1, - const Vec3vf<M>& tri_v2, - const UVMapper& mapUV, - const Epilog& epilog) const - { - /* calculate vertices relative to ray origin */ - const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k); - const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k); - const Vec3vf<M> v0 = tri_v0-O; - const Vec3vf<M> v1 = tri_v1-O; - const Vec3vf<M> v2 = tri_v2-O; - - /* calculate triangle edges */ - const Vec3vf<M> e0 = v2-v0; - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v1-v2; - - /* perform edge tests */ - const vfloat<M> U = dot(cross(e0,v2+v0),D); - const vfloat<M> V = dot(cross(e1,v0+v1),D); - const vfloat<M> W = dot(cross(e2,v1+v2),D); - const vfloat<M> UVW = U+V+W; - const vfloat<M> eps = float(ulp)*abs(UVW); -#if defined(EMBREE_BACKFACE_CULLING) - vbool<M> valid = max(U,V,W) <= eps; -#else - vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); -#endif - if (unlikely(none(valid))) return false; - - /* calculate geometry normal and denominator */ - const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); - const vfloat<M> den = twice(dot(Ng,D)); - - /* perform depth test */ - const vfloat<M> T = twice(dot(v0,Ng)); - const vfloat<M> t = rcp(den)*T; - valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]); - if (unlikely(none(valid))) return false; - - /* avoid division by 0 */ - valid &= den != vfloat<M>(zero); - if (unlikely(none(valid))) return false; - - /* update hit information */ - PlueckerHitM<M,UVMapper> hit(U,V,UVW,t,Ng,mapUV); - return epilog(valid,hit); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h deleted file mode 100644 index 63e649d8fb..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h +++ /dev/null @@ -1,418 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "triangle.h" -#include "intersector_epilog.h" - -/*! This intersector implements a modified version of the Woop's ray-triangle intersection test */ - -namespace embree -{ - namespace isa - { - template<int M> - struct WoopHitM - { - __forceinline WoopHitM() {} - - __forceinline WoopHitM(const vbool<M>& valid, - const vfloat<M>& U, - const vfloat<M>& V, - const vfloat<M>& T, - const vfloat<M>& inv_det, - const Vec3vf<M>& Ng) - : U(U), V(V), T(T), inv_det(inv_det), valid(valid), vNg(Ng) {} - - __forceinline void finalize() - { - vt = T; - vu = U*inv_det; - vv = V*inv_det; - } - - __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } - __forceinline float t (const size_t i) const { return vt[i]; } - __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } - - private: - const vfloat<M> U; - const vfloat<M> V; - const vfloat<M> T; - const vfloat<M> inv_det; - - public: - const vbool<M> valid; - vfloat<M> vu; - vfloat<M> vv; - vfloat<M> vt; - Vec3vf<M> vNg; - }; - - template<int M> - struct WoopPrecalculations1 - { - unsigned int kx,ky,kz; - Vec3vf<M> org; - Vec3fa S; - __forceinline WoopPrecalculations1() {} - - __forceinline WoopPrecalculations1(const Ray& ray, const void* ptr) - { - kz = maxDim(abs(ray.dir)); - kx = (kz+1) % 3; - ky = (kx+1) % 3; - const float inv_dir_kz = rcp(ray.dir[kz]); - if (ray.dir[kz]) std::swap(kx,ky); - S.x = ray.dir[kx] * inv_dir_kz; - S.y = ray.dir[ky] * inv_dir_kz; - S.z = inv_dir_kz; - org = Vec3vf<M>(ray.org[kx],ray.org[ky],ray.org[kz]); - } - }; - - - template<int M> - struct WoopIntersector1 - { - - typedef WoopPrecalculations1<M> Precalculations; - - __forceinline WoopIntersector1() {} - - __forceinline WoopIntersector1(const Ray& ray, const void* ptr) {} - - static __forceinline bool intersect(const vbool<M>& valid0, - Ray& ray, - const Precalculations& pre, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_v1, - const Vec3vf<M>& tri_v2, - WoopHitM<M>& hit) - { - vbool<M> valid = valid0; - - /* vertices relative to ray origin */ - const Vec3vf<M> org = Vec3vf<M>(pre.org.x,pre.org.y,pre.org.z); - const Vec3vf<M> A = Vec3vf<M>(tri_v0[pre.kx],tri_v0[pre.ky],tri_v0[pre.kz]) - org; - const Vec3vf<M> B = Vec3vf<M>(tri_v1[pre.kx],tri_v1[pre.ky],tri_v1[pre.kz]) - org; - const Vec3vf<M> C = Vec3vf<M>(tri_v2[pre.kx],tri_v2[pre.ky],tri_v2[pre.kz]) - org; - - /* shear and scale vertices */ - const vfloat<M> Ax = nmadd(A.z,pre.S.x,A.x); - const vfloat<M> Ay = nmadd(A.z,pre.S.y,A.y); - const vfloat<M> Bx = nmadd(B.z,pre.S.x,B.x); - const vfloat<M> By = nmadd(B.z,pre.S.y,B.y); - const vfloat<M> Cx = nmadd(C.z,pre.S.x,C.x); - const vfloat<M> Cy = nmadd(C.z,pre.S.y,C.y); - - /* scaled barycentric */ - const vfloat<M> U0 = Cx*By; - const vfloat<M> U1 = Cy*Bx; - const vfloat<M> V0 = Ax*Cy; - const vfloat<M> V1 = Ay*Cx; - const vfloat<M> W0 = Bx*Ay; - const vfloat<M> W1 = By*Ax; -#if !defined(__AVX512F__) - valid &= (U0 >= U1) & (V0 >= V1) & (W0 >= W1) | - (U0 <= U1) & (V0 <= V1) & (W0 <= W1); -#else - valid &= ge(ge(U0 >= U1,V0,V1),W0,W1) | le(le(U0 <= U1,V0,V1),W0,W1); -#endif - - if (likely(none(valid))) return false; - const vfloat<M> U = U0-U1; - const vfloat<M> V = V0-V1; - const vfloat<M> W = W0-W1; - - const vfloat<M> det = U+V+W; - - valid &= det != 0.0f; - const vfloat<M> inv_det = rcp(det); - - const vfloat<M> Az = pre.S.z * A.z; - const vfloat<M> Bz = pre.S.z * B.z; - const vfloat<M> Cz = pre.S.z * C.z; - const vfloat<M> T = madd(U,Az,madd(V,Bz,W*Cz)); - const vfloat<M> t = T * inv_det; - /* perform depth test */ - valid &= (vfloat<M>(ray.tnear()) < t) & (t <= vfloat<M>(ray.tfar)); - if (likely(none(valid))) return false; - - const Vec3vf<M> tri_Ng = cross(tri_v2-tri_v0,tri_v0-tri_v1); - - /* update hit information */ - new (&hit) WoopHitM<M>(valid,U,V,t,inv_det,tri_Ng); - return true; - } - - static __forceinline bool intersect(Ray& ray, - const Precalculations& pre, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - WoopHitM<M>& hit) - { - vbool<M> valid = true; - return intersect(valid,ray,pre,v0,v1,v2,hit); - } - - - template<typename Epilog> - static __forceinline bool intersect(Ray& ray, - const Precalculations& pre, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - const Epilog& epilog) - { - WoopHitM<M> hit; - if (likely(intersect(ray,pre,v0,v1,v2,hit))) return epilog(hit.valid,hit); - return false; - } - - template<typename Epilog> - static __forceinline bool intersect(const vbool<M>& valid, - Ray& ray, - const Precalculations& pre, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - const Epilog& epilog) - { - WoopHitM<M> hit; - if (likely(intersect(valid,ray,pre,v0,v1,v2,hit))) return epilog(hit.valid,hit); - return false; - } - }; - -#if 0 - template<int K> - struct WoopHitK - { - __forceinline WoopHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& T, const vfloat<K>& absDen, const Vec3vf<K>& Ng) - : U(U), V(V), T(T), absDen(absDen), Ng(Ng) {} - - __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const - { - const vfloat<K> rcpAbsDen = rcp(absDen); - const vfloat<K> t = T * rcpAbsDen; - const vfloat<K> u = U * rcpAbsDen; - const vfloat<K> v = V * rcpAbsDen; - return std::make_tuple(u,v,t,Ng); - } - - private: - const vfloat<K> U; - const vfloat<K> V; - const vfloat<K> T; - const vfloat<K> absDen; - const Vec3vf<K> Ng; - }; - - template<int M, int K> - struct WoopIntersectorK - { - __forceinline WoopIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {} - - /*! Intersects K rays with one of M triangles. */ - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - //RayK<K>& ray, - const Vec3vf<K>& ray_org, - const Vec3vf<K>& ray_dir, - const vfloat<K>& ray_tnear, - const vfloat<K>& ray_tfar, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_e1, - const Vec3vf<K>& tri_e2, - const Vec3vf<K>& tri_Ng, - const Epilog& epilog) const - { - /* calculate denominator */ - vbool<K> valid = valid0; - const Vec3vf<K> C = tri_v0 - ray_org; - const Vec3vf<K> R = cross(C,ray_dir); - const vfloat<K> den = dot(tri_Ng,ray_dir); - const vfloat<K> absDen = abs(den); - const vfloat<K> sgnDen = signmsk(den); - - /* test against edge p2 p0 */ - const vfloat<K> U = dot(tri_e2,R) ^ sgnDen; - valid &= U >= 0.0f; - if (likely(none(valid))) return false; - - /* test against edge p0 p1 */ - const vfloat<K> V = dot(tri_e1,R) ^ sgnDen; - valid &= V >= 0.0f; - if (likely(none(valid))) return false; - - /* test against edge p1 p2 */ - const vfloat<K> W = absDen-U-V; - valid &= W >= 0.0f; - if (likely(none(valid))) return false; - - /* perform depth test */ - const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen; - valid &= (absDen*ray_tnear < T) & (T <= absDen*ray_tfar); - if (unlikely(none(valid))) return false; - - /* perform backface culling */ -#if defined(EMBREE_BACKFACE_CULLING) - valid &= den < vfloat<K>(zero); - if (unlikely(none(valid))) return false; -#else - valid &= den != vfloat<K>(zero); - if (unlikely(none(valid))) return false; -#endif - - /* calculate hit information */ - WoopHitK<K> hit(U,V,T,absDen,tri_Ng); - return epilog(valid,hit); - } - - /*! Intersects K rays with one of M triangles. */ - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_v1, - const Vec3vf<K>& tri_v2, - const Epilog& epilog) const - { - const Vec3vf<K> e1 = tri_v0-tri_v1; - const Vec3vf<K> e2 = tri_v2-tri_v0; - const Vec3vf<K> Ng = cross(e2,e1); - return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,epilog); - } - - /*! Intersects K rays with one of M triangles. */ - template<typename Epilog> - __forceinline vbool<K> intersectEdgeK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_e1, - const Vec3vf<K>& tri_e2, - const Epilog& epilog) const - { - const Vec3vf<K> tri_Ng = cross(tri_e2,tri_e1); - return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,tri_e1,tri_e2,tri_Ng,epilog); - } - - /*! Intersect k'th ray from ray packet of size K with M triangles. */ - __forceinline bool intersectEdge(RayK<K>& ray, - size_t k, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, - WoopHitM<M>& hit) const - { - /* calculate denominator */ - typedef Vec3vf<M> Vec3vfM; - const Vec3vf<M> tri_Ng = cross(tri_e2,tri_e1); - - const Vec3vfM O = broadcast<vfloat<M>>(ray.org,k); - const Vec3vfM D = broadcast<vfloat<M>>(ray.dir,k); - const Vec3vfM C = Vec3vfM(tri_v0) - O; - const Vec3vfM R = cross(C,D); - const vfloat<M> den = dot(Vec3vfM(tri_Ng),D); - const vfloat<M> absDen = abs(den); - const vfloat<M> sgnDen = signmsk(den); - - /* perform edge tests */ - const vfloat<M> U = dot(Vec3vf<M>(tri_e2),R) ^ sgnDen; - const vfloat<M> V = dot(Vec3vf<M>(tri_e1),R) ^ sgnDen; - - /* perform backface culling */ -#if defined(EMBREE_BACKFACE_CULLING) - vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); -#else - vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); -#endif - if (likely(none(valid))) return false; - - /* perform depth test */ - const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen; - valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k])); - if (likely(none(valid))) return false; - - /* calculate hit information */ - new (&hit) WoopHitM<M>(valid,U,V,T,absDen,tri_Ng); - return true; - } - - __forceinline bool intersectEdge(RayK<K>& ray, - size_t k, - const BBox<vfloat<M>>& time_range, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, - WoopHitM<M>& hit) const - { - if (likely(intersect(ray,k,tri_v0,tri_e1,tri_e2,hit))) - { - hit.valid &= time_range.lower <= vfloat<M>(ray.time[k]); - hit.valid &= vfloat<M>(ray.time[k]) < time_range.upper; - return any(hit.valid); - } - return false; - } - - template<typename Epilog> - __forceinline bool intersectEdge(RayK<K>& ray, - size_t k, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, - const Epilog& epilog) const - { - WoopHitM<M> hit; - if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit); - return false; - } - - template<typename Epilog> - __forceinline bool intersectEdge(RayK<K>& ray, - size_t k, - const BBox<vfloat<M>>& time_range, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, - const Epilog& epilog) const - { - WoopHitM<M> hit; - if (likely(intersectEdge(ray,k,time_range,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit); - return false; - } - - template<typename Epilog> - __forceinline bool intersect(RayK<K>& ray, - size_t k, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - const Epilog& epilog) const - { - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v2-v0; - return intersectEdge(ray,k,v0,e1,e2,epilog); - } - - template<typename Epilog> - __forceinline bool intersect(RayK<K>& ray, - size_t k, - const BBox<vfloat<M>>& time_range, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - const Epilog& epilog) const - { - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v2-v0; - return intersectEdge(ray,k,time_range,v0,e1,e2,epilog); - } - }; -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h deleted file mode 100644 index 91b35c36f3..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "primitive.h" - -namespace embree -{ - namespace isa - { - struct TriangleTriangleIntersector - { - __forceinline static float T(float pa0, float pa1, float da0, float da1) { - return pa0 + (pa1-pa0)*da0/(da0-da1); - } - - __forceinline static bool point_line_side(const Vec2f& p, const Vec2f& a0, const Vec2f& a1) { - return det(p-a0,a0-a1) >= 0.0f; - } - - __forceinline static bool point_inside_triangle(const Vec2f& p, const Vec2f& a, const Vec2f& b, const Vec2f& c) - { - const bool pab = point_line_side(p,a,b); - const bool pbc = point_line_side(p,b,c); - const bool pca = point_line_side(p,c,a); - return pab == pbc && pab == pca; - } - - __forceinline static bool intersect_line_line(const Vec2f& a0, const Vec2f& a1, const Vec2f& b0, const Vec2f& b1) - { - const bool different_sides0 = point_line_side(b0,a0,a1) != point_line_side(b1,a0,a1); - const bool different_sides1 = point_line_side(a0,b0,b1) != point_line_side(a1,b0,b1); - return different_sides0 && different_sides1; - } - - __forceinline static bool intersect_triangle_triangle (const Vec2f& a0, const Vec2f& a1, const Vec2f& a2, - const Vec2f& b0, const Vec2f& b1, const Vec2f& b2) - { - const bool a01_b01 = intersect_line_line(a0,a1,b0,b1); - if (a01_b01) return true; - const bool a01_b12 = intersect_line_line(a0,a1,b1,b2); - if (a01_b12) return true; - const bool a01_b20 = intersect_line_line(a0,a1,b2,b0); - if (a01_b20) return true; - const bool a12_b01 = intersect_line_line(a1,a2,b0,b1); - if (a12_b01) return true; - const bool a12_b12 = intersect_line_line(a1,a2,b1,b2); - if (a12_b12) return true; - const bool a12_b20 = intersect_line_line(a1,a2,b2,b0); - if (a12_b20) return true; - const bool a20_b01 = intersect_line_line(a2,a0,b0,b1); - if (a20_b01) return true; - const bool a20_b12 = intersect_line_line(a2,a0,b1,b2); - if (a20_b12) return true; - const bool a20_b20 = intersect_line_line(a2,a0,b2,b0); - if (a20_b20) return true; - - bool a_in_b = point_inside_triangle(a0,b0,b1,b2) && point_inside_triangle(a1,b0,b1,b2) && point_inside_triangle(a2,b0,b1,b2); - if (a_in_b) return true; - - bool b_in_a = point_inside_triangle(b0,a0,a1,a2) && point_inside_triangle(b1,a0,a1,a2) && point_inside_triangle(b2,a0,a1,a2); - if (b_in_a) return true; - - return false; - } - - static bool intersect_triangle_triangle (const Vec3fa& a0, const Vec3fa& a1, const Vec3fa& a2, - const Vec3fa& b0, const Vec3fa& b1, const Vec3fa& b2) - { - const float eps = 1E-5f; - - /* calculate triangle planes */ - const Vec3fa Na = cross(a1-a0,a2-a0); - const float Ca = dot(Na,a0); - const Vec3fa Nb = cross(b1-b0,b2-b0); - const float Cb = dot(Nb,b0); - - /* project triangle A onto plane B */ - const float da0 = dot(Nb,a0)-Cb; - const float da1 = dot(Nb,a1)-Cb; - const float da2 = dot(Nb,a2)-Cb; - if (max(da0,da1,da2) < -eps) return false; - if (min(da0,da1,da2) > +eps) return false; - //CSTAT(bvh_collide_prim_intersections4++); - - /* project triangle B onto plane A */ - const float db0 = dot(Na,b0)-Ca; - const float db1 = dot(Na,b1)-Ca; - const float db2 = dot(Na,b2)-Ca; - if (max(db0,db1,db2) < -eps) return false; - if (min(db0,db1,db2) > +eps) return false; - //CSTAT(bvh_collide_prim_intersections5++); - - if (unlikely((std::fabs(da0) < eps && std::fabs(da1) < eps && std::fabs(da2) < eps) || - (std::fabs(db0) < eps && std::fabs(db1) < eps && std::fabs(db2) < eps))) - { - const size_t dz = maxDim(Na); - const size_t dx = (dz+1)%3; - const size_t dy = (dx+1)%3; - const Vec2f A0(a0[dx],a0[dy]); - const Vec2f A1(a1[dx],a1[dy]); - const Vec2f A2(a2[dx],a2[dy]); - const Vec2f B0(b0[dx],b0[dy]); - const Vec2f B1(b1[dx],b1[dy]); - const Vec2f B2(b2[dx],b2[dy]); - return intersect_triangle_triangle(A0,A1,A2,B0,B1,B2); - } - - const Vec3fa D = cross(Na,Nb); - const float pa0 = dot(D,a0); - const float pa1 = dot(D,a1); - const float pa2 = dot(D,a2); - const float pb0 = dot(D,b0); - const float pb1 = dot(D,b1); - const float pb2 = dot(D,b2); - - BBox1f ba = empty; - if (min(da0,da1) <= 0.0f && max(da0,da1) >= 0.0f && abs(da0-da1) > 0.0f) ba.extend(T(pa0,pa1,da0,da1)); - if (min(da1,da2) <= 0.0f && max(da1,da2) >= 0.0f && abs(da1-da2) > 0.0f) ba.extend(T(pa1,pa2,da1,da2)); - if (min(da2,da0) <= 0.0f && max(da2,da0) >= 0.0f && abs(da2-da0) > 0.0f) ba.extend(T(pa2,pa0,da2,da0)); - - BBox1f bb = empty; - if (min(db0,db1) <= 0.0f && max(db0,db1) >= 0.0f && abs(db0-db1) > 0.0f) bb.extend(T(pb0,pb1,db0,db1)); - if (min(db1,db2) <= 0.0f && max(db1,db2) >= 0.0f && abs(db1-db2) > 0.0f) bb.extend(T(pb1,pb2,db1,db2)); - if (min(db2,db0) <= 0.0f && max(db2,db0) >= 0.0f && abs(db2-db0) > 0.0f) bb.extend(T(pb2,pb0,db2,db0)); - - return conjoint(ba,bb); - } - }; - } -} - - diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglei.h b/thirdparty/embree-aarch64/kernels/geometry/trianglei.h deleted file mode 100644 index 4f3118cc0c..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/trianglei.h +++ /dev/null @@ -1,442 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "primitive.h" -#include "../common/scene.h" - -namespace embree -{ - /* Stores M triangles from an indexed face set */ - template <int M> - struct TriangleMi - { - /* Virtual interface to query information about the triangle type */ - struct Type : public PrimitiveType - { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - - /* primitive supports multiple time segments */ - static const bool singleTimeSegment = false; - - /* Returns maximum number of stored triangles */ - static __forceinline size_t max_size() { return M; } - - /* Returns required number of primitive blocks for N primitives */ - static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } - - public: - - /* Default constructor */ - __forceinline TriangleMi() { } - - /* Construction from vertices and IDs */ - __forceinline TriangleMi(const vuint<M>& v0, - const vuint<M>& v1, - const vuint<M>& v2, - const vuint<M>& geomIDs, - const vuint<M>& primIDs) -#if defined(EMBREE_COMPACT_POLYS) - : geomIDs(geomIDs), primIDs(primIDs) {} -#else - : v0_(v0), v1_(v1), v2_(v2), geomIDs(geomIDs), primIDs(primIDs) {} -#endif - - /* Returns a mask that tells which triangles are valid */ - __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); } - - /* Returns if the specified triangle is valid */ - __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; } - - /* Returns the number of stored triangles */ - __forceinline size_t size() const { return bsf(~movemask(valid())); } - - /* Returns the geometry IDs */ - __forceinline vuint<M> geomID() const { return geomIDs; } - __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; } - - /* Returns the primitive IDs */ - __forceinline vuint<M> primID() const { return primIDs; } - __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } - - /* Calculate the bounds of the triangles */ - __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const - { - BBox3fa bounds = empty; - for (size_t i=0; i<M && valid(i); i++) { - const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i)); - bounds.extend(mesh->bounds(primID(i),itime)); - } - return bounds; - } - - /* Calculate the linear bounds of the primitive */ - __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime) { - return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1)); - } - - __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps) - { - LBBox3fa allBounds = empty; - for (size_t i=0; i<M && valid(i); i++) - { - const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i)); - allBounds.extend(mesh->linearBounds(primID(i), itime, numTimeSteps)); - } - return allBounds; - } - - __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range) - { - LBBox3fa allBounds = empty; - for (size_t i=0; i<M && valid(i); i++) - { - const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i)); - allBounds.extend(mesh->linearBounds(primID(i), time_range)); - } - return allBounds; - } - - /* Non-temporal store */ - __forceinline static void store_nt(TriangleMi* dst, const TriangleMi& src) - { -#if !defined(EMBREE_COMPACT_POLYS) - vuint<M>::store_nt(&dst->v0_,src.v0_); - vuint<M>::store_nt(&dst->v1_,src.v1_); - vuint<M>::store_nt(&dst->v2_,src.v2_); -#endif - vuint<M>::store_nt(&dst->geomIDs,src.geomIDs); - vuint<M>::store_nt(&dst->primIDs,src.primIDs); - } - - /* Fill triangle from triangle list */ - template<typename PrimRefT> - __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene) - { - vuint<M> v0 = zero, v1 = zero, v2 = zero; - vuint<M> geomID = -1, primID = -1; - const PrimRefT* prim = &prims[begin]; - - for (size_t i=0; i<M; i++) - { - if (begin<end) { - geomID[i] = prim->geomID(); - primID[i] = prim->primID(); -#if !defined(EMBREE_COMPACT_POLYS) - const TriangleMesh* mesh = scene->get<TriangleMesh>(prim->geomID()); - const TriangleMesh::Triangle& tri = mesh->triangle(prim->primID()); - unsigned int int_stride = mesh->vertices0.getStride()/4; - v0[i] = tri.v[0] * int_stride; - v1[i] = tri.v[1] * int_stride; - v2[i] = tri.v[2] * int_stride; -#endif - begin++; - } else { - assert(i); - if (likely(i > 0)) { - geomID[i] = geomID[0]; - primID[i] = -1; - v0[i] = v0[0]; - v1[i] = v0[0]; - v2[i] = v0[0]; - } - } - if (begin<end) prim = &prims[begin]; - } - new (this) TriangleMi(v0,v1,v2,geomID,primID); // FIXME: use non temporal store - } - - __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime) - { - fill(prims, begin, end, scene); - return linearBounds(scene, itime); - } - - __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range) - { - fill(prims, begin, end, scene); - return linearBounds(scene, time_range); - } - - /* Updates the primitive */ - __forceinline BBox3fa update(TriangleMesh* mesh) - { - BBox3fa bounds = empty; - for (size_t i=0; i<M; i++) - { - if (primID(i) == -1) break; - const unsigned int primId = primID(i); - const TriangleMesh::Triangle& tri = mesh->triangle(primId); - const Vec3fa p0 = mesh->vertex(tri.v[0]); - const Vec3fa p1 = mesh->vertex(tri.v[1]); - const Vec3fa p2 = mesh->vertex(tri.v[2]); - bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2))); - } - return bounds; - } - - protected: -#if !defined(EMBREE_COMPACT_POLYS) - vuint<M> v0_; // 4 byte offset of 1st vertex - vuint<M> v1_; // 4 byte offset of 2nd vertex - vuint<M> v2_; // 4 byte offset of 3rd vertex -#endif - vuint<M> geomIDs; // geometry ID of mesh - vuint<M> primIDs; // primitive ID of primitive inside mesh - }; - - namespace isa - { - - template<int M> - struct TriangleMi : public embree::TriangleMi<M> - { -#if !defined(EMBREE_COMPACT_POLYS) - using embree::TriangleMi<M>::v0_; - using embree::TriangleMi<M>::v1_; - using embree::TriangleMi<M>::v2_; -#endif - using embree::TriangleMi<M>::geomIDs; - using embree::TriangleMi<M>::primIDs; - using embree::TriangleMi<M>::geomID; - using embree::TriangleMi<M>::primID; - using embree::TriangleMi<M>::valid; - - /* loads a single vertex */ - template<int vid> - __forceinline Vec3f getVertex(const size_t index, const Scene *const scene) const - { -#if defined(EMBREE_COMPACT_POLYS) - const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index)); - const TriangleMesh::Triangle& tri = mesh->triangle(primID(index)); - return (Vec3f) mesh->vertices[0][tri.v[vid]]; -#else - const vuint<M>& v = getVertexOffset<vid>(); - const float* vertices = scene->vertices[geomID(index)]; - return (Vec3f&) vertices[v[index]]; -#endif - } - - template<int vid, typename T> - __forceinline Vec3<T> getVertex(const size_t index, const Scene *const scene, const size_t itime, const T& ftime) const - { -#if defined(EMBREE_COMPACT_POLYS) - const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index)); - const TriangleMesh::Triangle& tri = mesh->triangle(primID(index)); - const Vec3fa v0 = mesh->vertices[itime+0][tri.v[vid]]; - const Vec3fa v1 = mesh->vertices[itime+1][tri.v[vid]]; -#else - const vuint<M>& v = getVertexOffset<vid>(); - const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index)); - const float* vertices0 = (const float*) mesh->vertexPtr(0,itime+0); - const float* vertices1 = (const float*) mesh->vertexPtr(0,itime+1); - const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]); - const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]); -#endif - const Vec3<T> p0(v0.x,v0.y,v0.z); - const Vec3<T> p1(v1.x,v1.y,v1.z); - return lerp(p0,p1,ftime); - } - - template<int vid, int K, typename T> - __forceinline Vec3<T> getVertex(const vbool<K>& valid, const size_t index, const Scene *const scene, const vint<K>& itime, const T& ftime) const - { - Vec3<T> p0, p1; - const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index)); - - for (size_t mask=movemask(valid), i=bsf(mask); mask; mask=btc(mask,i), i=bsf(mask)) - { -#if defined(EMBREE_COMPACT_POLYS) - const TriangleMesh::Triangle& tri = mesh->triangle(primID(index)); - const Vec3fa v0 = mesh->vertices[itime[i]+0][tri.v[vid]]; - const Vec3fa v1 = mesh->vertices[itime[i]+1][tri.v[vid]]; -#else - const vuint<M>& v = getVertexOffset<vid>(); - const float* vertices0 = (const float*) mesh->vertexPtr(0,itime[i]+0); - const float* vertices1 = (const float*) mesh->vertexPtr(0,itime[i]+1); - const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]); - const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]); -#endif - p0.x[i] = v0.x; p0.y[i] = v0.y; p0.z[i] = v0.z; - p1.x[i] = v1.x; p1.y[i] = v1.y; p1.z[i] = v1.z; - } - return (T(one)-ftime)*p0 + ftime*p1; - } - - struct Triangle { - vfloat4 v0,v1,v2; - }; - -#if defined(EMBREE_COMPACT_POLYS) - - __forceinline Triangle loadTriangle(const int i, const Scene* const scene) const - { - const unsigned int geomID = geomIDs[i]; - const unsigned int primID = primIDs[i]; - if (unlikely(primID == -1)) return { zero, zero, zero }; - const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID); - const TriangleMesh::Triangle& tri = mesh->triangle(primID); - const vfloat4 v0 = (vfloat4) mesh->vertices0[tri.v[0]]; - const vfloat4 v1 = (vfloat4) mesh->vertices0[tri.v[1]]; - const vfloat4 v2 = (vfloat4) mesh->vertices0[tri.v[2]]; - return { v0, v1, v2 }; - } - - __forceinline Triangle loadTriangle(const int i, const int itime, const TriangleMesh* const mesh) const - { - const unsigned int primID = primIDs[i]; - if (unlikely(primID == -1)) return { zero, zero, zero }; - const TriangleMesh::Triangle& tri = mesh->triangle(primID); - const vfloat4 v0 = (vfloat4) mesh->vertices[itime][tri.v[0]]; - const vfloat4 v1 = (vfloat4) mesh->vertices[itime][tri.v[1]]; - const vfloat4 v2 = (vfloat4) mesh->vertices[itime][tri.v[2]]; - return { v0, v1, v2 }; - } - -#else - - __forceinline Triangle loadTriangle(const int i, const Scene* const scene) const - { - const float* vertices = scene->vertices[geomID(i)]; - const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]); - const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]); - const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]); - return { v0, v1, v2 }; - } - - __forceinline Triangle loadTriangle(const int i, const int itime, const TriangleMesh* const mesh) const - { - const float* vertices = (const float*) mesh->vertexPtr(0,itime); - const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]); - const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]); - const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]); - return { v0, v1, v2 }; - } - -#endif - - /* Gather the triangles */ - __forceinline void gather(Vec3vf<M>& p0, Vec3vf<M>& p1, Vec3vf<M>& p2, const Scene* const scene) const; - - template<int K> -#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 2000) // workaround for compiler bug in ICC 2019 - __noinline -#else - __forceinline -#endif - void gather(const vbool<K>& valid, - Vec3vf<K>& p0, - Vec3vf<K>& p1, - Vec3vf<K>& p2, - const size_t index, - const Scene* const scene, - const vfloat<K>& time) const - { - const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index)); - - vfloat<K> ftime; - const vint<K> itime = mesh->timeSegment(time, ftime); - - const size_t first = bsf(movemask(valid)); - if (likely(all(valid,itime[first] == itime))) - { - p0 = getVertex<0>(index, scene, itime[first], ftime); - p1 = getVertex<1>(index, scene, itime[first], ftime); - p2 = getVertex<2>(index, scene, itime[first], ftime); - } else { - p0 = getVertex<0>(valid, index, scene, itime, ftime); - p1 = getVertex<1>(valid, index, scene, itime, ftime); - p2 = getVertex<2>(valid, index, scene, itime, ftime); - } - } - - __forceinline void gather(Vec3vf<M>& p0, - Vec3vf<M>& p1, - Vec3vf<M>& p2, - const TriangleMesh* mesh, - const Scene *const scene, - const int itime) const; - - __forceinline void gather(Vec3vf<M>& p0, - Vec3vf<M>& p1, - Vec3vf<M>& p2, - const Scene *const scene, - const float time) const; - - -#if !defined(EMBREE_COMPACT_POLYS) - template<int N> const vuint<M>& getVertexOffset() const; -#endif - }; - -#if !defined(EMBREE_COMPACT_POLYS) - template<> template<> __forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<0>() const { return v0_; } - template<> template<> __forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<1>() const { return v1_; } - template<> template<> __forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<2>() const { return v2_; } -#endif - - template<> - __forceinline void TriangleMi<4>::gather(Vec3vf4& p0, - Vec3vf4& p1, - Vec3vf4& p2, - const Scene* const scene) const - { - const Triangle tri0 = loadTriangle(0,scene); - const Triangle tri1 = loadTriangle(1,scene); - const Triangle tri2 = loadTriangle(2,scene); - const Triangle tri3 = loadTriangle(3,scene); - transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z); - transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z); - transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z); - } - - template<> - __forceinline void TriangleMi<4>::gather(Vec3vf4& p0, - Vec3vf4& p1, - Vec3vf4& p2, - const TriangleMesh* mesh, - const Scene *const scene, - const int itime) const - { - const Triangle tri0 = loadTriangle(0,itime,mesh); - const Triangle tri1 = loadTriangle(1,itime,mesh); - const Triangle tri2 = loadTriangle(2,itime,mesh); - const Triangle tri3 = loadTriangle(3,itime,mesh); - transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z); - transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z); - transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z); - } - - template<> - __forceinline void TriangleMi<4>::gather(Vec3vf4& p0, - Vec3vf4& p1, - Vec3vf4& p2, - const Scene *const scene, - const float time) const - { - const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(0)); // in mblur mode all geometries are identical - - float ftime; - const int itime = mesh->timeSegment(time, ftime); - - Vec3vf4 a0,a1,a2; gather(a0,a1,a2,mesh,scene,itime); - Vec3vf4 b0,b1,b2; gather(b0,b1,b2,mesh,scene,itime+1); - p0 = lerp(a0,b0,vfloat4(ftime)); - p1 = lerp(a1,b1,vfloat4(ftime)); - p2 = lerp(a2,b2,vfloat4(ftime)); - } - } - - template<int M> - typename TriangleMi<M>::Type TriangleMi<M>::type; - - typedef TriangleMi<4> Triangle4i; -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h deleted file mode 100644 index e2f106a62c..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h +++ /dev/null @@ -1,336 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "trianglei.h" -#include "triangle_intersector_moeller.h" -#include "triangle_intersector_pluecker.h" - -namespace embree -{ - namespace isa - { - /*! Intersects M triangles with 1 ray */ - template<int M, int Mx, bool filter> - struct TriangleMiIntersector1Moeller - { - typedef TriangleMi<M> Primitive; - typedef MoellerTrumboreIntersector1<Mx> Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - return pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); - } - }; - - /*! Intersects M triangles with K rays */ - template<int M, int Mx, int K, bool filter> - struct TriangleMiIntersectorKMoeller - { - typedef TriangleMi<M> Primitive; - typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; - - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri) - { - const Scene* scene = context->scene; - for (size_t i=0; i<Primitive::max_size(); i++) - { - if (!tri.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),RayHitK<K>::size()); - const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene); - const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene); - const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene); - pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); - } - } - - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri) - { - vbool<K> valid0 = valid_i; - const Scene* scene = context->scene; - - for (size_t i=0; i<Primitive::max_size(); i++) - { - if (!tri.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid_i),RayHitK<K>::size()); - const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene); - const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene); - const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene); - pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); - if (none(valid0)) break; - } - return !valid0; - } - - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); - } - - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - return pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); - } - }; - - /*! Intersects M triangles with 1 ray */ - template<int M, int Mx, bool filter> - struct TriangleMiIntersector1Pluecker - { - typedef TriangleMi<M> Primitive; - typedef PlueckerIntersector1<Mx> Precalculations; - - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); - } - }; - - /*! Intersects M triangles with K rays */ - template<int M, int Mx, int K, bool filter> - struct TriangleMiIntersectorKPluecker - { - typedef TriangleMi<M> Primitive; - typedef PlueckerIntersectorK<Mx,K> Precalculations; - - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri) - { - const Scene* scene = context->scene; - for (size_t i=0; i<Primitive::max_size(); i++) - { - if (!tri.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),RayHitK<K>::size()); - const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene); - const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene); - const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene); - pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); - } - } - - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri) - { - vbool<K> valid0 = valid_i; - const Scene* scene = context->scene; - - for (size_t i=0; i<Primitive::max_size(); i++) - { - if (!tri.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid_i),RayHitK<K>::size()); - const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene); - const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene); - const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene); - pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); - if (none(valid0)) break; - } - return !valid0; - } - - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); - } - - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); - } - }; - - /*! Intersects M motion blur triangles with 1 ray */ - template<int M, int Mx, bool filter> - struct TriangleMiMBIntersector1Moeller - { - typedef TriangleMi<M> Primitive; - typedef MoellerTrumboreIntersector1<Mx> Precalculations; - - /*! Intersect a ray with the M triangles and updates the hit. */ - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()); - pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - /*! Test if the ray is occluded by one of M triangles. */ - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()); - return pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); - } - }; - - /*! Intersects M motion blur triangles with K rays. */ - template<int M, int Mx, int K, bool filter> - struct TriangleMiMBIntersectorKMoeller - { - typedef TriangleMi<M> Primitive; - typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; - - /*! Intersects K rays with M triangles. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri) - { - for (size_t i=0; i<TriangleMi<M>::max_size(); i++) - { - if (!tri.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time()); - pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); - } - } - - /*! Test for K rays if they are occluded by any of the M triangles. */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri) - { - vbool<K> valid0 = valid_i; - for (size_t i=0; i<TriangleMi<M>::max_size(); i++) - { - if (!tri.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid0),K); - Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time()); - pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); - if (none(valid0)) break; - } - return !valid0; - } - - /*! Intersect a ray with M triangles and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]); - pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); - } - - /*! Test if the ray is occluded by one of the M triangles. */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]); - return pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); - } - }; - - /*! Intersects M motion blur triangles with 1 ray */ - template<int M, int Mx, bool filter> - struct TriangleMiMBIntersector1Pluecker - { - typedef TriangleMi<M> Primitive; - typedef PlueckerIntersector1<Mx> Precalculations; - - /*! Intersect a ray with the M triangles and updates the hit. */ - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()); - pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - /*! Test if the ray is occluded by one of M triangles. */ - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()); - return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); - } - }; - - /*! Intersects M motion blur triangles with K rays. */ - template<int M, int Mx, int K, bool filter> - struct TriangleMiMBIntersectorKPluecker - { - typedef TriangleMi<M> Primitive; - typedef PlueckerIntersectorK<Mx,K> Precalculations; - - /*! Intersects K rays with M triangles. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri) - { - for (size_t i=0; i<TriangleMi<M>::max_size(); i++) - { - if (!tri.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time()); - pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); - } - } - - /*! Test for K rays if they are occluded by any of the M triangles. */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri) - { - vbool<K> valid0 = valid_i; - for (size_t i=0; i<TriangleMi<M>::max_size(); i++) - { - if (!tri.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid0),K); - Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time()); - pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); - if (none(valid0)) break; - } - return !valid0; - } - - /*! Intersect a ray with M triangles and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri) - { - STAT3(normal.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]); - pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); - } - - /*! Test if the ray is occluded by one of the M triangles. */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri) - { - STAT3(shadow.trav_prims,1,1,1); - Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]); - return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev.h deleted file mode 100644 index 19af389e73..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/trianglev.h +++ /dev/null @@ -1,157 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "primitive.h" - -namespace embree -{ - /* Stores the vertices of M triangles in struct of array layout */ - template <int M> - struct TriangleMv - { - public: - struct Type : public PrimitiveType - { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - static Type type; - - public: - - /* Returns maximum number of stored triangles */ - static __forceinline size_t max_size() { return M; } - - /* Returns required number of primitive blocks for N primitives */ - static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } - - public: - - /* Default constructor */ - __forceinline TriangleMv() {} - - /* Construction from vertices and IDs */ - __forceinline TriangleMv(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const vuint<M>& geomIDs, const vuint<M>& primIDs) - : v0(v0), v1(v1), v2(v2), geomIDs(geomIDs), primIDs(primIDs) {} - - /* Returns a mask that tells which triangles are valid */ - __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); } - - /* Returns true if the specified triangle is valid */ - __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; } - - /* Returns the number of stored triangles */ - __forceinline size_t size() const { return bsf(~movemask(valid())); } - - /* Returns the geometry IDs */ - __forceinline vuint<M>& geomID() { return geomIDs; } - __forceinline const vuint<M>& geomID() const { return geomIDs; } - __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; } - - /* Returns the primitive IDs */ - __forceinline vuint<M>& primID() { return primIDs; } - __forceinline const vuint<M>& primID() const { return primIDs; } - __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } - - /* Calculate the bounds of the triangles */ - __forceinline BBox3fa bounds() const - { - Vec3vf<M> lower = min(v0,v1,v2); - Vec3vf<M> upper = max(v0,v1,v2); - vbool<M> mask = valid(); - lower.x = select(mask,lower.x,vfloat<M>(pos_inf)); - lower.y = select(mask,lower.y,vfloat<M>(pos_inf)); - lower.z = select(mask,lower.z,vfloat<M>(pos_inf)); - upper.x = select(mask,upper.x,vfloat<M>(neg_inf)); - upper.y = select(mask,upper.y,vfloat<M>(neg_inf)); - upper.z = select(mask,upper.z,vfloat<M>(neg_inf)); - return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)), - Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z))); - } - - /* Non temporal store */ - __forceinline static void store_nt(TriangleMv* dst, const TriangleMv& src) - { - vfloat<M>::store_nt(&dst->v0.x,src.v0.x); - vfloat<M>::store_nt(&dst->v0.y,src.v0.y); - vfloat<M>::store_nt(&dst->v0.z,src.v0.z); - vfloat<M>::store_nt(&dst->v1.x,src.v1.x); - vfloat<M>::store_nt(&dst->v1.y,src.v1.y); - vfloat<M>::store_nt(&dst->v1.z,src.v1.z); - vfloat<M>::store_nt(&dst->v2.x,src.v2.x); - vfloat<M>::store_nt(&dst->v2.y,src.v2.y); - vfloat<M>::store_nt(&dst->v2.z,src.v2.z); - vuint<M>::store_nt(&dst->geomIDs,src.geomIDs); - vuint<M>::store_nt(&dst->primIDs,src.primIDs); - } - - /* Fill triangle from triangle list */ - __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene) - { - vuint<M> vgeomID = -1, vprimID = -1; - Vec3vf<M> v0 = zero, v1 = zero, v2 = zero; - - for (size_t i=0; i<M && begin<end; i++, begin++) - { - const PrimRef& prim = prims[begin]; - const unsigned geomID = prim.geomID(); - const unsigned primID = prim.primID(); - const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(geomID); - const TriangleMesh::Triangle& tri = mesh->triangle(primID); - const Vec3fa& p0 = mesh->vertex(tri.v[0]); - const Vec3fa& p1 = mesh->vertex(tri.v[1]); - const Vec3fa& p2 = mesh->vertex(tri.v[2]); - vgeomID [i] = geomID; - vprimID [i] = primID; - v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; - v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; - v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; - } - TriangleMv::store_nt(this,TriangleMv(v0,v1,v2,vgeomID,vprimID)); - } - - /* Updates the primitive */ - __forceinline BBox3fa update(TriangleMesh* mesh) - { - BBox3fa bounds = empty; - vuint<M> vgeomID = -1, vprimID = -1; - Vec3vf<M> v0 = zero, v1 = zero, v2 = zero; - - for (size_t i=0; i<M; i++) - { - if (primID(i) == -1) break; - const unsigned geomId = geomID(i); - const unsigned primId = primID(i); - const TriangleMesh::Triangle& tri = mesh->triangle(primId); - const Vec3fa p0 = mesh->vertex(tri.v[0]); - const Vec3fa p1 = mesh->vertex(tri.v[1]); - const Vec3fa p2 = mesh->vertex(tri.v[2]); - bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2))); - vgeomID [i] = geomId; - vprimID [i] = primId; - v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; - v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; - v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; - } - new (this) TriangleMv(v0,v1,v2,vgeomID,vprimID); - return bounds; - } - - public: - Vec3vf<M> v0; // 1st vertex of the triangles - Vec3vf<M> v1; // 2nd vertex of the triangles - Vec3vf<M> v2; // 3rd vertex of the triangles - private: - vuint<M> geomIDs; // geometry ID - vuint<M> primIDs; // primitive ID - }; - - template<int M> - typename TriangleMv<M>::Type TriangleMv<M>::type; - - typedef TriangleMv<4> Triangle4v; -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h deleted file mode 100644 index 6af0d5a11c..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h +++ /dev/null @@ -1,206 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "triangle.h" -#include "triangle_intersector_pluecker.h" -#include "triangle_intersector_moeller.h" -#include "triangle_intersector_woop.h" - -namespace embree -{ - namespace isa - { - /*! Intersects M triangles with 1 ray */ - template<int M, int Mx, bool filter> - struct TriangleMvIntersector1Moeller - { - typedef TriangleMv<M> Primitive; - typedef MoellerTrumboreIntersector1<Mx> Precalculations; - - /*! Intersect a ray with M triangles and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) - { - STAT3(normal.trav_prims,1,1,1); - pre.intersect(ray,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - /*! Test if the ray is occluded by one of the M triangles. */ - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) - { - STAT3(shadow.trav_prims,1,1,1); - return pre.intersect(ray,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); - } - }; - - - template<int M, int Mx, bool filter> - struct TriangleMvIntersector1Woop - { - typedef TriangleMv<M> Primitive; - typedef WoopIntersector1<Mx> intersec; - typedef WoopPrecalculations1<M> Precalculations; - - /*! Intersect a ray with M triangles and updates the hit. */ - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) - { - STAT3(normal.trav_prims,1,1,1); - intersec::intersect(ray,pre,tri.v0,tri.v1,tri.v2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - /*! Test if the ray is occluded by one of the M triangles. */ - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) - { - STAT3(shadow.trav_prims,1,1,1); - return intersec::intersect(ray,pre,tri.v0,tri.v1,tri.v2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); - } - }; - - - /*! Intersects M triangles with K rays */ - template<int M, int Mx, int K, bool filter> - struct TriangleMvIntersectorKMoeller - { - typedef TriangleMv<M> Primitive; - typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; - - /*! Intersects K rays with M triangles. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri) - { - for (size_t i=0; i<M; i++) - { - if (!tri.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i); - const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i); - const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i); - pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); - } - } - - /*! Test for K rays if they are occluded by any of the M triangles. */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri) - { - vbool<K> valid0 = valid_i; - - for (size_t i=0; i<M; i++) - { - if (!tri.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid_i),K); - const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i); - const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i); - const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i); - pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); - if (none(valid0)) break; - } - return !valid0; - } - - /*! Intersect a ray with M triangles and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) - { - STAT3(normal.trav_prims,1,1,1); - pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx - } - - /*! Test if the ray is occluded by one of the M triangles. */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) - { - STAT3(shadow.trav_prims,1,1,1); - return pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx - } - }; - - /*! Intersects M triangles with 1 ray */ - template<int M, int Mx, bool filter> - struct TriangleMvIntersector1Pluecker - { - typedef TriangleMv<M> Primitive; - typedef PlueckerIntersector1<Mx> Precalculations; - - /*! Intersect a ray with M triangles and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) - { - STAT3(normal.trav_prims,1,1,1); - pre.intersect(ray,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - /*! Test if the ray is occluded by one of the M triangles. */ - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) - { - STAT3(shadow.trav_prims,1,1,1); - return pre.intersect(ray,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); - } - }; - - /*! Intersects M triangles with K rays */ - template<int M, int Mx, int K, bool filter> - struct TriangleMvIntersectorKPluecker - { - typedef TriangleMv<M> Primitive; - typedef PlueckerIntersectorK<Mx,K> Precalculations; - - /*! Intersects K rays with M triangles. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri) - { - for (size_t i=0; i<M; i++) - { - if (!tri.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i); - const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i); - const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i); - pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); - } - } - - /*! Test for K rays if they are occluded by any of the M triangles. */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri) - { - vbool<K> valid0 = valid_i; - - for (size_t i=0; i<M; i++) - { - if (!tri.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid_i),K); - const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i); - const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i); - const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i); - pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); - if (none(valid0)) break; - } - return !valid0; - } - - /*! Intersect a ray with M triangles and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) - { - STAT3(normal.trav_prims,1,1,1); - pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx - } - - /*! Test if the ray is occluded by one of the M triangles. */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) - { - STAT3(shadow.trav_prims,1,1,1); - return pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h deleted file mode 100644 index 63137aee16..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "primitive.h" - -namespace embree -{ - /* Stores the vertices of M triangles in struct of array layout */ - template<int M> - struct TriangleMvMB - { - public: - struct Type : public PrimitiveType - { - const char* name() const; - size_t sizeActive(const char* This) const; - size_t sizeTotal(const char* This) const; - size_t getBytes(const char* This) const; - }; - - static Type type; - - public: - - /* primitive supports single time segments */ - static const bool singleTimeSegment = true; - - /* Returns maximum number of stored triangles */ - static __forceinline size_t max_size() { return M; } - - /* Returns required number of primitive blocks for N primitives */ - static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } - - public: - - /* Default constructor */ - __forceinline TriangleMvMB() {} - - /* Construction from vertices and IDs */ - __forceinline TriangleMvMB(const Vec3vf<M>& a0, const Vec3vf<M>& a1, - const Vec3vf<M>& b0, const Vec3vf<M>& b1, - const Vec3vf<M>& c0, const Vec3vf<M>& c1, - const vuint<M>& geomIDs, const vuint<M>& primIDs) - : v0(a0), v1(b0), v2(c0), dv0(a1-a0), dv1(b1-b0), dv2(c1-c0), geomIDs(geomIDs), primIDs(primIDs) {} - - /* Returns a mask that tells which triangles are valid */ - __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); } - - /* Returns if the specified triangle is valid */ - __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; } - - /* Returns the number of stored triangles */ - __forceinline size_t size() const { return bsf(~movemask(valid())); } - - /* Returns the geometry IDs */ - __forceinline vuint<M>& geomID() { return geomIDs; } - __forceinline const vuint<M>& geomID() const { return geomIDs; } - __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; } - - /* Returns the primitive IDs */ - __forceinline vuint<M>& primID() { return primIDs; } - __forceinline const vuint<M>& primID() const { return primIDs; } - __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } - - /* Calculate the bounds of the triangles at t0 */ - __forceinline BBox3fa bounds0() const - { - Vec3vf<M> lower = min(v0,v1,v2); - Vec3vf<M> upper = max(v0,v1,v2); - const vbool<M> mask = valid(); - lower.x = select(mask,lower.x,vfloat<M>(pos_inf)); - lower.y = select(mask,lower.y,vfloat<M>(pos_inf)); - lower.z = select(mask,lower.z,vfloat<M>(pos_inf)); - upper.x = select(mask,upper.x,vfloat<M>(neg_inf)); - upper.y = select(mask,upper.y,vfloat<M>(neg_inf)); - upper.z = select(mask,upper.z,vfloat<M>(neg_inf)); - return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)), - Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z))); - } - - /* Calculate the bounds of the triangles at t1 */ - __forceinline BBox3fa bounds1() const - { - const Vec3vf<M> p0 = v0+dv0; - const Vec3vf<M> p1 = v1+dv1; - const Vec3vf<M> p2 = v2+dv2; - Vec3vf<M> lower = min(p0,p1,p2); - Vec3vf<M> upper = max(p0,p1,p2); - const vbool<M> mask = valid(); - lower.x = select(mask,lower.x,vfloat<M>(pos_inf)); - lower.y = select(mask,lower.y,vfloat<M>(pos_inf)); - lower.z = select(mask,lower.z,vfloat<M>(pos_inf)); - upper.x = select(mask,upper.x,vfloat<M>(neg_inf)); - upper.y = select(mask,upper.y,vfloat<M>(neg_inf)); - upper.z = select(mask,upper.z,vfloat<M>(neg_inf)); - return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)), - Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z))); - } - - /* Calculate the linear bounds of the primitive */ - __forceinline LBBox3fa linearBounds() const { - return LBBox3fa(bounds0(),bounds1()); - } - - /* Fill triangle from triangle list */ - __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime) - { - vuint<M> vgeomID = -1, vprimID = -1; - Vec3vf<M> va0 = zero, vb0 = zero, vc0 = zero; - Vec3vf<M> va1 = zero, vb1 = zero, vc1 = zero; - - BBox3fa bounds0 = empty; - BBox3fa bounds1 = empty; - - for (size_t i=0; i<M && begin<end; i++, begin++) - { - const PrimRef& prim = prims[begin]; - const unsigned geomID = prim.geomID(); - const unsigned primID = prim.primID(); - const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(geomID); - const TriangleMesh::Triangle& tri = mesh->triangle(primID); - const Vec3fa& a0 = mesh->vertex(tri.v[0],itime+0); bounds0.extend(a0); - const Vec3fa& a1 = mesh->vertex(tri.v[0],itime+1); bounds1.extend(a1); - const Vec3fa& b0 = mesh->vertex(tri.v[1],itime+0); bounds0.extend(b0); - const Vec3fa& b1 = mesh->vertex(tri.v[1],itime+1); bounds1.extend(b1); - const Vec3fa& c0 = mesh->vertex(tri.v[2],itime+0); bounds0.extend(c0); - const Vec3fa& c1 = mesh->vertex(tri.v[2],itime+1); bounds1.extend(c1); - vgeomID [i] = geomID; - vprimID [i] = primID; - va0.x[i] = a0.x; va0.y[i] = a0.y; va0.z[i] = a0.z; - va1.x[i] = a1.x; va1.y[i] = a1.y; va1.z[i] = a1.z; - vb0.x[i] = b0.x; vb0.y[i] = b0.y; vb0.z[i] = b0.z; - vb1.x[i] = b1.x; vb1.y[i] = b1.y; vb1.z[i] = b1.z; - vc0.x[i] = c0.x; vc0.y[i] = c0.y; vc0.z[i] = c0.z; - vc1.x[i] = c1.x; vc1.y[i] = c1.y; vc1.z[i] = c1.z; - } - new (this) TriangleMvMB(va0,va1,vb0,vb1,vc0,vc1,vgeomID,vprimID); - return LBBox3fa(bounds0,bounds1); - } - - /* Fill triangle from triangle list */ - __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range) - { - vuint<M> vgeomID = -1, vprimID = -1; - Vec3vf<M> va0 = zero, vb0 = zero, vc0 = zero; - Vec3vf<M> va1 = zero, vb1 = zero, vc1 = zero; - - LBBox3fa allBounds = empty; - for (size_t i=0; i<M && begin<end; i++, begin++) - { - const PrimRefMB& prim = prims[begin]; - const unsigned geomID = prim.geomID(); - const unsigned primID = prim.primID(); - const TriangleMesh* const mesh = scene->get<TriangleMesh>(geomID); - const range<int> itime_range = mesh->timeSegmentRange(time_range); - assert(itime_range.size() == 1); - const int ilower = itime_range.begin(); - const TriangleMesh::Triangle& tri = mesh->triangle(primID); - allBounds.extend(mesh->linearBounds(primID, time_range)); - const Vec3fa& a0 = mesh->vertex(tri.v[0],ilower+0); - const Vec3fa& a1 = mesh->vertex(tri.v[0],ilower+1); - const Vec3fa& b0 = mesh->vertex(tri.v[1],ilower+0); - const Vec3fa& b1 = mesh->vertex(tri.v[1],ilower+1); - const Vec3fa& c0 = mesh->vertex(tri.v[2],ilower+0); - const Vec3fa& c1 = mesh->vertex(tri.v[2],ilower+1); - const BBox1f time_range_v(mesh->timeStep(ilower+0),mesh->timeStep(ilower+1)); - auto a01 = globalLinear(std::make_pair(a0,a1),time_range_v); - auto b01 = globalLinear(std::make_pair(b0,b1),time_range_v); - auto c01 = globalLinear(std::make_pair(c0,c1),time_range_v); - vgeomID [i] = geomID; - vprimID [i] = primID; - va0.x[i] = a01.first .x; va0.y[i] = a01.first .y; va0.z[i] = a01.first .z; - va1.x[i] = a01.second.x; va1.y[i] = a01.second.y; va1.z[i] = a01.second.z; - vb0.x[i] = b01.first .x; vb0.y[i] = b01.first .y; vb0.z[i] = b01.first .z; - vb1.x[i] = b01.second.x; vb1.y[i] = b01.second.y; vb1.z[i] = b01.second.z; - vc0.x[i] = c01.first .x; vc0.y[i] = c01.first .y; vc0.z[i] = c01.first .z; - vc1.x[i] = c01.second.x; vc1.y[i] = c01.second.y; vc1.z[i] = c01.second.z; - } - new (this) TriangleMvMB(va0,va1,vb0,vb1,vc0,vc1,vgeomID,vprimID); - return allBounds; - } - - public: - Vec3vf<M> v0; // 1st vertex of the triangles - Vec3vf<M> v1; // 2nd vertex of the triangles - Vec3vf<M> v2; // 3rd vertex of the triangles - Vec3vf<M> dv0; // difference vector between time steps t0 and t1 for first vertex - Vec3vf<M> dv1; // difference vector between time steps t0 and t1 for second vertex - Vec3vf<M> dv2; // difference vector between time steps t0 and t1 for third vertex - private: - vuint<M> geomIDs; // geometry ID - vuint<M> primIDs; // primitive ID - }; - - template<int M> - typename TriangleMvMB<M>::Type TriangleMvMB<M>::type; - - typedef TriangleMvMB<4> Triangle4vMB; -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h deleted file mode 100644 index 35a260d826..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "triangle.h" -#include "intersector_epilog.h" - -namespace embree -{ - namespace isa - { - /*! Intersects M motion blur triangles with 1 ray */ - template<int M, int Mx, bool filter> - struct TriangleMvMBIntersector1Moeller - { - typedef TriangleMvMB<M> Primitive; - typedef MoellerTrumboreIntersector1<Mx> Precalculations; - - /*! Intersect a ray with the M triangles and updates the hit. */ - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleMvMB<M>& tri) - { - STAT3(normal.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - pre.intersect(ray,v0,v1,v2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - /*! Test if the ray is occluded by one of M triangles. */ - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleMvMB<M>& tri) - { - STAT3(shadow.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - return pre.intersect(ray,v0,v1,v2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); - } - }; - - /*! Intersects M motion blur triangles with K rays. */ - template<int M, int Mx, int K, bool filter> - struct TriangleMvMBIntersectorKMoeller - { - typedef TriangleMvMB<M> Primitive; - typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; - - /*! Intersects K rays with M triangles. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri) - { - for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++) - { - if (!tri.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - const Vec3vf<K> time(ray.time()); - const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i)); - const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i)); - const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i)); - pre.intersectK(valid_i,ray,v0,v1,v2,IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); - } - } - - /*! Test for K rays if they are occluded by any of the M triangles. */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri) - { - vbool<K> valid0 = valid_i; - - for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++) - { - if (!tri.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid0),K); - const Vec3vf<K> time(ray.time()); - const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i)); - const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i)); - const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i)); - pre.intersectK(valid0,ray,v0,v1,v2,OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); - if (none(valid0)) break; - } - return !valid0; - } - - /*! Intersect a ray with M triangles and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri) - { - STAT3(normal.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()[k]); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - pre.intersect(ray,k,v0,v1,v2,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); - } - - /*! Test if the ray is occluded by one of the M triangles. */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri) - { - STAT3(shadow.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()[k]); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - return pre.intersect(ray,k,v0,v1,v2,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); - } - }; - - /*! Intersects M motion blur triangles with 1 ray */ - template<int M, int Mx, bool filter> - struct TriangleMvMBIntersector1Pluecker - { - typedef TriangleMvMB<M> Primitive; - typedef PlueckerIntersector1<Mx> Precalculations; - - /*! Intersect a ray with the M triangles and updates the hit. */ - static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleMvMB<M>& tri) - { - STAT3(normal.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - /*! Test if the ray is occluded by one of M triangles. */ - static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleMvMB<M>& tri) - { - STAT3(shadow.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); - } - - static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) - { - return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri); - } - }; - - /*! Intersects M motion blur triangles with K rays. */ - template<int M, int Mx, int K, bool filter> - struct TriangleMvMBIntersectorKPluecker - { - typedef TriangleMvMB<M> Primitive; - typedef PlueckerIntersectorK<Mx,K> Precalculations; - - /*! Intersects K rays with M triangles. */ - static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri) - { - for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++) - { - if (!tri.valid(i)) break; - STAT3(normal.trav_prims,1,popcnt(valid_i),K); - const Vec3vf<K> time(ray.time()); - const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i)); - const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i)); - const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i)); - pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); - } - } - - /*! Test for K rays if they are occluded by any of the M triangles. */ - static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri) - { - vbool<K> valid0 = valid_i; - - for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++) - { - if (!tri.valid(i)) break; - STAT3(shadow.trav_prims,1,popcnt(valid0),K); - const Vec3vf<K> time(ray.time()); - const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i)); - const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i)); - const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i)); - pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); - if (none(valid0)) break; - } - return !valid0; - } - - /*! Intersect a ray with M triangles and updates the hit. */ - static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri) - { - STAT3(normal.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()[k]); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); - } - - /*! Test if the ray is occluded by one of the M triangles. */ - static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri) - { - STAT3(shadow.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()[k]); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); - } - }; - } -} |