summary refs log tree commit diff
path: root/thirdparty/embree-aarch64/kernels/geometry
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree-aarch64/kernels/geometry')
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/cone.h321
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h209
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h141
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curveNi.h222
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h569
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h278
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h516
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curveNv.h101
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h181
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h98
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h129
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h417
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h49
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h214
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h362
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h671
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h21
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h21
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h21
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h21
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h21
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h22
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/cylinder.h223
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h216
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h277
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/filter.h204
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h99
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/grid_soa.h275
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h207
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h445
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/instance.h78
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h84
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h1074
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h172
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/line_intersector.h141
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/linei.h709
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h124
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/object.h84
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/object_intersector.h127
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/plane.h57
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/pointi.h417
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/primitive.h49
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp379
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h76
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h566
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h529
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/quadi.h483
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h350
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/quadv.h165
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h181
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h710
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h136
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h183
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h156
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h38
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h237
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/subgrid.h517
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h518
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h493
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h508
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h236
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/triangle.h162
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h96
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h403
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h247
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h418
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h132
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/trianglei.h442
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h336
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/trianglev.h157
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h206
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h201
-rw-r--r--thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h211
73 files changed, 0 insertions, 19139 deletions
diff --git a/thirdparty/embree-aarch64/kernels/geometry/cone.h b/thirdparty/embree-aarch64/kernels/geometry/cone.h
deleted file mode 100644
index 961ef86160..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/cone.h
+++ /dev/null
@@ -1,321 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Truncated cone between two end points, each with its own radius (scalar version). */
- struct Cone
- {
-   const Vec3fa p0; //!< start position of cone
-   const Vec3fa p1; //!< end position of cone
-   const float r0;  //!< start radius of cone
-   const float r1;  //!< end radius of cone
-
-   __forceinline Cone(const Vec3fa& p0, const float r0, const Vec3fa& p1, const float r1)
-     : p0(p0), p1(p1), r0(r0), r1(r1) {}
-
-   /*! Solves the quadratic of the line org+t*dir against the cone.
-    *  On success t_o receives the parameter interval and (u0_o,Ng0_o) /
-    *  (u1_o,Ng1_o) receive u and Ng evaluated at t_o.lower / t_o.upper.
-    *  The early exits of the parallel cylinder case return without
-    *  writing the u and Ng outputs. */
-   __forceinline bool intersect(const Vec3fa& org, const Vec3fa& dir,
-                                BBox1f& t_o,
-                                float& u0_o, Vec3fa& Ng0_o,
-                                float& u1_o, Vec3fa& Ng1_o) const
-   {
-     /* set up the quadratic equation relative to the ray origin */
-     const Vec3fa v0 = p0-org;
-     const Vec3fa v1 = p1-org;
-     const Vec3fa axis = v1-v0;
-
-     const float rl = rcp_length(axis);
-     const Vec3fa dP = axis*rl;
-     const float dr = (r1-r0)*rl;
-     const Vec3fa O = -v0;
-
-     const float dOdO = dot(dir,dir);
-     const float OdO = dot(dir,O);
-     const float OO = dot(O,O);
-     const float dOz = dot(dP,dir);
-     const float Oz = dot(dP,O);
-
-     const float R = r0 + Oz*dr;
-     const float A = dOdO - sqr(dOz) * (1.0f+sqr(dr));
-     const float B = 2.0f * (OdO - dOz*(Oz + R*dr));
-     const float C = OO - (sqr(Oz) + sqr(R));
-
-     /* a negative discriminant means the line misses the cone */
-     const float D = B*B - 4.0f*A*C;
-     if (D < 0.0f) return false;
-
-     /* rays with a vanishing quadratic term count as "parallel" to the cone */
-     const float eps = float(1<<8)*float(ulp)*max(abs(dOdO),abs(sqr(dOz)));
-     const bool parallel = abs(A) < eps;
-
-     if (likely(!parallel))
-     {
-       /* standard case: two roots of the quadratic */
-       const float Q = sqrt(D);
-       const float rcp_2A = rcp(2.0f*A);
-       t_o.lower = (-B-Q)*rcp_2A;
-       t_o.upper = (-B+Q)*rcp_2A;
-
-       if (likely(A > 0.0f))
-       {
-         /* both hits are on the same cone, reject the negative one */
-         const float z0 = Oz+t_o.lower*dOz;
-         const float z0r = r0+z0*dr;
-         if (z0r < 0.0f) return false;
-       }
-       else
-       {
-         /* one hit on the positive and one on the negative cone: the
-          * ray direction and the open direction of the cone decide
-          * which end of the interval becomes unbounded */
-         if (dOz*dr > 0) t_o.upper = pos_inf;
-         else            t_o.lower = neg_inf;
-       }
-     }
-     else if (abs(dr) < 16.0f*float(ulp))
-     {
-       /* parallel ray and constant radius: cylinder case */
-       if (C <= 0.0f) { t_o = BBox1f(neg_inf,pos_inf); return true; }
-       t_o = BBox1f(pos_inf,neg_inf);
-       return false;
-     }
-     else
-     {
-       /* parallel ray and varying radius: single root of the linear equation */
-       const float t = -C/B;
-
-       /* a hit on the negative cone is no hit */
-       const float z0 = Oz+t*dOz;
-       const float z0r = r0+z0*dr;
-       if (z0r < 0.0f) return false;
-
-       /* test if we start inside or outside the cone */
-       if (dOz*dr > 0.0f) t_o = BBox1f(t,pos_inf);
-       else               t_o = BBox1f(neg_inf,t);
-     }
-
-     /* evaluates u and Ng at ray parameter t */
-     auto evalHit = [&] (const float t, float& u_o, Vec3fa& Ng_o)
-     {
-       u_o = (Oz+t*dOz)*rl;
-       const Vec3fa Pr = t*dir;
-       const Vec3fa Pl = v0 + u_o*axis;
-       const Vec3fa radial = normalize(Pr-Pl);
-       const Vec3fa U = (p1-p0)+(r1-r0)*radial;
-       const Vec3fa V = cross(p1-p0,radial);
-       Ng_o = cross(V,U);
-     };
-
-     evalHit(t_o.lower,u0_o,Ng0_o); // near hit
-     evalHit(t_o.upper,u1_o,Ng1_o); // far hit
-     return true;
-   }
-
-   /*! intersection test that only reports the parameter interval */
-   __forceinline bool intersect(const Vec3fa& org, const Vec3fa& dir, BBox1f& t_o) const
-   {
-     float u0, u1;
-     Vec3fa Ng0, Ng1;
-     return intersect(org,dir,t_o,u0,Ng0,u1,Ng1);
-   }
-
-   /*! Runs one test case and prints a message when the result differs from
-    *  the expectation. Infinite expected bounds must match exactly, the
-    *  values -1E6/+1E6 only demand a bound beyond that magnitude, anything
-    *  else is compared with a tolerance of 0.001. */
-   static bool verify(const size_t id, const Cone& cone, const Ray& ray, bool shouldhit, const float t0, const float t1)
-   {
-     const float tolerance = 0.001f;
-     BBox1f t;
-     const bool hit = cone.intersect(ray.org,ray.dir,t);
-
-     bool failed = hit != shouldhit;
-     if (shouldhit)
-     {
-       bool lowerWrong;
-       if (std::isinf(t0))    lowerWrong = t0 != t.lower;
-       else if (t0 == -1E6)   lowerWrong = t.lower > -1E6f;
-       else                   lowerWrong = abs(t0-t.lower) > tolerance;
-       failed |= lowerWrong;
-
-       bool upperWrong;
-       if (std::isinf(t1))    upperWrong = t1 != t.upper;
-       else if (t1 == +1E6)   upperWrong = t.upper < +1E6f;
-       else                   upperWrong = abs(t1-t.upper) > tolerance;
-       failed |= upperWrong;
-     }
-     if (!failed) return true;
-
-     embree_cout << "Cone test " << id << " failed: cone = " << cone << ", ray = " << ray << ", hit = " << hit << ", t = " << t << embree_endl;
-     return false;
-   }
-
-   /*! self test of the cone class, returns true when all cases pass */
-   static bool verify()
-   {
-     bool passed = true;
-
-     /* cone opening along +x */
-     const Cone cone0(Vec3fa(0.0f,0.0f,0.0f),0.0f,Vec3fa(1.0f,0.0f,0.0f),1.0f);
-     passed &= verify(0,cone0,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,3.0f,pos_inf);
-     passed &= verify(1,cone0,Ray(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa(-1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,1.0f);
-     passed &= verify(2,cone0,Ray(Vec3fa(-1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),false,0.0f,0.0f);
-     passed &= verify(3,cone0,Ray(Vec3fa(+1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,1.0f,3.0f);
-     passed &= verify(4,cone0,Ray(Vec3fa(-1.0f,0.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,1.0f,pos_inf);
-     passed &= verify(5,cone0,Ray(Vec3fa(+1.0f,0.0f,0.0f),Vec3fa(-1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,1.0f);
-     passed &= verify(6,cone0,Ray(Vec3fa(+0.0f,0.0f,1.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,1.0f,1.0f);
-     passed &= verify(7,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(-1.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f);
-     passed &= verify(8,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(+1.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.5f,+1E6);
-     passed &= verify(9,cone0,Ray(Vec3fa(+0.0f,1.0f,0.0f),Vec3fa(-1.0f,+1.0f,+0.0f),0.0f,float(inf)),true,-1E6,-0.5f);
-
-     /* cone closing along +x */
-     const Cone cone1(Vec3fa(0.0f,0.0f,0.0f),1.0f,Vec3fa(1.0f,0.0f,0.0f),0.0f);
-     passed &= verify(10,cone1,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa(+1.0f,+0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,2.0f);
-     passed &= verify(11,cone1,Ray(Vec3fa(-1.0f,0.0f,2.0f),Vec3fa(+0.0f,+0.0f,-1.0f),0.0f,float(inf)),true,0.0f,4.0f);
-
-     /* equal radii: cylinder */
-     const Cone cylinder(Vec3fa(0.0f,0.0f,0.0f),1.0f,Vec3fa(1.0f,0.0f,0.0f),1.0f);
-     passed &= verify(12,cylinder,Ray(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f);
-     passed &= verify(13,cylinder,Ray(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f);
-     passed &= verify(14,cylinder,Ray(Vec3fa(+2.0f,1.0f,2.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f);
-     passed &= verify(15,cylinder,Ray(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf);
-     passed &= verify(16,cylinder,Ray(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf);
-     passed &= verify(17,cylinder,Ray(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf);
-     passed &= verify(18,cylinder,Ray(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf);
-     return passed;
-   }
-
-   /*! output operator */
-   friend __forceinline embree_ostream operator<<(embree_ostream cout, const Cone& c) {
-     return cout << "Cone { p0 = " << c.p0 << ", r0 = " << c.r0 << ", p1 = " << c.p1 << ", r1 = " << c.r1 << "}";
-   }
- };
-
- /*! N cones stored in structure-of-arrays layout (SIMD version of Cone). */
- template<int N>
- struct ConeN
- {
-   typedef Vec3<vfloat<N>> Vec3vfN;
-
-   const Vec3vfN p0; //!< start position of cone
-   const Vec3vfN p1; //!< end position of cone
-   const vfloat<N> r0; //!< start radius of cone
-   const vfloat<N> r1; //!< end radius of cone
-
-   __forceinline ConeN(const Vec3vfN& p0, const vfloat<N>& r0, const Vec3vfN& p1, const vfloat<N>& r1)
-     : p0(p0), p1(p1), r0(r0), r1(r1) {}
-
-   /*! extracts the i'th cone as a scalar Cone */
-   __forceinline Cone operator[] (const size_t i) const
-   {
-     assert(i<N);
-     return Cone(Vec3fa(p0.x[i],p0.y[i],p0.z[i]),r0[i],Vec3fa(p1.x[i],p1.y[i],p1.z[i]),r1[i]);
-   }
-
-   /*! Intersects the line org+t*dir with all N cones at once.
-    *  Returns the mask of lanes with a hit. t_o receives the parameter
-    *  interval per lane; (u0_o,Ng0_o) and (u1_o,Ng1_o) receive u and Ng
-    *  evaluated at t_o.lower and t_o.upper. The u/Ng outputs are computed
-    *  for all lanes, so they are only meaningful where the returned mask
-    *  is set. */
-   __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir,
-                                    BBox<vfloat<N>>& t_o,
-                                    vfloat<N>& u0_o, Vec3vfN& Ng0_o,
-                                    vfloat<N>& u1_o, Vec3vfN& Ng1_o) const
-   {
-     /* calculate quadratic equation to solve */
-     const Vec3vfN v0 = p0-Vec3vfN(org);
-     const Vec3vfN v1 = p1-Vec3vfN(org);
-
-     const vfloat<N> rl = rcp_length(v1-v0);
-     const Vec3vfN P0 = v0, dP = (v1-v0)*rl;
-     const vfloat<N> dr = (r1-r0)*rl;
-     const Vec3vfN O = -P0, dO = dir;
-
-     const vfloat<N> dOdO = dot(dO,dO);
-     const vfloat<N> OdO = dot(dO,O);
-     const vfloat<N> OO = dot(O,O);
-     const vfloat<N> dOz = dot(dP,dO);
-     const vfloat<N> Oz = dot(dP,O);
-
-     const vfloat<N> R = r0 + Oz*dr;
-     const vfloat<N> A = dOdO - sqr(dOz) * (vfloat<N>(1.0f)+sqr(dr));
-     const vfloat<N> B = 2.0f * (OdO - dOz*(Oz + R*dr));
-     const vfloat<N> C = OO - (sqr(Oz) + sqr(R));
-
-     /* we miss the cone if the discriminant is smaller than zero */
-     const vfloat<N> D = B*B - 4.0f*A*C;
-     vbool<N> valid = D >= 0.0f;
-     if (none(valid)) return valid;
-
-     /* special case for rays that are "parallel" to the cone */
-     const vfloat<N> eps = float(1<<8)*float(ulp)*max(abs(dOdO),abs(sqr(dOz)));
-     const vbool<N> validt = valid & (abs(A) < eps);
-     const vbool<N> validf = valid & !(abs(A) < eps);
-     if (unlikely(any(validt)))
-     {
-       /* masks must be N wide like everything else here (was vboolx) */
-       const vbool<N> validtt = validt & (abs(dr) < 16.0f*float(ulp));
-       const vbool<N> validtf = validt & (abs(dr) >= 16.0f*float(ulp));
-
-       /* cylinder case */
-       if (unlikely(any(validtt)))
-       {
-         t_o.lower = select(validtt, select(C <= 0.0f, vfloat<N>(neg_inf), vfloat<N>(pos_inf)), t_o.lower);
-         t_o.upper = select(validtt, select(C <= 0.0f, vfloat<N>(pos_inf), vfloat<N>(neg_inf)), t_o.upper);
-         valid &= !validtt | (C <= 0.0f);
-       }
-
-       /* cone case */
-       if (any(validtf))
-       {
-         /* if we hit the negative cone there cannot be a hit */
-         const vfloat<N> t = -C/B;
-         const vfloat<N> z0 = Oz+t*dOz;
-         const vfloat<N> z0r = r0+z0*dr;
-         valid &= !validtf | (z0r >= 0.0f);
-
-         /* test if we start inside or outside the cone */
-         t_o.lower = select(validtf, select(dOz*dr > 0.0f, t, vfloat<N>(neg_inf)), t_o.lower);
-         t_o.upper = select(validtf, select(dOz*dr > 0.0f, vfloat<N>(pos_inf), t), t_o.upper);
-       }
-     }
-
-     /* standard case for "non-parallel" rays */
-     if (likely(any(validf)))
-     {
-       const vfloat<N> Q = sqrt(D);
-       const vfloat<N> rcp_2A = 0.5f*rcp(A);
-       t_o.lower = select(validf, (-B-Q)*rcp_2A, t_o.lower);
-       t_o.upper = select(validf, (-B+Q)*rcp_2A, t_o.upper);
-
-       /* standard case where both hits are on same cone */
-       const vbool<N> validft = validf & (A>0.0f);
-       const vbool<N> validff = validf & !(A>0.0f);
-       if (any(validft)) {
-         const vfloat<N> z0 = Oz+t_o.lower*dOz;
-         const vfloat<N> z0r = r0+z0*dr;
-         valid &= !validft | (z0r >= 0.0f);
-       }
-
-       /* special case where the hits are on the positive and negative cone */
-       if (any(validff)) {
-         /* depending on the ray direction and the open direction
-          * of the cone we have a hit from inside or outside the
-          * cone */
-         t_o.lower = select(validff, select(dOz*dr > 0.0f, t_o.lower, float(neg_inf)), t_o.lower);
-         t_o.upper = select(validff, select(dOz*dr > 0.0f, float(pos_inf), t_o.upper), t_o.upper);
-       }
-     }
-
-     /* calculates u and Ng for near hit */
-     {
-       u0_o = (Oz+t_o.lower*dOz)*rl;
-       const Vec3vfN Pr = t_o.lower*Vec3vfN(dir);
-       const Vec3vfN Pl = v0 + u0_o*(v1-v0);
-       const Vec3vfN R = normalize(Pr-Pl);
-       const Vec3vfN U = (p1-p0)+(r1-r0)*R;
-       const Vec3vfN V = cross(p1-p0,R);
-       Ng0_o = cross(V,U);
-     }
-
-     /* calculates u and Ng for far hit */
-     {
-       u1_o = (Oz+t_o.upper*dOz)*rl;
-       /* the far hit point lies at t_o.upper, as in the scalar Cone
-        * (previously t_o.lower was used here by mistake) */
-       const Vec3vfN Pr = t_o.upper*Vec3vfN(dir);
-       const Vec3vfN Pl = v0 + u1_o*(v1-v0);
-       const Vec3vfN R = normalize(Pr-Pl);
-       const Vec3vfN U = (p1-p0)+(r1-r0)*R;
-       const Vec3vfN V = cross(p1-p0,R);
-       Ng1_o = cross(V,U);
-     }
-     return valid;
-   }
-
-   /*! intersection test that only reports the parameter interval */
-   __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir, BBox<vfloat<N>>& t_o) const
-   {
-     vfloat<N> u0_o; Vec3vfN Ng0_o; vfloat<N> u1_o; Vec3vfN Ng1_o;
-     return intersect(org,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o);
-   }
- };
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h
deleted file mode 100644
index 0902baff7d..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h
+++ /dev/null
@@ -1,209 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- namespace isa
- {
- namespace __coneline_internal
- {
- /*! Intersects one ray (broadcast to M lanes) with M truncated cones
-  *  given by the end points v0/v1 (xyz = position, w = radius).
-  *  cL/cR enable the end-cap disk test at v0/v1 respectively. The
-  *  epilog is invoked for the first valid hit and, if one exists, for a
-  *  second hit; the result is true when either invocation returns true.
-  *  ray_tfar is a functor so the far bound is re-read after the first
-  *  epilog call. */
- template<int M, typename Epilog, typename ray_tfar_func>
- static __forceinline bool intersectCone(const vbool<M>& valid_i,
-                                         const Vec3vf<M>& ray_org_in, const Vec3vf<M>& ray_dir,
-                                         const vfloat<M>& ray_tnear, const ray_tfar_func& ray_tfar,
-                                         const Vec4vf<M>& v0, const Vec4vf<M>& v1,
-                                         const vbool<M>& cL, const vbool<M>& cR,
-                                         const Epilog& epilog)
- {
-   vbool<M> valid = valid_i;
-
-   /* move ray origin closer to make calculations numerically stable */
-   const vfloat<M> dOdO = sqr(ray_dir);
-   const vfloat<M> rcp_dOdO = rcp(dOdO);
-   const Vec3vf<M> center = vfloat<M>(0.5f)*(v0.xyz()+v1.xyz());
-   const vfloat<M> dt = dot(center-ray_org_in,ray_dir)*rcp_dOdO;
-   const Vec3vf<M> ray_org = ray_org_in + dt*ray_dir;
-
-   const Vec3vf<M> dP = v1.xyz() - v0.xyz();
-   const Vec3vf<M> p0 = ray_org - v0.xyz();
-   const Vec3vf<M> p1 = ray_org - v1.xyz();
-
-   const vfloat<M> dPdP = sqr(dP);
-   const vfloat<M> dP0 = dot(p0,dP);
-   const vfloat<M> dP1 = dot(p1,dP);
-   const vfloat<M> dOdP = dot(ray_dir,dP);
-
-   // intersect cone body
-   const vfloat<M> dr = v0.w - v1.w;
-   const vfloat<M> hy = dPdP + sqr(dr);
-   const vfloat<M> dO0 = dot(ray_dir,p0);
-   const vfloat<M> OO = sqr(p0);
-   const vfloat<M> dPdP2 = sqr(dPdP);
-   const vfloat<M> dPdPr0 = dPdP*v0.w;
-
-   const vfloat<M> A = dPdP2 - sqr(dOdP)*hy;
-   const vfloat<M> B = dPdP2*dO0 - dP0*dOdP*hy + dPdPr0*(dr*dOdP);
-   const vfloat<M> C = dPdP2*OO - sqr(dP0)*hy + dPdPr0*(2.0f*dr*dP0 - dPdPr0);
-
-   /* reduced discriminant (B already carries the factor 1/2) */
-   const vfloat<M> D = B*B - A*C;
-   valid &= D >= 0.0f;
-   if (unlikely(none(valid))) {
-     return false;
-   }
-
-   /* standard case for "non-parallel" rays */
-   const vfloat<M> Q = sqrt(D);
-   const vfloat<M> rcp_A = rcp(A);
-   /* special case for rays that are "parallel" to the cone - assume miss */
-   const vbool<M> isParallel = abs(A) <= min_rcp_input;
-
-   vfloat<M> t_cone_lower = select (isParallel, neg_inf, (-B-Q)*rcp_A);
-   vfloat<M> t_cone_upper = select (isParallel, pos_inf, (-B+Q)*rcp_A);
-   const vfloat<M> y_lower = dP0 + t_cone_lower*dOdP;
-   const vfloat<M> y_upper = dP0 + t_cone_upper*dOdP;
-   /* keep body hits only when they fall between the two end points */
-   t_cone_lower = select(valid & y_lower > 0.0f & y_lower < dPdP, t_cone_lower, pos_inf);
-   t_cone_upper = select(valid & y_upper > 0.0f & y_upper < dPdP, t_cone_upper, neg_inf);
-
-   /* end-cap disks, tested only for lanes that request them */
-   const vbool<M> hitDisk0 = valid & cL;
-   const vbool<M> hitDisk1 = valid & cR;
-   const vfloat<M> rcp_dOdP = rcp(dOdP);
-   const vfloat<M> t_disk0 = select (hitDisk0, select (sqr(p0*dOdP-ray_dir*dP0)<(sqr(v0.w)*sqr(dOdP)), -dP0*rcp_dOdP, pos_inf), pos_inf);
-   const vfloat<M> t_disk1 = select (hitDisk1, select (sqr(p1*dOdP-ray_dir*dP1)<(sqr(v1.w)*sqr(dOdP)), -dP1*rcp_dOdP, pos_inf), pos_inf);
-   const vfloat<M> t_disk_lower = min(t_disk0, t_disk1);
-   const vfloat<M> t_disk_upper = max(t_disk0, t_disk1);
-
-   const vfloat<M> t_lower = min(t_cone_lower, t_disk_lower);
-   const vfloat<M> t_upper = max(t_cone_upper, select(t_lower==t_disk_lower,
-                                                    select(t_disk_upper==vfloat<M>(pos_inf),neg_inf,t_disk_upper),
-                                                    select(t_disk_lower==vfloat<M>(pos_inf),neg_inf,t_disk_lower)));
-
-   /* t values are relative to the moved origin, hence the dt offset */
-   const vbool<M> valid_lower = valid & ray_tnear <= dt+t_lower & dt+t_lower <= ray_tfar() & t_lower != vfloat<M>(pos_inf);
-   const vbool<M> valid_upper = valid & ray_tnear <= dt+t_upper & dt+t_upper <= ray_tfar() & t_upper != vfloat<M>(neg_inf);
-
-   const vbool<M> valid_first = valid_lower | valid_upper;
-   if (unlikely(none(valid_first)))
-     return false;
-
-   const vfloat<M> t_first = select(valid_lower, t_lower, t_upper);
-   const vfloat<M> y_first = select(valid_lower, y_lower, y_upper);
-
-   const vfloat<M> rcp_dPdP = rcp(dPdP);
-   const Vec3vf<M> dP2drr0dP = dPdP*dr*v0.w*dP;
-   const Vec3vf<M> dPhy = dP*hy;
-   const vbool<M> cone_hit_first = valid & (t_first == t_cone_lower | t_first == t_cone_upper);
-   const vbool<M> disk0_hit_first = valid & (t_first == t_disk0);
-   const Vec3vf<M> Ng_first = select(cone_hit_first, dPdP2*(p0+t_first*ray_dir)+dP2drr0dP-dPhy*y_first, select(disk0_hit_first, -dP, dP));
-   const vfloat<M> u_first = select(cone_hit_first, y_first*rcp_dPdP, select(disk0_hit_first, vfloat<M>(zero), vfloat<M>(one)));
-
-   /* invoke intersection filter for first hit */
-   /* NOTE(review): RoundLineIntersectorHitM is not declared by the includes
-    * visible in this header, while ConeLineIntersectorHitM below has the
-    * same members - confirm which hit type is intended */
-   RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_first,Ng_first);
-   const bool is_hit_first = epilog(valid_first, hit);
-
-   /* check for possible second hits before potentially accepted hit */
-   const vfloat<M> t_second = t_upper;
-   const vfloat<M> y_second = y_upper;
-   const vbool<M> valid_second = valid_lower & valid_upper & (dt+t_upper <= ray_tfar());
-   if (unlikely(none(valid_second)))
-     return is_hit_first;
-
-   /* invoke intersection filter for second hit */
-   const vbool<M> cone_hit_second = t_second == t_cone_lower | t_second == t_cone_upper;
-   const vbool<M> disk0_hit_second = t_second == t_disk0;
-   const Vec3vf<M> Ng_second = select(cone_hit_second, dPdP2*(p0+t_second*ray_dir)+dP2drr0dP-dPhy*y_second, select(disk0_hit_second, -dP, dP));
-   /* u of a cap hit depends on which cap the SECOND hit is on (the first
-    * hit's mask was used here before, giving u=0 on the v1 cap) */
-   const vfloat<M> u_second = select(cone_hit_second, y_second*rcp_dPdP, select(disk0_hit_second, vfloat<M>(zero), vfloat<M>(one)));
-
-   hit = RoundLineIntersectorHitM<M>(u_second,zero,dt+t_second,Ng_second);
-   const bool is_hit_second = epilog(valid_second, hit);
-
-   return is_hit_first | is_hit_second;
- }
- }
-
- /*! Hit record for M lanes: u/v coordinates, hit distance t and Ng,
-  *  with per-lane accessors. */
- template<int M>
- struct ConeLineIntersectorHitM
- {
-   /*! leaves all members uninitialized */
-   __forceinline ConeLineIntersectorHitM() {}
-
-   __forceinline ConeLineIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng)
-     : vu(u), vv(v), vt(t), vNg(Ng) {}
-
-   /*! nothing to post-process for this hit type */
-   __forceinline void finalize() {}
-
-   /*! uv coordinates of lane i */
-   __forceinline Vec2f uv (const size_t i) const {
-     return Vec2f(vu[i],vv[i]);
-   }
-
-   /*! hit distance of lane i */
-   __forceinline float t (const size_t i) const {
-     return vt[i];
-   }
-
-   /*! Ng of lane i */
-   __forceinline Vec3fa Ng(const size_t i) const {
-     return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]);
-   }
-
-   vfloat<M> vu;
-   vfloat<M> vv;
-   vfloat<M> vt;
-   Vec3vf<M> vNg;
- };
-
- /*! Intersects a single ray with M cone segments. */
- template<int M>
- struct ConeCurveIntersector1
- {
-   typedef CurvePrecalculations1 Precalculations;
-
-   /*! functor that reads ray.tfar at call time, widened to M lanes */
-   struct ray_tfar {
-     Ray& ray;
-     __forceinline ray_tfar(Ray& ray) : ray(ray) {}
-     __forceinline vfloat<M> operator() () const { return ray.tfar; }
-   };
-
-   /*! Broadcasts the ray to M lanes, passes both end points through
-    *  enlargeRadiusToMinWidth and forwards to intersectCone. */
-   template<typename Epilog>
-   static __forceinline bool intersect(const vbool<M>& valid_i,
-                                       Ray& ray,
-                                       IntersectContext* context,
-                                       const LineSegments* geom,
-                                       const Precalculations& pre,
-                                       const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
-                                       const vbool<M>& cL, const vbool<M>& cR,
-                                       const Epilog& epilog)
-   {
-     const Vec3vf<M> org(ray.org.x, ray.org.y, ray.org.z);
-     const Vec3vf<M> dir(ray.dir.x, ray.dir.y, ray.dir.z);
-     const vfloat<M> tnear(ray.tnear());
-
-     const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,org,v0i);
-     const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,org,v1i);
-
-     const ray_tfar tfar(ray);
-     return __coneline_internal::intersectCone(valid_i,org,dir,tnear,tfar,v0,v1,cL,cR,epilog);
-   }
- };
-
- /*! Intersects ray k of a K-wide ray packet with M cone segments. */
- template<int M, int K>
- struct ConeCurveIntersectorK
- {
-   typedef CurvePrecalculationsK<K> Precalculations;
-
-   /*! functor that reads ray.tfar[k] at call time, widened to M lanes */
-   struct ray_tfar {
-     RayK<K>& ray;
-     size_t k;
-     __forceinline ray_tfar(RayK<K>& ray, size_t k) : ray(ray), k(k) {}
-     __forceinline vfloat<M> operator() () const { return ray.tfar[k]; }
-   };
-
-   /*! Broadcasts ray k to M lanes, passes both end points through
-    *  enlargeRadiusToMinWidth and forwards to intersectCone. */
-   template<typename Epilog>
-   static __forceinline bool intersect(const vbool<M>& valid_i,
-                                       RayK<K>& ray, size_t k,
-                                       IntersectContext* context,
-                                       const LineSegments* geom,
-                                       const Precalculations& pre,
-                                       const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
-                                       const vbool<M>& cL, const vbool<M>& cR,
-                                       const Epilog& epilog)
-   {
-     const Vec3vf<M> org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
-     const Vec3vf<M> dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);
-     const vfloat<M> tnear = ray.tnear()[k];
-
-     const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,org,v0i);
-     const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,org,v1i);
-
-     const ray_tfar tfar(ray,k);
-     return __coneline_internal::intersectCone(valid_i,org,dir,tnear,tfar,v0,v1,cL,cR,epilog);
-   }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h
deleted file mode 100644
index d47218eb8b..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h
+++ /dev/null
@@ -1,141 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "coneline_intersector.h"
-#include "intersector_epilog.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Single-ray intersector for cone line segments stored as LineMi<M>. */
- template<int M, int Mx, bool filter>
- struct ConeCurveMiIntersector1
- {
-   typedef LineMi<M> Primitive;
-   typedef CurvePrecalculations1 Precalculations;
-
-   /*! gathers end points and cap flags of the segments and runs the cone
-    *  intersector with an Intersect1EpilogM epilog */
-   static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line)
-   {
-     STAT3(normal.trav_prims,1,1,1);
-     const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
-     Vec4vf<M> v0,v1;
-     vbool<M> cL,cR;
-     line.gather(v0,v1,cL,cR,geom);
-     const vbool<Mx> valid = line.template valid<Mx>();
-     ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
-   }
-
-   /*! same as intersect() but with an Occluded1EpilogM epilog; returns
-    *  the result of the cone intersector */
-   static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line)
-   {
-     STAT3(shadow.trav_prims,1,1,1);
-     const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
-     Vec4vf<M> v0,v1;
-     vbool<M> cL,cR;
-     line.gather(v0,v1,cL,cR,geom);
-     const vbool<Mx> valid = line.template valid<Mx>();
-     return ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
-   }
-
-   /*! forwards to the generic point query for this primitive type */
-   static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
-   {
-     return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line);
-   }
- };
-
- /*! Single-ray intersector for cone line segments stored as LineMi<M>;
-  *  the segments are gathered at the ray's time. */
- template<int M, int Mx, bool filter>
- struct ConeCurveMiMBIntersector1
- {
-   typedef LineMi<M> Primitive;
-   typedef CurvePrecalculations1 Precalculations;
-
-   /*! gathers end points and cap flags at ray.time() and runs the cone
-    *  intersector with an Intersect1EpilogM epilog */
-   static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line)
-   {
-     STAT3(normal.trav_prims,1,1,1);
-     const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
-     Vec4vf<M> v0,v1;
-     vbool<M> cL,cR;
-     line.gather(v0,v1,cL,cR,geom,ray.time());
-     const vbool<Mx> valid = line.template valid<Mx>();
-     ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
-   }
-
-   /*! same as intersect() but with an Occluded1EpilogM epilog; returns
-    *  the result of the cone intersector */
-   static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line)
-   {
-     STAT3(shadow.trav_prims,1,1,1);
-     const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
-     Vec4vf<M> v0,v1;
-     vbool<M> cL,cR;
-     line.gather(v0,v1,cL,cR,geom,ray.time());
-     const vbool<Mx> valid = line.template valid<Mx>();
-     return ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
-   }
-
-   /*! forwards to the generic point query for this primitive type */
-   static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
-   {
-     return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line);
-   }
- };
-
- /*! Intersector for ray k of a K-wide packet against cone line segments
-  *  stored as LineMi<M>. */
- template<int M, int Mx, int K, bool filter>
- struct ConeCurveMiIntersectorK
- {
-   typedef LineMi<M> Primitive;
-   typedef CurvePrecalculationsK<K> Precalculations;
-
-   /*! gathers the segments and runs the cone intersector with an
-    *  Intersect1KEpilogM epilog */
-   static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
-   {
-     STAT3(normal.trav_prims,1,1,1);
-
-     const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
-     Vec4vf<M> v0,v1;
-     vbool<M> cL,cR;
-     line.gather(v0,v1,cL,cR,geom);
-
-     const vbool<Mx> active = line.template valid<Mx>();
-     const Intersect1KEpilogM<M,Mx,K,filter> epilog(ray,k,context,line.geomID(),line.primID());
-     ConeCurveIntersectorK<Mx,K>::intersect(active,ray,k,context,geom,pre,v0,v1,cL,cR,epilog);
-   }
-
-   /*! gathers the segments and returns the result of the cone intersector
-    *  run with an Occluded1KEpilogM epilog */
-   static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
-   {
-     STAT3(shadow.trav_prims,1,1,1);
-
-     const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
-     Vec4vf<M> v0,v1;
-     vbool<M> cL,cR;
-     line.gather(v0,v1,cL,cR,geom);
-
-     const vbool<Mx> active = line.template valid<Mx>();
-     const Occluded1KEpilogM<M,Mx,K,filter> epilog(ray,k,context,line.geomID(),line.primID());
-     return ConeCurveIntersectorK<Mx,K>::intersect(active,ray,k,context,geom,pre,v0,v1,cL,cR,epilog);
-   }
- };
-
- /*! Intersector for ray k of a K-wide packet against cone line segments
-  *  stored as LineMi<M>; the segments are gathered at the time of ray k. */
- template<int M, int Mx, int K, bool filter>
- struct ConeCurveMiMBIntersectorK
- {
-   typedef LineMi<M> Primitive;
-   typedef CurvePrecalculationsK<K> Precalculations;
-
-   /*! gathers the segments at ray.time()[k] and runs the cone intersector
-    *  with an Intersect1KEpilogM epilog */
-   static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
-   {
-     STAT3(normal.trav_prims,1,1,1);
-
-     const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
-     Vec4vf<M> v0,v1;
-     vbool<M> cL,cR;
-     line.gather(v0,v1,cL,cR,geom,ray.time()[k]);
-
-     const vbool<Mx> active = line.template valid<Mx>();
-     const Intersect1KEpilogM<M,Mx,K,filter> epilog(ray,k,context,line.geomID(),line.primID());
-     ConeCurveIntersectorK<Mx,K>::intersect(active,ray,k,context,geom,pre,v0,v1,cL,cR,epilog);
-   }
-
-   /*! gathers the segments at ray.time()[k] and returns the result of the
-    *  cone intersector run with an Occluded1KEpilogM epilog */
-   static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
-   {
-     STAT3(shadow.trav_prims,1,1,1);
-
-     const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
-     Vec4vf<M> v0,v1;
-     vbool<M> cL,cR;
-     line.gather(v0,v1,cL,cR,geom,ray.time()[k]);
-
-     const vbool<Mx> active = line.template valid<Mx>();
-     const Occluded1KEpilogM<M,Mx,K,filter> epilog(ray,k,context,line.geomID(),line.primID());
-     return ConeCurveIntersectorK<Mx,K>::intersect(active,ray,k,context,geom,pre,v0,v1,cL,cR,epilog);
-   }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi.h
deleted file mode 100644
index 51384f1959..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curveNi.h
+++ /dev/null
@@ -1,222 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- template<int M>
- struct CurveNi
- {
- struct Type : public PrimitiveType {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* Returns maximum number of stored primitives */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; }
-
- static __forceinline size_t bytes(size_t N)
- {
- const size_t f = N/M, r = N%M;
- static_assert(sizeof(CurveNi) == 22+25*M, "internal data layout issue");
- return f*sizeof(CurveNi) + (r!=0)*(22 + 25*r);
- }
-
- public:
-
- /*! Default constructor. */
- __forceinline CurveNi () {}
-
- /*! fill curve from curve list */
- __forceinline void fill(const PrimRef* prims, size_t& begin, size_t _end, Scene* scene)
- {
- size_t end = min(begin+M,_end);
- N = (uint8_t)(end-begin);
- const unsigned int geomID0 = prims[begin].geomID();
- this->geomID(N) = geomID0;
- ty = (uint8_t) scene->get(geomID0)->getType();
-
- /* encode all primitives */
- BBox3fa bounds = empty;
- for (size_t i=0; i<N; i++)
- {
- const PrimRef& prim = prims[begin+i];
- const unsigned int geomID = prim.geomID(); assert(geomID == geomID0);
- const unsigned int primID = prim.primID();
- bounds.extend(scene->get(geomID)->vbounds(primID));
- }
-
- /* calculate offset and scale */
- Vec3fa loffset = bounds.lower;
- float lscale = reduce_min(256.0f/(bounds.size()*sqrt(3.0f)));
- if (bounds.size() == Vec3fa(zero)) lscale = 0.0f;
- *this->offset(N) = loffset;
- *this->scale(N) = lscale;
-
- /* encode all primitives */
- for (size_t i=0; i<M && begin<end; i++, begin++)
- {
- const PrimRef& prim = prims[begin];
- const unsigned int geomID = prim.geomID();
- const unsigned int primID = prim.primID();
- const LinearSpace3fa space2 = scene->get(geomID)->computeAlignedSpace(primID);
-
- const LinearSpace3fa space3(trunc(126.0f*space2.vx),trunc(126.0f*space2.vy),trunc(126.0f*space2.vz));
- const BBox3fa bounds = scene->get(geomID)->vbounds(loffset,lscale,max(length(space3.vx),length(space3.vy),length(space3.vz)),space3.transposed(),primID);
-
- bounds_vx_x(N)[i] = (int8_t) space3.vx.x;
- bounds_vx_y(N)[i] = (int8_t) space3.vx.y;
- bounds_vx_z(N)[i] = (int8_t) space3.vx.z;
- bounds_vx_lower(N)[i] = (short) clamp(floor(bounds.lower.x),-32767.0f,32767.0f);
- bounds_vx_upper(N)[i] = (short) clamp(ceil (bounds.upper.x),-32767.0f,32767.0f);
- assert(-32767.0f <= floor(bounds.lower.x) && floor(bounds.lower.x) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.upper.x) && ceil (bounds.upper.x) <= 32767.0f);
-
- bounds_vy_x(N)[i] = (int8_t) space3.vy.x;
- bounds_vy_y(N)[i] = (int8_t) space3.vy.y;
- bounds_vy_z(N)[i] = (int8_t) space3.vy.z;
- bounds_vy_lower(N)[i] = (short) clamp(floor(bounds.lower.y),-32767.0f,32767.0f);
- bounds_vy_upper(N)[i] = (short) clamp(ceil (bounds.upper.y),-32767.0f,32767.0f);
- assert(-32767.0f <= floor(bounds.lower.y) && floor(bounds.lower.y) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.upper.y) && ceil (bounds.upper.y) <= 32767.0f);
-
- bounds_vz_x(N)[i] = (int8_t) space3.vz.x;
- bounds_vz_y(N)[i] = (int8_t) space3.vz.y;
- bounds_vz_z(N)[i] = (int8_t) space3.vz.z;
- bounds_vz_lower(N)[i] = (short) clamp(floor(bounds.lower.z),-32767.0f,32767.0f);
- bounds_vz_upper(N)[i] = (short) clamp(ceil (bounds.upper.z),-32767.0f,32767.0f);
- assert(-32767.0f <= floor(bounds.lower.z) && floor(bounds.lower.z) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.upper.z) && ceil (bounds.upper.z) <= 32767.0f);
-
- this->primID(N)[i] = primID;
- }
- }
-
- template<typename BVH, typename Allocator>
- __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc)
- {
- size_t start = set.begin();
- size_t items = CurveNi::blocks(set.size());
- size_t numbytes = CurveNi::bytes(set.size());
- CurveNi* accel = (CurveNi*) alloc.malloc1(numbytes,BVH::byteAlignment);
- for (size_t i=0; i<items; i++) {
- accel[i].fill(prims,start,set.end(),bvh->scene);
- }
- return bvh->encodeLeaf((int8_t*)accel,items);
- };
-
- public:
-
- // 27.6 - 46 bytes per primitive
- uint8_t ty;
- uint8_t N;
- uint8_t data[4+25*M+16];
-
- /*
- struct Layout
- {
- unsigned int geomID;
- unsigned int primID[N];
-
- int8_t bounds_vx_x[N];
- int8_t bounds_vx_y[N];
- int8_t bounds_vx_z[N];
- short bounds_vx_lower[N];
- short bounds_vx_upper[N];
-
- int8_t bounds_vy_x[N];
- int8_t bounds_vy_y[N];
- int8_t bounds_vy_z[N];
- short bounds_vy_lower[N];
- short bounds_vy_upper[N];
-
- int8_t bounds_vz_x[N];
- int8_t bounds_vz_y[N];
- int8_t bounds_vz_z[N];
- short bounds_vz_lower[N];
- short bounds_vz_upper[N];
-
- Vec3f offset;
- float scale;
- };
- */
-
- __forceinline unsigned int& geomID(size_t N) { return *(unsigned int*)((int8_t*)this+2); }
- __forceinline const unsigned int& geomID(size_t N) const { return *(unsigned int*)((int8_t*)this+2); }
-
- __forceinline unsigned int* primID(size_t N) { return (unsigned int*)((int8_t*)this+6); }
- __forceinline const unsigned int* primID(size_t N) const { return (unsigned int*)((int8_t*)this+6); }
-
- __forceinline int8_t* bounds_vx_x(size_t N) { return (int8_t*)((int8_t*)this+6+4*N); }
- __forceinline const int8_t* bounds_vx_x(size_t N) const { return (int8_t*)((int8_t*)this+6+4*N); }
-
- __forceinline int8_t* bounds_vx_y(size_t N) { return (int8_t*)((int8_t*)this+6+5*N); }
- __forceinline const int8_t* bounds_vx_y(size_t N) const { return (int8_t*)((int8_t*)this+6+5*N); }
-
- __forceinline int8_t* bounds_vx_z(size_t N) { return (int8_t*)((int8_t*)this+6+6*N); }
- __forceinline const int8_t* bounds_vx_z(size_t N) const { return (int8_t*)((int8_t*)this+6+6*N); }
-
- __forceinline short* bounds_vx_lower(size_t N) { return (short*)((int8_t*)this+6+7*N); }
- __forceinline const short* bounds_vx_lower(size_t N) const { return (short*)((int8_t*)this+6+7*N); }
-
- __forceinline short* bounds_vx_upper(size_t N) { return (short*)((int8_t*)this+6+9*N); }
- __forceinline const short* bounds_vx_upper(size_t N) const { return (short*)((int8_t*)this+6+9*N); }
-
- __forceinline int8_t* bounds_vy_x(size_t N) { return (int8_t*)((int8_t*)this+6+11*N); }
- __forceinline const int8_t* bounds_vy_x(size_t N) const { return (int8_t*)((int8_t*)this+6+11*N); }
-
- __forceinline int8_t* bounds_vy_y(size_t N) { return (int8_t*)((int8_t*)this+6+12*N); }
- __forceinline const int8_t* bounds_vy_y(size_t N) const { return (int8_t*)((int8_t*)this+6+12*N); }
-
- __forceinline int8_t* bounds_vy_z(size_t N) { return (int8_t*)((int8_t*)this+6+13*N); }
- __forceinline const int8_t* bounds_vy_z(size_t N) const { return (int8_t*)((int8_t*)this+6+13*N); }
-
- __forceinline short* bounds_vy_lower(size_t N) { return (short*)((int8_t*)this+6+14*N); }
- __forceinline const short* bounds_vy_lower(size_t N) const { return (short*)((int8_t*)this+6+14*N); }
-
- __forceinline short* bounds_vy_upper(size_t N) { return (short*)((int8_t*)this+6+16*N); }
- __forceinline const short* bounds_vy_upper(size_t N) const { return (short*)((int8_t*)this+6+16*N); }
-
- __forceinline int8_t* bounds_vz_x(size_t N) { return (int8_t*)((int8_t*)this+6+18*N); }
- __forceinline const int8_t* bounds_vz_x(size_t N) const { return (int8_t*)((int8_t*)this+6+18*N); }
-
- __forceinline int8_t* bounds_vz_y(size_t N) { return (int8_t*)((int8_t*)this+6+19*N); }
- __forceinline const int8_t* bounds_vz_y(size_t N) const { return (int8_t*)((int8_t*)this+6+19*N); }
-
- __forceinline int8_t* bounds_vz_z(size_t N) { return (int8_t*)((int8_t*)this+6+20*N); }
- __forceinline const int8_t* bounds_vz_z(size_t N) const { return (int8_t*)((int8_t*)this+6+20*N); }
-
- __forceinline short* bounds_vz_lower(size_t N) { return (short*)((int8_t*)this+6+21*N); }
- __forceinline const short* bounds_vz_lower(size_t N) const { return (short*)((int8_t*)this+6+21*N); }
-
- __forceinline short* bounds_vz_upper(size_t N) { return (short*)((int8_t*)this+6+23*N); }
- __forceinline const short* bounds_vz_upper(size_t N) const { return (short*)((int8_t*)this+6+23*N); }
-
- __forceinline Vec3f* offset(size_t N) { return (Vec3f*)((int8_t*)this+6+25*N); }
- __forceinline const Vec3f* offset(size_t N) const { return (Vec3f*)((int8_t*)this+6+25*N); }
-
- __forceinline float* scale(size_t N) { return (float*)((int8_t*)this+6+25*N+12); }
- __forceinline const float* scale(size_t N) const { return (float*)((int8_t*)this+6+25*N+12); }
-
- __forceinline int8_t* end(size_t N) { return (int8_t*)this+6+25*N+16; }
- __forceinline const int8_t* end(size_t N) const { return (int8_t*)this+6+25*N+16; }
- };
-
- template<int M>
- typename CurveNi<M>::Type CurveNi<M>::type;
-
- typedef CurveNi<4> Curve4i;
- typedef CurveNi<8> Curve8i;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h
deleted file mode 100644
index 0f9038c9fc..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h
+++ /dev/null
@@ -1,569 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curveNi.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct CurveNiIntersector1
- {
- typedef CurveNi<M> Primitive;
- typedef Vec3vf<M> Vec3vfM;
- typedef LinearSpace3<Vec3vfM>LinearSpace3vfM;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline vbool<M> intersect(Ray& ray, const Primitive& prim, vfloat<M>& tNear_o)
- {
- const size_t N = prim.N;
- const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N));
- const Vec3fa offset = Vec3fa(offset_scale);
- const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale));
- const Vec3fa org1 = (ray.org-offset)*scale;
- const Vec3fa dir1 = ray.dir*scale;
-
- const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)),
- vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)),
- vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N)));
-
- const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1));
- const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1));
- const Vec3vfM rcp_dir2 = rcp_safe(dir2);
-
- const vfloat<M> t_lower_x = (vfloat<M>::load(prim.bounds_vx_lower(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_upper_x = (vfloat<M>::load(prim.bounds_vx_upper(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_lower_y = (vfloat<M>::load(prim.bounds_vy_lower(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_upper_y = (vfloat<M>::load(prim.bounds_vy_upper(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_lower_z = (vfloat<M>::load(prim.bounds_vz_lower(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
- const vfloat<M> t_upper_z = (vfloat<M>::load(prim.bounds_vz_upper(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
-
- const vfloat<M> round_up (1.0f+3.0f*float(ulp));
- const vfloat<M> round_down(1.0f-3.0f*float(ulp));
- const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()));
- const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar));
- tNear_o = tNear;
- return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar);
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID));
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID));
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_n(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
-
- unsigned int vertexID = geom->curve(primID);
- Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_n(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
-
- unsigned int vertexID = geom->curve(primID);
- Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_h(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID));
- Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_h(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID));
- if (Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_hn(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID));
- Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_hn(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID));
- if (Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
- };
-
- template<int M, int K>
- struct CurveNiIntersectorK
- {
- typedef CurveNi<M> Primitive;
- typedef Vec3vf<M> Vec3vfM;
- typedef LinearSpace3<Vec3vfM>LinearSpace3vfM;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline vbool<M> intersect(RayK<K>& ray, const size_t k, const Primitive& prim, vfloat<M>& tNear_o)
- {
- const size_t N = prim.N;
- const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N));
- const Vec3fa offset = Vec3fa(offset_scale);
- const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale));
-
- const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]);
- const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]);
- const Vec3fa org1 = (ray_org-offset)*scale;
- const Vec3fa dir1 = ray_dir*scale;
-
- const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)),
- vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)),
- vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N)));
-
- const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1));
- const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1));
- const Vec3vfM rcp_dir2 = rcp_safe(dir2);
-
- const vfloat<M> t_lower_x = (vfloat<M>::load(prim.bounds_vx_lower(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_upper_x = (vfloat<M>::load(prim.bounds_vx_upper(N))-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_lower_y = (vfloat<M>::load(prim.bounds_vy_lower(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_upper_y = (vfloat<M>::load(prim.bounds_vy_upper(N))-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_lower_z = (vfloat<M>::load(prim.bounds_vz_lower(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
- const vfloat<M> t_upper_z = (vfloat<M>::load(prim.bounds_vz_upper(N))-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
-
- const vfloat<M> round_up (1.0f+3.0f*float(ulp));
- const vfloat<M> round_down(1.0f-3.0f*float(ulp));
- const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()[k]));
- const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar[k]));
- tNear_o = tNear;
- return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar);
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID));
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID));
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_n(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
-
- unsigned int vertexID = geom->curve(primID);
- Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_n(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
-
- unsigned int vertexID = geom->curve(primID);
- Vec3ff a0,a1,a2,a3; Vec3fa n0,n1,n2,n3; geom->gather(a0,a1,a2,a3,n0,n1,n2,n3,vertexID);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- const unsigned int primID1 = prim.primID(N)[i1];
- geom->prefetchL1_vertices(geom->curve(primID1));
- if (mask1) {
- const size_t i2 = bsf(mask1);
- const unsigned int primID2 = prim.primID(N)[i2];
- geom->prefetchL2_vertices(geom->curve(primID2));
- }
- }
-
- if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,n0,n1,n2,n3,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_h(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID));
- Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_h(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID));
- if (Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_hn(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID));
- Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_hn(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; Vec3fa n0,dn0,n1,dn1; geom->gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,geom->curve(primID));
- if (Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,n0,dn0,n1,dn1,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h
deleted file mode 100644
index 0cd8f833fd..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h
+++ /dev/null
@@ -1,278 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- template<int M>
- struct CurveNiMB
- {
- struct Type : public PrimitiveType {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* Returns maximum number of stored primitives */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; }
-
- static __forceinline size_t bytes(size_t N)
- {
- const size_t f = N/M, r = N%M;
- static_assert(sizeof(CurveNiMB) == 6+37*M+24, "internal data layout issue");
- return f*sizeof(CurveNiMB) + (r!=0)*(6+37*r+24);
- }
-
- public:
-
- /*! Default constructor. */
- __forceinline CurveNiMB () {}
-
- /*! fill curve from curve list */
- __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t _end, Scene* scene, const BBox1f time_range)
- {
- size_t end = min(begin+M,_end);
- N = (uint8_t)(end-begin);
- const unsigned int geomID0 = prims[begin].geomID();
- this->geomID(N) = geomID0;
- ty = (uint8_t) scene->get(geomID0)->getType();
-
- /* encode all primitives */
- LBBox3fa lbounds = empty;
- for (size_t i=0; i<N; i++)
- {
- const PrimRefMB& prim = prims[begin+i];
- const unsigned int geomID = prim.geomID(); assert(geomID == geomID0);
- const unsigned int primID = prim.primID();
- lbounds.extend(scene->get(geomID)->vlinearBounds(primID,time_range));
- }
- BBox3fa bounds = lbounds.bounds();
-
- /* calculate offset and scale */
- Vec3fa loffset = bounds.lower;
- float lscale = reduce_min(256.0f/(bounds.size()*sqrt(3.0f)));
- if (bounds.size() == Vec3fa(zero)) lscale = 0.0f;
- *this->offset(N) = loffset;
- *this->scale(N) = lscale;
- this->time_offset(N) = time_range.lower;
- this->time_scale(N) = 1.0f/time_range.size();
-
- /* encode all primitives */
- for (size_t i=0; i<M && begin<end; i++, begin++)
- {
- const PrimRefMB& prim = prims[begin];
- const unsigned int geomID = prim.geomID();
- const unsigned int primID = prim.primID();
- const LinearSpace3fa space2 = scene->get(geomID)->computeAlignedSpaceMB(primID,time_range);
-
- const LinearSpace3fa space3(trunc(126.0f*space2.vx),trunc(126.0f*space2.vy),trunc(126.0f*space2.vz));
- const LBBox3fa bounds = scene->get(geomID)->vlinearBounds(loffset,lscale,max(length(space3.vx),length(space3.vy),length(space3.vz)),space3.transposed(),primID,time_range);
-
- // NOTE: this weird (int8_t) (short) cast works around VS2015 Win32 compiler bug
- bounds_vx_x(N)[i] = (int8_t) (short) space3.vx.x;
- bounds_vx_y(N)[i] = (int8_t) (short) space3.vx.y;
- bounds_vx_z(N)[i] = (int8_t) (short) space3.vx.z;
- bounds_vx_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.x),-32767.0f,32767.0f);
- bounds_vx_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.x),-32767.0f,32767.0f);
- bounds_vx_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.x),-32767.0f,32767.0f);
- bounds_vx_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.x),-32767.0f,32767.0f);
- assert(-32767.0f <= floor(bounds.bounds0.lower.x) && floor(bounds.bounds0.lower.x) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.bounds0.upper.x) && ceil (bounds.bounds0.upper.x) <= 32767.0f);
- assert(-32767.0f <= floor(bounds.bounds1.lower.x) && floor(bounds.bounds1.lower.x) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.bounds1.upper.x) && ceil (bounds.bounds1.upper.x) <= 32767.0f);
-
- bounds_vy_x(N)[i] = (int8_t) (short) space3.vy.x;
- bounds_vy_y(N)[i] = (int8_t) (short) space3.vy.y;
- bounds_vy_z(N)[i] = (int8_t) (short) space3.vy.z;
- bounds_vy_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.y),-32767.0f,32767.0f);
- bounds_vy_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.y),-32767.0f,32767.0f);
- bounds_vy_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.y),-32767.0f,32767.0f);
- bounds_vy_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.y),-32767.0f,32767.0f);
- assert(-32767.0f <= floor(bounds.bounds0.lower.y) && floor(bounds.bounds0.lower.y) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.bounds0.upper.y) && ceil (bounds.bounds0.upper.y) <= 32767.0f);
- assert(-32767.0f <= floor(bounds.bounds1.lower.y) && floor(bounds.bounds1.lower.y) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.bounds1.upper.y) && ceil (bounds.bounds1.upper.y) <= 32767.0f);
-
- bounds_vz_x(N)[i] = (int8_t) (short) space3.vz.x;
- bounds_vz_y(N)[i] = (int8_t) (short) space3.vz.y;
- bounds_vz_z(N)[i] = (int8_t) (short) space3.vz.z;
- bounds_vz_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.z),-32767.0f,32767.0f);
- bounds_vz_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.z),-32767.0f,32767.0f);
- bounds_vz_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.z),-32767.0f,32767.0f);
- bounds_vz_upper1(N)[i] = (short) clamp(ceil (bounds.bounds1.upper.z),-32767.0f,32767.0f);
- assert(-32767.0f <= floor(bounds.bounds0.lower.z) && floor(bounds.bounds0.lower.z) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.bounds0.upper.z) && ceil (bounds.bounds0.upper.z) <= 32767.0f);
- assert(-32767.0f <= floor(bounds.bounds1.lower.z) && floor(bounds.bounds1.lower.z) <= 32767.0f);
- assert(-32767.0f <= ceil (bounds.bounds1.upper.z) && ceil (bounds.bounds1.upper.z) <= 32767.0f);
-
- this->primID(N)[i] = primID;
- }
-
- return lbounds;
- }
-
- template<typename BVH, typename SetMB, typename Allocator>
- __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc)
- {
- size_t start = prims.begin();
- size_t end = prims.end();
- size_t items = CurveNiMB::blocks(prims.size());
- size_t numbytes = CurveNiMB::bytes(prims.size());
- CurveNiMB* accel = (CurveNiMB*) alloc.malloc1(numbytes,BVH::byteAlignment);
- const typename BVH::NodeRef node = bvh->encodeLeaf((int8_t*)accel,items);
-
- LBBox3fa bounds = empty;
- for (size_t i=0; i<items; i++)
- bounds.extend(accel[i].fillMB(prims.prims->data(),start,end,bvh->scene,prims.time_range));
-
- return typename BVH::NodeRecordMB4D(node,bounds,prims.time_range);
- };
-
-
- public:
-
- // 27.6 - 46 bytes per primitive
- uint8_t ty;
- uint8_t N;
- uint8_t data[4+37*M+24];
-
- /*
- struct Layout
- {
- unsigned int geomID;
- unsigned int primID[N];
-
- int8_t bounds_vx_x[N];
- int8_t bounds_vx_y[N];
- int8_t bounds_vx_z[N];
- short bounds_vx_lower0[N];
- short bounds_vx_upper0[N];
- short bounds_vx_lower1[N];
- short bounds_vx_upper1[N];
-
- int8_t bounds_vy_x[N];
- int8_t bounds_vy_y[N];
- int8_t bounds_vy_z[N];
- short bounds_vy_lower0[N];
- short bounds_vy_upper0[N];
- short bounds_vy_lower1[N];
- short bounds_vy_upper1[N];
-
- int8_t bounds_vz_x[N];
- int8_t bounds_vz_y[N];
- int8_t bounds_vz_z[N];
- short bounds_vz_lower0[N];
- short bounds_vz_upper0[N];
- short bounds_vz_lower1[N];
- short bounds_vz_upper1[N];
-
- Vec3f offset;
- float scale;
-
- float time_offset;
- float time_scale;
- };
- */
-
- __forceinline unsigned int& geomID(size_t N) { return *(unsigned int*)((int8_t*)this+2); }
- __forceinline const unsigned int& geomID(size_t N) const { return *(unsigned int*)((int8_t*)this+2); }
-
- __forceinline unsigned int* primID(size_t N) { return (unsigned int*)((int8_t*)this+6); }
- __forceinline const unsigned int* primID(size_t N) const { return (unsigned int*)((int8_t*)this+6); }
-
- __forceinline int8_t* bounds_vx_x(size_t N) { return (int8_t*)((int8_t*)this+6+4*N); }
- __forceinline const int8_t* bounds_vx_x(size_t N) const { return (int8_t*)((int8_t*)this+6+4*N); }
-
- __forceinline int8_t* bounds_vx_y(size_t N) { return (int8_t*)((int8_t*)this+6+5*N); }
- __forceinline const int8_t* bounds_vx_y(size_t N) const { return (int8_t*)((int8_t*)this+6+5*N); }
-
- __forceinline int8_t* bounds_vx_z(size_t N) { return (int8_t*)((int8_t*)this+6+6*N); }
- __forceinline const int8_t* bounds_vx_z(size_t N) const { return (int8_t*)((int8_t*)this+6+6*N); }
-
- __forceinline short* bounds_vx_lower0(size_t N) { return (short*)((int8_t*)this+6+7*N); }
- __forceinline const short* bounds_vx_lower0(size_t N) const { return (short*)((int8_t*)this+6+7*N); }
-
- __forceinline short* bounds_vx_upper0(size_t N) { return (short*)((int8_t*)this+6+9*N); }
- __forceinline const short* bounds_vx_upper0(size_t N) const { return (short*)((int8_t*)this+6+9*N); }
-
- __forceinline short* bounds_vx_lower1(size_t N) { return (short*)((int8_t*)this+6+11*N); }
- __forceinline const short* bounds_vx_lower1(size_t N) const { return (short*)((int8_t*)this+6+11*N); }
-
- __forceinline short* bounds_vx_upper1(size_t N) { return (short*)((int8_t*)this+6+13*N); }
- __forceinline const short* bounds_vx_upper1(size_t N) const { return (short*)((int8_t*)this+6+13*N); }
-
- __forceinline int8_t* bounds_vy_x(size_t N) { return (int8_t*)((int8_t*)this+6+15*N); }
- __forceinline const int8_t* bounds_vy_x(size_t N) const { return (int8_t*)((int8_t*)this+6+15*N); }
-
- __forceinline int8_t* bounds_vy_y(size_t N) { return (int8_t*)((int8_t*)this+6+16*N); }
- __forceinline const int8_t* bounds_vy_y(size_t N) const { return (int8_t*)((int8_t*)this+6+16*N); }
-
- __forceinline int8_t* bounds_vy_z(size_t N) { return (int8_t*)((int8_t*)this+6+17*N); }
- __forceinline const int8_t* bounds_vy_z(size_t N) const { return (int8_t*)((int8_t*)this+6+17*N); }
-
- __forceinline short* bounds_vy_lower0(size_t N) { return (short*)((int8_t*)this+6+18*N); }
- __forceinline const short* bounds_vy_lower0(size_t N) const { return (short*)((int8_t*)this+6+18*N); }
-
- __forceinline short* bounds_vy_upper0(size_t N) { return (short*)((int8_t*)this+6+20*N); }
- __forceinline const short* bounds_vy_upper0(size_t N) const { return (short*)((int8_t*)this+6+20*N); }
-
- __forceinline short* bounds_vy_lower1(size_t N) { return (short*)((int8_t*)this+6+22*N); }
- __forceinline const short* bounds_vy_lower1(size_t N) const { return (short*)((int8_t*)this+6+22*N); }
-
- __forceinline short* bounds_vy_upper1(size_t N) { return (short*)((int8_t*)this+6+24*N); }
- __forceinline const short* bounds_vy_upper1(size_t N) const { return (short*)((int8_t*)this+6+24*N); }
-
- __forceinline int8_t* bounds_vz_x(size_t N) { return (int8_t*)((int8_t*)this+6+26*N); }
- __forceinline const int8_t* bounds_vz_x(size_t N) const { return (int8_t*)((int8_t*)this+6+26*N); }
-
- __forceinline int8_t* bounds_vz_y(size_t N) { return (int8_t*)((int8_t*)this+6+27*N); }
- __forceinline const int8_t* bounds_vz_y(size_t N) const { return (int8_t*)((int8_t*)this+6+27*N); }
-
- __forceinline int8_t* bounds_vz_z(size_t N) { return (int8_t*)((int8_t*)this+6+28*N); }
- __forceinline const int8_t* bounds_vz_z(size_t N) const { return (int8_t*)((int8_t*)this+6+28*N); }
-
- __forceinline short* bounds_vz_lower0(size_t N) { return (short*)((int8_t*)this+6+29*N); }
- __forceinline const short* bounds_vz_lower0(size_t N) const { return (short*)((int8_t*)this+6+29*N); }
-
- __forceinline short* bounds_vz_upper0(size_t N) { return (short*)((int8_t*)this+6+31*N); }
- __forceinline const short* bounds_vz_upper0(size_t N) const { return (short*)((int8_t*)this+6+31*N); }
-
- __forceinline short* bounds_vz_lower1(size_t N) { return (short*)((int8_t*)this+6+33*N); }
- __forceinline const short* bounds_vz_lower1(size_t N) const { return (short*)((int8_t*)this+6+33*N); }
-
- __forceinline short* bounds_vz_upper1(size_t N) { return (short*)((int8_t*)this+6+35*N); }
- __forceinline const short* bounds_vz_upper1(size_t N) const { return (short*)((int8_t*)this+6+35*N); }
-
- __forceinline Vec3f* offset(size_t N) { return (Vec3f*)((int8_t*)this+6+37*N); }
- __forceinline const Vec3f* offset(size_t N) const { return (Vec3f*)((int8_t*)this+6+37*N); }
-
- __forceinline float* scale(size_t N) { return (float*)((int8_t*)this+6+37*N+12); }
- __forceinline const float* scale(size_t N) const { return (float*)((int8_t*)this+6+37*N+12); }
-
- __forceinline float& time_offset(size_t N) { return *(float*)((int8_t*)this+6+37*N+16); }
- __forceinline const float& time_offset(size_t N) const { return *(float*)((int8_t*)this+6+37*N+16); }
-
- __forceinline float& time_scale(size_t N) { return *(float*)((int8_t*)this+6+37*N+20); }
- __forceinline const float& time_scale(size_t N) const { return *(float*)((int8_t*)this+6+37*N+20); }
-
- __forceinline int8_t* end(size_t N) { return (int8_t*)this+6+37*N+24; }
- __forceinline const int8_t* end(size_t N) const { return (int8_t*)this+6+37*N+24; }
- };
-
- template<int M>
- typename CurveNiMB<M>::Type CurveNiMB<M>::type;
-
- typedef CurveNiMB<4> Curve4iMB;
- typedef CurveNiMB<8> Curve8iMB;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h
deleted file mode 100644
index 0cbc764668..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h
+++ /dev/null
@@ -1,516 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curveNi_mb.h"
-#include "../subdiv/linear_bezier_patch.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct CurveNiMBIntersector1
- {
- typedef CurveNiMB<M> Primitive;
- typedef Vec3vf<M> Vec3vfM;
- typedef LinearSpace3<Vec3vfM>LinearSpace3vfM;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline vbool<M> intersect(Ray& ray, const Primitive& prim, vfloat<M>& tNear_o)
- {
- const size_t N = prim.N;
- const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N));
- const Vec3fa offset = Vec3fa(offset_scale);
- const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale));
- const Vec3fa org1 = (ray.org-offset)*scale;
- const Vec3fa dir1 = ray.dir*scale;
-
- const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)),
- vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)),
- vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N)));
-
- const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1));
- const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1));
- const Vec3vfM rcp_dir2 = rcp_safe(dir2);
-
- const vfloat<M> ltime = (ray.time()-prim.time_offset(N))*prim.time_scale(N);
- const vfloat<M> vx_lower0 = vfloat<M>::load(prim.bounds_vx_lower0(N));
- const vfloat<M> vx_lower1 = vfloat<M>::load(prim.bounds_vx_lower1(N));
- const vfloat<M> vx_lower = madd(ltime,vx_lower1-vx_lower0,vx_lower0);
- const vfloat<M> vx_upper0 = vfloat<M>::load(prim.bounds_vx_upper0(N));
- const vfloat<M> vx_upper1 = vfloat<M>::load(prim.bounds_vx_upper1(N));
- const vfloat<M> vx_upper = madd(ltime,vx_upper1-vx_upper0,vx_upper0);
-
- const vfloat<M> vy_lower0 = vfloat<M>::load(prim.bounds_vy_lower0(N));
- const vfloat<M> vy_lower1 = vfloat<M>::load(prim.bounds_vy_lower1(N));
- const vfloat<M> vy_lower = madd(ltime,vy_lower1-vy_lower0,vy_lower0);
- const vfloat<M> vy_upper0 = vfloat<M>::load(prim.bounds_vy_upper0(N));
- const vfloat<M> vy_upper1 = vfloat<M>::load(prim.bounds_vy_upper1(N));
- const vfloat<M> vy_upper = madd(ltime,vy_upper1-vy_upper0,vy_upper0);
-
- const vfloat<M> vz_lower0 = vfloat<M>::load(prim.bounds_vz_lower0(N));
- const vfloat<M> vz_lower1 = vfloat<M>::load(prim.bounds_vz_lower1(N));
- const vfloat<M> vz_lower = madd(ltime,vz_lower1-vz_lower0,vz_lower0);
- const vfloat<M> vz_upper0 = vfloat<M>::load(prim.bounds_vz_upper0(N));
- const vfloat<M> vz_upper1 = vfloat<M>::load(prim.bounds_vz_upper1(N));
- const vfloat<M> vz_upper = madd(ltime,vz_upper1-vz_upper0,vz_upper0);
-
- const vfloat<M> t_lower_x = (vx_lower-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_upper_x = (vx_upper-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_lower_y = (vy_lower-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_upper_y = (vy_upper-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_lower_z = (vz_lower-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
- const vfloat<M> t_upper_z = (vz_upper-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
-
- const vfloat<M> round_up (1.0f+3.0f*float(ulp));
- const vfloat<M> round_down(1.0f-3.0f*float(ulp));
- const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()));
- const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar));
- tNear_o = tNear;
- return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar);
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time());
-
- Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time());
-
- if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_n(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time());
- Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_n(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time());
-
- if (Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_h(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time());
- Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_h(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time());
- if (Intersector().intersect(pre,ray,context,geom,primID,p0,t0,p1,t1,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_hn(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time());
- Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_hn(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray.org, primID,ray.time());
- if (Intersector().intersect(pre,ray,context,geom,primID,curve,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
- };
-
- template<int M, int K>
- struct CurveNiMBIntersectorK
- {
- typedef CurveNiMB<M> Primitive;
- typedef Vec3vf<M> Vec3vfM;
- typedef LinearSpace3<Vec3vfM>LinearSpace3vfM;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline vbool<M> intersect(RayK<K>& ray, const size_t k, const Primitive& prim, vfloat<M>& tNear_o)
- {
- const size_t N = prim.N;
- const vfloat4 offset_scale = vfloat4::loadu(prim.offset(N));
- const Vec3fa offset = Vec3fa(offset_scale);
- const Vec3fa scale = Vec3fa(shuffle<3,3,3,3>(offset_scale));
-
- const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]);
- const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]);
- const Vec3fa org1 = (ray_org-offset)*scale;
- const Vec3fa dir1 = ray_dir*scale;
-
- const LinearSpace3vfM space(vfloat<M>::load(prim.bounds_vx_x(N)), vfloat<M>::load(prim.bounds_vx_y(N)), vfloat<M>::load(prim.bounds_vx_z(N)),
- vfloat<M>::load(prim.bounds_vy_x(N)), vfloat<M>::load(prim.bounds_vy_y(N)), vfloat<M>::load(prim.bounds_vy_z(N)),
- vfloat<M>::load(prim.bounds_vz_x(N)), vfloat<M>::load(prim.bounds_vz_y(N)), vfloat<M>::load(prim.bounds_vz_z(N)));
-
- const Vec3vfM dir2 = xfmVector(space,Vec3vfM(dir1));
- const Vec3vfM org2 = xfmPoint (space,Vec3vfM(org1));
- const Vec3vfM rcp_dir2 = rcp_safe(dir2);
-
- const vfloat<M> ltime = (ray.time()[k]-prim.time_offset(N))*prim.time_scale(N);
- const vfloat<M> vx_lower0 = vfloat<M>::load(prim.bounds_vx_lower0(N));
- const vfloat<M> vx_lower1 = vfloat<M>::load(prim.bounds_vx_lower1(N));
- const vfloat<M> vx_lower = madd(ltime,vx_lower1-vx_lower0,vx_lower0);
- const vfloat<M> vx_upper0 = vfloat<M>::load(prim.bounds_vx_upper0(N));
- const vfloat<M> vx_upper1 = vfloat<M>::load(prim.bounds_vx_upper1(N));
- const vfloat<M> vx_upper = madd(ltime,vx_upper1-vx_upper0,vx_upper0);
-
- const vfloat<M> vy_lower0 = vfloat<M>::load(prim.bounds_vy_lower0(N));
- const vfloat<M> vy_lower1 = vfloat<M>::load(prim.bounds_vy_lower1(N));
- const vfloat<M> vy_lower = madd(ltime,vy_lower1-vy_lower0,vy_lower0);
- const vfloat<M> vy_upper0 = vfloat<M>::load(prim.bounds_vy_upper0(N));
- const vfloat<M> vy_upper1 = vfloat<M>::load(prim.bounds_vy_upper1(N));
- const vfloat<M> vy_upper = madd(ltime,vy_upper1-vy_upper0,vy_upper0);
-
- const vfloat<M> vz_lower0 = vfloat<M>::load(prim.bounds_vz_lower0(N));
- const vfloat<M> vz_lower1 = vfloat<M>::load(prim.bounds_vz_lower1(N));
- const vfloat<M> vz_lower = madd(ltime,vz_lower1-vz_lower0,vz_lower0);
- const vfloat<M> vz_upper0 = vfloat<M>::load(prim.bounds_vz_upper0(N));
- const vfloat<M> vz_upper1 = vfloat<M>::load(prim.bounds_vz_upper1(N));
- const vfloat<M> vz_upper = madd(ltime,vz_upper1-vz_upper0,vz_upper0);
-
- const vfloat<M> t_lower_x = (vx_lower-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_upper_x = (vx_upper-vfloat<M>(org2.x))*vfloat<M>(rcp_dir2.x);
- const vfloat<M> t_lower_y = (vy_lower-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_upper_y = (vy_upper-vfloat<M>(org2.y))*vfloat<M>(rcp_dir2.y);
- const vfloat<M> t_lower_z = (vz_lower-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
- const vfloat<M> t_upper_z = (vz_upper-vfloat<M>(org2.z))*vfloat<M>(rcp_dir2.z);
-
- const vfloat<M> round_up (1.0f+3.0f*float(ulp));
- const vfloat<M> round_down(1.0f-3.0f*float(ulp));
- const vfloat<M> tNear = round_down*max(mini(t_lower_x,t_upper_x),mini(t_lower_y,t_upper_y),mini(t_lower_z,t_upper_z),vfloat<M>(ray.tnear()[k]));
- const vfloat<M> tFar = round_up *min(maxi(t_lower_x,t_upper_x),maxi(t_lower_y,t_upper_y),maxi(t_lower_z,t_upper_z),vfloat<M>(ray.tfar[k]));
- tNear_o = tNear;
- return (vint<M>(step) < vint<M>(prim.N)) & (tNear <= tFar);
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
-
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time()[k]);
-
- Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff a0,a1,a2,a3; geom->gather(a0,a1,a2,a3,geom->curve(primID),ray.time()[k]);
-
- if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_n(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
-
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]);
- Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_n(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]);
-
- if (Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_h(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
-
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time()[k]);
- Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_h(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- Vec3ff p0,t0,p1,t1; geom->gather_hermite(p0,t0,p1,t1,geom->curve(primID),ray.time()[k]);
- if (Intersector().intersect(pre,ray,k,context,geom,primID,p0,t0,p1,t1,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_hn(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
-
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]);
- Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_hn(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = context->scene->get<CurveGeometry>(geomID);
- const Vec3fa ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const TensorLinearCubicBezierSurface3fa curve = geom->getNormalOrientedHermiteCurve<typename Intersector::SourceCurve3ff, typename Intersector::SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,ray.time()[k]);
- if (Intersector().intersect(pre,ray,k,context,geom,primID,curve,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNv.h b/thirdparty/embree-aarch64/kernels/geometry/curveNv.h
deleted file mode 100644
index 6eb5e30b39..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curveNv.h
+++ /dev/null
@@ -1,101 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curveNi.h"
-
-namespace embree
-{
- template<int M>
- struct CurveNv : public CurveNi<M>
- {
- using CurveNi<M>::N;
-
- struct Type : public PrimitiveType {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* Returns maximum number of stored primitives */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+M-1)/M; }
-
- static __forceinline size_t bytes(size_t N)
- {
- const size_t f = N/M, r = N%M;
- static_assert(sizeof(CurveNv) == 22+25*M+4*16*M, "internal data layout issue");
- return f*sizeof(CurveNv) + (r!=0)*(22 + 25*r + 4*16*r);
- }
-
- public:
-
- /*! Default constructor. */
- __forceinline CurveNv () {}
-
- /*! fill curve from curve list */
- __forceinline void fill(const PrimRef* prims, size_t& begin, size_t _end, Scene* scene)
- {
- size_t end = min(begin+M,_end);
- size_t N = end-begin;
-
- /* encode all primitives */
- for (size_t i=0; i<N; i++)
- {
- const PrimRef& prim = prims[begin+i];
- const unsigned int geomID = prim.geomID();
- const unsigned int primID = prim.primID();
- CurveGeometry* mesh = (CurveGeometry*) scene->get(geomID);
- const unsigned vtxID = mesh->curve(primID);
- Vec3fa::storeu(&this->vertices(i,N)[0],mesh->vertex(vtxID+0));
- Vec3fa::storeu(&this->vertices(i,N)[1],mesh->vertex(vtxID+1));
- Vec3fa::storeu(&this->vertices(i,N)[2],mesh->vertex(vtxID+2));
- Vec3fa::storeu(&this->vertices(i,N)[3],mesh->vertex(vtxID+3));
- }
- }
-
- template<typename BVH, typename Allocator>
- __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc)
- {
- if (set.size() == 0)
- return BVH::emptyNode;
-
- /* fall back to CurveNi for oriented curves */
- unsigned int geomID = prims[set.begin()].geomID();
- if (bvh->scene->get(geomID)->getCurveType() == Geometry::GTY_SUBTYPE_ORIENTED_CURVE) {
- return CurveNi<M>::createLeaf(bvh,prims,set,alloc);
- }
- if (bvh->scene->get(geomID)->getCurveBasis() == Geometry::GTY_BASIS_HERMITE) {
- return CurveNi<M>::createLeaf(bvh,prims,set,alloc);
- }
-
- size_t start = set.begin();
- size_t items = CurveNv::blocks(set.size());
- size_t numbytes = CurveNv::bytes(set.size());
- CurveNv* accel = (CurveNv*) alloc.malloc1(numbytes,BVH::byteAlignment);
- for (size_t i=0; i<items; i++) {
- accel[i].CurveNv<M>::fill(prims,start,set.end(),bvh->scene);
- accel[i].CurveNi<M>::fill(prims,start,set.end(),bvh->scene);
- }
- return bvh->encodeLeaf((char*)accel,items);
- };
-
- public:
- unsigned char data[4*16*M];
- __forceinline Vec3fa* vertices(size_t i, size_t N) { return (Vec3fa*)CurveNi<M>::end(N)+4*i; }
- __forceinline const Vec3fa* vertices(size_t i, size_t N) const { return (Vec3fa*)CurveNi<M>::end(N)+4*i; }
- };
-
- template<int M>
- typename CurveNv<M>::Type CurveNv<M>::type;
-
- typedef CurveNv<4> Curve4v;
- typedef CurveNv<8> Curve8v;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h
deleted file mode 100644
index e20da2882e..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h
+++ /dev/null
@@ -1,181 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curveNv.h"
-#include "curveNi_intersector.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct CurveNvIntersector1 : public CurveNiIntersector1<M>
- {
- typedef CurveNv<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_t(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = CurveNiIntersector1<M>::intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID);
- const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]);
- const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]);
- const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]);
- const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- prefetchL1(&prim.vertices(i1,N)[0]);
- prefetchL1(&prim.vertices(i1,N)[4]);
- if (mask1) {
- const size_t i2 = bsf(mask1);
- prefetchL2(&prim.vertices(i2,N)[0]);
- prefetchL2(&prim.vertices(i2,N)[4]);
- }
- }
-
- Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_t(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = CurveNiIntersector1<M>::intersect(ray,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID);
- const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]);
- const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]);
- const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]);
- const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- prefetchL1(&prim.vertices(i1,N)[0]);
- prefetchL1(&prim.vertices(i1,N)[4]);
- if (mask1) {
- const size_t i2 = bsf(mask1);
- prefetchL2(&prim.vertices(i2,N)[0]);
- prefetchL2(&prim.vertices(i2,N)[4]);
- }
- }
-
- if (Intersector().intersect(pre,ray,context,geom,primID,a0,a1,a2,a3,Epilog(ray,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar));
- }
- return false;
- }
- };
-
- template<int M, int K>
- struct CurveNvIntersectorK : public CurveNiIntersectorK<M,K>
- {
- typedef CurveNv<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- template<typename Intersector, typename Epilog>
- static __forceinline void intersect_t(Precalculations& pre, RayHitK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = CurveNiIntersectorK<M,K>::intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(normal.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID);
- const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]);
- const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]);
- const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]);
- const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- prefetchL1(&prim.vertices(i1,N)[0]);
- prefetchL1(&prim.vertices(i1,N)[4]);
- if (mask1) {
- const size_t i2 = bsf(mask1);
- prefetchL2(&prim.vertices(i2,N)[0]);
- prefetchL2(&prim.vertices(i2,N)[4]);
- }
- }
-
- Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID));
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- }
-
- template<typename Intersector, typename Epilog>
- static __forceinline bool occluded_t(Precalculations& pre, RayK<K>& ray, const size_t k, IntersectContext* context, const Primitive& prim)
- {
- vfloat<M> tNear;
- vbool<M> valid = CurveNiIntersectorK<M,K>::intersect(ray,k,prim,tNear);
-
- const size_t N = prim.N;
- size_t mask = movemask(valid);
- while (mask)
- {
- const size_t i = bscf(mask);
- STAT3(shadow.trav_prims,1,1,1);
- const unsigned int geomID = prim.geomID(N);
- const unsigned int primID = prim.primID(N)[i];
- const CurveGeometry* geom = (CurveGeometry*) context->scene->get(geomID);
- const Vec3ff a0 = Vec3ff::loadu(&prim.vertices(i,N)[0]);
- const Vec3ff a1 = Vec3ff::loadu(&prim.vertices(i,N)[1]);
- const Vec3ff a2 = Vec3ff::loadu(&prim.vertices(i,N)[2]);
- const Vec3ff a3 = Vec3ff::loadu(&prim.vertices(i,N)[3]);
-
- size_t mask1 = mask;
- const size_t i1 = bscf(mask1);
- if (mask) {
- prefetchL1(&prim.vertices(i1,N)[0]);
- prefetchL1(&prim.vertices(i1,N)[4]);
- if (mask1) {
- const size_t i2 = bsf(mask1);
- prefetchL2(&prim.vertices(i2,N)[0]);
- prefetchL2(&prim.vertices(i2,N)[4]);
- }
- }
-
- if (Intersector().intersect(pre,ray,k,context,geom,primID,a0,a1,a2,a3,Epilog(ray,k,context,geomID,primID)))
- return true;
-
- mask &= movemask(tNear <= vfloat<M>(ray.tfar[k]));
- }
- return false;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h
deleted file mode 100644
index 204958f7cc..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h
+++ /dev/null
@@ -1,98 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-#include "../subdiv/bezier_curve.h"
-#include "../common/primref.h"
-#include "bezier_hair_intersector.h"
-#include "bezier_ribbon_intersector.h"
-#include "bezier_curve_intersector.h"
-#include "oriented_curve_intersector.h"
-#include "../bvh/node_intersector1.h"
-
-// FIXME: this file seems replicate of curve_intersector_virtual.h
-
-namespace embree
-{
- namespace isa
- {
- struct VirtualCurveIntersector1
- {
- typedef unsigned char Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty];
- leafIntersector.intersect<1>(&pre,&ray,context,prim);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty];
- return leafIntersector.occluded<1>(&pre,&ray,context,prim);
- }
- };
-
- template<int K>
- struct VirtualCurveIntersectorK
- {
- typedef unsigned char Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty];
- size_t mask = movemask(valid_i);
- while (mask) leafIntersector.intersect<K>(&pre,&ray,bscf(mask),context,prim);
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty];
- vbool<K> valid_o = false;
- size_t mask = movemask(valid_i);
- while (mask) {
- size_t k = bscf(mask);
- if (leafIntersector.occluded<K>(&pre,&ray,k,context,prim))
- set(valid_o, k);
- }
- return valid_o;
- }
-
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty];
- leafIntersector.intersect<K>(&pre,&ray,k,context,prim);
- }
-
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurvePrimitive::Intersectors& leafIntersector = ((VirtualCurvePrimitive*) This->leafIntersector)->vtbl[ty];
- return leafIntersector.occluded<K>(&pre,&ray,k,context,prim);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h
deleted file mode 100644
index 343cc8ff28..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h
+++ /dev/null
@@ -1,129 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- namespace isa
- {
- template<typename NativeCurve3fa, int M>
- struct DistanceCurveHit
- {
- __forceinline DistanceCurveHit() {}
-
- __forceinline DistanceCurveHit(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const int i, const int N,
- const NativeCurve3fa& curve3D)
- : U(U), V(V), T(T), i(i), N(N), curve3D(curve3D), valid(valid) {}
-
- __forceinline void finalize()
- {
- vu = (vfloat<M>(step)+U+vfloat<M>(float(i)))*(1.0f/float(N));
- vv = V;
- vt = T;
- }
-
- __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
- __forceinline float t (const size_t i) const { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) const {
- return curve3D.eval_du(vu[i]);
- }
-
- public:
- vfloat<M> U;
- vfloat<M> V;
- vfloat<M> T;
- int i, N;
- NativeCurve3fa curve3D;
-
- public:
- vbool<M> valid;
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- };
-
- template<typename NativeCurve3fa>
- struct DistanceCurve1Intersector1
- {
- template<typename Epilog>
- __forceinline bool intersect(const CurvePrecalculations1& pre,Ray& ray,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2, const Vec3fa& v3,
- const Epilog& epilog)
- {
- const int N = geom->tessellationRate;
-
- /* transform control points into ray space */
- const NativeCurve3fa curve3Di(v0,v1,v2,v3);
- const NativeCurve3fa curve3D = enlargeRadiusToMinWidth(context,geom,ray.org,curve3Di);
- const NativeCurve3fa curve2D = curve3D.xfm_pr(pre.ray_space,ray.org);
-
- /* evaluate the bezier curve */
- vboolx valid = vfloatx(step) < vfloatx(float(N));
- const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(0,N);
- const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(0,N);
-
- /* approximative intersection with cone */
- const Vec4vfx v = p1-p0;
- const Vec4vfx w = -p0;
- const vfloatx d0 = madd(w.x,v.x,w.y*v.y);
- const vfloatx d1 = madd(v.x,v.x,v.y*v.y);
- const vfloatx u = clamp(d0*rcp(d1),vfloatx(zero),vfloatx(one));
- const Vec4vfx p = madd(u,v,p0);
- const vfloatx t = p.z*pre.depth_scale;
- const vfloatx d2 = madd(p.x,p.x,p.y*p.y);
- const vfloatx r = p.w;
- const vfloatx r2 = r*r;
- valid &= (d2 <= r2) & (vfloatx(ray.tnear()) <= t) & (t <= vfloatx(ray.tfar));
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
- valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*pre.depth_scale; // ignore self intersections
-
- /* update hit information */
- bool ishit = false;
- if (unlikely(any(valid))) {
- DistanceCurveHit<NativeCurve3fa,VSIZEX> hit(valid,u,0.0f,t,0,N,curve3D);
- ishit = ishit | epilog(valid,hit);
- }
-
- if (unlikely(VSIZEX < N))
- {
- /* process SIMD-size many segments per iteration */
- for (int i=VSIZEX; i<N; i+=VSIZEX)
- {
- /* evaluate the bezier curve */
- vboolx valid = vintx(i)+vintx(step) < vintx(N);
- const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(i,N);
- const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(i,N);
-
- /* approximative intersection with cone */
- const Vec4vfx v = p1-p0;
- const Vec4vfx w = -p0;
- const vfloatx d0 = madd(w.x,v.x,w.y*v.y);
- const vfloatx d1 = madd(v.x,v.x,v.y*v.y);
- const vfloatx u = clamp(d0*rcp(d1),vfloatx(zero),vfloatx(one));
- const Vec4vfx p = madd(u,v,p0);
- const vfloatx t = p.z*pre.depth_scale;
- const vfloatx d2 = madd(p.x,p.x,p.y*p.y);
- const vfloatx r = p.w;
- const vfloatx r2 = r*r;
- valid &= (d2 <= r2) & (vfloatx(ray.tnear()) <= t) & (t <= vfloatx(ray.tfar));
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
- valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*pre.depth_scale; // ignore self intersections
-
- /* update hit information */
- if (unlikely(any(valid))) {
- DistanceCurveHit<NativeCurve3fa,VSIZEX> hit(valid,u,0.0f,t,i,N,curve3D);
- ishit = ishit | epilog(valid,hit);
- }
- }
- }
- return ishit;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h
deleted file mode 100644
index 47531027fc..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h
+++ /dev/null
@@ -1,417 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "curve_intersector_precalculations.h"
-#include "curve_intersector_sweep.h"
-#include "../subdiv/linear_bezier_patch.h"
-
-#define DBG(x)
-
-namespace embree
-{
- namespace isa
- {
- template<typename Ray, typename Epilog>
- struct TensorLinearCubicBezierSurfaceIntersector
- {
- const LinearSpace3fa& ray_space;
- Ray& ray;
- TensorLinearCubicBezierSurface3fa curve3d;
- TensorLinearCubicBezierSurface2fa curve2d;
- float eps;
- const Epilog& epilog;
- bool isHit;
-
- __forceinline TensorLinearCubicBezierSurfaceIntersector (const LinearSpace3fa& ray_space, Ray& ray, const TensorLinearCubicBezierSurface3fa& curve3d, const Epilog& epilog)
- : ray_space(ray_space), ray(ray), curve3d(curve3d), epilog(epilog), isHit(false)
- {
- const TensorLinearCubicBezierSurface3fa curve3dray = curve3d.xfm(ray_space,ray.org);
- curve2d = TensorLinearCubicBezierSurface2fa(CubicBezierCurve2fa(curve3dray.L),CubicBezierCurve2fa(curve3dray.R));
- const BBox2fa b2 = curve2d.bounds();
- eps = 8.0f*float(ulp)*reduce_max(max(abs(b2.lower),abs(b2.upper)));
- }
-
- __forceinline Interval1f solve_linear(const float u0, const float u1, const float& p0, const float& p1)
- {
- if (p1 == p0) {
- if (p0 == 0.0f) return Interval1f(u0,u1);
- else return Interval1f(empty);
- }
- const float t = -p0/(p1-p0);
- const float tt = lerp(u0,u1,t);
- return Interval1f(tt);
- }
-
- __forceinline void solve_linear(const float u0, const float u1, const Interval1f& p0, const Interval1f& p1, Interval1f& u)
- {
- if (sign(p0.lower) != sign(p0.upper)) u.extend(u0);
- if (sign(p0.lower) != sign(p1.lower)) u.extend(solve_linear(u0,u1,p0.lower,p1.lower));
- if (sign(p0.upper) != sign(p1.upper)) u.extend(solve_linear(u0,u1,p0.upper,p1.upper));
- if (sign(p1.lower) != sign(p1.upper)) u.extend(u1);
- }
-
- __forceinline Interval1f bezier_clipping(const CubicBezierCurve<Interval1f>& curve)
- {
- Interval1f u = empty;
- solve_linear(0.0f/3.0f,1.0f/3.0f,curve.v0,curve.v1,u);
- solve_linear(0.0f/3.0f,2.0f/3.0f,curve.v0,curve.v2,u);
- solve_linear(0.0f/3.0f,3.0f/3.0f,curve.v0,curve.v3,u);
- solve_linear(1.0f/3.0f,2.0f/3.0f,curve.v1,curve.v2,u);
- solve_linear(1.0f/3.0f,3.0f/3.0f,curve.v1,curve.v3,u);
- solve_linear(2.0f/3.0f,3.0f/3.0f,curve.v2,curve.v3,u);
- return intersect(u,Interval1f(0.0f,1.0f));
- }
-
- __forceinline Interval1f bezier_clipping(const LinearBezierCurve<Interval1f>& curve)
- {
- Interval1f v = empty;
- solve_linear(0.0f,1.0f,curve.v0,curve.v1,v);
- return intersect(v,Interval1f(0.0f,1.0f));
- }
-
- __forceinline void solve_bezier_clipping(BBox1f cu, BBox1f cv, const TensorLinearCubicBezierSurface2fa& curve2)
- {
- BBox2fa bounds = curve2.bounds();
- if (bounds.upper.x < 0.0f) return;
- if (bounds.upper.y < 0.0f) return;
- if (bounds.lower.x > 0.0f) return;
- if (bounds.lower.y > 0.0f) return;
-
- if (max(cu.size(),cv.size()) < 1E-4f)
- {
- const float u = cu.center();
- const float v = cv.center();
- TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org);
- const float t = curve_z.eval(u,v);
- if (ray.tnear() <= t && t <= ray.tfar) {
- const Vec3fa Ng = cross(curve3d.eval_du(u,v),curve3d.eval_dv(u,v));
- BezierCurveHit hit(t,u,v,Ng);
- isHit |= epilog(hit);
- }
- return;
- }
-
- const Vec2fa dv = curve2.axis_v();
- const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dv);
- LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
- if (!curve0v.hasRoot()) return;
-
- const Interval1f v = bezier_clipping(curve0v);
- if (isEmpty(v)) return;
- TensorLinearCubicBezierSurface2fa curve2a = curve2.clip_v(v);
- cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
-
- const Vec2fa du = curve2.axis_u();
- const TensorLinearCubicBezierSurface1f curve1u = curve2a.xfm(du);
- CubicBezierCurve<Interval1f> curve0u = curve1u.reduce_v();
- int roots = curve0u.maxRoots();
- if (roots == 0) return;
-
- if (roots == 1)
- {
- const Interval1f u = bezier_clipping(curve0u);
- if (isEmpty(u)) return;
- TensorLinearCubicBezierSurface2fa curve2b = curve2a.clip_u(u);
- cu = BBox1f(lerp(cu.lower,cu.upper,u.lower),lerp(cu.lower,cu.upper,u.upper));
- solve_bezier_clipping(cu,cv,curve2b);
- return;
- }
-
- TensorLinearCubicBezierSurface2fa curve2l, curve2r;
- curve2a.split_u(curve2l,curve2r);
- solve_bezier_clipping(BBox1f(cu.lower,cu.center()),cv,curve2l);
- solve_bezier_clipping(BBox1f(cu.center(),cu.upper),cv,curve2r);
- }
-
- __forceinline bool solve_bezier_clipping()
- {
- solve_bezier_clipping(BBox1f(0.0f,1.0f),BBox1f(0.0f,1.0f),curve2d);
- return isHit;
- }
-
- __forceinline void solve_newton_raphson(BBox1f cu, BBox1f cv)
- {
- Vec2fa uv(cu.center(),cv.center());
- const Vec2fa dfdu = curve2d.eval_du(uv.x,uv.y);
- const Vec2fa dfdv = curve2d.eval_dv(uv.x,uv.y);
- const LinearSpace2fa rcp_J = rcp(LinearSpace2fa(dfdu,dfdv));
- solve_newton_raphson_loop(cu,cv,uv,dfdu,dfdv,rcp_J);
- }
-
- __forceinline void solve_newton_raphson_loop(BBox1f cu, BBox1f cv, const Vec2fa& uv_in, const Vec2fa& dfdu, const Vec2fa& dfdv, const LinearSpace2fa& rcp_J)
- {
- Vec2fa uv = uv_in;
-
- for (size_t i=0; i<200; i++)
- {
- const Vec2fa f = curve2d.eval(uv.x,uv.y);
- const Vec2fa duv = rcp_J*f;
- uv -= duv;
-
- if (max(abs(f.x),abs(f.y)) < eps)
- {
- const float u = uv.x;
- const float v = uv.y;
- if (!(u >= 0.0f && u <= 1.0f)) return; // rejects NaNs
- if (!(v >= 0.0f && v <= 1.0f)) return; // rejects NaNs
- const TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org);
- const float t = curve_z.eval(u,v);
- if (!(ray.tnear() <= t && t <= ray.tfar)) return; // rejects NaNs
- const Vec3fa Ng = cross(curve3d.eval_du(u,v),curve3d.eval_dv(u,v));
- BezierCurveHit hit(t,u,v,Ng);
- isHit |= epilog(hit);
- return;
- }
- }
- }
-
- __forceinline bool clip_v(BBox1f& cu, BBox1f& cv)
- {
- const Vec2fa dv = curve2d.eval_dv(cu.lower,cv.lower);
- const TensorLinearCubicBezierSurface1f curve1v = curve2d.xfm(dv).clip(cu,cv);
- LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
- if (!curve0v.hasRoot()) return false;
- Interval1f v = bezier_clipping(curve0v);
- if (isEmpty(v)) return false;
- v = intersect(v + Interval1f(-0.1f,+0.1f),Interval1f(0.0f,1.0f));
- cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
- return true;
- }
-
- __forceinline bool solve_krawczyk(bool very_small, BBox1f& cu, BBox1f& cv)
- {
- /* perform bezier clipping in v-direction to get tight v-bounds */
- TensorLinearCubicBezierSurface2fa curve2 = curve2d.clip(cu,cv);
- const Vec2fa dv = curve2.axis_v();
- const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dv);
- LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
- if (unlikely(!curve0v.hasRoot())) return true;
- Interval1f v = bezier_clipping(curve0v);
- if (unlikely(isEmpty(v))) return true;
- v = intersect(v + Interval1f(-0.1f,+0.1f),Interval1f(0.0f,1.0f));
- curve2 = curve2.clip_v(v);
- cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
-
- /* perform one newton raphson iteration */
- Vec2fa c(cu.center(),cv.center());
- Vec2fa f,dfdu,dfdv; curve2d.eval(c.x,c.y,f,dfdu,dfdv);
- const LinearSpace2fa rcp_J = rcp(LinearSpace2fa(dfdu,dfdv));
- const Vec2fa c1 = c - rcp_J*f;
-
- /* calculate bounds of derivatives */
- const BBox2fa bounds_du = (1.0f/cu.size())*curve2.derivative_u().bounds();
- const BBox2fa bounds_dv = (1.0f/cv.size())*curve2.derivative_v().bounds();
-
- /* calculate krawczyk test */
- LinearSpace2<Vec2<Interval1f>> I(Interval1f(1.0f), Interval1f(0.0f),
- Interval1f(0.0f), Interval1f(1.0f));
-
- LinearSpace2<Vec2<Interval1f>> G(Interval1f(bounds_du.lower.x,bounds_du.upper.x), Interval1f(bounds_dv.lower.x,bounds_dv.upper.x),
- Interval1f(bounds_du.lower.y,bounds_du.upper.y), Interval1f(bounds_dv.lower.y,bounds_dv.upper.y));
-
- const LinearSpace2<Vec2f> rcp_J2(rcp_J);
- const LinearSpace2<Vec2<Interval1f>> rcp_Ji(rcp_J2);
-
- const Vec2<Interval1f> x(cu,cv);
- const Vec2<Interval1f> K = Vec2<Interval1f>(Vec2f(c1)) + (I - rcp_Ji*G)*(x-Vec2<Interval1f>(Vec2f(c)));
-
- /* test if there is no solution */
- const Vec2<Interval1f> KK = intersect(K,x);
- if (unlikely(isEmpty(KK.x) || isEmpty(KK.y))) return true;
-
- /* exit if convergence cannot get proven, but terminate if we are very small */
- if (unlikely(!subset(K,x) && !very_small)) return false;
-
- /* solve using newton raphson iteration of convergence is guarenteed */
- solve_newton_raphson_loop(cu,cv,c1,dfdu,dfdv,rcp_J);
- return true;
- }
-
- __forceinline void solve_newton_raphson_no_recursion(BBox1f cu, BBox1f cv)
- {
- if (!clip_v(cu,cv)) return;
- return solve_newton_raphson(cu,cv);
- }
-
- __forceinline void solve_newton_raphson_recursion(BBox1f cu, BBox1f cv)
- {
- unsigned int sptr = 0;
- const unsigned int stack_size = 4;
- unsigned int mask_stack[stack_size];
- BBox1f cu_stack[stack_size];
- BBox1f cv_stack[stack_size];
- goto entry;
-
- /* terminate if stack is empty */
- while (sptr)
- {
- /* pop from stack */
- {
- sptr--;
- size_t mask = mask_stack[sptr];
- cu = cu_stack[sptr];
- cv = cv_stack[sptr];
- const size_t i = bscf(mask);
- mask_stack[sptr] = mask;
- if (mask) sptr++; // there are still items on the stack
-
- /* process next element recurse into each hit curve segment */
- const float u0 = float(i+0)*(1.0f/(VSIZEX-1));
- const float u1 = float(i+1)*(1.0f/(VSIZEX-1));
- const BBox1f cui(lerp(cu.lower,cu.upper,u0),lerp(cu.lower,cu.upper,u1));
- cu = cui;
- }
-
-#if 0
- solve_newton_raphson_no_recursion(cu,cv);
- continue;
-
-#else
- /* we assume convergence for small u ranges and verify using krawczyk */
- if (cu.size() < 1.0f/6.0f) {
- const bool very_small = cu.size() < 0.001f || sptr >= stack_size;
- if (solve_krawczyk(very_small,cu,cv)) {
- continue;
- }
- }
-#endif
-
- entry:
-
- /* split the curve into VSIZEX-1 segments in u-direction */
- vboolx valid = true;
- TensorLinearCubicBezierSurface<Vec2vfx> subcurves = curve2d.clip_v(cv).vsplit_u(valid,cu);
-
- /* slabs test in u-direction */
- Vec2vfx ndv = cross(subcurves.axis_v());
- BBox<vfloatx> boundsv = subcurves.vxfm(ndv).bounds();
- valid &= boundsv.lower <= eps;
- valid &= boundsv.upper >= -eps;
- if (none(valid)) continue;
-
- /* slabs test in v-direction */
- Vec2vfx ndu = cross(subcurves.axis_u());
- BBox<vfloatx> boundsu = subcurves.vxfm(ndu).bounds();
- valid &= boundsu.lower <= eps;
- valid &= boundsu.upper >= -eps;
- if (none(valid)) continue;
-
- /* push valid segments to stack */
- assert(sptr < stack_size);
- mask_stack [sptr] = movemask(valid);
- cu_stack [sptr] = cu;
- cv_stack [sptr] = cv;
- sptr++;
- }
- }
-
- __forceinline bool solve_newton_raphson_main()
- {
- BBox1f vu(0.0f,1.0f);
- BBox1f vv(0.0f,1.0f);
- solve_newton_raphson_recursion(vu,vv);
- return isHit;
- }
- };
-
-
- template<template<typename Ty> class SourceCurve>
- struct OrientedCurve1Intersector1
- {
- //template<typename Ty> using Curve = SourceCurve<Ty>;
- typedef SourceCurve<Vec3ff> SourceCurve3ff;
- typedef SourceCurve<Vec3fa> SourceCurve3fa;
-
- __forceinline OrientedCurve1Intersector1() {}
-
- __forceinline OrientedCurve1Intersector1(const Ray& ray, const void* ptr) {}
-
- template<typename Epilog>
- __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i,
- const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i,
- const Epilog& epilog) const
- {
- STAT3(normal.trav_prims,1,1,1);
-
- SourceCurve3ff ccurve(v0i,v1i,v2i,v3i);
- SourceCurve3fa ncurve(n0i,n1i,n2i,n3i);
- ccurve = enlargeRadiusToMinWidth(context,geom,ray.org,ccurve);
- TensorLinearCubicBezierSurface3fa curve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
- //return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_bezier_clipping();
- return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_newton_raphson_main();
- }
-
- template<typename Epilog>
- __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const TensorLinearCubicBezierSurface3fa& curve, const Epilog& epilog) const
- {
- STAT3(normal.trav_prims,1,1,1);
- //return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_bezier_clipping();
- return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_newton_raphson_main();
- }
- };
-
- template<template<typename Ty> class SourceCurve, int K>
- struct OrientedCurve1IntersectorK
- {
- //template<typename Ty> using Curve = SourceCurve<Ty>;
- typedef SourceCurve<Vec3ff> SourceCurve3ff;
- typedef SourceCurve<Vec3fa> SourceCurve3fa;
-
- struct Ray1
- {
- __forceinline Ray1(RayK<K>& ray, size_t k)
- : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {}
-
- Vec3fa org;
- Vec3fa dir;
- float _tnear;
- float& tfar;
-
- __forceinline float& tnear() { return _tnear; }
- //__forceinline float& tfar() { return _tfar; }
- __forceinline const float& tnear() const { return _tnear; }
- //__forceinline const float& tfar() const { return _tfar; }
- };
-
- template<typename Epilog>
- __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i,
- const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i,
- const Epilog& epilog)
- {
- STAT3(normal.trav_prims,1,1,1);
- Ray1 ray(vray,k);
- SourceCurve3ff ccurve(v0i,v1i,v2i,v3i);
- SourceCurve3fa ncurve(n0i,n1i,n2i,n3i);
- ccurve = enlargeRadiusToMinWidth(context,geom,ray.org,ccurve);
- TensorLinearCubicBezierSurface3fa curve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
- //return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_bezier_clipping();
- return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_newton_raphson_main();
- }
-
- template<typename Epilog>
- __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const TensorLinearCubicBezierSurface3fa& curve,
- const Epilog& epilog)
- {
- STAT3(normal.trav_prims,1,1,1);
- Ray1 ray(vray,k);
- //return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_bezier_clipping();
- return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_newton_raphson_main();
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h
deleted file mode 100644
index 6e9fc91925..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "../common/geometry.h"
-
-namespace embree
-{
- namespace isa
- {
- struct CurvePrecalculations1 // per-ray values computed once from the ray direction (single-ray variant)
- {
- float depth_scale; // rsqrt(dot(dir,dir)), i.e. reciprocal length of ray.dir
- LinearSpace3fa ray_space; // transposed frame built from the normalized ray direction
-
- __forceinline CurvePrecalculations1() {} // assigns neither member
-
- __forceinline CurvePrecalculations1(const Ray& ray, const void* ptr) // ptr is not referenced in this body
- {
- depth_scale = rsqrt(dot(ray.dir,ray.dir));
- LinearSpace3fa space = frame(depth_scale*ray.dir); // frame() receives the direction scaled to unit length
- space.vz *= depth_scale; // vz is scaled by depth_scale once more before transposing
- ray_space = space.transposed();
- }
- };
-
- template<int K> // K: number of rays in the packet
- struct CurvePrecalculationsK // packet variant of CurvePrecalculations1
- {
- vfloat<K> depth_scale; // rsqrt(dot(dir,dir)) for all K lanes
- LinearSpace3fa ray_space[K]; // one transposed frame per lane; only lanes set in 'valid' are written by the constructor
-
- __forceinline CurvePrecalculationsK(const vbool<K>& valid, const RayK<K>& ray)
- {
- size_t mask = movemask(valid);
- depth_scale = rsqrt(dot(ray.dir,ray.dir)); // computed for every lane, independent of 'valid'
- while (mask) {
- size_t k = bscf(mask); // presumably returns the index of a set bit and clears it in mask - confirm in the bscf definition
- Vec3fa ray_dir_k = Vec3fa(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]); // gather the direction of lane k
- LinearSpace3fa ray_space_k = frame(depth_scale[k]*ray_dir_k);
- ray_space_k.vz *= depth_scale[k]; // same extra vz scaling as in CurvePrecalculations1
- ray_space[k] = ray_space_k.transposed();
- }
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h
deleted file mode 100644
index a99cf99d56..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h
+++ /dev/null
@@ -1,214 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "quad_intersector.h"
-#include "curve_intersector_precalculations.h"
-
-#define Bezier1Intersector1 RibbonCurve1Intersector1
-#define Bezier1IntersectorK RibbonCurve1IntersectorK
-
-namespace embree
-{
- namespace isa
- {
- template<typename NativeCurve3ff, int M> // M: number of SIMD lanes, one candidate hit per lane
- struct RibbonHit // hit record handed to the epilog by intersect_ribbon
- {
- __forceinline RibbonHit() {} // assigns no member
-
- __forceinline RibbonHit(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const int i, const int N,
- const NativeCurve3ff& curve3D)
- : U(U), V(V), T(T), i(i), N(N), curve3D(curve3D), valid(valid) {} // vu, vv and vt stay unset until finalize() is called
-
- __forceinline void finalize()
- {
- vu = (vfloat<M>(step)+U+vfloat<M>(float(i)))*(1.0f/float(N)); // (lane index + first segment index i + U) / N
- vv = V;
- vt = T;
- }
-
- __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } // here i is a lane index, not the member i
- __forceinline float t (const size_t i) const { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) const {
- return curve3D.eval_du(vu[i]); // normal is taken from eval_du at the lane's vu (presumably the curve derivative in u - confirm)
- }
-
- public:
- vfloat<M> U; // per-lane inputs as passed to the constructor
- vfloat<M> V;
- vfloat<M> T;
- int i, N; // i: index of the first segment of this batch; N: divisor used in finalize()
- NativeCurve3ff curve3D; // stored by value
-
- public:
- vbool<M> valid; // lanes that carry a hit
- vfloat<M> vu; // written by finalize()
- vfloat<M> vv; // written by finalize()
- vfloat<M> vt; // written by finalize()
- };
-
- /* squared distance of point p0 to line p1->p2, returned unreduced as the pair (num*num, den2); the division first/second is left to the caller */
- __forceinline std::pair<vfloatx,vfloatx> sqr_point_line_distance(const Vec2vfx& p0, const Vec2vfx& p1, const Vec2vfx& p2)
- {
- const vfloatx num = det(p2-p1,p1-p0);
- const vfloatx den2 = dot(p2-p1,p2-p1); // squared length of p2-p1
- return std::make_pair(num*num,den2);
- }
-
- /* performs culling against a cylinder: per lane, true where the squared distance of p0 to line p1->p2 is <= r*r */
- __forceinline vboolx cylinder_culling_test(const Vec2vfx& p0, const Vec2vfx& p1, const Vec2vfx& p2, const vfloatx& r)
- {
- const std::pair<vfloatx,vfloatx> d = sqr_point_line_distance(p0,p1,p2);
- return d.first <= r*r*d.second; // compares num^2 <= r^2*den2, so no division is performed
- }
-
- template<typename NativeCurve3ff, typename Epilog> // intersects a ray with a curve approximated by N quads built in ray space; returns true if any epilog call returned true
- __forceinline bool intersect_ribbon(const Vec3fa& ray_org, const Vec3fa& ray_dir, const float ray_tnear, const float& ray_tfar, // ray_dir is not referenced in this body
- const LinearSpace3fa& ray_space, const float& depth_scale,
- const NativeCurve3ff& curve3D, const int N,
- const Epilog& epilog)
- {
- /* transform control points into ray space */
- const NativeCurve3ff curve2D = curve3D.xfm_pr(ray_space,ray_org);
- float eps = 4.0f*float(ulp)*reduce_max(max(abs(curve2D.v0),abs(curve2D.v1),abs(curve2D.v2),abs(curve2D.v3))); // 4 ulp times the largest absolute control-point component
-
- /* evaluate the first SIMD-width batch of curve segments (segment index = lane index) */
- bool ishit = false;
- vboolx valid = vfloatx(step) < vfloatx(float(N)); // enable only lanes whose index is below N
- const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(0,N);
- const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(0,N);
- valid &= cylinder_culling_test(zero,Vec2vfx(p0.x,p0.y),Vec2vfx(p1.x,p1.y),max(p0.w,p1.w)); // keep segments whose 2D line p0->p1 passes within max(p0.w,p1.w) of the origin
-
- if (any(valid))
- {
- Vec3vfx dp0dt = curve2D.template derivative0<VSIZEX>(0,N);
- Vec3vfx dp1dt = curve2D.template derivative1<VSIZEX>(0,N);
- dp0dt = select(reduce_max(abs(dp0dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp0dt); // fall back to the chord p1-p0 where the derivative is below eps
- dp1dt = select(reduce_max(abs(dp1dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp1dt);
- const Vec3vfx n0(dp0dt.y,-dp0dt.x,0.0f); // (dy,-dx,0): perpendicular to the derivative in the xy plane
- const Vec3vfx n1(dp1dt.y,-dp1dt.x,0.0f);
- const Vec3vfx nn0 = normalize(n0);
- const Vec3vfx nn1 = normalize(n1);
- const Vec3vfx lp0 = madd(p0.w,nn0,Vec3vfx(p0)); // p0 + w*nn0
- const Vec3vfx lp1 = madd(p1.w,nn1,Vec3vfx(p1));
- const Vec3vfx up0 = nmadd(p0.w,nn0,Vec3vfx(p0)); // p0 - w*nn0
- const Vec3vfx up1 = nmadd(p1.w,nn1,Vec3vfx(p1));
-
- vfloatx vu,vv,vt;
- vboolx valid0 = intersect_quad_backface_culling(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt); // quad test uses origin zero and direction (0,0,1), i.e. the ray expressed in ray space
-
- if (any(valid0))
- {
- /* ignore self intersections */
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) {
- vfloatx r = lerp(p0.w, p1.w, vu); // w interpolated between the segment end points at vu
- valid0 &= vt > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; // drop hits closer than factor*r*depth_scale
- }
-
- if (any(valid0))
- {
- vv = madd(2.0f,vv,vfloatx(-1.0f)); // vv = 2*vv-1
- RibbonHit<NativeCurve3ff,VSIZEX> bhit(valid0,vu,vv,vt,0,N,curve3D);
- ishit |= epilog(bhit.valid,bhit);
- }
- }
- }
-
- if (unlikely(VSIZEX < N)) // more segments than SIMD lanes: handle the remaining batches
- {
- /* process SIMD-size many segments per iteration */
- for (int i=VSIZEX; i<N; i+=VSIZEX)
- {
- /* evaluate the curve segments i .. i+VSIZEX-1 */
- vboolx valid = vintx(i)+vintx(step) < vintx(N); // enable only lanes whose segment index is below N
- const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(i,N);
- const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(i,N);
- valid &= cylinder_culling_test(zero,Vec2vfx(p0.x,p0.y),Vec2vfx(p1.x,p1.y),max(p0.w,p1.w));
- if (none(valid)) continue;
-
- Vec3vfx dp0dt = curve2D.template derivative0<VSIZEX>(i,N);
- Vec3vfx dp1dt = curve2D.template derivative1<VSIZEX>(i,N);
- dp0dt = select(reduce_max(abs(dp0dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp0dt); // same chord fallback as in the first batch
- dp1dt = select(reduce_max(abs(dp1dt)) < vfloatx(eps),Vec3vfx(p1-p0),dp1dt);
- const Vec3vfx n0(dp0dt.y,-dp0dt.x,0.0f);
- const Vec3vfx n1(dp1dt.y,-dp1dt.x,0.0f);
- const Vec3vfx nn0 = normalize(n0);
- const Vec3vfx nn1 = normalize(n1);
- const Vec3vfx lp0 = madd(p0.w,nn0,Vec3vfx(p0));
- const Vec3vfx lp1 = madd(p1.w,nn1,Vec3vfx(p1));
- const Vec3vfx up0 = nmadd(p0.w,nn0,Vec3vfx(p0));
- const Vec3vfx up1 = nmadd(p1.w,nn1,Vec3vfx(p1));
-
- vfloatx vu,vv,vt;
- vboolx valid0 = intersect_quad_backface_culling(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt);
-
- if (any(valid0))
- {
- /* ignore self intersections */
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f) {
- vfloatx r = lerp(p0.w, p1.w, vu);
- valid0 &= vt > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale;
- }
-
- if (any(valid0))
- {
- vv = madd(2.0f,vv,vfloatx(-1.0f));
- RibbonHit<NativeCurve3ff,VSIZEX> bhit(valid0,vu,vv,vt,i,N,curve3D); // i is passed so finalize() can offset vu by the batch start
- ishit |= epilog(bhit.valid,bhit);
- }
- }
- }
- }
- return ishit;
- }
-
- template<template<typename Ty> class NativeCurve> // NativeCurve: curve class template, instantiated below with Vec3ff
- struct RibbonCurve1Intersector1 // single-ray front end for intersect_ribbon
- {
- typedef NativeCurve<Vec3ff> NativeCurve3ff;
-
- template<typename Epilog>
- __forceinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID, // primID is not referenced in this body
- const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
- const Epilog& epilog)
- {
- const int N = geom->tessellationRate; // number of segments handed to intersect_ribbon
- NativeCurve3ff curve(v0,v1,v2,v3);
- curve = enlargeRadiusToMinWidth(context,geom,ray.org,curve); // curve is replaced by the helper's result (helper defined elsewhere)
- return intersect_ribbon<NativeCurve3ff>(ray.org,ray.dir,ray.tnear(),ray.tfar,
- pre.ray_space,pre.depth_scale,
- curve,N,
- epilog);
- }
- };
-
- template<template<typename Ty> class NativeCurve, int K> // K: number of rays in the packet
- struct RibbonCurve1IntersectorK // packet front end for intersect_ribbon, handling one lane k per call
- {
- typedef NativeCurve<Vec3ff> NativeCurve3ff;
-
- template<typename Epilog>
- __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& ray, size_t k,
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID, // primID is not referenced in this body
- const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
- const Epilog& epilog)
- {
- const int N = geom->tessellationRate; // number of segments handed to intersect_ribbon
- const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]); // gather origin of lane k
- const Vec3fa ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]); // gather direction of lane k
- NativeCurve3ff curve(v0,v1,v2,v3);
- curve = enlargeRadiusToMinWidth(context,geom,ray_org,curve); // curve is replaced by the helper's result (helper defined elsewhere)
- return intersect_ribbon<NativeCurve3ff>(ray_org,ray_dir,ray.tnear()[k],ray.tfar[k],
- pre.ray_space[k],pre.depth_scale[k],
- curve,N,
- epilog);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h
deleted file mode 100644
index 883cedc3d2..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h
+++ /dev/null
@@ -1,362 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "cylinder.h"
-#include "plane.h"
-#include "line_intersector.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- namespace isa
- {
- static const size_t numJacobianIterations = 5; // iteration cap of the loop in intersect_bezier_iterative_jacobian
-#if defined(__AVX__)
- static const size_t numBezierSubdivisions = 2; // used as maxDepth and to size the stack in intersect_bezier_recursive_jacobian
-#else
- static const size_t numBezierSubdivisions = 3; // one level more than in the AVX build
-#endif
-
- struct BezierCurveHit // scalar hit record passed to the epilog by the sweep intersector
- {
- __forceinline BezierCurveHit() {} // assigns no member
-
- __forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng)
- : t(t), u(u), v(0.0f), Ng(Ng) {} // v is set to 0 in this overload
-
- __forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng)
- : t(t), u(u), v(v), Ng(Ng) {}
-
- __forceinline void finalize() {} // nothing to compute; members are already final
-
- public:
- float t; // ray parameter of the hit
- float u; // curve parameter of the hit
- float v;
- Vec3fa Ng; // normal as supplied by the caller (not normalized here)
- };
-
- template<typename NativeCurve3ff, typename Ray, typename Epilog> // debug variant: reports the interval start tp.lower[i] of lane i directly as the hit, without iterating; its call sites in intersect_bezier_recursive_jacobian are commented out
- __forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i, // curve is not referenced in this body
- const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1,
- const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du,
- const Epilog& epilog)
- {
- if (tp.lower[i]+dt > ray.tfar) return false; // dt converts the shifted parameter back before comparing with ray.tfar
- Vec3fa Ng_o = Vec3fa(Ng.x[i],Ng.y[i],Ng.z[i]);
- if (h0.lower[i] == tp.lower[i]) Ng_o = -Vec3fa(dP0du.x[i],dP0du.y[i],dP0du.z[i]); // interval starts at h0's lower bound: use -dP0du as normal
- if (h1.lower[i] == tp.lower[i]) Ng_o = +Vec3fa(dP3du.x[i],dP3du.y[i],dP3du.z[i]); // interval starts at h1's lower bound: use +dP3du as normal
- BezierCurveHit hit(tp.lower[i]+dt,u[i],Ng_o);
- return epilog(hit);
- }
-
- template<typename NativeCurve3ff, typename Ray, typename Epilog> // refines a start guess (u,t) with up to numJacobianIterations 2D Newton steps on (f,g); returns the epilog result for an accepted hit, false otherwise
- __forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog)
- {
- const Vec3fa org = zero; // ray origin is taken as zero here; t is relative to it and dt is added back before the range test below
- const Vec3fa dir = ray.dir;
- const float length_ray_dir = length(dir);
-
- /* error of curve evaluations is proportional to largest coordinate */
- const BBox3ff box = curve.bounds();
- const float P_err = 16.0f*float(ulp)*reduce_max(max(abs(box.lower),abs(box.upper)));
-
- for (size_t i=0; i<numJacobianIterations; i++)
- {
- const Vec3fa Q = madd(Vec3fa(t),dir,org); // point on the ray: org + t*dir
- //const Vec3fa dQdu = zero;
- const Vec3fa dQdt = dir;
- const float Q_err = 16.0f*float(ulp)*length_ray_dir*t; // works as org=zero here
-
- Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu); // curve point with first and second derivative at u
- //const Vec3fa dPdt = zero;
-
- const Vec3fa R = Q-P; // vector from curve point to ray point
- const float len_R = length(R); //reduce_max(abs(R));
- const float R_err = max(Q_err,P_err);
- const Vec3fa dRdu = /*dQdu*/-dPdu;
- const Vec3fa dRdt = dQdt;//-dPdt;
-
- const Vec3fa T = normalize(dPdu); // unit tangent of the curve at u
- const Vec3fa dTdu = dnormalize(dPdu,ddPdu);
- //const Vec3fa dTdt = zero;
- const float cos_err = P_err/length(dPdu);
-
- /* Error estimate for dot(R,T):
-
- dot(R,T) = cos(R,T) |R| |T|
- = (cos(R,T) +- cos_error) * (|R| +- |R|_err) * (|T| +- |T|_err)
- = cos(R,T)*|R|*|T|
- +- cos(R,T)*(|R|*|T|_err + |T|*|R|_err)
- +- cos_error*(|R| + |T|)
- +- lower order terms
- with cos(R,T) being in [0,1] and |T| = 1 we get:
- dot(R,T)_err = |R|*|T|_err + |R|_err = cos_error*(|R|+1)
- */
-
- const float f = dot(R,T); // component of R along the tangent
- const float f_err = len_R*P_err + R_err + cos_err*(1.0f+len_R);
- const float dfdu = dot(dRdu,T) + dot(R,dTdu);
- const float dfdt = dot(dRdt,T);// + dot(R,dTdt);
-
- const float K = dot(R,R)-sqr(f); // squared length of the part of R perpendicular to T
- const float dKdu = /*2.0f*/(dot(R,dRdu)-f*dfdu); // factor 2 omitted here and 0.5 omitted in dgdu/dgdt below
- const float dKdt = /*2.0f*/(dot(R,dRdt)-f*dfdt);
- const float rsqrt_K = rsqrt(K);
-
- const float g = sqrt(K)-P.w; // perpendicular distance minus P.w
- const float g_err = R_err + f_err + 16.0f*float(ulp)*box.upper.w;
- const float dgdu = /*0.5f*/dKdu*rsqrt_K-dPdu.w;
- const float dgdt = /*0.5f*/dKdt*rsqrt_K;//-dPdt.w;
-
- const LinearSpace2f J = LinearSpace2f(dfdu,dfdt,dgdu,dgdt); // Jacobian of (f,g) with respect to (u,t)
- const Vec2f dut = rcp(J)*Vec2f(f,g);
- const Vec2f ut = Vec2f(u,t) - dut; // Newton update
- u = ut.x; t = ut.y;
-
- if (abs(f) < f_err && abs(g) < g_err) // f and g were evaluated at the pre-update (u,t); the hit below reports the updated u and t
- {
- t+=dt;
- if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs
- if (!(u >= 0.0f && u <= 1.0f)) return false; // rejects NaNs
- const Vec3fa R = normalize(Q-P); // shadows the outer R with its normalized form
- const Vec3fa U = madd(Vec3fa(dPdu.w),R,dPdu); // dPdu + dPdu.w*R
- const Vec3fa V = cross(dPdu,R);
- BezierCurveHit hit(t,u,cross(V,U));
- return epilog(hit);
- }
- }
- return false; // not converged within numJacobianIterations
- }
-
- template<typename NativeCurve3ff, typename Ray, typename Epilog> // subdivides [u0,u1] into SIMD-many segments, bounds each by cylinders and cap planes, and refines surviving candidates with intersect_bezier_iterative_jacobian; returns true if any hit was accepted
- bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve,
- float u0, float u1, unsigned int depth, const Epilog& epilog)
- {
-#if defined(__AVX__)
- typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues
- typedef vint8 vintx;
- typedef vfloat8 vfloatx;
-#else
- typedef vbool4 vboolx; // these local typedefs are the SIMD types used throughout this function
- typedef vint4 vintx;
- typedef vfloat4 vfloatx;
-#endif
- typedef Vec3<vfloatx> Vec3vfx;
- typedef Vec4<vfloatx> Vec4vfx;
-
- unsigned int maxDepth = numBezierSubdivisions;
- bool found = false;
- const Vec3fa org = zero; // ray origin is taken as zero; ray parameters are offset by dt where compared with ray.tnear()/ray.tfar
- const Vec3fa dir = ray.dir;
-
- unsigned int sptr = 0; // number of entries currently on the stack
- const unsigned int stack_size = numBezierSubdivisions+1; // +1 because of unstable workaround below
- struct StackEntry {
- vboolx valid; // lanes (sub-segments) still to be processed
- vfloatx tlower; // per-lane lower bound of the hit interval
- float u0; // parameter range that was subdivided
- float u1;
- unsigned int depth; // depth to use when a lane of this entry is processed
- };
- StackEntry stack[stack_size];
- goto entry; // the initial [u0,u1] is processed without popping; the loop condition is first checked after that pass
-
- /* terminate if stack is empty */
- while (sptr)
- {
- /* pop from stack */
- {
- sptr--;
- vboolx valid = stack[sptr].valid;
- const vfloatx tlower = stack[sptr].tlower;
- valid &= tlower+dt <= ray.tfar; // drop lanes that start beyond the current ray.tfar
- if (none(valid)) continue;
- u0 = stack[sptr].u0;
- u1 = stack[sptr].u1;
- depth = stack[sptr].depth;
- const size_t i = select_min(valid,tlower); clear(valid,i); // take the valid lane with the smallest tlower
- stack[sptr].valid = valid;
- if (any(valid)) sptr++; // there are still items on the stack
-
- /* process next segment */
- const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1)));
- u0 = vu0[i+0]; // narrow [u0,u1] to sub-segment i of the popped range
- u1 = vu0[i+1];
- }
- entry: // target of the goto above; 'continue' statements below return to the while condition
-
- /* subdivide curve */
- const float dscale = (u1-u0)*(1.0f/(3.0f*(vfloatx::size-1)));
- const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1))); // vfloatx::size evenly spaced parameters from u0 to u1
- Vec4vfx P0, dP0du; curve.veval(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale);
- const Vec4vfx P3 = shift_right_1(P0); // presumably lane i of P3 holds lane i+1 of P0 (segment end point) - confirm in shift_right_1
- const Vec4vfx dP3du = shift_right_1(dP0du);
- const Vec4vfx P1 = P0 + dP0du;
- const Vec4vfx P2 = P3 - dP3du;
-
- /* calculate bounding cylinders */
- const vfloatx rr1 = sqr_point_to_line_distance(Vec3vfx(dP0du),Vec3vfx(P3-P0));
- const vfloatx rr2 = sqr_point_to_line_distance(Vec3vfx(dP3du),Vec3vfx(P3-P0));
- const vfloatx maxr12 = sqrt(max(rr1,rr2));
- const vfloatx one_plus_ulp = 1.0f+2.0f*float(ulp);
- const vfloatx one_minus_ulp = 1.0f-2.0f*float(ulp);
- vfloatx r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12;
- vfloatx r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12;
- r_outer = one_plus_ulp*r_outer; // grow the outer radius slightly
- r_inner = max(0.0f,one_minus_ulp*r_inner); // shrink the inner radius slightly and clamp it at 0
- const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer);
- const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner);
- vboolx valid = true; clear(valid,vfloatx::size-1); // all lanes except the last one are enabled
-
- /* intersect with outer cylinder */
- BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1;
- valid &= cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1);
- if (none(valid)) continue;
-
- /* intersect with cap-planes */
- BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt); // ray interval expressed relative to the shifted origin
- tp = embree::intersect(tp,tc_outer);
- BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(org,dir);
- tp = embree::intersect(tp,h0);
- BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(org,dir);
- tp = embree::intersect(tp,h1);
- valid &= tp.lower <= tp.upper; // keep lanes with a non-empty interval
- if (none(valid)) continue;
-
- /* clamp and correct u parameter */
- u_outer0 = clamp(u_outer0,vfloatx(0.0f),vfloatx(1.0f));
- u_outer1 = clamp(u_outer1,vfloatx(0.0f),vfloatx(1.0f));
- u_outer0 = lerp(u0,u1,(vfloatx(step)+u_outer0)*(1.0f/float(vfloatx::size))); // segment-local u mapped into [u0,u1]
- u_outer1 = lerp(u0,u1,(vfloatx(step)+u_outer1)*(1.0f/float(vfloatx::size)));
-
- /* intersect with inner cylinder */
- BBox<vfloatx> tc_inner;
- vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero;
- const vboolx valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1);
-
- /* at the unstable area we subdivide deeper */
- const vboolx unstable0 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner0)) < 0.3f); // unstable: inner cylinder missed, or |dot(dir,Ng_inner)| below 0.3
- const vboolx unstable1 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner1)) < 0.3f);
-
- /* subtract the inner interval from the current hit interval */
- BBox<vfloatx> tp0, tp1;
- subtract(tp,tc_inner,tp0,tp1);
- vboolx valid0 = valid & (tp0.lower <= tp0.upper);
- vboolx valid1 = valid & (tp1.lower <= tp1.upper);
- if (none(valid0 | valid1)) continue;
-
- /* iterate over all first hits front to back */
- const vintx termDepth0 = select(unstable0,vintx(maxDepth+1),vintx(maxDepth)); // unstable lanes terminate one level deeper
- vboolx recursion_valid0 = valid0 & (depth < termDepth0); // lanes that are subdivided further
- valid0 &= depth >= termDepth0; // lanes that are refined now
-
- while (any(valid0))
- {
- const size_t i = select_min(valid0,tp0.lower); clear(valid0,i);
- found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0[i],tp0.lower[i],epilog);
- //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog);
- valid0 &= tp0.lower+dt <= ray.tfar; // re-test against ray.tfar after each refinement call
- }
- valid1 &= tp1.lower+dt <= ray.tfar;
-
- /* iterate over all second hits front to back */
- const vintx termDepth1 = select(unstable1,vintx(maxDepth+1),vintx(maxDepth));
- vboolx recursion_valid1 = valid1 & (depth < termDepth1);
- valid1 &= depth >= termDepth1;
- while (any(valid1))
- {
- const size_t i = select_min(valid1,tp1.lower); clear(valid1,i);
- found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1[i],tp1.upper[i],epilog); // start guess is tp1.upper here, while lanes are ordered by tp1.lower
- //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog);
- valid1 &= tp1.lower+dt <= ray.tfar;
- }
-
- /* push valid segments to stack */
- recursion_valid0 &= tp0.lower+dt <= ray.tfar;
- recursion_valid1 &= tp1.lower+dt <= ray.tfar;
- const vboolx recursion_valid = recursion_valid0 | recursion_valid1;
- if (any(recursion_valid))
- {
- assert(sptr < stack_size);
- stack[sptr].valid = recursion_valid;
- stack[sptr].tlower = select(recursion_valid0,tp0.lower,tp1.lower); // tp0.lower where the first interval recurses, tp1.lower otherwise
- stack[sptr].u0 = u0;
- stack[sptr].u1 = u1;
- stack[sptr].depth = depth+1;
- sptr++;
- }
- }
- return found;
- }
-
- template<template<typename Ty> class NativeCurve> // NativeCurve: curve class template, instantiated below with Vec3ff
- struct SweepCurve1Intersector1 // single-ray front end for intersect_bezier_recursive_jacobian
- {
- typedef NativeCurve<Vec3ff> NativeCurve3ff;
-
- template<typename Epilog>
- __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, // pre and primID are not referenced in this body
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
- const Epilog& epilog)
- {
- STAT3(normal.trav_prims,1,1,1);
-
- /* move ray closer to make intersection stable */
- NativeCurve3ff curve0(v0,v1,v2,v3);
- curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0); // curve0 is replaced by the helper's result (helper defined elsewhere)
- const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir)); // ray parameter of the projection of curve0.center() onto the ray
- const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f); // ray point at dt, with fourth component 0
- const NativeCurve3ff curve1 = curve0-ref; // curve expressed relative to ref
- return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog); // full range u in [0,1], starting depth 1
- }
- };
-
- template<template<typename Ty> class NativeCurve, int K> // K: number of rays in the packet
- struct SweepCurve1IntersectorK // packet front end for intersect_bezier_recursive_jacobian, handling one lane k per call
- {
- typedef NativeCurve<Vec3ff> NativeCurve3ff;
-
- struct Ray1 // scalar view of lane k of a ray packet
- {
- __forceinline Ray1(RayK<K>& ray, size_t k)
- : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {}
-
- Vec3fa org; // copy of the lane's origin
- Vec3fa dir; // copy of the lane's direction
- float _tnear; // copy of the lane's tnear
- float& tfar; // reference bound to ray.tfar[k], not a copy
-
- __forceinline float& tnear() { return _tnear; }
- //__forceinline float& tfar() { return _tfar; }
- __forceinline const float& tnear() const { return _tnear; }
- //__forceinline const float& tfar() const { return _tfar; }
-
- };
-
- template<typename Epilog>
- __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k, // pre and primID are not referenced in this body
- IntersectContext* context,
- const CurveGeometry* geom, const unsigned int primID,
- const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
- const Epilog& epilog)
- {
- STAT3(normal.trav_prims,1,1,1);
- Ray1 ray(vray,k);
-
- /* move ray closer to make intersection stable */
- NativeCurve3ff curve0(v0,v1,v2,v3);
- curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0); // curve0 is replaced by the helper's result (helper defined elsewhere)
- const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir)); // ray parameter of the projection of curve0.center() onto the ray
- const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f); // ray point at dt, with fourth component 0
- const NativeCurve3ff curve1 = curve0-ref; // curve expressed relative to ref
- return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog); // full range u in [0,1], starting depth 1
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h
deleted file mode 100644
index e1f4238130..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h
+++ /dev/null
@@ -1,671 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-#include "../subdiv/bezier_curve.h"
-#include "../common/primref.h"
-#include "curve_intersector_precalculations.h"
-#include "../bvh/node_intersector1.h"
-#include "../bvh/node_intersector_packet.h"
-
-#include "intersector_epilog.h"
-
-#include "../subdiv/bezier_curve.h"
-#include "../subdiv/bspline_curve.h"
-#include "../subdiv/hermite_curve.h"
-#include "../subdiv/catmullrom_curve.h"
-
-#include "spherei_intersector.h"
-#include "disci_intersector.h"
-
-#include "linei_intersector.h"
-#include "roundlinei_intersector.h"
-#include "conelinei_intersector.h"
-
-#include "curveNi_intersector.h"
-#include "curveNv_intersector.h"
-#include "curveNi_mb_intersector.h"
-
-#include "curve_intersector_distance.h"
-#include "curve_intersector_ribbon.h"
-#include "curve_intersector_oriented.h"
-#include "curve_intersector_sweep.h"
-
-namespace embree
-{
- struct VirtualCurveIntersector // table of type-erased intersect/occluded function pointers, one entry per geometry type
- {
- typedef void (*Intersect1Ty)(void* pre, void* ray, IntersectContext* context, const void* primitive); // single-ray signatures: no lane index
- typedef bool (*Occluded1Ty )(void* pre, void* ray, IntersectContext* context, const void* primitive);
-
- typedef void (*Intersect4Ty)(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); // packet signatures take a lane index k
- typedef bool (*Occluded4Ty) (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive);
-
- typedef void (*Intersect8Ty)(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive);
- typedef bool (*Occluded8Ty) (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive);
-
- typedef void (*Intersect16Ty)(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive);
- typedef bool (*Occluded16Ty) (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive);
-
- public:
- struct Intersectors
- {
- Intersectors() {} // WARNING: Do not zero initialize this, as we otherwise get problems with thread unsafe local static variable initialization (e.g. on VS2013) in curve_intersector_virtual.cpp.
-
- template<int K> void intersect(void* pre, void* ray, IntersectContext* context, const void* primitive); // declared only; specializations follow the struct
- template<int K> bool occluded (void* pre, void* ray, IntersectContext* context, const void* primitive);
-
- template<int K> void intersect(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive); // overloads with lane index k
- template<int K> bool occluded (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive);
-
- public:
- Intersect1Ty intersect1; // the constructor assigns none of these pointers
- Occluded1Ty occluded1;
- Intersect4Ty intersect4;
- Occluded4Ty occluded4;
- Intersect8Ty intersect8;
- Occluded8Ty occluded8;
- Intersect16Ty intersect16;
- Occluded16Ty occluded16;
- };
-
- Intersectors vtbl[Geometry::GTY_END]; // Geometry::GTY_END entries
- };
-
- template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<1> (void* pre, void* ray, IntersectContext* context, const void* primitive) { assert(intersect1); intersect1(pre,ray,context,primitive); } // each specialization asserts its pointer is non-null, then forwards to it
- template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<1> (void* pre, void* ray, IntersectContext* context, const void* primitive) { assert(occluded1); return occluded1(pre,ray,context,primitive); }
-
- template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<4>(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(intersect4); intersect4(pre,ray,k,context,primitive); }
- template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<4> (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(occluded4); return occluded4(pre,ray,k,context,primitive); }
-
-#if defined(__AVX__) // 8-wide specializations are compiled only when __AVX__ is defined
- template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<8>(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(intersect8); intersect8(pre,ray,k,context,primitive); }
- template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<8> (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(occluded8); return occluded8(pre,ray,k,context,primitive); }
-#endif
-
-#if defined(__AVX512F__) // 16-wide specializations are compiled only when __AVX512F__ is defined
- template<> __forceinline void VirtualCurveIntersector::Intersectors::intersect<16>(void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(intersect16); intersect16(pre,ray,k,context,primitive); }
- template<> __forceinline bool VirtualCurveIntersector::Intersectors::occluded<16> (void* pre, void* ray, size_t k, IntersectContext* context, const void* primitive) { assert(occluded16); return occluded16(pre,ray,k,context,primitive); }
-#endif
-
- namespace isa
- {
- struct VirtualCurveIntersector1 // single-ray leaf intersector that dispatches through the VirtualCurveIntersector table
- {
- typedef unsigned char Primitive; // the first byte of a primitive is read as its geometry type
- typedef CurvePrecalculations1 Precalculations;
-
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) // tray and lazy_node are not referenced in this body
- {
- assert(num == 1); // exactly one primitive per call is expected
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty]; // table entry selected by geometry type
- leafIntersector.intersect<1>(&pre,&ray,context,prim);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) // tray and lazy_node are not referenced in this body
- {
- assert(num == 1); // exactly one primitive per call is expected
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty]; // table entry selected by geometry type
- return leafIntersector.occluded<1>(&pre,&ray,context,prim);
- }
- };
-
- template<int K> // K: number of rays in the packet
- struct VirtualCurveIntersectorK // packet leaf intersector that dispatches through the VirtualCurveIntersector table
- {
- typedef unsigned char Primitive; // the first byte of a primitive is read as its geometry type
- typedef CurvePrecalculationsK<K> Precalculations;
-
- template<bool robust>
- static __forceinline void intersect(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) // tray and lazy_node are not referenced in this body
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty];
- size_t mask = movemask(valid_i);
- while (mask) leafIntersector.intersect<K>(&pre,&ray,bscf(mask),context,prim); // one call per active lane; bscf(mask) supplies the lane index (presumably clearing that bit - confirm)
- }
-
- template<bool robust>
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node) // tray and lazy_node are not referenced in this body
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty];
- vbool<K> valid_o = false; // result mask, starts with no lane set
- size_t mask = movemask(valid_i);
- while (mask) {
- size_t k = bscf(mask);
- if (leafIntersector.occluded<K>(&pre,&ray,k,context,prim))
- set(valid_o, k); // mark lane k as occluded
- }
- return valid_o;
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) // single-lane variant: only lane k is processed
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty];
- leafIntersector.intersect<K>(&pre,&ray,k,context,prim);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) // single-lane variant: only lane k is processed
- {
- assert(num == 1);
- RTCGeometryType ty = (RTCGeometryType)(*prim);
- assert(This->leafIntersector);
- VirtualCurveIntersector::Intersectors& leafIntersector = ((VirtualCurveIntersector*) This->leafIntersector)->vtbl[ty];
- return leafIntersector.occluded<K>(&pre,&ray,k,context,prim);
- }
- };
-
- template<int N> // N is forwarded as the first two template arguments of the intersector classes below
- static VirtualCurveIntersector::Intersectors LinearRoundConeNiIntersectors() // builds a table entry from the RoundLinearCurveMi intersectors
- {
- VirtualCurveIntersector::Intersectors intersectors; // not zero-initialized; pointers not assigned below stay unset
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &RoundLinearCurveMiIntersector1<N,N,true>::intersect; // casts erase the concrete signatures to the table's void* based types
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &RoundLinearCurveMiIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &RoundLinearCurveMiIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &RoundLinearCurveMiIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__) // 8-wide entries are assigned only when __AVX__ is defined
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&RoundLinearCurveMiIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &RoundLinearCurveMiIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__) // 16-wide entries are assigned only when __AVX512F__ is defined
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&RoundLinearCurveMiIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &RoundLinearCurveMiIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors LinearConeNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &ConeCurveMiIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &ConeCurveMiIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &ConeCurveMiIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &ConeCurveMiIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&ConeCurveMiIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &ConeCurveMiIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&ConeCurveMiIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &ConeCurveMiIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors LinearRoundConeNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &RoundLinearCurveMiMBIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &RoundLinearCurveMiMBIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &RoundLinearCurveMiMBIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &RoundLinearCurveMiMBIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&RoundLinearCurveMiMBIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &RoundLinearCurveMiMBIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&RoundLinearCurveMiMBIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &RoundLinearCurveMiMBIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors LinearConeNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &ConeCurveMiMBIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &ConeCurveMiMBIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &ConeCurveMiMBIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &ConeCurveMiMBIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&ConeCurveMiMBIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &ConeCurveMiMBIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&ConeCurveMiMBIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &ConeCurveMiMBIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
-
- template<int N>
- static VirtualCurveIntersector::Intersectors LinearRibbonNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &FlatLinearCurveMiIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &FlatLinearCurveMiIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &FlatLinearCurveMiIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &FlatLinearCurveMiIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&FlatLinearCurveMiIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &FlatLinearCurveMiIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&FlatLinearCurveMiIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &FlatLinearCurveMiIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors LinearRibbonNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &FlatLinearCurveMiMBIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &FlatLinearCurveMiMBIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &FlatLinearCurveMiMBIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &FlatLinearCurveMiMBIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&FlatLinearCurveMiMBIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &FlatLinearCurveMiMBIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&FlatLinearCurveMiMBIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &FlatLinearCurveMiMBIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors SphereNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &SphereMiIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &SphereMiIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &SphereMiIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &SphereMiIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&SphereMiIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &SphereMiIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&SphereMiIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &SphereMiIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors SphereNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &SphereMiMBIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &SphereMiMBIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &SphereMiMBIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &SphereMiMBIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&SphereMiMBIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &SphereMiMBIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&SphereMiMBIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &SphereMiMBIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors DiscNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &DiscMiIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &DiscMiIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &DiscMiIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &DiscMiIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&DiscMiIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &DiscMiIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&DiscMiIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &DiscMiIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors DiscNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &DiscMiMBIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &DiscMiMBIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &DiscMiMBIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &DiscMiMBIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&DiscMiMBIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &DiscMiMBIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&DiscMiMBIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &DiscMiMBIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors OrientedDiscNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &OrientedDiscMiIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &OrientedDiscMiIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &OrientedDiscMiIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &OrientedDiscMiIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&OrientedDiscMiIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &OrientedDiscMiIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&OrientedDiscMiIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &OrientedDiscMiIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<int N>
- static VirtualCurveIntersector::Intersectors OrientedDiscNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &OrientedDiscMiMBIntersector1<N,N,true>::intersect;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &OrientedDiscMiMBIntersector1<N,N,true>::occluded;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &OrientedDiscMiMBIntersectorK<N,N,4,true>::intersect;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &OrientedDiscMiMBIntersectorK<N,N,4,true>::occluded;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&OrientedDiscMiMBIntersectorK<N,N,8,true>::intersect;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &OrientedDiscMiMBIntersectorK<N,N,8,true>::occluded;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&OrientedDiscMiMBIntersectorK<N,N,16,true>::intersect;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &OrientedDiscMiMBIntersectorK<N,N,16,true>::occluded;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors RibbonNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &CurveNiIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_t <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors RibbonNvIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNvIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNvIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &CurveNvIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNvIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNvIntersectorK<N,8>::template intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNvIntersectorK<N,8>::template occluded_t <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNvIntersectorK<N,16>::template intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNvIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors RibbonNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_t<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_t <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &CurveNiMBIntersectorK<N,4>::template intersect_t<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_t <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_t<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_t <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_t<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_t <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors CurveNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors CurveNvIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNvIntersector1<N>::template intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNvIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNvIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNvIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNvIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNvIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNvIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNvIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors CurveNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_t<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_t <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_t<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_t <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_t<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_t <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_t<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_t <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors OrientedCurveNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_n<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_n <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_n<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_n <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_n<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_n <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_n<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_n <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors OrientedCurveNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_n<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_n <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_n<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_n <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_n<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_n <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_n<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_n <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors HermiteRibbonNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_h<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_h <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_h<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_h <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_h<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_h <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_h<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_h <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors HermiteRibbonNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_h<RibbonCurve1Intersector1<Curve>, Intersect1EpilogMU<VSIZEX,true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_h <RibbonCurve1Intersector1<Curve>, Occluded1EpilogMU<VSIZEX,true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_h<RibbonCurve1IntersectorK<Curve,4>, Intersect1KEpilogMU<VSIZEX,4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_h <RibbonCurve1IntersectorK<Curve,4>, Occluded1KEpilogMU<VSIZEX,4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_h<RibbonCurve1IntersectorK<Curve,8>, Intersect1KEpilogMU<VSIZEX,8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_h <RibbonCurve1IntersectorK<Curve,8>, Occluded1KEpilogMU<VSIZEX,8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_h<RibbonCurve1IntersectorK<Curve,16>, Intersect1KEpilogMU<VSIZEX,16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_h <RibbonCurve1IntersectorK<Curve,16>, Occluded1KEpilogMU<VSIZEX,16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors HermiteCurveNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_h<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_h <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_h<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_h <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_h<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_h <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_h<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_h <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors HermiteCurveNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_h<SweepCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_h <SweepCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_h<SweepCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_h <SweepCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_h<SweepCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_h <SweepCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_h<SweepCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_h <SweepCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors HermiteOrientedCurveNiIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiIntersector1<N>::template intersect_hn<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiIntersector1<N>::template occluded_hn <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiIntersectorK<N,4>::template intersect_hn<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiIntersectorK<N,4>::template occluded_hn <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiIntersectorK<N,8>::template intersect_hn<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiIntersectorK<N,8>::template occluded_hn <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiIntersectorK<N,16>::template intersect_hn<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiIntersectorK<N,16>::template occluded_hn <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
-
- template<template<typename Ty> class Curve, int N>
- static VirtualCurveIntersector::Intersectors HermiteOrientedCurveNiMBIntersectors()
- {
- VirtualCurveIntersector::Intersectors intersectors;
- intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &CurveNiMBIntersector1<N>::template intersect_hn<OrientedCurve1Intersector1<Curve>, Intersect1Epilog1<true> >;
- intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &CurveNiMBIntersector1<N>::template occluded_hn <OrientedCurve1Intersector1<Curve>, Occluded1Epilog1<true> >;
- intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty)&CurveNiMBIntersectorK<N,4>::template intersect_hn<OrientedCurve1IntersectorK<Curve,4>, Intersect1KEpilog1<4,true> >;
- intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &CurveNiMBIntersectorK<N,4>::template occluded_hn <OrientedCurve1IntersectorK<Curve,4>, Occluded1KEpilog1<4,true> >;
-#if defined(__AVX__)
- intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&CurveNiMBIntersectorK<N,8>::template intersect_hn<OrientedCurve1IntersectorK<Curve,8>, Intersect1KEpilog1<8,true> >;
- intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &CurveNiMBIntersectorK<N,8>::template occluded_hn <OrientedCurve1IntersectorK<Curve,8>, Occluded1KEpilog1<8,true> >;
-#endif
-#if defined(__AVX512F__)
- intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&CurveNiMBIntersectorK<N,16>::template intersect_hn<OrientedCurve1IntersectorK<Curve,16>, Intersect1KEpilog1<16,true> >;
- intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &CurveNiMBIntersectorK<N,16>::template occluded_hn <OrientedCurve1IntersectorK<Curve,16>, Occluded1KEpilog1<16,true> >;
-#endif
- return intersectors;
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h
deleted file mode 100644
index 69cf612275..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2020 Light Transport Entertainment Inc.
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curve_intersector_virtual.h"
-
-namespace embree
-{
- namespace isa
- {
- void AddVirtualCurveBezierCurveInterector4i(VirtualCurveIntersector &prim);
- void AddVirtualCurveBezierCurveInterector4v(VirtualCurveIntersector &prim);
- void AddVirtualCurveBezierCurveInterector4iMB(VirtualCurveIntersector &prim);
-#if defined(__AVX__)
- void AddVirtualCurveBezierCurveInterector8i(VirtualCurveIntersector &prim);
- void AddVirtualCurveBezierCurveInterector8v(VirtualCurveIntersector &prim);
- void AddVirtualCurveBezierCurveInterector8iMB(VirtualCurveIntersector &prim);
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h
deleted file mode 100644
index d37e41098e..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2020 Light Transport Entertainment Inc.
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curve_intersector_virtual.h"
-
-namespace embree
-{
- namespace isa
- {
- void AddVirtualCurveBSplineCurveInterector4i(VirtualCurveIntersector &prim);
- void AddVirtualCurveBSplineCurveInterector4v(VirtualCurveIntersector &prim);
- void AddVirtualCurveBSplineCurveInterector4iMB(VirtualCurveIntersector &prim);
-#if defined(__AVX__)
- void AddVirtualCurveBSplineCurveInterector8i(VirtualCurveIntersector &prim);
- void AddVirtualCurveBSplineCurveInterector8v(VirtualCurveIntersector &prim);
- void AddVirtualCurveBSplineCurveInterector8iMB(VirtualCurveIntersector &prim);
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h
deleted file mode 100644
index a133a11d63..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2020 Light Transport Entertainment Inc.
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curve_intersector_virtual.h"
-
-namespace embree
-{
- namespace isa
- {
- void AddVirtualCurveCatmullRomCurveInterector4i(VirtualCurveIntersector &prim);
- void AddVirtualCurveCatmullRomCurveInterector4v(VirtualCurveIntersector &prim);
- void AddVirtualCurveCatmullRomCurveInterector4iMB(VirtualCurveIntersector &prim);
-#if defined(__AVX__)
- void AddVirtualCurveCatmullRomCurveInterector8i(VirtualCurveIntersector &prim);
- void AddVirtualCurveCatmullRomCurveInterector8v(VirtualCurveIntersector &prim);
- void AddVirtualCurveCatmullRomCurveInterector8iMB(VirtualCurveIntersector &prim);
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h
deleted file mode 100644
index 9aec35da45..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2020 Light Transport Entertainment Inc.
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curve_intersector_virtual.h"
-
-namespace embree
-{
- namespace isa
- {
- void AddVirtualCurveHermiteCurveInterector4i(VirtualCurveIntersector &prim);
- void AddVirtualCurveHermiteCurveInterector4v(VirtualCurveIntersector &prim);
- void AddVirtualCurveHermiteCurveInterector4iMB(VirtualCurveIntersector &prim);
-#if defined(__AVX__)
- void AddVirtualCurveHermiteCurveInterector8i(VirtualCurveIntersector &prim);
- void AddVirtualCurveHermiteCurveInterector8v(VirtualCurveIntersector &prim);
- void AddVirtualCurveHermiteCurveInterector8iMB(VirtualCurveIntersector &prim);
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h
deleted file mode 100644
index dd37d194f5..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2020 Light Transport Entertainment Inc.
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curve_intersector_virtual.h"
-
-namespace embree
-{
- namespace isa
- {
- void AddVirtualCurveLinearCurveInterector4i(VirtualCurveIntersector &prim);
- void AddVirtualCurveLinearCurveInterector4v(VirtualCurveIntersector &prim);
- void AddVirtualCurveLinearCurveInterector4iMB(VirtualCurveIntersector &prim);
-#if defined(__AVX__)
- void AddVirtualCurveLinearCurveInterector8i(VirtualCurveIntersector &prim);
- void AddVirtualCurveLinearCurveInterector8v(VirtualCurveIntersector &prim);
- void AddVirtualCurveLinearCurveInterector8iMB(VirtualCurveIntersector &prim);
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h
deleted file mode 100644
index fe5ceed840..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2020 Light Transport Entertainment Inc.
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "curve_intersector_virtual.h"
-
-namespace embree
-{
- namespace isa
- {
- void AddVirtualCurvePointInterector4i(VirtualCurveIntersector &prim);
- void AddVirtualCurvePointInterector4v(VirtualCurveIntersector &prim);
- void AddVirtualCurvePointInterector4iMB(VirtualCurveIntersector &prim);
-
-#if defined (__AVX__)
- void AddVirtualCurvePointInterector8i(VirtualCurveIntersector &prim);
- void AddVirtualCurvePointInterector8v(VirtualCurveIntersector &prim);
- void AddVirtualCurvePointInterector8iMB(VirtualCurveIntersector &prim);
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/cylinder.h b/thirdparty/embree-aarch64/kernels/geometry/cylinder.h
deleted file mode 100644
index 39a582864c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/cylinder.h
+++ /dev/null
@@ -1,223 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-
-namespace embree
-{
- namespace isa
- {
- struct Cylinder
- {
- const Vec3fa p0; //!< start location
- const Vec3fa p1; //!< end position
- const float rr; //!< squared radius of cylinder
-
- __forceinline Cylinder(const Vec3fa& p0, const Vec3fa& p1, const float r)
- : p0(p0), p1(p1), rr(sqr(r)) {}
-
- __forceinline Cylinder(const Vec3fa& p0, const Vec3fa& p1, const float rr, bool)
- : p0(p0), p1(p1), rr(rr) {}
-
- __forceinline bool intersect(const Vec3fa& org,
- const Vec3fa& dir,
- BBox1f& t_o,
- float& u0_o, Vec3fa& Ng0_o,
- float& u1_o, Vec3fa& Ng1_o) const
- {
- /* calculate quadratic equation to solve */
- const float rl = rcp_length(p1-p0);
- const Vec3fa P0 = p0, dP = (p1-p0)*rl;
- const Vec3fa O = org-P0, dO = dir;
-
- const float dOdO = dot(dO,dO);
- const float OdO = dot(dO,O);
- const float OO = dot(O,O);
- const float dOz = dot(dP,dO);
- const float Oz = dot(dP,O);
-
- const float A = dOdO - sqr(dOz);
- const float B = 2.0f * (OdO - dOz*Oz);
- const float C = OO - sqr(Oz) - rr;
-
- /* we miss the cylinder if determinant is smaller than zero */
- const float D = B*B - 4.0f*A*C;
- if (D < 0.0f) {
- t_o = BBox1f(pos_inf,neg_inf);
- return false;
- }
-
- /* special case for rays that are parallel to the cylinder */
- const float eps = 16.0f*float(ulp)*max(abs(dOdO),abs(sqr(dOz)));
- if (abs(A) < eps)
- {
- if (C <= 0.0f) {
- t_o = BBox1f(neg_inf,pos_inf);
- return true;
- } else {
- t_o = BBox1f(pos_inf,neg_inf);
- return false;
- }
- }
-
- /* standard case for rays that are not parallel to the cylinder */
- const float Q = sqrt(D);
- const float rcp_2A = rcp(2.0f*A);
- const float t0 = (-B-Q)*rcp_2A;
- const float t1 = (-B+Q)*rcp_2A;
-
- /* calculates u and Ng for near hit */
- {
- u0_o = madd(t0,dOz,Oz)*rl;
- const Vec3fa Pr = t0*dir;
- const Vec3fa Pl = madd(u0_o,p1-p0,p0);
- Ng0_o = Pr-Pl;
- }
-
- /* calculates u and Ng for far hit */
- {
- u1_o = madd(t1,dOz,Oz)*rl;
- const Vec3fa Pr = t1*dir;
- const Vec3fa Pl = madd(u1_o,p1-p0,p0);
- Ng1_o = Pr-Pl;
- }
-
- t_o.lower = t0;
- t_o.upper = t1;
- return true;
- }
-
- __forceinline bool intersect(const Vec3fa& org_i, const Vec3fa& dir, BBox1f& t_o) const
- {
- float u0_o; Vec3fa Ng0_o;
- float u1_o; Vec3fa Ng1_o;
- return intersect(org_i,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o);
- }
-
- static bool verify(const size_t id, const Cylinder& cylinder, const RayHit& ray, bool shouldhit, const float t0, const float t1)
- {
- float eps = 0.001f;
- BBox1f t; bool hit;
- hit = cylinder.intersect(ray.org,ray.dir,t);
-
- bool failed = hit != shouldhit;
- if (shouldhit) failed |= std::isinf(t0) ? t0 != t.lower : abs(t0-t.lower) > eps;
- if (shouldhit) failed |= std::isinf(t1) ? t1 != t.upper : abs(t1-t.upper) > eps;
- if (!failed) return true;
- embree_cout << "Cylinder test " << id << " failed: cylinder = " << cylinder << ", ray = " << ray << ", hit = " << hit << ", t = " << t << embree_endl;
- return false;
- }
-
- /* verify cylinder class */
- static bool verify()
- {
- bool passed = true;
- const Cylinder cylinder(Vec3fa(0.0f,0.0f,0.0f),Vec3fa(1.0f,0.0f,0.0f),1.0f);
- passed &= verify(0,cylinder,RayHit(Vec3fa(-2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f);
- passed &= verify(1,cylinder,RayHit(Vec3fa(+2.0f,1.0f,0.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),true,0.0f,2.0f);
- passed &= verify(2,cylinder,RayHit(Vec3fa(+2.0f,1.0f,2.0f),Vec3fa( 0.0f,-1.0f,+0.0f),0.0f,float(inf)),false,0.0f,0.0f);
- passed &= verify(3,cylinder,RayHit(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf);
- passed &= verify(4,cylinder,RayHit(Vec3fa(+0.0f,0.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),true,neg_inf,pos_inf);
- passed &= verify(5,cylinder,RayHit(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa( 1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf);
- passed &= verify(6,cylinder,RayHit(Vec3fa(+0.0f,2.0f,0.0f),Vec3fa(-1.0f, 0.0f,+0.0f),0.0f,float(inf)),false,pos_inf,neg_inf);
- return passed;
- }
-
- /*! output operator */
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const Cylinder& c) {
- return cout << "Cylinder { p0 = " << c.p0 << ", p1 = " << c.p1 << ", r = " << sqrtf(c.rr) << "}";
- }
- };
-
- template<int N>
- struct CylinderN
- {
- const Vec3vf<N> p0; //!< start location
- const Vec3vf<N> p1; //!< end position
- const vfloat<N> rr; //!< squared radius of cylinder
-
- __forceinline CylinderN(const Vec3vf<N>& p0, const Vec3vf<N>& p1, const vfloat<N>& r)
- : p0(p0), p1(p1), rr(sqr(r)) {}
-
- __forceinline CylinderN(const Vec3vf<N>& p0, const Vec3vf<N>& p1, const vfloat<N>& rr, bool)
- : p0(p0), p1(p1), rr(rr) {}
-
-
- __forceinline vbool<N> intersect(const Vec3fa& org, const Vec3fa& dir,
- BBox<vfloat<N>>& t_o,
- vfloat<N>& u0_o, Vec3vf<N>& Ng0_o,
- vfloat<N>& u1_o, Vec3vf<N>& Ng1_o) const
- {
- /* calculate quadratic equation to solve */
- const vfloat<N> rl = rcp_length(p1-p0);
- const Vec3vf<N> P0 = p0, dP = (p1-p0)*rl;
- const Vec3vf<N> O = Vec3vf<N>(org)-P0, dO = dir;
-
- const vfloat<N> dOdO = dot(dO,dO);
- const vfloat<N> OdO = dot(dO,O);
- const vfloat<N> OO = dot(O,O);
- const vfloat<N> dOz = dot(dP,dO);
- const vfloat<N> Oz = dot(dP,O);
-
- const vfloat<N> A = dOdO - sqr(dOz);
- const vfloat<N> B = 2.0f * (OdO - dOz*Oz);
- const vfloat<N> C = OO - sqr(Oz) - rr;
-
- /* we miss the cylinder if determinant is smaller than zero */
- const vfloat<N> D = B*B - 4.0f*A*C;
- vbool<N> valid = D >= 0.0f;
- if (none(valid)) {
- t_o = BBox<vfloat<N>>(empty);
- return valid;
- }
-
- /* standard case for rays that are not parallel to the cylinder */
- const vfloat<N> Q = sqrt(D);
- const vfloat<N> rcp_2A = rcp(2.0f*A);
- const vfloat<N> t0 = (-B-Q)*rcp_2A;
- const vfloat<N> t1 = (-B+Q)*rcp_2A;
-
- /* calculates u and Ng for near hit */
- {
- u0_o = madd(t0,dOz,Oz)*rl;
- const Vec3vf<N> Pr = t0*Vec3vf<N>(dir);
- const Vec3vf<N> Pl = madd(u0_o,p1-p0,p0);
- Ng0_o = Pr-Pl;
- }
-
- /* calculates u and Ng for far hit */
- {
- u1_o = madd(t1,dOz,Oz)*rl;
- const Vec3vf<N> Pr = t1*Vec3vf<N>(dir);
- const Vec3vf<N> Pl = madd(u1_o,p1-p0,p0);
- Ng1_o = Pr-Pl;
- }
-
- t_o.lower = select(valid, t0, vfloat<N>(pos_inf));
- t_o.upper = select(valid, t1, vfloat<N>(neg_inf));
-
- /* special case for rays that are parallel to the cylinder */
- const vfloat<N> eps = 16.0f*float(ulp)*max(abs(dOdO),abs(sqr(dOz)));
- vbool<N> validt = valid & (abs(A) < eps);
- if (unlikely(any(validt)))
- {
- vbool<N> inside = C <= 0.0f;
- t_o.lower = select(validt,select(inside,vfloat<N>(neg_inf),vfloat<N>(pos_inf)),t_o.lower);
- t_o.upper = select(validt,select(inside,vfloat<N>(pos_inf),vfloat<N>(neg_inf)),t_o.upper);
- valid &= !validt | inside;
- }
- return valid;
- }
-
- __forceinline vbool<N> intersect(const Vec3fa& org_i, const Vec3fa& dir, BBox<vfloat<N>>& t_o) const
- {
- vfloat<N> u0_o; Vec3vf<N> Ng0_o;
- vfloat<N> u1_o; Vec3vf<N> Ng1_o;
- return intersect(org_i,dir,t_o,u0_o,Ng0_o,u1_o,Ng1_o);
- }
- };
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h
deleted file mode 100644
index e8305780e5..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h
+++ /dev/null
@@ -1,216 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "../common/scene_points.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct DiscIntersectorHitM
- {
- __forceinline DiscIntersectorHitM() {}
-
- __forceinline DiscIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng)
- : vu(u), vv(v), vt(t), vNg(Ng)
- {
- }
-
- __forceinline void finalize() {}
-
- __forceinline Vec2f uv(const size_t i) const
- {
- return Vec2f(vu[i], vv[i]);
- }
- __forceinline float t(const size_t i) const
- {
- return vt[i];
- }
- __forceinline Vec3fa Ng(const size_t i) const
- {
- return Vec3fa(vNg.x[i], vNg.y[i], vNg.z[i]);
- }
-
- public:
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
- };
-
- template<int M>
- struct DiscIntersector1
- {
- typedef CurvePrecalculations1 Precalculations;
-
- template<typename Epilog>
- static __forceinline bool intersect(
- const vbool<M>& valid_i,
- Ray& ray,
- IntersectContext* context,
- const Points* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i,
- const Epilog& epilog)
- {
- vbool<M> valid = valid_i;
-
- const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z);
- const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z);
- const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir));
-
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec3vf<M> center = v0.xyz();
- const vfloat<M> radius = v0.w;
-
- const Vec3vf<M> c0 = center - ray_org;
- const vfloat<M> projC0 = dot(c0, ray_dir) * rd2;
-
- valid &= (vfloat<M>(ray.tnear()) <= projC0) & (projC0 <= vfloat<M>(ray.tfar));
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
- valid &= projC0 > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR) * radius * pre.depth_scale; // ignore self intersections
- if (unlikely(none(valid)))
- return false;
-
- const Vec3vf<M> perp = c0 - projC0 * ray_dir;
- const vfloat<M> l2 = dot(perp, perp);
- const vfloat<M> r2 = radius * radius;
- valid &= (l2 <= r2);
- if (unlikely(none(valid)))
- return false;
-
- DiscIntersectorHitM<M> hit(zero, zero, projC0, -ray_dir);
- return epilog(valid, hit);
- }
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- Ray& ray,
- IntersectContext* context,
- const Points* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i,
- const Vec3vf<M>& normal,
- const Epilog& epilog)
- {
- vbool<M> valid = valid_i;
- const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z);
-
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec3vf<M> center = v0.xyz();
- const vfloat<M> radius = v0.w;
-
- vfloat<M> divisor = dot(Vec3vf<M>((Vec3fa)ray.dir), normal);
- const vbool<M> parallel = divisor == vfloat<M>(0.f);
- valid &= !parallel;
- divisor = select(parallel, 1.f, divisor); // prevent divide by zero
-
- vfloat<M> t = dot(center - Vec3vf<M>((Vec3fa)ray.org), Vec3vf<M>(normal)) / divisor;
-
- valid &= (vfloat<M>(ray.tnear()) <= t) & (t <= vfloat<M>(ray.tfar));
- if (unlikely(none(valid)))
- return false;
-
- Vec3vf<M> intersection = Vec3vf<M>((Vec3fa)ray.org) + Vec3vf<M>((Vec3fa)ray.dir) * t;
- vfloat<M> dist2 = dot(intersection - center, intersection - center);
- valid &= dist2 < radius * radius;
- if (unlikely(none(valid)))
- return false;
-
- DiscIntersectorHitM<M> hit(zero, zero, t, normal);
- return epilog(valid, hit);
- }
- };
-
- template<int M, int K>
- struct DiscIntersectorK
- {
- typedef CurvePrecalculationsK<K> Precalculations;
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- RayK<K>& ray,
- size_t k,
- IntersectContext* context,
- const Points* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i,
- const Epilog& epilog)
- {
- vbool<M> valid = valid_i;
-
- const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);
- const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir));
-
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec3vf<M> center = v0.xyz();
- const vfloat<M> radius = v0.w;
-
- const Vec3vf<M> c0 = center - ray_org;
- const vfloat<M> projC0 = dot(c0, ray_dir) * rd2;
-
- valid &= (vfloat<M>(ray.tnear()[k]) <= projC0) & (projC0 <= vfloat<M>(ray.tfar[k]));
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
- valid &= projC0 > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR) * radius * pre.depth_scale[k]; // ignore self intersections
- if (unlikely(none(valid)))
- return false;
-
- const Vec3vf<M> perp = c0 - projC0 * ray_dir;
- const vfloat<M> l2 = dot(perp, perp);
- const vfloat<M> r2 = radius * radius;
- valid &= (l2 <= r2);
- if (unlikely(none(valid)))
- return false;
-
- DiscIntersectorHitM<M> hit(zero, zero, projC0, -ray_dir);
- return epilog(valid, hit);
- }
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- RayK<K>& ray,
- size_t k,
- IntersectContext* context,
- const Points* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i,
- const Vec3vf<M>& normal,
- const Epilog& epilog)
- {
- vbool<M> valid = valid_i;
- const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);
-
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec3vf<M> center = v0.xyz();
- const vfloat<M> radius = v0.w;
-
- vfloat<M> divisor = dot(Vec3vf<M>(ray_dir), normal);
- const vbool<M> parallel = divisor == vfloat<M>(0.f);
- valid &= !parallel;
- divisor = select(parallel, 1.f, divisor); // prevent divide by zero
-
- vfloat<M> t = dot(center - Vec3vf<M>(ray_org), Vec3vf<M>(normal)) / divisor;
-
- valid &= (vfloat<M>(ray.tnear()[k]) <= t) & (t <= vfloat<M>(ray.tfar[k]));
- if (unlikely(none(valid)))
- return false;
-
- Vec3vf<M> intersection = Vec3vf<M>(ray_org) + Vec3vf<M>(ray_dir) * t;
- vfloat<M> dist2 = dot(intersection - center, intersection - center);
- valid &= dist2 < radius * radius;
- if (unlikely(none(valid)))
- return false;
-
- DiscIntersectorHitM<M> hit(zero, zero, t, normal);
- return epilog(valid, hit);
- }
- };
- } // namespace isa
-} // namespace embree
diff --git a/thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h
deleted file mode 100644
index e1dc3aa98e..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h
+++ /dev/null
@@ -1,277 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "disc_intersector.h"
-#include "intersector_epilog.h"
-#include "pointi.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M, int Mx, bool filter>
- struct DiscMiIntersector1
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre,
- RayHit& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre,
- Ray& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
- };
-
- template<int M, int Mx, bool filter>
- struct DiscMiMBIntersector1
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre,
- RayHit& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom, ray.time());
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre,
- Ray& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom, ray.time());
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct DiscMiIntersectorK
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(
- const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0,
- Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(
- const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0,
- Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct DiscMiMBIntersectorK
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(
- const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()[k]);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0,
- Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(
- const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()[k]);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0, Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
- };
-
- template<int M, int Mx, bool filter>
- struct OrientedDiscMiIntersector1
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre,
- RayHit& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, n0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre,
- Ray& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, n0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
- };
-
- template<int M, int Mx, bool filter>
- struct OrientedDiscMiMBIntersector1
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre,
- RayHit& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom, ray.time());
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, n0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre,
- Ray& ray,
- IntersectContext* context,
- const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom, ray.time());
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, n0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID()));
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct OrientedDiscMiIntersectorK
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(
- const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0, n0,
- Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(
- const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0, n0,
- Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct OrientedDiscMiMBIntersectorK
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(
- const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom, ray.time()[k]);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0, n0,
- Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
-
- static __forceinline bool occluded(
- const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& Disc)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(Disc.geomID());
- Vec4vf<M> v0; Vec3vf<M> n0;
- Disc.gather(v0, n0, geom, ray.time()[k]);
- const vbool<Mx> valid = Disc.template valid<Mx>();
- return DiscIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0, n0,
- Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID()));
- }
- };
- } // namespace isa
-} // namespace embree
diff --git a/thirdparty/embree-aarch64/kernels/geometry/filter.h b/thirdparty/embree-aarch64/kernels/geometry/filter.h
deleted file mode 100644
index 4cdf7a395a..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/filter.h
+++ /dev/null
@@ -1,204 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/geometry.h"
-#include "../common/ray.h"
-#include "../common/hit.h"
-#include "../common/context.h"
-
-namespace embree
-{
- namespace isa
- {
- __forceinline bool runIntersectionFilter1Helper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context)
- {
- if (geometry->intersectionFilterN)
- {
- assert(context->scene->hasGeometryFilterFunction());
- geometry->intersectionFilterN(args);
-
- if (args->valid[0] == 0)
- return false;
- }
-
- if (context->user->filter) {
- assert(context->scene->hasContextFilterFunction());
- context->user->filter(args);
-
- if (args->valid[0] == 0)
- return false;
- }
-
- copyHitToRay(*(RayHit*)args->ray,*(Hit*)args->hit);
- return true;
- }
-
- __forceinline bool runIntersectionFilter1(const Geometry* const geometry, RayHit& ray, IntersectContext* context, Hit& hit)
- {
- RTCFilterFunctionNArguments args;
- int mask = -1;
- args.valid = &mask;
- args.geometryUserPtr = geometry->userPtr;
- args.context = context->user;
- args.ray = (RTCRayN*)&ray;
- args.hit = (RTCHitN*)&hit;
- args.N = 1;
- return runIntersectionFilter1Helper(&args,geometry,context);
- }
-
- __forceinline void reportIntersection1(IntersectFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args)
- {
-#if defined(EMBREE_FILTER_FUNCTION)
- IntersectContext* MAYBE_UNUSED context = args->internal_context;
- const Geometry* const geometry = args->geometry;
- if (geometry->intersectionFilterN) {
- assert(context->scene->hasGeometryFilterFunction());
- geometry->intersectionFilterN(filter_args);
- }
-
- //if (args->valid[0] == 0)
- // return;
-
- if (context->user->filter) {
- assert(context->scene->hasContextFilterFunction());
- context->user->filter(filter_args);
- }
-#endif
- }
-
- __forceinline bool runOcclusionFilter1Helper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context)
- {
- if (geometry->occlusionFilterN)
- {
- assert(context->scene->hasGeometryFilterFunction());
- geometry->occlusionFilterN(args);
-
- if (args->valid[0] == 0)
- return false;
- }
-
- if (context->user->filter) {
- assert(context->scene->hasContextFilterFunction());
- context->user->filter(args);
-
- if (args->valid[0] == 0)
- return false;
- }
- return true;
- }
-
- __forceinline bool runOcclusionFilter1(const Geometry* const geometry, Ray& ray, IntersectContext* context, Hit& hit)
- {
- RTCFilterFunctionNArguments args;
- int mask = -1;
- args.valid = &mask;
- args.geometryUserPtr = geometry->userPtr;
- args.context = context->user;
- args.ray = (RTCRayN*)&ray;
- args.hit = (RTCHitN*)&hit;
- args.N = 1;
- return runOcclusionFilter1Helper(&args,geometry,context);
- }
-
- __forceinline void reportOcclusion1(OccludedFunctionNArguments* args, const RTCFilterFunctionNArguments* filter_args)
- {
-#if defined(EMBREE_FILTER_FUNCTION)
- IntersectContext* MAYBE_UNUSED context = args->internal_context;
- const Geometry* const geometry = args->geometry;
- if (geometry->occlusionFilterN) {
- assert(context->scene->hasGeometryFilterFunction());
- geometry->occlusionFilterN(filter_args);
- }
-
- //if (args->valid[0] == 0)
- // return false;
-
- if (context->user->filter) {
- assert(context->scene->hasContextFilterFunction());
- context->user->filter(filter_args);
- }
-#endif
- }
-
- template<int K>
- __forceinline vbool<K> runIntersectionFilterHelper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context)
- {
- vint<K>* mask = (vint<K>*) args->valid;
- if (geometry->intersectionFilterN)
- {
- assert(context->scene->hasGeometryFilterFunction());
- geometry->intersectionFilterN(args);
- }
-
- vbool<K> valid_o = *mask != vint<K>(zero);
- if (none(valid_o)) return valid_o;
-
- if (context->user->filter) {
- assert(context->scene->hasContextFilterFunction());
- context->user->filter(args);
- }
-
- valid_o = *mask != vint<K>(zero);
- if (none(valid_o)) return valid_o;
-
- copyHitToRay(valid_o,*(RayHitK<K>*)args->ray,*(HitK<K>*)args->hit);
- return valid_o;
- }
-
- template<int K>
- __forceinline vbool<K> runIntersectionFilter(const vbool<K>& valid, const Geometry* const geometry, RayHitK<K>& ray, IntersectContext* context, HitK<K>& hit)
- {
- RTCFilterFunctionNArguments args;
- vint<K> mask = valid.mask32();
- args.valid = (int*)&mask;
- args.geometryUserPtr = geometry->userPtr;
- args.context = context->user;
- args.ray = (RTCRayN*)&ray;
- args.hit = (RTCHitN*)&hit;
- args.N = K;
- return runIntersectionFilterHelper<K>(&args,geometry,context);
- }
-
- template<int K>
- __forceinline vbool<K> runOcclusionFilterHelper(RTCFilterFunctionNArguments* args, const Geometry* const geometry, IntersectContext* context)
- {
- vint<K>* mask = (vint<K>*) args->valid;
- if (geometry->occlusionFilterN)
- {
- assert(context->scene->hasGeometryFilterFunction());
- geometry->occlusionFilterN(args);
- }
-
- vbool<K> valid_o = *mask != vint<K>(zero);
-
- if (none(valid_o)) return valid_o;
-
- if (context->user->filter) {
- assert(context->scene->hasContextFilterFunction());
- context->user->filter(args);
- }
-
- valid_o = *mask != vint<K>(zero);
-
- RayK<K>* ray = (RayK<K>*) args->ray;
- ray->tfar = select(valid_o, vfloat<K>(neg_inf), ray->tfar);
- return valid_o;
- }
-
- template<int K>
- __forceinline vbool<K> runOcclusionFilter(const vbool<K>& valid, const Geometry* const geometry, RayK<K>& ray, IntersectContext* context, HitK<K>& hit)
- {
- RTCFilterFunctionNArguments args;
- vint<K> mask = valid.mask32();
- args.valid = (int*)&mask;
- args.geometryUserPtr = geometry->userPtr;
- args.context = context->user;
- args.ray = (RTCRayN*)&ray;
- args.hit = (RTCHitN*)&hit;
- args.N = K;
- return runOcclusionFilterHelper<K>(&args,geometry,context);
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h
deleted file mode 100644
index 46a0af0827..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "grid_soa.h"
-#include "grid_soa_intersector1.h"
-#include "grid_soa_intersector_packet.h"
-#include "../common/ray.h"
-
-namespace embree
-{
- namespace isa
- {
- template<typename T>
- class SubdivPatch1Precalculations : public T
- {
- public:
- __forceinline SubdivPatch1Precalculations (const Ray& ray, const void* ptr)
- : T(ray,ptr) {}
- };
-
- template<int K, typename T>
- class SubdivPatch1PrecalculationsK : public T
- {
- public:
- __forceinline SubdivPatch1PrecalculationsK (const vbool<K>& valid, RayK<K>& ray)
- : T(valid,ray) {}
- };
-
- class Grid1Intersector1
- {
- public:
- typedef GridSOA Primitive;
- typedef Grid1Precalculations<GridSOAIntersector1::Precalculations> Precalculations;
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
- {
- GridSOAIntersector1::intersect(pre,ray,context,prim,lazy_node);
- }
- static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) {
- intersect(pre,ray,context,prim,ty,lazy_node);
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
- {
- GridSOAIntersector1::occluded(pre,ray,context,prim,lazy_node);
- }
- static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) {
- return occluded(pre,ray,context,prim,ty,lazy_node);
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, size_t& lazy_node) {
- assert(false && "not implemented");
- return false;
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, size_t& lazy_node) {
- assert(false && "not implemented");
- return false;
- }
- };
-
- template <int K>
- struct GridIntersectorK
- {
- typedef GridSOA Primitive;
- typedef SubdivPatch1PrecalculationsK<K,typename GridSOAIntersectorK<K>::Precalculations> Precalculations;
-
-
- static __forceinline void intersect(const vbool<K>& valid, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
- {
- GridSOAIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node);
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
- {
- GridSOAIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node);
- }
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
- {
- GridSOAIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node);
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
- {
- GridSOAIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node);
- }
- };
-
- typedef Grid1IntersectorK<4> SubdivPatch1Intersector4;
- typedef Grid1IntersectorK<8> SubdivPatch1Intersector8;
- typedef Grid1IntersectorK<16> SubdivPatch1Intersector16;
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h b/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h
deleted file mode 100644
index d3b275586c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h
+++ /dev/null
@@ -1,275 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "../common/scene_subdiv_mesh.h"
-#include "../bvh/bvh.h"
-#include "../subdiv/tessellation.h"
-#include "../subdiv/tessellation_cache.h"
-#include "subdivpatch1.h"
-
-namespace embree
-{
- namespace isa
- {
- class GridSOA
- {
- public:
-
- /*! GridSOA constructor */
- GridSOA(const SubdivPatch1Base* patches, const unsigned time_steps,
- const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
- const SubdivMesh* const geom, const size_t totalBvhBytes, const size_t gridBytes, BBox3fa* bounds_o = nullptr);
-
- /*! Subgrid creation */
- template<typename Allocator>
- static GridSOA* create(const SubdivPatch1Base* patches, const unsigned time_steps,
- unsigned x0, unsigned x1, unsigned y0, unsigned y1,
- const Scene* scene, Allocator& alloc, BBox3fa* bounds_o = nullptr)
- {
- const unsigned width = x1-x0+1;
- const unsigned height = y1-y0+1;
- const GridRange range(0,width-1,0,height-1);
- size_t bvhBytes = 0;
- if (time_steps == 1)
- bvhBytes = getBVHBytes(range,sizeof(BVH4::AABBNode),0);
- else {
- bvhBytes = (time_steps-1)*getBVHBytes(range,sizeof(BVH4::AABBNodeMB),0);
- bvhBytes += getTemporalBVHBytes(make_range(0,int(time_steps-1)),sizeof(BVH4::AABBNodeMB4D));
- }
- const size_t gridBytes = 4*size_t(width)*size_t(height)*sizeof(float);
- size_t rootBytes = time_steps*sizeof(BVH4::NodeRef);
-#if !defined(__X86_64__) && !defined(__aarch64__)
- rootBytes += 4; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding.
-#endif
- void* data = alloc(offsetof(GridSOA,data)+bvhBytes+time_steps*gridBytes+rootBytes);
- assert(data);
- return new (data) GridSOA(patches,time_steps,x0,x1,y0,y1,patches->grid_u_res,patches->grid_v_res,scene->get<SubdivMesh>(patches->geomID()),bvhBytes,gridBytes,bounds_o);
- }
-
- /*! Grid creation */
- template<typename Allocator>
- static GridSOA* create(const SubdivPatch1Base* const patches, const unsigned time_steps,
- const Scene* scene, const Allocator& alloc, BBox3fa* bounds_o = nullptr)
- {
- return create(patches,time_steps,0,patches->grid_u_res-1,0,patches->grid_v_res-1,scene,alloc,bounds_o);
- }
-
- /*! returns reference to root */
- __forceinline BVH4::NodeRef& root(size_t t = 0) { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }
- __forceinline const BVH4::NodeRef& root(size_t t = 0) const { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }
-
- /*! returns pointer to BVH array */
- __forceinline int8_t* bvhData() { return &data[0]; }
- __forceinline const int8_t* bvhData() const { return &data[0]; }
-
- /*! returns pointer to Grid array */
- __forceinline float* gridData(size_t t = 0) { return (float*) &data[gridOffset + t*gridBytes]; }
- __forceinline const float* gridData(size_t t = 0) const { return (float*) &data[gridOffset + t*gridBytes]; }
-
- __forceinline void* encodeLeaf(size_t u, size_t v) {
- return (void*) (16*(v * width + u + 1)); // +1 to not create empty leaf
- }
- __forceinline float* decodeLeaf(size_t t, const void* ptr) {
- return gridData(t) + (((size_t) (ptr) >> 4) - 1);
- }
-
- /*! returns the size of the BVH over the grid in bytes */
- static size_t getBVHBytes(const GridRange& range, const size_t nodeBytes, const size_t leafBytes);
-
- /*! returns the size of the temporal BVH over the time range BVHs */
- static size_t getTemporalBVHBytes(const range<int> time_range, const size_t nodeBytes);
-
- /*! calculates bounding box of grid range */
- __forceinline BBox3fa calculateBounds(size_t time, const GridRange& range) const
- {
- const float* const grid_array = gridData(time);
- const float* const grid_x_array = grid_array + 0 * dim_offset;
- const float* const grid_y_array = grid_array + 1 * dim_offset;
- const float* const grid_z_array = grid_array + 2 * dim_offset;
-
- /* compute the bounds just for the range! */
- BBox3fa bounds( empty );
- for (unsigned v = range.v_start; v<=range.v_end; v++)
- {
- for (unsigned u = range.u_start; u<=range.u_end; u++)
- {
- const float x = grid_x_array[ v * width + u];
- const float y = grid_y_array[ v * width + u];
- const float z = grid_z_array[ v * width + u];
- bounds.extend( Vec3fa(x,y,z) );
- }
- }
- assert(is_finite(bounds));
- return bounds;
- }
-
- /*! Evaluates grid over patch and builds BVH4 tree over the grid. */
- std::pair<BVH4::NodeRef,BBox3fa> buildBVH(BBox3fa* bounds_o);
-
- /*! Create BVH4 tree over grid. */
- std::pair<BVH4::NodeRef,BBox3fa> buildBVH(const GridRange& range, size_t& allocator);
-
- /*! Evaluates grid over patch and builds MSMBlur BVH4 tree over the grid. */
- std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, BBox3fa* bounds_o);
-
- /*! Create MBlur BVH4 tree over grid. */
- std::pair<BVH4::NodeRef,LBBox3fa> buildMBlurBVH(size_t time, const GridRange& range, size_t& allocator);
-
- /*! Create MSMBlur BVH4 tree over grid. */
- std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, size_t& allocator, BBox3fa* bounds_o);
-
- template<typename Loader>
- struct MapUV
- {
- typedef typename Loader::vfloat vfloat;
- const float* const grid_uv;
- size_t line_offset;
- size_t lines;
-
- __forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines)
- : grid_uv(grid_uv), line_offset(line_offset), lines(lines) {}
-
- __forceinline void operator() (vfloat& u, vfloat& v) const {
- const Vec3<vfloat> tri_v012_uv = Loader::gather(grid_uv,line_offset,lines);
- const Vec2<vfloat> uv0 = GridSOA::decodeUV(tri_v012_uv[0]);
- const Vec2<vfloat> uv1 = GridSOA::decodeUV(tri_v012_uv[1]);
- const Vec2<vfloat> uv2 = GridSOA::decodeUV(tri_v012_uv[2]);
- const Vec2<vfloat> uv = u * uv1 + v * uv2 + (1.0f-u-v) * uv0;
- u = uv[0];v = uv[1];
- }
- };
-
- struct Gather2x3
- {
- enum { M = 4 };
- typedef vbool4 vbool;
- typedef vint4 vint;
- typedef vfloat4 vfloat;
-
- static __forceinline const Vec3vf4 gather(const float* const grid, const size_t line_offset, const size_t lines)
- {
- vfloat4 r0 = vfloat4::loadu(grid + 0*line_offset);
- vfloat4 r1 = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
- if (unlikely(line_offset == 2))
- {
- r0 = shuffle<0,1,1,1>(r0);
- r1 = shuffle<0,1,1,1>(r1);
- }
- return Vec3vf4(unpacklo(r0,r1), // r00, r10, r01, r11
- shuffle<1,1,2,2>(r0), // r01, r01, r02, r02
- shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12
- }
-
- static __forceinline void gather(const float* const grid_x,
- const float* const grid_y,
- const float* const grid_z,
- const size_t line_offset,
- const size_t lines,
- Vec3vf4& v0_o,
- Vec3vf4& v1_o,
- Vec3vf4& v2_o)
- {
- const Vec3vf4 tri_v012_x = gather(grid_x,line_offset,lines);
- const Vec3vf4 tri_v012_y = gather(grid_y,line_offset,lines);
- const Vec3vf4 tri_v012_z = gather(grid_z,line_offset,lines);
- v0_o = Vec3vf4(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);
- v1_o = Vec3vf4(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
- v2_o = Vec3vf4(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
- }
- };
-
-#if defined (__AVX__)
- struct Gather3x3
- {
- enum { M = 8 };
- typedef vbool8 vbool;
- typedef vint8 vint;
- typedef vfloat8 vfloat;
-
- static __forceinline const Vec3vf8 gather(const float* const grid, const size_t line_offset, const size_t lines)
- {
- vfloat4 ra = vfloat4::loadu(grid + 0*line_offset);
- vfloat4 rb = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
- vfloat4 rc;
- if (likely(lines > 2))
- rc = vfloat4::loadu(grid + 2*line_offset);
- else
- rc = rb;
-
- if (unlikely(line_offset == 2))
- {
- ra = shuffle<0,1,1,1>(ra);
- rb = shuffle<0,1,1,1>(rb);
- rc = shuffle<0,1,1,1>(rc);
- }
-
- const vfloat8 r0 = vfloat8(ra,rb);
- const vfloat8 r1 = vfloat8(rb,rc);
- return Vec3vf8(unpacklo(r0,r1), // r00, r10, r01, r11, r10, r20, r11, r21
- shuffle<1,1,2,2>(r0), // r01, r01, r02, r02, r11, r11, r12, r12
- shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12, r20, r21, r21, r22
- }
-
- static __forceinline void gather(const float* const grid_x,
- const float* const grid_y,
- const float* const grid_z,
- const size_t line_offset,
- const size_t lines,
- Vec3vf8& v0_o,
- Vec3vf8& v1_o,
- Vec3vf8& v2_o)
- {
- const Vec3vf8 tri_v012_x = gather(grid_x,line_offset,lines);
- const Vec3vf8 tri_v012_y = gather(grid_y,line_offset,lines);
- const Vec3vf8 tri_v012_z = gather(grid_z,line_offset,lines);
- v0_o = Vec3vf8(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);
- v1_o = Vec3vf8(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
- v2_o = Vec3vf8(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
- }
- };
-#endif
-
- template<typename vfloat>
- static __forceinline Vec2<vfloat> decodeUV(const vfloat& uv)
- {
- typedef typename vfloat::Int vint;
- const vint iu = asInt(uv) & 0xffff;
- const vint iv = srl(asInt(uv),16);
- const vfloat u = (vfloat)iu * vfloat(8.0f/0x10000);
- const vfloat v = (vfloat)iv * vfloat(8.0f/0x10000);
- return Vec2<vfloat>(u,v);
- }
-
- __forceinline unsigned int geomID() const {
- return _geomID;
- }
-
- __forceinline unsigned int primID() const {
- return _primID;
- }
-
- public:
- BVH4::NodeRef troot;
-#if !defined(__X86_64__) && !defined(__aarch64__)
- unsigned align1;
-#endif
- unsigned time_steps;
- unsigned width;
-
- unsigned height;
- unsigned dim_offset;
- unsigned _geomID;
- unsigned _primID;
-
- unsigned align2;
- unsigned gridOffset;
- unsigned gridBytes;
- unsigned rootOffset;
-
- int8_t data[1]; //!< after the struct we first store the BVH, then the grid, and finally the roots
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h b/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h
deleted file mode 100644
index 2ed922a5ae..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h
+++ /dev/null
@@ -1,207 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "grid_soa.h"
-#include "../common/ray.h"
-#include "triangle_intersector_pluecker.h"
-
-namespace embree
-{
- namespace isa
- {
- class GridSOAIntersector1
- {
- public:
- typedef void Primitive;
-
- class Precalculations
- {
- public:
- __forceinline Precalculations (const Ray& ray, const void* ptr)
- : grid(nullptr) {}
-
- public:
- GridSOA* grid;
- int itime;
- float ftime;
- };
-
- template<typename Loader>
- static __forceinline void intersect(RayHit& ray,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t dim_offset = pre.grid->dim_offset;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
- Vec3<vfloat> v0, v1, v2;
- Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
- GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines);
- PlueckerIntersector1<Loader::M> intersector(ray,nullptr);
- intersector.intersect(ray,v0,v1,v2,mapUV,Intersect1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- };
-
- template<typename Loader>
- static __forceinline bool occluded(Ray& ray,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t dim_offset = pre.grid->dim_offset;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- Vec3<vfloat> v0, v1, v2;
- Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
-
- GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines);
- PlueckerIntersector1<Loader::M> intersector(ray,nullptr);
- return intersector.intersect(ray,v0,v1,v2,mapUV,Occluded1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- }
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(0,prim);
-
-#if defined(__AVX__)
- intersect<GridSOA::Gather3x3>( ray, context, grid_x, line_offset, lines, pre);
-#else
- intersect<GridSOA::Gather2x3>(ray, context, grid_x , line_offset, lines, pre);
- if (likely(lines > 2))
- intersect<GridSOA::Gather2x3>(ray, context, grid_x+line_offset, line_offset, lines, pre);
-#endif
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(0,prim);
-
-#if defined(__AVX__)
- return occluded<GridSOA::Gather3x3>( ray, context, grid_x, line_offset, lines, pre);
-#else
- if (occluded<GridSOA::Gather2x3>(ray, context, grid_x , line_offset, lines, pre)) return true;
- if (likely(lines > 2))
- if (occluded<GridSOA::Gather2x3>(ray, context, grid_x+line_offset, line_offset, lines, pre)) return true;
-#endif
- return false;
- }
- };
-
- class GridSOAMBIntersector1
- {
- public:
- typedef void Primitive;
- typedef GridSOAIntersector1::Precalculations Precalculations;
-
- template<typename Loader>
- static __forceinline void intersect(RayHit& ray, const float ftime,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t dim_offset = pre.grid->dim_offset;
- const size_t grid_offset = pre.grid->gridBytes >> 2;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- Vec3<vfloat> a0, a1, a2;
- Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);
-
- Vec3<vfloat> b0, b1, b2;
- Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);
-
- Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));
- Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));
- Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));
-
- GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines);
- PlueckerIntersector1<Loader::M> intersector(ray,nullptr);
- intersector.intersect(ray,v0,v1,v2,mapUV,Intersect1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- };
-
- template<typename Loader>
- static __forceinline bool occluded(Ray& ray, const float ftime,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t dim_offset = pre.grid->dim_offset;
- const size_t grid_offset = pre.grid->gridBytes >> 2;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- Vec3<vfloat> a0, a1, a2;
- Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);
-
- Vec3<vfloat> b0, b1, b2;
- Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);
-
- Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));
- Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));
- Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));
-
- GridSOA::MapUV<Loader> mapUV(grid_uv,line_offset,lines);
- PlueckerIntersector1<Loader::M> intersector(ray,nullptr);
- return intersector.intersect(ray,v0,v1,v2,mapUV,Occluded1EpilogMU<Loader::M,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- }
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(pre.itime,prim);
-
-#if defined(__AVX__)
- intersect<GridSOA::Gather3x3>( ray, pre.ftime, context, grid_x, line_offset, lines, pre);
-#else
- intersect<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x, line_offset, lines, pre);
- if (likely(lines > 2))
- intersect<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x+line_offset, line_offset, lines, pre);
-#endif
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline bool occluded(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(pre.itime,prim);
-
-#if defined(__AVX__)
- return occluded<GridSOA::Gather3x3>( ray, pre.ftime, context, grid_x, line_offset, lines, pre);
-#else
- if (occluded<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x , line_offset, lines, pre)) return true;
- if (likely(lines > 2))
- if (occluded<GridSOA::Gather2x3>(ray, pre.ftime, context, grid_x+line_offset, line_offset, lines, pre)) return true;
-#endif
- return false;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h b/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h
deleted file mode 100644
index 41d66e1e28..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h
+++ /dev/null
@@ -1,445 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "grid_soa.h"
-#include "../common/ray.h"
-#include "triangle_intersector_pluecker.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int K>
- struct MapUV0
- {
- const float* const grid_uv;
- size_t ofs00, ofs01, ofs10, ofs11;
-
- __forceinline MapUV0(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11)
- : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {}
-
- __forceinline void operator() (vfloat<K>& u, vfloat<K>& v) const {
- const vfloat<K> uv00(grid_uv[ofs00]);
- const vfloat<K> uv01(grid_uv[ofs01]);
- const vfloat<K> uv10(grid_uv[ofs10]);
- const vfloat<K> uv11(grid_uv[ofs11]);
- const Vec2vf<K> uv0 = GridSOA::decodeUV(uv00);
- const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01);
- const Vec2vf<K> uv2 = GridSOA::decodeUV(uv10);
- const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0));
- u = uv[0]; v = uv[1];
- }
- };
-
- template<int K>
- struct MapUV1
- {
- const float* const grid_uv;
- size_t ofs00, ofs01, ofs10, ofs11;
-
- __forceinline MapUV1(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11)
- : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {}
-
- __forceinline void operator() (vfloat<K>& u, vfloat<K>& v) const {
- const vfloat<K> uv00(grid_uv[ofs00]);
- const vfloat<K> uv01(grid_uv[ofs01]);
- const vfloat<K> uv10(grid_uv[ofs10]);
- const vfloat<K> uv11(grid_uv[ofs11]);
- const Vec2vf<K> uv0 = GridSOA::decodeUV(uv10);
- const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01);
- const Vec2vf<K> uv2 = GridSOA::decodeUV(uv11);
- const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0));
- u = uv[0]; v = uv[1];
- }
- };
-
- template<int K>
- class GridSOAIntersectorK
- {
- public:
- typedef void Primitive;
-
- class Precalculations
- {
-#if defined(__AVX__)
- static const int M = 8;
-#else
- static const int M = 4;
-#endif
-
- public:
- __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray)
- : grid(nullptr), intersector(valid,ray) {}
-
- public:
- GridSOA* grid;
- PlueckerIntersectorK<M,K> intersector; // FIXME: use quad intersector
- };
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t dim_offset = pre.grid->dim_offset;
- const size_t line_offset = pre.grid->width;
- const float* const grid_x = pre.grid->decodeLeaf(0,prim);
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- const size_t max_x = pre.grid->width == 2 ? 1 : 2;
- const size_t max_y = pre.grid->height == 2 ? 1 : 2;
- for (size_t y=0; y<max_y; y++)
- {
- for (size_t x=0; x<max_x; x++)
- {
- const size_t ofs00 = (y+0)*line_offset+(x+0);
- const size_t ofs01 = (y+0)*line_offset+(x+1);
- const size_t ofs10 = (y+1)*line_offset+(x+0);
- const size_t ofs11 = (y+1)*line_offset+(x+1);
- const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
- const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
- const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
- const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
-
- pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- }
- }
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t dim_offset = pre.grid->dim_offset;
- const size_t line_offset = pre.grid->width;
- const float* const grid_x = pre.grid->decodeLeaf(0,prim);
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- vbool<K> valid = valid_i;
- const size_t max_x = pre.grid->width == 2 ? 1 : 2;
- const size_t max_y = pre.grid->height == 2 ? 1 : 2;
- for (size_t y=0; y<max_y; y++)
- {
- for (size_t x=0; x<max_x; x++)
- {
- const size_t ofs00 = (y+0)*line_offset+(x+0);
- const size_t ofs01 = (y+0)*line_offset+(x+1);
- const size_t ofs10 = (y+1)*line_offset+(x+0);
- const size_t ofs11 = (y+1)*line_offset+(x+1);
- const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
- const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
- const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
- const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
-
- pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
- if (none(valid)) break;
- pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
- if (none(valid)) break;
- }
- }
- return !valid;
- }
-
- template<typename Loader>
- static __forceinline void intersect(RayHitK<K>& ray, size_t k,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t dim_offset = pre.grid->dim_offset;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
- Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
- pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
- };
-
- template<typename Loader>
- static __forceinline bool occluded(RayK<K>& ray, size_t k,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t dim_offset = pre.grid->dim_offset;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
- Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
- return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
- }
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(0,prim);
-#if defined(__AVX__)
- intersect<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre);
-#else
- intersect<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre);
- if (likely(lines > 2))
- intersect<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre);
-#endif
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(0,prim);
-
-#if defined(__AVX__)
- return occluded<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre);
-#else
- if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre)) return true;
- if (likely(lines > 2))
- if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre)) return true;
-#endif
- return false;
- }
- };
-
- template<int K>
- class GridSOAMBIntersectorK
- {
- public:
- typedef void Primitive;
- typedef typename GridSOAIntersectorK<K>::Precalculations Precalculations;
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- vfloat<K> vftime;
- vint<K> vitime = getTimeSegment(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime);
-
- vbool<K> valid1 = valid_i;
- while (any(valid1)) {
- const size_t j = bsf(movemask(valid1));
- const int itime = vitime[j];
- const vbool<K> valid2 = valid1 & (itime == vitime);
- valid1 = valid1 & !valid2;
- intersect(valid2,pre,ray,vftime,itime,context,prim,lazy_node);
- }
- }
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, const vfloat<K>& ftime, int itime, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t grid_offset = pre.grid->gridBytes >> 2;
- const size_t dim_offset = pre.grid->dim_offset;
- const size_t line_offset = pre.grid->width;
- const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- const size_t max_x = pre.grid->width == 2 ? 1 : 2;
- const size_t max_y = pre.grid->height == 2 ? 1 : 2;
- for (size_t y=0; y<max_y; y++)
- {
- for (size_t x=0; x<max_x; x++)
- {
- size_t ofs00 = (y+0)*line_offset+(x+0);
- size_t ofs01 = (y+0)*line_offset+(x+1);
- size_t ofs10 = (y+1)*line_offset+(x+0);
- size_t ofs11 = (y+1)*line_offset+(x+1);
- const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
- const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
- const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
- const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
- ofs00 += grid_offset;
- ofs01 += grid_offset;
- ofs10 += grid_offset;
- ofs11 += grid_offset;
- const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
- const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
- const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
- const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
- const Vec3vf<K> p00 = lerp(a00,b00,ftime);
- const Vec3vf<K> p01 = lerp(a01,b01,ftime);
- const Vec3vf<K> p10 = lerp(a10,b10,ftime);
- const Vec3vf<K> p11 = lerp(a11,b11,ftime);
-
- pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
- }
- }
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- vfloat<K> vftime;
- vint<K> vitime = getTimeSegment(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime);
-
- vbool<K> valid_o = valid_i;
- vbool<K> valid1 = valid_i;
- while (any(valid1)) {
- const int j = int(bsf(movemask(valid1)));
- const int itime = vitime[j];
- const vbool<K> valid2 = valid1 & (itime == vitime);
- valid1 = valid1 & !valid2;
- valid_o &= !valid2 | occluded(valid2,pre,ray,vftime,itime,context,prim,lazy_node);
- }
- return !valid_o;
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, const vfloat<K>& ftime, int itime, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- const size_t grid_offset = pre.grid->gridBytes >> 2;
- const size_t dim_offset = pre.grid->dim_offset;
- const size_t line_offset = pre.grid->width;
- const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- vbool<K> valid = valid_i;
- const size_t max_x = pre.grid->width == 2 ? 1 : 2;
- const size_t max_y = pre.grid->height == 2 ? 1 : 2;
- for (size_t y=0; y<max_y; y++)
- {
- for (size_t x=0; x<max_x; x++)
- {
- size_t ofs00 = (y+0)*line_offset+(x+0);
- size_t ofs01 = (y+0)*line_offset+(x+1);
- size_t ofs10 = (y+1)*line_offset+(x+0);
- size_t ofs11 = (y+1)*line_offset+(x+1);
- const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
- const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
- const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
- const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
- ofs00 += grid_offset;
- ofs01 += grid_offset;
- ofs10 += grid_offset;
- ofs11 += grid_offset;
- const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
- const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
- const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
- const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
- const Vec3vf<K> p00 = lerp(a00,b00,ftime);
- const Vec3vf<K> p01 = lerp(a01,b01,ftime);
- const Vec3vf<K> p10 = lerp(a10,b10,ftime);
- const Vec3vf<K> p11 = lerp(a11,b11,ftime);
-
- pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
- if (none(valid)) break;
- pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
- if (none(valid)) break;
- }
- }
- return valid;
- }
-
- template<typename Loader>
- static __forceinline void intersect(RayHitK<K>& ray, size_t k,
- const float ftime,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t grid_offset = pre.grid->gridBytes >> 2;
- const size_t dim_offset = pre.grid->dim_offset;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- Vec3<vfloat> a0, a1, a2;
- Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);
-
- Vec3<vfloat> b0, b1, b2;
- Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);
-
- Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));
- Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));
- Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));
-
- pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
- };
-
- template<typename Loader>
- static __forceinline bool occluded(RayK<K>& ray, size_t k,
- const float ftime,
- IntersectContext* context,
- const float* const grid_x,
- const size_t line_offset,
- const size_t lines,
- Precalculations& pre)
- {
- typedef typename Loader::vfloat vfloat;
- const size_t grid_offset = pre.grid->gridBytes >> 2;
- const size_t dim_offset = pre.grid->dim_offset;
- const float* const grid_y = grid_x + 1 * dim_offset;
- const float* const grid_z = grid_x + 2 * dim_offset;
- const float* const grid_uv = grid_x + 3 * dim_offset;
-
- Vec3<vfloat> a0, a1, a2;
- Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);
-
- Vec3<vfloat> b0, b1, b2;
- Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);
-
- Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));
- Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));
- Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));
-
- return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
- }
-
- /*! Intersect a ray with the primitive. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- float ftime;
- int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime);
-
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
-
-#if defined(__AVX__)
- intersect<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre);
-#else
- intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre);
- if (likely(lines > 2))
- intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre);
-#endif
- }
-
- /*! Test if the ray is occluded by the primitive */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- float ftime;
- int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime);
-
- const size_t line_offset = pre.grid->width;
- const size_t lines = pre.grid->height;
- const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
-
-#if defined(__AVX__)
- return occluded<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre);
-#else
- if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre)) return true;
- if (likely(lines > 2))
- if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre)) return true;
-#endif
- return false;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/instance.h b/thirdparty/embree-aarch64/kernels/geometry/instance.h
deleted file mode 100644
index 66893d581f..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/instance.h
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-#include "../common/scene_instance.h"
-
-namespace embree
-{
- struct InstancePrimitive
- {
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* primitive supports multiple time segments */
- static const bool singleTimeSegment = false;
-
- /* Returns maximum number of stored primitives */
- static __forceinline size_t max_size() { return 1; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return N; }
-
- public:
-
- InstancePrimitive (const Instance* instance, unsigned int instID)
- : instance(instance)
- , instID_(instID)
- {}
-
- __forceinline void fill(const PrimRef* prims, size_t& i, size_t end, Scene* scene)
- {
- assert(end-i == 1);
- const PrimRef& prim = prims[i]; i++;
- const unsigned int geomID = prim.geomID();
- const Instance* instance = scene->get<Instance>(geomID);
- new (this) InstancePrimitive(instance, geomID);
- }
-
- __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& i, size_t end, Scene* scene, size_t itime)
- {
- assert(end-i == 1);
- const PrimRef& prim = prims[i]; i++;
- const unsigned int geomID = prim.geomID();
- const Instance* instance = scene->get<Instance>(geomID);
- new (this) InstancePrimitive(instance,geomID);
- return instance->linearBounds(0,itime);
- }
-
- __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& i, size_t end, Scene* scene, const BBox1f time_range)
- {
- assert(end-i == 1);
- const PrimRefMB& prim = prims[i]; i++;
- const unsigned int geomID = prim.geomID();
- const Instance* instance = scene->get<Instance>(geomID);
- new (this) InstancePrimitive(instance,geomID);
- return instance->linearBounds(0,time_range);
- }
-
- /* Updates the primitive */
- __forceinline BBox3fa update(Instance* instance) {
- return instance->bounds(0);
- }
-
- public:
- const Instance* instance;
- const unsigned int instID_ = std::numeric_limits<unsigned int>::max ();
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h
deleted file mode 100644
index 91731a39c5..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "instance.h"
-#include "../common/ray.h"
-#include "../common/point_query.h"
-
-namespace embree
-{
- namespace isa
- {
- struct InstanceIntersector1
- {
- typedef InstancePrimitive Primitive;
-
- struct Precalculations {
- __forceinline Precalculations (const Ray& ray, const void *ptr) {}
- };
-
- static void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim);
- static bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim);
- static bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim);
- };
-
- struct InstanceIntersector1MB
- {
- typedef InstancePrimitive Primitive;
-
- struct Precalculations {
- __forceinline Precalculations (const Ray& ray, const void *ptr) {}
- };
-
- static void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim);
- static bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim);
- static bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim);
- };
-
- template<int K>
- struct InstanceIntersectorK
- {
- typedef InstancePrimitive Primitive;
-
- struct Precalculations {
- __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {}
- };
-
- static void intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& prim);
- static vbool<K> occluded(const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& prim);
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) {
- intersect(vbool<K>(1<<int(k)),pre,ray,context,prim);
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) {
- occluded(vbool<K>(1<<int(k)),pre,ray,context,prim);
- return ray.tfar[k] < 0.0f;
- }
- };
-
- template<int K>
- struct InstanceIntersectorKMB
- {
- typedef InstancePrimitive Primitive;
-
- struct Precalculations {
- __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {}
- };
-
- static void intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& prim);
- static vbool<K> occluded(const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& prim);
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) {
- intersect(vbool<K>(1<<int(k)),pre,ray,context,prim);
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) {
- occluded(vbool<K>(1<<int(k)),pre,ray,context,prim);
- return ray.tfar[k] < 0.0f;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h b/thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h
deleted file mode 100644
index 0df49dd6e9..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h
+++ /dev/null
@@ -1,1074 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "../common/context.h"
-#include "filter.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct UVIdentity {
- __forceinline void operator() (vfloat<M>& u, vfloat<M>& v) const {}
- };
-
-
- template<bool filter>
- struct Intersect1Epilog1
- {
- RayHit& ray;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Intersect1Epilog1(RayHit& ray,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (Hit& hit) const
- {
- /* ray mask test */
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- if ((geometry->mask & ray.mask) == 0) return false;
-#endif
- hit.finalize();
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) {
- HitK<1> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng);
- const float old_t = ray.tfar;
- ray.tfar = hit.t;
- bool found = runIntersectionFilter1(geometry,ray,context,h);
- if (!found) ray.tfar = old_t;
- return found;
- }
- }
-#endif
-
- /* update hit information */
- ray.tfar = hit.t;
- ray.Ng = hit.Ng;
- ray.u = hit.u;
- ray.v = hit.v;
- ray.primID = primID;
- ray.geomID = geomID;
- instance_id_stack::copy(context->user->instID, ray.instID);
- return true;
- }
- };
-
- template<bool filter>
- struct Occluded1Epilog1
- {
- Ray& ray;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Occluded1Epilog1(Ray& ray,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (Hit& hit) const
- {
- /* ray mask test */
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
-
-#if defined(EMBREE_RAY_MASK)
- if ((geometry->mask & ray.mask) == 0) return false;
-#endif
- hit.finalize();
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) {
- HitK<1> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng);
- const float old_t = ray.tfar;
- ray.tfar = hit.t;
- const bool found = runOcclusionFilter1(geometry,ray,context,h);
- if (!found) ray.tfar = old_t;
- return found;
- }
- }
-#endif
- return true;
- }
- };
-
- template<int K, bool filter>
- struct Intersect1KEpilog1
- {
- RayHitK<K>& ray;
- size_t k;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Intersect1KEpilog1(RayHitK<K>& ray, size_t k,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (Hit& hit) const
- {
- /* ray mask test */
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- if ((geometry->mask & ray.mask[k]) == 0)
- return false;
-#endif
- hit.finalize();
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) {
- HitK<K> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng);
- const float old_t = ray.tfar[k];
- ray.tfar[k] = hit.t;
- const bool found = any(runIntersectionFilter(vbool<K>(1<<k),geometry,ray,context,h));
- if (!found) ray.tfar[k] = old_t;
- return found;
- }
- }
-#endif
-
- /* update hit information */
- ray.tfar[k] = hit.t;
- ray.Ng.x[k] = hit.Ng.x;
- ray.Ng.y[k] = hit.Ng.y;
- ray.Ng.z[k] = hit.Ng.z;
- ray.u[k] = hit.u;
- ray.v[k] = hit.v;
- ray.primID[k] = primID;
- ray.geomID[k] = geomID;
- instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k);
- return true;
- }
- };
-
- template<int K, bool filter>
- struct Occluded1KEpilog1
- {
- RayK<K>& ray;
- size_t k;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Occluded1KEpilog1(RayK<K>& ray, size_t k,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (Hit& hit) const
- {
- /* ray mask test */
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- if ((geometry->mask & ray.mask[k]) == 0)
- return false;
-#endif
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter())) {
- hit.finalize();
- HitK<K> h(context->user,geomID,primID,hit.u,hit.v,hit.Ng);
- const float old_t = ray.tfar[k];
- ray.tfar[k] = hit.t;
- const bool found = any(runOcclusionFilter(vbool<K>(1<<k),geometry,ray,context,h));
- if (!found) ray.tfar[k] = old_t;
- return found;
- }
- }
-#endif
- return true;
- }
- };
-
- template<int M, int Mx, bool filter>
- struct Intersect1EpilogM
- {
- RayHit& ray;
- IntersectContext* context;
- const vuint<M>& geomIDs;
- const vuint<M>& primIDs;
-
- __forceinline Intersect1EpilogM(RayHit& ray,
- IntersectContext* context,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs)
- : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const
- {
- Scene* scene MAYBE_UNUSED = context->scene;
- vbool<Mx> valid = valid_i;
- if (Mx > M) valid &= (1<<M)-1;
- hit.finalize();
- size_t i = select_min(valid,hit.vt);
- unsigned int geomID = geomIDs[i];
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK)
- bool foundhit = false;
- goto entry;
- while (true)
- {
- if (unlikely(none(valid))) return foundhit;
- i = select_min(valid,hit.vt);
-
- geomID = geomIDs[i];
- entry:
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
-#if defined(EMBREE_RAY_MASK)
- /* goto next hit if mask test fails */
- if ((geometry->mask & ray.mask) == 0) {
- clear(valid,i);
- continue;
- }
-#endif
-
-#if defined(EMBREE_FILTER_FUNCTION)
- /* call intersection filter function */
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) {
- const Vec2f uv = hit.uv(i);
- HitK<1> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i));
- const float old_t = ray.tfar;
- ray.tfar = hit.t(i);
- const bool found = runIntersectionFilter1(geometry,ray,context,h);
- if (!found) ray.tfar = old_t;
- foundhit |= found;
- clear(valid,i);
- valid &= hit.vt <= ray.tfar; // intersection filters may modify tfar value
- continue;
- }
- }
-#endif
- break;
- }
-#endif
-
- /* update hit information */
- const Vec2f uv = hit.uv(i);
- ray.tfar = hit.vt[i];
- ray.Ng.x = hit.vNg.x[i];
- ray.Ng.y = hit.vNg.y[i];
- ray.Ng.z = hit.vNg.z[i];
- ray.u = uv.x;
- ray.v = uv.y;
- ray.primID = primIDs[i];
- ray.geomID = geomID;
- instance_id_stack::copy(context->user->instID, ray.instID);
- return true;
-
- }
- };
-
-#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4
- template<int M, bool filter>
- struct Intersect1EpilogM<M,16,filter>
- {
- static const size_t Mx = 16;
- RayHit& ray;
- IntersectContext* context;
- const vuint<M>& geomIDs;
- const vuint<M>& primIDs;
-
- __forceinline Intersect1EpilogM(RayHit& ray,
- IntersectContext* context,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs)
- : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const
- {
- Scene* MAYBE_UNUSED scene = context->scene;
- vbool<Mx> valid = valid_i;
- if (Mx > M) valid &= (1<<M)-1;
- hit.finalize();
- size_t i = select_min(valid,hit.vt);
- unsigned int geomID = geomIDs[i];
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK)
- bool foundhit = false;
- goto entry;
- while (true)
- {
- if (unlikely(none(valid))) return foundhit;
- i = select_min(valid,hit.vt);
-
- geomID = geomIDs[i];
- entry:
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
-#if defined(EMBREE_RAY_MASK)
- /* goto next hit if mask test fails */
- if ((geometry->mask & ray.mask) == 0) {
- clear(valid,i);
- continue;
- }
-#endif
-
-#if defined(EMBREE_FILTER_FUNCTION)
- /* call intersection filter function */
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) {
- const Vec2f uv = hit.uv(i);
- HitK<1> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i));
- const float old_t = ray.tfar;
- ray.tfar = hit.t(i);
- const bool found = runIntersectionFilter1(geometry,ray,context,h);
- if (!found) ray.tfar = old_t;
- foundhit |= found;
- clear(valid,i);
- valid &= hit.vt <= ray.tfar; // intersection filters may modify tfar value
- continue;
- }
- }
-#endif
- break;
- }
-#endif
-
- vbool<Mx> finalMask(((unsigned int)1 << i));
- ray.update(finalMask,hit.vt,hit.vu,hit.vv,hit.vNg.x,hit.vNg.y,hit.vNg.z,geomID,primIDs);
- instance_id_stack::foreach([&](unsigned level)
- {
- ray.instID[level] = context->user->instID[level];
- return (context->user->instID[level] != RTC_INVALID_GEOMETRY_ID);
- });
- return true;
-
- }
- };
-#endif
-
- template<int M, int Mx, bool filter>
- struct Occluded1EpilogM
- {
- Ray& ray;
- IntersectContext* context;
- const vuint<M>& geomIDs;
- const vuint<M>& primIDs;
-
- __forceinline Occluded1EpilogM(Ray& ray,
- IntersectContext* context,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs)
- : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const
- {
- Scene* scene MAYBE_UNUSED = context->scene;
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK)
- if (unlikely(filter))
- hit.finalize(); /* called only once */
-
- vbool<Mx> valid = valid_i;
- if (Mx > M) valid &= (1<<M)-1;
- size_t m=movemask(valid);
- goto entry;
- while (true)
- {
- if (unlikely(m == 0)) return false;
- entry:
- size_t i=bsf(m);
-
- const unsigned int geomID = geomIDs[i];
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
-#if defined(EMBREE_RAY_MASK)
- /* goto next hit if mask test fails */
- if ((geometry->mask & ray.mask) == 0) {
- m=btc(m,i);
- continue;
- }
-#endif
-
-#if defined(EMBREE_FILTER_FUNCTION)
- /* if we have no filter then the test passed */
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter()))
- {
- const Vec2f uv = hit.uv(i);
- HitK<1> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i));
- const float old_t = ray.tfar;
- ray.tfar = hit.t(i);
- if (runOcclusionFilter1(geometry,ray,context,h)) return true;
- ray.tfar = old_t;
- m=btc(m,i);
- continue;
- }
- }
-#endif
- break;
- }
-#endif
-
- return true;
- }
- };
-
- template<int M, bool filter>
- struct Intersect1EpilogMU
- {
- RayHit& ray;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Intersect1EpilogMU(RayHit& ray,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const
- {
- /* ray mask test */
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- if ((geometry->mask & ray.mask) == 0) return false;
-#endif
-
- vbool<M> valid = valid_i;
- hit.finalize();
-
- size_t i = select_min(valid,hit.vt);
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter()))
- {
- bool foundhit = false;
- while (true)
- {
- /* call intersection filter function */
- Vec2f uv = hit.uv(i);
- const float old_t = ray.tfar;
- ray.tfar = hit.t(i);
- HitK<1> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i));
- const bool found = runIntersectionFilter1(geometry,ray,context,h);
- if (!found) ray.tfar = old_t;
- foundhit |= found;
- clear(valid,i);
- valid &= hit.vt <= ray.tfar; // intersection filters may modify tfar value
- if (unlikely(none(valid))) break;
- i = select_min(valid,hit.vt);
- }
- return foundhit;
- }
-#endif
-
- /* update hit information */
- const Vec2f uv = hit.uv(i);
- const Vec3fa Ng = hit.Ng(i);
- ray.tfar = hit.t(i);
- ray.Ng.x = Ng.x;
- ray.Ng.y = Ng.y;
- ray.Ng.z = Ng.z;
- ray.u = uv.x;
- ray.v = uv.y;
- ray.primID = primID;
- ray.geomID = geomID;
- instance_id_stack::copy(context->user->instID, ray.instID);
- return true;
- }
- };
-
- template<int M, bool filter>
- struct Occluded1EpilogMU
- {
- Ray& ray;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Occluded1EpilogMU(Ray& ray,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<M>& valid, Hit& hit) const
- {
- /* ray mask test */
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- if ((geometry->mask & ray.mask) == 0) return false;
-#endif
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter()))
- {
- hit.finalize();
- for (size_t m=movemask(valid), i=bsf(m); m!=0; m=btc(m,i), i=bsf(m))
- {
- const Vec2f uv = hit.uv(i);
- const float old_t = ray.tfar;
- ray.tfar = hit.t(i);
- HitK<1> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i));
- if (runOcclusionFilter1(geometry,ray,context,h)) return true;
- ray.tfar = old_t;
- }
- return false;
- }
-#endif
- return true;
- }
- };
-
- template<int M, int K, bool filter>
- struct IntersectKEpilogM
- {
- RayHitK<K>& ray;
- IntersectContext* context;
- const vuint<M>& geomIDs;
- const vuint<M>& primIDs;
- const size_t i;
-
- __forceinline IntersectKEpilogM(RayHitK<K>& ray,
- IntersectContext* context,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs,
- size_t i)
- : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs), i(i) {}
-
- template<typename Hit>
- __forceinline vbool<K> operator() (const vbool<K>& valid_i, const Hit& hit) const
- {
- Scene* scene MAYBE_UNUSED = context->scene;
-
- vfloat<K> u, v, t;
- Vec3vf<K> Ng;
- vbool<K> valid = valid_i;
-
- std::tie(u,v,t,Ng) = hit();
-
- const unsigned int geomID = geomIDs[i];
- const unsigned int primID = primIDs[i];
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
- /* ray masking test */
-#if defined(EMBREE_RAY_MASK)
- valid &= (geometry->mask & ray.mask) != 0;
- if (unlikely(none(valid))) return false;
-#endif
-
- /* occlusion filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) {
- HitK<K> h(context->user,geomID,primID,u,v,Ng);
- const vfloat<K> old_t = ray.tfar;
- ray.tfar = select(valid,t,ray.tfar);
- const vbool<K> m_accept = runIntersectionFilter(valid,geometry,ray,context,h);
- ray.tfar = select(m_accept,ray.tfar,old_t);
- return m_accept;
- }
- }
-#endif
-
- /* update hit information */
- vfloat<K>::store(valid,&ray.tfar,t);
- vfloat<K>::store(valid,&ray.Ng.x,Ng.x);
- vfloat<K>::store(valid,&ray.Ng.y,Ng.y);
- vfloat<K>::store(valid,&ray.Ng.z,Ng.z);
- vfloat<K>::store(valid,&ray.u,u);
- vfloat<K>::store(valid,&ray.v,v);
- vuint<K>::store(valid,&ray.primID,primID);
- vuint<K>::store(valid,&ray.geomID,geomID);
- instance_id_stack::copy<const unsigned*, vuint<K>*, const vbool<K>&>(context->user->instID, ray.instID, valid);
- return valid;
- }
- };
-
- template<int M, int K, bool filter>
- struct OccludedKEpilogM
- {
- vbool<K>& valid0;
- RayK<K>& ray;
- IntersectContext* context;
- const vuint<M>& geomIDs;
- const vuint<M>& primIDs;
- const size_t i;
-
- __forceinline OccludedKEpilogM(vbool<K>& valid0,
- RayK<K>& ray,
- IntersectContext* context,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs,
- size_t i)
- : valid0(valid0), ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs), i(i) {}
-
- template<typename Hit>
- __forceinline vbool<K> operator() (const vbool<K>& valid_i, const Hit& hit) const
- {
- vbool<K> valid = valid_i;
-
- /* ray masking test */
- Scene* scene MAYBE_UNUSED = context->scene;
- const unsigned int geomID = geomIDs[i];
- const unsigned int primID = primIDs[i];
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- valid &= (geometry->mask & ray.mask) != 0;
- if (unlikely(none(valid))) return valid;
-#endif
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter()))
- {
- vfloat<K> u, v, t;
- Vec3vf<K> Ng;
- std::tie(u,v,t,Ng) = hit();
- HitK<K> h(context->user,geomID,primID,u,v,Ng);
- const vfloat<K> old_t = ray.tfar;
- ray.tfar = select(valid,t,ray.tfar);
- valid = runOcclusionFilter(valid,geometry,ray,context,h);
- ray.tfar = select(valid,ray.tfar,old_t);
- }
- }
-#endif
-
- /* update occlusion */
- valid0 = valid0 & !valid;
- return valid;
- }
- };
-
- template<int M, int K, bool filter>
- struct IntersectKEpilogMU
- {
- RayHitK<K>& ray;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline IntersectKEpilogMU(RayHitK<K>& ray,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline vbool<K> operator() (const vbool<K>& valid_org, const Hit& hit) const
- {
- vbool<K> valid = valid_org;
- vfloat<K> u, v, t;
- Vec3vf<K> Ng;
- std::tie(u,v,t,Ng) = hit();
-
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
- /* ray masking test */
-#if defined(EMBREE_RAY_MASK)
- valid &= (geometry->mask & ray.mask) != 0;
- if (unlikely(none(valid))) return false;
-#endif
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) {
- HitK<K> h(context->user,geomID,primID,u,v,Ng);
- const vfloat<K> old_t = ray.tfar;
- ray.tfar = select(valid,t,ray.tfar);
- const vbool<K> m_accept = runIntersectionFilter(valid,geometry,ray,context,h);
- ray.tfar = select(m_accept,ray.tfar,old_t);
- return m_accept;
- }
- }
-#endif
-
- /* update hit information */
- vfloat<K>::store(valid,&ray.tfar,t);
- vfloat<K>::store(valid,&ray.Ng.x,Ng.x);
- vfloat<K>::store(valid,&ray.Ng.y,Ng.y);
- vfloat<K>::store(valid,&ray.Ng.z,Ng.z);
- vfloat<K>::store(valid,&ray.u,u);
- vfloat<K>::store(valid,&ray.v,v);
- vuint<K>::store(valid,&ray.primID,primID);
- vuint<K>::store(valid,&ray.geomID,geomID);
- instance_id_stack::copy<const unsigned*, vuint<K>*, const vbool<K>&>(context->user->instID, ray.instID, valid);
-
- return valid;
- }
- };
-
- template<int M, int K, bool filter>
- struct OccludedKEpilogMU
- {
- vbool<K>& valid0;
- RayK<K>& ray;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline OccludedKEpilogMU(vbool<K>& valid0,
- RayK<K>& ray,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : valid0(valid0), ray(ray), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline vbool<K> operator() (const vbool<K>& valid_i, const Hit& hit) const
- {
- vbool<K> valid = valid_i;
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
-#if defined(EMBREE_RAY_MASK)
- valid &= (geometry->mask & ray.mask) != 0;
- if (unlikely(none(valid))) return false;
-#endif
-
- /* occlusion filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter()))
- {
- vfloat<K> u, v, t;
- Vec3vf<K> Ng;
- std::tie(u,v,t,Ng) = hit();
- HitK<K> h(context->user,geomID,primID,u,v,Ng);
- const vfloat<K> old_t = ray.tfar;
- ray.tfar = select(valid,t,ray.tfar);
- valid = runOcclusionFilter(valid,geometry,ray,context,h);
- ray.tfar = select(valid,ray.tfar,old_t);
- }
- }
-#endif
-
- /* update occlusion */
- valid0 = valid0 & !valid;
- return valid;
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct Intersect1KEpilogM
- {
- RayHitK<K>& ray;
- size_t k;
- IntersectContext* context;
- const vuint<M>& geomIDs;
- const vuint<M>& primIDs;
-
- __forceinline Intersect1KEpilogM(RayHitK<K>& ray, size_t k,
- IntersectContext* context,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs)
- : ray(ray), k(k), context(context), geomIDs(geomIDs), primIDs(primIDs) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const
- {
- Scene* scene MAYBE_UNUSED = context->scene;
- vbool<Mx> valid = valid_i;
- hit.finalize();
- if (Mx > M) valid &= (1<<M)-1;
- size_t i = select_min(valid,hit.vt);
- assert(i<M);
- unsigned int geomID = geomIDs[i];
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK)
- bool foundhit = false;
- goto entry;
- while (true)
- {
- if (unlikely(none(valid))) return foundhit;
- i = select_min(valid,hit.vt);
- assert(i<M);
- geomID = geomIDs[i];
- entry:
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
-#if defined(EMBREE_RAY_MASK)
- /* goto next hit if mask test fails */
- if ((geometry->mask & ray.mask[k]) == 0) {
- clear(valid,i);
- continue;
- }
-#endif
-
-#if defined(EMBREE_FILTER_FUNCTION)
- /* call intersection filter function */
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) {
- assert(i<M);
- const Vec2f uv = hit.uv(i);
- HitK<K> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i));
- const float old_t = ray.tfar[k];
- ray.tfar[k] = hit.t(i);
- const bool found = any(runIntersectionFilter(vbool<K>(1<<k),geometry,ray,context,h));
- if (!found) ray.tfar[k] = old_t;
- foundhit = foundhit | found;
- clear(valid,i);
- valid &= hit.vt <= ray.tfar[k]; // intersection filters may modify tfar value
- continue;
- }
- }
-#endif
- break;
- }
-#endif
- assert(i<M);
- /* update hit information */
-#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4
- ray.updateK(i,k,hit.vt,hit.vu,hit.vv,vfloat<Mx>(hit.vNg.x),vfloat<Mx>(hit.vNg.y),vfloat<Mx>(hit.vNg.z),geomID,vuint<Mx>(primIDs));
-#else
- const Vec2f uv = hit.uv(i);
- ray.tfar[k] = hit.t(i);
- ray.Ng.x[k] = hit.vNg.x[i];
- ray.Ng.y[k] = hit.vNg.y[i];
- ray.Ng.z[k] = hit.vNg.z[i];
- ray.u[k] = uv.x;
- ray.v[k] = uv.y;
- ray.primID[k] = primIDs[i];
- ray.geomID[k] = geomID;
- instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k);
-#endif
- return true;
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct Occluded1KEpilogM
- {
- RayK<K>& ray;
- size_t k;
- IntersectContext* context;
- const vuint<M>& geomIDs;
- const vuint<M>& primIDs;
-
- __forceinline Occluded1KEpilogM(RayK<K>& ray, size_t k,
- IntersectContext* context,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs)
- : ray(ray), k(k), context(context), geomIDs(geomIDs), primIDs(primIDs) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const
- {
- Scene* scene MAYBE_UNUSED = context->scene;
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK)
- if (unlikely(filter))
- hit.finalize(); /* called only once */
-
- vbool<Mx> valid = valid_i;
- if (Mx > M) valid &= (1<<M)-1;
- size_t m=movemask(valid);
- goto entry;
- while (true)
- {
- if (unlikely(m == 0)) return false;
- entry:
- size_t i=bsf(m);
-
- const unsigned int geomID = geomIDs[i];
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-
-#if defined(EMBREE_RAY_MASK)
- /* goto next hit if mask test fails */
- if ((geometry->mask & ray.mask[k]) == 0) {
- m=btc(m,i);
- continue;
- }
-#endif
-
-#if defined(EMBREE_FILTER_FUNCTION)
- /* execute occlusion filer */
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter()))
- {
- const Vec2f uv = hit.uv(i);
- const float old_t = ray.tfar[k];
- ray.tfar[k] = hit.t(i);
- HitK<K> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i));
- if (any(runOcclusionFilter(vbool<K>(1<<k),geometry,ray,context,h))) return true;
- ray.tfar[k] = old_t;
- m=btc(m,i);
- continue;
- }
- }
-#endif
- break;
- }
-#endif
- return true;
- }
- };
-
- template<int M, int K, bool filter>
- struct Intersect1KEpilogMU
- {
- RayHitK<K>& ray;
- size_t k;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Intersect1KEpilogMU(RayHitK<K>& ray, size_t k,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const
- {
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- /* ray mask test */
- if ((geometry->mask & ray.mask[k]) == 0)
- return false;
-#endif
-
- /* finalize hit calculation */
- vbool<M> valid = valid_i;
- hit.finalize();
- size_t i = select_min(valid,hit.vt);
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter()))
- {
- bool foundhit = false;
- while (true)
- {
- const Vec2f uv = hit.uv(i);
- const float old_t = ray.tfar[k];
- ray.tfar[k] = hit.t(i);
- HitK<K> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i));
- const bool found = any(runIntersectionFilter(vbool<K>(1<<k),geometry,ray,context,h));
- if (!found) ray.tfar[k] = old_t;
- foundhit = foundhit | found;
- clear(valid,i);
- valid &= hit.vt <= ray.tfar[k]; // intersection filters may modify tfar value
- if (unlikely(none(valid))) break;
- i = select_min(valid,hit.vt);
- }
- return foundhit;
- }
- }
-#endif
-
- /* update hit information */
-#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4
- const Vec3fa Ng = hit.Ng(i);
- ray.updateK(i,k,hit.vt,hit.vu,hit.vv,vfloat<M>(Ng.x),vfloat<M>(Ng.y),vfloat<M>(Ng.z),geomID,vuint<M>(primID));
-#else
- const Vec2f uv = hit.uv(i);
- const Vec3fa Ng = hit.Ng(i);
- ray.tfar[k] = hit.t(i);
- ray.Ng.x[k] = Ng.x;
- ray.Ng.y[k] = Ng.y;
- ray.Ng.z[k] = Ng.z;
- ray.u[k] = uv.x;
- ray.v[k] = uv.y;
- ray.primID[k] = primID;
- ray.geomID[k] = geomID;
- instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k);
-#endif
- return true;
- }
- };
-
- template<int M, int K, bool filter>
- struct Occluded1KEpilogMU
- {
- RayK<K>& ray;
- size_t k;
- IntersectContext* context;
- const unsigned int geomID;
- const unsigned int primID;
-
- __forceinline Occluded1KEpilogMU(RayK<K>& ray, size_t k,
- IntersectContext* context,
- const unsigned int geomID,
- const unsigned int primID)
- : ray(ray), k(k), context(context), geomID(geomID), primID(primID) {}
-
- template<typename Hit>
- __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const
- {
- Scene* scene MAYBE_UNUSED = context->scene;
- Geometry* geometry MAYBE_UNUSED = scene->get(geomID);
-#if defined(EMBREE_RAY_MASK)
- /* ray mask test */
- if ((geometry->mask & ray.mask[k]) == 0)
- return false;
-#endif
-
- /* intersection filter test */
-#if defined(EMBREE_FILTER_FUNCTION)
- if (filter) {
- if (unlikely(context->hasContextFilter() || geometry->hasOcclusionFilter()))
- {
- hit.finalize();
- for (size_t m=movemask(valid_i), i=bsf(m); m!=0; m=btc(m,i), i=bsf(m))
- {
- const Vec2f uv = hit.uv(i);
- const float old_t = ray.tfar[k];
- ray.tfar[k] = hit.t(i);
- HitK<K> h(context->user,geomID,primID,uv.x,uv.y,hit.Ng(i));
- if (any(runOcclusionFilter(vbool<K>(1<<k),geometry,ray,context,h))) return true;
- ray.tfar[k] = old_t;
- }
- return false;
- }
- }
-#endif
- return true;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h b/thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h
deleted file mode 100644
index 5c1ba5cb61..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h
+++ /dev/null
@@ -1,172 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/scene.h"
-#include "../common/ray.h"
-#include "../common/point_query.h"
-#include "../bvh/node_intersector1.h"
-#include "../bvh/node_intersector_packet.h"
-
-namespace embree
-{
- namespace isa
- {
- template<typename Intersector>
- struct ArrayIntersector1
- {
- typedef typename Intersector::Primitive Primitive;
- typedef typename Intersector::Precalculations Precalculations;
-
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- for (size_t i=0; i<num; i++)
- Intersector::intersect(pre,ray,context,prim[i]);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- for (size_t i=0; i<num; i++) {
- if (Intersector::occluded(pre,ray,context,prim[i]))
- return true;
- }
- return false;
- }
-
- template<int N>
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node)
- {
- bool changed = false;
- for (size_t i=0; i<num; i++)
- changed |= Intersector::pointQuery(query, context, prim[i]);
- return changed;
- }
-
- template<int K>
- static __forceinline void intersectK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- }
-
- template<int K>
- static __forceinline vbool<K> occludedK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- return valid;
- }
- };
-
- template<int K, typename Intersector>
- struct ArrayIntersectorK_1
- {
- typedef typename Intersector::Primitive Primitive;
- typedef typename Intersector::Precalculations Precalculations;
-
- template<bool robust>
- static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- for (size_t i=0; i<num; i++) {
- Intersector::intersect(valid,pre,ray,context,prim[i]);
- }
- }
-
- template<bool robust>
- static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- vbool<K> valid0 = valid;
- for (size_t i=0; i<num; i++) {
- valid0 &= !Intersector::occluded(valid0,pre,ray,context,prim[i]);
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- for (size_t i=0; i<num; i++) {
- Intersector::intersect(pre,ray,k,context,prim[i]);
- }
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- for (size_t i=0; i<num; i++) {
- if (Intersector::occluded(pre,ray,k,context,prim[i]))
- return true;
- }
- return false;
- }
- };
-
- // =============================================================================================
-
- template<int K, typename IntersectorK>
- struct ArrayIntersectorKStream
- {
- typedef typename IntersectorK::Primitive PrimitiveK;
- typedef typename IntersectorK::Precalculations PrecalculationsK;
-
- static __forceinline void intersectK(const vbool<K>& valid, const Accel::Intersectors* This, /* PrecalculationsK& pre, */ RayHitK<K>& ray, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
- {
- PrecalculationsK pre(valid,ray); // FIXME: might cause trouble
-
- for (size_t i=0; i<num; i++) {
- IntersectorK::intersect(valid,pre,ray,context,prim[i]);
- }
- }
-
- static __forceinline vbool<K> occludedK(const vbool<K>& valid, const Accel::Intersectors* This, /* PrecalculationsK& pre, */ RayK<K>& ray, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
- {
- PrecalculationsK pre(valid,ray); // FIXME: might cause trouble
- vbool<K> valid0 = valid;
- for (size_t i=0; i<num; i++) {
- valid0 &= !IntersectorK::occluded(valid0,pre,ray,context,prim[i]);
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- static __forceinline void intersect(const Accel::Intersectors* This, RayHitK<K>& ray, size_t k, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
- {
- PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble
- for (size_t i=0; i<num; i++) {
- IntersectorK::intersect(pre,ray,k,context,prim[i]);
- }
- }
-
- static __forceinline bool occluded(const Accel::Intersectors* This, RayK<K>& ray, size_t k, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
- {
- PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble
- for (size_t i=0; i<num; i++) {
- if (IntersectorK::occluded(pre,ray,k,context,prim[i]))
- return true;
- }
- return false;
- }
-
- static __forceinline size_t occluded(const Accel::Intersectors* This, size_t cur_mask, RayK<K>** __restrict__ inputPackets, IntersectContext* context, const PrimitiveK* prim, size_t num, size_t& lazy_node)
- {
- size_t m_occluded = 0;
- for (size_t i=0; i<num; i++) {
- size_t bits = cur_mask & (~m_occluded);
- for (; bits!=0; )
- {
- const size_t rayID = bscf(bits);
- RayHitK<K> &ray = *inputPackets[rayID / K];
- const size_t k = rayID % K;
- PrecalculationsK pre(ray.tnear() <= ray.tfar,ray); // FIXME: might cause trouble
- if (IntersectorK::occluded(pre,ray,k,context,prim[i]))
- {
- m_occluded |= (size_t)1 << rayID;
- ray.tfar[k] = neg_inf;
- }
- }
- }
- return m_occluded;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/line_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/line_intersector.h
deleted file mode 100644
index eef5b0b1fd..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/line_intersector.h
+++ /dev/null
@@ -1,141 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct LineIntersectorHitM
- {
- __forceinline LineIntersectorHitM() {}
-
- __forceinline LineIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng)
- : vu(u), vv(v), vt(t), vNg(Ng) {}
-
- __forceinline void finalize() {}
-
- __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
- __forceinline float t (const size_t i) const { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- public:
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
- };
-
- template<int M>
- struct FlatLinearCurveIntersector1
- {
- typedef CurvePrecalculations1 Precalculations;
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- Ray& ray,
- IntersectContext* context,
- const LineSegments* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
- const Epilog& epilog)
- {
- /* transform end points into ray space */
- vbool<M> valid = valid_i;
- vfloat<M> depth_scale = pre.depth_scale;
- LinearSpace3<Vec3vf<M>> ray_space = pre.ray_space;
-
- const Vec3vf<M> ray_org ((Vec3fa)ray.org);
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i);
-
- Vec4vf<M> p0(xfmVector(ray_space,v0.xyz()-ray_org), v0.w);
- Vec4vf<M> p1(xfmVector(ray_space,v1.xyz()-ray_org), v1.w);
-
- /* approximative intersection with cone */
- const Vec4vf<M> v = p1-p0;
- const Vec4vf<M> w = -p0;
- const vfloat<M> d0 = madd(w.x,v.x,w.y*v.y);
- const vfloat<M> d1 = madd(v.x,v.x,v.y*v.y);
- const vfloat<M> u = clamp(d0*rcp(d1),vfloat<M>(zero),vfloat<M>(one));
- const Vec4vf<M> p = madd(u,v,p0);
- const vfloat<M> t = p.z;
- const vfloat<M> d2 = madd(p.x,p.x,p.y*p.y);
- const vfloat<M> r = p.w;
- const vfloat<M> r2 = r*r;
- valid &= (d2 <= r2) & (vfloat<M>(ray.tnear()) <= t) & (t <= vfloat<M>(ray.tfar));
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
- valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; // ignore self intersections
- if (unlikely(none(valid))) return false;
-
- /* ignore denormalized segments */
- const Vec3vf<M> T = v1.xyz()-v0.xyz();
- valid &= (T.x != vfloat<M>(zero)) | (T.y != vfloat<M>(zero)) | (T.z != vfloat<M>(zero));
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- LineIntersectorHitM<M> hit(u,zero,t,T);
- return epilog(valid,hit);
- }
- };
-
- template<int M, int K>
- struct FlatLinearCurveIntersectorK
- {
- typedef CurvePrecalculationsK<K> Precalculations;
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- RayK<K>& ray, size_t k,
- IntersectContext* context,
- const LineSegments* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i, const Vec4vf<M>& v1i,
- const Epilog& epilog)
- {
- /* transform end points into ray space */
- vbool<M> valid = valid_i;
- vfloat<M> depth_scale = pre.depth_scale[k];
- LinearSpace3<Vec3vf<M>> ray_space = pre.ray_space[k];
- const Vec3vf<M> ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]);
- const Vec3vf<M> ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]);
-
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i);
-
- Vec4vf<M> p0(xfmVector(ray_space,v0.xyz()-ray_org), v0.w);
- Vec4vf<M> p1(xfmVector(ray_space,v1.xyz()-ray_org), v1.w);
-
- /* approximative intersection with cone */
- const Vec4vf<M> v = p1-p0;
- const Vec4vf<M> w = -p0;
- const vfloat<M> d0 = madd(w.x,v.x,w.y*v.y);
- const vfloat<M> d1 = madd(v.x,v.x,v.y*v.y);
- const vfloat<M> u = clamp(d0*rcp(d1),vfloat<M>(zero),vfloat<M>(one));
- const Vec4vf<M> p = madd(u,v,p0);
- const vfloat<M> t = p.z;
- const vfloat<M> d2 = madd(p.x,p.x,p.y*p.y);
- const vfloat<M> r = p.w;
- const vfloat<M> r2 = r*r;
- valid &= (d2 <= r2) & (vfloat<M>(ray.tnear()[k]) <= t) & (t <= vfloat<M>(ray.tfar[k]));
- if (EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR != 0.0f)
- valid &= t > float(EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR)*r*depth_scale; // ignore self intersections
- if (unlikely(none(valid))) return false;
-
- /* ignore denormalized segments */
- const Vec3vf<M> T = v1.xyz()-v0.xyz();
- valid &= (T.x != vfloat<M>(zero)) | (T.y != vfloat<M>(zero)) | (T.z != vfloat<M>(zero));
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- LineIntersectorHitM<M> hit(u,zero,t,T);
- return epilog(valid,hit);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/linei.h b/thirdparty/embree-aarch64/kernels/geometry/linei.h
deleted file mode 100644
index a72029ca53..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/linei.h
+++ /dev/null
@@ -1,709 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-
-namespace embree
-{
- template<int M>
- struct LineMi
- {
- /* Virtual interface to query information about the line segment type */
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* primitive supports multiple time segments */
- static const bool singleTimeSegment = false;
-
- /* Returns maximum number of stored line segments */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N line segments */
- static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- /* Returns required number of bytes for N line segments */
- static __forceinline size_t bytes(size_t N) { return blocks(N)*sizeof(LineMi); }
-
- public:
-
- /* Default constructor */
- __forceinline LineMi() { }
-
- /* Construction from vertices and IDs */
- __forceinline LineMi(const vuint<M>& v0, unsigned short leftExists, unsigned short rightExists, const vuint<M>& geomIDs, const vuint<M>& primIDs, Geometry::GType gtype)
- : gtype((unsigned char)gtype), m((unsigned char)popcnt(vuint<M>(primIDs) != vuint<M>(-1))), sharedGeomID(geomIDs[0]), leftExists (leftExists), rightExists(rightExists), v0(v0), primIDs(primIDs)
- {
- assert(all(vuint<M>(geomID()) == geomIDs));
- }
-
- /* Returns a mask that tells which line segments are valid */
- __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); }
-
- /* Returns a mask that tells which line segments are valid */
- template<int Mx>
- __forceinline vbool<Mx> valid() const { return vuint<Mx>(primIDs) != vuint<Mx>(-1); }
-
- /* Returns if the specified line segment is valid */
- __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; }
-
- /* Returns the number of stored line segments */
- __forceinline size_t size() const { return bsf(~movemask(valid())); }
-
- /* Returns the geometry IDs */
- //template<class T>
- //static __forceinline T unmask(T &index) { return index & 0x3fffffff; }
-
- __forceinline unsigned int geomID(unsigned int i = 0) const { return sharedGeomID; }
- //__forceinline vuint<M> geomID() { return unmask(geomIDs); }
- //__forceinline const vuint<M> geomID() const { return unmask(geomIDs); }
- //__forceinline unsigned int geomID(const size_t i) const { assert(i<M); return unmask(geomIDs[i]); }
-
- /* Returns the primitive IDs */
- __forceinline vuint<M>& primID() { return primIDs; }
- __forceinline const vuint<M>& primID() const { return primIDs; }
- __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
-
- /* gather the line segments */
- __forceinline void gather(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- const LineSegments* geom) const;
-
- __forceinline void gatheri(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- const LineSegments* geom,
- const int itime) const;
-
- __forceinline void gather(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- const LineSegments* geom,
- float time) const;
-
- /* gather the line segments with lateral info */
- __forceinline void gather(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- Vec4vf<M>& pL,
- Vec4vf<M>& pR,
- const LineSegments* geom) const;
-
- __forceinline void gatheri(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- Vec4vf<M>& pL,
- Vec4vf<M>& pR,
- const LineSegments* geom,
- const int itime) const;
-
- __forceinline void gather(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- Vec4vf<M>& pL,
- Vec4vf<M>& pR,
- const LineSegments* geom,
- float time) const;
-
- __forceinline void gather(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- vbool<M>& cL,
- vbool<M>& cR,
- const LineSegments* geom) const;
-
- __forceinline void gatheri(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- vbool<M>& cL,
- vbool<M>& cR,
- const LineSegments* geom,
- const int itime) const;
-
- __forceinline void gather(Vec4vf<M>& p0,
- Vec4vf<M>& p1,
- vbool<M>& cL,
- vbool<M>& cR,
- const LineSegments* geom,
- float time) const;
-
- /* Calculate the bounds of the line segments */
- __forceinline const BBox3fa bounds(const Scene* scene, size_t itime = 0) const
- {
- BBox3fa bounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const LineSegments* geom = scene->get<LineSegments>(geomID(i));
- const Vec3ff& p0 = geom->vertex(v0[i]+0,itime);
- const Vec3ff& p1 = geom->vertex(v0[i]+1,itime);
- BBox3fa b = merge(BBox3fa(p0),BBox3fa(p1));
- b = enlarge(b,Vec3fa(max(p0.w,p1.w)));
- bounds.extend(b);
- }
- return bounds;
- }
-
- /* Calculate the linear bounds of the primitive */
- __forceinline LBBox3fa linearBounds(const Scene* scene, size_t itime) {
- return LBBox3fa(bounds(scene,itime+0), bounds(scene,itime+1));
- }
-
- __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps) {
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const LineSegments* geom = scene->get<LineSegments>(geomID(i));
- allBounds.extend(geom->linearBounds(primID(i), itime, numTimeSteps));
- }
- return allBounds;
- }
-
- __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range)
- {
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const LineSegments* geom = scene->get<LineSegments>(geomID((unsigned int)i));
- allBounds.extend(geom->linearBounds(primID(i), time_range));
- }
- return allBounds;
- }
-
- /* Fill line segment from line segment list */
- template<typename PrimRefT>
- __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
- {
- Geometry::GType gty = scene->get(prims[begin].geomID())->getType();
- vuint<M> geomID, primID;
- vuint<M> v0;
- unsigned short leftExists = 0;
- unsigned short rightExists = 0;
- const PrimRefT* prim = &prims[begin];
-
- for (size_t i=0; i<M; i++)
- {
- const LineSegments* geom = scene->get<LineSegments>(prim->geomID());
- if (begin<end) {
- geomID[i] = prim->geomID();
- primID[i] = prim->primID();
- v0[i] = geom->segment(prim->primID());
- leftExists |= geom->segmentLeftExists(primID[i]) << i;
- rightExists |= geom->segmentRightExists(primID[i]) << i;
- begin++;
- } else {
- assert(i);
- if (i>0) {
- geomID[i] = geomID[i-1];
- primID[i] = -1;
- v0[i] = v0[i-1];
- }
- }
- if (begin<end) prim = &prims[begin]; // FIXME: remove this line
- }
- new (this) LineMi(v0,leftExists,rightExists,geomID,primID,gty); // FIXME: use non temporal store
- }
-
- template<typename BVH, typename Allocator>
- __forceinline static typename BVH::NodeRef createLeaf (BVH* bvh, const PrimRef* prims, const range<size_t>& set, const Allocator& alloc)
- {
- size_t start = set.begin();
- size_t items = LineMi::blocks(set.size());
- size_t numbytes = LineMi::bytes(set.size());
- LineMi* accel = (LineMi*) alloc.malloc1(numbytes,M*sizeof(float));
- for (size_t i=0; i<items; i++) {
- accel[i].fill(prims,start,set.end(),bvh->scene);
- }
- return bvh->encodeLeaf((char*)accel,items);
- };
-
- __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
- {
- fill(prims,begin,end,scene);
- return linearBounds(scene,itime);
- }
-
- __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
- {
- fill(prims,begin,end,scene);
- return linearBounds(scene,time_range);
- }
-
- template<typename BVH, typename SetMB, typename Allocator>
- __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc)
- {
- size_t start = prims.begin();
- size_t end = prims.end();
- size_t items = LineMi::blocks(prims.size());
- size_t numbytes = LineMi::bytes(prims.size());
- LineMi* accel = (LineMi*) alloc.malloc1(numbytes,M*sizeof(float));
- const typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,items);
-
- LBBox3fa bounds = empty;
- for (size_t i=0; i<items; i++)
- bounds.extend(accel[i].fillMB(prims.prims->data(),start,end,bvh->scene,prims.time_range));
-
- return typename BVH::NodeRecordMB4D(node,bounds,prims.time_range);
- };
-
- /* Updates the primitive */
- __forceinline BBox3fa update(LineSegments* geom)
- {
- BBox3fa bounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const Vec3ff& p0 = geom->vertex(v0[i]+0);
- const Vec3ff& p1 = geom->vertex(v0[i]+1);
- BBox3fa b = merge(BBox3fa(p0),BBox3fa(p1));
- b = enlarge(b,Vec3fa(max(p0.w,p1.w)));
- bounds.extend(b);
- }
- return bounds;
- }
-
- /*! output operator */
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const LineMi& line) {
- return cout << "Line" << M << "i {" << line.v0 << ", " << line.geomID() << ", " << line.primID() << "}";
- }
-
- public:
- unsigned char gtype;
- unsigned char m;
- unsigned int sharedGeomID;
- unsigned short leftExists, rightExists;
- vuint<M> v0; // index of start vertex
- private:
- vuint<M> primIDs; // primitive ID
- };
-
- template<>
- __forceinline void LineMi<4>::gather(Vec4vf4& p0,
- Vec4vf4& p1,
- const LineSegments* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
- transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
- transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w);
- }
-
- template<>
- __forceinline void LineMi<4>::gatheri(Vec4vf4& p0,
- Vec4vf4& p1,
- const LineSegments* geom,
- const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime));
- transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime));
- transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w);
- }
-
- template<>
- __forceinline void LineMi<4>::gather(Vec4vf4& p0,
- Vec4vf4& p1,
- const LineSegments* geom,
- float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf4 a0,a1;
- gatheri(a0,a1,geom,itime);
- Vec4vf4 b0,b1;
- gatheri(b0,b1,geom,itime+1);
- p0 = lerp(a0,b0,vfloat4(ftime));
- p1 = lerp(a1,b1,vfloat4(ftime));
- }
-
- template<>
- __forceinline void LineMi<4>::gather(Vec4vf4& p0,
- Vec4vf4& p1,
- vbool4& cL,
- vbool4& cR,
- const LineSegments* geom) const
- {
- gather(p0,p1,geom);
- cL = !vbool4(leftExists);
- cR = !vbool4(rightExists);
- }
-
- template<>
- __forceinline void LineMi<4>::gatheri(Vec4vf4& p0,
- Vec4vf4& p1,
- vbool4& cL,
- vbool4& cR,
- const LineSegments* geom,
- const int itime) const
- {
- gatheri(p0,p1,geom,itime);
- cL = !vbool4(leftExists);
- cR = !vbool4(rightExists);
- }
-
- template<>
- __forceinline void LineMi<4>::gather(Vec4vf4& p0,
- Vec4vf4& p1,
- vbool4& cL,
- vbool4& cR,
- const LineSegments* geom,
- float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf4 a0,a1;
- gatheri(a0,a1,geom,itime);
- Vec4vf4 b0,b1;
- gatheri(b0,b1,geom,itime+1);
- p0 = lerp(a0,b0,vfloat4(ftime));
- p1 = lerp(a1,b1,vfloat4(ftime));
- cL = !vbool4(leftExists);
- cR = !vbool4(rightExists);
- }
-
- template<>
- __forceinline void LineMi<4>::gather(Vec4vf4& p0,
- Vec4vf4& p1,
- Vec4vf4& pL,
- Vec4vf4& pR,
- const LineSegments* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
- transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
- transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w);
-
- const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1)) : vfloat4(inf);
- const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1)) : vfloat4(inf);
- const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1)) : vfloat4(inf);
- const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1)) : vfloat4(inf);
- transpose(l0,l1,l2,l3,pL.x,pL.y,pL.z,pL.w);
-
- const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2)) : vfloat4(inf);
- const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2)) : vfloat4(inf);
- const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2)) : vfloat4(inf);
- const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2)) : vfloat4(inf);
- transpose(r0,r1,r2,r3,pR.x,pR.y,pR.z,pR.w);
- }
-
- template<>
- __forceinline void LineMi<4>::gatheri(Vec4vf4& p0,
- Vec4vf4& p1,
- Vec4vf4& pL,
- Vec4vf4& pR,
- const LineSegments* geom,
- const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime));
- transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime));
- transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z,p1.w);
-
- const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1,itime)) : vfloat4(inf);
- const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1,itime)) : vfloat4(inf);
- const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1,itime)) : vfloat4(inf);
- const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1,itime)) : vfloat4(inf);
- transpose(l0,l1,l2,l3,pL.x,pL.y,pL.z,pL.w);
-
- const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2,itime)) : vfloat4(inf);
- const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2,itime)) : vfloat4(inf);
- const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2,itime)) : vfloat4(inf);
- const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2,itime)) : vfloat4(inf);
- transpose(r0,r1,r2,r3,pR.x,pR.y,pR.z,pR.w);
- }
-
- template<>
- __forceinline void LineMi<4>::gather(Vec4vf4& p0,
- Vec4vf4& p1,
- Vec4vf4& pL,
- Vec4vf4& pR,
- const LineSegments* geom,
- float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf4 a0,a1,aL,aR;
- gatheri(a0,a1,aL,aR,geom,itime);
- Vec4vf4 b0,b1,bL,bR;
- gatheri(b0,b1,bL,bR,geom,itime+1);
- p0 = lerp(a0,b0,vfloat4(ftime));
- p1 = lerp(a1,b1,vfloat4(ftime));
- pL = lerp(aL,bL,vfloat4(ftime));
- pR = lerp(aR,bR,vfloat4(ftime));
- }
-
-#if defined(__AVX__)
-
- template<>
- __forceinline void LineMi<8>::gather(Vec4vf8& p0,
- Vec4vf8& p1,
- const LineSegments* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4]));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5]));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6]));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7]));
- transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
- const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1));
- const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1));
- const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1));
- const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1));
- transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w);
- }
-
- template<>
- __forceinline void LineMi<8>::gatheri(Vec4vf8& p0,
- Vec4vf8& p1,
- const LineSegments* geom,
- const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4],itime));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5],itime));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6],itime));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7],itime));
- transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime));
- const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1,itime));
- const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1,itime));
- const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1,itime));
- const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1,itime));
- transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w);
- }
-
- template<>
- __forceinline void LineMi<8>::gather(Vec4vf8& p0,
- Vec4vf8& p1,
- const LineSegments* geom,
- float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf8 a0,a1;
- gatheri(a0,a1,geom,itime);
- Vec4vf8 b0,b1;
- gatheri(b0,b1,geom,itime+1);
- p0 = lerp(a0,b0,vfloat8(ftime));
- p1 = lerp(a1,b1,vfloat8(ftime));
- }
-
- template<>
- __forceinline void LineMi<8>::gather(Vec4vf8& p0,
- Vec4vf8& p1,
- Vec4vf8& pL,
- Vec4vf8& pR,
- const LineSegments* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0]));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1]));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2]));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3]));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4]));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5]));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6]));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7]));
- transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1));
- const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1));
- const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1));
- const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1));
- const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1));
- transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w);
-
- const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1)) : vfloat4(inf);
- const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1)) : vfloat4(inf);
- const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1)) : vfloat4(inf);
- const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1)) : vfloat4(inf);
- const vfloat4 l4 = (leftExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]-1)) : vfloat4(inf);
- const vfloat4 l5 = (leftExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]-1)) : vfloat4(inf);
- const vfloat4 l6 = (leftExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]-1)) : vfloat4(inf);
- const vfloat4 l7 = (leftExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]-1)) : vfloat4(inf);
- transpose(l0,l1,l2,l3,l4,l5,l6,l7,pL.x,pL.y,pL.z,pL.w);
-
- const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2)) : vfloat4(inf);
- const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2)) : vfloat4(inf);
- const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2)) : vfloat4(inf);
- const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2)) : vfloat4(inf);
- const vfloat4 r4 = (rightExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]+2)) : vfloat4(inf);
- const vfloat4 r5 = (rightExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]+2)) : vfloat4(inf);
- const vfloat4 r6 = (rightExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]+2)) : vfloat4(inf);
- const vfloat4 r7 = (rightExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]+2)) : vfloat4(inf);
- transpose(r0,r1,r2,r3,r4,r5,r6,r7,pR.x,pR.y,pR.z,pR.w);
- }
-
- template<>
- __forceinline void LineMi<8>::gatheri(Vec4vf8& p0,
- Vec4vf8& p1,
- Vec4vf8& pL,
- Vec4vf8& pR,
- const LineSegments* geom,
- const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(v0[0],itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(v0[1],itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(v0[2],itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(v0[3],itime));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(v0[4],itime));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(v0[5],itime));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(v0[6],itime));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(v0[7],itime));
- transpose(a0,a1,a2,a3,a4,a5,a6,a7,p0.x,p0.y,p0.z,p0.w);
-
- const vfloat4 b0 = vfloat4::loadu(geom->vertexPtr(v0[0]+1,itime));
- const vfloat4 b1 = vfloat4::loadu(geom->vertexPtr(v0[1]+1,itime));
- const vfloat4 b2 = vfloat4::loadu(geom->vertexPtr(v0[2]+1,itime));
- const vfloat4 b3 = vfloat4::loadu(geom->vertexPtr(v0[3]+1,itime));
- const vfloat4 b4 = vfloat4::loadu(geom->vertexPtr(v0[4]+1,itime));
- const vfloat4 b5 = vfloat4::loadu(geom->vertexPtr(v0[5]+1,itime));
- const vfloat4 b6 = vfloat4::loadu(geom->vertexPtr(v0[6]+1,itime));
- const vfloat4 b7 = vfloat4::loadu(geom->vertexPtr(v0[7]+1,itime));
- transpose(b0,b1,b2,b3,b4,b5,b6,b7,p1.x,p1.y,p1.z,p1.w);
-
- const vfloat4 l0 = (leftExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]-1,itime)) : vfloat4(inf);
- const vfloat4 l1 = (leftExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]-1,itime)) : vfloat4(inf);
- const vfloat4 l2 = (leftExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]-1,itime)) : vfloat4(inf);
- const vfloat4 l3 = (leftExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]-1,itime)) : vfloat4(inf);
- const vfloat4 l4 = (leftExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]-1,itime)) : vfloat4(inf);
- const vfloat4 l5 = (leftExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]-1,itime)) : vfloat4(inf);
- const vfloat4 l6 = (leftExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]-1,itime)) : vfloat4(inf);
- const vfloat4 l7 = (leftExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]-1,itime)) : vfloat4(inf);
- transpose(l0,l1,l2,l3,l4,l5,l6,l7,pL.x,pL.y,pL.z,pL.w);
-
- const vfloat4 r0 = (rightExists & (1<<0)) ? vfloat4::loadu(geom->vertexPtr(v0[0]+2,itime)) : vfloat4(inf);
- const vfloat4 r1 = (rightExists & (1<<1)) ? vfloat4::loadu(geom->vertexPtr(v0[1]+2,itime)) : vfloat4(inf);
- const vfloat4 r2 = (rightExists & (1<<2)) ? vfloat4::loadu(geom->vertexPtr(v0[2]+2,itime)) : vfloat4(inf);
- const vfloat4 r3 = (rightExists & (1<<3)) ? vfloat4::loadu(geom->vertexPtr(v0[3]+2,itime)) : vfloat4(inf);
- const vfloat4 r4 = (rightExists & (1<<4)) ? vfloat4::loadu(geom->vertexPtr(v0[4]+2,itime)) : vfloat4(inf);
- const vfloat4 r5 = (rightExists & (1<<5)) ? vfloat4::loadu(geom->vertexPtr(v0[5]+2,itime)) : vfloat4(inf);
- const vfloat4 r6 = (rightExists & (1<<6)) ? vfloat4::loadu(geom->vertexPtr(v0[6]+2,itime)) : vfloat4(inf);
- const vfloat4 r7 = (rightExists & (1<<7)) ? vfloat4::loadu(geom->vertexPtr(v0[7]+2,itime)) : vfloat4(inf);
- transpose(r0,r1,r2,r3,r4,r5,r6,r7,pR.x,pR.y,pR.z,pR.w);
- }
-
- template<>
- __forceinline void LineMi<8>::gather(Vec4vf8& p0,
- Vec4vf8& p1,
- Vec4vf8& pL,
- Vec4vf8& pR,
- const LineSegments* geom,
- float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf8 a0,a1,aL,aR;
- gatheri(a0,a1,aL,aR,geom,itime);
- Vec4vf8 b0,b1,bL,bR;
- gatheri(b0,b1,bL,bR,geom,itime+1);
- p0 = lerp(a0,b0,vfloat8(ftime));
- p1 = lerp(a1,b1,vfloat8(ftime));
- pL = lerp(aL,bL,vfloat8(ftime));
- pR = lerp(aR,bR,vfloat8(ftime));
- }
-
- template<>
- __forceinline void LineMi<8>::gather(Vec4vf8& p0,
- Vec4vf8& p1,
- vbool8& cL,
- vbool8& cR,
- const LineSegments* geom) const
- {
- gather(p0,p1,geom);
- cL = !vbool8(leftExists);
- cR = !vbool8(rightExists);
- }
-
- template<>
- __forceinline void LineMi<8>::gatheri(Vec4vf8& p0,
- Vec4vf8& p1,
- vbool8& cL,
- vbool8& cR,
- const LineSegments* geom,
- const int itime) const
- {
- gatheri(p0,p1,geom,itime);
- cL = !vbool8(leftExists);
- cR = !vbool8(rightExists);
- }
-
- template<>
- __forceinline void LineMi<8>::gather(Vec4vf8& p0,
- Vec4vf8& p1,
- vbool8& cL,
- vbool8& cR,
- const LineSegments* geom,
- float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf8 a0,a1;
- gatheri(a0,a1,geom,itime);
- Vec4vf8 b0,b1;
- gatheri(b0,b1,geom,itime+1);
- p0 = lerp(a0,b0,vfloat8(ftime));
- p1 = lerp(a1,b1,vfloat8(ftime));
- cL = !vbool8(leftExists);
- cR = !vbool8(rightExists);
- }
-
-#endif
-
- template<int M>
- typename LineMi<M>::Type LineMi<M>::type;
-
- typedef LineMi<4> Line4i;
- typedef LineMi<8> Line8i;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h
deleted file mode 100644
index a431796a88..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h
+++ /dev/null
@@ -1,124 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "linei.h"
-#include "line_intersector.h"
-#include "intersector_epilog.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M, int Mx, bool filter>
- struct FlatLinearCurveMiIntersector1
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- return FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line);
- }
- };
-
- template<int M, int Mx, bool filter>
- struct FlatLinearCurveMiMBIntersector1
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time());
- const vbool<Mx> valid = line.template valid<Mx>();
- FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time());
- const vbool<Mx> valid = line.template valid<Mx>();
- return FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line);
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct FlatLinearCurveMiIntersectorK
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- return FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct FlatLinearCurveMiMBIntersectorK
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()[k]);
- const vbool<Mx> valid = line.template valid<Mx>();
- FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()[k]);
- const vbool<Mx> valid = line.template valid<Mx>();
- return FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID()));
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/object.h b/thirdparty/embree-aarch64/kernels/geometry/object.h
deleted file mode 100644
index f26391de52..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/object.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-
-namespace embree
-{
- struct Object
- {
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* primitive supports multiple time segments */
- static const bool singleTimeSegment = false;
-
- /* Returns maximum number of stored primitives */
- static __forceinline size_t max_size() { return 1; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return N; }
-
- public:
-
- /*! constructs a virtual object */
- Object (unsigned geomID, unsigned primID)
- : _geomID(geomID), _primID(primID) {}
-
- __forceinline unsigned geomID() const {
- return _geomID;
- }
-
- __forceinline unsigned primID() const {
- return _primID;
- }
-
- /*! fill triangle from triangle list */
- __forceinline void fill(const PrimRef* prims, size_t& i, size_t end, Scene* scene)
- {
- const PrimRef& prim = prims[i]; i++;
- new (this) Object(prim.geomID(), prim.primID());
- }
-
- /*! fill triangle from triangle list */
- __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& i, size_t end, Scene* scene, size_t itime)
- {
- const PrimRef& prim = prims[i]; i++;
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- new (this) Object(geomID, primID);
- AccelSet* accel = (AccelSet*) scene->get(geomID);
- return accel->linearBounds(primID,itime);
- }
-
- /*! fill triangle from triangle list */
- __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& i, size_t end, Scene* scene, const BBox1f time_range)
- {
- const PrimRefMB& prim = prims[i]; i++;
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- new (this) Object(geomID, primID);
- AccelSet* accel = (AccelSet*) scene->get(geomID);
- return accel->linearBounds(primID,time_range);
- }
-
- /* Updates the primitive */
- __forceinline BBox3fa update(AccelSet* mesh) {
- return mesh->bounds(primID());
- }
-
- private:
- unsigned int _geomID; //!< geometry ID
- unsigned int _primID; //!< primitive ID
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/object_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/object_intersector.h
deleted file mode 100644
index 97882e0e59..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/object_intersector.h
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "object.h"
-#include "../common/ray.h"
-
-namespace embree
-{
- namespace isa
- {
- template<bool mblur>
- struct ObjectIntersector1
- {
- typedef Object Primitive;
-
- static const bool validIntersectorK = false;
-
- struct Precalculations {
- __forceinline Precalculations() {}
- __forceinline Precalculations (const Ray& ray, const void *ptr) {}
- };
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& prim)
- {
- AccelSet* accel = (AccelSet*) context->scene->get(prim.geomID());
-
- /* perform ray mask test */
-#if defined(EMBREE_RAY_MASK)
- if ((ray.mask & accel->mask) == 0)
- return;
-#endif
-
- accel->intersect(ray,prim.geomID(),prim.primID(),context,reportIntersection1);
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& prim)
- {
- AccelSet* accel = (AccelSet*) context->scene->get(prim.geomID());
- /* perform ray mask test */
-#if defined(EMBREE_RAY_MASK)
- if ((ray.mask & accel->mask) == 0)
- return false;
-#endif
-
- accel->occluded(ray,prim.geomID(),prim.primID(),context,&reportOcclusion1);
- return ray.tfar < 0.0f;
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim)
- {
- AccelSet* accel = (AccelSet*)context->scene->get(prim.geomID());
- context->geomID = prim.geomID();
- context->primID = prim.primID();
- return accel->pointQuery(query, context);
- }
-
- template<int K>
- static __forceinline void intersectK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- assert(false);
- }
-
- template<int K>
- static __forceinline vbool<K> occludedK(const vbool<K>& valid, /* PrecalculationsK& pre, */ RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, size_t& lazy_node)
- {
- assert(false);
- return valid;
- }
- };
-
- template<int K, bool mblur>
- struct ObjectIntersectorK
- {
- typedef Object Primitive;
-
- struct Precalculations {
- __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray) {}
- };
-
- static __forceinline void intersect(const vbool<K>& valid_i, const Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& prim)
- {
- vbool<K> valid = valid_i;
- AccelSet* accel = (AccelSet*) context->scene->get(prim.geomID());
-
- /* perform ray mask test */
-#if defined(EMBREE_RAY_MASK)
- valid &= (ray.mask & accel->mask) != 0;
- if (none(valid)) return;
-#endif
- accel->intersect(valid,ray,prim.geomID(),prim.primID(),context,&reportIntersection1);
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, const Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& prim)
- {
- vbool<K> valid = valid_i;
- AccelSet* accel = (AccelSet*) context->scene->get(prim.geomID());
-
- /* perform ray mask test */
-#if defined(EMBREE_RAY_MASK)
- valid &= (ray.mask & accel->mask) != 0;
- if (none(valid)) return false;
-#endif
- accel->occluded(valid,ray,prim.geomID(),prim.primID(),context,&reportOcclusion1);
- return ray.tfar < 0.0f;
- }
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) {
- intersect(vbool<K>(1<<int(k)),pre,ray,context,prim);
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& prim) {
- occluded(vbool<K>(1<<int(k)),pre,ray,context,prim);
- return ray.tfar[k] < 0.0f;
- }
- };
-
- typedef ObjectIntersectorK<4,false> ObjectIntersector4;
- typedef ObjectIntersectorK<8,false> ObjectIntersector8;
- typedef ObjectIntersectorK<16,false> ObjectIntersector16;
-
- typedef ObjectIntersectorK<4,true> ObjectIntersector4MB;
- typedef ObjectIntersectorK<8,true> ObjectIntersector8MB;
- typedef ObjectIntersectorK<16,true> ObjectIntersector16MB;
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/plane.h b/thirdparty/embree-aarch64/kernels/geometry/plane.h
deleted file mode 100644
index ebe45db558..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/plane.h
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-
-namespace embree
-{
- namespace isa
- {
- struct HalfPlane
- {
- const Vec3fa P; //!< plane origin
- const Vec3fa N; //!< plane normal
-
- __forceinline HalfPlane(const Vec3fa& P, const Vec3fa& N)
- : P(P), N(N) {}
-
- __forceinline BBox1f intersect(const Vec3fa& ray_org, const Vec3fa& ray_dir) const
- {
- Vec3fa O = Vec3fa(ray_org) - P;
- Vec3fa D = Vec3fa(ray_dir);
- float ON = dot(O,N);
- float DN = dot(D,N);
- bool eps = abs(DN) < min_rcp_input;
- float t = -ON*rcp(DN);
- float lower = select(eps || DN < 0.0f, float(neg_inf), t);
- float upper = select(eps || DN > 0.0f, float(pos_inf), t);
- return BBox1f(lower,upper);
- }
- };
-
- template<int M>
- struct HalfPlaneN
- {
- const Vec3vf<M> P; //!< plane origin
- const Vec3vf<M> N; //!< plane normal
-
- __forceinline HalfPlaneN(const Vec3vf<M>& P, const Vec3vf<M>& N)
- : P(P), N(N) {}
-
- __forceinline BBox<vfloat<M>> intersect(const Vec3fa& ray_org, const Vec3fa& ray_dir) const
- {
- Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray_org) - P;
- Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray_dir);
- vfloat<M> ON = dot(O,N);
- vfloat<M> DN = dot(D,N);
- vbool<M> eps = abs(DN) < min_rcp_input;
- vfloat<M> t = -ON*rcp(DN);
- vfloat<M> lower = select(eps | DN < 0.0f, vfloat<M>(neg_inf), t);
- vfloat<M> upper = select(eps | DN > 0.0f, vfloat<M>(pos_inf), t);
- return BBox<vfloat<M>>(lower,upper);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/pointi.h b/thirdparty/embree-aarch64/kernels/geometry/pointi.h
deleted file mode 100644
index 4ba298e86b..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/pointi.h
+++ /dev/null
@@ -1,417 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-
-namespace embree
-{
- template<int M>
- struct PointMi
- {
- /* Virtual interface to query information about the line segment type */
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
- /* primitive supports multiple time segments */
- static const bool singleTimeSegment = false;
-
- /* Returns maximum number of stored line segments */
- static __forceinline size_t max_size()
- {
- return M;
- }
-
- /* Returns required number of primitive blocks for N line segments */
- static __forceinline size_t blocks(size_t N)
- {
- return (N + max_size() - 1) / max_size();
- }
-
- /* Returns required number of bytes for N line segments */
- static __forceinline size_t bytes(size_t N)
- {
- return blocks(N) * sizeof(PointMi);
- }
-
- public:
- /* Default constructor */
- __forceinline PointMi() {}
-
- /* Construction from vertices and IDs */
- __forceinline PointMi(const vuint<M>& geomIDs, const vuint<M>& primIDs, Geometry::GType gtype, uint32_t numPrimitives)
- : gtype((unsigned char)gtype),
- numPrimitives(numPrimitives),
- sharedGeomID(geomIDs[0]),
- primIDs(primIDs)
- {
- assert(all(vuint<M>(geomID()) == geomIDs));
- }
-
- /* Returns a mask that tells which line segments are valid */
- __forceinline vbool<M> valid() const {
- return vint<M>(step) < vint<M>(numPrimitives);
- }
-
- /* Returns a mask that tells which line segments are valid */
- template<int Mx> __forceinline vbool<Mx> valid() const {
- return vint<Mx>(step) < vint<Mx>(numPrimitives);
- }
-
- /* Returns if the specified line segment is valid */
- __forceinline bool valid(const size_t i) const
- {
- assert(i < M);
- return i < numPrimitives;
- }
-
- /* Returns the number of stored line segments */
- __forceinline size_t size() const {
- return numPrimitives;
- }
-
- __forceinline unsigned int geomID(unsigned int i = 0) const {
- return sharedGeomID;
- }
-
- __forceinline vuint<M>& primID() {
- return primIDs;
- }
- __forceinline const vuint<M>& primID() const {
- return primIDs;
- }
- __forceinline unsigned int primID(const size_t i) const {
- assert(i < M);
- return primIDs[i];
- }
-
- /* gather the line segments */
- __forceinline void gather(Vec4vf<M>& p0, const Points* geom) const;
- __forceinline void gather(Vec4vf<M>& p0, Vec3vf<M>& n0, const Points* geom) const;
-
- __forceinline void gatheri(Vec4vf<M>& p0, const Points* geom, const int itime) const;
- __forceinline void gatheri(Vec4vf<M>& p0, Vec3vf<M>& n0, const Points* geom, const int itime) const;
-
- __forceinline void gather(Vec4vf<M>& p0, const Points* geom, float time) const;
- __forceinline void gather(Vec4vf<M>& p0, Vec3vf<M>& n0, const Points* geom, float time) const;
-
- /* Calculate the bounds of the line segments */
- __forceinline const BBox3fa bounds(const Scene* scene, size_t itime = 0) const
- {
- BBox3fa bounds = empty;
- for (size_t i = 0; i < M && valid(i); i++) {
- const Points* geom = scene->get<Points>(geomID(i));
- bounds.extend(geom->bounds(primID(i),itime));
- }
- return bounds;
- }
-
- /* Calculate the linear bounds of the primitive */
- __forceinline LBBox3fa linearBounds(const Scene* scene, size_t itime) {
- return LBBox3fa(bounds(scene, itime + 0), bounds(scene, itime + 1));
- }
-
- __forceinline LBBox3fa linearBounds(const Scene* const scene, size_t itime, size_t numTimeSteps)
- {
- LBBox3fa allBounds = empty;
- for (size_t i = 0; i < M && valid(i); i++) {
- const Points* geom = scene->get<Points>(geomID(i));
- allBounds.extend(geom->linearBounds(primID(i), itime, numTimeSteps));
- }
- return allBounds;
- }
-
- __forceinline LBBox3fa linearBounds(const Scene* const scene, const BBox1f time_range)
- {
- LBBox3fa allBounds = empty;
- for (size_t i = 0; i < M && valid(i); i++) {
- const Points* geom = scene->get<Points>(geomID((unsigned int)i));
- allBounds.extend(geom->linearBounds(primID(i), time_range));
- }
- return allBounds;
- }
-
- /* Fill line segment from line segment list */
- template<typename PrimRefT>
- __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
- {
- Geometry::GType gty = scene->get(prims[begin].geomID())->getType();
- vuint<M> geomID, primID;
- vuint<M> v0;
- const PrimRefT* prim = &prims[begin];
-
- int numPrimitives = 0;
- for (size_t i = 0; i < M; i++) {
- if (begin < end) {
- geomID[i] = prim->geomID();
- primID[i] = prim->primID();
- begin++;
- numPrimitives++;
- } else {
- assert(i);
- if (i > 0) {
- geomID[i] = geomID[i - 1];
- primID[i] = primID[i - 1];
- }
- }
- if (begin < end)
- prim = &prims[begin]; // FIXME: remove this line
- }
- new (this) PointMi(geomID, primID, gty, numPrimitives); // FIXME: use non temporal store
- }
-
- template<typename BVH, typename Allocator>
- __forceinline static typename BVH::NodeRef createLeaf(BVH* bvh,
- const PrimRef* prims,
- const range<size_t>& set,
- const Allocator& alloc)
- {
- size_t start = set.begin();
- size_t items = PointMi::blocks(set.size());
- size_t numbytes = PointMi::bytes(set.size());
- PointMi* accel = (PointMi*)alloc.malloc1(numbytes, M * sizeof(float));
- for (size_t i = 0; i < items; i++) {
- accel[i].fill(prims, start, set.end(), bvh->scene);
- }
- return bvh->encodeLeaf((char*)accel, items);
- };
-
- __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
- {
- fill(prims, begin, end, scene);
- return linearBounds(scene, itime);
- }
-
- __forceinline LBBox3fa fillMB(
- const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
- {
- fill(prims, begin, end, scene);
- return linearBounds(scene, time_range);
- }
-
- template<typename BVH, typename SetMB, typename Allocator>
- __forceinline static typename BVH::NodeRecordMB4D createLeafMB(BVH* bvh, const SetMB& prims, const Allocator& alloc)
- {
- size_t start = prims.object_range.begin();
- size_t end = prims.object_range.end();
- size_t items = PointMi::blocks(prims.object_range.size());
- size_t numbytes = PointMi::bytes(prims.object_range.size());
- PointMi* accel = (PointMi*)alloc.malloc1(numbytes, M * sizeof(float));
- const typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel, items);
-
- LBBox3fa bounds = empty;
- for (size_t i = 0; i < items; i++)
- bounds.extend(accel[i].fillMB(prims.prims->data(), start, end, bvh->scene, prims.time_range));
-
- return typename BVH::NodeRecordMB4D(node, bounds, prims.time_range);
- };
-
- /*! output operator */
- friend __forceinline embree_ostream operator<<(embree_ostream cout, const PointMi& line)
- {
- return cout << "Line" << M << "i {" << line.v0 << ", " << line.geomID() << ", " << line.primID() << "}";
- }
-
- public:
- unsigned char gtype;
- unsigned char numPrimitives;
- unsigned int sharedGeomID;
-
- private:
- vuint<M> primIDs; // primitive ID
- };
-
- template<>
- __forceinline void PointMi<4>::gather(Vec4vf4& p0, const Points* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0)));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1)));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2)));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3)));
- transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w);
- }
-
- template<>
- __forceinline void PointMi<4>::gather(Vec4vf4& p0, Vec3vf4& n0, const Points* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0)));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1)));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2)));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3)));
- transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w);
- const vfloat4 b0 = vfloat4(geom->normal(primID(0)));
- const vfloat4 b1 = vfloat4(geom->normal(primID(1)));
- const vfloat4 b2 = vfloat4(geom->normal(primID(2)));
- const vfloat4 b3 = vfloat4(geom->normal(primID(3)));
- transpose(b0, b1, b2, b3, n0.x, n0.y, n0.z);
- }
-
- template<>
- __forceinline void PointMi<4>::gatheri(Vec4vf4& p0, const Points* geom, const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime));
- transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w);
- }
-
- template<>
- __forceinline void PointMi<4>::gatheri(Vec4vf4& p0, Vec3vf4& n0, const Points* geom, const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime));
- transpose(a0, a1, a2, a3, p0.x, p0.y, p0.z, p0.w);
- const vfloat4 b0 = vfloat4(geom->normal(primID(0), itime));
- const vfloat4 b1 = vfloat4(geom->normal(primID(1), itime));
- const vfloat4 b2 = vfloat4(geom->normal(primID(2), itime));
- const vfloat4 b3 = vfloat4(geom->normal(primID(3), itime));
- transpose(b0, b1, b2, b3, n0.x, n0.y, n0.z);
- }
-
- template<>
- __forceinline void PointMi<4>::gather(Vec4vf4& p0, const Points* geom, float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf4 a0; gatheri(a0, geom, itime);
- Vec4vf4 b0; gatheri(b0, geom, itime + 1);
- p0 = lerp(a0, b0, vfloat4(ftime));
- }
-
- template<>
- __forceinline void PointMi<4>::gather(Vec4vf4& p0, Vec3vf4& n0, const Points* geom, float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf4 a0, b0;
- Vec3vf4 norm0, norm1;
- gatheri(a0, norm0, geom, itime);
- gatheri(b0, norm1, geom, itime + 1);
- p0 = lerp(a0, b0, vfloat4(ftime));
- n0 = lerp(norm0, norm1, vfloat4(ftime));
- }
-
-#if defined(__AVX__)
-
- template<>
- __forceinline void PointMi<8>::gather(Vec4vf8& p0, const Points* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0)));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1)));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2)));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3)));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4)));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5)));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6)));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7)));
- transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w);
- }
-
- template<>
- __forceinline void PointMi<8>::gather(Vec4vf8& p0, Vec3vf8& n0, const Points* geom) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0)));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1)));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2)));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3)));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4)));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5)));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6)));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7)));
- transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w);
- const vfloat4 b0 = vfloat4(geom->normal(primID(0)));
- const vfloat4 b1 = vfloat4(geom->normal(primID(1)));
- const vfloat4 b2 = vfloat4(geom->normal(primID(2)));
- const vfloat4 b3 = vfloat4(geom->normal(primID(3)));
- const vfloat4 b4 = vfloat4(geom->normal(primID(4)));
- const vfloat4 b5 = vfloat4(geom->normal(primID(5)));
- const vfloat4 b6 = vfloat4(geom->normal(primID(6)));
- const vfloat4 b7 = vfloat4(geom->normal(primID(7)));
- transpose(b0, b1, b2, b3, b4, b5, b6, b7, n0.x, n0.y, n0.z);
- }
-
- template<>
- __forceinline void PointMi<8>::gatheri(Vec4vf8& p0, const Points* geom, const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4), itime));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5), itime));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6), itime));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7), itime));
- transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w);
- }
-
- template<>
- __forceinline void PointMi<8>::gatheri(Vec4vf8& p0, Vec3vf8& n0, const Points* geom, const int itime) const
- {
- const vfloat4 a0 = vfloat4::loadu(geom->vertexPtr(primID(0), itime));
- const vfloat4 a1 = vfloat4::loadu(geom->vertexPtr(primID(1), itime));
- const vfloat4 a2 = vfloat4::loadu(geom->vertexPtr(primID(2), itime));
- const vfloat4 a3 = vfloat4::loadu(geom->vertexPtr(primID(3), itime));
- const vfloat4 a4 = vfloat4::loadu(geom->vertexPtr(primID(4), itime));
- const vfloat4 a5 = vfloat4::loadu(geom->vertexPtr(primID(5), itime));
- const vfloat4 a6 = vfloat4::loadu(geom->vertexPtr(primID(6), itime));
- const vfloat4 a7 = vfloat4::loadu(geom->vertexPtr(primID(7), itime));
- transpose(a0, a1, a2, a3, a4, a5, a6, a7, p0.x, p0.y, p0.z, p0.w);
- const vfloat4 b0 = vfloat4(geom->normal(primID(0), itime));
- const vfloat4 b1 = vfloat4(geom->normal(primID(1), itime));
- const vfloat4 b2 = vfloat4(geom->normal(primID(2), itime));
- const vfloat4 b3 = vfloat4(geom->normal(primID(3), itime));
- const vfloat4 b4 = vfloat4(geom->normal(primID(4), itime));
- const vfloat4 b5 = vfloat4(geom->normal(primID(5), itime));
- const vfloat4 b6 = vfloat4(geom->normal(primID(6), itime));
- const vfloat4 b7 = vfloat4(geom->normal(primID(7), itime));
- transpose(b0, b1, b2, b3, b4, b5, b6, b7, n0.x, n0.y, n0.z);
- }
-
- template<>
- __forceinline void PointMi<8>::gather(Vec4vf8& p0, const Points* geom, float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf8 a0;
- gatheri(a0, geom, itime);
- Vec4vf8 b0;
- gatheri(b0, geom, itime + 1);
- p0 = lerp(a0, b0, vfloat8(ftime));
- }
-
- template<>
- __forceinline void PointMi<8>::gather(Vec4vf8& p0, Vec3vf8& n0, const Points* geom, float time) const
- {
- float ftime;
- const int itime = geom->timeSegment(time, ftime);
-
- Vec4vf8 a0, b0;
- Vec3vf8 norm0, norm1;
- gatheri(a0, norm0, geom, itime);
- gatheri(b0, norm1, geom, itime + 1);
- p0 = lerp(a0, b0, vfloat8(ftime));
- n0 = lerp(norm0, norm1, vfloat8(ftime));
- }
-#endif
-
- template<int M>
- typename PointMi<M>::Type PointMi<M>::type;
-
- typedef PointMi<4> Point4i;
- typedef PointMi<8> Point8i;
-
-} // namespace embree
diff --git a/thirdparty/embree-aarch64/kernels/geometry/primitive.h b/thirdparty/embree-aarch64/kernels/geometry/primitive.h
deleted file mode 100644
index 41e5b2b304..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/primitive.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/default.h"
-#include "../common/scene.h"
-#include "../../common/simd/simd.h"
-#include "../common/primref.h"
-#include "../common/primref_mb.h"
-
-namespace embree
-{
- struct PrimitiveType
- {
- /*! returns name of this primitive type */
- virtual const char* name() const = 0;
-
- /*! Returns the number of stored active primitives in a block. */
- virtual size_t sizeActive(const char* This) const = 0;
-
- /*! Returns the number of stored active and inactive primitives in a block. */
- virtual size_t sizeTotal(const char* This) const = 0;
-
- /*! Returns the number of bytes of block. */
- virtual size_t getBytes(const char* This) const = 0;
- };
-
- template<typename Primitive>
- struct PrimitivePointQuery1
- {
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& prim)
- {
- bool changed = false;
- for (size_t i = 0; i < Primitive::max_size(); i++)
- {
- if (!prim.valid(i)) break;
- STAT3(point_query.trav_prims,1,1,1);
- AccelSet* accel = (AccelSet*)context->scene->get(prim.geomID(i));
- context->geomID = prim.geomID(i);
- context->primID = prim.primID(i);
- changed |= accel->pointQuery(query, context);
- }
- return changed;
- }
-
- static __forceinline void pointQueryNoop(PointQuery* query, PointQueryContext* context, const Primitive& prim) { }
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp b/thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp
deleted file mode 100644
index f93574c9c8..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp
+++ /dev/null
@@ -1,379 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "primitive.h"
-#include "curveNv.h"
-#include "curveNi.h"
-#include "curveNi_mb.h"
-#include "linei.h"
-#include "triangle.h"
-#include "trianglev.h"
-#include "trianglev_mb.h"
-#include "trianglei.h"
-#include "quadv.h"
-#include "quadi.h"
-#include "subdivpatch1.h"
-#include "object.h"
-#include "instance.h"
-#include "subgrid.h"
-
-namespace embree
-{
- /********************** Curve4v **************************/
-
- template<>
- const char* Curve4v::Type::name () const {
- return "curve4v";
- }
-
- template<>
- size_t Curve4v::Type::sizeActive(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return ((Line4i*)This)->size();
- else
- return ((Curve4v*)This)->N;
- }
-
- template<>
- size_t Curve4v::Type::sizeTotal(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return 4;
- else
- return ((Curve4v*)This)->N;
- }
-
- template<>
- size_t Curve4v::Type::getBytes(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return Line4i::bytes(sizeActive(This));
- else
- return Curve4v::bytes(sizeActive(This));
- }
-
- /********************** Curve4i **************************/
-
- template<>
- const char* Curve4i::Type::name () const {
- return "curve4i";
- }
-
- template<>
- size_t Curve4i::Type::sizeActive(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return ((Line4i*)This)->size();
- else
- return ((Curve4i*)This)->N;
- }
-
- template<>
- size_t Curve4i::Type::sizeTotal(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return 4;
- else
- return ((Curve4i*)This)->N;
- }
-
- template<>
- size_t Curve4i::Type::getBytes(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return Line4i::bytes(sizeActive(This));
- else
- return Curve4i::bytes(sizeActive(This));
- }
-
- /********************** Curve4iMB **************************/
-
- template<>
- const char* Curve4iMB::Type::name () const {
- return "curve4imb";
- }
-
- template<>
- size_t Curve4iMB::Type::sizeActive(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return ((Line4i*)This)->size();
- else
- return ((Curve4iMB*)This)->N;
- }
-
- template<>
- size_t Curve4iMB::Type::sizeTotal(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return 4;
- else
- return ((Curve4iMB*)This)->N;
- }
-
- template<>
- size_t Curve4iMB::Type::getBytes(const char* This) const
- {
- if ((*This & Geometry::GType::GTY_BASIS_MASK) == Geometry::GType::GTY_BASIS_LINEAR)
- return Line4i::bytes(sizeActive(This));
- else
- return Curve4iMB::bytes(sizeActive(This));
- }
-
- /********************** Line4i **************************/
-
- template<>
- const char* Line4i::Type::name () const {
- return "line4i";
- }
-
- template<>
- size_t Line4i::Type::sizeActive(const char* This) const {
- return ((Line4i*)This)->size();
- }
-
- template<>
- size_t Line4i::Type::sizeTotal(const char* This) const {
- return 4;
- }
-
- template<>
- size_t Line4i::Type::getBytes(const char* This) const {
- return sizeof(Line4i);
- }
-
- /********************** Triangle4 **************************/
-
- template<>
- const char* Triangle4::Type::name () const {
- return "triangle4";
- }
-
- template<>
- size_t Triangle4::Type::sizeActive(const char* This) const {
- return ((Triangle4*)This)->size();
- }
-
- template<>
- size_t Triangle4::Type::sizeTotal(const char* This) const {
- return 4;
- }
-
- template<>
- size_t Triangle4::Type::getBytes(const char* This) const {
- return sizeof(Triangle4);
- }
-
- /********************** Triangle4v **************************/
-
- template<>
- const char* Triangle4v::Type::name () const {
- return "triangle4v";
- }
-
- template<>
- size_t Triangle4v::Type::sizeActive(const char* This) const {
- return ((Triangle4v*)This)->size();
- }
-
- template<>
- size_t Triangle4v::Type::sizeTotal(const char* This) const {
- return 4;
- }
-
- template<>
- size_t Triangle4v::Type::getBytes(const char* This) const {
- return sizeof(Triangle4v);
- }
-
- /********************** Triangle4i **************************/
-
- template<>
- const char* Triangle4i::Type::name () const {
- return "triangle4i";
- }
-
- template<>
- size_t Triangle4i::Type::sizeActive(const char* This) const {
- return ((Triangle4i*)This)->size();
- }
-
- template<>
- size_t Triangle4i::Type::sizeTotal(const char* This) const {
- return 4;
- }
-
- template<>
- size_t Triangle4i::Type::getBytes(const char* This) const {
- return sizeof(Triangle4i);
- }
-
- /********************** Triangle4vMB **************************/
-
- template<>
- const char* Triangle4vMB::Type::name () const {
- return "triangle4vmb";
- }
-
- template<>
- size_t Triangle4vMB::Type::sizeActive(const char* This) const {
- return ((Triangle4vMB*)This)->size();
- }
-
- template<>
- size_t Triangle4vMB::Type::sizeTotal(const char* This) const {
- return 4;
- }
-
- template<>
- size_t Triangle4vMB::Type::getBytes(const char* This) const {
- return sizeof(Triangle4vMB);
- }
-
- /********************** Quad4v **************************/
-
- template<>
- const char* Quad4v::Type::name () const {
- return "quad4v";
- }
-
- template<>
- size_t Quad4v::Type::sizeActive(const char* This) const {
- return ((Quad4v*)This)->size();
- }
-
- template<>
- size_t Quad4v::Type::sizeTotal(const char* This) const {
- return 4;
- }
-
- template<>
- size_t Quad4v::Type::getBytes(const char* This) const {
- return sizeof(Quad4v);
- }
-
- /********************** Quad4i **************************/
-
- template<>
- const char* Quad4i::Type::name () const {
- return "quad4i";
- }
-
- template<>
- size_t Quad4i::Type::sizeActive(const char* This) const {
- return ((Quad4i*)This)->size();
- }
-
- template<>
- size_t Quad4i::Type::sizeTotal(const char* This) const {
- return 4;
- }
-
- template<>
- size_t Quad4i::Type::getBytes(const char* This) const {
- return sizeof(Quad4i);
- }
-
- /********************** SubdivPatch1 **************************/
-
- const char* SubdivPatch1::Type::name () const {
- return "subdivpatch1";
- }
-
- size_t SubdivPatch1::Type::sizeActive(const char* This) const {
- return 1;
- }
-
- size_t SubdivPatch1::Type::sizeTotal(const char* This) const {
- return 1;
- }
-
- size_t SubdivPatch1::Type::getBytes(const char* This) const {
- return sizeof(SubdivPatch1);
- }
-
- SubdivPatch1::Type SubdivPatch1::type;
-
- /********************** Virtual Object **************************/
-
- const char* Object::Type::name () const {
- return "object";
- }
-
- size_t Object::Type::sizeActive(const char* This) const {
- return 1;
- }
-
- size_t Object::Type::sizeTotal(const char* This) const {
- return 1;
- }
-
- size_t Object::Type::getBytes(const char* This) const {
- return sizeof(Object);
- }
-
- Object::Type Object::type;
-
- /********************** Instance **************************/
-
- const char* InstancePrimitive::Type::name () const {
- return "instance";
- }
-
- size_t InstancePrimitive::Type::sizeActive(const char* This) const {
- return 1;
- }
-
- size_t InstancePrimitive::Type::sizeTotal(const char* This) const {
- return 1;
- }
-
- size_t InstancePrimitive::Type::getBytes(const char* This) const {
- return sizeof(InstancePrimitive);
- }
-
- InstancePrimitive::Type InstancePrimitive::type;
-
- /********************** SubGrid **************************/
-
- const char* SubGrid::Type::name () const {
- return "subgrid";
- }
-
- size_t SubGrid::Type::sizeActive(const char* This) const {
- return 1;
- }
-
- size_t SubGrid::Type::sizeTotal(const char* This) const {
- return 1;
- }
-
- size_t SubGrid::Type::getBytes(const char* This) const {
- return sizeof(SubGrid);
- }
-
- SubGrid::Type SubGrid::type;
-
- /********************** SubGridQBVH4 **************************/
-
- template<>
- const char* SubGridQBVH4::Type::name () const {
- return "SubGridQBVH4";
- }
-
- template<>
- size_t SubGridQBVH4::Type::sizeActive(const char* This) const {
- return 1;
- }
-
- template<>
- size_t SubGridQBVH4::Type::sizeTotal(const char* This) const {
- return 1;
- }
-
- template<>
- size_t SubGridQBVH4::Type::getBytes(const char* This) const {
- return sizeof(SubGridQBVH4);
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h
deleted file mode 100644
index 57ff4e60e5..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-namespace embree
-{
- namespace isa
- {
- /*! Intersects a ray with a quad with backface culling
- * enabled. The quad v0,v1,v2,v3 is split into two triangles
- * v0,v1,v3 and v2,v3,v1. The edge v1,v2 decides which of the two
- * triangles gets intersected. */
- template<int N>
- __forceinline vbool<N> intersect_quad_backface_culling(const vbool<N>& valid0,
- const Vec3fa& ray_org,
- const Vec3fa& ray_dir,
- const float ray_tnear,
- const float ray_tfar,
- const Vec3vf<N>& quad_v0,
- const Vec3vf<N>& quad_v1,
- const Vec3vf<N>& quad_v2,
- const Vec3vf<N>& quad_v3,
- vfloat<N>& u_o,
- vfloat<N>& v_o,
- vfloat<N>& t_o)
- {
- /* calculate vertices relative to ray origin */
- vbool<N> valid = valid0;
- const Vec3vf<N> O = Vec3vf<N>(ray_org);
- const Vec3vf<N> D = Vec3vf<N>(ray_dir);
- const Vec3vf<N> va = quad_v0-O;
- const Vec3vf<N> vb = quad_v1-O;
- const Vec3vf<N> vc = quad_v2-O;
- const Vec3vf<N> vd = quad_v3-O;
-
- const Vec3vf<N> edb = vb-vd;
- const vfloat<N> WW = dot(cross(vd,edb),D);
- const Vec3vf<N> v0 = select(WW <= 0.0f,va,vc);
- const Vec3vf<N> v1 = select(WW <= 0.0f,vb,vd);
- const Vec3vf<N> v2 = select(WW <= 0.0f,vd,vb);
-
- /* calculate edges */
- const Vec3vf<N> e0 = v2-v0;
- const Vec3vf<N> e1 = v0-v1;
-
- /* perform edge tests */
- const vfloat<N> U = dot(cross(v0,e0),D);
- const vfloat<N> V = dot(cross(v1,e1),D);
- valid &= max(U,V) <= 0.0f;
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<N> Ng = cross(e1,e0);
- const vfloat<N> den = dot(Ng,D);
- const vfloat<N> rcpDen = rcp(den);
-
- /* perform depth test */
- const vfloat<N> t = rcpDen*dot(v0,Ng);
- valid &= vfloat<N>(ray_tnear) <= t & t <= vfloat<N>(ray_tfar);
- if (unlikely(none(valid))) return false;
-
- /* avoid division by 0 */
- valid &= den != vfloat<N>(zero);
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- t_o = t;
- u_o = U * rcpDen;
- v_o = V * rcpDen;
- u_o = select(WW <= 0.0f,u_o,1.0f-u_o);
- v_o = select(WW <= 0.0f,v_o,1.0f-v_o);
- return valid;
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h
deleted file mode 100644
index 74e8c7720c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h
+++ /dev/null
@@ -1,566 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "quadv.h"
-#include "triangle_intersector_moeller.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct QuadHitM
- {
- __forceinline QuadHitM() {}
-
- __forceinline QuadHitM(const vbool<M>& valid,
- const vfloat<M>& U,
- const vfloat<M>& V,
- const vfloat<M>& T,
- const vfloat<M>& absDen,
- const Vec3vf<M>& Ng,
- const vbool<M>& flags)
- : U(U), V(V), T(T), absDen(absDen), tri_Ng(Ng), valid(valid), flags(flags) {}
-
- __forceinline void finalize()
- {
- const vfloat<M> rcpAbsDen = rcp(absDen);
- vt = T * rcpAbsDen;
- const vfloat<M> u = min(U * rcpAbsDen,1.0f);
- const vfloat<M> v = min(V * rcpAbsDen,1.0f);
- const vfloat<M> u1 = vfloat<M>(1.0f) - u;
- const vfloat<M> v1 = vfloat<M>(1.0f) - v;
-#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING)
- vu = select(flags,u1,u);
- vv = select(flags,v1,v);
- vNg = Vec3vf<M>(tri_Ng.x,tri_Ng.y,tri_Ng.z);
-#else
- const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f));
- vv = select(flags,u1,v);
- vu = select(flags,v1,u);
- vNg = Vec3vf<M>(flip*tri_Ng.x,flip*tri_Ng.y,flip*tri_Ng.z);
-#endif
- }
-
- __forceinline Vec2f uv(const size_t i)
- {
- const float u = vu[i];
- const float v = vv[i];
- return Vec2f(u,v);
- }
-
- __forceinline float t(const size_t i) { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- private:
- vfloat<M> U;
- vfloat<M> V;
- vfloat<M> T;
- vfloat<M> absDen;
- Vec3vf<M> tri_Ng;
-
- public:
- vbool<M> valid;
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
-
- public:
- const vbool<M> flags;
- };
-
- template<int K>
- struct QuadHitK
- {
- __forceinline QuadHitK(const vfloat<K>& U,
- const vfloat<K>& V,
- const vfloat<K>& T,
- const vfloat<K>& absDen,
- const Vec3vf<K>& Ng,
- const vbool<K>& flags)
- : U(U), V(V), T(T), absDen(absDen), flags(flags), tri_Ng(Ng) {}
-
- __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
- {
- const vfloat<K> rcpAbsDen = rcp(absDen);
- const vfloat<K> t = T * rcpAbsDen;
- const vfloat<K> u0 = min(U * rcpAbsDen,1.0f);
- const vfloat<K> v0 = min(V * rcpAbsDen,1.0f);
- const vfloat<K> u1 = vfloat<K>(1.0f) - u0;
- const vfloat<K> v1 = vfloat<K>(1.0f) - v0;
- const vfloat<K> u = select(flags,u1,u0);
- const vfloat<K> v = select(flags,v1,v0);
- const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z);
- return std::make_tuple(u,v,t,Ng);
- }
-
- private:
- const vfloat<K> U;
- const vfloat<K> V;
- const vfloat<K> T;
- const vfloat<K> absDen;
- const vbool<K> flags;
- const Vec3vf<K> tri_Ng;
- };
-
- /* ----------------------------- */
- /* -- single ray intersectors -- */
- /* ----------------------------- */
-
-
- template<int M, bool filter>
- struct QuadMIntersector1MoellerTrumbore;
-
- /*! Intersects M quads with 1 ray */
- template<int M, bool filter>
- struct QuadMIntersector1MoellerTrumbore
- {
- __forceinline QuadMIntersector1MoellerTrumbore() {}
-
- __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}
-
- __forceinline void intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- MoellerTrumboreHitM<M> hit;
- MoellerTrumboreIntersector1<M> intersector(ray,nullptr);
- Intersect1EpilogM<M,M,filter> epilog(ray,context,geomID,primID);
-
- /* intersect first triangle */
- if (intersector.intersect(ray,v0,v1,v3,hit))
- epilog(hit.valid,hit);
-
- /* intersect second triangle */
- if (intersector.intersect(ray,v2,v3,v1,hit))
- {
- hit.U = hit.absDen - hit.U;
- hit.V = hit.absDen - hit.V;
- epilog(hit.valid,hit);
- }
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- MoellerTrumboreHitM<M> hit;
- MoellerTrumboreIntersector1<M> intersector(ray,nullptr);
- Occluded1EpilogM<M,M,filter> epilog(ray,context,geomID,primID);
-
- /* intersect first triangle */
- if (intersector.intersect(ray,v0,v1,v3,hit))
- {
- if (epilog(hit.valid,hit))
- return true;
- }
-
- /* intersect second triangle */
- if (intersector.intersect(ray,v2,v3,v1,hit))
- {
- hit.U = hit.absDen - hit.U;
- hit.V = hit.absDen - hit.V;
- if (epilog(hit.valid,hit))
- return true;
- }
- return false;
- }
- };
-
-#if defined(__AVX512ER__) // KNL
-
- /*! Intersects 4 quads with 1 ray using AVX512 */
- template<bool filter>
- struct QuadMIntersector1MoellerTrumbore<4,filter>
- {
- __forceinline QuadMIntersector1MoellerTrumbore() {}
-
- __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}
-
- template<typename Epilog>
- __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)),
- select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)),
- select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z)));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z));
- const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z));
-#else
- const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)),
- select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)),
- select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z)));
- const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)),
- select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)),
- select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z)));
-#endif
- const vbool16 flags(0xf0f0);
-
- MoellerTrumboreHitM<16> hit;
- MoellerTrumboreIntersector1<16> intersector(ray,nullptr);
- if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit)))
- {
- vfloat16 U = hit.U, V = hit.V, absDen = hit.absDen;
-#if !defined(EMBREE_BACKFACE_CULLING)
- hit.U = select(flags,absDen-V,U);
- hit.V = select(flags,absDen-U,V);
- hit.vNg *= select(flags,vfloat16(-1.0f),vfloat16(1.0f)); // FIXME: use XOR
-#else
- hit.U = select(flags,absDen-U,U);
- hit.V = select(flags,absDen-V,V);
-#endif
- if (likely(epilog(hit.valid,hit)))
- return true;
- }
- return false;
- }
-
- __forceinline bool intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#elif defined(__AVX__)
-
- /*! Intersects 4 quads with 1 ray using AVX */
- template<bool filter>
- struct QuadMIntersector1MoellerTrumbore<4,filter>
- {
- __forceinline QuadMIntersector1MoellerTrumbore() {}
-
- __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}
-
- template<typename Epilog>
- __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
- const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
-#else
- const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
- const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
-#endif
- MoellerTrumboreHitM<8> hit;
- MoellerTrumboreIntersector1<8> intersector(ray,nullptr);
- const vbool8 flags(0,0,0,0,1,1,1,1);
- if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit)))
- {
- vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen;
-
-#if !defined(EMBREE_BACKFACE_CULLING)
- hit.U = select(flags,absDen-V,U);
- hit.V = select(flags,absDen-U,V);
- hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); // FIXME: use XOR
-#else
- hit.U = select(flags,absDen-U,U);
- hit.V = select(flags,absDen-V,V);
-#endif
- if (unlikely(epilog(hit.valid,hit)))
- return true;
- }
- return false;
- }
-
- __forceinline bool intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#endif
-
- /* ----------------------------- */
- /* -- ray packet intersectors -- */
- /* ----------------------------- */
-
-
- struct MoellerTrumboreIntersector1KTriangleM
- {
- /*! Intersect k'th ray from ray packet of size K with M triangles. */
- template<int M, int K, typename Epilog>
- static __forceinline bool intersect(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- const Vec3vf<M>& tri_Ng,
- const vbool<M>& flags,
- const Epilog& epilog)
- {
- /* calculate denominator */
- const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
- const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
- const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O;
- const Vec3vf<M> R = cross(C,D);
- const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D);
- const vfloat<M> absDen = abs(den);
- const vfloat<M> sgnDen = signmsk(den);
-
- /* perform edge tests */
- const vfloat<M> U = dot(R,Vec3vf<M>(tri_e2)) ^ sgnDen;
- const vfloat<M> V = dot(R,Vec3vf<M>(tri_e1)) ^ sgnDen;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#else
- vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#endif
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen;
- valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k]));
- if (likely(none(valid))) return false;
-
- /* calculate hit information */
- QuadHitM<M> hit(valid,U,V,T,absDen,tri_Ng,flags);
- return epilog(valid,hit);
- }
-
- template<int M, int K, typename Epilog>
- static __forceinline bool intersect1(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const vbool<M>& flags,
- const Epilog& epilog)
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- const Vec3vf<M> Ng = cross(e2,e1);
- return intersect(ray,k,v0,e1,e2,Ng,flags,epilog);
- }
- };
-
- template<int M, int K, bool filter>
- struct QuadMIntersectorKMoellerTrumboreBase
- {
- __forceinline QuadMIntersectorKMoellerTrumboreBase(const vbool<K>& valid, const RayK<K>& ray) {}
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_e1,
- const Vec3vf<K>& tri_e2,
- const Vec3vf<K>& tri_Ng,
- const vbool<K>& flags,
- const Epilog& epilog) const
- {
- /* calculate denominator */
- vbool<K> valid = valid0;
- const Vec3vf<K> C = tri_v0 - ray.org;
- const Vec3vf<K> R = cross(C,ray.dir);
- const vfloat<K> den = dot(tri_Ng,ray.dir);
- const vfloat<K> absDen = abs(den);
- const vfloat<K> sgnDen = signmsk(den);
-
- /* test against edge p2 p0 */
- const vfloat<K> U = dot(R,tri_e2) ^ sgnDen;
- valid &= U >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p0 p1 */
- const vfloat<K> V = dot(R,tri_e1) ^ sgnDen;
- valid &= V >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p1 p2 */
- const vfloat<K> W = absDen-U-V;
- valid &= W >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen;
- valid &= (absDen*ray.tnear() < T) & (T <= absDen*ray.tfar);
- if (unlikely(none(valid))) return false;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= den < vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#else
- valid &= den != vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#endif
-
- /* calculate hit information */
- QuadHitK<K> hit(U,V,T,absDen,tri_Ng,flags);
- return epilog(valid,hit);
- }
-
- /*! Intersects K rays with one of M quads. */
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_v1,
- const Vec3vf<K>& tri_v2,
- const vbool<K>& flags,
- const Epilog& epilog) const
- {
- const Vec3vf<K> e1 = tri_v0-tri_v1;
- const Vec3vf<K> e2 = tri_v2-tri_v0;
- const Vec3vf<K> Ng = cross(e2,e1);
- return intersectK(valid0,ray,tri_v0,e1,e2,Ng,flags,epilog);
- }
-
- /*! Intersects K rays with one of M quads. */
- template<typename Epilog>
- __forceinline bool intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& v0,
- const Vec3vf<K>& v1,
- const Vec3vf<K>& v2,
- const Vec3vf<K>& v3,
- const Epilog& epilog) const
- {
- intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),epilog);
- if (none(valid0)) return true;
- intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),epilog);
- return none(valid0);
- }
- };
-
- template<int M, int K, bool filter>
- struct QuadMIntersectorKMoellerTrumbore : public QuadMIntersectorKMoellerTrumboreBase<M,K,filter>
- {
- __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
- : QuadMIntersectorKMoellerTrumboreBase<M,K,filter>(valid,ray) {}
-
- __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- Intersect1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID);
- MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog);
- MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog);
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- Occluded1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID);
- if (MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true;
- if (MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true;
- return false;
- }
- };
-
-
-#if defined(__AVX512ER__) // KNL
-
- /*! Intersects 4 quads with 1 ray using AVX512 */
- template<int K, bool filter>
- struct QuadMIntersectorKMoellerTrumbore<4,K,filter> : public QuadMIntersectorKMoellerTrumboreBase<4,K,filter>
- {
- __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
- : QuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {}
-
- template<typename Epilog>
- __forceinline bool intersect1(RayK<K>& ray, size_t k,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)),
- select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)),
- select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z)));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z));
- const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z));
-#else
- const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)),
- select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)),
- select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z)));
- const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)),
- select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)),
- select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z)));
-#endif
- const vbool16 flags(0xf0f0);
- return MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog);
- }
-
- __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#elif defined(__AVX__)
-
- /*! Intersects 4 quads with 1 ray using AVX */
- template<int K, bool filter>
- struct QuadMIntersectorKMoellerTrumbore<4,K,filter> : public QuadMIntersectorKMoellerTrumboreBase<4,K,filter>
- {
- __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
- : QuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {}
-
- template<typename Epilog>
- __forceinline bool intersect1(RayK<K>& ray, size_t k,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
- const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
-#else
- const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
- const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
-#endif
- const vbool8 flags(0,0,0,0,1,1,1,1);
- return MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog);
- }
-
- __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h b/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h
deleted file mode 100644
index 7ca3aed0a0..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h
+++ /dev/null
@@ -1,529 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "quad_intersector_moeller.h"
-
-/*! Modified Pluecker ray/triangle intersector. The test first shifts
- * the ray origin into the origin of the coordinate system and then
- * uses Pluecker coordinates for the intersection. Due to the shift,
- * the Pluecker coordinate calculation simplifies and the tests get
- * numerically stable. The edge equations are watertight along the
- * edge for neighboring triangles. */
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct QuadHitPlueckerM
- {
- __forceinline QuadHitPlueckerM() {}
-
- __forceinline QuadHitPlueckerM(const vbool<M>& valid,
- const vfloat<M>& U,
- const vfloat<M>& V,
- const vfloat<M>& UVW,
- const vfloat<M>& t,
- const Vec3vf<M>& Ng,
- const vbool<M>& flags)
- : U(U), V(V), UVW(UVW), tri_Ng(Ng), valid(valid), vt(t), flags(flags) {}
-
- __forceinline void finalize()
- {
- const vbool<M> invalid = abs(UVW) < min_rcp_input;
- const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW));
- const vfloat<M> u = min(U * rcpUVW,1.0f);
- const vfloat<M> v = min(V * rcpUVW,1.0f);
- const vfloat<M> u1 = vfloat<M>(1.0f) - u;
- const vfloat<M> v1 = vfloat<M>(1.0f) - v;
-#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING)
- vu = select(flags,u1,u);
- vv = select(flags,v1,v);
- vNg = Vec3vf<M>(tri_Ng.x,tri_Ng.y,tri_Ng.z);
-#else
- const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f));
- vv = select(flags,u1,v);
- vu = select(flags,v1,u);
- vNg = Vec3vf<M>(flip*tri_Ng.x,flip*tri_Ng.y,flip*tri_Ng.z);
-#endif
- }
-
- __forceinline Vec2f uv(const size_t i)
- {
- const float u = vu[i];
- const float v = vv[i];
- return Vec2f(u,v);
- }
-
- __forceinline float t(const size_t i) { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- private:
- vfloat<M> U;
- vfloat<M> V;
- vfloat<M> UVW;
- Vec3vf<M> tri_Ng;
-
- public:
- vbool<M> valid;
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
-
- public:
- const vbool<M> flags;
- };
-
- template<int K>
- struct QuadHitPlueckerK
- {
- __forceinline QuadHitPlueckerK(const vfloat<K>& U,
- const vfloat<K>& V,
- const vfloat<K>& UVW,
- const vfloat<K>& t,
- const Vec3vf<K>& Ng,
- const vbool<K>& flags)
- : U(U), V(V), UVW(UVW), t(t), flags(flags), tri_Ng(Ng) {}
-
- __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
- {
- const vbool<K> invalid = abs(UVW) < min_rcp_input;
- const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW));
- const vfloat<K> u0 = min(U * rcpUVW,1.0f);
- const vfloat<K> v0 = min(V * rcpUVW,1.0f);
- const vfloat<K> u1 = vfloat<K>(1.0f) - u0;
- const vfloat<K> v1 = vfloat<K>(1.0f) - v0;
- const vfloat<K> u = select(flags,u1,u0);
- const vfloat<K> v = select(flags,v1,v0);
- const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z);
- return std::make_tuple(u,v,t,Ng);
- }
-
- private:
- const vfloat<K> U;
- const vfloat<K> V;
- const vfloat<K> UVW;
- const vfloat<K> t;
- const vbool<K> flags;
- const Vec3vf<K> tri_Ng;
- };
-
- struct PlueckerIntersectorTriangle1
- {
- template<int M, typename Epilog>
- static __forceinline bool intersect(Ray& ray,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_v1,
- const Vec3vf<M>& tri_v2,
- const vbool<M>& flags,
- const Epilog& epilog)
- {
- /* calculate vertices relative to ray origin */
- const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org);
- const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir);
- const Vec3vf<M> v0 = tri_v0-O;
- const Vec3vf<M> v1 = tri_v1-O;
- const Vec3vf<M> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<M> e0 = v2-v0;
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<M> U = dot(cross(e0,v2+v0),D);
- const vfloat<M> V = dot(cross(e1,v0+v1),D);
- const vfloat<M> W = dot(cross(e2,v1+v2),D);
- const vfloat<M> UVW = U+V+W;
- const vfloat<M> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = max(U,V,W) <= eps;
-#else
- vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<M> den = twice(dot(Ng,D));
-
- /* perform depth test */
- const vfloat<M> T = twice(dot(v0,Ng));
- const vfloat<M> t = rcp(den)*T;
- valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar);
- valid &= den != vfloat<M>(zero);
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- QuadHitPlueckerM<M> hit(valid,U,V,UVW,t,Ng,flags);
- return epilog(valid,hit);
- }
- };
-
- /*! Intersects M quads with 1 ray */
- template<int M, bool filter>
- struct QuadMIntersector1Pluecker
- {
- __forceinline QuadMIntersector1Pluecker() {}
-
- __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}
-
- __forceinline void intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- Intersect1EpilogM<M,M,filter> epilog(ray,context,geomID,primID);
- PlueckerIntersectorTriangle1::intersect(ray,v0,v1,v3,vbool<M>(false),epilog);
- PlueckerIntersectorTriangle1::intersect(ray,v2,v3,v1,vbool<M>(true),epilog);
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- Occluded1EpilogM<M,M,filter> epilog(ray,context,geomID,primID);
- if (PlueckerIntersectorTriangle1::intersect(ray,v0,v1,v3,vbool<M>(false),epilog)) return true;
- if (PlueckerIntersectorTriangle1::intersect(ray,v2,v3,v1,vbool<M>(true ),epilog)) return true;
- return false;
- }
- };
-
-#if defined(__AVX512ER__) // KNL
-
- /*! Intersects 4 quads with 1 ray using AVX512 */
- template<bool filter>
- struct QuadMIntersector1Pluecker<4,filter>
- {
- __forceinline QuadMIntersector1Pluecker() {}
-
- __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}
-
- template<typename Epilog>
- __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)),
- select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)),
- select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z)));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z));
- const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z));
-#else
- const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)),
- select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)),
- select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z)));
- const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)),
- select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)),
- select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z)));
-#endif
- const vbool16 flags(0xf0f0);
- return PlueckerIntersectorTriangle1::intersect(ray,vtx0,vtx1,vtx2,flags,epilog);
- }
-
- __forceinline bool intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#elif defined(__AVX__)
-
- /*! Intersects 4 quads with 1 ray using AVX */
- template<bool filter>
- struct QuadMIntersector1Pluecker<4,filter>
- {
- __forceinline QuadMIntersector1Pluecker() {}
-
- __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}
-
- template<typename Epilog>
- __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
- const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
-#else
- const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
- const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
-#endif
- const vbool8 flags(0,0,0,0,1,1,1,1);
- return PlueckerIntersectorTriangle1::intersect(ray,vtx0,vtx1,vtx2,flags,epilog);
- }
-
- __forceinline bool intersect(RayHit& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#endif
-
-
- /* ----------------------------- */
- /* -- ray packet intersectors -- */
- /* ----------------------------- */
-
- struct PlueckerIntersector1KTriangleM
- {
- /*! Intersect k'th ray from ray packet of size K with M triangles. */
- template<int M, int K, typename Epilog>
- static __forceinline bool intersect1(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_v1,
- const Vec3vf<M>& tri_v2,
- const vbool<M>& flags,
- const Epilog& epilog)
- {
- /* calculate vertices relative to ray origin */
- const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
- const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
- const Vec3vf<M> v0 = tri_v0-O;
- const Vec3vf<M> v1 = tri_v1-O;
- const Vec3vf<M> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<M> e0 = v2-v0;
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<M> U = dot(cross(e0,v2+v0),D);
- const vfloat<M> V = dot(cross(e1,v0+v1),D);
- const vfloat<M> W = dot(cross(e2,v1+v2),D);
- const vfloat<M> UVW = U+V+W;
- const vfloat<M> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = max(U,V,W) <= eps;
-#else
- vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<M> den = twice(dot(Ng,D));
-
- /* perform depth test */
- const vfloat<M> T = twice(dot(v0,Ng));
- const vfloat<M> t = rcp(den)*T;
- valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]);
- if (unlikely(none(valid))) return false;
-
- /* avoid division by 0 */
- valid &= den != vfloat<M>(zero);
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- QuadHitPlueckerM<M> hit(valid,U,V,UVW,t,Ng,flags);
- return epilog(valid,hit);
- }
- };
-
- template<int M, int K, bool filter>
- struct QuadMIntersectorKPlueckerBase
- {
- __forceinline QuadMIntersectorKPlueckerBase(const vbool<K>& valid, const RayK<K>& ray) {}
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_v1,
- const Vec3vf<K>& tri_v2,
- const vbool<K>& flags,
- const Epilog& epilog) const
- {
- /* calculate vertices relative to ray origin */
- vbool<K> valid = valid0;
- const Vec3vf<K> O = ray.org;
- const Vec3vf<K> D = ray.dir;
- const Vec3vf<K> v0 = tri_v0-O;
- const Vec3vf<K> v1 = tri_v1-O;
- const Vec3vf<K> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<K> e0 = v2-v0;
- const Vec3vf<K> e1 = v0-v1;
- const Vec3vf<K> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D);
- const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D);
- const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D);
- const vfloat<K> UVW = U+V+W;
- const vfloat<K> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= max(U,V,W) <= eps;
-#else
- valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D));
-
- /* perform depth test */
- const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng)));
- const vfloat<K> t = rcp(den)*T;
- valid &= ray.tnear() <= t & t <= ray.tfar;
- valid &= den != vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-
- /* calculate hit information */
- QuadHitPlueckerK<K> hit(U,V,UVW,t,Ng,flags);
- return epilog(valid,hit);
- }
-
- /*! Intersects K rays with one of M quads. */
- template<typename Epilog>
- __forceinline bool intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& v0,
- const Vec3vf<K>& v1,
- const Vec3vf<K>& v2,
- const Vec3vf<K>& v3,
- const Epilog& epilog) const
- {
- intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),epilog);
- if (none(valid0)) return true;
- intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),epilog);
- return none(valid0);
- }
- };
-
- template<int M, int K, bool filter>
- struct QuadMIntersectorKPluecker : public QuadMIntersectorKPlueckerBase<M,K,filter>
- {
- __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray)
- : QuadMIntersectorKPlueckerBase<M,K,filter>(valid,ray) {}
-
- __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- Intersect1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID);
- PlueckerIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog);
- PlueckerIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog);
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
- const vuint<M>& geomID, const vuint<M>& primID) const
- {
- Occluded1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID);
- if (PlueckerIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true;
- if (PlueckerIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true;
- return false;
- }
- };
-
-#if defined(__AVX512ER__) // KNL
-
- /*! Intersects 4 quads with 1 ray using AVX512 */
- template<int K, bool filter>
- struct QuadMIntersectorKPluecker<4,K,filter> : public QuadMIntersectorKPlueckerBase<4,K,filter>
- {
- __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray)
- : QuadMIntersectorKPlueckerBase<4,K,filter>(valid,ray) {}
-
- template<typename Epilog>
- __forceinline bool intersect1(RayK<K>& ray, size_t k, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)),
- select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)),
- select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z)));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z));
- const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z));
-#else
- const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)),
- select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)),
- select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z)));
- const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)),
- select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)),
- select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z)));
-#endif
-
- const vbool16 flags(0xf0f0);
- return PlueckerIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog);
- }
-
- __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#elif defined(__AVX__)
-
- /*! Intersects 4 quads with 1 ray using AVX */
- template<int K, bool filter>
- struct QuadMIntersectorKPluecker<4,K,filter> : public QuadMIntersectorKPlueckerBase<4,K,filter>
- {
- __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray)
- : QuadMIntersectorKPlueckerBase<4,K,filter>(valid,ray) {}
-
- template<typename Epilog>
- __forceinline bool intersect1(RayK<K>& ray, size_t k, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
- {
- const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
- const vbool8 flags(0,0,0,0,1,1,1,1);
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
- const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
-#else
- const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
- const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
-#endif
- return PlueckerIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog);
- }
-
- __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const vuint4& geomID, const vuint4& primID) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
- }
- };
-
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadi.h b/thirdparty/embree-aarch64/kernels/geometry/quadi.h
deleted file mode 100644
index 741ec519ab..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/quadi.h
+++ /dev/null
@@ -1,483 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-#include "../common/scene.h"
-
-namespace embree
-{
- /* Stores M quads from an indexed face set */
- template <int M>
- struct QuadMi
- {
- /* Virtual interface to query information about the quad type */
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* primitive supports multiple time segments */
- static const bool singleTimeSegment = false;
-
- /* Returns maximum number of stored quads */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- public:
-
- /* Default constructor */
- __forceinline QuadMi() { }
-
- /* Construction from vertices and IDs */
- __forceinline QuadMi(const vuint<M>& v0,
- const vuint<M>& v1,
- const vuint<M>& v2,
- const vuint<M>& v3,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs)
-#if defined(EMBREE_COMPACT_POLYS)
- : geomIDs(geomIDs), primIDs(primIDs) {}
-#else
- : v0_(v0),v1_(v1), v2_(v2), v3_(v3), geomIDs(geomIDs), primIDs(primIDs) {}
-#endif
-
- /* Returns a mask that tells which quads are valid */
- __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); }
-
- /* Returns if the specified quad is valid */
- __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; }
-
- /* Returns the number of stored quads */
- __forceinline size_t size() const { return bsf(~movemask(valid())); }
-
- /* Returns the geometry IDs */
- __forceinline vuint<M>& geomID() { return geomIDs; }
- __forceinline const vuint<M>& geomID() const { return geomIDs; }
- __forceinline unsigned int geomID(const size_t i) const { assert(i<M); assert(geomIDs[i] != -1); return geomIDs[i]; }
-
- /* Returns the primitive IDs */
- __forceinline vuint<M>& primID() { return primIDs; }
- __forceinline const vuint<M>& primID() const { return primIDs; }
- __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
-
- /* Calculate the bounds of the quads */
- __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const
- {
- BBox3fa bounds = empty;
- for (size_t i=0; i<M && valid(i); i++) {
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i));
- bounds.extend(mesh->bounds(primID(i),itime));
- }
- return bounds;
- }
-
- /* Calculate the linear bounds of the primitive */
- __forceinline LBBox3fa linearBounds(const Scene* const scene, const size_t itime) {
- return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1));
- }
-
- __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps)
- {
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i));
- allBounds.extend(mesh->linearBounds(primID(i), itime, numTimeSteps));
- }
- return allBounds;
- }
-
- __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range)
- {
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i));
- allBounds.extend(mesh->linearBounds(primID(i), time_range));
- }
- return allBounds;
- }
-
- /* Fill quad from quad list */
- template<typename PrimRefT>
- __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
- {
- vuint<M> geomID = -1, primID = -1;
- const PrimRefT* prim = &prims[begin];
- vuint<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero;
-
- for (size_t i=0; i<M; i++)
- {
- if (begin<end) {
- geomID[i] = prim->geomID();
- primID[i] = prim->primID();
-#if !defined(EMBREE_COMPACT_POLYS)
- const QuadMesh* mesh = scene->get<QuadMesh>(prim->geomID());
- const QuadMesh::Quad& q = mesh->quad(prim->primID());
- unsigned int_stride = mesh->vertices0.getStride()/4;
- v0[i] = q.v[0] * int_stride;
- v1[i] = q.v[1] * int_stride;
- v2[i] = q.v[2] * int_stride;
- v3[i] = q.v[3] * int_stride;
-#endif
- begin++;
- } else {
- assert(i);
- if (likely(i > 0)) {
- geomID[i] = geomID[0]; // always valid geomIDs
- primID[i] = -1; // indicates invalid data
- v0[i] = v0[0];
- v1[i] = v0[0];
- v2[i] = v0[0];
- v3[i] = v0[0];
- }
- }
- if (begin<end) prim = &prims[begin];
- }
- new (this) QuadMi(v0,v1,v2,v3,geomID,primID); // FIXME: use non temporal store
- }
-
- __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
- {
- fill(prims, begin, end, scene);
- return linearBounds(scene, itime);
- }
-
- __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
- {
- fill(prims, begin, end, scene);
- return linearBounds(scene, time_range);
- }
-
- friend embree_ostream operator<<(embree_ostream cout, const QuadMi& quad) {
- return cout << "QuadMi<" << M << ">( "
-#if !defined(EMBREE_COMPACT_POLYS)
- << "v0 = " << quad.v0_ << ", v1 = " << quad.v1_ << ", v2 = " << quad.v2_ << ", v3 = " << quad.v3_ << ", "
-#endif
- << "geomID = " << quad.geomIDs << ", primID = " << quad.primIDs << " )";
- }
-
- protected:
-#if !defined(EMBREE_COMPACT_POLYS)
- vuint<M> v0_; // 4 byte offset of 1st vertex
- vuint<M> v1_; // 4 byte offset of 2nd vertex
- vuint<M> v2_; // 4 byte offset of 3rd vertex
- vuint<M> v3_; // 4 byte offset of 4th vertex
-#endif
- vuint<M> geomIDs; // geometry ID of mesh
- vuint<M> primIDs; // primitive ID of primitive inside mesh
- };
-
- namespace isa
- {
-
- template<int M>
- struct QuadMi : public embree::QuadMi<M>
- {
-#if !defined(EMBREE_COMPACT_POLYS)
- using embree::QuadMi<M>::v0_;
- using embree::QuadMi<M>::v1_;
- using embree::QuadMi<M>::v2_;
- using embree::QuadMi<M>::v3_;
-#endif
- using embree::QuadMi<M>::geomIDs;
- using embree::QuadMi<M>::primIDs;
- using embree::QuadMi<M>::geomID;
- using embree::QuadMi<M>::primID;
- using embree::QuadMi<M>::valid;
-
- template<int vid>
- __forceinline Vec3f getVertex(const size_t index, const Scene *const scene) const
- {
-#if defined(EMBREE_COMPACT_POLYS)
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
- const QuadMesh::Quad& quad = mesh->quad(primID(index));
- return (Vec3f) mesh->vertices[0][quad.v[vid]];
-#else
- const vuint<M>& v = getVertexOffset<vid>();
- const float* vertices = scene->vertices[geomID(index)];
- return (Vec3f&) vertices[v[index]];
-#endif
- }
-
- template<int vid, typename T>
- __forceinline Vec3<T> getVertex(const size_t index, const Scene *const scene, const size_t itime, const T& ftime) const
- {
-#if defined(EMBREE_COMPACT_POLYS)
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
- const QuadMesh::Quad& quad = mesh->quad(primID(index));
- const Vec3fa v0 = mesh->vertices[itime+0][quad.v[vid]];
- const Vec3fa v1 = mesh->vertices[itime+1][quad.v[vid]];
-#else
- const vuint<M>& v = getVertexOffset<vid>();
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
- const float* vertices0 = (const float*) mesh->vertexPtr(0,itime+0);
- const float* vertices1 = (const float*) mesh->vertexPtr(0,itime+1);
- const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]);
- const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]);
-#endif
- const Vec3<T> p0(v0.x,v0.y,v0.z);
- const Vec3<T> p1(v1.x,v1.y,v1.z);
- return lerp(p0,p1,ftime);
- }
-
- template<int vid, int K, typename T>
- __forceinline Vec3<T> getVertex(const vbool<K>& valid, const size_t index, const Scene *const scene, const vint<K>& itime, const T& ftime) const
- {
- Vec3<T> p0, p1;
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
-
- for (size_t mask=movemask(valid), i=bsf(mask); mask; mask=btc(mask,i), i=bsf(mask))
- {
-#if defined(EMBREE_COMPACT_POLYS)
- const QuadMesh::Quad& quad = mesh->quad(primID(index));
- const Vec3fa v0 = mesh->vertices[itime[i]+0][quad.v[vid]];
- const Vec3fa v1 = mesh->vertices[itime[i]+1][quad.v[vid]];
-#else
- const vuint<M>& v = getVertexOffset<vid>();
- const float* vertices0 = (const float*) mesh->vertexPtr(0,itime[i]+0);
- const float* vertices1 = (const float*) mesh->vertexPtr(0,itime[i]+1);
- const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]);
- const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]);
-#endif
- p0.x[i] = v0.x; p0.y[i] = v0.y; p0.z[i] = v0.z;
- p1.x[i] = v1.x; p1.y[i] = v1.y; p1.z[i] = v1.z;
- }
- return (T(one)-ftime)*p0 + ftime*p1;
- }
-
- struct Quad {
- vfloat4 v0,v1,v2,v3;
- };
-
-#if defined(EMBREE_COMPACT_POLYS)
-
- __forceinline Quad loadQuad(const int i, const Scene* const scene) const
- {
- const unsigned int geomID = geomIDs[i];
- const unsigned int primID = primIDs[i];
- if (unlikely(primID == -1)) return { zero, zero, zero, zero };
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID);
- const QuadMesh::Quad& quad = mesh->quad(primID);
- const vfloat4 v0 = (vfloat4) mesh->vertices0[quad.v[0]];
- const vfloat4 v1 = (vfloat4) mesh->vertices0[quad.v[1]];
- const vfloat4 v2 = (vfloat4) mesh->vertices0[quad.v[2]];
- const vfloat4 v3 = (vfloat4) mesh->vertices0[quad.v[3]];
- return { v0, v1, v2, v3 };
- }
-
- __forceinline Quad loadQuad(const int i, const int itime, const Scene* const scene) const
- {
- const unsigned int geomID = geomIDs[i];
- const unsigned int primID = primIDs[i];
- if (unlikely(primID == -1)) return { zero, zero, zero, zero };
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID);
- const QuadMesh::Quad& quad = mesh->quad(primID);
- const vfloat4 v0 = (vfloat4) mesh->vertices[itime][quad.v[0]];
- const vfloat4 v1 = (vfloat4) mesh->vertices[itime][quad.v[1]];
- const vfloat4 v2 = (vfloat4) mesh->vertices[itime][quad.v[2]];
- const vfloat4 v3 = (vfloat4) mesh->vertices[itime][quad.v[3]];
- return { v0, v1, v2, v3 };
- }
-
-#else
-
- __forceinline Quad loadQuad(const int i, const Scene* const scene) const
- {
- const float* vertices = scene->vertices[geomID(i)];
- const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]);
- const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]);
- const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]);
- const vfloat4 v3 = vfloat4::loadu(vertices + v3_[i]);
- return { v0, v1, v2, v3 };
- }
-
- __forceinline Quad loadQuad(const int i, const int itime, const Scene* const scene) const
- {
- const unsigned int geomID = geomIDs[i];
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID);
- const float* vertices = (const float*) mesh->vertexPtr(0,itime);
- const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]);
- const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]);
- const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]);
- const vfloat4 v3 = vfloat4::loadu(vertices + v3_[i]);
- return { v0, v1, v2, v3 };
- }
-
-#endif
-
- /* Gather the quads */
- __forceinline void gather(Vec3vf<M>& p0,
- Vec3vf<M>& p1,
- Vec3vf<M>& p2,
- Vec3vf<M>& p3,
- const Scene *const scene) const;
-
-#if defined(__AVX512F__)
- __forceinline void gather(Vec3vf16& p0,
- Vec3vf16& p1,
- Vec3vf16& p2,
- Vec3vf16& p3,
- const Scene *const scene) const;
-#endif
-
- template<int K>
-#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 2000) // workaround for compiler bug in ICC 2019
- __noinline
-#else
- __forceinline
-#endif
- void gather(const vbool<K>& valid,
- Vec3vf<K>& p0,
- Vec3vf<K>& p1,
- Vec3vf<K>& p2,
- Vec3vf<K>& p3,
- const size_t index,
- const Scene* const scene,
- const vfloat<K>& time) const
- {
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
-
- vfloat<K> ftime;
- const vint<K> itime = mesh->timeSegment(time, ftime);
-
- const size_t first = bsf(movemask(valid));
- if (likely(all(valid,itime[first] == itime)))
- {
- p0 = getVertex<0>(index, scene, itime[first], ftime);
- p1 = getVertex<1>(index, scene, itime[first], ftime);
- p2 = getVertex<2>(index, scene, itime[first], ftime);
- p3 = getVertex<3>(index, scene, itime[first], ftime);
- }
- else
- {
- p0 = getVertex<0>(valid, index, scene, itime, ftime);
- p1 = getVertex<1>(valid, index, scene, itime, ftime);
- p2 = getVertex<2>(valid, index, scene, itime, ftime);
- p3 = getVertex<3>(valid, index, scene, itime, ftime);
- }
- }
-
- __forceinline void gather(Vec3vf<M>& p0,
- Vec3vf<M>& p1,
- Vec3vf<M>& p2,
- Vec3vf<M>& p3,
- const QuadMesh* mesh,
- const Scene *const scene,
- const int itime) const;
-
- __forceinline void gather(Vec3vf<M>& p0,
- Vec3vf<M>& p1,
- Vec3vf<M>& p2,
- Vec3vf<M>& p3,
- const Scene *const scene,
- const float time) const;
-
- /* Updates the primitive */
- __forceinline BBox3fa update(QuadMesh* mesh)
- {
- BBox3fa bounds = empty;
- for (size_t i=0; i<M; i++)
- {
- if (!valid(i)) break;
- const unsigned primId = primID(i);
- const QuadMesh::Quad& q = mesh->quad(primId);
- const Vec3fa p0 = mesh->vertex(q.v[0]);
- const Vec3fa p1 = mesh->vertex(q.v[1]);
- const Vec3fa p2 = mesh->vertex(q.v[2]);
- const Vec3fa p3 = mesh->vertex(q.v[3]);
- bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2),BBox3fa(p3)));
- }
- return bounds;
- }
-
- private:
-#if !defined(EMBREE_COMPACT_POLYS)
- template<int N> const vuint<M>& getVertexOffset() const;
-#endif
- };
-
-#if !defined(EMBREE_COMPACT_POLYS)
- template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<0>() const { return v0_; }
- template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<1>() const { return v1_; }
- template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<2>() const { return v2_; }
- template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<3>() const { return v3_; }
-#endif
-
- template<>
- __forceinline void QuadMi<4>::gather(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- Vec3vf4& p3,
- const Scene *const scene) const
- {
- prefetchL1(((char*)this)+0*64);
- prefetchL1(((char*)this)+1*64);
- const Quad tri0 = loadQuad(0,scene);
- const Quad tri1 = loadQuad(1,scene);
- const Quad tri2 = loadQuad(2,scene);
- const Quad tri3 = loadQuad(3,scene);
- transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z);
- transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z);
- transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z);
- transpose(tri0.v3,tri1.v3,tri2.v3,tri3.v3,p3.x,p3.y,p3.z);
- }
-
- template<>
- __forceinline void QuadMi<4>::gather(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- Vec3vf4& p3,
- const QuadMesh* mesh,
- const Scene *const scene,
- const int itime) const
- {
- // FIXME: for trianglei there all geometries are identical, is this the case here too?
-
- const Quad tri0 = loadQuad(0,itime,scene);
- const Quad tri1 = loadQuad(1,itime,scene);
- const Quad tri2 = loadQuad(2,itime,scene);
- const Quad tri3 = loadQuad(3,itime,scene);
- transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z);
- transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z);
- transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z);
- transpose(tri0.v3,tri1.v3,tri2.v3,tri3.v3,p3.x,p3.y,p3.z);
- }
-
- template<>
- __forceinline void QuadMi<4>::gather(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- Vec3vf4& p3,
- const Scene *const scene,
- const float time) const
- {
- const QuadMesh* mesh = scene->get<QuadMesh>(geomID(0)); // in mblur mode all geometries are identical
-
- float ftime;
- const int itime = mesh->timeSegment(time, ftime);
-
- Vec3vf4 a0,a1,a2,a3; gather(a0,a1,a2,a3,mesh,scene,itime);
- Vec3vf4 b0,b1,b2,b3; gather(b0,b1,b2,b3,mesh,scene,itime+1);
- p0 = lerp(a0,b0,vfloat4(ftime));
- p1 = lerp(a1,b1,vfloat4(ftime));
- p2 = lerp(a2,b2,vfloat4(ftime));
- p3 = lerp(a3,b3,vfloat4(ftime));
- }
- }
-
- template<int M>
- typename QuadMi<M>::Type QuadMi<M>::type;
-
- typedef QuadMi<4> Quad4i;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h
deleted file mode 100644
index 96cf7f1ca2..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h
+++ /dev/null
@@ -1,350 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "quadi.h"
-#include "quad_intersector_moeller.h"
-#include "quad_intersector_pluecker.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Intersects M quads with 1 ray; vertices are gathered from the scene and the test itself is delegated to QuadMIntersector1MoellerTrumbore. */
- template<int M, bool filter>
- struct QuadMiIntersector1Moeller
- {
- typedef QuadMi<M> Primitive;
- typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations;
-
- /*! Gathers the vertices of the M quads, intersects the ray with them and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
- pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- /*! Gathers the vertices of the M quads and tests if the ray is occluded by one of them. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
- return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); // forwarded unchanged to PrimitivePointQuery1
- }
- };
-
- /*! Intersects M quads with K rays (vertices are fetched from the scene; the test is delegated to QuadMIntersectorKMoellerTrumbore). */
- template<int M, int K, bool filter>
- struct QuadMiIntersectorKMoeller
- {
- typedef QuadMi<M> Primitive;
- typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations;
-
- /*! Intersects K rays with M quads, one quad at a time. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
- {
- Scene* scene = context->scene;
- for (size_t i=0; i<QuadMi<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break; // stop at the first invalid quad slot
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene);
- const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene);
- const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene);
- const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene);
- pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M quads; returns the complement of the mask of rays still active at the end. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
- {
- Scene* scene = context->scene;
- vbool<K> valid0 = valid_i; // working mask, handed to the epilog by reference-style argument (presumably cleared for occluded rays - confirm in OccludedKEpilogM)
- for (size_t i=0; i<QuadMi<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break; // stop at the first invalid quad slot
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene);
- const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene);
- const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene);
- const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene);
- if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i)))
- break; // a true result ends the loop early, the remaining quads are not tested
- }
- return !valid0;
- }
-
- /*! Intersect ray k of the packet with the M quads and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); // Vec3vf<M> (was the hard-coded Vec3vf4) to follow the template width like the sibling intersectors
- pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- /*! Test if ray k of the packet is occluded by one of the M quads. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); // Vec3vf<M> (was the hard-coded Vec3vf4) to follow the template width like the sibling intersectors
- return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
- };
-
- /*! Intersects M quads with 1 ray; vertices are gathered from the scene and the test itself is delegated to QuadMIntersector1Pluecker. */
- template<int M, bool filter>
- struct QuadMiIntersector1Pluecker
- {
- typedef QuadMi<M> Primitive;
- typedef QuadMIntersector1Pluecker<M,filter> Precalculations;
-
- /*! Gathers the vertices of the M quads, intersects the ray with them and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
- pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- /*! Gathers the vertices of the M quads and tests if the ray is occluded by one of them. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene);
- return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); // forwarded unchanged to PrimitivePointQuery1
- }
- };
-
- /*! Intersects M quads with K rays (vertices are fetched from the scene; the test is delegated to QuadMIntersectorKPluecker). */
- template<int M, int K, bool filter>
- struct QuadMiIntersectorKPluecker
- {
- typedef QuadMi<M> Primitive;
- typedef QuadMIntersectorKPluecker<M,K,filter> Precalculations;
-
- /*! Intersects K rays with M quads, one quad at a time. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
- {
- Scene* scene = context->scene;
- for (size_t i=0; i<QuadMi<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break; // stop at the first invalid quad slot
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene);
- const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene);
- const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene);
- const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene);
- pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M quads; returns the complement of the mask of rays still active at the end. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
- {
- Scene* scene = context->scene;
- vbool<K> valid0 = valid_i; // working mask, also handed to the epilog (presumably cleared for occluded rays - confirm in OccludedKEpilogM)
- for (size_t i=0; i<QuadMi<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break; // stop at the first invalid quad slot
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- const Vec3vf<K> p0 = quad.template getVertex<0>(i,scene);
- const Vec3vf<K> p1 = quad.template getVertex<1>(i,scene);
- const Vec3vf<K> p2 = quad.template getVertex<2>(i,scene);
- const Vec3vf<K> p3 = quad.template getVertex<3>(i,scene);
- if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i)))
- break; // a true result ends the loop early, the remaining quads are not tested
- }
- return !valid0;
- }
-
- /*! Intersect ray k of the packet with the M quads and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); // Vec3vf<M> (was the hard-coded Vec3vf4) to follow the template width like the sibling intersectors
- pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- /*! Test if ray k of the packet is occluded by one of the M quads. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene); // Vec3vf<M> (was the hard-coded Vec3vf4) to follow the template width like the sibling intersectors
- return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
- };
-
- /*! Intersects M motion blur quads with 1 ray; vertices are gathered at the ray's time and tested by QuadMIntersector1MoellerTrumbore. */
- template<int M, bool filter>
- struct QuadMiMBIntersector1Moeller
- {
- typedef QuadMi<M> Primitive;
- typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations;
-
- /*! Gathers the M quads at ray.time(), intersects the ray with them and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time());
- pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- /*! Gathers the M quads at ray.time() and tests if the ray is occluded by one of them. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time());
- return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); // forwarded unchanged to PrimitivePointQuery1
- }
- };
-
- /*! Intersects M motion blur quads with K rays; the test itself is delegated to QuadMIntersectorKMoellerTrumbore. */
- template<int M, int K, bool filter>
- struct QuadMiMBIntersectorKMoeller
- {
- typedef QuadMi<M> Primitive;
- typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations;
-
- /*! Intersects K rays with M quads, one quad at a time. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
- {
- for (size_t i=0; i<QuadMi<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break; // stop at the first invalid quad slot
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); // vertices of quad i, gathered with the times of the K rays
- pre.intersectK(valid_i,ray,v0,v1,v2,v3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M quads; returns the complement of the mask of rays still active at the end. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
- {
- vbool<K> valid0 = valid_i; // working mask, also handed to the epilog (presumably cleared for occluded rays - confirm in OccludedKEpilogM)
- for (size_t i=0; i<QuadMi<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break; // stop at the first invalid quad slot
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); // note: gathers with the entry mask valid_i, not with valid0
- if (pre.intersectK(valid0,ray,v0,v1,v2,v3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i)))
- break; // a true result ends the loop early, the remaining quads are not tested
- }
- return !valid0;
- }
-
- /*! Intersect ray k of the packet with the M quads (gathered at that ray's time) and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()[k]);
- pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- /*! Test if ray k of the packet is occluded by one of the M quads (gathered at that ray's time). */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()[k]);
- return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
- };
-
- /*! Intersects M motion blur quads with 1 ray; vertices are gathered at the ray's time and tested by QuadMIntersector1Pluecker. */
- template<int M, bool filter>
- struct QuadMiMBIntersector1Pluecker
- {
- typedef QuadMi<M> Primitive;
- typedef QuadMIntersector1Pluecker<M,filter> Precalculations;
-
- /*! Gathers the M quads at ray.time(), intersects the ray with them and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time());
- pre.intersect(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- /*! Gathers the M quads at ray.time() and tests if the ray is occluded by one of them. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time());
- return pre.occluded(ray,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); // forwarded unchanged to PrimitivePointQuery1
- }
- };
-
- /*! Intersects M motion blur quads with K rays; the test itself is delegated to QuadMIntersectorKPluecker. */
- template<int M, int K, bool filter>
- struct QuadMiMBIntersectorKPluecker
- {
- typedef QuadMi<M> Primitive;
- typedef QuadMIntersectorKPluecker<M,K,filter> Precalculations;
-
- /*! Intersects K rays with M quads, one quad at a time. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
- {
- for (size_t i=0; i<QuadMi<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break; // stop at the first invalid quad slot
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); // vertices of quad i, gathered with the times of the K rays
- pre.intersectK(valid_i,ray,v0,v1,v2,v3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M quads; returns the complement of the mask of rays still active at the end. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMi<M>& quad)
- {
- vbool<K> valid0 = valid_i; // working mask, also handed to the epilog (presumably cleared for occluded rays - confirm in OccludedKEpilogM)
- for (size_t i=0; i<QuadMi<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break; // stop at the first invalid quad slot
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); // note: gathers with the entry mask valid_i, not with valid0
- if (pre.intersectK(valid0,ray,v0,v1,v2,v3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i)))
- break; // a true result ends the loop early, the remaining quads are not tested
- }
- return !valid0;
- }
-
- /*! Intersect ray k of the packet with the M quads (gathered at that ray's time) and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()[k]);
- pre.intersect1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
-
- /*! Test if ray k of the packet is occluded by one of the M quads (gathered at that ray's time). */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMi<M>& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2,v3; quad.gather(v0,v1,v2,v3,context->scene,ray.time()[k]);
- return pre.occluded1(ray,k,context,v0,v1,v2,v3,quad.geomID(),quad.primID());
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadv.h b/thirdparty/embree-aarch64/kernels/geometry/quadv.h
deleted file mode 100644
index 0a1fe4d128..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/quadv.h
+++ /dev/null
@@ -1,165 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-
-namespace embree
-{
- /* Stores the vertices of M quads in struct of array layout, together with their geometry and primitive IDs */
- template <int M>
- struct QuadMv
- {
- public:
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* Returns maximum number of stored quads */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives (N/M rounded up) */
- static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- public:
-
- /* Default constructor, leaves all members uninitialized */
- __forceinline QuadMv() {}
-
- /* Construction from vertices and IDs */
- __forceinline QuadMv(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const vuint<M>& geomIDs, const vuint<M>& primIDs)
- : v0(v0), v1(v1), v2(v2), v3(v3), geomIDs(geomIDs), primIDs(primIDs) {}
-
- /* Returns a mask that tells which quads are valid (geometry ID different from -1) */
- __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); }
-
- /* Returns true if the specified quad is valid */
- __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; }
-
- /* Returns the number of stored quads (position of the first invalid slot in the valid mask) */
- __forceinline size_t size() const { return bsf(~movemask(valid())); }
-
- /* Returns the geometry IDs */
- __forceinline vuint<M>& geomID() { return geomIDs; }
- __forceinline const vuint<M>& geomID() const { return geomIDs; }
- __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; }
-
- /* Returns the primitive IDs (by value, unlike geomID()) */
- __forceinline vuint<M> primID() { return primIDs; }
- __forceinline const vuint<M> primID() const { return primIDs; }
- __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
-
- /* Calculate the bounds of the valid quads */
- __forceinline BBox3fa bounds() const
- {
- Vec3vf<M> lower = min(v0,v1,v2,v3);
- Vec3vf<M> upper = max(v0,v1,v2,v3);
- vbool<M> mask = valid();
- lower.x = select(mask,lower.x,vfloat<M>(pos_inf)); // invalid slots get +inf so they cannot win reduce_min
- lower.y = select(mask,lower.y,vfloat<M>(pos_inf));
- lower.z = select(mask,lower.z,vfloat<M>(pos_inf));
- upper.x = select(mask,upper.x,vfloat<M>(neg_inf)); // invalid slots get -inf so they cannot win reduce_max
- upper.y = select(mask,upper.y,vfloat<M>(neg_inf));
- upper.z = select(mask,upper.z,vfloat<M>(neg_inf));
- return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)),
- Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z)));
- }
-
- /* Non temporal store of all vertex components and both ID vectors of src into dst */
- __forceinline static void store_nt(QuadMv* dst, const QuadMv& src)
- {
- vfloat<M>::store_nt(&dst->v0.x,src.v0.x);
- vfloat<M>::store_nt(&dst->v0.y,src.v0.y);
- vfloat<M>::store_nt(&dst->v0.z,src.v0.z);
- vfloat<M>::store_nt(&dst->v1.x,src.v1.x);
- vfloat<M>::store_nt(&dst->v1.y,src.v1.y);
- vfloat<M>::store_nt(&dst->v1.z,src.v1.z);
- vfloat<M>::store_nt(&dst->v2.x,src.v2.x);
- vfloat<M>::store_nt(&dst->v2.y,src.v2.y);
- vfloat<M>::store_nt(&dst->v2.z,src.v2.z);
- vfloat<M>::store_nt(&dst->v3.x,src.v3.x);
- vfloat<M>::store_nt(&dst->v3.y,src.v3.y);
- vfloat<M>::store_nt(&dst->v3.z,src.v3.z);
- vuint<M>::store_nt(&dst->geomIDs,src.geomIDs);
- vuint<M>::store_nt(&dst->primIDs,src.primIDs);
- }
-
- /* Fills this block with up to M quads taken from prims[begin..end), advancing begin past the consumed entries */
- __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene)
- {
- vuint<M> vgeomID = -1, vprimID = -1; // slots that are not filled keep ID -1, i.e. stay invalid
- Vec3vf<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero;
-
- for (size_t i=0; i<M && begin<end; i++, begin++)
- {
- const PrimRef& prim = prims[begin];
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const QuadMesh* __restrict__ const mesh = scene->get<QuadMesh>(geomID);
- const QuadMesh::Quad& quad = mesh->quad(primID);
- const Vec3fa& p0 = mesh->vertex(quad.v[0]);
- const Vec3fa& p1 = mesh->vertex(quad.v[1]);
- const Vec3fa& p2 = mesh->vertex(quad.v[2]);
- const Vec3fa& p3 = mesh->vertex(quad.v[3]);
- vgeomID [i] = geomID;
- vprimID [i] = primID;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z;
- }
- QuadMv::store_nt(this,QuadMv(v0,v1,v2,v3,vgeomID,vprimID));
- }
-
- /* Updates the primitive: re-reads the vertices of the stored quads from mesh and returns their merged bounds */
- __forceinline BBox3fa update(QuadMesh* mesh)
- {
- BBox3fa bounds = empty;
- vuint<M> vgeomID = -1, vprimID = -1;
- Vec3vf<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero; // v3 must be a local as well: without it the member v3 was written in place and then passed to its own re-construction below
-
- for (size_t i=0; i<M; i++)
- {
- if (primID(i) == -1) break; // stop at the first slot without a primitive ID
- const unsigned geomId = geomID(i);
- const unsigned primId = primID(i);
- const QuadMesh::Quad& quad = mesh->quad(primId);
- const Vec3fa p0 = mesh->vertex(quad.v[0]);
- const Vec3fa p1 = mesh->vertex(quad.v[1]);
- const Vec3fa p2 = mesh->vertex(quad.v[2]);
- const Vec3fa p3 = mesh->vertex(quad.v[3]);
- bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2),BBox3fa(p3)));
- vgeomID [i] = geomId;
- vprimID [i] = primId;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z;
- }
- new (this) QuadMv(v0,v1,v2,v3,vgeomID,vprimID); // rebuild this block in place from the freshly read locals
- return bounds;
- }
-
- public:
- Vec3vf<M> v0; // 1st vertex of the quads
- Vec3vf<M> v1; // 2nd vertex of the quads
- Vec3vf<M> v2; // 3rd vertex of the quads
- Vec3vf<M> v3; // 4th vertex of the quads
- private:
- vuint<M> geomIDs; // geometry ID
- vuint<M> primIDs; // primitive ID
- };
-
- template<int M> // out-of-class definition of the static member QuadMv<M>::type
- typename QuadMv<M>::Type QuadMv<M>::type;
-
- typedef QuadMv<4> Quad4v; // shorthand for the M=4 instantiation
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h
deleted file mode 100644
index 30a24b291a..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h
+++ /dev/null
@@ -1,181 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "quadv.h"
-#include "quad_intersector_moeller.h"
-#include "quad_intersector_pluecker.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Intersects M quads with 1 ray; the vertices stored in the QuadMv block are passed directly to QuadMIntersector1MoellerTrumbore. */
- template<int M, bool filter>
- struct QuadMvIntersector1Moeller
- {
- typedef QuadMv<M> Primitive;
- typedef QuadMIntersector1MoellerTrumbore<M,filter> Precalculations;
-
- /*! Intersect a ray with the M quads and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersect(ray,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID());
- }
-
- /*! Test if the ray is occluded by one of M quads. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.occluded(ray,context, quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID());
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); // forwarded unchanged to PrimitivePointQuery1
- }
- };
-
- /*! Intersects M quads with K rays; the test itself is delegated to QuadMIntersectorKMoellerTrumbore. */
- template<int M, int K, bool filter>
- struct QuadMvIntersectorKMoeller
- {
- typedef QuadMv<M> Primitive;
- typedef QuadMIntersectorKMoellerTrumbore<M,K,filter> Precalculations;
-
- /*! Intersects K rays with M quads, one quad at a time. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMv<M>& quad)
- {
- for (size_t i=0; i<QuadMv<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break; // stop at the first invalid quad slot
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> p0 = broadcast<vfloat<K>>(quad.v0,i); // vertex 0 of quad i as a K-wide vector
- const Vec3vf<K> p1 = broadcast<vfloat<K>>(quad.v1,i);
- const Vec3vf<K> p2 = broadcast<vfloat<K>>(quad.v2,i);
- const Vec3vf<K> p3 = broadcast<vfloat<K>>(quad.v3,i);
- pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M quads; returns the complement of the mask of rays still active at the end. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMv<M>& quad)
- {
- vbool<K> valid0 = valid_i; // working mask, also handed to the epilog (presumably cleared for occluded rays - confirm in OccludedKEpilogM)
-
- for (size_t i=0; i<QuadMv<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break; // stop at the first invalid quad slot
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- const Vec3vf<K> p0 = broadcast<vfloat<K>>(quad.v0,i); // vertex 0 of quad i as a K-wide vector
- const Vec3vf<K> p1 = broadcast<vfloat<K>>(quad.v1,i);
- const Vec3vf<K> p2 = broadcast<vfloat<K>>(quad.v2,i);
- const Vec3vf<K> p3 = broadcast<vfloat<K>>(quad.v3,i);
- if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i)))
- break; // a true result ends the loop early, the remaining quads are not tested
- }
- return !valid0;
- }
-
- /*! Intersect ray k of the packet with the M quads and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersect1(ray,k,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID());
- }
-
- /*! Test if ray k of the packet is occluded by one of the M quads. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.occluded1(ray,k,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID());
- }
- };
-
- /*! Intersects M quads with 1 ray; the vertices stored in the QuadMv block are passed directly to QuadMIntersector1Pluecker. */
- template<int M, bool filter>
- struct QuadMvIntersector1Pluecker
- {
- typedef QuadMv<M> Primitive;
- typedef QuadMIntersector1Pluecker<M,filter> Precalculations;
-
- /*! Intersect a ray with the M quads and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersect(ray,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID());
- }
-
- /*! Test if the ray is occluded by one of M quads. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.occluded(ray,context, quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID());
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& quad)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, quad); // forwarded unchanged to PrimitivePointQuery1
- }
- };
-
- /*! Intersects M quads with K rays; the test itself is delegated to QuadMIntersectorKPluecker. */
- template<int M, int K, bool filter>
- struct QuadMvIntersectorKPluecker
- {
- typedef QuadMv<M> Primitive;
- typedef QuadMIntersectorKPluecker<M,K,filter> Precalculations;
-
- /*! Intersects K rays with M quads, one quad at a time. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const QuadMv<M>& quad)
- {
- for (size_t i=0; i<QuadMv<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break; // stop at the first invalid quad slot
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> p0 = broadcast<vfloat<K>>(quad.v0,i); // vertex 0 of quad i as a K-wide vector
- const Vec3vf<K> p1 = broadcast<vfloat<K>>(quad.v1,i);
- const Vec3vf<K> p2 = broadcast<vfloat<K>>(quad.v2,i);
- const Vec3vf<K> p3 = broadcast<vfloat<K>>(quad.v3,i);
- pre.intersectK(valid_i,ray,p0,p1,p2,p3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M quads; returns the complement of the mask of rays still active at the end. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const QuadMv<M>& quad)
- {
- vbool<K> valid0 = valid_i; // working mask, also handed to the epilog (presumably cleared for occluded rays - confirm in OccludedKEpilogM)
-
- for (size_t i=0; i<QuadMv<M>::max_size(); i++)
- {
- if (!quad.valid(i)) break; // stop at the first invalid quad slot
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- const Vec3vf<K> p0 = broadcast<vfloat<K>>(quad.v0,i); // vertex 0 of quad i as a K-wide vector
- const Vec3vf<K> p1 = broadcast<vfloat<K>>(quad.v1,i);
- const Vec3vf<K> p2 = broadcast<vfloat<K>>(quad.v2,i);
- const Vec3vf<K> p3 = broadcast<vfloat<K>>(quad.v3,i);
- if (pre.intersectK(valid0,ray,p0,p1,p2,p3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i)))
- break; // a true result ends the loop early, the remaining quads are not tested
- }
- return !valid0;
- }
-
- /*! Intersect ray k of the packet with the M quads and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersect1(ray,k,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID());
- }
-
- /*! Test if ray k of the packet is occluded by one of the M quads. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const QuadMv<M>& quad)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.occluded1(ray,k,context,quad.v0,quad.v1,quad.v2,quad.v3,quad.geomID(),quad.primID());
- }
- };
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h
deleted file mode 100644
index cdf68f486b..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h
+++ /dev/null
@@ -1,710 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "curve_intersector_precalculations.h"
-
-
-/*
-
- This file implements the intersection of a ray with a round linear
- curve segment. We define the geometry of such a round linear curve
- segment from point p0 with radius r0 to point p1 with radius r1
- using the cone that touches spheres p0/r0 and p1/r1 tangentially
- plus the sphere p1/r1. We denote the tangentially touching cone from
- p0/r0 to p1/r1 with cone(p0,r0,p1,r1) and the cone plus the ending
- sphere with cone_sphere(p0,r0,p1,r1).
-
- For multiple connected round linear curve segments this construction
- yields a proper shape when viewed from the outside. Using the
- following CSG we can also handle the interior in most common cases:
-
- round_linear_curve(pl,rl,p0,r0,p1,r1,pr,rr) =
- cone_sphere(p0,r0,p1,r1) - cone(pl,rl,p0,r0) - cone(p1,r1,pr,rr)
-
- Thus by subtracting the neighboring cone geometries, we cut away
- parts of the center cone_sphere surface which lie inside the
- combined curve. This approach works as long as geometry of the
- current cone_sphere penetrates into direct neighbor segments only,
- and not into segments further away.
-
- To construct a cone that touches two spheres at p0 and p1 with radii r0
- and r1, one has to increase the cone radius at p0 and p1 to obtain
- larger radii w0 and w1, such that the infinite cone properly touches
- the spheres. From the paper "Ray Tracing Generalized Tube
- Primitives: Method and Applications"
- (https://www.researchgate.net/publication/334378683_Ray_Tracing_Generalized_Tube_Primitives_Method_and_Applications)
- one can derive the following equations for these increased
- radii:
-
- sr = 1.0f / sqrt(1-sqr(dr)/sqr(p1-p0))
- w0 = sr*r0
- w1 = sr*r1
-
- Further, we want the cone to start where it touches the sphere at p0
- and to end where it touches sphere at p1. Therefore, we need to
- construct clipping locations y0 and y1 for the start and end of the
- cone. These start and end clipping location of the cone can get
- calculated as:
-
- Y0 = - r0 * (r1-r0) / length(p1-p0)
- Y1 = length(p1-p0) - r1 * (r1-r0) / length(p1-p0)
-
- Where the cone starts a distance Y0 and ends a distance Y1 away from
- point p0 along the cone center. The distance dY = Y1-Y0 can get
- calculated as:
-
- dY = length(p1-p0) - (r1-r0)^2 / length(p1-p0)
-
- In the code below, Y will always be scaled by length(p1-p0) to
- obtain y and you will find the terms r0*(r1-r0) and
- (p1-p0)^2-(r1-r0)^2.
-
- */
-
-namespace embree
-{
- namespace isa
- {
- template<int M> // M: width of the vfloat/Vec3vf members, one candidate hit per element
- struct RoundLineIntersectorHitM
- {
- __forceinline RoundLineIntersectorHitM() {} // no member is explicitly initialized here
-
- __forceinline RoundLineIntersectorHitM(const vfloat<M>& u, const vfloat<M>& v, const vfloat<M>& t, const Vec3vf<M>& Ng)
- : vu(u), vv(v), vt(t), vNg(Ng) {} // stores the values exactly as passed in
-
- __forceinline void finalize() {} // intentionally empty: nothing is computed lazily
-
- __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } // (u,v) of element i
- __forceinline float t (const size_t i) const { return vt[i]; } // hit distance of element i
- __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } // geometry normal of element i, returned as stored (no normalization here)
-
- public:
- vfloat<M> vu; // u coordinates
- vfloat<M> vv; // v coordinates
- vfloat<M> vt; // hit distances
- Vec3vf<M> vNg; // geometry normals
- };
-
- namespace __roundline_internal
- {
- template<int M>
- struct ConeGeometry
- {
- ConeGeometry (const Vec4vf<M>& a, const Vec4vf<M>& b) // a and b: xyz is used as position (p0/p1), w as radius (r0/r1)
- : p0(a.xyz()), p1(b.xyz()), dP(p1-p0), dPdP(dot(dP,dP)), r0(a.w), sqr_r0(sqr(r0)), r1(b.w), dr(r1-r0), drdr(dr*dr), r0dr (r0*dr), g(dPdP - drdr) {} // later initializers read earlier members, so this relies on the member declaration order; g = dP^2 - dr^2
-
- /*
-
- This function tests if a point is accepted by first cone
- clipping plane.
-
- First, we need to project the point onto the line p0->p1:
-
- Y = (p-p0)*(p1-p0)/length(p1-p0)
-
- This value Y is the distance to the projection point from
- p0. The clip distances are calculated as:
-
- Y0 = - r0 * (r1-r0) / length(p1-p0)
- Y1 = length(p1-p0) - r1 * (r1-r0) / length(p1-p0)
-
- Thus to test if the point p is accepted by the first
- clipping plane we need to test Y > Y0 and to test if it
- is accepted by the second clipping plane we need to test
- Y < Y1.
-
- By multiplying the calculations with length(p1-p0) these
- calculations can get simplified to:
-
- y = (p-p0)*(p1-p0)
- y0 = - r0 * (r1-r0)
- y1 = (p1-p0)^2 - r1 * (r1-r0)
-
- and the test y > y0 and y < y1.
-
- */
-
- __forceinline vbool<M> isClippedByPlane (const vbool<M>& valid_i, const Vec3vf<M>& p) const // NOTE: despite the name, a set lane means p PASSED the start-plane test (y > y0)
- {
- const Vec3vf<M> p0p = p - p0;
- const vfloat<M> y = dot(p0p,dP); // projection of p-p0 onto dP, not divided by length(dP)
- const vfloat<M> cap0 = -r0dr; // y0 = -r0*(r1-r0), the start clipping plane
- const vbool<M> inside_cone = y > cap0;
- return valid_i & (p0.x != vfloat<M>(inf)) & (p1.x != vfloat<M>(inf)) & inside_cone; // lanes whose p0.x or p1.x equals inf never pass
- }
-
- /*
-
- This function tests whether a point lies inside the capped cone
- tangential to its ending spheres.
-
- Therefore one has to check if the point is inside the
- region defined by the cone clipping planes, which is
- performed similar as in the previous function.
-
- To perform the inside cone test we need to project the
- point onto the line p0->p1:
-
- dP = p1-p0
- Y = (p-p0)*dP/length(dP)
-
- This value Y is the distance to the projection point from
- p0. To obtain a parameter value u going from 0 to 1 along
- the line p0->p1 we calculate:
-
- U = Y/length(dP)
-
- The radii to use at points p0 and p1 are:
-
- w0 = sr * r0
- w1 = sr * r1
- dw = w1-w0
-
- Using these radii and u one can directly test if the point
- lies inside the cone using the formula py*py < wy*wy with:
-
- wy = w0 + u*dw
- py = p0 + u*dP - p
-
- By multiplying the calculations with length(p1-p0) and
- inserting the definition of w one can obtain simpler equations:
-
- y = (p-p0)*dP
- ry = r0 + y/dP^2 * dr
- wy = sr*ry
- py = p0 + y/dP^2*dP - p
- y0 = - r0 * dr
- y1 = dP^2 - r1 * dr
-
- Thus for the in-cone test we get:
-
- py^2 < wy^2
- <=> py^2 < sr^2 * ry^2
- <=> py^2 * ( dP^2 - dr^2 ) < dP^2 * ry^2
-
- This can further get simplified to:
-
- (p0-p)^2 * (dP^2 - dr^2) - y^2 < dP^2 * r0^2 + 2.0f*r0*dr*y;
-
- */
-
- __forceinline vbool<M> isInsideCappedCone (const vbool<M>& valid_i, const Vec3vf<M>& p) const // per-lane mask: p lies between both clipping planes and inside the cone surface
- {
- const Vec3vf<M> p0p = p - p0;
- const vfloat<M> y = dot(p0p,dP); // projection of p-p0 onto dP, not divided by length(dP)
- const vfloat<M> cap0 = -r0dr+vfloat<M>(ulp); // y0 = -r0*dr, shifted by ulp so points exactly on the start plane are not counted as inside
- const vfloat<M> cap1 = -r1*dr + dPdP; // y1 = dP^2 - r1*dr
-
- vbool<M> inside_cone = valid_i & (p0.x != vfloat<M>(inf)) & (p1.x != vfloat<M>(inf)); // lanes whose p0.x or p1.x equals inf never pass
- inside_cone &= y > cap0; // start clipping plane
- inside_cone &= y < cap1; // end clipping plane
- inside_cone &= sqr(p0p)*g - sqr(y) < dPdP * sqr_r0 + 2.0f*r0dr*y; // in cone test
- return inside_cone;
- }
-
- protected:
- Vec3vf<M> p0; // a.xyz(): cone start point
- Vec3vf<M> p1; // b.xyz(): cone end point
- Vec3vf<M> dP; // p1-p0
- vfloat<M> dPdP; // dot(dP,dP)
- vfloat<M> r0; // a.w: radius at p0
- vfloat<M> sqr_r0; // r0^2
- vfloat<M> r1; // b.w: radius at p1
- vfloat<M> dr; // r1-r0
- vfloat<M> drdr; // dr^2
- vfloat<M> r0dr; // r0*dr
- vfloat<M> g; // dPdP - drdr
- };
-
- template<int M>
- struct ConeGeometryIntersector : public ConeGeometry<M>
- {
- using ConeGeometry<M>::p0;
- using ConeGeometry<M>::p1;
- using ConeGeometry<M>::dP;
- using ConeGeometry<M>::dPdP;
- using ConeGeometry<M>::r0;
- using ConeGeometry<M>::sqr_r0;
- using ConeGeometry<M>::r1;
- using ConeGeometry<M>::dr;
- using ConeGeometry<M>::r0dr;
- using ConeGeometry<M>::g;
-
- ConeGeometryIntersector (const Vec3vf<M>& ray_org, const Vec3vf<M>& ray_dir, const vfloat<M>& dOdO, const vfloat<M>& rcp_dOdO, const Vec4vf<M>& a, const Vec4vf<M>& b) // dOdO and rcp_dOdO are supplied by the caller rather than recomputed from ray_dir
- : ConeGeometry<M>(a,b), org(ray_org), O(ray_org-p0), dO(ray_dir), dOdO(dOdO), rcp_dOdO(rcp_dOdO), OdP(dot(dP,O)), dOdP(dot(dP,dO)), yp(OdP + r0dr) {} // O is the ray origin relative to p0; yp = O*dP + r0*dr; hit members (t_*/y_*) are only written by the intersect* methods
-
- /*
-
- This function intersects a ray with a cone that touches a
- start sphere p0/r0 and end sphere p1/r1.
-
- To find this ray/cone intersections one could just
- calculate radii w0 and w1 as described above and use a
- standard ray/cone intersection routine with these
- radii. However, it turns out that calculations can get
- simplified when deriving a specialized ray/cone
- intersection for this special case. We perform
- calculations relative to the cone origin p0 and define:
-
- O = ray_org - p0
- dO = ray_dir
- dP = p1-p0
- dr = r1-r0
- dw = w1-w0
-
- For some t we can compute the potential hit point h = O + t*dO and
- project it onto the cone vector dP to obtain u = (h*dP)/(dP*dP). In
- case of an intersection, the squared distance from the hit point
- projected onto the cone center line to the hit point should be equal
- to the squared cone radius at u:
-
- (u*dP - h)^2 = (w0 + u*dw)^2
-
- Inserting the definition of h, u, w0, and dw into this formula, then
- factoring out all terms, and sorting by t^2, t^1, and t^0 terms
- yields a quadratic equation to solve.
-
- Inserting u:
- ( (h*dP)*dP/dP^2 - h )^2 = ( w0 + (h*dP)*dw/dP^2 )^2
-
- Multiplying by dP^4:
- ( (h*dP)*dP - h*dP^2 )^2 = ( w0*dP^2 + (h*dP)*dw )^2
-
- Inserting w0 and dw:
- ( (h*dP)*dP - h*dP^2 )^2 = ( r0*dP^2 + (h*dP)*dr )^2 / (1-dr^2/dP^2)
- ( (h*dP)*dP - h*dP^2 )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + (h*dP)*dr )^2
-
- Now one can insert the definition of h, factor out, and presort by t:
- ( ((O + t*dO)*dP)*dP - (O + t*dO)*dP^2 )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + ((O + t*dO)*dP)*dr )^2
- ( (O*dP)*dP-O*dP^2 + t*( (dO*dP)*dP - dO*dP^2 ) )^2 *(dP^2 - dr^2) = dP^2 * ( r0*dP^2 + (O*dP)*dr + t*(dO*dP)*dr )^2
-
- Factoring out further and sorting by t^2, t^1 and t^0 yields:
-
- 0 = t^2 * [ ((dO*dP)*dP - dO*dP^2)^2 * (dP^2 - dr^2) - dP^2*(dO*dP)^2*dr^2 ]
- + 2*t^1 * [ ((O*dP)*dP - O*dP^2) * ((dO*dP)*dP - dO*dP^2) * (dP^2 - dr^2) - dP^2*(r0*dP^2 + (O*dP)*dr)*(dO*dP)*dr ]
- + t^0 * [ ( (O*dP)*dP - O*dP^2)^2 * (dP^2-dr^2) - dP^2*(r0*dP^2 + (O*dP)*dr)^2 ]
-
- This can be simplified to:
-
- 0 = t^2 * [ (dP^2 - dr^2)*dO^2 - (dO*dP)^2 ]
- + 2*t^1 * [ (dP^2 - dr^2)*(O*dO) - (dO*dP)*(O*dP + r0*dr) ]
- + t^0 * [ (dP^2 - dr^2)*O^2 - (O*dP)^2 - r0^2*dP^2 - 2.0f*r0*dr*(O*dP) ]
-
- Solving this quadratic equation yields the values for t at which the
- ray intersects the cone.
-
- */
-
- __forceinline bool intersectCone(vbool<M>& valid, vfloat<M>& lower, vfloat<M>& upper) // narrows 'valid'; returns false when no lane is left, otherwise writes front/back hit distances
- {
- /* return no hit by default */
- lower = pos_inf;
- upper = neg_inf;
-
- /* compute quadratic equation A*t^2 + B*t + C = 0 */
- const vfloat<M> OO = dot(O,O);
- const vfloat<M> OdO = dot(dO,O);
- const vfloat<M> A = g * dOdO - sqr(dOdP);
- const vfloat<M> B = 2.0f * (g*OdO - dOdP*yp);
- const vfloat<M> C = g*OO - sqr(OdP) - sqr_r0*dPdP - 2.0f*r0dr*OdP;
-
- /* we miss the cone if the discriminant is smaller than zero */
- const vfloat<M> D = B*B - 4.0f*A*C;
- valid &= (D >= 0.0f & g > 0.0f); // if g <= 0 then the cone is inside a sphere end
-
- /* When rays are parallel to the cone surface, then the
- * ray may be inside or outside the cone. We just assume a
- * miss in that case, which is fine as rays inside the
- * cone would anyway hit the ending spheres in that
- * case. */
- valid &= abs(A) > min_rcp_input; // also keeps rcp(2*A) below away from a near-zero input
- if (unlikely(none(valid))) {
- return false;
- }
-
- /* compute distance to front and back hit */
- const vfloat<M> Q = sqrt(D);
- const vfloat<M> rcp_2A = rcp(2.0f*A);
- t_cone_front = (-B-Q)*rcp_2A;
- y_cone_front = yp + t_cone_front*dOdP; // hit position along dP measured from the start clipping plane
- lower = select( (y_cone_front > -(float)ulp) & (y_cone_front <= g) & (g > 0.0f), t_cone_front, vfloat<M>(pos_inf)); // keep the hit only if y lies in (-ulp, g], i.e. between the clipping planes
-#if !defined (EMBREE_BACKFACE_CULLING_CURVES)
- t_cone_back = (-B+Q)*rcp_2A;
- y_cone_back = yp + t_cone_back *dOdP;
- upper = select( (y_cone_back > -(float)ulp) & (y_cone_back <= g) & (g > 0.0f), t_cone_back , vfloat<M>(neg_inf)); // with backface culling enabled, upper stays neg_inf
-#endif
- return true;
- }
-
- /*
- This function intersects the ray with the end sphere at
- p1. We already clip away hits that are inside the
- neighboring cone segment.
-
- */
-
- __forceinline void intersectEndSphere(vbool<M>& valid, // only read in this function
- const ConeGeometry<M>& coneR, // neighboring cone used to clip away sphere hits
- vfloat<M>& lower, vfloat<M>& upper) // out: front / back hit distance, pos_inf / neg_inf when rejected
- {
- /* calculate front and back hit with end sphere */
- const Vec3vf<M> O1 = org - p1;
- const vfloat<M> O1dO = dot(O1,dO);
- const vfloat<M> h2 = sqr(O1dO) - dOdO*(sqr(O1) - sqr(r1)); // discriminant term of the ray/sphere quadratic
- const vfloat<M> rhs1 = select( h2 >= 0.0f, sqrt(h2), vfloat<M>(neg_inf) ); // neg_inf marks lanes without a real root
-
- /* clip away front hit if it is inside next cone segment */
- t_sph1_front = (-O1dO - rhs1)*rcp_dOdO;
- const Vec3vf<M> hit_front = org + t_sph1_front*dO;
- vbool<M> valid_sph1_front = h2 >= 0.0f & yp + t_sph1_front*dOdP > g & !coneR.isClippedByPlane (valid, hit_front); // sphere is hit, hit lies past this cone's end plane (y > g), and coneR's plane test does not pass
- lower = select(valid_sph1_front, t_sph1_front, vfloat<M>(pos_inf));
-
-#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
- /* clip away back hit if it is inside next cone segment */
- t_sph1_back = (-O1dO + rhs1)*rcp_dOdO;
- const Vec3vf<M> hit_back = org + t_sph1_back*dO;
- vbool<M> valid_sph1_back = h2 >= 0.0f & yp + t_sph1_back*dOdP > g & !coneR.isClippedByPlane (valid, hit_back); // same three conditions as for the front hit
- upper = select(valid_sph1_back, t_sph1_back, vfloat<M>(neg_inf));
-#else
- upper = vfloat<M>(neg_inf);
-#endif
- }
-
- __forceinline void intersectBeginSphere(const vbool<M>& valid, // lanes for which the begin sphere is tested
- vfloat<M>& lower, vfloat<M>& upper) // out: front / back hit distance, pos_inf / neg_inf when rejected
- {
- /* calculate front and back hit with begin sphere (center p0, radius r0) */
- const Vec3vf<M> O1 = org - p0;
- const vfloat<M> O1dO = dot(O1,dO);
- const vfloat<M> h2 = sqr(O1dO) - dOdO*(sqr(O1) - sqr(r0)); // discriminant term of the ray/sphere quadratic
- const vfloat<M> rhs1 = select( h2 >= 0.0f, sqrt(h2), vfloat<M>(neg_inf) ); // neg_inf marks lanes without a real root
-
- /* keep front hit only if it lies before the start clipping plane of the cone (y < 0) */
- t_sph0_front = (-O1dO - rhs1)*rcp_dOdO;
- vbool<M> valid_sph1_front = valid & h2 >= 0.0f & yp + t_sph0_front*dOdP < 0;
- lower = select(valid_sph1_front, t_sph0_front, vfloat<M>(pos_inf));
-
-#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
- /* keep back hit only if it lies before the start clipping plane of the cone (y < 0) */
- t_sph0_back = (-O1dO + rhs1)*rcp_dOdO;
- vbool<M> valid_sph1_back = valid & h2 >= 0.0f & yp + t_sph0_back*dOdP < 0;
- upper = select(valid_sph1_back, t_sph0_back, vfloat<M>(neg_inf));
-#else
- upper = vfloat<M>(neg_inf);
-#endif
- }
-
- /*
-
- This function calculates the geometry normal of some cone hit.
-
- For a given hit point h (relative to p0) with a cone
- starting at p0 with radius w0 and ending at p1 with
- radius w1 one normally calculates the geometry normal by
- first calculating the parametric u hit location along the
- cone:
-
- u = dot(h,dP)/dP^2
-
- Using this value one can now directly calculate the
- geometry normal by bending the connection vector (h-u*dP)
- from hit to projected hit with some cone dependent value
- dw/sqrt(dP^2) * normalize(dP):
-
- Ng = normalize(h-u*dP) - dw/length(dP) * normalize(dP)
-
- The length of the vector (h-u*dP) can also get calculated
- by interpolating the radii as w0+u*dw which yields:
-
- Ng = (h-u*dP)/(w0+u*dw) - dw/dP^2 * dP
-
- Multiplying with (w0+u*dw) yields a scaled Ng':
-
- Ng' = (h-u*dP) - (w0+u*dw)*dw/dP^2*dP
-
- Inserting the definition of w0 and dw and refactoring
- yields a further scaled Ng'':
-
- Ng'' = (dP^2 - dr^2) (h-u*dP) - (r0+u*dr)*dr*dP
-
- Now inserting the definition of u and multiplying
- with the denominator dP^2 yields:
-
- Ng''' = (dP^2-dr^2)*(dP^2*h-dot(h,dP)*dP) - (dP^2*r0+dot(h,dP)*dr)*dr*dP
-
- Factoring out, cancelling terms, dividing by dP^2, and
- factoring again yields finally:
-
- Ng'''' = (dP^2-dr^2)*h - dP*(dot(h,dP) + r0*dr)
-
- */
-
- __forceinline Vec3vf<M> Ng_cone(const vbool<M>& front_hit) const // front_hit selects per lane the front or the back cone hit stored by intersectCone
- {
-#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
- const vfloat<M> y = select(front_hit, y_cone_front, y_cone_back);
- const vfloat<M> t = select(front_hit, t_cone_front, t_cone_back);
- const Vec3vf<M> h = O + t*dO; // hit point relative to p0
- return g*h-dP*y; // unnormalized normal: (dP^2-dr^2)*h - dP*(dot(h,dP) + r0*dr)
-#else
- const Vec3vf<M> h = O + t_cone_front*dO; // only the front hit exists with backface culling; front_hit is ignored
- return g*h-dP*y_cone_front;
-#endif
- }
-
- /* compute geometry normal of sphere hit as the difference
- * vector from hit point to sphere center */
-
- __forceinline Vec3vf<M> Ng_sphere1(const vbool<M>& front_hit) const // normal on the end sphere (center p1); front_hit selects front or back hit per lane
- {
-#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
- const vfloat<M> t_sph1 = select(front_hit, t_sph1_front, t_sph1_back);
- return org+t_sph1*dO-p1; // hit point minus sphere center, not normalized
-#else
- return org+t_sph1_front*dO-p1; // only the front hit exists with backface culling; front_hit is ignored
-#endif
- }
-
- __forceinline Vec3vf<M> Ng_sphere0(const vbool<M>& front_hit) const // normal on the begin sphere (center p0); front_hit selects front or back hit per lane
- {
-#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
- const vfloat<M> t_sph0 = select(front_hit, t_sph0_front, t_sph0_back);
- return org+t_sph0*dO-p0; // hit point minus sphere center, not normalized
-#else
- return org+t_sph0_front*dO-p0; // only the front hit exists with backface culling; front_hit is ignored
-#endif
- }
-
- /*
- This function calculates the u coordinate of a
- hit. Therefore we use the hit distance y (which is zero
- at the first cone clipping plane) and divide by distance
- g between the clipping planes.
-
- */
-
- __forceinline vfloat<M> u_cone(const vbool<M>& front_hit) const // u = y/g for the selected cone hit
- {
-#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
- const vfloat<M> y = select(front_hit, y_cone_front, y_cone_back);
- return clamp(y*rcp(g)); // single-argument clamp is defined elsewhere; presumably limits u to [0,1] - confirm
-#else
- return clamp(y_cone_front*rcp(g)); // only the front hit exists with backface culling; front_hit is ignored
-#endif
- }
-
- private:
- Vec3vf<M> org; // ray origin as passed to the constructor
- Vec3vf<M> O; // ray origin relative to p0
- Vec3vf<M> dO; // ray direction
- vfloat<M> dOdO; // supplied by the caller (squared length of the ray direction at the visible call site)
- vfloat<M> rcp_dOdO; // supplied by the caller (reciprocal of dOdO at the visible call site)
- vfloat<M> OdP; // dot(dP,O)
- vfloat<M> dOdP; // dot(dP,dO)
-
- /* for ray/cone intersection */
- private:
- vfloat<M> yp; // OdP + r0dr
- vfloat<M> y_cone_front; // written by intersectCone
- vfloat<M> t_cone_front; // written by intersectCone
-#if !defined (EMBREE_BACKFACE_CULLING_CURVES)
- vfloat<M> y_cone_back; // written by intersectCone
- vfloat<M> t_cone_back; // written by intersectCone
-#endif
-
- /* for ray/sphere intersection */
- private:
- vfloat<M> t_sph1_front; // written by intersectEndSphere
- vfloat<M> t_sph0_front; // written by intersectBeginSphere
-#if !defined (EMBREE_BACKFACE_CULLING_CURVES)
- vfloat<M> t_sph1_back; // written by intersectEndSphere
- vfloat<M> t_sph0_back; // written by intersectBeginSphere
-#endif
- };
-
-
- template<int M, typename Epilog, typename ray_tfar_func>
- static __forceinline bool intersectConeSphere(const vbool<M>& valid_i, // lanes to process
- const Vec3vf<M>& ray_org_in, const Vec3vf<M>& ray_dir,
- const vfloat<M>& ray_tnear, const ray_tfar_func& ray_tfar, // ray_tfar is a functor and is called each time the far limit is needed
- const Vec4vf<M>& v0, const Vec4vf<M>& v1, // segment end points (xyz) with radius (w)
- const Vec4vf<M>& vL, const Vec4vf<M>& vR, // neighbor points; vL[0] == pos_inf marks the first segment of a curve
- const Epilog& epilog) // called with (mask, hit); its bool result is returned
- {
- vbool<M> valid = valid_i;
-
- /* move ray origin closer to make calculations numerically stable */
- const vfloat<M> dOdO = sqr(ray_dir);
- const vfloat<M> rcp_dOdO = rcp(dOdO);
- const Vec3vf<M> center = vfloat<M>(0.5f)*(v0.xyz()+v1.xyz());
- const vfloat<M> dt = dot(center-ray_org_in,ray_dir)*rcp_dOdO; // ray parameter of the point closest to the segment center
- const Vec3vf<M> ray_org = ray_org_in + dt*ray_dir; // all t values below are relative to this origin; dt is added back when comparing and reporting
-
- /* intersect with cone from v0 to v1 */
- vfloat<M> t_cone_lower, t_cone_upper;
- ConeGeometryIntersector<M> cone (ray_org, ray_dir, dOdO, rcp_dOdO, v0, v1);
- vbool<M> validCone = valid;
- cone.intersectCone(validCone, t_cone_lower, t_cone_upper); // return value unused; validCone carries the per-lane result
-
- valid &= (validCone | (cone.g <= 0.0f)); // if cone is entirely in sphere end - check sphere
- if (unlikely(none(valid)))
- return false;
-
- /* cone hits inside the neighboring capped cones are inside the geometry and thus ignored */
- const ConeGeometry<M> coneL (v0, vL);
- const ConeGeometry<M> coneR (v1, vR);
-#if !defined(EMBREE_BACKFACE_CULLING_CURVES)
- const Vec3vf<M> hit_lower = ray_org + t_cone_lower*ray_dir;
- const Vec3vf<M> hit_upper = ray_org + t_cone_upper*ray_dir;
- t_cone_lower = select (!coneL.isInsideCappedCone (validCone, hit_lower) & !coneR.isInsideCappedCone (validCone, hit_lower), t_cone_lower, vfloat<M>(pos_inf));
- t_cone_upper = select (!coneL.isInsideCappedCone (validCone, hit_upper) & !coneR.isInsideCappedCone (validCone, hit_upper), t_cone_upper, vfloat<M>(neg_inf));
-#endif
-
- /* intersect ending sphere */
- vfloat<M> t_sph1_lower, t_sph1_upper;
- vfloat<M> t_sph0_lower = vfloat<M>(pos_inf); // begin-sphere hits default to "no hit"
- vfloat<M> t_sph0_upper = vfloat<M>(neg_inf);
- cone.intersectEndSphere(valid, coneR, t_sph1_lower, t_sph1_upper);
-
- const vbool<M> isBeginPoint = valid & (vL[0] == vfloat<M>(pos_inf)); // begin sphere is only tested where the left neighbor is marked with pos_inf
- if (unlikely(any(isBeginPoint))) {
- cone.intersectBeginSphere (isBeginPoint, t_sph0_lower, t_sph0_upper);
- }
-
- /* CSG union of cone, begin sphere and end sphere */
- vfloat<M> t_sph_lower = min(t_sph0_lower, t_sph1_lower);
- vfloat<M> t_cone_sphere_lower = min(t_cone_lower, t_sph_lower); // nearest entry candidate
-#if !defined (EMBREE_BACKFACE_CULLING_CURVES)
- vfloat<M> t_sph_upper = max(t_sph0_upper, t_sph1_upper);
- vfloat<M> t_cone_sphere_upper = max(t_cone_upper, t_sph_upper); // farthest exit candidate
-
- /* filter out hits that are not in tnear/tfar range */
- const vbool<M> valid_lower = valid & ray_tnear <= dt+t_cone_sphere_lower & dt+t_cone_sphere_lower <= ray_tfar() & t_cone_sphere_lower != vfloat<M>(pos_inf);
- const vbool<M> valid_upper = valid & ray_tnear <= dt+t_cone_sphere_upper & dt+t_cone_sphere_upper <= ray_tfar() & t_cone_sphere_upper != vfloat<M>(neg_inf);
-
- /* check if there is a first hit */
- const vbool<M> valid_first = valid_lower | valid_upper;
- if (unlikely(none(valid_first)))
- return false;
-
- /* construct first hit */
- const vfloat<M> t_first = select(valid_lower, t_cone_sphere_lower, t_cone_sphere_upper); // lower hit if it is in range, otherwise the upper one
- const vbool<M> cone_hit_first = t_first == t_cone_lower | t_first == t_cone_upper; // the hit surface is identified by exact equality of t values
- const vbool<M> sph0_hit_first = t_first == t_sph0_lower | t_first == t_sph0_upper;
- const Vec3vf<M> Ng_first = select(cone_hit_first, cone.Ng_cone(valid_lower), select (sph0_hit_first, cone.Ng_sphere0(valid_lower), cone.Ng_sphere1(valid_lower)));
- const vfloat<M> u_first = select(cone_hit_first, cone.u_cone(valid_lower), select (sph0_hit_first, vfloat<M>(zero), vfloat<M>(one))); // u is 0 on the begin sphere and 1 on the end sphere
-
- /* invoke intersection filter for first hit */
- RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_first,Ng_first); // v is always zero; t is converted back to the original ray origin
- const bool is_hit_first = epilog(valid_first, hit);
-
- /* check for possible second hits before potentially accepted hit */
- const vfloat<M> t_second = t_cone_sphere_upper;
- const vbool<M> valid_second = valid_lower & valid_upper & (dt+t_cone_sphere_upper <= ray_tfar()); // ray_tfar() is queried again here (presumably because the epilog may have shortened the ray - confirm)
- if (unlikely(none(valid_second)))
- return is_hit_first;
-
- /* invoke intersection filter for second hit */
- const vbool<M> cone_hit_second = t_second == t_cone_lower | t_second == t_cone_upper;
- const vbool<M> sph0_hit_second = t_second == t_sph0_lower | t_second == t_sph0_upper;
- const Vec3vf<M> Ng_second = select(cone_hit_second, cone.Ng_cone(false), select (sph0_hit_second, cone.Ng_sphere0(false), cone.Ng_sphere1(false))); // 'false' selects the back hits
- const vfloat<M> u_second = select(cone_hit_second, cone.u_cone(false), select (sph0_hit_second, vfloat<M>(zero), vfloat<M>(one)));
-
- hit = RoundLineIntersectorHitM<M>(u_second,zero,dt+t_second,Ng_second);
- const bool is_hit_second = epilog(valid_second, hit);
-
- return is_hit_first | is_hit_second;
-#else
- /* filter out hits that are not in tnear/tfar range */
- const vbool<M> valid_lower = valid & ray_tnear <= dt+t_cone_sphere_lower & dt+t_cone_sphere_lower <= ray_tfar() & t_cone_sphere_lower != vfloat<M>(pos_inf);
-
- /* check if there is a valid hit */
- if (unlikely(none(valid_lower)))
- return false;
-
- /* construct first hit */
- const vbool<M> cone_hit_first = t_cone_sphere_lower == t_cone_lower | t_cone_sphere_lower == t_cone_upper;
- const vbool<M> sph0_hit_first = t_cone_sphere_lower == t_sph0_lower | t_cone_sphere_lower == t_sph0_upper;
- const Vec3vf<M> Ng_first = select(cone_hit_first, cone.Ng_cone(valid_lower), select (sph0_hit_first, cone.Ng_sphere0(valid_lower), cone.Ng_sphere1(valid_lower)));
- const vfloat<M> u_first = select(cone_hit_first, cone.u_cone(valid_lower), select (sph0_hit_first, vfloat<M>(zero), vfloat<M>(one)));
-
- /* invoke intersection filter for first hit */
- RoundLineIntersectorHitM<M> hit(u_first,zero,dt+t_cone_sphere_lower,Ng_first); // with backface culling only the front hit is ever reported
- const bool is_hit_first = epilog(valid_lower, hit);
-
- return is_hit_first;
-#endif
- }
-
- } // end namespace __roundline_internal
-
- template<int M> // single-ray front end for __roundline_internal::intersectConeSphere
- struct RoundLinearCurveIntersector1
- {
- typedef CurvePrecalculations1 Precalculations;
-
- struct ray_tfar { // functor that reads ray.tfar at call time instead of caching it
- Ray& ray;
- __forceinline ray_tfar(Ray& ray) : ray(ray) {}
- __forceinline vfloat<M> operator() () const { return ray.tfar; };
- };
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- Ray& ray,
- IntersectContext* context,
- const LineSegments* geom,
- const Precalculations& pre, // not referenced in this function body
- const Vec4vf<M>& v0i, const Vec4vf<M>& v1i, // segment end points (xyz + radius in w)
- const Vec4vf<M>& vLi, const Vec4vf<M>& vRi, // neighboring points
- const Epilog& epilog)
- {
- const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); // ray data replicated into M-wide vectors
- const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z);
- const vfloat<M> ray_tnear(ray.tnear());
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); // all four points go through the same helper before intersection
- const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i);
- const Vec4vf<M> vL = enlargeRadiusToMinWidth(context,geom,ray_org,vLi);
- const Vec4vf<M> vR = enlargeRadiusToMinWidth(context,geom,ray_org,vRi);
- return __roundline_internal::intersectConeSphere(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray),v0,v1,vL,vR,epilog);
- }
- };
-
- template<int M, int K> // front end for ray k of a K-wide ray packet; forwards to __roundline_internal::intersectConeSphere
- struct RoundLinearCurveIntersectorK
- {
- typedef CurvePrecalculationsK<K> Precalculations;
-
- struct ray_tfar { // functor that reads ray.tfar[k] at call time instead of caching it
- RayK<K>& ray;
- size_t k;
- __forceinline ray_tfar(RayK<K>& ray, size_t k) : ray(ray), k(k) {}
- __forceinline vfloat<M> operator() () const { return ray.tfar[k]; };
- };
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- RayK<K>& ray, size_t k, // k: index of the ray inside the packet
- IntersectContext* context,
- const LineSegments* geom,
- const Precalculations& pre, // not referenced in this function body
- const Vec4vf<M>& v0i, const Vec4vf<M>& v1i, // segment end points (xyz + radius in w)
- const Vec4vf<M>& vLi, const Vec4vf<M>& vRi, // neighboring points
- const Epilog& epilog)
- {
- const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); // ray k replicated into M-wide vectors
- const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);
- const vfloat<M> ray_tnear = ray.tnear()[k];
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); // all four points go through the same helper before intersection
- const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i);
- const Vec4vf<M> vL = enlargeRadiusToMinWidth(context,geom,ray_org,vLi);
- const Vec4vf<M> vR = enlargeRadiusToMinWidth(context,geom,ray_org,vRi);
- return __roundline_internal::intersectConeSphere(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray,k),v0,v1,vL,vR,epilog);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h
deleted file mode 100644
index 079817335e..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h
+++ /dev/null
@@ -1,136 +0,0 @@
-// ======================================================================== //
-// Copyright 2009-2020 Intel Corporation //
-// //
-// Licensed under the Apache License, Version 2.0 (the "License"); //
-// you may not use this file except in compliance with the License. //
-// You may obtain a copy of the License at //
-// //
-// http://www.apache.org/licenses/LICENSE-2.0 //
-// //
-// Unless required by applicable law or agreed to in writing, software //
-// distributed under the License is distributed on an "AS IS" BASIS, //
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
-// See the License for the specific language governing permissions and //
-// limitations under the License. //
-// ======================================================================== //
-
-#pragma once
-
-#include "roundline_intersector.h"
-#include "intersector_epilog.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M, int Mx, bool filter> // LineMi<M> primitives, single ray; Mx is the width used for the valid mask and the intersector
- struct RoundLinearCurveMiIntersector1
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom); // end points v0,v1 and neighbor points vL,vR are filled by gather
- const vbool<Mx> valid = line.template valid<Mx>();
- RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); // result is ignored; the epilog receives the ray
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- return RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); // same routine as intersect(), only the epilog type differs
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line); // forwarded unchanged
- }
- };
-
- template<int M, int Mx, bool filter> // like RoundLinearCurveMiIntersector1, but gather() additionally receives ray.time()
- struct RoundLinearCurveMiMBIntersector1
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()); // presumably evaluates the vertices at the ray's time - confirm in LineMi::gather
- const vbool<Mx> valid = line.template valid<Mx>();
- RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); // result is ignored; the epilog receives the ray
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time());
- const vbool<Mx> valid = line.template valid<Mx>();
- return RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); // same routine as intersect(), only the epilog type differs
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, line); // forwarded unchanged
- }
- };
-
- template<int M, int Mx, int K, bool filter> // LineMi<M> primitives, ray k of a K-wide packet
- struct RoundLinearCurveMiIntersectorK
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom); // end points v0,v1 and neighbor points vL,vR are filled by gather
- const vbool<Mx> valid = line.template valid<Mx>();
- RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); // result is ignored; the epilog receives the ray
- }
-
- static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom);
- const vbool<Mx> valid = line.template valid<Mx>();
- return RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); // same routine as intersect(), only the epilog type differs
- }
- };
-
- template<int M, int Mx, int K, bool filter> // like RoundLinearCurveMiIntersectorK, but gather() additionally receives ray.time()[k]
- struct RoundLinearCurveMiMBIntersectorK
- {
- typedef LineMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(normal.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()[k]); // presumably evaluates the vertices at the time of ray k - confirm in LineMi::gather
- const vbool<Mx> valid = line.template valid<Mx>();
- RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); // result is ignored; the epilog receives the ray
- }
-
- static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const LineSegments* geom = context->scene->get<LineSegments>(line.geomID());
- Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()[k]);
- const vbool<Mx> valid = line.template valid<Mx>();
- return RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); // same routine as intersect(), only the epilog type differs
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h
deleted file mode 100644
index 3ab90c29ef..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h
+++ /dev/null
@@ -1,183 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "../common/scene_points.h"
-#include "curve_intersector_precalculations.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct SphereIntersectorHitM
- {
- __forceinline SphereIntersectorHitM() {}
-
- __forceinline SphereIntersectorHitM(const vfloat<M>& t, const Vec3vf<M>& Ng)
- : vt(t), vNg(Ng) {}
-
- __forceinline void finalize() {}
-
- __forceinline Vec2f uv(const size_t i) const {
- return Vec2f(0.0f, 0.0f);
- }
- __forceinline float t(const size_t i) const {
- return vt[i];
- }
- __forceinline Vec3fa Ng(const size_t i) const {
- return Vec3fa(vNg.x[i], vNg.y[i], vNg.z[i]);
- }
-
- public:
- vfloat<M> vt;
- Vec3vf<M> vNg;
- };
-
- template<int M>
- struct SphereIntersector1
- {
- typedef CurvePrecalculations1 Precalculations;
-
- template<typename Epilog>
- static __forceinline bool intersect(
- const vbool<M>& valid_i, Ray& ray,
- const Precalculations& pre, const Vec4vf<M>& v0, const Epilog& epilog)
- {
- vbool<M> valid = valid_i;
-
- const vfloat<M> rd2 = rcp(dot(ray.dir, ray.dir));
- const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z);
- const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z);
- const Vec3vf<M> center = v0.xyz();
- const vfloat<M> radius = v0.w;
-
- const Vec3vf<M> c0 = center - ray_org;
- const vfloat<M> projC0 = dot(c0, ray_dir) * rd2;
- const Vec3vf<M> perp = c0 - projC0 * ray_dir;
- const vfloat<M> l2 = dot(perp, perp);
- const vfloat<M> r2 = radius * radius;
- valid &= (l2 <= r2);
- if (unlikely(none(valid)))
- return false;
-
- const vfloat<M> td = sqrt((r2 - l2) * rd2);
- const vfloat<M> t_front = projC0 - td;
- const vfloat<M> t_back = projC0 + td;
-
- const vbool<M> valid_front = valid & (ray.tnear() <= t_front) & (t_front <= ray.tfar);
- const vbool<M> valid_back = valid & (ray.tnear() <= t_back ) & (t_back <= ray.tfar);
-
- /* check if there is a first hit */
- const vbool<M> valid_first = valid_front | valid_back;
- if (unlikely(none(valid_first)))
- return false;
-
- /* construct first hit */
- const vfloat<M> td_front = -td;
- const vfloat<M> td_back = +td;
- const vfloat<M> t_first = select(valid_front, t_front, t_back);
- const Vec3vf<M> Ng_first = select(valid_front, td_front, td_back) * ray_dir - perp;
- SphereIntersectorHitM<M> hit(t_first, Ng_first);
-
- /* invoke intersection filter for first hit */
- const bool is_hit_first = epilog(valid_first, hit);
-
- /* check for possible second hits before potentially accepted hit */
- const vfloat<M> t_second = t_back;
- const vbool<M> valid_second = valid_front & valid_back & (t_second <= ray.tfar);
- if (unlikely(none(valid_second)))
- return is_hit_first;
-
- /* invoke intersection filter for second hit */
- const Vec3vf<M> Ng_second = td_back * ray_dir - perp;
- hit = SphereIntersectorHitM<M> (t_second, Ng_second);
- const bool is_hit_second = epilog(valid_second, hit);
-
- return is_hit_first | is_hit_second;
- }
-
- template<typename Epilog>
- static __forceinline bool intersect(
- const vbool<M>& valid_i, Ray& ray, IntersectContext* context, const Points* geom,
- const Precalculations& pre, const Vec4vf<M>& v0i, const Epilog& epilog)
- {
- const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z);
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- return intersect(valid_i,ray,pre,v0,epilog);
- }
- };
-
- template<int M, int K>
- struct SphereIntersectorK
- {
- typedef CurvePrecalculationsK<K> Precalculations;
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid_i,
- RayK<K>& ray, size_t k,
- IntersectContext* context,
- const Points* geom,
- const Precalculations& pre,
- const Vec4vf<M>& v0i,
- const Epilog& epilog)
- {
- vbool<M> valid = valid_i;
-
- const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]);
- const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]);
- const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir));
-
- const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i);
- const Vec3vf<M> center = v0.xyz();
- const vfloat<M> radius = v0.w;
-
- const Vec3vf<M> c0 = center - ray_org;
- const vfloat<M> projC0 = dot(c0, ray_dir) * rd2;
- const Vec3vf<M> perp = c0 - projC0 * ray_dir;
- const vfloat<M> l2 = dot(perp, perp);
- const vfloat<M> r2 = radius * radius;
- valid &= (l2 <= r2);
- if (unlikely(none(valid)))
- return false;
-
- const vfloat<M> td = sqrt((r2 - l2) * rd2);
- const vfloat<M> t_front = projC0 - td;
- const vfloat<M> t_back = projC0 + td;
-
- const vbool<M> valid_front = valid & (ray.tnear()[k] <= t_front) & (t_front <= ray.tfar[k]);
- const vbool<M> valid_back = valid & (ray.tnear()[k] <= t_back ) & (t_back <= ray.tfar[k]);
-
- /* check if there is a first hit */
- const vbool<M> valid_first = valid_front | valid_back;
- if (unlikely(none(valid_first)))
- return false;
-
- /* construct first hit */
- const vfloat<M> td_front = -td;
- const vfloat<M> td_back = +td;
- const vfloat<M> t_first = select(valid_front, t_front, t_back);
- const Vec3vf<M> Ng_first = select(valid_front, td_front, td_back) * ray_dir - perp;
- SphereIntersectorHitM<M> hit(t_first, Ng_first);
-
- /* invoke intersection filter for first hit */
- const bool is_hit_first = epilog(valid_first, hit);
-
- /* check for possible second hits before potentially accepted hit */
- const vfloat<M> t_second = t_back;
- const vbool<M> valid_second = valid_front & valid_back & (t_second <= ray.tfar[k]);
- if (unlikely(none(valid_second)))
- return is_hit_first;
-
- /* invoke intersection filter for second hit */
- const Vec3vf<M> Ng_second = td_back * ray_dir - perp;
- hit = SphereIntersectorHitM<M> (t_second, Ng_second);
- const bool is_hit_second = epilog(valid_second, hit);
-
- return is_hit_first | is_hit_second;
- }
- };
- } // namespace isa
-} // namespace embree
diff --git a/thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h
deleted file mode 100644
index 1146847602..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h
+++ /dev/null
@@ -1,156 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "intersector_epilog.h"
-#include "pointi.h"
-#include "sphere_intersector.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int M, int Mx, bool filter>
- struct SphereMiIntersector1
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre,
- RayHit& ray,
- IntersectContext* context,
- const Primitive& sphere)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom);
- const vbool<Mx> valid = sphere.template valid<Mx>();
- SphereIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre,
- Ray& ray,
- IntersectContext* context,
- const Primitive& sphere)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom);
- const vbool<Mx> valid = sphere.template valid<Mx>();
- return SphereIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query,
- PointQueryContext* context,
- const Primitive& sphere)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, sphere);
- }
- };
-
- template<int M, int Mx, bool filter>
- struct SphereMiMBIntersector1
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculations1 Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre,
- RayHit& ray,
- IntersectContext* context,
- const Primitive& sphere)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom, ray.time());
- const vbool<Mx> valid = sphere.template valid<Mx>();
- SphereIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre,
- Ray& ray,
- IntersectContext* context,
- const Primitive& sphere)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom, ray.time());
- const vbool<Mx> valid = sphere.template valid<Mx>();
- return SphereIntersector1<Mx>::intersect(
- valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query,
- PointQueryContext* context,
- const Primitive& sphere)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, sphere);
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct SphereMiIntersectorK
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(
- const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom);
- const vbool<Mx> valid = sphere.template valid<Mx>();
- SphereIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0,
- Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID()));
- }
-
- static __forceinline bool occluded(
- const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom);
- const vbool<Mx> valid = sphere.template valid<Mx>();
- return SphereIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0,
- Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID()));
- }
- };
-
- template<int M, int Mx, int K, bool filter>
- struct SphereMiMBIntersectorK
- {
- typedef PointMi<M> Primitive;
- typedef CurvePrecalculationsK<K> Precalculations;
-
- static __forceinline void intersect(
- const Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere)
- {
- STAT3(normal.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()[k]);
- const vbool<Mx> valid = sphere.template valid<Mx>();
- SphereIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0,
- Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID()));
- }
-
- static __forceinline bool occluded(
- const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& sphere)
- {
- STAT3(shadow.trav_prims, 1, 1, 1);
- const Points* geom = context->scene->get<Points>(sphere.geomID());
- Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()[k]);
- const vbool<Mx> valid = sphere.template valid<Mx>();
- return SphereIntersectorK<Mx, K>::intersect(
- valid, ray, k, context, geom, pre, v0,
- Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID()));
- }
- };
- } // namespace isa
-} // namespace embree
diff --git a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h b/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h
deleted file mode 100644
index 94ad46ad87..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../geometry/primitive.h"
-#include "../subdiv/subdivpatch1base.h"
-
-namespace embree
-{
-
- struct __aligned(64) SubdivPatch1 : public SubdivPatch1Base
- {
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
-
- static Type type;
-
- public:
-
- /*! constructor for cached subdiv patch */
- SubdivPatch1 (const unsigned int gID,
- const unsigned int pID,
- const unsigned int subPatch,
- const SubdivMesh *const mesh,
- const size_t time,
- const Vec2f uv[4],
- const float edge_level[4],
- const int subdiv[4],
- const int simd_width)
- : SubdivPatch1Base(gID,pID,subPatch,mesh,time,uv,edge_level,subdiv,simd_width) {}
- };
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h
deleted file mode 100644
index 74ec1de258..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h
+++ /dev/null
@@ -1,237 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "subdivpatch1.h"
-#include "grid_soa.h"
-#include "grid_soa_intersector1.h"
-#include "grid_soa_intersector_packet.h"
-#include "../common/ray.h"
-
-namespace embree
-{
- namespace isa
- {
- template<typename T>
- class SubdivPatch1Precalculations : public T
- {
- public:
- __forceinline SubdivPatch1Precalculations (const Ray& ray, const void* ptr)
- : T(ray,ptr) {}
- };
-
- template<int K, typename T>
- class SubdivPatch1PrecalculationsK : public T
- {
- public:
- __forceinline SubdivPatch1PrecalculationsK (const vbool<K>& valid, RayK<K>& ray)
- : T(valid,ray) {}
- };
-
- class SubdivPatch1Intersector1
- {
- public:
- typedef GridSOA Primitive;
- typedef SubdivPatch1Precalculations<GridSOAIntersector1::Precalculations> Precalculations;
-
- static __forceinline bool processLazyNode(Precalculations& pre, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- lazy_node = prim->root(0);
- pre.grid = (Primitive*)prim;
- return false;
- }
-
- /*! Intersect a ray with the primitive. */
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- if (likely(ty == 0)) GridSOAIntersector1::intersect(pre,ray,context,prim,lazy_node);
- else processLazyNode(pre,context,prim,lazy_node);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) {
- intersect(This,pre,ray,context,prim,ty,tray,lazy_node);
- }
-
- /*! Test if the ray is occluded by the primitive */
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- if (likely(ty == 0)) return GridSOAIntersector1::occluded(pre,ray,context,prim,lazy_node);
- else return processLazyNode(pre,context,prim,lazy_node);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) {
- return occluded(This,pre,ray,context,prim,ty,tray,lazy_node);
- }
-
- template<int N>
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node)
- {
- // TODO: PointQuery implement
- assert(false && "not implemented");
- return false;
- }
-
- template<int N>
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node) {
- return pointQuery(This,query,context,prim,ty,tquery,lazy_node);
- }
- };
-
- class SubdivPatch1MBIntersector1
- {
- public:
- typedef SubdivPatch1 Primitive;
- typedef GridSOAMBIntersector1::Precalculations Precalculations;
-
- static __forceinline bool processLazyNode(Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim_i, size_t& lazy_node)
- {
- Primitive* prim = (Primitive*) prim_i;
- GridSOA* grid = nullptr;
- grid = (GridSOA*) prim->root_ref.get();
- pre.itime = getTimeSegment(ray.time(), float(grid->time_steps-1), pre.ftime);
- lazy_node = grid->root(pre.itime);
- pre.grid = grid;
- return false;
- }
-
- /*! Intersect a ray with the primitive. */
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- if (likely(ty == 0)) GridSOAMBIntersector1::intersect(pre,ray,context,prim,lazy_node);
- else processLazyNode(pre,ray,context,prim,lazy_node);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) {
- intersect(This,pre,ray,context,prim,ty,tray,lazy_node);
- }
-
- /*! Test if the ray is occluded by the primitive */
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- if (likely(ty == 0)) return GridSOAMBIntersector1::occluded(pre,ray,context,prim,lazy_node);
- else return processLazyNode(pre,ray,context,prim,lazy_node);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) {
- return occluded(This,pre,ray,context,prim,ty,tray,lazy_node);
- }
-
- template<int N>
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node)
- {
- // TODO: PointQuery implement
- assert(false && "not implemented");
- return false;
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node) {
- return pointQuery(This,query,context,prim,ty,tquery,lazy_node);
- }
- };
-
- template <int K>
- struct SubdivPatch1IntersectorK
- {
- typedef GridSOA Primitive;
- typedef SubdivPatch1PrecalculationsK<K,typename GridSOAIntersectorK<K>::Precalculations> Precalculations;
-
- static __forceinline bool processLazyNode(Precalculations& pre, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
- {
- lazy_node = prim->root(0);
- pre.grid = (Primitive*)prim;
- return false;
- }
-
- template<bool robust>
- static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- if (likely(ty == 0)) GridSOAIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node);
- else processLazyNode(pre,context,prim,lazy_node);
- }
-
- template<bool robust>
- static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- if (likely(ty == 0)) return GridSOAIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node);
- else return processLazyNode(pre,context,prim,lazy_node);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- if (likely(ty == 0)) GridSOAIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node);
- else processLazyNode(pre,context,prim,lazy_node);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- if (likely(ty == 0)) return GridSOAIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node);
- else return processLazyNode(pre,context,prim,lazy_node);
- }
- };
-
- typedef SubdivPatch1IntersectorK<4> SubdivPatch1Intersector4;
- typedef SubdivPatch1IntersectorK<8> SubdivPatch1Intersector8;
- typedef SubdivPatch1IntersectorK<16> SubdivPatch1Intersector16;
-
- template <int K>
- struct SubdivPatch1MBIntersectorK
- {
- typedef SubdivPatch1 Primitive;
- //typedef GridSOAMBIntersectorK<K>::Precalculations Precalculations;
- typedef SubdivPatch1PrecalculationsK<K,typename GridSOAMBIntersectorK<K>::Precalculations> Precalculations;
-
- static __forceinline bool processLazyNode(Precalculations& pre, IntersectContext* context, const Primitive* prim_i, size_t& lazy_node)
- {
- Primitive* prim = (Primitive*) prim_i;
- GridSOA* grid = (GridSOA*) prim->root_ref.get();
- lazy_node = grid->troot;
- pre.grid = grid;
- return false;
- }
-
- template<bool robust>
- static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- if (likely(ty == 0)) GridSOAMBIntersectorK<K>::intersect(valid,pre,ray,context,prim,lazy_node);
- else processLazyNode(pre,context,prim,lazy_node);
- }
-
- template<bool robust>
- static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- if (likely(ty == 0)) return GridSOAMBIntersectorK<K>::occluded(valid,pre,ray,context,prim,lazy_node);
- else return processLazyNode(pre,context,prim,lazy_node);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- if (likely(ty == 0)) GridSOAMBIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node);
- else processLazyNode(pre,context,prim,lazy_node);
- }
-
- template<int N, int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- if (likely(ty == 0)) return GridSOAMBIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node);
- else return processLazyNode(pre,context,prim,lazy_node);
- }
- };
-
- typedef SubdivPatch1MBIntersectorK<4> SubdivPatch1MBIntersector4;
- typedef SubdivPatch1MBIntersectorK<8> SubdivPatch1MBIntersector8;
- typedef SubdivPatch1MBIntersectorK<16> SubdivPatch1MBIntersector16;
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid.h
deleted file mode 100644
index 39fa6fb0f0..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/subgrid.h
+++ /dev/null
@@ -1,517 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "../common/scene_grid_mesh.h"
-#include "../bvh/bvh.h"
-
-namespace embree
-{
- /* Stores M quads from an indexed face set */
- struct SubGrid
- {
- /* Virtual interface to query information about the quad type */
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* primitive supports multiple time segments */
- static const bool singleTimeSegment = false;
-
- /* Returns maximum number of stored quads */
- static __forceinline size_t max_size() { return 1; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- public:
-
- /* Default constructor */
- __forceinline SubGrid() { }
-
- /* Construction from vertices and IDs */
- __forceinline SubGrid(const unsigned int x,
- const unsigned int y,
- const unsigned int geomID,
- const unsigned int primID)
- : _x(x), _y(y), _geomID(geomID), _primID(primID)
- {
- }
-
- __forceinline bool invalid3x3X() const { return (unsigned int)_x & (1<<15); }
- __forceinline bool invalid3x3Y() const { return (unsigned int)_y & (1<<15); }
-
- /* Gather the quads */
- __forceinline void gather(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- Vec3vf4& p3,
- const GridMesh* const mesh,
- const GridMesh::Grid &g) const
- {
- /* first quad always valid */
- const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset;
- const size_t vtxID01 = vtxID00 + 1;
- const vfloat4 vtx00 = vfloat4::loadu(mesh->vertexPtr(vtxID00));
- const vfloat4 vtx01 = vfloat4::loadu(mesh->vertexPtr(vtxID01));
- const size_t vtxID10 = vtxID00 + g.lineVtxOffset;
- const size_t vtxID11 = vtxID01 + g.lineVtxOffset;
- const vfloat4 vtx10 = vfloat4::loadu(mesh->vertexPtr(vtxID10));
- const vfloat4 vtx11 = vfloat4::loadu(mesh->vertexPtr(vtxID11));
-
- /* deltaX => vtx02, vtx12 */
- const size_t deltaX = invalid3x3X() ? 0 : 1;
- const size_t vtxID02 = vtxID01 + deltaX;
- const vfloat4 vtx02 = vfloat4::loadu(mesh->vertexPtr(vtxID02));
- const size_t vtxID12 = vtxID11 + deltaX;
- const vfloat4 vtx12 = vfloat4::loadu(mesh->vertexPtr(vtxID12));
-
- /* deltaY => vtx20, vtx21 */
- const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset;
- const size_t vtxID20 = vtxID10 + deltaY;
- const size_t vtxID21 = vtxID11 + deltaY;
- const vfloat4 vtx20 = vfloat4::loadu(mesh->vertexPtr(vtxID20));
- const vfloat4 vtx21 = vfloat4::loadu(mesh->vertexPtr(vtxID21));
-
- /* deltaX/deltaY => vtx22 */
- const size_t vtxID22 = vtxID11 + deltaX + deltaY;
- const vfloat4 vtx22 = vfloat4::loadu(mesh->vertexPtr(vtxID22));
-
- transpose(vtx00,vtx01,vtx11,vtx10,p0.x,p0.y,p0.z);
- transpose(vtx01,vtx02,vtx12,vtx11,p1.x,p1.y,p1.z);
- transpose(vtx11,vtx12,vtx22,vtx21,p2.x,p2.y,p2.z);
- transpose(vtx10,vtx11,vtx21,vtx20,p3.x,p3.y,p3.z);
- }
-
- template<typename T>
- __forceinline vfloat4 getVertexMB(const GridMesh* const mesh, const size_t offset, const size_t itime, const float ftime) const
- {
- const T v0 = T::loadu(mesh->vertexPtr(offset,itime+0));
- const T v1 = T::loadu(mesh->vertexPtr(offset,itime+1));
- return lerp(v0,v1,ftime);
- }
-
- /* Gather the quads */
- __forceinline void gatherMB(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- Vec3vf4& p3,
- const GridMesh* const mesh,
- const GridMesh::Grid &g,
- const size_t itime,
- const float ftime) const
- {
- /* first quad always valid */
- const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset;
- const size_t vtxID01 = vtxID00 + 1;
- const vfloat4 vtx00 = getVertexMB<vfloat4>(mesh,vtxID00,itime,ftime);
- const vfloat4 vtx01 = getVertexMB<vfloat4>(mesh,vtxID01,itime,ftime);
- const size_t vtxID10 = vtxID00 + g.lineVtxOffset;
- const size_t vtxID11 = vtxID01 + g.lineVtxOffset;
- const vfloat4 vtx10 = getVertexMB<vfloat4>(mesh,vtxID10,itime,ftime);
- const vfloat4 vtx11 = getVertexMB<vfloat4>(mesh,vtxID11,itime,ftime);
-
- /* deltaX => vtx02, vtx12 */
- const size_t deltaX = invalid3x3X() ? 0 : 1;
- const size_t vtxID02 = vtxID01 + deltaX;
- const vfloat4 vtx02 = getVertexMB<vfloat4>(mesh,vtxID02,itime,ftime);
- const size_t vtxID12 = vtxID11 + deltaX;
- const vfloat4 vtx12 = getVertexMB<vfloat4>(mesh,vtxID12,itime,ftime);
-
- /* deltaY => vtx20, vtx21 */
- const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset;
- const size_t vtxID20 = vtxID10 + deltaY;
- const size_t vtxID21 = vtxID11 + deltaY;
- const vfloat4 vtx20 = getVertexMB<vfloat4>(mesh,vtxID20,itime,ftime);
- const vfloat4 vtx21 = getVertexMB<vfloat4>(mesh,vtxID21,itime,ftime);
-
- /* deltaX/deltaY => vtx22 */
- const size_t vtxID22 = vtxID11 + deltaX + deltaY;
- const vfloat4 vtx22 = getVertexMB<vfloat4>(mesh,vtxID22,itime,ftime);
-
- transpose(vtx00,vtx01,vtx11,vtx10,p0.x,p0.y,p0.z);
- transpose(vtx01,vtx02,vtx12,vtx11,p1.x,p1.y,p1.z);
- transpose(vtx11,vtx12,vtx22,vtx21,p2.x,p2.y,p2.z);
- transpose(vtx10,vtx11,vtx21,vtx20,p3.x,p3.y,p3.z);
- }
-
-
-
- /* Gather the quads */
- __forceinline void gather(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- Vec3vf4& p3,
- const Scene *const scene) const
- {
- const GridMesh* const mesh = scene->get<GridMesh>(geomID());
- const GridMesh::Grid &g = mesh->grid(primID());
- gather(p0,p1,p2,p3,mesh,g);
- }
-
- /* Gather the quads in the motion blur case */
- __forceinline void gatherMB(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- Vec3vf4& p3,
- const Scene *const scene,
- const size_t itime,
- const float ftime) const
- {
- const GridMesh* const mesh = scene->get<GridMesh>(geomID());
- const GridMesh::Grid &g = mesh->grid(primID());
- gatherMB(p0,p1,p2,p3,mesh,g,itime,ftime);
- }
-
- /* Gather the quads */
- __forceinline void gather(Vec3fa vtx[16], const Scene *const scene) const
- {
- const GridMesh* mesh = scene->get<GridMesh>(geomID());
- const GridMesh::Grid &g = mesh->grid(primID());
-
- /* first quad always valid */
- const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset;
- const size_t vtxID01 = vtxID00 + 1;
- const Vec3fa vtx00 = Vec3fa::loadu(mesh->vertexPtr(vtxID00));
- const Vec3fa vtx01 = Vec3fa::loadu(mesh->vertexPtr(vtxID01));
- const size_t vtxID10 = vtxID00 + g.lineVtxOffset;
- const size_t vtxID11 = vtxID01 + g.lineVtxOffset;
- const Vec3fa vtx10 = Vec3fa::loadu(mesh->vertexPtr(vtxID10));
- const Vec3fa vtx11 = Vec3fa::loadu(mesh->vertexPtr(vtxID11));
-
- /* deltaX => vtx02, vtx12 */
- const size_t deltaX = invalid3x3X() ? 0 : 1;
- const size_t vtxID02 = vtxID01 + deltaX;
- const Vec3fa vtx02 = Vec3fa::loadu(mesh->vertexPtr(vtxID02));
- const size_t vtxID12 = vtxID11 + deltaX;
- const Vec3fa vtx12 = Vec3fa::loadu(mesh->vertexPtr(vtxID12));
-
- /* deltaY => vtx20, vtx21 */
- const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset;
- const size_t vtxID20 = vtxID10 + deltaY;
- const size_t vtxID21 = vtxID11 + deltaY;
- const Vec3fa vtx20 = Vec3fa::loadu(mesh->vertexPtr(vtxID20));
- const Vec3fa vtx21 = Vec3fa::loadu(mesh->vertexPtr(vtxID21));
-
- /* deltaX/deltaY => vtx22 */
- const size_t vtxID22 = vtxID11 + deltaX + deltaY;
- const Vec3fa vtx22 = Vec3fa::loadu(mesh->vertexPtr(vtxID22));
-
- vtx[ 0] = vtx00; vtx[ 1] = vtx01; vtx[ 2] = vtx11; vtx[ 3] = vtx10;
- vtx[ 4] = vtx01; vtx[ 5] = vtx02; vtx[ 6] = vtx12; vtx[ 7] = vtx11;
- vtx[ 8] = vtx10; vtx[ 9] = vtx11; vtx[10] = vtx21; vtx[11] = vtx20;
- vtx[12] = vtx11; vtx[13] = vtx12; vtx[14] = vtx22; vtx[15] = vtx21;
- }
-
- /* Gather the quads */
- __forceinline void gatherMB(vfloat4 vtx[16], const Scene *const scene, const size_t itime, const float ftime) const
- {
- const GridMesh* mesh = scene->get<GridMesh>(geomID());
- const GridMesh::Grid &g = mesh->grid(primID());
-
- /* first quad always valid */
- const size_t vtxID00 = g.startVtxID + x() + y() * g.lineVtxOffset;
- const size_t vtxID01 = vtxID00 + 1;
- const vfloat4 vtx00 = getVertexMB<vfloat4>(mesh,vtxID00,itime,ftime);
- const vfloat4 vtx01 = getVertexMB<vfloat4>(mesh,vtxID01,itime,ftime);
- const size_t vtxID10 = vtxID00 + g.lineVtxOffset;
- const size_t vtxID11 = vtxID01 + g.lineVtxOffset;
- const vfloat4 vtx10 = getVertexMB<vfloat4>(mesh,vtxID10,itime,ftime);
- const vfloat4 vtx11 = getVertexMB<vfloat4>(mesh,vtxID11,itime,ftime);
-
- /* deltaX => vtx02, vtx12 */
- const size_t deltaX = invalid3x3X() ? 0 : 1;
- const size_t vtxID02 = vtxID01 + deltaX;
- const vfloat4 vtx02 = getVertexMB<vfloat4>(mesh,vtxID02,itime,ftime);
- const size_t vtxID12 = vtxID11 + deltaX;
- const vfloat4 vtx12 = getVertexMB<vfloat4>(mesh,vtxID12,itime,ftime);
-
- /* deltaY => vtx20, vtx21 */
- const size_t deltaY = invalid3x3Y() ? 0 : g.lineVtxOffset;
- const size_t vtxID20 = vtxID10 + deltaY;
- const size_t vtxID21 = vtxID11 + deltaY;
- const vfloat4 vtx20 = getVertexMB<vfloat4>(mesh,vtxID20,itime,ftime);
- const vfloat4 vtx21 = getVertexMB<vfloat4>(mesh,vtxID21,itime,ftime);
-
- /* deltaX/deltaY => vtx22 */
- const size_t vtxID22 = vtxID11 + deltaX + deltaY;
- const vfloat4 vtx22 = getVertexMB<vfloat4>(mesh,vtxID22,itime,ftime);
-
- vtx[ 0] = vtx00; vtx[ 1] = vtx01; vtx[ 2] = vtx11; vtx[ 3] = vtx10;
- vtx[ 4] = vtx01; vtx[ 5] = vtx02; vtx[ 6] = vtx12; vtx[ 7] = vtx11;
- vtx[ 8] = vtx10; vtx[ 9] = vtx11; vtx[10] = vtx21; vtx[11] = vtx20;
- vtx[12] = vtx11; vtx[13] = vtx12; vtx[14] = vtx22; vtx[15] = vtx21;
- }
-
-
- /* Calculate the bounds of the subgrid */
- __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const
- {
- BBox3fa bounds = empty;
- FATAL("not implemented yet");
- return bounds;
- }
-
- /* Calculate the linear bounds of the primitive */
- __forceinline LBBox3fa linearBounds(const Scene* const scene, const size_t itime)
- {
- return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1));
- }
-
- __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps)
- {
- LBBox3fa allBounds = empty;
- FATAL("not implemented yet");
- return allBounds;
- }
-
- __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range)
- {
- LBBox3fa allBounds = empty;
- FATAL("not implemented yet");
- return allBounds;
- }
-
-
- friend embree_ostream operator<<(embree_ostream cout, const SubGrid& sg) {
- return cout << "SubGrid " << " ( x " << sg.x() << ", y = " << sg.y() << ", geomID = " << sg.geomID() << ", primID = " << sg.primID() << " )";
- }
-
- __forceinline unsigned int geomID() const { return _geomID; }
- __forceinline unsigned int primID() const { return _primID; }
- __forceinline unsigned int x() const { return (unsigned int)_x & 0x7fff; }
- __forceinline unsigned int y() const { return (unsigned int)_y & 0x7fff; }
-
- private:
- unsigned short _x;
- unsigned short _y;
- unsigned int _geomID; // geometry ID of mesh
- unsigned int _primID; // primitive ID of primitive inside mesh
- };
-
- struct SubGridID {
- unsigned short x;
- unsigned short y;
- unsigned int primID;
-
- __forceinline SubGridID() {}
- __forceinline SubGridID(const unsigned int x, const unsigned int y, const unsigned int primID) :
- x(x), y(y), primID(primID) {}
- };
-
- /* QuantizedBaseNode as large subgrid leaf */
- template<int N>
- struct SubGridQBVHN
- {
- /* Virtual interface to query information about the quad type */
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- __forceinline size_t size() const
- {
- for (size_t i=0;i<N;i++)
- if (primID(i) == -1) return i;
- return N;
- }
-
- __forceinline void clear() {
- for (size_t i=0;i<N;i++)
- subgridIDs[i] = SubGridID(0,0,(unsigned int)-1);
- qnode.clear();
- }
-
- /* Default constructor */
- __forceinline SubGridQBVHN() { }
-
- /* Construction from vertices and IDs */
- __forceinline SubGridQBVHN(const unsigned int x[N],
- const unsigned int y[N],
- const unsigned int primID[N],
- const BBox3fa * const subGridBounds,
- const unsigned int geomID,
- const unsigned int items)
- {
- clear();
- _geomID = geomID;
-
- __aligned(64) typename BVHN<N>::AABBNode node;
- node.clear();
- for (size_t i=0;i<items;i++)
- {
- subgridIDs[i] = SubGridID(x[i],y[i],primID[i]);
- node.setBounds(i,subGridBounds[i]);
- }
- qnode.init_dim(node);
- }
-
- __forceinline unsigned int geomID() const { return _geomID; }
- __forceinline unsigned int primID(const size_t i) const { assert(i < N); return subgridIDs[i].primID; }
- __forceinline unsigned int x(const size_t i) const { assert(i < N); return subgridIDs[i].x; }
- __forceinline unsigned int y(const size_t i) const { assert(i < N); return subgridIDs[i].y; }
-
- __forceinline SubGrid subgrid(const size_t i) const {
- assert(i < N);
- assert(primID(i) != -1);
- return SubGrid(x(i),y(i),geomID(),primID(i));
- }
-
- public:
- SubGridID subgridIDs[N];
-
- typename BVHN<N>::QuantizedBaseNode qnode;
-
- unsigned int _geomID; // geometry ID of mesh
-
-
- friend embree_ostream operator<<(embree_ostream cout, const SubGridQBVHN& sg) {
- cout << "SubGridQBVHN " << embree_endl;
- for (size_t i=0;i<N;i++)
- cout << i << " ( x = " << sg.subgridIDs[i].x << ", y = " << sg.subgridIDs[i].y << ", primID = " << sg.subgridIDs[i].primID << " )" << embree_endl;
- cout << "geomID " << sg._geomID << embree_endl;
- cout << "lowerX " << sg.qnode.dequantizeLowerX() << embree_endl;
- cout << "upperX " << sg.qnode.dequantizeUpperX() << embree_endl;
- cout << "lowerY " << sg.qnode.dequantizeLowerY() << embree_endl;
- cout << "upperY " << sg.qnode.dequantizeUpperY() << embree_endl;
- cout << "lowerZ " << sg.qnode.dequantizeLowerZ() << embree_endl;
- cout << "upperZ " << sg.qnode.dequantizeUpperZ() << embree_endl;
- return cout;
- }
-
- };
-
- template<int N>
- typename SubGridQBVHN<N>::Type SubGridQBVHN<N>::type;
-
- typedef SubGridQBVHN<4> SubGridQBVH4;
- typedef SubGridQBVHN<8> SubGridQBVH8;
-
-
- /* QuantizedBaseNode as large subgrid leaf */
- template<int N>
- struct SubGridMBQBVHN
- {
- /* Virtual interface to query information about the quad type */
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- __forceinline size_t size() const
- {
- for (size_t i=0;i<N;i++)
- if (primID(i) == -1) return i;
- return N;
- }
-
- __forceinline void clear() {
- for (size_t i=0;i<N;i++)
- subgridIDs[i] = SubGridID(0,0,(unsigned int)-1);
- qnode.clear();
- }
-
- /* Default constructor */
- __forceinline SubGridMBQBVHN() { }
-
- /* Construction from vertices and IDs */
- __forceinline SubGridMBQBVHN(const unsigned int x[N],
- const unsigned int y[N],
- const unsigned int primID[N],
- const BBox3fa * const subGridBounds0,
- const BBox3fa * const subGridBounds1,
- const unsigned int geomID,
- const float toffset,
- const float tscale,
- const unsigned int items)
- {
- clear();
- _geomID = geomID;
- time_offset = toffset;
- time_scale = tscale;
-
- __aligned(64) typename BVHN<N>::AABBNode node0,node1;
- node0.clear();
- node1.clear();
- for (size_t i=0;i<items;i++)
- {
- subgridIDs[i] = SubGridID(x[i],y[i],primID[i]);
- node0.setBounds(i,subGridBounds0[i]);
- node1.setBounds(i,subGridBounds1[i]);
- }
- qnode.node0.init_dim(node0);
- qnode.node1.init_dim(node1);
- }
-
- __forceinline unsigned int geomID() const { return _geomID; }
- __forceinline unsigned int primID(const size_t i) const { assert(i < N); return subgridIDs[i].primID; }
- __forceinline unsigned int x(const size_t i) const { assert(i < N); return subgridIDs[i].x; }
- __forceinline unsigned int y(const size_t i) const { assert(i < N); return subgridIDs[i].y; }
-
- __forceinline SubGrid subgrid(const size_t i) const {
- assert(i < N);
- assert(primID(i) != -1);
- return SubGrid(x(i),y(i),geomID(),primID(i));
- }
-
- __forceinline float adjustTime(const float t) const { return time_scale * (t-time_offset); }
-
- template<int K>
- __forceinline vfloat<K> adjustTime(const vfloat<K> &t) const { return time_scale * (t-time_offset); }
-
- public:
- SubGridID subgridIDs[N];
-
- typename BVHN<N>::QuantizedBaseNodeMB qnode;
-
- float time_offset;
- float time_scale;
- unsigned int _geomID; // geometry ID of mesh
-
-
- friend embree_ostream operator<<(embree_ostream cout, const SubGridMBQBVHN& sg) {
- cout << "SubGridMBQBVHN " << embree_endl;
- for (size_t i=0;i<N;i++)
- cout << i << " ( x = " << sg.subgridIDs[i].x << ", y = " << sg.subgridIDs[i].y << ", primID = " << sg.subgridIDs[i].primID << " )" << embree_endl;
- cout << "geomID " << sg._geomID << embree_endl;
- cout << "time_offset " << sg.time_offset << embree_endl;
- cout << "time_scale " << sg.time_scale << embree_endl;
- cout << "lowerX " << sg.qnode.node0.dequantizeLowerX() << embree_endl;
- cout << "upperX " << sg.qnode.node0.dequantizeUpperX() << embree_endl;
- cout << "lowerY " << sg.qnode.node0.dequantizeLowerY() << embree_endl;
- cout << "upperY " << sg.qnode.node0.dequantizeUpperY() << embree_endl;
- cout << "lowerZ " << sg.qnode.node0.dequantizeLowerZ() << embree_endl;
- cout << "upperZ " << sg.qnode.node0.dequantizeUpperZ() << embree_endl;
- cout << "lowerX " << sg.qnode.node1.dequantizeLowerX() << embree_endl;
- cout << "upperX " << sg.qnode.node1.dequantizeUpperX() << embree_endl;
- cout << "lowerY " << sg.qnode.node1.dequantizeLowerY() << embree_endl;
- cout << "upperY " << sg.qnode.node1.dequantizeUpperY() << embree_endl;
- cout << "lowerZ " << sg.qnode.node1.dequantizeLowerZ() << embree_endl;
- cout << "upperZ " << sg.qnode.node1.dequantizeUpperZ() << embree_endl;
- return cout;
- }
-
- };
-
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h
deleted file mode 100644
index 045eee4329..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h
+++ /dev/null
@@ -1,518 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "subgrid.h"
-#include "subgrid_intersector_moeller.h"
-#include "subgrid_intersector_pluecker.h"
-
-namespace embree
-{
- namespace isa
- {
-
- // =======================================================================================
- // =================================== SubGridIntersectors ===============================
- // =======================================================================================
-
-
- template<int N, bool filter>
- struct SubGridIntersector1Moeller
- {
- typedef SubGridQBVHN<N> Primitive;
- typedef SubGridQuadMIntersector1MoellerTrumbore<4,filter> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(normal.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- pre.intersect(ray,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- return pre.occluded(ray,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid)
- {
- STAT3(point_query.trav_prims,1,1,1);
- AccelSet* accel = (AccelSet*)context->scene->get(subgrid.geomID());
- assert(accel);
- context->geomID = subgrid.geomID();
- context->primID = subgrid.primID();
- return accel->pointQuery(query, context);
- }
-
- template<int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
-#if defined(__AVX__)
- STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1);
-#endif
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (unlikely(dist[ID] > ray.tfar)) continue;
- intersect(pre,ray,context,prim[i].subgrid(ID));
- }
- }
- }
- template<int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
-
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (occluded(pre,ray,context,prim[i].subgrid(ID)))
- return true;
- }
- }
- return false;
- }
-
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node)
- {
- bool changed = false;
- for (size_t i=0;i<num;i++)
- {
- vfloat<N> dist;
- size_t mask;
- if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) {
- mask = BVHNQuantizedBaseNodePointQuerySphere1<N>::pointQuery(&prim[i].qnode,tquery,dist);
- } else {
- mask = BVHNQuantizedBaseNodePointQueryAABB1<N>::pointQuery(&prim[i].qnode,tquery,dist);
- }
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
- changed |= pointQuery(query, context, prim[i].subgrid(ID));
- }
- }
- return changed;
- }
- };
-
- template<int N, bool filter>
- struct SubGridIntersector1Pluecker
- {
- typedef SubGridQBVHN<N> Primitive;
- typedef SubGridQuadMIntersector1Pluecker<4,filter> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(normal.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- pre.intersect(ray,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- return pre.occluded(ray,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid)
- {
- STAT3(point_query.trav_prims,1,1,1);
- AccelSet* accel = (AccelSet*)context->scene->get(subgrid.geomID());
- context->geomID = subgrid.geomID();
- context->primID = subgrid.primID();
- return accel->pointQuery(query, context);
- }
-
- template<int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
-#if defined(__AVX__)
- STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1);
-#endif
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (unlikely(dist[ID] > ray.tfar)) continue;
- intersect(pre,ray,context,prim[i].subgrid(ID));
- }
- }
- }
-
- template<int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (occluded(pre,ray,context,prim[i].subgrid(ID)))
- return true;
- }
- }
- return false;
- }
-
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node)
- {
- bool changed = false;
- for (size_t i=0;i<num;i++)
- {
- vfloat<N> dist;
- size_t mask;
- if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) {
- mask = BVHNQuantizedBaseNodePointQuerySphere1<N>::pointQuery(&prim[i].qnode,tquery,dist);
- } else {
- mask = BVHNQuantizedBaseNodePointQueryAABB1<N>::pointQuery(&prim[i].qnode,tquery,dist);
- }
-#if defined(__AVX__)
- STAT3(point_query.trav_hit_boxes[popcnt(mask)],1,1,1);
-#endif
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
- changed |= pointQuery(query, context, prim[i].subgrid(ID));
- }
- }
- return changed;
- }
- };
-
- template<int N, int K, bool filter>
- struct SubGridIntersectorKMoeller
- {
- typedef SubGridQBVHN<N> Primitive;
- typedef SubGridQuadMIntersectorKMoellerTrumbore<4,K,filter> Precalculations;
-
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- Vec3fa vtx[16];
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- subgrid.gather(vtx,context->scene);
- for (unsigned int i=0; i<4; i++)
- {
- const Vec3vf<K> p0 = vtx[i*4+0];
- const Vec3vf<K> p1 = vtx[i*4+1];
- const Vec3vf<K> p2 = vtx[i*4+2];
- const Vec3vf<K> p3 = vtx[i*4+3];
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- pre.intersectK(valid_i,ray,p0,p1,p2,p3,g,subgrid,i,IntersectKEpilogM<4,K,filter>(ray,context,subgrid.geomID(),subgrid.primID(),i));
- }
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- vbool<K> valid0 = valid_i;
- Vec3fa vtx[16];
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- subgrid.gather(vtx,context->scene);
- for (unsigned int i=0; i<4; i++)
- {
- const Vec3vf<K> p0 = vtx[i*4+0];
- const Vec3vf<K> p1 = vtx[i*4+1];
- const Vec3vf<K> p2 = vtx[i*4+2];
- const Vec3vf<K> p3 = vtx[i*4+3];
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- if (pre.intersectK(valid0,ray,p0,p1,p2,p3,g,subgrid,i,OccludedKEpilogM<4,K,filter>(valid0,ray,context,subgrid.geomID(),subgrid.primID(),i)))
- break;
- }
- return !valid0;
- }
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(normal.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid);
- }
-
- template<bool robust>
- static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
- for (size_t j=0;j<num;j++)
- {
- size_t m_valid = movemask(prim[j].qnode.validMask());
- vfloat<K> dist;
- while(m_valid)
- {
- const size_t i = bscf(m_valid);
- if (none(valid & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue;
- intersect(valid,pre,ray,context,prim[j].subgrid(i));
- }
- }
- }
-
- template<bool robust>
- static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
- vbool<K> valid0 = valid;
- for (size_t j=0;j<num;j++)
- {
- size_t m_valid = movemask(prim[j].qnode.validMask());
- vfloat<K> dist;
- while(m_valid)
- {
- const size_t i = bscf(m_valid);
- if (none(valid0 & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue;
- valid0 &= !occluded(valid0,pre,ray,context,prim[j].subgrid(i));
- if (none(valid0)) break;
- }
- }
- return !valid0;
- }
-
- template<int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (unlikely(dist[ID] > ray.tfar[k])) continue;
- intersect(pre,ray,k,context,prim[i].subgrid(ID));
- }
- }
- }
-
- template<int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (occluded(pre,ray,k,context,prim[i].subgrid(ID)))
- return true;
- }
- }
- return false;
- }
- };
-
-
- template<int N, int K, bool filter>
- struct SubGridIntersectorKPluecker
- {
- typedef SubGridQBVHN<N> Primitive;
- typedef SubGridQuadMIntersectorKPluecker<4,K,filter> Precalculations;
-
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- Vec3fa vtx[16];
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- subgrid.gather(vtx,context->scene);
- for (unsigned int i=0; i<4; i++)
- {
- const Vec3vf<K> p0 = vtx[i*4+0];
- const Vec3vf<K> p1 = vtx[i*4+1];
- const Vec3vf<K> p2 = vtx[i*4+2];
- const Vec3vf<K> p3 = vtx[i*4+3];
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- pre.intersectK(valid_i,ray,p0,p1,p2,p3,g,subgrid,i,IntersectKEpilogM<4,K,filter>(ray,context,subgrid.geomID(),subgrid.primID(),i));
- }
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- vbool<K> valid0 = valid_i;
- Vec3fa vtx[16];
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- subgrid.gather(vtx,context->scene);
- for (unsigned int i=0; i<4; i++)
- {
- const Vec3vf<K> p0 = vtx[i*4+0];
- const Vec3vf<K> p1 = vtx[i*4+1];
- const Vec3vf<K> p2 = vtx[i*4+2];
- const Vec3vf<K> p3 = vtx[i*4+3];
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- if (pre.intersectK(valid0,ray,p0,p1,p2,p3,g,subgrid,i,OccludedKEpilogM<4,K,filter>(valid0,ray,context,subgrid.geomID(),subgrid.primID(),i)))
- break;
- }
- return !valid0;
- }
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(normal.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
- Vec3vf4 v0,v1,v2,v3; subgrid.gather(v0,v1,v2,v3,context->scene);
- return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid);
- }
-
- template<bool robust>
- static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
- for (size_t j=0;j<num;j++)
- {
- size_t m_valid = movemask(prim[j].qnode.validMask());
- vfloat<K> dist;
- while(m_valid)
- {
- const size_t i = bscf(m_valid);
- if (none(valid & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue;
- intersect(valid,pre,ray,context,prim[j].subgrid(i));
- }
- }
- }
-
- template<bool robust>
- static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
- vbool<K> valid0 = valid;
- for (size_t j=0;j<num;j++)
- {
- size_t m_valid = movemask(prim[j].qnode.validMask());
- vfloat<K> dist;
- while(m_valid)
- {
- const size_t i = bscf(m_valid);
- if (none(valid0 & isecK.intersectK(&prim[j].qnode,i,tray,dist))) continue;
- valid0 &= !occluded(valid0,pre,ray,context,prim[j].subgrid(i));
- if (none(valid0)) break;
- }
- }
- return !valid0;
- }
-
- template<int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (unlikely(dist[ID] > ray.tfar[k])) continue;
- intersect(pre,ray,k,context,prim[i].subgrid(ID));
- }
- }
- }
-
- template<int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- assert(((size_t)1 << ID) & movemask(prim[i].qnode.validMask()));
-
- if (occluded(pre,ray,k,context,prim[i].subgrid(ID)))
- return true;
- }
- }
- return false;
- }
- };
-
-
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h
deleted file mode 100644
index f65b4abf61..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h
+++ /dev/null
@@ -1,493 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "subgrid.h"
-#include "quad_intersector_moeller.h"
-
-namespace embree
-{
- namespace isa
- {
-
- /* ----------------------------- */
- /* -- single ray intersectors -- */
- /* ----------------------------- */
-
- /*! Rescales the per-quad U,V of a Moeller-Trumbore hit so that they address the entire grid.
-  *  The four-value lane offsets below are written for a 2x2 block of quads. */
- template<int M>
- __forceinline void interpolateUV(MoellerTrumboreHitM<M> &hit,const GridMesh::Grid &g, const SubGrid& subgrid)
- {
-   /* per-lane quad position: sub-grid origin plus offsets (0,1,1,0) in x and (0,0,1,1) in y */
-   const vint<M> originX((int)subgrid.x());
-   const vint<M> originY((int)subgrid.y());
-   const vint<M> quadX(originX + vint<M>(0,1,1,0));
-   const vint<M> quadY(originY + vint<M>(0,0,1,1));
-
-   /* reciprocal of the grid resolution minus one, per axis */
-   const float rcpSpanX = rcp((float)((int)g.resX-1));
-   const float rcpSpanY = rcp((float)((int)g.resY-1));
-
-   /* the integer offset is multiplied by absDen before being added, presumably because
-      U,V have not been divided by absDen yet at this point */
-   hit.U = (hit.U + (vfloat<M>)quadX * hit.absDen) * rcpSpanX;
-   hit.V = (hit.V + (vfloat<M>)quadY * hit.absDen) * rcpSpanY;
- }
-
- template<int M, bool filter>
- struct SubGridQuadMIntersector1MoellerTrumbore;
-
- /*! Generic 1-ray Moeller-Trumbore intersector for the M quads of a sub-grid.
-  *  Each quad (v0,v1,v2,v3) is tested as the two triangles (v0,v1,v3) and (v2,v3,v1). */
- template<int M, bool filter>
- struct SubGridQuadMIntersector1MoellerTrumbore
- {
-   __forceinline SubGridQuadMIntersector1MoellerTrumbore() {}
-
-   /*! Both arguments are ignored by this constructor. */
-   __forceinline SubGridQuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}
-
-   /*! Runs the intersection epilog for every triangle of the sub-grid hit by the ray. */
-   __forceinline void intersect(RayHit& ray, IntersectContext* context,
-                                const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
-                                const GridMesh::Grid &g, const SubGrid& subgrid) const
-   {
-     MoellerTrumboreHitM<M> triHit;
-     MoellerTrumboreIntersector1<M> triIsec(ray,nullptr);
-     Intersect1EpilogMU<M,filter> commit(ray,context,subgrid.geomID(),subgrid.primID());
-
-     /* first triangle (v0,v1,v3) */
-     if (triIsec.intersect(ray,v0,v1,v3,triHit))
-     {
-       interpolateUV<M>(triHit,g,subgrid);
-       commit(triHit.valid,triHit);
-     }
-
-     /* second triangle (v2,v3,v1) */
-     if (!triIsec.intersect(ray,v2,v3,v1,triHit))
-       return;
-
-     /* mirror U,V (still scaled by absDen) before mapping them onto the grid */
-     triHit.U = triHit.absDen - triHit.U;
-     triHit.V = triHit.absDen - triHit.V;
-     interpolateUV<M>(triHit,g,subgrid);
-     commit(triHit.valid,triHit);
-   }
-
-   /*! Returns true as soon as the occlusion epilog accepts a hit on either triangle. */
-   __forceinline bool occluded(Ray& ray, IntersectContext* context,
-                               const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
-                               const GridMesh::Grid &g, const SubGrid& subgrid) const
-   {
-     MoellerTrumboreHitM<M> triHit;
-     MoellerTrumboreIntersector1<M> triIsec(ray,nullptr);
-     Occluded1EpilogMU<M,filter> shadow(ray,context,subgrid.geomID(),subgrid.primID());
-
-     /* first triangle (v0,v1,v3) */
-     if (triIsec.intersect(ray,v0,v1,v3,triHit))
-     {
-       interpolateUV<M>(triHit,g,subgrid);
-       if (shadow(triHit.valid,triHit))
-         return true;
-     }
-
-     /* second triangle (v2,v3,v1) */
-     if (!triIsec.intersect(ray,v2,v3,v1,triHit))
-       return false;
-
-     /* mirror U,V (still scaled by absDen) before mapping them onto the grid */
-     triHit.U = triHit.absDen - triHit.U;
-     triHit.V = triHit.absDen - triHit.V;
-     interpolateUV<M>(triHit,g,subgrid);
-     if (shadow(triHit.valid,triHit))
-       return true;
-
-     return false;
-   }
- };
-
-#if defined (__AVX__)
-
- /*! Intersects 4 quads with 1 ray using AVX */
- template<bool filter>
- struct SubGridQuadMIntersector1MoellerTrumbore<4,filter>
- {
- __forceinline SubGridQuadMIntersector1MoellerTrumbore() {}
-
- __forceinline SubGridQuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}
-
- template<typename Epilog>
- __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid& subgrid, const Epilog& epilog) const
- {
- const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
- const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
-#else
- const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
- const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
-#endif
- MoellerTrumboreHitM<8> hit;
- MoellerTrumboreIntersector1<8> intersector(ray,nullptr);
- const vbool8 flags(0,0,0,0,1,1,1,1);
- if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit)))
- {
- vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen;
-
-#if !defined(EMBREE_BACKFACE_CULLING)
- hit.U = select(flags,absDen-V,U);
- hit.V = select(flags,absDen-U,V);
- hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f));
-#else
- hit.U = select(flags,absDen-U,U);
- hit.V = select(flags,absDen-V,V);
-#endif
- /* correct U,V interpolation across the entire grid */
- const vint8 sx((int)subgrid.x());
- const vint8 sy((int)subgrid.y());
- const vint8 sx8(sx + vint8(0,1,1,0,0,1,1,0));
- const vint8 sy8(sy + vint8(0,0,1,1,0,0,1,1));
- const float inv_resX = rcp((float)((int)g.resX-1));
- const float inv_resY = rcp((float)((int)g.resY-1));
- hit.U = (hit.U + (vfloat8)sx8 * absDen) * inv_resX;
- hit.V = (hit.V + (vfloat8)sy8 * absDen) * inv_resY;
-
- if (unlikely(epilog(hit.valid,hit)))
- return true;
- }
- return false;
- }
-
- __forceinline bool intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const GridMesh::Grid &g, const SubGrid& subgrid) const
- {
- return intersect(ray,v0,v1,v2,v3,g,subgrid,Intersect1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID()));
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const GridMesh::Grid &g, const SubGrid& subgrid) const
- {
- return intersect(ray,v0,v1,v2,v3,g,subgrid,Occluded1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID()));
- }
- };
-
-#endif
-
- // ============================================================================================================================
- // ============================================================================================================================
- // ============================================================================================================================
-
-
- /* ----------------------------- */
- /* -- ray packet intersectors -- */
- /* ----------------------------- */
-
- /*! Deferred hit record for a packet of K rays against quad i of a sub-grid.
-  *  The constructor only stores its inputs; operator() derives the final (u,v,t,Ng). */
- template<int K>
- struct SubGridQuadHitK
- {
-   __forceinline SubGridQuadHitK(const vfloat<K>& U,
-                                 const vfloat<K>& V,
-                                 const vfloat<K>& T,
-                                 const vfloat<K>& absDen,
-                                 const Vec3vf<K>& Ng,
-                                 const vbool<K>& flags,
-                                 const GridMesh::Grid &g,
-                                 const SubGrid& subgrid,
-                                 const unsigned int i)
-     : U(U), V(V), T(T), absDen(absDen), flags(flags), tri_Ng(Ng), g(g), subgrid(subgrid), i(i) {}
-
-   /*! \return tuple (u, v, t, Ng) with u,v expressed relative to the whole grid. */
-   __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
-   {
-     /* normalize by the denominator; u,v are clamped to at most 1 */
-     const vfloat<K> invDen = rcp(absDen);
-     const vfloat<K> t = T * invDen;
-     const vfloat<K> uTri = min(U * invDen,1.0f);
-     const vfloat<K> vTri = min(V * invDen,1.0f);
-
-     /* lanes with flags set take the mirrored coordinates 1-u and 1-v */
-     const vfloat<K> uMirror = vfloat<K>(1.0f) - uTri;
-     const vfloat<K> vMirror = vfloat<K>(1.0f) - vTri;
-     const vfloat<K> uQuad = select(flags,uMirror,uTri);
-     const vfloat<K> vQuad = select(flags,vMirror,vTri);
-
-     /* quad i is offset by (i % 2, i >> 1) from the sub-grid origin */
-     const unsigned int cellX = subgrid.x() + (unsigned int)(i % 2);
-     const unsigned int cellY = subgrid.y() + (unsigned int)(i >>1);
-     const float rcpSpanX = rcp((float)(int)(g.resX-1));
-     const float rcpSpanY = rcp((float)(int)(g.resY-1));
-     const vfloat<K> u = (uQuad + (float)(int)cellX) * rcpSpanX;
-     const vfloat<K> v = (vQuad + (float)(int)cellY) * rcpSpanY;
-
-     const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z);
-     return std::make_tuple(u,v,t,Ng);
-   }
-
- private:
-   const vfloat<K> U;
-   const vfloat<K> V;
-   const vfloat<K> T;
-   const vfloat<K> absDen;
-   const vbool<K> flags;
-   const Vec3vf<K> tri_Ng;
-
-   const GridMesh::Grid &g;
-   const SubGrid& subgrid;
-   const size_t i;
- };
-
- template<int M, int K, bool filter>
- struct SubGridQuadMIntersectorKMoellerTrumboreBase
- {
- __forceinline SubGridQuadMIntersectorKMoellerTrumboreBase(const vbool<K>& valid, const RayK<K>& ray) {}
-
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_e1,
- const Vec3vf<K>& tri_e2,
- const Vec3vf<K>& tri_Ng,
- const vbool<K>& flags,
- const GridMesh::Grid &g,
- const SubGrid &subgrid,
- const unsigned int i,
- const Epilog& epilog) const
- {
- /* calculate denominator */
- vbool<K> valid = valid0;
- const Vec3vf<K> C = tri_v0 - ray.org;
- const Vec3vf<K> R = cross(C,ray.dir);
- const vfloat<K> den = dot(tri_Ng,ray.dir);
- const vfloat<K> absDen = abs(den);
- const vfloat<K> sgnDen = signmsk(den);
-
- /* test against edge p2 p0 */
- const vfloat<K> U = dot(R,tri_e2) ^ sgnDen;
- valid &= U >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p0 p1 */
- const vfloat<K> V = dot(R,tri_e1) ^ sgnDen;
- valid &= V >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p1 p2 */
- const vfloat<K> W = absDen-U-V;
- valid &= W >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen;
- valid &= (absDen*ray.tnear() < T) & (T <= absDen*ray.tfar);
- if (unlikely(none(valid))) return false;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= den < vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#else
- valid &= den != vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#endif
-
- /* calculate hit information */
- SubGridQuadHitK<K> hit(U,V,T,absDen,tri_Ng,flags,g,subgrid,i);
- return epilog(valid,hit);
- }
-
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_v1,
- const Vec3vf<K>& tri_v2,
- const vbool<K>& flags,
- const GridMesh::Grid &g,
- const SubGrid &subgrid,
- const unsigned int i,
- const Epilog& epilog) const
- {
- const Vec3vf<K> e1 = tri_v0-tri_v1;
- const Vec3vf<K> e2 = tri_v2-tri_v0;
- const Vec3vf<K> Ng = cross(e2,e1);
- return intersectK(valid0,ray,tri_v0,e1,e2,Ng,flags,g,subgrid,i,epilog);
- }
-
- template<typename Epilog>
- __forceinline bool intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& v0,
- const Vec3vf<K>& v1,
- const Vec3vf<K>& v2,
- const Vec3vf<K>& v3,
- const GridMesh::Grid &g,
- const SubGrid &subgrid,
- const unsigned int i,
- const Epilog& epilog) const
- {
- intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),g,subgrid,i,epilog);
- if (none(valid0)) return true;
- intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),g,subgrid,i,epilog);
- return none(valid0);
- }
-
- static __forceinline bool intersect1(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- const Vec3vf<M>& tri_Ng,
- MoellerTrumboreHitM<M> &hit)
- {
- /* calculate denominator */
- const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
- const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
- const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O;
- const Vec3vf<M> R = cross(C,D);
- const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D);
- const vfloat<M> absDen = abs(den);
- const vfloat<M> sgnDen = signmsk(den);
-
- /* perform edge tests */
- const vfloat<M> U = dot(R,Vec3vf<M>(tri_e2)) ^ sgnDen;
- const vfloat<M> V = dot(R,Vec3vf<M>(tri_e1)) ^ sgnDen;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#else
- vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#endif
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen;
- valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k]));
- if (likely(none(valid))) return false;
-
- /* calculate hit information */
- new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng);
- return true;
- }
-
- static __forceinline bool intersect1(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- MoellerTrumboreHitM<M> &hit)
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- const Vec3vf<M> Ng = cross(e2,e1);
- return intersect1(ray,k,v0,e1,e2,Ng,hit);
- }
-
- };
-
- /*! Intersects the M quads of a sub-grid with single rays of a K-wide ray packet (generic path;
-  *  a partial specialization for M == 4 replaces this template when __AVX__ is defined).
-  *  Each quad (v0,v1,v2,v3) is tested as the two triangles (v0,v1,v3) and (v2,v3,v1).
-  *  The hit type and the base-class intersector are instantiated with M, matching the
-  *  Vec3vf<M> vertex arguments, the epilog and interpolateUV<M>. */
- template<int M, int K, bool filter>
- struct SubGridQuadMIntersectorKMoellerTrumbore : public SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter>
- {
-   __forceinline SubGridQuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
-     : SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter>(valid,ray) {}
-
-   /*! Intersects ray k with both triangles of every quad and runs the intersection epilog on each hit. */
-   __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
-                                 const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
-   {
-     Intersect1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID());
-
-     MoellerTrumboreHitM<M> hit;
-
-     /* first triangle (v0,v1,v3); the base-class name is spelled out because this class
-        declares its own intersect1, which hides the inherited overloads */
-     if (SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter>::intersect1(ray,k,v0,v1,v3,hit))
-     {
-       interpolateUV<M>(hit,g,subgrid);
-       epilog(hit.valid,hit);
-     }
-
-     /* second triangle (v2,v3,v1); U,V are mirrored before being mapped onto the grid */
-     if (SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter>::intersect1(ray,k,v2,v3,v1,hit))
-     {
-       hit.U = hit.absDen - hit.U;
-       hit.V = hit.absDen - hit.V;
-       interpolateUV<M>(hit,g,subgrid);
-       epilog(hit.valid,hit);
-     }
-
-   }
-
-   /*! Tests ray k for occlusion; returns true as soon as the epilog accepts a hit on either triangle. */
-   __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
-                                const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
-   {
-     Occluded1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID());
-
-     MoellerTrumboreHitM<M> hit;
-
-     /* first triangle (v0,v1,v3) */
-     if (SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter>::intersect1(ray,k,v0,v1,v3,hit))
-     {
-       interpolateUV<M>(hit,g,subgrid);
-       if (epilog(hit.valid,hit)) return true;
-     }
-
-     /* second triangle (v2,v3,v1); U,V are mirrored before being mapped onto the grid */
-     if (SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter>::intersect1(ray,k,v2,v3,v1,hit))
-     {
-       hit.U = hit.absDen - hit.U;
-       hit.V = hit.absDen - hit.V;
-       interpolateUV<M>(hit,g,subgrid);
-       if (epilog(hit.valid,hit)) return true;
-     }
-     return false;
-   }
- };
-
-
-#if defined (__AVX__)
-
- /*! Intersects 4 quads with 1 ray using AVX */
- template<int K, bool filter>
- struct SubGridQuadMIntersectorKMoellerTrumbore<4,K,filter> : public SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>
- {
- __forceinline SubGridQuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
- : SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {}
-
- template<typename Epilog>
- __forceinline bool intersect1(RayK<K>& ray, size_t k,const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const GridMesh::Grid &g, const SubGrid &subgrid, const Epilog& epilog) const
- {
- const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
- const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
-#else
- const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
- const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
-#endif
- const vbool8 flags(0,0,0,0,1,1,1,1);
-
- MoellerTrumboreHitM<8> hit;
- if (SubGridQuadMIntersectorKMoellerTrumboreBase<8,K,filter>::intersect1(ray,k,vtx0,vtx1,vtx2,hit))
- {
- vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen;
-#if !defined(EMBREE_BACKFACE_CULLING)
- hit.U = select(flags,absDen-V,U);
- hit.V = select(flags,absDen-U,V);
- hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f));
-#else
- hit.U = select(flags,absDen-U,U);
- hit.V = select(flags,absDen-V,V);
-#endif
-
- /* correct U,V interpolation across the entire grid */
- const vint8 sx((int)subgrid.x());
- const vint8 sy((int)subgrid.y());
- const vint8 sx8(sx + vint8(0,1,1,0,0,1,1,0));
- const vint8 sy8(sy + vint8(0,0,1,1,0,0,1,1));
- const float inv_resX = rcp((float)((int)g.resX-1));
- const float inv_resY = rcp((float)((int)g.resY-1));
- hit.U = (hit.U + (vfloat8)sx8 * absDen) * inv_resX;
- hit.V = (hit.V + (vfloat8)sy8 * absDen) * inv_resY;
- if (unlikely(epilog(hit.valid,hit)))
- return true;
-
- }
- return false;
- }
-
- __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Intersect1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID()));
- }
-
- __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
- {
- return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Occluded1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID()));
- }
- };
-
-#endif
-
-
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h
deleted file mode 100644
index 1cd88aa799..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h
+++ /dev/null
@@ -1,508 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "subgrid.h"
-#include "quad_intersector_moeller.h"
-#include "quad_intersector_pluecker.h"
-
-namespace embree
-{
- namespace isa
- {
-
- /*! Hit record for M sub-grid quads tested with the Pluecker intersector:
-  *  valid mask, u/v coordinates, hit distance and geometry normal, one entry per lane. */
- template<int M>
- struct SubGridQuadHitPlueckerM
- {
-   __forceinline SubGridQuadHitPlueckerM() {}
-
-   /*! Derives u,v from the edge-test values U,V and their sum UVW; lanes with flags set
-    *  receive mirrored coordinates. */
-   __forceinline SubGridQuadHitPlueckerM(const vbool<M>& valid,
-                                         const vfloat<M>& U,
-                                         const vfloat<M>& V,
-                                         const vfloat<M>& UVW,
-                                         const vfloat<M>& t,
-                                         const Vec3vf<M>& Ng,
-                                         const vbool<M>& flags) : valid(valid), vt(t)
-   {
-     /* lanes with |UVW| below min_rcp_input use a reciprocal of 0 instead of rcp(UVW) */
-     const vbool<M> tooSmall = abs(UVW) < min_rcp_input;
-     const vfloat<M> invUVW = select(tooSmall,vfloat<M>(0.0f),rcp(UVW));
-
-     /* u,v clamped to at most 1, plus their mirrored counterparts */
-     const vfloat<M> uTri = min(U * invUVW,1.0f);
-     const vfloat<M> vTri = min(V * invUVW,1.0f);
-     const vfloat<M> uMirror = vfloat<M>(1.0f) - uTri;
-     const vfloat<M> vMirror = vfloat<M>(1.0f) - vTri;
- #if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING)
-     vu = select(flags,uMirror,uTri);
-     vv = select(flags,vMirror,vTri);
-     vNg = Vec3vf<M>(Ng.x,Ng.y,Ng.z);
- #else
-     /* flagged lanes take 1-v as u and 1-u as v, and their normal is negated */
-     const vfloat<M> normalSign = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f));
-     vv = select(flags,uMirror,vTri);
-     vu = select(flags,vMirror,uTri);
-     vNg = Vec3vf<M>(normalSign*Ng.x,normalSign*Ng.y,normalSign*Ng.z);
- #endif
-   }
-
-   /*! Intentionally empty. */
-   __forceinline void finalize()
-   {
-   }
-
-   /*! u,v of lane i. */
-   __forceinline Vec2f uv(const size_t i)
-   {
-     return Vec2f(vu[i],vv[i]);
-   }
-
-   /*! Hit distance of lane i. */
-   __forceinline float t(const size_t i) { return vt[i]; }
-
-   /*! Geometry normal of lane i. */
-   __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- public:
-   vbool<M> valid;
-   vfloat<M> vu;
-   vfloat<M> vv;
-   vfloat<M> vt;
-   Vec3vf<M> vNg;
- };
-
- /*! Rescales the per-quad u,v of a Pluecker hit so that they address the entire grid.
-  *  stepX/stepY hold the per-lane quad offsets added to the sub-grid origin. */
- template<int M>
- __forceinline void interpolateUV(SubGridQuadHitPlueckerM<M> &hit,const GridMesh::Grid &g, const SubGrid& subgrid, const vint<M> &stepX, const vint<M> &stepY)
- {
-   /* per-lane quad position inside the grid */
-   const vint<M> originX((int)subgrid.x());
-   const vint<M> originY((int)subgrid.y());
-   const vint<M> quadX(originX + stepX);
-   const vint<M> quadY(originY + stepY);
-
-   /* reciprocal of the grid resolution minus one, per axis */
-   const float rcpSpanX = rcp((float)((int)g.resX-1));
-   const float rcpSpanY = rcp((float)((int)g.resY-1));
-
-   hit.vu = (hit.vu + vfloat<M>(quadX)) * rcpSpanX;
-   hit.vv = (hit.vv + vfloat<M>(quadY)) * rcpSpanY;
- }
-
- /*! Pluecker-style ray/triangle test of one ray against M triangles.
-  *  \param flags forwarded unchanged to the hit record constructor
-  *  \param hit   overwritten (placement new) only when at least one lane is hit
-  *  \return true if any lane passed the edge, depth and non-zero-denominator tests */
- template<int M>
- __forceinline static bool intersectPluecker(Ray& ray,
-                                             const Vec3vf<M>& tri_v0,
-                                             const Vec3vf<M>& tri_v1,
-                                             const Vec3vf<M>& tri_v2,
-                                             const vbool<M>& flags,
-                                             SubGridQuadHitPlueckerM<M> &hit)
- {
-   /* move the triangle into a frame centered at the ray origin */
-   const Vec3vf<M> rayOrg = Vec3vf<M>((Vec3fa)ray.org);
-   const Vec3vf<M> rayDir = Vec3vf<M>((Vec3fa)ray.dir);
-   const Vec3vf<M> p0 = tri_v0-rayOrg;
-   const Vec3vf<M> p1 = tri_v1-rayOrg;
-   const Vec3vf<M> p2 = tri_v2-rayOrg;
-
-   /* triangle edges */
-   const Vec3vf<M> edge0 = p2-p0;
-   const Vec3vf<M> edge1 = p0-p1;
-   const Vec3vf<M> edge2 = p1-p2;
-
-   /* edge tests, with a tolerance of ulp*|U+V+W| */
-   const vfloat<M> U = dot(cross(edge0,p2+p0),rayDir);
-   const vfloat<M> V = dot(cross(edge1,p0+p1),rayDir);
-   const vfloat<M> W = dot(cross(edge2,p1+p2),rayDir);
-   const vfloat<M> UVW = U+V+W;
-   const vfloat<M> eps = float(ulp)*abs(UVW);
- #if defined(EMBREE_BACKFACE_CULLING)
-   vbool<M> valid = max(U,V,W) <= eps;
- #else
-   vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
- #endif
-   if (unlikely(none(valid))) return false;
-
-   /* geometry normal and denominator */
-   const Vec3vf<M> Ng = stable_triangle_normal(edge0,edge1,edge2);
-   const vfloat<M> den = twice(dot(Ng,rayDir));
-
-   /* depth test against [tnear,tfar]; lanes with a zero denominator are rejected afterwards */
-   const vfloat<M> T = twice(dot(p0,Ng));
-   const vfloat<M> t = rcp(den)*T;
-   valid &= (vfloat<M>(ray.tnear()) <= t) & (t <= vfloat<M>(ray.tfar));
-   valid &= den != vfloat<M>(zero);
-   if (unlikely(none(valid))) return false;
-
-   /* construct the hit record in place */
-   new (&hit) SubGridQuadHitPlueckerM<M>(valid,U,V,UVW,t,Ng,flags);
-   return true;
- }
-
- template<int M, bool filter>
- struct SubGridQuadMIntersector1Pluecker;
-
- /*! Generic 1-ray Pluecker intersector for the M quads of a sub-grid.
-  *  Each quad (v0,v1,v2,v3) is tested as the two triangles (v0,v1,v3) and (v2,v3,v1);
-  *  the second triangle is marked with an all-true flags mask. */
- template<int M, bool filter>
- struct SubGridQuadMIntersector1Pluecker
- {
-   __forceinline SubGridQuadMIntersector1Pluecker() {}
-
-   /*! Both arguments are ignored by this constructor. */
-   __forceinline SubGridQuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}
-
-   /*! Runs the intersection epilog for every triangle of the sub-grid hit by the ray. */
-   __forceinline void intersect(RayHit& ray, IntersectContext* context,
-                                const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
-                                const GridMesh::Grid &g, const SubGrid& subgrid) const
-   {
-     SubGridQuadHitPlueckerM<M> quadHit;
-     Intersect1EpilogMU<M,filter> commit(ray,context,subgrid.geomID(),subgrid.primID());
-
-     /* per-lane quad offsets inside the sub-grid, shared by both triangles */
-     const vint<M> stepX(0,1,1,0);
-     const vint<M> stepY(0,0,1,1);
-
-     /* first triangle (v0,v1,v3) */
-     if (intersectPluecker(ray,v0,v1,v3,vbool<M>(false),quadHit))
-     {
-       interpolateUV<M>(quadHit,g,subgrid,stepX,stepY);
-       commit(quadHit.valid,quadHit);
-     }
-
-     /* second triangle (v2,v3,v1) */
-     if (!intersectPluecker(ray,v2,v3,v1,vbool<M>(true),quadHit))
-       return;
-     interpolateUV<M>(quadHit,g,subgrid,stepX,stepY);
-     commit(quadHit.valid,quadHit);
-   }
-
-   /*! Returns true as soon as the occlusion epilog accepts a hit on either triangle. */
-   __forceinline bool occluded(Ray& ray, IntersectContext* context,
-                               const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
-                               const GridMesh::Grid &g, const SubGrid& subgrid) const
-   {
-     SubGridQuadHitPlueckerM<M> quadHit;
-     Occluded1EpilogMU<M,filter> shadow(ray,context,subgrid.geomID(),subgrid.primID());
-
-     /* per-lane quad offsets inside the sub-grid, shared by both triangles */
-     const vint<M> stepX(0,1,1,0);
-     const vint<M> stepY(0,0,1,1);
-
-     /* first triangle (v0,v1,v3) */
-     if (intersectPluecker(ray,v0,v1,v3,vbool<M>(false),quadHit))
-     {
-       interpolateUV<M>(quadHit,g,subgrid,stepX,stepY);
-       if (shadow(quadHit.valid,quadHit))
-         return true;
-     }
-
-     /* second triangle (v2,v3,v1) */
-     if (!intersectPluecker(ray,v2,v3,v1,vbool<M>(true),quadHit))
-       return false;
-     interpolateUV<M>(quadHit,g,subgrid,stepX,stepY);
-     if (shadow(quadHit.valid,quadHit))
-       return true;
-
-     return false;
-   }
- };
-
-#if defined (__AVX__)
-
- /*! Intersects 4 quads with 1 ray using AVX */
- template<bool filter>
- struct SubGridQuadMIntersector1Pluecker<4,filter>
- {
- __forceinline SubGridQuadMIntersector1Pluecker() {}
-
- __forceinline SubGridQuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}
-
- template<typename Epilog>
- __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid& subgrid, const Epilog& epilog) const
- {
- const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
-#if !defined(EMBREE_BACKFACE_CULLING)
- const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
- const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
-#else
- const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
- const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
-#endif
- SubGridQuadHitPlueckerM<8> hit;
- const vbool8 flags(0,0,0,0,1,1,1,1);
- if (unlikely(intersectPluecker(ray,vtx0,vtx1,vtx2,flags,hit)))
- {
- /* correct U,V interpolation across the entire grid */
- interpolateUV<8>(hit,g,subgrid,vint<8>(0,1,1,0,0,1,1,0),vint<8>(0,0,1,1,0,0,1,1));
- if (unlikely(epilog(hit.valid,hit)))
- return true;
- }
- return false;
- }
-
- __forceinline bool intersect(RayHit& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const GridMesh::Grid &g, const SubGrid& subgrid) const
- {
- return intersect(ray,v0,v1,v2,v3,g,subgrid,Intersect1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID()));
- }
-
- __forceinline bool occluded(Ray& ray, IntersectContext* context,
- const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
- const GridMesh::Grid &g, const SubGrid& subgrid) const
- {
- return intersect(ray,v0,v1,v2,v3,g,subgrid,Occluded1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID()));
- }
- };
-
-#endif
-
-
- /* ----------------------------- */
- /* -- ray packet intersectors -- */
- /* ----------------------------- */
-
- /*! Deferred hit record for a packet of K rays against quad i of a sub-grid (Pluecker test).
-  *  The constructor only stores its inputs; operator() derives the final (u,v,t,Ng). */
- template<int K>
- struct SubGridQuadHitPlueckerK
- {
-   __forceinline SubGridQuadHitPlueckerK(const vfloat<K>& U,
-                                         const vfloat<K>& V,
-                                         const vfloat<K>& UVW,
-                                         const vfloat<K>& t,
-                                         const Vec3vf<K>& Ng,
-                                         const vbool<K>& flags,
-                                         const GridMesh::Grid &g,
-                                         const SubGrid& subgrid,
-                                         const unsigned int i)
-     : U(U), V(V), UVW(UVW), t(t), flags(flags), tri_Ng(Ng), g(g), subgrid(subgrid), i(i) {}
-
-   /*! \return tuple (u, v, t, Ng) with u,v expressed relative to the whole grid. */
-   __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
-   {
-     /* lanes with |UVW| below min_rcp_input use a reciprocal of 0 instead of rcp(UVW) */
-     const vbool<K> invalid = abs(UVW) < min_rcp_input;
-     const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW));
-
-     /* u,v clamped to at most 1; lanes with flags set take the mirrored 1-u and 1-v */
-     const vfloat<K> u0 = min(U * rcpUVW,1.0f);
-     const vfloat<K> v0 = min(V * rcpUVW,1.0f);
-     const vfloat<K> u1 = vfloat<K>(1.0f) - u0;
-     const vfloat<K> v1 = vfloat<K>(1.0f) - v0;
-     const vfloat<K> uu = select(flags,u1,u0);
-     const vfloat<K> vv = select(flags,v1,v0);
-
-     /* quad i is offset by (i % 2, i >> 1) from the sub-grid origin */
-     const unsigned int sx = subgrid.x() + (unsigned int)(i % 2);
-     const unsigned int sy = subgrid.y() + (unsigned int)(i >>1);
-     const float inv_resX = rcp((float)(int)(g.resX-1));
-     const float inv_resY = rcp((float)(int)(g.resY-1));
-     const vfloat<K> u = (uu + (float)(int)sx) * inv_resX;
-     const vfloat<K> v = (vv + (float)(int)sy) * inv_resY;
-     const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z);
-     return std::make_tuple(u,v,t,Ng);
-   }
-
- private:
-   /* every member below is set by the constructor and read by operator() */
-   const vfloat<K> U;
-   const vfloat<K> V;
-   const vfloat<K> UVW;
-   const vfloat<K> t;
-   const vbool<K> flags;
-   const Vec3vf<K> tri_Ng;
-
-   const GridMesh::Grid &g;
-   const SubGrid& subgrid;
-   const size_t i;
- };
-
-
- template<int M, int K, bool filter>
- struct SubGridQuadMIntersectorKPlueckerBase
- {
- __forceinline SubGridQuadMIntersectorKPlueckerBase(const vbool<K>& valid, const RayK<K>& ray) {}
-
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_v1,
- const Vec3vf<K>& tri_v2,
- const Vec3vf<K>& tri_Ng,
- const vbool<K>& flags,
- const GridMesh::Grid &g,
- const SubGrid &subgrid,
- const unsigned int i,
- const Epilog& epilog) const
- {
- /* calculate denominator */
- /* calculate vertices relative to ray origin */
- vbool<K> valid = valid0;
- const Vec3vf<K> O = ray.org;
- const Vec3vf<K> D = ray.dir;
- const Vec3vf<K> v0 = tri_v0-O;
- const Vec3vf<K> v1 = tri_v1-O;
- const Vec3vf<K> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<K> e0 = v2-v0;
- const Vec3vf<K> e1 = v0-v1;
- const Vec3vf<K> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D);
- const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D);
- const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D);
- const vfloat<K> UVW = U+V+W;
- const vfloat<K> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= max(U,V,W) <= eps;
-#else
- valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D));
-
- /* perform depth test */
- const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng)));
- const vfloat<K> t = rcp(den)*T;
- valid &= ray.tnear() <= t & t <= ray.tfar;
- valid &= den != vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-
- /* calculate hit information */
- SubGridQuadHitPlueckerK<K> hit(U,V,UVW,t,tri_Ng,flags,g,subgrid,i);
- return epilog(valid,hit);
- }
-
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& v0,
- const Vec3vf<K>& v1,
- const Vec3vf<K>& v2,
- const vbool<K>& flags,
- const GridMesh::Grid &g,
- const SubGrid &subgrid,
- const unsigned int i,
- const Epilog& epilog) const
- {
- const Vec3vf<K> e1 = v0-v1;
- const Vec3vf<K> e2 = v2-v0;
- const Vec3vf<K> Ng = cross(e2,e1);
- return intersectK(valid0,ray,v0,v1,v2,Ng,flags,g,subgrid,i,epilog);
- }
-
- template<typename Epilog>
- __forceinline bool intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& v0,
- const Vec3vf<K>& v1,
- const Vec3vf<K>& v2,
- const Vec3vf<K>& v3,
- const GridMesh::Grid &g,
- const SubGrid &subgrid,
- const unsigned int i,
- const Epilog& epilog) const
- {
- intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),g,subgrid,i,epilog);
- if (none(valid0)) return true;
- intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),g,subgrid,i,epilog);
- return none(valid0);
- }
-
- static __forceinline bool intersect1(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_v1,
- const Vec3vf<M>& tri_v2,
- const Vec3vf<M>& tri_Ng,
- const vbool<M>& flags,
- SubGridQuadHitPlueckerM<M> &hit)
- {
- /* calculate vertices relative to ray origin */
- const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
- const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
- const Vec3vf<M> v0 = tri_v0-O;
- const Vec3vf<M> v1 = tri_v1-O;
- const Vec3vf<M> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<M> e0 = v2-v0;
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<M> U = dot(cross(e0,v2+v0),D);
- const vfloat<M> V = dot(cross(e1,v0+v1),D);
- const vfloat<M> W = dot(cross(e2,v1+v2),D);
- const vfloat<M> UVW = U+V+W;
- const vfloat<M> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = max(U,V,W) <= eps ;
-#else
- vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<M> den = twice(dot(Ng,D));
-
- /* perform depth test */
- const vfloat<M> T = twice(dot(v0,Ng));
- const vfloat<M> t = rcp(den)*T;
- valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]);
- if (unlikely(none(valid))) return false;
-
- /* avoid division by 0 */
- valid &= den != vfloat<M>(zero);
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- new (&hit) SubGridQuadHitPlueckerM<M>(valid,U,V,UVW,t,tri_Ng,flags);
- return true;
- }
-
- static __forceinline bool intersect1(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const vbool<M>& flags,
- SubGridQuadHitPlueckerM<M> &hit)
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- const Vec3vf<M> Ng = cross(e2,e1); // FIXME: optimize!!!
- return intersect1(ray,k,v0,v1,v2,Ng,flags,hit);
- }
-
- };
-
- /*! Intersects the M quads of a sub-grid with single rays of a K-wide ray packet using the
-  *  Pluecker test. Each quad (v0,v1,v2,v3) is tested as the two triangles (v0,v1,v3) and
-  *  (v2,v3,v1); the second triangle is marked with an all-true flags mask.
-  *  The hit type, flags mask and base-class intersector are instantiated with M, matching
-  *  the Vec3vf<M> vertex arguments, the epilog and interpolateUV<M>. */
- template<int M, int K, bool filter>
- struct SubGridQuadMIntersectorKPluecker : public SubGridQuadMIntersectorKPlueckerBase<M,K,filter>
- {
-   __forceinline SubGridQuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray)
-     : SubGridQuadMIntersectorKPlueckerBase<M,K,filter>(valid,ray) {}
-
-   /*! Intersects ray k with both triangles of every quad and runs the intersection epilog on each hit. */
-   __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
-                                 const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
-   {
-     Intersect1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID());
-
-     SubGridQuadHitPlueckerM<M> hit;
-
-     /* first triangle (v0,v1,v3); the base-class name is spelled out because this class
-        declares its own intersect1, which hides the inherited overloads */
-     if (SubGridQuadMIntersectorKPlueckerBase<M,K,filter>::intersect1(ray,k,v0,v1,v3,vbool<M>(false),hit))
-     {
-       interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1));
-       epilog(hit.valid,hit);
-     }
-
-     /* second triangle (v2,v3,v1) */
-     if (SubGridQuadMIntersectorKPlueckerBase<M,K,filter>::intersect1(ray,k,v2,v3,v1,vbool<M>(true),hit))
-     {
-       interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1));
-       epilog(hit.valid,hit);
-     }
-
-   }
-
-   /*! Tests ray k for occlusion; returns true as soon as the epilog accepts a hit on either triangle. */
-   __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
-                                const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const
-   {
-     Occluded1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID());
-
-     SubGridQuadHitPlueckerM<M> hit;
-
-     /* first triangle (v0,v1,v3) */
-     if (SubGridQuadMIntersectorKPlueckerBase<M,K,filter>::intersect1(ray,k,v0,v1,v3,vbool<M>(false),hit))
-     {
-       interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1));
-       if (epilog(hit.valid,hit)) return true;
-     }
-
-     /* second triangle (v2,v3,v1) */
-     if (SubGridQuadMIntersectorKPlueckerBase<M,K,filter>::intersect1(ray,k,v2,v3,v1,vbool<M>(true),hit))
-     {
-       interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1));
-       if (epilog(hit.valid,hit)) return true;
-     }
-     return false;
-   }
- };
-
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h
deleted file mode 100644
index 400a88b985..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h
+++ /dev/null
@@ -1,236 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "subgrid_intersector.h"
-
-namespace embree
-{
- namespace isa
- {
- template<int N, bool filter>
- struct SubGridMBIntersector1Pluecker
- {
- typedef SubGridMBQBVHN<N> Primitive;
- typedef SubGridQuadMIntersector1Pluecker<4,filter> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(normal.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- float ftime;
- const int itime = mesh->timeSegment(ray.time(), ftime);
- Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime,ftime);
- pre.intersect(ray,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- float ftime;
- const int itime = mesh->timeSegment(ray.time(), ftime);
-
- Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime,ftime);
- return pre.occluded(ray,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const SubGrid& subgrid)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, subgrid);
- }
-
- template<int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
- for (size_t i=0;i<num;i++)
- {
- vfloat<Nx> dist;
- const float time = prim[i].adjustTime(ray.time());
-
- assert(time <= 1.0f);
- size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist);
-#if defined(__AVX__)
- STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1);
-#endif
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- if (unlikely(dist[ID] > ray.tfar)) continue;
- intersect(pre,ray,context,prim[i].subgrid(ID));
- }
- }
- }
-
- template<int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
- for (size_t i=0;i<num;i++)
- {
- const float time = prim[i].adjustTime(ray.time());
- assert(time <= 1.0f);
- vfloat<Nx> dist;
- size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- if (occluded(pre,ray,context,prim[i].subgrid(ID)))
- return true;
- }
- }
- return false;
- }
-
- static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, const Primitive* prim, size_t num, const TravPointQuery<N> &tquery, size_t& lazy_node)
- {
- assert(false && "not implemented");
- return false;
- }
- };
-
-
- template<int N, int K, bool filter>
- struct SubGridMBIntersectorKPluecker
- {
- typedef SubGridMBQBVHN<N> Primitive;
- typedef SubGridQuadMIntersectorKPluecker<4,K,filter> Precalculations;
-
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- size_t m_valid = movemask(valid_i);
- while(m_valid)
- {
- size_t ID = bscf(m_valid);
- intersect(pre,ray,ID,context,subgrid);
- }
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const SubGrid& subgrid)
- {
- vbool<K> valid0 = valid_i;
- size_t m_valid = movemask(valid_i);
- while(m_valid)
- {
- size_t ID = bscf(m_valid);
- if (occluded(pre,ray,ID,context,subgrid))
- clear(valid0,ID);
- }
- return !valid0;
- }
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(normal.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- vfloat<K> ftime;
- const vint<K> itime = mesh->timeSegment(ray.time(), ftime);
- Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime[k],ftime[k]);
- pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid);
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const SubGrid& subgrid)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const GridMesh* mesh = context->scene->get<GridMesh>(subgrid.geomID());
- const GridMesh::Grid &g = mesh->grid(subgrid.primID());
-
- vfloat<K> ftime;
- const vint<K> itime = mesh->timeSegment(ray.time(), ftime);
- Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime[k],ftime[k]);
- return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid);
- }
-
- template<bool robust>
- static __forceinline void intersect(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
- for (size_t j=0;j<num;j++)
- {
- size_t m_valid = movemask(prim[j].qnode.validMask());
- const vfloat<K> time = prim[j].adjustTime(ray.time());
-
- vfloat<K> dist;
- while(m_valid)
- {
- const size_t i = bscf(m_valid);
- if (none(valid & isecK.intersectK(&prim[j].qnode,i,tray,time,dist))) continue;
- intersect(valid,pre,ray,context,prim[j].subgrid(i));
- }
- }
- }
-
- template<bool robust>
- static __forceinline vbool<K> occluded(const vbool<K>& valid, const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRayK<K, robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersectorK<N,K,robust> isecK;
-
- vbool<K> valid0 = valid;
- for (size_t j=0;j<num;j++)
- {
- size_t m_valid = movemask(prim[j].qnode.validMask());
- const vfloat<K> time = prim[j].adjustTime(ray.time());
- vfloat<K> dist;
- while(m_valid)
- {
- const size_t i = bscf(m_valid);
- if (none(valid0 & isecK.intersectK(&prim[j].qnode,i,tray,time,dist))) continue;
- valid0 &= !occluded(valid0,pre,ray,context,prim[j].subgrid(i));
- if (none(valid0)) break;
- }
- }
- return !valid0;
- }
-
- template<int Nx, bool robust>
- static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
- for (size_t i=0;i<num;i++)
- {
- vfloat<N> dist;
- const float time = prim[i].adjustTime(ray.time()[k]);
- assert(time <= 1.0f);
-
- size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- if (unlikely(dist[ID] > ray.tfar[k])) continue;
- intersect(pre,ray,k,context,prim[i].subgrid(ID));
- }
- }
- }
-
- template<int Nx, bool robust>
- static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node)
- {
- BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1;
-
- for (size_t i=0;i<num;i++)
- {
- vfloat<N> dist;
- const float time = prim[i].adjustTime(ray.time()[k]);
- assert(time <= 1.0f);
-
- size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist);
- while(mask != 0)
- {
- const size_t ID = bscf(mask);
- if (occluded(pre,ray,k,context,prim[i].subgrid(ID)))
- return true;
- }
- }
- return false;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle.h b/thirdparty/embree-aarch64/kernels/geometry/triangle.h
deleted file mode 100644
index 0dedf6dc4c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/triangle.h
+++ /dev/null
@@ -1,162 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-
-namespace embree
-{
- /* Precalculated representation for M triangles. Stores for each
- triangle a base vertex, two edges, and the geometry normal to
- speed up intersection calculations */
- template<int M>
- struct TriangleM
- {
- public:
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* Returns maximum number of stored triangles */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- public:
-
- /* Default constructor */
- __forceinline TriangleM() {}
-
- /* Construction from vertices and IDs */
- __forceinline TriangleM(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const vuint<M>& geomIDs, const vuint<M>& primIDs)
- : v0(v0), e1(v0-v1), e2(v2-v0), geomIDs(geomIDs), primIDs(primIDs) {}
-
- /* Returns a mask that tells which triangles are valid */
- __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); }
-
- /* Returns true if the specified triangle is valid */
- __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; }
-
- /* Returns the number of stored triangles */
- __forceinline size_t size() const { return bsf(~movemask(valid())); }
-
- /* Returns the geometry IDs */
- __forceinline vuint<M>& geomID() { return geomIDs; }
- __forceinline const vuint<M>& geomID() const { return geomIDs; }
- __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; }
-
- /* Returns the primitive IDs */
- __forceinline vuint<M>& primID() { return primIDs; }
- __forceinline const vuint<M>& primID() const { return primIDs; }
- __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
-
- /* Calculate the bounds of the triangle */
- __forceinline BBox3fa bounds() const
- {
- Vec3vf<M> p0 = v0;
- Vec3vf<M> p1 = v0-e1;
- Vec3vf<M> p2 = v0+e2;
- Vec3vf<M> lower = min(p0,p1,p2);
- Vec3vf<M> upper = max(p0,p1,p2);
- vbool<M> mask = valid();
- lower.x = select(mask,lower.x,vfloat<M>(pos_inf));
- lower.y = select(mask,lower.y,vfloat<M>(pos_inf));
- lower.z = select(mask,lower.z,vfloat<M>(pos_inf));
- upper.x = select(mask,upper.x,vfloat<M>(neg_inf));
- upper.y = select(mask,upper.y,vfloat<M>(neg_inf));
- upper.z = select(mask,upper.z,vfloat<M>(neg_inf));
- return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)),
- Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z)));
- }
-
- /* Non temporal store */
- __forceinline static void store_nt(TriangleM* dst, const TriangleM& src)
- {
- vfloat<M>::store_nt(&dst->v0.x,src.v0.x);
- vfloat<M>::store_nt(&dst->v0.y,src.v0.y);
- vfloat<M>::store_nt(&dst->v0.z,src.v0.z);
- vfloat<M>::store_nt(&dst->e1.x,src.e1.x);
- vfloat<M>::store_nt(&dst->e1.y,src.e1.y);
- vfloat<M>::store_nt(&dst->e1.z,src.e1.z);
- vfloat<M>::store_nt(&dst->e2.x,src.e2.x);
- vfloat<M>::store_nt(&dst->e2.y,src.e2.y);
- vfloat<M>::store_nt(&dst->e2.z,src.e2.z);
- vuint<M>::store_nt(&dst->geomIDs,src.geomIDs);
- vuint<M>::store_nt(&dst->primIDs,src.primIDs);
- }
-
- /* Fill triangle from triangle list */
- __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene)
- {
- vuint<M> vgeomID = -1, vprimID = -1;
- Vec3vf<M> v0 = zero, v1 = zero, v2 = zero;
-
- for (size_t i=0; i<M && begin<end; i++, begin++)
- {
- const PrimRef& prim = prims[begin];
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(geomID);
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const Vec3fa& p0 = mesh->vertex(tri.v[0]);
- const Vec3fa& p1 = mesh->vertex(tri.v[1]);
- const Vec3fa& p2 = mesh->vertex(tri.v[2]);
- vgeomID [i] = geomID;
- vprimID [i] = primID;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- }
- TriangleM::store_nt(this,TriangleM(v0,v1,v2,vgeomID,vprimID));
- }
-
- /* Updates the primitive */
- __forceinline BBox3fa update(TriangleMesh* mesh)
- {
- BBox3fa bounds = empty;
- vuint<M> vgeomID = -1, vprimID = -1;
- Vec3vf<M> v0 = zero, v1 = zero, v2 = zero;
-
- for (size_t i=0; i<M; i++)
- {
- if (unlikely(geomID(i) == -1)) break;
- const unsigned geomId = geomID(i);
- const unsigned primId = primID(i);
- const TriangleMesh::Triangle& tri = mesh->triangle(primId);
- const Vec3fa p0 = mesh->vertex(tri.v[0]);
- const Vec3fa p1 = mesh->vertex(tri.v[1]);
- const Vec3fa p2 = mesh->vertex(tri.v[2]);
- bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2)));
- vgeomID [i] = geomId;
- vprimID [i] = primId;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- }
- TriangleM::store_nt(this,TriangleM(v0,v1,v2,vgeomID,vprimID));
- return bounds;
- }
-
- public:
- Vec3vf<M> v0; // base vertex of the triangles
- Vec3vf<M> e1; // 1st edge of the triangles (v0-v1)
- Vec3vf<M> e2; // 2nd edge of the triangles (v2-v0)
- private:
- vuint<M> geomIDs; // geometry IDs
- vuint<M> primIDs; // primitive IDs
- };
-
- template<int M>
- typename TriangleM<M>::Type TriangleM<M>::type;
-
- typedef TriangleM<4> Triangle4;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h
deleted file mode 100644
index 125a42c5fe..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h
+++ /dev/null
@@ -1,96 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "triangle.h"
-#include "triangle_intersector_moeller.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Intersects M triangles with 1 ray */
- template<int M, int Mx, bool filter>
- struct TriangleMIntersector1Moeller
- {
- typedef TriangleM<M> Primitive;
- typedef MoellerTrumboreIntersector1<Mx> Precalculations;
-
- /*! Intersect a ray with the M triangles and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleM<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersectEdge(ray,tri.v0,tri.e1,tri.e2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of M triangles. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleM<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.intersectEdge(ray,tri.v0,tri.e1,tri.e2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
-
- };
-
- /*! Intersects M triangles with K rays. */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMIntersectorKMoeller
- {
- typedef TriangleM<M> Primitive;
- typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations;
-
- /*! Intersects K rays with M triangles. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleM<M>& tri)
- {
- STAT_USER(0,TriangleM<M>::max_size());
- for (size_t i=0; i<TriangleM<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> p0 = broadcast<vfloat<K>>(tri.v0,i);
- const Vec3vf<K> e1 = broadcast<vfloat<K>>(tri.e1,i);
- const Vec3vf<K> e2 = broadcast<vfloat<K>>(tri.e2,i);
- pre.intersectEdgeK(valid_i,ray,p0,e1,e2,IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M triangles. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleM<M>& tri)
- {
- vbool<K> valid0 = valid_i;
-
- for (size_t i=0; i<TriangleM<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- const Vec3vf<K> p0 = broadcast<vfloat<K>>(tri.v0,i);
- const Vec3vf<K> e1 = broadcast<vfloat<K>>(tri.e1,i);
- const Vec3vf<K> e2 = broadcast<vfloat<K>>(tri.e2,i);
- pre.intersectEdgeK(valid0,ray,p0,e1,e2,OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleM<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersectEdge(ray,k,tri.v0,tri.e1,tri.e2,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleM<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.intersectEdge(ray,k,tri.v0,tri.e1,tri.e2,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h
deleted file mode 100644
index b5a8519236..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h
+++ /dev/null
@@ -1,403 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "triangle.h"
-#include "intersector_epilog.h"
-
-/*! This intersector implements a modified version of the Moeller
- * Trumbore intersector from the paper "Fast, Minimum Storage
- * Ray-Triangle Intersection". In contrast to the paper we
- * precalculate some factors and factor the calculations differently
- * to allow precalculating the cross product e1 x e2. The resulting
- * algorithm is similar to the fastest one of the paper "Optimizing
- * Ray-Triangle Intersection via Automated Search". */
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct MoellerTrumboreHitM
- {
- __forceinline MoellerTrumboreHitM() {}
-
- __forceinline MoellerTrumboreHitM(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const vfloat<M>& absDen, const Vec3vf<M>& Ng)
- : U(U), V(V), T(T), absDen(absDen), valid(valid), vNg(Ng) {}
-
- __forceinline void finalize()
- {
- const vfloat<M> rcpAbsDen = rcp(absDen);
- vt = T * rcpAbsDen;
- vu = U * rcpAbsDen;
- vv = V * rcpAbsDen;
- }
-
- __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
- __forceinline float t (const size_t i) const { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- public:
- vfloat<M> U;
- vfloat<M> V;
- vfloat<M> T;
- vfloat<M> absDen;
-
- public:
- vbool<M> valid;
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
- };
-
- template<int M>
- struct MoellerTrumboreIntersector1
- {
- __forceinline MoellerTrumboreIntersector1() {}
-
- __forceinline MoellerTrumboreIntersector1(const Ray& ray, const void* ptr) {}
-
- __forceinline bool intersect(const vbool<M>& valid0,
- Ray& ray,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- const Vec3vf<M>& tri_Ng,
- MoellerTrumboreHitM<M>& hit) const
- {
- /* calculate denominator */
- vbool<M> valid = valid0;
- const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org);
- const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir);
- const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O;
- const Vec3vf<M> R = cross(C,D);
- const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D);
-
- const vfloat<M> absDen = abs(den);
- const vfloat<M> sgnDen = signmsk(den);
-
- /* perform edge tests */
- const vfloat<M> U = dot(R,Vec3vf<M>(tri_e2)) ^ sgnDen;
- const vfloat<M> V = dot(R,Vec3vf<M>(tri_e1)) ^ sgnDen;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#else
- valid &= (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#endif
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen;
- valid &= (absDen*vfloat<M>(ray.tnear()) < T) & (T <= absDen*vfloat<M>(ray.tfar));
- if (likely(none(valid))) return false;
-
-
- /* update hit information */
- new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng);
-
- return true;
- }
-
- __forceinline bool intersectEdge(Ray& ray,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- MoellerTrumboreHitM<M>& hit) const
- {
- vbool<M> valid = true;
- const Vec3<vfloat<M>> tri_Ng = cross(tri_e2,tri_e1);
- return intersect(valid,ray,tri_v0,tri_e1,tri_e2,tri_Ng,hit);
- }
-
- __forceinline bool intersect(Ray& ray,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- MoellerTrumboreHitM<M>& hit) const
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- return intersectEdge(ray,v0,e1,e2,hit);
- }
-
- __forceinline bool intersect(const vbool<M>& valid,
- Ray& ray,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- MoellerTrumboreHitM<M>& hit) const
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- return intersectEdge(valid,ray,v0,e1,e2,hit);
- }
-
- template<typename Epilog>
- __forceinline bool intersectEdge(Ray& ray,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& e1,
- const Vec3vf<M>& e2,
- const Epilog& epilog) const
- {
- MoellerTrumboreHitM<M> hit;
- if (likely(intersectEdge(ray,v0,e1,e2,hit))) return epilog(hit.valid,hit);
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersect(Ray& ray,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog) const
- {
- MoellerTrumboreHitM<M> hit;
- if (likely(intersect(ray,v0,v1,v2,hit))) return epilog(hit.valid,hit);
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersect(const vbool<M>& valid,
- Ray& ray,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog) const
- {
- MoellerTrumboreHitM<M> hit;
- if (likely(intersect(valid,ray,v0,v1,v2,hit))) return epilog(hit.valid,hit);
- return false;
- }
- };
-
- template<int K>
- struct MoellerTrumboreHitK
- {
- __forceinline MoellerTrumboreHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& T, const vfloat<K>& absDen, const Vec3vf<K>& Ng)
- : U(U), V(V), T(T), absDen(absDen), Ng(Ng) {}
-
- __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
- {
- const vfloat<K> rcpAbsDen = rcp(absDen);
- const vfloat<K> t = T * rcpAbsDen;
- const vfloat<K> u = U * rcpAbsDen;
- const vfloat<K> v = V * rcpAbsDen;
- return std::make_tuple(u,v,t,Ng);
- }
-
- private:
- const vfloat<K> U;
- const vfloat<K> V;
- const vfloat<K> T;
- const vfloat<K> absDen;
- const Vec3vf<K> Ng;
- };
-
- template<int M, int K>
- struct MoellerTrumboreIntersectorK
- {
- __forceinline MoellerTrumboreIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {}
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- //RayK<K>& ray,
- const Vec3vf<K>& ray_org,
- const Vec3vf<K>& ray_dir,
- const vfloat<K>& ray_tnear,
- const vfloat<K>& ray_tfar,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_e1,
- const Vec3vf<K>& tri_e2,
- const Vec3vf<K>& tri_Ng,
- const Epilog& epilog) const
- {
- /* calculate denominator */
- vbool<K> valid = valid0;
- const Vec3vf<K> C = tri_v0 - ray_org;
- const Vec3vf<K> R = cross(C,ray_dir);
- const vfloat<K> den = dot(tri_Ng,ray_dir);
- const vfloat<K> absDen = abs(den);
- const vfloat<K> sgnDen = signmsk(den);
-
- /* test against edge p2 p0 */
- const vfloat<K> U = dot(tri_e2,R) ^ sgnDen;
- valid &= U >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p0 p1 */
- const vfloat<K> V = dot(tri_e1,R) ^ sgnDen;
- valid &= V >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p1 p2 */
- const vfloat<K> W = absDen-U-V;
- valid &= W >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen;
- valid &= (absDen*ray_tnear < T) & (T <= absDen*ray_tfar);
- if (unlikely(none(valid))) return false;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= den < vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#else
- valid &= den != vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#endif
-
- /* calculate hit information */
- MoellerTrumboreHitK<K> hit(U,V,T,absDen,tri_Ng);
- return epilog(valid,hit);
- }
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_v1,
- const Vec3vf<K>& tri_v2,
- const Epilog& epilog) const
- {
- const Vec3vf<K> e1 = tri_v0-tri_v1;
- const Vec3vf<K> e2 = tri_v2-tri_v0;
- const Vec3vf<K> Ng = cross(e2,e1);
- return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,epilog);
- }
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectEdgeK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_e1,
- const Vec3vf<K>& tri_e2,
- const Epilog& epilog) const
- {
- const Vec3vf<K> tri_Ng = cross(tri_e2,tri_e1);
- return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,tri_e1,tri_e2,tri_Ng,epilog);
- }
-
- /*! Intersect k'th ray from ray packet of size K with M triangles. */
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- MoellerTrumboreHitM<M>& hit) const
- {
- /* calculate denominator */
- typedef Vec3vf<M> Vec3vfM;
- const Vec3vf<M> tri_Ng = cross(tri_e2,tri_e1);
-
- const Vec3vfM O = broadcast<vfloat<M>>(ray.org,k);
- const Vec3vfM D = broadcast<vfloat<M>>(ray.dir,k);
- const Vec3vfM C = Vec3vfM(tri_v0) - O;
- const Vec3vfM R = cross(C,D);
- const vfloat<M> den = dot(Vec3vfM(tri_Ng),D);
- const vfloat<M> absDen = abs(den);
- const vfloat<M> sgnDen = signmsk(den);
-
- /* perform edge tests */
- const vfloat<M> U = dot(Vec3vf<M>(tri_e2),R) ^ sgnDen;
- const vfloat<M> V = dot(Vec3vf<M>(tri_e1),R) ^ sgnDen;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#else
- vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#endif
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen;
- valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k]));
- if (likely(none(valid))) return false;
-
- /* calculate hit information */
- new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng);
- return true;
- }
-
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const BBox<vfloat<M>>& time_range,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- MoellerTrumboreHitM<M>& hit) const
- {
- if (likely(intersect(ray,k,tri_v0,tri_e1,tri_e2,hit)))
- {
- hit.valid &= time_range.lower <= vfloat<M>(ray.time[k]);
- hit.valid &= vfloat<M>(ray.time[k]) < time_range.upper;
- return any(hit.valid);
- }
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- const Epilog& epilog) const
- {
- MoellerTrumboreHitM<M> hit;
- if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit);
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const BBox<vfloat<M>>& time_range,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- const Epilog& epilog) const
- {
- MoellerTrumboreHitM<M> hit;
- if (likely(intersectEdge(ray,k,time_range,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit);
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersect(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog) const
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- return intersectEdge(ray,k,v0,e1,e2,epilog);
- }
-
- template<typename Epilog>
- __forceinline bool intersect(RayK<K>& ray,
- size_t k,
- const BBox<vfloat<M>>& time_range,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog) const
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- return intersectEdge(ray,k,time_range,v0,e1,e2,epilog);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h
deleted file mode 100644
index f1de99d208..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h
+++ /dev/null
@@ -1,247 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "triangle.h"
-#include "trianglev.h"
-#include "trianglev_mb.h"
-#include "intersector_epilog.h"
-
-/*! Modified Pluecker ray/triangle intersector. The test first shifts
- * the ray origin into the origin of the coordinate system and then
- * uses Pluecker coordinates for the intersection. Due to the shift,
- * the Pluecker coordinate calculation simplifies and the tests get
- * numerically stable. The edge equations are watertight along the
- * edge for neighboring triangles. */
-
-namespace embree
-{
- namespace isa
- {
- template<int M, typename UVMapper>
- struct PlueckerHitM
- {
- __forceinline PlueckerHitM(const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& UVW, const vfloat<M>& t, const Vec3vf<M>& Ng, const UVMapper& mapUV)
- : U(U), V(V), UVW(UVW), mapUV(mapUV), vt(t), vNg(Ng) {}
-
- __forceinline void finalize()
- {
- const vbool<M> invalid = abs(UVW) < min_rcp_input;
- const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW));
- vu = U * rcpUVW;
- vv = V * rcpUVW;
- mapUV(vu,vv);
- }
-
- __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
- __forceinline float t (const size_t i) const { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- private:
- const vfloat<M> U;
- const vfloat<M> V;
- const vfloat<M> UVW;
- const UVMapper& mapUV;
-
- public:
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
- };
-
- template<int M>
- struct PlueckerIntersector1
- {
- __forceinline PlueckerIntersector1() {}
-
- __forceinline PlueckerIntersector1(const Ray& ray, const void* ptr) {}
-
- template<typename UVMapper, typename Epilog>
- __forceinline bool intersect(Ray& ray,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_v1,
- const Vec3vf<M>& tri_v2,
- const UVMapper& mapUV,
- const Epilog& epilog) const
- {
- /* calculate vertices relative to ray origin */
- const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org);
- const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir);
- const Vec3vf<M> v0 = tri_v0-O;
- const Vec3vf<M> v1 = tri_v1-O;
- const Vec3vf<M> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<M> e0 = v2-v0;
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<M> U = dot(cross(e0,v2+v0),D);
- const vfloat<M> V = dot(cross(e1,v0+v1),D);
- const vfloat<M> W = dot(cross(e2,v1+v2),D);
- const vfloat<M> UVW = U+V+W;
- const vfloat<M> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = max(U,V,W) <= eps;
-#else
- vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<M> den = twice(dot(Ng,D));
-
- /* perform depth test */
- const vfloat<M> T = twice(dot(v0,Ng));
- const vfloat<M> t = rcp(den)*T;
- valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar);
- valid &= den != vfloat<M>(zero);
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- PlueckerHitM<M,UVMapper> hit(U,V,UVW,t,Ng,mapUV);
- return epilog(valid,hit);
- }
- };
-
- template<int K, typename UVMapper>
- struct PlueckerHitK
- {
- __forceinline PlueckerHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& UVW, const vfloat<K>& t, const Vec3vf<K>& Ng, const UVMapper& mapUV)
- : U(U), V(V), UVW(UVW), t(t), Ng(Ng), mapUV(mapUV) {}
-
- __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
- {
- const vbool<K> invalid = abs(UVW) < min_rcp_input;
- const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW));
- vfloat<K> u = U * rcpUVW;
- vfloat<K> v = V * rcpUVW;
- mapUV(u,v);
- return std::make_tuple(u,v,t,Ng);
- }
-
- private:
- const vfloat<K> U;
- const vfloat<K> V;
- const vfloat<K> UVW;
- const vfloat<K> t;
- const Vec3vf<K> Ng;
- const UVMapper& mapUV;
- };
-
- template<int M, int K>
- struct PlueckerIntersectorK
- {
- __forceinline PlueckerIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {}
-
- /*! Intersects K rays with one of M triangles. */
- template<typename UVMapper, typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_v1,
- const Vec3vf<K>& tri_v2,
- const UVMapper& mapUV,
- const Epilog& epilog) const
- {
- /* calculate vertices relative to ray origin */
- vbool<K> valid = valid0;
- const Vec3vf<K> O = ray.org;
- const Vec3vf<K> D = ray.dir;
- const Vec3vf<K> v0 = tri_v0-O;
- const Vec3vf<K> v1 = tri_v1-O;
- const Vec3vf<K> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<K> e0 = v2-v0;
- const Vec3vf<K> e1 = v0-v1;
- const Vec3vf<K> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D);
- const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D);
- const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D);
- const vfloat<K> UVW = U+V+W;
- const vfloat<K> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= max(U,V,W) <= eps;
-#else
- valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D));
-
- /* perform depth test */
- const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng)));
- const vfloat<K> t = rcp(den)*T;
- valid &= ray.tnear() <= t & t <= ray.tfar;
- valid &= den != vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-
- /* calculate hit information */
- PlueckerHitK<K,UVMapper> hit(U,V,UVW,t,Ng,mapUV);
- return epilog(valid,hit);
- }
-
- /*! Intersect k'th ray from ray packet of size K with M triangles. */
- template<typename UVMapper, typename Epilog>
- __forceinline bool intersect(RayK<K>& ray, size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_v1,
- const Vec3vf<M>& tri_v2,
- const UVMapper& mapUV,
- const Epilog& epilog) const
- {
- /* calculate vertices relative to ray origin */
- const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
- const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
- const Vec3vf<M> v0 = tri_v0-O;
- const Vec3vf<M> v1 = tri_v1-O;
- const Vec3vf<M> v2 = tri_v2-O;
-
- /* calculate triangle edges */
- const Vec3vf<M> e0 = v2-v0;
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v1-v2;
-
- /* perform edge tests */
- const vfloat<M> U = dot(cross(e0,v2+v0),D);
- const vfloat<M> V = dot(cross(e1,v0+v1),D);
- const vfloat<M> W = dot(cross(e2,v1+v2),D);
- const vfloat<M> UVW = U+V+W;
- const vfloat<M> eps = float(ulp)*abs(UVW);
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = max(U,V,W) <= eps;
-#else
- vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
-#endif
- if (unlikely(none(valid))) return false;
-
- /* calculate geometry normal and denominator */
- const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
- const vfloat<M> den = twice(dot(Ng,D));
-
- /* perform depth test */
- const vfloat<M> T = twice(dot(v0,Ng));
- const vfloat<M> t = rcp(den)*T;
- valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]);
- if (unlikely(none(valid))) return false;
-
- /* avoid division by 0 */
- valid &= den != vfloat<M>(zero);
- if (unlikely(none(valid))) return false;
-
- /* update hit information */
- PlueckerHitM<M,UVMapper> hit(U,V,UVW,t,Ng,mapUV);
- return epilog(valid,hit);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h
deleted file mode 100644
index 63e649d8fb..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h
+++ /dev/null
@@ -1,418 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "triangle.h"
-#include "intersector_epilog.h"
-
-/*! This intersector implements a modified version of the Woop's ray-triangle intersection test */
-
-namespace embree
-{
- namespace isa
- {
- template<int M>
- struct WoopHitM
- {
- __forceinline WoopHitM() {}
-
- __forceinline WoopHitM(const vbool<M>& valid,
- const vfloat<M>& U,
- const vfloat<M>& V,
- const vfloat<M>& T,
- const vfloat<M>& inv_det,
- const Vec3vf<M>& Ng)
- : U(U), V(V), T(T), inv_det(inv_det), valid(valid), vNg(Ng) {}
-
- __forceinline void finalize()
- {
- vt = T;
- vu = U*inv_det;
- vv = V*inv_det;
- }
-
- __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
- __forceinline float t (const size_t i) const { return vt[i]; }
- __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
-
- private:
- const vfloat<M> U;
- const vfloat<M> V;
- const vfloat<M> T;
- const vfloat<M> inv_det;
-
- public:
- const vbool<M> valid;
- vfloat<M> vu;
- vfloat<M> vv;
- vfloat<M> vt;
- Vec3vf<M> vNg;
- };
-
- template<int M>
- struct WoopPrecalculations1
- {
- unsigned int kx,ky,kz;
- Vec3vf<M> org;
- Vec3fa S;
- __forceinline WoopPrecalculations1() {}
-
- __forceinline WoopPrecalculations1(const Ray& ray, const void* ptr)
- {
- kz = maxDim(abs(ray.dir));
- kx = (kz+1) % 3;
- ky = (kx+1) % 3;
- const float inv_dir_kz = rcp(ray.dir[kz]);
- if (ray.dir[kz]) std::swap(kx,ky);
- S.x = ray.dir[kx] * inv_dir_kz;
- S.y = ray.dir[ky] * inv_dir_kz;
- S.z = inv_dir_kz;
- org = Vec3vf<M>(ray.org[kx],ray.org[ky],ray.org[kz]);
- }
- };
-
-
- template<int M>
- struct WoopIntersector1
- {
-
- typedef WoopPrecalculations1<M> Precalculations;
-
- __forceinline WoopIntersector1() {}
-
- __forceinline WoopIntersector1(const Ray& ray, const void* ptr) {}
-
- static __forceinline bool intersect(const vbool<M>& valid0,
- Ray& ray,
- const Precalculations& pre,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_v1,
- const Vec3vf<M>& tri_v2,
- WoopHitM<M>& hit)
- {
- vbool<M> valid = valid0;
-
- /* vertices relative to ray origin */
- const Vec3vf<M> org = Vec3vf<M>(pre.org.x,pre.org.y,pre.org.z);
- const Vec3vf<M> A = Vec3vf<M>(tri_v0[pre.kx],tri_v0[pre.ky],tri_v0[pre.kz]) - org;
- const Vec3vf<M> B = Vec3vf<M>(tri_v1[pre.kx],tri_v1[pre.ky],tri_v1[pre.kz]) - org;
- const Vec3vf<M> C = Vec3vf<M>(tri_v2[pre.kx],tri_v2[pre.ky],tri_v2[pre.kz]) - org;
-
- /* shear and scale vertices */
- const vfloat<M> Ax = nmadd(A.z,pre.S.x,A.x);
- const vfloat<M> Ay = nmadd(A.z,pre.S.y,A.y);
- const vfloat<M> Bx = nmadd(B.z,pre.S.x,B.x);
- const vfloat<M> By = nmadd(B.z,pre.S.y,B.y);
- const vfloat<M> Cx = nmadd(C.z,pre.S.x,C.x);
- const vfloat<M> Cy = nmadd(C.z,pre.S.y,C.y);
-
- /* scaled barycentric */
- const vfloat<M> U0 = Cx*By;
- const vfloat<M> U1 = Cy*Bx;
- const vfloat<M> V0 = Ax*Cy;
- const vfloat<M> V1 = Ay*Cx;
- const vfloat<M> W0 = Bx*Ay;
- const vfloat<M> W1 = By*Ax;
-#if !defined(__AVX512F__)
- valid &= (U0 >= U1) & (V0 >= V1) & (W0 >= W1) |
- (U0 <= U1) & (V0 <= V1) & (W0 <= W1);
-#else
- valid &= ge(ge(U0 >= U1,V0,V1),W0,W1) | le(le(U0 <= U1,V0,V1),W0,W1);
-#endif
-
- if (likely(none(valid))) return false;
- const vfloat<M> U = U0-U1;
- const vfloat<M> V = V0-V1;
- const vfloat<M> W = W0-W1;
-
- const vfloat<M> det = U+V+W;
-
- valid &= det != 0.0f;
- const vfloat<M> inv_det = rcp(det);
-
- const vfloat<M> Az = pre.S.z * A.z;
- const vfloat<M> Bz = pre.S.z * B.z;
- const vfloat<M> Cz = pre.S.z * C.z;
- const vfloat<M> T = madd(U,Az,madd(V,Bz,W*Cz));
- const vfloat<M> t = T * inv_det;
- /* perform depth test */
- valid &= (vfloat<M>(ray.tnear()) < t) & (t <= vfloat<M>(ray.tfar));
- if (likely(none(valid))) return false;
-
- const Vec3vf<M> tri_Ng = cross(tri_v2-tri_v0,tri_v0-tri_v1);
-
- /* update hit information */
- new (&hit) WoopHitM<M>(valid,U,V,t,inv_det,tri_Ng);
- return true;
- }
-
- static __forceinline bool intersect(Ray& ray,
- const Precalculations& pre,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- WoopHitM<M>& hit)
- {
- vbool<M> valid = true;
- return intersect(valid,ray,pre,v0,v1,v2,hit);
- }
-
-
- template<typename Epilog>
- static __forceinline bool intersect(Ray& ray,
- const Precalculations& pre,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog)
- {
- WoopHitM<M> hit;
- if (likely(intersect(ray,pre,v0,v1,v2,hit))) return epilog(hit.valid,hit);
- return false;
- }
-
- template<typename Epilog>
- static __forceinline bool intersect(const vbool<M>& valid,
- Ray& ray,
- const Precalculations& pre,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog)
- {
- WoopHitM<M> hit;
- if (likely(intersect(valid,ray,pre,v0,v1,v2,hit))) return epilog(hit.valid,hit);
- return false;
- }
- };
-
-#if 0
- template<int K>
- struct WoopHitK
- {
- __forceinline WoopHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& T, const vfloat<K>& absDen, const Vec3vf<K>& Ng)
- : U(U), V(V), T(T), absDen(absDen), Ng(Ng) {}
-
- __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
- {
- const vfloat<K> rcpAbsDen = rcp(absDen);
- const vfloat<K> t = T * rcpAbsDen;
- const vfloat<K> u = U * rcpAbsDen;
- const vfloat<K> v = V * rcpAbsDen;
- return std::make_tuple(u,v,t,Ng);
- }
-
- private:
- const vfloat<K> U;
- const vfloat<K> V;
- const vfloat<K> T;
- const vfloat<K> absDen;
- const Vec3vf<K> Ng;
- };
-
- template<int M, int K>
- struct WoopIntersectorK
- {
- __forceinline WoopIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {}
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- //RayK<K>& ray,
- const Vec3vf<K>& ray_org,
- const Vec3vf<K>& ray_dir,
- const vfloat<K>& ray_tnear,
- const vfloat<K>& ray_tfar,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_e1,
- const Vec3vf<K>& tri_e2,
- const Vec3vf<K>& tri_Ng,
- const Epilog& epilog) const
- {
- /* calculate denominator */
- vbool<K> valid = valid0;
- const Vec3vf<K> C = tri_v0 - ray_org;
- const Vec3vf<K> R = cross(C,ray_dir);
- const vfloat<K> den = dot(tri_Ng,ray_dir);
- const vfloat<K> absDen = abs(den);
- const vfloat<K> sgnDen = signmsk(den);
-
- /* test against edge p2 p0 */
- const vfloat<K> U = dot(tri_e2,R) ^ sgnDen;
- valid &= U >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p0 p1 */
- const vfloat<K> V = dot(tri_e1,R) ^ sgnDen;
- valid &= V >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* test against edge p1 p2 */
- const vfloat<K> W = absDen-U-V;
- valid &= W >= 0.0f;
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen;
- valid &= (absDen*ray_tnear < T) & (T <= absDen*ray_tfar);
- if (unlikely(none(valid))) return false;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- valid &= den < vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#else
- valid &= den != vfloat<K>(zero);
- if (unlikely(none(valid))) return false;
-#endif
-
- /* calculate hit information */
- WoopHitK<K> hit(U,V,T,absDen,tri_Ng);
- return epilog(valid,hit);
- }
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_v1,
- const Vec3vf<K>& tri_v2,
- const Epilog& epilog) const
- {
- const Vec3vf<K> e1 = tri_v0-tri_v1;
- const Vec3vf<K> e2 = tri_v2-tri_v0;
- const Vec3vf<K> Ng = cross(e2,e1);
- return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,epilog);
- }
-
- /*! Intersects K rays with one of M triangles. */
- template<typename Epilog>
- __forceinline vbool<K> intersectEdgeK(const vbool<K>& valid0,
- RayK<K>& ray,
- const Vec3vf<K>& tri_v0,
- const Vec3vf<K>& tri_e1,
- const Vec3vf<K>& tri_e2,
- const Epilog& epilog) const
- {
- const Vec3vf<K> tri_Ng = cross(tri_e2,tri_e1);
- return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,tri_e1,tri_e2,tri_Ng,epilog);
- }
-
- /*! Intersect k'th ray from ray packet of size K with M triangles. */
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- WoopHitM<M>& hit) const
- {
- /* calculate denominator */
- typedef Vec3vf<M> Vec3vfM;
- const Vec3vf<M> tri_Ng = cross(tri_e2,tri_e1);
-
- const Vec3vfM O = broadcast<vfloat<M>>(ray.org,k);
- const Vec3vfM D = broadcast<vfloat<M>>(ray.dir,k);
- const Vec3vfM C = Vec3vfM(tri_v0) - O;
- const Vec3vfM R = cross(C,D);
- const vfloat<M> den = dot(Vec3vfM(tri_Ng),D);
- const vfloat<M> absDen = abs(den);
- const vfloat<M> sgnDen = signmsk(den);
-
- /* perform edge tests */
- const vfloat<M> U = dot(Vec3vf<M>(tri_e2),R) ^ sgnDen;
- const vfloat<M> V = dot(Vec3vf<M>(tri_e1),R) ^ sgnDen;
-
- /* perform backface culling */
-#if defined(EMBREE_BACKFACE_CULLING)
- vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#else
- vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
-#endif
- if (likely(none(valid))) return false;
-
- /* perform depth test */
- const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen;
- valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k]));
- if (likely(none(valid))) return false;
-
- /* calculate hit information */
- new (&hit) WoopHitM<M>(valid,U,V,T,absDen,tri_Ng);
- return true;
- }
-
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const BBox<vfloat<M>>& time_range,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- WoopHitM<M>& hit) const
- {
- if (likely(intersect(ray,k,tri_v0,tri_e1,tri_e2,hit)))
- {
- hit.valid &= time_range.lower <= vfloat<M>(ray.time[k]);
- hit.valid &= vfloat<M>(ray.time[k]) < time_range.upper;
- return any(hit.valid);
- }
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- const Epilog& epilog) const
- {
- WoopHitM<M> hit;
- if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit);
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersectEdge(RayK<K>& ray,
- size_t k,
- const BBox<vfloat<M>>& time_range,
- const Vec3vf<M>& tri_v0,
- const Vec3vf<M>& tri_e1,
- const Vec3vf<M>& tri_e2,
- const Epilog& epilog) const
- {
- WoopHitM<M> hit;
- if (likely(intersectEdge(ray,k,time_range,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit);
- return false;
- }
-
- template<typename Epilog>
- __forceinline bool intersect(RayK<K>& ray,
- size_t k,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog) const
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- return intersectEdge(ray,k,v0,e1,e2,epilog);
- }
-
- template<typename Epilog>
- __forceinline bool intersect(RayK<K>& ray,
- size_t k,
- const BBox<vfloat<M>>& time_range,
- const Vec3vf<M>& v0,
- const Vec3vf<M>& v1,
- const Vec3vf<M>& v2,
- const Epilog& epilog) const
- {
- const Vec3vf<M> e1 = v0-v1;
- const Vec3vf<M> e2 = v2-v0;
- return intersectEdge(ray,k,time_range,v0,e1,e2,epilog);
- }
- };
-#endif
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h
deleted file mode 100644
index 91b35c36f3..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h
+++ /dev/null
@@ -1,132 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "primitive.h"
-
-namespace embree
-{
- namespace isa
- {
- struct TriangleTriangleIntersector
- {
- __forceinline static float T(float pa0, float pa1, float da0, float da1) {
- return pa0 + (pa1-pa0)*da0/(da0-da1);
- }
-
- __forceinline static bool point_line_side(const Vec2f& p, const Vec2f& a0, const Vec2f& a1) {
- return det(p-a0,a0-a1) >= 0.0f;
- }
-
- __forceinline static bool point_inside_triangle(const Vec2f& p, const Vec2f& a, const Vec2f& b, const Vec2f& c)
- {
- const bool pab = point_line_side(p,a,b);
- const bool pbc = point_line_side(p,b,c);
- const bool pca = point_line_side(p,c,a);
- return pab == pbc && pab == pca;
- }
-
- __forceinline static bool intersect_line_line(const Vec2f& a0, const Vec2f& a1, const Vec2f& b0, const Vec2f& b1)
- {
- const bool different_sides0 = point_line_side(b0,a0,a1) != point_line_side(b1,a0,a1);
- const bool different_sides1 = point_line_side(a0,b0,b1) != point_line_side(a1,b0,b1);
- return different_sides0 && different_sides1;
- }
-
- __forceinline static bool intersect_triangle_triangle (const Vec2f& a0, const Vec2f& a1, const Vec2f& a2,
- const Vec2f& b0, const Vec2f& b1, const Vec2f& b2)
- {
- const bool a01_b01 = intersect_line_line(a0,a1,b0,b1);
- if (a01_b01) return true;
- const bool a01_b12 = intersect_line_line(a0,a1,b1,b2);
- if (a01_b12) return true;
- const bool a01_b20 = intersect_line_line(a0,a1,b2,b0);
- if (a01_b20) return true;
- const bool a12_b01 = intersect_line_line(a1,a2,b0,b1);
- if (a12_b01) return true;
- const bool a12_b12 = intersect_line_line(a1,a2,b1,b2);
- if (a12_b12) return true;
- const bool a12_b20 = intersect_line_line(a1,a2,b2,b0);
- if (a12_b20) return true;
- const bool a20_b01 = intersect_line_line(a2,a0,b0,b1);
- if (a20_b01) return true;
- const bool a20_b12 = intersect_line_line(a2,a0,b1,b2);
- if (a20_b12) return true;
- const bool a20_b20 = intersect_line_line(a2,a0,b2,b0);
- if (a20_b20) return true;
-
- bool a_in_b = point_inside_triangle(a0,b0,b1,b2) && point_inside_triangle(a1,b0,b1,b2) && point_inside_triangle(a2,b0,b1,b2);
- if (a_in_b) return true;
-
- bool b_in_a = point_inside_triangle(b0,a0,a1,a2) && point_inside_triangle(b1,a0,a1,a2) && point_inside_triangle(b2,a0,a1,a2);
- if (b_in_a) return true;
-
- return false;
- }
-
- static bool intersect_triangle_triangle (const Vec3fa& a0, const Vec3fa& a1, const Vec3fa& a2,
- const Vec3fa& b0, const Vec3fa& b1, const Vec3fa& b2)
- {
- const float eps = 1E-5f;
-
- /* calculate triangle planes */
- const Vec3fa Na = cross(a1-a0,a2-a0);
- const float Ca = dot(Na,a0);
- const Vec3fa Nb = cross(b1-b0,b2-b0);
- const float Cb = dot(Nb,b0);
-
- /* project triangle A onto plane B */
- const float da0 = dot(Nb,a0)-Cb;
- const float da1 = dot(Nb,a1)-Cb;
- const float da2 = dot(Nb,a2)-Cb;
- if (max(da0,da1,da2) < -eps) return false;
- if (min(da0,da1,da2) > +eps) return false;
- //CSTAT(bvh_collide_prim_intersections4++);
-
- /* project triangle B onto plane A */
- const float db0 = dot(Na,b0)-Ca;
- const float db1 = dot(Na,b1)-Ca;
- const float db2 = dot(Na,b2)-Ca;
- if (max(db0,db1,db2) < -eps) return false;
- if (min(db0,db1,db2) > +eps) return false;
- //CSTAT(bvh_collide_prim_intersections5++);
-
- if (unlikely((std::fabs(da0) < eps && std::fabs(da1) < eps && std::fabs(da2) < eps) ||
- (std::fabs(db0) < eps && std::fabs(db1) < eps && std::fabs(db2) < eps)))
- {
- const size_t dz = maxDim(Na);
- const size_t dx = (dz+1)%3;
- const size_t dy = (dx+1)%3;
- const Vec2f A0(a0[dx],a0[dy]);
- const Vec2f A1(a1[dx],a1[dy]);
- const Vec2f A2(a2[dx],a2[dy]);
- const Vec2f B0(b0[dx],b0[dy]);
- const Vec2f B1(b1[dx],b1[dy]);
- const Vec2f B2(b2[dx],b2[dy]);
- return intersect_triangle_triangle(A0,A1,A2,B0,B1,B2);
- }
-
- const Vec3fa D = cross(Na,Nb);
- const float pa0 = dot(D,a0);
- const float pa1 = dot(D,a1);
- const float pa2 = dot(D,a2);
- const float pb0 = dot(D,b0);
- const float pb1 = dot(D,b1);
- const float pb2 = dot(D,b2);
-
- BBox1f ba = empty;
- if (min(da0,da1) <= 0.0f && max(da0,da1) >= 0.0f && abs(da0-da1) > 0.0f) ba.extend(T(pa0,pa1,da0,da1));
- if (min(da1,da2) <= 0.0f && max(da1,da2) >= 0.0f && abs(da1-da2) > 0.0f) ba.extend(T(pa1,pa2,da1,da2));
- if (min(da2,da0) <= 0.0f && max(da2,da0) >= 0.0f && abs(da2-da0) > 0.0f) ba.extend(T(pa2,pa0,da2,da0));
-
- BBox1f bb = empty;
- if (min(db0,db1) <= 0.0f && max(db0,db1) >= 0.0f && abs(db0-db1) > 0.0f) bb.extend(T(pb0,pb1,db0,db1));
- if (min(db1,db2) <= 0.0f && max(db1,db2) >= 0.0f && abs(db1-db2) > 0.0f) bb.extend(T(pb1,pb2,db1,db2));
- if (min(db2,db0) <= 0.0f && max(db2,db0) >= 0.0f && abs(db2-db0) > 0.0f) bb.extend(T(pb2,pb0,db2,db0));
-
- return conjoint(ba,bb);
- }
- };
- }
-}
-
-
diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglei.h b/thirdparty/embree-aarch64/kernels/geometry/trianglei.h
deleted file mode 100644
index 4f3118cc0c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/trianglei.h
+++ /dev/null
@@ -1,442 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-#include "../common/scene.h"
-
-namespace embree
-{
- /* Stores M triangles from an indexed face set */
- template <int M>
- struct TriangleMi
- {
- /* Virtual interface to query information about the triangle type */
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* primitive supports multiple time segments */
- static const bool singleTimeSegment = false;
-
- /* Returns maximum number of stored triangles */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- public:
-
- /* Default constructor */
- __forceinline TriangleMi() { }
-
- /* Construction from vertices and IDs */
- __forceinline TriangleMi(const vuint<M>& v0,
- const vuint<M>& v1,
- const vuint<M>& v2,
- const vuint<M>& geomIDs,
- const vuint<M>& primIDs)
-#if defined(EMBREE_COMPACT_POLYS)
- : geomIDs(geomIDs), primIDs(primIDs) {}
-#else
- : v0_(v0), v1_(v1), v2_(v2), geomIDs(geomIDs), primIDs(primIDs) {}
-#endif
-
- /* Returns a mask that tells which triangles are valid */
- __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); }
-
- /* Returns if the specified triangle is valid */
- __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; }
-
- /* Returns the number of stored triangles */
- __forceinline size_t size() const { return bsf(~movemask(valid())); }
-
- /* Returns the geometry IDs */
- __forceinline vuint<M> geomID() const { return geomIDs; }
- __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; }
-
- /* Returns the primitive IDs */
- __forceinline vuint<M> primID() const { return primIDs; }
- __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
-
- /* Calculate the bounds of the triangles */
- __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const
- {
- BBox3fa bounds = empty;
- for (size_t i=0; i<M && valid(i); i++) {
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i));
- bounds.extend(mesh->bounds(primID(i),itime));
- }
- return bounds;
- }
-
- /* Calculate the linear bounds of the primitive */
- __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime) {
- return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1));
- }
-
- __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps)
- {
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i));
- allBounds.extend(mesh->linearBounds(primID(i), itime, numTimeSteps));
- }
- return allBounds;
- }
-
- __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range)
- {
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<M && valid(i); i++)
- {
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(i));
- allBounds.extend(mesh->linearBounds(primID(i), time_range));
- }
- return allBounds;
- }
-
- /* Non-temporal store */
- __forceinline static void store_nt(TriangleMi* dst, const TriangleMi& src)
- {
-#if !defined(EMBREE_COMPACT_POLYS)
- vuint<M>::store_nt(&dst->v0_,src.v0_);
- vuint<M>::store_nt(&dst->v1_,src.v1_);
- vuint<M>::store_nt(&dst->v2_,src.v2_);
-#endif
- vuint<M>::store_nt(&dst->geomIDs,src.geomIDs);
- vuint<M>::store_nt(&dst->primIDs,src.primIDs);
- }
-
- /* Fill triangle from triangle list */
- template<typename PrimRefT>
- __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene)
- {
- vuint<M> v0 = zero, v1 = zero, v2 = zero;
- vuint<M> geomID = -1, primID = -1;
- const PrimRefT* prim = &prims[begin];
-
- for (size_t i=0; i<M; i++)
- {
- if (begin<end) {
- geomID[i] = prim->geomID();
- primID[i] = prim->primID();
-#if !defined(EMBREE_COMPACT_POLYS)
- const TriangleMesh* mesh = scene->get<TriangleMesh>(prim->geomID());
- const TriangleMesh::Triangle& tri = mesh->triangle(prim->primID());
- unsigned int int_stride = mesh->vertices0.getStride()/4;
- v0[i] = tri.v[0] * int_stride;
- v1[i] = tri.v[1] * int_stride;
- v2[i] = tri.v[2] * int_stride;
-#endif
- begin++;
- } else {
- assert(i);
- if (likely(i > 0)) {
- geomID[i] = geomID[0];
- primID[i] = -1;
- v0[i] = v0[0];
- v1[i] = v0[0];
- v2[i] = v0[0];
- }
- }
- if (begin<end) prim = &prims[begin];
- }
- new (this) TriangleMi(v0,v1,v2,geomID,primID); // FIXME: use non temporal store
- }
-
- __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
- {
- fill(prims, begin, end, scene);
- return linearBounds(scene, itime);
- }
-
- __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
- {
- fill(prims, begin, end, scene);
- return linearBounds(scene, time_range);
- }
-
- /* Updates the primitive */
- __forceinline BBox3fa update(TriangleMesh* mesh)
- {
- BBox3fa bounds = empty;
- for (size_t i=0; i<M; i++)
- {
- if (primID(i) == -1) break;
- const unsigned int primId = primID(i);
- const TriangleMesh::Triangle& tri = mesh->triangle(primId);
- const Vec3fa p0 = mesh->vertex(tri.v[0]);
- const Vec3fa p1 = mesh->vertex(tri.v[1]);
- const Vec3fa p2 = mesh->vertex(tri.v[2]);
- bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2)));
- }
- return bounds;
- }
-
- protected:
-#if !defined(EMBREE_COMPACT_POLYS)
- vuint<M> v0_; // 4 byte offset of 1st vertex
- vuint<M> v1_; // 4 byte offset of 2nd vertex
- vuint<M> v2_; // 4 byte offset of 3rd vertex
-#endif
- vuint<M> geomIDs; // geometry ID of mesh
- vuint<M> primIDs; // primitive ID of primitive inside mesh
- };
-
- namespace isa
- {
-
- template<int M>
- struct TriangleMi : public embree::TriangleMi<M>
- {
-#if !defined(EMBREE_COMPACT_POLYS)
- using embree::TriangleMi<M>::v0_;
- using embree::TriangleMi<M>::v1_;
- using embree::TriangleMi<M>::v2_;
-#endif
- using embree::TriangleMi<M>::geomIDs;
- using embree::TriangleMi<M>::primIDs;
- using embree::TriangleMi<M>::geomID;
- using embree::TriangleMi<M>::primID;
- using embree::TriangleMi<M>::valid;
-
- /* loads a single vertex */
- template<int vid>
- __forceinline Vec3f getVertex(const size_t index, const Scene *const scene) const
- {
-#if defined(EMBREE_COMPACT_POLYS)
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
- const TriangleMesh::Triangle& tri = mesh->triangle(primID(index));
- return (Vec3f) mesh->vertices[0][tri.v[vid]];
-#else
- const vuint<M>& v = getVertexOffset<vid>();
- const float* vertices = scene->vertices[geomID(index)];
- return (Vec3f&) vertices[v[index]];
-#endif
- }
-
- template<int vid, typename T>
- __forceinline Vec3<T> getVertex(const size_t index, const Scene *const scene, const size_t itime, const T& ftime) const
- {
-#if defined(EMBREE_COMPACT_POLYS)
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
- const TriangleMesh::Triangle& tri = mesh->triangle(primID(index));
- const Vec3fa v0 = mesh->vertices[itime+0][tri.v[vid]];
- const Vec3fa v1 = mesh->vertices[itime+1][tri.v[vid]];
-#else
- const vuint<M>& v = getVertexOffset<vid>();
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
- const float* vertices0 = (const float*) mesh->vertexPtr(0,itime+0);
- const float* vertices1 = (const float*) mesh->vertexPtr(0,itime+1);
- const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]);
- const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]);
-#endif
- const Vec3<T> p0(v0.x,v0.y,v0.z);
- const Vec3<T> p1(v1.x,v1.y,v1.z);
- return lerp(p0,p1,ftime);
- }
-
- template<int vid, int K, typename T>
- __forceinline Vec3<T> getVertex(const vbool<K>& valid, const size_t index, const Scene *const scene, const vint<K>& itime, const T& ftime) const
- {
- Vec3<T> p0, p1;
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
-
- for (size_t mask=movemask(valid), i=bsf(mask); mask; mask=btc(mask,i), i=bsf(mask))
- {
-#if defined(EMBREE_COMPACT_POLYS)
- const TriangleMesh::Triangle& tri = mesh->triangle(primID(index));
- const Vec3fa v0 = mesh->vertices[itime[i]+0][tri.v[vid]];
- const Vec3fa v1 = mesh->vertices[itime[i]+1][tri.v[vid]];
-#else
- const vuint<M>& v = getVertexOffset<vid>();
- const float* vertices0 = (const float*) mesh->vertexPtr(0,itime[i]+0);
- const float* vertices1 = (const float*) mesh->vertexPtr(0,itime[i]+1);
- const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]);
- const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]);
-#endif
- p0.x[i] = v0.x; p0.y[i] = v0.y; p0.z[i] = v0.z;
- p1.x[i] = v1.x; p1.y[i] = v1.y; p1.z[i] = v1.z;
- }
- return (T(one)-ftime)*p0 + ftime*p1;
- }
-
- struct Triangle {
- vfloat4 v0,v1,v2;
- };
-
-#if defined(EMBREE_COMPACT_POLYS)
-
- __forceinline Triangle loadTriangle(const int i, const Scene* const scene) const
- {
- const unsigned int geomID = geomIDs[i];
- const unsigned int primID = primIDs[i];
- if (unlikely(primID == -1)) return { zero, zero, zero };
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID);
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const vfloat4 v0 = (vfloat4) mesh->vertices0[tri.v[0]];
- const vfloat4 v1 = (vfloat4) mesh->vertices0[tri.v[1]];
- const vfloat4 v2 = (vfloat4) mesh->vertices0[tri.v[2]];
- return { v0, v1, v2 };
- }
-
- __forceinline Triangle loadTriangle(const int i, const int itime, const TriangleMesh* const mesh) const
- {
- const unsigned int primID = primIDs[i];
- if (unlikely(primID == -1)) return { zero, zero, zero };
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const vfloat4 v0 = (vfloat4) mesh->vertices[itime][tri.v[0]];
- const vfloat4 v1 = (vfloat4) mesh->vertices[itime][tri.v[1]];
- const vfloat4 v2 = (vfloat4) mesh->vertices[itime][tri.v[2]];
- return { v0, v1, v2 };
- }
-
-#else
-
- __forceinline Triangle loadTriangle(const int i, const Scene* const scene) const
- {
- const float* vertices = scene->vertices[geomID(i)];
- const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]);
- const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]);
- const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]);
- return { v0, v1, v2 };
- }
-
- __forceinline Triangle loadTriangle(const int i, const int itime, const TriangleMesh* const mesh) const
- {
- const float* vertices = (const float*) mesh->vertexPtr(0,itime);
- const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]);
- const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]);
- const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]);
- return { v0, v1, v2 };
- }
-
-#endif
-
- /* Gather the triangles */
- __forceinline void gather(Vec3vf<M>& p0, Vec3vf<M>& p1, Vec3vf<M>& p2, const Scene* const scene) const;
-
- template<int K>
-#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 2000) // workaround for compiler bug in ICC 2019
- __noinline
-#else
- __forceinline
-#endif
- void gather(const vbool<K>& valid,
- Vec3vf<K>& p0,
- Vec3vf<K>& p1,
- Vec3vf<K>& p2,
- const size_t index,
- const Scene* const scene,
- const vfloat<K>& time) const
- {
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index));
-
- vfloat<K> ftime;
- const vint<K> itime = mesh->timeSegment(time, ftime);
-
- const size_t first = bsf(movemask(valid));
- if (likely(all(valid,itime[first] == itime)))
- {
- p0 = getVertex<0>(index, scene, itime[first], ftime);
- p1 = getVertex<1>(index, scene, itime[first], ftime);
- p2 = getVertex<2>(index, scene, itime[first], ftime);
- } else {
- p0 = getVertex<0>(valid, index, scene, itime, ftime);
- p1 = getVertex<1>(valid, index, scene, itime, ftime);
- p2 = getVertex<2>(valid, index, scene, itime, ftime);
- }
- }
-
- __forceinline void gather(Vec3vf<M>& p0,
- Vec3vf<M>& p1,
- Vec3vf<M>& p2,
- const TriangleMesh* mesh,
- const Scene *const scene,
- const int itime) const;
-
- __forceinline void gather(Vec3vf<M>& p0,
- Vec3vf<M>& p1,
- Vec3vf<M>& p2,
- const Scene *const scene,
- const float time) const;
-
-
-#if !defined(EMBREE_COMPACT_POLYS)
- template<int N> const vuint<M>& getVertexOffset() const;
-#endif
- };
-
-#if !defined(EMBREE_COMPACT_POLYS)
- template<> template<> __forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<0>() const { return v0_; }
- template<> template<> __forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<1>() const { return v1_; }
- template<> template<> __forceinline const vuint<4>& TriangleMi<4>::getVertexOffset<2>() const { return v2_; }
-#endif
-
- template<>
- __forceinline void TriangleMi<4>::gather(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- const Scene* const scene) const
- {
- const Triangle tri0 = loadTriangle(0,scene);
- const Triangle tri1 = loadTriangle(1,scene);
- const Triangle tri2 = loadTriangle(2,scene);
- const Triangle tri3 = loadTriangle(3,scene);
- transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z);
- transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z);
- transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z);
- }
-
- template<>
- __forceinline void TriangleMi<4>::gather(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- const TriangleMesh* mesh,
- const Scene *const scene,
- const int itime) const
- {
- const Triangle tri0 = loadTriangle(0,itime,mesh);
- const Triangle tri1 = loadTriangle(1,itime,mesh);
- const Triangle tri2 = loadTriangle(2,itime,mesh);
- const Triangle tri3 = loadTriangle(3,itime,mesh);
- transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z);
- transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z);
- transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z);
- }
-
- template<>
- __forceinline void TriangleMi<4>::gather(Vec3vf4& p0,
- Vec3vf4& p1,
- Vec3vf4& p2,
- const Scene *const scene,
- const float time) const
- {
- const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(0)); // in mblur mode all geometries are identical
-
- float ftime;
- const int itime = mesh->timeSegment(time, ftime);
-
- Vec3vf4 a0,a1,a2; gather(a0,a1,a2,mesh,scene,itime);
- Vec3vf4 b0,b1,b2; gather(b0,b1,b2,mesh,scene,itime+1);
- p0 = lerp(a0,b0,vfloat4(ftime));
- p1 = lerp(a1,b1,vfloat4(ftime));
- p2 = lerp(a2,b2,vfloat4(ftime));
- }
- }
-
- template<int M>
- typename TriangleMi<M>::Type TriangleMi<M>::type;
-
- typedef TriangleMi<4> Triangle4i;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h
deleted file mode 100644
index e2f106a62c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h
+++ /dev/null
@@ -1,336 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "trianglei.h"
-#include "triangle_intersector_moeller.h"
-#include "triangle_intersector_pluecker.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Intersects M triangles with 1 ray */
- template<int M, int Mx, bool filter>
- struct TriangleMiIntersector1Moeller
- {
- typedef TriangleMi<M> Primitive;
- typedef MoellerTrumboreIntersector1<Mx> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- return pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
- /*! Intersects M triangles with K rays */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMiIntersectorKMoeller
- {
- typedef TriangleMi<M> Primitive;
- typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations;
-
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- const Scene* scene = context->scene;
- for (size_t i=0; i<Primitive::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),RayHitK<K>::size());
- const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene);
- const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene);
- const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene);
- pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- vbool<K> valid0 = valid_i;
- const Scene* scene = context->scene;
-
- for (size_t i=0; i<Primitive::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid_i),RayHitK<K>::size());
- const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene);
- const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene);
- const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene);
- pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- return pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
- };
-
- /*! Intersects M triangles with 1 ray */
- template<int M, int Mx, bool filter>
- struct TriangleMiIntersector1Pluecker
- {
- typedef TriangleMi<M> Primitive;
- typedef PlueckerIntersector1<Mx> Precalculations;
-
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
- /*! Intersects M triangles with K rays */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMiIntersectorKPluecker
- {
- typedef TriangleMi<M> Primitive;
- typedef PlueckerIntersectorK<Mx,K> Precalculations;
-
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- const Scene* scene = context->scene;
- for (size_t i=0; i<Primitive::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),RayHitK<K>::size());
- const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene);
- const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene);
- const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene);
- pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- vbool<K> valid0 = valid_i;
- const Scene* scene = context->scene;
-
- for (size_t i=0; i<Primitive::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid_i),RayHitK<K>::size());
- const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene);
- const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene);
- const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene);
- pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene);
- return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
- };
-
- /*! Intersects M motion blur triangles with 1 ray */
- template<int M, int Mx, bool filter>
- struct TriangleMiMBIntersector1Moeller
- {
- typedef TriangleMi<M> Primitive;
- typedef MoellerTrumboreIntersector1<Mx> Precalculations;
-
- /*! Intersect a ray with the M triangles and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time());
- pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of M triangles. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time());
- return pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
- /*! Intersects M motion blur triangles with K rays. */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMiMBIntersectorKMoeller
- {
- typedef TriangleMi<M> Primitive;
- typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations;
-
- /*! Intersects K rays with M triangles. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri)
- {
- for (size_t i=0; i<TriangleMi<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time());
- pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M triangles. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri)
- {
- vbool<K> valid0 = valid_i;
- for (size_t i=0; i<TriangleMi<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time());
- pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]);
- pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]);
- return pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
- };
-
- /*! Intersects M motion blur triangles with 1 ray */
- template<int M, int Mx, bool filter>
- struct TriangleMiMBIntersector1Pluecker
- {
- typedef TriangleMi<M> Primitive;
- typedef PlueckerIntersector1<Mx> Precalculations;
-
- /*! Intersect a ray with the M triangles and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time());
- pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of M triangles. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time());
- return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri)
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
- /*! Intersects M motion blur triangles with K rays. */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMiMBIntersectorKPluecker
- {
- typedef TriangleMi<M> Primitive;
- typedef PlueckerIntersectorK<Mx,K> Precalculations;
-
- /*! Intersects K rays with M triangles. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri)
- {
- for (size_t i=0; i<TriangleMi<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time());
- pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- /*! Test for K rays if they are occluded by any of the M triangles. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri)
- {
- vbool<K> valid0 = valid_i;
- for (size_t i=0; i<TriangleMi<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break;
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time());
- pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break;
- }
- return !valid0;
- }
-
- /*! Intersect a ray with M triangles and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]);
- pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
-
- /*! Test if the ray is occluded by one of the M triangles. */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMi<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]);
- return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev.h
deleted file mode 100644
index 19af389e73..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/trianglev.h
+++ /dev/null
@@ -1,157 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-
-namespace embree
-{
- /* Stores the vertices of M triangles in struct of array layout */
- template <int M>
- struct TriangleMv
- {
- public:
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
- static Type type;
-
- public:
-
- /* Returns maximum number of stored triangles */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns required number of primitive blocks for N primitives */
- static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- public:
-
- /* Default constructor */
- __forceinline TriangleMv() {}
-
- /* Construction from vertices and IDs */
- __forceinline TriangleMv(const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const vuint<M>& geomIDs, const vuint<M>& primIDs)
- : v0(v0), v1(v1), v2(v2), geomIDs(geomIDs), primIDs(primIDs) {}
-
- /* Returns a mask that tells which triangles are valid */
- __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); }
-
- /* Returns true if the specified triangle is valid */
- __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; }
-
- /* Returns the number of stored triangles */
- __forceinline size_t size() const { return bsf(~movemask(valid())); }
-
- /* Returns the geometry IDs */
- __forceinline vuint<M>& geomID() { return geomIDs; }
- __forceinline const vuint<M>& geomID() const { return geomIDs; }
- __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; }
-
- /* Returns the primitive IDs */
- __forceinline vuint<M>& primID() { return primIDs; }
- __forceinline const vuint<M>& primID() const { return primIDs; }
- __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
-
- /* Calculate the bounds of the triangles */
- __forceinline BBox3fa bounds() const
- {
- Vec3vf<M> lower = min(v0,v1,v2);
- Vec3vf<M> upper = max(v0,v1,v2);
- vbool<M> mask = valid();
- lower.x = select(mask,lower.x,vfloat<M>(pos_inf));
- lower.y = select(mask,lower.y,vfloat<M>(pos_inf));
- lower.z = select(mask,lower.z,vfloat<M>(pos_inf));
- upper.x = select(mask,upper.x,vfloat<M>(neg_inf));
- upper.y = select(mask,upper.y,vfloat<M>(neg_inf));
- upper.z = select(mask,upper.z,vfloat<M>(neg_inf));
- return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)),
- Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z)));
- }
-
- /* Non temporal store */
- __forceinline static void store_nt(TriangleMv* dst, const TriangleMv& src)
- {
- vfloat<M>::store_nt(&dst->v0.x,src.v0.x);
- vfloat<M>::store_nt(&dst->v0.y,src.v0.y);
- vfloat<M>::store_nt(&dst->v0.z,src.v0.z);
- vfloat<M>::store_nt(&dst->v1.x,src.v1.x);
- vfloat<M>::store_nt(&dst->v1.y,src.v1.y);
- vfloat<M>::store_nt(&dst->v1.z,src.v1.z);
- vfloat<M>::store_nt(&dst->v2.x,src.v2.x);
- vfloat<M>::store_nt(&dst->v2.y,src.v2.y);
- vfloat<M>::store_nt(&dst->v2.z,src.v2.z);
- vuint<M>::store_nt(&dst->geomIDs,src.geomIDs);
- vuint<M>::store_nt(&dst->primIDs,src.primIDs);
- }
-
- /* Fill triangle from triangle list */
- __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene)
- {
- vuint<M> vgeomID = -1, vprimID = -1;
- Vec3vf<M> v0 = zero, v1 = zero, v2 = zero;
-
- for (size_t i=0; i<M && begin<end; i++, begin++)
- {
- const PrimRef& prim = prims[begin];
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(geomID);
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const Vec3fa& p0 = mesh->vertex(tri.v[0]);
- const Vec3fa& p1 = mesh->vertex(tri.v[1]);
- const Vec3fa& p2 = mesh->vertex(tri.v[2]);
- vgeomID [i] = geomID;
- vprimID [i] = primID;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- }
- TriangleMv::store_nt(this,TriangleMv(v0,v1,v2,vgeomID,vprimID));
- }
-
- /* Updates the primitive */
- __forceinline BBox3fa update(TriangleMesh* mesh)
- {
- BBox3fa bounds = empty;
- vuint<M> vgeomID = -1, vprimID = -1;
- Vec3vf<M> v0 = zero, v1 = zero, v2 = zero;
-
- for (size_t i=0; i<M; i++)
- {
- if (primID(i) == -1) break;
- const unsigned geomId = geomID(i);
- const unsigned primId = primID(i);
- const TriangleMesh::Triangle& tri = mesh->triangle(primId);
- const Vec3fa p0 = mesh->vertex(tri.v[0]);
- const Vec3fa p1 = mesh->vertex(tri.v[1]);
- const Vec3fa p2 = mesh->vertex(tri.v[2]);
- bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2)));
- vgeomID [i] = geomId;
- vprimID [i] = primId;
- v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
- v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
- v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
- }
- new (this) TriangleMv(v0,v1,v2,vgeomID,vprimID);
- return bounds;
- }
-
- public:
- Vec3vf<M> v0; // 1st vertex of the triangles
- Vec3vf<M> v1; // 2nd vertex of the triangles
- Vec3vf<M> v2; // 3rd vertex of the triangles
- private:
- vuint<M> geomIDs; // geometry ID
- vuint<M> primIDs; // primitive ID
- };
-
- template<int M>
- typename TriangleMv<M>::Type TriangleMv<M>::type;
-
- typedef TriangleMv<4> Triangle4v;
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h
deleted file mode 100644
index 6af0d5a11c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h
+++ /dev/null
@@ -1,206 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "triangle.h"
-#include "triangle_intersector_pluecker.h"
-#include "triangle_intersector_moeller.h"
-#include "triangle_intersector_woop.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Intersects a block of M triangles (TriangleMv<M>) with 1 ray by forwarding to MoellerTrumboreIntersector1<Mx> */
- template<int M, int Mx, bool filter>
- struct TriangleMvIntersector1Moeller
- {
- typedef TriangleMv<M> Primitive;
- typedef MoellerTrumboreIntersector1<Mx> Precalculations; // supplies the intersect() routine called below
-
- /*! Intersects the ray with the M triangles of `tri`; an Intersect1EpilogM built from ray, context and the block's geometry/primitive IDs is passed as the last argument. */
- static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersect(ray,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Tests whether the ray is occluded by one of the M triangles; returns the result of pre.intersect() called with an Occluded1EpilogM. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.intersect(ray,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) // forwards unchanged to PrimitivePointQuery1<Primitive>
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
-
- template<int M, int Mx, bool filter>
- struct TriangleMvIntersector1Woop // 1-ray intersector for TriangleMv<M> blocks that forwards to WoopIntersector1<Mx>
- {
- typedef TriangleMv<M> Primitive;
- typedef WoopIntersector1<Mx> intersec; // static intersect() of this type does the actual work
- typedef WoopPrecalculations1<M> Precalculations; // passed through to intersec::intersect() as `pre`
-
- /*! Intersects the ray with the M triangles of `tri`; an Intersect1EpilogM built from ray, context and the block's IDs is passed as the last argument. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- intersec::intersect(ray,pre,tri.v0,tri.v1,tri.v2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Tests whether the ray is occluded by one of the M triangles; returns the result of intersec::intersect() called with an Occluded1EpilogM. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return intersec::intersect(ray,pre,tri.v0,tri.v1,tri.v2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) // forwards unchanged to PrimitivePointQuery1<Primitive>
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
-
- /*! Intersects a block of M triangles (TriangleMv<M>) with K rays by forwarding to MoellerTrumboreIntersectorK<Mx,K> */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMvIntersectorKMoeller
- {
- typedef TriangleMv<M> Primitive;
- typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations;
-
- /*! Packet intersect: walks the triangles of the block one by one and tests each against the K rays, using valid_i as the ray mask. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- for (size_t i=0; i<M; i++)
- {
- if (!tri.valid(i)) break; // stop at the first invalid triangle slot
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i); // lane i of each vertex as a Vec3vf<K>
- const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i);
- const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i);
- pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- /*! Packet occlusion test: returns the complement of the local mask valid0 after the triangle loop. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- vbool<K> valid0 = valid_i; // working copy; presumably the epilog clears lanes found occluded - confirm in OccludedKEpilogM
-
- for (size_t i=0; i<M; i++)
- {
- if (!tri.valid(i)) break; // stop at the first invalid triangle slot
- STAT3(shadow.trav_prims,1,popcnt(valid_i),K); // NOTE(review): counts valid_i although valid0 is the mask passed on; TriangleMvMBIntersectorKMoeller::occluded counts valid0 - confirm which is intended
- const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i);
- const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i);
- const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i);
- pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break; // early out once no lane is set in valid0
- }
- return !valid0;
- }
-
- /*! Intersects ray k of the packet with all M triangles of the block in one call and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx
- }
-
- /*! Tests whether ray k of the packet is occluded by one of the M triangles; returns the result of pre.intersect(). */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx
- }
- };
-
- /*! Intersects a block of M triangles (TriangleMv<M>) with 1 ray by forwarding to PlueckerIntersector1<Mx> */
- template<int M, int Mx, bool filter>
- struct TriangleMvIntersector1Pluecker
- {
- typedef TriangleMv<M> Primitive;
- typedef PlueckerIntersector1<Mx> Precalculations; // supplies the intersect() routine called below
-
- /*! Intersects the ray with the M triangles of `tri`; passes a UVIdentity<Mx> and an Intersect1EpilogM built from ray, context and the block's IDs. */
- static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersect(ray,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Tests whether the ray is occluded by one of the M triangles; returns the result of pre.intersect() called with an Occluded1EpilogM. */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.intersect(ray,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) // forwards unchanged to PrimitivePointQuery1<Primitive>
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
- /*! Intersects a block of M triangles (TriangleMv<M>) with K rays by forwarding to PlueckerIntersectorK<Mx,K> */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMvIntersectorKPluecker
- {
- typedef TriangleMv<M> Primitive;
- typedef PlueckerIntersectorK<Mx,K> Precalculations;
-
- /*! Packet intersect: walks the triangles of the block one by one and tests each against the K rays, using valid_i as the ray mask. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- for (size_t i=0; i<M; i++)
- {
- if (!tri.valid(i)) break; // stop at the first invalid triangle slot
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i); // lane i of each vertex as a Vec3vf<K>
- const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i);
- const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i);
- pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- /*! Packet occlusion test: returns the complement of the local mask valid0 after the triangle loop. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive& tri)
- {
- vbool<K> valid0 = valid_i; // working copy; presumably the epilog clears lanes found occluded - confirm in OccludedKEpilogM
-
- for (size_t i=0; i<M; i++)
- {
- if (!tri.valid(i)) break; // stop at the first invalid triangle slot
- STAT3(shadow.trav_prims,1,popcnt(valid_i),K); // NOTE(review): counts valid_i although valid0 is the mask passed on; TriangleMvMBIntersectorKPluecker::occluded counts valid0 - confirm which is intended
- const Vec3vf<K> v0 = broadcast<vfloat<K>>(tri.v0,i);
- const Vec3vf<K> v1 = broadcast<vfloat<K>>(tri.v1,i);
- const Vec3vf<K> v2 = broadcast<vfloat<K>>(tri.v2,i);
- pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break; // early out once no lane is set in valid0
- }
- return !valid0;
- }
-
- /*! Intersects ray k of the packet with all M triangles of the block in one call and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx
- }
-
- /*! Tests whether ray k of the packet is occluded by one of the M triangles; returns the result of pre.intersect(). */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- return pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h
deleted file mode 100644
index 63137aee16..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h
+++ /dev/null
@@ -1,201 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "primitive.h"
-
-namespace embree
-{
- /* Stores M motion-blur triangles in struct-of-arrays layout: the three vertices at the first time step plus one difference vector per vertex towards the second time step */
- template<int M>
- struct TriangleMvMB
- {
- public:
- struct Type : public PrimitiveType
- {
- const char* name() const;
- size_t sizeActive(const char* This) const;
- size_t sizeTotal(const char* This) const;
- size_t getBytes(const char* This) const;
- };
-
- static Type type;
-
- public:
-
- /* this primitive supports single time segments */
- static const bool singleTimeSegment = true;
-
- /* Returns the maximum number of triangles one block can store (M) */
- static __forceinline size_t max_size() { return M; }
-
- /* Returns the number of blocks required for N primitives, i.e. N divided by max_size() rounded up */
- static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
-
- public:
-
- /* Default constructor; has an empty body and initializer list */
- __forceinline TriangleMvMB() {}
-
- /* Construction from the vertices at both time steps (a0/b0/c0 and a1/b1/c1) and IDs; the second time step is stored as the differences a1-a0, b1-b0, c1-c0 */
- __forceinline TriangleMvMB(const Vec3vf<M>& a0, const Vec3vf<M>& a1,
- const Vec3vf<M>& b0, const Vec3vf<M>& b1,
- const Vec3vf<M>& c0, const Vec3vf<M>& c1,
- const vuint<M>& geomIDs, const vuint<M>& primIDs)
- : v0(a0), v1(b0), v2(c0), dv0(a1-a0), dv1(b1-b0), dv2(c1-c0), geomIDs(geomIDs), primIDs(primIDs) {}
-
- /* Returns a mask that tells which triangles are valid, i.e. whose geometry ID differs from -1 */
- __forceinline vbool<M> valid() const { return geomIDs != vuint<M>(-1); }
-
- /* Returns whether triangle i is valid (geometry ID differs from -1) */
- __forceinline bool valid(const size_t i) const { assert(i<M); return geomIDs[i] != -1; }
-
- /* Returns the number of stored triangles, computed as bsf of the inverted validity mask (presumably the index of the first invalid slot - confirm bsf semantics) */
- __forceinline size_t size() const { return bsf(~movemask(valid())); }
-
- /* Returns the geometry IDs (whole vector, or the single entry i) */
- __forceinline vuint<M>& geomID() { return geomIDs; }
- __forceinline const vuint<M>& geomID() const { return geomIDs; }
- __forceinline unsigned int geomID(const size_t i) const { assert(i<M); return geomIDs[i]; }
-
- /* Returns the primitive IDs (whole vector, or the single entry i) */
- __forceinline vuint<M>& primID() { return primIDs; }
- __forceinline const vuint<M>& primID() const { return primIDs; }
- __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
-
- /* Calculates the bounds of the valid triangles at t0 (vertices v0, v1, v2) */
- __forceinline BBox3fa bounds0() const
- {
- Vec3vf<M> lower = min(v0,v1,v2);
- Vec3vf<M> upper = max(v0,v1,v2);
- const vbool<M> mask = valid();
- lower.x = select(mask,lower.x,vfloat<M>(pos_inf)); // invalid lanes get +inf / -inf so that they cannot win the reductions below
- lower.y = select(mask,lower.y,vfloat<M>(pos_inf));
- lower.z = select(mask,lower.z,vfloat<M>(pos_inf));
- upper.x = select(mask,upper.x,vfloat<M>(neg_inf));
- upper.y = select(mask,upper.y,vfloat<M>(neg_inf));
- upper.z = select(mask,upper.z,vfloat<M>(neg_inf));
- return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)),
- Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z)));
- }
-
- /* Calculates the bounds of the valid triangles at t1 (vertices v0+dv0, v1+dv1, v2+dv2) */
- __forceinline BBox3fa bounds1() const
- {
- const Vec3vf<M> p0 = v0+dv0;
- const Vec3vf<M> p1 = v1+dv1;
- const Vec3vf<M> p2 = v2+dv2;
- Vec3vf<M> lower = min(p0,p1,p2);
- Vec3vf<M> upper = max(p0,p1,p2);
- const vbool<M> mask = valid();
- lower.x = select(mask,lower.x,vfloat<M>(pos_inf)); // invalid lanes get +inf / -inf so that they cannot win the reductions below
- lower.y = select(mask,lower.y,vfloat<M>(pos_inf));
- lower.z = select(mask,lower.z,vfloat<M>(pos_inf));
- upper.x = select(mask,upper.x,vfloat<M>(neg_inf));
- upper.y = select(mask,upper.y,vfloat<M>(neg_inf));
- upper.z = select(mask,upper.z,vfloat<M>(neg_inf));
- return BBox3fa(Vec3fa(reduce_min(lower.x),reduce_min(lower.y),reduce_min(lower.z)),
- Vec3fa(reduce_max(upper.x),reduce_max(upper.y),reduce_max(upper.z)));
- }
-
- /* Calculates the linear bounds of the primitive as the pair (bounds0(), bounds1()) */
- __forceinline LBBox3fa linearBounds() const {
- return LBBox3fa(bounds0(),bounds1());
- }
-
- /* Fills this block with up to M triangles taken from prims[begin..end), reading vertices at time steps itime and itime+1; advances begin past the consumed entries and returns the bounds of the read vertices at both time steps */
- __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime)
- {
- vuint<M> vgeomID = -1, vprimID = -1; // slots that are not filled below keep the ID -1
- Vec3vf<M> va0 = zero, vb0 = zero, vc0 = zero;
- Vec3vf<M> va1 = zero, vb1 = zero, vc1 = zero;
-
- BBox3fa bounds0 = empty;
- BBox3fa bounds1 = empty;
-
- for (size_t i=0; i<M && begin<end; i++, begin++)
- {
- const PrimRef& prim = prims[begin];
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const TriangleMesh* __restrict__ const mesh = scene->get<TriangleMesh>(geomID);
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- const Vec3fa& a0 = mesh->vertex(tri.v[0],itime+0); bounds0.extend(a0);
- const Vec3fa& a1 = mesh->vertex(tri.v[0],itime+1); bounds1.extend(a1);
- const Vec3fa& b0 = mesh->vertex(tri.v[1],itime+0); bounds0.extend(b0);
- const Vec3fa& b1 = mesh->vertex(tri.v[1],itime+1); bounds1.extend(b1);
- const Vec3fa& c0 = mesh->vertex(tri.v[2],itime+0); bounds0.extend(c0);
- const Vec3fa& c1 = mesh->vertex(tri.v[2],itime+1); bounds1.extend(c1);
- vgeomID [i] = geomID;
- vprimID [i] = primID;
- va0.x[i] = a0.x; va0.y[i] = a0.y; va0.z[i] = a0.z;
- va1.x[i] = a1.x; va1.y[i] = a1.y; va1.z[i] = a1.z;
- vb0.x[i] = b0.x; vb0.y[i] = b0.y; vb0.z[i] = b0.z;
- vb1.x[i] = b1.x; vb1.y[i] = b1.y; vb1.z[i] = b1.z;
- vc0.x[i] = c0.x; vc0.y[i] = c0.y; vc0.z[i] = c0.z;
- vc1.x[i] = c1.x; vc1.y[i] = c1.y; vc1.z[i] = c1.z;
- }
- new (this) TriangleMvMB(va0,va1,vb0,vb1,vc0,vc1,vgeomID,vprimID); // placement-new: reconstructs this object in its own storage
- return LBBox3fa(bounds0,bounds1);
- }
-
- /* Fills this block with up to M triangles taken from prims[begin..end) for the given time_range; advances begin and returns the accumulated mesh->linearBounds(primID, time_range) of the consumed triangles */
- __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range)
- {
- vuint<M> vgeomID = -1, vprimID = -1; // slots that are not filled below keep the ID -1
- Vec3vf<M> va0 = zero, vb0 = zero, vc0 = zero;
- Vec3vf<M> va1 = zero, vb1 = zero, vc1 = zero;
-
- LBBox3fa allBounds = empty;
- for (size_t i=0; i<M && begin<end; i++, begin++)
- {
- const PrimRefMB& prim = prims[begin];
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const TriangleMesh* const mesh = scene->get<TriangleMesh>(geomID);
- const range<int> itime_range = mesh->timeSegmentRange(time_range);
- assert(itime_range.size() == 1); // time_range is expected to map to exactly one time segment of the mesh
- const int ilower = itime_range.begin();
- const TriangleMesh::Triangle& tri = mesh->triangle(primID);
- allBounds.extend(mesh->linearBounds(primID, time_range));
- const Vec3fa& a0 = mesh->vertex(tri.v[0],ilower+0);
- const Vec3fa& a1 = mesh->vertex(tri.v[0],ilower+1);
- const Vec3fa& b0 = mesh->vertex(tri.v[1],ilower+0);
- const Vec3fa& b1 = mesh->vertex(tri.v[1],ilower+1);
- const Vec3fa& c0 = mesh->vertex(tri.v[2],ilower+0);
- const Vec3fa& c1 = mesh->vertex(tri.v[2],ilower+1);
- const BBox1f time_range_v(mesh->timeStep(ilower+0),mesh->timeStep(ilower+1));
- auto a01 = globalLinear(std::make_pair(a0,a1),time_range_v); // NOTE(review): presumably re-expresses the segment's endpoints over the global time axis - confirm in globalLinear
- auto b01 = globalLinear(std::make_pair(b0,b1),time_range_v);
- auto c01 = globalLinear(std::make_pair(c0,c1),time_range_v);
- vgeomID [i] = geomID;
- vprimID [i] = primID;
- va0.x[i] = a01.first .x; va0.y[i] = a01.first .y; va0.z[i] = a01.first .z;
- va1.x[i] = a01.second.x; va1.y[i] = a01.second.y; va1.z[i] = a01.second.z;
- vb0.x[i] = b01.first .x; vb0.y[i] = b01.first .y; vb0.z[i] = b01.first .z;
- vb1.x[i] = b01.second.x; vb1.y[i] = b01.second.y; vb1.z[i] = b01.second.z;
- vc0.x[i] = c01.first .x; vc0.y[i] = c01.first .y; vc0.z[i] = c01.first .z;
- vc1.x[i] = c01.second.x; vc1.y[i] = c01.second.y; vc1.z[i] = c01.second.z;
- }
- new (this) TriangleMvMB(va0,va1,vb0,vb1,vc0,vc1,vgeomID,vprimID); // placement-new: reconstructs this object in its own storage
- return allBounds;
- }
-
- public:
- Vec3vf<M> v0; // 1st vertex of the triangles
- Vec3vf<M> v1; // 2nd vertex of the triangles
- Vec3vf<M> v2; // 3rd vertex of the triangles
- Vec3vf<M> dv0; // difference vector between time steps t0 and t1 for first vertex
- Vec3vf<M> dv1; // difference vector between time steps t0 and t1 for second vertex
- Vec3vf<M> dv2; // difference vector between time steps t0 and t1 for third vertex
- private:
- vuint<M> geomIDs; // geometry ID per triangle; -1 marks an invalid slot
- vuint<M> primIDs; // primitive ID per triangle
- };
-
- template<int M>
- typename TriangleMvMB<M>::Type TriangleMvMB<M>::type; // out-of-class definition of the static member TriangleMvMB<M>::type
-
- typedef TriangleMvMB<4> Triangle4vMB; // alias for the M = 4 instantiation
-}
diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h b/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h
deleted file mode 100644
index 35a260d826..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h
+++ /dev/null
@@ -1,211 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "triangle.h"
-#include "intersector_epilog.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Intersects a block of M motion blur triangles (TriangleMvMB<M>) with 1 ray by forwarding to MoellerTrumboreIntersector1<Mx> */
- template<int M, int Mx, bool filter>
- struct TriangleMvMBIntersector1Moeller
- {
- typedef TriangleMvMB<M> Primitive;
- typedef MoellerTrumboreIntersector1<Mx> Precalculations; // supplies the intersect() routine called below
-
- /*! Computes the triangle vertices for the ray's time from v and dv, then intersects the ray with the M triangles and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time());
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); // presumably time*dv0 + v0, the vertex at the ray's time - confirm madd operand order
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- pre.intersect(ray,v0,v1,v2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Same vertex computation as intersect(); tests whether the ray is occluded by one of the M triangles and returns the result of pre.intersect(). */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time());
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); // presumably time*dv0 + v0 - confirm madd operand order
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- return pre.intersect(ray,v0,v1,v2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) // forwards unchanged to PrimitivePointQuery1<Primitive>
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
- /*! Intersects a block of M motion blur triangles (TriangleMvMB<M>) with K rays by forwarding to MoellerTrumboreIntersectorK<Mx,K> */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMvMBIntersectorKMoeller
- {
- typedef TriangleMvMB<M> Primitive;
- typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations;
-
- /*! Packet intersect: walks the triangles of the block one by one, computes each triangle's vertices for the rays' times and tests it against the K rays, using valid_i as the ray mask. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break; // stop at the first invalid triangle slot
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> time(ray.time());
- const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i)); // presumably time*dv0 + v0 for triangle i - confirm madd operand order
- const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i));
- const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i));
- pre.intersectK(valid_i,ray,v0,v1,v2,IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- /*! Packet occlusion test: returns the complement of the local mask valid0 after the triangle loop. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- vbool<K> valid0 = valid_i; // working copy; presumably the epilog clears lanes found occluded - confirm in OccludedKEpilogM
-
- for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break; // stop at the first invalid triangle slot
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- const Vec3vf<K> time(ray.time());
- const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i)); // presumably time*dv0 + v0 for triangle i - confirm madd operand order
- const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i));
- const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i));
- pre.intersectK(valid0,ray,v0,v1,v2,OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break; // early out once no lane is set in valid0
- }
- return !valid0;
- }
-
- /*! Intersects ray k of the packet with all M triangles of the block in one call, using the vertices computed for that ray's time, and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time()[k]);
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); // presumably time*dv0 + v0 - confirm madd operand order
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- pre.intersect(ray,k,v0,v1,v2,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
-
- /*! Tests whether ray k of the packet is occluded by one of the M triangles; returns the result of pre.intersect(). */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time()[k]);
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); // presumably time*dv0 + v0 - confirm madd operand order
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- return pre.intersect(ray,k,v0,v1,v2,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
- };
-
- /*! Intersects a block of M motion blur triangles (TriangleMvMB<M>) with 1 ray by forwarding to PlueckerIntersector1<Mx> */
- template<int M, int Mx, bool filter>
- struct TriangleMvMBIntersector1Pluecker
- {
- typedef TriangleMvMB<M> Primitive;
- typedef PlueckerIntersector1<Mx> Precalculations; // supplies the intersect() routine called below
-
- /*! Computes the triangle vertices for the ray's time from v and dv, then intersects the ray with the M triangles and updates the hit. */
- static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time());
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); // presumably time*dv0 + v0, the vertex at the ray's time - confirm madd operand order
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- /*! Same vertex computation as intersect(); tests whether the ray is occluded by one of the M triangles and returns the result of pre.intersect(). */
- static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time());
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); // presumably time*dv0 + v0 - confirm madd operand order
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID()));
- }
-
- static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) // forwards unchanged to PrimitivePointQuery1<Primitive>
- {
- return PrimitivePointQuery1<Primitive>::pointQuery(query, context, tri);
- }
- };
-
- /*! Intersects a block of M motion blur triangles (TriangleMvMB<M>) with K rays by forwarding to PlueckerIntersectorK<Mx,K> */
- template<int M, int Mx, int K, bool filter>
- struct TriangleMvMBIntersectorKPluecker
- {
- typedef TriangleMvMB<M> Primitive;
- typedef PlueckerIntersectorK<Mx,K> Precalculations;
-
- /*! Packet intersect: walks the triangles of the block one by one, computes each triangle's vertices for the rays' times and tests it against the K rays, using valid_i as the ray mask. */
- static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break; // stop at the first invalid triangle slot
- STAT3(normal.trav_prims,1,popcnt(valid_i),K);
- const Vec3vf<K> time(ray.time());
- const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i)); // presumably time*dv0 + v0 for triangle i - confirm madd operand order
- const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i));
- const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i));
- pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i));
- }
- }
-
- /*! Packet occlusion test: returns the complement of the local mask valid0 after the triangle loop. */
- static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- vbool<K> valid0 = valid_i; // working copy; presumably the epilog clears lanes found occluded - confirm in OccludedKEpilogM
-
- for (size_t i=0; i<TriangleMvMB<M>::max_size(); i++)
- {
- if (!tri.valid(i)) break; // stop at the first invalid triangle slot
- STAT3(shadow.trav_prims,1,popcnt(valid0),K);
- const Vec3vf<K> time(ray.time());
- const Vec3vf<K> v0 = madd(time,broadcast<vfloat<K>>(tri.dv0,i),broadcast<vfloat<K>>(tri.v0,i)); // presumably time*dv0 + v0 for triangle i - confirm madd operand order
- const Vec3vf<K> v1 = madd(time,broadcast<vfloat<K>>(tri.dv1,i),broadcast<vfloat<K>>(tri.v1,i));
- const Vec3vf<K> v2 = madd(time,broadcast<vfloat<K>>(tri.dv2,i),broadcast<vfloat<K>>(tri.v2,i));
- pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i));
- if (none(valid0)) break; // early out once no lane is set in valid0
- }
- return !valid0;
- }
-
- /*! Intersects ray k of the packet with all M triangles of the block in one call, using the vertices computed for that ray's time, and updates the hit. */
- static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(normal.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time()[k]);
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); // presumably time*dv0 + v0 - confirm madd operand order
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
-
- /*! Tests whether ray k of the packet is occluded by one of the M triangles; returns the result of pre.intersect(). */
- static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri)
- {
- STAT3(shadow.trav_prims,1,1,1);
- const Vec3vf<Mx> time(ray.time()[k]);
- const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); // presumably time*dv0 + v0 - confirm madd operand order
- const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1));
- const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2));
- return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID()));
- }
- };
- }
-}