// Copyright 2009-2020 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "node_intersector.h" namespace embree { namespace isa { ////////////////////////////////////////////////////////////////////////////////////// // Ray packet structure used in hybrid traversal ////////////////////////////////////////////////////////////////////////////////////// template struct TravRayK; /* Fast variant */ template struct TravRayK { __forceinline TravRayK() {} __forceinline TravRayK(const Vec3vf& ray_org, const Vec3vf& ray_dir, int N) { init(ray_org, ray_dir, N); } __forceinline TravRayK(const Vec3vf& ray_org, const Vec3vf& ray_dir, const vfloat& ray_tnear, const vfloat& ray_tfar, int N) { init(ray_org, ray_dir, N); tnear = ray_tnear; tfar = ray_tfar; } __forceinline void init(const Vec3vf& ray_org, const Vec3vf& ray_dir, int N) { org = ray_org; dir = ray_dir; rdir = rcp_safe(ray_dir); #if defined(__aarch64__) neg_org_rdir = -(org * rdir); #elif defined(__AVX2__) org_rdir = org * rdir; #endif if (N) { const int size = sizeof(float)*N; nearXYZ.x = select(rdir.x >= 0.0f, vint(0*size), vint(1*size)); nearXYZ.y = select(rdir.y >= 0.0f, vint(2*size), vint(3*size)); nearXYZ.z = select(rdir.z >= 0.0f, vint(4*size), vint(5*size)); } } Vec3vf org; Vec3vf dir; Vec3vf rdir; #if defined(__aarch64__) Vec3vf neg_org_rdir; #elif defined(__AVX2__) Vec3vf org_rdir; #endif Vec3vi nearXYZ; vfloat tnear; vfloat tfar; }; template using TravRayKFast = TravRayK; /* Robust variant */ template struct TravRayK { __forceinline TravRayK() {} __forceinline TravRayK(const Vec3vf& ray_org, const Vec3vf& ray_dir, int N) { init(ray_org, ray_dir, N); } __forceinline TravRayK(const Vec3vf& ray_org, const Vec3vf& ray_dir, const vfloat& ray_tnear, const vfloat& ray_tfar, int N) { init(ray_org, ray_dir, N); tnear = ray_tnear; tfar = ray_tfar; } __forceinline void init(const Vec3vf& ray_org, const Vec3vf& ray_dir, int N) { org = ray_org; dir = ray_dir; rdir = vfloat(1.0f)/(zero_fix(ray_dir)); if (N) { const int size = sizeof(float)*N; nearXYZ.x = select(rdir.x >= 0.0f, vint(0*size), vint(1*size)); nearXYZ.y = select(rdir.y >= 0.0f, vint(2*size), vint(3*size)); nearXYZ.z = select(rdir.z >= 0.0f, vint(4*size), vint(5*size)); } } Vec3vf org; Vec3vf dir; Vec3vf rdir; Vec3vi nearXYZ; vfloat tnear; vfloat tfar; }; template using TravRayKRobust = TravRayK; ////////////////////////////////////////////////////////////////////////////////////// // Fast AABBNode intersection ////////////////////////////////////////////////////////////////////////////////////// template __forceinline vbool intersectNodeK(const typename BVHN::AABBNode* node, size_t i, const TravRayKFast& ray, vfloat& dist) { #if defined(__aarch64__) const vfloat lclipMinX = madd(node->lower_x[i], ray.rdir.x, ray.neg_org_rdir.x); const vfloat lclipMinY = madd(node->lower_y[i], ray.rdir.y, ray.neg_org_rdir.y); const vfloat lclipMinZ = madd(node->lower_z[i], ray.rdir.z, ray.neg_org_rdir.z); const vfloat lclipMaxX = madd(node->upper_x[i], ray.rdir.x, ray.neg_org_rdir.x); const vfloat lclipMaxY = madd(node->upper_y[i], ray.rdir.y, ray.neg_org_rdir.y); const vfloat lclipMaxZ = madd(node->upper_z[i], ray.rdir.z, ray.neg_org_rdir.z); #elif defined(__AVX2__) const vfloat lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x); const vfloat lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y); const vfloat lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z); const vfloat lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x); const vfloat lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y); const vfloat lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z); #else const vfloat lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x; const vfloat lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y; const vfloat lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z; const vfloat lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x; const vfloat lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y; const vfloat lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z; #endif #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX if (K == 16) { /* use mixed float/int min/max */ const vfloat lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); const vfloat lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); const vbool lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); dist = lnearP; return lhit; } else #endif { const vfloat lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); const vfloat lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX const vbool lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); #else const vbool lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); #endif dist = lnearP; return lhit; } } ////////////////////////////////////////////////////////////////////////////////////// // Robust AABBNode intersection ////////////////////////////////////////////////////////////////////////////////////// template __forceinline vbool intersectNodeKRobust(const typename BVHN::AABBNode* node, size_t i, const TravRayKRobust& ray, vfloat& dist) { // FIXME: use per instruction rounding for AVX512 const vfloat lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x; const vfloat lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y; const vfloat lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z; const vfloat lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x; const vfloat lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y; const vfloat lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z; const float round_up = 1.0f+3.0f*float(ulp); const float round_down = 1.0f-3.0f*float(ulp); const vfloat lnearP = round_down*max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const vfloat lfarP = round_up *min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const vbool lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); dist = lnearP; return lhit; } ////////////////////////////////////////////////////////////////////////////////////// // Fast AABBNodeMB intersection ////////////////////////////////////////////////////////////////////////////////////// template __forceinline vbool intersectNodeK(const typename BVHN::AABBNodeMB* node, const size_t i, const TravRayKFast& ray, const vfloat& time, vfloat& dist) { const vfloat vlower_x = madd(time, vfloat(node->lower_dx[i]), vfloat(node->lower_x[i])); const vfloat vlower_y = madd(time, vfloat(node->lower_dy[i]), vfloat(node->lower_y[i])); const vfloat vlower_z = madd(time, vfloat(node->lower_dz[i]), vfloat(node->lower_z[i])); const vfloat vupper_x = madd(time, vfloat(node->upper_dx[i]), vfloat(node->upper_x[i])); const vfloat vupper_y = madd(time, vfloat(node->upper_dy[i]), vfloat(node->upper_y[i])); const vfloat vupper_z = madd(time, vfloat(node->upper_dz[i]), vfloat(node->upper_z[i])); #if defined(__aarch64__) const vfloat lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x); const vfloat lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y); const vfloat lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z); const vfloat lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x); const vfloat lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y); const vfloat lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z); #elif defined(__AVX2__) const vfloat lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x); const vfloat lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y); const vfloat lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z); const vfloat lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x); const vfloat lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y); const vfloat lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z); #else const vfloat lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; const vfloat lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; const vfloat lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; const vfloat lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; const vfloat lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; const vfloat lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; #endif #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX if (K == 16) { /* use mixed float/int min/max */ const vfloat lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); const vfloat lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); const vbool lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); dist = lnearP; return lhit; } else #endif { const vfloat lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); const vfloat lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX const vbool lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); #else const vbool lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); #endif dist = lnearP; return lhit; } } ////////////////////////////////////////////////////////////////////////////////////// // Robust AABBNodeMB intersection ////////////////////////////////////////////////////////////////////////////////////// template __forceinline vbool intersectNodeKRobust(const typename BVHN::AABBNodeMB* node, const size_t i, const TravRayKRobust& ray, const vfloat& time, vfloat& dist) { const vfloat vlower_x = madd(time, vfloat(node->lower_dx[i]), vfloat(node->lower_x[i])); const vfloat vlower_y = madd(time, vfloat(node->lower_dy[i]), vfloat(node->lower_y[i])); const vfloat vlower_z = madd(time, vfloat(node->lower_dz[i]), vfloat(node->lower_z[i])); const vfloat vupper_x = madd(time, vfloat(node->upper_dx[i]), vfloat(node->upper_x[i])); const vfloat vupper_y = madd(time, vfloat(node->upper_dy[i]), vfloat(node->upper_y[i])); const vfloat vupper_z = madd(time, vfloat(node->upper_dz[i]), vfloat(node->upper_z[i])); const vfloat lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; const vfloat lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; const vfloat lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; const vfloat lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; const vfloat lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; const vfloat lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; const float round_up = 1.0f+3.0f*float(ulp); const float round_down = 1.0f-3.0f*float(ulp); #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX if (K == 16) { const vfloat lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); const vfloat lfarP = round_up *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); const vbool lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); dist = lnearP; return lhit; } else #endif { const vfloat lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); const vfloat lfarP = round_up *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); const vbool lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); dist = lnearP; return lhit; } } ////////////////////////////////////////////////////////////////////////////////////// // Fast AABBNodeMB4D intersection ////////////////////////////////////////////////////////////////////////////////////// template __forceinline vbool intersectNodeKMB4D(const typename BVHN::NodeRef ref, const size_t i, const TravRayKFast& ray, const vfloat& time, vfloat& dist) { const typename BVHN::AABBNodeMB* node = ref.getAABBNodeMB(); const vfloat vlower_x = madd(time, vfloat(node->lower_dx[i]), vfloat(node->lower_x[i])); const vfloat vlower_y = madd(time, vfloat(node->lower_dy[i]), vfloat(node->lower_y[i])); const vfloat vlower_z = madd(time, vfloat(node->lower_dz[i]), vfloat(node->lower_z[i])); const vfloat vupper_x = madd(time, vfloat(node->upper_dx[i]), vfloat(node->upper_x[i])); const vfloat vupper_y = madd(time, vfloat(node->upper_dy[i]), vfloat(node->upper_y[i])); const vfloat vupper_z = madd(time, vfloat(node->upper_dz[i]), vfloat(node->upper_z[i])); #if defined(__aarch64__) const vfloat lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x); const vfloat lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y); const vfloat lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z); const vfloat lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x); const vfloat lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y); const vfloat lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z); #elif defined(__AVX2__) const vfloat lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x); const vfloat lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y); const vfloat lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z); const vfloat lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x); const vfloat lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y); const vfloat lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z); #else const vfloat lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; const vfloat lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; const vfloat lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; const vfloat lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; const vfloat lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; const vfloat lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; #endif const vfloat lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const vfloat lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); vbool lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); if (unlikely(ref.isAABBNodeMB4D())) { const typename BVHN::AABBNodeMB4D* node1 = (const typename BVHN::AABBNodeMB4D*) node; lhit = lhit & (vfloat(node1->lower_t[i]) <= time) & (time < vfloat(node1->upper_t[i])); } dist = lnearP; return lhit; } ////////////////////////////////////////////////////////////////////////////////////// // Robust AABBNodeMB4D intersection ////////////////////////////////////////////////////////////////////////////////////// template __forceinline vbool intersectNodeKMB4DRobust(const typename BVHN::NodeRef ref, const size_t i, const TravRayKRobust& ray, const vfloat& time, vfloat& dist) { const typename BVHN::AABBNodeMB* node = ref.getAABBNodeMB(); const vfloat vlower_x = madd(time, vfloat(node->lower_dx[i]), vfloat(node->lower_x[i])); const vfloat vlower_y = madd(time, vfloat(node->lower_dy[i]), vfloat(node->lower_y[i])); const vfloat vlower_z = madd(time, vfloat(node->lower_dz[i]), vfloat(node->lower_z[i])); const vfloat vupper_x = madd(time, vfloat(node->upper_dx[i]), vfloat(node->upper_x[i])); const vfloat vupper_y = madd(time, vfloat(node->upper_dy[i]), vfloat(node->upper_y[i])); const vfloat vupper_z = madd(time, vfloat(node->upper_dz[i]), vfloat(node->upper_z[i])); const vfloat lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; const vfloat lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; const vfloat lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; const vfloat lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; const vfloat lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; const vfloat lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; const float round_up = 1.0f+3.0f*float(ulp); const float round_down = 1.0f-3.0f*float(ulp); const vfloat lnearP = round_down*maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const vfloat lfarP = round_up *mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); vbool lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); if (unlikely(ref.isAABBNodeMB4D())) { const typename BVHN::AABBNodeMB4D* node1 = (const typename BVHN::AABBNodeMB4D*) node; lhit = lhit & (vfloat(node1->lower_t[i]) <= time) & (time < vfloat(node1->upper_t[i])); } dist = lnearP; return lhit; } ////////////////////////////////////////////////////////////////////////////////////// // Fast OBBNode intersection ////////////////////////////////////////////////////////////////////////////////////// template __forceinline vbool intersectNodeK(const typename BVHN::OBBNode* node, const size_t i, const TravRayK& ray, vfloat& dist) { const AffineSpace3vf naabb(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]), Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]), Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]), Vec3f(node->naabb.p .x[i], node->naabb.p .y[i], node->naabb.p .z[i])); const Vec3vf dir = xfmVector(naabb, ray.dir); const Vec3vf nrdir = Vec3vf(vfloat(-1.0f)) * rcp_safe(dir); // FIXME: negate instead of mul with -1? const Vec3vf org = xfmPoint(naabb, ray.org); const vfloat lclipMinX = org.x * nrdir.x; // (Vec3fa(zero) - org) * rdir; const vfloat lclipMinY = org.y * nrdir.y; const vfloat lclipMinZ = org.z * nrdir.z; const vfloat lclipMaxX = lclipMinX - nrdir.x; // (Vec3fa(one) - org) * rdir; const vfloat lclipMaxY = lclipMinY - nrdir.y; const vfloat lclipMaxZ = lclipMinZ - nrdir.z; vfloat lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); vfloat lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); if (robust) { lnearP = lnearP*vfloat(1.0f-3.0f*float(ulp)); lfarP = lfarP *vfloat(1.0f+3.0f*float(ulp)); } const vbool lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); dist = lnearP; return lhit; } ////////////////////////////////////////////////////////////////////////////////////// // Fast OBBNodeMB intersection ////////////////////////////////////////////////////////////////////////////////////// template __forceinline vbool intersectNodeK(const typename BVHN::OBBNodeMB* node, const size_t i, const TravRayK& ray, const vfloat& time, vfloat& dist) { const AffineSpace3vf xfm(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]), Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]), Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]), Vec3f(node->space0.p .x[i], node->space0.p .y[i], node->space0.p .z[i])); const Vec3vf b0_lower = zero; const Vec3vf b0_upper = one; const Vec3vf b1_lower(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]); const Vec3vf b1_upper(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]); const Vec3vf lower = lerp(b0_lower, b1_lower, time); const Vec3vf upper = lerp(b0_upper, b1_upper, time); const Vec3vf dir = xfmVector(xfm, ray.dir); const Vec3vf rdir = rcp_safe(dir); const Vec3vf org = xfmPoint(xfm, ray.org); const vfloat lclipMinX = (lower.x - org.x) * rdir.x; const vfloat lclipMinY = (lower.y - org.y) * rdir.y; const vfloat lclipMinZ = (lower.z - org.z) * rdir.z; const vfloat lclipMaxX = (upper.x - org.x) * rdir.x; const vfloat lclipMaxY = (upper.y - org.y) * rdir.y; const vfloat lclipMaxZ = (upper.z - org.z) * rdir.z; vfloat lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); vfloat lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); if (robust) { lnearP = lnearP*vfloat(1.0f-3.0f*float(ulp)); lfarP = lfarP *vfloat(1.0f+3.0f*float(ulp)); } const vbool lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); dist = lnearP; return lhit; } ////////////////////////////////////////////////////////////////////////////////////// // QuantizedBaseNode intersection ////////////////////////////////////////////////////////////////////////////////////// template __forceinline vbool intersectQuantizedNodeK(const typename BVHN::QuantizedBaseNode* node, size_t i, const TravRayK& ray, vfloat& dist) { assert(movemask(node->validMask()) & ((size_t)1 << i)); const vfloat lower_x = node->dequantizeLowerX(); const vfloat upper_x = node->dequantizeUpperX(); const vfloat lower_y = node->dequantizeLowerY(); const vfloat upper_y = node->dequantizeUpperY(); const vfloat lower_z = node->dequantizeLowerZ(); const vfloat upper_z = node->dequantizeUpperZ(); #if defined(__aarch64__) const vfloat lclipMinX = madd(lower_x[i], ray.rdir.x, ray.neg_org_rdir.x); const vfloat lclipMinY = madd(lower_y[i], ray.rdir.y, ray.neg_org_rdir.y); const vfloat lclipMinZ = madd(lower_z[i], ray.rdir.z, ray.neg_org_rdir.z); const vfloat lclipMaxX = madd(upper_x[i], ray.rdir.x, ray.neg_org_rdir.x); const vfloat lclipMaxY = madd(upper_y[i], ray.rdir.y, ray.neg_org_rdir.y); const vfloat lclipMaxZ = madd(upper_z[i], ray.rdir.z, ray.neg_org_rdir.z); #elif defined(__AVX2__) const vfloat lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x); const vfloat lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y); const vfloat lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z); const vfloat lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x); const vfloat lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y); const vfloat lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z); #else const vfloat lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x; const vfloat lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y; const vfloat lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z; const vfloat lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x; const vfloat lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y; const vfloat lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z; #endif #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX if (K == 16) { /* use mixed float/int min/max */ const vfloat lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); const vfloat lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); const vbool lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); dist = lnearP; return lhit; } else #endif { const vfloat lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); const vfloat lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX const vbool lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); #else const vbool lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); #endif dist = lnearP; return lhit; } } template __forceinline vbool intersectQuantizedNodeK(const typename BVHN::QuantizedBaseNode* node, size_t i, const TravRayK& ray, vfloat& dist) { assert(movemask(node->validMask()) & ((size_t)1 << i)); const vfloat lower_x = node->dequantizeLowerX(); const vfloat upper_x = node->dequantizeUpperX(); const vfloat lower_y = node->dequantizeLowerY(); const vfloat upper_y = node->dequantizeUpperY(); const vfloat lower_z = node->dequantizeLowerZ(); const vfloat upper_z = node->dequantizeUpperZ(); const vfloat lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x; const vfloat lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y; const vfloat lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z; const vfloat lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x; const vfloat lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y; const vfloat lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z; const float round_up = 1.0f+3.0f*float(ulp); const float round_down = 1.0f-3.0f*float(ulp); const vfloat lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); const vfloat lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); const vbool lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); dist = lnearP; return lhit; } template __forceinline vbool intersectQuantizedNodeMBK(const typename BVHN::QuantizedBaseNodeMB* node, const size_t i, const TravRayK& ray, const vfloat& time, vfloat& dist) { assert(movemask(node->validMask()) & ((size_t)1 << i)); const vfloat lower_x = node->dequantizeLowerX(i,time); const vfloat upper_x = node->dequantizeUpperX(i,time); const vfloat lower_y = node->dequantizeLowerY(i,time); const vfloat upper_y = node->dequantizeUpperY(i,time); const vfloat lower_z = node->dequantizeLowerZ(i,time); const vfloat upper_z = node->dequantizeUpperZ(i,time); #if defined(__aarch64__) const vfloat lclipMinX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x); const vfloat lclipMinY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y); const vfloat lclipMinZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z); const vfloat lclipMaxX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x); const vfloat lclipMaxY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y); const vfloat lclipMaxZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z); #elif defined(__AVX2__) const vfloat lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); const vfloat lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); const vfloat lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); const vfloat lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); const vfloat lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); const vfloat lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); #else const vfloat lclipMinX = (lower_x - ray.org.x) * ray.rdir.x; const vfloat lclipMinY = (lower_y - ray.org.y) * ray.rdir.y; const vfloat lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z; const vfloat lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x; const vfloat lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y; const vfloat lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z; #endif const vfloat lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); const vfloat lfarP = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); const vbool lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); dist = lnearP; return lhit; } template __forceinline vbool intersectQuantizedNodeMBK(const typename BVHN::QuantizedBaseNodeMB* node, const size_t i, const TravRayK& ray, const vfloat& time, vfloat& dist) { assert(movemask(node->validMask()) & ((size_t)1 << i)); const vfloat lower_x = node->dequantizeLowerX(i,time); const vfloat upper_x = node->dequantizeUpperX(i,time); const vfloat lower_y = node->dequantizeLowerY(i,time); const vfloat upper_y = node->dequantizeUpperY(i,time); const vfloat lower_z = node->dequantizeLowerZ(i,time); const vfloat upper_z = node->dequantizeUpperZ(i,time); const vfloat lclipMinX = (lower_x - ray.org.x) * ray.rdir.x; const vfloat lclipMinY = (lower_y - ray.org.y) * ray.rdir.y; const vfloat lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z; const vfloat lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x; const vfloat lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y; const vfloat lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z; const float round_up = 1.0f+3.0f*float(ulp); const float round_down = 1.0f-3.0f*float(ulp); const vfloat lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); const vfloat lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); const vbool lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); dist = lnearP; return lhit; } ////////////////////////////////////////////////////////////////////////////////////// // Node intersectors used in hybrid traversal ////////////////////////////////////////////////////////////////////////////////////// /*! Intersects N nodes with K rays */ template struct BVHNNodeIntersectorK; template struct BVHNNodeIntersectorK { /* vmask is both an input and an output parameter! Its initial value should be the parent node hit mask, which is used for correctly computing the current hit mask. The parent hit mask is actually required only for motion blur node intersections (because different rays may have different times), so for regular nodes vmask is simply overwritten. */ static __forceinline bool intersect(const typename BVHN::NodeRef& node, size_t i, const TravRayKFast& ray, const vfloat& time, vfloat& dist, vbool& vmask) { vmask = intersectNodeK(node.getAABBNode(), i, ray, dist); return true; } }; template struct BVHNNodeIntersectorK { static __forceinline bool intersect(const typename BVHN::NodeRef& node, size_t i, const TravRayKRobust& ray, const vfloat& time, vfloat& dist, vbool& vmask) { vmask = intersectNodeKRobust(node.getAABBNode(), i, ray, dist); return true; } }; template struct BVHNNodeIntersectorK { static __forceinline bool intersect(const typename BVHN::NodeRef& node, size_t i, const TravRayKFast& ray, const vfloat& time, vfloat& dist, vbool& vmask) { vmask = intersectNodeK(node.getAABBNodeMB(), i, ray, time, dist); return true; } }; template struct BVHNNodeIntersectorK { static __forceinline bool intersect(const typename BVHN::NodeRef& node, size_t i, const TravRayKRobust& ray, const vfloat& time, vfloat& dist, vbool& vmask) { vmask = intersectNodeKRobust(node.getAABBNodeMB(), i, ray, time, dist); return true; } }; template struct BVHNNodeIntersectorK { static __forceinline bool intersect(const typename BVHN::NodeRef& node, size_t i, const TravRayKFast& ray, const vfloat& time, vfloat& dist, vbool& vmask) { if (likely(node.isAABBNode())) vmask = intersectNodeK(node.getAABBNode(), i, ray, dist); else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK(node.ungetAABBNode(), i, ray, dist); return true; } }; template struct BVHNNodeIntersectorK { static __forceinline bool intersect(const typename BVHN::NodeRef& node, size_t i, const TravRayKRobust& ray, const vfloat& time, vfloat& dist, vbool& vmask) { if (likely(node.isAABBNode())) vmask = intersectNodeKRobust(node.getAABBNode(), i, ray, dist); else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK(node.ungetAABBNode(), i, ray, dist); return true; } }; template struct BVHNNodeIntersectorK { static __forceinline bool intersect(const typename BVHN::NodeRef& node, size_t i, const TravRayKFast& ray, const vfloat& time, vfloat& dist, vbool& vmask) { if (likely(node.isAABBNodeMB())) vmask = intersectNodeK(node.getAABBNodeMB(), i, ray, time, dist); else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK(node.ungetAABBNodeMB(), i, ray, time, dist); return true; } }; template struct BVHNNodeIntersectorK { static __forceinline bool intersect(const typename BVHN::NodeRef& node, size_t i, const TravRayKRobust& ray, const vfloat& time, vfloat& dist, vbool& vmask) { if (likely(node.isAABBNodeMB())) vmask = intersectNodeKRobust(node.getAABBNodeMB(), i, ray, time, dist); else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK(node.ungetAABBNodeMB(), i, ray, time, dist); return true; } }; template struct BVHNNodeIntersectorK { static __forceinline bool intersect(const typename BVHN::NodeRef& node, size_t i, const TravRayKFast& ray, const vfloat& time, vfloat& dist, vbool& vmask) { vmask &= intersectNodeKMB4D(node, i, ray, time, dist); return true; } }; template struct BVHNNodeIntersectorK { static __forceinline bool intersect(const typename BVHN::NodeRef& node, size_t i, const TravRayKRobust& ray, const vfloat& time, vfloat& dist, vbool& vmask) { vmask &= intersectNodeKMB4DRobust(node, i, ray, time, dist); return true; } }; template struct BVHNNodeIntersectorK { static __forceinline bool intersect(const typename BVHN::NodeRef& node, size_t i, const TravRayKFast& ray, const vfloat& time, vfloat& dist, vbool& vmask) { if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) { vmask &= intersectNodeKMB4D(node, i, ray, time, dist); } else /*if (unlikely(node.isOBBNodeMB()))*/ { assert(node.isOBBNodeMB()); vmask &= intersectNodeK(node.ungetAABBNodeMB(), i, ray, time, dist); } return true; } }; template struct BVHNNodeIntersectorK { static __forceinline bool intersect(const typename BVHN::NodeRef& node, size_t i, const TravRayKRobust& ray, const vfloat& time, vfloat& dist, vbool& vmask) { if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) { vmask &= intersectNodeKMB4DRobust(node, i, ray, time, dist); } else /*if (unlikely(node.isOBBNodeMB()))*/ { assert(node.isOBBNodeMB()); vmask &= intersectNodeK(node.ungetAABBNodeMB(), i, ray, time, dist); } return true; } }; /*! Intersects N nodes with K rays */ template struct BVHNQuantizedBaseNodeIntersectorK; template struct BVHNQuantizedBaseNodeIntersectorK { static __forceinline vbool intersectK(const typename BVHN::QuantizedBaseNode* node, const size_t i, const TravRayK& ray, vfloat& dist) { return intersectQuantizedNodeK(node,i,ray,dist); } static __forceinline vbool intersectK(const typename BVHN::QuantizedBaseNodeMB* node, const size_t i, const TravRayK& ray, const vfloat& time, vfloat& dist) { return intersectQuantizedNodeMBK(node,i,ray,time,dist); } }; template struct BVHNQuantizedBaseNodeIntersectorK { static __forceinline vbool intersectK(const typename BVHN::QuantizedBaseNode* node, const size_t i, const TravRayK& ray, vfloat& dist) { return intersectQuantizedNodeK(node,i,ray,dist); } static __forceinline vbool intersectK(const typename BVHN::QuantizedBaseNodeMB* node, const size_t i, const TravRayK& ray, const vfloat& time, vfloat& dist) { return intersectQuantizedNodeMBK(node,i,ray,time,dist); } }; } }