// Copyright 2009-2020 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "node_intersector.h" namespace embree { namespace isa { ////////////////////////////////////////////////////////////////////////////////////// // Frustum structure used in hybrid and stream traversal ////////////////////////////////////////////////////////////////////////////////////// /* Optimized frustum test. We calculate t=(p-org)/dir in ray/box intersection. We assume the rays are split by octant, thus dir intervals are either positive or negative in each dimension. Case 1: dir.min >= 0 && dir.max >= 0: t_min = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min t_max = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max Case 2: dir.min < 0 && dir.max < 0: t_min = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max t_max = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min */ template struct Frustum; /* Fast variant */ template<> struct Frustum { __forceinline Frustum() {} template __forceinline Frustum(const vbool& valid, const Vec3vf& org, const Vec3vf& rdir, const vfloat& ray_tnear, const vfloat& ray_tfar, int N) { init(valid, org, rdir, ray_tnear, ray_tfar, N); } template __forceinline void init(const vbool& valid, const Vec3vf& org, const Vec3vf& rdir, const vfloat& ray_tnear, const vfloat& ray_tfar, int N) { const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)), reduce_min(select(valid, org.y, pos_inf)), reduce_min(select(valid, org.z, pos_inf))); const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)), reduce_max(select(valid, org.y, neg_inf)), reduce_max(select(valid, org.z, neg_inf))); const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)), reduce_min(select(valid, rdir.y, pos_inf)), reduce_min(select(valid, rdir.z, pos_inf))); const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)), reduce_max(select(valid, rdir.y, neg_inf)), reduce_max(select(valid, rdir.z, neg_inf))); const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat(pos_inf))); const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat(neg_inf))); init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N); } __forceinline void init(const Vec3fa& reduced_min_org, const Vec3fa& reduced_max_org, const Vec3fa& reduced_min_rdir, const Vec3fa& reduced_max_rdir, float reduced_min_dist, float reduced_max_dist, int N) { const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero)); min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir); max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir); #if defined (__aarch64__) neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org)); neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org)); #else min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org); max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org); #endif min_dist = reduced_min_dist; max_dist = reduced_max_dist; nf = NearFarPrecalculations(min_rdir, N); } template __forceinline void updateMaxDist(const vfloat& ray_tfar) { max_dist = reduce_max(ray_tfar); } NearFarPrecalculations nf; Vec3fa min_rdir; Vec3fa max_rdir; #if defined (__aarch64__) Vec3fa neg_min_org_rdir; Vec3fa neg_max_org_rdir; #else Vec3fa min_org_rdir; Vec3fa max_org_rdir; #endif float min_dist; float max_dist; }; typedef Frustum FrustumFast; /* Robust variant */ template<> struct Frustum { __forceinline Frustum() {} template __forceinline Frustum(const vbool& valid, const Vec3vf& org, const Vec3vf& rdir, const vfloat& ray_tnear, const vfloat& ray_tfar, int N) { init(valid, org, rdir, ray_tnear, ray_tfar, N); } template __forceinline void init(const vbool& valid, const Vec3vf& org, const Vec3vf& rdir, const vfloat& ray_tnear, const vfloat& ray_tfar, int N) { const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)), reduce_min(select(valid, org.y, pos_inf)), reduce_min(select(valid, org.z, pos_inf))); const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)), reduce_max(select(valid, org.y, neg_inf)), reduce_max(select(valid, org.z, neg_inf))); const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)), reduce_min(select(valid, rdir.y, pos_inf)), reduce_min(select(valid, rdir.z, pos_inf))); const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)), reduce_max(select(valid, rdir.y, neg_inf)), reduce_max(select(valid, rdir.z, neg_inf))); const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat(pos_inf))); const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat(neg_inf))); init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N); } __forceinline void init(const Vec3fa& reduced_min_org, const Vec3fa& reduced_max_org, const Vec3fa& reduced_min_rdir, const Vec3fa& reduced_max_rdir, float reduced_min_dist, float reduced_max_dist, int N) { const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero)); min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir); max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir); min_org = select(pos_rdir, reduced_max_org, reduced_min_org); max_org = select(pos_rdir, reduced_min_org, reduced_max_org); min_dist = reduced_min_dist; max_dist = reduced_max_dist; nf = NearFarPrecalculations(min_rdir, N); } template __forceinline void updateMaxDist(const vfloat& ray_tfar) { max_dist = reduce_max(ray_tfar); } NearFarPrecalculations nf; Vec3fa min_rdir; Vec3fa max_rdir; Vec3fa min_org; Vec3fa max_org; float min_dist; float max_dist; }; typedef Frustum FrustumRobust; ////////////////////////////////////////////////////////////////////////////////////// // Fast AABBNode intersection ////////////////////////////////////////////////////////////////////////////////////// template __forceinline size_t intersectNodeFrustum(const typename BVHN::AABBNode* __restrict__ node, const FrustumFast& frustum, vfloat& dist) { const vfloat bminX = *(const vfloat*)((const char*)&node->lower_x + frustum.nf.nearX); const vfloat bminY = *(const vfloat*)((const char*)&node->lower_x + frustum.nf.nearY); const vfloat bminZ = *(const vfloat*)((const char*)&node->lower_x + frustum.nf.nearZ); const vfloat bmaxX = *(const vfloat*)((const char*)&node->lower_x + frustum.nf.farX); const vfloat bmaxY = *(const vfloat*)((const char*)&node->lower_x + frustum.nf.farY); const vfloat bmaxZ = *(const vfloat*)((const char*)&node->lower_x + frustum.nf.farZ); #if defined (__aarch64__) const vfloat fminX = madd(bminX, vfloat(frustum.min_rdir.x), vfloat(frustum.neg_min_org_rdir.x)); const vfloat fminY = madd(bminY, vfloat(frustum.min_rdir.y), vfloat(frustum.neg_min_org_rdir.y)); const vfloat fminZ = madd(bminZ, vfloat(frustum.min_rdir.z), vfloat(frustum.neg_min_org_rdir.z)); const vfloat fmaxX = madd(bmaxX, vfloat(frustum.max_rdir.x), vfloat(frustum.neg_max_org_rdir.x)); const vfloat fmaxY = madd(bmaxY, vfloat(frustum.max_rdir.y), vfloat(frustum.neg_max_org_rdir.y)); const vfloat fmaxZ = madd(bmaxZ, vfloat(frustum.max_rdir.z), vfloat(frustum.neg_max_org_rdir.z)); #else const vfloat fminX = msub(bminX, vfloat(frustum.min_rdir.x), vfloat(frustum.min_org_rdir.x)); const vfloat fminY = msub(bminY, vfloat(frustum.min_rdir.y), vfloat(frustum.min_org_rdir.y)); const vfloat fminZ = msub(bminZ, vfloat(frustum.min_rdir.z), vfloat(frustum.min_org_rdir.z)); const vfloat fmaxX = msub(bmaxX, vfloat(frustum.max_rdir.x), vfloat(frustum.max_org_rdir.x)); const vfloat fmaxY = msub(bmaxY, vfloat(frustum.max_rdir.y), vfloat(frustum.max_org_rdir.y)); const vfloat fmaxZ = msub(bmaxZ, vfloat(frustum.max_rdir.z), vfloat(frustum.max_org_rdir.z)); #endif const vfloat fmin = maxi(fminX, fminY, fminZ, vfloat(frustum.min_dist)); dist = fmin; const vfloat fmax = mini(fmaxX, fmaxY, fmaxZ, vfloat(frustum.max_dist)); const vbool vmask_node_hit = fmin <= fmax; size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1); return m_node; } ////////////////////////////////////////////////////////////////////////////////////// // Robust AABBNode intersection ////////////////////////////////////////////////////////////////////////////////////// template __forceinline size_t intersectNodeFrustum(const typename BVHN::AABBNode* __restrict__ node, const FrustumRobust& frustum, vfloat& dist) { const vfloat bminX = *(const vfloat*)((const char*)&node->lower_x + frustum.nf.nearX); const vfloat bminY = *(const vfloat*)((const char*)&node->lower_x + frustum.nf.nearY); const vfloat bminZ = *(const vfloat*)((const char*)&node->lower_x + frustum.nf.nearZ); const vfloat bmaxX = *(const vfloat*)((const char*)&node->lower_x + frustum.nf.farX); const vfloat bmaxY = *(const vfloat*)((const char*)&node->lower_x + frustum.nf.farY); const vfloat bmaxZ = *(const vfloat*)((const char*)&node->lower_x + frustum.nf.farZ); const vfloat fminX = (bminX - vfloat(frustum.min_org.x)) * vfloat(frustum.min_rdir.x); const vfloat fminY = (bminY - vfloat(frustum.min_org.y)) * vfloat(frustum.min_rdir.y); const vfloat fminZ = (bminZ - vfloat(frustum.min_org.z)) * vfloat(frustum.min_rdir.z); const vfloat fmaxX = (bmaxX - vfloat(frustum.max_org.x)) * vfloat(frustum.max_rdir.x); const vfloat fmaxY = (bmaxY - vfloat(frustum.max_org.y)) * vfloat(frustum.max_rdir.y); const vfloat fmaxZ = (bmaxZ - vfloat(frustum.max_org.z)) * vfloat(frustum.max_rdir.z); const float round_down = 1.0f-2.0f*float(ulp); // FIXME: use per instruction rounding for AVX512 const float round_up = 1.0f+2.0f*float(ulp); const vfloat fmin = max(fminX, fminY, fminZ, vfloat(frustum.min_dist)); dist = fmin; const vfloat fmax = min(fmaxX, fmaxY, fmaxZ, vfloat(frustum.max_dist)); const vbool vmask_node_hit = (round_down*fmin <= round_up*fmax); size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1); return m_node; } } }