diff options
Diffstat (limited to 'thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp')
-rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp | 917 |
1 files changed, 917 insertions, 0 deletions
diff --git a/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp b/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp new file mode 100644 index 0000000000..6e9a5a538e --- /dev/null +++ b/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp @@ -0,0 +1,917 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "bvh_intersector_hybrid.h" +#include "bvh_traverser1.h" +#include "node_intersector1.h" +#include "node_intersector_packet.h" + +#include "../geometry/intersector_iterators.h" +#include "../geometry/triangle_intersector.h" +#include "../geometry/trianglev_intersector.h" +#include "../geometry/trianglev_mb_intersector.h" +#include "../geometry/trianglei_intersector.h" +#include "../geometry/quadv_intersector.h" +#include "../geometry/quadi_intersector.h" +#include "../geometry/curveNv_intersector.h" +#include "../geometry/curveNi_intersector.h" +#include "../geometry/curveNi_mb_intersector.h" +#include "../geometry/linei_intersector.h" +#include "../geometry/subdivpatch1_intersector.h" +#include "../geometry/object_intersector.h" +#include "../geometry/instance_intersector.h" +#include "../geometry/subgrid_intersector.h" +#include "../geometry/subgrid_mb_intersector.h" +#include "../geometry/curve_intersector_virtual.h" + +#define SWITCH_DURING_DOWN_TRAVERSAL 1 +#define FORCE_SINGLE_MODE 0 + +#define ENABLE_FAST_COHERENT_CODEPATHS 1 + +namespace embree +{ + namespace isa + { + template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single> + void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This, + const BVH* bvh, + NodeRef root, + size_t k, + Precalculations& pre, + RayHitK<K>& ray, + const TravRayK<K, robust>& tray, + IntersectContext* context) + { + /* stack state */ + StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes + StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer + StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle; + stack[0].ptr = root; + stack[0].dist = neg_inf; + + /* load the ray into SIMD registers */ + TravRay<N,robust> tray1; + tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]); + + /* pop loop */ + while (true) pop: + { + /* pop next node */ + if (unlikely(stackPtr == stack)) break; + stackPtr--; + NodeRef cur = NodeRef(stackPtr->ptr); + + /* if popped node is too far, pop next one */ + if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k])) + continue; + + /* downtraversal loop */ + while (true) + { + /* intersect node */ + size_t mask; vfloat<N> tNear; + STAT3(normal.trav_nodes, 1, 1, 1); + bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask); + if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; } + + /* if no child is hit, pop next node */ + if (unlikely(mask == 0)) + goto pop; + + /* select next child and push other children */ + BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd); + } + + /* this is a leaf node */ + assert(cur != BVH::emptyNode); + STAT3(normal.trav_leaves, 1, 1, 1); + size_t num; Primitive* prim = (Primitive*)cur.leaf(num); + + size_t lazy_node = 0; + PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node); + + tray1.tfar = ray.tfar[k]; + + if (unlikely(lazy_node)) { + stackPtr->ptr = lazy_node; + stackPtr->dist = neg_inf; + stackPtr++; + } + } + } + + template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single> + void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i, + Accel::Intersectors* __restrict__ This, + RayHitK<K>& __restrict__ ray, + IntersectContext* __restrict__ context) + { + BVH* __restrict__ bvh = (BVH*)This->ptr; + + /* we may traverse an empty BVH in case all geometry was invalid */ + if (bvh->root == BVH::emptyNode) + return; + +#if ENABLE_FAST_COHERENT_CODEPATHS == 1 + assert(context); + if (unlikely(types == BVH_AN1 && context->user && context->isCoherent())) + { + intersectCoherent(valid_i, This, ray, context); + return; + } +#endif + + /* filter out invalid rays */ + vbool<K> valid = *valid_i == -1; +#if defined(EMBREE_IGNORE_INVALID_RAYS) + valid &= ray.valid(); +#endif + + /* return if there are no valid rays */ + size_t valid_bits = movemask(valid); + +#if defined(__AVX__) + STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1); +#endif + + if (unlikely(valid_bits == 0)) return; + + /* verify correct input */ + assert(all(valid, ray.valid())); + assert(all(valid, ray.tnear() >= 0.0f)); + assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f))); + Precalculations pre(valid, ray); + + /* load ray */ + TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0); + const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f); + const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f); + + if (single) + { + tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf)); + tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf)); + + for (; valid_bits!=0; ) { + const size_t i = bscf(valid_bits); + intersect1(This, bvh, bvh->root, i, pre, ray, tray, context); + } + return; + } + + /* determine switch threshold based on flags */ + const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent; + + vint<K> octant = ray.octant(); + octant = select(valid, octant, vint<K>(0xffffffff)); + + /* test whether we have ray with opposing direction signs in the packet */ + bool split = false; + { + size_t bits = valid_bits; + vbool<K> vsplit( false ); + do + { + const size_t valid_index = bsf(bits); + vbool<K> octant_valid = octant[valid_index] == octant; + bits &= ~(size_t)movemask(octant_valid); + vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7)); + } while (bits); + if (any(vsplit)) split = true; + } + + do + { + const size_t valid_index = bsf(valid_bits); + const vint<K> diff_octant = vint<K>(octant[valid_index])^octant; + const vint<K> count_diff_octant = \ + ((diff_octant >> 2) & 1) + + ((diff_octant >> 1) & 1) + + ((diff_octant >> 0) & 1); + + vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff)); + if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down + + + octant = select(octant_valid,vint<K>(0xffffffff),octant); + valid_bits &= ~(size_t)movemask(octant_valid); + + tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf)); + tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf)); + + /* allocate stack and push root node */ + vfloat<K> stack_near[stackSizeChunk]; + NodeRef stack_node[stackSizeChunk]; + stack_node[0] = BVH::invalidNode; + stack_near[0] = inf; + stack_node[1] = bvh->root; + stack_near[1] = tray.tnear; + NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk; + NodeRef* __restrict__ sptr_node = stack_node + 2; + vfloat<K>* __restrict__ sptr_near = stack_near + 2; + + while (1) pop: + { + /* pop next node from stack */ + assert(sptr_node > stack_node); + sptr_node--; + sptr_near--; + NodeRef cur = *sptr_node; + if (unlikely(cur == BVH::invalidNode)) { + assert(sptr_node == stack_node); + break; + } + + /* cull node if behind closest hit point */ + vfloat<K> curDist = *sptr_near; + const vbool<K> active = curDist < tray.tfar; + if (unlikely(none(active))) + continue; + + /* switch to single ray traversal */ +#if (!defined(__WIN32__) || defined(__X86_64__)) && defined(__SSE4_2__) +#if FORCE_SINGLE_MODE == 0 + if (single) +#endif + { + size_t bits = movemask(active); +#if FORCE_SINGLE_MODE == 0 + if (unlikely(popcnt(bits) <= switchThreshold)) +#endif + { + for (; bits!=0; ) { + const size_t i = bscf(bits); + intersect1(This, bvh, cur, i, pre, ray, tray, context); + } + tray.tfar = min(tray.tfar, ray.tfar); + continue; + } + } +#endif + while (likely(!cur.isLeaf())) + { + /* process nodes */ + const vbool<K> valid_node = tray.tfar > curDist; + STAT3(normal.trav_nodes, 1, popcnt(valid_node), K); + const NodeRef nodeRef = cur; + const BaseNode* __restrict__ const node = nodeRef.baseNode(); + + /* set cur to invalid */ + cur = BVH::emptyNode; + curDist = pos_inf; + + size_t num_child_hits = 0; + + for (unsigned i = 0; i < N; i++) + { + const NodeRef child = node->children[i]; + if (unlikely(child == BVH::emptyNode)) break; + vfloat<K> lnearP; + vbool<K> lhit = valid_node; + BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit); + + /* if we hit the child we choose to continue with that child if it + is closer than the current next child, or we push it onto the stack */ + if (likely(any(lhit))) + { + assert(sptr_node < stackEnd); + assert(child != BVH::emptyNode); + const vfloat<K> childDist = select(lhit, lnearP, inf); + /* push cur node onto stack and continue with hit child */ + if (any(childDist < curDist)) + { + if (likely(cur != BVH::emptyNode)) { + num_child_hits++; + *sptr_node = cur; sptr_node++; + *sptr_near = curDist; sptr_near++; + } + curDist = childDist; + cur = child; + } + + /* push hit child onto stack */ + else { + num_child_hits++; + *sptr_node = child; sptr_node++; + *sptr_near = childDist; sptr_near++; + } + } + } + +#if defined(__AVX__) + //STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1); +#endif + + if (unlikely(cur == BVH::emptyNode)) + goto pop; + + /* improved distance sorting for 3 or more hits */ + if (unlikely(num_child_hits >= 2)) + { + if (any(sptr_near[-2] < sptr_near[-1])) + { + std::swap(sptr_near[-2],sptr_near[-1]); + std::swap(sptr_node[-2],sptr_node[-1]); + } + if (unlikely(num_child_hits >= 3)) + { + if (any(sptr_near[-3] < sptr_near[-1])) + { + std::swap(sptr_near[-3],sptr_near[-1]); + std::swap(sptr_node[-3],sptr_node[-1]); + } + if (any(sptr_near[-3] < sptr_near[-2])) + { + std::swap(sptr_near[-3],sptr_near[-2]); + std::swap(sptr_node[-3],sptr_node[-2]); + } + } + } + +#if SWITCH_DURING_DOWN_TRAVERSAL == 1 + if (single) + { + // seems to be the best place for testing utilization + if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold)) + { + *sptr_node++ = cur; + *sptr_near++ = curDist; + goto pop; + } + } +#endif + } + + /* return if stack is empty */ + if (unlikely(cur == BVH::invalidNode)) { + assert(sptr_node == stack_node); + break; + } + + /* intersect leaf */ + assert(cur != BVH::emptyNode); + const vbool<K> valid_leaf = tray.tfar > curDist; + STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K); + if (unlikely(none(valid_leaf))) continue; + size_t items; const Primitive* prim = (Primitive*)cur.leaf(items); + + size_t lazy_node = 0; + PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node); + tray.tfar = select(valid_leaf, ray.tfar, tray.tfar); + + if (unlikely(lazy_node)) { + *sptr_node = lazy_node; sptr_node++; + *sptr_near = neg_inf; sptr_near++; + } + } + } while(valid_bits); + } + + + template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single> + void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i, + Accel::Intersectors* __restrict__ This, + RayHitK<K>& __restrict__ ray, + IntersectContext* context) + { + BVH* __restrict__ bvh = (BVH*)This->ptr; + + /* filter out invalid rays */ + vbool<K> valid = *valid_i == -1; +#if defined(EMBREE_IGNORE_INVALID_RAYS) + valid &= ray.valid(); +#endif + + /* return if there are no valid rays */ + size_t valid_bits = movemask(valid); + if (unlikely(valid_bits == 0)) return; + + /* verify correct input */ + assert(all(valid, ray.valid())); + assert(all(valid, ray.tnear() >= 0.0f)); + assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f))); + Precalculations pre(valid, ray); + + /* load ray */ + TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0); + const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f); + const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f); + + vint<K> octant = ray.octant(); + octant = select(valid, octant, vint<K>(0xffffffff)); + + do + { + const size_t valid_index = bsf(valid_bits); + const vbool<K> octant_valid = octant[valid_index] == octant; + valid_bits &= ~(size_t)movemask(octant_valid); + + tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf)); + tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf)); + + Frustum<robust> frustum; + frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N); + + StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes + StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer + stack[0].ptr = bvh->root; + stack[0].dist = neg_inf; + + while (1) pop: + { + /* pop next node from stack */ + if (unlikely(stackPtr == stack)) break; + + stackPtr--; + NodeRef cur = NodeRef(stackPtr->ptr); + + /* cull node if behind closest hit point */ + vfloat<K> curDist = *(float*)&stackPtr->dist; + const vbool<K> active = curDist < tray.tfar; + if (unlikely(none(active))) continue; + + while (likely(!cur.isLeaf())) + { + /* process nodes */ + //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K); + const NodeRef nodeRef = cur; + const AABBNode* __restrict__ const node = nodeRef.getAABBNode(); + + vfloat<N> fmin; + size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin); + + if (unlikely(!m_frustum_node)) goto pop; + cur = BVH::emptyNode; + curDist = pos_inf; + +#if defined(__AVX__) + //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1); +#endif + size_t num_child_hits = 0; + do { + const size_t i = bscf(m_frustum_node); + vfloat<K> lnearP; + vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored + STAT3(normal.trav_nodes, 1, 1, 1); + BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit); + + if (likely(any(lhit))) + { + const vfloat<K> childDist = fmin[i]; + const NodeRef child = node->child(i); + BVHN<N>::prefetch(child); + if (any(childDist < curDist)) + { + if (likely(cur != BVH::emptyNode)) { + num_child_hits++; + stackPtr->ptr = cur; + *(float*)&stackPtr->dist = toScalar(curDist); + stackPtr++; + } + curDist = childDist; + cur = child; + } + /* push hit child onto stack */ + else { + num_child_hits++; + stackPtr->ptr = child; + *(float*)&stackPtr->dist = toScalar(childDist); + stackPtr++; + } + } + } while(m_frustum_node); + + if (unlikely(cur == BVH::emptyNode)) goto pop; + + /* improved distance sorting for 3 or more hits */ + if (unlikely(num_child_hits >= 2)) + { + if (stackPtr[-2].dist < stackPtr[-1].dist) + std::swap(stackPtr[-2],stackPtr[-1]); + if (unlikely(num_child_hits >= 3)) + { + if (stackPtr[-3].dist < stackPtr[-1].dist) + std::swap(stackPtr[-3],stackPtr[-1]); + if (stackPtr[-3].dist < stackPtr[-2].dist) + std::swap(stackPtr[-3],stackPtr[-2]); + } + } + } + + /* intersect leaf */ + assert(cur != BVH::invalidNode); + assert(cur != BVH::emptyNode); + const vbool<K> valid_leaf = tray.tfar > curDist; + STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K); + if (unlikely(none(valid_leaf))) continue; + size_t items; const Primitive* prim = (Primitive*)cur.leaf(items); + + size_t lazy_node = 0; + PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node); + + /* reduce max distance interval on successful intersection */ + if (likely(any((ray.tfar < tray.tfar) & valid_leaf))) + { + tray.tfar = select(valid_leaf, ray.tfar, tray.tfar); + frustum.template updateMaxDist<K>(tray.tfar); + } + + if (unlikely(lazy_node)) { + stackPtr->ptr = lazy_node; + stackPtr->dist = neg_inf; + stackPtr++; + } + } + + } while(valid_bits); + } + + // =================================================================================================================================================================== + // =================================================================================================================================================================== + // =================================================================================================================================================================== + + template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single> + bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This, + const BVH* bvh, + NodeRef root, + size_t k, + Precalculations& pre, + RayK<K>& ray, + const TravRayK<K, robust>& tray, + IntersectContext* context) + { + /* stack state */ + NodeRef stack[stackSizeSingle]; // stack of nodes that still need to get traversed + NodeRef* stackPtr = stack+1; // current stack pointer + NodeRef* stackEnd = stack+stackSizeSingle; + stack[0] = root; + + /* load the ray into SIMD registers */ + TravRay<N,robust> tray1; + tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]); + + /* pop loop */ + while (true) pop: + { + /* pop next node */ + if (unlikely(stackPtr == stack)) break; + stackPtr--; + NodeRef cur = (NodeRef)*stackPtr; + + /* downtraversal loop */ + while (true) + { + /* intersect node */ + size_t mask; vfloat<N> tNear; + STAT3(shadow.trav_nodes, 1, 1, 1); + bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask); + if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; } + + /* if no child is hit, pop next node */ + if (unlikely(mask == 0)) + goto pop; + + /* select next child and push other children */ + BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd); + } + + /* this is a leaf node */ + assert(cur != BVH::emptyNode); + STAT3(shadow.trav_leaves, 1, 1, 1); + size_t num; Primitive* prim = (Primitive*)cur.leaf(num); + + size_t lazy_node = 0; + if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) { + ray.tfar[k] = neg_inf; + return true; + } + + if (unlikely(lazy_node)) { + *stackPtr = lazy_node; + stackPtr++; + } + } + return false; + } + + template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single> + void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i, + Accel::Intersectors* __restrict__ This, + RayK<K>& __restrict__ ray, + IntersectContext* context) + { + BVH* __restrict__ bvh = (BVH*)This->ptr; + + /* we may traverse an empty BVH in case all geometry was invalid */ + if (bvh->root == BVH::emptyNode) + return; + +#if ENABLE_FAST_COHERENT_CODEPATHS == 1 + assert(context); + if (unlikely(types == BVH_AN1 && context->user && context->isCoherent())) + { + occludedCoherent(valid_i, This, ray, context); + return; + } +#endif + + /* filter out already occluded and invalid rays */ + vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f); +#if defined(EMBREE_IGNORE_INVALID_RAYS) + valid &= ray.valid(); +#endif + + /* return if there are no valid rays */ + const size_t valid_bits = movemask(valid); + if (unlikely(valid_bits == 0)) return; + + /* verify correct input */ + assert(all(valid, ray.valid())); + assert(all(valid, ray.tnear() >= 0.0f)); + assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f))); + Precalculations pre(valid, ray); + + /* load ray */ + TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0); + const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f); + const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f); + + tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf)); + tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf)); + + vbool<K> terminated = !valid; + const vfloat<K> inf = vfloat<K>(pos_inf); + + /* determine switch threshold based on flags */ + const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent; + + /* allocate stack and push root node */ + vfloat<K> stack_near[stackSizeChunk]; + NodeRef stack_node[stackSizeChunk]; + stack_node[0] = BVH::invalidNode; + stack_near[0] = inf; + stack_node[1] = bvh->root; + stack_near[1] = tray.tnear; + NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk; + NodeRef* __restrict__ sptr_node = stack_node + 2; + vfloat<K>* __restrict__ sptr_near = stack_near + 2; + + while (1) pop: + { + /* pop next node from stack */ + assert(sptr_node > stack_node); + sptr_node--; + sptr_near--; + NodeRef cur = *sptr_node; + if (unlikely(cur == BVH::invalidNode)) { + assert(sptr_node == stack_node); + break; + } + + /* cull node if behind closest hit point */ + vfloat<K> curDist = *sptr_near; + const vbool<K> active = curDist < tray.tfar; + if (unlikely(none(active))) + continue; + + /* switch to single ray traversal */ +#if (!defined(__WIN32__) || defined(__X86_64__)) && defined(__SSE4_2__) +#if FORCE_SINGLE_MODE == 0 + if (single) +#endif + { + size_t bits = movemask(active); +#if FORCE_SINGLE_MODE == 0 + if (unlikely(popcnt(bits) <= switchThreshold)) +#endif + { + for (; bits!=0; ) { + const size_t i = bscf(bits); + if (occluded1(This, bvh, cur, i, pre, ray, tray, context)) + set(terminated, i); + } + if (all(terminated)) break; + tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); + continue; + } + } +#endif + + while (likely(!cur.isLeaf())) + { + /* process nodes */ + const vbool<K> valid_node = tray.tfar > curDist; + STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K); + const NodeRef nodeRef = cur; + const BaseNode* __restrict__ const node = nodeRef.baseNode(); + + /* set cur to invalid */ + cur = BVH::emptyNode; + curDist = pos_inf; + + for (unsigned i = 0; i < N; i++) + { + const NodeRef child = node->children[i]; + if (unlikely(child == BVH::emptyNode)) break; + vfloat<K> lnearP; + vbool<K> lhit = valid_node; + BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit); + + /* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */ + if (likely(any(lhit))) + { + assert(sptr_node < stackEnd); + assert(child != BVH::emptyNode); + const vfloat<K> childDist = select(lhit, lnearP, inf); + + /* push 'cur' node onto stack and continue with hit child */ + if (likely(cur != BVH::emptyNode)) { + *sptr_node = cur; sptr_node++; + *sptr_near = curDist; sptr_near++; + } + curDist = childDist; + cur = child; + } + } + if (unlikely(cur == BVH::emptyNode)) + goto pop; + +#if SWITCH_DURING_DOWN_TRAVERSAL == 1 + if (single) + { + // seems to be the best place for testing utilization + if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold)) + { + *sptr_node++ = cur; + *sptr_near++ = curDist; + goto pop; + } + } +#endif + } + + /* return if stack is empty */ + if (unlikely(cur == BVH::invalidNode)) { + assert(sptr_node == stack_node); + break; + } + + + /* intersect leaf */ + assert(cur != BVH::emptyNode); + const vbool<K> valid_leaf = tray.tfar > curDist; + STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K); + if (unlikely(none(valid_leaf))) continue; + size_t items; const Primitive* prim = (Primitive*) cur.leaf(items); + + size_t lazy_node = 0; + terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node); + if (all(terminated)) break; + tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays + + if (unlikely(lazy_node)) { + *sptr_node = lazy_node; sptr_node++; + *sptr_near = neg_inf; sptr_near++; + } + } + + vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf); + } + + + template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single> + void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i, + Accel::Intersectors* __restrict__ This, + RayK<K>& __restrict__ ray, + IntersectContext* context) + { + BVH* __restrict__ bvh = (BVH*)This->ptr; + + /* filter out invalid rays */ + vbool<K> valid = *valid_i == -1; +#if defined(EMBREE_IGNORE_INVALID_RAYS) + valid &= ray.valid(); +#endif + + /* return if there are no valid rays */ + size_t valid_bits = movemask(valid); + if (unlikely(valid_bits == 0)) return; + + /* verify correct input */ + assert(all(valid, ray.valid())); + assert(all(valid, ray.tnear() >= 0.0f)); + assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f))); + Precalculations pre(valid,ray); + + /* load ray */ + TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0); + const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f); + const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f); + + vbool<K> terminated = !valid; + + vint<K> octant = ray.octant(); + octant = select(valid, octant, vint<K>(0xffffffff)); + + do + { + const size_t valid_index = bsf(valid_bits); + vbool<K> octant_valid = octant[valid_index] == octant; + valid_bits &= ~(size_t)movemask(octant_valid); + + tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf)); + tray.tfar = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf)); + + Frustum<robust> frustum; + frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N); + + StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes + StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer + stack[0].ptr = bvh->root; + stack[0].mask = movemask(octant_valid); + + while (1) pop: + { + /* pop next node from stack */ + if (unlikely(stackPtr == stack)) break; + + stackPtr--; + NodeRef cur = NodeRef(stackPtr->ptr); + + /* cull node of active rays have already been terminated */ + size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated)); + + if (unlikely(m_active == 0)) continue; + + while (likely(!cur.isLeaf())) + { + /* process nodes */ + //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K); + const NodeRef nodeRef = cur; + const AABBNode* __restrict__ const node = nodeRef.getAABBNode(); + + vfloat<N> fmin; + size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin); + + if (unlikely(!m_frustum_node)) goto pop; + cur = BVH::emptyNode; + m_active = 0; + +#if defined(__AVX__) + //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1); +#endif + size_t num_child_hits = 0; + do { + const size_t i = bscf(m_frustum_node); + vfloat<K> lnearP; + vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored + STAT3(normal.trav_nodes, 1, 1, 1); + BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit); + + if (likely(any(lhit))) + { + const NodeRef child = node->child(i); + assert(child != BVH::emptyNode); + BVHN<N>::prefetch(child); + if (likely(cur != BVH::emptyNode)) { + num_child_hits++; + stackPtr->ptr = cur; + stackPtr->mask = m_active; + stackPtr++; + } + cur = child; + m_active = movemask(lhit); + } + } while(m_frustum_node); + + if (unlikely(cur == BVH::emptyNode)) goto pop; + } + + /* intersect leaf */ + assert(cur != BVH::invalidNode); + assert(cur != BVH::emptyNode); +#if defined(__AVX__) + STAT3(normal.trav_leaves, 1, popcnt(m_active), K); +#endif + if (unlikely(!m_active)) continue; + size_t items; const Primitive* prim = (Primitive*)cur.leaf(items); + + size_t lazy_node = 0; + terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node); + octant_valid &= !terminated; + if (unlikely(none(octant_valid))) break; + tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays + + if (unlikely(lazy_node)) { + stackPtr->ptr = lazy_node; + stackPtr->mask = movemask(octant_valid); + stackPtr++; + } + } + } while(valid_bits); + + vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf); + } + } +} |