summaryrefslogtreecommitdiff
path: root/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp')
-rw-r--r--thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp917
1 files changed, 917 insertions, 0 deletions
diff --git a/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp b/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp
new file mode 100644
index 0000000000..6e9a5a538e
--- /dev/null
+++ b/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp
@@ -0,0 +1,917 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "bvh_intersector_hybrid.h"
+#include "bvh_traverser1.h"
+#include "node_intersector1.h"
+#include "node_intersector_packet.h"
+
+#include "../geometry/intersector_iterators.h"
+#include "../geometry/triangle_intersector.h"
+#include "../geometry/trianglev_intersector.h"
+#include "../geometry/trianglev_mb_intersector.h"
+#include "../geometry/trianglei_intersector.h"
+#include "../geometry/quadv_intersector.h"
+#include "../geometry/quadi_intersector.h"
+#include "../geometry/curveNv_intersector.h"
+#include "../geometry/curveNi_intersector.h"
+#include "../geometry/curveNi_mb_intersector.h"
+#include "../geometry/linei_intersector.h"
+#include "../geometry/subdivpatch1_intersector.h"
+#include "../geometry/object_intersector.h"
+#include "../geometry/instance_intersector.h"
+#include "../geometry/subgrid_intersector.h"
+#include "../geometry/subgrid_mb_intersector.h"
+#include "../geometry/curve_intersector_virtual.h"
+
+#define SWITCH_DURING_DOWN_TRAVERSAL 1
+#define FORCE_SINGLE_MODE 0
+
+#define ENABLE_FAST_COHERENT_CODEPATHS 1
+
+namespace embree
+{
+ namespace isa
+ {
+ template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
+ void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,
+ const BVH* bvh,
+ NodeRef root,
+ size_t k,
+ Precalculations& pre,
+ RayHitK<K>& ray,
+ const TravRayK<K, robust>& tray,
+ IntersectContext* context)
+ {
+ /* stack state */
+ StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
+ StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
+ StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle;
+ stack[0].ptr = root;
+ stack[0].dist = neg_inf;
+
+ /* load the ray into SIMD registers */
+ TravRay<N,robust> tray1;
+ tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
+
+ /* pop loop */
+ while (true) pop:
+ {
+ /* pop next node */
+ if (unlikely(stackPtr == stack)) break;
+ stackPtr--;
+ NodeRef cur = NodeRef(stackPtr->ptr);
+
+ /* if popped node is too far, pop next one */
+ if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k]))
+ continue;
+
+ /* downtraversal loop */
+ while (true)
+ {
+ /* intersect node */
+ size_t mask; vfloat<N> tNear;
+ STAT3(normal.trav_nodes, 1, 1, 1);
+ bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
+ if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }
+
+ /* if no child is hit, pop next node */
+ if (unlikely(mask == 0))
+ goto pop;
+
+ /* select next child and push other children */
+ BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
+ }
+
+ /* this is a leaf node */
+ assert(cur != BVH::emptyNode);
+ STAT3(normal.trav_leaves, 1, 1, 1);
+ size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
+
+ size_t lazy_node = 0;
+ PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node);
+
+ tray1.tfar = ray.tfar[k];
+
+ if (unlikely(lazy_node)) {
+ stackPtr->ptr = lazy_node;
+ stackPtr->dist = neg_inf;
+ stackPtr++;
+ }
+ }
+ }
+
+ template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
+ void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,
+ Accel::Intersectors* __restrict__ This,
+ RayHitK<K>& __restrict__ ray,
+ IntersectContext* __restrict__ context)
+ {
+ BVH* __restrict__ bvh = (BVH*)This->ptr;
+
+ /* we may traverse an empty BVH in case all geometry was invalid */
+ if (bvh->root == BVH::emptyNode)
+ return;
+
+#if ENABLE_FAST_COHERENT_CODEPATHS == 1
+ assert(context);
+ if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
+ {
+ intersectCoherent(valid_i, This, ray, context);
+ return;
+ }
+#endif
+
+ /* filter out invalid rays */
+ vbool<K> valid = *valid_i == -1;
+#if defined(EMBREE_IGNORE_INVALID_RAYS)
+ valid &= ray.valid();
+#endif
+
+ /* return if there are no valid rays */
+ size_t valid_bits = movemask(valid);
+
+#if defined(__AVX__)
+ STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1);
+#endif
+
+ if (unlikely(valid_bits == 0)) return;
+
+ /* verify correct input */
+ assert(all(valid, ray.valid()));
+ assert(all(valid, ray.tnear() >= 0.0f));
+ assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
+ Precalculations pre(valid, ray);
+
+ /* load ray */
+ TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
+ const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
+ const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
+
+ if (single)
+ {
+ tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
+ tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
+
+ for (; valid_bits!=0; ) {
+ const size_t i = bscf(valid_bits);
+ intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);
+ }
+ return;
+ }
+
+ /* determine switch threshold based on flags */
+ const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
+
+ vint<K> octant = ray.octant();
+ octant = select(valid, octant, vint<K>(0xffffffff));
+
+ /* test whether we have ray with opposing direction signs in the packet */
+ bool split = false;
+ {
+ size_t bits = valid_bits;
+ vbool<K> vsplit( false );
+ do
+ {
+ const size_t valid_index = bsf(bits);
+ vbool<K> octant_valid = octant[valid_index] == octant;
+ bits &= ~(size_t)movemask(octant_valid);
+ vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7));
+ } while (bits);
+ if (any(vsplit)) split = true;
+ }
+
+ do
+ {
+ const size_t valid_index = bsf(valid_bits);
+ const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;
+ const vint<K> count_diff_octant = \
+ ((diff_octant >> 2) & 1) +
+ ((diff_octant >> 1) & 1) +
+ ((diff_octant >> 0) & 1);
+
+ vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff));
+ if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down
+
+
+ octant = select(octant_valid,vint<K>(0xffffffff),octant);
+ valid_bits &= ~(size_t)movemask(octant_valid);
+
+ tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
+ tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
+
+ /* allocate stack and push root node */
+ vfloat<K> stack_near[stackSizeChunk];
+ NodeRef stack_node[stackSizeChunk];
+ stack_node[0] = BVH::invalidNode;
+ stack_near[0] = inf;
+ stack_node[1] = bvh->root;
+ stack_near[1] = tray.tnear;
+ NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
+ NodeRef* __restrict__ sptr_node = stack_node + 2;
+ vfloat<K>* __restrict__ sptr_near = stack_near + 2;
+
+ while (1) pop:
+ {
+ /* pop next node from stack */
+ assert(sptr_node > stack_node);
+ sptr_node--;
+ sptr_near--;
+ NodeRef cur = *sptr_node;
+ if (unlikely(cur == BVH::invalidNode)) {
+ assert(sptr_node == stack_node);
+ break;
+ }
+
+ /* cull node if behind closest hit point */
+ vfloat<K> curDist = *sptr_near;
+ const vbool<K> active = curDist < tray.tfar;
+ if (unlikely(none(active)))
+ continue;
+
+ /* switch to single ray traversal */
+#if (!defined(__WIN32__) || defined(__X86_64__)) && defined(__SSE4_2__)
+#if FORCE_SINGLE_MODE == 0
+ if (single)
+#endif
+ {
+ size_t bits = movemask(active);
+#if FORCE_SINGLE_MODE == 0
+ if (unlikely(popcnt(bits) <= switchThreshold))
+#endif
+ {
+ for (; bits!=0; ) {
+ const size_t i = bscf(bits);
+ intersect1(This, bvh, cur, i, pre, ray, tray, context);
+ }
+ tray.tfar = min(tray.tfar, ray.tfar);
+ continue;
+ }
+ }
+#endif
+ while (likely(!cur.isLeaf()))
+ {
+ /* process nodes */
+ const vbool<K> valid_node = tray.tfar > curDist;
+ STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
+ const NodeRef nodeRef = cur;
+ const BaseNode* __restrict__ const node = nodeRef.baseNode();
+
+ /* set cur to invalid */
+ cur = BVH::emptyNode;
+ curDist = pos_inf;
+
+ size_t num_child_hits = 0;
+
+ for (unsigned i = 0; i < N; i++)
+ {
+ const NodeRef child = node->children[i];
+ if (unlikely(child == BVH::emptyNode)) break;
+ vfloat<K> lnearP;
+ vbool<K> lhit = valid_node;
+ BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
+
+ /* if we hit the child we choose to continue with that child if it
+ is closer than the current next child, or we push it onto the stack */
+ if (likely(any(lhit)))
+ {
+ assert(sptr_node < stackEnd);
+ assert(child != BVH::emptyNode);
+ const vfloat<K> childDist = select(lhit, lnearP, inf);
+ /* push cur node onto stack and continue with hit child */
+ if (any(childDist < curDist))
+ {
+ if (likely(cur != BVH::emptyNode)) {
+ num_child_hits++;
+ *sptr_node = cur; sptr_node++;
+ *sptr_near = curDist; sptr_near++;
+ }
+ curDist = childDist;
+ cur = child;
+ }
+
+ /* push hit child onto stack */
+ else {
+ num_child_hits++;
+ *sptr_node = child; sptr_node++;
+ *sptr_near = childDist; sptr_near++;
+ }
+ }
+ }
+
+#if defined(__AVX__)
+ //STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);
+#endif
+
+ if (unlikely(cur == BVH::emptyNode))
+ goto pop;
+
+ /* improved distance sorting for 3 or more hits */
+ if (unlikely(num_child_hits >= 2))
+ {
+ if (any(sptr_near[-2] < sptr_near[-1]))
+ {
+ std::swap(sptr_near[-2],sptr_near[-1]);
+ std::swap(sptr_node[-2],sptr_node[-1]);
+ }
+ if (unlikely(num_child_hits >= 3))
+ {
+ if (any(sptr_near[-3] < sptr_near[-1]))
+ {
+ std::swap(sptr_near[-3],sptr_near[-1]);
+ std::swap(sptr_node[-3],sptr_node[-1]);
+ }
+ if (any(sptr_near[-3] < sptr_near[-2]))
+ {
+ std::swap(sptr_near[-3],sptr_near[-2]);
+ std::swap(sptr_node[-3],sptr_node[-2]);
+ }
+ }
+ }
+
+#if SWITCH_DURING_DOWN_TRAVERSAL == 1
+ if (single)
+ {
+ // seems to be the best place for testing utilization
+ if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
+ {
+ *sptr_node++ = cur;
+ *sptr_near++ = curDist;
+ goto pop;
+ }
+ }
+#endif
+ }
+
+ /* return if stack is empty */
+ if (unlikely(cur == BVH::invalidNode)) {
+ assert(sptr_node == stack_node);
+ break;
+ }
+
+ /* intersect leaf */
+ assert(cur != BVH::emptyNode);
+ const vbool<K> valid_leaf = tray.tfar > curDist;
+ STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
+ if (unlikely(none(valid_leaf))) continue;
+ size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
+
+ size_t lazy_node = 0;
+ PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
+ tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
+
+ if (unlikely(lazy_node)) {
+ *sptr_node = lazy_node; sptr_node++;
+ *sptr_near = neg_inf; sptr_near++;
+ }
+ }
+ } while(valid_bits);
+ }
+
+
+ template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
+ void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,
+ Accel::Intersectors* __restrict__ This,
+ RayHitK<K>& __restrict__ ray,
+ IntersectContext* context)
+ {
+ BVH* __restrict__ bvh = (BVH*)This->ptr;
+
+ /* filter out invalid rays */
+ vbool<K> valid = *valid_i == -1;
+#if defined(EMBREE_IGNORE_INVALID_RAYS)
+ valid &= ray.valid();
+#endif
+
+ /* return if there are no valid rays */
+ size_t valid_bits = movemask(valid);
+ if (unlikely(valid_bits == 0)) return;
+
+ /* verify correct input */
+ assert(all(valid, ray.valid()));
+ assert(all(valid, ray.tnear() >= 0.0f));
+ assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
+ Precalculations pre(valid, ray);
+
+ /* load ray */
+ TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
+ const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
+ const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
+
+ vint<K> octant = ray.octant();
+ octant = select(valid, octant, vint<K>(0xffffffff));
+
+ do
+ {
+ const size_t valid_index = bsf(valid_bits);
+ const vbool<K> octant_valid = octant[valid_index] == octant;
+ valid_bits &= ~(size_t)movemask(octant_valid);
+
+ tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
+ tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
+
+ Frustum<robust> frustum;
+ frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
+
+ StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
+ StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
+ stack[0].ptr = bvh->root;
+ stack[0].dist = neg_inf;
+
+ while (1) pop:
+ {
+ /* pop next node from stack */
+ if (unlikely(stackPtr == stack)) break;
+
+ stackPtr--;
+ NodeRef cur = NodeRef(stackPtr->ptr);
+
+ /* cull node if behind closest hit point */
+ vfloat<K> curDist = *(float*)&stackPtr->dist;
+ const vbool<K> active = curDist < tray.tfar;
+ if (unlikely(none(active))) continue;
+
+ while (likely(!cur.isLeaf()))
+ {
+ /* process nodes */
+ //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
+ const NodeRef nodeRef = cur;
+ const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
+
+ vfloat<N> fmin;
+ size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
+
+ if (unlikely(!m_frustum_node)) goto pop;
+ cur = BVH::emptyNode;
+ curDist = pos_inf;
+
+#if defined(__AVX__)
+ //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
+#endif
+ size_t num_child_hits = 0;
+ do {
+ const size_t i = bscf(m_frustum_node);
+ vfloat<K> lnearP;
+ vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
+ STAT3(normal.trav_nodes, 1, 1, 1);
+ BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
+
+ if (likely(any(lhit)))
+ {
+ const vfloat<K> childDist = fmin[i];
+ const NodeRef child = node->child(i);
+ BVHN<N>::prefetch(child);
+ if (any(childDist < curDist))
+ {
+ if (likely(cur != BVH::emptyNode)) {
+ num_child_hits++;
+ stackPtr->ptr = cur;
+ *(float*)&stackPtr->dist = toScalar(curDist);
+ stackPtr++;
+ }
+ curDist = childDist;
+ cur = child;
+ }
+ /* push hit child onto stack */
+ else {
+ num_child_hits++;
+ stackPtr->ptr = child;
+ *(float*)&stackPtr->dist = toScalar(childDist);
+ stackPtr++;
+ }
+ }
+ } while(m_frustum_node);
+
+ if (unlikely(cur == BVH::emptyNode)) goto pop;
+
+ /* improved distance sorting for 3 or more hits */
+ if (unlikely(num_child_hits >= 2))
+ {
+ if (stackPtr[-2].dist < stackPtr[-1].dist)
+ std::swap(stackPtr[-2],stackPtr[-1]);
+ if (unlikely(num_child_hits >= 3))
+ {
+ if (stackPtr[-3].dist < stackPtr[-1].dist)
+ std::swap(stackPtr[-3],stackPtr[-1]);
+ if (stackPtr[-3].dist < stackPtr[-2].dist)
+ std::swap(stackPtr[-3],stackPtr[-2]);
+ }
+ }
+ }
+
+ /* intersect leaf */
+ assert(cur != BVH::invalidNode);
+ assert(cur != BVH::emptyNode);
+ const vbool<K> valid_leaf = tray.tfar > curDist;
+ STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
+ if (unlikely(none(valid_leaf))) continue;
+ size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
+
+ size_t lazy_node = 0;
+ PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
+
+ /* reduce max distance interval on successful intersection */
+ if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))
+ {
+ tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
+ frustum.template updateMaxDist<K>(tray.tfar);
+ }
+
+ if (unlikely(lazy_node)) {
+ stackPtr->ptr = lazy_node;
+ stackPtr->dist = neg_inf;
+ stackPtr++;
+ }
+ }
+
+ } while(valid_bits);
+ }
+
+ // ===================================================================================================================================================================
+ // ===================================================================================================================================================================
+ // ===================================================================================================================================================================
+
+ template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
+ bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,
+ const BVH* bvh,
+ NodeRef root,
+ size_t k,
+ Precalculations& pre,
+ RayK<K>& ray,
+ const TravRayK<K, robust>& tray,
+ IntersectContext* context)
+ {
+ /* stack state */
+ NodeRef stack[stackSizeSingle]; // stack of nodes that still need to get traversed
+ NodeRef* stackPtr = stack+1; // current stack pointer
+ NodeRef* stackEnd = stack+stackSizeSingle;
+ stack[0] = root;
+
+ /* load the ray into SIMD registers */
+ TravRay<N,robust> tray1;
+ tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
+
+ /* pop loop */
+ while (true) pop:
+ {
+ /* pop next node */
+ if (unlikely(stackPtr == stack)) break;
+ stackPtr--;
+ NodeRef cur = (NodeRef)*stackPtr;
+
+ /* downtraversal loop */
+ while (true)
+ {
+ /* intersect node */
+ size_t mask; vfloat<N> tNear;
+ STAT3(shadow.trav_nodes, 1, 1, 1);
+ bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
+ if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }
+
+ /* if no child is hit, pop next node */
+ if (unlikely(mask == 0))
+ goto pop;
+
+ /* select next child and push other children */
+ BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
+ }
+
+ /* this is a leaf node */
+ assert(cur != BVH::emptyNode);
+ STAT3(shadow.trav_leaves, 1, 1, 1);
+ size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
+
+ size_t lazy_node = 0;
+ if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) {
+ ray.tfar[k] = neg_inf;
+ return true;
+ }
+
+ if (unlikely(lazy_node)) {
+ *stackPtr = lazy_node;
+ stackPtr++;
+ }
+ }
+ return false;
+ }
+
+ template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
+ void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,
+ Accel::Intersectors* __restrict__ This,
+ RayK<K>& __restrict__ ray,
+ IntersectContext* context)
+ {
+ BVH* __restrict__ bvh = (BVH*)This->ptr;
+
+ /* we may traverse an empty BVH in case all geometry was invalid */
+ if (bvh->root == BVH::emptyNode)
+ return;
+
+#if ENABLE_FAST_COHERENT_CODEPATHS == 1
+ assert(context);
+ if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
+ {
+ occludedCoherent(valid_i, This, ray, context);
+ return;
+ }
+#endif
+
+ /* filter out already occluded and invalid rays */
+ vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f);
+#if defined(EMBREE_IGNORE_INVALID_RAYS)
+ valid &= ray.valid();
+#endif
+
+ /* return if there are no valid rays */
+ const size_t valid_bits = movemask(valid);
+ if (unlikely(valid_bits == 0)) return;
+
+ /* verify correct input */
+ assert(all(valid, ray.valid()));
+ assert(all(valid, ray.tnear() >= 0.0f));
+ assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
+ Precalculations pre(valid, ray);
+
+ /* load ray */
+ TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
+ const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
+ const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
+
+ tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
+ tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
+
+ vbool<K> terminated = !valid;
+ const vfloat<K> inf = vfloat<K>(pos_inf);
+
+ /* determine switch threshold based on flags */
+ const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
+
+ /* allocate stack and push root node */
+ vfloat<K> stack_near[stackSizeChunk];
+ NodeRef stack_node[stackSizeChunk];
+ stack_node[0] = BVH::invalidNode;
+ stack_near[0] = inf;
+ stack_node[1] = bvh->root;
+ stack_near[1] = tray.tnear;
+ NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
+ NodeRef* __restrict__ sptr_node = stack_node + 2;
+ vfloat<K>* __restrict__ sptr_near = stack_near + 2;
+
+ while (1) pop:
+ {
+ /* pop next node from stack */
+ assert(sptr_node > stack_node);
+ sptr_node--;
+ sptr_near--;
+ NodeRef cur = *sptr_node;
+ if (unlikely(cur == BVH::invalidNode)) {
+ assert(sptr_node == stack_node);
+ break;
+ }
+
+ /* cull node if behind closest hit point */
+ vfloat<K> curDist = *sptr_near;
+ const vbool<K> active = curDist < tray.tfar;
+ if (unlikely(none(active)))
+ continue;
+
+ /* switch to single ray traversal */
+#if (!defined(__WIN32__) || defined(__X86_64__)) && defined(__SSE4_2__)
+#if FORCE_SINGLE_MODE == 0
+ if (single)
+#endif
+ {
+ size_t bits = movemask(active);
+#if FORCE_SINGLE_MODE == 0
+ if (unlikely(popcnt(bits) <= switchThreshold))
+#endif
+ {
+ for (; bits!=0; ) {
+ const size_t i = bscf(bits);
+ if (occluded1(This, bvh, cur, i, pre, ray, tray, context))
+ set(terminated, i);
+ }
+ if (all(terminated)) break;
+ tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);
+ continue;
+ }
+ }
+#endif
+
+ while (likely(!cur.isLeaf()))
+ {
+ /* process nodes */
+ const vbool<K> valid_node = tray.tfar > curDist;
+ STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K);
+ const NodeRef nodeRef = cur;
+ const BaseNode* __restrict__ const node = nodeRef.baseNode();
+
+ /* set cur to invalid */
+ cur = BVH::emptyNode;
+ curDist = pos_inf;
+
+ for (unsigned i = 0; i < N; i++)
+ {
+ const NodeRef child = node->children[i];
+ if (unlikely(child == BVH::emptyNode)) break;
+ vfloat<K> lnearP;
+ vbool<K> lhit = valid_node;
+ BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
+
+ /* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */
+ if (likely(any(lhit)))
+ {
+ assert(sptr_node < stackEnd);
+ assert(child != BVH::emptyNode);
+ const vfloat<K> childDist = select(lhit, lnearP, inf);
+
+ /* push 'cur' node onto stack and continue with hit child */
+ if (likely(cur != BVH::emptyNode)) {
+ *sptr_node = cur; sptr_node++;
+ *sptr_near = curDist; sptr_near++;
+ }
+ curDist = childDist;
+ cur = child;
+ }
+ }
+ if (unlikely(cur == BVH::emptyNode))
+ goto pop;
+
+#if SWITCH_DURING_DOWN_TRAVERSAL == 1
+ if (single)
+ {
+ // seems to be the best place for testing utilization
+ if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
+ {
+ *sptr_node++ = cur;
+ *sptr_near++ = curDist;
+ goto pop;
+ }
+ }
+#endif
+ }
+
+ /* return if stack is empty */
+ if (unlikely(cur == BVH::invalidNode)) {
+ assert(sptr_node == stack_node);
+ break;
+ }
+
+
+ /* intersect leaf */
+ assert(cur != BVH::emptyNode);
+ const vbool<K> valid_leaf = tray.tfar > curDist;
+ STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K);
+ if (unlikely(none(valid_leaf))) continue;
+ size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);
+
+ size_t lazy_node = 0;
+ terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
+ if (all(terminated)) break;
+ tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
+
+ if (unlikely(lazy_node)) {
+ *sptr_node = lazy_node; sptr_node++;
+ *sptr_near = neg_inf; sptr_near++;
+ }
+ }
+
+ vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
+ }
+
+
+ template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
+ void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,
+ Accel::Intersectors* __restrict__ This,
+ RayK<K>& __restrict__ ray,
+ IntersectContext* context)
+ {
+ BVH* __restrict__ bvh = (BVH*)This->ptr;
+
+ /* filter out invalid rays */
+ vbool<K> valid = *valid_i == -1;
+#if defined(EMBREE_IGNORE_INVALID_RAYS)
+ valid &= ray.valid();
+#endif
+
+ /* return if there are no valid rays */
+ size_t valid_bits = movemask(valid);
+ if (unlikely(valid_bits == 0)) return;
+
+ /* verify correct input */
+ assert(all(valid, ray.valid()));
+ assert(all(valid, ray.tnear() >= 0.0f));
+ assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
+ Precalculations pre(valid,ray);
+
+ /* load ray */
+ TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
+ const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
+ const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
+
+ vbool<K> terminated = !valid;
+
+ vint<K> octant = ray.octant();
+ octant = select(valid, octant, vint<K>(0xffffffff));
+
+ do
+ {
+ const size_t valid_index = bsf(valid_bits);
+ vbool<K> octant_valid = octant[valid_index] == octant;
+ valid_bits &= ~(size_t)movemask(octant_valid);
+
+ tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
+ tray.tfar = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf));
+
+ Frustum<robust> frustum;
+ frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
+
+ StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
+ StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer
+ stack[0].ptr = bvh->root;
+ stack[0].mask = movemask(octant_valid);
+
+ while (1) pop:
+ {
+ /* pop next node from stack */
+ if (unlikely(stackPtr == stack)) break;
+
+ stackPtr--;
+ NodeRef cur = NodeRef(stackPtr->ptr);
+
+ /* cull node of active rays have already been terminated */
+ size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));
+
+ if (unlikely(m_active == 0)) continue;
+
+ while (likely(!cur.isLeaf()))
+ {
+ /* process nodes */
+ //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
+ const NodeRef nodeRef = cur;
+ const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
+
+ vfloat<N> fmin;
+ size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
+
+ if (unlikely(!m_frustum_node)) goto pop;
+ cur = BVH::emptyNode;
+ m_active = 0;
+
+#if defined(__AVX__)
+ //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
+#endif
+ size_t num_child_hits = 0;
+ do {
+ const size_t i = bscf(m_frustum_node);
+ vfloat<K> lnearP;
+ vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
+ STAT3(normal.trav_nodes, 1, 1, 1);
+ BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
+
+ if (likely(any(lhit)))
+ {
+ const NodeRef child = node->child(i);
+ assert(child != BVH::emptyNode);
+ BVHN<N>::prefetch(child);
+ if (likely(cur != BVH::emptyNode)) {
+ num_child_hits++;
+ stackPtr->ptr = cur;
+ stackPtr->mask = m_active;
+ stackPtr++;
+ }
+ cur = child;
+ m_active = movemask(lhit);
+ }
+ } while(m_frustum_node);
+
+ if (unlikely(cur == BVH::emptyNode)) goto pop;
+ }
+
+ /* intersect leaf */
+ assert(cur != BVH::invalidNode);
+ assert(cur != BVH::emptyNode);
+#if defined(__AVX__)
+ STAT3(normal.trav_leaves, 1, popcnt(m_active), K);
+#endif
+ if (unlikely(!m_active)) continue;
+ size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
+
+ size_t lazy_node = 0;
+ terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
+ octant_valid &= !terminated;
+ if (unlikely(none(octant_valid))) break;
+ tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
+
+ if (unlikely(lazy_node)) {
+ stackPtr->ptr = lazy_node;
+ stackPtr->mask = movemask(octant_valid);
+ stackPtr++;
+ }
+ }
+ } while(valid_bits);
+
+ vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
+ }
+ }
+}