// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh_intersector_hybrid.h" #include "bvh_traverser1.h" #include "node_intersector1.h" #include "node_intersector_packet.h" #include "../geometry/intersector_iterators.h" #include "../geometry/triangle_intersector.h" #include "../geometry/trianglev_intersector.h" #include "../geometry/trianglev_mb_intersector.h" #include "../geometry/trianglei_intersector.h" #include "../geometry/quadv_intersector.h" #include "../geometry/quadi_intersector.h" #include "../geometry/curveNv_intersector.h" #include "../geometry/curveNi_intersector.h" #include "../geometry/curveNi_mb_intersector.h" #include "../geometry/linei_intersector.h" #include "../geometry/subdivpatch1_intersector.h" #include "../geometry/object_intersector.h" #include "../geometry/instance_intersector.h" #include "../geometry/subgrid_intersector.h" #include "../geometry/subgrid_mb_intersector.h" #include "../geometry/curve_intersector_virtual.h" #define SWITCH_DURING_DOWN_TRAVERSAL 1 #define FORCE_SINGLE_MODE 0 #define ENABLE_FAST_COHERENT_CODEPATHS 1 namespace embree { namespace isa { template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single> void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre, RayHitK<K>& ray, const TravRayK<K, robust>& tray, IntersectContext* context) { /* stack state */ StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle; stack[0].ptr = root; stack[0].dist = neg_inf; /* load the ray into SIMD registers */ TravRay<N,robust> tray1; tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]); /* pop loop */ while (true) pop: { /* pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /* if popped node is too far, pop next one */ if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k])) continue; /* downtraversal loop */ while (true) { /* intersect node */ size_t mask; vfloat<N> tNear; STAT3(normal.trav_nodes, 1, 1, 1); bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask); if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; } /* if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /* select next child and push other children */ BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd); } /* this is a leaf node */ assert(cur != BVH::emptyNode); STAT3(normal.trav_leaves, 1, 1, 1); size_t num; Primitive* prim = (Primitive*)cur.leaf(num); size_t lazy_node = 0; PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node); tray1.tfar = ray.tfar[k]; if (unlikely(lazy_node)) { stackPtr->ptr = lazy_node; stackPtr->dist = neg_inf; stackPtr++; } } } template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single> void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i, Accel::Intersectors* __restrict__ This, RayHitK<K>& __restrict__ ray, IntersectContext* __restrict__ context) { BVH* __restrict__ bvh = (BVH*)This->ptr; /* we may traverse an empty BVH in case all geometry was invalid */ if (bvh->root == BVH::emptyNode) return; #if ENABLE_FAST_COHERENT_CODEPATHS == 1 assert(context); if (unlikely(types == BVH_AN1 && context->user && context->isCoherent())) { intersectCoherent(valid_i, This, ray, context); return; } #endif /* filter out invalid rays */ vbool<K> valid = *valid_i == -1; #if defined(EMBREE_IGNORE_INVALID_RAYS) valid &= ray.valid(); #endif /* return if there are no valid rays */ size_t valid_bits = movemask(valid); #if defined(__AVX__) STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1); #endif if (unlikely(valid_bits == 0)) return; /* verify correct input */ assert(all(valid, ray.valid())); assert(all(valid, ray.tnear() >= 0.0f)); assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f))); Precalculations pre(valid, ray); /* load ray */ TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0); const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f); const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f); if (single) { tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf)); tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf)); for (; valid_bits!=0; ) { const size_t i = bscf(valid_bits); intersect1(This, bvh, bvh->root, i, pre, ray, tray, context); } return; } /* determine switch threshold based on flags */ const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent; vint<K> octant = ray.octant(); octant = select(valid, octant, vint<K>(0xffffffff)); /* test whether we have ray with opposing direction signs in the packet */ bool split = false; { size_t bits = valid_bits; vbool<K> vsplit( false ); do { const size_t valid_index = bsf(bits); vbool<K> octant_valid = octant[valid_index] == octant; bits &= ~(size_t)movemask(octant_valid); vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7)); } while (bits); if (any(vsplit)) split = true; } do { const size_t valid_index = bsf(valid_bits); const vint<K> diff_octant = vint<K>(octant[valid_index])^octant; const vint<K> count_diff_octant = \ ((diff_octant >> 2) & 1) + ((diff_octant >> 1) & 1) + ((diff_octant >> 0) & 1); vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff)); if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down octant = select(octant_valid,vint<K>(0xffffffff),octant); valid_bits &= ~(size_t)movemask(octant_valid); tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf)); tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf)); /* allocate stack and push root node */ vfloat<K> stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = tray.tnear; NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; vfloat<K>* __restrict__ sptr_near = stack_near + 2; while (1) pop: { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ vfloat<K> curDist = *sptr_near; const vbool<K> active = curDist < tray.tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if (!defined(__WIN32__) || defined(__X86_64__)) && defined(__SSE4_2__) #if FORCE_SINGLE_MODE == 0 if (single) #endif { size_t bits = movemask(active); #if FORCE_SINGLE_MODE == 0 if (unlikely(popcnt(bits) <= switchThreshold)) #endif { for (; bits!=0; ) { const size_t i = bscf(bits); intersect1(This, bvh, cur, i, pre, ray, tray, context); } tray.tfar = min(tray.tfar, ray.tfar); continue; } } #endif while (likely(!cur.isLeaf())) { /* process nodes */ const vbool<K> valid_node = tray.tfar > curDist; STAT3(normal.trav_nodes, 1, popcnt(valid_node), K); const NodeRef nodeRef = cur; const BaseNode* __restrict__ const node = nodeRef.baseNode(); /* set cur to invalid */ cur = BVH::emptyNode; curDist = pos_inf; size_t num_child_hits = 0; for (unsigned i = 0; i < N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH::emptyNode)) break; vfloat<K> lnearP; vbool<K> lhit = valid_node; BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit); /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH::emptyNode); const vfloat<K> childDist = select(lhit, lnearP, inf); /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { if (likely(cur != BVH::emptyNode)) { num_child_hits++; *sptr_node = cur; sptr_node++; *sptr_near = curDist; sptr_near++; } curDist = childDist; cur = child; } /* push hit child onto stack */ else { num_child_hits++; *sptr_node = child; sptr_node++; *sptr_near = childDist; sptr_near++; } } } #if defined(__AVX__) //STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1); #endif if (unlikely(cur == BVH::emptyNode)) goto pop; /* improved distance sorting for 3 or more hits */ if (unlikely(num_child_hits >= 2)) { if (any(sptr_near[-2] < sptr_near[-1])) { std::swap(sptr_near[-2],sptr_near[-1]); std::swap(sptr_node[-2],sptr_node[-1]); } if (unlikely(num_child_hits >= 3)) { if (any(sptr_near[-3] < sptr_near[-1])) { std::swap(sptr_near[-3],sptr_near[-1]); std::swap(sptr_node[-3],sptr_node[-1]); } if (any(sptr_near[-3] < sptr_near[-2])) { std::swap(sptr_near[-3],sptr_near[-2]); std::swap(sptr_node[-3],sptr_node[-2]); } } } #if SWITCH_DURING_DOWN_TRAVERSAL == 1 if (single) { // seems to be the best place for testing utilization if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold)) { *sptr_node++ = cur; *sptr_near++ = curDist; goto pop; } } #endif } /* return if stack is empty */ if (unlikely(cur == BVH::invalidNode)) { assert(sptr_node == stack_node); break; } /* intersect leaf */ assert(cur != BVH::emptyNode); const vbool<K> valid_leaf = tray.tfar > curDist; STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K); if (unlikely(none(valid_leaf))) continue; size_t items; const Primitive* prim = (Primitive*)cur.leaf(items); size_t lazy_node = 0; PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node); tray.tfar = select(valid_leaf, ray.tfar, tray.tfar); if (unlikely(lazy_node)) { *sptr_node = lazy_node; sptr_node++; *sptr_near = neg_inf; sptr_near++; } } } while(valid_bits); } template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single> void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i, Accel::Intersectors* __restrict__ This, RayHitK<K>& __restrict__ ray, IntersectContext* context) { BVH* __restrict__ bvh = (BVH*)This->ptr; /* filter out invalid rays */ vbool<K> valid = *valid_i == -1; #if defined(EMBREE_IGNORE_INVALID_RAYS) valid &= ray.valid(); #endif /* return if there are no valid rays */ size_t valid_bits = movemask(valid); if (unlikely(valid_bits == 0)) return; /* verify correct input */ assert(all(valid, ray.valid())); assert(all(valid, ray.tnear() >= 0.0f)); assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f))); Precalculations pre(valid, ray); /* load ray */ TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0); const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f); const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f); vint<K> octant = ray.octant(); octant = select(valid, octant, vint<K>(0xffffffff)); do { const size_t valid_index = bsf(valid_bits); const vbool<K> octant_valid = octant[valid_index] == octant; valid_bits &= ~(size_t)movemask(octant_valid); tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf)); tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf)); Frustum<robust> frustum; frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N); StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer stack[0].ptr = bvh->root; stack[0].dist = neg_inf; while (1) pop: { /* pop next node from stack */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /* cull node if behind closest hit point */ vfloat<K> curDist = *(float*)&stackPtr->dist; const vbool<K> active = curDist < tray.tfar; if (unlikely(none(active))) continue; while (likely(!cur.isLeaf())) { /* process nodes */ //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K); const NodeRef nodeRef = cur; const AABBNode* __restrict__ const node = nodeRef.getAABBNode(); vfloat<N> fmin; size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin); if (unlikely(!m_frustum_node)) goto pop; cur = BVH::emptyNode; curDist = pos_inf; #if defined(__AVX__) //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1); #endif size_t num_child_hits = 0; do { const size_t i = bscf(m_frustum_node); vfloat<K> lnearP; vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored STAT3(normal.trav_nodes, 1, 1, 1); BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit); if (likely(any(lhit))) { const vfloat<K> childDist = fmin[i]; const NodeRef child = node->child(i); BVHN<N>::prefetch(child); if (any(childDist < curDist)) { if (likely(cur != BVH::emptyNode)) { num_child_hits++; stackPtr->ptr = cur; *(float*)&stackPtr->dist = toScalar(curDist); stackPtr++; } curDist = childDist; cur = child; } /* push hit child onto stack */ else { num_child_hits++; stackPtr->ptr = child; *(float*)&stackPtr->dist = toScalar(childDist); stackPtr++; } } } while(m_frustum_node); if (unlikely(cur == BVH::emptyNode)) goto pop; /* improved distance sorting for 3 or more hits */ if (unlikely(num_child_hits >= 2)) { if (stackPtr[-2].dist < stackPtr[-1].dist) std::swap(stackPtr[-2],stackPtr[-1]); if (unlikely(num_child_hits >= 3)) { if (stackPtr[-3].dist < stackPtr[-1].dist) std::swap(stackPtr[-3],stackPtr[-1]); if (stackPtr[-3].dist < stackPtr[-2].dist) std::swap(stackPtr[-3],stackPtr[-2]); } } } /* intersect leaf */ assert(cur != BVH::invalidNode); assert(cur != BVH::emptyNode); const vbool<K> valid_leaf = tray.tfar > curDist; STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K); if (unlikely(none(valid_leaf))) continue; size_t items; const Primitive* prim = (Primitive*)cur.leaf(items); size_t lazy_node = 0; PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node); /* reduce max distance interval on successful intersection */ if (likely(any((ray.tfar < tray.tfar) & valid_leaf))) { tray.tfar = select(valid_leaf, ray.tfar, tray.tfar); frustum.template updateMaxDist<K>(tray.tfar); } if (unlikely(lazy_node)) { stackPtr->ptr = lazy_node; stackPtr->dist = neg_inf; stackPtr++; } } } while(valid_bits); } // =================================================================================================================================================================== // =================================================================================================================================================================== // =================================================================================================================================================================== template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single> bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre, RayK<K>& ray, const TravRayK<K, robust>& tray, IntersectContext* context) { /* stack state */ NodeRef stack[stackSizeSingle]; // stack of nodes that still need to get traversed NodeRef* stackPtr = stack+1; // current stack pointer NodeRef* stackEnd = stack+stackSizeSingle; stack[0] = root; /* load the ray into SIMD registers */ TravRay<N,robust> tray1; tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]); /* pop loop */ while (true) pop: { /* pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = (NodeRef)*stackPtr; /* downtraversal loop */ while (true) { /* intersect node */ size_t mask; vfloat<N> tNear; STAT3(shadow.trav_nodes, 1, 1, 1); bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask); if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; } /* if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /* select next child and push other children */ BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd); } /* this is a leaf node */ assert(cur != BVH::emptyNode); STAT3(shadow.trav_leaves, 1, 1, 1); size_t num; Primitive* prim = (Primitive*)cur.leaf(num); size_t lazy_node = 0; if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) { ray.tfar[k] = neg_inf; return true; } if (unlikely(lazy_node)) { *stackPtr = lazy_node; stackPtr++; } } return false; } template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single> void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i, Accel::Intersectors* __restrict__ This, RayK<K>& __restrict__ ray, IntersectContext* context) { BVH* __restrict__ bvh = (BVH*)This->ptr; /* we may traverse an empty BVH in case all geometry was invalid */ if (bvh->root == BVH::emptyNode) return; #if ENABLE_FAST_COHERENT_CODEPATHS == 1 assert(context); if (unlikely(types == BVH_AN1 && context->user && context->isCoherent())) { occludedCoherent(valid_i, This, ray, context); return; } #endif /* filter out already occluded and invalid rays */ vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f); #if defined(EMBREE_IGNORE_INVALID_RAYS) valid &= ray.valid(); #endif /* return if there are no valid rays */ const size_t valid_bits = movemask(valid); if (unlikely(valid_bits == 0)) return; /* verify correct input */ assert(all(valid, ray.valid())); assert(all(valid, ray.tnear() >= 0.0f)); assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f))); Precalculations pre(valid, ray); /* load ray */ TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0); const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f); const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f); tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf)); tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf)); vbool<K> terminated = !valid; const vfloat<K> inf = vfloat<K>(pos_inf); /* determine switch threshold based on flags */ const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent; /* allocate stack and push root node */ vfloat<K> stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = tray.tnear; NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; vfloat<K>* __restrict__ sptr_near = stack_near + 2; while (1) pop: { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ vfloat<K> curDist = *sptr_near; const vbool<K> active = curDist < tray.tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if (!defined(__WIN32__) || defined(__X86_64__)) && defined(__SSE4_2__) #if FORCE_SINGLE_MODE == 0 if (single) #endif { size_t bits = movemask(active); #if FORCE_SINGLE_MODE == 0 if (unlikely(popcnt(bits) <= switchThreshold)) #endif { for (; bits!=0; ) { const size_t i = bscf(bits); if (occluded1(This, bvh, cur, i, pre, ray, tray, context)) set(terminated, i); } if (all(terminated)) break; tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); continue; } } #endif while (likely(!cur.isLeaf())) { /* process nodes */ const vbool<K> valid_node = tray.tfar > curDist; STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K); const NodeRef nodeRef = cur; const BaseNode* __restrict__ const node = nodeRef.baseNode(); /* set cur to invalid */ cur = BVH::emptyNode; curDist = pos_inf; for (unsigned i = 0; i < N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH::emptyNode)) break; vfloat<K> lnearP; vbool<K> lhit = valid_node; BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit); /* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH::emptyNode); const vfloat<K> childDist = select(lhit, lnearP, inf); /* push 'cur' node onto stack and continue with hit child */ if (likely(cur != BVH::emptyNode)) { *sptr_node = cur; sptr_node++; *sptr_near = curDist; sptr_near++; } curDist = childDist; cur = child; } } if (unlikely(cur == BVH::emptyNode)) goto pop; #if SWITCH_DURING_DOWN_TRAVERSAL == 1 if (single) { // seems to be the best place for testing utilization if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold)) { *sptr_node++ = cur; *sptr_near++ = curDist; goto pop; } } #endif } /* return if stack is empty */ if (unlikely(cur == BVH::invalidNode)) { assert(sptr_node == stack_node); break; } /* intersect leaf */ assert(cur != BVH::emptyNode); const vbool<K> valid_leaf = tray.tfar > curDist; STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K); if (unlikely(none(valid_leaf))) continue; size_t items; const Primitive* prim = (Primitive*) cur.leaf(items); size_t lazy_node = 0; terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node); if (all(terminated)) break; tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays if (unlikely(lazy_node)) { *sptr_node = lazy_node; sptr_node++; *sptr_near = neg_inf; sptr_near++; } } vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf); } template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single> void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i, Accel::Intersectors* __restrict__ This, RayK<K>& __restrict__ ray, IntersectContext* context) { BVH* __restrict__ bvh = (BVH*)This->ptr; /* filter out invalid rays */ vbool<K> valid = *valid_i == -1; #if defined(EMBREE_IGNORE_INVALID_RAYS) valid &= ray.valid(); #endif /* return if there are no valid rays */ size_t valid_bits = movemask(valid); if (unlikely(valid_bits == 0)) return; /* verify correct input */ assert(all(valid, ray.valid())); assert(all(valid, ray.tnear() >= 0.0f)); assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f))); Precalculations pre(valid,ray); /* load ray */ TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0); const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f); const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f); vbool<K> terminated = !valid; vint<K> octant = ray.octant(); octant = select(valid, octant, vint<K>(0xffffffff)); do { const size_t valid_index = bsf(valid_bits); vbool<K> octant_valid = octant[valid_index] == octant; valid_bits &= ~(size_t)movemask(octant_valid); tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf)); tray.tfar = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf)); Frustum<robust> frustum; frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N); StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer stack[0].ptr = bvh->root; stack[0].mask = movemask(octant_valid); while (1) pop: { /* pop next node from stack */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /* cull node of active rays have already been terminated */ size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated)); if (unlikely(m_active == 0)) continue; while (likely(!cur.isLeaf())) { /* process nodes */ //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K); const NodeRef nodeRef = cur; const AABBNode* __restrict__ const node = nodeRef.getAABBNode(); vfloat<N> fmin; size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin); if (unlikely(!m_frustum_node)) goto pop; cur = BVH::emptyNode; m_active = 0; #if defined(__AVX__) //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1); #endif size_t num_child_hits = 0; do { const size_t i = bscf(m_frustum_node); vfloat<K> lnearP; vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored STAT3(normal.trav_nodes, 1, 1, 1); BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit); if (likely(any(lhit))) { const NodeRef child = node->child(i); assert(child != BVH::emptyNode); BVHN<N>::prefetch(child); if (likely(cur != BVH::emptyNode)) { num_child_hits++; stackPtr->ptr = cur; stackPtr->mask = m_active; stackPtr++; } cur = child; m_active = movemask(lhit); } } while(m_frustum_node); if (unlikely(cur == BVH::emptyNode)) goto pop; } /* intersect leaf */ assert(cur != BVH::invalidNode); assert(cur != BVH::emptyNode); #if defined(__AVX__) STAT3(normal.trav_leaves, 1, popcnt(m_active), K); #endif if (unlikely(!m_active)) continue; size_t items; const Primitive* prim = (Primitive*)cur.leaf(items); size_t lazy_node = 0; terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node); octant_valid &= !terminated; if (unlikely(none(octant_valid))) break; tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays if (unlikely(lazy_node)) { stackPtr->ptr = lazy_node; stackPtr->mask = movemask(octant_valid); stackPtr++; } } } while(valid_bits); vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf); } } }