// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../bvh/bvh.h" #include "../geometry/primitive.h" #include "../builders/bvh_builder_msmblur.h" #include "../builders/heuristic_binning_array_aligned.h" #include "../builders/heuristic_binning_array_unaligned.h" #include "../builders/heuristic_timesplit_array.h" namespace embree { namespace isa { struct BVHBuilderHairMSMBlur { /*! settings for msmblur builder */ struct Settings { /*! default settings */ Settings () : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8) {} public: size_t branchingFactor; //!< branching factor of BVH to build size_t maxDepth; //!< maximum depth of BVH to build size_t logBlockSize; //!< log2 of blocksize for SAH heuristic size_t minLeafSize; //!< minimum size of a leaf size_t maxLeafSize; //!< maximum size of a leaf }; struct BuildRecord { public: __forceinline BuildRecord () {} __forceinline BuildRecord (size_t depth) : depth(depth) {} __forceinline BuildRecord (const SetMB& prims, size_t depth) : depth(depth), prims(prims) {} __forceinline size_t size() const { return prims.size(); } public: size_t depth; //!< depth of the root of this subtree SetMB prims; //!< the list of primitives }; template class BuilderT { ALIGNED_CLASS_(16); static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build typedef BVHNodeRecordMB NodeRecordMB; typedef BVHNodeRecordMB4D NodeRecordMB4D; typedef FastAllocator::CachedAllocator Allocator; typedef LocalChildListT LocalChildList; typedef HeuristicMBlurTemporalSplit HeuristicTemporal; typedef HeuristicArrayBinningMB HeuristicBinning; typedef UnalignedHeuristicArrayBinningMB UnalignedHeuristicBinning; public: BuilderT (Scene* scene, const RecalculatePrimRef& recalculatePrimRef, const CreateAllocFunc& createAlloc, const CreateAABBNodeMBFunc& createAABBNodeMB, const SetAABBNodeMBFunc& setAABBNodeMB, const CreateOBBNodeMBFunc& createOBBNodeMB, const SetOBBNodeMBFunc& setOBBNodeMB, const CreateLeafFunc& createLeaf, const ProgressMonitor& progressMonitor, const Settings settings) : cfg(settings), scene(scene), recalculatePrimRef(recalculatePrimRef), createAlloc(createAlloc), createAABBNodeMB(createAABBNodeMB), setAABBNodeMB(setAABBNodeMB), createOBBNodeMB(createOBBNodeMB), setOBBNodeMB(setOBBNodeMB), createLeaf(createLeaf), progressMonitor(progressMonitor), unalignedHeuristic(scene), temporalSplitHeuristic(scene->device,recalculatePrimRef) {} private: /*! checks if all primitives are from the same geometry */ __forceinline bool sameGeometry(const SetMB& set) { mvector& prims = *set.prims; unsigned int firstGeomID = prims[set.begin()].geomID(); for (size_t i=set.begin()+1; i& prims = *set.prims; const size_t begin = set.begin(); const size_t end = set.end(); const size_t center = (begin + end)/2; PrimInfoMB linfo = empty; for (size_t i=begin; i(begin,center),set.time_range); new (&rset) SetMB(rinfo,set.prims,range(center,end ),set.time_range); } void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset) { assert(set.size() > 1); const size_t begin = set.begin(); const size_t end = set.end(); PrimInfoMB linfo(empty); PrimInfoMB rinfo(empty); unsigned int geomID = (*set.prims)[begin].geomID(); size_t center = serial_partitioning(set.prims->data(),begin,end,linfo,rinfo, [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; }, [ ] ( PrimInfoMB& a, const PrimRefMB& ref ) { a.add_primref(ref); }); new (&lset) SetMB(linfo,set.prims,range(begin,center),set.time_range); new (&rset) SetMB(rinfo,set.prims,range(center,end ),set.time_range); } /*! creates a large leaf that could be larger than supported by the BVH */ NodeRecordMB4D createLargeLeaf(BuildRecord& current, Allocator alloc) { /* this should never occur but is a fatal error */ if (current.depth > cfg.maxDepth) throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached"); /* special case when directly creating leaf without any splits that could shrink time_range */ bool force_split = false; if (current.depth == 1 && current.size() > 0) { BBox1f c = empty; BBox1f p = current.prims.time_range; for (size_t i=current.prims.begin(); i& prims = *current.prims.prims; c.extend(prims[i].time_range); } force_split = c.lower > p.lower || c.upper < p.upper; } /* create leaf for few primitives */ if (current.size() <= cfg.maxLeafSize && sameGeometry(current.prims) && !force_split) return createLeaf(current.prims,alloc); /* fill all children by always splitting the largest one */ LocalChildList children(current); NodeRecordMB4D values[MAX_BRANCHING_FACTOR]; do { /* find best child with largest bounding box area */ int bestChild = -1; size_t bestSize = 0; for (unsigned i=0; i bestSize) { bestSize = children[i].size(); bestChild = i; } } if (bestChild == -1) break; /*! split best child into left and right child */ BuildRecord left(current.depth+1); BuildRecord right(current.depth+1); if (!sameGeometry(children[bestChild].prims)) { splitByGeometry(children[bestChild].prims,left.prims,right.prims); } else { splitFallback(children[bestChild].prims,left.prims,right.prims); } children.split(bestChild,left,right,std::unique_ptr>()); } while (children.size() < cfg.branchingFactor); /* detect time_ranges that have shrunken */ bool timesplit = false; for (size_t i=0; i p.lower || c.upper < p.upper; } /* create node */ NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,timesplit); LBBox3fa bounds = empty; for (size_t i=0; i> split(const BuildRecord& current, BuildRecord& lrecord, BuildRecord& rrecord, bool& aligned, bool& timesplit) { /* variable to track the SAH of the best splitting approach */ float bestSAH = inf; const float leafSAH = current.prims.leafSAH(cfg.logBlockSize); /* perform standard binning in aligned space */ HeuristicBinning::Split alignedObjectSplit = alignedHeuristic.find(current.prims,cfg.logBlockSize); float alignedObjectSAH = alignedObjectSplit.splitSAH(); bestSAH = min(alignedObjectSAH,bestSAH); /* perform standard binning in unaligned space */ UnalignedHeuristicBinning::Split unalignedObjectSplit; LinearSpace3fa uspace; float unalignedObjectSAH = inf; if (alignedObjectSAH > 0.7f*leafSAH) { uspace = unalignedHeuristic.computeAlignedSpaceMB(scene,current.prims); const SetMB sset = current.prims.primInfo(recalculatePrimRef,uspace); unalignedObjectSplit = unalignedHeuristic.find(sset,cfg.logBlockSize,uspace); unalignedObjectSAH = 1.3f*unalignedObjectSplit.splitSAH(); // makes unaligned splits more expensive bestSAH = min(unalignedObjectSAH,bestSAH); } /* do temporal splits only if previous approaches failed to produce good SAH and the the time range is large enough */ float temporal_split_sah = inf; typename HeuristicTemporal::Split temporal_split; if (bestSAH > 0.5f*leafSAH) { if (current.prims.time_range.size() > 1.01f/float(current.prims.max_num_time_segments)) { temporal_split = temporalSplitHeuristic.find(current.prims,cfg.logBlockSize); temporal_split_sah = temporal_split.splitSAH(); bestSAH = min(temporal_split_sah,bestSAH); } } /* perform fallback split if SAH heuristics failed */ if (unlikely(!std::isfinite(bestSAH))) { current.prims.deterministic_order(); splitFallback(current.prims,lrecord.prims,rrecord.prims); } /* perform aligned split if this is best */ else if (likely(bestSAH == alignedObjectSAH)) { alignedHeuristic.split(alignedObjectSplit,current.prims,lrecord.prims,rrecord.prims); } /* perform unaligned split if this is best */ else if (likely(bestSAH == unalignedObjectSAH)) { unalignedHeuristic.split(unalignedObjectSplit,uspace,current.prims,lrecord.prims,rrecord.prims); aligned = false; } /* perform temporal split if this is best */ else if (likely(bestSAH == temporal_split_sah)) { timesplit = true; return temporalSplitHeuristic.split(temporal_split,current.prims,lrecord.prims,rrecord.prims); } else assert(false); return std::unique_ptr>(); } /*! recursive build */ NodeRecordMB4D recurse(BuildRecord& current, Allocator alloc, bool toplevel) { /* get thread local allocator */ if (!alloc) alloc = createAlloc(); /* call memory monitor function to signal progress */ if (toplevel && current.size() <= SINGLE_THREADED_THRESHOLD) progressMonitor(current.size()); /* create leaf node */ if (current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || current.size() <= cfg.minLeafSize) { current.prims.deterministic_order(); return createLargeLeaf(current,alloc); } /* fill all children by always splitting the one with the largest surface area */ NodeRecordMB4D values[MAX_BRANCHING_FACTOR]; LocalChildList children(current); bool aligned = true; bool timesplit = false; do { /* find best child with largest bounding box area */ ssize_t bestChild = -1; float bestArea = neg_inf; for (size_t i=0; i bestArea) { bestArea = children[i].prims.halfArea(); bestChild = i; } } if (bestChild == -1) break; /*! split best child into left and right child */ BuildRecord left(current.depth+1); BuildRecord right(current.depth+1); std::unique_ptr> new_vector = split(children[bestChild],left,right,aligned,timesplit); children.split(bestChild,left,right,std::move(new_vector)); } while (children.size() < cfg.branchingFactor); /* detect time_ranges that have shrunken */ for (size_t i=0; i p.lower || c.upper < p.upper; } /* create time split node */ if (timesplit) { const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true); /* spawn tasks or ... */ if (current.size() > SINGLE_THREADED_THRESHOLD) { parallel_for(size_t(0), children.size(), [&] (const range& r) { for (size_t i=r.begin(); i SINGLE_THREADED_THRESHOLD) { LBBox3fa cbounds[MAX_BRANCHING_FACTOR]; parallel_for(size_t(0), children.size(), [&] (const range& r) { for (size_t i=r.begin(); i SINGLE_THREADED_THRESHOLD) { parallel_for(size_t(0), children.size(), [&] (const range& r) { for (size_t i=r.begin(); i& prims, const PrimInfoMB& pinfo) { BuildRecord record(SetMB(pinfo,&prims),1); auto root = recurse(record,nullptr,true); _mm_mfence(); // to allow non-temporal stores during build return root; } private: Settings cfg; Scene* scene; const RecalculatePrimRef& recalculatePrimRef; const CreateAllocFunc& createAlloc; const CreateAABBNodeMBFunc& createAABBNodeMB; const SetAABBNodeMBFunc& setAABBNodeMB; const CreateOBBNodeMBFunc& createOBBNodeMB; const SetOBBNodeMBFunc& setOBBNodeMB; const CreateLeafFunc& createLeaf; const ProgressMonitor& progressMonitor; private: HeuristicBinning alignedHeuristic; UnalignedHeuristicBinning unalignedHeuristic; HeuristicTemporal temporalSplitHeuristic; }; template static BVHNodeRecordMB4D build (Scene* scene, mvector& prims, const PrimInfoMB& pinfo, const RecalculatePrimRef& recalculatePrimRef, const CreateAllocFunc& createAlloc, const CreateAABBNodeMBFunc& createAABBNodeMB, const SetAABBNodeMBFunc& setAABBNodeMB, const CreateOBBNodeMBFunc& createOBBNodeMB, const SetOBBNodeMBFunc& setOBBNodeMB, const CreateLeafFunc& createLeaf, const ProgressMonitor& progressMonitor, const Settings settings) { typedef BuilderT Builder; Builder builder(scene,recalculatePrimRef,createAlloc, createAABBNodeMB,setAABBNodeMB, createOBBNodeMB,setOBBNodeMB, createLeaf,progressMonitor,settings); return builder(prims,pinfo); } }; } }