summaryrefslogtreecommitdiff
path: root/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h
diff options
context:
space:
mode:
authorRĂ©mi Verschelde <remi@verschelde.fr>2021-04-27 19:07:12 +0200
committerGitHub <noreply@github.com>2021-04-27 19:07:12 +0200
commit95cfce661bbe9700209c09dfe297ab7ef5ebfe09 (patch)
tree9f3c8a98b619e2a0dd171d6cb6d8dc2e791baa14 /thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h
parentb999fbc4bd349cce153c2133dd0487694add1a05 (diff)
parent4d9d99bb827967e2bb931eeb8c3f0e079b39ae1a (diff)
Merge pull request #48050 from JFonS/occlusion_culling
Diffstat (limited to 'thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h')
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h692
1 files changed, 692 insertions, 0 deletions
diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h
new file mode 100644
index 0000000000..4c138dacdb
--- /dev/null
+++ b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h
@@ -0,0 +1,692 @@
+// Copyright 2009-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#define MBLUR_NUM_TEMPORAL_BINS 2
+#define MBLUR_NUM_OBJECT_BINS 32
+
+#include "../bvh/bvh.h"
+#include "../common/primref_mb.h"
+#include "heuristic_binning_array_aligned.h"
+#include "heuristic_timesplit_array.h"
+
+namespace embree
+{
+ namespace isa
+ {
+ template<typename T>
+ struct SharedVector
+ {
+ __forceinline SharedVector() {}
+
+ __forceinline SharedVector(T* ptr, size_t refCount = 1)
+ : prims(ptr), refCount(refCount) {}
+
+ __forceinline void incRef() {
+ refCount++;
+ }
+
+ __forceinline void decRef()
+ {
+ if (--refCount == 0)
+ delete prims;
+ }
+
+ T* prims;
+ size_t refCount;
+ };
+
+ template<typename BuildRecord, int MAX_BRANCHING_FACTOR>
+ struct LocalChildListT
+ {
+ typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
+
+ __forceinline LocalChildListT (const BuildRecord& record)
+ : numChildren(1), numSharedPrimVecs(1)
+ {
+ /* the local root will be freed in the ancestor where it was created (thus refCount is 2) */
+ children[0] = record;
+ primvecs[0] = new (&sharedPrimVecs[0]) SharedPrimRefVector(record.prims.prims, 2);
+ }
+
+ __forceinline ~LocalChildListT()
+ {
+ for (size_t i = 0; i < numChildren; i++)
+ primvecs[i]->decRef();
+ }
+
+ __forceinline BuildRecord& operator[] ( const size_t i ) {
+ return children[i];
+ }
+
+ __forceinline size_t size() const {
+ return numChildren;
+ }
+
+ __forceinline void split(ssize_t bestChild, const BuildRecord& lrecord, const BuildRecord& rrecord, std::unique_ptr<mvector<PrimRefMB>> new_vector)
+ {
+ SharedPrimRefVector* bsharedPrimVec = primvecs[bestChild];
+ if (lrecord.prims.prims == bsharedPrimVec->prims) {
+ primvecs[bestChild] = bsharedPrimVec;
+ bsharedPrimVec->incRef();
+ }
+ else {
+ primvecs[bestChild] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(lrecord.prims.prims);
+ }
+
+ if (rrecord.prims.prims == bsharedPrimVec->prims) {
+ primvecs[numChildren] = bsharedPrimVec;
+ bsharedPrimVec->incRef();
+ }
+ else {
+ primvecs[numChildren] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(rrecord.prims.prims);
+ }
+ bsharedPrimVec->decRef();
+ new_vector.release();
+
+ children[bestChild] = lrecord;
+ children[numChildren] = rrecord;
+ numChildren++;
+ }
+
+ public:
+ array_t<BuildRecord,MAX_BRANCHING_FACTOR> children;
+ array_t<SharedPrimRefVector*,MAX_BRANCHING_FACTOR> primvecs;
+ size_t numChildren;
+
+ array_t<SharedPrimRefVector,2*MAX_BRANCHING_FACTOR> sharedPrimVecs;
+ size_t numSharedPrimVecs;
+ };
+
+ template<typename Mesh>
+ struct RecalculatePrimRef
+ {
+ Scene* scene;
+
+ __forceinline RecalculatePrimRef (Scene* scene)
+ : scene(scene) {}
+
+ __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
+ {
+ const unsigned geomID = prim.geomID();
+ const unsigned primID = prim.primID();
+ const Mesh* mesh = scene->get<Mesh>(geomID);
+ const LBBox3fa lbounds = mesh->linearBounds(primID, time_range);
+ const range<int> tbounds = mesh->timeSegmentRange(time_range);
+ return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
+ }
+
+ // __noinline is workaround for ICC16 bug under MacOSX
+ __noinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
+ {
+ const unsigned geomID = prim.geomID();
+ const unsigned primID = prim.primID();
+ const Mesh* mesh = scene->get<Mesh>(geomID);
+ const LBBox3fa lbounds = mesh->linearBounds(space, primID, time_range);
+ const range<int> tbounds = mesh->timeSegmentRange(time_range);
+ return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
+ }
+
+ __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
+ return scene->get<Mesh>(prim.geomID())->linearBounds(prim.primID(), time_range);
+ }
+
+ // __noinline is workaround for ICC16 bug under MacOSX
+ __noinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
+ return scene->get<Mesh>(prim.geomID())->linearBounds(space, prim.primID(), time_range);
+ }
+ };
+
+ struct VirtualRecalculatePrimRef
+ {
+ Scene* scene;
+
+ __forceinline VirtualRecalculatePrimRef (Scene* scene)
+ : scene(scene) {}
+
+ __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
+ {
+ const unsigned geomID = prim.geomID();
+ const unsigned primID = prim.primID();
+ const Geometry* mesh = scene->get(geomID);
+ const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range);
+ const range<int> tbounds = mesh->timeSegmentRange(time_range);
+ return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
+ }
+
+ __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
+ {
+ const unsigned geomID = prim.geomID();
+ const unsigned primID = prim.primID();
+ const Geometry* mesh = scene->get(geomID);
+ const LBBox3fa lbounds = mesh->vlinearBounds(space, primID, time_range);
+ const range<int> tbounds = mesh->timeSegmentRange(time_range);
+ return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
+ }
+
+ __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
+ return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range);
+ }
+
+ __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
+ return scene->get(prim.geomID())->vlinearBounds(space, prim.primID(), time_range);
+ }
+ };
+
+ struct BVHBuilderMSMBlur
+ {
+ /*! settings for msmblur builder */
+ struct Settings
+ {
+ /*! default settings */
+ Settings ()
+ : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8),
+ travCost(1.0f), intCost(1.0f), singleLeafTimeSegment(false),
+ singleThreadThreshold(1024) {}
+
+
+ Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold)
+ : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
+ travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold)
+ {
+ minLeafSize = min(minLeafSize,maxLeafSize);
+ }
+
+ public:
+ size_t branchingFactor; //!< branching factor of BVH to build
+ size_t maxDepth; //!< maximum depth of BVH to build
+ size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
+ size_t minLeafSize; //!< minimum size of a leaf
+ size_t maxLeafSize; //!< maximum size of a leaf
+ float travCost; //!< estimated cost of one traversal step
+ float intCost; //!< estimated cost of one primitive intersection
+ bool singleLeafTimeSegment; //!< split time to single time range
+ size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
+ };
+
+ struct BuildRecord
+ {
+ public:
+ __forceinline BuildRecord () {}
+
+ __forceinline BuildRecord (size_t depth)
+ : depth(depth) {}
+
+ __forceinline BuildRecord (const SetMB& prims, size_t depth)
+ : depth(depth), prims(prims) {}
+
+ __forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) {
+ return a.prims.size() < b.prims.size();
+ }
+
+ __forceinline size_t size() const {
+ return prims.size();
+ }
+
+ public:
+ size_t depth; //!< Depth of the root of this subtree.
+ SetMB prims; //!< The list of primitives.
+ };
+
+ struct BuildRecordSplit : public BuildRecord
+ {
+ __forceinline BuildRecordSplit () {}
+
+ __forceinline BuildRecordSplit (size_t depth)
+ : BuildRecord(depth) {}
+
+ __forceinline BuildRecordSplit (const BuildRecord& record, const BinSplit<MBLUR_NUM_OBJECT_BINS>& split)
+ : BuildRecord(record), split(split) {}
+
+ BinSplit<MBLUR_NUM_OBJECT_BINS> split;
+ };
+
+ template<
+ typename NodeRef,
+ typename RecalculatePrimRef,
+ typename Allocator,
+ typename CreateAllocFunc,
+ typename CreateNodeFunc,
+ typename SetNodeFunc,
+ typename CreateLeafFunc,
+ typename ProgressMonitor>
+
+ class BuilderT
+ {
+ ALIGNED_CLASS_(16);
+ static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor
+ static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
+
+ typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
+ typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
+ typedef mvector<PrimRefMB>* PrimRefVector;
+ typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
+ typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
+ typedef LocalChildListT<BuildRecordSplit,MAX_BRANCHING_FACTOR> LocalChildListSplit;
+
+ public:
+
+ BuilderT (MemoryMonitorInterface* device,
+ const RecalculatePrimRef recalculatePrimRef,
+ const CreateAllocFunc createAlloc,
+ const CreateNodeFunc createNode,
+ const SetNodeFunc setNode,
+ const CreateLeafFunc createLeaf,
+ const ProgressMonitor progressMonitor,
+ const Settings& settings)
+ : cfg(settings),
+ heuristicObjectSplit(),
+ heuristicTemporalSplit(device, recalculatePrimRef),
+ recalculatePrimRef(recalculatePrimRef), createAlloc(createAlloc), createNode(createNode), setNode(setNode), createLeaf(createLeaf),
+ progressMonitor(progressMonitor)
+ {
+ if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
+ throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
+ }
+
+ /*! finds the best split */
+ const Split find(const SetMB& set)
+ {
+ /* first try standard object split */
+ const Split object_split = heuristicObjectSplit.find(set,cfg.logBlockSize);
+ const float object_split_sah = object_split.splitSAH();
+
+ /* test temporal splits only when object split was bad */
+ const float leaf_sah = set.leafSAH(cfg.logBlockSize);
+ if (object_split_sah < 0.50f*leaf_sah)
+ return object_split;
+
+ /* do temporal splits only if the the time range is big enough */
+ if (set.time_range.size() > 1.01f/float(set.max_num_time_segments))
+ {
+ const Split temporal_split = heuristicTemporalSplit.find(set,cfg.logBlockSize);
+ const float temporal_split_sah = temporal_split.splitSAH();
+
+ /* take temporal split if it improved SAH */
+ if (temporal_split_sah < object_split_sah)
+ return temporal_split;
+ }
+
+ return object_split;
+ }
+
+ /*! array partitioning */
+ __forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
+ {
+ /* perform object split */
+ if (likely(split.data == Split::SPLIT_OBJECT)) {
+ heuristicObjectSplit.split(split,set,lset,rset);
+ }
+ /* perform temporal split */
+ else if (likely(split.data == Split::SPLIT_TEMPORAL)) {
+ return heuristicTemporalSplit.split(split,set,lset,rset);
+ }
+ /* perform fallback split */
+ else if (unlikely(split.data == Split::SPLIT_FALLBACK)) {
+ set.deterministic_order();
+ splitFallback(set,lset,rset);
+ }
+ /* split by geometry */
+ else if (unlikely(split.data == Split::SPLIT_GEOMID)) {
+ set.deterministic_order();
+ splitByGeometry(set,lset,rset);
+ }
+ else
+ assert(false);
+
+ return std::unique_ptr<mvector<PrimRefMB>>();
+ }
+
+ /*! finds the best fallback split */
+ __noinline Split findFallback(const SetMB& set)
+ {
+ /* split if primitives are not from same geometry */
+ if (!sameGeometry(set))
+ return Split(0.0f,Split::SPLIT_GEOMID);
+
+ /* if a leaf can only hold a single time-segment, we might have to do additional temporal splits */
+ if (cfg.singleLeafTimeSegment)
+ {
+ /* test if one primitive has more than one time segment in time range, if so split time */
+ for (size_t i=set.begin(); i<set.end(); i++)
+ {
+ const PrimRefMB& prim = (*set.prims)[i];
+ const range<int> itime_range = prim.timeSegmentRange(set.time_range);
+ const int localTimeSegments = itime_range.size();
+ assert(localTimeSegments > 0);
+ if (localTimeSegments > 1) {
+ const int icenter = (itime_range.begin() + itime_range.end())/2;
+ const float splitTime = prim.timeStep(icenter);
+ return Split(0.0f,(unsigned)Split::SPLIT_TEMPORAL,0,splitTime);
+ }
+ }
+ }
+
+ /* otherwise return fallback split */
+ return Split(0.0f,Split::SPLIT_FALLBACK);
+ }
+
+ /*! performs fallback split */
+ void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
+ {
+ mvector<PrimRefMB>& prims = *set.prims;
+
+ const size_t begin = set.begin();
+ const size_t end = set.end();
+ const size_t center = (begin + end)/2;
+
+ PrimInfoMB linfo = empty;
+ for (size_t i=begin; i<center; i++)
+ linfo.add_primref(prims[i]);
+
+ PrimInfoMB rinfo = empty;
+ for (size_t i=center; i<end; i++)
+ rinfo.add_primref(prims[i]);
+
+ new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
+ new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
+ }
+
+ /*! checks if all primitives are from the same geometry */
+ __forceinline bool sameGeometry(const SetMB& set)
+ {
+ if (set.size() == 0) return true;
+ mvector<PrimRefMB>& prims = *set.prims;
+ const size_t begin = set.begin();
+ const size_t end = set.end();
+ unsigned int firstGeomID = prims[begin].geomID();
+ for (size_t i=begin+1; i<end; i++) {
+ if (prims[i].geomID() != firstGeomID){
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /* split by geometry ID */
+ void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
+ {
+ assert(set.size() > 1);
+
+ mvector<PrimRefMB>& prims = *set.prims;
+ const size_t begin = set.begin();
+ const size_t end = set.end();
+
+ PrimInfoMB left(empty);
+ PrimInfoMB right(empty);
+ unsigned int geomID = prims[begin].geomID();
+ size_t center = serial_partitioning(prims.data(),begin,end,left,right,
+ [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
+ [ ] ( PrimInfoMB& dst, const PrimRefMB& prim ) { dst.add_primref(prim); });
+
+ new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
+ new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
+ }
+
+ const NodeRecordMB4D createLargeLeaf(const BuildRecord& in, Allocator alloc)
+ {
+ /* this should never occur but is a fatal error */
+ if (in.depth > cfg.maxDepth)
+ throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
+
+ /* replace already found split by fallback split */
+ const BuildRecordSplit current(BuildRecord(in.prims,in.depth),findFallback(in.prims));
+
+ /* special case when directly creating leaf without any splits that could shrink time_range */
+ bool force_split = false;
+ if (current.depth == 1 && current.size() > 0)
+ {
+ BBox1f c = empty;
+ BBox1f p = current.prims.time_range;
+ for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
+ mvector<PrimRefMB>& prims = *current.prims.prims;
+ c.extend(prims[i].time_range);
+ }
+
+ force_split = c.lower > p.lower || c.upper < p.upper;
+ }
+
+ /* create leaf for few primitives */
+ if (current.size() <= cfg.maxLeafSize && current.split.data < Split::SPLIT_ENFORCE && !force_split)
+ return createLeaf(current,alloc);
+
+ /* fill all children by always splitting the largest one */
+ bool hasTimeSplits = false;
+ NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
+ LocalChildListSplit children(current);
+
+ do {
+ /* find best child with largest bounding box area */
+ size_t bestChild = -1;
+ size_t bestSize = 0;
+ for (size_t i=0; i<children.size(); i++)
+ {
+ /* ignore leaves as they cannot get split */
+ if (children[i].size() <= cfg.maxLeafSize && children[i].split.data < Split::SPLIT_ENFORCE && !force_split)
+ continue;
+
+ force_split = false;
+
+ /* remember child with largest size */
+ if (children[i].size() > bestSize) {
+ bestSize = children[i].size();
+ bestChild = i;
+ }
+ }
+ if (bestChild == -1) break;
+
+ /* perform best found split */
+ BuildRecordSplit& brecord = children[bestChild];
+ BuildRecordSplit lrecord(current.depth+1);
+ BuildRecordSplit rrecord(current.depth+1);
+ std::unique_ptr<mvector<PrimRefMB>> new_vector = split(brecord.split,brecord.prims,lrecord.prims,rrecord.prims);
+ hasTimeSplits |= new_vector != nullptr;
+
+ /* find new splits */
+ lrecord.split = findFallback(lrecord.prims);
+ rrecord.split = findFallback(rrecord.prims);
+ children.split(bestChild,lrecord,rrecord,std::move(new_vector));
+
+ } while (children.size() < cfg.branchingFactor);
+
+ /* detect time_ranges that have shrunken */
+ for (size_t i=0; i<children.size(); i++) {
+ const BBox1f c = children[i].prims.time_range;
+ const BBox1f p = in.prims.time_range;
+ hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
+ }
+
+ /* create node */
+ auto node = createNode(children.children.data(),children.numChildren,alloc,hasTimeSplits);
+
+ /* recurse into each child and perform reduction */
+ LBBox3fa gbounds = empty;
+ for (size_t i=0; i<children.size(); i++) {
+ values[i] = createLargeLeaf(children[i],alloc);
+ gbounds.extend(values[i].lbounds);
+ }
+
+ setNode(current,children.children.data(),node,values,children.numChildren);
+
+ /* calculate geometry bounds of this node */
+ if (hasTimeSplits)
+ return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
+ else
+ return NodeRecordMB4D(node,gbounds,current.prims.time_range);
+ }
+
+ const NodeRecordMB4D recurse(const BuildRecord& current, Allocator alloc, bool toplevel)
+ {
+ /* get thread local allocator */
+ if (!alloc)
+ alloc = createAlloc();
+
+ /* call memory monitor function to signal progress */
+ if (toplevel && current.size() <= cfg.singleThreadThreshold)
+ progressMonitor(current.size());
+
+ /*! find best split */
+ const Split csplit = find(current.prims);
+
+ /*! compute leaf and split cost */
+ const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
+ const float splitSAH = cfg.travCost*current.prims.halfArea()+cfg.intCost*csplit.splitSAH();
+ assert((current.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));
+
+ /*! create a leaf node when threshold reached or SAH tells us to stop */
+ if (current.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
+ current.prims.deterministic_order();
+ return createLargeLeaf(current,alloc);
+ }
+
+ /*! perform initial split */
+ SetMB lprims,rprims;
+ std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,current.prims,lprims,rprims);
+ bool hasTimeSplits = new_vector != nullptr;
+ NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
+ LocalChildList children(current);
+ {
+ BuildRecord lrecord(lprims,current.depth+1);
+ BuildRecord rrecord(rprims,current.depth+1);
+ children.split(0,lrecord,rrecord,std::move(new_vector));
+ }
+
+ /*! split until node is full or SAH tells us to stop */
+ while (children.size() < cfg.branchingFactor)
+ {
+ /*! find best child to split */
+ float bestArea = neg_inf;
+ ssize_t bestChild = -1;
+ for (size_t i=0; i<children.size(); i++)
+ {
+ if (children[i].size() <= cfg.minLeafSize) continue;
+ if (expectedApproxHalfArea(children[i].prims.geomBounds) > bestArea) {
+ bestChild = i; bestArea = expectedApproxHalfArea(children[i].prims.geomBounds);
+ }
+ }
+ if (bestChild == -1) break;
+
+ /* perform split */
+ BuildRecord& brecord = children[bestChild];
+ BuildRecord lrecord(current.depth+1);
+ BuildRecord rrecord(current.depth+1);
+ Split csplit = find(brecord.prims);
+ std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,brecord.prims,lrecord.prims,rrecord.prims);
+ hasTimeSplits |= new_vector != nullptr;
+ children.split(bestChild,lrecord,rrecord,std::move(new_vector));
+ }
+
+ /* detect time_ranges that have shrunken */
+ for (size_t i=0; i<children.size(); i++) {
+ const BBox1f c = children[i].prims.time_range;
+ const BBox1f p = current.prims.time_range;
+ hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
+ }
+
+ /* sort buildrecords for simpler shadow ray traversal */
+ //std::sort(&children[0],&children[children.size()],std::greater<BuildRecord>()); // FIXME: reduces traversal performance of bvh8.triangle4 (need to verified) !!
+
+ /*! create an inner node */
+ auto node = createNode(children.children.data(), children.numChildren, alloc, hasTimeSplits);
+ LBBox3fa gbounds = empty;
+
+ /* spawn tasks */
+ if (unlikely(current.size() > cfg.singleThreadThreshold))
+ {
+ /*! parallel_for is faster than spawing sub-tasks */
+ parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
+ for (size_t i=r.begin(); i<r.end(); i++) {
+ values[i] = recurse(children[i],nullptr,true);
+ _mm_mfence(); // to allow non-temporal stores during build
+ }
+ });
+
+ /*! merge bounding boxes */
+ for (size_t i=0; i<children.size(); i++)
+ gbounds.extend(values[i].lbounds);
+ }
+ /* recurse into each child */
+ else
+ {
+ //for (size_t i=0; i<children.size(); i++)
+ for (ssize_t i=children.size()-1; i>=0; i--) {
+ values[i] = recurse(children[i],alloc,false);
+ gbounds.extend(values[i].lbounds);
+ }
+ }
+
+ setNode(current,children.children.data(),node,values,children.numChildren);
+
+ /* calculate geometry bounds of this node */
+ if (unlikely(hasTimeSplits))
+ return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
+ else
+ return NodeRecordMB4D(node,gbounds,current.prims.time_range);
+ }
+
+ /*! builder entry function */
+ __forceinline const NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
+ {
+ const SetMB set(pinfo,&prims);
+ auto ret = recurse(BuildRecord(set,1),nullptr,true);
+ _mm_mfence(); // to allow non-temporal stores during build
+ return ret;
+ }
+
+ private:
+ Settings cfg;
+ HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> heuristicObjectSplit;
+ HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> heuristicTemporalSplit;
+ const RecalculatePrimRef recalculatePrimRef;
+ const CreateAllocFunc createAlloc;
+ const CreateNodeFunc createNode;
+ const SetNodeFunc setNode;
+ const CreateLeafFunc createLeaf;
+ const ProgressMonitor progressMonitor;
+ };
+
+ template<typename NodeRef,
+ typename RecalculatePrimRef,
+ typename CreateAllocFunc,
+ typename CreateNodeFunc,
+ typename SetNodeFunc,
+ typename CreateLeafFunc,
+ typename ProgressMonitorFunc>
+
+ static const BVHNodeRecordMB4D<NodeRef> build(mvector<PrimRefMB>& prims,
+ const PrimInfoMB& pinfo,
+ MemoryMonitorInterface* device,
+ const RecalculatePrimRef recalculatePrimRef,
+ const CreateAllocFunc createAlloc,
+ const CreateNodeFunc createNode,
+ const SetNodeFunc setNode,
+ const CreateLeafFunc createLeaf,
+ const ProgressMonitorFunc progressMonitor,
+ const Settings& settings)
+ {
+ typedef BuilderT<
+ NodeRef,
+ RecalculatePrimRef,
+ decltype(createAlloc()),
+ CreateAllocFunc,
+ CreateNodeFunc,
+ SetNodeFunc,
+ CreateLeafFunc,
+ ProgressMonitorFunc> Builder;
+
+ Builder builder(device,
+ recalculatePrimRef,
+ createAlloc,
+ createNode,
+ setNode,
+ createLeaf,
+ progressMonitor,
+ settings);
+
+
+ return builder(prims,pinfo);
+ }
+ };
+ }
+}