diff options
author | jfons <joan.fonssanchez@gmail.com> | 2021-04-20 18:38:09 +0200 |
---|---|---|
committer | jfons <joan.fonssanchez@gmail.com> | 2021-04-23 15:57:28 +0200 |
commit | 34b3e8f9e2ae076990ecf3b2827eff759ba2abf9 (patch) | |
tree | 854a526a5ba2d6128e44d995d1bc138cf84ee722 /thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h | |
parent | eeccab26c5641409092547e02ad11e6253ac1b87 (diff) |
Add Embree-aarch64 thirdparty library
Diffstat (limited to 'thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h')
-rw-r--r-- | thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h | 411 |
1 files changed, 411 insertions, 0 deletions
diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h new file mode 100644 index 0000000000..755ce255fb --- /dev/null +++ b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h @@ -0,0 +1,411 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../bvh/bvh.h" +#include "../geometry/primitive.h" +#include "../builders/bvh_builder_sah.h" +#include "../builders/heuristic_binning_array_aligned.h" +#include "../builders/heuristic_binning_array_unaligned.h" +#include "../builders/heuristic_strand_array.h" + +#define NUM_HAIR_OBJECT_BINS 32 + +namespace embree +{ + namespace isa + { + struct BVHBuilderHair + { + /*! settings for builder */ + struct Settings + { + /*! default settings */ + Settings () + : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), finished_range_threshold(inf) {} + + public: + size_t branchingFactor; //!< branching factor of BVH to build + size_t maxDepth; //!< maximum depth of BVH to build + size_t logBlockSize; //!< log2 of blocksize for SAH heuristic + size_t minLeafSize; //!< minimum size of a leaf + size_t maxLeafSize; //!< maximum size of a leaf + size_t finished_range_threshold; //!< finished range threshold + }; + + template<typename NodeRef, + typename CreateAllocFunc, + typename CreateAABBNodeFunc, + typename SetAABBNodeFunc, + typename CreateOBBNodeFunc, + typename SetOBBNodeFunc, + typename CreateLeafFunc, + typename ProgressMonitor, + typename ReportFinishedRangeFunc> + + class BuilderT + { + ALIGNED_CLASS_(16); + friend struct BVHBuilderHair; + + typedef FastAllocator::CachedAllocator Allocator; + typedef HeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> HeuristicBinningSAH; + typedef UnalignedHeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> UnalignedHeuristicBinningSAH; + typedef HeuristicStrandSplit HeuristicStrandSplitSAH; + + static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor + static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth + static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build + + static const size_t travCostAligned = 1; + static const size_t travCostUnaligned = 5; + static const size_t intCost = 6; + + BuilderT (Scene* scene, + PrimRef* prims, + const CreateAllocFunc& createAlloc, + const CreateAABBNodeFunc& createAABBNode, + const SetAABBNodeFunc& setAABBNode, + const CreateOBBNodeFunc& createOBBNode, + const SetOBBNodeFunc& setOBBNode, + const CreateLeafFunc& createLeaf, + const ProgressMonitor& progressMonitor, + const ReportFinishedRangeFunc& reportFinishedRange, + const Settings settings) + + : cfg(settings), + prims(prims), + createAlloc(createAlloc), + createAABBNode(createAABBNode), + setAABBNode(setAABBNode), + createOBBNode(createOBBNode), + setOBBNode(setOBBNode), + createLeaf(createLeaf), + progressMonitor(progressMonitor), + reportFinishedRange(reportFinishedRange), + alignedHeuristic(prims), unalignedHeuristic(scene,prims), strandHeuristic(scene,prims) {} + + /*! checks if all primitives are from the same geometry */ + __forceinline bool sameGeometry(const PrimInfoRange& range) + { + if (range.size() == 0) return true; + unsigned int firstGeomID = prims[range.begin()].geomID(); + for (size_t i=range.begin()+1; i<range.end(); i++) { + if (prims[i].geomID() != firstGeomID){ + return false; + } + } + return true; + } + + /*! creates a large leaf that could be larger than supported by the BVH */ + NodeRef createLargeLeaf(size_t depth, const PrimInfoRange& pinfo, Allocator alloc) + { + /* this should never occur but is a fatal error */ + if (depth > cfg.maxDepth) + throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached"); + + /* create leaf for few primitives */ + if (pinfo.size() <= cfg.maxLeafSize && sameGeometry(pinfo)) + return createLeaf(prims,pinfo,alloc); + + /* fill all children by always splitting the largest one */ + PrimInfoRange children[MAX_BRANCHING_FACTOR]; + unsigned numChildren = 1; + children[0] = pinfo; + + do { + + /* find best child with largest bounding box area */ + int bestChild = -1; + size_t bestSize = 0; + for (unsigned i=0; i<numChildren; i++) + { + /* ignore leaves as they cannot get split */ + if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i])) + continue; + + /* remember child with largest size */ + if (children[i].size() > bestSize) { + bestSize = children[i].size(); + bestChild = i; + } + } + if (bestChild == -1) break; + + /*! split best child into left and right child */ + __aligned(64) PrimInfoRange left, right; + if (!sameGeometry(children[bestChild])) { + alignedHeuristic.splitByGeometry(children[bestChild],left,right); + } else { + alignedHeuristic.splitFallback(children[bestChild],left,right); + } + + /* add new children left and right */ + children[bestChild] = children[numChildren-1]; + children[numChildren-1] = left; + children[numChildren+0] = right; + numChildren++; + + } while (numChildren < cfg.branchingFactor); + + /* create node */ + auto node = createAABBNode(alloc); + + for (size_t i=0; i<numChildren; i++) { + const NodeRef child = createLargeLeaf(depth+1,children[i],alloc); + setAABBNode(node,i,child,children[i].geomBounds); + } + + return node; + } + + /*! performs split */ + __noinline void split(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo, bool& aligned) // FIXME: not inlined as ICC otherwise uses much stack + { + /* variable to track the SAH of the best splitting approach */ + float bestSAH = inf; + const size_t blocks = (pinfo.size()+(1ull<<cfg.logBlockSize)-1ull) >> cfg.logBlockSize; + const float leafSAH = intCost*float(blocks)*halfArea(pinfo.geomBounds); + + /* try standard binning in aligned space */ + float alignedObjectSAH = inf; + HeuristicBinningSAH::Split alignedObjectSplit; + if (aligned) { + alignedObjectSplit = alignedHeuristic.find(pinfo,cfg.logBlockSize); + alignedObjectSAH = travCostAligned*halfArea(pinfo.geomBounds) + intCost*alignedObjectSplit.splitSAH(); + bestSAH = min(alignedObjectSAH,bestSAH); + } + + /* try standard binning in unaligned space */ + UnalignedHeuristicBinningSAH::Split unalignedObjectSplit; + LinearSpace3fa uspace; + float unalignedObjectSAH = inf; + if (bestSAH > 0.7f*leafSAH) { + uspace = unalignedHeuristic.computeAlignedSpace(pinfo); + const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(pinfo,uspace); + unalignedObjectSplit = unalignedHeuristic.find(sinfo,cfg.logBlockSize,uspace); + unalignedObjectSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*unalignedObjectSplit.splitSAH(); + bestSAH = min(unalignedObjectSAH,bestSAH); + } + + /* try splitting into two strands */ + HeuristicStrandSplitSAH::Split strandSplit; + float strandSAH = inf; + if (bestSAH > 0.7f*leafSAH && pinfo.size() <= 256) { + strandSplit = strandHeuristic.find(pinfo,cfg.logBlockSize); + strandSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*strandSplit.splitSAH(); + bestSAH = min(strandSAH,bestSAH); + } + + /* fallback if SAH heuristics failed */ + if (unlikely(!std::isfinite(bestSAH))) + { + alignedHeuristic.deterministic_order(pinfo); + alignedHeuristic.splitFallback(pinfo,linfo,rinfo); + } + + /* perform aligned split if this is best */ + else if (bestSAH == alignedObjectSAH) { + alignedHeuristic.split(alignedObjectSplit,pinfo,linfo,rinfo); + } + + /* perform unaligned split if this is best */ + else if (bestSAH == unalignedObjectSAH) { + unalignedHeuristic.split(unalignedObjectSplit,uspace,pinfo,linfo,rinfo); + aligned = false; + } + + /* perform strand split if this is best */ + else if (bestSAH == strandSAH) { + strandHeuristic.split(strandSplit,pinfo,linfo,rinfo); + aligned = false; + } + + /* can never happen */ + else + assert(false); + } + + /*! recursive build */ + NodeRef recurse(size_t depth, const PrimInfoRange& pinfo, Allocator alloc, bool toplevel, bool alloc_barrier) + { + /* get thread local allocator */ + if (!alloc) + alloc = createAlloc(); + + /* call memory monitor function to signal progress */ + if (toplevel && pinfo.size() <= SINGLE_THREADED_THRESHOLD) + progressMonitor(pinfo.size()); + + PrimInfoRange children[MAX_BRANCHING_FACTOR]; + + /* create leaf node */ + if (depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || pinfo.size() <= cfg.minLeafSize) { + alignedHeuristic.deterministic_order(pinfo); + return createLargeLeaf(depth,pinfo,alloc); + } + + /* fill all children by always splitting the one with the largest surface area */ + size_t numChildren = 1; + children[0] = pinfo; + bool aligned = true; + + do { + + /* find best child with largest bounding box area */ + ssize_t bestChild = -1; + float bestArea = neg_inf; + for (size_t i=0; i<numChildren; i++) + { + /* ignore leaves as they cannot get split */ + if (children[i].size() <= cfg.minLeafSize) + continue; + + /* remember child with largest area */ + if (area(children[i].geomBounds) > bestArea) { + bestArea = area(children[i].geomBounds); + bestChild = i; + } + } + if (bestChild == -1) break; + + /*! split best child into left and right child */ + PrimInfoRange left, right; + split(children[bestChild],left,right,aligned); + + /* add new children left and right */ + children[bestChild] = children[numChildren-1]; + children[numChildren-1] = left; + children[numChildren+0] = right; + numChildren++; + + } while (numChildren < cfg.branchingFactor); + + NodeRef node; + + /* create aligned node */ + if (aligned) + { + node = createAABBNode(alloc); + + /* spawn tasks or ... */ + if (pinfo.size() > SINGLE_THREADED_THRESHOLD) + { + parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { + for (size_t i=r.begin(); i<r.end(); i++) { + const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold; + setAABBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),children[i].geomBounds); + _mm_mfence(); // to allow non-temporal stores during build + } + }); + } + /* ... continue sequentially */ + else { + for (size_t i=0; i<numChildren; i++) { + const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold; + setAABBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),children[i].geomBounds); + } + } + } + + /* create unaligned node */ + else + { + node = createOBBNode(alloc); + + /* spawn tasks or ... */ + if (pinfo.size() > SINGLE_THREADED_THRESHOLD) + { + parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { + for (size_t i=r.begin(); i<r.end(); i++) { + const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]); + const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space); + const OBBox3fa obounds(space,sinfo.geomBounds); + const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold; + setOBBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),obounds); + _mm_mfence(); // to allow non-temporal stores during build + } + }); + } + /* ... continue sequentially */ + else + { + for (size_t i=0; i<numChildren; i++) { + const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]); + const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space); + const OBBox3fa obounds(space,sinfo.geomBounds); + const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold; + setOBBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),obounds); + } + } + } + + /* reports a finished range of primrefs */ + if (unlikely(alloc_barrier)) + reportFinishedRange(pinfo); + + return node; + } + + private: + Settings cfg; + PrimRef* prims; + const CreateAllocFunc& createAlloc; + const CreateAABBNodeFunc& createAABBNode; + const SetAABBNodeFunc& setAABBNode; + const CreateOBBNodeFunc& createOBBNode; + const SetOBBNodeFunc& setOBBNode; + const CreateLeafFunc& createLeaf; + const ProgressMonitor& progressMonitor; + const ReportFinishedRangeFunc& reportFinishedRange; + + private: + HeuristicBinningSAH alignedHeuristic; + UnalignedHeuristicBinningSAH unalignedHeuristic; + HeuristicStrandSplitSAH strandHeuristic; + }; + + template<typename NodeRef, + typename CreateAllocFunc, + typename CreateAABBNodeFunc, + typename SetAABBNodeFunc, + typename CreateOBBNodeFunc, + typename SetOBBNodeFunc, + typename CreateLeafFunc, + typename ProgressMonitor, + typename ReportFinishedRangeFunc> + + static NodeRef build (const CreateAllocFunc& createAlloc, + const CreateAABBNodeFunc& createAABBNode, + const SetAABBNodeFunc& setAABBNode, + const CreateOBBNodeFunc& createOBBNode, + const SetOBBNodeFunc& setOBBNode, + const CreateLeafFunc& createLeaf, + const ProgressMonitor& progressMonitor, + const ReportFinishedRangeFunc& reportFinishedRange, + Scene* scene, + PrimRef* prims, + const PrimInfo& pinfo, + const Settings settings) + { + typedef BuilderT<NodeRef, + CreateAllocFunc, + CreateAABBNodeFunc,SetAABBNodeFunc, + CreateOBBNodeFunc,SetOBBNodeFunc, + CreateLeafFunc,ProgressMonitor, + ReportFinishedRangeFunc> Builder; + + Builder builder(scene,prims,createAlloc, + createAABBNode,setAABBNode, + createOBBNode,setOBBNode, + createLeaf,progressMonitor,reportFinishedRange,settings); + + NodeRef root = builder.recurse(1,pinfo,nullptr,true,false); + _mm_mfence(); // to allow non-temporal stores during build + return root; + } + }; + } +} |