diff options
Diffstat (limited to 'thirdparty/embree-aarch64/kernels/builders')
18 files changed, 0 insertions, 7286 deletions
diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h deleted file mode 100644 index 755ce255fb..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h +++ /dev/null @@ -1,411 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../bvh/bvh.h" -#include "../geometry/primitive.h" -#include "../builders/bvh_builder_sah.h" -#include "../builders/heuristic_binning_array_aligned.h" -#include "../builders/heuristic_binning_array_unaligned.h" -#include "../builders/heuristic_strand_array.h" - -#define NUM_HAIR_OBJECT_BINS 32 - -namespace embree -{ - namespace isa - { - struct BVHBuilderHair - { - /*! settings for builder */ - struct Settings - { - /*! default settings */ - Settings () - : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), finished_range_threshold(inf) {} - - public: - size_t branchingFactor; //!< branching factor of BVH to build - size_t maxDepth; //!< maximum depth of BVH to build - size_t logBlockSize; //!< log2 of blocksize for SAH heuristic - size_t minLeafSize; //!< minimum size of a leaf - size_t maxLeafSize; //!< maximum size of a leaf - size_t finished_range_threshold; //!< finished range threshold - }; - - template<typename NodeRef, - typename CreateAllocFunc, - typename CreateAABBNodeFunc, - typename SetAABBNodeFunc, - typename CreateOBBNodeFunc, - typename SetOBBNodeFunc, - typename CreateLeafFunc, - typename ProgressMonitor, - typename ReportFinishedRangeFunc> - - class BuilderT - { - ALIGNED_CLASS_(16); - friend struct BVHBuilderHair; - - typedef FastAllocator::CachedAllocator Allocator; - typedef HeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> HeuristicBinningSAH; - typedef UnalignedHeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> UnalignedHeuristicBinningSAH; - typedef HeuristicStrandSplit HeuristicStrandSplitSAH; - - static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor - static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth - static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build - - static const size_t travCostAligned = 1; - static const size_t travCostUnaligned = 5; - static const size_t intCost = 6; - - BuilderT (Scene* scene, - PrimRef* prims, - const CreateAllocFunc& createAlloc, - const CreateAABBNodeFunc& createAABBNode, - const SetAABBNodeFunc& setAABBNode, - const CreateOBBNodeFunc& createOBBNode, - const SetOBBNodeFunc& setOBBNode, - const CreateLeafFunc& createLeaf, - const ProgressMonitor& progressMonitor, - const ReportFinishedRangeFunc& reportFinishedRange, - const Settings settings) - - : cfg(settings), - prims(prims), - createAlloc(createAlloc), - createAABBNode(createAABBNode), - setAABBNode(setAABBNode), - createOBBNode(createOBBNode), - setOBBNode(setOBBNode), - createLeaf(createLeaf), - progressMonitor(progressMonitor), - reportFinishedRange(reportFinishedRange), - alignedHeuristic(prims), unalignedHeuristic(scene,prims), strandHeuristic(scene,prims) {} - - /*! checks if all primitives are from the same geometry */ - __forceinline bool sameGeometry(const PrimInfoRange& range) - { - if (range.size() == 0) return true; - unsigned int firstGeomID = prims[range.begin()].geomID(); - for (size_t i=range.begin()+1; i<range.end(); i++) { - if (prims[i].geomID() != firstGeomID){ - return false; - } - } - return true; - } - - /*! creates a large leaf that could be larger than supported by the BVH */ - NodeRef createLargeLeaf(size_t depth, const PrimInfoRange& pinfo, Allocator alloc) - { - /* this should never occur but is a fatal error */ - if (depth > cfg.maxDepth) - throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached"); - - /* create leaf for few primitives */ - if (pinfo.size() <= cfg.maxLeafSize && sameGeometry(pinfo)) - return createLeaf(prims,pinfo,alloc); - - /* fill all children by always splitting the largest one */ - PrimInfoRange children[MAX_BRANCHING_FACTOR]; - unsigned numChildren = 1; - children[0] = pinfo; - - do { - - /* find best child with largest bounding box area */ - int bestChild = -1; - size_t bestSize = 0; - for (unsigned i=0; i<numChildren; i++) - { - /* ignore leaves as they cannot get split */ - if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i])) - continue; - - /* remember child with largest size */ - if (children[i].size() > bestSize) { - bestSize = children[i].size(); - bestChild = i; - } - } - if (bestChild == -1) break; - - /*! split best child into left and right child */ - __aligned(64) PrimInfoRange left, right; - if (!sameGeometry(children[bestChild])) { - alignedHeuristic.splitByGeometry(children[bestChild],left,right); - } else { - alignedHeuristic.splitFallback(children[bestChild],left,right); - } - - /* add new children left and right */ - children[bestChild] = children[numChildren-1]; - children[numChildren-1] = left; - children[numChildren+0] = right; - numChildren++; - - } while (numChildren < cfg.branchingFactor); - - /* create node */ - auto node = createAABBNode(alloc); - - for (size_t i=0; i<numChildren; i++) { - const NodeRef child = createLargeLeaf(depth+1,children[i],alloc); - setAABBNode(node,i,child,children[i].geomBounds); - } - - return node; - } - - /*! performs split */ - __noinline void split(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo, bool& aligned) // FIXME: not inlined as ICC otherwise uses much stack - { - /* variable to track the SAH of the best splitting approach */ - float bestSAH = inf; - const size_t blocks = (pinfo.size()+(1ull<<cfg.logBlockSize)-1ull) >> cfg.logBlockSize; - const float leafSAH = intCost*float(blocks)*halfArea(pinfo.geomBounds); - - /* try standard binning in aligned space */ - float alignedObjectSAH = inf; - HeuristicBinningSAH::Split alignedObjectSplit; - if (aligned) { - alignedObjectSplit = alignedHeuristic.find(pinfo,cfg.logBlockSize); - alignedObjectSAH = travCostAligned*halfArea(pinfo.geomBounds) + intCost*alignedObjectSplit.splitSAH(); - bestSAH = min(alignedObjectSAH,bestSAH); - } - - /* try standard binning in unaligned space */ - UnalignedHeuristicBinningSAH::Split unalignedObjectSplit; - LinearSpace3fa uspace; - float unalignedObjectSAH = inf; - if (bestSAH > 0.7f*leafSAH) { - uspace = unalignedHeuristic.computeAlignedSpace(pinfo); - const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(pinfo,uspace); - unalignedObjectSplit = unalignedHeuristic.find(sinfo,cfg.logBlockSize,uspace); - unalignedObjectSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*unalignedObjectSplit.splitSAH(); - bestSAH = min(unalignedObjectSAH,bestSAH); - } - - /* try splitting into two strands */ - HeuristicStrandSplitSAH::Split strandSplit; - float strandSAH = inf; - if (bestSAH > 0.7f*leafSAH && pinfo.size() <= 256) { - strandSplit = strandHeuristic.find(pinfo,cfg.logBlockSize); - strandSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*strandSplit.splitSAH(); - bestSAH = min(strandSAH,bestSAH); - } - - /* fallback if SAH heuristics failed */ - if (unlikely(!std::isfinite(bestSAH))) - { - alignedHeuristic.deterministic_order(pinfo); - alignedHeuristic.splitFallback(pinfo,linfo,rinfo); - } - - /* perform aligned split if this is best */ - else if (bestSAH == alignedObjectSAH) { - alignedHeuristic.split(alignedObjectSplit,pinfo,linfo,rinfo); - } - - /* perform unaligned split if this is best */ - else if (bestSAH == unalignedObjectSAH) { - unalignedHeuristic.split(unalignedObjectSplit,uspace,pinfo,linfo,rinfo); - aligned = false; - } - - /* perform strand split if this is best */ - else if (bestSAH == strandSAH) { - strandHeuristic.split(strandSplit,pinfo,linfo,rinfo); - aligned = false; - } - - /* can never happen */ - else - assert(false); - } - - /*! recursive build */ - NodeRef recurse(size_t depth, const PrimInfoRange& pinfo, Allocator alloc, bool toplevel, bool alloc_barrier) - { - /* get thread local allocator */ - if (!alloc) - alloc = createAlloc(); - - /* call memory monitor function to signal progress */ - if (toplevel && pinfo.size() <= SINGLE_THREADED_THRESHOLD) - progressMonitor(pinfo.size()); - - PrimInfoRange children[MAX_BRANCHING_FACTOR]; - - /* create leaf node */ - if (depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || pinfo.size() <= cfg.minLeafSize) { - alignedHeuristic.deterministic_order(pinfo); - return createLargeLeaf(depth,pinfo,alloc); - } - - /* fill all children by always splitting the one with the largest surface area */ - size_t numChildren = 1; - children[0] = pinfo; - bool aligned = true; - - do { - - /* find best child with largest bounding box area */ - ssize_t bestChild = -1; - float bestArea = neg_inf; - for (size_t i=0; i<numChildren; i++) - { - /* ignore leaves as they cannot get split */ - if (children[i].size() <= cfg.minLeafSize) - continue; - - /* remember child with largest area */ - if (area(children[i].geomBounds) > bestArea) { - bestArea = area(children[i].geomBounds); - bestChild = i; - } - } - if (bestChild == -1) break; - - /*! split best child into left and right child */ - PrimInfoRange left, right; - split(children[bestChild],left,right,aligned); - - /* add new children left and right */ - children[bestChild] = children[numChildren-1]; - children[numChildren-1] = left; - children[numChildren+0] = right; - numChildren++; - - } while (numChildren < cfg.branchingFactor); - - NodeRef node; - - /* create aligned node */ - if (aligned) - { - node = createAABBNode(alloc); - - /* spawn tasks or ... */ - if (pinfo.size() > SINGLE_THREADED_THRESHOLD) - { - parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { - for (size_t i=r.begin(); i<r.end(); i++) { - const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold; - setAABBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),children[i].geomBounds); - _mm_mfence(); // to allow non-temporal stores during build - } - }); - } - /* ... continue sequentially */ - else { - for (size_t i=0; i<numChildren; i++) { - const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold; - setAABBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),children[i].geomBounds); - } - } - } - - /* create unaligned node */ - else - { - node = createOBBNode(alloc); - - /* spawn tasks or ... */ - if (pinfo.size() > SINGLE_THREADED_THRESHOLD) - { - parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { - for (size_t i=r.begin(); i<r.end(); i++) { - const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]); - const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space); - const OBBox3fa obounds(space,sinfo.geomBounds); - const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold; - setOBBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),obounds); - _mm_mfence(); // to allow non-temporal stores during build - } - }); - } - /* ... continue sequentially */ - else - { - for (size_t i=0; i<numChildren; i++) { - const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]); - const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space); - const OBBox3fa obounds(space,sinfo.geomBounds); - const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold; - setOBBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),obounds); - } - } - } - - /* reports a finished range of primrefs */ - if (unlikely(alloc_barrier)) - reportFinishedRange(pinfo); - - return node; - } - - private: - Settings cfg; - PrimRef* prims; - const CreateAllocFunc& createAlloc; - const CreateAABBNodeFunc& createAABBNode; - const SetAABBNodeFunc& setAABBNode; - const CreateOBBNodeFunc& createOBBNode; - const SetOBBNodeFunc& setOBBNode; - const CreateLeafFunc& createLeaf; - const ProgressMonitor& progressMonitor; - const ReportFinishedRangeFunc& reportFinishedRange; - - private: - HeuristicBinningSAH alignedHeuristic; - UnalignedHeuristicBinningSAH unalignedHeuristic; - HeuristicStrandSplitSAH strandHeuristic; - }; - - template<typename NodeRef, - typename CreateAllocFunc, - typename CreateAABBNodeFunc, - typename SetAABBNodeFunc, - typename CreateOBBNodeFunc, - typename SetOBBNodeFunc, - typename CreateLeafFunc, - typename ProgressMonitor, - typename ReportFinishedRangeFunc> - - static NodeRef build (const CreateAllocFunc& createAlloc, - const CreateAABBNodeFunc& createAABBNode, - const SetAABBNodeFunc& setAABBNode, - const CreateOBBNodeFunc& createOBBNode, - const SetOBBNodeFunc& setOBBNode, - const CreateLeafFunc& createLeaf, - const ProgressMonitor& progressMonitor, - const ReportFinishedRangeFunc& reportFinishedRange, - Scene* scene, - PrimRef* prims, - const PrimInfo& pinfo, - const Settings settings) - { - typedef BuilderT<NodeRef, - CreateAllocFunc, - CreateAABBNodeFunc,SetAABBNodeFunc, - CreateOBBNodeFunc,SetOBBNodeFunc, - CreateLeafFunc,ProgressMonitor, - ReportFinishedRangeFunc> Builder; - - Builder builder(scene,prims,createAlloc, - createAABBNode,setAABBNode, - createOBBNode,setOBBNode, - createLeaf,progressMonitor,reportFinishedRange,settings); - - NodeRef root = builder.recurse(1,pinfo,nullptr,true,false); - _mm_mfence(); // to allow non-temporal stores during build - return root; - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_morton.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_morton.h deleted file mode 100644 index 92be2f7e65..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_morton.h +++ /dev/null @@ -1,501 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/builder.h" -#include "../../common/algorithms/parallel_reduce.h" - -namespace embree -{ - namespace isa - { - struct BVHBuilderMorton - { - static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor - static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree of we are that many levels before the maximum tree depth - - /*! settings for morton builder */ - struct Settings - { - /*! default settings */ - Settings () - : branchingFactor(2), maxDepth(32), minLeafSize(1), maxLeafSize(7), singleThreadThreshold(1024) {} - - /*! initialize settings from API settings */ - Settings (const RTCBuildArguments& settings) - : branchingFactor(2), maxDepth(32), minLeafSize(1), maxLeafSize(7), singleThreadThreshold(1024) - { - if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor; - if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth; - if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize; - if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize; - - minLeafSize = min(minLeafSize,maxLeafSize); - } - - Settings (size_t branchingFactor, size_t maxDepth, size_t minLeafSize, size_t maxLeafSize, size_t singleThreadThreshold) - : branchingFactor(branchingFactor), maxDepth(maxDepth), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize), singleThreadThreshold(singleThreadThreshold) - { - minLeafSize = min(minLeafSize,maxLeafSize); - } - - public: - size_t branchingFactor; //!< branching factor of BVH to build - size_t maxDepth; //!< maximum depth of BVH to build - size_t minLeafSize; //!< minimum size of a leaf - size_t maxLeafSize; //!< maximum size of a leaf - size_t singleThreadThreshold; //!< threshold when we switch to single threaded build - }; - - /*! Build primitive consisting of morton code and primitive ID. */ - struct __aligned(8) BuildPrim - { - union { - struct { - unsigned int code; //!< morton code - unsigned int index; //!< i'th primitive - }; - uint64_t t; - }; - - /*! interface for radix sort */ - __forceinline operator unsigned() const { return code; } - - /*! interface for standard sort */ - __forceinline bool operator<(const BuildPrim &m) const { return code < m.code; } - }; - - /*! maps bounding box to morton code */ - struct MortonCodeMapping - { - static const size_t LATTICE_BITS_PER_DIM = 10; - static const size_t LATTICE_SIZE_PER_DIM = size_t(1) << LATTICE_BITS_PER_DIM; - - vfloat4 base; - vfloat4 scale; - - __forceinline MortonCodeMapping(const BBox3fa& bounds) - { - base = (vfloat4)bounds.lower; - const vfloat4 diag = (vfloat4)bounds.upper - (vfloat4)bounds.lower; - scale = select(diag > vfloat4(1E-19f), rcp(diag) * vfloat4(LATTICE_SIZE_PER_DIM * 0.99f),vfloat4(0.0f)); - } - - __forceinline const vint4 bin (const BBox3fa& box) const - { - const vfloat4 lower = (vfloat4)box.lower; - const vfloat4 upper = (vfloat4)box.upper; - const vfloat4 centroid = lower+upper; - return vint4((centroid-base)*scale); - } - - __forceinline unsigned int code (const BBox3fa& box) const - { - const vint4 binID = bin(box); - const unsigned int x = extract<0>(binID); - const unsigned int y = extract<1>(binID); - const unsigned int z = extract<2>(binID); - const unsigned int xyz = bitInterleave(x,y,z); - return xyz; - } - }; - -#if defined (__AVX2__) - - /*! for AVX2 there is a fast scalar bitInterleave */ - struct MortonCodeGenerator - { - __forceinline MortonCodeGenerator(const MortonCodeMapping& mapping, BuildPrim* dest) - : mapping(mapping), dest(dest) {} - - __forceinline void operator() (const BBox3fa& b, const unsigned index) - { - dest->index = index; - dest->code = mapping.code(b); - dest++; - } - - public: - const MortonCodeMapping mapping; - BuildPrim* dest; - size_t currentID; - }; - -#else - - /*! before AVX2 is it better to use the SSE version of bitInterleave */ - struct MortonCodeGenerator - { - __forceinline MortonCodeGenerator(const MortonCodeMapping& mapping, BuildPrim* dest) - : mapping(mapping), dest(dest), currentID(0), slots(0), ax(0), ay(0), az(0), ai(0) {} - - __forceinline ~MortonCodeGenerator() - { - if (slots != 0) - { - const vint4 code = bitInterleave(ax,ay,az); - for (size_t i=0; i<slots; i++) { - dest[currentID-slots+i].index = ai[i]; - dest[currentID-slots+i].code = code[i]; - } - } - } - - __forceinline void operator() (const BBox3fa& b, const unsigned index) - { - const vint4 binID = mapping.bin(b); - ax[slots] = extract<0>(binID); - ay[slots] = extract<1>(binID); - az[slots] = extract<2>(binID); - ai[slots] = index; - slots++; - currentID++; - - if (slots == 4) - { - const vint4 code = bitInterleave(ax,ay,az); - vint4::storeu(&dest[currentID-4],unpacklo(code,ai)); - vint4::storeu(&dest[currentID-2],unpackhi(code,ai)); - slots = 0; - } - } - - public: - const MortonCodeMapping mapping; - BuildPrim* dest; - size_t currentID; - size_t slots; - vint4 ax, ay, az, ai; - }; - -#endif - - template< - typename ReductionTy, - typename Allocator, - typename CreateAllocator, - typename CreateNodeFunc, - typename SetNodeBoundsFunc, - typename CreateLeafFunc, - typename CalculateBounds, - typename ProgressMonitor> - - class BuilderT : private Settings - { - ALIGNED_CLASS_(16); - - public: - - BuilderT (CreateAllocator& createAllocator, - CreateNodeFunc& createNode, - SetNodeBoundsFunc& setBounds, - CreateLeafFunc& createLeaf, - CalculateBounds& calculateBounds, - ProgressMonitor& progressMonitor, - const Settings& settings) - - : Settings(settings), - createAllocator(createAllocator), - createNode(createNode), - setBounds(setBounds), - createLeaf(createLeaf), - calculateBounds(calculateBounds), - progressMonitor(progressMonitor), - morton(nullptr) {} - - ReductionTy createLargeLeaf(size_t depth, const range<unsigned>& current, Allocator alloc) - { - /* this should never occur but is a fatal error */ - if (depth > maxDepth) - throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached"); - - /* create leaf for few primitives */ - if (current.size() <= maxLeafSize) - return createLeaf(current,alloc); - - /* fill all children by always splitting the largest one */ - range<unsigned> children[MAX_BRANCHING_FACTOR]; - size_t numChildren = 1; - children[0] = current; - - do { - - /* find best child with largest number of primitives */ - size_t bestChild = -1; - size_t bestSize = 0; - for (size_t i=0; i<numChildren; i++) - { - /* ignore leaves as they cannot get split */ - if (children[i].size() <= maxLeafSize) - continue; - - /* remember child with largest size */ - if (children[i].size() > bestSize) { - bestSize = children[i].size(); - bestChild = i; - } - } - if (bestChild == size_t(-1)) break; - - /*! split best child into left and right child */ - auto split = children[bestChild].split(); - - /* add new children left and right */ - children[bestChild] = children[numChildren-1]; - children[numChildren-1] = split.first; - children[numChildren+0] = split.second; - numChildren++; - - } while (numChildren < branchingFactor); - - /* create node */ - auto node = createNode(alloc,numChildren); - - /* recurse into each child */ - ReductionTy bounds[MAX_BRANCHING_FACTOR]; - for (size_t i=0; i<numChildren; i++) - bounds[i] = createLargeLeaf(depth+1,children[i],alloc); - - return setBounds(node,bounds,numChildren); - } - - /*! recreates morton codes when reaching a region where all codes are identical */ - __noinline void recreateMortonCodes(const range<unsigned>& current) const - { - /* fast path for small ranges */ - if (likely(current.size() < 1024)) - { - /*! recalculate centroid bounds */ - BBox3fa centBounds(empty); - for (size_t i=current.begin(); i<current.end(); i++) - centBounds.extend(center2(calculateBounds(morton[i]))); - - /* recalculate morton codes */ - MortonCodeMapping mapping(centBounds); - for (size_t i=current.begin(); i<current.end(); i++) - morton[i].code = mapping.code(calculateBounds(morton[i])); - - /* sort morton codes */ - std::sort(morton+current.begin(),morton+current.end()); - } - else - { - /*! recalculate centroid bounds */ - auto calculateCentBounds = [&] ( const range<unsigned>& r ) { - BBox3fa centBounds = empty; - for (size_t i=r.begin(); i<r.end(); i++) - centBounds.extend(center2(calculateBounds(morton[i]))); - return centBounds; - }; - const BBox3fa centBounds = parallel_reduce(current.begin(), current.end(), unsigned(1024), - BBox3fa(empty), calculateCentBounds, BBox3fa::merge); - - /* recalculate morton codes */ - MortonCodeMapping mapping(centBounds); - parallel_for(current.begin(), current.end(), unsigned(1024), [&] ( const range<unsigned>& r ) { - for (size_t i=r.begin(); i<r.end(); i++) { - morton[i].code = mapping.code(calculateBounds(morton[i])); - } - }); - - /*! sort morton codes */ -#if defined(TASKING_TBB) - tbb::parallel_sort(morton+current.begin(),morton+current.end()); -#else - radixsort32(morton+current.begin(),current.size()); -#endif - } - } - - __forceinline void split(const range<unsigned>& current, range<unsigned>& left, range<unsigned>& right) const - { - const unsigned int code_start = morton[current.begin()].code; - const unsigned int code_end = morton[current.end()-1].code; - unsigned int bitpos = lzcnt(code_start^code_end); - - /* if all items mapped to same morton code, then re-create new morton codes for the items */ - if (unlikely(bitpos == 32)) - { - recreateMortonCodes(current); - const unsigned int code_start = morton[current.begin()].code; - const unsigned int code_end = morton[current.end()-1].code; - bitpos = lzcnt(code_start^code_end); - - /* if the morton code is still the same, goto fall back split */ - if (unlikely(bitpos == 32)) { - current.split(left,right); - return; - } - } - - /* split the items at the topmost different morton code bit */ - const unsigned int bitpos_diff = 31-bitpos; - const unsigned int bitmask = 1 << bitpos_diff; - - /* find location where bit differs using binary search */ - unsigned begin = current.begin(); - unsigned end = current.end(); - while (begin + 1 != end) { - const unsigned mid = (begin+end)/2; - const unsigned bit = morton[mid].code & bitmask; - if (bit == 0) begin = mid; else end = mid; - } - unsigned center = end; -#if defined(DEBUG) - for (unsigned int i=begin; i<center; i++) assert((morton[i].code & bitmask) == 0); - for (unsigned int i=center; i<end; i++) assert((morton[i].code & bitmask) == bitmask); -#endif - - left = make_range(current.begin(),center); - right = make_range(center,current.end()); - } - - ReductionTy recurse(size_t depth, const range<unsigned>& current, Allocator alloc, bool toplevel) - { - /* get thread local allocator */ - if (!alloc) - alloc = createAllocator(); - - /* call memory monitor function to signal progress */ - if (toplevel && current.size() <= singleThreadThreshold) - progressMonitor(current.size()); - - /* create leaf node */ - if (unlikely(depth+MIN_LARGE_LEAF_LEVELS >= maxDepth || current.size() <= minLeafSize)) - return createLargeLeaf(depth,current,alloc); - - /* fill all children by always splitting the one with the largest surface area */ - range<unsigned> children[MAX_BRANCHING_FACTOR]; - split(current,children[0],children[1]); - size_t numChildren = 2; - - while (numChildren < branchingFactor) - { - /* find best child with largest number of primitives */ - int bestChild = -1; - unsigned bestItems = 0; - for (unsigned int i=0; i<numChildren; i++) - { - /* ignore leaves as they cannot get split */ - if (children[i].size() <= minLeafSize) - continue; - - /* remember child with largest area */ - if (children[i].size() > bestItems) { - bestItems = children[i].size(); - bestChild = i; - } - } - if (bestChild == -1) break; - - /*! split best child into left and right child */ - range<unsigned> left, right; - split(children[bestChild],left,right); - - /* add new children left and right */ - children[bestChild] = children[numChildren-1]; - children[numChildren-1] = left; - children[numChildren+0] = right; - numChildren++; - } - - /* create leaf node if no split is possible */ - if (unlikely(numChildren == 1)) - return createLeaf(current,alloc); - - /* allocate node */ - auto node = createNode(alloc,numChildren); - - /* process top parts of tree parallel */ - ReductionTy bounds[MAX_BRANCHING_FACTOR]; - if (current.size() > singleThreadThreshold) - { - /*! parallel_for is faster than spawing sub-tasks */ - parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { - for (size_t i=r.begin(); i<r.end(); i++) { - bounds[i] = recurse(depth+1,children[i],nullptr,true); - _mm_mfence(); // to allow non-temporal stores during build - } - }); - } - - /* finish tree sequentially */ - else - { - for (size_t i=0; i<numChildren; i++) - bounds[i] = recurse(depth+1,children[i],alloc,false); - } - - return setBounds(node,bounds,numChildren); - } - - /* build function */ - ReductionTy build(BuildPrim* src, BuildPrim* tmp, size_t numPrimitives) - { - /* sort morton codes */ - morton = src; - radix_sort_u32(src,tmp,numPrimitives,singleThreadThreshold); - - /* build BVH */ - const ReductionTy root = recurse(1, range<unsigned>(0,(unsigned)numPrimitives), nullptr, true); - _mm_mfence(); // to allow non-temporal stores during build - return root; - } - - public: - CreateAllocator& createAllocator; - CreateNodeFunc& createNode; - SetNodeBoundsFunc& setBounds; - CreateLeafFunc& createLeaf; - CalculateBounds& calculateBounds; - ProgressMonitor& progressMonitor; - - public: - BuildPrim* morton; - }; - - - template< - typename ReductionTy, - typename CreateAllocFunc, - typename CreateNodeFunc, - typename SetBoundsFunc, - typename CreateLeafFunc, - typename CalculateBoundsFunc, - typename ProgressMonitor> - - static ReductionTy build(CreateAllocFunc createAllocator, - CreateNodeFunc createNode, - SetBoundsFunc setBounds, - CreateLeafFunc createLeaf, - CalculateBoundsFunc calculateBounds, - ProgressMonitor progressMonitor, - BuildPrim* src, - BuildPrim* tmp, - size_t numPrimitives, - const Settings& settings) - { - typedef BuilderT< - ReductionTy, - decltype(createAllocator()), - CreateAllocFunc, - CreateNodeFunc, - SetBoundsFunc, - CreateLeafFunc, - CalculateBoundsFunc, - ProgressMonitor> Builder; - - Builder builder(createAllocator, - createNode, - setBounds, - createLeaf, - calculateBounds, - progressMonitor, - settings); - - return builder.build(src,tmp,numPrimitives); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h deleted file mode 100644 index 4c138dacdb..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h +++ /dev/null @@ -1,692 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#define MBLUR_NUM_TEMPORAL_BINS 2 -#define MBLUR_NUM_OBJECT_BINS 32 - -#include "../bvh/bvh.h" -#include "../common/primref_mb.h" -#include "heuristic_binning_array_aligned.h" -#include "heuristic_timesplit_array.h" - -namespace embree -{ - namespace isa - { - template<typename T> - struct SharedVector - { - __forceinline SharedVector() {} - - __forceinline SharedVector(T* ptr, size_t refCount = 1) - : prims(ptr), refCount(refCount) {} - - __forceinline void incRef() { - refCount++; - } - - __forceinline void decRef() - { - if (--refCount == 0) - delete prims; - } - - T* prims; - size_t refCount; - }; - - template<typename BuildRecord, int MAX_BRANCHING_FACTOR> - struct LocalChildListT - { - typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector; - - __forceinline LocalChildListT (const BuildRecord& record) - : numChildren(1), numSharedPrimVecs(1) - { - /* the local root will be freed in the ancestor where it was created (thus refCount is 2) */ - children[0] = record; - primvecs[0] = new (&sharedPrimVecs[0]) SharedPrimRefVector(record.prims.prims, 2); - } - - __forceinline ~LocalChildListT() - { - for (size_t i = 0; i < numChildren; i++) - primvecs[i]->decRef(); - } - - __forceinline BuildRecord& operator[] ( const size_t i ) { - return children[i]; - } - - __forceinline size_t size() const { - return numChildren; - } - - __forceinline void split(ssize_t bestChild, const BuildRecord& lrecord, const BuildRecord& rrecord, std::unique_ptr<mvector<PrimRefMB>> new_vector) - { - SharedPrimRefVector* bsharedPrimVec = primvecs[bestChild]; - if (lrecord.prims.prims == bsharedPrimVec->prims) { - primvecs[bestChild] = bsharedPrimVec; - bsharedPrimVec->incRef(); - } - else { - primvecs[bestChild] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(lrecord.prims.prims); - } - - if (rrecord.prims.prims == bsharedPrimVec->prims) { - primvecs[numChildren] = bsharedPrimVec; - bsharedPrimVec->incRef(); - } - else { - primvecs[numChildren] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(rrecord.prims.prims); - } - bsharedPrimVec->decRef(); - new_vector.release(); - - children[bestChild] = lrecord; - children[numChildren] = rrecord; - numChildren++; - } - - public: - array_t<BuildRecord,MAX_BRANCHING_FACTOR> children; - array_t<SharedPrimRefVector*,MAX_BRANCHING_FACTOR> primvecs; - size_t numChildren; - - array_t<SharedPrimRefVector,2*MAX_BRANCHING_FACTOR> sharedPrimVecs; - size_t numSharedPrimVecs; - }; - - template<typename Mesh> - struct RecalculatePrimRef - { - Scene* scene; - - __forceinline RecalculatePrimRef (Scene* scene) - : scene(scene) {} - - __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const - { - const unsigned geomID = prim.geomID(); - const unsigned primID = prim.primID(); - const Mesh* mesh = scene->get<Mesh>(geomID); - const LBBox3fa lbounds = mesh->linearBounds(primID, time_range); - const range<int> tbounds = mesh->timeSegmentRange(time_range); - return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID); - } - - // __noinline is workaround for ICC16 bug under MacOSX - __noinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const - { - const unsigned geomID = prim.geomID(); - const unsigned primID = prim.primID(); - const Mesh* mesh = scene->get<Mesh>(geomID); - const LBBox3fa lbounds = mesh->linearBounds(space, primID, time_range); - const range<int> tbounds = mesh->timeSegmentRange(time_range); - return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID); - } - - __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const { - return scene->get<Mesh>(prim.geomID())->linearBounds(prim.primID(), time_range); - } - - // __noinline is workaround for ICC16 bug under MacOSX - __noinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const { - return scene->get<Mesh>(prim.geomID())->linearBounds(space, prim.primID(), time_range); - } - }; - - struct VirtualRecalculatePrimRef - { - Scene* scene; - - __forceinline VirtualRecalculatePrimRef (Scene* scene) - : scene(scene) {} - - __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const - { - const unsigned geomID = prim.geomID(); - const unsigned primID = prim.primID(); - const Geometry* mesh = scene->get(geomID); - const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range); - const range<int> tbounds = mesh->timeSegmentRange(time_range); - return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID); - } - - __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const - { - const unsigned geomID = prim.geomID(); - const unsigned primID = prim.primID(); - const Geometry* mesh = scene->get(geomID); - const LBBox3fa lbounds = mesh->vlinearBounds(space, primID, time_range); - const range<int> tbounds = mesh->timeSegmentRange(time_range); - return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID); - } - - __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const { - return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range); - } - - __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const { - return scene->get(prim.geomID())->vlinearBounds(space, prim.primID(), time_range); - } - }; - - struct BVHBuilderMSMBlur - { - /*! settings for msmblur builder */ - struct Settings - { - /*! default settings */ - Settings () - : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8), - travCost(1.0f), intCost(1.0f), singleLeafTimeSegment(false), - singleThreadThreshold(1024) {} - - - Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold) - : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize), - travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold) - { - minLeafSize = min(minLeafSize,maxLeafSize); - } - - public: - size_t branchingFactor; //!< branching factor of BVH to build - size_t maxDepth; //!< maximum depth of BVH to build - size_t logBlockSize; //!< log2 of blocksize for SAH heuristic - size_t minLeafSize; //!< minimum size of a leaf - size_t maxLeafSize; //!< maximum size of a leaf - float travCost; //!< estimated cost of one traversal step - float intCost; //!< estimated cost of one primitive intersection - bool singleLeafTimeSegment; //!< split time to single time range - size_t singleThreadThreshold; //!< threshold when we switch to single threaded build - }; - - struct BuildRecord - { - public: - __forceinline BuildRecord () {} - - __forceinline BuildRecord (size_t depth) - : depth(depth) {} - - __forceinline BuildRecord (const SetMB& prims, size_t depth) - : depth(depth), prims(prims) {} - - __forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) { - return a.prims.size() < b.prims.size(); - } - - __forceinline size_t size() const { - return prims.size(); - } - - public: - size_t depth; //!< Depth of the root of this subtree. - SetMB prims; //!< The list of primitives. - }; - - struct BuildRecordSplit : public BuildRecord - { - __forceinline BuildRecordSplit () {} - - __forceinline BuildRecordSplit (size_t depth) - : BuildRecord(depth) {} - - __forceinline BuildRecordSplit (const BuildRecord& record, const BinSplit<MBLUR_NUM_OBJECT_BINS>& split) - : BuildRecord(record), split(split) {} - - BinSplit<MBLUR_NUM_OBJECT_BINS> split; - }; - - template< - typename NodeRef, - typename RecalculatePrimRef, - typename Allocator, - typename CreateAllocFunc, - typename CreateNodeFunc, - typename SetNodeFunc, - typename CreateLeafFunc, - typename ProgressMonitor> - - class BuilderT - { - ALIGNED_CLASS_(16); - static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor - static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth - - typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D; - typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split; - typedef mvector<PrimRefMB>* PrimRefVector; - typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector; - typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList; - typedef LocalChildListT<BuildRecordSplit,MAX_BRANCHING_FACTOR> LocalChildListSplit; - - public: - - BuilderT (MemoryMonitorInterface* device, - const RecalculatePrimRef recalculatePrimRef, - const CreateAllocFunc createAlloc, - const CreateNodeFunc createNode, - const SetNodeFunc setNode, - const CreateLeafFunc createLeaf, - const ProgressMonitor progressMonitor, - const Settings& settings) - : cfg(settings), - heuristicObjectSplit(), - heuristicTemporalSplit(device, recalculatePrimRef), - recalculatePrimRef(recalculatePrimRef), createAlloc(createAlloc), createNode(createNode), setNode(setNode), createLeaf(createLeaf), - progressMonitor(progressMonitor) - { - if (cfg.branchingFactor > MAX_BRANCHING_FACTOR) - throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large"); - } - - /*! finds the best split */ - const Split find(const SetMB& set) - { - /* first try standard object split */ - const Split object_split = heuristicObjectSplit.find(set,cfg.logBlockSize); - const float object_split_sah = object_split.splitSAH(); - - /* test temporal splits only when object split was bad */ - const float leaf_sah = set.leafSAH(cfg.logBlockSize); - if (object_split_sah < 0.50f*leaf_sah) - return object_split; - - /* do temporal splits only if the the time range is big enough */ - if (set.time_range.size() > 1.01f/float(set.max_num_time_segments)) - { - const Split temporal_split = heuristicTemporalSplit.find(set,cfg.logBlockSize); - const float temporal_split_sah = temporal_split.splitSAH(); - - /* take temporal split if it improved SAH */ - if (temporal_split_sah < object_split_sah) - return temporal_split; - } - - return object_split; - } - - /*! array partitioning */ - __forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset) - { - /* perform object split */ - if (likely(split.data == Split::SPLIT_OBJECT)) { - heuristicObjectSplit.split(split,set,lset,rset); - } - /* perform temporal split */ - else if (likely(split.data == Split::SPLIT_TEMPORAL)) { - return heuristicTemporalSplit.split(split,set,lset,rset); - } - /* perform fallback split */ - else if (unlikely(split.data == Split::SPLIT_FALLBACK)) { - set.deterministic_order(); - splitFallback(set,lset,rset); - } - /* split by geometry */ - else if (unlikely(split.data == Split::SPLIT_GEOMID)) { - set.deterministic_order(); - splitByGeometry(set,lset,rset); - } - else - assert(false); - - return std::unique_ptr<mvector<PrimRefMB>>(); - } - - /*! finds the best fallback split */ - __noinline Split findFallback(const SetMB& set) - { - /* split if primitives are not from same geometry */ - if (!sameGeometry(set)) - return Split(0.0f,Split::SPLIT_GEOMID); - - /* if a leaf can only hold a single time-segment, we might have to do additional temporal splits */ - if (cfg.singleLeafTimeSegment) - { - /* test if one primitive has more than one time segment in time range, if so split time */ - for (size_t i=set.begin(); i<set.end(); i++) - { - const PrimRefMB& prim = (*set.prims)[i]; - const range<int> itime_range = prim.timeSegmentRange(set.time_range); - const int localTimeSegments = itime_range.size(); - assert(localTimeSegments > 0); - if (localTimeSegments > 1) { - const int icenter = (itime_range.begin() + itime_range.end())/2; - const float splitTime = prim.timeStep(icenter); - return Split(0.0f,(unsigned)Split::SPLIT_TEMPORAL,0,splitTime); - } - } - } - - /* otherwise return fallback split */ - return Split(0.0f,Split::SPLIT_FALLBACK); - } - - /*! performs fallback split */ - void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset) - { - mvector<PrimRefMB>& prims = *set.prims; - - const size_t begin = set.begin(); - const size_t end = set.end(); - const size_t center = (begin + end)/2; - - PrimInfoMB linfo = empty; - for (size_t i=begin; i<center; i++) - linfo.add_primref(prims[i]); - - PrimInfoMB rinfo = empty; - for (size_t i=center; i<end; i++) - rinfo.add_primref(prims[i]); - - new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range); - new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range); - } - - /*! checks if all primitives are from the same geometry */ - __forceinline bool sameGeometry(const SetMB& set) - { - if (set.size() == 0) return true; - mvector<PrimRefMB>& prims = *set.prims; - const size_t begin = set.begin(); - const size_t end = set.end(); - unsigned int firstGeomID = prims[begin].geomID(); - for (size_t i=begin+1; i<end; i++) { - if (prims[i].geomID() != firstGeomID){ - return false; - } - } - return true; - } - - /* split by geometry ID */ - void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset) - { - assert(set.size() > 1); - - mvector<PrimRefMB>& prims = *set.prims; - const size_t begin = set.begin(); - const size_t end = set.end(); - - PrimInfoMB left(empty); - PrimInfoMB right(empty); - unsigned int geomID = prims[begin].geomID(); - size_t center = serial_partitioning(prims.data(),begin,end,left,right, - [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; }, - [ ] ( PrimInfoMB& dst, const PrimRefMB& prim ) { dst.add_primref(prim); }); - - new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range); - new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range); - } - - const NodeRecordMB4D createLargeLeaf(const BuildRecord& in, Allocator alloc) - { - /* this should never occur but is a fatal error */ - if (in.depth > cfg.maxDepth) - throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached"); - - /* replace already found split by fallback split */ - const BuildRecordSplit current(BuildRecord(in.prims,in.depth),findFallback(in.prims)); - - /* special case when directly creating leaf without any splits that could shrink time_range */ - bool force_split = false; - if (current.depth == 1 && current.size() > 0) - { - BBox1f c = empty; - BBox1f p = current.prims.time_range; - for (size_t i=current.prims.begin(); i<current.prims.end(); i++) { - mvector<PrimRefMB>& prims = *current.prims.prims; - c.extend(prims[i].time_range); - } - - force_split = c.lower > p.lower || c.upper < p.upper; - } - - /* create leaf for few primitives */ - if (current.size() <= cfg.maxLeafSize && current.split.data < Split::SPLIT_ENFORCE && !force_split) - return createLeaf(current,alloc); - - /* fill all children by always splitting the largest one */ - bool hasTimeSplits = false; - NodeRecordMB4D values[MAX_BRANCHING_FACTOR]; - LocalChildListSplit children(current); - - do { - /* find best child with largest bounding box area */ - size_t bestChild = -1; - size_t bestSize = 0; - for (size_t i=0; i<children.size(); i++) - { - /* ignore leaves as they cannot get split */ - if (children[i].size() <= cfg.maxLeafSize && children[i].split.data < Split::SPLIT_ENFORCE && !force_split) - continue; - - force_split = false; - - /* remember child with largest size */ - if (children[i].size() > bestSize) { - bestSize = children[i].size(); - bestChild = i; - } - } - if (bestChild == -1) break; - - /* perform best found split */ - BuildRecordSplit& brecord = children[bestChild]; - BuildRecordSplit lrecord(current.depth+1); - BuildRecordSplit rrecord(current.depth+1); - std::unique_ptr<mvector<PrimRefMB>> new_vector = split(brecord.split,brecord.prims,lrecord.prims,rrecord.prims); - hasTimeSplits |= new_vector != nullptr; - - /* find new splits */ - lrecord.split = findFallback(lrecord.prims); - rrecord.split = findFallback(rrecord.prims); - children.split(bestChild,lrecord,rrecord,std::move(new_vector)); - - } while (children.size() < cfg.branchingFactor); - - /* detect time_ranges that have shrunken */ - for (size_t i=0; i<children.size(); i++) { - const BBox1f c = children[i].prims.time_range; - const BBox1f p = in.prims.time_range; - hasTimeSplits |= c.lower > p.lower || c.upper < p.upper; - } - - /* create node */ - auto node = createNode(children.children.data(),children.numChildren,alloc,hasTimeSplits); - - /* recurse into each child and perform reduction */ - LBBox3fa gbounds = empty; - for (size_t i=0; i<children.size(); i++) { - values[i] = createLargeLeaf(children[i],alloc); - gbounds.extend(values[i].lbounds); - } - - setNode(current,children.children.data(),node,values,children.numChildren); - - /* calculate geometry bounds of this node */ - if (hasTimeSplits) - return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range); - else - return NodeRecordMB4D(node,gbounds,current.prims.time_range); - } - - const NodeRecordMB4D recurse(const BuildRecord& current, Allocator alloc, bool toplevel) - { - /* get thread local allocator */ - if (!alloc) - alloc = createAlloc(); - - /* call memory monitor function to signal progress */ - if (toplevel && current.size() <= cfg.singleThreadThreshold) - progressMonitor(current.size()); - - /*! find best split */ - const Split csplit = find(current.prims); - - /*! compute leaf and split cost */ - const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize); - const float splitSAH = cfg.travCost*current.prims.halfArea()+cfg.intCost*csplit.splitSAH(); - assert((current.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0))); - - /*! create a leaf node when threshold reached or SAH tells us to stop */ - if (current.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) { - current.prims.deterministic_order(); - return createLargeLeaf(current,alloc); - } - - /*! perform initial split */ - SetMB lprims,rprims; - std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,current.prims,lprims,rprims); - bool hasTimeSplits = new_vector != nullptr; - NodeRecordMB4D values[MAX_BRANCHING_FACTOR]; - LocalChildList children(current); - { - BuildRecord lrecord(lprims,current.depth+1); - BuildRecord rrecord(rprims,current.depth+1); - children.split(0,lrecord,rrecord,std::move(new_vector)); - } - - /*! split until node is full or SAH tells us to stop */ - while (children.size() < cfg.branchingFactor) - { - /*! find best child to split */ - float bestArea = neg_inf; - ssize_t bestChild = -1; - for (size_t i=0; i<children.size(); i++) - { - if (children[i].size() <= cfg.minLeafSize) continue; - if (expectedApproxHalfArea(children[i].prims.geomBounds) > bestArea) { - bestChild = i; bestArea = expectedApproxHalfArea(children[i].prims.geomBounds); - } - } - if (bestChild == -1) break; - - /* perform split */ - BuildRecord& brecord = children[bestChild]; - BuildRecord lrecord(current.depth+1); - BuildRecord rrecord(current.depth+1); - Split csplit = find(brecord.prims); - std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,brecord.prims,lrecord.prims,rrecord.prims); - hasTimeSplits |= new_vector != nullptr; - children.split(bestChild,lrecord,rrecord,std::move(new_vector)); - } - - /* detect time_ranges that have shrunken */ - for (size_t i=0; i<children.size(); i++) { - const BBox1f c = children[i].prims.time_range; - const BBox1f p = current.prims.time_range; - hasTimeSplits |= c.lower > p.lower || c.upper < p.upper; - } - - /* sort buildrecords for simpler shadow ray traversal */ - //std::sort(&children[0],&children[children.size()],std::greater<BuildRecord>()); // FIXME: reduces traversal performance of bvh8.triangle4 (need to verified) !! - - /*! create an inner node */ - auto node = createNode(children.children.data(), children.numChildren, alloc, hasTimeSplits); - LBBox3fa gbounds = empty; - - /* spawn tasks */ - if (unlikely(current.size() > cfg.singleThreadThreshold)) - { - /*! parallel_for is faster than spawing sub-tasks */ - parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) { - for (size_t i=r.begin(); i<r.end(); i++) { - values[i] = recurse(children[i],nullptr,true); - _mm_mfence(); // to allow non-temporal stores during build - } - }); - - /*! merge bounding boxes */ - for (size_t i=0; i<children.size(); i++) - gbounds.extend(values[i].lbounds); - } - /* recurse into each child */ - else - { - //for (size_t i=0; i<children.size(); i++) - for (ssize_t i=children.size()-1; i>=0; i--) { - values[i] = recurse(children[i],alloc,false); - gbounds.extend(values[i].lbounds); - } - } - - setNode(current,children.children.data(),node,values,children.numChildren); - - /* calculate geometry bounds of this node */ - if (unlikely(hasTimeSplits)) - return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range); - else - return NodeRecordMB4D(node,gbounds,current.prims.time_range); - } - - /*! builder entry function */ - __forceinline const NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo) - { - const SetMB set(pinfo,&prims); - auto ret = recurse(BuildRecord(set,1),nullptr,true); - _mm_mfence(); // to allow non-temporal stores during build - return ret; - } - - private: - Settings cfg; - HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> heuristicObjectSplit; - HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> heuristicTemporalSplit; - const RecalculatePrimRef recalculatePrimRef; - const CreateAllocFunc createAlloc; - const CreateNodeFunc createNode; - const SetNodeFunc setNode; - const CreateLeafFunc createLeaf; - const ProgressMonitor progressMonitor; - }; - - template<typename NodeRef, - typename RecalculatePrimRef, - typename CreateAllocFunc, - typename CreateNodeFunc, - typename SetNodeFunc, - typename CreateLeafFunc, - typename ProgressMonitorFunc> - - static const BVHNodeRecordMB4D<NodeRef> build(mvector<PrimRefMB>& prims, - const PrimInfoMB& pinfo, - MemoryMonitorInterface* device, - const RecalculatePrimRef recalculatePrimRef, - const CreateAllocFunc createAlloc, - const CreateNodeFunc createNode, - const SetNodeFunc setNode, - const CreateLeafFunc createLeaf, - const ProgressMonitorFunc progressMonitor, - const Settings& settings) - { - typedef BuilderT< - NodeRef, - RecalculatePrimRef, - decltype(createAlloc()), - CreateAllocFunc, - CreateNodeFunc, - SetNodeFunc, - CreateLeafFunc, - ProgressMonitorFunc> Builder; - - Builder builder(device, - recalculatePrimRef, - createAlloc, - createNode, - setNode, - createLeaf, - progressMonitor, - settings); - - - return builder(prims,pinfo); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur_hair.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur_hair.h deleted file mode 100644 index e477c313a3..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur_hair.h +++ /dev/null @@ -1,526 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../bvh/bvh.h" -#include "../geometry/primitive.h" -#include "../builders/bvh_builder_msmblur.h" -#include "../builders/heuristic_binning_array_aligned.h" -#include "../builders/heuristic_binning_array_unaligned.h" -#include "../builders/heuristic_timesplit_array.h" - -namespace embree -{ - namespace isa - { - struct BVHBuilderHairMSMBlur - { - /*! settings for msmblur builder */ - struct Settings - { - /*! default settings */ - Settings () - : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8) {} - - public: - size_t branchingFactor; //!< branching factor of BVH to build - size_t maxDepth; //!< maximum depth of BVH to build - size_t logBlockSize; //!< log2 of blocksize for SAH heuristic - size_t minLeafSize; //!< minimum size of a leaf - size_t maxLeafSize; //!< maximum size of a leaf - }; - - struct BuildRecord - { - public: - __forceinline BuildRecord () {} - - __forceinline BuildRecord (size_t depth) - : depth(depth) {} - - __forceinline BuildRecord (const SetMB& prims, size_t depth) - : depth(depth), prims(prims) {} - - __forceinline size_t size() const { - return prims.size(); - } - - public: - size_t depth; //!< depth of the root of this subtree - SetMB prims; //!< the list of primitives - }; - - template<typename NodeRef, - typename RecalculatePrimRef, - typename CreateAllocFunc, - typename CreateAABBNodeMBFunc, - typename SetAABBNodeMBFunc, - typename CreateOBBNodeMBFunc, - typename SetOBBNodeMBFunc, - typename CreateLeafFunc, - typename ProgressMonitor> - - class BuilderT - { - ALIGNED_CLASS_(16); - - static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor - static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth - static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build - - typedef BVHNodeRecordMB<NodeRef> NodeRecordMB; - typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D; - - typedef FastAllocator::CachedAllocator Allocator; - typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList; - - typedef HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> HeuristicTemporal; - typedef HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> HeuristicBinning; - typedef UnalignedHeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> UnalignedHeuristicBinning; - - public: - - BuilderT (Scene* scene, - const RecalculatePrimRef& recalculatePrimRef, - const CreateAllocFunc& createAlloc, - const CreateAABBNodeMBFunc& createAABBNodeMB, - const SetAABBNodeMBFunc& setAABBNodeMB, - const CreateOBBNodeMBFunc& createOBBNodeMB, - const SetOBBNodeMBFunc& setOBBNodeMB, - const CreateLeafFunc& createLeaf, - const ProgressMonitor& progressMonitor, - const Settings settings) - - : cfg(settings), - scene(scene), - recalculatePrimRef(recalculatePrimRef), - createAlloc(createAlloc), - createAABBNodeMB(createAABBNodeMB), setAABBNodeMB(setAABBNodeMB), - createOBBNodeMB(createOBBNodeMB), setOBBNodeMB(setOBBNodeMB), - createLeaf(createLeaf), - progressMonitor(progressMonitor), - unalignedHeuristic(scene), - temporalSplitHeuristic(scene->device,recalculatePrimRef) {} - - private: - - /*! checks if all primitives are from the same geometry */ - __forceinline bool sameGeometry(const SetMB& set) - { - mvector<PrimRefMB>& prims = *set.prims; - unsigned int firstGeomID = prims[set.begin()].geomID(); - for (size_t i=set.begin()+1; i<set.end(); i++) { - if (prims[i].geomID() != firstGeomID){ - return false; - } - } - return true; - } - - /*! performs some split if SAH approaches fail */ - void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset) - { - mvector<PrimRefMB>& prims = *set.prims; - - const size_t begin = set.begin(); - const size_t end = set.end(); - const size_t center = (begin + end)/2; - - PrimInfoMB linfo = empty; - for (size_t i=begin; i<center; i++) - linfo.add_primref(prims[i]); - - PrimInfoMB rinfo = empty; - for (size_t i=center; i<end; i++) - rinfo.add_primref(prims[i]); - - new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range); - new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range); - } - - void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset) - { - assert(set.size() > 1); - const size_t begin = set.begin(); - const size_t end = set.end(); - PrimInfoMB linfo(empty); - PrimInfoMB rinfo(empty); - unsigned int geomID = (*set.prims)[begin].geomID(); - size_t center = serial_partitioning(set.prims->data(),begin,end,linfo,rinfo, - [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; }, - [ ] ( PrimInfoMB& a, const PrimRefMB& ref ) { a.add_primref(ref); }); - - new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range); - new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range); - } - - /*! creates a large leaf that could be larger than supported by the BVH */ - NodeRecordMB4D createLargeLeaf(BuildRecord& current, Allocator alloc) - { - /* this should never occur but is a fatal error */ - if (current.depth > cfg.maxDepth) - throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached"); - - /* special case when directly creating leaf without any splits that could shrink time_range */ - bool force_split = false; - if (current.depth == 1 && current.size() > 0) - { - BBox1f c = empty; - BBox1f p = current.prims.time_range; - for (size_t i=current.prims.begin(); i<current.prims.end(); i++) { - mvector<PrimRefMB>& prims = *current.prims.prims; - c.extend(prims[i].time_range); - } - - force_split = c.lower > p.lower || c.upper < p.upper; - } - - /* create leaf for few primitives */ - if (current.size() <= cfg.maxLeafSize && sameGeometry(current.prims) && !force_split) - return createLeaf(current.prims,alloc); - - /* fill all children by always splitting the largest one */ - LocalChildList children(current); - NodeRecordMB4D values[MAX_BRANCHING_FACTOR]; - - do { - - /* find best child with largest bounding box area */ - int bestChild = -1; - size_t bestSize = 0; - for (unsigned i=0; i<children.size(); i++) - { - /* ignore leaves as they cannot get split */ - if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i].prims) && !force_split) - continue; - - force_split = false; - - /* remember child with largest size */ - if (children[i].size() > bestSize) { - bestSize = children[i].size(); - bestChild = i; - } - } - if (bestChild == -1) break; - - /*! split best child into left and right child */ - BuildRecord left(current.depth+1); - BuildRecord right(current.depth+1); - if (!sameGeometry(children[bestChild].prims)) { - splitByGeometry(children[bestChild].prims,left.prims,right.prims); - } else { - splitFallback(children[bestChild].prims,left.prims,right.prims); - } - children.split(bestChild,left,right,std::unique_ptr<mvector<PrimRefMB>>()); - - } while (children.size() < cfg.branchingFactor); - - - /* detect time_ranges that have shrunken */ - bool timesplit = false; - for (size_t i=0; i<children.size(); i++) { - const BBox1f c = children[i].prims.time_range; - const BBox1f p = current.prims.time_range; - timesplit |= c.lower > p.lower || c.upper < p.upper; - } - - /* create node */ - NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,timesplit); - - LBBox3fa bounds = empty; - for (size_t i=0; i<children.size(); i++) { - values[i] = createLargeLeaf(children[i],alloc); - bounds.extend(values[i].lbounds); - } - - setAABBNodeMB(current,children.children.data(),node,values,children.numChildren); - - if (timesplit) - bounds = current.prims.linearBounds(recalculatePrimRef); - - return NodeRecordMB4D(node,bounds,current.prims.time_range); - } - - /*! performs split */ - std::unique_ptr<mvector<PrimRefMB>> split(const BuildRecord& current, BuildRecord& lrecord, BuildRecord& rrecord, bool& aligned, bool& timesplit) - { - /* variable to track the SAH of the best splitting approach */ - float bestSAH = inf; - const float leafSAH = current.prims.leafSAH(cfg.logBlockSize); - - /* perform standard binning in aligned space */ - HeuristicBinning::Split alignedObjectSplit = alignedHeuristic.find(current.prims,cfg.logBlockSize); - float alignedObjectSAH = alignedObjectSplit.splitSAH(); - bestSAH = min(alignedObjectSAH,bestSAH); - - /* perform standard binning in unaligned space */ - UnalignedHeuristicBinning::Split unalignedObjectSplit; - LinearSpace3fa uspace; - float unalignedObjectSAH = inf; - if (alignedObjectSAH > 0.7f*leafSAH) { - uspace = unalignedHeuristic.computeAlignedSpaceMB(scene,current.prims); - const SetMB sset = current.prims.primInfo(recalculatePrimRef,uspace); - unalignedObjectSplit = unalignedHeuristic.find(sset,cfg.logBlockSize,uspace); - unalignedObjectSAH = 1.3f*unalignedObjectSplit.splitSAH(); // makes unaligned splits more expensive - bestSAH = min(unalignedObjectSAH,bestSAH); - } - - /* do temporal splits only if previous approaches failed to produce good SAH and the the time range is large enough */ - float temporal_split_sah = inf; - typename HeuristicTemporal::Split temporal_split; - if (bestSAH > 0.5f*leafSAH) { - if (current.prims.time_range.size() > 1.01f/float(current.prims.max_num_time_segments)) { - temporal_split = temporalSplitHeuristic.find(current.prims,cfg.logBlockSize); - temporal_split_sah = temporal_split.splitSAH(); - bestSAH = min(temporal_split_sah,bestSAH); - } - } - - /* perform fallback split if SAH heuristics failed */ - if (unlikely(!std::isfinite(bestSAH))) { - current.prims.deterministic_order(); - splitFallback(current.prims,lrecord.prims,rrecord.prims); - } - /* perform aligned split if this is best */ - else if (likely(bestSAH == alignedObjectSAH)) { - alignedHeuristic.split(alignedObjectSplit,current.prims,lrecord.prims,rrecord.prims); - } - /* perform unaligned split if this is best */ - else if (likely(bestSAH == unalignedObjectSAH)) { - unalignedHeuristic.split(unalignedObjectSplit,uspace,current.prims,lrecord.prims,rrecord.prims); - aligned = false; - } - /* perform temporal split if this is best */ - else if (likely(bestSAH == temporal_split_sah)) { - timesplit = true; - return temporalSplitHeuristic.split(temporal_split,current.prims,lrecord.prims,rrecord.prims); - } - else - assert(false); - - return std::unique_ptr<mvector<PrimRefMB>>(); - } - - /*! recursive build */ - NodeRecordMB4D recurse(BuildRecord& current, Allocator alloc, bool toplevel) - { - /* get thread local allocator */ - if (!alloc) - alloc = createAlloc(); - - /* call memory monitor function to signal progress */ - if (toplevel && current.size() <= SINGLE_THREADED_THRESHOLD) - progressMonitor(current.size()); - - /* create leaf node */ - if (current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || current.size() <= cfg.minLeafSize) { - current.prims.deterministic_order(); - return createLargeLeaf(current,alloc); - } - - /* fill all children by always splitting the one with the largest surface area */ - NodeRecordMB4D values[MAX_BRANCHING_FACTOR]; - LocalChildList children(current); - bool aligned = true; - bool timesplit = false; - - do { - - /* find best child with largest bounding box area */ - ssize_t bestChild = -1; - float bestArea = neg_inf; - for (size_t i=0; i<children.size(); i++) - { - /* ignore leaves as they cannot get split */ - if (children[i].size() <= cfg.minLeafSize) - continue; - - /* remember child with largest area */ - const float A = children[i].prims.halfArea(); - if (A > bestArea) { - bestArea = children[i].prims.halfArea(); - bestChild = i; - } - } - if (bestChild == -1) break; - - /*! split best child into left and right child */ - BuildRecord left(current.depth+1); - BuildRecord right(current.depth+1); - std::unique_ptr<mvector<PrimRefMB>> new_vector = split(children[bestChild],left,right,aligned,timesplit); - children.split(bestChild,left,right,std::move(new_vector)); - - } while (children.size() < cfg.branchingFactor); - - /* detect time_ranges that have shrunken */ - for (size_t i=0; i<children.size(); i++) { - const BBox1f c = children[i].prims.time_range; - const BBox1f p = current.prims.time_range; - timesplit |= c.lower > p.lower || c.upper < p.upper; - } - - /* create time split node */ - if (timesplit) - { - const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true); - - /* spawn tasks or ... */ - if (current.size() > SINGLE_THREADED_THRESHOLD) - { - parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) { - for (size_t i=r.begin(); i<r.end(); i++) { - values[i] = recurse(children[i],nullptr,true); - _mm_mfence(); // to allow non-temporal stores during build - } - }); - } - /* ... continue sequential */ - else { - for (size_t i=0; i<children.size(); i++) { - values[i] = recurse(children[i],alloc,false); - } - } - - setAABBNodeMB(current,children.children.data(),node,values,children.numChildren); - - const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef); - return NodeRecordMB4D(node,bounds,current.prims.time_range); - } - - /* create aligned node */ - else if (aligned) - { - const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true); - - /* spawn tasks or ... */ - if (current.size() > SINGLE_THREADED_THRESHOLD) - { - LBBox3fa cbounds[MAX_BRANCHING_FACTOR]; - parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) { - for (size_t i=r.begin(); i<r.end(); i++) { - values[i] = recurse(children[i],nullptr,true); - cbounds[i] = values[i].lbounds; - _mm_mfence(); // to allow non-temporal stores during build - } - }); - - LBBox3fa bounds = empty; - for (size_t i=0; i<children.size(); i++) - bounds.extend(cbounds[i]); - setAABBNodeMB(current,children.children.data(),node,values,children.numChildren); - return NodeRecordMB4D(node,bounds,current.prims.time_range); - } - /* ... continue sequentially */ - else - { - LBBox3fa bounds = empty; - for (size_t i=0; i<children.size(); i++) { - values[i] = recurse(children[i],alloc,false); - bounds.extend(values[i].lbounds); - } - setAABBNodeMB(current,children.children.data(),node,values,children.numChildren); - return NodeRecordMB4D(node,bounds,current.prims.time_range); - } - } - - /* create unaligned node */ - else - { - const NodeRef node = createOBBNodeMB(alloc); - - /* spawn tasks or ... */ - if (current.size() > SINGLE_THREADED_THRESHOLD) - { - parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) { - for (size_t i=r.begin(); i<r.end(); i++) { - const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims); - const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space); - const auto child = recurse(children[i],nullptr,true); - setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range); - _mm_mfence(); // to allow non-temporal stores during build - } - }); - } - /* ... continue sequentially */ - else - { - for (size_t i=0; i<children.size(); i++) { - const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims); - const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space); - const auto child = recurse(children[i],alloc,false); - setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range); - } - } - - const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef); - return NodeRecordMB4D(node,bounds,current.prims.time_range); - } - } - - public: - - /*! entry point into builder */ - NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo) - { - BuildRecord record(SetMB(pinfo,&prims),1); - auto root = recurse(record,nullptr,true); - _mm_mfence(); // to allow non-temporal stores during build - return root; - } - - private: - Settings cfg; - Scene* scene; - const RecalculatePrimRef& recalculatePrimRef; - const CreateAllocFunc& createAlloc; - const CreateAABBNodeMBFunc& createAABBNodeMB; - const SetAABBNodeMBFunc& setAABBNodeMB; - const CreateOBBNodeMBFunc& createOBBNodeMB; - const SetOBBNodeMBFunc& setOBBNodeMB; - const CreateLeafFunc& createLeaf; - const ProgressMonitor& progressMonitor; - - private: - HeuristicBinning alignedHeuristic; - UnalignedHeuristicBinning unalignedHeuristic; - HeuristicTemporal temporalSplitHeuristic; - }; - - template<typename NodeRef, - typename RecalculatePrimRef, - typename CreateAllocFunc, - typename CreateAABBNodeMBFunc, - typename SetAABBNodeMBFunc, - typename CreateOBBNodeMBFunc, - typename SetOBBNodeMBFunc, - typename CreateLeafFunc, - typename ProgressMonitor> - - static BVHNodeRecordMB4D<NodeRef> build (Scene* scene, mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo, - const RecalculatePrimRef& recalculatePrimRef, - const CreateAllocFunc& createAlloc, - const CreateAABBNodeMBFunc& createAABBNodeMB, - const SetAABBNodeMBFunc& setAABBNodeMB, - const CreateOBBNodeMBFunc& createOBBNodeMB, - const SetOBBNodeMBFunc& setOBBNodeMB, - const CreateLeafFunc& createLeaf, - const ProgressMonitor& progressMonitor, - const Settings settings) - { - typedef BuilderT<NodeRef,RecalculatePrimRef,CreateAllocFunc, - CreateAABBNodeMBFunc,SetAABBNodeMBFunc, - CreateOBBNodeMBFunc,SetOBBNodeMBFunc, - CreateLeafFunc,ProgressMonitor> Builder; - - Builder builder(scene,recalculatePrimRef,createAlloc, - createAABBNodeMB,setAABBNodeMB, - createOBBNodeMB,setOBBNodeMB, - createLeaf,progressMonitor,settings); - - return builder(prims,pinfo); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_sah.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_sah.h deleted file mode 100644 index 3f7e678a10..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_sah.h +++ /dev/null @@ -1,669 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "heuristic_binning_array_aligned.h" -#include "heuristic_spatial_array.h" -#include "heuristic_openmerge_array.h" - -#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL -# define NUM_OBJECT_BINS 16 -# define NUM_SPATIAL_BINS 16 -#else -# define NUM_OBJECT_BINS 32 -# define NUM_SPATIAL_BINS 16 -#endif - -namespace embree -{ - namespace isa - { - MAYBE_UNUSED static const float travCost = 1.0f; - MAYBE_UNUSED static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024; - - struct GeneralBVHBuilder - { - static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor - static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree of we are that many levels before the maximum tree depth - - - /*! settings for SAH builder */ - struct Settings - { - /*! default settings */ - Settings () - : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), - travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf) {} - - /*! initialize settings from API settings */ - Settings (const RTCBuildArguments& settings) - : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), - travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf) - { - if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor; - if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth; - if (RTC_BUILD_ARGUMENTS_HAS(settings,sahBlockSize )) logBlockSize = bsr(static_cast<size_t>(settings.sahBlockSize)); - if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize; - if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize; - if (RTC_BUILD_ARGUMENTS_HAS(settings,traversalCost )) travCost = settings.traversalCost; - if (RTC_BUILD_ARGUMENTS_HAS(settings,intersectionCost )) intCost = settings.intersectionCost; - - minLeafSize = min(minLeafSize,maxLeafSize); - } - - Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold, size_t primrefarrayalloc = inf) - : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize), - travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold), primrefarrayalloc(primrefarrayalloc) - { - minLeafSize = min(minLeafSize,maxLeafSize); - } - - public: - size_t branchingFactor; //!< branching factor of BVH to build - size_t maxDepth; //!< maximum depth of BVH to build - size_t logBlockSize; //!< log2 of blocksize for SAH heuristic - size_t minLeafSize; //!< minimum size of a leaf - size_t maxLeafSize; //!< maximum size of a leaf - float travCost; //!< estimated cost of one traversal step - float intCost; //!< estimated cost of one primitive intersection - size_t singleThreadThreshold; //!< threshold when we switch to single threaded build - size_t primrefarrayalloc; //!< builder uses prim ref array to allocate nodes and leaves when a subtree of that size is finished - }; - - /*! recursive state of builder */ - template<typename Set, typename Split> - struct BuildRecordT - { - public: - __forceinline BuildRecordT () {} - - __forceinline BuildRecordT (size_t depth) - : depth(depth), alloc_barrier(false), prims(empty) {} - - __forceinline BuildRecordT (size_t depth, const Set& prims) - : depth(depth), alloc_barrier(false), prims(prims) {} - - __forceinline BBox3fa bounds() const { return prims.geomBounds; } - - __forceinline friend bool operator< (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() < b.prims.size(); } - __forceinline friend bool operator> (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() > b.prims.size(); } - - __forceinline size_t size() const { return prims.size(); } - - public: - size_t depth; //!< Depth of the root of this subtree. - bool alloc_barrier; //!< barrier used to reuse primref-array blocks to allocate nodes - Set prims; //!< The list of primitives. - }; - - template<typename PrimRef, typename Set> - struct DefaultCanCreateLeafFunc - { - __forceinline bool operator()(const PrimRef*, const Set&) const { return true; } - }; - - template<typename PrimRef, typename Set> - struct DefaultCanCreateLeafSplitFunc - { - __forceinline void operator()(PrimRef*, const Set&, Set&, Set&) const { } - }; - - template<typename BuildRecord, - typename Heuristic, - typename Set, - typename PrimRef, - typename ReductionTy, - typename Allocator, - typename CreateAllocFunc, - typename CreateNodeFunc, - typename UpdateNodeFunc, - typename CreateLeafFunc, - typename CanCreateLeafFunc, - typename CanCreateLeafSplitFunc, - typename ProgressMonitor> - - class BuilderT - { - friend struct GeneralBVHBuilder; - - BuilderT (PrimRef* prims, - Heuristic& heuristic, - const CreateAllocFunc& createAlloc, - const CreateNodeFunc& createNode, - const UpdateNodeFunc& updateNode, - const CreateLeafFunc& createLeaf, - const CanCreateLeafFunc& canCreateLeaf, - const CanCreateLeafSplitFunc& canCreateLeafSplit, - const ProgressMonitor& progressMonitor, - const Settings& settings) : - cfg(settings), - prims(prims), - heuristic(heuristic), - createAlloc(createAlloc), - createNode(createNode), - updateNode(updateNode), - createLeaf(createLeaf), - canCreateLeaf(canCreateLeaf), - canCreateLeafSplit(canCreateLeafSplit), - progressMonitor(progressMonitor) - { - if (cfg.branchingFactor > MAX_BRANCHING_FACTOR) - throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large"); - } - - const ReductionTy createLargeLeaf(const BuildRecord& current, Allocator alloc) - { - /* this should never occur but is a fatal error */ - if (current.depth > cfg.maxDepth) - throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached"); - - /* create leaf for few primitives */ - if (current.prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,current.prims)) - return createLeaf(prims,current.prims,alloc); - - /* fill all children by always splitting the largest one */ - ReductionTy values[MAX_BRANCHING_FACTOR]; - BuildRecord children[MAX_BRANCHING_FACTOR]; - size_t numChildren = 1; - children[0] = current; - do { - - /* find best child with largest bounding box area */ - size_t bestChild = -1; - size_t bestSize = 0; - for (size_t i=0; i<numChildren; i++) - { - /* ignore leaves as they cannot get split */ - if (children[i].prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,children[i].prims)) - continue; - - /* remember child with largest size */ - if (children[i].prims.size() > bestSize) { - bestSize = children[i].prims.size(); - bestChild = i; - } - } - if (bestChild == (size_t)-1) break; - - /*! split best child into left and right child */ - BuildRecord left(current.depth+1); - BuildRecord right(current.depth+1); - if (!canCreateLeaf(prims,children[bestChild].prims)) { - canCreateLeafSplit(prims,children[bestChild].prims,left.prims,right.prims); - } else { - heuristic.splitFallback(children[bestChild].prims,left.prims,right.prims); - } - - /* add new children left and right */ - children[bestChild] = children[numChildren-1]; - children[numChildren-1] = left; - children[numChildren+0] = right; - numChildren++; - - } while (numChildren < cfg.branchingFactor); - - /* set barrier for primrefarrayalloc */ - if (unlikely(current.size() > cfg.primrefarrayalloc)) - for (size_t i=0; i<numChildren; i++) - children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc; - - /* create node */ - auto node = createNode(children,numChildren,alloc); - - /* recurse into each child and perform reduction */ - for (size_t i=0; i<numChildren; i++) - values[i] = createLargeLeaf(children[i],alloc); - - /* perform reduction */ - return updateNode(current,children,node,values,numChildren); - } - - const ReductionTy recurse(BuildRecord& current, Allocator alloc, bool toplevel) - { - /* get thread local allocator */ - if (!alloc) - alloc = createAlloc(); - - /* call memory monitor function to signal progress */ - if (toplevel && current.size() <= cfg.singleThreadThreshold) - progressMonitor(current.size()); - - /*! find best split */ - auto split = heuristic.find(current.prims,cfg.logBlockSize); - - /*! compute leaf and split cost */ - const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize); - const float splitSAH = cfg.travCost*halfArea(current.prims.geomBounds)+cfg.intCost*split.splitSAH(); - assert((current.prims.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0))); - - /*! create a leaf node when threshold reached or SAH tells us to stop */ - if (current.prims.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.prims.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) { - heuristic.deterministic_order(current.prims); - return createLargeLeaf(current,alloc); - } - - /*! perform initial split */ - Set lprims,rprims; - heuristic.split(split,current.prims,lprims,rprims); - - /*! initialize child list with initial split */ - ReductionTy values[MAX_BRANCHING_FACTOR]; - BuildRecord children[MAX_BRANCHING_FACTOR]; - children[0] = BuildRecord(current.depth+1,lprims); - children[1] = BuildRecord(current.depth+1,rprims); - size_t numChildren = 2; - - /*! split until node is full or SAH tells us to stop */ - while (numChildren < cfg.branchingFactor) - { - /*! find best child to split */ - float bestArea = neg_inf; - ssize_t bestChild = -1; - for (size_t i=0; i<numChildren; i++) - { - /* ignore leaves as they cannot get split */ - if (children[i].prims.size() <= cfg.minLeafSize) continue; - - /* find child with largest surface area */ - if (halfArea(children[i].prims.geomBounds) > bestArea) { - bestChild = i; - bestArea = halfArea(children[i].prims.geomBounds); - } - } - if (bestChild == -1) break; - - /* perform best found split */ - BuildRecord& brecord = children[bestChild]; - BuildRecord lrecord(current.depth+1); - BuildRecord rrecord(current.depth+1); - auto split = heuristic.find(brecord.prims,cfg.logBlockSize); - heuristic.split(split,brecord.prims,lrecord.prims,rrecord.prims); - children[bestChild ] = lrecord; - children[numChildren] = rrecord; - numChildren++; - } - - /* set barrier for primrefarrayalloc */ - if (unlikely(current.size() > cfg.primrefarrayalloc)) - for (size_t i=0; i<numChildren; i++) - children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc; - - /* sort buildrecords for faster shadow ray traversal */ - std::sort(&children[0],&children[numChildren],std::greater<BuildRecord>()); - - /*! create an inner node */ - auto node = createNode(children,numChildren,alloc); - - /* spawn tasks */ - if (current.size() > cfg.singleThreadThreshold) - { - /*! parallel_for is faster than spawing sub-tasks */ - parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { // FIXME: no range here - for (size_t i=r.begin(); i<r.end(); i++) { - values[i] = recurse(children[i],nullptr,true); - _mm_mfence(); // to allow non-temporal stores during build - } - }); - - return updateNode(current,children,node,values,numChildren); - } - /* recurse into each child */ - else - { - for (size_t i=0; i<numChildren; i++) - values[i] = recurse(children[i],alloc,false); - - return updateNode(current,children,node,values,numChildren); - } - } - - private: - Settings cfg; - PrimRef* prims; - Heuristic& heuristic; - const CreateAllocFunc& createAlloc; - const CreateNodeFunc& createNode; - const UpdateNodeFunc& updateNode; - const CreateLeafFunc& createLeaf; - const CanCreateLeafFunc& canCreateLeaf; - const CanCreateLeafSplitFunc& canCreateLeafSplit; - const ProgressMonitor& progressMonitor; - }; - - template< - typename ReductionTy, - typename Heuristic, - typename Set, - typename PrimRef, - typename CreateAllocFunc, - typename CreateNodeFunc, - typename UpdateNodeFunc, - typename CreateLeafFunc, - typename ProgressMonitor> - - __noinline static ReductionTy build(Heuristic& heuristic, - PrimRef* prims, - const Set& set, - CreateAllocFunc createAlloc, - CreateNodeFunc createNode, UpdateNodeFunc updateNode, - const CreateLeafFunc& createLeaf, - const ProgressMonitor& progressMonitor, - const Settings& settings) - { - typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord; - - typedef BuilderT< - BuildRecord, - Heuristic, - Set, - PrimRef, - ReductionTy, - decltype(createAlloc()), - CreateAllocFunc, - CreateNodeFunc, - UpdateNodeFunc, - CreateLeafFunc, - DefaultCanCreateLeafFunc<PrimRef, Set>, - DefaultCanCreateLeafSplitFunc<PrimRef, Set>, - ProgressMonitor> Builder; - - /* instantiate builder */ - Builder builder(prims, - heuristic, - createAlloc, - createNode, - updateNode, - createLeaf, - DefaultCanCreateLeafFunc<PrimRef, Set>(), - DefaultCanCreateLeafSplitFunc<PrimRef, Set>(), - progressMonitor, - settings); - - /* build hierarchy */ - BuildRecord record(1,set); - const ReductionTy root = builder.recurse(record,nullptr,true); - _mm_mfence(); // to allow non-temporal stores during build - return root; - } - - template< - typename ReductionTy, - typename Heuristic, - typename Set, - typename PrimRef, - typename CreateAllocFunc, - typename CreateNodeFunc, - typename UpdateNodeFunc, - typename CreateLeafFunc, - typename CanCreateLeafFunc, - typename CanCreateLeafSplitFunc, - typename ProgressMonitor> - - __noinline static ReductionTy build(Heuristic& heuristic, - PrimRef* prims, - const Set& set, - CreateAllocFunc createAlloc, - CreateNodeFunc createNode, UpdateNodeFunc updateNode, - const CreateLeafFunc& createLeaf, - const CanCreateLeafFunc& canCreateLeaf, - const CanCreateLeafSplitFunc& canCreateLeafSplit, - const ProgressMonitor& progressMonitor, - const Settings& settings) - { - typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord; - - typedef BuilderT< - BuildRecord, - Heuristic, - Set, - PrimRef, - ReductionTy, - decltype(createAlloc()), - CreateAllocFunc, - CreateNodeFunc, - UpdateNodeFunc, - CreateLeafFunc, - CanCreateLeafFunc, - CanCreateLeafSplitFunc, - ProgressMonitor> Builder; - - /* instantiate builder */ - Builder builder(prims, - heuristic, - createAlloc, - createNode, - updateNode, - createLeaf, - canCreateLeaf, - canCreateLeafSplit, - progressMonitor, - settings); - - /* build hierarchy */ - BuildRecord record(1,set); - const ReductionTy root = builder.recurse(record,nullptr,true); - _mm_mfence(); // to allow non-temporal stores during build - return root; - } - }; - - /* SAH builder that operates on an array of BuildRecords */ - struct BVHBuilderBinnedSAH - { - typedef PrimInfoRange Set; - typedef HeuristicArrayBinningSAH<PrimRef,NUM_OBJECT_BINS> Heuristic; - typedef GeneralBVHBuilder::BuildRecordT<Set,typename Heuristic::Split> BuildRecord; - typedef GeneralBVHBuilder::Settings Settings; - - /*! special builder that propagates reduction over the tree */ - template< - typename ReductionTy, - typename CreateAllocFunc, - typename CreateNodeFunc, - typename UpdateNodeFunc, - typename CreateLeafFunc, - typename ProgressMonitor> - - static ReductionTy build(CreateAllocFunc createAlloc, - CreateNodeFunc createNode, UpdateNodeFunc updateNode, - const CreateLeafFunc& createLeaf, - const ProgressMonitor& progressMonitor, - PrimRef* prims, const PrimInfo& pinfo, - const Settings& settings) - { - Heuristic heuristic(prims); - return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>( - heuristic, - prims, - PrimInfoRange(0,pinfo.size(),pinfo), - createAlloc, - createNode, - updateNode, - createLeaf, - progressMonitor, - settings); - } - - /*! special builder that propagates reduction over the tree */ - template< - typename ReductionTy, - typename CreateAllocFunc, - typename CreateNodeFunc, - typename UpdateNodeFunc, - typename CreateLeafFunc, - typename CanCreateLeafFunc, - typename CanCreateLeafSplitFunc, - typename ProgressMonitor> - - static ReductionTy build(CreateAllocFunc createAlloc, - CreateNodeFunc createNode, UpdateNodeFunc updateNode, - const CreateLeafFunc& createLeaf, - const CanCreateLeafFunc& canCreateLeaf, - const CanCreateLeafSplitFunc& canCreateLeafSplit, - const ProgressMonitor& progressMonitor, - PrimRef* prims, const PrimInfo& pinfo, - const Settings& settings) - { - Heuristic heuristic(prims); - return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>( - heuristic, - prims, - PrimInfoRange(0,pinfo.size(),pinfo), - createAlloc, - createNode, - updateNode, - createLeaf, - canCreateLeaf, - canCreateLeafSplit, - progressMonitor, - settings); - } - }; - - /* Spatial SAH builder that operates on an double-buffered array of BuildRecords */ - struct BVHBuilderBinnedFastSpatialSAH - { - typedef PrimInfoExtRange Set; - typedef Split2<BinSplit<NUM_OBJECT_BINS>,SpatialBinSplit<NUM_SPATIAL_BINS> > Split; - typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord; - typedef GeneralBVHBuilder::Settings Settings; - - static const unsigned int GEOMID_MASK = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS; - static const unsigned int SPLITS_MASK = 0xFFFFFFFF << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); - - template<typename ReductionTy, typename UserCreateLeaf> - struct CreateLeafExt - { - __forceinline CreateLeafExt (const UserCreateLeaf userCreateLeaf) - : userCreateLeaf(userCreateLeaf) {} - - // __noinline is workaround for ICC2016 compiler bug - template<typename Allocator> - __noinline ReductionTy operator() (PrimRef* prims, const range<size_t>& range, Allocator alloc) const - { - for (size_t i=range.begin(); i<range.end(); i++) - prims[i].lower.u &= GEOMID_MASK; - - return userCreateLeaf(prims,range,alloc); - } - - const UserCreateLeaf userCreateLeaf; - }; - - /*! special builder that propagates reduction over the tree */ - template< - typename ReductionTy, - typename CreateAllocFunc, - typename CreateNodeFunc, - typename UpdateNodeFunc, - typename CreateLeafFunc, - typename SplitPrimitiveFunc, - typename ProgressMonitor> - - static ReductionTy build(CreateAllocFunc createAlloc, - CreateNodeFunc createNode, - UpdateNodeFunc updateNode, - const CreateLeafFunc& createLeaf, - SplitPrimitiveFunc splitPrimitive, - ProgressMonitor progressMonitor, - PrimRef* prims, - const size_t extSize, - const PrimInfo& pinfo, - const Settings& settings) - { - typedef HeuristicArraySpatialSAH<SplitPrimitiveFunc,PrimRef,NUM_OBJECT_BINS,NUM_SPATIAL_BINS> Heuristic; - Heuristic heuristic(splitPrimitive,prims,pinfo); - - /* calculate total surface area */ // FIXME: this sum is not deterministic - const float A = (float) parallel_reduce(size_t(0),pinfo.size(),0.0, [&] (const range<size_t>& r) -> double { - - double A = 0.0f; - for (size_t i=r.begin(); i<r.end(); i++) - { - PrimRef& prim = prims[i]; - A += area(prim.bounds()); - } - return A; - },std::plus<double>()); - - - /* calculate maximum number of spatial splits per primitive */ - const unsigned int maxSplits = ((size_t)1 << RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)-1; - const float f = 10.0f; - - const float invA = 1.0f / A; - parallel_for( size_t(0), pinfo.size(), [&](const range<size_t>& r) { - - for (size_t i=r.begin(); i<r.end(); i++) - { - PrimRef& prim = prims[i]; - assert((prim.geomID() & SPLITS_MASK) == 0); - // FIXME: is there a better general heuristic ? - const float nf = ceilf(f*pinfo.size()*area(prim.bounds()) * invA); - unsigned int n = 4+min((int)maxSplits-4, max(1, (int)(nf))); - prim.lower.u |= n << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); - } - }); - - return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>( - heuristic, - prims, - PrimInfoExtRange(0,pinfo.size(),extSize,pinfo), - createAlloc, - createNode, - updateNode, - CreateLeafExt<ReductionTy,CreateLeafFunc>(createLeaf), - progressMonitor, - settings); - } - }; - - /* Open/Merge SAH builder that operates on an array of BuildRecords */ - struct BVHBuilderBinnedOpenMergeSAH - { - static const size_t NUM_OBJECT_BINS_HQ = 32; - typedef PrimInfoExtRange Set; - typedef BinSplit<NUM_OBJECT_BINS_HQ> Split; - typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord; - typedef GeneralBVHBuilder::Settings Settings; - - /*! special builder that propagates reduction over the tree */ - template< - typename ReductionTy, - typename BuildRef, - typename CreateAllocFunc, - typename CreateNodeFunc, - typename UpdateNodeFunc, - typename CreateLeafFunc, - typename NodeOpenerFunc, - typename ProgressMonitor> - - static ReductionTy build(CreateAllocFunc createAlloc, - CreateNodeFunc createNode, - UpdateNodeFunc updateNode, - const CreateLeafFunc& createLeaf, - NodeOpenerFunc nodeOpenerFunc, - ProgressMonitor progressMonitor, - BuildRef* prims, - const size_t extSize, - const PrimInfo& pinfo, - const Settings& settings) - { - typedef HeuristicArrayOpenMergeSAH<NodeOpenerFunc,BuildRef,NUM_OBJECT_BINS_HQ> Heuristic; - Heuristic heuristic(nodeOpenerFunc,prims,settings.branchingFactor); - - return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,BuildRef>( - heuristic, - prims, - PrimInfoExtRange(0,pinfo.size(),extSize,pinfo), - createAlloc, - createNode, - updateNode, - createLeaf, - progressMonitor, - settings); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_binning.h deleted file mode 100644 index a4d3b68e46..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning.h +++ /dev/null @@ -1,972 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "priminfo.h" -#include "../../common/algorithms/parallel_reduce.h" -#include "../../common/algorithms/parallel_partition.h" - -namespace embree -{ - namespace isa - { - /*! mapping into bins */ - template<size_t BINS> - struct BinMapping - { - public: - __forceinline BinMapping() {} - - /*! calculates the mapping */ - __forceinline BinMapping(size_t N, const BBox3fa& centBounds) - { - num = min(BINS,size_t(4.0f + 0.05f*N)); - assert(num >= 1); - const vfloat4 eps = 1E-34f; - const vfloat4 diag = max(eps, (vfloat4) centBounds.size()); - scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f)); - ofs = (vfloat4) centBounds.lower; - } - - /*! calculates the mapping */ - __forceinline BinMapping(const BBox3fa& centBounds) - { - num = BINS; - const vfloat4 eps = 1E-34f; - const vfloat4 diag = max(eps, (vfloat4) centBounds.size()); - scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f)); - ofs = (vfloat4) centBounds.lower; - } - - /*! calculates the mapping */ - template<typename PrimInfo> - __forceinline BinMapping(const PrimInfo& pinfo) - { - const vfloat4 eps = 1E-34f; - num = min(BINS,size_t(4.0f + 0.05f*pinfo.size())); - const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size()); - scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f)); - ofs = (vfloat4) pinfo.centBounds.lower; - } - - /*! returns number of bins */ - __forceinline size_t size() const { return num; } - - /*! slower but safe binning */ - __forceinline Vec3ia bin(const Vec3fa& p) const - { - const vint4 i = floori((vfloat4(p)-ofs)*scale); -#if 1 - assert(i[0] >= 0 && (size_t)i[0] < num); - assert(i[1] >= 0 && (size_t)i[1] < num); - assert(i[2] >= 0 && (size_t)i[2] < num); - return Vec3ia(i); -#else - return Vec3ia(clamp(i,vint4(0),vint4(num-1))); -#endif - } - - /*! faster but unsafe binning */ - __forceinline Vec3ia bin_unsafe(const Vec3fa& p) const { - return Vec3ia(floori((vfloat4(p)-ofs)*scale)); - } - - /*! faster but unsafe binning */ - template<typename PrimRef> - __forceinline Vec3ia bin_unsafe(const PrimRef& p) const { - return bin_unsafe(p.binCenter()); - } - - /*! faster but unsafe binning */ - template<typename PrimRef, typename BinBoundsAndCenter> - __forceinline Vec3ia bin_unsafe(const PrimRef& p, const BinBoundsAndCenter& binBoundsAndCenter) const { - return bin_unsafe(binBoundsAndCenter.binCenter(p)); - } - - template<typename PrimRef> - __forceinline bool bin_unsafe(const PrimRef& ref, - const vint4& vSplitPos, - const vbool4& splitDimMask) const // FIXME: rename to isLeft - { - return any(((vint4)bin_unsafe(center2(ref.bounds())) < vSplitPos) & splitDimMask); - } - /*! calculates left spatial position of bin */ - __forceinline float pos(const size_t bin, const size_t dim) const { - return madd(float(bin),1.0f / scale[dim],ofs[dim]); - } - - /*! returns true if the mapping is invalid in some dimension */ - __forceinline bool invalid(const size_t dim) const { - return scale[dim] == 0.0f; - } - - /*! stream output */ - friend embree_ostream operator<<(embree_ostream cout, const BinMapping& mapping) { - return cout << "BinMapping { num = " << mapping.num << ", ofs = " << mapping.ofs << ", scale = " << mapping.scale << "}"; - } - - public: - size_t num; - vfloat4 ofs,scale; //!< linear function that maps to bin ID - }; - - /*! stores all information to perform some split */ - template<size_t BINS> - struct BinSplit - { - enum - { - SPLIT_OBJECT = 0, - SPLIT_FALLBACK = 1, - SPLIT_ENFORCE = 2, // splits with larger ID are enforced in createLargeLeaf even if we could create a leaf already - SPLIT_TEMPORAL = 2, - SPLIT_GEOMID = 3, - }; - - /*! construct an invalid split by default */ - __forceinline BinSplit() - : sah(inf), dim(-1), pos(0), data(0) {} - - __forceinline BinSplit(float sah, unsigned data, int dim = 0, float fpos = 0) - : sah(sah), dim(dim), fpos(fpos), data(data) {} - - /*! constructs specified split */ - __forceinline BinSplit(float sah, int dim, int pos, const BinMapping<BINS>& mapping) - : sah(sah), dim(dim), pos(pos), data(0), mapping(mapping) {} - - /*! tests if this split is valid */ - __forceinline bool valid() const { return dim != -1; } - - /*! calculates surface area heuristic for performing the split */ - __forceinline float splitSAH() const { return sah; } - - /*! stream output */ - friend embree_ostream operator<<(embree_ostream cout, const BinSplit& split) { - return cout << "BinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << "}"; - } - - public: - float sah; //!< SAH cost of the split - int dim; //!< split dimension - union { int pos; float fpos; }; //!< bin index for splitting - unsigned int data; //!< extra optional split data - BinMapping<BINS> mapping; //!< mapping into bins - }; - - /*! stores extended information about the split */ - template<typename BBox> - struct SplitInfoT - { - - __forceinline SplitInfoT () {} - - __forceinline SplitInfoT (size_t leftCount, const BBox& leftBounds, size_t rightCount, const BBox& rightBounds) - : leftCount(leftCount), rightCount(rightCount), leftBounds(leftBounds), rightBounds(rightBounds) {} - - public: - size_t leftCount,rightCount; - BBox leftBounds,rightBounds; - }; - - typedef SplitInfoT<BBox3fa> SplitInfo; - typedef SplitInfoT<LBBox3fa> SplitInfo2; - - /*! stores all binning information */ - template<size_t BINS, typename PrimRef, typename BBox> - struct __aligned(64) BinInfoT - { - typedef BinSplit<BINS> Split; - typedef vbool4 vbool; - typedef vint4 vint; - typedef vfloat4 vfloat; - - __forceinline BinInfoT() { - } - - __forceinline BinInfoT(EmptyTy) { - clear(); - } - - /*! bin access function */ - __forceinline BBox &bounds(const size_t binID, const size_t dimID) { return _bounds[binID][dimID]; } - __forceinline const BBox &bounds(const size_t binID, const size_t dimID) const { return _bounds[binID][dimID]; } - - __forceinline unsigned int &counts(const size_t binID, const size_t dimID) { return _counts[binID][dimID]; } - __forceinline const unsigned int &counts(const size_t binID, const size_t dimID) const { return _counts[binID][dimID]; } - - __forceinline vuint4 &counts(const size_t binID) { return _counts[binID]; } - __forceinline const vuint4 &counts(const size_t binID) const { return _counts[binID]; } - - /*! clears the bin info */ - __forceinline void clear() - { - for (size_t i=0; i<BINS; i++) { - bounds(i,0) = bounds(i,1) = bounds(i,2) = empty; - counts(i) = vuint4(zero); - } - } - - /*! bins an array of primitives */ - __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping) - { - if (unlikely(N == 0)) return; - size_t i; - for (i=0; i<N-1; i+=2) - { - /*! map even and odd primitive to bin */ - BBox prim0; Vec3fa center0; - prims[i+0].binBoundsAndCenter(prim0,center0); - const vint4 bin0 = (vint4)mapping.bin(center0); - - BBox prim1; Vec3fa center1; - prims[i+1].binBoundsAndCenter(prim1,center1); - const vint4 bin1 = (vint4)mapping.bin(center1); - - /*! increase bounds for bins for even primitive */ - const unsigned int b00 = extract<0>(bin0); bounds(b00,0).extend(prim0); - const unsigned int b01 = extract<1>(bin0); bounds(b01,1).extend(prim0); - const unsigned int b02 = extract<2>(bin0); bounds(b02,2).extend(prim0); - const unsigned int s0 = (unsigned int)prims[i+0].size(); - counts(b00,0)+=s0; - counts(b01,1)+=s0; - counts(b02,2)+=s0; - - /*! increase bounds of bins for odd primitive */ - const unsigned int b10 = extract<0>(bin1); bounds(b10,0).extend(prim1); - const unsigned int b11 = extract<1>(bin1); bounds(b11,1).extend(prim1); - const unsigned int b12 = extract<2>(bin1); bounds(b12,2).extend(prim1); - const unsigned int s1 = (unsigned int)prims[i+1].size(); - counts(b10,0)+=s1; - counts(b11,1)+=s1; - counts(b12,2)+=s1; - } - /*! for uneven number of primitives */ - if (i < N) - { - /*! map primitive to bin */ - BBox prim0; Vec3fa center0; - prims[i].binBoundsAndCenter(prim0,center0); - const vint4 bin0 = (vint4)mapping.bin(center0); - - /*! increase bounds of bins */ - const unsigned int s0 = (unsigned int)prims[i].size(); - const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); - const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); - const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); - } - } - - /*! bins an array of primitives */ - template<typename BinBoundsAndCenter> - __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) - { - if (N == 0) return; - - size_t i; - for (i=0; i<N-1; i+=2) - { - /*! map even and odd primitive to bin */ - BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0); - const vint4 bin0 = (vint4)mapping.bin(center0); - BBox prim1; Vec3fa center1; binBoundsAndCenter.binBoundsAndCenter(prims[i+1],prim1,center1); - const vint4 bin1 = (vint4)mapping.bin(center1); - - /*! increase bounds for bins for even primitive */ - const unsigned int s0 = prims[i+0].size(); - const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); - const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); - const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); - - /*! increase bounds of bins for odd primitive */ - const unsigned int s1 = prims[i+1].size(); - const int b10 = extract<0>(bin1); counts(b10,0)+=s1; bounds(b10,0).extend(prim1); - const int b11 = extract<1>(bin1); counts(b11,1)+=s1; bounds(b11,1).extend(prim1); - const int b12 = extract<2>(bin1); counts(b12,2)+=s1; bounds(b12,2).extend(prim1); - } - - /*! for uneven number of primitives */ - if (i < N) - { - /*! map primitive to bin */ - BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0); - const vint4 bin0 = (vint4)mapping.bin(center0); - - /*! increase bounds of bins */ - const unsigned int s0 = prims[i+0].size(); - const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); - const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); - const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); - } - } - - __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping) { - bin(prims+begin,end-begin,mapping); - } - - template<typename BinBoundsAndCenter> - __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) { - bin<BinBoundsAndCenter>(prims+begin,end-begin,mapping,binBoundsAndCenter); - } - - /*! merges in other binning information */ - __forceinline void merge (const BinInfoT& other, size_t numBins) - { - - for (size_t i=0; i<numBins; i++) - { - counts(i) += other.counts(i); - bounds(i,0).extend(other.bounds(i,0)); - bounds(i,1).extend(other.bounds(i,1)); - bounds(i,2).extend(other.bounds(i,2)); - } - } - - /*! reduces binning information */ - static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b, const size_t numBins = BINS) - { - BinInfoT c; - for (size_t i=0; i<numBins; i++) - { - c.counts(i) = a.counts(i)+b.counts(i); - c.bounds(i,0) = embree::merge(a.bounds(i,0),b.bounds(i,0)); - c.bounds(i,1) = embree::merge(a.bounds(i,1),b.bounds(i,1)); - c.bounds(i,2) = embree::merge(a.bounds(i,2),b.bounds(i,2)); - } - return c; - } - - /*! finds the best split by scanning binning information */ - __forceinline Split best(const BinMapping<BINS>& mapping, const size_t blocks_shift) const - { - /* sweep from right to left and compute parallel prefix of merged bounds */ - vfloat4 rAreas[BINS]; - vuint4 rCounts[BINS]; - vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty; - for (size_t i=mapping.size()-1; i>0; i--) - { - count += counts(i); - rCounts[i] = count; - bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx); - by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by); - bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz); - rAreas[i][3] = 0.0f; - } - /* sweep from left to right and compute SAH */ - vuint4 blocks_add = (1 << blocks_shift)-1; - vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; - count = 0; bx = empty; by = empty; bz = empty; - for (size_t i=1; i<mapping.size(); i++, ii+=1) - { - count += counts(i-1); - bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx); - by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by); - bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz); - const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az); - const vfloat4 rArea = rAreas[i]; - const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift); // if blocks_shift >=1 then lCount < 4B and could be represented with an vint4, which would allow for faster vfloat4 conversions. - const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift); - const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount)); - //const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount))); - - vbestPos = select(sah < vbestSAH,ii ,vbestPos); - vbestSAH = select(sah < vbestSAH,sah,vbestSAH); - } - - /* find best dimension */ - float bestSAH = inf; - int bestDim = -1; - int bestPos = 0; - for (int dim=0; dim<3; dim++) - { - /* ignore zero sized dimensions */ - if (unlikely(mapping.invalid(dim))) - continue; - - /* test if this is a better dimension */ - if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) { - bestDim = dim; - bestPos = vbestPos[dim]; - bestSAH = vbestSAH[dim]; - } - } - return Split(bestSAH,bestDim,bestPos,mapping); - } - - /*! calculates extended split information */ - __forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const - { - if (split.dim == -1) { - new (&info) SplitInfoT<BBox>(0,empty,0,empty); - return; - } - - size_t leftCount = 0; - BBox leftBounds = empty; - for (size_t i=0; i<(size_t)split.pos; i++) { - leftCount += counts(i,split.dim); - leftBounds.extend(bounds(i,split.dim)); - } - size_t rightCount = 0; - BBox rightBounds = empty; - for (size_t i=split.pos; i<mapping.size(); i++) { - rightCount += counts(i,split.dim); - rightBounds.extend(bounds(i,split.dim)); - } - new (&info) SplitInfoT<BBox>(leftCount,leftBounds,rightCount,rightBounds); - } - - /*! gets the number of primitives left of the split */ - __forceinline size_t getLeftCount(const BinMapping<BINS>& mapping, const Split& split) const - { - if (unlikely(split.dim == -1)) return -1; - - size_t leftCount = 0; - for (size_t i = 0; i < (size_t)split.pos; i++) { - leftCount += counts(i, split.dim); - } - return leftCount; - } - - /*! gets the number of primitives right of the split */ - __forceinline size_t getRightCount(const BinMapping<BINS>& mapping, const Split& split) const - { - if (unlikely(split.dim == -1)) return -1; - - size_t rightCount = 0; - for (size_t i = (size_t)split.pos; i<mapping.size(); i++) { - rightCount += counts(i, split.dim); - } - return rightCount; - } - - private: - BBox _bounds[BINS][3]; //!< geometry bounds for each bin in each dimension - vuint4 _counts[BINS]; //!< counts number of primitives that map into the bins - }; - -#if defined(__AVX512ER__) // KNL - - /*! mapping into bins */ - template<> - struct BinMapping<16> - { - public: - __forceinline BinMapping() {} - - /*! calculates the mapping */ - template<typename PrimInfo> - __forceinline BinMapping(const PrimInfo& pinfo) - { - num = 16; - const vfloat4 eps = 1E-34f; - const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size()); - scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f)); - ofs = (vfloat4) pinfo.centBounds.lower; - scale16 = scale; - ofs16 = ofs; - } - - /*! returns number of bins */ - __forceinline size_t size() const { return num; } - - __forceinline vint16 bin16(const Vec3fa& p) const { - return vint16(vint4(floori((vfloat4(p)-ofs)*scale))); - } - - __forceinline vint16 bin16(const vfloat16& p) const { - return floori((p-ofs16)*scale16); - } - - __forceinline int bin_unsafe(const PrimRef& ref, - const vint16& vSplitPos, - const vbool16& splitDimMask) const // FIXME: rename to isLeft - { - const vfloat16 lower(*(vfloat4*)&ref.lower); - const vfloat16 upper(*(vfloat4*)&ref.upper); - const vfloat16 p = lower + upper; - const vint16 i = floori((p-ofs16)*scale16); - return lt(splitDimMask,i,vSplitPos); - } - - /*! returns true if the mapping is invalid in some dimension */ - __forceinline bool invalid(const size_t dim) const { - return scale[dim] == 0.0f; - } - - public: - size_t num; - vfloat4 ofs,scale; //!< linear function that maps to bin ID - vfloat16 ofs16,scale16; //!< linear function that maps to bin ID - }; - - /* 16 bins in-register binner */ - template<typename PrimRef> - struct __aligned(64) BinInfoT<16,PrimRef,BBox3fa> - { - typedef BinSplit<16> Split; - typedef vbool16 vbool; - typedef vint16 vint; - typedef vfloat16 vfloat; - - __forceinline BinInfoT() { - } - - __forceinline BinInfoT(EmptyTy) { - clear(); - } - - /*! clears the bin info */ - __forceinline void clear() - { - lower[0] = lower[1] = lower[2] = pos_inf; - upper[0] = upper[1] = upper[2] = neg_inf; - count[0] = count[1] = count[2] = 0; - } - - - static __forceinline vfloat16 prefix_area_rl(const vfloat16 min_x, - const vfloat16 min_y, - const vfloat16 min_z, - const vfloat16 max_x, - const vfloat16 max_y, - const vfloat16 max_z) - { - const vfloat16 r_min_x = reverse_prefix_min(min_x); - const vfloat16 r_min_y = reverse_prefix_min(min_y); - const vfloat16 r_min_z = reverse_prefix_min(min_z); - const vfloat16 r_max_x = reverse_prefix_max(max_x); - const vfloat16 r_max_y = reverse_prefix_max(max_y); - const vfloat16 r_max_z = reverse_prefix_max(max_z); - const vfloat16 dx = r_max_x - r_min_x; - const vfloat16 dy = r_max_y - r_min_y; - const vfloat16 dz = r_max_z - r_min_z; - const vfloat16 area_rl = madd(dx,dy,madd(dx,dz,dy*dz)); - return area_rl; - } - - static __forceinline vfloat16 prefix_area_lr(const vfloat16 min_x, - const vfloat16 min_y, - const vfloat16 min_z, - const vfloat16 max_x, - const vfloat16 max_y, - const vfloat16 max_z) - { - const vfloat16 r_min_x = prefix_min(min_x); - const vfloat16 r_min_y = prefix_min(min_y); - const vfloat16 r_min_z = prefix_min(min_z); - const vfloat16 r_max_x = prefix_max(max_x); - const vfloat16 r_max_y = prefix_max(max_y); - const vfloat16 r_max_z = prefix_max(max_z); - const vfloat16 dx = r_max_x - r_min_x; - const vfloat16 dy = r_max_y - r_min_y; - const vfloat16 dz = r_max_z - r_min_z; - const vfloat16 area_lr = madd(dx,dy,madd(dx,dz,dy*dz)); - return area_lr; - } - - - /*! bins an array of primitives */ - __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<16>& mapping) - { - if (unlikely(N == 0)) return; - - const vfloat16 init_min(pos_inf); - const vfloat16 init_max(neg_inf); - - vfloat16 min_x0,min_x1,min_x2; - vfloat16 min_y0,min_y1,min_y2; - vfloat16 min_z0,min_z1,min_z2; - vfloat16 max_x0,max_x1,max_x2; - vfloat16 max_y0,max_y1,max_y2; - vfloat16 max_z0,max_z1,max_z2; - vuint16 count0,count1,count2; - - min_x0 = init_min; - min_x1 = init_min; - min_x2 = init_min; - min_y0 = init_min; - min_y1 = init_min; - min_y2 = init_min; - min_z0 = init_min; - min_z1 = init_min; - min_z2 = init_min; - - max_x0 = init_max; - max_x1 = init_max; - max_x2 = init_max; - max_y0 = init_max; - max_y1 = init_max; - max_y2 = init_max; - max_z0 = init_max; - max_z1 = init_max; - max_z2 = init_max; - - count0 = zero; - count1 = zero; - count2 = zero; - - const vint16 step16(step); - size_t i; - for (i=0; i<N-1; i+=2) - { - /*! map even and odd primitive to bin */ - const BBox3fa primA = prims[i+0].bounds(); - const vfloat16 centerA = vfloat16((vfloat4)primA.lower) + vfloat16((vfloat4)primA.upper); - const vint16 binA = mapping.bin16(centerA); - - const BBox3fa primB = prims[i+1].bounds(); - const vfloat16 centerB = vfloat16((vfloat4)primB.lower) + vfloat16((vfloat4)primB.upper); - const vint16 binB = mapping.bin16(centerB); - - /* A */ - { - const vfloat16 b_min_x = prims[i+0].lower.x; - const vfloat16 b_min_y = prims[i+0].lower.y; - const vfloat16 b_min_z = prims[i+0].lower.z; - const vfloat16 b_max_x = prims[i+0].upper.x; - const vfloat16 b_max_y = prims[i+0].upper.y; - const vfloat16 b_max_z = prims[i+0].upper.z; - - const vint16 bin0 = shuffle<0>(binA); - const vint16 bin1 = shuffle<1>(binA); - const vint16 bin2 = shuffle<2>(binA); - - const vbool16 m_update_x = step16 == bin0; - const vbool16 m_update_y = step16 == bin1; - const vbool16 m_update_z = step16 == bin2; - - assert(popcnt((size_t)m_update_x) == 1); - assert(popcnt((size_t)m_update_y) == 1); - assert(popcnt((size_t)m_update_z) == 1); - - min_x0 = mask_min(m_update_x,min_x0,min_x0,b_min_x); - min_y0 = mask_min(m_update_x,min_y0,min_y0,b_min_y); - min_z0 = mask_min(m_update_x,min_z0,min_z0,b_min_z); - // ------------------------------------------------------------------------ - max_x0 = mask_max(m_update_x,max_x0,max_x0,b_max_x); - max_y0 = mask_max(m_update_x,max_y0,max_y0,b_max_y); - max_z0 = mask_max(m_update_x,max_z0,max_z0,b_max_z); - // ------------------------------------------------------------------------ - min_x1 = mask_min(m_update_y,min_x1,min_x1,b_min_x); - min_y1 = mask_min(m_update_y,min_y1,min_y1,b_min_y); - min_z1 = mask_min(m_update_y,min_z1,min_z1,b_min_z); - // ------------------------------------------------------------------------ - max_x1 = mask_max(m_update_y,max_x1,max_x1,b_max_x); - max_y1 = mask_max(m_update_y,max_y1,max_y1,b_max_y); - max_z1 = mask_max(m_update_y,max_z1,max_z1,b_max_z); - // ------------------------------------------------------------------------ - min_x2 = mask_min(m_update_z,min_x2,min_x2,b_min_x); - min_y2 = mask_min(m_update_z,min_y2,min_y2,b_min_y); - min_z2 = mask_min(m_update_z,min_z2,min_z2,b_min_z); - // ------------------------------------------------------------------------ - max_x2 = mask_max(m_update_z,max_x2,max_x2,b_max_x); - max_y2 = mask_max(m_update_z,max_y2,max_y2,b_max_y); - max_z2 = mask_max(m_update_z,max_z2,max_z2,b_max_z); - // ------------------------------------------------------------------------ - count0 = mask_add(m_update_x,count0,count0,vuint16(1)); - count1 = mask_add(m_update_y,count1,count1,vuint16(1)); - count2 = mask_add(m_update_z,count2,count2,vuint16(1)); - } - - - /* B */ - { - const vfloat16 b_min_x = prims[i+1].lower.x; - const vfloat16 b_min_y = prims[i+1].lower.y; - const vfloat16 b_min_z = prims[i+1].lower.z; - const vfloat16 b_max_x = prims[i+1].upper.x; - const vfloat16 b_max_y = prims[i+1].upper.y; - const vfloat16 b_max_z = prims[i+1].upper.z; - - const vint16 bin0 = shuffle<0>(binB); - const vint16 bin1 = shuffle<1>(binB); - const vint16 bin2 = shuffle<2>(binB); - - const vbool16 m_update_x = step16 == bin0; - const vbool16 m_update_y = step16 == bin1; - const vbool16 m_update_z = step16 == bin2; - - assert(popcnt((size_t)m_update_x) == 1); - assert(popcnt((size_t)m_update_y) == 1); - assert(popcnt((size_t)m_update_z) == 1); - - min_x0 = mask_min(m_update_x,min_x0,min_x0,b_min_x); - min_y0 = mask_min(m_update_x,min_y0,min_y0,b_min_y); - min_z0 = mask_min(m_update_x,min_z0,min_z0,b_min_z); - // ------------------------------------------------------------------------ - max_x0 = mask_max(m_update_x,max_x0,max_x0,b_max_x); - max_y0 = mask_max(m_update_x,max_y0,max_y0,b_max_y); - max_z0 = mask_max(m_update_x,max_z0,max_z0,b_max_z); - // ------------------------------------------------------------------------ - min_x1 = mask_min(m_update_y,min_x1,min_x1,b_min_x); - min_y1 = mask_min(m_update_y,min_y1,min_y1,b_min_y); - min_z1 = mask_min(m_update_y,min_z1,min_z1,b_min_z); - // ------------------------------------------------------------------------ - max_x1 = mask_max(m_update_y,max_x1,max_x1,b_max_x); - max_y1 = mask_max(m_update_y,max_y1,max_y1,b_max_y); - max_z1 = mask_max(m_update_y,max_z1,max_z1,b_max_z); - // ------------------------------------------------------------------------ - min_x2 = mask_min(m_update_z,min_x2,min_x2,b_min_x); - min_y2 = mask_min(m_update_z,min_y2,min_y2,b_min_y); - min_z2 = mask_min(m_update_z,min_z2,min_z2,b_min_z); - // ------------------------------------------------------------------------ - max_x2 = mask_max(m_update_z,max_x2,max_x2,b_max_x); - max_y2 = mask_max(m_update_z,max_y2,max_y2,b_max_y); - max_z2 = mask_max(m_update_z,max_z2,max_z2,b_max_z); - // ------------------------------------------------------------------------ - count0 = mask_add(m_update_x,count0,count0,vuint16(1)); - count1 = mask_add(m_update_y,count1,count1,vuint16(1)); - count2 = mask_add(m_update_z,count2,count2,vuint16(1)); - } - - } - - if (i < N) - { - const BBox3fa prim0 = prims[i].bounds(); - const vfloat16 center0 = vfloat16((vfloat4)prim0.lower) + vfloat16((vfloat4)prim0.upper); - const vint16 bin = mapping.bin16(center0); - - const vfloat16 b_min_x = prims[i].lower.x; - const vfloat16 b_min_y = prims[i].lower.y; - const vfloat16 b_min_z = prims[i].lower.z; - const vfloat16 b_max_x = prims[i].upper.x; - const vfloat16 b_max_y = prims[i].upper.y; - const vfloat16 b_max_z = prims[i].upper.z; - - const vint16 bin0 = shuffle<0>(bin); - const vint16 bin1 = shuffle<1>(bin); - const vint16 bin2 = shuffle<2>(bin); - - const vbool16 m_update_x = step16 == bin0; - const vbool16 m_update_y = step16 == bin1; - const vbool16 m_update_z = step16 == bin2; - - assert(popcnt((size_t)m_update_x) == 1); - assert(popcnt((size_t)m_update_y) == 1); - assert(popcnt((size_t)m_update_z) == 1); - - min_x0 = mask_min(m_update_x,min_x0,min_x0,b_min_x); - min_y0 = mask_min(m_update_x,min_y0,min_y0,b_min_y); - min_z0 = mask_min(m_update_x,min_z0,min_z0,b_min_z); - // ------------------------------------------------------------------------ - max_x0 = mask_max(m_update_x,max_x0,max_x0,b_max_x); - max_y0 = mask_max(m_update_x,max_y0,max_y0,b_max_y); - max_z0 = mask_max(m_update_x,max_z0,max_z0,b_max_z); - // ------------------------------------------------------------------------ - min_x1 = mask_min(m_update_y,min_x1,min_x1,b_min_x); - min_y1 = mask_min(m_update_y,min_y1,min_y1,b_min_y); - min_z1 = mask_min(m_update_y,min_z1,min_z1,b_min_z); - // ------------------------------------------------------------------------ - max_x1 = mask_max(m_update_y,max_x1,max_x1,b_max_x); - max_y1 = mask_max(m_update_y,max_y1,max_y1,b_max_y); - max_z1 = mask_max(m_update_y,max_z1,max_z1,b_max_z); - // ------------------------------------------------------------------------ - min_x2 = mask_min(m_update_z,min_x2,min_x2,b_min_x); - min_y2 = mask_min(m_update_z,min_y2,min_y2,b_min_y); - min_z2 = mask_min(m_update_z,min_z2,min_z2,b_min_z); - // ------------------------------------------------------------------------ - max_x2 = mask_max(m_update_z,max_x2,max_x2,b_max_x); - max_y2 = mask_max(m_update_z,max_y2,max_y2,b_max_y); - max_z2 = mask_max(m_update_z,max_z2,max_z2,b_max_z); - // ------------------------------------------------------------------------ - count0 = mask_add(m_update_x,count0,count0,vuint16(1)); - count1 = mask_add(m_update_y,count1,count1,vuint16(1)); - count2 = mask_add(m_update_z,count2,count2,vuint16(1)); - } - - lower[0] = Vec3vf16( min_x0, min_y0, min_z0 ); - lower[1] = Vec3vf16( min_x1, min_y1, min_z1 ); - lower[2] = Vec3vf16( min_x2, min_y2, min_z2 ); - - upper[0] = Vec3vf16( max_x0, max_y0, max_z0 ); - upper[1] = Vec3vf16( max_x1, max_y1, max_z1 ); - upper[2] = Vec3vf16( max_x2, max_y2, max_z2 ); - - count[0] = count0; - count[1] = count1; - count[2] = count2; - } - - __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<16>& mapping) { - bin(prims+begin,end-begin,mapping); - } - - /*! merges in other binning information */ - __forceinline void merge (const BinInfoT& other, size_t numBins) - { - for (size_t i=0; i<3; i++) - { - lower[i] = min(lower[i],other.lower[i]); - upper[i] = max(upper[i],other.upper[i]); - count[i] += other.count[i]; - } - } - - /*! reducesr binning information */ - static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b) - { - BinInfoT c; - for (size_t i=0; i<3; i++) - { - c.counts[i] = a.counts[i] + b.counts[i]; - c.lower[i] = min(a.lower[i],b.lower[i]); - c.upper[i] = max(a.upper[i],b.upper[i]); - } - return c; - } - - /*! finds the best split by scanning binning information */ - __forceinline Split best(const BinMapping<16>& mapping, const size_t blocks_shift) const - { - /* find best dimension */ - float bestSAH = inf; - int bestDim = -1; - int bestPos = 0; - const vuint16 blocks_add = (1 << blocks_shift)-1; - const vfloat16 inf(pos_inf); - for (size_t dim=0; dim<3; dim++) - { - /* ignore zero sized dimensions */ - if (unlikely(mapping.invalid(dim))) - continue; - - const vfloat16 rArea16 = prefix_area_rl(lower[dim].x,lower[dim].y,lower[dim].z, upper[dim].x,upper[dim].y,upper[dim].z); - const vfloat16 lArea16 = prefix_area_lr(lower[dim].x,lower[dim].y,lower[dim].z, upper[dim].x,upper[dim].y,upper[dim].z); - const vuint16 lCount16 = prefix_sum(count[dim]); - const vuint16 rCount16 = reverse_prefix_sum(count[dim]); - - /* compute best split in this dimension */ - const vfloat16 leftArea = lArea16; - const vfloat16 rightArea = align_shift_right<1>(zero,rArea16); - const vuint16 lC = lCount16; - const vuint16 rC = align_shift_right<1>(zero,rCount16); - const vuint16 leftCount = ( lC + blocks_add) >> blocks_shift; - const vuint16 rightCount = ( rC + blocks_add) >> blocks_shift; - const vbool16 valid = (leftArea < inf) & (rightArea < inf) & vbool16(0x7fff); // handles inf entries - const vfloat16 sah = select(valid,madd(leftArea,vfloat16(leftCount),rightArea*vfloat16(rightCount)),vfloat16(pos_inf)); - /* test if this is a better dimension */ - if (any(sah < vfloat16(bestSAH))) - { - const size_t index = select_min(sah); - assert(index < 15); - assert(sah[index] < bestSAH); - bestDim = dim; - bestPos = index+1; - bestSAH = sah[index]; - } - } - - return Split(bestSAH,bestDim,bestPos,mapping); - - } - - /*! calculates extended split information */ - __forceinline void getSplitInfo(const BinMapping<16>& mapping, const Split& split, SplitInfo& info) const - { - if (split.dim == -1) { - new (&info) SplitInfo(0,empty,0,empty); - return; - } - // FIXME: horizontal reduction! - - size_t leftCount = 0; - BBox3fa leftBounds = empty; - for (size_t i=0; i<(size_t)split.pos; i++) { - leftCount += count[split.dim][i]; - Vec3fa bounds_lower(lower[split.dim].x[i],lower[split.dim].y[i],lower[split.dim].z[i]); - Vec3fa bounds_upper(upper[split.dim].x[i],upper[split.dim].y[i],upper[split.dim].z[i]); - leftBounds.extend(BBox3fa(bounds_lower,bounds_upper)); - } - size_t rightCount = 0; - BBox3fa rightBounds = empty; - for (size_t i=split.pos; i<mapping.size(); i++) { - rightCount += count[split.dim][i]; - Vec3fa bounds_lower(lower[split.dim].x[i],lower[split.dim].y[i],lower[split.dim].z[i]); - Vec3fa bounds_upper(upper[split.dim].x[i],upper[split.dim].y[i],upper[split.dim].z[i]); - rightBounds.extend(BBox3fa(bounds_lower,bounds_upper)); - } - new (&info) SplitInfo(leftCount,leftBounds,rightCount,rightBounds); - } - - /*! gets the number of primitives left of the split */ - __forceinline size_t getLeftCount(const BinMapping<16>& mapping, const Split& split) const - { - if (unlikely(split.dim == -1)) return -1; - - size_t leftCount = 0; - for (size_t i = 0; i < (size_t)split.pos; i++) { - leftCount += count[split.dim][i]; - } - return leftCount; - } - - /*! gets the number of primitives right of the split */ - __forceinline size_t getRightCount(const BinMapping<16>& mapping, const Split& split) const - { - if (unlikely(split.dim == -1)) return -1; - - size_t rightCount = 0; - for (size_t i = (size_t)split.pos; i<mapping.size(); i++) { - rightCount += count[split.dim][i]; - } - return rightCount; - } - - private: - Vec3vf16 lower[3]; - Vec3vf16 upper[3]; - vuint16 count[3]; - }; -#endif - } - - template<typename BinInfoT, typename BinMapping, typename PrimRef> - __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping) - { - if (likely(end-begin < parallelThreshold)) { - binner.bin(prims,begin,end,mapping); - } else { - binner = parallel_reduce(begin,end,blockSize,binner, - [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; }, - [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); - } - } - - template<typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef> - __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter) - { - if (likely(end-begin < parallelThreshold)) { - binner.bin(prims,begin,end,mapping,binBoundsAndCenter); - } else { - binner = parallel_reduce(begin,end,blockSize,binner, - [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; }, - [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); - } - } - - template<bool parallel, typename BinInfoT, typename BinMapping, typename PrimRef> - __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping) - { - if (!parallel) { - binner.bin(prims,begin,end,mapping); - } else { - binner = parallel_reduce(begin,end,blockSize,binner, - [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; }, - [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); - } - } - - template<bool parallel, typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef> - __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter) - { - if (!parallel) { - binner.bin(prims,begin,end,mapping,binBoundsAndCenter); - } else { - binner = parallel_reduce(begin,end,blockSize,binner, - [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; }, - [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); - } - } -} diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_aligned.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_aligned.h deleted file mode 100644 index a4c272f015..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_aligned.h +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "heuristic_binning.h" - -namespace embree -{ - namespace isa - { - struct PrimInfoRange : public CentGeomBBox3fa, public range<size_t> - { - __forceinline PrimInfoRange () { - } - - __forceinline PrimInfoRange(const PrimInfo& pinfo) - : CentGeomBBox3fa(pinfo), range<size_t>(pinfo.begin,pinfo.end) {} - - __forceinline PrimInfoRange(EmptyTy) - : CentGeomBBox3fa(EmptyTy()), range<size_t>(0,0) {} - - __forceinline PrimInfoRange (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds) - : CentGeomBBox3fa(centGeomBounds), range<size_t>(begin,end) {} - - __forceinline float leafSAH() const { - return expectedApproxHalfArea(geomBounds)*float(size()); - } - - __forceinline float leafSAH(size_t block_shift) const { - return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift); - } - }; - - /*! Performs standard object binning */ - template<typename PrimRef, size_t BINS> - struct HeuristicArrayBinningSAH - { - typedef BinSplit<BINS> Split; - typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner; - typedef range<size_t> Set; - -#if defined(__AVX512ER__) // KNL - static const size_t PARALLEL_THRESHOLD = 4*768; - static const size_t PARALLEL_FIND_BLOCK_SIZE = 768; - static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 768; -#else - static const size_t PARALLEL_THRESHOLD = 3 * 1024; - static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; - static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; -#endif - __forceinline HeuristicArrayBinningSAH () - : prims(nullptr) {} - - /*! remember prim array */ - __forceinline HeuristicArrayBinningSAH (PrimRef* prims) - : prims(prims) {} - - /*! finds the best split */ - __noinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize) - { - if (likely(pinfo.size() < PARALLEL_THRESHOLD)) - return find_template<false>(pinfo,logBlockSize); - else - return find_template<true>(pinfo,logBlockSize); - } - - template<bool parallel> - __forceinline const Split find_template(const PrimInfoRange& pinfo, const size_t logBlockSize) - { - Binner binner(empty); - const BinMapping<BINS> mapping(pinfo); - bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping); - return binner.best(mapping,logBlockSize); - } - - /*! array partitioning */ - __forceinline void split(const Split& split, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo) - { - if (likely(pinfo.size() < PARALLEL_THRESHOLD)) - split_template<false>(split,pinfo,linfo,rinfo); - else - split_template<true>(split,pinfo,linfo,rinfo); - } - - template<bool parallel> - __forceinline void split_template(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset) - { - if (!split.valid()) { - deterministic_order(set); - return splitFallback(set,lset,rset); - } - - const size_t begin = set.begin(); - const size_t end = set.end(); - CentGeomBBox3fa local_left(empty); - CentGeomBBox3fa local_right(empty); - const unsigned int splitPos = split.pos; - const unsigned int splitDim = split.dim; - const unsigned int splitDimMask = (unsigned int)1 << splitDim; - - const typename Binner::vint vSplitPos(splitPos); - const typename Binner::vbool vSplitMask(splitDimMask); - auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); }; - - size_t center = 0; - if (!parallel) - center = serial_partitioning(prims,begin,end,local_left,local_right,isLeft, - [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); }); - else - center = parallel_partitioning( - prims,begin,end,EmptyTy(),local_left,local_right,isLeft, - [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); }, - [] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); }, - PARALLEL_PARTITION_BLOCK_SIZE); - - new (&lset) PrimInfoRange(begin,center,local_left); - new (&rset) PrimInfoRange(center,end,local_right); - assert(area(lset.geomBounds) >= 0.0f); - assert(area(rset.geomBounds) >= 0.0f); - } - - void deterministic_order(const PrimInfoRange& pinfo) - { - /* required as parallel partition destroys original primitive order */ - std::sort(&prims[pinfo.begin()],&prims[pinfo.end()]); - } - - void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo) - { - const size_t begin = pinfo.begin(); - const size_t end = pinfo.end(); - const size_t center = (begin + end)/2; - - CentGeomBBox3fa left(empty); - for (size_t i=begin; i<center; i++) - left.extend_center2(prims[i]); - new (&linfo) PrimInfoRange(begin,center,left); - - CentGeomBBox3fa right(empty); - for (size_t i=center; i<end; i++) - right.extend_center2(prims[i]); - new (&rinfo) PrimInfoRange(center,end,right); - } - - void splitByGeometry(const range<size_t>& range, PrimInfoRange& linfo, PrimInfoRange& rinfo) - { - assert(range.size() > 1); - CentGeomBBox3fa left(empty); - CentGeomBBox3fa right(empty); - unsigned int geomID = prims[range.begin()].geomID(); - size_t center = serial_partitioning(prims,range.begin(),range.end(),left,right, - [&] ( const PrimRef& prim ) { return prim.geomID() == geomID; }, - [ ] ( CentGeomBBox3fa& a, const PrimRef& ref ) { a.extend_center2(ref); }); - - new (&linfo) PrimInfoRange(range.begin(),center,left); - new (&rinfo) PrimInfoRange(center,range.end(),right); - } - - private: - PrimRef* const prims; - }; - - /*! Performs standard object binning */ - template<typename PrimRefMB, size_t BINS> - struct HeuristicArrayBinningMB - { - typedef BinSplit<BINS> Split; - typedef typename PrimRefMB::BBox BBox; - typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner; - static const size_t PARALLEL_THRESHOLD = 3 * 1024; - static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; - static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; - - /*! finds the best split */ - const Split find(const SetMB& set, const size_t logBlockSize) - { - ObjectBinner binner(empty); - const BinMapping<BINS> mapping(set.size(),set.centBounds); - bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping); - Split osplit = binner.best(mapping,logBlockSize); - osplit.sah *= set.time_range.size(); - if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split - return osplit; - } - - /*! array partitioning */ - __forceinline void split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset) - { - const size_t begin = set.begin(); - const size_t end = set.end(); - PrimInfoMB left = empty; - PrimInfoMB right = empty; - const vint4 vSplitPos(split.pos); - const vbool4 vSplitMask(1 << split.dim); - auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref) < vSplitPos) & vSplitMask); }; - auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); }; - auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); }; - size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD); - new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range); - new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range); - } - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_unaligned.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_unaligned.h deleted file mode 100644 index 1370244586..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_unaligned.h +++ /dev/null @@ -1,302 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "heuristic_binning.h" - -namespace embree -{ - namespace isa - { - /*! Performs standard object binning */ - template<typename PrimRef, size_t BINS> - struct UnalignedHeuristicArrayBinningSAH - { - typedef BinSplit<BINS> Split; - typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner; - typedef range<size_t> Set; - - __forceinline UnalignedHeuristicArrayBinningSAH () // FIXME: required? - : scene(nullptr), prims(nullptr) {} - - /*! remember prim array */ - __forceinline UnalignedHeuristicArrayBinningSAH (Scene* scene, PrimRef* prims) - : scene(scene), prims(prims) {} - - const LinearSpace3fa computeAlignedSpace(const range<size_t>& set) - { - Vec3fa axis(0,0,1); - uint64_t bestGeomPrimID = -1; - - /*! find curve with minimum ID that defines valid direction */ - for (size_t i=set.begin(); i<set.end(); i++) - { - const unsigned int geomID = prims[i].geomID(); - const unsigned int primID = prims[i].primID(); - const uint64_t geomprimID = prims[i].ID64(); - if (geomprimID >= bestGeomPrimID) continue; - const Vec3fa axis1 = scene->get(geomID)->computeDirection(primID); - if (sqr_length(axis1) > 1E-18f) { - axis = normalize(axis1); - bestGeomPrimID = geomprimID; - } - } - return frame(axis).transposed(); - } - - const PrimInfo computePrimInfo(const range<size_t>& set, const LinearSpace3fa& space) - { - auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa - { - CentGeomBBox3fa bounds(empty); - for (size_t i=r.begin(); i<r.end(); i++) { - Geometry* mesh = scene->get(prims[i].geomID()); - bounds.extend(mesh->vbounds(space,prims[i].primID())); - } - return bounds; - }; - - const CentGeomBBox3fa bounds = parallel_reduce(set.begin(), set.end(), size_t(1024), size_t(4096), - CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2); - - return PrimInfo(set.begin(),set.end(),bounds); - } - - struct BinBoundsAndCenter - { - __forceinline BinBoundsAndCenter(Scene* scene, const LinearSpace3fa& space) - : scene(scene), space(space) {} - - /*! returns center for binning */ - __forceinline Vec3fa binCenter(const PrimRef& ref) const - { - Geometry* mesh = (Geometry*) scene->get(ref.geomID()); - BBox3fa bounds = mesh->vbounds(space,ref.primID()); - return embree::center2(bounds); - } - - /*! returns bounds and centroid used for binning */ - __forceinline void binBoundsAndCenter(const PrimRef& ref, BBox3fa& bounds_o, Vec3fa& center_o) const - { - Geometry* mesh = (Geometry*) scene->get(ref.geomID()); - BBox3fa bounds = mesh->vbounds(space,ref.primID()); - bounds_o = bounds; - center_o = embree::center2(bounds); - } - - private: - Scene* scene; - const LinearSpace3fa space; - }; - - /*! finds the best split */ - __forceinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize, const LinearSpace3fa& space) - { - if (likely(pinfo.size() < 10000)) - return find_template<false>(pinfo,logBlockSize,space); - else - return find_template<true>(pinfo,logBlockSize,space); - } - - /*! finds the best split */ - template<bool parallel> - const Split find_template(const PrimInfoRange& set, const size_t logBlockSize, const LinearSpace3fa& space) - { - Binner binner(empty); - const BinMapping<BINS> mapping(set); - BinBoundsAndCenter binBoundsAndCenter(scene,space); - bin_serial_or_parallel<parallel>(binner,prims,set.begin(),set.end(),size_t(4096),mapping,binBoundsAndCenter); - return binner.best(mapping,logBlockSize); - } - - /*! array partitioning */ - __forceinline void split(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset) - { - if (likely(set.size() < 10000)) - split_template<false>(split,space,set,lset,rset); - else - split_template<true>(split,space,set,lset,rset); - } - - /*! array partitioning */ - template<bool parallel> - __forceinline void split_template(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset) - { - if (!split.valid()) { - deterministic_order(set); - return splitFallback(set,lset,rset); - } - - const size_t begin = set.begin(); - const size_t end = set.end(); - CentGeomBBox3fa local_left(empty); - CentGeomBBox3fa local_right(empty); - const int splitPos = split.pos; - const int splitDim = split.dim; - BinBoundsAndCenter binBoundsAndCenter(scene,space); - - size_t center = 0; - if (likely(set.size() < 10000)) - center = serial_partitioning(prims,begin,end,local_left,local_right, - [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; }, - [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); }); - else - center = parallel_partitioning(prims,begin,end,EmptyTy(),local_left,local_right, - [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; }, - [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); }, - [] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); }, - 128); - - new (&lset) PrimInfoRange(begin,center,local_left); - new (&rset) PrimInfoRange(center,end,local_right); - assert(area(lset.geomBounds) >= 0.0f); - assert(area(rset.geomBounds) >= 0.0f); - } - - void deterministic_order(const range<size_t>& set) - { - /* required as parallel partition destroys original primitive order */ - std::sort(&prims[set.begin()],&prims[set.end()]); - } - - void splitFallback(const range<size_t>& set, PrimInfoRange& lset, PrimInfoRange& rset) - { - const size_t begin = set.begin(); - const size_t end = set.end(); - const size_t center = (begin + end)/2; - - CentGeomBBox3fa left(empty); - for (size_t i=begin; i<center; i++) - left.extend_center2(prims[i]); - new (&lset) PrimInfoRange(begin,center,left); - - CentGeomBBox3fa right(empty); - for (size_t i=center; i<end; i++) - right.extend_center2(prims[i]); - new (&rset) PrimInfoRange(center,end,right); - } - - private: - Scene* const scene; - PrimRef* const prims; - }; - - /*! Performs standard object binning */ - template<typename PrimRefMB, size_t BINS> - struct UnalignedHeuristicArrayBinningMB - { - typedef BinSplit<BINS> Split; - typedef typename PrimRefMB::BBox BBox; - typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner; - - static const size_t PARALLEL_THRESHOLD = 3 * 1024; - static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; - static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; - - UnalignedHeuristicArrayBinningMB(Scene* scene) - : scene(scene) {} - - const LinearSpace3fa computeAlignedSpaceMB(Scene* scene, const SetMB& set) - { - Vec3fa axis0(0,0,1); - uint64_t bestGeomPrimID = -1; - - /*! find curve with minimum ID that defines valid direction */ - for (size_t i=set.begin(); i<set.end(); i++) - { - const PrimRefMB& prim = (*set.prims)[i]; - const unsigned int geomID = prim.geomID(); - const unsigned int primID = prim.primID(); - const uint64_t geomprimID = prim.ID64(); - if (geomprimID >= bestGeomPrimID) continue; - - const Geometry* mesh = scene->get(geomID); - const range<int> tbounds = mesh->timeSegmentRange(set.time_range); - if (tbounds.size() == 0) continue; - - const size_t t = (tbounds.begin()+tbounds.end())/2; - const Vec3fa axis1 = mesh->computeDirection(primID,t); - if (sqr_length(axis1) > 1E-18f) { - axis0 = normalize(axis1); - bestGeomPrimID = geomprimID; - } - } - - return frame(axis0).transposed(); - } - - struct BinBoundsAndCenter - { - __forceinline BinBoundsAndCenter(Scene* scene, BBox1f time_range, const LinearSpace3fa& space) - : scene(scene), time_range(time_range), space(space) {} - - /*! returns center for binning */ - template<typename PrimRef> - __forceinline Vec3fa binCenter(const PrimRef& ref) const - { - Geometry* mesh = scene->get(ref.geomID()); - LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range); - return center2(lbounds.interpolate(0.5f)); - } - - /*! returns bounds and centroid used for binning */ - __noinline void binBoundsAndCenter (const PrimRefMB& ref, BBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX - { - Geometry* mesh = scene->get(ref.geomID()); - LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range); - bounds_o = lbounds.interpolate(0.5f); - center_o = center2(bounds_o); - } - - /*! returns bounds and centroid used for binning */ - __noinline void binBoundsAndCenter (const PrimRefMB& ref, LBBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX - { - Geometry* mesh = scene->get(ref.geomID()); - LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range); - bounds_o = lbounds; - center_o = center2(lbounds.interpolate(0.5f)); - } - - private: - Scene* scene; - BBox1f time_range; - const LinearSpace3fa space; - }; - - /*! finds the best split */ - const Split find(const SetMB& set, const size_t logBlockSize, const LinearSpace3fa& space) - { - BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space); - ObjectBinner binner(empty); - const BinMapping<BINS> mapping(set.size(),set.centBounds); - bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping,binBoundsAndCenter); - Split osplit = binner.best(mapping,logBlockSize); - osplit.sah *= set.time_range.size(); - if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split - return osplit; - } - - /*! array partitioning */ - __forceinline void split(const Split& split, const LinearSpace3fa& space, const SetMB& set, SetMB& lset, SetMB& rset) - { - BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space); - const size_t begin = set.begin(); - const size_t end = set.end(); - PrimInfoMB left = empty; - PrimInfoMB right = empty; - const vint4 vSplitPos(split.pos); - const vbool4 vSplitMask(1 << split.dim); - auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref,binBoundsAndCenter) < vSplitPos) & vSplitMask); }; - auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); }; - auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); }; - size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD); - new (&lset) SetMB(left,set.prims,range<size_t>(begin,center),set.time_range); - new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range); - } - - private: - Scene* scene; - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_openmerge_array.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_openmerge_array.h deleted file mode 100644 index 21f18c0208..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_openmerge_array.h +++ /dev/null @@ -1,443 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -// TODO: -// - adjust parallel build thresholds -// - openNodesBasedOnExtend should consider max extended size - -#pragma once - -#include "heuristic_binning.h" -#include "heuristic_spatial.h" - -/* stop opening of all bref.geomIDs are the same */ -#define EQUAL_GEOMID_STOP_CRITERIA 1 - -/* 10% spatial extend threshold */ -#define MAX_EXTEND_THRESHOLD 0.1f - -/* maximum is 8 children */ -#define MAX_OPENED_CHILD_NODES 8 - -/* open until all build refs are below threshold size in one step */ -#define USE_LOOP_OPENING 0 - -namespace embree -{ - namespace isa - { - /*! Performs standard object binning */ - template<typename NodeOpenerFunc, typename PrimRef, size_t OBJECT_BINS> - struct HeuristicArrayOpenMergeSAH - { - typedef BinSplit<OBJECT_BINS> Split; - typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> Binner; - - static const size_t PARALLEL_THRESHOLD = 1024; - static const size_t PARALLEL_FIND_BLOCK_SIZE = 512; - static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; - - static const size_t MOVE_STEP_SIZE = 64; - static const size_t CREATE_SPLITS_STEP_SIZE = 128; - - __forceinline HeuristicArrayOpenMergeSAH () - : prims0(nullptr) {} - - /*! remember prim array */ - __forceinline HeuristicArrayOpenMergeSAH (const NodeOpenerFunc& nodeOpenerFunc, PrimRef* prims0, size_t max_open_size) - : prims0(prims0), nodeOpenerFunc(nodeOpenerFunc), max_open_size(max_open_size) - { - assert(max_open_size <= MAX_OPENED_CHILD_NODES); - } - - struct OpenHeuristic - { - __forceinline OpenHeuristic( const PrimInfoExtRange& pinfo ) - { - const Vec3fa diag = pinfo.geomBounds.size(); - dim = maxDim(diag); - assert(diag[dim] > 0.0f); - inv_max_extend = 1.0f / diag[dim]; - } - - __forceinline bool operator () ( PrimRef& prim ) const { - return !prim.node.isLeaf() && prim.bounds().size()[dim] * inv_max_extend > MAX_EXTEND_THRESHOLD; - } - - private: - size_t dim; - float inv_max_extend; - }; - - /*! compute extended ranges */ - __forceinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight) - { - assert(set.ext_range_size() > 0); - const float left_factor = (float)lweight / (lweight + rweight); - const size_t ext_range_size = set.ext_range_size(); - const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size); - const size_t right_ext_range_size = ext_range_size - left_ext_range_size; - lset.set_ext_range(lset.end() + left_ext_range_size); - rset.set_ext_range(rset.end() + right_ext_range_size); - } - - /*! move ranges */ - __forceinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset) - { - const size_t left_ext_range_size = lset.ext_range_size(); - const size_t right_size = rset.size(); - - /* has the left child an extended range? */ - if (left_ext_range_size > 0) - { - /* left extended range smaller than right range ? */ - if (left_ext_range_size < right_size) - { - /* only move a small part of the beginning of the right range to the end */ - parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) { - for (size_t i=r.begin(); i<r.end(); i++) - prims0[i+right_size] = prims0[i]; - }); - } - else - { - /* no overlap, move entire right range to new location, can be made fully parallel */ - parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) { - for (size_t i=r.begin(); i<r.end(); i++) - prims0[i+left_ext_range_size] = prims0[i]; - }); - } - /* update right range */ - assert(rset.ext_end() + left_ext_range_size == set.ext_end()); - rset.move_right(left_ext_range_size); - } - } - - /* estimates the extra space required when opening, and checks if all primitives are from same geometry */ - __noinline std::pair<size_t,bool> getProperties(const PrimInfoExtRange& set) - { - const OpenHeuristic heuristic(set); - const unsigned int geomID = prims0[set.begin()].geomID(); - - auto body = [&] (const range<size_t>& r) -> std::pair<size_t,bool> { - bool commonGeomID = true; - size_t opens = 0; - for (size_t i=r.begin(); i<r.end(); i++) { - commonGeomID &= prims0[i].geomID() == geomID; - if (heuristic(prims0[i])) - opens += prims0[i].node.getN()-1; // coarse approximation - } - return std::pair<size_t,bool>(opens,commonGeomID); - }; - auto reduction = [&] (const std::pair<size_t,bool>& b0, const std::pair<size_t,bool>& b1) -> std::pair<size_t,bool> { - return std::pair<size_t,bool>(b0.first+b1.first,b0.second && b1.second); - }; - return parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,std::pair<size_t,bool>(0,true),body,reduction); - } - - // FIXME: should consider maximum available extended size - __noinline void openNodesBasedOnExtend(PrimInfoExtRange& set) - { - const OpenHeuristic heuristic(set); - const size_t ext_range_start = set.end(); - - if (false && set.size() < PARALLEL_THRESHOLD) - { - size_t extra_elements = 0; - for (size_t i=set.begin(); i<set.end(); i++) - { - if (heuristic(prims0[i])) - { - PrimRef tmp[MAX_OPENED_CHILD_NODES]; - const size_t n = nodeOpenerFunc(prims0[i],tmp); - assert(extra_elements + n-1 <= set.ext_range_size()); - for (size_t j=0; j<n; j++) - set.extend_center2(tmp[j]); - - prims0[i] = tmp[0]; - for (size_t j=1; j<n; j++) - prims0[ext_range_start+extra_elements+j-1] = tmp[j]; - extra_elements += n-1; - } - } - set._end += extra_elements; - } - else - { - std::atomic<size_t> ext_elements; - ext_elements.store(0); - PrimInfo info = parallel_reduce( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, PrimInfo(empty), [&](const range<size_t>& r) -> PrimInfo { - PrimInfo info(empty); - for (size_t i=r.begin(); i<r.end(); i++) - if (heuristic(prims0[i])) - { - PrimRef tmp[MAX_OPENED_CHILD_NODES]; - const size_t n = nodeOpenerFunc(prims0[i],tmp); - const size_t ID = ext_elements.fetch_add(n-1); - assert(ID + n-1 <= set.ext_range_size()); - - for (size_t j=0; j<n; j++) - info.extend_center2(tmp[j]); - - prims0[i] = tmp[0]; - for (size_t j=1; j<n; j++) - prims0[ext_range_start+ID+j-1] = tmp[j]; - } - return info; - }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); }); - set.centBounds.extend(info.centBounds); - assert(ext_elements.load() <= set.ext_range_size()); - set._end += ext_elements.load(); - } - } - - __noinline void openNodesBasedOnExtendLoop(PrimInfoExtRange& set, const size_t est_new_elements) - { - const OpenHeuristic heuristic(set); - size_t next_iteration_extra_elements = est_new_elements; - - while (next_iteration_extra_elements <= set.ext_range_size()) - { - next_iteration_extra_elements = 0; - size_t extra_elements = 0; - const size_t ext_range_start = set.end(); - - for (size_t i=set.begin(); i<set.end(); i++) - { - if (heuristic(prims0[i])) - { - PrimRef tmp[MAX_OPENED_CHILD_NODES]; - const size_t n = nodeOpenerFunc(prims0[i],tmp); - assert(extra_elements + n-1 <= set.ext_range_size()); - for (size_t j=0;j<n;j++) - set.extend_center2(tmp[j]); - - prims0[i] = tmp[0]; - for (size_t j=1;j<n;j++) - prims0[ext_range_start+extra_elements+j-1] = tmp[j]; - extra_elements += n-1; - - for (size_t j=0; j<n; j++) - if (heuristic(tmp[j])) - next_iteration_extra_elements += tmp[j].node.getN()-1; // coarse approximation - - } - } - assert( extra_elements <= set.ext_range_size()); - set._end += extra_elements; - - for (size_t i=set.begin();i<set.end();i++) - assert(prims0[i].numPrimitives() > 0); - - if (unlikely(next_iteration_extra_elements == 0)) break; - } - } - - __noinline const Split find(PrimInfoExtRange& set, const size_t logBlockSize) - { - /* single element */ - if (set.size() <= 1) - return Split(); - - /* disable opening if there is no overlap */ - const size_t D = 4; - if (unlikely(set.has_ext_range() && set.size() <= D)) - { - bool disjoint = true; - for (size_t j=set.begin(); j<set.end()-1; j++) { - for (size_t i=set.begin()+1; i<set.end(); i++) { - if (conjoint(prims0[j].bounds(),prims0[i].bounds())) { - disjoint = false; break; - } - } - } - if (disjoint) set.set_ext_range(set.end()); /* disables opening */ - } - - std::pair<size_t,bool> p(0,false); - - /* disable opening when all primitives are from same geometry */ - if (unlikely(set.has_ext_range())) - { - p = getProperties(set); -#if EQUAL_GEOMID_STOP_CRITERIA == 1 - if (p.second) set.set_ext_range(set.end()); /* disable opening */ -#endif - } - - /* open nodes when we have sufficient space available */ - if (unlikely(set.has_ext_range())) - { -#if USE_LOOP_OPENING == 1 - openNodesBasedOnExtendLoop(set,p.first); -#else - if (p.first <= set.ext_range_size()) - openNodesBasedOnExtend(set); -#endif - - /* disable opening when unsufficient space for opening a node available */ - if (set.ext_range_size() < max_open_size-1) - set.set_ext_range(set.end()); /* disable opening */ - } - - /* find best split */ - return object_find(set,logBlockSize); - } - - - /*! finds the best object split */ - __forceinline const Split object_find(const PrimInfoExtRange& set,const size_t logBlockSize) - { - if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize); - else return parallel_object_find (set,logBlockSize); - } - - /*! finds the best object split */ - __noinline const Split sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize) - { - Binner binner(empty); - const BinMapping<OBJECT_BINS> mapping(set.centBounds); - binner.bin(prims0,set.begin(),set.end(),mapping); - return binner.best(mapping,logBlockSize); - } - - /*! finds the best split */ - __noinline const Split parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize) - { - Binner binner(empty); - const BinMapping<OBJECT_BINS> mapping(set.centBounds); - const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround - auto body = [&] (const range<size_t>& r) -> Binner { - Binner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner; - }; - auto reduction = [&] (const Binner& b0, const Binner& b1) -> Binner { - Binner r = b0; r.merge(b1,_mapping.size()); return r; - }; - binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,body,reduction); - return binner.best(mapping,logBlockSize); - } - - /*! array partitioning */ - __noinline void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset) - { - PrimInfoExtRange set = set_i; - - /* valid split */ - if (unlikely(!split.valid())) { - deterministic_order(set); - splitFallback(set,lset,rset); - return; - } - - std::pair<size_t,size_t> ext_weights(0,0); - - /* object split */ - if (likely(set.size() < PARALLEL_THRESHOLD)) - ext_weights = sequential_object_split(split,set,lset,rset); - else - ext_weights = parallel_object_split(split,set,lset,rset); - - /* if we have an extended range, set extended child ranges and move right split range */ - if (unlikely(set.has_ext_range())) - { - setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second); - moveExtentedRange(set,lset,rset); - } - } - - /*! array partitioning */ - std::pair<size_t,size_t> sequential_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset) - { - const size_t begin = set.begin(); - const size_t end = set.end(); - PrimInfo local_left(empty); - PrimInfo local_right(empty); - const unsigned int splitPos = split.pos; - const unsigned int splitDim = split.dim; - const unsigned int splitDimMask = (unsigned int)1 << splitDim; - - const vint4 vSplitPos(splitPos); - const vbool4 vSplitMask( (int)splitDimMask ); - - size_t center = serial_partitioning(prims0, - begin,end,local_left,local_right, - [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); }, - [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); }); - - new (&lset) PrimInfoExtRange(begin,center,center,local_left); - new (&rset) PrimInfoExtRange(center,end,end,local_right); - assert(area(lset.geomBounds) >= 0.0f); - assert(area(rset.geomBounds) >= 0.0f); - return std::pair<size_t,size_t>(local_left.size(),local_right.size()); - } - - /*! array partitioning */ - __noinline std::pair<size_t,size_t> parallel_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset) - { - const size_t begin = set.begin(); - const size_t end = set.end(); - PrimInfo left(empty); - PrimInfo right(empty); - const unsigned int splitPos = split.pos; - const unsigned int splitDim = split.dim; - const unsigned int splitDimMask = (unsigned int)1 << splitDim; - - const vint4 vSplitPos(splitPos); - const vbool4 vSplitMask( (int)splitDimMask ); - auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); }; - - const size_t center = parallel_partitioning( - prims0,begin,end,EmptyTy(),left,right,isLeft, - [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); }, - [] (PrimInfo& pinfo0,const PrimInfo& pinfo1) { pinfo0.merge(pinfo1); }, - PARALLEL_PARTITION_BLOCK_SIZE); - - new (&lset) PrimInfoExtRange(begin,center,center,left); - new (&rset) PrimInfoExtRange(center,end,end,right); - assert(area(lset.geomBounds) >= 0.0f); - assert(area(rset.geomBounds) >= 0.0f); - - return std::pair<size_t,size_t>(left.size(),right.size()); - } - - void deterministic_order(const extended_range<size_t>& set) - { - /* required as parallel partition destroys original primitive order */ - std::sort(&prims0[set.begin()],&prims0[set.end()]); - } - - __forceinline void splitFallback(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset) - { - const size_t begin = set.begin(); - const size_t end = set.end(); - const size_t center = (begin + end)/2; - - PrimInfo left(empty); - for (size_t i=begin; i<center; i++) - left.add_center2(prims0[i]); - - const size_t lweight = left.end; - - PrimInfo right(empty); - for (size_t i=center; i<end; i++) - right.add_center2(prims0[i]); - - const size_t rweight = right.end; - new (&lset) PrimInfoExtRange(begin,center,center,left); - new (&rset) PrimInfoExtRange(center,end,end,right); - - /* if we have an extended range */ - if (set.has_ext_range()) - { - setExtentedRanges(set,lset,rset,lweight,rweight); - moveExtentedRange(set,lset,rset); - } - } - - private: - PrimRef* const prims0; - const NodeOpenerFunc& nodeOpenerFunc; - size_t max_open_size; - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial.h deleted file mode 100644 index d8ca6cb92c..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial.h +++ /dev/null @@ -1,414 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/scene.h" -#include "priminfo.h" - -namespace embree -{ - static const unsigned int RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS = 5; - - namespace isa - { - - /*! mapping into bins */ - template<size_t BINS> - struct SpatialBinMapping - { - public: - __forceinline SpatialBinMapping() {} - - /*! calculates the mapping */ - __forceinline SpatialBinMapping(const CentGeomBBox3fa& pinfo) - { - const vfloat4 lower = (vfloat4) pinfo.geomBounds.lower; - const vfloat4 upper = (vfloat4) pinfo.geomBounds.upper; - const vfloat4 eps = 128.0f*vfloat4(ulp)*max(abs(lower),abs(upper)); - const vfloat4 diag = max(eps,(vfloat4) pinfo.geomBounds.size()); - scale = select(upper-lower <= eps,vfloat4(0.0f),vfloat4(BINS)/diag); - ofs = (vfloat4) pinfo.geomBounds.lower; - inv_scale = 1.0f / scale; - } - - /*! slower but safe binning */ - __forceinline vint4 bin(const Vec3fa& p) const - { - const vint4 i = floori((vfloat4(p)-ofs)*scale); - return clamp(i,vint4(0),vint4(BINS-1)); - } - - __forceinline std::pair<vint4,vint4> bin(const BBox3fa& b) const - { -#if defined(__AVX__) - const vfloat8 ofs8(ofs); - const vfloat8 scale8(scale); - const vint8 lu = floori((vfloat8::loadu(&b)-ofs8)*scale8); - const vint8 c_lu = clamp(lu,vint8(zero),vint8(BINS-1)); - return std::pair<vint4,vint4>(extract4<0>(c_lu),extract4<1>(c_lu)); -#else - const vint4 lower = floori((vfloat4(b.lower)-ofs)*scale); - const vint4 upper = floori((vfloat4(b.upper)-ofs)*scale); - const vint4 c_lower = clamp(lower,vint4(0),vint4(BINS-1)); - const vint4 c_upper = clamp(upper,vint4(0),vint4(BINS-1)); - return std::pair<vint4,vint4>(c_lower,c_upper); -#endif - } - - - /*! calculates left spatial position of bin */ - __forceinline float pos(const size_t bin, const size_t dim) const { - return madd(float(bin),inv_scale[dim],ofs[dim]); - } - - /*! calculates left spatial position of bin */ - template<size_t N> - __forceinline vfloat<N> posN(const vfloat<N> bin, const size_t dim) const { - return madd(bin,vfloat<N>(inv_scale[dim]),vfloat<N>(ofs[dim])); - } - - /*! returns true if the mapping is invalid in some dimension */ - __forceinline bool invalid(const size_t dim) const { - return scale[dim] == 0.0f; - } - - public: - vfloat4 ofs,scale,inv_scale; //!< linear function that maps to bin ID - }; - - /*! stores all information required to perform some split */ - template<size_t BINS> - struct SpatialBinSplit - { - /*! construct an invalid split by default */ - __forceinline SpatialBinSplit() - : sah(inf), dim(-1), pos(0), left(-1), right(-1), factor(1.0f) {} - - /*! constructs specified split */ - __forceinline SpatialBinSplit(float sah, int dim, int pos, const SpatialBinMapping<BINS>& mapping) - : sah(sah), dim(dim), pos(pos), left(-1), right(-1), factor(1.0f), mapping(mapping) {} - - /*! constructs specified split */ - __forceinline SpatialBinSplit(float sah, int dim, int pos, int left, int right, float factor, const SpatialBinMapping<BINS>& mapping) - : sah(sah), dim(dim), pos(pos), left(left), right(right), factor(factor), mapping(mapping) {} - - /*! tests if this split is valid */ - __forceinline bool valid() const { return dim != -1; } - - /*! calculates surface area heuristic for performing the split */ - __forceinline float splitSAH() const { return sah; } - - /*! stream output */ - friend embree_ostream operator<<(embree_ostream cout, const SpatialBinSplit& split) { - return cout << "SpatialBinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << ", left = " << split.left << ", right = " << split.right << ", factor = " << split.factor << "}"; - } - - public: - float sah; //!< SAH cost of the split - int dim; //!< split dimension - int pos; //!< split position - int left; //!< number of elements on the left side - int right; //!< number of elements on the right side - float factor; //!< factor splitting the extended range - SpatialBinMapping<BINS> mapping; //!< mapping into bins - }; - - /*! stores all binning information */ - template<size_t BINS, typename PrimRef> - struct __aligned(64) SpatialBinInfo - { - SpatialBinInfo() { - } - - __forceinline SpatialBinInfo(EmptyTy) { - clear(); - } - - /*! clears the bin info */ - __forceinline void clear() - { - for (size_t i=0; i<BINS; i++) { - bounds[i][0] = bounds[i][1] = bounds[i][2] = empty; - numBegin[i] = numEnd[i] = 0; - } - } - - /*! adds binning data */ - __forceinline void add(const size_t dim, - const size_t beginID, - const size_t endID, - const size_t binID, - const BBox3fa &b, - const size_t n = 1) - { - assert(beginID < BINS); - assert(endID < BINS); - assert(binID < BINS); - - numBegin[beginID][dim]+=(unsigned int)n; - numEnd [endID][dim]+=(unsigned int)n; - bounds [binID][dim].extend(b); - } - - /*! extends binning bounds */ - __forceinline void extend(const size_t dim, - const size_t binID, - const BBox3fa &b) - { - assert(binID < BINS); - bounds [binID][dim].extend(b); - } - - /*! bins an array of triangles */ - template<typename SplitPrimitive> - __forceinline void bin(const SplitPrimitive& splitPrimitive, const PrimRef* prims, size_t N, const SpatialBinMapping<BINS>& mapping) - { - for (size_t i=0; i<N; i++) - { - const PrimRef prim = prims[i]; - unsigned splits = prim.geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); - - if (unlikely(splits == 1)) - { - const vint4 bin = mapping.bin(center(prim.bounds())); - for (size_t dim=0; dim<3; dim++) - { - assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS); - numBegin[bin[dim]][dim]++; - numEnd [bin[dim]][dim]++; - bounds [bin[dim]][dim].extend(prim.bounds()); - } - } - else - { - const vint4 bin0 = mapping.bin(prim.bounds().lower); - const vint4 bin1 = mapping.bin(prim.bounds().upper); - - for (size_t dim=0; dim<3; dim++) - { - size_t bin; - PrimRef rest = prim; - size_t l = bin0[dim]; - size_t r = bin1[dim]; - - // same bin optimization - if (likely(l == r)) - { - numBegin[l][dim]++; - numEnd [l][dim]++; - bounds [l][dim].extend(prim.bounds()); - continue; - } - - for (bin=(size_t)bin0[dim]; bin<(size_t)bin1[dim]; bin++) - { - const float pos = mapping.pos(bin+1,dim); - - PrimRef left,right; - splitPrimitive(rest,(int)dim,pos,left,right); - if (unlikely(left.bounds().empty())) l++; - bounds[bin][dim].extend(left.bounds()); - rest = right; - } - if (unlikely(rest.bounds().empty())) r--; - numBegin[l][dim]++; - numEnd [r][dim]++; - bounds [bin][dim].extend(rest.bounds()); - } - } - } - } - - /*! bins a range of primitives inside an array */ - template<typename SplitPrimitive> - void bin(const SplitPrimitive& splitPrimitive, const PrimRef* prims, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping) { - bin(splitPrimitive,prims+begin,end-begin,mapping); - } - - /*! bins an array of primitives */ - template<typename PrimitiveSplitterFactory> - __forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping) - { - for (size_t i=begin; i<end; i++) - { - const PrimRef &prim = source[i]; - const vint4 bin0 = mapping.bin(prim.bounds().lower); - const vint4 bin1 = mapping.bin(prim.bounds().upper); - - for (size_t dim=0; dim<3; dim++) - { - if (unlikely(mapping.invalid(dim))) - continue; - - size_t bin; - size_t l = bin0[dim]; - size_t r = bin1[dim]; - - // same bin optimization - if (likely(l == r)) - { - add(dim,l,l,l,prim.bounds()); - continue; - } - const size_t bin_start = bin0[dim]; - const size_t bin_end = bin1[dim]; - BBox3fa rest = prim.bounds(); - const auto splitter = splitterFactory(prim); - for (bin=bin_start; bin<bin_end; bin++) - { - const float pos = mapping.pos(bin+1,dim); - BBox3fa left,right; - splitter(rest,dim,pos,left,right); - if (unlikely(left.empty())) l++; - extend(dim,bin,left); - rest = right; - } - if (unlikely(rest.empty())) r--; - add(dim,l,r,bin,rest); - } - } - } - - - - /*! bins an array of primitives */ - __forceinline void binSubTreeRefs(const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping) - { - for (size_t i=begin; i<end; i++) - { - const PrimRef &prim = source[i]; - const vint4 bin0 = mapping.bin(prim.bounds().lower); - const vint4 bin1 = mapping.bin(prim.bounds().upper); - - for (size_t dim=0; dim<3; dim++) - { - if (unlikely(mapping.invalid(dim))) - continue; - - const size_t l = bin0[dim]; - const size_t r = bin1[dim]; - - const unsigned int n = prim.primID(); - - // same bin optimization - if (likely(l == r)) - { - add(dim,l,l,l,prim.bounds(),n); - continue; - } - const size_t bin_start = bin0[dim]; - const size_t bin_end = bin1[dim]; - for (size_t bin=bin_start; bin<bin_end; bin++) - add(dim,l,r,bin,prim.bounds(),n); - } - } - } - - /*! merges in other binning information */ - void merge (const SpatialBinInfo& other) - { - for (size_t i=0; i<BINS; i++) - { - numBegin[i] += other.numBegin[i]; - numEnd [i] += other.numEnd [i]; - bounds[i][0].extend(other.bounds[i][0]); - bounds[i][1].extend(other.bounds[i][1]); - bounds[i][2].extend(other.bounds[i][2]); - } - } - - /*! merges in other binning information */ - static __forceinline const SpatialBinInfo reduce (const SpatialBinInfo& a, const SpatialBinInfo& b) - { - SpatialBinInfo c(empty); - for (size_t i=0; i<BINS; i++) - { - c.numBegin[i] += a.numBegin[i]+b.numBegin[i]; - c.numEnd [i] += a.numEnd [i]+b.numEnd [i]; - c.bounds[i][0] = embree::merge(a.bounds[i][0],b.bounds[i][0]); - c.bounds[i][1] = embree::merge(a.bounds[i][1],b.bounds[i][1]); - c.bounds[i][2] = embree::merge(a.bounds[i][2],b.bounds[i][2]); - } - return c; - } - - /*! finds the best split by scanning binning information */ - SpatialBinSplit<BINS> best(const SpatialBinMapping<BINS>& mapping, const size_t blocks_shift) const - { - /* sweep from right to left and compute parallel prefix of merged bounds */ - vfloat4 rAreas[BINS]; - vuint4 rCounts[BINS]; - vuint4 count = 0; BBox3fa bx = empty; BBox3fa by = empty; BBox3fa bz = empty; - for (size_t i=BINS-1; i>0; i--) - { - count += numEnd[i]; - rCounts[i] = count; - bx.extend(bounds[i][0]); rAreas[i][0] = halfArea(bx); - by.extend(bounds[i][1]); rAreas[i][1] = halfArea(by); - bz.extend(bounds[i][2]); rAreas[i][2] = halfArea(bz); - rAreas[i][3] = 0.0f; - } - - /* sweep from left to right and compute SAH */ - vuint4 blocks_add = (1 << blocks_shift)-1; - vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; vuint4 vbestlCount = 0; vuint4 vbestrCount = 0; - count = 0; bx = empty; by = empty; bz = empty; - for (size_t i=1; i<BINS; i++, ii+=1) - { - count += numBegin[i-1]; - bx.extend(bounds[i-1][0]); float Ax = halfArea(bx); - by.extend(bounds[i-1][1]); float Ay = halfArea(by); - bz.extend(bounds[i-1][2]); float Az = halfArea(bz); - const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az); - const vfloat4 rArea = rAreas[i]; - const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift); - const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift); - const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount)); - // const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount))); - const vbool4 mask = sah < vbestSAH; - vbestPos = select(mask,ii ,vbestPos); - vbestSAH = select(mask,sah,vbestSAH); - vbestlCount = select(mask,count,vbestlCount); - vbestrCount = select(mask,rCounts[i],vbestrCount); - } - - /* find best dimension */ - float bestSAH = inf; - int bestDim = -1; - int bestPos = 0; - unsigned int bestlCount = 0; - unsigned int bestrCount = 0; - for (int dim=0; dim<3; dim++) - { - /* ignore zero sized dimensions */ - if (unlikely(mapping.invalid(dim))) - continue; - - /* test if this is a better dimension */ - if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) { - bestDim = dim; - bestPos = vbestPos[dim]; - bestSAH = vbestSAH[dim]; - bestlCount = vbestlCount[dim]; - bestrCount = vbestrCount[dim]; - } - } - assert(bestSAH >= 0.0f); - - /* return invalid split if no split found */ - if (bestDim == -1) - return SpatialBinSplit<BINS>(inf,-1,0,mapping); - - /* return best found split */ - return SpatialBinSplit<BINS>(bestSAH,bestDim,bestPos,bestlCount,bestrCount,1.0f,mapping); - } - - private: - BBox3fa bounds[BINS][3]; //!< geometry bounds for each bin in each dimension - vuint4 numBegin[BINS]; //!< number of primitives starting in bin - vuint4 numEnd[BINS]; //!< number of primitives ending in bin - }; - } -} - diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial_array.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial_array.h deleted file mode 100644 index 911dcf950c..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial_array.h +++ /dev/null @@ -1,552 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "heuristic_binning.h" -#include "heuristic_spatial.h" - -namespace embree -{ - namespace isa - { -#if 0 -#define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.2f -#define SPATIAL_ASPLIT_SAH_THRESHOLD 0.95f -#define SPATIAL_ASPLIT_AREA_THRESHOLD 0.0f -#else -#define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.1f -#define SPATIAL_ASPLIT_SAH_THRESHOLD 0.99f -#define SPATIAL_ASPLIT_AREA_THRESHOLD 0.000005f -#endif - - struct PrimInfoExtRange : public CentGeomBBox3fa, public extended_range<size_t> - { - __forceinline PrimInfoExtRange() { - } - - __forceinline PrimInfoExtRange(EmptyTy) - : CentGeomBBox3fa(EmptyTy()), extended_range<size_t>(0,0,0) {} - - __forceinline PrimInfoExtRange(size_t begin, size_t end, size_t ext_end, const CentGeomBBox3fa& centGeomBounds) - : CentGeomBBox3fa(centGeomBounds), extended_range<size_t>(begin,end,ext_end) {} - - __forceinline float leafSAH() const { - return expectedApproxHalfArea(geomBounds)*float(size()); - } - - __forceinline float leafSAH(size_t block_shift) const { - return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift); - } - }; - - template<typename ObjectSplit, typename SpatialSplit> - struct Split2 - { - __forceinline Split2 () {} - - __forceinline Split2 (const Split2& other) - { - spatial = other.spatial; - sah = other.sah; - if (spatial) spatialSplit() = other.spatialSplit(); - else objectSplit() = other.objectSplit(); - } - - __forceinline Split2& operator= (const Split2& other) - { - spatial = other.spatial; - sah = other.sah; - if (spatial) spatialSplit() = other.spatialSplit(); - else objectSplit() = other.objectSplit(); - return *this; - } - - __forceinline ObjectSplit& objectSplit() { return *( ObjectSplit*)data; } - __forceinline const ObjectSplit& objectSplit() const { return *(const ObjectSplit*)data; } - - __forceinline SpatialSplit& spatialSplit() { return *( SpatialSplit*)data; } - __forceinline const SpatialSplit& spatialSplit() const { return *(const SpatialSplit*)data; } - - __forceinline Split2 (const ObjectSplit& objectSplit, float sah) - : spatial(false), sah(sah) - { - new (data) ObjectSplit(objectSplit); - } - - __forceinline Split2 (const SpatialSplit& spatialSplit, float sah) - : spatial(true), sah(sah) - { - new (data) SpatialSplit(spatialSplit); - } - - __forceinline float splitSAH() const { - return sah; - } - - __forceinline bool valid() const { - return sah < float(inf); - } - - public: - __aligned(64) char data[sizeof(ObjectSplit) > sizeof(SpatialSplit) ? sizeof(ObjectSplit) : sizeof(SpatialSplit)]; - bool spatial; - float sah; - }; - - /*! Performs standard object binning */ - template<typename PrimitiveSplitterFactory, typename PrimRef, size_t OBJECT_BINS, size_t SPATIAL_BINS> - struct HeuristicArraySpatialSAH - { - typedef BinSplit<OBJECT_BINS> ObjectSplit; - typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> ObjectBinner; - - typedef SpatialBinSplit<SPATIAL_BINS> SpatialSplit; - typedef SpatialBinInfo<SPATIAL_BINS,PrimRef> SpatialBinner; - - //typedef extended_range<size_t> Set; - typedef Split2<ObjectSplit,SpatialSplit> Split; - -#if defined(__AVX512ER__) // KNL - static const size_t PARALLEL_THRESHOLD = 3*1024; - static const size_t PARALLEL_FIND_BLOCK_SIZE = 768; - static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; -#else - static const size_t PARALLEL_THRESHOLD = 3*1024; - static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; - static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; -#endif - - static const size_t MOVE_STEP_SIZE = 64; - static const size_t CREATE_SPLITS_STEP_SIZE = 64; - - __forceinline HeuristicArraySpatialSAH () - : prims0(nullptr) {} - - /*! remember prim array */ - __forceinline HeuristicArraySpatialSAH (const PrimitiveSplitterFactory& splitterFactory, PrimRef* prims0, const CentGeomBBox3fa& root_info) - : prims0(prims0), splitterFactory(splitterFactory), root_info(root_info) {} - - - /*! compute extended ranges */ - __noinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight) - { - assert(set.ext_range_size() > 0); - const float left_factor = (float)lweight / (lweight + rweight); - const size_t ext_range_size = set.ext_range_size(); - const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size); - const size_t right_ext_range_size = ext_range_size - left_ext_range_size; - lset.set_ext_range(lset.end() + left_ext_range_size); - rset.set_ext_range(rset.end() + right_ext_range_size); - } - - /*! move ranges */ - __noinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset) - { - const size_t left_ext_range_size = lset.ext_range_size(); - const size_t right_size = rset.size(); - - /* has the left child an extended range? */ - if (left_ext_range_size > 0) - { - /* left extended range smaller than right range ? */ - if (left_ext_range_size < right_size) - { - /* only move a small part of the beginning of the right range to the end */ - parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) { - for (size_t i=r.begin(); i<r.end(); i++) - prims0[i+right_size] = prims0[i]; - }); - } - else - { - /* no overlap, move entire right range to new location, can be made fully parallel */ - parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) { - for (size_t i=r.begin(); i<r.end(); i++) - prims0[i+left_ext_range_size] = prims0[i]; - }); - } - /* update right range */ - assert(rset.ext_end() + left_ext_range_size == set.ext_end()); - rset.move_right(left_ext_range_size); - } - } - - /*! finds the best split */ - const Split find(const PrimInfoExtRange& set, const size_t logBlockSize) - { - SplitInfo oinfo; - const ObjectSplit object_split = object_find(set,logBlockSize,oinfo); - const float object_split_sah = object_split.splitSAH(); - - if (unlikely(set.has_ext_range())) - { - const BBox3fa overlap = intersect(oinfo.leftBounds, oinfo.rightBounds); - - /* do only spatial splits if the child bounds overlap */ - if (safeArea(overlap) >= SPATIAL_ASPLIT_AREA_THRESHOLD*safeArea(root_info.geomBounds) && - safeArea(overlap) >= SPATIAL_ASPLIT_OVERLAP_THRESHOLD*safeArea(set.geomBounds)) - { - const SpatialSplit spatial_split = spatial_find(set, logBlockSize); - const float spatial_split_sah = spatial_split.splitSAH(); - - /* valid spatial split, better SAH and number of splits do not exceed extended range */ - if (spatial_split_sah < SPATIAL_ASPLIT_SAH_THRESHOLD*object_split_sah && - spatial_split.left + spatial_split.right - set.size() <= set.ext_range_size()) - { - return Split(spatial_split,spatial_split_sah); - } - } - } - - return Split(object_split,object_split_sah); - } - - /*! finds the best object split */ - __forceinline const ObjectSplit object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info) - { - if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize,info); - else return parallel_object_find (set,logBlockSize,info); - } - - /*! finds the best object split */ - __noinline const ObjectSplit sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info) - { - ObjectBinner binner(empty); - const BinMapping<OBJECT_BINS> mapping(set); - binner.bin(prims0,set.begin(),set.end(),mapping); - ObjectSplit s = binner.best(mapping,logBlockSize); - binner.getSplitInfo(mapping, s, info); - return s; - } - - /*! finds the best split */ - __noinline const ObjectSplit parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info) - { - ObjectBinner binner(empty); - const BinMapping<OBJECT_BINS> mapping(set); - const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround - binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner, - [&] (const range<size_t>& r) -> ObjectBinner { ObjectBinner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner; }, - [&] (const ObjectBinner& b0, const ObjectBinner& b1) -> ObjectBinner { ObjectBinner r = b0; r.merge(b1,_mapping.size()); return r; }); - ObjectSplit s = binner.best(mapping,logBlockSize); - binner.getSplitInfo(mapping, s, info); - return s; - } - - /*! finds the best spatial split */ - __forceinline const SpatialSplit spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize) - { - if (set.size() < PARALLEL_THRESHOLD) return sequential_spatial_find(set, logBlockSize); - else return parallel_spatial_find (set, logBlockSize); - } - - /*! finds the best spatial split */ - __noinline const SpatialSplit sequential_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize) - { - SpatialBinner binner(empty); - const SpatialBinMapping<SPATIAL_BINS> mapping(set); - binner.bin2(splitterFactory,prims0,set.begin(),set.end(),mapping); - /* todo: best spatial split not exeeding the extended range does not provide any benefit ?*/ - return binner.best(mapping,logBlockSize); //,set.ext_size()); - } - - __noinline const SpatialSplit parallel_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize) - { - SpatialBinner binner(empty); - const SpatialBinMapping<SPATIAL_BINS> mapping(set); - const SpatialBinMapping<SPATIAL_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround - binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner, - [&] (const range<size_t>& r) -> SpatialBinner { - SpatialBinner binner(empty); - binner.bin2(splitterFactory,prims0,r.begin(),r.end(),_mapping); - return binner; }, - [&] (const SpatialBinner& b0, const SpatialBinner& b1) -> SpatialBinner { return SpatialBinner::reduce(b0,b1); }); - /* todo: best spatial split not exeeding the extended range does not provide any benefit ?*/ - return binner.best(mapping,logBlockSize); //,set.ext_size()); - } - - - /*! subdivides primitives based on a spatial split */ - __noinline void create_spatial_splits(PrimInfoExtRange& set, const SpatialSplit& split, const SpatialBinMapping<SPATIAL_BINS> &mapping) - { - assert(set.has_ext_range()); - const size_t max_ext_range_size = set.ext_range_size(); - const size_t ext_range_start = set.end(); - - /* atomic counter for number of primref splits */ - std::atomic<size_t> ext_elements; - ext_elements.store(0); - - const float fpos = split.mapping.pos(split.pos,split.dim); - - const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS; - - parallel_for( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, [&](const range<size_t>& r) { - for (size_t i=r.begin();i<r.end();i++) - { - const unsigned int splits = prims0[i].geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); - - if (likely(splits <= 1)) continue; /* todo: does this ever happen ? */ - - //int bin0 = split.mapping.bin(prims0[i].lower)[split.dim]; - //int bin1 = split.mapping.bin(prims0[i].upper)[split.dim]; - //if (unlikely(bin0 < split.pos && bin1 >= split.pos)) - if (unlikely(prims0[i].lower[split.dim] < fpos && prims0[i].upper[split.dim] > fpos)) - { - assert(splits > 1); - - PrimRef left,right; - const auto splitter = splitterFactory(prims0[i]); - splitter(prims0[i],split.dim,fpos,left,right); - - // no empty splits - if (unlikely(left.bounds().empty() || right.bounds().empty())) continue; - - left.lower.u = (left.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); - right.lower.u = (right.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); - - const size_t ID = ext_elements.fetch_add(1); - - /* break if the number of subdivided elements are greater than the maximum allowed size */ - if (unlikely(ID >= max_ext_range_size)) - break; - - /* only write within the correct bounds */ - assert(ID < max_ext_range_size); - prims0[i] = left; - prims0[ext_range_start+ID] = right; - } - } - }); - - const size_t numExtElements = min(max_ext_range_size,ext_elements.load()); - assert(set.end()+numExtElements<=set.ext_end()); - set._end += numExtElements; - } - - /*! array partitioning */ - void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset) - { - PrimInfoExtRange set = set_i; - - /* valid split */ - if (unlikely(!split.valid())) { - deterministic_order(set); - return splitFallback(set,lset,rset); - } - - std::pair<size_t,size_t> ext_weights(0,0); - - if (unlikely(split.spatial)) - { - create_spatial_splits(set,split.spatialSplit(), split.spatialSplit().mapping); - - /* spatial split */ - if (likely(set.size() < PARALLEL_THRESHOLD)) - ext_weights = sequential_spatial_split(split.spatialSplit(),set,lset,rset); - else - ext_weights = parallel_spatial_split(split.spatialSplit(),set,lset,rset); - } - else - { - /* object split */ - if (likely(set.size() < PARALLEL_THRESHOLD)) - ext_weights = sequential_object_split(split.objectSplit(),set,lset,rset); - else - ext_weights = parallel_object_split(split.objectSplit(),set,lset,rset); - } - - /* if we have an extended range, set extended child ranges and move right split range */ - if (unlikely(set.has_ext_range())) - { - setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second); - moveExtentedRange(set,lset,rset); - } - } - - /*! array partitioning */ - std::pair<size_t,size_t> sequential_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset) - { - const size_t begin = set.begin(); - const size_t end = set.end(); - PrimInfo local_left(empty); - PrimInfo local_right(empty); - const unsigned int splitPos = split.pos; - const unsigned int splitDim = split.dim; - const unsigned int splitDimMask = (unsigned int)1 << splitDim; - - const typename ObjectBinner::vint vSplitPos(splitPos); - const typename ObjectBinner::vbool vSplitMask(splitDimMask); - size_t center = serial_partitioning(prims0, - begin,end,local_left,local_right, - [&] (const PrimRef& ref) { - return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); - }, - [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); }); - const size_t left_weight = local_left.end; - const size_t right_weight = local_right.end; - - new (&lset) PrimInfoExtRange(begin,center,center,local_left); - new (&rset) PrimInfoExtRange(center,end,end,local_right); - - assert(area(lset.geomBounds) >= 0.0f); - assert(area(rset.geomBounds) >= 0.0f); - return std::pair<size_t,size_t>(left_weight,right_weight); - } - - - /*! array partitioning */ - __noinline std::pair<size_t,size_t> sequential_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset) - { - const size_t begin = set.begin(); - const size_t end = set.end(); - PrimInfo local_left(empty); - PrimInfo local_right(empty); - const unsigned int splitPos = split.pos; - const unsigned int splitDim = split.dim; - const unsigned int splitDimMask = (unsigned int)1 << splitDim; - - /* init spatial mapping */ - const SpatialBinMapping<SPATIAL_BINS> &mapping = split.mapping; - const vint4 vSplitPos(splitPos); - const vbool4 vSplitMask( (int)splitDimMask ); - - size_t center = serial_partitioning(prims0, - begin,end,local_left,local_right, - [&] (const PrimRef& ref) { - const Vec3fa c = ref.bounds().center(); - return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask); - }, - [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); }); - - const size_t left_weight = local_left.end; - const size_t right_weight = local_right.end; - - new (&lset) PrimInfoExtRange(begin,center,center,local_left); - new (&rset) PrimInfoExtRange(center,end,end,local_right); - assert(area(lset.geomBounds) >= 0.0f); - assert(area(rset.geomBounds) >= 0.0f); - return std::pair<size_t,size_t>(left_weight,right_weight); - } - - - - /*! array partitioning */ - __noinline std::pair<size_t,size_t> parallel_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset) - { - const size_t begin = set.begin(); - const size_t end = set.end(); - PrimInfo left(empty); - PrimInfo right(empty); - const unsigned int splitPos = split.pos; - const unsigned int splitDim = split.dim; - const unsigned int splitDimMask = (unsigned int)1 << splitDim; - - const typename ObjectBinner::vint vSplitPos(splitPos); - const typename ObjectBinner::vbool vSplitMask(splitDimMask); - auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); }; - - const size_t center = parallel_partitioning( - prims0,begin,end,EmptyTy(),left,right,isLeft, - [] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); }, - [] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); }, - PARALLEL_PARTITION_BLOCK_SIZE); - - const size_t left_weight = left.end; - const size_t right_weight = right.end; - - left.begin = begin; left.end = center; - right.begin = center; right.end = end; - - new (&lset) PrimInfoExtRange(begin,center,center,left); - new (&rset) PrimInfoExtRange(center,end,end,right); - - assert(area(left.geomBounds) >= 0.0f); - assert(area(right.geomBounds) >= 0.0f); - return std::pair<size_t,size_t>(left_weight,right_weight); - } - - /*! array partitioning */ - __noinline std::pair<size_t,size_t> parallel_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset) - { - const size_t begin = set.begin(); - const size_t end = set.end(); - PrimInfo left(empty); - PrimInfo right(empty); - const unsigned int splitPos = split.pos; - const unsigned int splitDim = split.dim; - const unsigned int splitDimMask = (unsigned int)1 << splitDim; - - /* init spatial mapping */ - const SpatialBinMapping<SPATIAL_BINS>& mapping = split.mapping; - const vint4 vSplitPos(splitPos); - const vbool4 vSplitMask( (int)splitDimMask ); - - auto isLeft = [&] (const PrimRef &ref) { - const Vec3fa c = ref.bounds().center(); - return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask); }; - - const size_t center = parallel_partitioning( - prims0,begin,end,EmptyTy(),left,right,isLeft, - [] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); }, - [] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); }, - PARALLEL_PARTITION_BLOCK_SIZE); - - const size_t left_weight = left.end; - const size_t right_weight = right.end; - - left.begin = begin; left.end = center; - right.begin = center; right.end = end; - - new (&lset) PrimInfoExtRange(begin,center,center,left); - new (&rset) PrimInfoExtRange(center,end,end,right); - - assert(area(left.geomBounds) >= 0.0f); - assert(area(right.geomBounds) >= 0.0f); - return std::pair<size_t,size_t>(left_weight,right_weight); - } - - void deterministic_order(const PrimInfoExtRange& set) - { - /* required as parallel partition destroys original primitive order */ - std::sort(&prims0[set.begin()],&prims0[set.end()]); - } - - void splitFallback(const PrimInfoExtRange& set, - PrimInfoExtRange& lset, - PrimInfoExtRange& rset) - { - const size_t begin = set.begin(); - const size_t end = set.end(); - const size_t center = (begin + end)/2; - - PrimInfo left(empty); - for (size_t i=begin; i<center; i++) { - left.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); - } - const size_t lweight = left.end; - - PrimInfo right(empty); - for (size_t i=center; i<end; i++) { - right.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); - } - const size_t rweight = right.end; - - new (&lset) PrimInfoExtRange(begin,center,center,left); - new (&rset) PrimInfoExtRange(center,end,end,right); - - /* if we have an extended range */ - if (set.has_ext_range()) { - setExtentedRanges(set,lset,rset,lweight,rweight); - moveExtentedRange(set,lset,rset); - } - } - - private: - PrimRef* const prims0; - const PrimitiveSplitterFactory& splitterFactory; - const CentGeomBBox3fa& root_info; - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h deleted file mode 100644 index ede0d04c78..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "priminfo.h" -#include "../../common/algorithms/parallel_reduce.h" -#include "../../common/algorithms/parallel_partition.h" - -namespace embree -{ - namespace isa - { - /*! Performs standard object binning */ - struct HeuristicStrandSplit - { - typedef range<size_t> Set; - - static const size_t PARALLEL_THRESHOLD = 10000; - static const size_t PARALLEL_FIND_BLOCK_SIZE = 4096; - static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 64; - - /*! stores all information to perform some split */ - struct Split - { - /*! construct an invalid split by default */ - __forceinline Split() - : sah(inf), axis0(zero), axis1(zero) {} - - /*! constructs specified split */ - __forceinline Split(const float sah, const Vec3fa& axis0, const Vec3fa& axis1) - : sah(sah), axis0(axis0), axis1(axis1) {} - - /*! calculates standard surface area heuristic for the split */ - __forceinline float splitSAH() const { return sah; } - - /*! test if this split is valid */ - __forceinline bool valid() const { return sah != float(inf); } - - public: - float sah; //!< SAH cost of the split - Vec3fa axis0, axis1; //!< axis the two strands are aligned into - }; - - __forceinline HeuristicStrandSplit () // FIXME: required? - : scene(nullptr), prims(nullptr) {} - - /*! remember prim array */ - __forceinline HeuristicStrandSplit (Scene* scene, PrimRef* prims) - : scene(scene), prims(prims) {} - - __forceinline const Vec3fa direction(const PrimRef& prim) { - return scene->get(prim.geomID())->computeDirection(prim.primID()); - } - - __forceinline const BBox3fa bounds(const PrimRef& prim) { - return scene->get(prim.geomID())->vbounds(prim.primID()); - } - - __forceinline const BBox3fa bounds(const LinearSpace3fa& space, const PrimRef& prim) { - return scene->get(prim.geomID())->vbounds(space,prim.primID()); - } - - /*! finds the best split */ - const Split find(const range<size_t>& set, size_t logBlockSize) - { - Vec3fa axis0(0,0,1); - uint64_t bestGeomPrimID = -1; - - /* curve with minimum ID determines first axis */ - for (size_t i=set.begin(); i<set.end(); i++) - { - const uint64_t geomprimID = prims[i].ID64(); - if (geomprimID >= bestGeomPrimID) continue; - const Vec3fa axis = direction(prims[i]); - if (sqr_length(axis) > 1E-18f) { - axis0 = normalize(axis); - bestGeomPrimID = geomprimID; - } - } - - /* find 2nd axis that is most misaligned with first axis and has minimum ID */ - float bestCos = 1.0f; - Vec3fa axis1 = axis0; - bestGeomPrimID = -1; - for (size_t i=set.begin(); i<set.end(); i++) - { - const uint64_t geomprimID = prims[i].ID64(); - Vec3fa axisi = direction(prims[i]); - float leni = length(axisi); - if (leni == 0.0f) continue; - axisi /= leni; - float cos = abs(dot(axisi,axis0)); - if ((cos == bestCos && (geomprimID < bestGeomPrimID)) || cos < bestCos) { - bestCos = cos; axis1 = axisi; - bestGeomPrimID = geomprimID; - } - } - - /* partition the two strands */ - size_t lnum = 0, rnum = 0; - BBox3fa lbounds = empty, rbounds = empty; - const LinearSpace3fa space0 = frame(axis0).transposed(); - const LinearSpace3fa space1 = frame(axis1).transposed(); - - for (size_t i=set.begin(); i<set.end(); i++) - { - PrimRef& prim = prims[i]; - const Vec3fa axisi = normalize(direction(prim)); - const float cos0 = abs(dot(axisi,axis0)); - const float cos1 = abs(dot(axisi,axis1)); - - if (cos0 > cos1) { lnum++; lbounds.extend(bounds(space0,prim)); } - else { rnum++; rbounds.extend(bounds(space1,prim)); } - } - - /*! return an invalid split if we do not partition */ - if (lnum == 0 || rnum == 0) - return Split(inf,axis0,axis1); - - /*! calculate sah for the split */ - const size_t lblocks = (lnum+(1ull<<logBlockSize)-1ull) >> logBlockSize; - const size_t rblocks = (rnum+(1ull<<logBlockSize)-1ull) >> logBlockSize; - const float sah = madd(float(lblocks),halfArea(lbounds),float(rblocks)*halfArea(rbounds)); - return Split(sah,axis0,axis1); - } - - /*! array partitioning */ - void split(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset) - { - if (!split.valid()) { - deterministic_order(set); - return splitFallback(set,lset,rset); - } - - const size_t begin = set.begin(); - const size_t end = set.end(); - CentGeomBBox3fa local_left(empty); - CentGeomBBox3fa local_right(empty); - - auto primOnLeftSide = [&] (const PrimRef& prim) -> bool { - const Vec3fa axisi = normalize(direction(prim)); - const float cos0 = abs(dot(axisi,split.axis0)); - const float cos1 = abs(dot(axisi,split.axis1)); - return cos0 > cos1; - }; - - auto mergePrimBounds = [this] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { - pinfo.extend(bounds(ref)); - }; - - size_t center = serial_partitioning(prims,begin,end,local_left,local_right,primOnLeftSide,mergePrimBounds); - - new (&lset) PrimInfoRange(begin,center,local_left); - new (&rset) PrimInfoRange(center,end,local_right); - assert(area(lset.geomBounds) >= 0.0f); - assert(area(rset.geomBounds) >= 0.0f); - } - - void deterministic_order(const Set& set) - { - /* required as parallel partition destroys original primitive order */ - std::sort(&prims[set.begin()],&prims[set.end()]); - } - - void splitFallback(const Set& set, PrimInfoRange& lset, PrimInfoRange& rset) - { - const size_t begin = set.begin(); - const size_t end = set.end(); - const size_t center = (begin + end)/2; - - CentGeomBBox3fa left(empty); - for (size_t i=begin; i<center; i++) - left.extend(bounds(prims[i])); - new (&lset) PrimInfoRange(begin,center,left); - - CentGeomBBox3fa right(empty); - for (size_t i=center; i<end; i++) - right.extend(bounds(prims[i])); - new (&rset) PrimInfoRange(center,end,right); - } - - private: - Scene* const scene; - PrimRef* const prims; - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_timesplit_array.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_timesplit_array.h deleted file mode 100644 index c999941a11..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_timesplit_array.h +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/primref_mb.h" -#include "../../common/algorithms/parallel_filter.h" - -#define MBLUR_TIME_SPLIT_THRESHOLD 1.25f - -namespace embree -{ - namespace isa - { - /*! Performs standard object binning */ - template<typename PrimRefMB, typename RecalculatePrimRef, size_t BINS> - struct HeuristicMBlurTemporalSplit - { - typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split; - typedef mvector<PrimRefMB>* PrimRefVector; - typedef typename PrimRefMB::BBox BBox; - - static const size_t PARALLEL_THRESHOLD = 3 * 1024; - static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; - static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; - - HeuristicMBlurTemporalSplit (MemoryMonitorInterface* device, const RecalculatePrimRef& recalculatePrimRef) - : device(device), recalculatePrimRef(recalculatePrimRef) {} - - struct TemporalBinInfo - { - __forceinline TemporalBinInfo () { - } - - __forceinline TemporalBinInfo (EmptyTy) - { - for (size_t i=0; i<BINS-1; i++) - { - count0[i] = count1[i] = 0; - bounds0[i] = bounds1[i] = empty; - } - } - - void bin(const PrimRefMB* prims, size_t begin, size_t end, BBox1f time_range, const SetMB& set, const RecalculatePrimRef& recalculatePrimRef) - { - for (int b=0; b<BINS-1; b++) - { - const float t = float(b+1)/float(BINS); - const float ct = lerp(time_range.lower,time_range.upper,t); - const float center_time = set.align_time(ct); - if (center_time <= time_range.lower) continue; - if (center_time >= time_range.upper) continue; - const BBox1f dt0(time_range.lower,center_time); - const BBox1f dt1(center_time,time_range.upper); - - /* find linear bounds for both time segments */ - for (size_t i=begin; i<end; i++) - { - if (prims[i].time_range_overlap(dt0)) - { - const LBBox3fa bn0 = recalculatePrimRef.linearBounds(prims[i],dt0); -#if MBLUR_BIN_LBBOX - bounds0[b].extend(bn0); -#else - bounds0[b].extend(bn0.interpolate(0.5f)); -#endif - count0[b] += prims[i].timeSegmentRange(dt0).size(); - } - - if (prims[i].time_range_overlap(dt1)) - { - const LBBox3fa bn1 = recalculatePrimRef.linearBounds(prims[i],dt1); -#if MBLUR_BIN_LBBOX - bounds1[b].extend(bn1); -#else - bounds1[b].extend(bn1.interpolate(0.5f)); -#endif - count1[b] += prims[i].timeSegmentRange(dt1).size(); - } - } - } - } - - __forceinline void bin_parallel(const PrimRefMB* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, BBox1f time_range, const SetMB& set, const RecalculatePrimRef& recalculatePrimRef) - { - if (likely(end-begin < parallelThreshold)) { - bin(prims,begin,end,time_range,set,recalculatePrimRef); - } - else - { - auto bin = [&](const range<size_t>& r) -> TemporalBinInfo { - TemporalBinInfo binner(empty); binner.bin(prims, r.begin(), r.end(), time_range, set, recalculatePrimRef); return binner; - }; - *this = parallel_reduce(begin,end,blockSize,TemporalBinInfo(empty),bin,merge2); - } - } - - /*! merges in other binning information */ - __forceinline void merge (const TemporalBinInfo& other) - { - for (size_t i=0; i<BINS-1; i++) - { - count0[i] += other.count0[i]; - count1[i] += other.count1[i]; - bounds0[i].extend(other.bounds0[i]); - bounds1[i].extend(other.bounds1[i]); - } - } - - static __forceinline const TemporalBinInfo merge2(const TemporalBinInfo& a, const TemporalBinInfo& b) { - TemporalBinInfo r = a; r.merge(b); return r; - } - - Split best(int logBlockSize, BBox1f time_range, const SetMB& set) - { - float bestSAH = inf; - float bestPos = 0.0f; - for (int b=0; b<BINS-1; b++) - { - float t = float(b+1)/float(BINS); - float ct = lerp(time_range.lower,time_range.upper,t); - const float center_time = set.align_time(ct); - if (center_time <= time_range.lower) continue; - if (center_time >= time_range.upper) continue; - const BBox1f dt0(time_range.lower,center_time); - const BBox1f dt1(center_time,time_range.upper); - - /* calculate sah */ - const size_t lCount = (count0[b]+(size_t(1) << logBlockSize)-1) >> int(logBlockSize); - const size_t rCount = (count1[b]+(size_t(1) << logBlockSize)-1) >> int(logBlockSize); - float sah0 = expectedApproxHalfArea(bounds0[b])*float(lCount)*dt0.size(); - float sah1 = expectedApproxHalfArea(bounds1[b])*float(rCount)*dt1.size(); - if (unlikely(lCount == 0)) sah0 = 0.0f; // happens for initial splits when objects not alive over entire shutter time - if (unlikely(rCount == 0)) sah1 = 0.0f; - const float sah = sah0+sah1; - if (sah < bestSAH) { - bestSAH = sah; - bestPos = center_time; - } - } - return Split(bestSAH*MBLUR_TIME_SPLIT_THRESHOLD,(unsigned)Split::SPLIT_TEMPORAL,0,bestPos); - } - - public: - size_t count0[BINS-1]; - size_t count1[BINS-1]; - BBox bounds0[BINS-1]; - BBox bounds1[BINS-1]; - }; - - /*! finds the best split */ - const Split find(const SetMB& set, const size_t logBlockSize) - { - assert(set.size() > 0); - TemporalBinInfo binner(empty); - binner.bin_parallel(set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,set.time_range,set,recalculatePrimRef); - Split tsplit = binner.best((int)logBlockSize,set.time_range,set); - if (!tsplit.valid()) tsplit.data = Split::SPLIT_FALLBACK; // use fallback split - return tsplit; - } - - __forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& tsplit, const SetMB& set, SetMB& lset, SetMB& rset) - { - assert(tsplit.sah != float(inf)); - assert(tsplit.fpos > set.time_range.lower); - assert(tsplit.fpos < set.time_range.upper); - - float center_time = tsplit.fpos; - const BBox1f time_range0(set.time_range.lower,center_time); - const BBox1f time_range1(center_time,set.time_range.upper); - mvector<PrimRefMB>& prims = *set.prims; - - /* calculate primrefs for first time range */ - std::unique_ptr<mvector<PrimRefMB>> new_vector(new mvector<PrimRefMB>(device, set.size())); - PrimRefVector lprims = new_vector.get(); - - auto reduction_func0 = [&] (const range<size_t>& r) { - PrimInfoMB pinfo = empty; - for (size_t i=r.begin(); i<r.end(); i++) - { - if (likely(prims[i].time_range_overlap(time_range0))) - { - const PrimRefMB& prim = recalculatePrimRef(prims[i],time_range0); - (*lprims)[i-set.begin()] = prim; - pinfo.add_primref(prim); - } - else - { - (*lprims)[i-set.begin()] = prims[i]; - } - } - return pinfo; - }; - PrimInfoMB linfo = parallel_reduce(set.object_range,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD,PrimInfoMB(empty),reduction_func0,PrimInfoMB::merge2); - - /* primrefs for first time range are in lprims[0 .. set.size()) */ - /* some primitives may need to be filtered out */ - if (linfo.size() != set.size()) - linfo.object_range._end = parallel_filter(lprims->data(), size_t(0), set.size(), size_t(1024), - [&](const PrimRefMB& prim) { return prim.time_range_overlap(time_range0); }); - - lset = SetMB(linfo,lprims,time_range0); - - /* calculate primrefs for second time range */ - auto reduction_func1 = [&] (const range<size_t>& r) { - PrimInfoMB pinfo = empty; - for (size_t i=r.begin(); i<r.end(); i++) - { - if (likely(prims[i].time_range_overlap(time_range1))) - { - const PrimRefMB& prim = recalculatePrimRef(prims[i],time_range1); - prims[i] = prim; - pinfo.add_primref(prim); - } - } - return pinfo; - }; - PrimInfoMB rinfo = parallel_reduce(set.object_range,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD,PrimInfoMB(empty),reduction_func1,PrimInfoMB::merge2); - rinfo.object_range = range<size_t>(set.begin(), set.begin() + rinfo.size()); - - /* primrefs for second time range are in prims[set.begin() .. set.end()) */ - /* some primitives may need to be filtered out */ - if (rinfo.size() != set.size()) - rinfo.object_range._end = parallel_filter(prims.data(), set.begin(), set.end(), size_t(1024), - [&](const PrimRefMB& prim) { return prim.time_range_overlap(time_range1); }); - - rset = SetMB(rinfo,&prims,time_range1); - - return new_vector; - } - - private: - MemoryMonitorInterface* device; // device to report memory usage to - const RecalculatePrimRef recalculatePrimRef; - }; - } -} diff --git a/thirdparty/embree-aarch64/kernels/builders/priminfo.h b/thirdparty/embree-aarch64/kernels/builders/priminfo.h deleted file mode 100644 index 06c1388742..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/priminfo.h +++ /dev/null @@ -1,362 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/default.h" -#include "../common/primref.h" -#include "../common/primref_mb.h" - -namespace embree -{ - // FIXME: maybe there's a better place for this util fct - __forceinline float areaProjectedTriangle(const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2) - { - const Vec3fa e0 = v1-v0; - const Vec3fa e1 = v2-v0; - const Vec3fa d = cross(e0,e1); - return fabs(d.x) + fabs(d.y) + fabs(d.z); - } - - //namespace isa - //{ - template<typename BBox> - class CentGeom - { - public: - __forceinline CentGeom () {} - - __forceinline CentGeom (EmptyTy) - : geomBounds(empty), centBounds(empty) {} - - __forceinline CentGeom (const BBox& geomBounds, const BBox3fa& centBounds) - : geomBounds(geomBounds), centBounds(centBounds) {} - - template<typename PrimRef> - __forceinline void extend_primref(const PrimRef& prim) - { - BBox bounds; Vec3fa center; - prim.binBoundsAndCenter(bounds,center); - geomBounds.extend(bounds); - centBounds.extend(center); - } - - template<typename PrimRef> - __forceinline void extend_center2(const PrimRef& prim) - { - BBox3fa bounds = prim.bounds(); - geomBounds.extend(bounds); - centBounds.extend(bounds.center2()); - } - - __forceinline void extend(const BBox& geomBounds_) { - geomBounds.extend(geomBounds_); - centBounds.extend(center2(geomBounds_)); - } - - __forceinline void merge(const CentGeom& other) - { - geomBounds.extend(other.geomBounds); - centBounds.extend(other.centBounds); - } - - static __forceinline const CentGeom merge2(const CentGeom& a, const CentGeom& b) { - CentGeom r = a; r.merge(b); return r; - } - - public: - BBox geomBounds; //!< geometry bounds of primitives - BBox3fa centBounds; //!< centroid bounds of primitives - }; - - typedef CentGeom<BBox3fa> CentGeomBBox3fa; - - /*! stores bounding information for a set of primitives */ - template<typename BBox> - class PrimInfoT : public CentGeom<BBox> - { - public: - using CentGeom<BBox>::geomBounds; - using CentGeom<BBox>::centBounds; - - __forceinline PrimInfoT () {} - - __forceinline PrimInfoT (EmptyTy) - : CentGeom<BBox>(empty), begin(0), end(0) {} - - __forceinline PrimInfoT (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds) - : CentGeom<BBox>(centGeomBounds), begin(begin), end(end) {} - - template<typename PrimRef> - __forceinline void add_primref(const PrimRef& prim) - { - CentGeom<BBox>::extend_primref(prim); - end++; - } - - template<typename PrimRef> - __forceinline void add_center2(const PrimRef& prim) { - CentGeom<BBox>::extend_center2(prim); - end++; - } - - template<typename PrimRef> - __forceinline void add_center2(const PrimRef& prim, const size_t i) { - CentGeom<BBox>::extend_center2(prim); - end+=i; - } - - /*__forceinline void add(const BBox& geomBounds_) { - CentGeom<BBox>::extend(geomBounds_); - end++; - } - - __forceinline void add(const BBox& geomBounds_, const size_t i) { - CentGeom<BBox>::extend(geomBounds_); - end+=i; - }*/ - - __forceinline void merge(const PrimInfoT& other) - { - CentGeom<BBox>::merge(other); - begin += other.begin; - end += other.end; - } - - static __forceinline const PrimInfoT merge(const PrimInfoT& a, const PrimInfoT& b) { - PrimInfoT r = a; r.merge(b); return r; - } - - /*! returns the number of primitives */ - __forceinline size_t size() const { - return end-begin; - } - - __forceinline float halfArea() { - return expectedApproxHalfArea(geomBounds); - } - - __forceinline float leafSAH() const { - return expectedApproxHalfArea(geomBounds)*float(size()); - //return halfArea(geomBounds)*blocks(num); - } - - __forceinline float leafSAH(size_t block_shift) const { - return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift); - //return halfArea(geomBounds)*float((num+3) >> 2); - //return halfArea(geomBounds)*blocks(num); - } - - /*! stream output */ - friend embree_ostream operator<<(embree_ostream cout, const PrimInfoT& pinfo) { - return cout << "PrimInfo { begin = " << pinfo.begin << ", end = " << pinfo.end << ", geomBounds = " << pinfo.geomBounds << ", centBounds = " << pinfo.centBounds << "}"; - } - - public: - size_t begin,end; //!< number of primitives - }; - - typedef PrimInfoT<BBox3fa> PrimInfo; - //typedef PrimInfoT<LBBox3fa> PrimInfoMB; - - /*! stores bounding information for a set of primitives */ - template<typename BBox> - class PrimInfoMBT : public CentGeom<BBox> - { - public: - using CentGeom<BBox>::geomBounds; - using CentGeom<BBox>::centBounds; - - __forceinline PrimInfoMBT () { - } - - __forceinline PrimInfoMBT (EmptyTy) - : CentGeom<BBox>(empty), object_range(0,0), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {} - - __forceinline PrimInfoMBT (size_t begin, size_t end) - : CentGeom<BBox>(empty), object_range(begin,end), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {} - - template<typename PrimRef> - __forceinline void add_primref(const PrimRef& prim) - { - CentGeom<BBox>::extend_primref(prim); - time_range.extend(prim.time_range); - object_range._end++; - num_time_segments += prim.size(); - if (max_num_time_segments < prim.totalTimeSegments()) { - max_num_time_segments = prim.totalTimeSegments(); - max_time_range = prim.time_range; - } - } - - __forceinline void merge(const PrimInfoMBT& other) - { - CentGeom<BBox>::merge(other); - time_range.extend(other.time_range); - object_range._begin += other.object_range.begin(); - object_range._end += other.object_range.end(); - num_time_segments += other.num_time_segments; - if (max_num_time_segments < other.max_num_time_segments) { - max_num_time_segments = other.max_num_time_segments; - max_time_range = other.max_time_range; - } - } - - static __forceinline const PrimInfoMBT merge2(const PrimInfoMBT& a, const PrimInfoMBT& b) { - PrimInfoMBT r = a; r.merge(b); return r; - } - - __forceinline size_t begin() const { - return object_range.begin(); - } - - __forceinline size_t end() const { - return object_range.end(); - } - - /*! returns the number of primitives */ - __forceinline size_t size() const { - return object_range.size(); - } - - __forceinline float halfArea() const { - return time_range.size()*expectedApproxHalfArea(geomBounds); - } - - __forceinline float leafSAH() const { - return time_range.size()*expectedApproxHalfArea(geomBounds)*float(num_time_segments); - } - - __forceinline float leafSAH(size_t block_shift) const { - return time_range.size()*expectedApproxHalfArea(geomBounds)*float((num_time_segments+(size_t(1)<<block_shift)-1) >> block_shift); - } - - __forceinline float align_time(float ct) const - { - //return roundf(ct * float(numTimeSegments)) / float(numTimeSegments); - float t0 = (ct-max_time_range.lower)/max_time_range.size(); - float t1 = roundf(t0 * float(max_num_time_segments)) / float(max_num_time_segments); - return t1*max_time_range.size()+max_time_range.lower; - } - - /*! stream output */ - friend embree_ostream operator<<(embree_ostream cout, const PrimInfoMBT& pinfo) - { - return cout << "PrimInfo { " << - "object_range = " << pinfo.object_range << - ", time_range = " << pinfo.time_range << - ", time_segments = " << pinfo.num_time_segments << - ", geomBounds = " << pinfo.geomBounds << - ", centBounds = " << pinfo.centBounds << - "}"; - } - - public: - range<size_t> object_range; //!< primitive range - size_t num_time_segments; //!< total number of time segments of all added primrefs - size_t max_num_time_segments; //!< maximum number of time segments of a primitive - BBox1f max_time_range; //!< time range of primitive with max_num_time_segments - BBox1f time_range; //!< merged time range of primitives when merging prims, or additionally clipped with build time range when used in SetMB - }; - - typedef PrimInfoMBT<typename PrimRefMB::BBox> PrimInfoMB; - - struct SetMB : public PrimInfoMB - { - static const size_t PARALLEL_THRESHOLD = 3 * 1024; - static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; - static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; - - typedef mvector<PrimRefMB>* PrimRefVector; - - __forceinline SetMB() {} - - __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims) - : PrimInfoMB(pinfo_i), prims(prims) {} - - __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, range<size_t> object_range_in, BBox1f time_range_in) - : PrimInfoMB(pinfo_i), prims(prims) - { - object_range = object_range_in; - time_range = intersect(time_range,time_range_in); - } - - __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, BBox1f time_range_in) - : PrimInfoMB(pinfo_i), prims(prims) - { - time_range = intersect(time_range,time_range_in); - } - - void deterministic_order() const - { - /* required as parallel partition destroys original primitive order */ - PrimRefMB* prim = prims->data(); - std::sort(&prim[object_range.begin()],&prim[object_range.end()]); - } - - template<typename RecalculatePrimRef> - __forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef) const - { - auto reduce = [&](const range<size_t>& r) -> LBBox3fa - { - LBBox3fa cbounds(empty); - for (size_t j = r.begin(); j < r.end(); j++) - { - PrimRefMB& ref = (*prims)[j]; - const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range); - cbounds.extend(bn); - }; - return cbounds; - }; - - return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty), - reduce, - [&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); }); - } - - template<typename RecalculatePrimRef> - __forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const - { - auto reduce = [&](const range<size_t>& r) -> LBBox3fa - { - LBBox3fa cbounds(empty); - for (size_t j = r.begin(); j < r.end(); j++) - { - PrimRefMB& ref = (*prims)[j]; - const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range, space); - cbounds.extend(bn); - }; - return cbounds; - }; - - return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty), - reduce, - [&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); }); - } - - template<typename RecalculatePrimRef> - const SetMB primInfo(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const - { - auto computePrimInfo = [&](const range<size_t>& r) -> PrimInfoMB - { - PrimInfoMB pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - PrimRefMB& ref = (*prims)[j]; - PrimRefMB ref1 = recalculatePrimRef(ref,time_range,space); - pinfo.add_primref(ref1); - }; - return pinfo; - }; - - const PrimInfoMB pinfo = parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, - PrimInfoMB(empty), computePrimInfo, PrimInfoMB::merge2); - - return SetMB(pinfo,prims,object_range,time_range); - } - - public: - PrimRefVector prims; - }; -//} -} diff --git a/thirdparty/embree-aarch64/kernels/builders/primrefgen.cpp b/thirdparty/embree-aarch64/kernels/builders/primrefgen.cpp deleted file mode 100644 index e23de3df28..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/primrefgen.cpp +++ /dev/null @@ -1,244 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "primrefgen.h" -#include "primrefgen_presplit.h" - -#include "../../common/algorithms/parallel_for_for.h" -#include "../../common/algorithms/parallel_for_for_prefix_sum.h" - -namespace embree -{ - namespace isa - { - PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor) - { - ParallelPrefixSumState<PrimInfo> pstate; - - /* first try */ - progressMonitor(0); - PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo { - return geometry->createPrimRefArray(prims,r,r.begin(),geomID); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - - /* if we need to filter out geometry, run again */ - if (pinfo.size() != prims.size()) - { - progressMonitor(0); - pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo { - return geometry->createPrimRefArray(prims,r,base.size(),geomID); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - } - return pinfo; - } - - PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor) - { - ParallelForForPrefixSumState<PrimInfo> pstate; - Scene::Iterator2 iter(scene,types,mblur); - - /* first try */ - progressMonitor(0); - pstate.init(iter,size_t(1024)); - PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo { - return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - - /* if we need to filter out geometry, run again */ - if (pinfo.size() != prims.size()) - { - progressMonitor(0); - pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo { - return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - } - return pinfo; - } - - PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime) - { - ParallelForForPrefixSumState<PrimInfo> pstate; - Scene::Iterator2 iter(scene,types,true); - - /* first try */ - progressMonitor(0); - pstate.init(iter,size_t(1024)); - PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo { - return mesh->createPrimRefArrayMB(prims,itime,r,k,(unsigned)geomID); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - - /* if we need to filter out geometry, run again */ - if (pinfo.size() != prims.size()) - { - progressMonitor(0); - pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo { - return mesh->createPrimRefArrayMB(prims,itime,r,base.size(),(unsigned)geomID); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - } - return pinfo; - } - - PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1) - { - ParallelForForPrefixSumState<PrimInfoMB> pstate; - Scene::Iterator2 iter(scene,types,true); - - /* first try */ - progressMonitor(0); - pstate.init(iter,size_t(1024)); - PrimInfoMB pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfoMB { - return mesh->createPrimRefMBArray(prims,t0t1,r,k,(unsigned)geomID); - }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); }); - - /* if we need to filter out geometry, run again */ - if (pinfo.size() != prims.size()) - { - progressMonitor(0); - pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB { - return mesh->createPrimRefMBArray(prims,t0t1,r,base.size(),(unsigned)geomID); - }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); }); - } - - /* the BVH starts with that time range, even though primitives might have smaller/larger time range */ - pinfo.time_range = t0t1; - return pinfo; - } - - template<typename Mesh> - size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor) - { - size_t numPrimitives = morton.size(); - - /* compute scene bounds */ - std::pair<size_t,BBox3fa> cb_empty(0,empty); - auto cb = parallel_reduce - ( size_t(0), numPrimitives, size_t(1024), cb_empty, [&](const range<size_t>& r) -> std::pair<size_t,BBox3fa> - { - size_t num = 0; - BBox3fa bounds = empty; - - for (size_t j=r.begin(); j<r.end(); j++) - { - BBox3fa prim_bounds = empty; - if (unlikely(!mesh->buildBounds(j,&prim_bounds))) continue; - bounds.extend(center2(prim_bounds)); - num++; - } - return std::make_pair(num,bounds); - }, [] (const std::pair<size_t,BBox3fa>& a, const std::pair<size_t,BBox3fa>& b) { - return std::make_pair(a.first + b.first,merge(a.second,b.second)); - }); - - - size_t numPrimitivesGen = cb.first; - const BBox3fa centBounds = cb.second; - - /* compute morton codes */ - if (likely(numPrimitivesGen == numPrimitives)) - { - /* fast path if all primitives were valid */ - BVHBuilderMorton::MortonCodeMapping mapping(centBounds); - parallel_for( size_t(0), numPrimitives, size_t(1024), [&](const range<size_t>& r) -> void { - BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[r.begin()]); - for (size_t j=r.begin(); j<r.end(); j++) - generator(mesh->bounds(j),unsigned(j)); - }); - } - else - { - /* slow path, fallback in case some primitives were invalid */ - ParallelPrefixSumState<size_t> pstate; - BVHBuilderMorton::MortonCodeMapping mapping(centBounds); - parallel_prefix_sum( pstate, size_t(0), numPrimitives, size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t base) -> size_t { - size_t num = 0; - BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[r.begin()]); - for (size_t j=r.begin(); j<r.end(); j++) - { - BBox3fa bounds = empty; - if (unlikely(!mesh->buildBounds(j,&bounds))) continue; - generator(bounds,unsigned(j)); - num++; - } - return num; - }, std::plus<size_t>()); - - parallel_prefix_sum( pstate, size_t(0), numPrimitives, size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t base) -> size_t { - size_t num = 0; - BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[base]); - for (size_t j=r.begin(); j<r.end(); j++) - { - BBox3fa bounds = empty; - if (!mesh->buildBounds(j,&bounds)) continue; - generator(bounds,unsigned(j)); - num++; - } - return num; - }, std::plus<size_t>()); - } - return numPrimitivesGen; - } - - // ==================================================================================================== - // ==================================================================================================== - // ==================================================================================================== - - // template for grid meshes - -#if 0 - template<> - PrimInfo createPrimRefArray<GridMesh,false>(Scene* scene, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor) - { - PING; - ParallelForForPrefixSumState<PrimInfo> pstate; - Scene::Iterator<GridMesh,false> iter(scene); - - /* first try */ - progressMonitor(0); - pstate.init(iter,size_t(1024)); - PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k) -> PrimInfo - { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - BBox3fa bounds = empty; - if (!mesh->buildBounds(j,&bounds)) continue; - const PrimRef prim(bounds,mesh->geomID,unsigned(j)); - pinfo.add_center2(prim); - prims[k++] = prim; - } - return pinfo; - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - - /* if we need to filter out geometry, run again */ - if (pinfo.size() != prims.size()) - { - progressMonitor(0); - pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, const PrimInfo& base) -> PrimInfo - { - k = base.size(); - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - BBox3fa bounds = empty; - if (!mesh->buildBounds(j,&bounds)) continue; - const PrimRef prim(bounds,mesh->geomID,unsigned(j)); - pinfo.add_center2(prim); - prims[k++] = prim; - } - return pinfo; - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - } - return pinfo; - } -#endif - - // ==================================================================================================== - // ==================================================================================================== - // ==================================================================================================== - - IF_ENABLED_TRIS (template size_t createMortonCodeArray<TriangleMesh>(TriangleMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor)); - IF_ENABLED_QUADS(template size_t createMortonCodeArray<QuadMesh>(QuadMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor)); - IF_ENABLED_USER (template size_t createMortonCodeArray<UserGeometry>(UserGeometry* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor)); - IF_ENABLED_INSTANCE (template size_t createMortonCodeArray<Instance>(Instance* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor)); - } -} diff --git a/thirdparty/embree-aarch64/kernels/builders/primrefgen.h b/thirdparty/embree-aarch64/kernels/builders/primrefgen.h deleted file mode 100644 index 9919c945c3..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/primrefgen.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/scene.h" -#include "../common/primref.h" -#include "../common/primref_mb.h" -#include "priminfo.h" -#include "bvh_builder_morton.h" - -namespace embree -{ - namespace isa - { - PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor); - - PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor); - - PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime = 0); - - PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f)); - - template<typename Mesh> - size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor); - } -} - diff --git a/thirdparty/embree-aarch64/kernels/builders/primrefgen_presplit.h b/thirdparty/embree-aarch64/kernels/builders/primrefgen_presplit.h deleted file mode 100644 index 8bdb38b955..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/primrefgen_presplit.h +++ /dev/null @@ -1,371 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../builders/primrefgen.h" -#include "../builders/heuristic_spatial.h" -#include "../builders/splitter.h" - -#include "../../common/algorithms/parallel_for_for.h" -#include "../../common/algorithms/parallel_for_for_prefix_sum.h" - -#define DBG_PRESPLIT(x) -#define CHECK_PRESPLIT(x) - -#define GRID_SIZE 1024 -#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 5 -#define MAX_PRESPLITS_PER_PRIMITIVE (1<<MAX_PRESPLITS_PER_PRIMITIVE_LOG) -#define PRIORITY_CUTOFF_THRESHOLD 1.0f -#define PRIORITY_SPLIT_POS_WEIGHT 1.5f - -namespace embree -{ - namespace isa - { - - struct PresplitItem - { - union { - float priority; - unsigned int data; - }; - unsigned int index; - - __forceinline operator unsigned() const - { - return reinterpret_cast<const unsigned&>(priority); - } - __forceinline bool operator < (const PresplitItem& item) const - { - return (priority < item.priority); - } - - template<typename Mesh> - __forceinline static float compute_priority(const PrimRef &ref, Scene *scene, const Vec2i &mc) - { - const unsigned int geomID = ref.geomID(); - const unsigned int primID = ref.primID(); - const float area_aabb = area(ref.bounds()); - const float area_prim = ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID); - const unsigned int diff = 31 - lzcnt(mc.x^mc.y); - assert(area_prim <= area_aabb); - //const float priority = powf((area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f); - const float priority = sqrtf(sqrtf( (area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) )); - assert(priority >= 0.0f && priority < FLT_LARGE); - return priority; - } - - - }; - - inline std::ostream &operator<<(std::ostream &cout, const PresplitItem& item) { - return cout << "index " << item.index << " priority " << item.priority; - }; - - template<typename SplitterFactory> - void splitPrimitive(SplitterFactory &Splitter, - const PrimRef &prim, - const unsigned int geomID, - const unsigned int primID, - const unsigned int split_level, - const Vec3fa &grid_base, - const float grid_scale, - const float grid_extend, - PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE], - unsigned int& numSubPrims) - { - assert(split_level <= MAX_PRESPLITS_PER_PRIMITIVE_LOG); - if (split_level == 0) - { - assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE); - subPrims[numSubPrims++] = prim; - } - else - { - const Vec3fa lower = prim.lower; - const Vec3fa upper = prim.upper; - const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f); - const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f); - Vec3ia ilower(floor(glower)); - Vec3ia iupper(floor(gupper)); - - /* this ignores dimensions that are empty */ - iupper = (Vec3ia)(select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper))); - - /* compute a morton code for the lower and upper grid coordinates. */ - const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z); - const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z); - - /* if all bits are equal then we cannot split */ - if(unlikely(lower_code == upper_code)) - { - assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE); - subPrims[numSubPrims++] = prim; - return; - } - - /* compute octree level and dimension to perform the split in */ - const unsigned int diff = 31 - lzcnt(lower_code^upper_code); - const unsigned int level = diff / 3; - const unsigned int dim = diff % 3; - - /* now we compute the grid position of the split */ - const unsigned int isplit = iupper[dim] & ~((1<<level)-1); - - /* compute world space position of split */ - const float inv_grid_size = 1.0f / GRID_SIZE; - const float fsplit = grid_base[dim] + isplit * inv_grid_size * grid_extend; - - assert(prim.lower[dim] <= fsplit && - prim.upper[dim] >= fsplit); - - /* split primitive */ - const auto splitter = Splitter(prim); - BBox3fa left,right; - splitter(prim.bounds(),dim,fsplit,left,right); - assert(!left.empty()); - assert(!right.empty()); - - - splitPrimitive(Splitter,PrimRef(left ,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims); - splitPrimitive(Splitter,PrimRef(right,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims); - } - } - - - template<typename Mesh, typename SplitterFactory> - PrimInfo createPrimRefArray_presplit(Geometry* geometry, unsigned int geomID, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor) - { - ParallelPrefixSumState<PrimInfo> pstate; - - /* first try */ - progressMonitor(0); - PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo { - return geometry->createPrimRefArray(prims,r,r.begin(),geomID); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - - /* if we need to filter out geometry, run again */ - if (pinfo.size() != numPrimRefs) - { - progressMonitor(0); - pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo { - return geometry->createPrimRefArray(prims,r,base.size(),geomID); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - } - return pinfo; - } - - __forceinline Vec2i computeMC(const Vec3fa &grid_base, const float grid_scale, const PrimRef &ref) - { - const Vec3fa lower = ref.lower; - const Vec3fa upper = ref.upper; - const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f); - const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f); - Vec3ia ilower(floor(glower)); - Vec3ia iupper(floor(gupper)); - - /* this ignores dimensions that are empty */ - iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper)); - - /* compute a morton code for the lower and upper grid coordinates. */ - const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z); - const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z); - return Vec2i(lower_code,upper_code); - } - - template<typename Mesh, typename SplitterFactory> - PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor) - { - static const size_t MIN_STEP_SIZE = 128; - - ParallelForForPrefixSumState<PrimInfo> pstate; - Scene::Iterator2 iter(scene,types,mblur); - - /* first try */ - progressMonitor(0); - pstate.init(iter,size_t(1024)); - PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo { - return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - - /* if we need to filter out geometry, run again */ - if (pinfo.size() != numPrimRefs) - { - progressMonitor(0); - pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo { - return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID); - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - } - - /* use correct number of primitives */ - size_t numPrimitives = pinfo.size(); - const size_t alloc_numPrimitives = prims.size(); - const size_t numSplitPrimitivesBudget = alloc_numPrimitives - numPrimitives; - - /* set up primitive splitter */ - SplitterFactory Splitter(scene); - - - DBG_PRESPLIT( - const size_t org_numPrimitives = pinfo.size(); - PRINT(numPrimitives); - PRINT(alloc_numPrimitives); - PRINT(numSplitPrimitivesBudget); - ); - - /* allocate double buffer presplit items */ - const size_t presplit_allocation_size = sizeof(PresplitItem)*alloc_numPrimitives; - PresplitItem *presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64); - PresplitItem *tmp_presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64); - - /* compute grid */ - const Vec3fa grid_base = pinfo.geomBounds.lower; - const Vec3fa grid_diag = pinfo.geomBounds.size(); - const float grid_extend = max(grid_diag.x,max(grid_diag.y,grid_diag.z)); - const float grid_scale = grid_extend == 0.0f ? 0.0f : GRID_SIZE / grid_extend; - - /* init presplit items and get total sum */ - const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float { - float sum = 0.0f; - for (size_t i=r.begin(); i<r.end(); i++) - { - presplitItem[i].index = (unsigned int)i; - const Vec2i mc = computeMC(grid_base,grid_scale,prims[i]); - /* if all bits are equal then we cannot split */ - presplitItem[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority<Mesh>(prims[i],scene,mc) : 0.0f; - /* FIXME: sum undeterministic */ - sum += presplitItem[i].priority; - } - return sum; - },[](const float& a, const float& b) -> float { return a+b; }); - - /* compute number of splits per primitive */ - const float inv_psum = 1.0f / psum; - parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void { - for (size_t i=r.begin(); i<r.end(); i++) - { - if (presplitItem[i].priority > 0.0f) - { - const float rel_p = (float)numSplitPrimitivesBudget * presplitItem[i].priority * inv_psum; - if (rel_p >= PRIORITY_CUTOFF_THRESHOLD) // need at least a split budget that generates two sub-prims - { - presplitItem[i].priority = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f); - //presplitItem[i].priority = min(floorf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG); - assert(presplitItem[i].priority >= 0.0f && presplitItem[i].priority <= (float)MAX_PRESPLITS_PER_PRIMITIVE_LOG); - } - else - presplitItem[i].priority = 0.0f; - } - } - }); - - auto isLeft = [&] (const PresplitItem &ref) { return ref.priority < PRIORITY_CUTOFF_THRESHOLD; }; - size_t center = parallel_partitioning(presplitItem,0,numPrimitives,isLeft,1024); - - /* anything to split ? */ - if (center < numPrimitives) - { - const size_t numPrimitivesToSplit = numPrimitives - center; - assert(presplitItem[center].priority >= 1.0f); - - /* sort presplit items in ascending order */ - radix_sort_u32(presplitItem + center,tmp_presplitItem + center,numPrimitivesToSplit,1024); - - CHECK_PRESPLIT( - parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void { - for (size_t i=r.begin(); i<r.end(); i++) - assert(presplitItem[i-1].priority <= presplitItem[i].priority); - }); - ); - - unsigned int *const primOffset0 = (unsigned int*)tmp_presplitItem; - unsigned int *const primOffset1 = (unsigned int*)tmp_presplitItem + numPrimitivesToSplit; - - /* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */ - const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t { - size_t sum = 0; - for (size_t i=t.begin(); i<t.end(); i++) - { - PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE]; - assert(presplitItem[i].priority >= 1.0f); - const unsigned int primrefID = presplitItem[i].index; - const float prio = presplitItem[i].priority; - const unsigned int geomID = prims[primrefID].geomID(); - const unsigned int primID = prims[primrefID].primID(); - const unsigned int split_levels = (unsigned int)prio; - unsigned int numSubPrims = 0; - splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims); - assert(numSubPrims); - numSubPrims--; // can reuse slot - sum+=numSubPrims; - presplitItem[i].data = (numSubPrims << MAX_PRESPLITS_PER_PRIMITIVE_LOG) | split_levels; - primOffset0[i-center] = numSubPrims; - } - return sum; - },[](const size_t& a, const size_t& b) -> size_t { return a+b; }); - - /* if we are over budget, need to shrink the range */ - if (totalNumSubPrims > numSplitPrimitivesBudget) - { - size_t new_center = numPrimitives-1; - size_t sum = 0; - for (;new_center>=center;new_center--) - { - const unsigned int numSubPrims = presplitItem[new_center].data >> MAX_PRESPLITS_PER_PRIMITIVE_LOG; - if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break; - sum += numSubPrims; - } - new_center++; - center = new_center; - } - - /* parallel prefix sum to compute offsets for storing sub-primitives */ - const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>()); - - /* iterate over range, and split primitives into sub primitives and append them to prims array */ - parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void { - for (size_t j=rn.begin(); j<rn.end(); j++) - { - PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE]; - const unsigned int primrefID = presplitItem[j].index; - const unsigned int geomID = prims[primrefID].geomID(); - const unsigned int primID = prims[primrefID].primID(); - const unsigned int split_levels = presplitItem[j].data & ((unsigned int)(1 << MAX_PRESPLITS_PER_PRIMITIVE_LOG)-1); - - assert(split_levels); - assert(split_levels <= MAX_PRESPLITS_PER_PRIMITIVE_LOG); - unsigned int numSubPrims = 0; - splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims); - const size_t newID = numPrimitives + primOffset1[j-center]; - assert(newID+numSubPrims <= alloc_numPrimitives); - prims[primrefID] = subPrims[0]; - for (size_t i=1;i<numSubPrims;i++) - prims[newID+i-1] = subPrims[i]; - } - }); - - numPrimitives += offset; - DBG_PRESPLIT( - PRINT(pinfo.size()); - PRINT(numPrimitives); - PRINT((float)numPrimitives/org_numPrimitives)); - } - - /* recompute centroid bounding boxes */ - pinfo = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo { - PrimInfo p(empty); - for (size_t j=r.begin(); j<r.end(); j++) - p.add_center2(prims[j]); - return p; - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - - assert(pinfo.size() == numPrimitives); - - /* free double buffer presplit items */ - alignedFree(tmp_presplitItem); - alignedFree(presplitItem); - return pinfo; - } - } -} diff --git a/thirdparty/embree-aarch64/kernels/builders/splitter.h b/thirdparty/embree-aarch64/kernels/builders/splitter.h deleted file mode 100644 index dbd6cf07c7..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/splitter.h +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/scene.h" -#include "../common/primref.h" - -namespace embree -{ - namespace isa - { - template<size_t N> - __forceinline void splitPolygon(const BBox3fa& bounds, - const size_t dim, - const float pos, - const Vec3fa (&v)[N+1], - const Vec3fa (&inv_length)[N], - BBox3fa& left_o, - BBox3fa& right_o) - { - BBox3fa left = empty, right = empty; - /* clip triangle to left and right box by processing all edges */ - for (size_t i=0; i<N; i++) - { - const Vec3fa &v0 = v[i]; - const Vec3fa &v1 = v[i+1]; - const float v0d = v0[dim]; - const float v1d = v1[dim]; - - if (v0d <= pos) left. extend(v0); // this point is on left side - if (v0d >= pos) right.extend(v0); // this point is on right side - - if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location - { - assert((v1d-v0d) != 0.0f); - const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length[i][dim]),v1-v0,v0); - left.extend(c); - right.extend(c); - } - } - - /* clip against current bounds */ - left_o = intersect(left,bounds); - right_o = intersect(right,bounds); - } - - template<size_t N> - __forceinline void splitPolygon(const PrimRef& prim, - const size_t dim, - const float pos, - const Vec3fa (&v)[N+1], - PrimRef& left_o, - PrimRef& right_o) - { - BBox3fa left = empty, right = empty; - for (size_t i=0; i<N; i++) - { - const Vec3fa &v0 = v[i]; - const Vec3fa &v1 = v[i+1]; - const float v0d = v0[dim]; - const float v1d = v1[dim]; - - if (v0d <= pos) left. extend(v0); // this point is on left side - if (v0d >= pos) right.extend(v0); // this point is on right side - - if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location - { - assert((v1d-v0d) != 0.0f); - const float inv_length = 1.0f/(v1d-v0d); - const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length),v1-v0,v0); - left.extend(c); - right.extend(c); - } - } - - /* clip against current bounds */ - new (&left_o ) PrimRef(intersect(left ,prim.bounds()),prim.geomID(), prim.primID()); - new (&right_o) PrimRef(intersect(right,prim.bounds()),prim.geomID(), prim.primID()); - } - - struct TriangleSplitter - { - __forceinline TriangleSplitter(const Scene* scene, const PrimRef& prim) - { - const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS; - const TriangleMesh* mesh = (const TriangleMesh*) scene->get(prim.geomID() & mask ); - TriangleMesh::Triangle tri = mesh->triangle(prim.primID()); - v[0] = mesh->vertex(tri.v[0]); - v[1] = mesh->vertex(tri.v[1]); - v[2] = mesh->vertex(tri.v[2]); - v[3] = mesh->vertex(tri.v[0]); - inv_length[0] = Vec3fa(1.0f) / (v[1]-v[0]); - inv_length[1] = Vec3fa(1.0f) / (v[2]-v[1]); - inv_length[2] = Vec3fa(1.0f) / (v[0]-v[2]); - } - - __forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const { - splitPolygon<3>(prim,dim,pos,v,left_o,right_o); - } - - __forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const { - splitPolygon<3>(prim,dim,pos,v,inv_length,left_o,right_o); - } - - private: - Vec3fa v[4]; - Vec3fa inv_length[3]; - }; - - struct TriangleSplitterFactory - { - __forceinline TriangleSplitterFactory(const Scene* scene) - : scene(scene) {} - - __forceinline TriangleSplitter operator() (const PrimRef& prim) const { - return TriangleSplitter(scene,prim); - } - - private: - const Scene* scene; - }; - - struct QuadSplitter - { - __forceinline QuadSplitter(const Scene* scene, const PrimRef& prim) - { - const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS; - const QuadMesh* mesh = (const QuadMesh*) scene->get(prim.geomID() & mask ); - QuadMesh::Quad quad = mesh->quad(prim.primID()); - v[0] = mesh->vertex(quad.v[0]); - v[1] = mesh->vertex(quad.v[1]); - v[2] = mesh->vertex(quad.v[2]); - v[3] = mesh->vertex(quad.v[3]); - v[4] = mesh->vertex(quad.v[0]); - inv_length[0] = Vec3fa(1.0f) / (v[1]-v[0]); - inv_length[1] = Vec3fa(1.0f) / (v[2]-v[1]); - inv_length[2] = Vec3fa(1.0f) / (v[3]-v[2]); - inv_length[3] = Vec3fa(1.0f) / (v[0]-v[3]); - } - - __forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const { - splitPolygon<4>(prim,dim,pos,v,left_o,right_o); - } - - __forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const { - splitPolygon<4>(prim,dim,pos,v,inv_length,left_o,right_o); - } - - private: - Vec3fa v[5]; - Vec3fa inv_length[4]; - }; - - struct QuadSplitterFactory - { - __forceinline QuadSplitterFactory(const Scene* scene) - : scene(scene) {} - - __forceinline QuadSplitter operator() (const PrimRef& prim) const { - return QuadSplitter(scene,prim); - } - - private: - const Scene* scene; - }; - } -} - |