summaryrefslogtreecommitdiff
path: root/thirdparty/embree-aarch64/kernels/builders
diff options
context:
space:
mode:
authorjfons <joan.fonssanchez@gmail.com>2021-05-20 12:49:33 +0200
committerjfons <joan.fonssanchez@gmail.com>2021-05-21 17:00:24 +0200
commit767e374dced69b45db0afb30ca2ccf0bbbeef672 (patch)
treea712cecc2c8cc2c6d6ecdc4a50020d423ddb4c0c /thirdparty/embree-aarch64/kernels/builders
parent42b6602f1d4b108cecb94b94c0d2b645acaebd4f (diff)
Upgrade Embree to the latest official release.
Since Embree v3.13.0 supports AARCH64, switch back to the official repo instead of using Embree-aarch64. `thirdparty/embree/patches/godot-changes.patch` should now contain an accurate diff of the changes done to the library.
Diffstat (limited to 'thirdparty/embree-aarch64/kernels/builders')
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h411
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/bvh_builder_morton.h501
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h692
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur_hair.h526
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/bvh_builder_sah.h669
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_binning.h972
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_aligned.h205
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_unaligned.h302
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_openmerge_array.h443
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_spatial.h414
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_spatial_array.h552
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h188
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/heuristic_timesplit_array.h237
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/priminfo.h362
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/primrefgen.cpp244
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/primrefgen.h28
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/primrefgen_presplit.h371
-rw-r--r--thirdparty/embree-aarch64/kernels/builders/splitter.h169
18 files changed, 0 insertions, 7286 deletions
diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h
deleted file mode 100644
index 755ce255fb..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h
+++ /dev/null
@@ -1,411 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../bvh/bvh.h"
-#include "../geometry/primitive.h"
-#include "../builders/bvh_builder_sah.h"
-#include "../builders/heuristic_binning_array_aligned.h"
-#include "../builders/heuristic_binning_array_unaligned.h"
-#include "../builders/heuristic_strand_array.h"
-
-#define NUM_HAIR_OBJECT_BINS 32
-
-namespace embree
-{
- namespace isa
- {
- struct BVHBuilderHair
- {
- /*! settings for builder */
- struct Settings
- {
- /*! default settings */
- Settings ()
- : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), finished_range_threshold(inf) {}
-
- public:
- size_t branchingFactor; //!< branching factor of BVH to build
- size_t maxDepth; //!< maximum depth of BVH to build
- size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
- size_t minLeafSize; //!< minimum size of a leaf
- size_t maxLeafSize; //!< maximum size of a leaf
- size_t finished_range_threshold; //!< finished range threshold
- };
-
- template<typename NodeRef,
- typename CreateAllocFunc,
- typename CreateAABBNodeFunc,
- typename SetAABBNodeFunc,
- typename CreateOBBNodeFunc,
- typename SetOBBNodeFunc,
- typename CreateLeafFunc,
- typename ProgressMonitor,
- typename ReportFinishedRangeFunc>
-
- class BuilderT
- {
- ALIGNED_CLASS_(16);
- friend struct BVHBuilderHair;
-
- typedef FastAllocator::CachedAllocator Allocator;
- typedef HeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> HeuristicBinningSAH;
- typedef UnalignedHeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> UnalignedHeuristicBinningSAH;
- typedef HeuristicStrandSplit HeuristicStrandSplitSAH;
-
- static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
- static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
- static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build
-
- static const size_t travCostAligned = 1;
- static const size_t travCostUnaligned = 5;
- static const size_t intCost = 6;
-
- BuilderT (Scene* scene,
- PrimRef* prims,
- const CreateAllocFunc& createAlloc,
- const CreateAABBNodeFunc& createAABBNode,
- const SetAABBNodeFunc& setAABBNode,
- const CreateOBBNodeFunc& createOBBNode,
- const SetOBBNodeFunc& setOBBNode,
- const CreateLeafFunc& createLeaf,
- const ProgressMonitor& progressMonitor,
- const ReportFinishedRangeFunc& reportFinishedRange,
- const Settings settings)
-
- : cfg(settings),
- prims(prims),
- createAlloc(createAlloc),
- createAABBNode(createAABBNode),
- setAABBNode(setAABBNode),
- createOBBNode(createOBBNode),
- setOBBNode(setOBBNode),
- createLeaf(createLeaf),
- progressMonitor(progressMonitor),
- reportFinishedRange(reportFinishedRange),
- alignedHeuristic(prims), unalignedHeuristic(scene,prims), strandHeuristic(scene,prims) {}
-
- /*! checks if all primitives are from the same geometry */
- __forceinline bool sameGeometry(const PrimInfoRange& range)
- {
- if (range.size() == 0) return true;
- unsigned int firstGeomID = prims[range.begin()].geomID();
- for (size_t i=range.begin()+1; i<range.end(); i++) {
- if (prims[i].geomID() != firstGeomID){
- return false;
- }
- }
- return true;
- }
-
- /*! creates a large leaf that could be larger than supported by the BVH */
- NodeRef createLargeLeaf(size_t depth, const PrimInfoRange& pinfo, Allocator alloc)
- {
- /* this should never occur but is a fatal error */
- if (depth > cfg.maxDepth)
- throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
-
- /* create leaf for few primitives */
- if (pinfo.size() <= cfg.maxLeafSize && sameGeometry(pinfo))
- return createLeaf(prims,pinfo,alloc);
-
- /* fill all children by always splitting the largest one */
- PrimInfoRange children[MAX_BRANCHING_FACTOR];
- unsigned numChildren = 1;
- children[0] = pinfo;
-
- do {
-
- /* find best child with largest bounding box area */
- int bestChild = -1;
- size_t bestSize = 0;
- for (unsigned i=0; i<numChildren; i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i]))
- continue;
-
- /* remember child with largest size */
- if (children[i].size() > bestSize) {
- bestSize = children[i].size();
- bestChild = i;
- }
- }
- if (bestChild == -1) break;
-
- /*! split best child into left and right child */
- __aligned(64) PrimInfoRange left, right;
- if (!sameGeometry(children[bestChild])) {
- alignedHeuristic.splitByGeometry(children[bestChild],left,right);
- } else {
- alignedHeuristic.splitFallback(children[bestChild],left,right);
- }
-
- /* add new children left and right */
- children[bestChild] = children[numChildren-1];
- children[numChildren-1] = left;
- children[numChildren+0] = right;
- numChildren++;
-
- } while (numChildren < cfg.branchingFactor);
-
- /* create node */
- auto node = createAABBNode(alloc);
-
- for (size_t i=0; i<numChildren; i++) {
- const NodeRef child = createLargeLeaf(depth+1,children[i],alloc);
- setAABBNode(node,i,child,children[i].geomBounds);
- }
-
- return node;
- }
-
- /*! performs split */
- __noinline void split(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo, bool& aligned) // FIXME: not inlined as ICC otherwise uses much stack
- {
- /* variable to track the SAH of the best splitting approach */
- float bestSAH = inf;
- const size_t blocks = (pinfo.size()+(1ull<<cfg.logBlockSize)-1ull) >> cfg.logBlockSize;
- const float leafSAH = intCost*float(blocks)*halfArea(pinfo.geomBounds);
-
- /* try standard binning in aligned space */
- float alignedObjectSAH = inf;
- HeuristicBinningSAH::Split alignedObjectSplit;
- if (aligned) {
- alignedObjectSplit = alignedHeuristic.find(pinfo,cfg.logBlockSize);
- alignedObjectSAH = travCostAligned*halfArea(pinfo.geomBounds) + intCost*alignedObjectSplit.splitSAH();
- bestSAH = min(alignedObjectSAH,bestSAH);
- }
-
- /* try standard binning in unaligned space */
- UnalignedHeuristicBinningSAH::Split unalignedObjectSplit;
- LinearSpace3fa uspace;
- float unalignedObjectSAH = inf;
- if (bestSAH > 0.7f*leafSAH) {
- uspace = unalignedHeuristic.computeAlignedSpace(pinfo);
- const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(pinfo,uspace);
- unalignedObjectSplit = unalignedHeuristic.find(sinfo,cfg.logBlockSize,uspace);
- unalignedObjectSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*unalignedObjectSplit.splitSAH();
- bestSAH = min(unalignedObjectSAH,bestSAH);
- }
-
- /* try splitting into two strands */
- HeuristicStrandSplitSAH::Split strandSplit;
- float strandSAH = inf;
- if (bestSAH > 0.7f*leafSAH && pinfo.size() <= 256) {
- strandSplit = strandHeuristic.find(pinfo,cfg.logBlockSize);
- strandSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*strandSplit.splitSAH();
- bestSAH = min(strandSAH,bestSAH);
- }
-
- /* fallback if SAH heuristics failed */
- if (unlikely(!std::isfinite(bestSAH)))
- {
- alignedHeuristic.deterministic_order(pinfo);
- alignedHeuristic.splitFallback(pinfo,linfo,rinfo);
- }
-
- /* perform aligned split if this is best */
- else if (bestSAH == alignedObjectSAH) {
- alignedHeuristic.split(alignedObjectSplit,pinfo,linfo,rinfo);
- }
-
- /* perform unaligned split if this is best */
- else if (bestSAH == unalignedObjectSAH) {
- unalignedHeuristic.split(unalignedObjectSplit,uspace,pinfo,linfo,rinfo);
- aligned = false;
- }
-
- /* perform strand split if this is best */
- else if (bestSAH == strandSAH) {
- strandHeuristic.split(strandSplit,pinfo,linfo,rinfo);
- aligned = false;
- }
-
- /* can never happen */
- else
- assert(false);
- }
-
- /*! recursive build */
- NodeRef recurse(size_t depth, const PrimInfoRange& pinfo, Allocator alloc, bool toplevel, bool alloc_barrier)
- {
- /* get thread local allocator */
- if (!alloc)
- alloc = createAlloc();
-
- /* call memory monitor function to signal progress */
- if (toplevel && pinfo.size() <= SINGLE_THREADED_THRESHOLD)
- progressMonitor(pinfo.size());
-
- PrimInfoRange children[MAX_BRANCHING_FACTOR];
-
- /* create leaf node */
- if (depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || pinfo.size() <= cfg.minLeafSize) {
- alignedHeuristic.deterministic_order(pinfo);
- return createLargeLeaf(depth,pinfo,alloc);
- }
-
- /* fill all children by always splitting the one with the largest surface area */
- size_t numChildren = 1;
- children[0] = pinfo;
- bool aligned = true;
-
- do {
-
- /* find best child with largest bounding box area */
- ssize_t bestChild = -1;
- float bestArea = neg_inf;
- for (size_t i=0; i<numChildren; i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].size() <= cfg.minLeafSize)
- continue;
-
- /* remember child with largest area */
- if (area(children[i].geomBounds) > bestArea) {
- bestArea = area(children[i].geomBounds);
- bestChild = i;
- }
- }
- if (bestChild == -1) break;
-
- /*! split best child into left and right child */
- PrimInfoRange left, right;
- split(children[bestChild],left,right,aligned);
-
- /* add new children left and right */
- children[bestChild] = children[numChildren-1];
- children[numChildren-1] = left;
- children[numChildren+0] = right;
- numChildren++;
-
- } while (numChildren < cfg.branchingFactor);
-
- NodeRef node;
-
- /* create aligned node */
- if (aligned)
- {
- node = createAABBNode(alloc);
-
- /* spawn tasks or ... */
- if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
- {
- parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
- setAABBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),children[i].geomBounds);
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
- }
- /* ... continue sequentially */
- else {
- for (size_t i=0; i<numChildren; i++) {
- const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
- setAABBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),children[i].geomBounds);
- }
- }
- }
-
- /* create unaligned node */
- else
- {
- node = createOBBNode(alloc);
-
- /* spawn tasks or ... */
- if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
- {
- parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
- const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
- const OBBox3fa obounds(space,sinfo.geomBounds);
- const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
- setOBBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),obounds);
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
- }
- /* ... continue sequentially */
- else
- {
- for (size_t i=0; i<numChildren; i++) {
- const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
- const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
- const OBBox3fa obounds(space,sinfo.geomBounds);
- const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
- setOBBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),obounds);
- }
- }
- }
-
- /* reports a finished range of primrefs */
- if (unlikely(alloc_barrier))
- reportFinishedRange(pinfo);
-
- return node;
- }
-
- private:
- Settings cfg;
- PrimRef* prims;
- const CreateAllocFunc& createAlloc;
- const CreateAABBNodeFunc& createAABBNode;
- const SetAABBNodeFunc& setAABBNode;
- const CreateOBBNodeFunc& createOBBNode;
- const SetOBBNodeFunc& setOBBNode;
- const CreateLeafFunc& createLeaf;
- const ProgressMonitor& progressMonitor;
- const ReportFinishedRangeFunc& reportFinishedRange;
-
- private:
- HeuristicBinningSAH alignedHeuristic;
- UnalignedHeuristicBinningSAH unalignedHeuristic;
- HeuristicStrandSplitSAH strandHeuristic;
- };
-
- template<typename NodeRef,
- typename CreateAllocFunc,
- typename CreateAABBNodeFunc,
- typename SetAABBNodeFunc,
- typename CreateOBBNodeFunc,
- typename SetOBBNodeFunc,
- typename CreateLeafFunc,
- typename ProgressMonitor,
- typename ReportFinishedRangeFunc>
-
- static NodeRef build (const CreateAllocFunc& createAlloc,
- const CreateAABBNodeFunc& createAABBNode,
- const SetAABBNodeFunc& setAABBNode,
- const CreateOBBNodeFunc& createOBBNode,
- const SetOBBNodeFunc& setOBBNode,
- const CreateLeafFunc& createLeaf,
- const ProgressMonitor& progressMonitor,
- const ReportFinishedRangeFunc& reportFinishedRange,
- Scene* scene,
- PrimRef* prims,
- const PrimInfo& pinfo,
- const Settings settings)
- {
- typedef BuilderT<NodeRef,
- CreateAllocFunc,
- CreateAABBNodeFunc,SetAABBNodeFunc,
- CreateOBBNodeFunc,SetOBBNodeFunc,
- CreateLeafFunc,ProgressMonitor,
- ReportFinishedRangeFunc> Builder;
-
- Builder builder(scene,prims,createAlloc,
- createAABBNode,setAABBNode,
- createOBBNode,setOBBNode,
- createLeaf,progressMonitor,reportFinishedRange,settings);
-
- NodeRef root = builder.recurse(1,pinfo,nullptr,true,false);
- _mm_mfence(); // to allow non-temporal stores during build
- return root;
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_morton.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_morton.h
deleted file mode 100644
index 92be2f7e65..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_morton.h
+++ /dev/null
@@ -1,501 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/builder.h"
-#include "../../common/algorithms/parallel_reduce.h"
-
-namespace embree
-{
- namespace isa
- {
- struct BVHBuilderMorton
- {
- static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
- static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree of we are that many levels before the maximum tree depth
-
- /*! settings for morton builder */
- struct Settings
- {
- /*! default settings */
- Settings ()
- : branchingFactor(2), maxDepth(32), minLeafSize(1), maxLeafSize(7), singleThreadThreshold(1024) {}
-
- /*! initialize settings from API settings */
- Settings (const RTCBuildArguments& settings)
- : branchingFactor(2), maxDepth(32), minLeafSize(1), maxLeafSize(7), singleThreadThreshold(1024)
- {
- if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor;
- if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth;
- if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize;
- if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize;
-
- minLeafSize = min(minLeafSize,maxLeafSize);
- }
-
- Settings (size_t branchingFactor, size_t maxDepth, size_t minLeafSize, size_t maxLeafSize, size_t singleThreadThreshold)
- : branchingFactor(branchingFactor), maxDepth(maxDepth), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize), singleThreadThreshold(singleThreadThreshold)
- {
- minLeafSize = min(minLeafSize,maxLeafSize);
- }
-
- public:
- size_t branchingFactor; //!< branching factor of BVH to build
- size_t maxDepth; //!< maximum depth of BVH to build
- size_t minLeafSize; //!< minimum size of a leaf
- size_t maxLeafSize; //!< maximum size of a leaf
- size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
- };
-
- /*! Build primitive consisting of morton code and primitive ID. */
- struct __aligned(8) BuildPrim
- {
- union {
- struct {
- unsigned int code; //!< morton code
- unsigned int index; //!< i'th primitive
- };
- uint64_t t;
- };
-
- /*! interface for radix sort */
- __forceinline operator unsigned() const { return code; }
-
- /*! interface for standard sort */
- __forceinline bool operator<(const BuildPrim &m) const { return code < m.code; }
- };
-
- /*! maps bounding box to morton code */
- struct MortonCodeMapping
- {
- static const size_t LATTICE_BITS_PER_DIM = 10;
- static const size_t LATTICE_SIZE_PER_DIM = size_t(1) << LATTICE_BITS_PER_DIM;
-
- vfloat4 base;
- vfloat4 scale;
-
- __forceinline MortonCodeMapping(const BBox3fa& bounds)
- {
- base = (vfloat4)bounds.lower;
- const vfloat4 diag = (vfloat4)bounds.upper - (vfloat4)bounds.lower;
- scale = select(diag > vfloat4(1E-19f), rcp(diag) * vfloat4(LATTICE_SIZE_PER_DIM * 0.99f),vfloat4(0.0f));
- }
-
- __forceinline const vint4 bin (const BBox3fa& box) const
- {
- const vfloat4 lower = (vfloat4)box.lower;
- const vfloat4 upper = (vfloat4)box.upper;
- const vfloat4 centroid = lower+upper;
- return vint4((centroid-base)*scale);
- }
-
- __forceinline unsigned int code (const BBox3fa& box) const
- {
- const vint4 binID = bin(box);
- const unsigned int x = extract<0>(binID);
- const unsigned int y = extract<1>(binID);
- const unsigned int z = extract<2>(binID);
- const unsigned int xyz = bitInterleave(x,y,z);
- return xyz;
- }
- };
-
-#if defined (__AVX2__)
-
- /*! for AVX2 there is a fast scalar bitInterleave */
- struct MortonCodeGenerator
- {
- __forceinline MortonCodeGenerator(const MortonCodeMapping& mapping, BuildPrim* dest)
- : mapping(mapping), dest(dest) {}
-
- __forceinline void operator() (const BBox3fa& b, const unsigned index)
- {
- dest->index = index;
- dest->code = mapping.code(b);
- dest++;
- }
-
- public:
- const MortonCodeMapping mapping;
- BuildPrim* dest;
- size_t currentID;
- };
-
-#else
-
- /*! before AVX2 is it better to use the SSE version of bitInterleave */
- struct MortonCodeGenerator
- {
- __forceinline MortonCodeGenerator(const MortonCodeMapping& mapping, BuildPrim* dest)
- : mapping(mapping), dest(dest), currentID(0), slots(0), ax(0), ay(0), az(0), ai(0) {}
-
- __forceinline ~MortonCodeGenerator()
- {
- if (slots != 0)
- {
- const vint4 code = bitInterleave(ax,ay,az);
- for (size_t i=0; i<slots; i++) {
- dest[currentID-slots+i].index = ai[i];
- dest[currentID-slots+i].code = code[i];
- }
- }
- }
-
- __forceinline void operator() (const BBox3fa& b, const unsigned index)
- {
- const vint4 binID = mapping.bin(b);
- ax[slots] = extract<0>(binID);
- ay[slots] = extract<1>(binID);
- az[slots] = extract<2>(binID);
- ai[slots] = index;
- slots++;
- currentID++;
-
- if (slots == 4)
- {
- const vint4 code = bitInterleave(ax,ay,az);
- vint4::storeu(&dest[currentID-4],unpacklo(code,ai));
- vint4::storeu(&dest[currentID-2],unpackhi(code,ai));
- slots = 0;
- }
- }
-
- public:
- const MortonCodeMapping mapping;
- BuildPrim* dest;
- size_t currentID;
- size_t slots;
- vint4 ax, ay, az, ai;
- };
-
-#endif
-
- template<
- typename ReductionTy,
- typename Allocator,
- typename CreateAllocator,
- typename CreateNodeFunc,
- typename SetNodeBoundsFunc,
- typename CreateLeafFunc,
- typename CalculateBounds,
- typename ProgressMonitor>
-
- class BuilderT : private Settings
- {
- ALIGNED_CLASS_(16);
-
- public:
-
- BuilderT (CreateAllocator& createAllocator,
- CreateNodeFunc& createNode,
- SetNodeBoundsFunc& setBounds,
- CreateLeafFunc& createLeaf,
- CalculateBounds& calculateBounds,
- ProgressMonitor& progressMonitor,
- const Settings& settings)
-
- : Settings(settings),
- createAllocator(createAllocator),
- createNode(createNode),
- setBounds(setBounds),
- createLeaf(createLeaf),
- calculateBounds(calculateBounds),
- progressMonitor(progressMonitor),
- morton(nullptr) {}
-
- ReductionTy createLargeLeaf(size_t depth, const range<unsigned>& current, Allocator alloc)
- {
- /* this should never occur but is a fatal error */
- if (depth > maxDepth)
- throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
-
- /* create leaf for few primitives */
- if (current.size() <= maxLeafSize)
- return createLeaf(current,alloc);
-
- /* fill all children by always splitting the largest one */
- range<unsigned> children[MAX_BRANCHING_FACTOR];
- size_t numChildren = 1;
- children[0] = current;
-
- do {
-
- /* find best child with largest number of primitives */
- size_t bestChild = -1;
- size_t bestSize = 0;
- for (size_t i=0; i<numChildren; i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].size() <= maxLeafSize)
- continue;
-
- /* remember child with largest size */
- if (children[i].size() > bestSize) {
- bestSize = children[i].size();
- bestChild = i;
- }
- }
- if (bestChild == size_t(-1)) break;
-
- /*! split best child into left and right child */
- auto split = children[bestChild].split();
-
- /* add new children left and right */
- children[bestChild] = children[numChildren-1];
- children[numChildren-1] = split.first;
- children[numChildren+0] = split.second;
- numChildren++;
-
- } while (numChildren < branchingFactor);
-
- /* create node */
- auto node = createNode(alloc,numChildren);
-
- /* recurse into each child */
- ReductionTy bounds[MAX_BRANCHING_FACTOR];
- for (size_t i=0; i<numChildren; i++)
- bounds[i] = createLargeLeaf(depth+1,children[i],alloc);
-
- return setBounds(node,bounds,numChildren);
- }
-
- /*! recreates morton codes when reaching a region where all codes are identical */
- __noinline void recreateMortonCodes(const range<unsigned>& current) const
- {
- /* fast path for small ranges */
- if (likely(current.size() < 1024))
- {
- /*! recalculate centroid bounds */
- BBox3fa centBounds(empty);
- for (size_t i=current.begin(); i<current.end(); i++)
- centBounds.extend(center2(calculateBounds(morton[i])));
-
- /* recalculate morton codes */
- MortonCodeMapping mapping(centBounds);
- for (size_t i=current.begin(); i<current.end(); i++)
- morton[i].code = mapping.code(calculateBounds(morton[i]));
-
- /* sort morton codes */
- std::sort(morton+current.begin(),morton+current.end());
- }
- else
- {
- /*! recalculate centroid bounds */
- auto calculateCentBounds = [&] ( const range<unsigned>& r ) {
- BBox3fa centBounds = empty;
- for (size_t i=r.begin(); i<r.end(); i++)
- centBounds.extend(center2(calculateBounds(morton[i])));
- return centBounds;
- };
- const BBox3fa centBounds = parallel_reduce(current.begin(), current.end(), unsigned(1024),
- BBox3fa(empty), calculateCentBounds, BBox3fa::merge);
-
- /* recalculate morton codes */
- MortonCodeMapping mapping(centBounds);
- parallel_for(current.begin(), current.end(), unsigned(1024), [&] ( const range<unsigned>& r ) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- morton[i].code = mapping.code(calculateBounds(morton[i]));
- }
- });
-
- /*! sort morton codes */
-#if defined(TASKING_TBB)
- tbb::parallel_sort(morton+current.begin(),morton+current.end());
-#else
- radixsort32(morton+current.begin(),current.size());
-#endif
- }
- }
-
- __forceinline void split(const range<unsigned>& current, range<unsigned>& left, range<unsigned>& right) const
- {
- const unsigned int code_start = morton[current.begin()].code;
- const unsigned int code_end = morton[current.end()-1].code;
- unsigned int bitpos = lzcnt(code_start^code_end);
-
- /* if all items mapped to same morton code, then re-create new morton codes for the items */
- if (unlikely(bitpos == 32))
- {
- recreateMortonCodes(current);
- const unsigned int code_start = morton[current.begin()].code;
- const unsigned int code_end = morton[current.end()-1].code;
- bitpos = lzcnt(code_start^code_end);
-
- /* if the morton code is still the same, goto fall back split */
- if (unlikely(bitpos == 32)) {
- current.split(left,right);
- return;
- }
- }
-
- /* split the items at the topmost different morton code bit */
- const unsigned int bitpos_diff = 31-bitpos;
- const unsigned int bitmask = 1 << bitpos_diff;
-
- /* find location where bit differs using binary search */
- unsigned begin = current.begin();
- unsigned end = current.end();
- while (begin + 1 != end) {
- const unsigned mid = (begin+end)/2;
- const unsigned bit = morton[mid].code & bitmask;
- if (bit == 0) begin = mid; else end = mid;
- }
- unsigned center = end;
-#if defined(DEBUG)
- for (unsigned int i=begin; i<center; i++) assert((morton[i].code & bitmask) == 0);
- for (unsigned int i=center; i<end; i++) assert((morton[i].code & bitmask) == bitmask);
-#endif
-
- left = make_range(current.begin(),center);
- right = make_range(center,current.end());
- }
-
- ReductionTy recurse(size_t depth, const range<unsigned>& current, Allocator alloc, bool toplevel)
- {
- /* get thread local allocator */
- if (!alloc)
- alloc = createAllocator();
-
- /* call memory monitor function to signal progress */
- if (toplevel && current.size() <= singleThreadThreshold)
- progressMonitor(current.size());
-
- /* create leaf node */
- if (unlikely(depth+MIN_LARGE_LEAF_LEVELS >= maxDepth || current.size() <= minLeafSize))
- return createLargeLeaf(depth,current,alloc);
-
- /* fill all children by always splitting the one with the largest surface area */
- range<unsigned> children[MAX_BRANCHING_FACTOR];
- split(current,children[0],children[1]);
- size_t numChildren = 2;
-
- while (numChildren < branchingFactor)
- {
- /* find best child with largest number of primitives */
- int bestChild = -1;
- unsigned bestItems = 0;
- for (unsigned int i=0; i<numChildren; i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].size() <= minLeafSize)
- continue;
-
- /* remember child with largest area */
- if (children[i].size() > bestItems) {
- bestItems = children[i].size();
- bestChild = i;
- }
- }
- if (bestChild == -1) break;
-
- /*! split best child into left and right child */
- range<unsigned> left, right;
- split(children[bestChild],left,right);
-
- /* add new children left and right */
- children[bestChild] = children[numChildren-1];
- children[numChildren-1] = left;
- children[numChildren+0] = right;
- numChildren++;
- }
-
- /* create leaf node if no split is possible */
- if (unlikely(numChildren == 1))
- return createLeaf(current,alloc);
-
- /* allocate node */
- auto node = createNode(alloc,numChildren);
-
- /* process top parts of tree parallel */
- ReductionTy bounds[MAX_BRANCHING_FACTOR];
- if (current.size() > singleThreadThreshold)
- {
- /*! parallel_for is faster than spawing sub-tasks */
- parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- bounds[i] = recurse(depth+1,children[i],nullptr,true);
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
- }
-
- /* finish tree sequentially */
- else
- {
- for (size_t i=0; i<numChildren; i++)
- bounds[i] = recurse(depth+1,children[i],alloc,false);
- }
-
- return setBounds(node,bounds,numChildren);
- }
-
- /* build function */
- ReductionTy build(BuildPrim* src, BuildPrim* tmp, size_t numPrimitives)
- {
- /* sort morton codes */
- morton = src;
- radix_sort_u32(src,tmp,numPrimitives,singleThreadThreshold);
-
- /* build BVH */
- const ReductionTy root = recurse(1, range<unsigned>(0,(unsigned)numPrimitives), nullptr, true);
- _mm_mfence(); // to allow non-temporal stores during build
- return root;
- }
-
- public:
- CreateAllocator& createAllocator;
- CreateNodeFunc& createNode;
- SetNodeBoundsFunc& setBounds;
- CreateLeafFunc& createLeaf;
- CalculateBounds& calculateBounds;
- ProgressMonitor& progressMonitor;
-
- public:
- BuildPrim* morton;
- };
-
-
- template<
- typename ReductionTy,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename SetBoundsFunc,
- typename CreateLeafFunc,
- typename CalculateBoundsFunc,
- typename ProgressMonitor>
-
- static ReductionTy build(CreateAllocFunc createAllocator,
- CreateNodeFunc createNode,
- SetBoundsFunc setBounds,
- CreateLeafFunc createLeaf,
- CalculateBoundsFunc calculateBounds,
- ProgressMonitor progressMonitor,
- BuildPrim* src,
- BuildPrim* tmp,
- size_t numPrimitives,
- const Settings& settings)
- {
- typedef BuilderT<
- ReductionTy,
- decltype(createAllocator()),
- CreateAllocFunc,
- CreateNodeFunc,
- SetBoundsFunc,
- CreateLeafFunc,
- CalculateBoundsFunc,
- ProgressMonitor> Builder;
-
- Builder builder(createAllocator,
- createNode,
- setBounds,
- createLeaf,
- calculateBounds,
- progressMonitor,
- settings);
-
- return builder.build(src,tmp,numPrimitives);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h
deleted file mode 100644
index 4c138dacdb..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h
+++ /dev/null
@@ -1,692 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#define MBLUR_NUM_TEMPORAL_BINS 2
-#define MBLUR_NUM_OBJECT_BINS 32
-
-#include "../bvh/bvh.h"
-#include "../common/primref_mb.h"
-#include "heuristic_binning_array_aligned.h"
-#include "heuristic_timesplit_array.h"
-
-namespace embree
-{
- namespace isa
- {
- template<typename T>
- struct SharedVector
- {
- __forceinline SharedVector() {}
-
- __forceinline SharedVector(T* ptr, size_t refCount = 1)
- : prims(ptr), refCount(refCount) {}
-
- __forceinline void incRef() {
- refCount++;
- }
-
- __forceinline void decRef()
- {
- if (--refCount == 0)
- delete prims;
- }
-
- T* prims;
- size_t refCount;
- };
-
- template<typename BuildRecord, int MAX_BRANCHING_FACTOR>
- struct LocalChildListT
- {
- typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
-
- __forceinline LocalChildListT (const BuildRecord& record)
- : numChildren(1), numSharedPrimVecs(1)
- {
- /* the local root will be freed in the ancestor where it was created (thus refCount is 2) */
- children[0] = record;
- primvecs[0] = new (&sharedPrimVecs[0]) SharedPrimRefVector(record.prims.prims, 2);
- }
-
- __forceinline ~LocalChildListT()
- {
- for (size_t i = 0; i < numChildren; i++)
- primvecs[i]->decRef();
- }
-
- __forceinline BuildRecord& operator[] ( const size_t i ) {
- return children[i];
- }
-
- __forceinline size_t size() const {
- return numChildren;
- }
-
- __forceinline void split(ssize_t bestChild, const BuildRecord& lrecord, const BuildRecord& rrecord, std::unique_ptr<mvector<PrimRefMB>> new_vector)
- {
- SharedPrimRefVector* bsharedPrimVec = primvecs[bestChild];
- if (lrecord.prims.prims == bsharedPrimVec->prims) {
- primvecs[bestChild] = bsharedPrimVec;
- bsharedPrimVec->incRef();
- }
- else {
- primvecs[bestChild] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(lrecord.prims.prims);
- }
-
- if (rrecord.prims.prims == bsharedPrimVec->prims) {
- primvecs[numChildren] = bsharedPrimVec;
- bsharedPrimVec->incRef();
- }
- else {
- primvecs[numChildren] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(rrecord.prims.prims);
- }
- bsharedPrimVec->decRef();
- new_vector.release();
-
- children[bestChild] = lrecord;
- children[numChildren] = rrecord;
- numChildren++;
- }
-
- public:
- array_t<BuildRecord,MAX_BRANCHING_FACTOR> children;
- array_t<SharedPrimRefVector*,MAX_BRANCHING_FACTOR> primvecs;
- size_t numChildren;
-
- array_t<SharedPrimRefVector,2*MAX_BRANCHING_FACTOR> sharedPrimVecs;
- size_t numSharedPrimVecs;
- };
-
- template<typename Mesh>
- struct RecalculatePrimRef
- {
- Scene* scene;
-
- __forceinline RecalculatePrimRef (Scene* scene)
- : scene(scene) {}
-
- __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
- {
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const Mesh* mesh = scene->get<Mesh>(geomID);
- const LBBox3fa lbounds = mesh->linearBounds(primID, time_range);
- const range<int> tbounds = mesh->timeSegmentRange(time_range);
- return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
- }
-
- // __noinline is workaround for ICC16 bug under MacOSX
- __noinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
- {
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const Mesh* mesh = scene->get<Mesh>(geomID);
- const LBBox3fa lbounds = mesh->linearBounds(space, primID, time_range);
- const range<int> tbounds = mesh->timeSegmentRange(time_range);
- return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
- }
-
- __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
- return scene->get<Mesh>(prim.geomID())->linearBounds(prim.primID(), time_range);
- }
-
- // __noinline is workaround for ICC16 bug under MacOSX
- __noinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
- return scene->get<Mesh>(prim.geomID())->linearBounds(space, prim.primID(), time_range);
- }
- };
-
- struct VirtualRecalculatePrimRef
- {
- Scene* scene;
-
- __forceinline VirtualRecalculatePrimRef (Scene* scene)
- : scene(scene) {}
-
- __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
- {
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const Geometry* mesh = scene->get(geomID);
- const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range);
- const range<int> tbounds = mesh->timeSegmentRange(time_range);
- return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
- }
-
- __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
- {
- const unsigned geomID = prim.geomID();
- const unsigned primID = prim.primID();
- const Geometry* mesh = scene->get(geomID);
- const LBBox3fa lbounds = mesh->vlinearBounds(space, primID, time_range);
- const range<int> tbounds = mesh->timeSegmentRange(time_range);
- return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
- }
-
- __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
- return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range);
- }
-
- __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
- return scene->get(prim.geomID())->vlinearBounds(space, prim.primID(), time_range);
- }
- };
-
- struct BVHBuilderMSMBlur
- {
- /*! settings for msmblur builder */
- struct Settings
- {
- /*! default settings */
- Settings ()
- : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8),
- travCost(1.0f), intCost(1.0f), singleLeafTimeSegment(false),
- singleThreadThreshold(1024) {}
-
-
- Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold)
- : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
- travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold)
- {
- minLeafSize = min(minLeafSize,maxLeafSize);
- }
-
- public:
- size_t branchingFactor; //!< branching factor of BVH to build
- size_t maxDepth; //!< maximum depth of BVH to build
- size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
- size_t minLeafSize; //!< minimum size of a leaf
- size_t maxLeafSize; //!< maximum size of a leaf
- float travCost; //!< estimated cost of one traversal step
- float intCost; //!< estimated cost of one primitive intersection
- bool singleLeafTimeSegment; //!< split time to single time range
- size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
- };
-
- struct BuildRecord
- {
- public:
- __forceinline BuildRecord () {}
-
- __forceinline BuildRecord (size_t depth)
- : depth(depth) {}
-
- __forceinline BuildRecord (const SetMB& prims, size_t depth)
- : depth(depth), prims(prims) {}
-
- __forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) {
- return a.prims.size() < b.prims.size();
- }
-
- __forceinline size_t size() const {
- return prims.size();
- }
-
- public:
- size_t depth; //!< Depth of the root of this subtree.
- SetMB prims; //!< The list of primitives.
- };
-
- struct BuildRecordSplit : public BuildRecord
- {
- __forceinline BuildRecordSplit () {}
-
- __forceinline BuildRecordSplit (size_t depth)
- : BuildRecord(depth) {}
-
- __forceinline BuildRecordSplit (const BuildRecord& record, const BinSplit<MBLUR_NUM_OBJECT_BINS>& split)
- : BuildRecord(record), split(split) {}
-
- BinSplit<MBLUR_NUM_OBJECT_BINS> split;
- };
-
- template<
- typename NodeRef,
- typename RecalculatePrimRef,
- typename Allocator,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename SetNodeFunc,
- typename CreateLeafFunc,
- typename ProgressMonitor>
-
- class BuilderT
- {
- ALIGNED_CLASS_(16);
- static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor
- static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
-
- typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
- typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
- typedef mvector<PrimRefMB>* PrimRefVector;
- typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
- typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
- typedef LocalChildListT<BuildRecordSplit,MAX_BRANCHING_FACTOR> LocalChildListSplit;
-
- public:
-
- BuilderT (MemoryMonitorInterface* device,
- const RecalculatePrimRef recalculatePrimRef,
- const CreateAllocFunc createAlloc,
- const CreateNodeFunc createNode,
- const SetNodeFunc setNode,
- const CreateLeafFunc createLeaf,
- const ProgressMonitor progressMonitor,
- const Settings& settings)
- : cfg(settings),
- heuristicObjectSplit(),
- heuristicTemporalSplit(device, recalculatePrimRef),
- recalculatePrimRef(recalculatePrimRef), createAlloc(createAlloc), createNode(createNode), setNode(setNode), createLeaf(createLeaf),
- progressMonitor(progressMonitor)
- {
- if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
- throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
- }
-
- /*! finds the best split */
- const Split find(const SetMB& set)
- {
- /* first try standard object split */
- const Split object_split = heuristicObjectSplit.find(set,cfg.logBlockSize);
- const float object_split_sah = object_split.splitSAH();
-
- /* test temporal splits only when object split was bad */
- const float leaf_sah = set.leafSAH(cfg.logBlockSize);
- if (object_split_sah < 0.50f*leaf_sah)
- return object_split;
-
- /* do temporal splits only if the the time range is big enough */
- if (set.time_range.size() > 1.01f/float(set.max_num_time_segments))
- {
- const Split temporal_split = heuristicTemporalSplit.find(set,cfg.logBlockSize);
- const float temporal_split_sah = temporal_split.splitSAH();
-
- /* take temporal split if it improved SAH */
- if (temporal_split_sah < object_split_sah)
- return temporal_split;
- }
-
- return object_split;
- }
-
- /*! array partitioning */
- __forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
- {
- /* perform object split */
- if (likely(split.data == Split::SPLIT_OBJECT)) {
- heuristicObjectSplit.split(split,set,lset,rset);
- }
- /* perform temporal split */
- else if (likely(split.data == Split::SPLIT_TEMPORAL)) {
- return heuristicTemporalSplit.split(split,set,lset,rset);
- }
- /* perform fallback split */
- else if (unlikely(split.data == Split::SPLIT_FALLBACK)) {
- set.deterministic_order();
- splitFallback(set,lset,rset);
- }
- /* split by geometry */
- else if (unlikely(split.data == Split::SPLIT_GEOMID)) {
- set.deterministic_order();
- splitByGeometry(set,lset,rset);
- }
- else
- assert(false);
-
- return std::unique_ptr<mvector<PrimRefMB>>();
- }
-
- /*! finds the best fallback split */
- __noinline Split findFallback(const SetMB& set)
- {
- /* split if primitives are not from same geometry */
- if (!sameGeometry(set))
- return Split(0.0f,Split::SPLIT_GEOMID);
-
- /* if a leaf can only hold a single time-segment, we might have to do additional temporal splits */
- if (cfg.singleLeafTimeSegment)
- {
- /* test if one primitive has more than one time segment in time range, if so split time */
- for (size_t i=set.begin(); i<set.end(); i++)
- {
- const PrimRefMB& prim = (*set.prims)[i];
- const range<int> itime_range = prim.timeSegmentRange(set.time_range);
- const int localTimeSegments = itime_range.size();
- assert(localTimeSegments > 0);
- if (localTimeSegments > 1) {
- const int icenter = (itime_range.begin() + itime_range.end())/2;
- const float splitTime = prim.timeStep(icenter);
- return Split(0.0f,(unsigned)Split::SPLIT_TEMPORAL,0,splitTime);
- }
- }
- }
-
- /* otherwise return fallback split */
- return Split(0.0f,Split::SPLIT_FALLBACK);
- }
-
- /*! performs fallback split */
- void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
- {
- mvector<PrimRefMB>& prims = *set.prims;
-
- const size_t begin = set.begin();
- const size_t end = set.end();
- const size_t center = (begin + end)/2;
-
- PrimInfoMB linfo = empty;
- for (size_t i=begin; i<center; i++)
- linfo.add_primref(prims[i]);
-
- PrimInfoMB rinfo = empty;
- for (size_t i=center; i<end; i++)
- rinfo.add_primref(prims[i]);
-
- new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
- new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
- }
-
- /*! checks if all primitives are from the same geometry */
- __forceinline bool sameGeometry(const SetMB& set)
- {
- if (set.size() == 0) return true;
- mvector<PrimRefMB>& prims = *set.prims;
- const size_t begin = set.begin();
- const size_t end = set.end();
- unsigned int firstGeomID = prims[begin].geomID();
- for (size_t i=begin+1; i<end; i++) {
- if (prims[i].geomID() != firstGeomID){
- return false;
- }
- }
- return true;
- }
-
- /* split by geometry ID */
- void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
- {
- assert(set.size() > 1);
-
- mvector<PrimRefMB>& prims = *set.prims;
- const size_t begin = set.begin();
- const size_t end = set.end();
-
- PrimInfoMB left(empty);
- PrimInfoMB right(empty);
- unsigned int geomID = prims[begin].geomID();
- size_t center = serial_partitioning(prims.data(),begin,end,left,right,
- [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
- [ ] ( PrimInfoMB& dst, const PrimRefMB& prim ) { dst.add_primref(prim); });
-
- new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
- new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
- }
-
- const NodeRecordMB4D createLargeLeaf(const BuildRecord& in, Allocator alloc)
- {
- /* this should never occur but is a fatal error */
- if (in.depth > cfg.maxDepth)
- throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
-
- /* replace already found split by fallback split */
- const BuildRecordSplit current(BuildRecord(in.prims,in.depth),findFallback(in.prims));
-
- /* special case when directly creating leaf without any splits that could shrink time_range */
- bool force_split = false;
- if (current.depth == 1 && current.size() > 0)
- {
- BBox1f c = empty;
- BBox1f p = current.prims.time_range;
- for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
- mvector<PrimRefMB>& prims = *current.prims.prims;
- c.extend(prims[i].time_range);
- }
-
- force_split = c.lower > p.lower || c.upper < p.upper;
- }
-
- /* create leaf for few primitives */
- if (current.size() <= cfg.maxLeafSize && current.split.data < Split::SPLIT_ENFORCE && !force_split)
- return createLeaf(current,alloc);
-
- /* fill all children by always splitting the largest one */
- bool hasTimeSplits = false;
- NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
- LocalChildListSplit children(current);
-
- do {
- /* find best child with largest bounding box area */
- size_t bestChild = -1;
- size_t bestSize = 0;
- for (size_t i=0; i<children.size(); i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].size() <= cfg.maxLeafSize && children[i].split.data < Split::SPLIT_ENFORCE && !force_split)
- continue;
-
- force_split = false;
-
- /* remember child with largest size */
- if (children[i].size() > bestSize) {
- bestSize = children[i].size();
- bestChild = i;
- }
- }
- if (bestChild == -1) break;
-
- /* perform best found split */
- BuildRecordSplit& brecord = children[bestChild];
- BuildRecordSplit lrecord(current.depth+1);
- BuildRecordSplit rrecord(current.depth+1);
- std::unique_ptr<mvector<PrimRefMB>> new_vector = split(brecord.split,brecord.prims,lrecord.prims,rrecord.prims);
- hasTimeSplits |= new_vector != nullptr;
-
- /* find new splits */
- lrecord.split = findFallback(lrecord.prims);
- rrecord.split = findFallback(rrecord.prims);
- children.split(bestChild,lrecord,rrecord,std::move(new_vector));
-
- } while (children.size() < cfg.branchingFactor);
-
- /* detect time_ranges that have shrunken */
- for (size_t i=0; i<children.size(); i++) {
- const BBox1f c = children[i].prims.time_range;
- const BBox1f p = in.prims.time_range;
- hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
- }
-
- /* create node */
- auto node = createNode(children.children.data(),children.numChildren,alloc,hasTimeSplits);
-
- /* recurse into each child and perform reduction */
- LBBox3fa gbounds = empty;
- for (size_t i=0; i<children.size(); i++) {
- values[i] = createLargeLeaf(children[i],alloc);
- gbounds.extend(values[i].lbounds);
- }
-
- setNode(current,children.children.data(),node,values,children.numChildren);
-
- /* calculate geometry bounds of this node */
- if (hasTimeSplits)
- return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
- else
- return NodeRecordMB4D(node,gbounds,current.prims.time_range);
- }
-
- const NodeRecordMB4D recurse(const BuildRecord& current, Allocator alloc, bool toplevel)
- {
- /* get thread local allocator */
- if (!alloc)
- alloc = createAlloc();
-
- /* call memory monitor function to signal progress */
- if (toplevel && current.size() <= cfg.singleThreadThreshold)
- progressMonitor(current.size());
-
- /*! find best split */
- const Split csplit = find(current.prims);
-
- /*! compute leaf and split cost */
- const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
- const float splitSAH = cfg.travCost*current.prims.halfArea()+cfg.intCost*csplit.splitSAH();
- assert((current.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));
-
- /*! create a leaf node when threshold reached or SAH tells us to stop */
- if (current.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
- current.prims.deterministic_order();
- return createLargeLeaf(current,alloc);
- }
-
- /*! perform initial split */
- SetMB lprims,rprims;
- std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,current.prims,lprims,rprims);
- bool hasTimeSplits = new_vector != nullptr;
- NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
- LocalChildList children(current);
- {
- BuildRecord lrecord(lprims,current.depth+1);
- BuildRecord rrecord(rprims,current.depth+1);
- children.split(0,lrecord,rrecord,std::move(new_vector));
- }
-
- /*! split until node is full or SAH tells us to stop */
- while (children.size() < cfg.branchingFactor)
- {
- /*! find best child to split */
- float bestArea = neg_inf;
- ssize_t bestChild = -1;
- for (size_t i=0; i<children.size(); i++)
- {
- if (children[i].size() <= cfg.minLeafSize) continue;
- if (expectedApproxHalfArea(children[i].prims.geomBounds) > bestArea) {
- bestChild = i; bestArea = expectedApproxHalfArea(children[i].prims.geomBounds);
- }
- }
- if (bestChild == -1) break;
-
- /* perform split */
- BuildRecord& brecord = children[bestChild];
- BuildRecord lrecord(current.depth+1);
- BuildRecord rrecord(current.depth+1);
- Split csplit = find(brecord.prims);
- std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,brecord.prims,lrecord.prims,rrecord.prims);
- hasTimeSplits |= new_vector != nullptr;
- children.split(bestChild,lrecord,rrecord,std::move(new_vector));
- }
-
- /* detect time_ranges that have shrunken */
- for (size_t i=0; i<children.size(); i++) {
- const BBox1f c = children[i].prims.time_range;
- const BBox1f p = current.prims.time_range;
- hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
- }
-
- /* sort buildrecords for simpler shadow ray traversal */
- //std::sort(&children[0],&children[children.size()],std::greater<BuildRecord>()); // FIXME: reduces traversal performance of bvh8.triangle4 (need to verified) !!
-
- /*! create an inner node */
- auto node = createNode(children.children.data(), children.numChildren, alloc, hasTimeSplits);
- LBBox3fa gbounds = empty;
-
- /* spawn tasks */
- if (unlikely(current.size() > cfg.singleThreadThreshold))
- {
- /*! parallel_for is faster than spawing sub-tasks */
- parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- values[i] = recurse(children[i],nullptr,true);
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
-
- /*! merge bounding boxes */
- for (size_t i=0; i<children.size(); i++)
- gbounds.extend(values[i].lbounds);
- }
- /* recurse into each child */
- else
- {
- //for (size_t i=0; i<children.size(); i++)
- for (ssize_t i=children.size()-1; i>=0; i--) {
- values[i] = recurse(children[i],alloc,false);
- gbounds.extend(values[i].lbounds);
- }
- }
-
- setNode(current,children.children.data(),node,values,children.numChildren);
-
- /* calculate geometry bounds of this node */
- if (unlikely(hasTimeSplits))
- return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
- else
- return NodeRecordMB4D(node,gbounds,current.prims.time_range);
- }
-
- /*! builder entry function */
- __forceinline const NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
- {
- const SetMB set(pinfo,&prims);
- auto ret = recurse(BuildRecord(set,1),nullptr,true);
- _mm_mfence(); // to allow non-temporal stores during build
- return ret;
- }
-
- private:
- Settings cfg;
- HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> heuristicObjectSplit;
- HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> heuristicTemporalSplit;
- const RecalculatePrimRef recalculatePrimRef;
- const CreateAllocFunc createAlloc;
- const CreateNodeFunc createNode;
- const SetNodeFunc setNode;
- const CreateLeafFunc createLeaf;
- const ProgressMonitor progressMonitor;
- };
-
- template<typename NodeRef,
- typename RecalculatePrimRef,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename SetNodeFunc,
- typename CreateLeafFunc,
- typename ProgressMonitorFunc>
-
- static const BVHNodeRecordMB4D<NodeRef> build(mvector<PrimRefMB>& prims,
- const PrimInfoMB& pinfo,
- MemoryMonitorInterface* device,
- const RecalculatePrimRef recalculatePrimRef,
- const CreateAllocFunc createAlloc,
- const CreateNodeFunc createNode,
- const SetNodeFunc setNode,
- const CreateLeafFunc createLeaf,
- const ProgressMonitorFunc progressMonitor,
- const Settings& settings)
- {
- typedef BuilderT<
- NodeRef,
- RecalculatePrimRef,
- decltype(createAlloc()),
- CreateAllocFunc,
- CreateNodeFunc,
- SetNodeFunc,
- CreateLeafFunc,
- ProgressMonitorFunc> Builder;
-
- Builder builder(device,
- recalculatePrimRef,
- createAlloc,
- createNode,
- setNode,
- createLeaf,
- progressMonitor,
- settings);
-
-
- return builder(prims,pinfo);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur_hair.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur_hair.h
deleted file mode 100644
index e477c313a3..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur_hair.h
+++ /dev/null
@@ -1,526 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../bvh/bvh.h"
-#include "../geometry/primitive.h"
-#include "../builders/bvh_builder_msmblur.h"
-#include "../builders/heuristic_binning_array_aligned.h"
-#include "../builders/heuristic_binning_array_unaligned.h"
-#include "../builders/heuristic_timesplit_array.h"
-
-namespace embree
-{
- namespace isa
- {
- struct BVHBuilderHairMSMBlur
- {
- /*! settings for msmblur builder */
- struct Settings
- {
- /*! default settings */
- Settings ()
- : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8) {}
-
- public:
- size_t branchingFactor; //!< branching factor of BVH to build
- size_t maxDepth; //!< maximum depth of BVH to build
- size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
- size_t minLeafSize; //!< minimum size of a leaf
- size_t maxLeafSize; //!< maximum size of a leaf
- };
-
- struct BuildRecord
- {
- public:
- __forceinline BuildRecord () {}
-
- __forceinline BuildRecord (size_t depth)
- : depth(depth) {}
-
- __forceinline BuildRecord (const SetMB& prims, size_t depth)
- : depth(depth), prims(prims) {}
-
- __forceinline size_t size() const {
- return prims.size();
- }
-
- public:
- size_t depth; //!< depth of the root of this subtree
- SetMB prims; //!< the list of primitives
- };
-
- template<typename NodeRef,
- typename RecalculatePrimRef,
- typename CreateAllocFunc,
- typename CreateAABBNodeMBFunc,
- typename SetAABBNodeMBFunc,
- typename CreateOBBNodeMBFunc,
- typename SetOBBNodeMBFunc,
- typename CreateLeafFunc,
- typename ProgressMonitor>
-
- class BuilderT
- {
- ALIGNED_CLASS_(16);
-
- static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
- static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
- static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build
-
- typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
- typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
-
- typedef FastAllocator::CachedAllocator Allocator;
- typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
-
- typedef HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> HeuristicTemporal;
- typedef HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> HeuristicBinning;
- typedef UnalignedHeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> UnalignedHeuristicBinning;
-
- public:
-
- BuilderT (Scene* scene,
- const RecalculatePrimRef& recalculatePrimRef,
- const CreateAllocFunc& createAlloc,
- const CreateAABBNodeMBFunc& createAABBNodeMB,
- const SetAABBNodeMBFunc& setAABBNodeMB,
- const CreateOBBNodeMBFunc& createOBBNodeMB,
- const SetOBBNodeMBFunc& setOBBNodeMB,
- const CreateLeafFunc& createLeaf,
- const ProgressMonitor& progressMonitor,
- const Settings settings)
-
- : cfg(settings),
- scene(scene),
- recalculatePrimRef(recalculatePrimRef),
- createAlloc(createAlloc),
- createAABBNodeMB(createAABBNodeMB), setAABBNodeMB(setAABBNodeMB),
- createOBBNodeMB(createOBBNodeMB), setOBBNodeMB(setOBBNodeMB),
- createLeaf(createLeaf),
- progressMonitor(progressMonitor),
- unalignedHeuristic(scene),
- temporalSplitHeuristic(scene->device,recalculatePrimRef) {}
-
- private:
-
- /*! checks if all primitives are from the same geometry */
- __forceinline bool sameGeometry(const SetMB& set)
- {
- mvector<PrimRefMB>& prims = *set.prims;
- unsigned int firstGeomID = prims[set.begin()].geomID();
- for (size_t i=set.begin()+1; i<set.end(); i++) {
- if (prims[i].geomID() != firstGeomID){
- return false;
- }
- }
- return true;
- }
-
- /*! performs some split if SAH approaches fail */
- void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
- {
- mvector<PrimRefMB>& prims = *set.prims;
-
- const size_t begin = set.begin();
- const size_t end = set.end();
- const size_t center = (begin + end)/2;
-
- PrimInfoMB linfo = empty;
- for (size_t i=begin; i<center; i++)
- linfo.add_primref(prims[i]);
-
- PrimInfoMB rinfo = empty;
- for (size_t i=center; i<end; i++)
- rinfo.add_primref(prims[i]);
-
- new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
- new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
- }
-
- void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
- {
- assert(set.size() > 1);
- const size_t begin = set.begin();
- const size_t end = set.end();
- PrimInfoMB linfo(empty);
- PrimInfoMB rinfo(empty);
- unsigned int geomID = (*set.prims)[begin].geomID();
- size_t center = serial_partitioning(set.prims->data(),begin,end,linfo,rinfo,
- [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
- [ ] ( PrimInfoMB& a, const PrimRefMB& ref ) { a.add_primref(ref); });
-
- new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
- new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
- }
-
- /*! creates a large leaf that could be larger than supported by the BVH */
- NodeRecordMB4D createLargeLeaf(BuildRecord& current, Allocator alloc)
- {
- /* this should never occur but is a fatal error */
- if (current.depth > cfg.maxDepth)
- throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
-
- /* special case when directly creating leaf without any splits that could shrink time_range */
- bool force_split = false;
- if (current.depth == 1 && current.size() > 0)
- {
- BBox1f c = empty;
- BBox1f p = current.prims.time_range;
- for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
- mvector<PrimRefMB>& prims = *current.prims.prims;
- c.extend(prims[i].time_range);
- }
-
- force_split = c.lower > p.lower || c.upper < p.upper;
- }
-
- /* create leaf for few primitives */
- if (current.size() <= cfg.maxLeafSize && sameGeometry(current.prims) && !force_split)
- return createLeaf(current.prims,alloc);
-
- /* fill all children by always splitting the largest one */
- LocalChildList children(current);
- NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
-
- do {
-
- /* find best child with largest bounding box area */
- int bestChild = -1;
- size_t bestSize = 0;
- for (unsigned i=0; i<children.size(); i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i].prims) && !force_split)
- continue;
-
- force_split = false;
-
- /* remember child with largest size */
- if (children[i].size() > bestSize) {
- bestSize = children[i].size();
- bestChild = i;
- }
- }
- if (bestChild == -1) break;
-
- /*! split best child into left and right child */
- BuildRecord left(current.depth+1);
- BuildRecord right(current.depth+1);
- if (!sameGeometry(children[bestChild].prims)) {
- splitByGeometry(children[bestChild].prims,left.prims,right.prims);
- } else {
- splitFallback(children[bestChild].prims,left.prims,right.prims);
- }
- children.split(bestChild,left,right,std::unique_ptr<mvector<PrimRefMB>>());
-
- } while (children.size() < cfg.branchingFactor);
-
-
- /* detect time_ranges that have shrunken */
- bool timesplit = false;
- for (size_t i=0; i<children.size(); i++) {
- const BBox1f c = children[i].prims.time_range;
- const BBox1f p = current.prims.time_range;
- timesplit |= c.lower > p.lower || c.upper < p.upper;
- }
-
- /* create node */
- NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,timesplit);
-
- LBBox3fa bounds = empty;
- for (size_t i=0; i<children.size(); i++) {
- values[i] = createLargeLeaf(children[i],alloc);
- bounds.extend(values[i].lbounds);
- }
-
- setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
-
- if (timesplit)
- bounds = current.prims.linearBounds(recalculatePrimRef);
-
- return NodeRecordMB4D(node,bounds,current.prims.time_range);
- }
-
- /*! performs split */
- std::unique_ptr<mvector<PrimRefMB>> split(const BuildRecord& current, BuildRecord& lrecord, BuildRecord& rrecord, bool& aligned, bool& timesplit)
- {
- /* variable to track the SAH of the best splitting approach */
- float bestSAH = inf;
- const float leafSAH = current.prims.leafSAH(cfg.logBlockSize);
-
- /* perform standard binning in aligned space */
- HeuristicBinning::Split alignedObjectSplit = alignedHeuristic.find(current.prims,cfg.logBlockSize);
- float alignedObjectSAH = alignedObjectSplit.splitSAH();
- bestSAH = min(alignedObjectSAH,bestSAH);
-
- /* perform standard binning in unaligned space */
- UnalignedHeuristicBinning::Split unalignedObjectSplit;
- LinearSpace3fa uspace;
- float unalignedObjectSAH = inf;
- if (alignedObjectSAH > 0.7f*leafSAH) {
- uspace = unalignedHeuristic.computeAlignedSpaceMB(scene,current.prims);
- const SetMB sset = current.prims.primInfo(recalculatePrimRef,uspace);
- unalignedObjectSplit = unalignedHeuristic.find(sset,cfg.logBlockSize,uspace);
- unalignedObjectSAH = 1.3f*unalignedObjectSplit.splitSAH(); // makes unaligned splits more expensive
- bestSAH = min(unalignedObjectSAH,bestSAH);
- }
-
- /* do temporal splits only if previous approaches failed to produce good SAH and the the time range is large enough */
- float temporal_split_sah = inf;
- typename HeuristicTemporal::Split temporal_split;
- if (bestSAH > 0.5f*leafSAH) {
- if (current.prims.time_range.size() > 1.01f/float(current.prims.max_num_time_segments)) {
- temporal_split = temporalSplitHeuristic.find(current.prims,cfg.logBlockSize);
- temporal_split_sah = temporal_split.splitSAH();
- bestSAH = min(temporal_split_sah,bestSAH);
- }
- }
-
- /* perform fallback split if SAH heuristics failed */
- if (unlikely(!std::isfinite(bestSAH))) {
- current.prims.deterministic_order();
- splitFallback(current.prims,lrecord.prims,rrecord.prims);
- }
- /* perform aligned split if this is best */
- else if (likely(bestSAH == alignedObjectSAH)) {
- alignedHeuristic.split(alignedObjectSplit,current.prims,lrecord.prims,rrecord.prims);
- }
- /* perform unaligned split if this is best */
- else if (likely(bestSAH == unalignedObjectSAH)) {
- unalignedHeuristic.split(unalignedObjectSplit,uspace,current.prims,lrecord.prims,rrecord.prims);
- aligned = false;
- }
- /* perform temporal split if this is best */
- else if (likely(bestSAH == temporal_split_sah)) {
- timesplit = true;
- return temporalSplitHeuristic.split(temporal_split,current.prims,lrecord.prims,rrecord.prims);
- }
- else
- assert(false);
-
- return std::unique_ptr<mvector<PrimRefMB>>();
- }
-
- /*! recursive build */
- NodeRecordMB4D recurse(BuildRecord& current, Allocator alloc, bool toplevel)
- {
- /* get thread local allocator */
- if (!alloc)
- alloc = createAlloc();
-
- /* call memory monitor function to signal progress */
- if (toplevel && current.size() <= SINGLE_THREADED_THRESHOLD)
- progressMonitor(current.size());
-
- /* create leaf node */
- if (current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || current.size() <= cfg.minLeafSize) {
- current.prims.deterministic_order();
- return createLargeLeaf(current,alloc);
- }
-
- /* fill all children by always splitting the one with the largest surface area */
- NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
- LocalChildList children(current);
- bool aligned = true;
- bool timesplit = false;
-
- do {
-
- /* find best child with largest bounding box area */
- ssize_t bestChild = -1;
- float bestArea = neg_inf;
- for (size_t i=0; i<children.size(); i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].size() <= cfg.minLeafSize)
- continue;
-
- /* remember child with largest area */
- const float A = children[i].prims.halfArea();
- if (A > bestArea) {
- bestArea = children[i].prims.halfArea();
- bestChild = i;
- }
- }
- if (bestChild == -1) break;
-
- /*! split best child into left and right child */
- BuildRecord left(current.depth+1);
- BuildRecord right(current.depth+1);
- std::unique_ptr<mvector<PrimRefMB>> new_vector = split(children[bestChild],left,right,aligned,timesplit);
- children.split(bestChild,left,right,std::move(new_vector));
-
- } while (children.size() < cfg.branchingFactor);
-
- /* detect time_ranges that have shrunken */
- for (size_t i=0; i<children.size(); i++) {
- const BBox1f c = children[i].prims.time_range;
- const BBox1f p = current.prims.time_range;
- timesplit |= c.lower > p.lower || c.upper < p.upper;
- }
-
- /* create time split node */
- if (timesplit)
- {
- const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);
-
- /* spawn tasks or ... */
- if (current.size() > SINGLE_THREADED_THRESHOLD)
- {
- parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- values[i] = recurse(children[i],nullptr,true);
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
- }
- /* ... continue sequential */
- else {
- for (size_t i=0; i<children.size(); i++) {
- values[i] = recurse(children[i],alloc,false);
- }
- }
-
- setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
-
- const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);
- return NodeRecordMB4D(node,bounds,current.prims.time_range);
- }
-
- /* create aligned node */
- else if (aligned)
- {
- const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);
-
- /* spawn tasks or ... */
- if (current.size() > SINGLE_THREADED_THRESHOLD)
- {
- LBBox3fa cbounds[MAX_BRANCHING_FACTOR];
- parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- values[i] = recurse(children[i],nullptr,true);
- cbounds[i] = values[i].lbounds;
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
-
- LBBox3fa bounds = empty;
- for (size_t i=0; i<children.size(); i++)
- bounds.extend(cbounds[i]);
- setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
- return NodeRecordMB4D(node,bounds,current.prims.time_range);
- }
- /* ... continue sequentially */
- else
- {
- LBBox3fa bounds = empty;
- for (size_t i=0; i<children.size(); i++) {
- values[i] = recurse(children[i],alloc,false);
- bounds.extend(values[i].lbounds);
- }
- setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
- return NodeRecordMB4D(node,bounds,current.prims.time_range);
- }
- }
-
- /* create unaligned node */
- else
- {
- const NodeRef node = createOBBNodeMB(alloc);
-
- /* spawn tasks or ... */
- if (current.size() > SINGLE_THREADED_THRESHOLD)
- {
- parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++) {
- const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
- const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
- const auto child = recurse(children[i],nullptr,true);
- setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
- }
- /* ... continue sequentially */
- else
- {
- for (size_t i=0; i<children.size(); i++) {
- const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
- const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
- const auto child = recurse(children[i],alloc,false);
- setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
- }
- }
-
- const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);
- return NodeRecordMB4D(node,bounds,current.prims.time_range);
- }
- }
-
- public:
-
- /*! entry point into builder */
- NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
- {
- BuildRecord record(SetMB(pinfo,&prims),1);
- auto root = recurse(record,nullptr,true);
- _mm_mfence(); // to allow non-temporal stores during build
- return root;
- }
-
- private:
- Settings cfg;
- Scene* scene;
- const RecalculatePrimRef& recalculatePrimRef;
- const CreateAllocFunc& createAlloc;
- const CreateAABBNodeMBFunc& createAABBNodeMB;
- const SetAABBNodeMBFunc& setAABBNodeMB;
- const CreateOBBNodeMBFunc& createOBBNodeMB;
- const SetOBBNodeMBFunc& setOBBNodeMB;
- const CreateLeafFunc& createLeaf;
- const ProgressMonitor& progressMonitor;
-
- private:
- HeuristicBinning alignedHeuristic;
- UnalignedHeuristicBinning unalignedHeuristic;
- HeuristicTemporal temporalSplitHeuristic;
- };
-
- template<typename NodeRef,
- typename RecalculatePrimRef,
- typename CreateAllocFunc,
- typename CreateAABBNodeMBFunc,
- typename SetAABBNodeMBFunc,
- typename CreateOBBNodeMBFunc,
- typename SetOBBNodeMBFunc,
- typename CreateLeafFunc,
- typename ProgressMonitor>
-
- static BVHNodeRecordMB4D<NodeRef> build (Scene* scene, mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo,
- const RecalculatePrimRef& recalculatePrimRef,
- const CreateAllocFunc& createAlloc,
- const CreateAABBNodeMBFunc& createAABBNodeMB,
- const SetAABBNodeMBFunc& setAABBNodeMB,
- const CreateOBBNodeMBFunc& createOBBNodeMB,
- const SetOBBNodeMBFunc& setOBBNodeMB,
- const CreateLeafFunc& createLeaf,
- const ProgressMonitor& progressMonitor,
- const Settings settings)
- {
- typedef BuilderT<NodeRef,RecalculatePrimRef,CreateAllocFunc,
- CreateAABBNodeMBFunc,SetAABBNodeMBFunc,
- CreateOBBNodeMBFunc,SetOBBNodeMBFunc,
- CreateLeafFunc,ProgressMonitor> Builder;
-
- Builder builder(scene,recalculatePrimRef,createAlloc,
- createAABBNodeMB,setAABBNodeMB,
- createOBBNodeMB,setOBBNodeMB,
- createLeaf,progressMonitor,settings);
-
- return builder(prims,pinfo);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_sah.h b/thirdparty/embree-aarch64/kernels/builders/bvh_builder_sah.h
deleted file mode 100644
index 3f7e678a10..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_sah.h
+++ /dev/null
@@ -1,669 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "heuristic_binning_array_aligned.h"
-#include "heuristic_spatial_array.h"
-#include "heuristic_openmerge_array.h"
-
-#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL
-# define NUM_OBJECT_BINS 16
-# define NUM_SPATIAL_BINS 16
-#else
-# define NUM_OBJECT_BINS 32
-# define NUM_SPATIAL_BINS 16
-#endif
-
-namespace embree
-{
- namespace isa
- {
- MAYBE_UNUSED static const float travCost = 1.0f;
- MAYBE_UNUSED static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024;
-
- struct GeneralBVHBuilder
- {
- static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor
- static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree of we are that many levels before the maximum tree depth
-
-
- /*! settings for SAH builder */
- struct Settings
- {
- /*! default settings */
- Settings ()
- : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7),
- travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf) {}
-
- /*! initialize settings from API settings */
- Settings (const RTCBuildArguments& settings)
- : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7),
- travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf)
- {
- if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor;
- if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth;
- if (RTC_BUILD_ARGUMENTS_HAS(settings,sahBlockSize )) logBlockSize = bsr(static_cast<size_t>(settings.sahBlockSize));
- if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize;
- if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize;
- if (RTC_BUILD_ARGUMENTS_HAS(settings,traversalCost )) travCost = settings.traversalCost;
- if (RTC_BUILD_ARGUMENTS_HAS(settings,intersectionCost )) intCost = settings.intersectionCost;
-
- minLeafSize = min(minLeafSize,maxLeafSize);
- }
-
- Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold, size_t primrefarrayalloc = inf)
- : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
- travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold), primrefarrayalloc(primrefarrayalloc)
- {
- minLeafSize = min(minLeafSize,maxLeafSize);
- }
-
- public:
- size_t branchingFactor; //!< branching factor of BVH to build
- size_t maxDepth; //!< maximum depth of BVH to build
- size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
- size_t minLeafSize; //!< minimum size of a leaf
- size_t maxLeafSize; //!< maximum size of a leaf
- float travCost; //!< estimated cost of one traversal step
- float intCost; //!< estimated cost of one primitive intersection
- size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
- size_t primrefarrayalloc; //!< builder uses prim ref array to allocate nodes and leaves when a subtree of that size is finished
- };
-
- /*! recursive state of builder */
- template<typename Set, typename Split>
- struct BuildRecordT
- {
- public:
- __forceinline BuildRecordT () {}
-
- __forceinline BuildRecordT (size_t depth)
- : depth(depth), alloc_barrier(false), prims(empty) {}
-
- __forceinline BuildRecordT (size_t depth, const Set& prims)
- : depth(depth), alloc_barrier(false), prims(prims) {}
-
- __forceinline BBox3fa bounds() const { return prims.geomBounds; }
-
- __forceinline friend bool operator< (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() < b.prims.size(); }
- __forceinline friend bool operator> (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() > b.prims.size(); }
-
- __forceinline size_t size() const { return prims.size(); }
-
- public:
- size_t depth; //!< Depth of the root of this subtree.
- bool alloc_barrier; //!< barrier used to reuse primref-array blocks to allocate nodes
- Set prims; //!< The list of primitives.
- };
-
- template<typename PrimRef, typename Set>
- struct DefaultCanCreateLeafFunc
- {
- __forceinline bool operator()(const PrimRef*, const Set&) const { return true; }
- };
-
- template<typename PrimRef, typename Set>
- struct DefaultCanCreateLeafSplitFunc
- {
- __forceinline void operator()(PrimRef*, const Set&, Set&, Set&) const { }
- };
-
- template<typename BuildRecord,
- typename Heuristic,
- typename Set,
- typename PrimRef,
- typename ReductionTy,
- typename Allocator,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename UpdateNodeFunc,
- typename CreateLeafFunc,
- typename CanCreateLeafFunc,
- typename CanCreateLeafSplitFunc,
- typename ProgressMonitor>
-
- class BuilderT
- {
- friend struct GeneralBVHBuilder;
-
- BuilderT (PrimRef* prims,
- Heuristic& heuristic,
- const CreateAllocFunc& createAlloc,
- const CreateNodeFunc& createNode,
- const UpdateNodeFunc& updateNode,
- const CreateLeafFunc& createLeaf,
- const CanCreateLeafFunc& canCreateLeaf,
- const CanCreateLeafSplitFunc& canCreateLeafSplit,
- const ProgressMonitor& progressMonitor,
- const Settings& settings) :
- cfg(settings),
- prims(prims),
- heuristic(heuristic),
- createAlloc(createAlloc),
- createNode(createNode),
- updateNode(updateNode),
- createLeaf(createLeaf),
- canCreateLeaf(canCreateLeaf),
- canCreateLeafSplit(canCreateLeafSplit),
- progressMonitor(progressMonitor)
- {
- if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
- throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
- }
-
- const ReductionTy createLargeLeaf(const BuildRecord& current, Allocator alloc)
- {
- /* this should never occur but is a fatal error */
- if (current.depth > cfg.maxDepth)
- throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
-
- /* create leaf for few primitives */
- if (current.prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,current.prims))
- return createLeaf(prims,current.prims,alloc);
-
- /* fill all children by always splitting the largest one */
- ReductionTy values[MAX_BRANCHING_FACTOR];
- BuildRecord children[MAX_BRANCHING_FACTOR];
- size_t numChildren = 1;
- children[0] = current;
- do {
-
- /* find best child with largest bounding box area */
- size_t bestChild = -1;
- size_t bestSize = 0;
- for (size_t i=0; i<numChildren; i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,children[i].prims))
- continue;
-
- /* remember child with largest size */
- if (children[i].prims.size() > bestSize) {
- bestSize = children[i].prims.size();
- bestChild = i;
- }
- }
- if (bestChild == (size_t)-1) break;
-
- /*! split best child into left and right child */
- BuildRecord left(current.depth+1);
- BuildRecord right(current.depth+1);
- if (!canCreateLeaf(prims,children[bestChild].prims)) {
- canCreateLeafSplit(prims,children[bestChild].prims,left.prims,right.prims);
- } else {
- heuristic.splitFallback(children[bestChild].prims,left.prims,right.prims);
- }
-
- /* add new children left and right */
- children[bestChild] = children[numChildren-1];
- children[numChildren-1] = left;
- children[numChildren+0] = right;
- numChildren++;
-
- } while (numChildren < cfg.branchingFactor);
-
- /* set barrier for primrefarrayalloc */
- if (unlikely(current.size() > cfg.primrefarrayalloc))
- for (size_t i=0; i<numChildren; i++)
- children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc;
-
- /* create node */
- auto node = createNode(children,numChildren,alloc);
-
- /* recurse into each child and perform reduction */
- for (size_t i=0; i<numChildren; i++)
- values[i] = createLargeLeaf(children[i],alloc);
-
- /* perform reduction */
- return updateNode(current,children,node,values,numChildren);
- }
-
- const ReductionTy recurse(BuildRecord& current, Allocator alloc, bool toplevel)
- {
- /* get thread local allocator */
- if (!alloc)
- alloc = createAlloc();
-
- /* call memory monitor function to signal progress */
- if (toplevel && current.size() <= cfg.singleThreadThreshold)
- progressMonitor(current.size());
-
- /*! find best split */
- auto split = heuristic.find(current.prims,cfg.logBlockSize);
-
- /*! compute leaf and split cost */
- const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
- const float splitSAH = cfg.travCost*halfArea(current.prims.geomBounds)+cfg.intCost*split.splitSAH();
- assert((current.prims.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));
-
- /*! create a leaf node when threshold reached or SAH tells us to stop */
- if (current.prims.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.prims.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
- heuristic.deterministic_order(current.prims);
- return createLargeLeaf(current,alloc);
- }
-
- /*! perform initial split */
- Set lprims,rprims;
- heuristic.split(split,current.prims,lprims,rprims);
-
- /*! initialize child list with initial split */
- ReductionTy values[MAX_BRANCHING_FACTOR];
- BuildRecord children[MAX_BRANCHING_FACTOR];
- children[0] = BuildRecord(current.depth+1,lprims);
- children[1] = BuildRecord(current.depth+1,rprims);
- size_t numChildren = 2;
-
- /*! split until node is full or SAH tells us to stop */
- while (numChildren < cfg.branchingFactor)
- {
- /*! find best child to split */
- float bestArea = neg_inf;
- ssize_t bestChild = -1;
- for (size_t i=0; i<numChildren; i++)
- {
- /* ignore leaves as they cannot get split */
- if (children[i].prims.size() <= cfg.minLeafSize) continue;
-
- /* find child with largest surface area */
- if (halfArea(children[i].prims.geomBounds) > bestArea) {
- bestChild = i;
- bestArea = halfArea(children[i].prims.geomBounds);
- }
- }
- if (bestChild == -1) break;
-
- /* perform best found split */
- BuildRecord& brecord = children[bestChild];
- BuildRecord lrecord(current.depth+1);
- BuildRecord rrecord(current.depth+1);
- auto split = heuristic.find(brecord.prims,cfg.logBlockSize);
- heuristic.split(split,brecord.prims,lrecord.prims,rrecord.prims);
- children[bestChild ] = lrecord;
- children[numChildren] = rrecord;
- numChildren++;
- }
-
- /* set barrier for primrefarrayalloc */
- if (unlikely(current.size() > cfg.primrefarrayalloc))
- for (size_t i=0; i<numChildren; i++)
- children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc;
-
- /* sort buildrecords for faster shadow ray traversal */
- std::sort(&children[0],&children[numChildren],std::greater<BuildRecord>());
-
- /*! create an inner node */
- auto node = createNode(children,numChildren,alloc);
-
- /* spawn tasks */
- if (current.size() > cfg.singleThreadThreshold)
- {
- /*! parallel_for is faster than spawing sub-tasks */
- parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { // FIXME: no range here
- for (size_t i=r.begin(); i<r.end(); i++) {
- values[i] = recurse(children[i],nullptr,true);
- _mm_mfence(); // to allow non-temporal stores during build
- }
- });
-
- return updateNode(current,children,node,values,numChildren);
- }
- /* recurse into each child */
- else
- {
- for (size_t i=0; i<numChildren; i++)
- values[i] = recurse(children[i],alloc,false);
-
- return updateNode(current,children,node,values,numChildren);
- }
- }
-
- private:
- Settings cfg;
- PrimRef* prims;
- Heuristic& heuristic;
- const CreateAllocFunc& createAlloc;
- const CreateNodeFunc& createNode;
- const UpdateNodeFunc& updateNode;
- const CreateLeafFunc& createLeaf;
- const CanCreateLeafFunc& canCreateLeaf;
- const CanCreateLeafSplitFunc& canCreateLeafSplit;
- const ProgressMonitor& progressMonitor;
- };
-
- template<
- typename ReductionTy,
- typename Heuristic,
- typename Set,
- typename PrimRef,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename UpdateNodeFunc,
- typename CreateLeafFunc,
- typename ProgressMonitor>
-
- __noinline static ReductionTy build(Heuristic& heuristic,
- PrimRef* prims,
- const Set& set,
- CreateAllocFunc createAlloc,
- CreateNodeFunc createNode, UpdateNodeFunc updateNode,
- const CreateLeafFunc& createLeaf,
- const ProgressMonitor& progressMonitor,
- const Settings& settings)
- {
- typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
-
- typedef BuilderT<
- BuildRecord,
- Heuristic,
- Set,
- PrimRef,
- ReductionTy,
- decltype(createAlloc()),
- CreateAllocFunc,
- CreateNodeFunc,
- UpdateNodeFunc,
- CreateLeafFunc,
- DefaultCanCreateLeafFunc<PrimRef, Set>,
- DefaultCanCreateLeafSplitFunc<PrimRef, Set>,
- ProgressMonitor> Builder;
-
- /* instantiate builder */
- Builder builder(prims,
- heuristic,
- createAlloc,
- createNode,
- updateNode,
- createLeaf,
- DefaultCanCreateLeafFunc<PrimRef, Set>(),
- DefaultCanCreateLeafSplitFunc<PrimRef, Set>(),
- progressMonitor,
- settings);
-
- /* build hierarchy */
- BuildRecord record(1,set);
- const ReductionTy root = builder.recurse(record,nullptr,true);
- _mm_mfence(); // to allow non-temporal stores during build
- return root;
- }
-
- template<
- typename ReductionTy,
- typename Heuristic,
- typename Set,
- typename PrimRef,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename UpdateNodeFunc,
- typename CreateLeafFunc,
- typename CanCreateLeafFunc,
- typename CanCreateLeafSplitFunc,
- typename ProgressMonitor>
-
- __noinline static ReductionTy build(Heuristic& heuristic,
- PrimRef* prims,
- const Set& set,
- CreateAllocFunc createAlloc,
- CreateNodeFunc createNode, UpdateNodeFunc updateNode,
- const CreateLeafFunc& createLeaf,
- const CanCreateLeafFunc& canCreateLeaf,
- const CanCreateLeafSplitFunc& canCreateLeafSplit,
- const ProgressMonitor& progressMonitor,
- const Settings& settings)
- {
- typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
-
- typedef BuilderT<
- BuildRecord,
- Heuristic,
- Set,
- PrimRef,
- ReductionTy,
- decltype(createAlloc()),
- CreateAllocFunc,
- CreateNodeFunc,
- UpdateNodeFunc,
- CreateLeafFunc,
- CanCreateLeafFunc,
- CanCreateLeafSplitFunc,
- ProgressMonitor> Builder;
-
- /* instantiate builder */
- Builder builder(prims,
- heuristic,
- createAlloc,
- createNode,
- updateNode,
- createLeaf,
- canCreateLeaf,
- canCreateLeafSplit,
- progressMonitor,
- settings);
-
- /* build hierarchy */
- BuildRecord record(1,set);
- const ReductionTy root = builder.recurse(record,nullptr,true);
- _mm_mfence(); // to allow non-temporal stores during build
- return root;
- }
- };
-
- /* SAH builder that operates on an array of BuildRecords */
- struct BVHBuilderBinnedSAH
- {
- typedef PrimInfoRange Set;
- typedef HeuristicArrayBinningSAH<PrimRef,NUM_OBJECT_BINS> Heuristic;
- typedef GeneralBVHBuilder::BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
- typedef GeneralBVHBuilder::Settings Settings;
-
- /*! special builder that propagates reduction over the tree */
- template<
- typename ReductionTy,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename UpdateNodeFunc,
- typename CreateLeafFunc,
- typename ProgressMonitor>
-
- static ReductionTy build(CreateAllocFunc createAlloc,
- CreateNodeFunc createNode, UpdateNodeFunc updateNode,
- const CreateLeafFunc& createLeaf,
- const ProgressMonitor& progressMonitor,
- PrimRef* prims, const PrimInfo& pinfo,
- const Settings& settings)
- {
- Heuristic heuristic(prims);
- return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
- heuristic,
- prims,
- PrimInfoRange(0,pinfo.size(),pinfo),
- createAlloc,
- createNode,
- updateNode,
- createLeaf,
- progressMonitor,
- settings);
- }
-
- /*! special builder that propagates reduction over the tree */
- template<
- typename ReductionTy,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename UpdateNodeFunc,
- typename CreateLeafFunc,
- typename CanCreateLeafFunc,
- typename CanCreateLeafSplitFunc,
- typename ProgressMonitor>
-
- static ReductionTy build(CreateAllocFunc createAlloc,
- CreateNodeFunc createNode, UpdateNodeFunc updateNode,
- const CreateLeafFunc& createLeaf,
- const CanCreateLeafFunc& canCreateLeaf,
- const CanCreateLeafSplitFunc& canCreateLeafSplit,
- const ProgressMonitor& progressMonitor,
- PrimRef* prims, const PrimInfo& pinfo,
- const Settings& settings)
- {
- Heuristic heuristic(prims);
- return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
- heuristic,
- prims,
- PrimInfoRange(0,pinfo.size(),pinfo),
- createAlloc,
- createNode,
- updateNode,
- createLeaf,
- canCreateLeaf,
- canCreateLeafSplit,
- progressMonitor,
- settings);
- }
- };
-
- /* Spatial SAH builder that operates on an double-buffered array of BuildRecords */
- struct BVHBuilderBinnedFastSpatialSAH
- {
- typedef PrimInfoExtRange Set;
- typedef Split2<BinSplit<NUM_OBJECT_BINS>,SpatialBinSplit<NUM_SPATIAL_BINS> > Split;
- typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord;
- typedef GeneralBVHBuilder::Settings Settings;
-
- static const unsigned int GEOMID_MASK = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
- static const unsigned int SPLITS_MASK = 0xFFFFFFFF << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
-
- template<typename ReductionTy, typename UserCreateLeaf>
- struct CreateLeafExt
- {
- __forceinline CreateLeafExt (const UserCreateLeaf userCreateLeaf)
- : userCreateLeaf(userCreateLeaf) {}
-
- // __noinline is workaround for ICC2016 compiler bug
- template<typename Allocator>
- __noinline ReductionTy operator() (PrimRef* prims, const range<size_t>& range, Allocator alloc) const
- {
- for (size_t i=range.begin(); i<range.end(); i++)
- prims[i].lower.u &= GEOMID_MASK;
-
- return userCreateLeaf(prims,range,alloc);
- }
-
- const UserCreateLeaf userCreateLeaf;
- };
-
- /*! special builder that propagates reduction over the tree */
- template<
- typename ReductionTy,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename UpdateNodeFunc,
- typename CreateLeafFunc,
- typename SplitPrimitiveFunc,
- typename ProgressMonitor>
-
- static ReductionTy build(CreateAllocFunc createAlloc,
- CreateNodeFunc createNode,
- UpdateNodeFunc updateNode,
- const CreateLeafFunc& createLeaf,
- SplitPrimitiveFunc splitPrimitive,
- ProgressMonitor progressMonitor,
- PrimRef* prims,
- const size_t extSize,
- const PrimInfo& pinfo,
- const Settings& settings)
- {
- typedef HeuristicArraySpatialSAH<SplitPrimitiveFunc,PrimRef,NUM_OBJECT_BINS,NUM_SPATIAL_BINS> Heuristic;
- Heuristic heuristic(splitPrimitive,prims,pinfo);
-
- /* calculate total surface area */ // FIXME: this sum is not deterministic
- const float A = (float) parallel_reduce(size_t(0),pinfo.size(),0.0, [&] (const range<size_t>& r) -> double {
-
- double A = 0.0f;
- for (size_t i=r.begin(); i<r.end(); i++)
- {
- PrimRef& prim = prims[i];
- A += area(prim.bounds());
- }
- return A;
- },std::plus<double>());
-
-
- /* calculate maximum number of spatial splits per primitive */
- const unsigned int maxSplits = ((size_t)1 << RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)-1;
- const float f = 10.0f;
-
- const float invA = 1.0f / A;
- parallel_for( size_t(0), pinfo.size(), [&](const range<size_t>& r) {
-
- for (size_t i=r.begin(); i<r.end(); i++)
- {
- PrimRef& prim = prims[i];
- assert((prim.geomID() & SPLITS_MASK) == 0);
- // FIXME: is there a better general heuristic ?
- const float nf = ceilf(f*pinfo.size()*area(prim.bounds()) * invA);
- unsigned int n = 4+min((int)maxSplits-4, max(1, (int)(nf)));
- prim.lower.u |= n << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
- }
- });
-
- return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
- heuristic,
- prims,
- PrimInfoExtRange(0,pinfo.size(),extSize,pinfo),
- createAlloc,
- createNode,
- updateNode,
- CreateLeafExt<ReductionTy,CreateLeafFunc>(createLeaf),
- progressMonitor,
- settings);
- }
- };
-
- /* Open/Merge SAH builder that operates on an array of BuildRecords */
- struct BVHBuilderBinnedOpenMergeSAH
- {
- static const size_t NUM_OBJECT_BINS_HQ = 32;
- typedef PrimInfoExtRange Set;
- typedef BinSplit<NUM_OBJECT_BINS_HQ> Split;
- typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord;
- typedef GeneralBVHBuilder::Settings Settings;
-
- /*! special builder that propagates reduction over the tree */
- template<
- typename ReductionTy,
- typename BuildRef,
- typename CreateAllocFunc,
- typename CreateNodeFunc,
- typename UpdateNodeFunc,
- typename CreateLeafFunc,
- typename NodeOpenerFunc,
- typename ProgressMonitor>
-
- static ReductionTy build(CreateAllocFunc createAlloc,
- CreateNodeFunc createNode,
- UpdateNodeFunc updateNode,
- const CreateLeafFunc& createLeaf,
- NodeOpenerFunc nodeOpenerFunc,
- ProgressMonitor progressMonitor,
- BuildRef* prims,
- const size_t extSize,
- const PrimInfo& pinfo,
- const Settings& settings)
- {
- typedef HeuristicArrayOpenMergeSAH<NodeOpenerFunc,BuildRef,NUM_OBJECT_BINS_HQ> Heuristic;
- Heuristic heuristic(nodeOpenerFunc,prims,settings.branchingFactor);
-
- return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,BuildRef>(
- heuristic,
- prims,
- PrimInfoExtRange(0,pinfo.size(),extSize,pinfo),
- createAlloc,
- createNode,
- updateNode,
- createLeaf,
- progressMonitor,
- settings);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_binning.h
deleted file mode 100644
index a4d3b68e46..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning.h
+++ /dev/null
@@ -1,972 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "priminfo.h"
-#include "../../common/algorithms/parallel_reduce.h"
-#include "../../common/algorithms/parallel_partition.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! mapping into bins */
- template<size_t BINS>
- struct BinMapping
- {
- public:
- __forceinline BinMapping() {}
-
- /*! calculates the mapping */
- __forceinline BinMapping(size_t N, const BBox3fa& centBounds)
- {
- num = min(BINS,size_t(4.0f + 0.05f*N));
- assert(num >= 1);
- const vfloat4 eps = 1E-34f;
- const vfloat4 diag = max(eps, (vfloat4) centBounds.size());
- scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
- ofs = (vfloat4) centBounds.lower;
- }
-
- /*! calculates the mapping */
- __forceinline BinMapping(const BBox3fa& centBounds)
- {
- num = BINS;
- const vfloat4 eps = 1E-34f;
- const vfloat4 diag = max(eps, (vfloat4) centBounds.size());
- scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
- ofs = (vfloat4) centBounds.lower;
- }
-
- /*! calculates the mapping */
- template<typename PrimInfo>
- __forceinline BinMapping(const PrimInfo& pinfo)
- {
- const vfloat4 eps = 1E-34f;
- num = min(BINS,size_t(4.0f + 0.05f*pinfo.size()));
- const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size());
- scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
- ofs = (vfloat4) pinfo.centBounds.lower;
- }
-
- /*! returns number of bins */
- __forceinline size_t size() const { return num; }
-
- /*! slower but safe binning */
- __forceinline Vec3ia bin(const Vec3fa& p) const
- {
- const vint4 i = floori((vfloat4(p)-ofs)*scale);
-#if 1
- assert(i[0] >= 0 && (size_t)i[0] < num);
- assert(i[1] >= 0 && (size_t)i[1] < num);
- assert(i[2] >= 0 && (size_t)i[2] < num);
- return Vec3ia(i);
-#else
- return Vec3ia(clamp(i,vint4(0),vint4(num-1)));
-#endif
- }
-
- /*! faster but unsafe binning */
- __forceinline Vec3ia bin_unsafe(const Vec3fa& p) const {
- return Vec3ia(floori((vfloat4(p)-ofs)*scale));
- }
-
- /*! faster but unsafe binning */
- template<typename PrimRef>
- __forceinline Vec3ia bin_unsafe(const PrimRef& p) const {
- return bin_unsafe(p.binCenter());
- }
-
- /*! faster but unsafe binning */
- template<typename PrimRef, typename BinBoundsAndCenter>
- __forceinline Vec3ia bin_unsafe(const PrimRef& p, const BinBoundsAndCenter& binBoundsAndCenter) const {
- return bin_unsafe(binBoundsAndCenter.binCenter(p));
- }
-
- template<typename PrimRef>
- __forceinline bool bin_unsafe(const PrimRef& ref,
- const vint4& vSplitPos,
- const vbool4& splitDimMask) const // FIXME: rename to isLeft
- {
- return any(((vint4)bin_unsafe(center2(ref.bounds())) < vSplitPos) & splitDimMask);
- }
- /*! calculates left spatial position of bin */
- __forceinline float pos(const size_t bin, const size_t dim) const {
- return madd(float(bin),1.0f / scale[dim],ofs[dim]);
- }
-
- /*! returns true if the mapping is invalid in some dimension */
- __forceinline bool invalid(const size_t dim) const {
- return scale[dim] == 0.0f;
- }
-
- /*! stream output */
- friend embree_ostream operator<<(embree_ostream cout, const BinMapping& mapping) {
- return cout << "BinMapping { num = " << mapping.num << ", ofs = " << mapping.ofs << ", scale = " << mapping.scale << "}";
- }
-
- public:
- size_t num;
- vfloat4 ofs,scale; //!< linear function that maps to bin ID
- };
-
- /*! stores all information to perform some split */
- template<size_t BINS>
- struct BinSplit
- {
- enum
- {
- SPLIT_OBJECT = 0,
- SPLIT_FALLBACK = 1,
- SPLIT_ENFORCE = 2, // splits with larger ID are enforced in createLargeLeaf even if we could create a leaf already
- SPLIT_TEMPORAL = 2,
- SPLIT_GEOMID = 3,
- };
-
- /*! construct an invalid split by default */
- __forceinline BinSplit()
- : sah(inf), dim(-1), pos(0), data(0) {}
-
- __forceinline BinSplit(float sah, unsigned data, int dim = 0, float fpos = 0)
- : sah(sah), dim(dim), fpos(fpos), data(data) {}
-
- /*! constructs specified split */
- __forceinline BinSplit(float sah, int dim, int pos, const BinMapping<BINS>& mapping)
- : sah(sah), dim(dim), pos(pos), data(0), mapping(mapping) {}
-
- /*! tests if this split is valid */
- __forceinline bool valid() const { return dim != -1; }
-
- /*! calculates surface area heuristic for performing the split */
- __forceinline float splitSAH() const { return sah; }
-
- /*! stream output */
- friend embree_ostream operator<<(embree_ostream cout, const BinSplit& split) {
- return cout << "BinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << "}";
- }
-
- public:
- float sah; //!< SAH cost of the split
- int dim; //!< split dimension
- union { int pos; float fpos; }; //!< bin index for splitting
- unsigned int data; //!< extra optional split data
- BinMapping<BINS> mapping; //!< mapping into bins
- };
-
- /*! stores extended information about the split */
- template<typename BBox>
- struct SplitInfoT
- {
-
- __forceinline SplitInfoT () {}
-
- __forceinline SplitInfoT (size_t leftCount, const BBox& leftBounds, size_t rightCount, const BBox& rightBounds)
- : leftCount(leftCount), rightCount(rightCount), leftBounds(leftBounds), rightBounds(rightBounds) {}
-
- public:
- size_t leftCount,rightCount;
- BBox leftBounds,rightBounds;
- };
-
- typedef SplitInfoT<BBox3fa> SplitInfo;
- typedef SplitInfoT<LBBox3fa> SplitInfo2;
-
- /*! stores all binning information */
- template<size_t BINS, typename PrimRef, typename BBox>
- struct __aligned(64) BinInfoT
- {
- typedef BinSplit<BINS> Split;
- typedef vbool4 vbool;
- typedef vint4 vint;
- typedef vfloat4 vfloat;
-
- __forceinline BinInfoT() {
- }
-
- __forceinline BinInfoT(EmptyTy) {
- clear();
- }
-
- /*! bin access function */
- __forceinline BBox &bounds(const size_t binID, const size_t dimID) { return _bounds[binID][dimID]; }
- __forceinline const BBox &bounds(const size_t binID, const size_t dimID) const { return _bounds[binID][dimID]; }
-
- __forceinline unsigned int &counts(const size_t binID, const size_t dimID) { return _counts[binID][dimID]; }
- __forceinline const unsigned int &counts(const size_t binID, const size_t dimID) const { return _counts[binID][dimID]; }
-
- __forceinline vuint4 &counts(const size_t binID) { return _counts[binID]; }
- __forceinline const vuint4 &counts(const size_t binID) const { return _counts[binID]; }
-
- /*! clears the bin info */
- __forceinline void clear()
- {
- for (size_t i=0; i<BINS; i++) {
- bounds(i,0) = bounds(i,1) = bounds(i,2) = empty;
- counts(i) = vuint4(zero);
- }
- }
-
- /*! bins an array of primitives */
- __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping)
- {
- if (unlikely(N == 0)) return;
- size_t i;
- for (i=0; i<N-1; i+=2)
- {
- /*! map even and odd primitive to bin */
- BBox prim0; Vec3fa center0;
- prims[i+0].binBoundsAndCenter(prim0,center0);
- const vint4 bin0 = (vint4)mapping.bin(center0);
-
- BBox prim1; Vec3fa center1;
- prims[i+1].binBoundsAndCenter(prim1,center1);
- const vint4 bin1 = (vint4)mapping.bin(center1);
-
- /*! increase bounds for bins for even primitive */
- const unsigned int b00 = extract<0>(bin0); bounds(b00,0).extend(prim0);
- const unsigned int b01 = extract<1>(bin0); bounds(b01,1).extend(prim0);
- const unsigned int b02 = extract<2>(bin0); bounds(b02,2).extend(prim0);
- const unsigned int s0 = (unsigned int)prims[i+0].size();
- counts(b00,0)+=s0;
- counts(b01,1)+=s0;
- counts(b02,2)+=s0;
-
- /*! increase bounds of bins for odd primitive */
- const unsigned int b10 = extract<0>(bin1); bounds(b10,0).extend(prim1);
- const unsigned int b11 = extract<1>(bin1); bounds(b11,1).extend(prim1);
- const unsigned int b12 = extract<2>(bin1); bounds(b12,2).extend(prim1);
- const unsigned int s1 = (unsigned int)prims[i+1].size();
- counts(b10,0)+=s1;
- counts(b11,1)+=s1;
- counts(b12,2)+=s1;
- }
- /*! for uneven number of primitives */
- if (i < N)
- {
- /*! map primitive to bin */
- BBox prim0; Vec3fa center0;
- prims[i].binBoundsAndCenter(prim0,center0);
- const vint4 bin0 = (vint4)mapping.bin(center0);
-
- /*! increase bounds of bins */
- const unsigned int s0 = (unsigned int)prims[i].size();
- const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
- const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
- const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
- }
- }
-
- /*! bins an array of primitives */
- template<typename BinBoundsAndCenter>
- __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
- {
- if (N == 0) return;
-
- size_t i;
- for (i=0; i<N-1; i+=2)
- {
- /*! map even and odd primitive to bin */
- BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);
- const vint4 bin0 = (vint4)mapping.bin(center0);
- BBox prim1; Vec3fa center1; binBoundsAndCenter.binBoundsAndCenter(prims[i+1],prim1,center1);
- const vint4 bin1 = (vint4)mapping.bin(center1);
-
- /*! increase bounds for bins for even primitive */
- const unsigned int s0 = prims[i+0].size();
- const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
- const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
- const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
-
- /*! increase bounds of bins for odd primitive */
- const unsigned int s1 = prims[i+1].size();
- const int b10 = extract<0>(bin1); counts(b10,0)+=s1; bounds(b10,0).extend(prim1);
- const int b11 = extract<1>(bin1); counts(b11,1)+=s1; bounds(b11,1).extend(prim1);
- const int b12 = extract<2>(bin1); counts(b12,2)+=s1; bounds(b12,2).extend(prim1);
- }
-
- /*! for uneven number of primitives */
- if (i < N)
- {
- /*! map primitive to bin */
- BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);
- const vint4 bin0 = (vint4)mapping.bin(center0);
-
- /*! increase bounds of bins */
- const unsigned int s0 = prims[i+0].size();
- const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
- const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
- const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
- }
- }
-
- __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping) {
- bin(prims+begin,end-begin,mapping);
- }
-
- template<typename BinBoundsAndCenter>
- __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) {
- bin<BinBoundsAndCenter>(prims+begin,end-begin,mapping,binBoundsAndCenter);
- }
-
- /*! merges in other binning information */
- __forceinline void merge (const BinInfoT& other, size_t numBins)
- {
-
- for (size_t i=0; i<numBins; i++)
- {
- counts(i) += other.counts(i);
- bounds(i,0).extend(other.bounds(i,0));
- bounds(i,1).extend(other.bounds(i,1));
- bounds(i,2).extend(other.bounds(i,2));
- }
- }
-
- /*! reduces binning information */
- static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b, const size_t numBins = BINS)
- {
- BinInfoT c;
- for (size_t i=0; i<numBins; i++)
- {
- c.counts(i) = a.counts(i)+b.counts(i);
- c.bounds(i,0) = embree::merge(a.bounds(i,0),b.bounds(i,0));
- c.bounds(i,1) = embree::merge(a.bounds(i,1),b.bounds(i,1));
- c.bounds(i,2) = embree::merge(a.bounds(i,2),b.bounds(i,2));
- }
- return c;
- }
-
- /*! finds the best split by scanning binning information */
- __forceinline Split best(const BinMapping<BINS>& mapping, const size_t blocks_shift) const
- {
- /* sweep from right to left and compute parallel prefix of merged bounds */
- vfloat4 rAreas[BINS];
- vuint4 rCounts[BINS];
- vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty;
- for (size_t i=mapping.size()-1; i>0; i--)
- {
- count += counts(i);
- rCounts[i] = count;
- bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx);
- by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by);
- bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz);
- rAreas[i][3] = 0.0f;
- }
- /* sweep from left to right and compute SAH */
- vuint4 blocks_add = (1 << blocks_shift)-1;
- vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;
- count = 0; bx = empty; by = empty; bz = empty;
- for (size_t i=1; i<mapping.size(); i++, ii+=1)
- {
- count += counts(i-1);
- bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx);
- by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by);
- bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz);
- const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
- const vfloat4 rArea = rAreas[i];
- const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift); // if blocks_shift >=1 then lCount < 4B and could be represented with an vint4, which would allow for faster vfloat4 conversions.
- const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
- const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));
- //const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));
-
- vbestPos = select(sah < vbestSAH,ii ,vbestPos);
- vbestSAH = select(sah < vbestSAH,sah,vbestSAH);
- }
-
- /* find best dimension */
- float bestSAH = inf;
- int bestDim = -1;
- int bestPos = 0;
- for (int dim=0; dim<3; dim++)
- {
- /* ignore zero sized dimensions */
- if (unlikely(mapping.invalid(dim)))
- continue;
-
- /* test if this is a better dimension */
- if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
- bestDim = dim;
- bestPos = vbestPos[dim];
- bestSAH = vbestSAH[dim];
- }
- }
- return Split(bestSAH,bestDim,bestPos,mapping);
- }
-
- /*! calculates extended split information */
- __forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const
- {
- if (split.dim == -1) {
- new (&info) SplitInfoT<BBox>(0,empty,0,empty);
- return;
- }
-
- size_t leftCount = 0;
- BBox leftBounds = empty;
- for (size_t i=0; i<(size_t)split.pos; i++) {
- leftCount += counts(i,split.dim);
- leftBounds.extend(bounds(i,split.dim));
- }
- size_t rightCount = 0;
- BBox rightBounds = empty;
- for (size_t i=split.pos; i<mapping.size(); i++) {
- rightCount += counts(i,split.dim);
- rightBounds.extend(bounds(i,split.dim));
- }
- new (&info) SplitInfoT<BBox>(leftCount,leftBounds,rightCount,rightBounds);
- }
-
- /*! gets the number of primitives left of the split */
- __forceinline size_t getLeftCount(const BinMapping<BINS>& mapping, const Split& split) const
- {
- if (unlikely(split.dim == -1)) return -1;
-
- size_t leftCount = 0;
- for (size_t i = 0; i < (size_t)split.pos; i++) {
- leftCount += counts(i, split.dim);
- }
- return leftCount;
- }
-
- /*! gets the number of primitives right of the split */
- __forceinline size_t getRightCount(const BinMapping<BINS>& mapping, const Split& split) const
- {
- if (unlikely(split.dim == -1)) return -1;
-
- size_t rightCount = 0;
- for (size_t i = (size_t)split.pos; i<mapping.size(); i++) {
- rightCount += counts(i, split.dim);
- }
- return rightCount;
- }
-
- private:
- BBox _bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
- vuint4 _counts[BINS]; //!< counts number of primitives that map into the bins
- };
-
-#if defined(__AVX512ER__) // KNL
-
- /*! mapping into bins */
- template<>
- struct BinMapping<16>
- {
- public:
- __forceinline BinMapping() {}
-
- /*! calculates the mapping */
- template<typename PrimInfo>
- __forceinline BinMapping(const PrimInfo& pinfo)
- {
- num = 16;
- const vfloat4 eps = 1E-34f;
- const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size());
- scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
- ofs = (vfloat4) pinfo.centBounds.lower;
- scale16 = scale;
- ofs16 = ofs;
- }
-
- /*! returns number of bins */
- __forceinline size_t size() const { return num; }
-
- __forceinline vint16 bin16(const Vec3fa& p) const {
- return vint16(vint4(floori((vfloat4(p)-ofs)*scale)));
- }
-
- __forceinline vint16 bin16(const vfloat16& p) const {
- return floori((p-ofs16)*scale16);
- }
-
- __forceinline int bin_unsafe(const PrimRef& ref,
- const vint16& vSplitPos,
- const vbool16& splitDimMask) const // FIXME: rename to isLeft
- {
- const vfloat16 lower(*(vfloat4*)&ref.lower);
- const vfloat16 upper(*(vfloat4*)&ref.upper);
- const vfloat16 p = lower + upper;
- const vint16 i = floori((p-ofs16)*scale16);
- return lt(splitDimMask,i,vSplitPos);
- }
-
- /*! returns true if the mapping is invalid in some dimension */
- __forceinline bool invalid(const size_t dim) const {
- return scale[dim] == 0.0f;
- }
-
- public:
- size_t num;
- vfloat4 ofs,scale; //!< linear function that maps to bin ID
- vfloat16 ofs16,scale16; //!< linear function that maps to bin ID
- };
-
- /* 16 bins in-register binner */
- template<typename PrimRef>
- struct __aligned(64) BinInfoT<16,PrimRef,BBox3fa>
- {
- typedef BinSplit<16> Split;
- typedef vbool16 vbool;
- typedef vint16 vint;
- typedef vfloat16 vfloat;
-
- __forceinline BinInfoT() {
- }
-
- __forceinline BinInfoT(EmptyTy) {
- clear();
- }
-
- /*! clears the bin info */
- __forceinline void clear()
- {
- lower[0] = lower[1] = lower[2] = pos_inf;
- upper[0] = upper[1] = upper[2] = neg_inf;
- count[0] = count[1] = count[2] = 0;
- }
-
-
- static __forceinline vfloat16 prefix_area_rl(const vfloat16 min_x,
- const vfloat16 min_y,
- const vfloat16 min_z,
- const vfloat16 max_x,
- const vfloat16 max_y,
- const vfloat16 max_z)
- {
- const vfloat16 r_min_x = reverse_prefix_min(min_x);
- const vfloat16 r_min_y = reverse_prefix_min(min_y);
- const vfloat16 r_min_z = reverse_prefix_min(min_z);
- const vfloat16 r_max_x = reverse_prefix_max(max_x);
- const vfloat16 r_max_y = reverse_prefix_max(max_y);
- const vfloat16 r_max_z = reverse_prefix_max(max_z);
- const vfloat16 dx = r_max_x - r_min_x;
- const vfloat16 dy = r_max_y - r_min_y;
- const vfloat16 dz = r_max_z - r_min_z;
- const vfloat16 area_rl = madd(dx,dy,madd(dx,dz,dy*dz));
- return area_rl;
- }
-
- static __forceinline vfloat16 prefix_area_lr(const vfloat16 min_x,
- const vfloat16 min_y,
- const vfloat16 min_z,
- const vfloat16 max_x,
- const vfloat16 max_y,
- const vfloat16 max_z)
- {
- const vfloat16 r_min_x = prefix_min(min_x);
- const vfloat16 r_min_y = prefix_min(min_y);
- const vfloat16 r_min_z = prefix_min(min_z);
- const vfloat16 r_max_x = prefix_max(max_x);
- const vfloat16 r_max_y = prefix_max(max_y);
- const vfloat16 r_max_z = prefix_max(max_z);
- const vfloat16 dx = r_max_x - r_min_x;
- const vfloat16 dy = r_max_y - r_min_y;
- const vfloat16 dz = r_max_z - r_min_z;
- const vfloat16 area_lr = madd(dx,dy,madd(dx,dz,dy*dz));
- return area_lr;
- }
-
-
- /*! bins an array of primitives */
- __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<16>& mapping)
- {
- if (unlikely(N == 0)) return;
-
- const vfloat16 init_min(pos_inf);
- const vfloat16 init_max(neg_inf);
-
- vfloat16 min_x0,min_x1,min_x2;
- vfloat16 min_y0,min_y1,min_y2;
- vfloat16 min_z0,min_z1,min_z2;
- vfloat16 max_x0,max_x1,max_x2;
- vfloat16 max_y0,max_y1,max_y2;
- vfloat16 max_z0,max_z1,max_z2;
- vuint16 count0,count1,count2;
-
- min_x0 = init_min;
- min_x1 = init_min;
- min_x2 = init_min;
- min_y0 = init_min;
- min_y1 = init_min;
- min_y2 = init_min;
- min_z0 = init_min;
- min_z1 = init_min;
- min_z2 = init_min;
-
- max_x0 = init_max;
- max_x1 = init_max;
- max_x2 = init_max;
- max_y0 = init_max;
- max_y1 = init_max;
- max_y2 = init_max;
- max_z0 = init_max;
- max_z1 = init_max;
- max_z2 = init_max;
-
- count0 = zero;
- count1 = zero;
- count2 = zero;
-
- const vint16 step16(step);
- size_t i;
- for (i=0; i<N-1; i+=2)
- {
- /*! map even and odd primitive to bin */
- const BBox3fa primA = prims[i+0].bounds();
- const vfloat16 centerA = vfloat16((vfloat4)primA.lower) + vfloat16((vfloat4)primA.upper);
- const vint16 binA = mapping.bin16(centerA);
-
- const BBox3fa primB = prims[i+1].bounds();
- const vfloat16 centerB = vfloat16((vfloat4)primB.lower) + vfloat16((vfloat4)primB.upper);
- const vint16 binB = mapping.bin16(centerB);
-
- /* A */
- {
- const vfloat16 b_min_x = prims[i+0].lower.x;
- const vfloat16 b_min_y = prims[i+0].lower.y;
- const vfloat16 b_min_z = prims[i+0].lower.z;
- const vfloat16 b_max_x = prims[i+0].upper.x;
- const vfloat16 b_max_y = prims[i+0].upper.y;
- const vfloat16 b_max_z = prims[i+0].upper.z;
-
- const vint16 bin0 = shuffle<0>(binA);
- const vint16 bin1 = shuffle<1>(binA);
- const vint16 bin2 = shuffle<2>(binA);
-
- const vbool16 m_update_x = step16 == bin0;
- const vbool16 m_update_y = step16 == bin1;
- const vbool16 m_update_z = step16 == bin2;
-
- assert(popcnt((size_t)m_update_x) == 1);
- assert(popcnt((size_t)m_update_y) == 1);
- assert(popcnt((size_t)m_update_z) == 1);
-
- min_x0 = mask_min(m_update_x,min_x0,min_x0,b_min_x);
- min_y0 = mask_min(m_update_x,min_y0,min_y0,b_min_y);
- min_z0 = mask_min(m_update_x,min_z0,min_z0,b_min_z);
- // ------------------------------------------------------------------------
- max_x0 = mask_max(m_update_x,max_x0,max_x0,b_max_x);
- max_y0 = mask_max(m_update_x,max_y0,max_y0,b_max_y);
- max_z0 = mask_max(m_update_x,max_z0,max_z0,b_max_z);
- // ------------------------------------------------------------------------
- min_x1 = mask_min(m_update_y,min_x1,min_x1,b_min_x);
- min_y1 = mask_min(m_update_y,min_y1,min_y1,b_min_y);
- min_z1 = mask_min(m_update_y,min_z1,min_z1,b_min_z);
- // ------------------------------------------------------------------------
- max_x1 = mask_max(m_update_y,max_x1,max_x1,b_max_x);
- max_y1 = mask_max(m_update_y,max_y1,max_y1,b_max_y);
- max_z1 = mask_max(m_update_y,max_z1,max_z1,b_max_z);
- // ------------------------------------------------------------------------
- min_x2 = mask_min(m_update_z,min_x2,min_x2,b_min_x);
- min_y2 = mask_min(m_update_z,min_y2,min_y2,b_min_y);
- min_z2 = mask_min(m_update_z,min_z2,min_z2,b_min_z);
- // ------------------------------------------------------------------------
- max_x2 = mask_max(m_update_z,max_x2,max_x2,b_max_x);
- max_y2 = mask_max(m_update_z,max_y2,max_y2,b_max_y);
- max_z2 = mask_max(m_update_z,max_z2,max_z2,b_max_z);
- // ------------------------------------------------------------------------
- count0 = mask_add(m_update_x,count0,count0,vuint16(1));
- count1 = mask_add(m_update_y,count1,count1,vuint16(1));
- count2 = mask_add(m_update_z,count2,count2,vuint16(1));
- }
-
-
- /* B */
- {
- const vfloat16 b_min_x = prims[i+1].lower.x;
- const vfloat16 b_min_y = prims[i+1].lower.y;
- const vfloat16 b_min_z = prims[i+1].lower.z;
- const vfloat16 b_max_x = prims[i+1].upper.x;
- const vfloat16 b_max_y = prims[i+1].upper.y;
- const vfloat16 b_max_z = prims[i+1].upper.z;
-
- const vint16 bin0 = shuffle<0>(binB);
- const vint16 bin1 = shuffle<1>(binB);
- const vint16 bin2 = shuffle<2>(binB);
-
- const vbool16 m_update_x = step16 == bin0;
- const vbool16 m_update_y = step16 == bin1;
- const vbool16 m_update_z = step16 == bin2;
-
- assert(popcnt((size_t)m_update_x) == 1);
- assert(popcnt((size_t)m_update_y) == 1);
- assert(popcnt((size_t)m_update_z) == 1);
-
- min_x0 = mask_min(m_update_x,min_x0,min_x0,b_min_x);
- min_y0 = mask_min(m_update_x,min_y0,min_y0,b_min_y);
- min_z0 = mask_min(m_update_x,min_z0,min_z0,b_min_z);
- // ------------------------------------------------------------------------
- max_x0 = mask_max(m_update_x,max_x0,max_x0,b_max_x);
- max_y0 = mask_max(m_update_x,max_y0,max_y0,b_max_y);
- max_z0 = mask_max(m_update_x,max_z0,max_z0,b_max_z);
- // ------------------------------------------------------------------------
- min_x1 = mask_min(m_update_y,min_x1,min_x1,b_min_x);
- min_y1 = mask_min(m_update_y,min_y1,min_y1,b_min_y);
- min_z1 = mask_min(m_update_y,min_z1,min_z1,b_min_z);
- // ------------------------------------------------------------------------
- max_x1 = mask_max(m_update_y,max_x1,max_x1,b_max_x);
- max_y1 = mask_max(m_update_y,max_y1,max_y1,b_max_y);
- max_z1 = mask_max(m_update_y,max_z1,max_z1,b_max_z);
- // ------------------------------------------------------------------------
- min_x2 = mask_min(m_update_z,min_x2,min_x2,b_min_x);
- min_y2 = mask_min(m_update_z,min_y2,min_y2,b_min_y);
- min_z2 = mask_min(m_update_z,min_z2,min_z2,b_min_z);
- // ------------------------------------------------------------------------
- max_x2 = mask_max(m_update_z,max_x2,max_x2,b_max_x);
- max_y2 = mask_max(m_update_z,max_y2,max_y2,b_max_y);
- max_z2 = mask_max(m_update_z,max_z2,max_z2,b_max_z);
- // ------------------------------------------------------------------------
- count0 = mask_add(m_update_x,count0,count0,vuint16(1));
- count1 = mask_add(m_update_y,count1,count1,vuint16(1));
- count2 = mask_add(m_update_z,count2,count2,vuint16(1));
- }
-
- }
-
- if (i < N)
- {
- const BBox3fa prim0 = prims[i].bounds();
- const vfloat16 center0 = vfloat16((vfloat4)prim0.lower) + vfloat16((vfloat4)prim0.upper);
- const vint16 bin = mapping.bin16(center0);
-
- const vfloat16 b_min_x = prims[i].lower.x;
- const vfloat16 b_min_y = prims[i].lower.y;
- const vfloat16 b_min_z = prims[i].lower.z;
- const vfloat16 b_max_x = prims[i].upper.x;
- const vfloat16 b_max_y = prims[i].upper.y;
- const vfloat16 b_max_z = prims[i].upper.z;
-
- const vint16 bin0 = shuffle<0>(bin);
- const vint16 bin1 = shuffle<1>(bin);
- const vint16 bin2 = shuffle<2>(bin);
-
- const vbool16 m_update_x = step16 == bin0;
- const vbool16 m_update_y = step16 == bin1;
- const vbool16 m_update_z = step16 == bin2;
-
- assert(popcnt((size_t)m_update_x) == 1);
- assert(popcnt((size_t)m_update_y) == 1);
- assert(popcnt((size_t)m_update_z) == 1);
-
- min_x0 = mask_min(m_update_x,min_x0,min_x0,b_min_x);
- min_y0 = mask_min(m_update_x,min_y0,min_y0,b_min_y);
- min_z0 = mask_min(m_update_x,min_z0,min_z0,b_min_z);
- // ------------------------------------------------------------------------
- max_x0 = mask_max(m_update_x,max_x0,max_x0,b_max_x);
- max_y0 = mask_max(m_update_x,max_y0,max_y0,b_max_y);
- max_z0 = mask_max(m_update_x,max_z0,max_z0,b_max_z);
- // ------------------------------------------------------------------------
- min_x1 = mask_min(m_update_y,min_x1,min_x1,b_min_x);
- min_y1 = mask_min(m_update_y,min_y1,min_y1,b_min_y);
- min_z1 = mask_min(m_update_y,min_z1,min_z1,b_min_z);
- // ------------------------------------------------------------------------
- max_x1 = mask_max(m_update_y,max_x1,max_x1,b_max_x);
- max_y1 = mask_max(m_update_y,max_y1,max_y1,b_max_y);
- max_z1 = mask_max(m_update_y,max_z1,max_z1,b_max_z);
- // ------------------------------------------------------------------------
- min_x2 = mask_min(m_update_z,min_x2,min_x2,b_min_x);
- min_y2 = mask_min(m_update_z,min_y2,min_y2,b_min_y);
- min_z2 = mask_min(m_update_z,min_z2,min_z2,b_min_z);
- // ------------------------------------------------------------------------
- max_x2 = mask_max(m_update_z,max_x2,max_x2,b_max_x);
- max_y2 = mask_max(m_update_z,max_y2,max_y2,b_max_y);
- max_z2 = mask_max(m_update_z,max_z2,max_z2,b_max_z);
- // ------------------------------------------------------------------------
- count0 = mask_add(m_update_x,count0,count0,vuint16(1));
- count1 = mask_add(m_update_y,count1,count1,vuint16(1));
- count2 = mask_add(m_update_z,count2,count2,vuint16(1));
- }
-
- lower[0] = Vec3vf16( min_x0, min_y0, min_z0 );
- lower[1] = Vec3vf16( min_x1, min_y1, min_z1 );
- lower[2] = Vec3vf16( min_x2, min_y2, min_z2 );
-
- upper[0] = Vec3vf16( max_x0, max_y0, max_z0 );
- upper[1] = Vec3vf16( max_x1, max_y1, max_z1 );
- upper[2] = Vec3vf16( max_x2, max_y2, max_z2 );
-
- count[0] = count0;
- count[1] = count1;
- count[2] = count2;
- }
-
- __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<16>& mapping) {
- bin(prims+begin,end-begin,mapping);
- }
-
- /*! merges in other binning information */
- __forceinline void merge (const BinInfoT& other, size_t numBins)
- {
- for (size_t i=0; i<3; i++)
- {
- lower[i] = min(lower[i],other.lower[i]);
- upper[i] = max(upper[i],other.upper[i]);
- count[i] += other.count[i];
- }
- }
-
- /*! reducesr binning information */
- static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b)
- {
- BinInfoT c;
- for (size_t i=0; i<3; i++)
- {
- c.counts[i] = a.counts[i] + b.counts[i];
- c.lower[i] = min(a.lower[i],b.lower[i]);
- c.upper[i] = max(a.upper[i],b.upper[i]);
- }
- return c;
- }
-
- /*! finds the best split by scanning binning information */
- __forceinline Split best(const BinMapping<16>& mapping, const size_t blocks_shift) const
- {
- /* find best dimension */
- float bestSAH = inf;
- int bestDim = -1;
- int bestPos = 0;
- const vuint16 blocks_add = (1 << blocks_shift)-1;
- const vfloat16 inf(pos_inf);
- for (size_t dim=0; dim<3; dim++)
- {
- /* ignore zero sized dimensions */
- if (unlikely(mapping.invalid(dim)))
- continue;
-
- const vfloat16 rArea16 = prefix_area_rl(lower[dim].x,lower[dim].y,lower[dim].z, upper[dim].x,upper[dim].y,upper[dim].z);
- const vfloat16 lArea16 = prefix_area_lr(lower[dim].x,lower[dim].y,lower[dim].z, upper[dim].x,upper[dim].y,upper[dim].z);
- const vuint16 lCount16 = prefix_sum(count[dim]);
- const vuint16 rCount16 = reverse_prefix_sum(count[dim]);
-
- /* compute best split in this dimension */
- const vfloat16 leftArea = lArea16;
- const vfloat16 rightArea = align_shift_right<1>(zero,rArea16);
- const vuint16 lC = lCount16;
- const vuint16 rC = align_shift_right<1>(zero,rCount16);
- const vuint16 leftCount = ( lC + blocks_add) >> blocks_shift;
- const vuint16 rightCount = ( rC + blocks_add) >> blocks_shift;
- const vbool16 valid = (leftArea < inf) & (rightArea < inf) & vbool16(0x7fff); // handles inf entries
- const vfloat16 sah = select(valid,madd(leftArea,vfloat16(leftCount),rightArea*vfloat16(rightCount)),vfloat16(pos_inf));
- /* test if this is a better dimension */
- if (any(sah < vfloat16(bestSAH)))
- {
- const size_t index = select_min(sah);
- assert(index < 15);
- assert(sah[index] < bestSAH);
- bestDim = dim;
- bestPos = index+1;
- bestSAH = sah[index];
- }
- }
-
- return Split(bestSAH,bestDim,bestPos,mapping);
-
- }
-
- /*! calculates extended split information */
- __forceinline void getSplitInfo(const BinMapping<16>& mapping, const Split& split, SplitInfo& info) const
- {
- if (split.dim == -1) {
- new (&info) SplitInfo(0,empty,0,empty);
- return;
- }
- // FIXME: horizontal reduction!
-
- size_t leftCount = 0;
- BBox3fa leftBounds = empty;
- for (size_t i=0; i<(size_t)split.pos; i++) {
- leftCount += count[split.dim][i];
- Vec3fa bounds_lower(lower[split.dim].x[i],lower[split.dim].y[i],lower[split.dim].z[i]);
- Vec3fa bounds_upper(upper[split.dim].x[i],upper[split.dim].y[i],upper[split.dim].z[i]);
- leftBounds.extend(BBox3fa(bounds_lower,bounds_upper));
- }
- size_t rightCount = 0;
- BBox3fa rightBounds = empty;
- for (size_t i=split.pos; i<mapping.size(); i++) {
- rightCount += count[split.dim][i];
- Vec3fa bounds_lower(lower[split.dim].x[i],lower[split.dim].y[i],lower[split.dim].z[i]);
- Vec3fa bounds_upper(upper[split.dim].x[i],upper[split.dim].y[i],upper[split.dim].z[i]);
- rightBounds.extend(BBox3fa(bounds_lower,bounds_upper));
- }
- new (&info) SplitInfo(leftCount,leftBounds,rightCount,rightBounds);
- }
-
- /*! gets the number of primitives left of the split */
- __forceinline size_t getLeftCount(const BinMapping<16>& mapping, const Split& split) const
- {
- if (unlikely(split.dim == -1)) return -1;
-
- size_t leftCount = 0;
- for (size_t i = 0; i < (size_t)split.pos; i++) {
- leftCount += count[split.dim][i];
- }
- return leftCount;
- }
-
- /*! gets the number of primitives right of the split */
- __forceinline size_t getRightCount(const BinMapping<16>& mapping, const Split& split) const
- {
- if (unlikely(split.dim == -1)) return -1;
-
- size_t rightCount = 0;
- for (size_t i = (size_t)split.pos; i<mapping.size(); i++) {
- rightCount += count[split.dim][i];
- }
- return rightCount;
- }
-
- private:
- Vec3vf16 lower[3];
- Vec3vf16 upper[3];
- vuint16 count[3];
- };
-#endif
- }
-
- template<typename BinInfoT, typename BinMapping, typename PrimRef>
- __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping)
- {
- if (likely(end-begin < parallelThreshold)) {
- binner.bin(prims,begin,end,mapping);
- } else {
- binner = parallel_reduce(begin,end,blockSize,binner,
- [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; },
- [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
- }
- }
-
- template<typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>
- __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
- {
- if (likely(end-begin < parallelThreshold)) {
- binner.bin(prims,begin,end,mapping,binBoundsAndCenter);
- } else {
- binner = parallel_reduce(begin,end,blockSize,binner,
- [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; },
- [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
- }
- }
-
- template<bool parallel, typename BinInfoT, typename BinMapping, typename PrimRef>
- __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping)
- {
- if (!parallel) {
- binner.bin(prims,begin,end,mapping);
- } else {
- binner = parallel_reduce(begin,end,blockSize,binner,
- [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; },
- [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
- }
- }
-
- template<bool parallel, typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>
- __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
- {
- if (!parallel) {
- binner.bin(prims,begin,end,mapping,binBoundsAndCenter);
- } else {
- binner = parallel_reduce(begin,end,blockSize,binner,
- [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; },
- [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_aligned.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_aligned.h
deleted file mode 100644
index a4c272f015..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_aligned.h
+++ /dev/null
@@ -1,205 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "heuristic_binning.h"
-
-namespace embree
-{
- namespace isa
- {
- struct PrimInfoRange : public CentGeomBBox3fa, public range<size_t>
- {
- __forceinline PrimInfoRange () {
- }
-
- __forceinline PrimInfoRange(const PrimInfo& pinfo)
- : CentGeomBBox3fa(pinfo), range<size_t>(pinfo.begin,pinfo.end) {}
-
- __forceinline PrimInfoRange(EmptyTy)
- : CentGeomBBox3fa(EmptyTy()), range<size_t>(0,0) {}
-
- __forceinline PrimInfoRange (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
- : CentGeomBBox3fa(centGeomBounds), range<size_t>(begin,end) {}
-
- __forceinline float leafSAH() const {
- return expectedApproxHalfArea(geomBounds)*float(size());
- }
-
- __forceinline float leafSAH(size_t block_shift) const {
- return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
- }
- };
-
- /*! Performs standard object binning */
- template<typename PrimRef, size_t BINS>
- struct HeuristicArrayBinningSAH
- {
- typedef BinSplit<BINS> Split;
- typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;
- typedef range<size_t> Set;
-
-#if defined(__AVX512ER__) // KNL
- static const size_t PARALLEL_THRESHOLD = 4*768;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 768;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 768;
-#else
- static const size_t PARALLEL_THRESHOLD = 3 * 1024;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-#endif
- __forceinline HeuristicArrayBinningSAH ()
- : prims(nullptr) {}
-
- /*! remember prim array */
- __forceinline HeuristicArrayBinningSAH (PrimRef* prims)
- : prims(prims) {}
-
- /*! finds the best split */
- __noinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize)
- {
- if (likely(pinfo.size() < PARALLEL_THRESHOLD))
- return find_template<false>(pinfo,logBlockSize);
- else
- return find_template<true>(pinfo,logBlockSize);
- }
-
- template<bool parallel>
- __forceinline const Split find_template(const PrimInfoRange& pinfo, const size_t logBlockSize)
- {
- Binner binner(empty);
- const BinMapping<BINS> mapping(pinfo);
- bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);
- return binner.best(mapping,logBlockSize);
- }
-
- /*! array partitioning */
- __forceinline void split(const Split& split, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
- {
- if (likely(pinfo.size() < PARALLEL_THRESHOLD))
- split_template<false>(split,pinfo,linfo,rinfo);
- else
- split_template<true>(split,pinfo,linfo,rinfo);
- }
-
- template<bool parallel>
- __forceinline void split_template(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
- {
- if (!split.valid()) {
- deterministic_order(set);
- return splitFallback(set,lset,rset);
- }
-
- const size_t begin = set.begin();
- const size_t end = set.end();
- CentGeomBBox3fa local_left(empty);
- CentGeomBBox3fa local_right(empty);
- const unsigned int splitPos = split.pos;
- const unsigned int splitDim = split.dim;
- const unsigned int splitDimMask = (unsigned int)1 << splitDim;
-
- const typename Binner::vint vSplitPos(splitPos);
- const typename Binner::vbool vSplitMask(splitDimMask);
- auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
-
- size_t center = 0;
- if (!parallel)
- center = serial_partitioning(prims,begin,end,local_left,local_right,isLeft,
- [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); });
- else
- center = parallel_partitioning(
- prims,begin,end,EmptyTy(),local_left,local_right,isLeft,
- [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); },
- [] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); },
- PARALLEL_PARTITION_BLOCK_SIZE);
-
- new (&lset) PrimInfoRange(begin,center,local_left);
- new (&rset) PrimInfoRange(center,end,local_right);
- assert(area(lset.geomBounds) >= 0.0f);
- assert(area(rset.geomBounds) >= 0.0f);
- }
-
- void deterministic_order(const PrimInfoRange& pinfo)
- {
- /* required as parallel partition destroys original primitive order */
- std::sort(&prims[pinfo.begin()],&prims[pinfo.end()]);
- }
-
- void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
- {
- const size_t begin = pinfo.begin();
- const size_t end = pinfo.end();
- const size_t center = (begin + end)/2;
-
- CentGeomBBox3fa left(empty);
- for (size_t i=begin; i<center; i++)
- left.extend_center2(prims[i]);
- new (&linfo) PrimInfoRange(begin,center,left);
-
- CentGeomBBox3fa right(empty);
- for (size_t i=center; i<end; i++)
- right.extend_center2(prims[i]);
- new (&rinfo) PrimInfoRange(center,end,right);
- }
-
- void splitByGeometry(const range<size_t>& range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
- {
- assert(range.size() > 1);
- CentGeomBBox3fa left(empty);
- CentGeomBBox3fa right(empty);
- unsigned int geomID = prims[range.begin()].geomID();
- size_t center = serial_partitioning(prims,range.begin(),range.end(),left,right,
- [&] ( const PrimRef& prim ) { return prim.geomID() == geomID; },
- [ ] ( CentGeomBBox3fa& a, const PrimRef& ref ) { a.extend_center2(ref); });
-
- new (&linfo) PrimInfoRange(range.begin(),center,left);
- new (&rinfo) PrimInfoRange(center,range.end(),right);
- }
-
- private:
- PrimRef* const prims;
- };
-
- /*! Performs standard object binning */
- template<typename PrimRefMB, size_t BINS>
- struct HeuristicArrayBinningMB
- {
- typedef BinSplit<BINS> Split;
- typedef typename PrimRefMB::BBox BBox;
- typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;
- static const size_t PARALLEL_THRESHOLD = 3 * 1024;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-
- /*! finds the best split */
- const Split find(const SetMB& set, const size_t logBlockSize)
- {
- ObjectBinner binner(empty);
- const BinMapping<BINS> mapping(set.size(),set.centBounds);
- bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping);
- Split osplit = binner.best(mapping,logBlockSize);
- osplit.sah *= set.time_range.size();
- if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split
- return osplit;
- }
-
- /*! array partitioning */
- __forceinline void split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- PrimInfoMB left = empty;
- PrimInfoMB right = empty;
- const vint4 vSplitPos(split.pos);
- const vbool4 vSplitMask(1 << split.dim);
- auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref) < vSplitPos) & vSplitMask); };
- auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); };
- auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); };
- size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);
- new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
- new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
- }
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_unaligned.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_unaligned.h
deleted file mode 100644
index 1370244586..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_unaligned.h
+++ /dev/null
@@ -1,302 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "heuristic_binning.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Performs standard object binning */
- template<typename PrimRef, size_t BINS>
- struct UnalignedHeuristicArrayBinningSAH
- {
- typedef BinSplit<BINS> Split;
- typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;
- typedef range<size_t> Set;
-
- __forceinline UnalignedHeuristicArrayBinningSAH () // FIXME: required?
- : scene(nullptr), prims(nullptr) {}
-
- /*! remember prim array */
- __forceinline UnalignedHeuristicArrayBinningSAH (Scene* scene, PrimRef* prims)
- : scene(scene), prims(prims) {}
-
- const LinearSpace3fa computeAlignedSpace(const range<size_t>& set)
- {
- Vec3fa axis(0,0,1);
- uint64_t bestGeomPrimID = -1;
-
- /*! find curve with minimum ID that defines valid direction */
- for (size_t i=set.begin(); i<set.end(); i++)
- {
- const unsigned int geomID = prims[i].geomID();
- const unsigned int primID = prims[i].primID();
- const uint64_t geomprimID = prims[i].ID64();
- if (geomprimID >= bestGeomPrimID) continue;
- const Vec3fa axis1 = scene->get(geomID)->computeDirection(primID);
- if (sqr_length(axis1) > 1E-18f) {
- axis = normalize(axis1);
- bestGeomPrimID = geomprimID;
- }
- }
- return frame(axis).transposed();
- }
-
- const PrimInfo computePrimInfo(const range<size_t>& set, const LinearSpace3fa& space)
- {
- auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa
- {
- CentGeomBBox3fa bounds(empty);
- for (size_t i=r.begin(); i<r.end(); i++) {
- Geometry* mesh = scene->get(prims[i].geomID());
- bounds.extend(mesh->vbounds(space,prims[i].primID()));
- }
- return bounds;
- };
-
- const CentGeomBBox3fa bounds = parallel_reduce(set.begin(), set.end(), size_t(1024), size_t(4096),
- CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2);
-
- return PrimInfo(set.begin(),set.end(),bounds);
- }
-
- struct BinBoundsAndCenter
- {
- __forceinline BinBoundsAndCenter(Scene* scene, const LinearSpace3fa& space)
- : scene(scene), space(space) {}
-
- /*! returns center for binning */
- __forceinline Vec3fa binCenter(const PrimRef& ref) const
- {
- Geometry* mesh = (Geometry*) scene->get(ref.geomID());
- BBox3fa bounds = mesh->vbounds(space,ref.primID());
- return embree::center2(bounds);
- }
-
- /*! returns bounds and centroid used for binning */
- __forceinline void binBoundsAndCenter(const PrimRef& ref, BBox3fa& bounds_o, Vec3fa& center_o) const
- {
- Geometry* mesh = (Geometry*) scene->get(ref.geomID());
- BBox3fa bounds = mesh->vbounds(space,ref.primID());
- bounds_o = bounds;
- center_o = embree::center2(bounds);
- }
-
- private:
- Scene* scene;
- const LinearSpace3fa space;
- };
-
- /*! finds the best split */
- __forceinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize, const LinearSpace3fa& space)
- {
- if (likely(pinfo.size() < 10000))
- return find_template<false>(pinfo,logBlockSize,space);
- else
- return find_template<true>(pinfo,logBlockSize,space);
- }
-
- /*! finds the best split */
- template<bool parallel>
- const Split find_template(const PrimInfoRange& set, const size_t logBlockSize, const LinearSpace3fa& space)
- {
- Binner binner(empty);
- const BinMapping<BINS> mapping(set);
- BinBoundsAndCenter binBoundsAndCenter(scene,space);
- bin_serial_or_parallel<parallel>(binner,prims,set.begin(),set.end(),size_t(4096),mapping,binBoundsAndCenter);
- return binner.best(mapping,logBlockSize);
- }
-
- /*! array partitioning */
- __forceinline void split(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
- {
- if (likely(set.size() < 10000))
- split_template<false>(split,space,set,lset,rset);
- else
- split_template<true>(split,space,set,lset,rset);
- }
-
- /*! array partitioning */
- template<bool parallel>
- __forceinline void split_template(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
- {
- if (!split.valid()) {
- deterministic_order(set);
- return splitFallback(set,lset,rset);
- }
-
- const size_t begin = set.begin();
- const size_t end = set.end();
- CentGeomBBox3fa local_left(empty);
- CentGeomBBox3fa local_right(empty);
- const int splitPos = split.pos;
- const int splitDim = split.dim;
- BinBoundsAndCenter binBoundsAndCenter(scene,space);
-
- size_t center = 0;
- if (likely(set.size() < 10000))
- center = serial_partitioning(prims,begin,end,local_left,local_right,
- [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; },
- [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); });
- else
- center = parallel_partitioning(prims,begin,end,EmptyTy(),local_left,local_right,
- [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; },
- [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); },
- [] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); },
- 128);
-
- new (&lset) PrimInfoRange(begin,center,local_left);
- new (&rset) PrimInfoRange(center,end,local_right);
- assert(area(lset.geomBounds) >= 0.0f);
- assert(area(rset.geomBounds) >= 0.0f);
- }
-
- void deterministic_order(const range<size_t>& set)
- {
- /* required as parallel partition destroys original primitive order */
- std::sort(&prims[set.begin()],&prims[set.end()]);
- }
-
- void splitFallback(const range<size_t>& set, PrimInfoRange& lset, PrimInfoRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- const size_t center = (begin + end)/2;
-
- CentGeomBBox3fa left(empty);
- for (size_t i=begin; i<center; i++)
- left.extend_center2(prims[i]);
- new (&lset) PrimInfoRange(begin,center,left);
-
- CentGeomBBox3fa right(empty);
- for (size_t i=center; i<end; i++)
- right.extend_center2(prims[i]);
- new (&rset) PrimInfoRange(center,end,right);
- }
-
- private:
- Scene* const scene;
- PrimRef* const prims;
- };
-
- /*! Performs standard object binning */
- template<typename PrimRefMB, size_t BINS>
- struct UnalignedHeuristicArrayBinningMB
- {
- typedef BinSplit<BINS> Split;
- typedef typename PrimRefMB::BBox BBox;
- typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;
-
- static const size_t PARALLEL_THRESHOLD = 3 * 1024;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-
- UnalignedHeuristicArrayBinningMB(Scene* scene)
- : scene(scene) {}
-
- const LinearSpace3fa computeAlignedSpaceMB(Scene* scene, const SetMB& set)
- {
- Vec3fa axis0(0,0,1);
- uint64_t bestGeomPrimID = -1;
-
- /*! find curve with minimum ID that defines valid direction */
- for (size_t i=set.begin(); i<set.end(); i++)
- {
- const PrimRefMB& prim = (*set.prims)[i];
- const unsigned int geomID = prim.geomID();
- const unsigned int primID = prim.primID();
- const uint64_t geomprimID = prim.ID64();
- if (geomprimID >= bestGeomPrimID) continue;
-
- const Geometry* mesh = scene->get(geomID);
- const range<int> tbounds = mesh->timeSegmentRange(set.time_range);
- if (tbounds.size() == 0) continue;
-
- const size_t t = (tbounds.begin()+tbounds.end())/2;
- const Vec3fa axis1 = mesh->computeDirection(primID,t);
- if (sqr_length(axis1) > 1E-18f) {
- axis0 = normalize(axis1);
- bestGeomPrimID = geomprimID;
- }
- }
-
- return frame(axis0).transposed();
- }
-
- struct BinBoundsAndCenter
- {
- __forceinline BinBoundsAndCenter(Scene* scene, BBox1f time_range, const LinearSpace3fa& space)
- : scene(scene), time_range(time_range), space(space) {}
-
- /*! returns center for binning */
- template<typename PrimRef>
- __forceinline Vec3fa binCenter(const PrimRef& ref) const
- {
- Geometry* mesh = scene->get(ref.geomID());
- LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
- return center2(lbounds.interpolate(0.5f));
- }
-
- /*! returns bounds and centroid used for binning */
- __noinline void binBoundsAndCenter (const PrimRefMB& ref, BBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX
- {
- Geometry* mesh = scene->get(ref.geomID());
- LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
- bounds_o = lbounds.interpolate(0.5f);
- center_o = center2(bounds_o);
- }
-
- /*! returns bounds and centroid used for binning */
- __noinline void binBoundsAndCenter (const PrimRefMB& ref, LBBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX
- {
- Geometry* mesh = scene->get(ref.geomID());
- LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
- bounds_o = lbounds;
- center_o = center2(lbounds.interpolate(0.5f));
- }
-
- private:
- Scene* scene;
- BBox1f time_range;
- const LinearSpace3fa space;
- };
-
- /*! finds the best split */
- const Split find(const SetMB& set, const size_t logBlockSize, const LinearSpace3fa& space)
- {
- BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space);
- ObjectBinner binner(empty);
- const BinMapping<BINS> mapping(set.size(),set.centBounds);
- bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping,binBoundsAndCenter);
- Split osplit = binner.best(mapping,logBlockSize);
- osplit.sah *= set.time_range.size();
- if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split
- return osplit;
- }
-
- /*! array partitioning */
- __forceinline void split(const Split& split, const LinearSpace3fa& space, const SetMB& set, SetMB& lset, SetMB& rset)
- {
- BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space);
- const size_t begin = set.begin();
- const size_t end = set.end();
- PrimInfoMB left = empty;
- PrimInfoMB right = empty;
- const vint4 vSplitPos(split.pos);
- const vbool4 vSplitMask(1 << split.dim);
- auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref,binBoundsAndCenter) < vSplitPos) & vSplitMask); };
- auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); };
- auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); };
- size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);
- new (&lset) SetMB(left,set.prims,range<size_t>(begin,center),set.time_range);
- new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
- }
-
- private:
- Scene* scene;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_openmerge_array.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_openmerge_array.h
deleted file mode 100644
index 21f18c0208..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_openmerge_array.h
+++ /dev/null
@@ -1,443 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-// TODO:
-// - adjust parallel build thresholds
-// - openNodesBasedOnExtend should consider max extended size
-
-#pragma once
-
-#include "heuristic_binning.h"
-#include "heuristic_spatial.h"
-
-/* stop opening of all bref.geomIDs are the same */
-#define EQUAL_GEOMID_STOP_CRITERIA 1
-
-/* 10% spatial extend threshold */
-#define MAX_EXTEND_THRESHOLD 0.1f
-
-/* maximum is 8 children */
-#define MAX_OPENED_CHILD_NODES 8
-
-/* open until all build refs are below threshold size in one step */
-#define USE_LOOP_OPENING 0
-
-namespace embree
-{
- namespace isa
- {
- /*! Performs standard object binning */
- template<typename NodeOpenerFunc, typename PrimRef, size_t OBJECT_BINS>
- struct HeuristicArrayOpenMergeSAH
- {
- typedef BinSplit<OBJECT_BINS> Split;
- typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> Binner;
-
- static const size_t PARALLEL_THRESHOLD = 1024;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 512;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-
- static const size_t MOVE_STEP_SIZE = 64;
- static const size_t CREATE_SPLITS_STEP_SIZE = 128;
-
- __forceinline HeuristicArrayOpenMergeSAH ()
- : prims0(nullptr) {}
-
- /*! remember prim array */
- __forceinline HeuristicArrayOpenMergeSAH (const NodeOpenerFunc& nodeOpenerFunc, PrimRef* prims0, size_t max_open_size)
- : prims0(prims0), nodeOpenerFunc(nodeOpenerFunc), max_open_size(max_open_size)
- {
- assert(max_open_size <= MAX_OPENED_CHILD_NODES);
- }
-
- struct OpenHeuristic
- {
- __forceinline OpenHeuristic( const PrimInfoExtRange& pinfo )
- {
- const Vec3fa diag = pinfo.geomBounds.size();
- dim = maxDim(diag);
- assert(diag[dim] > 0.0f);
- inv_max_extend = 1.0f / diag[dim];
- }
-
- __forceinline bool operator () ( PrimRef& prim ) const {
- return !prim.node.isLeaf() && prim.bounds().size()[dim] * inv_max_extend > MAX_EXTEND_THRESHOLD;
- }
-
- private:
- size_t dim;
- float inv_max_extend;
- };
-
- /*! compute extended ranges */
- __forceinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
- {
- assert(set.ext_range_size() > 0);
- const float left_factor = (float)lweight / (lweight + rweight);
- const size_t ext_range_size = set.ext_range_size();
- const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);
- const size_t right_ext_range_size = ext_range_size - left_ext_range_size;
- lset.set_ext_range(lset.end() + left_ext_range_size);
- rset.set_ext_range(rset.end() + right_ext_range_size);
- }
-
- /*! move ranges */
- __forceinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- const size_t left_ext_range_size = lset.ext_range_size();
- const size_t right_size = rset.size();
-
- /* has the left child an extended range? */
- if (left_ext_range_size > 0)
- {
- /* left extended range smaller than right range ? */
- if (left_ext_range_size < right_size)
- {
- /* only move a small part of the beginning of the right range to the end */
- parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++)
- prims0[i+right_size] = prims0[i];
- });
- }
- else
- {
- /* no overlap, move entire right range to new location, can be made fully parallel */
- parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++)
- prims0[i+left_ext_range_size] = prims0[i];
- });
- }
- /* update right range */
- assert(rset.ext_end() + left_ext_range_size == set.ext_end());
- rset.move_right(left_ext_range_size);
- }
- }
-
- /* estimates the extra space required when opening, and checks if all primitives are from same geometry */
- __noinline std::pair<size_t,bool> getProperties(const PrimInfoExtRange& set)
- {
- const OpenHeuristic heuristic(set);
- const unsigned int geomID = prims0[set.begin()].geomID();
-
- auto body = [&] (const range<size_t>& r) -> std::pair<size_t,bool> {
- bool commonGeomID = true;
- size_t opens = 0;
- for (size_t i=r.begin(); i<r.end(); i++) {
- commonGeomID &= prims0[i].geomID() == geomID;
- if (heuristic(prims0[i]))
- opens += prims0[i].node.getN()-1; // coarse approximation
- }
- return std::pair<size_t,bool>(opens,commonGeomID);
- };
- auto reduction = [&] (const std::pair<size_t,bool>& b0, const std::pair<size_t,bool>& b1) -> std::pair<size_t,bool> {
- return std::pair<size_t,bool>(b0.first+b1.first,b0.second && b1.second);
- };
- return parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,std::pair<size_t,bool>(0,true),body,reduction);
- }
-
- // FIXME: should consider maximum available extended size
- __noinline void openNodesBasedOnExtend(PrimInfoExtRange& set)
- {
- const OpenHeuristic heuristic(set);
- const size_t ext_range_start = set.end();
-
- if (false && set.size() < PARALLEL_THRESHOLD)
- {
- size_t extra_elements = 0;
- for (size_t i=set.begin(); i<set.end(); i++)
- {
- if (heuristic(prims0[i]))
- {
- PrimRef tmp[MAX_OPENED_CHILD_NODES];
- const size_t n = nodeOpenerFunc(prims0[i],tmp);
- assert(extra_elements + n-1 <= set.ext_range_size());
- for (size_t j=0; j<n; j++)
- set.extend_center2(tmp[j]);
-
- prims0[i] = tmp[0];
- for (size_t j=1; j<n; j++)
- prims0[ext_range_start+extra_elements+j-1] = tmp[j];
- extra_elements += n-1;
- }
- }
- set._end += extra_elements;
- }
- else
- {
- std::atomic<size_t> ext_elements;
- ext_elements.store(0);
- PrimInfo info = parallel_reduce( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, PrimInfo(empty), [&](const range<size_t>& r) -> PrimInfo {
- PrimInfo info(empty);
- for (size_t i=r.begin(); i<r.end(); i++)
- if (heuristic(prims0[i]))
- {
- PrimRef tmp[MAX_OPENED_CHILD_NODES];
- const size_t n = nodeOpenerFunc(prims0[i],tmp);
- const size_t ID = ext_elements.fetch_add(n-1);
- assert(ID + n-1 <= set.ext_range_size());
-
- for (size_t j=0; j<n; j++)
- info.extend_center2(tmp[j]);
-
- prims0[i] = tmp[0];
- for (size_t j=1; j<n; j++)
- prims0[ext_range_start+ID+j-1] = tmp[j];
- }
- return info;
- }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
- set.centBounds.extend(info.centBounds);
- assert(ext_elements.load() <= set.ext_range_size());
- set._end += ext_elements.load();
- }
- }
-
- __noinline void openNodesBasedOnExtendLoop(PrimInfoExtRange& set, const size_t est_new_elements)
- {
- const OpenHeuristic heuristic(set);
- size_t next_iteration_extra_elements = est_new_elements;
-
- while (next_iteration_extra_elements <= set.ext_range_size())
- {
- next_iteration_extra_elements = 0;
- size_t extra_elements = 0;
- const size_t ext_range_start = set.end();
-
- for (size_t i=set.begin(); i<set.end(); i++)
- {
- if (heuristic(prims0[i]))
- {
- PrimRef tmp[MAX_OPENED_CHILD_NODES];
- const size_t n = nodeOpenerFunc(prims0[i],tmp);
- assert(extra_elements + n-1 <= set.ext_range_size());
- for (size_t j=0;j<n;j++)
- set.extend_center2(tmp[j]);
-
- prims0[i] = tmp[0];
- for (size_t j=1;j<n;j++)
- prims0[ext_range_start+extra_elements+j-1] = tmp[j];
- extra_elements += n-1;
-
- for (size_t j=0; j<n; j++)
- if (heuristic(tmp[j]))
- next_iteration_extra_elements += tmp[j].node.getN()-1; // coarse approximation
-
- }
- }
- assert( extra_elements <= set.ext_range_size());
- set._end += extra_elements;
-
- for (size_t i=set.begin();i<set.end();i++)
- assert(prims0[i].numPrimitives() > 0);
-
- if (unlikely(next_iteration_extra_elements == 0)) break;
- }
- }
-
- __noinline const Split find(PrimInfoExtRange& set, const size_t logBlockSize)
- {
- /* single element */
- if (set.size() <= 1)
- return Split();
-
- /* disable opening if there is no overlap */
- const size_t D = 4;
- if (unlikely(set.has_ext_range() && set.size() <= D))
- {
- bool disjoint = true;
- for (size_t j=set.begin(); j<set.end()-1; j++) {
- for (size_t i=set.begin()+1; i<set.end(); i++) {
- if (conjoint(prims0[j].bounds(),prims0[i].bounds())) {
- disjoint = false; break;
- }
- }
- }
- if (disjoint) set.set_ext_range(set.end()); /* disables opening */
- }
-
- std::pair<size_t,bool> p(0,false);
-
- /* disable opening when all primitives are from same geometry */
- if (unlikely(set.has_ext_range()))
- {
- p = getProperties(set);
-#if EQUAL_GEOMID_STOP_CRITERIA == 1
- if (p.second) set.set_ext_range(set.end()); /* disable opening */
-#endif
- }
-
- /* open nodes when we have sufficient space available */
- if (unlikely(set.has_ext_range()))
- {
-#if USE_LOOP_OPENING == 1
- openNodesBasedOnExtendLoop(set,p.first);
-#else
- if (p.first <= set.ext_range_size())
- openNodesBasedOnExtend(set);
-#endif
-
- /* disable opening when unsufficient space for opening a node available */
- if (set.ext_range_size() < max_open_size-1)
- set.set_ext_range(set.end()); /* disable opening */
- }
-
- /* find best split */
- return object_find(set,logBlockSize);
- }
-
-
- /*! finds the best object split */
- __forceinline const Split object_find(const PrimInfoExtRange& set,const size_t logBlockSize)
- {
- if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize);
- else return parallel_object_find (set,logBlockSize);
- }
-
- /*! finds the best object split */
- __noinline const Split sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
- {
- Binner binner(empty);
- const BinMapping<OBJECT_BINS> mapping(set.centBounds);
- binner.bin(prims0,set.begin(),set.end(),mapping);
- return binner.best(mapping,logBlockSize);
- }
-
- /*! finds the best split */
- __noinline const Split parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
- {
- Binner binner(empty);
- const BinMapping<OBJECT_BINS> mapping(set.centBounds);
- const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
- auto body = [&] (const range<size_t>& r) -> Binner {
- Binner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner;
- };
- auto reduction = [&] (const Binner& b0, const Binner& b1) -> Binner {
- Binner r = b0; r.merge(b1,_mapping.size()); return r;
- };
- binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,body,reduction);
- return binner.best(mapping,logBlockSize);
- }
-
- /*! array partitioning */
- __noinline void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- PrimInfoExtRange set = set_i;
-
- /* valid split */
- if (unlikely(!split.valid())) {
- deterministic_order(set);
- splitFallback(set,lset,rset);
- return;
- }
-
- std::pair<size_t,size_t> ext_weights(0,0);
-
- /* object split */
- if (likely(set.size() < PARALLEL_THRESHOLD))
- ext_weights = sequential_object_split(split,set,lset,rset);
- else
- ext_weights = parallel_object_split(split,set,lset,rset);
-
- /* if we have an extended range, set extended child ranges and move right split range */
- if (unlikely(set.has_ext_range()))
- {
- setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
- moveExtentedRange(set,lset,rset);
- }
- }
-
- /*! array partitioning */
- std::pair<size_t,size_t> sequential_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- PrimInfo local_left(empty);
- PrimInfo local_right(empty);
- const unsigned int splitPos = split.pos;
- const unsigned int splitDim = split.dim;
- const unsigned int splitDimMask = (unsigned int)1 << splitDim;
-
- const vint4 vSplitPos(splitPos);
- const vbool4 vSplitMask( (int)splitDimMask );
-
- size_t center = serial_partitioning(prims0,
- begin,end,local_left,local_right,
- [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); },
- [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); });
-
- new (&lset) PrimInfoExtRange(begin,center,center,local_left);
- new (&rset) PrimInfoExtRange(center,end,end,local_right);
- assert(area(lset.geomBounds) >= 0.0f);
- assert(area(rset.geomBounds) >= 0.0f);
- return std::pair<size_t,size_t>(local_left.size(),local_right.size());
- }
-
- /*! array partitioning */
- __noinline std::pair<size_t,size_t> parallel_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- PrimInfo left(empty);
- PrimInfo right(empty);
- const unsigned int splitPos = split.pos;
- const unsigned int splitDim = split.dim;
- const unsigned int splitDimMask = (unsigned int)1 << splitDim;
-
- const vint4 vSplitPos(splitPos);
- const vbool4 vSplitMask( (int)splitDimMask );
- auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
-
- const size_t center = parallel_partitioning(
- prims0,begin,end,EmptyTy(),left,right,isLeft,
- [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); },
- [] (PrimInfo& pinfo0,const PrimInfo& pinfo1) { pinfo0.merge(pinfo1); },
- PARALLEL_PARTITION_BLOCK_SIZE);
-
- new (&lset) PrimInfoExtRange(begin,center,center,left);
- new (&rset) PrimInfoExtRange(center,end,end,right);
- assert(area(lset.geomBounds) >= 0.0f);
- assert(area(rset.geomBounds) >= 0.0f);
-
- return std::pair<size_t,size_t>(left.size(),right.size());
- }
-
- void deterministic_order(const extended_range<size_t>& set)
- {
- /* required as parallel partition destroys original primitive order */
- std::sort(&prims0[set.begin()],&prims0[set.end()]);
- }
-
- __forceinline void splitFallback(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- const size_t center = (begin + end)/2;
-
- PrimInfo left(empty);
- for (size_t i=begin; i<center; i++)
- left.add_center2(prims0[i]);
-
- const size_t lweight = left.end;
-
- PrimInfo right(empty);
- for (size_t i=center; i<end; i++)
- right.add_center2(prims0[i]);
-
- const size_t rweight = right.end;
- new (&lset) PrimInfoExtRange(begin,center,center,left);
- new (&rset) PrimInfoExtRange(center,end,end,right);
-
- /* if we have an extended range */
- if (set.has_ext_range())
- {
- setExtentedRanges(set,lset,rset,lweight,rweight);
- moveExtentedRange(set,lset,rset);
- }
- }
-
- private:
- PrimRef* const prims0;
- const NodeOpenerFunc& nodeOpenerFunc;
- size_t max_open_size;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial.h
deleted file mode 100644
index d8ca6cb92c..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial.h
+++ /dev/null
@@ -1,414 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/scene.h"
-#include "priminfo.h"
-
-namespace embree
-{
- static const unsigned int RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS = 5;
-
- namespace isa
- {
-
- /*! mapping into bins */
- template<size_t BINS>
- struct SpatialBinMapping
- {
- public:
- __forceinline SpatialBinMapping() {}
-
- /*! calculates the mapping */
- __forceinline SpatialBinMapping(const CentGeomBBox3fa& pinfo)
- {
- const vfloat4 lower = (vfloat4) pinfo.geomBounds.lower;
- const vfloat4 upper = (vfloat4) pinfo.geomBounds.upper;
- const vfloat4 eps = 128.0f*vfloat4(ulp)*max(abs(lower),abs(upper));
- const vfloat4 diag = max(eps,(vfloat4) pinfo.geomBounds.size());
- scale = select(upper-lower <= eps,vfloat4(0.0f),vfloat4(BINS)/diag);
- ofs = (vfloat4) pinfo.geomBounds.lower;
- inv_scale = 1.0f / scale;
- }
-
- /*! slower but safe binning */
- __forceinline vint4 bin(const Vec3fa& p) const
- {
- const vint4 i = floori((vfloat4(p)-ofs)*scale);
- return clamp(i,vint4(0),vint4(BINS-1));
- }
-
- __forceinline std::pair<vint4,vint4> bin(const BBox3fa& b) const
- {
-#if defined(__AVX__)
- const vfloat8 ofs8(ofs);
- const vfloat8 scale8(scale);
- const vint8 lu = floori((vfloat8::loadu(&b)-ofs8)*scale8);
- const vint8 c_lu = clamp(lu,vint8(zero),vint8(BINS-1));
- return std::pair<vint4,vint4>(extract4<0>(c_lu),extract4<1>(c_lu));
-#else
- const vint4 lower = floori((vfloat4(b.lower)-ofs)*scale);
- const vint4 upper = floori((vfloat4(b.upper)-ofs)*scale);
- const vint4 c_lower = clamp(lower,vint4(0),vint4(BINS-1));
- const vint4 c_upper = clamp(upper,vint4(0),vint4(BINS-1));
- return std::pair<vint4,vint4>(c_lower,c_upper);
-#endif
- }
-
-
- /*! calculates left spatial position of bin */
- __forceinline float pos(const size_t bin, const size_t dim) const {
- return madd(float(bin),inv_scale[dim],ofs[dim]);
- }
-
- /*! calculates left spatial position of bin */
- template<size_t N>
- __forceinline vfloat<N> posN(const vfloat<N> bin, const size_t dim) const {
- return madd(bin,vfloat<N>(inv_scale[dim]),vfloat<N>(ofs[dim]));
- }
-
- /*! returns true if the mapping is invalid in some dimension */
- __forceinline bool invalid(const size_t dim) const {
- return scale[dim] == 0.0f;
- }
-
- public:
- vfloat4 ofs,scale,inv_scale; //!< linear function that maps to bin ID
- };
-
- /*! stores all information required to perform some split */
- template<size_t BINS>
- struct SpatialBinSplit
- {
- /*! construct an invalid split by default */
- __forceinline SpatialBinSplit()
- : sah(inf), dim(-1), pos(0), left(-1), right(-1), factor(1.0f) {}
-
- /*! constructs specified split */
- __forceinline SpatialBinSplit(float sah, int dim, int pos, const SpatialBinMapping<BINS>& mapping)
- : sah(sah), dim(dim), pos(pos), left(-1), right(-1), factor(1.0f), mapping(mapping) {}
-
- /*! constructs specified split */
- __forceinline SpatialBinSplit(float sah, int dim, int pos, int left, int right, float factor, const SpatialBinMapping<BINS>& mapping)
- : sah(sah), dim(dim), pos(pos), left(left), right(right), factor(factor), mapping(mapping) {}
-
- /*! tests if this split is valid */
- __forceinline bool valid() const { return dim != -1; }
-
- /*! calculates surface area heuristic for performing the split */
- __forceinline float splitSAH() const { return sah; }
-
- /*! stream output */
- friend embree_ostream operator<<(embree_ostream cout, const SpatialBinSplit& split) {
- return cout << "SpatialBinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << ", left = " << split.left << ", right = " << split.right << ", factor = " << split.factor << "}";
- }
-
- public:
- float sah; //!< SAH cost of the split
- int dim; //!< split dimension
- int pos; //!< split position
- int left; //!< number of elements on the left side
- int right; //!< number of elements on the right side
- float factor; //!< factor splitting the extended range
- SpatialBinMapping<BINS> mapping; //!< mapping into bins
- };
-
- /*! stores all binning information */
- template<size_t BINS, typename PrimRef>
- struct __aligned(64) SpatialBinInfo
- {
- SpatialBinInfo() {
- }
-
- __forceinline SpatialBinInfo(EmptyTy) {
- clear();
- }
-
- /*! clears the bin info */
- __forceinline void clear()
- {
- for (size_t i=0; i<BINS; i++) {
- bounds[i][0] = bounds[i][1] = bounds[i][2] = empty;
- numBegin[i] = numEnd[i] = 0;
- }
- }
-
- /*! adds binning data */
- __forceinline void add(const size_t dim,
- const size_t beginID,
- const size_t endID,
- const size_t binID,
- const BBox3fa &b,
- const size_t n = 1)
- {
- assert(beginID < BINS);
- assert(endID < BINS);
- assert(binID < BINS);
-
- numBegin[beginID][dim]+=(unsigned int)n;
- numEnd [endID][dim]+=(unsigned int)n;
- bounds [binID][dim].extend(b);
- }
-
- /*! extends binning bounds */
- __forceinline void extend(const size_t dim,
- const size_t binID,
- const BBox3fa &b)
- {
- assert(binID < BINS);
- bounds [binID][dim].extend(b);
- }
-
- /*! bins an array of triangles */
- template<typename SplitPrimitive>
- __forceinline void bin(const SplitPrimitive& splitPrimitive, const PrimRef* prims, size_t N, const SpatialBinMapping<BINS>& mapping)
- {
- for (size_t i=0; i<N; i++)
- {
- const PrimRef prim = prims[i];
- unsigned splits = prim.geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
-
- if (unlikely(splits == 1))
- {
- const vint4 bin = mapping.bin(center(prim.bounds()));
- for (size_t dim=0; dim<3; dim++)
- {
- assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS);
- numBegin[bin[dim]][dim]++;
- numEnd [bin[dim]][dim]++;
- bounds [bin[dim]][dim].extend(prim.bounds());
- }
- }
- else
- {
- const vint4 bin0 = mapping.bin(prim.bounds().lower);
- const vint4 bin1 = mapping.bin(prim.bounds().upper);
-
- for (size_t dim=0; dim<3; dim++)
- {
- size_t bin;
- PrimRef rest = prim;
- size_t l = bin0[dim];
- size_t r = bin1[dim];
-
- // same bin optimization
- if (likely(l == r))
- {
- numBegin[l][dim]++;
- numEnd [l][dim]++;
- bounds [l][dim].extend(prim.bounds());
- continue;
- }
-
- for (bin=(size_t)bin0[dim]; bin<(size_t)bin1[dim]; bin++)
- {
- const float pos = mapping.pos(bin+1,dim);
-
- PrimRef left,right;
- splitPrimitive(rest,(int)dim,pos,left,right);
- if (unlikely(left.bounds().empty())) l++;
- bounds[bin][dim].extend(left.bounds());
- rest = right;
- }
- if (unlikely(rest.bounds().empty())) r--;
- numBegin[l][dim]++;
- numEnd [r][dim]++;
- bounds [bin][dim].extend(rest.bounds());
- }
- }
- }
- }
-
- /*! bins a range of primitives inside an array */
- template<typename SplitPrimitive>
- void bin(const SplitPrimitive& splitPrimitive, const PrimRef* prims, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping) {
- bin(splitPrimitive,prims+begin,end-begin,mapping);
- }
-
- /*! bins an array of primitives */
- template<typename PrimitiveSplitterFactory>
- __forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
- {
- for (size_t i=begin; i<end; i++)
- {
- const PrimRef &prim = source[i];
- const vint4 bin0 = mapping.bin(prim.bounds().lower);
- const vint4 bin1 = mapping.bin(prim.bounds().upper);
-
- for (size_t dim=0; dim<3; dim++)
- {
- if (unlikely(mapping.invalid(dim)))
- continue;
-
- size_t bin;
- size_t l = bin0[dim];
- size_t r = bin1[dim];
-
- // same bin optimization
- if (likely(l == r))
- {
- add(dim,l,l,l,prim.bounds());
- continue;
- }
- const size_t bin_start = bin0[dim];
- const size_t bin_end = bin1[dim];
- BBox3fa rest = prim.bounds();
- const auto splitter = splitterFactory(prim);
- for (bin=bin_start; bin<bin_end; bin++)
- {
- const float pos = mapping.pos(bin+1,dim);
- BBox3fa left,right;
- splitter(rest,dim,pos,left,right);
- if (unlikely(left.empty())) l++;
- extend(dim,bin,left);
- rest = right;
- }
- if (unlikely(rest.empty())) r--;
- add(dim,l,r,bin,rest);
- }
- }
- }
-
-
-
- /*! bins an array of primitives */
- __forceinline void binSubTreeRefs(const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
- {
- for (size_t i=begin; i<end; i++)
- {
- const PrimRef &prim = source[i];
- const vint4 bin0 = mapping.bin(prim.bounds().lower);
- const vint4 bin1 = mapping.bin(prim.bounds().upper);
-
- for (size_t dim=0; dim<3; dim++)
- {
- if (unlikely(mapping.invalid(dim)))
- continue;
-
- const size_t l = bin0[dim];
- const size_t r = bin1[dim];
-
- const unsigned int n = prim.primID();
-
- // same bin optimization
- if (likely(l == r))
- {
- add(dim,l,l,l,prim.bounds(),n);
- continue;
- }
- const size_t bin_start = bin0[dim];
- const size_t bin_end = bin1[dim];
- for (size_t bin=bin_start; bin<bin_end; bin++)
- add(dim,l,r,bin,prim.bounds(),n);
- }
- }
- }
-
- /*! merges in other binning information */
- void merge (const SpatialBinInfo& other)
- {
- for (size_t i=0; i<BINS; i++)
- {
- numBegin[i] += other.numBegin[i];
- numEnd [i] += other.numEnd [i];
- bounds[i][0].extend(other.bounds[i][0]);
- bounds[i][1].extend(other.bounds[i][1]);
- bounds[i][2].extend(other.bounds[i][2]);
- }
- }
-
- /*! merges in other binning information */
- static __forceinline const SpatialBinInfo reduce (const SpatialBinInfo& a, const SpatialBinInfo& b)
- {
- SpatialBinInfo c(empty);
- for (size_t i=0; i<BINS; i++)
- {
- c.numBegin[i] += a.numBegin[i]+b.numBegin[i];
- c.numEnd [i] += a.numEnd [i]+b.numEnd [i];
- c.bounds[i][0] = embree::merge(a.bounds[i][0],b.bounds[i][0]);
- c.bounds[i][1] = embree::merge(a.bounds[i][1],b.bounds[i][1]);
- c.bounds[i][2] = embree::merge(a.bounds[i][2],b.bounds[i][2]);
- }
- return c;
- }
-
- /*! finds the best split by scanning binning information */
- SpatialBinSplit<BINS> best(const SpatialBinMapping<BINS>& mapping, const size_t blocks_shift) const
- {
- /* sweep from right to left and compute parallel prefix of merged bounds */
- vfloat4 rAreas[BINS];
- vuint4 rCounts[BINS];
- vuint4 count = 0; BBox3fa bx = empty; BBox3fa by = empty; BBox3fa bz = empty;
- for (size_t i=BINS-1; i>0; i--)
- {
- count += numEnd[i];
- rCounts[i] = count;
- bx.extend(bounds[i][0]); rAreas[i][0] = halfArea(bx);
- by.extend(bounds[i][1]); rAreas[i][1] = halfArea(by);
- bz.extend(bounds[i][2]); rAreas[i][2] = halfArea(bz);
- rAreas[i][3] = 0.0f;
- }
-
- /* sweep from left to right and compute SAH */
- vuint4 blocks_add = (1 << blocks_shift)-1;
- vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; vuint4 vbestlCount = 0; vuint4 vbestrCount = 0;
- count = 0; bx = empty; by = empty; bz = empty;
- for (size_t i=1; i<BINS; i++, ii+=1)
- {
- count += numBegin[i-1];
- bx.extend(bounds[i-1][0]); float Ax = halfArea(bx);
- by.extend(bounds[i-1][1]); float Ay = halfArea(by);
- bz.extend(bounds[i-1][2]); float Az = halfArea(bz);
- const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
- const vfloat4 rArea = rAreas[i];
- const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift);
- const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
- const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));
- // const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));
- const vbool4 mask = sah < vbestSAH;
- vbestPos = select(mask,ii ,vbestPos);
- vbestSAH = select(mask,sah,vbestSAH);
- vbestlCount = select(mask,count,vbestlCount);
- vbestrCount = select(mask,rCounts[i],vbestrCount);
- }
-
- /* find best dimension */
- float bestSAH = inf;
- int bestDim = -1;
- int bestPos = 0;
- unsigned int bestlCount = 0;
- unsigned int bestrCount = 0;
- for (int dim=0; dim<3; dim++)
- {
- /* ignore zero sized dimensions */
- if (unlikely(mapping.invalid(dim)))
- continue;
-
- /* test if this is a better dimension */
- if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
- bestDim = dim;
- bestPos = vbestPos[dim];
- bestSAH = vbestSAH[dim];
- bestlCount = vbestlCount[dim];
- bestrCount = vbestrCount[dim];
- }
- }
- assert(bestSAH >= 0.0f);
-
- /* return invalid split if no split found */
- if (bestDim == -1)
- return SpatialBinSplit<BINS>(inf,-1,0,mapping);
-
- /* return best found split */
- return SpatialBinSplit<BINS>(bestSAH,bestDim,bestPos,bestlCount,bestrCount,1.0f,mapping);
- }
-
- private:
- BBox3fa bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
- vuint4 numBegin[BINS]; //!< number of primitives starting in bin
- vuint4 numEnd[BINS]; //!< number of primitives ending in bin
- };
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial_array.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial_array.h
deleted file mode 100644
index 911dcf950c..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial_array.h
+++ /dev/null
@@ -1,552 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "heuristic_binning.h"
-#include "heuristic_spatial.h"
-
-namespace embree
-{
- namespace isa
- {
-#if 0
-#define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.2f
-#define SPATIAL_ASPLIT_SAH_THRESHOLD 0.95f
-#define SPATIAL_ASPLIT_AREA_THRESHOLD 0.0f
-#else
-#define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.1f
-#define SPATIAL_ASPLIT_SAH_THRESHOLD 0.99f
-#define SPATIAL_ASPLIT_AREA_THRESHOLD 0.000005f
-#endif
-
- struct PrimInfoExtRange : public CentGeomBBox3fa, public extended_range<size_t>
- {
- __forceinline PrimInfoExtRange() {
- }
-
- __forceinline PrimInfoExtRange(EmptyTy)
- : CentGeomBBox3fa(EmptyTy()), extended_range<size_t>(0,0,0) {}
-
- __forceinline PrimInfoExtRange(size_t begin, size_t end, size_t ext_end, const CentGeomBBox3fa& centGeomBounds)
- : CentGeomBBox3fa(centGeomBounds), extended_range<size_t>(begin,end,ext_end) {}
-
- __forceinline float leafSAH() const {
- return expectedApproxHalfArea(geomBounds)*float(size());
- }
-
- __forceinline float leafSAH(size_t block_shift) const {
- return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
- }
- };
-
- template<typename ObjectSplit, typename SpatialSplit>
- struct Split2
- {
- __forceinline Split2 () {}
-
- __forceinline Split2 (const Split2& other)
- {
- spatial = other.spatial;
- sah = other.sah;
- if (spatial) spatialSplit() = other.spatialSplit();
- else objectSplit() = other.objectSplit();
- }
-
- __forceinline Split2& operator= (const Split2& other)
- {
- spatial = other.spatial;
- sah = other.sah;
- if (spatial) spatialSplit() = other.spatialSplit();
- else objectSplit() = other.objectSplit();
- return *this;
- }
-
- __forceinline ObjectSplit& objectSplit() { return *( ObjectSplit*)data; }
- __forceinline const ObjectSplit& objectSplit() const { return *(const ObjectSplit*)data; }
-
- __forceinline SpatialSplit& spatialSplit() { return *( SpatialSplit*)data; }
- __forceinline const SpatialSplit& spatialSplit() const { return *(const SpatialSplit*)data; }
-
- __forceinline Split2 (const ObjectSplit& objectSplit, float sah)
- : spatial(false), sah(sah)
- {
- new (data) ObjectSplit(objectSplit);
- }
-
- __forceinline Split2 (const SpatialSplit& spatialSplit, float sah)
- : spatial(true), sah(sah)
- {
- new (data) SpatialSplit(spatialSplit);
- }
-
- __forceinline float splitSAH() const {
- return sah;
- }
-
- __forceinline bool valid() const {
- return sah < float(inf);
- }
-
- public:
- __aligned(64) char data[sizeof(ObjectSplit) > sizeof(SpatialSplit) ? sizeof(ObjectSplit) : sizeof(SpatialSplit)];
- bool spatial;
- float sah;
- };
-
- /*! Performs standard object binning */
- template<typename PrimitiveSplitterFactory, typename PrimRef, size_t OBJECT_BINS, size_t SPATIAL_BINS>
- struct HeuristicArraySpatialSAH
- {
- typedef BinSplit<OBJECT_BINS> ObjectSplit;
- typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> ObjectBinner;
-
- typedef SpatialBinSplit<SPATIAL_BINS> SpatialSplit;
- typedef SpatialBinInfo<SPATIAL_BINS,PrimRef> SpatialBinner;
-
- //typedef extended_range<size_t> Set;
- typedef Split2<ObjectSplit,SpatialSplit> Split;
-
-#if defined(__AVX512ER__) // KNL
- static const size_t PARALLEL_THRESHOLD = 3*1024;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 768;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-#else
- static const size_t PARALLEL_THRESHOLD = 3*1024;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-#endif
-
- static const size_t MOVE_STEP_SIZE = 64;
- static const size_t CREATE_SPLITS_STEP_SIZE = 64;
-
- __forceinline HeuristicArraySpatialSAH ()
- : prims0(nullptr) {}
-
- /*! remember prim array */
- __forceinline HeuristicArraySpatialSAH (const PrimitiveSplitterFactory& splitterFactory, PrimRef* prims0, const CentGeomBBox3fa& root_info)
- : prims0(prims0), splitterFactory(splitterFactory), root_info(root_info) {}
-
-
- /*! compute extended ranges */
- __noinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
- {
- assert(set.ext_range_size() > 0);
- const float left_factor = (float)lweight / (lweight + rweight);
- const size_t ext_range_size = set.ext_range_size();
- const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);
- const size_t right_ext_range_size = ext_range_size - left_ext_range_size;
- lset.set_ext_range(lset.end() + left_ext_range_size);
- rset.set_ext_range(rset.end() + right_ext_range_size);
- }
-
- /*! move ranges */
- __noinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- const size_t left_ext_range_size = lset.ext_range_size();
- const size_t right_size = rset.size();
-
- /* has the left child an extended range? */
- if (left_ext_range_size > 0)
- {
- /* left extended range smaller than right range ? */
- if (left_ext_range_size < right_size)
- {
- /* only move a small part of the beginning of the right range to the end */
- parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++)
- prims0[i+right_size] = prims0[i];
- });
- }
- else
- {
- /* no overlap, move entire right range to new location, can be made fully parallel */
- parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {
- for (size_t i=r.begin(); i<r.end(); i++)
- prims0[i+left_ext_range_size] = prims0[i];
- });
- }
- /* update right range */
- assert(rset.ext_end() + left_ext_range_size == set.ext_end());
- rset.move_right(left_ext_range_size);
- }
- }
-
- /*! finds the best split */
- const Split find(const PrimInfoExtRange& set, const size_t logBlockSize)
- {
- SplitInfo oinfo;
- const ObjectSplit object_split = object_find(set,logBlockSize,oinfo);
- const float object_split_sah = object_split.splitSAH();
-
- if (unlikely(set.has_ext_range()))
- {
- const BBox3fa overlap = intersect(oinfo.leftBounds, oinfo.rightBounds);
-
- /* do only spatial splits if the child bounds overlap */
- if (safeArea(overlap) >= SPATIAL_ASPLIT_AREA_THRESHOLD*safeArea(root_info.geomBounds) &&
- safeArea(overlap) >= SPATIAL_ASPLIT_OVERLAP_THRESHOLD*safeArea(set.geomBounds))
- {
- const SpatialSplit spatial_split = spatial_find(set, logBlockSize);
- const float spatial_split_sah = spatial_split.splitSAH();
-
- /* valid spatial split, better SAH and number of splits do not exceed extended range */
- if (spatial_split_sah < SPATIAL_ASPLIT_SAH_THRESHOLD*object_split_sah &&
- spatial_split.left + spatial_split.right - set.size() <= set.ext_range_size())
- {
- return Split(spatial_split,spatial_split_sah);
- }
- }
- }
-
- return Split(object_split,object_split_sah);
- }
-
- /*! finds the best object split */
- __forceinline const ObjectSplit object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
- {
- if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize,info);
- else return parallel_object_find (set,logBlockSize,info);
- }
-
- /*! finds the best object split */
- __noinline const ObjectSplit sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
- {
- ObjectBinner binner(empty);
- const BinMapping<OBJECT_BINS> mapping(set);
- binner.bin(prims0,set.begin(),set.end(),mapping);
- ObjectSplit s = binner.best(mapping,logBlockSize);
- binner.getSplitInfo(mapping, s, info);
- return s;
- }
-
- /*! finds the best split */
- __noinline const ObjectSplit parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
- {
- ObjectBinner binner(empty);
- const BinMapping<OBJECT_BINS> mapping(set);
- const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
- binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,
- [&] (const range<size_t>& r) -> ObjectBinner { ObjectBinner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner; },
- [&] (const ObjectBinner& b0, const ObjectBinner& b1) -> ObjectBinner { ObjectBinner r = b0; r.merge(b1,_mapping.size()); return r; });
- ObjectSplit s = binner.best(mapping,logBlockSize);
- binner.getSplitInfo(mapping, s, info);
- return s;
- }
-
- /*! finds the best spatial split */
- __forceinline const SpatialSplit spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
- {
- if (set.size() < PARALLEL_THRESHOLD) return sequential_spatial_find(set, logBlockSize);
- else return parallel_spatial_find (set, logBlockSize);
- }
-
- /*! finds the best spatial split */
- __noinline const SpatialSplit sequential_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
- {
- SpatialBinner binner(empty);
- const SpatialBinMapping<SPATIAL_BINS> mapping(set);
- binner.bin2(splitterFactory,prims0,set.begin(),set.end(),mapping);
- /* todo: best spatial split not exeeding the extended range does not provide any benefit ?*/
- return binner.best(mapping,logBlockSize); //,set.ext_size());
- }
-
- __noinline const SpatialSplit parallel_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
- {
- SpatialBinner binner(empty);
- const SpatialBinMapping<SPATIAL_BINS> mapping(set);
- const SpatialBinMapping<SPATIAL_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
- binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,
- [&] (const range<size_t>& r) -> SpatialBinner {
- SpatialBinner binner(empty);
- binner.bin2(splitterFactory,prims0,r.begin(),r.end(),_mapping);
- return binner; },
- [&] (const SpatialBinner& b0, const SpatialBinner& b1) -> SpatialBinner { return SpatialBinner::reduce(b0,b1); });
- /* todo: best spatial split not exeeding the extended range does not provide any benefit ?*/
- return binner.best(mapping,logBlockSize); //,set.ext_size());
- }
-
-
- /*! subdivides primitives based on a spatial split */
- __noinline void create_spatial_splits(PrimInfoExtRange& set, const SpatialSplit& split, const SpatialBinMapping<SPATIAL_BINS> &mapping)
- {
- assert(set.has_ext_range());
- const size_t max_ext_range_size = set.ext_range_size();
- const size_t ext_range_start = set.end();
-
- /* atomic counter for number of primref splits */
- std::atomic<size_t> ext_elements;
- ext_elements.store(0);
-
- const float fpos = split.mapping.pos(split.pos,split.dim);
-
- const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
-
- parallel_for( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, [&](const range<size_t>& r) {
- for (size_t i=r.begin();i<r.end();i++)
- {
- const unsigned int splits = prims0[i].geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
-
- if (likely(splits <= 1)) continue; /* todo: does this ever happen ? */
-
- //int bin0 = split.mapping.bin(prims0[i].lower)[split.dim];
- //int bin1 = split.mapping.bin(prims0[i].upper)[split.dim];
- //if (unlikely(bin0 < split.pos && bin1 >= split.pos))
- if (unlikely(prims0[i].lower[split.dim] < fpos && prims0[i].upper[split.dim] > fpos))
- {
- assert(splits > 1);
-
- PrimRef left,right;
- const auto splitter = splitterFactory(prims0[i]);
- splitter(prims0[i],split.dim,fpos,left,right);
-
- // no empty splits
- if (unlikely(left.bounds().empty() || right.bounds().empty())) continue;
-
- left.lower.u = (left.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
- right.lower.u = (right.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
-
- const size_t ID = ext_elements.fetch_add(1);
-
- /* break if the number of subdivided elements are greater than the maximum allowed size */
- if (unlikely(ID >= max_ext_range_size))
- break;
-
- /* only write within the correct bounds */
- assert(ID < max_ext_range_size);
- prims0[i] = left;
- prims0[ext_range_start+ID] = right;
- }
- }
- });
-
- const size_t numExtElements = min(max_ext_range_size,ext_elements.load());
- assert(set.end()+numExtElements<=set.ext_end());
- set._end += numExtElements;
- }
-
- /*! array partitioning */
- void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- PrimInfoExtRange set = set_i;
-
- /* valid split */
- if (unlikely(!split.valid())) {
- deterministic_order(set);
- return splitFallback(set,lset,rset);
- }
-
- std::pair<size_t,size_t> ext_weights(0,0);
-
- if (unlikely(split.spatial))
- {
- create_spatial_splits(set,split.spatialSplit(), split.spatialSplit().mapping);
-
- /* spatial split */
- if (likely(set.size() < PARALLEL_THRESHOLD))
- ext_weights = sequential_spatial_split(split.spatialSplit(),set,lset,rset);
- else
- ext_weights = parallel_spatial_split(split.spatialSplit(),set,lset,rset);
- }
- else
- {
- /* object split */
- if (likely(set.size() < PARALLEL_THRESHOLD))
- ext_weights = sequential_object_split(split.objectSplit(),set,lset,rset);
- else
- ext_weights = parallel_object_split(split.objectSplit(),set,lset,rset);
- }
-
- /* if we have an extended range, set extended child ranges and move right split range */
- if (unlikely(set.has_ext_range()))
- {
- setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
- moveExtentedRange(set,lset,rset);
- }
- }
-
- /*! array partitioning */
- std::pair<size_t,size_t> sequential_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- PrimInfo local_left(empty);
- PrimInfo local_right(empty);
- const unsigned int splitPos = split.pos;
- const unsigned int splitDim = split.dim;
- const unsigned int splitDimMask = (unsigned int)1 << splitDim;
-
- const typename ObjectBinner::vint vSplitPos(splitPos);
- const typename ObjectBinner::vbool vSplitMask(splitDimMask);
- size_t center = serial_partitioning(prims0,
- begin,end,local_left,local_right,
- [&] (const PrimRef& ref) {
- return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask);
- },
- [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); });
- const size_t left_weight = local_left.end;
- const size_t right_weight = local_right.end;
-
- new (&lset) PrimInfoExtRange(begin,center,center,local_left);
- new (&rset) PrimInfoExtRange(center,end,end,local_right);
-
- assert(area(lset.geomBounds) >= 0.0f);
- assert(area(rset.geomBounds) >= 0.0f);
- return std::pair<size_t,size_t>(left_weight,right_weight);
- }
-
-
- /*! array partitioning */
- __noinline std::pair<size_t,size_t> sequential_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- PrimInfo local_left(empty);
- PrimInfo local_right(empty);
- const unsigned int splitPos = split.pos;
- const unsigned int splitDim = split.dim;
- const unsigned int splitDimMask = (unsigned int)1 << splitDim;
-
- /* init spatial mapping */
- const SpatialBinMapping<SPATIAL_BINS> &mapping = split.mapping;
- const vint4 vSplitPos(splitPos);
- const vbool4 vSplitMask( (int)splitDimMask );
-
- size_t center = serial_partitioning(prims0,
- begin,end,local_left,local_right,
- [&] (const PrimRef& ref) {
- const Vec3fa c = ref.bounds().center();
- return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask);
- },
- [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); });
-
- const size_t left_weight = local_left.end;
- const size_t right_weight = local_right.end;
-
- new (&lset) PrimInfoExtRange(begin,center,center,local_left);
- new (&rset) PrimInfoExtRange(center,end,end,local_right);
- assert(area(lset.geomBounds) >= 0.0f);
- assert(area(rset.geomBounds) >= 0.0f);
- return std::pair<size_t,size_t>(left_weight,right_weight);
- }
-
-
-
- /*! array partitioning */
- __noinline std::pair<size_t,size_t> parallel_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- PrimInfo left(empty);
- PrimInfo right(empty);
- const unsigned int splitPos = split.pos;
- const unsigned int splitDim = split.dim;
- const unsigned int splitDimMask = (unsigned int)1 << splitDim;
-
- const typename ObjectBinner::vint vSplitPos(splitPos);
- const typename ObjectBinner::vbool vSplitMask(splitDimMask);
- auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
-
- const size_t center = parallel_partitioning(
- prims0,begin,end,EmptyTy(),left,right,isLeft,
- [] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); },
- [] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); },
- PARALLEL_PARTITION_BLOCK_SIZE);
-
- const size_t left_weight = left.end;
- const size_t right_weight = right.end;
-
- left.begin = begin; left.end = center;
- right.begin = center; right.end = end;
-
- new (&lset) PrimInfoExtRange(begin,center,center,left);
- new (&rset) PrimInfoExtRange(center,end,end,right);
-
- assert(area(left.geomBounds) >= 0.0f);
- assert(area(right.geomBounds) >= 0.0f);
- return std::pair<size_t,size_t>(left_weight,right_weight);
- }
-
- /*! array partitioning */
- __noinline std::pair<size_t,size_t> parallel_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- PrimInfo left(empty);
- PrimInfo right(empty);
- const unsigned int splitPos = split.pos;
- const unsigned int splitDim = split.dim;
- const unsigned int splitDimMask = (unsigned int)1 << splitDim;
-
- /* init spatial mapping */
- const SpatialBinMapping<SPATIAL_BINS>& mapping = split.mapping;
- const vint4 vSplitPos(splitPos);
- const vbool4 vSplitMask( (int)splitDimMask );
-
- auto isLeft = [&] (const PrimRef &ref) {
- const Vec3fa c = ref.bounds().center();
- return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask); };
-
- const size_t center = parallel_partitioning(
- prims0,begin,end,EmptyTy(),left,right,isLeft,
- [] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); },
- [] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); },
- PARALLEL_PARTITION_BLOCK_SIZE);
-
- const size_t left_weight = left.end;
- const size_t right_weight = right.end;
-
- left.begin = begin; left.end = center;
- right.begin = center; right.end = end;
-
- new (&lset) PrimInfoExtRange(begin,center,center,left);
- new (&rset) PrimInfoExtRange(center,end,end,right);
-
- assert(area(left.geomBounds) >= 0.0f);
- assert(area(right.geomBounds) >= 0.0f);
- return std::pair<size_t,size_t>(left_weight,right_weight);
- }
-
- void deterministic_order(const PrimInfoExtRange& set)
- {
- /* required as parallel partition destroys original primitive order */
- std::sort(&prims0[set.begin()],&prims0[set.end()]);
- }
-
- void splitFallback(const PrimInfoExtRange& set,
- PrimInfoExtRange& lset,
- PrimInfoExtRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- const size_t center = (begin + end)/2;
-
- PrimInfo left(empty);
- for (size_t i=begin; i<center; i++) {
- left.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
- }
- const size_t lweight = left.end;
-
- PrimInfo right(empty);
- for (size_t i=center; i<end; i++) {
- right.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
- }
- const size_t rweight = right.end;
-
- new (&lset) PrimInfoExtRange(begin,center,center,left);
- new (&rset) PrimInfoExtRange(center,end,end,right);
-
- /* if we have an extended range */
- if (set.has_ext_range()) {
- setExtentedRanges(set,lset,rset,lweight,rweight);
- moveExtentedRange(set,lset,rset);
- }
- }
-
- private:
- PrimRef* const prims0;
- const PrimitiveSplitterFactory& splitterFactory;
- const CentGeomBBox3fa& root_info;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h
deleted file mode 100644
index ede0d04c78..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h
+++ /dev/null
@@ -1,188 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "priminfo.h"
-#include "../../common/algorithms/parallel_reduce.h"
-#include "../../common/algorithms/parallel_partition.h"
-
-namespace embree
-{
- namespace isa
- {
- /*! Performs standard object binning */
- struct HeuristicStrandSplit
- {
- typedef range<size_t> Set;
-
- static const size_t PARALLEL_THRESHOLD = 10000;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 4096;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 64;
-
- /*! stores all information to perform some split */
- struct Split
- {
- /*! construct an invalid split by default */
- __forceinline Split()
- : sah(inf), axis0(zero), axis1(zero) {}
-
- /*! constructs specified split */
- __forceinline Split(const float sah, const Vec3fa& axis0, const Vec3fa& axis1)
- : sah(sah), axis0(axis0), axis1(axis1) {}
-
- /*! calculates standard surface area heuristic for the split */
- __forceinline float splitSAH() const { return sah; }
-
- /*! test if this split is valid */
- __forceinline bool valid() const { return sah != float(inf); }
-
- public:
- float sah; //!< SAH cost of the split
- Vec3fa axis0, axis1; //!< axis the two strands are aligned into
- };
-
- __forceinline HeuristicStrandSplit () // FIXME: required?
- : scene(nullptr), prims(nullptr) {}
-
- /*! remember prim array */
- __forceinline HeuristicStrandSplit (Scene* scene, PrimRef* prims)
- : scene(scene), prims(prims) {}
-
- __forceinline const Vec3fa direction(const PrimRef& prim) {
- return scene->get(prim.geomID())->computeDirection(prim.primID());
- }
-
- __forceinline const BBox3fa bounds(const PrimRef& prim) {
- return scene->get(prim.geomID())->vbounds(prim.primID());
- }
-
- __forceinline const BBox3fa bounds(const LinearSpace3fa& space, const PrimRef& prim) {
- return scene->get(prim.geomID())->vbounds(space,prim.primID());
- }
-
- /*! finds the best split */
- const Split find(const range<size_t>& set, size_t logBlockSize)
- {
- Vec3fa axis0(0,0,1);
- uint64_t bestGeomPrimID = -1;
-
- /* curve with minimum ID determines first axis */
- for (size_t i=set.begin(); i<set.end(); i++)
- {
- const uint64_t geomprimID = prims[i].ID64();
- if (geomprimID >= bestGeomPrimID) continue;
- const Vec3fa axis = direction(prims[i]);
- if (sqr_length(axis) > 1E-18f) {
- axis0 = normalize(axis);
- bestGeomPrimID = geomprimID;
- }
- }
-
- /* find 2nd axis that is most misaligned with first axis and has minimum ID */
- float bestCos = 1.0f;
- Vec3fa axis1 = axis0;
- bestGeomPrimID = -1;
- for (size_t i=set.begin(); i<set.end(); i++)
- {
- const uint64_t geomprimID = prims[i].ID64();
- Vec3fa axisi = direction(prims[i]);
- float leni = length(axisi);
- if (leni == 0.0f) continue;
- axisi /= leni;
- float cos = abs(dot(axisi,axis0));
- if ((cos == bestCos && (geomprimID < bestGeomPrimID)) || cos < bestCos) {
- bestCos = cos; axis1 = axisi;
- bestGeomPrimID = geomprimID;
- }
- }
-
- /* partition the two strands */
- size_t lnum = 0, rnum = 0;
- BBox3fa lbounds = empty, rbounds = empty;
- const LinearSpace3fa space0 = frame(axis0).transposed();
- const LinearSpace3fa space1 = frame(axis1).transposed();
-
- for (size_t i=set.begin(); i<set.end(); i++)
- {
- PrimRef& prim = prims[i];
- const Vec3fa axisi = normalize(direction(prim));
- const float cos0 = abs(dot(axisi,axis0));
- const float cos1 = abs(dot(axisi,axis1));
-
- if (cos0 > cos1) { lnum++; lbounds.extend(bounds(space0,prim)); }
- else { rnum++; rbounds.extend(bounds(space1,prim)); }
- }
-
- /*! return an invalid split if we do not partition */
- if (lnum == 0 || rnum == 0)
- return Split(inf,axis0,axis1);
-
- /*! calculate sah for the split */
- const size_t lblocks = (lnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
- const size_t rblocks = (rnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
- const float sah = madd(float(lblocks),halfArea(lbounds),float(rblocks)*halfArea(rbounds));
- return Split(sah,axis0,axis1);
- }
-
- /*! array partitioning */
- void split(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
- {
- if (!split.valid()) {
- deterministic_order(set);
- return splitFallback(set,lset,rset);
- }
-
- const size_t begin = set.begin();
- const size_t end = set.end();
- CentGeomBBox3fa local_left(empty);
- CentGeomBBox3fa local_right(empty);
-
- auto primOnLeftSide = [&] (const PrimRef& prim) -> bool {
- const Vec3fa axisi = normalize(direction(prim));
- const float cos0 = abs(dot(axisi,split.axis0));
- const float cos1 = abs(dot(axisi,split.axis1));
- return cos0 > cos1;
- };
-
- auto mergePrimBounds = [this] (CentGeomBBox3fa& pinfo,const PrimRef& ref) {
- pinfo.extend(bounds(ref));
- };
-
- size_t center = serial_partitioning(prims,begin,end,local_left,local_right,primOnLeftSide,mergePrimBounds);
-
- new (&lset) PrimInfoRange(begin,center,local_left);
- new (&rset) PrimInfoRange(center,end,local_right);
- assert(area(lset.geomBounds) >= 0.0f);
- assert(area(rset.geomBounds) >= 0.0f);
- }
-
- void deterministic_order(const Set& set)
- {
- /* required as parallel partition destroys original primitive order */
- std::sort(&prims[set.begin()],&prims[set.end()]);
- }
-
- void splitFallback(const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
- {
- const size_t begin = set.begin();
- const size_t end = set.end();
- const size_t center = (begin + end)/2;
-
- CentGeomBBox3fa left(empty);
- for (size_t i=begin; i<center; i++)
- left.extend(bounds(prims[i]));
- new (&lset) PrimInfoRange(begin,center,left);
-
- CentGeomBBox3fa right(empty);
- for (size_t i=center; i<end; i++)
- right.extend(bounds(prims[i]));
- new (&rset) PrimInfoRange(center,end,right);
- }
-
- private:
- Scene* const scene;
- PrimRef* const prims;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_timesplit_array.h b/thirdparty/embree-aarch64/kernels/builders/heuristic_timesplit_array.h
deleted file mode 100644
index c999941a11..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/heuristic_timesplit_array.h
+++ /dev/null
@@ -1,237 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/primref_mb.h"
-#include "../../common/algorithms/parallel_filter.h"
-
-#define MBLUR_TIME_SPLIT_THRESHOLD 1.25f
-
-namespace embree
-{
- namespace isa
- {
- /*! Performs standard object binning */
- template<typename PrimRefMB, typename RecalculatePrimRef, size_t BINS>
- struct HeuristicMBlurTemporalSplit
- {
- typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
- typedef mvector<PrimRefMB>* PrimRefVector;
- typedef typename PrimRefMB::BBox BBox;
-
- static const size_t PARALLEL_THRESHOLD = 3 * 1024;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-
- HeuristicMBlurTemporalSplit (MemoryMonitorInterface* device, const RecalculatePrimRef& recalculatePrimRef)
- : device(device), recalculatePrimRef(recalculatePrimRef) {}
-
- struct TemporalBinInfo
- {
- __forceinline TemporalBinInfo () {
- }
-
- __forceinline TemporalBinInfo (EmptyTy)
- {
- for (size_t i=0; i<BINS-1; i++)
- {
- count0[i] = count1[i] = 0;
- bounds0[i] = bounds1[i] = empty;
- }
- }
-
- void bin(const PrimRefMB* prims, size_t begin, size_t end, BBox1f time_range, const SetMB& set, const RecalculatePrimRef& recalculatePrimRef)
- {
- for (int b=0; b<BINS-1; b++)
- {
- const float t = float(b+1)/float(BINS);
- const float ct = lerp(time_range.lower,time_range.upper,t);
- const float center_time = set.align_time(ct);
- if (center_time <= time_range.lower) continue;
- if (center_time >= time_range.upper) continue;
- const BBox1f dt0(time_range.lower,center_time);
- const BBox1f dt1(center_time,time_range.upper);
-
- /* find linear bounds for both time segments */
- for (size_t i=begin; i<end; i++)
- {
- if (prims[i].time_range_overlap(dt0))
- {
- const LBBox3fa bn0 = recalculatePrimRef.linearBounds(prims[i],dt0);
-#if MBLUR_BIN_LBBOX
- bounds0[b].extend(bn0);
-#else
- bounds0[b].extend(bn0.interpolate(0.5f));
-#endif
- count0[b] += prims[i].timeSegmentRange(dt0).size();
- }
-
- if (prims[i].time_range_overlap(dt1))
- {
- const LBBox3fa bn1 = recalculatePrimRef.linearBounds(prims[i],dt1);
-#if MBLUR_BIN_LBBOX
- bounds1[b].extend(bn1);
-#else
- bounds1[b].extend(bn1.interpolate(0.5f));
-#endif
- count1[b] += prims[i].timeSegmentRange(dt1).size();
- }
- }
- }
- }
-
- __forceinline void bin_parallel(const PrimRefMB* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, BBox1f time_range, const SetMB& set, const RecalculatePrimRef& recalculatePrimRef)
- {
- if (likely(end-begin < parallelThreshold)) {
- bin(prims,begin,end,time_range,set,recalculatePrimRef);
- }
- else
- {
- auto bin = [&](const range<size_t>& r) -> TemporalBinInfo {
- TemporalBinInfo binner(empty); binner.bin(prims, r.begin(), r.end(), time_range, set, recalculatePrimRef); return binner;
- };
- *this = parallel_reduce(begin,end,blockSize,TemporalBinInfo(empty),bin,merge2);
- }
- }
-
- /*! merges in other binning information */
- __forceinline void merge (const TemporalBinInfo& other)
- {
- for (size_t i=0; i<BINS-1; i++)
- {
- count0[i] += other.count0[i];
- count1[i] += other.count1[i];
- bounds0[i].extend(other.bounds0[i]);
- bounds1[i].extend(other.bounds1[i]);
- }
- }
-
- static __forceinline const TemporalBinInfo merge2(const TemporalBinInfo& a, const TemporalBinInfo& b) {
- TemporalBinInfo r = a; r.merge(b); return r;
- }
-
- Split best(int logBlockSize, BBox1f time_range, const SetMB& set)
- {
- float bestSAH = inf;
- float bestPos = 0.0f;
- for (int b=0; b<BINS-1; b++)
- {
- float t = float(b+1)/float(BINS);
- float ct = lerp(time_range.lower,time_range.upper,t);
- const float center_time = set.align_time(ct);
- if (center_time <= time_range.lower) continue;
- if (center_time >= time_range.upper) continue;
- const BBox1f dt0(time_range.lower,center_time);
- const BBox1f dt1(center_time,time_range.upper);
-
- /* calculate sah */
- const size_t lCount = (count0[b]+(size_t(1) << logBlockSize)-1) >> int(logBlockSize);
- const size_t rCount = (count1[b]+(size_t(1) << logBlockSize)-1) >> int(logBlockSize);
- float sah0 = expectedApproxHalfArea(bounds0[b])*float(lCount)*dt0.size();
- float sah1 = expectedApproxHalfArea(bounds1[b])*float(rCount)*dt1.size();
- if (unlikely(lCount == 0)) sah0 = 0.0f; // happens for initial splits when objects not alive over entire shutter time
- if (unlikely(rCount == 0)) sah1 = 0.0f;
- const float sah = sah0+sah1;
- if (sah < bestSAH) {
- bestSAH = sah;
- bestPos = center_time;
- }
- }
- return Split(bestSAH*MBLUR_TIME_SPLIT_THRESHOLD,(unsigned)Split::SPLIT_TEMPORAL,0,bestPos);
- }
-
- public:
- size_t count0[BINS-1];
- size_t count1[BINS-1];
- BBox bounds0[BINS-1];
- BBox bounds1[BINS-1];
- };
-
- /*! finds the best split */
- const Split find(const SetMB& set, const size_t logBlockSize)
- {
- assert(set.size() > 0);
- TemporalBinInfo binner(empty);
- binner.bin_parallel(set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,set.time_range,set,recalculatePrimRef);
- Split tsplit = binner.best((int)logBlockSize,set.time_range,set);
- if (!tsplit.valid()) tsplit.data = Split::SPLIT_FALLBACK; // use fallback split
- return tsplit;
- }
-
- __forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& tsplit, const SetMB& set, SetMB& lset, SetMB& rset)
- {
- assert(tsplit.sah != float(inf));
- assert(tsplit.fpos > set.time_range.lower);
- assert(tsplit.fpos < set.time_range.upper);
-
- float center_time = tsplit.fpos;
- const BBox1f time_range0(set.time_range.lower,center_time);
- const BBox1f time_range1(center_time,set.time_range.upper);
- mvector<PrimRefMB>& prims = *set.prims;
-
- /* calculate primrefs for first time range */
- std::unique_ptr<mvector<PrimRefMB>> new_vector(new mvector<PrimRefMB>(device, set.size()));
- PrimRefVector lprims = new_vector.get();
-
- auto reduction_func0 = [&] (const range<size_t>& r) {
- PrimInfoMB pinfo = empty;
- for (size_t i=r.begin(); i<r.end(); i++)
- {
- if (likely(prims[i].time_range_overlap(time_range0)))
- {
- const PrimRefMB& prim = recalculatePrimRef(prims[i],time_range0);
- (*lprims)[i-set.begin()] = prim;
- pinfo.add_primref(prim);
- }
- else
- {
- (*lprims)[i-set.begin()] = prims[i];
- }
- }
- return pinfo;
- };
- PrimInfoMB linfo = parallel_reduce(set.object_range,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD,PrimInfoMB(empty),reduction_func0,PrimInfoMB::merge2);
-
- /* primrefs for first time range are in lprims[0 .. set.size()) */
- /* some primitives may need to be filtered out */
- if (linfo.size() != set.size())
- linfo.object_range._end = parallel_filter(lprims->data(), size_t(0), set.size(), size_t(1024),
- [&](const PrimRefMB& prim) { return prim.time_range_overlap(time_range0); });
-
- lset = SetMB(linfo,lprims,time_range0);
-
- /* calculate primrefs for second time range */
- auto reduction_func1 = [&] (const range<size_t>& r) {
- PrimInfoMB pinfo = empty;
- for (size_t i=r.begin(); i<r.end(); i++)
- {
- if (likely(prims[i].time_range_overlap(time_range1)))
- {
- const PrimRefMB& prim = recalculatePrimRef(prims[i],time_range1);
- prims[i] = prim;
- pinfo.add_primref(prim);
- }
- }
- return pinfo;
- };
- PrimInfoMB rinfo = parallel_reduce(set.object_range,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD,PrimInfoMB(empty),reduction_func1,PrimInfoMB::merge2);
- rinfo.object_range = range<size_t>(set.begin(), set.begin() + rinfo.size());
-
- /* primrefs for second time range are in prims[set.begin() .. set.end()) */
- /* some primitives may need to be filtered out */
- if (rinfo.size() != set.size())
- rinfo.object_range._end = parallel_filter(prims.data(), set.begin(), set.end(), size_t(1024),
- [&](const PrimRefMB& prim) { return prim.time_range_overlap(time_range1); });
-
- rset = SetMB(rinfo,&prims,time_range1);
-
- return new_vector;
- }
-
- private:
- MemoryMonitorInterface* device; // device to report memory usage to
- const RecalculatePrimRef recalculatePrimRef;
- };
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/priminfo.h b/thirdparty/embree-aarch64/kernels/builders/priminfo.h
deleted file mode 100644
index 06c1388742..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/priminfo.h
+++ /dev/null
@@ -1,362 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/default.h"
-#include "../common/primref.h"
-#include "../common/primref_mb.h"
-
-namespace embree
-{
- // FIXME: maybe there's a better place for this util fct
- __forceinline float areaProjectedTriangle(const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2)
- {
- const Vec3fa e0 = v1-v0;
- const Vec3fa e1 = v2-v0;
- const Vec3fa d = cross(e0,e1);
- return fabs(d.x) + fabs(d.y) + fabs(d.z);
- }
-
- //namespace isa
- //{
- template<typename BBox>
- class CentGeom
- {
- public:
- __forceinline CentGeom () {}
-
- __forceinline CentGeom (EmptyTy)
- : geomBounds(empty), centBounds(empty) {}
-
- __forceinline CentGeom (const BBox& geomBounds, const BBox3fa& centBounds)
- : geomBounds(geomBounds), centBounds(centBounds) {}
-
- template<typename PrimRef>
- __forceinline void extend_primref(const PrimRef& prim)
- {
- BBox bounds; Vec3fa center;
- prim.binBoundsAndCenter(bounds,center);
- geomBounds.extend(bounds);
- centBounds.extend(center);
- }
-
- template<typename PrimRef>
- __forceinline void extend_center2(const PrimRef& prim)
- {
- BBox3fa bounds = prim.bounds();
- geomBounds.extend(bounds);
- centBounds.extend(bounds.center2());
- }
-
- __forceinline void extend(const BBox& geomBounds_) {
- geomBounds.extend(geomBounds_);
- centBounds.extend(center2(geomBounds_));
- }
-
- __forceinline void merge(const CentGeom& other)
- {
- geomBounds.extend(other.geomBounds);
- centBounds.extend(other.centBounds);
- }
-
- static __forceinline const CentGeom merge2(const CentGeom& a, const CentGeom& b) {
- CentGeom r = a; r.merge(b); return r;
- }
-
- public:
- BBox geomBounds; //!< geometry bounds of primitives
- BBox3fa centBounds; //!< centroid bounds of primitives
- };
-
- typedef CentGeom<BBox3fa> CentGeomBBox3fa;
-
- /*! stores bounding information for a set of primitives */
- template<typename BBox>
- class PrimInfoT : public CentGeom<BBox>
- {
- public:
- using CentGeom<BBox>::geomBounds;
- using CentGeom<BBox>::centBounds;
-
- __forceinline PrimInfoT () {}
-
- __forceinline PrimInfoT (EmptyTy)
- : CentGeom<BBox>(empty), begin(0), end(0) {}
-
- __forceinline PrimInfoT (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
- : CentGeom<BBox>(centGeomBounds), begin(begin), end(end) {}
-
- template<typename PrimRef>
- __forceinline void add_primref(const PrimRef& prim)
- {
- CentGeom<BBox>::extend_primref(prim);
- end++;
- }
-
- template<typename PrimRef>
- __forceinline void add_center2(const PrimRef& prim) {
- CentGeom<BBox>::extend_center2(prim);
- end++;
- }
-
- template<typename PrimRef>
- __forceinline void add_center2(const PrimRef& prim, const size_t i) {
- CentGeom<BBox>::extend_center2(prim);
- end+=i;
- }
-
- /*__forceinline void add(const BBox& geomBounds_) {
- CentGeom<BBox>::extend(geomBounds_);
- end++;
- }
-
- __forceinline void add(const BBox& geomBounds_, const size_t i) {
- CentGeom<BBox>::extend(geomBounds_);
- end+=i;
- }*/
-
- __forceinline void merge(const PrimInfoT& other)
- {
- CentGeom<BBox>::merge(other);
- begin += other.begin;
- end += other.end;
- }
-
- static __forceinline const PrimInfoT merge(const PrimInfoT& a, const PrimInfoT& b) {
- PrimInfoT r = a; r.merge(b); return r;
- }
-
- /*! returns the number of primitives */
- __forceinline size_t size() const {
- return end-begin;
- }
-
- __forceinline float halfArea() {
- return expectedApproxHalfArea(geomBounds);
- }
-
- __forceinline float leafSAH() const {
- return expectedApproxHalfArea(geomBounds)*float(size());
- //return halfArea(geomBounds)*blocks(num);
- }
-
- __forceinline float leafSAH(size_t block_shift) const {
- return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
- //return halfArea(geomBounds)*float((num+3) >> 2);
- //return halfArea(geomBounds)*blocks(num);
- }
-
- /*! stream output */
- friend embree_ostream operator<<(embree_ostream cout, const PrimInfoT& pinfo) {
- return cout << "PrimInfo { begin = " << pinfo.begin << ", end = " << pinfo.end << ", geomBounds = " << pinfo.geomBounds << ", centBounds = " << pinfo.centBounds << "}";
- }
-
- public:
- size_t begin,end; //!< number of primitives
- };
-
- typedef PrimInfoT<BBox3fa> PrimInfo;
- //typedef PrimInfoT<LBBox3fa> PrimInfoMB;
-
- /*! stores bounding information for a set of primitives */
- template<typename BBox>
- class PrimInfoMBT : public CentGeom<BBox>
- {
- public:
- using CentGeom<BBox>::geomBounds;
- using CentGeom<BBox>::centBounds;
-
- __forceinline PrimInfoMBT () {
- }
-
- __forceinline PrimInfoMBT (EmptyTy)
- : CentGeom<BBox>(empty), object_range(0,0), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
-
- __forceinline PrimInfoMBT (size_t begin, size_t end)
- : CentGeom<BBox>(empty), object_range(begin,end), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
-
- template<typename PrimRef>
- __forceinline void add_primref(const PrimRef& prim)
- {
- CentGeom<BBox>::extend_primref(prim);
- time_range.extend(prim.time_range);
- object_range._end++;
- num_time_segments += prim.size();
- if (max_num_time_segments < prim.totalTimeSegments()) {
- max_num_time_segments = prim.totalTimeSegments();
- max_time_range = prim.time_range;
- }
- }
-
- __forceinline void merge(const PrimInfoMBT& other)
- {
- CentGeom<BBox>::merge(other);
- time_range.extend(other.time_range);
- object_range._begin += other.object_range.begin();
- object_range._end += other.object_range.end();
- num_time_segments += other.num_time_segments;
- if (max_num_time_segments < other.max_num_time_segments) {
- max_num_time_segments = other.max_num_time_segments;
- max_time_range = other.max_time_range;
- }
- }
-
- static __forceinline const PrimInfoMBT merge2(const PrimInfoMBT& a, const PrimInfoMBT& b) {
- PrimInfoMBT r = a; r.merge(b); return r;
- }
-
- __forceinline size_t begin() const {
- return object_range.begin();
- }
-
- __forceinline size_t end() const {
- return object_range.end();
- }
-
- /*! returns the number of primitives */
- __forceinline size_t size() const {
- return object_range.size();
- }
-
- __forceinline float halfArea() const {
- return time_range.size()*expectedApproxHalfArea(geomBounds);
- }
-
- __forceinline float leafSAH() const {
- return time_range.size()*expectedApproxHalfArea(geomBounds)*float(num_time_segments);
- }
-
- __forceinline float leafSAH(size_t block_shift) const {
- return time_range.size()*expectedApproxHalfArea(geomBounds)*float((num_time_segments+(size_t(1)<<block_shift)-1) >> block_shift);
- }
-
- __forceinline float align_time(float ct) const
- {
- //return roundf(ct * float(numTimeSegments)) / float(numTimeSegments);
- float t0 = (ct-max_time_range.lower)/max_time_range.size();
- float t1 = roundf(t0 * float(max_num_time_segments)) / float(max_num_time_segments);
- return t1*max_time_range.size()+max_time_range.lower;
- }
-
- /*! stream output */
- friend embree_ostream operator<<(embree_ostream cout, const PrimInfoMBT& pinfo)
- {
- return cout << "PrimInfo { " <<
- "object_range = " << pinfo.object_range <<
- ", time_range = " << pinfo.time_range <<
- ", time_segments = " << pinfo.num_time_segments <<
- ", geomBounds = " << pinfo.geomBounds <<
- ", centBounds = " << pinfo.centBounds <<
- "}";
- }
-
- public:
- range<size_t> object_range; //!< primitive range
- size_t num_time_segments; //!< total number of time segments of all added primrefs
- size_t max_num_time_segments; //!< maximum number of time segments of a primitive
- BBox1f max_time_range; //!< time range of primitive with max_num_time_segments
- BBox1f time_range; //!< merged time range of primitives when merging prims, or additionally clipped with build time range when used in SetMB
- };
-
- typedef PrimInfoMBT<typename PrimRefMB::BBox> PrimInfoMB;
-
- struct SetMB : public PrimInfoMB
- {
- static const size_t PARALLEL_THRESHOLD = 3 * 1024;
- static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
- static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
-
- typedef mvector<PrimRefMB>* PrimRefVector;
-
- __forceinline SetMB() {}
-
- __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims)
- : PrimInfoMB(pinfo_i), prims(prims) {}
-
- __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, range<size_t> object_range_in, BBox1f time_range_in)
- : PrimInfoMB(pinfo_i), prims(prims)
- {
- object_range = object_range_in;
- time_range = intersect(time_range,time_range_in);
- }
-
- __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, BBox1f time_range_in)
- : PrimInfoMB(pinfo_i), prims(prims)
- {
- time_range = intersect(time_range,time_range_in);
- }
-
- void deterministic_order() const
- {
- /* required as parallel partition destroys original primitive order */
- PrimRefMB* prim = prims->data();
- std::sort(&prim[object_range.begin()],&prim[object_range.end()]);
- }
-
- template<typename RecalculatePrimRef>
- __forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef) const
- {
- auto reduce = [&](const range<size_t>& r) -> LBBox3fa
- {
- LBBox3fa cbounds(empty);
- for (size_t j = r.begin(); j < r.end(); j++)
- {
- PrimRefMB& ref = (*prims)[j];
- const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range);
- cbounds.extend(bn);
- };
- return cbounds;
- };
-
- return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
- reduce,
- [&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
- }
-
- template<typename RecalculatePrimRef>
- __forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
- {
- auto reduce = [&](const range<size_t>& r) -> LBBox3fa
- {
- LBBox3fa cbounds(empty);
- for (size_t j = r.begin(); j < r.end(); j++)
- {
- PrimRefMB& ref = (*prims)[j];
- const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range, space);
- cbounds.extend(bn);
- };
- return cbounds;
- };
-
- return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
- reduce,
- [&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
- }
-
- template<typename RecalculatePrimRef>
- const SetMB primInfo(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
- {
- auto computePrimInfo = [&](const range<size_t>& r) -> PrimInfoMB
- {
- PrimInfoMB pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- PrimRefMB& ref = (*prims)[j];
- PrimRefMB ref1 = recalculatePrimRef(ref,time_range,space);
- pinfo.add_primref(ref1);
- };
- return pinfo;
- };
-
- const PrimInfoMB pinfo = parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD,
- PrimInfoMB(empty), computePrimInfo, PrimInfoMB::merge2);
-
- return SetMB(pinfo,prims,object_range,time_range);
- }
-
- public:
- PrimRefVector prims;
- };
-//}
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/primrefgen.cpp b/thirdparty/embree-aarch64/kernels/builders/primrefgen.cpp
deleted file mode 100644
index e23de3df28..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/primrefgen.cpp
+++ /dev/null
@@ -1,244 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "primrefgen.h"
-#include "primrefgen_presplit.h"
-
-#include "../../common/algorithms/parallel_for_for.h"
-#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
-
-namespace embree
-{
- namespace isa
- {
- PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
- {
- ParallelPrefixSumState<PrimInfo> pstate;
-
- /* first try */
- progressMonitor(0);
- PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
- return geometry->createPrimRefArray(prims,r,r.begin(),geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- /* if we need to filter out geometry, run again */
- if (pinfo.size() != prims.size())
- {
- progressMonitor(0);
- pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
- return geometry->createPrimRefArray(prims,r,base.size(),geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- }
- return pinfo;
- }
-
- PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
- {
- ParallelForForPrefixSumState<PrimInfo> pstate;
- Scene::Iterator2 iter(scene,types,mblur);
-
- /* first try */
- progressMonitor(0);
- pstate.init(iter,size_t(1024));
- PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
- return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- /* if we need to filter out geometry, run again */
- if (pinfo.size() != prims.size())
- {
- progressMonitor(0);
- pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
- return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- }
- return pinfo;
- }
-
- PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime)
- {
- ParallelForForPrefixSumState<PrimInfo> pstate;
- Scene::Iterator2 iter(scene,types,true);
-
- /* first try */
- progressMonitor(0);
- pstate.init(iter,size_t(1024));
- PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
- return mesh->createPrimRefArrayMB(prims,itime,r,k,(unsigned)geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- /* if we need to filter out geometry, run again */
- if (pinfo.size() != prims.size())
- {
- progressMonitor(0);
- pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
- return mesh->createPrimRefArrayMB(prims,itime,r,base.size(),(unsigned)geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- }
- return pinfo;
- }
-
- PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
- {
- ParallelForForPrefixSumState<PrimInfoMB> pstate;
- Scene::Iterator2 iter(scene,types,true);
-
- /* first try */
- progressMonitor(0);
- pstate.init(iter,size_t(1024));
- PrimInfoMB pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfoMB {
- return mesh->createPrimRefMBArray(prims,t0t1,r,k,(unsigned)geomID);
- }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
-
- /* if we need to filter out geometry, run again */
- if (pinfo.size() != prims.size())
- {
- progressMonitor(0);
- pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
- return mesh->createPrimRefMBArray(prims,t0t1,r,base.size(),(unsigned)geomID);
- }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
- }
-
- /* the BVH starts with that time range, even though primitives might have smaller/larger time range */
- pinfo.time_range = t0t1;
- return pinfo;
- }
-
- template<typename Mesh>
- size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor)
- {
- size_t numPrimitives = morton.size();
-
- /* compute scene bounds */
- std::pair<size_t,BBox3fa> cb_empty(0,empty);
- auto cb = parallel_reduce
- ( size_t(0), numPrimitives, size_t(1024), cb_empty, [&](const range<size_t>& r) -> std::pair<size_t,BBox3fa>
- {
- size_t num = 0;
- BBox3fa bounds = empty;
-
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa prim_bounds = empty;
- if (unlikely(!mesh->buildBounds(j,&prim_bounds))) continue;
- bounds.extend(center2(prim_bounds));
- num++;
- }
- return std::make_pair(num,bounds);
- }, [] (const std::pair<size_t,BBox3fa>& a, const std::pair<size_t,BBox3fa>& b) {
- return std::make_pair(a.first + b.first,merge(a.second,b.second));
- });
-
-
- size_t numPrimitivesGen = cb.first;
- const BBox3fa centBounds = cb.second;
-
- /* compute morton codes */
- if (likely(numPrimitivesGen == numPrimitives))
- {
- /* fast path if all primitives were valid */
- BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
- parallel_for( size_t(0), numPrimitives, size_t(1024), [&](const range<size_t>& r) -> void {
- BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[r.begin()]);
- for (size_t j=r.begin(); j<r.end(); j++)
- generator(mesh->bounds(j),unsigned(j));
- });
- }
- else
- {
- /* slow path, fallback in case some primitives were invalid */
- ParallelPrefixSumState<size_t> pstate;
- BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
- parallel_prefix_sum( pstate, size_t(0), numPrimitives, size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t base) -> size_t {
- size_t num = 0;
- BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[r.begin()]);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (unlikely(!mesh->buildBounds(j,&bounds))) continue;
- generator(bounds,unsigned(j));
- num++;
- }
- return num;
- }, std::plus<size_t>());
-
- parallel_prefix_sum( pstate, size_t(0), numPrimitives, size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t base) -> size_t {
- size_t num = 0;
- BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[base]);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!mesh->buildBounds(j,&bounds)) continue;
- generator(bounds,unsigned(j));
- num++;
- }
- return num;
- }, std::plus<size_t>());
- }
- return numPrimitivesGen;
- }
-
- // ====================================================================================================
- // ====================================================================================================
- // ====================================================================================================
-
- // template for grid meshes
-
-#if 0
- template<>
- PrimInfo createPrimRefArray<GridMesh,false>(Scene* scene, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
- {
- PING;
- ParallelForForPrefixSumState<PrimInfo> pstate;
- Scene::Iterator<GridMesh,false> iter(scene);
-
- /* first try */
- progressMonitor(0);
- pstate.init(iter,size_t(1024));
- PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k) -> PrimInfo
- {
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!mesh->buildBounds(j,&bounds)) continue;
- const PrimRef prim(bounds,mesh->geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- /* if we need to filter out geometry, run again */
- if (pinfo.size() != prims.size())
- {
- progressMonitor(0);
- pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, const PrimInfo& base) -> PrimInfo
- {
- k = base.size();
- PrimInfo pinfo(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- {
- BBox3fa bounds = empty;
- if (!mesh->buildBounds(j,&bounds)) continue;
- const PrimRef prim(bounds,mesh->geomID,unsigned(j));
- pinfo.add_center2(prim);
- prims[k++] = prim;
- }
- return pinfo;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- }
- return pinfo;
- }
-#endif
-
- // ====================================================================================================
- // ====================================================================================================
- // ====================================================================================================
-
- IF_ENABLED_TRIS (template size_t createMortonCodeArray<TriangleMesh>(TriangleMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
- IF_ENABLED_QUADS(template size_t createMortonCodeArray<QuadMesh>(QuadMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
- IF_ENABLED_USER (template size_t createMortonCodeArray<UserGeometry>(UserGeometry* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
- IF_ENABLED_INSTANCE (template size_t createMortonCodeArray<Instance>(Instance* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/primrefgen.h b/thirdparty/embree-aarch64/kernels/builders/primrefgen.h
deleted file mode 100644
index 9919c945c3..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/primrefgen.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/scene.h"
-#include "../common/primref.h"
-#include "../common/primref_mb.h"
-#include "priminfo.h"
-#include "bvh_builder_morton.h"
-
-namespace embree
-{
- namespace isa
- {
- PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor);
-
- PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor);
-
- PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime = 0);
-
- PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
-
- template<typename Mesh>
- size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor);
- }
-}
-
diff --git a/thirdparty/embree-aarch64/kernels/builders/primrefgen_presplit.h b/thirdparty/embree-aarch64/kernels/builders/primrefgen_presplit.h
deleted file mode 100644
index 8bdb38b955..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/primrefgen_presplit.h
+++ /dev/null
@@ -1,371 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../builders/primrefgen.h"
-#include "../builders/heuristic_spatial.h"
-#include "../builders/splitter.h"
-
-#include "../../common/algorithms/parallel_for_for.h"
-#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
-
-#define DBG_PRESPLIT(x)
-#define CHECK_PRESPLIT(x)
-
-#define GRID_SIZE 1024
-#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 5
-#define MAX_PRESPLITS_PER_PRIMITIVE (1<<MAX_PRESPLITS_PER_PRIMITIVE_LOG)
-#define PRIORITY_CUTOFF_THRESHOLD 1.0f
-#define PRIORITY_SPLIT_POS_WEIGHT 1.5f
-
-namespace embree
-{
- namespace isa
- {
-
- struct PresplitItem
- {
- union {
- float priority;
- unsigned int data;
- };
- unsigned int index;
-
- __forceinline operator unsigned() const
- {
- return reinterpret_cast<const unsigned&>(priority);
- }
- __forceinline bool operator < (const PresplitItem& item) const
- {
- return (priority < item.priority);
- }
-
- template<typename Mesh>
- __forceinline static float compute_priority(const PrimRef &ref, Scene *scene, const Vec2i &mc)
- {
- const unsigned int geomID = ref.geomID();
- const unsigned int primID = ref.primID();
- const float area_aabb = area(ref.bounds());
- const float area_prim = ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID);
- const unsigned int diff = 31 - lzcnt(mc.x^mc.y);
- assert(area_prim <= area_aabb);
- //const float priority = powf((area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f);
- const float priority = sqrtf(sqrtf( (area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) ));
- assert(priority >= 0.0f && priority < FLT_LARGE);
- return priority;
- }
-
-
- };
-
- inline std::ostream &operator<<(std::ostream &cout, const PresplitItem& item) {
- return cout << "index " << item.index << " priority " << item.priority;
- };
-
- template<typename SplitterFactory>
- void splitPrimitive(SplitterFactory &Splitter,
- const PrimRef &prim,
- const unsigned int geomID,
- const unsigned int primID,
- const unsigned int split_level,
- const Vec3fa &grid_base,
- const float grid_scale,
- const float grid_extend,
- PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
- unsigned int& numSubPrims)
- {
- assert(split_level <= MAX_PRESPLITS_PER_PRIMITIVE_LOG);
- if (split_level == 0)
- {
- assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
- subPrims[numSubPrims++] = prim;
- }
- else
- {
- const Vec3fa lower = prim.lower;
- const Vec3fa upper = prim.upper;
- const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f);
- const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f);
- Vec3ia ilower(floor(glower));
- Vec3ia iupper(floor(gupper));
-
- /* this ignores dimensions that are empty */
- iupper = (Vec3ia)(select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper)));
-
- /* compute a morton code for the lower and upper grid coordinates. */
- const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
- const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
-
- /* if all bits are equal then we cannot split */
- if(unlikely(lower_code == upper_code))
- {
- assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
- subPrims[numSubPrims++] = prim;
- return;
- }
-
- /* compute octree level and dimension to perform the split in */
- const unsigned int diff = 31 - lzcnt(lower_code^upper_code);
- const unsigned int level = diff / 3;
- const unsigned int dim = diff % 3;
-
- /* now we compute the grid position of the split */
- const unsigned int isplit = iupper[dim] & ~((1<<level)-1);
-
- /* compute world space position of split */
- const float inv_grid_size = 1.0f / GRID_SIZE;
- const float fsplit = grid_base[dim] + isplit * inv_grid_size * grid_extend;
-
- assert(prim.lower[dim] <= fsplit &&
- prim.upper[dim] >= fsplit);
-
- /* split primitive */
- const auto splitter = Splitter(prim);
- BBox3fa left,right;
- splitter(prim.bounds(),dim,fsplit,left,right);
- assert(!left.empty());
- assert(!right.empty());
-
-
- splitPrimitive(Splitter,PrimRef(left ,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
- splitPrimitive(Splitter,PrimRef(right,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
- }
- }
-
-
- template<typename Mesh, typename SplitterFactory>
- PrimInfo createPrimRefArray_presplit(Geometry* geometry, unsigned int geomID, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
- {
- ParallelPrefixSumState<PrimInfo> pstate;
-
- /* first try */
- progressMonitor(0);
- PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
- return geometry->createPrimRefArray(prims,r,r.begin(),geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- /* if we need to filter out geometry, run again */
- if (pinfo.size() != numPrimRefs)
- {
- progressMonitor(0);
- pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
- return geometry->createPrimRefArray(prims,r,base.size(),geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- }
- return pinfo;
- }
-
- __forceinline Vec2i computeMC(const Vec3fa &grid_base, const float grid_scale, const PrimRef &ref)
- {
- const Vec3fa lower = ref.lower;
- const Vec3fa upper = ref.upper;
- const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f);
- const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f);
- Vec3ia ilower(floor(glower));
- Vec3ia iupper(floor(gupper));
-
- /* this ignores dimensions that are empty */
- iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
-
- /* compute a morton code for the lower and upper grid coordinates. */
- const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
- const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
- return Vec2i(lower_code,upper_code);
- }
-
- template<typename Mesh, typename SplitterFactory>
- PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
- {
- static const size_t MIN_STEP_SIZE = 128;
-
- ParallelForForPrefixSumState<PrimInfo> pstate;
- Scene::Iterator2 iter(scene,types,mblur);
-
- /* first try */
- progressMonitor(0);
- pstate.init(iter,size_t(1024));
- PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
- return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- /* if we need to filter out geometry, run again */
- if (pinfo.size() != numPrimRefs)
- {
- progressMonitor(0);
- pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
- return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID);
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
- }
-
- /* use correct number of primitives */
- size_t numPrimitives = pinfo.size();
- const size_t alloc_numPrimitives = prims.size();
- const size_t numSplitPrimitivesBudget = alloc_numPrimitives - numPrimitives;
-
- /* set up primitive splitter */
- SplitterFactory Splitter(scene);
-
-
- DBG_PRESPLIT(
- const size_t org_numPrimitives = pinfo.size();
- PRINT(numPrimitives);
- PRINT(alloc_numPrimitives);
- PRINT(numSplitPrimitivesBudget);
- );
-
- /* allocate double buffer presplit items */
- const size_t presplit_allocation_size = sizeof(PresplitItem)*alloc_numPrimitives;
- PresplitItem *presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64);
- PresplitItem *tmp_presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64);
-
- /* compute grid */
- const Vec3fa grid_base = pinfo.geomBounds.lower;
- const Vec3fa grid_diag = pinfo.geomBounds.size();
- const float grid_extend = max(grid_diag.x,max(grid_diag.y,grid_diag.z));
- const float grid_scale = grid_extend == 0.0f ? 0.0f : GRID_SIZE / grid_extend;
-
- /* init presplit items and get total sum */
- const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float {
- float sum = 0.0f;
- for (size_t i=r.begin(); i<r.end(); i++)
- {
- presplitItem[i].index = (unsigned int)i;
- const Vec2i mc = computeMC(grid_base,grid_scale,prims[i]);
- /* if all bits are equal then we cannot split */
- presplitItem[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority<Mesh>(prims[i],scene,mc) : 0.0f;
- /* FIXME: sum undeterministic */
- sum += presplitItem[i].priority;
- }
- return sum;
- },[](const float& a, const float& b) -> float { return a+b; });
-
- /* compute number of splits per primitive */
- const float inv_psum = 1.0f / psum;
- parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
- for (size_t i=r.begin(); i<r.end(); i++)
- {
- if (presplitItem[i].priority > 0.0f)
- {
- const float rel_p = (float)numSplitPrimitivesBudget * presplitItem[i].priority * inv_psum;
- if (rel_p >= PRIORITY_CUTOFF_THRESHOLD) // need at least a split budget that generates two sub-prims
- {
- presplitItem[i].priority = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f);
- //presplitItem[i].priority = min(floorf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG);
- assert(presplitItem[i].priority >= 0.0f && presplitItem[i].priority <= (float)MAX_PRESPLITS_PER_PRIMITIVE_LOG);
- }
- else
- presplitItem[i].priority = 0.0f;
- }
- }
- });
-
- auto isLeft = [&] (const PresplitItem &ref) { return ref.priority < PRIORITY_CUTOFF_THRESHOLD; };
- size_t center = parallel_partitioning(presplitItem,0,numPrimitives,isLeft,1024);
-
- /* anything to split ? */
- if (center < numPrimitives)
- {
- const size_t numPrimitivesToSplit = numPrimitives - center;
- assert(presplitItem[center].priority >= 1.0f);
-
- /* sort presplit items in ascending order */
- radix_sort_u32(presplitItem + center,tmp_presplitItem + center,numPrimitivesToSplit,1024);
-
- CHECK_PRESPLIT(
- parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
- for (size_t i=r.begin(); i<r.end(); i++)
- assert(presplitItem[i-1].priority <= presplitItem[i].priority);
- });
- );
-
- unsigned int *const primOffset0 = (unsigned int*)tmp_presplitItem;
- unsigned int *const primOffset1 = (unsigned int*)tmp_presplitItem + numPrimitivesToSplit;
-
- /* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */
- const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t {
- size_t sum = 0;
- for (size_t i=t.begin(); i<t.end(); i++)
- {
- PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
- assert(presplitItem[i].priority >= 1.0f);
- const unsigned int primrefID = presplitItem[i].index;
- const float prio = presplitItem[i].priority;
- const unsigned int geomID = prims[primrefID].geomID();
- const unsigned int primID = prims[primrefID].primID();
- const unsigned int split_levels = (unsigned int)prio;
- unsigned int numSubPrims = 0;
- splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
- assert(numSubPrims);
- numSubPrims--; // can reuse slot
- sum+=numSubPrims;
- presplitItem[i].data = (numSubPrims << MAX_PRESPLITS_PER_PRIMITIVE_LOG) | split_levels;
- primOffset0[i-center] = numSubPrims;
- }
- return sum;
- },[](const size_t& a, const size_t& b) -> size_t { return a+b; });
-
- /* if we are over budget, need to shrink the range */
- if (totalNumSubPrims > numSplitPrimitivesBudget)
- {
- size_t new_center = numPrimitives-1;
- size_t sum = 0;
- for (;new_center>=center;new_center--)
- {
- const unsigned int numSubPrims = presplitItem[new_center].data >> MAX_PRESPLITS_PER_PRIMITIVE_LOG;
- if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break;
- sum += numSubPrims;
- }
- new_center++;
- center = new_center;
- }
-
- /* parallel prefix sum to compute offsets for storing sub-primitives */
- const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>());
-
- /* iterate over range, and split primitives into sub primitives and append them to prims array */
- parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void {
- for (size_t j=rn.begin(); j<rn.end(); j++)
- {
- PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
- const unsigned int primrefID = presplitItem[j].index;
- const unsigned int geomID = prims[primrefID].geomID();
- const unsigned int primID = prims[primrefID].primID();
- const unsigned int split_levels = presplitItem[j].data & ((unsigned int)(1 << MAX_PRESPLITS_PER_PRIMITIVE_LOG)-1);
-
- assert(split_levels);
- assert(split_levels <= MAX_PRESPLITS_PER_PRIMITIVE_LOG);
- unsigned int numSubPrims = 0;
- splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
- const size_t newID = numPrimitives + primOffset1[j-center];
- assert(newID+numSubPrims <= alloc_numPrimitives);
- prims[primrefID] = subPrims[0];
- for (size_t i=1;i<numSubPrims;i++)
- prims[newID+i-1] = subPrims[i];
- }
- });
-
- numPrimitives += offset;
- DBG_PRESPLIT(
- PRINT(pinfo.size());
- PRINT(numPrimitives);
- PRINT((float)numPrimitives/org_numPrimitives));
- }
-
- /* recompute centroid bounding boxes */
- pinfo = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo {
- PrimInfo p(empty);
- for (size_t j=r.begin(); j<r.end(); j++)
- p.add_center2(prims[j]);
- return p;
- }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
-
- assert(pinfo.size() == numPrimitives);
-
- /* free double buffer presplit items */
- alignedFree(tmp_presplitItem);
- alignedFree(presplitItem);
- return pinfo;
- }
- }
-}
diff --git a/thirdparty/embree-aarch64/kernels/builders/splitter.h b/thirdparty/embree-aarch64/kernels/builders/splitter.h
deleted file mode 100644
index dbd6cf07c7..0000000000
--- a/thirdparty/embree-aarch64/kernels/builders/splitter.h
+++ /dev/null
@@ -1,169 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/scene.h"
-#include "../common/primref.h"
-
-namespace embree
-{
- namespace isa
- {
- template<size_t N>
- __forceinline void splitPolygon(const BBox3fa& bounds,
- const size_t dim,
- const float pos,
- const Vec3fa (&v)[N+1],
- const Vec3fa (&inv_length)[N],
- BBox3fa& left_o,
- BBox3fa& right_o)
- {
- BBox3fa left = empty, right = empty;
- /* clip triangle to left and right box by processing all edges */
- for (size_t i=0; i<N; i++)
- {
- const Vec3fa &v0 = v[i];
- const Vec3fa &v1 = v[i+1];
- const float v0d = v0[dim];
- const float v1d = v1[dim];
-
- if (v0d <= pos) left. extend(v0); // this point is on left side
- if (v0d >= pos) right.extend(v0); // this point is on right side
-
- if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location
- {
- assert((v1d-v0d) != 0.0f);
- const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length[i][dim]),v1-v0,v0);
- left.extend(c);
- right.extend(c);
- }
- }
-
- /* clip against current bounds */
- left_o = intersect(left,bounds);
- right_o = intersect(right,bounds);
- }
-
- template<size_t N>
- __forceinline void splitPolygon(const PrimRef& prim,
- const size_t dim,
- const float pos,
- const Vec3fa (&v)[N+1],
- PrimRef& left_o,
- PrimRef& right_o)
- {
- BBox3fa left = empty, right = empty;
- for (size_t i=0; i<N; i++)
- {
- const Vec3fa &v0 = v[i];
- const Vec3fa &v1 = v[i+1];
- const float v0d = v0[dim];
- const float v1d = v1[dim];
-
- if (v0d <= pos) left. extend(v0); // this point is on left side
- if (v0d >= pos) right.extend(v0); // this point is on right side
-
- if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location
- {
- assert((v1d-v0d) != 0.0f);
- const float inv_length = 1.0f/(v1d-v0d);
- const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length),v1-v0,v0);
- left.extend(c);
- right.extend(c);
- }
- }
-
- /* clip against current bounds */
- new (&left_o ) PrimRef(intersect(left ,prim.bounds()),prim.geomID(), prim.primID());
- new (&right_o) PrimRef(intersect(right,prim.bounds()),prim.geomID(), prim.primID());
- }
-
- struct TriangleSplitter
- {
- __forceinline TriangleSplitter(const Scene* scene, const PrimRef& prim)
- {
- const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
- const TriangleMesh* mesh = (const TriangleMesh*) scene->get(prim.geomID() & mask );
- TriangleMesh::Triangle tri = mesh->triangle(prim.primID());
- v[0] = mesh->vertex(tri.v[0]);
- v[1] = mesh->vertex(tri.v[1]);
- v[2] = mesh->vertex(tri.v[2]);
- v[3] = mesh->vertex(tri.v[0]);
- inv_length[0] = Vec3fa(1.0f) / (v[1]-v[0]);
- inv_length[1] = Vec3fa(1.0f) / (v[2]-v[1]);
- inv_length[2] = Vec3fa(1.0f) / (v[0]-v[2]);
- }
-
- __forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const {
- splitPolygon<3>(prim,dim,pos,v,left_o,right_o);
- }
-
- __forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const {
- splitPolygon<3>(prim,dim,pos,v,inv_length,left_o,right_o);
- }
-
- private:
- Vec3fa v[4];
- Vec3fa inv_length[3];
- };
-
- struct TriangleSplitterFactory
- {
- __forceinline TriangleSplitterFactory(const Scene* scene)
- : scene(scene) {}
-
- __forceinline TriangleSplitter operator() (const PrimRef& prim) const {
- return TriangleSplitter(scene,prim);
- }
-
- private:
- const Scene* scene;
- };
-
- struct QuadSplitter
- {
- __forceinline QuadSplitter(const Scene* scene, const PrimRef& prim)
- {
- const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
- const QuadMesh* mesh = (const QuadMesh*) scene->get(prim.geomID() & mask );
- QuadMesh::Quad quad = mesh->quad(prim.primID());
- v[0] = mesh->vertex(quad.v[0]);
- v[1] = mesh->vertex(quad.v[1]);
- v[2] = mesh->vertex(quad.v[2]);
- v[3] = mesh->vertex(quad.v[3]);
- v[4] = mesh->vertex(quad.v[0]);
- inv_length[0] = Vec3fa(1.0f) / (v[1]-v[0]);
- inv_length[1] = Vec3fa(1.0f) / (v[2]-v[1]);
- inv_length[2] = Vec3fa(1.0f) / (v[3]-v[2]);
- inv_length[3] = Vec3fa(1.0f) / (v[0]-v[3]);
- }
-
- __forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const {
- splitPolygon<4>(prim,dim,pos,v,left_o,right_o);
- }
-
- __forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const {
- splitPolygon<4>(prim,dim,pos,v,inv_length,left_o,right_o);
- }
-
- private:
- Vec3fa v[5];
- Vec3fa inv_length[4];
- };
-
- struct QuadSplitterFactory
- {
- __forceinline QuadSplitterFactory(const Scene* scene)
- : scene(scene) {}
-
- __forceinline QuadSplitter operator() (const PrimRef& prim) const {
- return QuadSplitter(scene,prim);
- }
-
- private:
- const Scene* scene;
- };
- }
-}
-