// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // TODO: // - adjust parallel build thresholds // - openNodesBasedOnExtend should consider max extended size #pragma once #include "heuristic_binning.h" #include "heuristic_spatial.h" /* stop opening of all bref.geomIDs are the same */ #define EQUAL_GEOMID_STOP_CRITERIA 1 /* 10% spatial extend threshold */ #define MAX_EXTEND_THRESHOLD 0.1f /* maximum is 8 children */ #define MAX_OPENED_CHILD_NODES 8 /* open until all build refs are below threshold size in one step */ #define USE_LOOP_OPENING 0 namespace embree { namespace isa { /*! Performs standard object binning */ template struct HeuristicArrayOpenMergeSAH { typedef BinSplit Split; typedef BinInfoT Binner; static const size_t PARALLEL_THRESHOLD = 1024; static const size_t PARALLEL_FIND_BLOCK_SIZE = 512; static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; static const size_t MOVE_STEP_SIZE = 64; static const size_t CREATE_SPLITS_STEP_SIZE = 128; __forceinline HeuristicArrayOpenMergeSAH () : prims0(nullptr) {} /*! remember prim array */ __forceinline HeuristicArrayOpenMergeSAH (const NodeOpenerFunc& nodeOpenerFunc, PrimRef* prims0, size_t max_open_size) : prims0(prims0), nodeOpenerFunc(nodeOpenerFunc), max_open_size(max_open_size) { assert(max_open_size <= MAX_OPENED_CHILD_NODES); } struct OpenHeuristic { __forceinline OpenHeuristic( const PrimInfoExtRange& pinfo ) { const Vec3fa diag = pinfo.geomBounds.size(); dim = maxDim(diag); assert(diag[dim] > 0.0f); inv_max_extend = 1.0f / diag[dim]; } __forceinline bool operator () ( PrimRef& prim ) const { return !prim.node.isLeaf() && prim.bounds().size()[dim] * inv_max_extend > MAX_EXTEND_THRESHOLD; } private: size_t dim; float inv_max_extend; }; /*! compute extended ranges */ __forceinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight) { assert(set.ext_range_size() > 0); const float left_factor = (float)lweight / (lweight + rweight); const size_t ext_range_size = set.ext_range_size(); const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size); const size_t right_ext_range_size = ext_range_size - left_ext_range_size; lset.set_ext_range(lset.end() + left_ext_range_size); rset.set_ext_range(rset.end() + right_ext_range_size); } /*! move ranges */ __forceinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset) { const size_t left_ext_range_size = lset.ext_range_size(); const size_t right_size = rset.size(); /* has the left child an extended range? */ if (left_ext_range_size > 0) { /* left extended range smaller than right range ? */ if (left_ext_range_size < right_size) { /* only move a small part of the beginning of the right range to the end */ parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range& r) { for (size_t i=r.begin(); i& r) { for (size_t i=r.begin(); i getProperties(const PrimInfoExtRange& set) { const OpenHeuristic heuristic(set); const unsigned int geomID = prims0[set.begin()].geomID(); auto body = [&] (const range& r) -> std::pair { bool commonGeomID = true; size_t opens = 0; for (size_t i=r.begin(); i(opens,commonGeomID); }; auto reduction = [&] (const std::pair& b0, const std::pair& b1) -> std::pair { return std::pair(b0.first+b1.first,b0.second && b1.second); }; return parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,std::pair(0,true),body,reduction); } // FIXME: should consider maximum available extended size __noinline void openNodesBasedOnExtend(PrimInfoExtRange& set) { const OpenHeuristic heuristic(set); const size_t ext_range_start = set.end(); if (false && set.size() < PARALLEL_THRESHOLD) { size_t extra_elements = 0; for (size_t i=set.begin(); i ext_elements; ext_elements.store(0); PrimInfo info = parallel_reduce( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, PrimInfo(empty), [&](const range& r) -> PrimInfo { PrimInfo info(empty); for (size_t i=r.begin(); i 0); if (unlikely(next_iteration_extra_elements == 0)) break; } } __noinline const Split find(PrimInfoExtRange& set, const size_t logBlockSize) { /* single element */ if (set.size() <= 1) return Split(); /* disable opening if there is no overlap */ const size_t D = 4; if (unlikely(set.has_ext_range() && set.size() <= D)) { bool disjoint = true; for (size_t j=set.begin(); j p(0,false); /* disable opening when all primitives are from same geometry */ if (unlikely(set.has_ext_range())) { p = getProperties(set); #if EQUAL_GEOMID_STOP_CRITERIA == 1 if (p.second) set.set_ext_range(set.end()); /* disable opening */ #endif } /* open nodes when we have sufficient space available */ if (unlikely(set.has_ext_range())) { #if USE_LOOP_OPENING == 1 openNodesBasedOnExtendLoop(set,p.first); #else if (p.first <= set.ext_range_size()) openNodesBasedOnExtend(set); #endif /* disable opening when insufficient space for opening a node available */ if (set.ext_range_size() < max_open_size-1) set.set_ext_range(set.end()); /* disable opening */ } /* find best split */ return object_find(set,logBlockSize); } /*! finds the best object split */ __forceinline const Split object_find(const PrimInfoExtRange& set,const size_t logBlockSize) { if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize); else return parallel_object_find (set,logBlockSize); } /*! finds the best object split */ __noinline const Split sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize) { Binner binner(empty); const BinMapping mapping(set.centBounds); binner.bin(prims0,set.begin(),set.end(),mapping); return binner.best(mapping,logBlockSize); } /*! finds the best split */ __noinline const Split parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize) { Binner binner(empty); const BinMapping mapping(set.centBounds); const BinMapping& _mapping = mapping; // CLANG 3.4 parser bug workaround auto body = [&] (const range& r) -> Binner { Binner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner; }; auto reduction = [&] (const Binner& b0, const Binner& b1) -> Binner { Binner r = b0; r.merge(b1,_mapping.size()); return r; }; binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,body,reduction); return binner.best(mapping,logBlockSize); } /*! array partitioning */ __noinline void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset) { PrimInfoExtRange set = set_i; /* valid split */ if (unlikely(!split.valid())) { deterministic_order(set); splitFallback(set,lset,rset); return; } std::pair ext_weights(0,0); /* object split */ if (likely(set.size() < PARALLEL_THRESHOLD)) ext_weights = sequential_object_split(split,set,lset,rset); else ext_weights = parallel_object_split(split,set,lset,rset); /* if we have an extended range, set extended child ranges and move right split range */ if (unlikely(set.has_ext_range())) { setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second); moveExtentedRange(set,lset,rset); } } /*! array partitioning */ std::pair sequential_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset) { const size_t begin = set.begin(); const size_t end = set.end(); PrimInfo local_left(empty); PrimInfo local_right(empty); const unsigned int splitPos = split.pos; const unsigned int splitDim = split.dim; const unsigned int splitDimMask = (unsigned int)1 << splitDim; const vint4 vSplitPos(splitPos); const vbool4 vSplitMask( (int)splitDimMask ); size_t center = serial_partitioning(prims0, begin,end,local_left,local_right, [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); }, [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); }); new (&lset) PrimInfoExtRange(begin,center,center,local_left); new (&rset) PrimInfoExtRange(center,end,end,local_right); assert(area(lset.geomBounds) >= 0.0f); assert(area(rset.geomBounds) >= 0.0f); return std::pair(local_left.size(),local_right.size()); } /*! array partitioning */ __noinline std::pair parallel_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset) { const size_t begin = set.begin(); const size_t end = set.end(); PrimInfo left(empty); PrimInfo right(empty); const unsigned int splitPos = split.pos; const unsigned int splitDim = split.dim; const unsigned int splitDimMask = (unsigned int)1 << splitDim; const vint4 vSplitPos(splitPos); const vbool4 vSplitMask( (int)splitDimMask ); auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); }; const size_t center = parallel_partitioning( prims0,begin,end,EmptyTy(),left,right,isLeft, [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); }, [] (PrimInfo& pinfo0,const PrimInfo& pinfo1) { pinfo0.merge(pinfo1); }, PARALLEL_PARTITION_BLOCK_SIZE); new (&lset) PrimInfoExtRange(begin,center,center,left); new (&rset) PrimInfoExtRange(center,end,end,right); assert(area(lset.geomBounds) >= 0.0f); assert(area(rset.geomBounds) >= 0.0f); return std::pair(left.size(),right.size()); } void deterministic_order(const extended_range& set) { /* required as parallel partition destroys original primitive order */ std::sort(&prims0[set.begin()],&prims0[set.end()]); } __forceinline void splitFallback(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset) { const size_t begin = set.begin(); const size_t end = set.end(); const size_t center = (begin + end)/2; PrimInfo left(empty); for (size_t i=begin; i