// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "heuristic_binning.h" namespace embree { namespace isa { /*! Performs standard object binning */ template struct UnalignedHeuristicArrayBinningSAH { typedef BinSplit Split; typedef BinInfoT Binner; typedef range Set; __forceinline UnalignedHeuristicArrayBinningSAH () // FIXME: required? : scene(nullptr), prims(nullptr) {} /*! remember prim array */ __forceinline UnalignedHeuristicArrayBinningSAH (Scene* scene, PrimRef* prims) : scene(scene), prims(prims) {} const LinearSpace3fa computeAlignedSpace(const range& set) { Vec3fa axis(0,0,1); uint64_t bestGeomPrimID = -1; /*! find curve with minimum ID that defines valid direction */ for (size_t i=set.begin(); i= bestGeomPrimID) continue; const Vec3fa axis1 = scene->get(geomID)->computeDirection(primID); if (sqr_length(axis1) > 1E-18f) { axis = normalize(axis1); bestGeomPrimID = geomprimID; } } return frame(axis).transposed(); } const PrimInfo computePrimInfo(const range& set, const LinearSpace3fa& space) { auto computeBounds = [&](const range& r) -> CentGeomBBox3fa { CentGeomBBox3fa bounds(empty); for (size_t i=r.begin(); iget(prims[i].geomID()); bounds.extend(mesh->vbounds(space,prims[i].primID())); } return bounds; }; const CentGeomBBox3fa bounds = parallel_reduce(set.begin(), set.end(), size_t(1024), size_t(4096), CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2); return PrimInfo(set.begin(),set.end(),bounds); } struct BinBoundsAndCenter { __forceinline BinBoundsAndCenter(Scene* scene, const LinearSpace3fa& space) : scene(scene), space(space) {} /*! returns center for binning */ __forceinline Vec3fa binCenter(const PrimRef& ref) const { Geometry* mesh = (Geometry*) scene->get(ref.geomID()); BBox3fa bounds = mesh->vbounds(space,ref.primID()); return embree::center2(bounds); } /*! returns bounds and centroid used for binning */ __forceinline void binBoundsAndCenter(const PrimRef& ref, BBox3fa& bounds_o, Vec3fa& center_o) const { Geometry* mesh = (Geometry*) scene->get(ref.geomID()); BBox3fa bounds = mesh->vbounds(space,ref.primID()); bounds_o = bounds; center_o = embree::center2(bounds); } private: Scene* scene; const LinearSpace3fa space; }; /*! finds the best split */ __forceinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize, const LinearSpace3fa& space) { if (likely(pinfo.size() < 10000)) return find_template(pinfo,logBlockSize,space); else return find_template(pinfo,logBlockSize,space); } /*! finds the best split */ template const Split find_template(const PrimInfoRange& set, const size_t logBlockSize, const LinearSpace3fa& space) { Binner binner(empty); const BinMapping mapping(set); BinBoundsAndCenter binBoundsAndCenter(scene,space); bin_serial_or_parallel(binner,prims,set.begin(),set.end(),size_t(4096),mapping,binBoundsAndCenter); return binner.best(mapping,logBlockSize); } /*! array partitioning */ __forceinline void split(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset) { if (likely(set.size() < 10000)) split_template(split,space,set,lset,rset); else split_template(split,space,set,lset,rset); } /*! array partitioning */ template __forceinline void split_template(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset) { if (!split.valid()) { deterministic_order(set); return splitFallback(set,lset,rset); } const size_t begin = set.begin(); const size_t end = set.end(); CentGeomBBox3fa local_left(empty); CentGeomBBox3fa local_right(empty); const int splitPos = split.pos; const int splitDim = split.dim; BinBoundsAndCenter binBoundsAndCenter(scene,space); size_t center = 0; if (likely(set.size() < 10000)) center = serial_partitioning(prims,begin,end,local_left,local_right, [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; }, [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); }); else center = parallel_partitioning(prims,begin,end,EmptyTy(),local_left,local_right, [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; }, [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); }, [] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); }, 128); new (&lset) PrimInfoRange(begin,center,local_left); new (&rset) PrimInfoRange(center,end,local_right); assert(area(lset.geomBounds) >= 0.0f); assert(area(rset.geomBounds) >= 0.0f); } void deterministic_order(const range& set) { /* required as parallel partition destroys original primitive order */ std::sort(&prims[set.begin()],&prims[set.end()]); } void splitFallback(const range& set, PrimInfoRange& lset, PrimInfoRange& rset) { const size_t begin = set.begin(); const size_t end = set.end(); const size_t center = (begin + end)/2; CentGeomBBox3fa left(empty); for (size_t i=begin; i struct UnalignedHeuristicArrayBinningMB { typedef BinSplit Split; typedef typename PrimRefMB::BBox BBox; typedef BinInfoT ObjectBinner; static const size_t PARALLEL_THRESHOLD = 3 * 1024; static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; UnalignedHeuristicArrayBinningMB(Scene* scene) : scene(scene) {} const LinearSpace3fa computeAlignedSpaceMB(Scene* scene, const SetMB& set) { Vec3fa axis0(0,0,1); uint64_t bestGeomPrimID = -1; /*! find curve with minimum ID that defines valid direction */ for (size_t i=set.begin(); i= bestGeomPrimID) continue; const Geometry* mesh = scene->get(geomID); const range tbounds = mesh->timeSegmentRange(set.time_range); if (tbounds.size() == 0) continue; const size_t t = (tbounds.begin()+tbounds.end())/2; const Vec3fa axis1 = mesh->computeDirection(primID,t); if (sqr_length(axis1) > 1E-18f) { axis0 = normalize(axis1); bestGeomPrimID = geomprimID; } } return frame(axis0).transposed(); } struct BinBoundsAndCenter { __forceinline BinBoundsAndCenter(Scene* scene, BBox1f time_range, const LinearSpace3fa& space) : scene(scene), time_range(time_range), space(space) {} /*! returns center for binning */ template __forceinline Vec3fa binCenter(const PrimRef& ref) const { Geometry* mesh = scene->get(ref.geomID()); LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range); return center2(lbounds.interpolate(0.5f)); } /*! returns bounds and centroid used for binning */ __noinline void binBoundsAndCenter (const PrimRefMB& ref, BBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX { Geometry* mesh = scene->get(ref.geomID()); LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range); bounds_o = lbounds.interpolate(0.5f); center_o = center2(bounds_o); } /*! returns bounds and centroid used for binning */ __noinline void binBoundsAndCenter (const PrimRefMB& ref, LBBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX { Geometry* mesh = scene->get(ref.geomID()); LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range); bounds_o = lbounds; center_o = center2(lbounds.interpolate(0.5f)); } private: Scene* scene; BBox1f time_range; const LinearSpace3fa space; }; /*! finds the best split */ const Split find(const SetMB& set, const size_t logBlockSize, const LinearSpace3fa& space) { BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space); ObjectBinner binner(empty); const BinMapping mapping(set.size(),set.centBounds); bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping,binBoundsAndCenter); Split osplit = binner.best(mapping,logBlockSize); osplit.sah *= set.time_range.size(); if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split return osplit; } /*! array partitioning */ __forceinline void split(const Split& split, const LinearSpace3fa& space, const SetMB& set, SetMB& lset, SetMB& rset) { BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space); const size_t begin = set.begin(); const size_t end = set.end(); PrimInfoMB left = empty; PrimInfoMB right = empty; const vint4 vSplitPos(split.pos); const vbool4 vSplitMask(1 << split.dim); auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref,binBoundsAndCenter) < vSplitPos) & vSplitMask); }; auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); }; auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); }; size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD); new (&lset) SetMB(left,set.prims,range(begin,center),set.time_range); new (&rset) SetMB(right,set.prims,range(center,end ),set.time_range); } private: Scene* scene; }; } }