// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../common/scene.h" #include "priminfo.h" namespace embree { static const unsigned int RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS = 5; namespace isa { /*! mapping into bins */ template struct SpatialBinMapping { public: __forceinline SpatialBinMapping() {} /*! calculates the mapping */ __forceinline SpatialBinMapping(const CentGeomBBox3fa& pinfo) { const vfloat4 lower = (vfloat4) pinfo.geomBounds.lower; const vfloat4 upper = (vfloat4) pinfo.geomBounds.upper; const vfloat4 eps = 128.0f*vfloat4(ulp)*max(abs(lower),abs(upper)); const vfloat4 diag = max(eps,(vfloat4) pinfo.geomBounds.size()); scale = select(upper-lower <= eps,vfloat4(0.0f),vfloat4(BINS)/diag); ofs = (vfloat4) pinfo.geomBounds.lower; inv_scale = 1.0f / scale; } /*! slower but safe binning */ __forceinline vint4 bin(const Vec3fa& p) const { const vint4 i = floori((vfloat4(p)-ofs)*scale); return clamp(i,vint4(0),vint4(BINS-1)); } __forceinline std::pair bin(const BBox3fa& b) const { #if defined(__AVX__) const vfloat8 ofs8(ofs); const vfloat8 scale8(scale); const vint8 lu = floori((vfloat8::loadu(&b)-ofs8)*scale8); const vint8 c_lu = clamp(lu,vint8(zero),vint8(BINS-1)); return std::pair(extract4<0>(c_lu),extract4<1>(c_lu)); #else const vint4 lower = floori((vfloat4(b.lower)-ofs)*scale); const vint4 upper = floori((vfloat4(b.upper)-ofs)*scale); const vint4 c_lower = clamp(lower,vint4(0),vint4(BINS-1)); const vint4 c_upper = clamp(upper,vint4(0),vint4(BINS-1)); return std::pair(c_lower,c_upper); #endif } /*! calculates left spatial position of bin */ __forceinline float pos(const size_t bin, const size_t dim) const { return madd(float(bin),inv_scale[dim],ofs[dim]); } /*! calculates left spatial position of bin */ template __forceinline vfloat posN(const vfloat bin, const size_t dim) const { return madd(bin,vfloat(inv_scale[dim]),vfloat(ofs[dim])); } /*! returns true if the mapping is invalid in some dimension */ __forceinline bool invalid(const size_t dim) const { return scale[dim] == 0.0f; } public: vfloat4 ofs,scale,inv_scale; //!< linear function that maps to bin ID }; /*! stores all information required to perform some split */ template struct SpatialBinSplit { /*! construct an invalid split by default */ __forceinline SpatialBinSplit() : sah(inf), dim(-1), pos(0), left(-1), right(-1), factor(1.0f) {} /*! constructs specified split */ __forceinline SpatialBinSplit(float sah, int dim, int pos, const SpatialBinMapping& mapping) : sah(sah), dim(dim), pos(pos), left(-1), right(-1), factor(1.0f), mapping(mapping) {} /*! constructs specified split */ __forceinline SpatialBinSplit(float sah, int dim, int pos, int left, int right, float factor, const SpatialBinMapping& mapping) : sah(sah), dim(dim), pos(pos), left(left), right(right), factor(factor), mapping(mapping) {} /*! tests if this split is valid */ __forceinline bool valid() const { return dim != -1; } /*! calculates surface area heuristic for performing the split */ __forceinline float splitSAH() const { return sah; } /*! stream output */ friend embree_ostream operator<<(embree_ostream cout, const SpatialBinSplit& split) { return cout << "SpatialBinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << ", left = " << split.left << ", right = " << split.right << ", factor = " << split.factor << "}"; } public: float sah; //!< SAH cost of the split int dim; //!< split dimension int pos; //!< split position int left; //!< number of elements on the left side int right; //!< number of elements on the right side float factor; //!< factor splitting the extended range SpatialBinMapping mapping; //!< mapping into bins }; /*! stores all binning information */ template struct __aligned(64) SpatialBinInfo { SpatialBinInfo() { } __forceinline SpatialBinInfo(EmptyTy) { clear(); } /*! clears the bin info */ __forceinline void clear() { for (size_t i=0; i __forceinline void bin(const SplitPrimitive& splitPrimitive, const PrimRef* prims, size_t N, const SpatialBinMapping& mapping) { for (size_t i=0; i> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); if (unlikely(splits == 1)) { const vint4 bin = mapping.bin(center(prim.bounds())); for (size_t dim=0; dim<3; dim++) { assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS); numBegin[bin[dim]][dim]++; numEnd [bin[dim]][dim]++; bounds [bin[dim]][dim].extend(prim.bounds()); } } else { const vint4 bin0 = mapping.bin(prim.bounds().lower); const vint4 bin1 = mapping.bin(prim.bounds().upper); for (size_t dim=0; dim<3; dim++) { size_t bin; PrimRef rest = prim; size_t l = bin0[dim]; size_t r = bin1[dim]; // same bin optimization if (likely(l == r)) { numBegin[l][dim]++; numEnd [l][dim]++; bounds [l][dim].extend(prim.bounds()); continue; } for (bin=(size_t)bin0[dim]; bin<(size_t)bin1[dim]; bin++) { const float pos = mapping.pos(bin+1,dim); PrimRef left,right; splitPrimitive(rest,(int)dim,pos,left,right); if (unlikely(left.bounds().empty())) l++; bounds[bin][dim].extend(left.bounds()); rest = right; } if (unlikely(rest.bounds().empty())) r--; numBegin[l][dim]++; numEnd [r][dim]++; bounds [bin][dim].extend(rest.bounds()); } } } } /*! bins a range of primitives inside an array */ template void bin(const SplitPrimitive& splitPrimitive, const PrimRef* prims, size_t begin, size_t end, const SpatialBinMapping& mapping) { bin(splitPrimitive,prims+begin,end-begin,mapping); } /*! bins an array of primitives */ template __forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping& mapping) { for (size_t i=begin; i& mapping) { for (size_t i=begin; i best(const SpatialBinMapping& mapping, const size_t blocks_shift) const { /* sweep from right to left and compute parallel prefix of merged bounds */ vfloat4 rAreas[BINS]; vuint4 rCounts[BINS]; vuint4 count = 0; BBox3fa bx = empty; BBox3fa by = empty; BBox3fa bz = empty; for (size_t i=BINS-1; i>0; i--) { count += numEnd[i]; rCounts[i] = count; bx.extend(bounds[i][0]); rAreas[i][0] = halfArea(bx); by.extend(bounds[i][1]); rAreas[i][1] = halfArea(by); bz.extend(bounds[i][2]); rAreas[i][2] = halfArea(bz); rAreas[i][3] = 0.0f; } /* sweep from left to right and compute SAH */ vuint4 blocks_add = (1 << blocks_shift)-1; vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; vuint4 vbestlCount = 0; vuint4 vbestrCount = 0; count = 0; bx = empty; by = empty; bz = empty; for (size_t i=1; i> (unsigned int)(blocks_shift); const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift); const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount)); // const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount))); const vbool4 mask = sah < vbestSAH; vbestPos = select(mask,ii ,vbestPos); vbestSAH = select(mask,sah,vbestSAH); vbestlCount = select(mask,count,vbestlCount); vbestrCount = select(mask,rCounts[i],vbestrCount); } /* find best dimension */ float bestSAH = inf; int bestDim = -1; int bestPos = 0; unsigned int bestlCount = 0; unsigned int bestrCount = 0; for (int dim=0; dim<3; dim++) { /* ignore zero sized dimensions */ if (unlikely(mapping.invalid(dim))) continue; /* test if this is a better dimension */ if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) { bestDim = dim; bestPos = vbestPos[dim]; bestSAH = vbestSAH[dim]; bestlCount = vbestlCount[dim]; bestrCount = vbestrCount[dim]; } } assert(bestSAH >= 0.0f); /* return invalid split if no split found */ if (bestDim == -1) return SpatialBinSplit(inf,-1,0,mapping); /* return best found split */ return SpatialBinSplit(bestSAH,bestDim,bestPos,bestlCount,bestrCount,1.0f,mapping); } private: BBox3fa bounds[BINS][3]; //!< geometry bounds for each bin in each dimension vuint4 numBegin[BINS]; //!< number of primitives starting in bin vuint4 numEnd[BINS]; //!< number of primitives ending in bin }; } }